def convert_to_pdf(content):
    """Return ``content`` as PDF bytes.

    ``content`` is the raw (already base64-decoded) file payload as a byte
    string.  If it already starts with the ``%PDF`` magic number it is
    returned untouched; otherwise it is opened as an image with Pillow and
    re-encoded as a single PDF document.

    Any exception raised by Pillow (for instance when ``content`` is not a
    recognizable image) is propagated; the caller decides how to report the
    failed conversion.
    """
    # Function-scope import: io.BytesIO exists on Python 2.6+ and 3.x, whereas
    # the StringIO module is Python 2 only.  Pillow reads and writes bytes, so
    # a bytes buffer is the right container on both versions.
    from io import BytesIO

    # Compare against a bytes literal: on Python 3, bytes.startswith('%PDF')
    # raises TypeError, which would flag every valid PDF as unconvertible.
    if content.startswith(b'%PDF'):
        return content
    image = Image.open(BytesIO(content))
    if image.mode != 'RGB':
        # PDF cannot handle alpha (RGBA)
        image = image.convert('RGB')
    out = BytesIO()
    image.save(out, format='PDF')
    return out.getvalue()
and 'content_type' in value): - filename = key[5:] - binary_content = base64.b64decode(value['content']) - files.append(('files', (filename, binary_content, value['content_type']))) delete_keys.append(key) + if value is None: + continue + filename = key[5:] + if isinstance(value, dict) and 'content' in value: + content = base64.b64decode(value['content']) + try: + content = convert_to_pdf(content) + except: + files_failed_pdf_conversion.append(filename) + else: + files.append(('files', (filename, content, 'application/pdf'))) + else: + files_failed_pdf_conversion.append(filename) # Solis doesn't accept somes values or dict-of-values if there are empty # (for example is there is not "conjoint"): remove all these keys if a # specific "del:key_prefix":true entry exists (for example "del:conjoint") @@ -340,7 +364,11 @@ class Solis(BaseResource): integration_data['uidPiecesJointes'] = sendfiles.get('id') response = self.request('asg/apa/integrationDemandeApa', data=integration_data) - return {'data': response, 'sendfiles': sendfiles} + return { + 'data': response, + 'files_sent': sendfiles, + 'files_failed_pdf_conversion': files_failed_pdf_conversion + } @endpoint(name='referential', perm='can_access', pattern=r'^(?P[\w-]+)/(?P[\w-]+)/$', diff --git a/setup.py b/setup.py index 4306bcc..20772a2 100755 --- a/setup.py +++ b/setup.py @@ -101,6 +101,7 @@ setup(name='passerelle', 'feedparser', 'lxml', 'python-dateutil', + 'Pillow', ], cmdclass={ 'build': build, diff --git a/tests/test_solis.py b/tests/test_solis.py index 91aed34..b5dfed8 100644 --- a/tests/test_solis.py +++ b/tests/test_solis.py @@ -507,24 +507,59 @@ def test_solis_apa_integration(app, solis): assert requests_post.call_args[1]['json']['demandeApa']['conjoint']['nom'] == 'Conjnom' assert resp.json['err'] == 0 - # add a file + # add files requests_post.reset_mock() requests_post.side_effect = [ - utils.FakedResponse(content='{"id": "foo", "nbFichiersAcceptes": 1}', status_code=200), + 
utils.FakedResponse(content='{"id": "foo", "nbFichiersAcceptes": 3}', status_code=200), utils.FakedResponse(content='', status_code=204)] demande['file:etat_civil_001.pdf'] = { - 'content': 'Y29pbg==', + 'content': 'JVBERmZha2U=', 'content_type': 'application/pdf', 'filename': 'whatever.pdf', } - demande['file:etat_civil_002.pdf'] = None + demande['file:etat_civil_002.pdf'] = { + # jpeg, will be converted to PDF + 'content': '/9j/2wBDAAMCAgICAgMCAgIDAwMDBAYEBAQEBAgGBgUGCQgKCgkICQkKDA8MCgsOCw' + 'kJDRENDg8QEBEQCgwSExIQEw8QEBD/yQALCAABAAEBAREA/8wABgAQEAX/2gAIAQEAAD8A0s8g/9k=', + 'content_type': 'image/jpeg', + 'filename': 'image.jpg', + } + demande['file:etat_civil_003.pdf'] = { + # transparent png (RGBA), will be converted to RGB and then PDF + 'content': 'iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAACklEQVR4nGMAAQAABQ' + 'ABDQottAAAAABJRU5ErkJggg==', + 'content_type': 'image/png', + 'filename': 'image.png', + } + demande['file:etat_civil_004.pdf'] = { + 'content': 'Y29pbg==', # bad content, conversion will fail + 'content_type': 'image/png', + 'filename': 'image.png', + } + demande['file:etat_civil_005.pdf'] = { + 'content': 'Y29pbg==', + 'content_type': 'video/mp4', # not a image, cannot convert + 'filename': 'video.mp4', + } + demande['file:etat_civil_006.pdf'] = { + 'content_type': 'video/mp4', # no content, cannot convert + } + demande['file:etat_civil_007.pdf'] = None resp = app.post_json(url, params=demande, status=200) + assert requests_post.call_count == 2 # post files + demandeApa + sent_files = requests_post.call_args_list[0][1]['files'] + assert len(sent_files) == 3 + for file_ in sent_files: + assert file_[1][1].startswith('%PDF') # file entries are removed from demandeApa JSON dict assert 'file:etat_civil_001.pdf' not in requests_post.call_args[1]['json']['demandeApa'] assert 'file:etat_civil_002.pdf' not in requests_post.call_args[1]['json']['demandeApa'] assert resp.json['err'] == 0 assert resp.json['data'] is None - assert resp.json['sendfiles'] 
== {'id': 'foo', 'nbFichiersAcceptes': 1} + assert resp.json['files_sent'] == {'id': 'foo', 'nbFichiersAcceptes': 3} + assert set(resp.json['files_failed_pdf_conversion']) == set(['etat_civil_004.pdf', + 'etat_civil_005.pdf', + 'etat_civil_006.pdf']) # invalid inputs requests_post.reset_mock() -- 2.16.1