From 290a7852ebdcc7153ba7d6d23db8783bea866f3d Mon Sep 17 00:00:00 2001 From: Thomas NOEL Date: Tue, 20 Feb 2018 15:52:36 +0100 Subject: [PATCH] solis: try to convert files to PDF (#21997) --- debian/control | 3 ++- passerelle/apps/solis/models.py | 32 +++++++++++++++++++++++++++++--- setup.py | 1 + tests/test_solis.py | 40 +++++++++++++++++++++++++++++++++++----- 4 files changed, 67 insertions(+), 9 deletions(-) diff --git a/debian/control b/debian/control index fc845e2..6a0f016 100644 --- a/debian/control +++ b/debian/control @@ -26,7 +26,8 @@ Depends: ${python:Depends}, python-cmislib (>= 0.5), python-cmislib (< 0.6), python-lxml, python-dateutil, - python-pyproj + python-pyproj, + python-pil Recommends: python-soappy, python-phpserialize Description: Uniform access to multiple data sources and services (Python module) diff --git a/passerelle/apps/solis/models.py b/passerelle/apps/solis/models.py index 76c87eb..166e6f6 100644 --- a/passerelle/apps/solis/models.py +++ b/passerelle/apps/solis/models.py @@ -18,6 +18,8 @@ import base64 import json import re import unicodedata +from PIL import Image +from StringIO import StringIO from django.db import models from django.template.loader import get_template @@ -62,6 +64,21 @@ def unflat(flatten_dict, separator='_'): return dict_ +def convert_to_pdf(content, content_type): + content_type = content_type.lower() + if content_type == 'application/pdf': + return content + if content_type.startswith('image/'): + image = Image.open(StringIO(content)) + if image.mode != 'RGB': + # PDF cannot handle alpha (RGBA) + image = image.convert('RGB') + out = StringIO() + image.save(out, format='PDF') + return out.getvalue() + raise ValueError('cannot convert %s to PDF' % content_type) + + def keystore_upload_to(instance, filename): return '%s/%s/keystore/%s' % (instance.get_connector_slug(), instance.id, filename) @@ -302,13 +319,18 @@ class Solis(BaseResource): # handle specific file: and del: keys files = [] delete_keys = [] + files_failed_pdf_conversion = [] for key, value in payload.items(): # extract files from payload, to send them before the request if key.startswith('file:'): if (isinstance(value, dict) and 'content' in value and 'content_type' in value): filename = key[5:] - binary_content = base64.b64decode(value['content']) - files.append(('files', (filename, binary_content, value['content_type']))) + try: + pdf_content = convert_to_pdf(base64.b64decode(value['content']), + value['content_type']) + files.append(('files', (filename, pdf_content, 'application.pdf'))) + except: + files_failed_pdf_conversion.append(filename) delete_keys.append(key) # Solis doesn't accept somes values or dict-of-values if there are empty # (for example is there is not "conjoint"): remove all these keys if a @@ -340,7 +362,11 @@ class Solis(BaseResource): integration_data['uidPiecesJointes'] = sendfiles.get('id') response = self.request('asg/apa/integrationDemandeApa', data=integration_data) - return {'data': response, 'sendfiles': sendfiles} + return { + 'data': response, + 'files_sent': sendfiles, + 'files_failed_pdf_conversion': files_failed_pdf_conversion + } @endpoint(name='referential', perm='can_access', pattern=r'^(?P[\w-]+)/(?P[\w-]+)/$', diff --git a/setup.py b/setup.py index 4306bcc..20772a2 100755 --- a/setup.py +++ b/setup.py @@ -101,6 +101,7 @@ setup(name='passerelle', 'feedparser', 'lxml', 'python-dateutil', + 'Pillow', ], cmdclass={ 'build': build, diff --git a/tests/test_solis.py b/tests/test_solis.py index 91aed34..3e26e3c 100644 --- a/tests/test_solis.py +++ b/tests/test_solis.py @@ -507,24 +507,54 @@ def test_solis_apa_integration(app, solis): assert requests_post.call_args[1]['json']['demandeApa']['conjoint']['nom'] == 'Conjnom' assert resp.json['err'] == 0 - # add a file + # add files requests_post.reset_mock() requests_post.side_effect = [ - utils.FakedResponse(content='{"id": "foo", "nbFichiersAcceptes": 1}', status_code=200), + utils.FakedResponse(content='{"id": "foo", "nbFichiersAcceptes": 3}', status_code=200), utils.FakedResponse(content='', status_code=204)] demande['file:etat_civil_001.pdf'] = { - 'content': 'Y29pbg==', + 'content': 'JVBERmZha2U=', 'content_type': 'application/pdf', 'filename': 'whatever.pdf', } - demande['file:etat_civil_002.pdf'] = None + demande['file:etat_civil_002.pdf'] = { + # jpeg, will be converted to PDF + 'content': '/9j/2wBDAAMCAgICAgMCAgIDAwMDBAYEBAQEBAgGBgUGCQgKCgkICQkKDA8MCgsOCw' + 'kJDRENDg8QEBEQCgwSExIQEw8QEBD/yQALCAABAAEBAREA/8wABgAQEAX/2gAIAQEAAD8A0s8g/9k=', + 'content_type': 'image/jpeg', + 'filename': 'image.jpg', + } + demande['file:etat_civil_003.pdf'] = { + # transparent png (RGBA), will be converted to RGB and then PDF + 'content': 'iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAACklEQVR4nGMAAQAABQ' + 'ABDQottAAAAABJRU5ErkJggg==', + 'content_type': 'image/png', + 'filename': 'image.png', + } + demande['file:etat_civil_004.pdf'] = { + 'content': 'Y29pbg==', # bad content, conversion will fail + 'content_type': 'image/png', + 'filename': 'image.png', + } + demande['file:etat_civil_005.pdf'] = { + 'content': 'Y29pbg==', + 'content_type': 'video/mp4', # not a image, cannot convert + 'filename': 'video.mp4', + } + demande['file:etat_civil_006.pdf'] = None resp = app.post_json(url, params=demande, status=200) + assert requests_post.call_count == 2 # post files + demandeApa + sent_files = requests_post.call_args_list[0][1]['files'] + assert len(sent_files) == 3 + for file_ in sent_files: + assert file_[1][1].startswith('%PDF') # file entries are removed from demandeApa JSON dict assert 'file:etat_civil_001.pdf' not in requests_post.call_args[1]['json']['demandeApa'] assert 'file:etat_civil_002.pdf' not in requests_post.call_args[1]['json']['demandeApa'] assert resp.json['err'] == 0 assert resp.json['data'] is None - assert resp.json['sendfiles'] == {'id': 'foo', 'nbFichiersAcceptes': 1} + assert resp.json['files_sent'] == {'id': 'foo', 'nbFichiersAcceptes': 3} + assert set(resp.json['files_failed_pdf_conversion']) == set(['etat_civil_004.pdf', 'etat_civil_005.pdf']) # invalid inputs requests_post.reset_mock() -- 2.16.1