From 723d9c8f1b2ba8aa746aeea7387ec1b39f8a0929 Mon Sep 17 00:00:00 2001 From: Benjamin Dauvergne Date: Thu, 28 Feb 2019 16:13:20 +0100 Subject: [PATCH 2/2] actesweb: work-around not latin15 encodable characters (fixes #30995) --- debian/control | 3 ++- passerelle/apps/actesweb/models.py | 4 ++++ passerelle/utils/conversion.py | 25 +++++++++++++++++++++++++ setup.py | 1 + 4 files changed, 32 insertions(+), 1 deletion(-) diff --git a/debian/control b/debian/control index 96947d8..c09542a 100644 --- a/debian/control +++ b/debian/control @@ -29,7 +29,8 @@ Depends: ${python:Depends}, python-pyproj, python-pil, python-zeep, - python-jsonschema + python-jsonschema, + python-unidecode Recommends: python-soappy, python-phpserialize Description: Uniform access to multiple data sources and services (Python module) diff --git a/passerelle/apps/actesweb/models.py b/passerelle/apps/actesweb/models.py index 00fb70d..855e854 100644 --- a/passerelle/apps/actesweb/models.py +++ b/passerelle/apps/actesweb/models.py @@ -31,6 +31,7 @@ from django.utils.translation import ugettext_lazy as _ from passerelle.base.models import BaseResource from passerelle.utils.api import endpoint from passerelle.utils.jsonresponse import APIError +from passerelle.utils.conversion import to_encoding_with_fallback @contextlib.contextmanager @@ -80,6 +81,9 @@ class ActesWeb(BaseResource): else: os.makedirs(tmp_dir) + # ensure demand_content can be encoded to latin15 + demand_content = to_encoding_with_fallback(demand_content, 'iso-8859-15').decode('iso-8859-15') + filename = '%s.DEM' % now().strftime('%Y-%m-%d_%H-%M-%S_%f') filepath = os.path.join(self.basepath, filename) with named_tempfile(dir=tmp_dir, suffix='.DEM', delete=False) as tpf: diff --git a/passerelle/utils/conversion.py b/passerelle/utils/conversion.py index 3d9cdea..f062077 100644 --- a/passerelle/utils/conversion.py +++ b/passerelle/utils/conversion.py @@ -14,9 +14,12 @@ # You should have received a copy of the GNU Affero General Public 
import re
import unicodedata

# A transliteration starting with plain ASCII letters means the original
# character was a letter-like symbol (accented letter, ligature, ...).
_ASCII_LETTERS = re.compile('[a-zA-Z]+')


def _transliterate(char):
    """Return the ASCII transliteration of *char* as computed by Unidecode."""
    # Function-scope import keeps this fragment usable on its own.
    import unidecode

    return unidecode.unidecode(char)


# copied from
# https://stackoverflow.com/questions/10294032/python-replace-typographical-quotes-dashes-etc-with-their-ascii-counterparts
def char_filter(string):
    """Yield the pieces of *string* with typographic symbols made ASCII.

    The text is NFC-normalized first.  Each character is then transliterated:
    when the transliteration starts with ASCII letters the original character
    is kept untouched, otherwise (typographic quotes, dashes, ...) the ASCII
    transliteration is yielded in its place.

    :param string: unicode text to filter.
    :returns: generator of text fragments (original characters or their
        ASCII replacement, which may be empty or several characters long).
    """
    for char in unicodedata.normalize('NFC', string):
        if ord(char) < 128:
            # ASCII characters are their own transliteration, so both
            # branches below would yield them unchanged.
            yield char
            continue
        decoded = _transliterate(char)
        if _ASCII_LETTERS.match(decoded):
            yield char
        else:
            yield decoded


def clean_string(string):
    """Return *string* with typographic symbols replaced, see char_filter()."""
    return ''.join(char_filter(string))


def to_encoding_with_fallback(s, encoding):
    """Encode *s* to *encoding*, degrading gracefully what cannot be encoded.

    The text first goes through clean_string().  Characters that are still
    not representable in *encoding* (for instance a Polish L-stroke in
    ISO-8859-15) are replaced by their ASCII transliteration instead of
    being lost; only what remains unencodable after that becomes the
    codec's replacement marker.

    :param s: unicode text to encode.
    :param encoding: name of the target codec, e.g. 'iso-8859-15'.
    :returns: the encoded byte string.
    """
    cleaned = clean_string(s)
    try:
        # Common case: everything left after cleaning is encodable.
        return cleaned.encode(encoding)
    except UnicodeEncodeError:
        pass

    pieces = []
    for char in cleaned:
        try:
            char.encode(encoding)
        except UnicodeEncodeError:
            # char_filter() kept this character because it looks like a
            # letter; transliterate it rather than emitting '?'.
            char = _transliterate(char) or char
        pieces.append(char)
    return ''.join(pieces).encode(encoding, 'replace')