0001-utils-factorize-use-of-unicodedata-module-38237.patch
passerelle/apps/csvdatasource/models.py | ||
---|---|---|
12 | 12 |
# GNU Affero General Public License for more details. |
13 | 13 |
# |
14 | 14 |
# You should have received a copy of the GNU Affero General Public License |
15 | 15 |
# along with this program. If not, see <http://www.gnu.org/licenses/>. |
16 | 16 | |
17 | 17 |
import os |
18 | 18 |
import re |
19 | 19 |
import csv |
20 |
import unicodedata |
|
21 | 20 |
from collections import OrderedDict |
22 | 21 | |
23 | 22 |
import six |
24 | 23 |
import pytz |
25 | 24 | |
26 | 25 |
from pyexcel_ods import get_data as get_data_ods |
27 | 26 |
from pyexcel_xls import get_data as get_data_xls |
28 | 27 | |
... | ... | |
34 | 33 |
from django.db import models, transaction |
35 | 34 |
from django.core.exceptions import ValidationError |
36 | 35 |
from django.utils.translation import ugettext_lazy as _ |
37 | 36 | |
38 | 37 |
from passerelle.base.models import BaseResource |
39 | 38 |
from passerelle.utils import batch |
40 | 39 |
from passerelle.utils.jsonresponse import APIError |
41 | 40 |
from passerelle.utils.api import endpoint |
41 |
from passerelle.utils.conversion import normalize |
|
42 | 42 | |
43 | 43 |
identifier_re = re.compile(r"^[^\d\W]\w*\Z", re.UNICODE) |
44 | 44 | |
45 | 45 | |
46 | 46 |
code_cache = OrderedDict() |
47 | 47 | |
48 | 48 | |
49 | 49 |
def get_code(expr): |
... | ... | |
51 | 51 |
if len(code_cache) > 1024: |
52 | 52 |
for key in list(code_cache.keys())[:len(code_cache) - 1024]: |
53 | 53 |
code_cache.pop(key) |
54 | 54 |
if expr not in code_cache: |
55 | 55 |
code_cache[expr] = compile(expr, '<inline>', 'eval') |
56 | 56 |
return code_cache[expr] |
57 | 57 | |
58 | 58 | |
59 |
def normalize(value): |
|
60 |
return unicodedata.normalize('NFKD', value).encode('ascii', 'ignore') |
|
61 | ||
62 | ||
63 | 59 |
class Query(models.Model): |
64 | 60 |
resource = models.ForeignKey('CsvDataSource', on_delete=models.CASCADE) |
65 | 61 |
slug = models.SlugField(_('Name (slug)')) |
66 | 62 |
label = models.CharField(_('Label'), max_length=100) |
67 | 63 |
description = models.TextField(_('Description'), blank=True) |
68 | 64 |
filters = models.TextField( |
69 | 65 |
_('Filters'), |
70 | 66 |
blank=True, |
passerelle/apps/gdc/views.py | ||
---|---|---|
1 |
import unicodedata |
|
2 | ||
3 | 1 |
from django.http import Http404 |
4 | 2 |
from django.views.generic.base import View |
5 | 3 |
from django.views.generic.detail import SingleObjectMixin, DetailView |
6 | 4 | |
7 | 5 |
from passerelle.compat import json_loads |
8 | 6 |
import passerelle.utils as utils |
7 |
from passerelle.utils.conversion import normalize |
|
9 | 8 | |
10 | 9 |
from .models import Gdc, phpserialize, phpserialize_loads, SOAPpy |
11 | 10 | |
12 | 11 | |
13 | 12 |
class GdcCrash(Exception): |
14 | 13 |
pass |
15 | 14 | |
16 | 15 | |
... | ... | |
133 | 132 |
if voie_str and not voie_id: |
134 | 133 |
# look for a voie with that name, so we can provide an identifier |
135 | 134 |
# to gdc |
136 | 135 |
try: |
137 | 136 |
voies = get_voies(self.get_object().service_url, insee) |
138 | 137 |
except GdcCrash: |
139 | 138 |
result = {'result': 'gdc soap crash'} |
140 | 139 |
return utils.response_for_json(request, result) |
141 |
normalized_voie = unicodedata.normalize('NFKD', voie_str).encode('ascii', 'ignore').decode('ascii') |
|
142 |
normalized_voie = normalized_voie.upper() |
|
140 |
normalized_voie = normalize(voie_str).decode('ascii').upper() |
|
143 | 141 |
for k, v in voies: |
144 | 142 |
if v == normalized_voie or k == normalized_voie: |
145 | 143 |
voie_id = k |
146 | 144 |
break |
147 | 145 | |
148 | 146 |
objet = self.request.GET.get('objet') |
149 | 147 |
if objet is None: |
150 | 148 |
objet = data['fields'].get('objet_raw') |
passerelle/apps/solis/models.py | ||
---|---|---|
12 | 12 |
# GNU Affero General Public License for more details. |
13 | 13 |
# |
14 | 14 |
# You should have received a copy of the GNU Affero General Public License |
15 | 15 |
# along with this program. If not, see <http://www.gnu.org/licenses/>. |
16 | 16 | |
17 | 17 |
import base64 |
18 | 18 |
import copy |
19 | 19 |
import re |
20 |
import unicodedata |
|
21 | 20 | |
22 | 21 |
from django.db import models |
23 | 22 |
from django.template.loader import get_template |
24 | 23 |
from django.utils.encoding import force_text |
25 | 24 |
from django.utils.translation import ugettext_lazy as _ |
26 | 25 |
from django.utils.http import urlencode |
27 | 26 | |
28 | 27 |
from passerelle.base.models import BaseResource |
29 | 28 |
from passerelle.compat import json_loads |
30 | 29 |
from passerelle.utils.api import endpoint |
31 | 30 |
from passerelle.utils.jsonresponse import APIError |
32 |
from passerelle.utils.conversion import to_pdf |
|
33 | ||
34 | ||
35 |
def simplify(s): |
|
36 |
''' |
|
37 |
Simplify a string, trying to transform it to lower ascii chars (a-z, 0-9) |
|
38 |
and minimize spaces. Used to compare strings on ?q=something requests. |
|
39 |
''' |
|
40 |
if not s: |
|
41 |
return '' |
|
42 |
s = force_text(s, 'utf-8', 'ignore') |
|
43 |
s = force_text(unicodedata.normalize('NFKD', s).encode('ascii', 'ignore')) |
|
44 |
s = re.sub(r'[^\w\s\'-]', '', s) |
|
45 |
s = re.sub(r'[\s\'_-]+', ' ', s) |
|
46 |
return s.strip().lower() |
|
31 |
from passerelle.utils.conversion import simplify, to_pdf |
|
47 | 32 | |
48 | 33 | |
49 | 34 |
def unflat(flatten_dict, separator='_'): |
50 | 35 |
''' |
51 | 36 |
Expand a "flatten" dict: |
52 | 37 |
>>> unflat({'foo': 'bar', 'two_foo': 'one', 'two_bar': 'two'}) |
53 | 38 |
{'foo': 'bar', 'two': {'foo': 'one', 'bar': 'two'}} |
54 | 39 |
''' |
passerelle/apps/sp_fr/models.py | ||
---|---|---|
17 | 17 | |
18 | 18 |
import re |
19 | 19 |
import os |
20 | 20 |
import stat |
21 | 21 |
import zipfile |
22 | 22 |
import collections |
23 | 23 |
import base64 |
24 | 24 |
import datetime |
25 |
import unicodedata |
|
26 | 25 | |
27 | 26 |
from lxml import etree as ET |
28 | 27 | |
29 | 28 |
from django.core.urlresolvers import reverse |
30 | 29 |
from django.core.files import File |
31 | 30 |
from django.db import models, transaction |
32 | 31 |
from django.template import engines |
33 | 32 |
from django.utils import six |
... | ... | |
35 | 34 | |
36 | 35 |
from jsonfield import JSONField |
37 | 36 | |
38 | 37 |
from passerelle.base.models import BaseResource |
39 | 38 |
from passerelle.utils.api import endpoint |
40 | 39 |
from passerelle.utils.sftp import SFTPField |
41 | 40 |
from passerelle.utils.wcs import FormDefField, get_wcs_choices |
42 | 41 |
from passerelle.utils.xml import text_content |
42 |
from passerelle.utils.conversion import normalize |
|
43 | 43 | |
44 | 44 |
from .xsd import Schema |
45 | 45 | |
46 | 46 |
MAX_REQUESTS_PER_ITERATION = 200 |
47 | 47 | |
48 | 48 |
PROCEDURE_DOC = 'DOC' |
49 | 49 |
PROCEDURE_RCO = 'recensementCitoyen' |
50 | 50 |
PROCEDURE_DDPACS = 'depotDossierPACS' |
... | ... | |
77 | 77 | |
78 | 78 | |
79 | 79 |
def simplify(s): |
80 | 80 |
'''Simplify XML node tag names because XSD from DGME are garbage''' |
81 | 81 |
if not s: |
82 | 82 |
return '' |
83 | 83 |
if not isinstance(s, six.text_type): |
84 | 84 |
s = six.text_type(s, 'utf-8', 'ignore') |
85 |
s = unicodedata.normalize('NFKD', s).encode('ascii', 'ignore')
|
|
85 |
s = normalize(s)
|
|
86 | 86 |
s = re.sub(r'[^\w\s\'-_]', '', s) |
87 | 87 |
s = s.replace('-', '_') |
88 | 88 |
s = re.sub(r'[\s\']+', '', s) |
89 | 89 |
return s.strip().lower() |
90 | 90 | |
91 | 91 | |
92 | 92 |
class Resource(BaseResource): |
93 | 93 |
category = _('Business Process Connectors') |
passerelle/utils/conversion.py | ||
---|---|---|
91 | 91 |
content = six.text_type(repr(args)) if args != [] else '' |
92 | 92 |
except Exception: |
93 | 93 |
content = '<exception-while-rendering-args>' |
94 | 94 |
except AttributeError: |
95 | 95 |
content = '' |
96 | 96 |
return u'%s(%s)' % (e.__class__.__name__, content) |
97 | 97 | |
98 | 98 | |
99 |
def normalize(s): |
|
100 |
return unicodedata.normalize('NFKD', s).encode('ascii', 'ignore') |
|
101 | ||
102 | ||
99 | 103 |
def simplify(s): |
100 | 104 |
''' |
101 | 105 |
Simplify a string, trying to transform it to lower ascii chars (a-z, 0-9) |
102 | 106 |
and minimize spaces. Used to compare strings on ?q=something requests. |
103 | 107 |
''' |
104 | 108 |
if not s: |
105 | 109 |
return '' |
106 | 110 |
s = force_text(s, 'utf-8', 'ignore') |
107 |
s = force_str(unicodedata.normalize('NFKD', s).encode('ascii', 'ignore'))
|
|
111 |
s = force_text(normalize(s))
|
|
108 | 112 |
s = re.sub(r'[^\w\s\'-]', '', s) |
109 | 113 |
s = re.sub(r'[\s\'_-]+', ' ', s) |
110 | 114 |
return s.strip().lower() |
111 | 115 | |
112 | 116 | |
113 | 117 |
def num2deg(xtile, ytile, zoom): |
114 | 118 |
# http://wiki.openstreetmap.org/wiki/Slippy_map_tilenames#Python |
115 | 119 |
n = 2.0 ** zoom |
116 |
- |