0001-utils-factorize-use-of-unicodedata-module-38237.patch
passerelle/apps/base_adresse/models.py | ||
---|---|---|
1 | 1 |
import bz2 |
2 | 2 |
import json |
3 | 3 |
import urlparse |
4 |
import unicodedata |
|
5 | 4 | |
6 | 5 |
from requests import RequestException |
7 | 6 | |
... | ... | |
15 | 14 | |
16 | 15 |
from passerelle.base.models import BaseResource |
17 | 16 |
from passerelle.utils.api import endpoint |
18 |
from passerelle.utils.conversion import simplify |
|
17 |
from passerelle.utils.conversion import normalize_lower, simplify
|
|
19 | 18 |
from passerelle.utils.jsonresponse import APIError |
20 | 19 | |
21 | 20 | |
... | ... | |
383 | 382 |
class UnaccentNameMixin(object): |
384 | 383 | |
385 | 384 |
def save(self, *args, **kwargs): |
386 |
self.unaccent_name = unicodedata.normalize('NFKD', self.name).encode('ascii', 'ignore').lower()
|
|
385 |
self.unaccent_name = normalize_lower(self.name)
|
|
387 | 386 |
super(UnaccentNameMixin, self).save(*args, **kwargs) |
388 | 387 | |
389 | 388 |
passerelle/apps/csvdatasource/models.py | ||
---|---|---|
17 | 17 |
import os |
18 | 18 |
import re |
19 | 19 |
import csv |
20 |
import unicodedata |
|
21 | 20 |
from collections import OrderedDict |
22 | 21 | |
23 | 22 |
import six |
... | ... | |
39 | 38 |
from passerelle.utils import batch |
40 | 39 |
from passerelle.utils.jsonresponse import APIError |
41 | 40 |
from passerelle.utils.api import endpoint |
41 |
from passerelle.utils.conversion import normalize_lower |
|
42 | 42 | |
43 | 43 |
identifier_re = re.compile(r"^[^\d\W]\w*\Z", re.UNICODE) |
44 | 44 | |
... | ... | |
56 | 56 |
return code_cache[expr] |
57 | 57 | |
58 | 58 | |
59 |
def normalize(value): |
|
60 |
return unicodedata.normalize('NFKD', value).encode('ascii', 'ignore') |
|
61 | ||
62 | ||
63 | 59 |
class Query(models.Model): |
64 | 60 |
resource = models.ForeignKey('CsvDataSource', on_delete=models.CASCADE) |
65 | 61 |
slug = models.SlugField(_('Name (slug)')) |
... | ... | |
325 | 321 |
row_vars['query'] = kwargs |
326 | 322 |
for i, (code, expr) in enumerate(codes): |
327 | 323 |
try: |
328 |
result = eval(code, {'normalize': normalize}, row_vars)
|
|
324 |
result = eval(code, {'normalize_lower': normalize_lower}, row_vars)
|
|
329 | 325 |
except Exception as e: |
330 | 326 |
data = { |
331 | 327 |
'expr': expr, |
... | ... | |
400 | 396 |
# filtering is done there after projection because we need a projection named text for |
401 | 397 |
# retro-compatibility with previous use of the csvdatasource with select2 |
402 | 398 |
if 'q' in request.GET: |
403 |
filters = ["%s in normalize(text.lower())" % repr(normalize(request.GET['q'].lower()))]
|
|
399 |
filters = ["%s in normalize_lower(text)" % repr(normalize_lower(request.GET['q']))]
|
|
404 | 400 |
data = [row for new_row, row in stream_expressions(filters, data, kind='filters') |
405 | 401 |
if new_row[0]] |
406 | 402 |
passerelle/apps/gdc/views.py | ||
---|---|---|
1 | 1 |
import json |
2 |
import unicodedata |
|
3 | 2 | |
4 | 3 |
from django.http import Http404 |
5 | 4 |
from django.views.generic.base import View |
6 | 5 |
from django.views.generic.detail import SingleObjectMixin, DetailView |
7 | 6 | |
8 | 7 |
import passerelle.utils as utils |
8 |
from passerelle.utils.conversion import normalize |
|
9 | 9 | |
10 | 10 |
from .models import Gdc, phpserialize, phpserialize_loads, SOAPpy |
11 | 11 | |
... | ... | |
138 | 138 |
except GdcCrash: |
139 | 139 |
result = {'result': 'gdc soap crash'} |
140 | 140 |
return utils.response_for_json(request, result) |
141 |
normalized_voie = unicodedata.normalize('NFKD', voie_str).encode('ascii', 'ignore').decode('ascii') |
|
142 |
normalized_voie = normalized_voie.upper() |
|
141 |
normalized_voie = normalize(voie_str).decode('ascii').upper() |
|
143 | 142 |
for k, v in voies: |
144 | 143 |
if v == normalized_voie or k == normalized_voie: |
145 | 144 |
voie_id = k |
passerelle/apps/solis/models.py | ||
---|---|---|
18 | 18 |
import copy |
19 | 19 |
import json |
20 | 20 |
import re |
21 |
import unicodedata |
|
22 | 21 | |
23 | 22 |
from django.db import models |
24 | 23 |
from django.template.loader import get_template |
... | ... | |
29 | 28 |
from passerelle.base.models import BaseResource |
30 | 29 |
from passerelle.utils.api import endpoint |
31 | 30 |
from passerelle.utils.jsonresponse import APIError |
32 |
from passerelle.utils.conversion import to_pdf |
|
33 | ||
34 | ||
35 |
def simplify(s): |
|
36 |
''' |
|
37 |
Simplify a string, trying to transform it to lower ascii chars (a-z, 0-9) |
|
38 |
and minimize spaces. Used to compare strings on ?q=something requests. |
|
39 |
''' |
|
40 |
if not s: |
|
41 |
return '' |
|
42 |
if not isinstance(s, unicode): |
|
43 |
s = unicode(s, 'utf-8', 'ignore') |
|
44 |
s = unicodedata.normalize('NFKD', s).encode('ascii', 'ignore') |
|
45 |
s = re.sub(r'[^\w\s\'-]', '', s) |
|
46 |
s = re.sub(r'[\s\'_-]+', ' ', s) |
|
47 |
return s.strip().lower() |
|
31 |
from passerelle.utils.conversion import simplify, to_pdf |
|
48 | 32 | |
49 | 33 | |
50 | 34 |
def unflat(flatten_dict, separator='_'): |
passerelle/apps/sp_fr/models.py | ||
---|---|---|
22 | 22 |
import collections |
23 | 23 |
import base64 |
24 | 24 |
import datetime |
25 |
import unicodedata |
|
26 | 25 | |
27 | 26 |
from lxml import etree as ET |
28 | 27 | |
... | ... | |
40 | 39 |
from passerelle.utils.sftp import SFTPField |
41 | 40 |
from passerelle.utils.wcs import FormDefField, get_wcs_choices |
42 | 41 |
from passerelle.utils.xml import text_content |
42 |
from passerelle.utils.conversion import normalize |
|
43 | 43 | |
44 | 44 |
from .xsd import Schema |
45 | 45 | |
... | ... | |
82 | 82 |
return '' |
83 | 83 |
if not isinstance(s, six.text_type): |
84 | 84 |
s = six.text_type(s, 'utf-8', 'ignore') |
85 |
s = unicodedata.normalize('NFKD', s).encode('ascii', 'ignore')
|
|
85 |
s = normalize(s)
|
|
86 | 86 |
s = re.sub(r'[^\w\s\'-_]', '', s) |
87 | 87 |
s = s.replace('-', '_') |
88 | 88 |
s = re.sub(r'[\s\']+', '', s) |
passerelle/contrib/agoraplus/normalize.py | ||
---|---|---|
14 | 14 |
# You should have received a copy of the GNU Affero General Public License |
15 | 15 |
# along with this program. If not, see <http://www.gnu.org/licenses/>. |
16 | 16 | |
17 |
import unicodedata |
|
18 | 17 |
import re |
19 | 18 |
from datetime import datetime, timedelta |
20 | 19 |
from decimal import Decimal |
... | ... | |
23 | 22 |
# handle years before 1900 |
24 | 23 |
from django.utils import datetime_safe |
25 | 24 | |
25 |
from passerelle.utils.conversion import normalize |
|
26 | ||
26 | 27 |
# NNNN@mailprov.no emails are not real ones: we will ignore them. |
27 | 28 |
# (They are used by Agora+ to create "fake" login for each user id=NNNN) |
28 | 29 |
PROVISIONING_DOMAIN = 'mailprov.no' |
... | ... | |
41 | 42 |
return '' |
42 | 43 |
if not isinstance(s, unicode): |
43 | 44 |
s = s.decode('utf-8', errors='ignore') |
44 |
s = unicodedata.normalize('NFKD', s).encode('ascii', 'ignore')
|
|
45 |
s = normalize(s)
|
|
45 | 46 |
s = re.sub(r'[^\w\s\'%s]' % space, '', s).strip().lower() |
46 | 47 |
s = re.sub(r'[\s\'%s]+' % space, space, s) |
47 | 48 |
return s |
passerelle/contrib/agoraplus/views.py | ||
---|---|---|
15 | 15 |
# along with this program. If not, see <http://www.gnu.org/licenses/>. |
16 | 16 | |
17 | 17 |
import json |
18 |
import unicodedata |
|
19 | 18 |
import logging |
20 | 19 | |
21 | 20 |
from django.core.exceptions import ObjectDoesNotExist |
... | ... | |
27 | 26 |
from django.utils.http import urlencode |
28 | 27 | |
29 | 28 |
import passerelle.utils as utils |
29 |
from passerelle.utils.conversion import normalize_lower |
|
30 | 30 | |
31 | 31 |
from .models import AgoraPlus, AgoraPlusLink, AgoraAPIError |
32 | 32 |
from .wcs import Formdata |
... | ... | |
36 | 36 |
logger = logging.getLogger('passerelle.contrib.agoraplus') |
37 | 37 | |
38 | 38 | |
39 |
def normalize_lower(s): |
|
40 |
return unicodedata.normalize('NFKD', s).encode('ascii', 'ignore').lower() |
|
41 | ||
42 | ||
43 | 39 |
class AgoraPlusDetailView(GenericDetailView): |
44 | 40 |
model = AgoraPlus |
45 | 41 |
template_name = 'passerelle/contrib/agoraplus/detail.html' |
passerelle/utils/conversion.py | ||
---|---|---|
94 | 94 |
return u'%s(%s)' % (e.__class__.__name__, content) |
95 | 95 | |
96 | 96 | |
97 |
def normalize(s): |
|
98 |
return unicodedata.normalize('NFKD', s).encode('ascii', 'ignore') |
|
99 | ||
100 |
def normalize_lower(s): |
|
101 |
return normalize(s).lower() |
|
102 | ||
97 | 103 |
def simplify(s): |
98 | 104 |
''' |
99 | 105 |
Simplify a string, trying to transform it to lower ascii chars (a-z, 0-9) |
... | ... | |
103 | 109 |
return '' |
104 | 110 |
if not isinstance(s, unicode): |
105 | 111 |
s = unicode(s, 'utf-8', 'ignore') |
106 |
s = unicodedata.normalize('NFKD', s).encode('ascii', 'ignore')
|
|
112 |
s = normalize(s)
|
|
107 | 113 |
s = re.sub(r'[^\w\s\'-]', '', s) |
108 | 114 |
s = re.sub(r'[\s\'_-]+', ' ', s) |
109 | 115 |
return s.strip().lower() |
110 |
- |