Projet

Général

Profil

0001-utils-factorize-use-of-unicodedata-module-38237.patch

Nicolas Roche, 09 décembre 2019 12:59

Télécharger (10 ko)

Voir les différences:

Subject: [PATCH] utils: factorize use of unicodedata module (#38237)

 passerelle/apps/base_adresse/models.py    |  5 ++---
 passerelle/apps/csvdatasource/models.py   | 10 +++-------
 passerelle/apps/gdc/views.py              |  5 ++---
 passerelle/apps/solis/models.py           | 18 +-----------------
 passerelle/apps/sp_fr/models.py           |  4 ++--
 passerelle/contrib/agoraplus/normalize.py |  5 +++--
 passerelle/contrib/agoraplus/views.py     |  6 +-----
 passerelle/utils/conversion.py            |  8 +++++++-
 8 files changed, 21 insertions(+), 40 deletions(-)
passerelle/apps/base_adresse/models.py
1 1
import bz2
2 2
import json
3 3
import urlparse
4
import unicodedata
5 4

  
6 5
from requests import RequestException
7 6

  
......
15 14

  
16 15
from passerelle.base.models import BaseResource
17 16
from passerelle.utils.api import endpoint
18
from passerelle.utils.conversion import simplify
17
from passerelle.utils.conversion import normalize_lower, simplify
19 18
from passerelle.utils.jsonresponse import APIError
20 19

  
21 20

  
......
383 382
class UnaccentNameMixin(object):
384 383

  
385 384
    def save(self, *args, **kwargs):
386
        self.unaccent_name = unicodedata.normalize('NFKD', self.name).encode('ascii', 'ignore').lower()
385
        self.unaccent_name = normalize_lower(self.name)
387 386
        super(UnaccentNameMixin, self).save(*args, **kwargs)
388 387

  
389 388

  
passerelle/apps/csvdatasource/models.py
17 17
import os
18 18
import re
19 19
import csv
20
import unicodedata
21 20
from collections import OrderedDict
22 21

  
23 22
import six
......
39 38
from passerelle.utils import batch
40 39
from passerelle.utils.jsonresponse import APIError
41 40
from passerelle.utils.api import endpoint
41
from passerelle.utils.conversion import normalize_lower
42 42

  
43 43
identifier_re = re.compile(r"^[^\d\W]\w*\Z", re.UNICODE)
44 44

  
......
56 56
    return code_cache[expr]
57 57

  
58 58

  
59
def normalize(value):
60
    return unicodedata.normalize('NFKD', value).encode('ascii', 'ignore')
61

  
62

  
63 59
class Query(models.Model):
64 60
    resource = models.ForeignKey('CsvDataSource', on_delete=models.CASCADE)
65 61
    slug = models.SlugField(_('Name (slug)'))
......
325 321
                row_vars['query'] = kwargs
326 322
                for i, (code, expr) in enumerate(codes):
327 323
                    try:
328
                        result = eval(code, {'normalize': normalize}, row_vars)
324
                        result = eval(code, {'normalize_lower': normalize_lower}, row_vars)
329 325
                    except Exception as e:
330 326
                        data = {
331 327
                            'expr': expr,
......
400 396
        # filtering is done there after projection because we need a projection named text for
401 397
        # retro-compatibility with previous use of the csvdatasource with select2
402 398
        if 'q' in request.GET:
403
            filters = ["%s in normalize(text.lower())" % repr(normalize(request.GET['q'].lower()))]
399
            filters = ["%s in normalize_lower(text)" % repr(normalize_lower(request.GET['q']))]
404 400
            data = [row for new_row, row in stream_expressions(filters, data, kind='filters')
405 401
                    if new_row[0]]
406 402

  
passerelle/apps/gdc/views.py
1 1
import json
2
import unicodedata
3 2

  
4 3
from django.http import Http404
5 4
from django.views.generic.base import View
6 5
from django.views.generic.detail import SingleObjectMixin, DetailView
7 6

  
8 7
import passerelle.utils as utils
8
from passerelle.utils.conversion import normalize
9 9

  
10 10
from .models import Gdc, phpserialize, phpserialize_loads, SOAPpy
11 11

  
......
138 138
            except GdcCrash:
139 139
                result = {'result': 'gdc soap crash'}
140 140
                return utils.response_for_json(request, result)
141
            normalized_voie = unicodedata.normalize('NFKD', voie_str).encode('ascii', 'ignore').decode('ascii')
142
            normalized_voie = normalized_voie.upper()
141
            normalized_voie = normalize(voie_str).decode('ascii').upper()
143 142
            for k, v in voies:
144 143
                if v == normalized_voie or k == normalized_voie:
145 144
                    voie_id = k
passerelle/apps/solis/models.py
18 18
import copy
19 19
import json
20 20
import re
21
import unicodedata
22 21

  
23 22
from django.db import models
24 23
from django.template.loader import get_template
......
29 28
from passerelle.base.models import BaseResource
30 29
from passerelle.utils.api import endpoint
31 30
from passerelle.utils.jsonresponse import APIError
32
from passerelle.utils.conversion import to_pdf
33

  
34

  
35
def simplify(s):
36
    '''
37
    Simplify a string, trying to transform it to lower ascii chars (a-z, 0-9)
38
    and minimize spaces. Used to compare strings on ?q=something requests.
39
    '''
40
    if not s:
41
        return ''
42
    if not isinstance(s, unicode):
43
        s = unicode(s, 'utf-8', 'ignore')
44
    s = unicodedata.normalize('NFKD', s).encode('ascii', 'ignore')
45
    s = re.sub(r'[^\w\s\'-]', '', s)
46
    s = re.sub(r'[\s\'_-]+', ' ', s)
47
    return s.strip().lower()
31
from passerelle.utils.conversion import simplify, to_pdf
48 32

  
49 33

  
50 34
def unflat(flatten_dict, separator='_'):
passerelle/apps/sp_fr/models.py
22 22
import collections
23 23
import base64
24 24
import datetime
25
import unicodedata
26 25

  
27 26
from lxml import etree as ET
28 27

  
......
40 39
from passerelle.utils.sftp import SFTPField
41 40
from passerelle.utils.wcs import FormDefField, get_wcs_choices
42 41
from passerelle.utils.xml import text_content
42
from passerelle.utils.conversion import normalize
43 43

  
44 44
from .xsd import Schema
45 45

  
......
82 82
        return ''
83 83
    if not isinstance(s, six.text_type):
84 84
        s = six.text_type(s, 'utf-8', 'ignore')
85
    s = unicodedata.normalize('NFKD', s).encode('ascii', 'ignore')
85
    s = normalize(s)
86 86
    s = re.sub(r'[^\w\s\'-_]', '', s)
87 87
    s = s.replace('-', '_')
88 88
    s = re.sub(r'[\s\']+', '', s)
passerelle/contrib/agoraplus/normalize.py
14 14
# You should have received a copy of the GNU Affero General Public License
15 15
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
16 16

  
17
import unicodedata
18 17
import re
19 18
from datetime import datetime, timedelta
20 19
from decimal import Decimal
......
23 22
# handle years before 1900
24 23
from django.utils import datetime_safe
25 24

  
25
from passerelle.utils.conversion import normalize
26

  
26 27
# NNNN@mailprov.no emails are not real ones: we will ignore them.
27 28
# (They are used by Agora+ to create "fake" login for each user id=NNNN)
28 29
PROVISIONING_DOMAIN = 'mailprov.no'
......
41 42
        return ''
42 43
    if not isinstance(s, unicode):
43 44
        s = s.decode('utf-8', errors='ignore')
44
    s = unicodedata.normalize('NFKD', s).encode('ascii', 'ignore')
45
    s = normalize(s)
45 46
    s = re.sub(r'[^\w\s\'%s]' % space, '', s).strip().lower()
46 47
    s = re.sub(r'[\s\'%s]+' % space, space, s)
47 48
    return s
passerelle/contrib/agoraplus/views.py
15 15
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
16 16

  
17 17
import json
18
import unicodedata
19 18
import logging
20 19

  
21 20
from django.core.exceptions import ObjectDoesNotExist
......
27 26
from django.utils.http import urlencode
28 27

  
29 28
import passerelle.utils as utils
29
from passerelle.utils.conversion import normalize_lower
30 30

  
31 31
from .models import AgoraPlus, AgoraPlusLink, AgoraAPIError
32 32
from .wcs import Formdata
......
36 36
logger = logging.getLogger('passerelle.contrib.agoraplus')
37 37

  
38 38

  
39
def normalize_lower(s):
40
    return unicodedata.normalize('NFKD', s).encode('ascii', 'ignore').lower()
41

  
42

  
43 39
class AgoraPlusDetailView(GenericDetailView):
44 40
    model = AgoraPlus
45 41
    template_name = 'passerelle/contrib/agoraplus/detail.html'
passerelle/utils/conversion.py
94 94
    return u'%s(%s)' % (e.__class__.__name__, content)
95 95

  
96 96

  
97
def normalize(s):
98
    return unicodedata.normalize('NFKD', s).encode('ascii', 'ignore')
99

  
100
def normalize_lower(s):
101
    return normalize(s).lower()
102

  
97 103
def simplify(s):
98 104
    '''
99 105
    Simplify a string, trying to transform it to lower ascii chars (a-z, 0-9)
......
103 109
        return ''
104 110
    if not isinstance(s, unicode):
105 111
        s = unicode(s, 'utf-8', 'ignore')
106
    s = unicodedata.normalize('NFKD', s).encode('ascii', 'ignore')
112
    s = normalize(s)
107 113
    s = re.sub(r'[^\w\s\'-]', '', s)
108 114
    s = re.sub(r'[\s\'_-]+', ' ', s)
109 115
    return s.strip().lower()
110
-