Projet

Général

Profil

0001-utils-factorize-use-of-unicodedata-module-38237.patch

Nicolas Roche, 06 avril 2020 07:41

Télécharger (8,48 ko)

Voir les différences:

Subject: [PATCH] utils: factorize use of unicodedata module (#38237)

 passerelle/apps/csvdatasource/models.py | 4 ++--
 passerelle/apps/gdc/views.py            | 5 ++---
 passerelle/apps/solis/models.py         | 5 ++---
 passerelle/apps/sp_fr/models.py         | 4 ++--
 passerelle/utils/conversion.py          | 6 +++++-
 5 files changed, 13 insertions(+), 11 deletions(-)
passerelle/apps/csvdatasource/models.py
12 12
# GNU Affero General Public License for more details.
13 13
#
14 14
# You should have received a copy of the GNU Affero General Public License
15 15
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
16 16

  
17 17
import os
18 18
import re
19 19
import csv
20
import unicodedata
21 20
from collections import OrderedDict
22 21

  
23 22
import six
24 23
import pytz
25 24

  
26 25
from pyexcel_ods import get_data as get_data_ods
27 26
from pyexcel_xls import get_data as get_data_xls
28 27

  
......
34 33
from django.db import models, transaction
35 34
from django.core.exceptions import ValidationError
36 35
from django.utils.translation import ugettext_lazy as _
37 36

  
38 37
from passerelle.base.models import BaseResource
39 38
from passerelle.utils import batch
40 39
from passerelle.utils.jsonresponse import APIError
41 40
from passerelle.utils.api import endpoint
41
from passerelle.utils.conversion import normalize as normalize_text
42 42

  
43 43
identifier_re = re.compile(r"^[^\d\W]\w*\Z", re.UNICODE)
44 44

  
45 45

  
46 46
code_cache = OrderedDict()
47 47

  
48 48

  
49 49
def get_code(expr):
......
52 52
        for key in list(code_cache.keys())[:len(code_cache) - 1024]:
53 53
            code_cache.pop(key)
54 54
    if expr not in code_cache:
55 55
        code_cache[expr] = compile(expr, '<inline>', 'eval')
56 56
    return code_cache[expr]
57 57

  
58 58

  
59 59
def normalize(value):
60
    return unicodedata.normalize('NFKD', value).encode('ascii', 'ignore')
60
    return normalize_text(value).encode('ascii', 'ignore')
61 61

  
62 62

  
63 63
class Query(models.Model):
64 64
    resource = models.ForeignKey('CsvDataSource', on_delete=models.CASCADE)
65 65
    slug = models.SlugField(_('Name (slug)'))
66 66
    label = models.CharField(_('Label'), max_length=100)
67 67
    description = models.TextField(_('Description'), blank=True)
68 68
    filters = models.TextField(
passerelle/apps/gdc/views.py
1
import unicodedata
2

  
3 1
from django.http import Http404
4 2
from django.views.generic.base import View
5 3
from django.views.generic.detail import SingleObjectMixin, DetailView
6 4

  
7 5
from passerelle.compat import json_loads
8 6
import passerelle.utils as utils
7
from passerelle.utils.conversion import normalize
9 8

  
10 9
from .models import Gdc, phpserialize, phpserialize_loads, SOAPpy
11 10

  
12 11

  
13 12
class GdcCrash(Exception):
14 13
    pass
15 14

  
16 15

  
......
133 132
        if voie_str and not voie_id:
134 133
            # look for a voie with that name, so we can provide an identifier
135 134
            # to gdc
136 135
            try:
137 136
                voies = get_voies(self.get_object().service_url, insee)
138 137
            except GdcCrash:
139 138
                result = {'result': 'gdc soap crash'}
140 139
                return utils.response_for_json(request, result)
141
            normalized_voie = unicodedata.normalize('NFKD', voie_str).encode('ascii', 'ignore').decode('ascii')
140
            normalized_voie = normalize(voie_str).encode('ascii', 'ignore').decode('ascii')
142 141
            normalized_voie = normalized_voie.upper()
143 142
            for k, v in voies:
144 143
                if v == normalized_voie or k == normalized_voie:
145 144
                    voie_id = k
146 145
                    break
147 146

  
148 147
        objet = self.request.GET.get('objet')
149 148
        if objet is None:
passerelle/apps/solis/models.py
12 12
# GNU Affero General Public License for more details.
13 13
#
14 14
# You should have received a copy of the GNU Affero General Public License
15 15
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
16 16

  
17 17
import base64
18 18
import copy
19 19
import re
20
import unicodedata
21 20

  
22 21
from django.db import models
23 22
from django.template.loader import get_template
24 23
from django.utils.encoding import force_text
25 24
from django.utils.translation import ugettext_lazy as _
26 25
from django.utils.http import urlencode
27 26

  
28 27
from passerelle.base.models import BaseResource
29 28
from passerelle.compat import json_loads
30 29
from passerelle.utils.api import endpoint
31 30
from passerelle.utils.jsonresponse import APIError
32
from passerelle.utils.conversion import to_pdf
31
from passerelle.utils.conversion import normalize, to_pdf
33 32

  
34 33

  
35 34
def simplify(s):
36 35
    '''
37 36
    Simplify a string, trying to transform it to lower ascii chars (a-z, 0-9)
38 37
    and minimize spaces. Used to compare strings on ?q=something requests.
39 38
    '''
40 39
    if not s:
41 40
        return ''
42 41
    s = force_text(s, 'utf-8', 'ignore')
43
    s = force_text(unicodedata.normalize('NFKD', s).encode('ascii', 'ignore'))
42
    s = force_text(normalize(s).encode('ascii', 'ignore'))
44 43
    s = re.sub(r'[^\w\s\'-]', '', s)
45 44
    s = re.sub(r'[\s\'_-]+', ' ', s)
46 45
    return s.strip().lower()
47 46

  
48 47

  
49 48
def unflat(flatten_dict, separator='_'):
50 49
    '''
51 50
    Expand a "flatten" dict:
passerelle/apps/sp_fr/models.py
17 17

  
18 18
import re
19 19
import os
20 20
import stat
21 21
import zipfile
22 22
import collections
23 23
import base64
24 24
import datetime
25
import unicodedata
26 25

  
27 26
from lxml import etree as ET
28 27

  
29 28
from django.core.urlresolvers import reverse
30 29
from django.core.files import File
31 30
from django.db import models, transaction
32 31
from django.template import engines
33 32
from django.utils import six
......
35 34

  
36 35
from jsonfield import JSONField
37 36

  
38 37
from passerelle.base.models import BaseResource
39 38
from passerelle.utils.api import endpoint
40 39
from passerelle.utils.sftp import SFTPField
41 40
from passerelle.utils.wcs import FormDefField, get_wcs_choices
42 41
from passerelle.utils.xml import text_content
42
from passerelle.utils.conversion import normalize
43 43

  
44 44
from .xsd import Schema
45 45

  
46 46
MAX_REQUESTS_PER_ITERATION = 200
47 47

  
48 48
PROCEDURE_DOC = 'DOC'
49 49
PROCEDURE_RCO = 'recensementCitoyen'
50 50
PROCEDURE_DDPACS = 'depotDossierPACS'
......
77 77

  
78 78

  
79 79
def simplify(s):
80 80
    '''Simplify XML node tag names because XSD from DGME are garbage'''
81 81
    if not s:
82 82
        return ''
83 83
    if not isinstance(s, six.text_type):
84 84
        s = six.text_type(s, 'utf-8', 'ignore')
85
    s = unicodedata.normalize('NFKD', s).encode('ascii', 'ignore')
85
    s = normalize(s).encode('ascii', 'ignore')
86 86
    s = re.sub(r'[^\w\s\'-_]', '', s)
87 87
    s = s.replace('-', '_')
88 88
    s = re.sub(r'[\s\']+', '', s)
89 89
    return s.strip().lower()
90 90

  
91 91

  
92 92
class Resource(BaseResource):
93 93
    category = _('Business Process Connectors')
passerelle/utils/conversion.py
91 91
            content = six.text_type(repr(args)) if args != [] else ''
92 92
        except Exception:
93 93
            content = '<exception-while-rendering-args>'
94 94
    except AttributeError:
95 95
        content = ''
96 96
    return u'%s(%s)' % (e.__class__.__name__, content)
97 97

  
98 98

  
99
def normalize(s):
100
    return unicodedata.normalize('NFKD', s)
101

  
102

  
99 103
def simplify(s):
100 104
    '''
101 105
    Simplify a string, trying to transform it to lower ascii chars (a-z, 0-9)
102 106
    and minimize spaces. Used to compare strings on ?q=something requests.
103 107
    '''
104 108
    if not s:
105 109
        return ''
106 110
    s = force_text(s, 'utf-8', 'ignore')
107
    s = force_str(unicodedata.normalize('NFKD', s).encode('ascii', 'ignore'))
111
    s = force_str(normalize(s).encode('ascii', 'ignore'))
108 112
    s = re.sub(r'[^\w\s\'-]', '', s)
109 113
    s = re.sub(r'[\s\'_-]+', ' ', s)
110 114
    return s.strip().lower()
111 115

  
112 116

  
113 117
def num2deg(xtile, ytile, zoom):
114 118
    # http://wiki.openstreetmap.org/wiki/Slippy_map_tilenames#Python
115 119
    n = 2.0 ** zoom
116
-