From dfb21e99fec5de4a575e5ec51fc4ba7792faeb7a Mon Sep 17 00:00:00 2001 From: Serghei Mihai Date: Tue, 13 Nov 2018 09:42:55 +0100 Subject: [PATCH] csvdatasource: allow normalized search for unicode values (#26188) --- passerelle/apps/csvdatasource/lookups.py | 17 +++++++++++++ passerelle/apps/csvdatasource/views.py | 13 +++++++--- tests/test_csv_datasource.py | 31 ++++++++++++++++++++++++ 3 files changed, 58 insertions(+), 3 deletions(-) diff --git a/passerelle/apps/csvdatasource/lookups.py b/passerelle/apps/csvdatasource/lookups.py index 667cdc07..08fe7282 100644 --- a/passerelle/apps/csvdatasource/lookups.py +++ b/passerelle/apps/csvdatasource/lookups.py @@ -1,5 +1,7 @@ DELIMITER = '__' +import unicodedata + class InvalidOperatorError(Exception): pass @@ -13,6 +15,9 @@ def is_int(value): except (ValueError, TypeError): return False +def normalize(value): + return unicodedata.normalize('NFKD', value).encode('ascii', 'ignore') + class Lookup(object): @@ -22,6 +27,12 @@ class Lookup(object): def icontains(self, key, value): return lambda x: value.lower() in x[key].lower() + def normcontains(self, key, value): + return lambda x: normalize(value) in normalize(x[key]) + + def inormcontains(self, key, value): + return lambda x: normalize(value.lower()) in normalize(x[key].lower()) + def gt(self, key, value): return lambda x: int(x[key]) > int(value) @@ -54,6 +65,12 @@ class Lookup(object): def ieq(self, key, value): return lambda x: value.lower() == x[key].lower() + def normeq(self, key, value): + return lambda x: normalize(value) == normalize(x[key]) + + def inormeq(self, key, value): + return lambda x: normalize(value.lower()) == normalize(x[key].lower()) + def ne(self, key, value): if is_int(value): return lambda x: int(value) != int(x[key]) diff --git a/passerelle/apps/csvdatasource/views.py b/passerelle/apps/csvdatasource/views.py index 1754c2d2..f1d03aca 100644 --- a/passerelle/apps/csvdatasource/views.py +++ b/passerelle/apps/csvdatasource/views.py @@ -38,13 +38,20 @@ class CsvDataView(View, SingleObjectMixin): params = request.GET case_insensitive = 'case-insensitive' in params + normalized = 'normalized' in params query = params.get('q', None) + if normalized: + lookup = 'normcontains' + else: + lookup = 'contains' + if query: if case_insensitive: - filters['text__icontains'] = query.lower() - else: - filters['text__contains'] = query + lookup = 'i' + lookup + query = query.lower() + + filters['text__%s' % lookup] = query # builds filters according to csv file header for column_title in [t.strip() for t in obj.columns_keynames.split(',') if t]: diff --git a/tests/test_csv_datasource.py b/tests/test_csv_datasource.py index 0d707df6..61cae319 100644 --- a/tests/test_csv_datasource.py +++ b/tests/test_csv_datasource.py @@ -182,6 +182,12 @@ def test_unicode_filter_data(client, setup, filetype): assert 'text' in item assert filter_criteria in item['text'] + # require normalized comparison + filters['normalized'] = '' + filters['text'] = 'Benoit' + resp = client.get(url, filters) + assert len(result) + def test_unicode_case_insensitive_filter_data(client, setup, filetype): csvdata, url = setup(',id,,text,', filename=filetype, data=get_file_content(filetype)) filter_criteria = u'anaëlle' @@ -194,6 +200,12 @@ def test_unicode_case_insensitive_filter_data(client, setup, filetype): assert 'text' in item assert filter_criteria.lower() in item['text'].lower() + # require normalized comparison + filters['normalized'] = '' + filters['text'] = 'anaelle' + resp = client.get(url, filters) + assert len(result) + def test_data_bom(client, setup): csvdata, url = setup('fam,id,, text,sexe ', data=data_bom) filters = {'text':'Eliot'} @@ -258,6 +270,25 @@ def test_advanced_filters_combo(client, setup, filetype): assert result[0]['id'] == '22' assert result[0]['lname'] == 'MARTIN' +def test_normalized_filters(client, setup, filetype): + csvdata, url = setup(filename=filetype, data=get_file_content(filetype)) + filters = { + 'fname__normcontains': 'aëlle' + } + resp = client.get(url, filters) + result = parse_response(resp) + assert len(result) == 1 + assert result[0]['id'] == '46' + assert result[0]['lname'] == 'WILSON-LUZAYADIO' + + filters = { + 'fname__inormcontains': 'ANAëllE' + } + resp = client.get(url, filters) + result = parse_response(resp) + assert len(result) == 1 + assert result[0]['fname'] == u'Anaëlle' + def test_unknown_operator(client, setup, filetype): csvdata, url = setup(filename=filetype, data=get_file_content(filetype)) filters = {'id__whatever': '25', 'fname__icontains':'Eliot'} -- 2.19.1