From b1f5732e7334e20c2397939a7944ad1a128f3f00 Mon Sep 17 00:00:00 2001 From: Benjamin Dauvergne Date: Mon, 31 Aug 2020 10:46:13 +0200 Subject: [PATCH] misc: accept duplicate fields with the same type (#42429) --- tests/conftest.py | 10 +++++++++- tests/olap.model | 10 ++++++++-- tests/test_wcs.py | 20 ++++++++++++++++++++ wcs_olap/feeder.py | 31 ++++++++++++++++++++----------- 4 files changed, 57 insertions(+), 14 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index 70176e4..20925dd 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -102,9 +102,13 @@ formdef.fields = [ fields.ItemField(id='4', label='4rth field', type='item', varname='itemOpen'), fields.StringField(id='5', label='5th field', type='string', anonymise=False, varname='stringCaseSensitive-é'), fields.BoolField(id='6', label='6th field duplicate', type='bool', varname='duplicate'), - fields.StringField(id='7', label='7th field duplicate', type='string', anonymise=False, varname='duplicate'), + fields.StringField(id='7', label='7th field bad duplicate', type='string', anonymise=False, varname='duplicate'), fields.StringField(id='8', label='8th field integer', type='string', anonymise=False, varname='integer', validation={'type': 'digits'}), + fields.StringField(id='9', label='8th field duplicate', type='string', anonymise=False, + required=False, varname='good_duplicate'), + fields.StringField(id='10', label='9th field good duplicate', type='string', anonymise=False, + required=False, varname='good_duplicate'), ] formdef.store() @@ -124,16 +128,20 @@ for i in range(50): formdata.data['2_display'] = 'foo' formdata.data['4'] = 'open_one' formdata.data['4_display'] = 'open_one' + formdata.data['9'] = 'a' elif i%4 == 1: formdata.data['2'] = 'bar' formdata.data['2_display'] = 'bar' formdata.data['4'] = 'open_two' formdata.data['4_display'] = 'open_two' + formdata.data['10'] = 'b' else: formdata.data['2'] = 'baz' formdata.data['2_display'] = 'baz' formdata.data['4'] = "open'three" formdata.data['4_display'] = "open'three" + formdata.data['9'] = 'a' + formdata.data['10'] = 'b' formdata.data['3'] = bool(i % 2) formdata.data['8'] = str(i % 10) diff --git a/tests/olap.model b/tests/olap.model index 2e63cc3..efcbde5 100644 --- a/tests/olap.model +++ b/tests/olap.model @@ -287,6 +287,13 @@ "name": "stringCaseSensitive-é", "type": "string", "value": "\"field_stringCaseSensitive-é\"" + }, + { + "filter": true, + "label": "9th field good duplicate", + "name": "good_duplicate", + "type": "string", + "value": "\"field_good_duplicate\"" } ], "fact_table" : "\"formdata_demande\"", @@ -419,8 +426,7 @@ ], "name" : "formdata_demande", "warnings": [ - "le champ « 6th field duplicate » a un nom de variable dupliqué « duplicate »", - "le champ « 7th field duplicate » a un nom de variable dupliqué « duplicate »" + "Le champ « 7th field bad duplicate » a un nom de variable dupliqué « duplicate » mais pas le même type que « 6th field duplicate », tous les champs avec ce nom seront ignorés (string != bool)." ] } ], diff --git a/tests/test_wcs.py b/tests/test_wcs.py index 70de2fe..0fcad1d 100644 --- a/tests/test_wcs.py +++ b/tests/test_wcs.py @@ -54,6 +54,11 @@ def test_wcs_fixture(wcs, postgres_db, tmpdir, olap_cmd, caplog): for url_with_limit in url_with_limits: assert 'limit=500&' in url_with_limit + assert ( + 'Le champ « 7th field bad duplicate » a un nom de variable dupliqué ' + '« duplicate » mais pas le même type que « 6th field duplicate », ' + 'tous les champs avec ce nom seront ignorés (string != bool).') in caplog.text + expected_schema = [ ('agent', 'id'), ('agent', 'label'), @@ -103,6 +108,7 @@ def test_wcs_fixture(wcs, postgres_db, tmpdir, olap_cmd, caplog): ('formdata_demande', 'field_itemOpen'), ('formdata_demande', 'field_stringCaseSensitive-\xe9'), ('formdata_demande', 'field_integer'), + ('formdata_demande', 'field_good_duplicate'), ('formdata_demande', 'function__receiver'), ('formdata_demande_field_item', 'id'), ('formdata_demande_field_item', 'label'), @@ -167,6 +173,19 @@ FULL OUTER JOIN public.dates AS dates ON series.date = dates.date WHERE dates.da expected_json_schema['pg_dsn'] = postgres_db.dsn assert json_schema == expected_json_schema + # verify data in duplicated columns + with postgres_db.conn() as conn: + with conn.cursor() as c: + c.execute('SET search_path = olap') + c.execute('SELECT field_good_duplicate, count(id) ' + 'FROM formdata_demande ' + 'GROUP BY field_good_duplicate ' + 'ORDER BY field_good_duplicate') + assert dict(c.fetchall()) == { + 'a': 37, + 'b': 13, + } + def test_requests_exception(wcs, postgres_db, tmpdir, olap_cmd, caplog): @httmock.urlmatch() @@ -189,6 +208,7 @@ def test_requests_not_ok(wcs, postgres_db, tmpdir, olap_cmd, caplog): olap_cmd(no_log_errors=False) assert 'invalid signature' in caplog.text + def test_requests_not_json(wcs, postgres_db, tmpdir, olap_cmd, caplog): @httmock.urlmatch() def return_invalid_json(url, request): diff --git a/wcs_olap/feeder.py b/wcs_olap/feeder.py index 5e36400..24b20fb 100644 --- a/wcs_olap/feeder.py +++ b/wcs_olap/feeder.py @@ -1023,26 +1023,35 @@ class WcsFormdefFeeder(object): fields += self.formdef.schema.workflow.fields # filter duplicates - duplicate_varnames = set() self.good_fields = good_fields = OrderedDict() + bad_varnames = set() for field in fields: if field.type not in ('item', 'bool', 'string'): continue if field.anonymise is True: continue if not field.varname: - add_warning('le champ « %s » n\' a pas de nom de variable, il a été ignoré' % field.label) + add_warning('Le champ « %s » n\' a pas de nom de variable, il a été ignoré' % field.label) continue - if field.varname in good_fields: - # duplicate found - duplicate_varnames.add(field.varname) - add_warning('le champ « %(label)s » a un nom de variable dupliqué « %(varname)s »' % { - 'label': good_fields[field.varname].label, - 'varname': field.varname - }) + if field.varname in good_fields and good_fields[field.varname].type != field.type: + # duplicate found, but type is not coherent + add_warning( + 'Le champ « %(label)s » a un nom de variable dupliqué « %(varname)s » ' + 'mais pas le même type que « %(label_good)s », tous les champs avec ce nom seront ignorés ' + '(%(type)s != %(type_good)s).' % { + 'label': field.label, + 'varname': field.varname, + 'type': field.type, + 'label_good': good_fields[field.varname].label, + 'type_good': good_fields[field.varname].type, + } + ) del self.good_fields[field.varname] - if field.varname in duplicate_varnames: - add_warning('le champ « %(label)s » a un nom de variable dupliqué « %(varname)s »' % field.__dict__) + bad_varnames.add(field.varname) + continue + if field.varname in bad_varnames: + add_warning('Le champ « %s » n\' est un doublon d\'un champ de type différent, il a été ignoré' + % field.label) continue self.good_fields[field.varname] = field -- 2.28.0