From 69bd7c220339703081751c6d5e91d9adbb983aa9 Mon Sep 17 00:00:00 2001 From: Benjamin Dauvergne Date: Mon, 4 May 2020 21:40:53 +0200 Subject: [PATCH] misc: accept duplicate fields with the same type (#42429) wcs-olap expects w.c.s. API to return only the first value for each duplicate field. --- tests/conftest.py | 10 +++++++++- tests/olap.model | 17 +++++++++++++++-- tests/test_wcs.py | 21 +++++++++++++++++++++ wcs_olap/feeder.py | 26 ++++++++++++++------------ 4 files changed, 59 insertions(+), 15 deletions(-) diff --git tests/conftest.py tests/conftest.py index dec004b..70d2b84 100644 --- tests/conftest.py +++ tests/conftest.py @@ -102,7 +102,11 @@ formdef.fields = [ fields.ItemField(id='4', label='4rth field', type='item', varname='itemOpen'), fields.StringField(id='5', label='5th field', type='string', anonymise=False, varname='stringCaseSensitive-é'), fields.BoolField(id='6', label='6th field duplicate', type='bool', varname='duplicate'), - fields.StringField(id='7', label='7th field duplicate', type='string', anonymise=False, varname='duplicate'), + fields.StringField(id='7', label='7th field bad duplicate', type='string', anonymise=False, varname='duplicate'), + fields.StringField(id='8', label='8th field duplicate', type='string', anonymise=False, + required=False, varname='good_duplicate'), + fields.StringField(id='9', label='9th field good duplicate', type='string', anonymise=False, + required=False, varname='good_duplicate'), ] formdef.store() @@ -122,16 +126,20 @@ for i in range(50): formdata.data['2_display'] = 'foo' formdata.data['4'] = 'open_one' formdata.data['4_display'] = 'open_one' + formdata.data['8'] = 'a' elif i%4 == 1: formdata.data['2'] = 'bar' formdata.data['2_display'] = 'bar' formdata.data['4'] = 'open_two' formdata.data['4_display'] = 'open_two' + formdata.data['9'] = 'b' else: formdata.data['2'] = 'baz' formdata.data['2_display'] = 'baz' formdata.data['4'] = "open'three" formdata.data['4_display'] = "open'three" + formdata.data['8'] = 'a' + formdata.data['9'] = 'b' formdata.data['3'] = bool(i % 2) if i%3 == 0: diff --git tests/olap.model tests/olap.model index 3eba035..833e097 100644 --- tests/olap.model +++ tests/olap.model @@ -287,6 +287,20 @@ "name": "stringCaseSensitive-é", "type": "string", "value": "\"field_stringCaseSensitive-é\"" + }, + { + "filter": true, + "label": "7th field bad duplicate", + "name": "duplicate", + "type": "string", + "value": "\"field_duplicate\"" + }, + { + "filter": true, + "label": "9th field good duplicate", + "name": "good_duplicate", + "type": "string", + "value": "\"field_good_duplicate\"" } ], "fact_table" : "\"formdata_demande\"", @@ -413,8 +427,7 @@ ], "name" : "formdata_demande", "warnings": [ - "le champ « 6th field duplicate » a un nom de variable dupliqué « duplicate »", - "le champ « 7th field duplicate » a un nom de variable dupliqué « duplicate »" + "Le champ « 7th field bad duplicate » a un nom de variable dupliqué « duplicate » mais pas le même type que « 6th field duplicate », il sera ignoré (string != bool)." ] } ], diff --git tests/test_wcs.py tests/test_wcs.py index 71b1dc4..52628c1 100644 --- tests/test_wcs.py +++ tests/test_wcs.py @@ -20,6 +20,11 @@ def test_wcs_fixture(wcs, postgres_db, tmpdir, olap_cmd, caplog): olap_cmd() + assert ( + 'Le champ « 7th field bad duplicate » a un nom de variable dupliqué ' + '« duplicate » mais pas le même type que « 6th field duplicate », ' + 'il sera ignoré (string != bool).') in caplog.text + expected_schema = [ ('agent', 'id'), ('agent', 'label'), @@ -68,6 +73,8 @@ def test_wcs_fixture(wcs, postgres_db, tmpdir, olap_cmd, caplog): ('formdata_demande', 'field_bool'), ('formdata_demande', 'field_itemOpen'), ('formdata_demande', 'field_stringCaseSensitive-\xe9'), + ('formdata_demande', 'field_duplicate'), + ('formdata_demande', 'field_good_duplicate'), ('formdata_demande', 'function__receiver'), ('formdata_demande_field_item', 'id'), ('formdata_demande_field_item', 'label'), @@ -104,6 +111,19 @@ def test_wcs_fixture(wcs, postgres_db, tmpdir, olap_cmd, caplog): expected_json_schema['pg_dsn'] = postgres_db.dsn assert json_schema == expected_json_schema + # verify data in duplicated columns + with postgres_db.conn() as conn: + with conn.cursor() as c: + c.execute('SET search_path = olap') + c.execute('SELECT field_good_duplicate, count(id) ' + 'FROM formdata_demande ' + 'GROUP BY field_good_duplicate ' + 'ORDER BY field_good_duplicate') + assert dict(c.fetchall()) == { + 'a': 37, + 'b': 13, + } + def test_requests_exception(wcs, postgres_db, tmpdir, olap_cmd, caplog): @httmock.urlmatch() @@ -126,6 +146,7 @@ def test_requests_not_ok(wcs, postgres_db, tmpdir, olap_cmd, caplog): olap_cmd(no_log_errors=False) assert 'invalid signature' in caplog.text + def test_requests_not_json(wcs, postgres_db, tmpdir, olap_cmd, caplog): @httmock.urlmatch() def return_invalid_json(url, request): diff --git wcs_olap/feeder.py wcs_olap/feeder.py index 024d0b4..0bd9213 100644 --- wcs_olap/feeder.py +++ wcs_olap/feeder.py @@ -992,7 +992,6 @@ class WcsFormdefFeeder(object): fields += self.formdef.schema.workflow.fields # filter duplicates - duplicate_varnames = set() self.good_fields = good_fields = OrderedDict() for field in fields: if field.type not in ('item', 'bool', 'string'): @@ -1000,19 +999,22 @@ class WcsFormdefFeeder(object): if field.anonymise is True: continue if not field.varname: - add_warning('le champ « %s » n\' a pas de nom de variable, il a été ignoré' % field.label) + add_warning('Le champ « %s » n\' a pas de nom de variable, il a été ignoré' % field.label) continue - if field.varname in good_fields: - # duplicate found - duplicate_varnames.add(field.varname) - add_warning('le champ « %(label)s » a un nom de variable dupliqué « %(varname)s »' % { - 'label': good_fields[field.varname].label, - 'varname': field.varname - }) + if field.varname in good_fields and good_fields[field.varname].type != field.type: + # duplicate found, but type is not coherent + add_warning( + 'Le champ « %(label)s » a un nom de variable dupliqué « %(varname)s » ' + 'mais pas le même type que « %(label_good)s », il sera ignoré ' + '(%(type)s != %(type_good)s).' % { + 'label': field.label, + 'varname': field.varname, + 'type': field.type, + 'label_good': good_fields[field.varname].label, + 'type_good': good_fields[field.varname].type, + } + ) del self.good_fields[field.varname] - if field.varname in duplicate_varnames: - add_warning('le champ « %(label)s » a un nom de variable dupliqué « %(varname)s »' % field.__dict__) - continue self.good_fields[field.varname] = field for field in good_fields.values(): -- 2.26.0