From b5d408b07d81945f399243bf754829df9dc0a026 Mon Sep 17 00:00:00 2001 From: Benjamin Dauvergne Date: Tue, 15 Oct 2019 12:00:08 +0200 Subject: [PATCH 5/9] utils: add JSON flattening helpers (#35818) * flatten/unflatten JSON document * flatten JSON schema (to help users produce flattened JSON documents, not to validate; validation must be done by unflattening and then validating against the original JSON schema) --- passerelle/utils/json.py | 152 +++++++++++++++++++++++++++ passerelle/utils/jsonresponse.py | 2 + tests/test_utils_json.py | 174 +++++++++++++++++++++++++++++++ 3 files changed, 328 insertions(+) create mode 100644 passerelle/utils/json.py create mode 100644 tests/test_utils_json.py diff --git a/passerelle/utils/json.py b/passerelle/utils/json.py new file mode 100644 index 00000000..19227493 --- /dev/null +++ b/passerelle/utils/json.py @@ -0,0 +1,152 @@ +# passerelle - uniform access to multiple data sources and services +# Copyright (C) 2018 Entr'ouvert +# +# This program is free software: you can redistribute it and/or modify it +# under the terms of the GNU Affero General Public License as published +# by the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# passerelle - uniform access to multiple data sources and services +# Copyright (C) 2018 Entr'ouvert +# +# This program is free software: you can redistribute it and/or modify it +# under the terms of the GNU Affero General Public License as published +# by the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. 
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.


def _parse_component(part):
    '''Return `part` as an int when it is a usable array index, else as is.'''
    if part.isdigit():
        try:
            return int(part)
        except ValueError:
            # isdigit() also accepts characters (superscript digits for
            # example) that int() refuses; those are ordinary dict keys.
            pass
    return part


def _sort_key(entry):
    '''Ordering key for (path, flat_key) pairs.

       Array indices are ordered numerically and always before dict keys;
       tagging each component keeps int and str from being compared
       directly, which raises TypeError on Python 3.
    '''
    path, flat_key = entry
    typed_path = [
        (0, part, '') if isinstance(part, int) else (1, 0, part)
        for part in path
    ]
    # flat_key breaks ties between spellings of the same path ('0', '00')
    return typed_path, flat_key


def _set_path(root, path, orig_key, value, separator):
    '''Store `value` inside `root` at `path`, creating containers on the way.

       Raises ValueError if an array index is skipped or repeated, or if the
       path disagrees with the structure already built (a list where a dict
       is needed, a scalar where a container is needed...).
    '''
    node = root
    last = len(path) - 1
    for i, part in enumerate(path):
        is_index = isinstance(part, int)
        if not isinstance(node, list if is_index else dict):
            raise ValueError('inconsistent path %s in %s' % (
                separator.join(map(str, path[:i + 1])),
                orig_key))

        if i == last:  # end of path, set the value
            if not is_index:
                node[part] = value
            elif len(node) == part:
                node.append(value)
            elif len(node) < part:
                raise ValueError('incomplete array before %s' % orig_key)
            else:
                raise ValueError('duplicate array index in %s' % orig_key)
            return

        # the kind of the next component decides the kind of container
        child = [] if isinstance(path[i + 1], int) else {}
        if not is_index:
            node = node.setdefault(part, child)
        elif len(node) < part:
            raise ValueError('incomplete array before %s in %s' % (
                separator.join(map(str, path[:i + 1])),
                orig_key))
        elif len(node) == part:
            node.append(child)
            node = child
        else:
            node = node[part]


def unflatten(d, separator='__'):
    '''Transform:

          {"a__b__0__x": "1234"}

       into:

          {"a": {"b": [{"x": "1234"}]}}

       Key components made only of digits are array indices, every other
       component is a dict key. Values are stored untouched. Anything which
       is not a non-empty dict is returned unchanged.

       Raises ValueError if the keys do not describe a consistent document:
       missing or repeated array index, or the same path used both as an
       array and as an object, or both as a leaf and as a container.
    '''
    if not isinstance(d, dict) or not d:  # unflattening an empty dict makes no sense
        return d

    # Sorting makes array items show up in index order, so that each one can
    # simply be appended.
    entries = sorted(
        (([_parse_component(part) for part in key.split(separator)], key)
         for key in d),
        key=_sort_key)

    # Is the first level an array or a dict? Indices sort first, so looking
    # at the first entry is enough; a mix is reported by _set_path().
    root = [] if isinstance(entries[0][0][0], int) else {}
    for path, key in entries:
        _set_path(root, path, key, d[key], separator)
    return root


def flatten(data, separator='__'):
    '''Transform:

          {"a": {"b": [{"x": "1234"}]}}

       into:

          {"a__b__0__x": "1234"}

       `data` must be a list or a dict, TypeError is raised otherwise. Each
       leaf (any value which is neither a list nor a dict) gives one key,
       made of the array indices and dict keys leading to it joined with
       `separator`.

       Empty lists and dicts contain no leaf, they are absent from the
       result. Keys which already contain `separator` are not escaped.
    '''
    if not isinstance(data, (list, dict)):
        raise TypeError('flatten() expects a list or a dict, not %s' % type(data).__name__)

    def walk(node, path):
        if isinstance(node, list):
            children = enumerate(node)
        elif isinstance(node, dict):
            children = node.items()
        else:
            yield separator.join(path), node
            return
        for key, child in children:
            for item in walk(child, path + [str(key)]):
                yield item

    return {flat_key: value for flat_key, value in walk(data, [])}


def flatten_json_schema(schema, separator='__', default_max_items=3):
    '''Build the JSON schema of the flattened form of documents described by
       `schema`, using the same key convention as flatten().

       The result is meant to help producing flattened documents, not to
       validate them: validation must be done by unflattening then
       validating with the original schema.

       schema -- JSON schema as a dict (TypeError is raised otherwise),
                 only "type", "properties", "items", "maxItems" and
                 "oneOf" are interpreted
       separator -- string joining the components of flattened keys
       default_max_items -- number of items generated for arrays which do
                            not declare "maxItems"

       Sub-schemas which are neither arrays nor objects, those without a
       "type" included, are kept as they are. Leaves reached under the same
       key through different "oneOf" branches are grouped in a "oneOf".
       KeyError is raised for an array without "items" or an object without
       "properties".
    '''
    if not isinstance(schema, dict):
        raise TypeError('flatten_json_schema() expects a dict, not %s' % type(schema).__name__)

    def walk(path, node):
        if 'oneOf' in node:
            leaves_by_key = {}
            for alternative in node['oneOf']:
                for key, leaf in walk(path, alternative):
                    leaves_by_key.setdefault(key, []).append(leaf)
            for key in leaves_by_key:
                leaves = leaves_by_key[key]
                if len(leaves) > 1:
                    yield key, {'oneOf': leaves}
                else:
                    yield key, leaves[0]
            return

        node_type = node.get('type')
        if node_type == 'array':
            items = node['items']
            for i in range(node.get('maxItems', default_max_items)):
                for item in walk(path + [str(i)], items):
                    yield item
        elif node_type == 'object':
            properties = node['properties']
            for name in properties:
                for item in walk(path + [name], properties[name]):
                    yield item
        else:
            yield separator.join(path), node

    return {
        'type': 'object',
        'properties': {
            key: leaf for key, leaf in walk([], schema)
        },
        'additionalProperties': False,
    }
(https://github.com/jjay/django-jsonresponse) distributed # under BSD license +from __future__ import absolute_import + import datetime import json import functools diff --git a/tests/test_utils_json.py b/tests/test_utils_json.py new file mode 100644 index 00000000..eb04c53e --- /dev/null +++ b/tests/test_utils_json.py @@ -0,0 +1,174 @@ +# passerelle - uniform access to multiple data sources and services +# Copyright (C) 2018 Entr'ouvert +# +# This program is free software: you can redistribute it and/or modify it +# under the terms of the GNU Affero General Public License as published +# by the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . +# passerelle - uniform access to multiple data sources and services +# Copyright (C) 2018 Entr'ouvert +# +# This program is free software: you can redistribute it and/or modify it +# under the terms of the GNU Affero General Public License as published +# by the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . 

import pytest

import jsonschema

from passerelle.utils.json import flatten, unflatten, flatten_json_schema


def test_unflatten_base():
    '''Values which are not a non-empty dict come back as they are.'''
    for passthrough in ('', 'a', [], [1], {}, 0, 1):
        assert unflatten(passthrough) == passthrough
    for flag in (False, True):
        assert unflatten(flag) is flag


def test_unflatten_dict():
    '''Non-numeric first components give a dict at the top level.'''
    flat = {
        'a__b__0': 1,
        'a__c__1': 'a',
        'a__b__1': True,
        'a__c__0': [1],
    }
    expected = {
        'a': {
            'b': [1, True],
            'c': [[1], 'a'],
        }
    }
    assert unflatten(flat) == expected


def test_unflatten_array():
    '''Numeric first components give a list at the top level.'''
    flat = {
        '0__b__0': 1,
        '1__c__1': 'a',
        '0__b__1': True,
        '1__c__0': [1],
    }
    expected = [
        {'b': [1, True]},
        {'c': [[1], 'a']},
    ]
    assert unflatten(flat) == expected


def test_unflatten_missing_final_index():
    '''Index 1 without index 0, at the end of the key, is refused.'''
    flat = {'1': 1}
    with pytest.raises(ValueError) as exc_info:
        unflatten(flat)
    message = exc_info.value.args[0]
    assert 'incomplete' in message


def test_unflatten_missing_intermediate_index():
    '''Index 1 without index 0, in the middle of the key, is refused.'''
    flat = {'a__1__b': 1}
    with pytest.raises(ValueError) as exc_info:
        unflatten(flat)
    message = exc_info.value.args[0]
    assert 'incomplete' in message


def test_flatten_array_schema():
    '''Flattened data validate against the flattened schema of an array.'''
    item_schema = {
        'type': 'object',
        'properties': {
            'a': {
                'type': 'string',
            },
            'b': {
                'type': 'integer',
            },
            'c': {
                'type': 'array',
                'items': {
                    'type': 'integer',
                }
            }
        },
        'additionalProperties': False,
    }
    schema = {
        'type': 'array',
        'items': item_schema,
    }
    flattened_schema = flatten_json_schema(schema)
    data = [{'a': 'a', 'b': 1, 'c': [1, 2, 3]} for _ in range(3)]
    flattened_data = flatten(data)

    jsonschema.validate(schema=schema, instance=data)

    # three items per array as the schema declares no maxItems
    expected_properties = {}
    for i in range(3):
        expected_properties['%d__a' % i] = {'type': 'string'}
        expected_properties['%d__b' % i] = {'type': 'integer'}
        for j in range(3):
            expected_properties['%d__c__%d' % (i, j)] = {'type': 'integer'}
    assert flattened_schema == {
        'type': 'object',
        'properties': expected_properties,
        'additionalProperties': False,
    }

    jsonschema.validate(schema=flattened_schema, instance=flattened_data)
    assert data == unflatten(flattened_data)


def test_flatten_dict_schema():
    '''Scalar properties keep their name, arrays get one key per index.'''
    schema = {
        'type': 'object',
        'properties': {
            'a': {
                'type': 'string',
            },
            'b': {
                'type': 'integer',
            },
            'c': {
                'type': 'array',
                'items': {
                    'type': 'integer',
                }
            }
        }
    }
    expected = {
        'type': 'object',
        'properties': {
            'a': {'type': 'string'},
            'b': {'type': 'integer'},
            'c__0': {'type': 'integer'},
            'c__1': {'type': 'integer'},
            'c__2': {'type': 'integer'},
        },
        'additionalProperties': False,
    }
    assert flatten_json_schema(schema) == expected