Project

General

Profile

0005-utils-add-JSON-flattening-helpers-35818.patch

Benjamin Dauvergne, 24 Oct 2019 11:05 PM

Download (12.2 KB)

View differences:

Subject: [PATCH 5/8] utils: add JSON flattening helpers (#35818)

* flatten/unflatten JSON document
* flatten JSON schema (to help users produce flattened JSON
  documents, not to validate them; validation must be done by
  unflattening, then validating against the original JSON schema)
 passerelle/utils/json.py         | 152 +++++++++++++++++++++++++++
 passerelle/utils/jsonresponse.py |   2 +
 tests/test_utils_json.py         | 174 +++++++++++++++++++++++++++++++
 3 files changed, 328 insertions(+)
 create mode 100644 passerelle/utils/json.py
 create mode 100644 tests/test_utils_json.py
passerelle/utils/json.py
1
# passerelle - uniform access to multiple data sources and services
2
# Copyright (C) 2018 Entr'ouvert
3
#
4
# This program is free software: you can redistribute it and/or modify it
5
# under the terms of the GNU Affero General Public License as published
6
# by the Free Software Foundation, either version 3 of the License, or
7
# (at your option) any later version.
8
#
9
# This program is distributed in the hope that it will be useful,
10
# but WITHOUT ANY WARRANTY; without even the implied warranty of
11
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12
# GNU Affero General Public License for more details.
13
#
14
# You should have received a copy of the GNU Affero General Public License
15
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
16
# passerelle - uniform access to multiple data sources and services
17
# Copyright (C) 2018 Entr'ouvert
18
#
19
# This program is free software: you can redistribute it and/or modify it
20
# under the terms of the GNU Affero General Public License as published
21
# by the Free Software Foundation, either version 3 of the License, or
22
# (at your option) any later version.
23
#
24
# This program is distributed in the hope that it will be useful,
25
# but WITHOUT ANY WARRANTY; without even the implied warranty of
26
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
27
# GNU Affero General Public License for more details.
28
#
29
# You should have received a copy of the GNU Affero General Public License
30
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
31

  
32
from __future__ import unicode_literals
33

  
34
from django.utils import six
35

  
36

  
37
def unflatten(d, separator='__'):
    '''Rebuild a nested JSON-like structure from a flattened dict.

       Transform:

          {"a__b__0__x": "1234"}

       into:

          {"a": {"b": [{"x": "1234"}]}}

       Every key is split on `separator`; components made only of digits are
       array indexes, all the others are object keys.  The type of the first
       component decides whether the result is a list or a dict.

       Any value which is not a non-empty dict is returned unchanged.

       Raises ValueError if an array index is missing or duplicated, or if
       two keys disagree on the type of a node (array, object or scalar).
    '''
    if not isinstance(d, dict) or not d:  # unflattening an empty dict makes no sense
        return d

    # ok d is a dict

    def map_digits(parts):
        return [int(x) if x.isdigit() else x for x in parts]

    def sort_key(item):
        path, key = item
        # Tag each component so that an index is never compared with a name
        # (that comparison raises TypeError on Python 3); indexes sort first,
        # in numeric order, so arrays are always filled from index 0 upward.
        tagged = [(0, part) if isinstance(part, int) else (1, part) for part in path]
        return tagged, key

    keys = [(map_digits(key.split(separator)), key) for key in d]
    keys.sort(key=sort_key)

    def set_path(path, orig_key, container, value):
        last = len(path) - 1
        for i, key in enumerate(path):
            is_index = isinstance(key, int)
            # The node reached so far must match the kind of component used
            # to go through it; it may be of the wrong type or a scalar when
            # keys conflict, e.g. "a" and "a__b", or "a__0" and "a__b".
            if not isinstance(container, list if is_index else dict):
                raise ValueError('inconsistent path %s in %s' % (
                    separator.join(map(str, path[:i + 1])),
                    orig_key))

            if i == last:  # end of path, set the value
                if not is_index:
                    container[key] = value
                elif len(container) < key:
                    raise ValueError('incomplete array before %s' % orig_key)
                elif len(container) > key:
                    # two spellings of the same index, e.g. "0" and "00"
                    raise ValueError('duplicate array index in %s' % orig_key)
                else:
                    container.append(value)
                return

            # the next component tells which kind of node must be created
            child = [] if isinstance(path[i + 1], int) else {}
            if not is_index:
                child = container.setdefault(key, child)
            elif len(container) < key:
                raise ValueError('incomplete array before %s in %s' % (
                    separator.join(map(str, path[:i + 1])),
                    orig_key))
            elif len(container) == key:
                container.append(child)
            else:
                child = container[key]
            container = child

    # Is the first level an array or a dict ?
    if isinstance(keys[0][0][0], int):
        new = []
    else:
        new = {}
    for path, key in keys:
        set_path(path, key, new, d[key])
    return new
96

  
97

  
98
def flatten(data, separator='__'):
    '''Flatten nested lists and dicts into a single-level dict.

       Transform:

          {"a": {"b": [{"x": "1234"}]}}

       into:

          {"a__b__0__x": "1234"}

       Keys of the result are the paths to the leaf values, list indexes and
       dict keys being converted with str() and joined with `separator`.

       Empty lists and empty dicts produce no key at all, so they are not
       restored when the result is unflattened.
    '''
    assert isinstance(data, (list, dict))

    def helper(node):
        if isinstance(node, list):
            children = enumerate(node)
        elif isinstance(node, dict):
            # plain items() works on Python 2 and 3 and avoids django.utils.six
            children = node.items()
        else:
            # leaf: the path is completed by the callers up the stack
            yield [], node
            return
        for key, child in children:
            for path, leaf in helper(child):
                yield [str(key)] + path, leaf

    return {separator.join(path): value for path, value in helper(data)}
113

  
114

  
115
def flatten_json_schema(schema, separator='__'):
    '''Build the JSON schema of the flattened form of documents.

       The result is an object schema with one property for each leaf of
       `schema`, named by its path joined with `separator`, and with
       additionalProperties set to False.

       * object: one entry for each leaf found under each declared property,
       * array: entries for the indexes 0 to maxItems - 1 (3 indexes when
         maxItems is absent),
       * oneOf: the alternatives are flattened separately; a key produced by
         several alternatives gets a oneOf of the matching leaf schemas,
       * anything else (including schemas without a "type") is a leaf.

       A missing "items" is handled as an empty schema and a missing
       "properties" as no property at all.
    '''
    assert isinstance(schema, dict)

    # number of indexes generated for arrays which declare no maxItems
    default_max_items = 3

    def helper(prefix, node):
        if 'oneOf' in node:
            by_key = {}
            for alternative in node['oneOf']:
                for key, leaf in helper(prefix, alternative):
                    by_key.setdefault(key, []).append(leaf)
            for key, candidates in by_key.items():
                if len(candidates) > 1:
                    yield key, {'oneOf': candidates}
                else:
                    yield key, candidates[0]
            return

        node_type = node.get('type')
        if node_type == 'array':
            items = node.get('items', {})
            max_items = node.get('maxItems', default_max_items)
            children = [(str(i), items) for i in range(max_items)]
        elif node_type == 'object':
            children = node.get('properties', {}).items()
        else:
            yield prefix, node
            return

        # use the caller's separator, the same one flatten()/unflatten() get
        head = prefix + separator if prefix else prefix
        for name, child in children:
            for key, leaf in helper(name, child):
                yield '%s%s' % (head, key), leaf

    return {
        'type': 'object',
        'properties': {
            key: leaf for key, leaf in helper('', schema)
        },
        'additionalProperties': False,
    }
passerelle/utils/jsonresponse.py
2 2
# django-jsonresponse (https://github.com/jjay/django-jsonresponse) distributed
3 3
# under BSD license
4 4

  
5
from __future__ import absolute_import
6

  
5 7
import datetime
6 8
import json
7 9
import functools
tests/test_utils_json.py
1
# passerelle - uniform access to multiple data sources and services
2
# Copyright (C) 2018 Entr'ouvert
3
#
4
# This program is free software: you can redistribute it and/or modify it
5
# under the terms of the GNU Affero General Public License as published
6
# by the Free Software Foundation, either version 3 of the License, or
7
# (at your option) any later version.
8
#
9
# This program is distributed in the hope that it will be useful,
10
# but WITHOUT ANY WARRANTY; without even the implied warranty of
11
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12
# GNU Affero General Public License for more details.
13
#
14
# You should have received a copy of the GNU Affero General Public License
15
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
16
# passerelle - uniform access to multiple data sources and services
17
# Copyright (C) 2018 Entr'ouvert
18
#
19
# This program is free software: you can redistribute it and/or modify it
20
# under the terms of the GNU Affero General Public License as published
21
# by the Free Software Foundation, either version 3 of the License, or
22
# (at your option) any later version.
23
#
24
# This program is distributed in the hope that it will be useful,
25
# but WITHOUT ANY WARRANTY; without even the implied warranty of
26
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
27
# GNU Affero General Public License for more details.
28
#
29
# You should have received a copy of the GNU Affero General Public License
30
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
31

  
32
import pytest
33

  
34
import jsonschema
35

  
36
from passerelle.utils.json import flatten, unflatten, flatten_json_schema
37

  
38

  
39
def test_unflatten_base():
    # anything which is not a non-empty dict must come back unchanged
    for untouched in ('', 'a', [], [1], {}, 0, 1):
        assert unflatten(untouched) == untouched
    # booleans must come back as the very same object
    for flag in (False, True):
        assert unflatten(flag) is flag
49

  
50

  
51
def test_unflatten_dict():
    # keys are deliberately given out of order
    flattened = {
        'a__b__0': 1,
        'a__c__1': 'a',
        'a__b__1': True,
        'a__c__0': [1],
    }
    expected = {
        'a': {
            'b': [1, True],
            'c': [[1], 'a'],
        }
    }
    assert unflatten(flattened) == expected
63

  
64

  
65
def test_unflatten_array():
    # a numeric first component makes the top level a list
    flattened = {
        '0__b__0': 1,
        '1__c__1': 'a',
        '0__b__1': True,
        '1__c__0': [1],
    }
    expected = [
        {'b': [1, True]},
        {'c': [[1], 'a']},
    ]
    assert unflatten(flattened) == expected
73

  
74

  
75
def test_unflatten_missing_final_index():
    # index 1 is given as last component while index 0 does not exist
    flattened = {'1': 1}
    with pytest.raises(ValueError) as exc_info:
        unflatten(flattened)
    message = exc_info.value.args[0]
    assert 'incomplete' in message
81

  
82

  
83
def test_unflatten_missing_intermediate_index():
    # index 1 is used in the middle of a path while index 0 does not exist
    flattened = {'a__1__b': 1}
    with pytest.raises(ValueError) as exc_info:
        unflatten(flattened)
    message = exc_info.value.args[0]
    assert 'incomplete' in message
89

  
90

  
91
def test_flatten_array_schema():
    # schema of one element: an object holding two scalars and an array
    item_schema = {
        'type': 'object',
        'properties': {
            'a': {'type': 'string'},
            'b': {'type': 'integer'},
            'c': {'type': 'array', 'items': {'type': 'integer'}},
        },
        'additionalProperties': False,
    }
    schema = {'type': 'array', 'items': item_schema}
    flattened_schema = flatten_json_schema(schema)

    # three identical rows, each one with its own dict and list
    data = [{'a': 'a', 'b': 1, 'c': [1, 2, 3]} for _ in range(3)]
    flattened_data = flatten(data)

    # the nested document is valid for the nested schema
    jsonschema.validate(schema=schema, instance=data)

    expected_properties = {
        '0__a': {'type': 'string'},
        '0__b': {'type': 'integer'},
        '0__c__0': {'type': 'integer'},
        '0__c__1': {'type': 'integer'},
        '0__c__2': {'type': 'integer'},
        '1__a': {'type': 'string'},
        '1__b': {'type': 'integer'},
        '1__c__0': {'type': 'integer'},
        '1__c__1': {'type': 'integer'},
        '1__c__2': {'type': 'integer'},
        '2__a': {'type': 'string'},
        '2__b': {'type': 'integer'},
        '2__c__0': {'type': 'integer'},
        '2__c__1': {'type': 'integer'},
        '2__c__2': {'type': 'integer'},
    }
    expected_schema = {
        'type': 'object',
        'properties': expected_properties,
        'additionalProperties': False,
    }
    assert flattened_schema == expected_schema

    # the flattened document is valid for the flattened schema, and
    # unflattening gives back the original document
    jsonschema.validate(schema=flattened_schema, instance=flattened_data)
    assert data == unflatten(flattened_data)
145

  
146

  
147
def test_flatten_dict_schema():
    # the array declares no maxItems: three indexes are expected
    schema = {
        'type': 'object',
        'properties': {
            'a': {'type': 'string'},
            'b': {'type': 'integer'},
            'c': {'type': 'array', 'items': {'type': 'integer'}},
        }
    }
    expected = {
        'type': 'object',
        'properties': {
            'a': {'type': 'string'},
            'b': {'type': 'integer'},
            'c__0': {'type': 'integer'},
            'c__1': {'type': 'integer'},
            'c__2': {'type': 'integer'},
        },
        'additionalProperties': False,
    }
    assert flatten_json_schema(schema) == expected
0
-