Projet

Général

Profil

0003-utils-add-conversion-from-XMLSchema-to-JSON-schema-3.patch

Benjamin Dauvergne, 15 octobre 2019 14:08

Télécharger (9,84 ko)

Voir les différences:

Subject: [PATCH 3/3] utils: add conversion from XMLSchema to JSON schema
 (#35818)

We target the Draft 7 jsonschema specification.
 passerelle/utils/xml.py | 140 ++++++++++++++++++++++++++++++++++++++++
 setup.py                |   1 +
 tests/test_utils_xml.py |  52 ++++++++++++++-
 3 files changed, 192 insertions(+), 1 deletion(-)
passerelle/utils/xml.py
14 14
# You should have received a copy of the GNU Affero General Public License
15 15
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
16 16

  
17
from collections import OrderedDict
18

  
19
import jsonschema
20
import xmlschema
21

  
17 22

  
18 23
def text_content(node):
19 24
    '''Extract text content from node and all its children. Equivalent to
......
81 86
                if child_content:
82 87
                    d[child.tag].append(child_content)
83 88
    return d
89

  
90

  
91
class JSONSchemaFromXMLSchema(object):
    '''Build a JSON schema (Draft 7 is the target) from an XML Schema.

       The resulting schema is stored in the `json_schema` attribute; it
       describes an object with a single property named after the chosen
       root element. XML Schema constructs which are not handled raise
       NotImplementedError during the conversion.
    '''

    def __init__(self, xml_schema, root_element):
        '''Convert `xml_schema` starting from the global element `root_element`.

           `xml_schema` is an xmlschema.XMLSchema instance or anything
           accepted by the xmlschema.XMLSchema() constructor.
           `root_element` is the key of the root element in
           `xml_schema.elements`.
        '''
        if not isinstance(xml_schema, xmlschema.XMLSchema):
            xml_schema = xmlschema.XMLSchema(xml_schema)
        self.xml_schema = xml_schema
        self.json_schema = {
            'type': 'object',
            'properties': {
                root_element: self.element_to_jsonschema(
                    xml_schema.elements[root_element]),
            },
            'additionalProperties': False
        }

    @classmethod
    def simpletype_to_jsonschema(cls, simple_type):
        '''Return the JSON schema of an XSD simple type.

           Only the string, integer, boolean and double builtins are
           supported, as well as their restrictions through enumeration and
           (for strings) length facets. Anything else raises
           NotImplementedError.
        '''
        assert isinstance(simple_type, xmlschema.validators.XsdSimpleType)

        if isinstance(simple_type, xmlschema.validators.XsdAtomicBuiltin):
            # facets on builtins are not translated, refuse them instead of
            # silently producing a more permissive schema
            if (simple_type.min_length
                    or simple_type.max_length
                    or simple_type.white_space not in ('collapse', 'preserve')
                    or simple_type.patterns):
                raise NotImplementedError(simple_type)

            if simple_type.name == xmlschema.qnames.XSD_STRING:
                schema = {'type': 'string'}
            elif simple_type.name == xmlschema.qnames.XSD_INTEGER:
                schema = {'type': 'integer'}
            elif simple_type.name == xmlschema.qnames.XSD_BOOLEAN:
                schema = {'type': 'boolean'}
            elif simple_type.name == xmlschema.qnames.XSD_DOUBLE:
                schema = {'type': 'number'}
            else:
                raise NotImplementedError(simple_type)
            return schema
        elif isinstance(simple_type, xmlschema.validators.XsdAtomicRestriction):
            if (simple_type.white_space not in ('collapse', 'preserve')
                    or simple_type.patterns):
                raise NotImplementedError(simple_type)
            # start from a copy of the base type schema, then narrow it with
            # each supported facet
            schema = OrderedDict(cls.simpletype_to_jsonschema(simple_type.base_type))
            base_is_string = simple_type.base_type.name == xmlschema.qnames.XSD_STRING
            for validator in simple_type.validators:
                if isinstance(validator, xmlschema.validators.XsdEnumerationFacets):
                    schema['enum'] = validator.enumeration
                elif (isinstance(validator, xmlschema.validators.XsdMinLengthFacet)
                      and base_is_string):
                    schema['minLength'] = validator.value
                elif (isinstance(validator, xmlschema.validators.XsdMaxLengthFacet)
                      and base_is_string):
                    schema['maxLength'] = validator.value
                elif (isinstance(validator, xmlschema.validators.XsdLengthFacet)
                      and base_is_string):
                    # an exact length is expressed as equal bounds
                    schema['minLength'] = validator.value
                    schema['maxLength'] = validator.value
                else:
                    raise NotImplementedError(validator)
            return schema
        raise NotImplementedError(simple_type)

    @classmethod
    def attributegroup_to_jsonschema(cls, attributegroup, schema):
        '''Add a property to `schema` for each attribute of `attributegroup`.

           `schema` is modified in place, nothing is returned. Attributes
           defined by reference raise NotImplementedError.
        '''
        assert isinstance(attributegroup, xmlschema.validators.XsdAttributeGroup)

        properties = schema.setdefault('properties', OrderedDict())
        # NOTE(review): the method name is kept as written; confirm that
        # iter_component() exists in the targeted xmlschema version (the
        # public API may spell it iter_components()).
        for component in attributegroup.iter_component():
            if component.ref:
                raise NotImplementedError(component)
            else:
                properties[component.name] = cls.simpletype_to_jsonschema(component.type)

    @classmethod
    def group_to_jsonschema(cls, group, schema):
        '''Add a property to `schema` for each element of the model `group`.

           Nested groups are flattened into the same `schema`, which is
           modified in place. Components other than elements and groups raise
           NotImplementedError.
        '''
        assert isinstance(group, xmlschema.validators.XsdGroup)

        properties = schema.setdefault('properties', OrderedDict())
        for component in group:
            if isinstance(component, xmlschema.validators.XsdElement):
                properties[component.name] = cls.element_to_jsonschema(component)
            elif isinstance(component, xmlschema.validators.XsdGroup):
                cls.group_to_jsonschema(component, schema)
            else:
                raise NotImplementedError(component)

    @classmethod
    def type_to_jsonschema(cls, xmltype, depth=0):
        '''Return the JSON schema of an XSD type, simple or complex.

           `depth` is 0 for the type of an element and is incremented when
           recursing on the base type of a simple content type; only at depth
           0 is the simple schema wrapped in a `oneOf` also accepting null.
           Mixed content and complex types which are not element-only raise
           NotImplementedError.
        '''
        assert isinstance(xmltype, xmlschema.validators.XsdType)

        if xmltype.is_simple():
            schema = cls.simpletype_to_jsonschema(xmltype)
            if depth == 0:
                schema = {'oneOf': [schema, {'type': 'null'}]}
            return schema
        elif xmltype.has_simple_content():
            base_schema = cls.type_to_jsonschema(xmltype.base_type, depth=depth + 1)
            if not xmltype.attributes:
                schema = base_schema
            else:
                # simple content with attributes becomes an object: one
                # property per attribute and the text content under '$'
                schema = OrderedDict({'type': 'object'})
                cls.attributegroup_to_jsonschema(xmltype.attributes, schema)
                schema['properties']['$'] = base_schema
            if depth == 0:
                schema = {'oneOf': [schema, {'type': 'null'}]}
            return schema
        else:
            if xmltype.has_mixed_content() or not xmltype.is_element_only():
                raise NotImplementedError(xmltype)

            schema = OrderedDict({'type': 'object'})
            schema['additionalProperties'] = False
            if xmltype.attributes:
                cls.attributegroup_to_jsonschema(xmltype.attributes, schema)
            cls.group_to_jsonschema(xmltype.content_type, schema)
            return schema

    @classmethod
    def element_to_jsonschema(cls, element):
        '''Return the JSON schema of an XSD element.

           An element which can occur more than once (max_occurs greater
           than 1, or None for unbounded) is described as an array of its
           type, otherwise the schema of its type is returned directly.
        '''
        assert isinstance(element, xmlschema.validators.XsdElement)

        # test for None first: comparing None with an integer raises
        # TypeError on Python 3
        is_array = element.max_occurs is None or element.max_occurs > 1
        type_schema = cls.type_to_jsonschema(element.type)
        if is_array:
            d = {
                'type': 'array',
                'items': type_schema,
                'minItems': element.min_occurs,
            }
            if element.max_occurs is not None:
                d['maxItems'] = element.max_occurs
            return d
        else:
            return type_schema

    def validate(self, instance):
        '''Validate `instance` against the converted JSON schema.

           The work is delegated to jsonschema.validate(), whose result is
           returned as is.
        '''
        return jsonschema.validate(instance=instance, schema=self.json_schema)
setup.py
109 109
            'unidecode',
110 110
            'paramiko',
111 111
            'pdfrw',
112
            'xmlschema',
112 113
        ],
113 114
        cmdclass={
114 115
            'build': build,
tests/test_utils_xml.py
1
# passerelle - uniform access to multiple data sources and services
2
# Copyright (C) 2019 Entr'ouvert
3
#
4
# This program is free software: you can redistribute it and/or modify it
5
# under the terms of the GNU Affero General Public License as published
6
# by the Free Software Foundation, either version 3 of the License, or
7
# (at your option) any later version.
8
#
9
# This program is distributed in the hope that it will be useful,
10
# but WITHOUT ANY WARRANTY; without even the implied warranty of
11
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12
# GNU Affero General Public License for more details.
13
#
14
# You should have received a copy of the GNU Affero General Public License
15
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
16

  
1 17
import xml.etree.ElementTree as ET
2 18

  
3
from passerelle.utils.xml import to_json, text_content
19
import xmlschema
20

  
21
import jsonschema
22

  
23
from passerelle.utils.xml import to_json, text_content, JSONSchemaFromXMLSchema
24
from passerelle.utils.json import flatten_json_schema, flatten, unflatten
4 25

  
5 26

  
6 27
def test_text_content():
......
31 52
            {'text3': '4'},
32 53
        ]
33 54
    }
55

  
56

  
57
def test_xmlschema_to_jsonschema():
    '''Round-trip a sample PACS document through the converted JSON schema.'''
    schema_path = 'passerelle/apps/sp_fr/depotDossierPACS.XSD'

    # go from XML to JSON,
    # convert XMLSchema to JSONSchema
    # validate jsonschema, on converted data,
    # flatten the JSON schema,
    # flatten the data,
    # validate flattened data with flattened JSON schema
    # unflatten data
    # convert unflattened data to XML
    # convert XML to JSON
    # then compare to initially converted JSON data
    schema = xmlschema.XMLSchema(schema_path, converter=xmlschema.UnorderedConverter)
    json_schema = JSONSchemaFromXMLSchema(schema, 'PACS')
    d = schema.elements['PACS'].decode(ET.parse('tests/data/pacs-doc.xml').getroot())
    d = {'PACS': d}
    json_schema.validate(d)
    flattened_json_schema = flatten_json_schema(json_schema.json_schema)
    flattened_d = flatten(d)
    jsonschema.validate(instance=flattened_d, schema=flattened_json_schema)
    # unflatten the flattened data (not the original one), otherwise the
    # flatten/unflatten round trip is never exercised
    d2 = unflatten(flattened_d)
    json_schema.validate(d2)

    tree = schema.elements['PACS'].encode(d2['PACS'])
    d3 = schema.elements['PACS'].decode(tree)
    assert d == {'PACS': d3}
34
-