Project

General

Profile

0007-utils-add-conversion-from-XMLSchema-to-JSON-schema-3.patch

Benjamin Dauvergne, 25 Oct 2019 02:23 PM

Download (13.1 KB)

View differences:

Subject: [PATCH 7/9] utils: add conversion from XMLSchema to JSON schema
 (#35818)

We target the Draft 7 jsonschema specification.
 passerelle/utils/xml.py | 141 ++++++++++++++++++++++++++++++++++++++++
 setup.py                |   1 +
 tests/data/pacs-doc.xml | 101 ++++++++++++++++++++++++++++
 tests/test_utils_xml.py |  52 ++++++++++++++-
 4 files changed, 294 insertions(+), 1 deletion(-)
 create mode 100644 tests/data/pacs-doc.xml
passerelle/utils/xml.py
14 14
# You should have received a copy of the GNU Affero General Public License
15 15
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
16 16

  
17
from collections import OrderedDict
18

  
19
import jsonschema
20
import xmlschema
21

  
17 22

  
18 23
def text_content(node):
19 24
    '''Extract text content from node and all its children. Equivalent to
......
81 86
                if child_content:
82 87
                    d[child.tag].append(child_content)
83 88
    return d
89

  
90

  
91
class JSONSchemaFromXMLSchema(object):
    '''Convert an XML Schema into a JSON schema (target: Draft 7).

    The converted schema is stored in the `json_schema` attribute. It
    describes an object with exactly one required property, named after the
    chosen root element, and no additional properties.

    Only a subset of XML Schema is supported; every construct that is not
    handled raises NotImplementedError with the offending component.
    '''

    def __init__(self, xml_schema, root_element):
        '''Build the JSON schema for `root_element`.

        xml_schema -- an xmlschema.XMLSchema instance, or anything accepted
                      by the xmlschema.XMLSchema constructor.
        root_element -- name of a global element of the schema (a key of
                        xml_schema.elements).
        '''
        if not isinstance(xml_schema, xmlschema.XMLSchema):
            xml_schema = xmlschema.XMLSchema(xml_schema)
        self.xml_schema = xml_schema
        self.json_schema = {
            'type': 'object',
            'properties': {
                root_element: self.element_to_jsonschema(
                    xml_schema.elements[root_element]),
            },
            'required': [root_element],
            'additionalProperties': False,
        }

    @classmethod
    def simpletype_to_jsonschema(cls, simple_type):
        '''Return the JSON schema of an XSD simple type.

        Builtin atomic types string, integer, boolean and double are mapped
        to their JSON counterpart. Atomic restrictions reuse the schema of
        their base type and add `enum`, `minLength` and `maxLength` for the
        enumeration and (string only) length facets.

        Raises NotImplementedError for any other type or facet.
        '''
        assert isinstance(simple_type, xmlschema.validators.XsdSimpleType)

        if isinstance(simple_type, xmlschema.validators.XsdAtomicBuiltin):
            # builtins carrying length, whitespace or pattern constraints
            # cannot be expressed by the simple mapping below
            if (simple_type.min_length
                    or simple_type.max_length
                    or simple_type.white_space not in ('collapse', 'preserve')
                    or simple_type.patterns):
                raise NotImplementedError(simple_type)

            if simple_type.name == xmlschema.qnames.XSD_STRING:
                schema = {'type': 'string'}
            elif simple_type.name == xmlschema.qnames.XSD_INTEGER:
                schema = {'type': 'integer'}
            elif simple_type.name == xmlschema.qnames.XSD_BOOLEAN:
                schema = {'type': 'boolean'}
            elif simple_type.name == xmlschema.qnames.XSD_DOUBLE:
                schema = {'type': 'number'}
            else:
                raise NotImplementedError(simple_type)
            return schema
        elif isinstance(simple_type, xmlschema.validators.XsdAtomicRestriction):
            if (simple_type.white_space not in ('collapse', 'preserve')
                    or simple_type.patterns):
                raise NotImplementedError(simple_type)
            # copy the base schema so that facets do not leak into it
            schema = OrderedDict(cls.simpletype_to_jsonschema(simple_type.base_type))
            base_is_string = simple_type.base_type.name == xmlschema.qnames.XSD_STRING
            for validator in simple_type.validators:
                if isinstance(validator, xmlschema.validators.XsdEnumerationFacets):
                    schema['enum'] = validator.enumeration
                elif (isinstance(validator, xmlschema.validators.XsdMinLengthFacet)
                      and base_is_string):
                    schema['minLength'] = validator.value
                elif (isinstance(validator, xmlschema.validators.XsdMaxLengthFacet)
                      and base_is_string):
                    schema['maxLength'] = validator.value
                elif (isinstance(validator, xmlschema.validators.XsdLengthFacet)
                      and base_is_string):
                    # an exact length is a min and a max of the same value
                    schema['minLength'] = validator.value
                    schema['maxLength'] = validator.value
                else:
                    raise NotImplementedError(validator)
            return schema
        raise NotImplementedError(simple_type)

    @classmethod
    def attributegroup_to_jsonschema(cls, attributegroup, schema):
        '''Add one property per attribute of `attributegroup` to `schema`.

        `schema` is modified in place, its 'properties' mapping is created
        if missing. Attributes defined by reference raise
        NotImplementedError.
        '''
        assert isinstance(attributegroup, xmlschema.validators.XsdAttributeGroup)

        properties = schema.setdefault('properties', OrderedDict())
        for component in attributegroup.iter_component():
            if component.ref:
                raise NotImplementedError(component)
            # NOTE(review): the property is keyed by the bare attribute name;
            # confirm it matches the key produced by the XML->JSON converter
            # in use (some converters prefix attribute names).
            properties[component.name] = cls.simpletype_to_jsonschema(component.type)

    @classmethod
    def group_to_jsonschema(cls, group, schema):
        '''Add one property per element of the model group to `schema`.

        `schema` is modified in place. Nested groups are flattened into the
        same 'properties' mapping. Any component which is neither an element
        nor a group raises NotImplementedError.
        '''
        assert isinstance(group, xmlschema.validators.XsdGroup)

        properties = schema.setdefault('properties', OrderedDict())
        for component in group:
            if isinstance(component, xmlschema.validators.XsdElement):
                properties[component.name] = cls.element_to_jsonschema(component)
            elif isinstance(component, xmlschema.validators.XsdGroup):
                cls.group_to_jsonschema(component, schema)
            else:
                raise NotImplementedError(component)

    @classmethod
    def type_to_jsonschema(cls, xmltype, depth=0):
        '''Return the JSON schema of an XSD type.

        depth -- recursion level through base types; only at depth 0 are
                 simple and simple-content schemas wrapped in a `oneOf`
                 which also accepts null.

        Simple-content types with attributes become an object whose
        properties are the attributes plus '$' for the content.
        Element-only complex types become an object with
        additionalProperties set to False. Mixed content raises
        NotImplementedError.
        '''
        assert isinstance(xmltype, xmlschema.validators.XsdType)

        if xmltype.is_simple():
            schema = cls.simpletype_to_jsonschema(xmltype)
            if depth == 0:
                schema = {'oneOf': [schema, {'type': 'null'}]}
            return schema
        elif xmltype.has_simple_content():
            base_schema = cls.type_to_jsonschema(xmltype.base_type, depth=depth + 1)
            if not xmltype.attributes:
                schema = base_schema
            else:
                # the container must exist before attributes are added to it
                # (it was previously used without being initialized)
                schema = OrderedDict({'type': 'object'})
                cls.attributegroup_to_jsonschema(xmltype.attributes, schema)
                schema['properties']['$'] = base_schema
            if depth == 0:
                schema = {'oneOf': [schema, {'type': 'null'}]}
            return schema
        else:
            if xmltype.has_mixed_content() or not xmltype.is_element_only():
                raise NotImplementedError(xmltype)

            schema = OrderedDict({'type': 'object'})
            schema['additionalProperties'] = False
            if xmltype.attributes:
                # pass the attribute group itself, not only the target schema
                cls.attributegroup_to_jsonschema(xmltype.attributes, schema)
            cls.group_to_jsonschema(xmltype.content_type, schema)
            return schema

    @classmethod
    def element_to_jsonschema(cls, element):
        '''Return the JSON schema of an XSD element.

        Elements which can occur more than once (max_occurs greater than 1,
        or None) are described as an array of the schema of their type, with
        minItems and, when max_occurs is not None, maxItems. Other elements
        are described directly by the schema of their type.
        '''
        assert isinstance(element, xmlschema.validators.XsdElement)

        # test for None first: `None > 1` raises TypeError on Python 3
        is_array = element.max_occurs is None or element.max_occurs > 1
        type_schema = cls.type_to_jsonschema(element.type)
        if not is_array:
            return type_schema

        d = {
            'type': 'array',
            'items': type_schema,
            'minItems': element.min_occurs,
        }
        if element.max_occurs is not None:
            d['maxItems'] = element.max_occurs
        return d

    def validate(self, instance):
        '''Validate `instance` against the converted JSON schema.

        Delegates to jsonschema.validate() and returns its result.
        '''
        return jsonschema.validate(instance=instance, schema=self.json_schema)
setup.py
110 110
            'paramiko',
111 111
            'pdfrw',
112 112
            'httplib2',
113
            'xmlschema',
113 114
        ],
114 115
        cmdclass={
115 116
            'build': build,
tests/data/pacs-doc.xml
1
<?xml version="1.0" encoding="UTF-8" ?>
2
<PACS>
3
	<partenaire1>
4
		<civilite>MME</civilite>
5
        <nomNaissance>Doe</nomNaissance>
6
   		<prenoms>Jane</prenoms>
7
   		<codeNationalite>FRA</codeNationalite>
8
   		<codeNationalite>BHS</codeNationalite>
9
   		<codeNationalite>BEL</codeNationalite>
10
   		<jourNaissance>28</jourNaissance>
11
   		<moisNaissance>01</moisNaissance>
12
   		<anneeNaissance>1950</anneeNaissance>
13
   		<LieuNaissance>
14
			<localite>ST ETIENNE</localite>
15
			<codePostal>42000</codePostal>
16
			<codeInsee>42218</codeInsee>
17
			<departement>Loire</departement>
18
			<codePays>FRA</codePays>
19
		</LieuNaissance>
20
   		<ofpra>false</ofpra>
21
   		<mesureJuridique>true</mesureJuridique>
22
   		<adressePostale>
23
   					<NumeroLibelleVoie>1 rue du test</NumeroLibelleVoie>
24
		<Complement1>Appartement, étage, escalier</Complement1>
25
		<Complement2>Résidence, bâtiment ou immeuble</Complement2>
26
		<LieuDitBpCommuneDeleguee>BP1</LieuDitBpCommuneDeleguee>
27
		<CodePostal>05100</CodePostal>
28
		<Localite>VILLAR ST PANCRACE</Localite>
29
		<Pays>FRA</Pays>
30
   		</adressePostale>
31
   		<adresseElectronique>mates@entrouvert.com</adresseElectronique>
32
   		<telephone>+33123456789</telephone>
33
   				<titreIdentiteVerifie>true</titreIdentiteVerifie>
34
	</partenaire1>
35
	<partenaire2>
36
		<civilite>MME</civilite>
37
        <nomNaissance>Doe</nomNaissance>
38
   		<prenoms>Jane</prenoms>
39
   		<codeNationalite>BEL</codeNationalite>
40
   		   		   		<jourNaissance>28</jourNaissance>
41
   		<moisNaissance>01</moisNaissance>
42
   		<anneeNaissance>1982</anneeNaissance>
43
   		<LieuNaissance>
44
			<localite>CLERMONT FERRAND</localite>
45
			<codePostal>63000</codePostal>
46
			<codeInsee>63113</codeInsee>
47
			<departement>Puy-de-dôme</departement>
48
			<codePays>FRA</codePays>
49
		</LieuNaissance>
50
   		<ofpra>false</ofpra>
51
   		<mesureJuridique>true</mesureJuridique>
52
   		<adressePostale>
53
   					<NumeroLibelleVoie>2 rue du test</NumeroLibelleVoie>
54
								<CodePostal>05100</CodePostal>
55
		<Localite>VILLAR ST PANCRACE</Localite>
56
		<Pays>FRA</Pays>
57
   		</adressePostale>
58
   		<adresseElectronique>mates@entrouvert.com</adresseElectronique>
59
   		<telephone>+33123456789</telephone>
60
   				<titreIdentiteVerifie>false</titreIdentiteVerifie>
61
	</partenaire2>
62
	<convention>
63
						<conventionType>
64
			<aideMaterielMontant>100000</aideMaterielMontant>
65
			<regimePacs>legal</regimePacs>
66
			<aideMateriel>
67
				<typeAideMateriel>aideFixe</typeAideMateriel>
68
			</aideMateriel>
69
		</conventionType>
70
			</convention>
71
	<residenceCommune>
72
								<NumeroLibelleVoie>3 place du test</NumeroLibelleVoie>
73
								<CodePostal>05100</CodePostal>
74
		<Localite>VILLAR ST PANCRACE</Localite>
75
		<Pays></Pays>
76
	</residenceCommune>
77
	<attestationHonneur>
78
		<nonParente>true</nonParente>
79
		<residenceCommune>true</residenceCommune>
80
	</attestationHonneur>
81

  
82
</PACS>
83

  
84

  
85

  
86

  
87

  
88

  
89

  
90

  
91

  
92

  
93

  
94

  
95

  
96

  
97

  
98

  
99

  
100

  
101

  
tests/test_utils_xml.py
1
# passerelle - uniform access to multiple data sources and services
2
# Copyright (C) 2019 Entr'ouvert
3
#
4
# This program is free software: you can redistribute it and/or modify it
5
# under the terms of the GNU Affero General Public License as published
6
# by the Free Software Foundation, either version 3 of the License, or
7
# (at your option) any later version.
8
#
9
# This program is distributed in the hope that it will be useful,
10
# but WITHOUT ANY WARRANTY; without even the implied warranty of
11
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12
# GNU Affero General Public License for more details.
13
#
14
# You should have received a copy of the GNU Affero General Public License
15
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
16

  
1 17
import xml.etree.ElementTree as ET
2 18

  
3
from passerelle.utils.xml import to_json, text_content
19
import xmlschema
20

  
21
import jsonschema
22

  
23
from passerelle.utils.xml import to_json, text_content, JSONSchemaFromXMLSchema
24
from passerelle.utils.json import flatten_json_schema, flatten, unflatten
4 25

  
5 26

  
6 27
def test_text_content():
......
31 52
            {'text3': '4'},
32 53
        ]
33 54
    }
55

  
56

  
57
def test_xmlschema_to_jsonschema():
    '''Check the XMLSchema to JSON schema conversion on a PACS document,
    including a flatten/unflatten round trip of the converted data.'''
    schema_path = 'passerelle/apps/sp_fr/depotDossierPACS.XSD'

    # go from XML to JSON,
    # convert XMLSchema to JSONSchema
    # validate jsonschema, on converted data,
    # flatten the JSON schema,
    # flatten the data,
    # validate flattened data with flattened JSON schema
    # unflatten data
    # convert unflattened data to XML
    # convert XML to JSON
    # then compare to initially converted JSON data
    schema = xmlschema.XMLSchema(schema_path, converter=xmlschema.UnorderedConverter)
    json_schema = JSONSchemaFromXMLSchema(schema, 'PACS')
    d = schema.elements['PACS'].decode(ET.parse('tests/data/pacs-doc.xml').getroot())
    d = {'PACS': d}
    json_schema.validate(d)
    flattened_json_schema = flatten_json_schema(json_schema.json_schema)
    flattened_d = flatten(d)
    jsonschema.validate(instance=flattened_d, schema=flattened_json_schema)
    # unflatten the flattened data (not the original one), otherwise the
    # flatten/unflatten round trip is never exercised
    d2 = unflatten(flattened_d)
    json_schema.validate(d2)

    tree = schema.elements['PACS'].encode(d2['PACS'])
    d3 = schema.elements['PACS'].decode(tree)
    assert d == {'PACS': d3}
34
-