Project

General

Profile

0006-utils-add-conversion-from-XMLSchema-to-JSON-schema-3.patch

Benjamin Dauvergne, 21 Oct 2019 10:25 AM

Download (13 KB)

View differences:

Subject: [PATCH 6/8] utils: add conversion from XMLSchema to JSON schema
 (#35818)

We target the Draft 7 jsonschema specification.
 passerelle/utils/xml.py | 139 ++++++++++++++++++++++++++++++++++++++++
 setup.py                |   1 +
 tests/data/pacs-doc.xml | 101 +++++++++++++++++++++++++++++
 tests/test_utils_xml.py |  52 ++++++++++++++-
 4 files changed, 292 insertions(+), 1 deletion(-)
 create mode 100644 tests/data/pacs-doc.xml
passerelle/utils/xml.py
14 14
# You should have received a copy of the GNU Affero General Public License
15 15
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
16 16

  
17
from collections import OrderedDict
18

  
19
import jsonschema
20
import xmlschema
21

  
17 22

  
18 23
def text_content(node):
19 24
    '''Extract text content from node and all its children. Equivalent to
......
81 86
                if child_content:
82 87
                    d[child.tag].append(child_content)
83 88
    return d
89

  
90

  
91
class JSONSchemaFromXMLSchema(object):
    '''Build a JSON schema describing the JSON form of an XML Schema element.

    The generated schema is stored in ``json_schema``: an object schema with
    a single property named after the requested root element. XML Schema
    constructs which are not explicitly handled below raise
    NotImplementedError instead of being silently ignored.
    '''

    def __init__(self, xml_schema, root_element):
        '''Convert global element ``root_element`` of ``xml_schema``.

        xml_schema -- an xmlschema.XMLSchema instance, or any value accepted
                      by the xmlschema.XMLSchema constructor
        root_element -- key of the element in ``xml_schema.elements``
        '''
        if not isinstance(xml_schema, xmlschema.XMLSchema):
            xml_schema = xmlschema.XMLSchema(xml_schema)
        self.xml_schema = xml_schema
        self.json_schema = {
            'type': 'object',
            'properties': {
                root_element: self.element_to_jsonschema(
                    xml_schema.elements[root_element]),
            },
        }

    @classmethod
    def simpletype_to_jsonschema(cls, simple_type):
        '''Return the JSON schema of an XSD simple type.

        Only the string, integer, boolean and double builtins are supported,
        along with restrictions of those carrying enumeration facets or, for
        strings, length facets. Anything else raises NotImplementedError.
        '''
        assert isinstance(simple_type, xmlschema.validators.XsdSimpleType)

        if isinstance(simple_type, xmlschema.validators.XsdAtomicBuiltin):
            # facets on a builtin cannot be expressed by the mapping below
            if (simple_type.min_length
                    or simple_type.max_length
                    or simple_type.white_space not in ('collapse', 'preserve')
                    or simple_type.patterns):
                raise NotImplementedError(simple_type)

            if simple_type.name == xmlschema.qnames.XSD_STRING:
                schema = {'type': 'string'}
            elif simple_type.name == xmlschema.qnames.XSD_INTEGER:
                schema = {'type': 'integer'}
            elif simple_type.name == xmlschema.qnames.XSD_BOOLEAN:
                schema = {'type': 'boolean'}
            elif simple_type.name == xmlschema.qnames.XSD_DOUBLE:
                schema = {'type': 'number'}
            else:
                raise NotImplementedError(simple_type)
            return schema
        elif isinstance(simple_type, xmlschema.validators.XsdAtomicRestriction):
            if (simple_type.white_space not in ('collapse', 'preserve')
                    or simple_type.patterns):
                raise NotImplementedError(simple_type)
            # start from a copy of the base type schema, then add one
            # constraint per facet
            schema = OrderedDict(cls.simpletype_to_jsonschema(simple_type.base_type))
            base_is_string = simple_type.base_type.name == xmlschema.qnames.XSD_STRING
            for validator in simple_type.validators:
                if isinstance(validator, xmlschema.validators.XsdEnumerationFacets):
                    schema['enum'] = validator.enumeration
                elif (isinstance(validator, xmlschema.validators.XsdMinLengthFacet)
                      and base_is_string):
                    schema['minLength'] = validator.value
                elif (isinstance(validator, xmlschema.validators.XsdMaxLengthFacet)
                      and base_is_string):
                    schema['maxLength'] = validator.value
                elif (isinstance(validator, xmlschema.validators.XsdLengthFacet)
                      and base_is_string):
                    # an exact length is expressed as min == max
                    schema['minLength'] = validator.value
                    schema['maxLength'] = validator.value
                else:
                    raise NotImplementedError(validator)
            return schema
        raise NotImplementedError(simple_type)

    @classmethod
    def attributegroup_to_jsonschema(cls, attributegroup, schema):
        '''Add one property per attribute of ``attributegroup`` to ``schema``.

        ``schema`` is modified in place (its 'properties' mapping is created
        if missing); nothing is returned. Attribute references are not
        supported and raise NotImplementedError.
        '''
        assert isinstance(attributegroup, xmlschema.validators.XsdAttributeGroup)

        properties = schema.setdefault('properties', OrderedDict())
        for component in attributegroup.iter_component():
            if component.ref:
                raise NotImplementedError(component)
            else:
                properties[component.name] = cls.simpletype_to_jsonschema(component.type)

    @classmethod
    def group_to_jsonschema(cls, group, schema):
        '''Add one property per element of ``group`` to ``schema``.

        ``schema`` is modified in place; nothing is returned. Nested groups
        are merged into the same 'properties' mapping. Components which are
        neither elements nor groups raise NotImplementedError.
        '''
        assert isinstance(group, xmlschema.validators.XsdGroup)

        properties = schema.setdefault('properties', OrderedDict())
        for component in group:
            if isinstance(component, xmlschema.validators.XsdElement):
                properties[component.name] = cls.element_to_jsonschema(component)
            elif isinstance(component, xmlschema.validators.XsdGroup):
                cls.group_to_jsonschema(component, schema)
            else:
                raise NotImplementedError(component)

    @classmethod
    def type_to_jsonschema(cls, xmltype, depth=0):
        '''Return the JSON schema of an XSD type.

        xmltype -- an xmlschema.validators.XsdType
        depth -- 0 for the type of an element; incremented when recursing
                 into the base type of a simple-content type. At depth 0,
                 simple and simple-content schemas also accept null.

        Mixed content, and complex content which is not element-only, raise
        NotImplementedError.
        '''
        assert isinstance(xmltype, xmlschema.validators.XsdType)

        if xmltype.is_simple():
            schema = cls.simpletype_to_jsonschema(xmltype)
            if depth == 0:
                schema = {'oneOf': [schema, {'type': 'null'}]}
            return schema
        elif xmltype.has_simple_content():
            base_schema = cls.type_to_jsonschema(xmltype.base_type, depth=depth + 1)
            if not xmltype.attributes:
                schema = base_schema
            else:
                # the object schema must exist before attributes are added
                # to it; the text content is described by the '$' property
                schema = OrderedDict({'type': 'object'})
                cls.attributegroup_to_jsonschema(xmltype.attributes, schema)
                schema['properties']['$'] = base_schema
            if depth == 0:
                schema = {'oneOf': [schema, {'type': 'null'}]}
            return schema
        else:
            if xmltype.has_mixed_content() or not xmltype.is_element_only():
                raise NotImplementedError(xmltype)

            schema = OrderedDict({'type': 'object'})
            schema['additionalProperties'] = False
            if xmltype.attributes:
                cls.attributegroup_to_jsonschema(xmltype.attributes, schema)
            cls.group_to_jsonschema(xmltype.content_type, schema)
            return schema

    @classmethod
    def element_to_jsonschema(cls, element):
        '''Return the JSON schema of an XSD element.

        An element whose max_occurs is None or greater than 1 is described
        as an array of its type schema, with minItems taken from min_occurs
        and maxItems only emitted when max_occurs is not None; otherwise the
        type schema is returned directly.
        '''
        assert isinstance(element, xmlschema.validators.XsdElement)

        # test for None first: ordering None against an int raises TypeError
        # on Python 3
        is_array = element.max_occurs is None or element.max_occurs > 1
        type_schema = cls.type_to_jsonschema(element.type)
        if is_array:
            d = {
                'type': 'array',
                'items': type_schema,
                'minItems': element.min_occurs,
            }
            if element.max_occurs is not None:
                d['maxItems'] = element.max_occurs
            return d
        else:
            return type_schema

    def validate(self, instance):
        '''Validate ``instance`` against the generated JSON schema.

        Returns whatever jsonschema.validate() returns; its exceptions are
        not caught here.
        '''
        return jsonschema.validate(instance=instance, schema=self.json_schema)
setup.py
109 109
            'unidecode',
110 110
            'paramiko',
111 111
            'pdfrw',
112
            'xmlschema',
112 113
        ],
113 114
        cmdclass={
114 115
            'build': build,
tests/data/pacs-doc.xml
1
<?xml version="1.0" encoding="UTF-8" ?>
2
<PACS>
3
	<partenaire1>
4
		<civilite>MME</civilite>
5
        <nomNaissance>Doe</nomNaissance>
6
   		<prenoms>Jane</prenoms>
7
   		<codeNationalite>FRA</codeNationalite>
8
   		<codeNationalite>BHS</codeNationalite>
9
   		<codeNationalite>BEL</codeNationalite>
10
   		<jourNaissance>28</jourNaissance>
11
   		<moisNaissance>01</moisNaissance>
12
   		<anneeNaissance>1950</anneeNaissance>
13
   		<LieuNaissance>
14
			<localite>ST ETIENNE</localite>
15
			<codePostal>42000</codePostal>
16
			<codeInsee>42218</codeInsee>
17
			<departement>Loire</departement>
18
			<codePays>FRA</codePays>
19
		</LieuNaissance>
20
   		<ofpra>false</ofpra>
21
   		<mesureJuridique>true</mesureJuridique>
22
   		<adressePostale>
23
   					<NumeroLibelleVoie>1 rue du test</NumeroLibelleVoie>
24
		<Complement1>Appartement, étage, escalier</Complement1>
25
		<Complement2>Résidence, bâtiment ou immeuble</Complement2>
26
		<LieuDitBpCommuneDeleguee>BP1</LieuDitBpCommuneDeleguee>
27
		<CodePostal>05100</CodePostal>
28
		<Localite>VILLAR ST PANCRACE</Localite>
29
		<Pays>FRA</Pays>
30
   		</adressePostale>
31
   		<adresseElectronique>mates@entrouvert.com</adresseElectronique>
32
   		<telephone>+33123456789</telephone>
33
   				<titreIdentiteVerifie>true</titreIdentiteVerifie>
34
	</partenaire1>
35
	<partenaire2>
36
		<civilite>MME</civilite>
37
        <nomNaissance>Doe</nomNaissance>
38
   		<prenoms>Jane</prenoms>
39
   		<codeNationalite>BEL</codeNationalite>
40
   		   		   		<jourNaissance>28</jourNaissance>
41
   		<moisNaissance>01</moisNaissance>
42
   		<anneeNaissance>1982</anneeNaissance>
43
   		<LieuNaissance>
44
			<localite>CLERMONT FERRAND</localite>
45
			<codePostal>63000</codePostal>
46
			<codeInsee>63113</codeInsee>
47
			<departement>Puy-de-dôme</departement>
48
			<codePays>FRA</codePays>
49
		</LieuNaissance>
50
   		<ofpra>false</ofpra>
51
   		<mesureJuridique>true</mesureJuridique>
52
   		<adressePostale>
53
   					<NumeroLibelleVoie>2 rue du test</NumeroLibelleVoie>
54
								<CodePostal>05100</CodePostal>
55
		<Localite>VILLAR ST PANCRACE</Localite>
56
		<Pays>FRA</Pays>
57
   		</adressePostale>
58
   		<adresseElectronique>mates@entrouvert.com</adresseElectronique>
59
   		<telephone>+33123456789</telephone>
60
   				<titreIdentiteVerifie>false</titreIdentiteVerifie>
61
	</partenaire2>
62
	<convention>
63
						<conventionType>
64
			<aideMaterielMontant>100000</aideMaterielMontant>
65
			<regimePacs>legal</regimePacs>
66
			<aideMateriel>
67
				<typeAideMateriel>aideFixe</typeAideMateriel>
68
			</aideMateriel>
69
		</conventionType>
70
			</convention>
71
	<residenceCommune>
72
								<NumeroLibelleVoie>3 place du test</NumeroLibelleVoie>
73
								<CodePostal>05100</CodePostal>
74
		<Localite>VILLAR ST PANCRACE</Localite>
75
		<Pays></Pays>
76
	</residenceCommune>
77
	<attestationHonneur>
78
		<nonParente>true</nonParente>
79
		<residenceCommune>true</residenceCommune>
80
	</attestationHonneur>
81

  
82
</PACS>
83

  
84

  
85

  
86

  
87

  
88

  
89

  
90

  
91

  
92

  
93

  
94

  
95

  
96

  
97

  
98

  
99

  
100

  
101

  
tests/test_utils_xml.py
1
# passerelle - uniform access to multiple data sources and services
2
# Copyright (C) 2019 Entr'ouvert
3
#
4
# This program is free software: you can redistribute it and/or modify it
5
# under the terms of the GNU Affero General Public License as published
6
# by the Free Software Foundation, either version 3 of the License, or
7
# (at your option) any later version.
8
#
9
# This program is distributed in the hope that it will be useful,
10
# but WITHOUT ANY WARRANTY; without even the implied warranty of
11
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12
# GNU Affero General Public License for more details.
13
#
14
# You should have received a copy of the GNU Affero General Public License
15
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
16

  
1 17
import xml.etree.ElementTree as ET
2 18

  
3
from passerelle.utils.xml import to_json, text_content
19
import xmlschema
20

  
21
import jsonschema
22

  
23
from passerelle.utils.xml import to_json, text_content, JSONSchemaFromXMLSchema
24
from passerelle.utils.json import flatten_json_schema, flatten, unflatten
4 25

  
5 26

  
6 27
def test_text_content():
......
31 52
            {'text3': '4'},
32 53
        ]
33 54
    }
55

  
56

  
57
def test_xmlschema_to_jsonschema():
    '''Round-trip a PACS document through the XMLSchema/JSON schema helpers.'''
    schema_path = 'passerelle/apps/sp_fr/depotDossierPACS.XSD'

    # go from XML to JSON,
    # convert XMLSchema to JSONSchema
    # validate jsonschema, on converted data,
    # flatten the JSON schema,
    # flatten the data,
    # validate flattened data with flattened JSON schema
    # unflatten data
    # convert unflattened data to XML
    # convert XML to JSON
    # then compare to initially converted JSON data
    schema = xmlschema.XMLSchema(schema_path, converter=xmlschema.UnorderedConverter)
    json_schema = JSONSchemaFromXMLSchema(schema, 'PACS')
    d = schema.elements['PACS'].decode(ET.parse('tests/data/pacs-doc.xml').getroot())
    d = {'PACS': d}
    json_schema.validate(d)
    flattened_json_schema = flatten_json_schema(json_schema.json_schema)
    flattened_d = flatten(d)
    jsonschema.validate(instance=flattened_d, schema=flattened_json_schema)
    # unflatten the *flattened* data, otherwise the flatten/unflatten round
    # trip announced above is never exercised
    d2 = unflatten(flattened_d)
    json_schema.validate(d2)

    tree = schema.elements['PACS'].encode(d2['PACS'])
    d3 = schema.elements['PACS'].decode(tree)
    assert d == {'PACS': d3}
34
-