0006-utils-add-conversion-from-XMLSchema-to-JSON-schema-3.patch
passerelle/utils/xml.py | ||
---|---|---|
14 | 14 |
# You should have received a copy of the GNU Affero General Public License |
15 | 15 |
# along with this program. If not, see <http://www.gnu.org/licenses/>. |
16 | 16 | |
17 |
from collections import OrderedDict |
|
18 | ||
19 |
import jsonschema |
|
20 |
import xmlschema |
|
21 | ||
17 | 22 | |
18 | 23 |
def text_content(node): |
19 | 24 |
'''Extract text content from node and all its children. Equivalent to |
... | ... | |
81 | 86 |
if child_content: |
82 | 87 |
d[child.tag].append(child_content) |
83 | 88 |
return d |
89 | ||
90 | ||
91 |
class JSONSchemaFromXMLSchema(object):
    '''Build a JSON schema from one root element of an XML Schema.

    The resulting schema is stored in ``json_schema``; it describes an object
    with a single property, named after the root element, whose value is the
    conversion of that element's XSD type.

    Only a subset of XML Schema is handled; every construct the converter does
    not know about raises NotImplementedError with the offending component as
    argument.
    '''

    def __init__(self, xml_schema, root_element):
        '''Convert `root_element` of `xml_schema`.

        xml_schema -- an xmlschema.XMLSchema instance, or any value accepted
                      by the xmlschema.XMLSchema constructor.
        root_element -- key of the element to convert in xml_schema.elements.
        '''
        if not isinstance(xml_schema, xmlschema.XMLSchema):
            xml_schema = xmlschema.XMLSchema(xml_schema)
        self.xml_schema = xml_schema
        self.json_schema = {
            'type': 'object',
            'properties': {
                root_element: self.element_to_jsonschema(
                    xml_schema.elements[root_element]),
            },
        }

    @classmethod
    def simpletype_to_jsonschema(cls, simple_type):
        '''Return the JSON schema of an XSD simple type.

        Builtin string, integer, boolean and double types are supported, as
        well as atomic restrictions of them using the enumeration facet or,
        for strings only, the minLength, maxLength and length facets.

        Raises NotImplementedError for anything else.
        '''
        assert isinstance(simple_type, xmlschema.validators.XsdSimpleType)

        if isinstance(simple_type, xmlschema.validators.XsdAtomicBuiltin):
            if (simple_type.min_length
                    or simple_type.max_length
                    or simple_type.white_space not in ('collapse', 'preserve')
                    or simple_type.patterns):
                raise NotImplementedError(simple_type)

            # built inside the method so that the class body itself does not
            # dereference xmlschema at definition time
            builtin_json_types = {
                xmlschema.qnames.XSD_STRING: 'string',
                xmlschema.qnames.XSD_INTEGER: 'integer',
                xmlschema.qnames.XSD_BOOLEAN: 'boolean',
                xmlschema.qnames.XSD_DOUBLE: 'number',
            }
            json_type = builtin_json_types.get(simple_type.name)
            if json_type is None:
                raise NotImplementedError(simple_type)
            return {'type': json_type}

        if isinstance(simple_type, xmlschema.validators.XsdAtomicRestriction):
            if (simple_type.white_space not in ('collapse', 'preserve')
                    or simple_type.patterns):
                raise NotImplementedError(simple_type)

            # start from a copy of the base type schema and add the facets
            schema = OrderedDict(cls.simpletype_to_jsonschema(simple_type.base_type))
            # length facets are only translated for restrictions of xs:string
            base_is_string = simple_type.base_type.name == xmlschema.qnames.XSD_STRING
            for validator in simple_type.validators:
                if isinstance(validator, xmlschema.validators.XsdEnumerationFacets):
                    schema['enum'] = validator.enumeration
                elif (base_is_string
                        and isinstance(validator, xmlschema.validators.XsdMinLengthFacet)):
                    schema['minLength'] = validator.value
                elif (base_is_string
                        and isinstance(validator, xmlschema.validators.XsdMaxLengthFacet)):
                    schema['maxLength'] = validator.value
                elif (base_is_string
                        and isinstance(validator, xmlschema.validators.XsdLengthFacet)):
                    # an exact length is expressed as identical bounds
                    schema['minLength'] = validator.value
                    schema['maxLength'] = validator.value
                else:
                    raise NotImplementedError(validator)
            return schema

        raise NotImplementedError(simple_type)

    @classmethod
    def attributegroup_to_jsonschema(cls, attributegroup, schema):
        '''Add one property per attribute of `attributegroup` to `schema`.

        `schema` is modified in place (its 'properties' mapping is created if
        missing); nothing is returned. Attributes defined by reference raise
        NotImplementedError.
        '''
        assert isinstance(attributegroup, xmlschema.validators.XsdAttributeGroup)

        properties = schema.setdefault('properties', OrderedDict())
        for component in attributegroup.iter_component():
            if component.ref:
                raise NotImplementedError(component)
            properties[component.name] = cls.simpletype_to_jsonschema(component.type)

    @classmethod
    def group_to_jsonschema(cls, group, schema):
        '''Add one property per element of `group` to `schema`.

        `schema` is modified in place; nothing is returned. Elements of nested
        groups are merged into the same 'properties' mapping. Components which
        are neither elements nor groups raise NotImplementedError.
        '''
        assert isinstance(group, xmlschema.validators.XsdGroup)

        properties = schema.setdefault('properties', OrderedDict())
        for component in group:
            if isinstance(component, xmlschema.validators.XsdElement):
                properties[component.name] = cls.element_to_jsonschema(component)
            elif isinstance(component, xmlschema.validators.XsdGroup):
                cls.group_to_jsonschema(component, schema)
            else:
                raise NotImplementedError(component)

    @classmethod
    def type_to_jsonschema(cls, xmltype, depth=0):
        '''Return the JSON schema of an XSD type.

        xmltype -- the xmlschema.validators.XsdType to convert.
        depth -- recursion level; at depth 0 simple and simple-content types
                 are wrapped in a oneOf also accepting null.

        Mixed content, or complex content which is not element-only, raises
        NotImplementedError.
        '''
        assert isinstance(xmltype, xmlschema.validators.XsdType)

        if xmltype.is_simple():
            schema = cls.simpletype_to_jsonschema(xmltype)
            if depth == 0:
                schema = {'oneOf': [schema, {'type': 'null'}]}
            return schema

        if xmltype.has_simple_content():
            base_schema = cls.type_to_jsonschema(xmltype.base_type, depth=depth + 1)
            if not xmltype.attributes:
                schema = base_schema
            else:
                # the object schema must exist before attributes are added to
                # it (it was previously used before being assigned)
                schema = OrderedDict({'type': 'object'})
                cls.attributegroup_to_jsonschema(xmltype.attributes, schema)
                # the schema of the simple content goes under the '$' property
                schema['properties']['$'] = base_schema
            if depth == 0:
                schema = {'oneOf': [schema, {'type': 'null'}]}
            return schema

        if xmltype.has_mixed_content() or not xmltype.is_element_only():
            raise NotImplementedError(xmltype)

        schema = OrderedDict({'type': 'object'})
        schema['additionalProperties'] = False
        if xmltype.attributes:
            # the attribute group argument was previously missing
            cls.attributegroup_to_jsonschema(xmltype.attributes, schema)
        cls.group_to_jsonschema(xmltype.content_type, schema)
        return schema

    @classmethod
    def element_to_jsonschema(cls, element):
        '''Return the JSON schema of an XSD element.

        An element whose max_occurs is None or greater than 1 is described as
        an array of its type, bounded by min_occurs and, when it is not None,
        max_occurs; any other element is described by its type alone.
        '''
        assert isinstance(element, xmlschema.validators.XsdElement)

        # test for None first: comparing None with an int raises TypeError on
        # Python 3
        is_array = element.max_occurs is None or element.max_occurs > 1
        type_schema = cls.type_to_jsonschema(element.type)
        if not is_array:
            return type_schema

        array_schema = {
            'type': 'array',
            'items': type_schema,
            'minItems': element.min_occurs,
        }
        if element.max_occurs is not None:
            array_schema['maxItems'] = element.max_occurs
        return array_schema

    def validate(self, instance):
        '''Validate `instance` against the generated JSON schema.

        Returns whatever jsonschema.validate() returns and lets its exceptions
        propagate.
        '''
        return jsonschema.validate(instance=instance, schema=self.json_schema)
setup.py | ||
---|---|---|
109 | 109 |
'unidecode', |
110 | 110 |
'paramiko', |
111 | 111 |
'pdfrw', |
112 |
'xmlschema', |
|
112 | 113 |
], |
113 | 114 |
cmdclass={ |
114 | 115 |
'build': build, |
tests/data/pacs-doc.xml | ||
---|---|---|
1 |
<?xml version="1.0" encoding="UTF-8" ?> |
|
2 |
<PACS> |
|
3 |
<partenaire1> |
|
4 |
<civilite>MME</civilite> |
|
5 |
<nomNaissance>Doe</nomNaissance> |
|
6 |
<prenoms>Jane</prenoms> |
|
7 |
<codeNationalite>FRA</codeNationalite> |
|
8 |
<codeNationalite>BHS</codeNationalite> |
|
9 |
<codeNationalite>BEL</codeNationalite> |
|
10 |
<jourNaissance>28</jourNaissance> |
|
11 |
<moisNaissance>01</moisNaissance> |
|
12 |
<anneeNaissance>1950</anneeNaissance> |
|
13 |
<LieuNaissance> |
|
14 |
<localite>ST ETIENNE</localite> |
|
15 |
<codePostal>42000</codePostal> |
|
16 |
<codeInsee>42218</codeInsee> |
|
17 |
<departement>Loire</departement> |
|
18 |
<codePays>FRA</codePays> |
|
19 |
</LieuNaissance> |
|
20 |
<ofpra>false</ofpra> |
|
21 |
<mesureJuridique>true</mesureJuridique> |
|
22 |
<adressePostale> |
|
23 |
<NumeroLibelleVoie>1 rue du test</NumeroLibelleVoie> |
|
24 |
<Complement1>Appartement, étage, escalier</Complement1> |
|
25 |
<Complement2>Résidence, bâtiment ou immeuble</Complement2> |
|
26 |
<LieuDitBpCommuneDeleguee>BP1</LieuDitBpCommuneDeleguee> |
|
27 |
<CodePostal>05100</CodePostal> |
|
28 |
<Localite>VILLAR ST PANCRACE</Localite> |
|
29 |
<Pays>FRA</Pays> |
|
30 |
</adressePostale> |
|
31 |
<adresseElectronique>mates@entrouvert.com</adresseElectronique> |
|
32 |
<telephone>+33123456789</telephone> |
|
33 |
<titreIdentiteVerifie>true</titreIdentiteVerifie> |
|
34 |
</partenaire1> |
|
35 |
<partenaire2> |
|
36 |
<civilite>MME</civilite> |
|
37 |
<nomNaissance>Doe</nomNaissance> |
|
38 |
<prenoms>Jane</prenoms> |
|
39 |
<codeNationalite>BEL</codeNationalite> |
|
40 |
<jourNaissance>28</jourNaissance> |
|
41 |
<moisNaissance>01</moisNaissance> |
|
42 |
<anneeNaissance>1982</anneeNaissance> |
|
43 |
<LieuNaissance> |
|
44 |
<localite>CLERMONT FERRAND</localite> |
|
45 |
<codePostal>63000</codePostal> |
|
46 |
<codeInsee>63113</codeInsee> |
|
47 |
<departement>Puy-de-dôme</departement> |
|
48 |
<codePays>FRA</codePays> |
|
49 |
</LieuNaissance> |
|
50 |
<ofpra>false</ofpra> |
|
51 |
<mesureJuridique>true</mesureJuridique> |
|
52 |
<adressePostale> |
|
53 |
<NumeroLibelleVoie>2 rue du test</NumeroLibelleVoie> |
|
54 |
<CodePostal>05100</CodePostal> |
|
55 |
<Localite>VILLAR ST PANCRACE</Localite> |
|
56 |
<Pays>FRA</Pays> |
|
57 |
</adressePostale> |
|
58 |
<adresseElectronique>mates@entrouvert.com</adresseElectronique> |
|
59 |
<telephone>+33123456789</telephone> |
|
60 |
<titreIdentiteVerifie>false</titreIdentiteVerifie> |
|
61 |
</partenaire2> |
|
62 |
<convention> |
|
63 |
<conventionType> |
|
64 |
<aideMaterielMontant>100000</aideMaterielMontant> |
|
65 |
<regimePacs>legal</regimePacs> |
|
66 |
<aideMateriel> |
|
67 |
<typeAideMateriel>aideFixe</typeAideMateriel> |
|
68 |
</aideMateriel> |
|
69 |
</conventionType> |
|
70 |
</convention> |
|
71 |
<residenceCommune> |
|
72 |
<NumeroLibelleVoie>3 place du test</NumeroLibelleVoie> |
|
73 |
<CodePostal>05100</CodePostal> |
|
74 |
<Localite>VILLAR ST PANCRACE</Localite> |
|
75 |
<Pays></Pays> |
|
76 |
</residenceCommune> |
|
77 |
<attestationHonneur> |
|
78 |
<nonParente>true</nonParente> |
|
79 |
<residenceCommune>true</residenceCommune> |
|
80 |
</attestationHonneur> |
|
81 |
|
|
82 |
</PACS> |
|
83 |
|
|
84 |
|
|
85 |
|
|
86 |
|
|
87 |
|
|
88 |
|
|
89 |
|
|
90 |
|
|
91 |
|
|
92 |
|
|
93 |
|
|
94 |
|
|
95 |
|
|
96 |
|
|
97 |
|
|
98 |
|
|
99 |
|
|
100 |
|
|
101 |
|
tests/test_utils_xml.py | ||
---|---|---|
1 |
# passerelle - uniform access to multiple data sources and services |
|
2 |
# Copyright (C) 2019 Entr'ouvert |
|
3 |
# |
|
4 |
# This program is free software: you can redistribute it and/or modify it |
|
5 |
# under the terms of the GNU Affero General Public License as published |
|
6 |
# by the Free Software Foundation, either version 3 of the License, or |
|
7 |
# (at your option) any later version. |
|
8 |
# |
|
9 |
# This program is distributed in the hope that it will be useful, |
|
10 |
# but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
11 |
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
|
12 |
# GNU Affero General Public License for more details. |
|
13 |
# |
|
14 |
# You should have received a copy of the GNU Affero General Public License |
|
15 |
# along with this program. If not, see <http://www.gnu.org/licenses/>. |
|
16 | ||
1 | 17 |
import xml.etree.ElementTree as ET |
2 | 18 | |
3 |
from passerelle.utils.xml import to_json, text_content |
|
19 |
import xmlschema |
|
20 | ||
21 |
import jsonschema |
|
22 | ||
23 |
from passerelle.utils.xml import to_json, text_content, JSONSchemaFromXMLSchema |
|
24 |
from passerelle.utils.json import flatten_json_schema, flatten, unflatten |
|
4 | 25 | |
5 | 26 | |
6 | 27 |
def test_text_content(): |
... | ... | |
31 | 52 |
{'text3': '4'}, |
32 | 53 |
] |
33 | 54 |
} |
55 | ||
56 | ||
57 |
def test_xmlschema_to_jsonschema():
    '''Round-trip a PACS document through the JSON schema conversion helpers.'''
    schema_path = 'passerelle/apps/sp_fr/depotDossierPACS.XSD'

    # go from XML to JSON,
    # convert XMLSchema to JSONSchema
    # validate jsonschema, on converted data,
    # flatten the JSON schema,
    # flatten the data,
    # validate flattened data with flattened JSON schema
    # unflatten data
    # convert unflattened data to XML
    # convert XML to JSON
    # then compare to initially converted JSON data
    schema = xmlschema.XMLSchema(schema_path, converter=xmlschema.UnorderedConverter)
    json_schema = JSONSchemaFromXMLSchema(schema, 'PACS')
    d = schema.elements['PACS'].decode(ET.parse('tests/data/pacs-doc.xml').getroot())
    d = {'PACS': d}
    json_schema.validate(d)
    flattened_json_schema = flatten_json_schema(json_schema.json_schema)
    flattened_d = flatten(d)
    jsonschema.validate(instance=flattened_d, schema=flattened_json_schema)
    # unflatten the *flattened* data, otherwise the flatten/unflatten round
    # trip is never actually exercised
    d2 = unflatten(flattened_d)
    json_schema.validate(d2)

    tree = schema.elements['PACS'].encode(d2['PACS'])
    d3 = schema.elements['PACS'].decode(tree)
    assert d == {'PACS': d3}
|
34 |
- |