From 32cfeb50377a605cbfa071da465e2f30d8c27b18 Mon Sep 17 00:00:00 2001 From: Benjamin Dauvergne Date: Mon, 27 Jun 2022 14:24:18 +0200 Subject: [PATCH] base_adresses: work around missing departments and regions in generic api.geo files (#66625) --- .../migrations/0030_auto_20220627_1511.py | 18 +++ passerelle/apps/base_adresse/models.py | 107 +++++++++++++----- 2 files changed, 94 insertions(+), 31 deletions(-) create mode 100644 passerelle/apps/base_adresse/migrations/0030_auto_20220627_1511.py diff --git a/passerelle/apps/base_adresse/migrations/0030_auto_20220627_1511.py b/passerelle/apps/base_adresse/migrations/0030_auto_20220627_1511.py new file mode 100644 index 00000000..9b4570c7 --- /dev/null +++ b/passerelle/apps/base_adresse/migrations/0030_auto_20220627_1511.py @@ -0,0 +1,18 @@ +# Generated by Django 2.2.28 on 2022-06-27 13:11 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('base_adresse', '0029_auto_20220624_0827'), + ] + + operations = [ + migrations.AlterField( + model_name='regionmodel', + name='code', + field=models.CharField(max_length=3, verbose_name='Region code'), + ), + ] diff --git a/passerelle/apps/base_adresse/models.py b/passerelle/apps/base_adresse/models.py index 9149ab69..ee28567d 100644 --- a/passerelle/apps/base_adresse/models.py +++ b/passerelle/apps/base_adresse/models.py @@ -1,5 +1,6 @@ import datetime import gzip +import itertools import json from io import StringIO from urllib import parse as urlparse @@ -508,40 +509,84 @@ class BaseAdresse(AddressResource): return result def update_api_geo_data(self): - regions_json = self.get_api_geo_endpoint('regions') - departments_json = self.get_api_geo_endpoint('departements') + regions_json = {region['code']: region for region in self.get_api_geo_endpoint('regions')} + departements_json = { + departement['code']: departement for departement in self.get_api_geo_endpoint('departements') + } + regions = {} + departements = {} + + def get_region(code_region): + if code_region not in regions: + data = regions_json.get(code_region) or self.get_api_geo_endpoint(f'regions/{code_region}') + region, created = self.regionmodel_set.get_or_create( + code=data['code'], defaults={'name': data['nom']} + ) + if not created and region.name != data['nom']: + region.name = data['nom'] + region.save() + regions[code_region] = region + return regions[code_region] + + def get_departement(code_departement): + if code_departement not in departements: + data = departements_json.get(code_departement) or self.get_api_geo_endpoint( + f'departements/{code_departement}' + ) + region = get_region(data['codeRegion']) + departement, created = self.departmentmodel_set.get_or_create( + code=data['code'], defaults={'name': data['nom'], 'region': region} + ) + if not created and departement.name != data['nom'] or departement.region != region: + departement.name = data['nom'] + departement.region = region + departement.save() + departements[code_departement] = departement + return departements[code_departement] + + def grouper(it, size): + '''Split iterator in equal size chunk of `size` elements.''' + it = iter(it) + return iter(lambda: tuple(itertools.islice(it, size)), ()) + + city_pks = set() cities_json = self.get_api_geo_endpoint('communes') - if not (regions_json and departments_json and cities_json): + if not cities_json: return - start_update = timezone.now() - - for data in regions_json: - defaults = { - 'name': data['nom'], - } - self.regionmodel_set.update_or_create(code=data['code'], defaults=defaults) - self.regionmodel_set.filter(last_update__lt=start_update).delete() - for data in departments_json: - defaults = { - 'name': data['nom'], - 'region': self.regionmodel_set.get(code=data['codeRegion']), + for batch_data in grouper(cities_json, 1000): + cities = { + (city.code, city.zipcode): city + for city in self.citymodel_set.filter(code__in=[x['code'] for x in batch_data]) } - self.departmentmodel_set.update_or_create(code=data['code'], defaults=defaults) - self.departmentmodel_set.filter(last_update__lt=start_update).delete() - - for data in cities_json: - for zipcode in data['codesPostaux']: - defaults = { - 'name': data['nom'], - 'population': data.get('population', 0), - } - if data.get('codeDepartement'): - defaults['department'] = self.departmentmodel_set.get(code=data['codeDepartement']) - if data.get('codeRegion'): - defaults['region'] = self.regionmodel_set.get(code=data['codeRegion']) - self.citymodel_set.update_or_create(code=data['code'], zipcode=zipcode, defaults=defaults) - self.citymodel_set.filter(last_update__lt=start_update).delete() + for data in list(batch_data): + for zipcode in data['codesPostaux']: + defaults = { + 'name': data['nom'], + 'population': data.get('population', 0), + } + if data.get('codeDepartement'): + defaults['department'] = get_departement(data['codeDepartement']) + if data.get('codeRegion'): + defaults['region'] = get_region(data['codeRegion']) + if (data['code'], zipcode) in cities: + city, created = cities[(data['code'], zipcode)], False + else: + city, created = self.citymodel_set.get_or_create( + code=data['code'], zipcode=zipcode, defaults=defaults + ) + if not created and any( + getattr(city, key) != defaults.get(key) + for key in ['name', 'population', 'department', 'region'] + ): + for key in ['name', 'population', 'department', 'region']: + setattr(city, key, defaults.get(key)) + city.save() + city_pks.add(city.pk) + + self.regionmodel_set.exclude(code__in=regions.keys()).delete() + self.departmentmodel_set.exclude(code__in=departements.keys()).delete() + self.citymodel_set.exclude(pk__in=city_pks).delete() def clean_addresses_cache(self): old_addresses = self.addresscachemodel_set.filter( @@ -597,7 +642,7 @@ class RegionModel(UnaccentNameMixin, models.Model): name = models.CharField(_('Region name'), max_length=150) unaccent_name = models.CharField(_('Region name ascii char'), max_length=150, null=True) - code = models.CharField(_('Region code'), max_length=2) + code = models.CharField(_('Region code'), max_length=3) last_update = models.DateTimeField(_('Last update'), null=True, auto_now=True) resource = models.ForeignKey(BaseAdresse, on_delete=models.CASCADE, verbose_name=_('BAN Connector')) -- 2.35.1