Projet

Général

Profil

0001-base_adresse-use-new-ban-data-38204.patch

Lauréline Guérin, 27 avril 2020 10:12

Télécharger (14,9 ko)

Voir les différences:

Subject: [PATCH] base_adresse: use new ban data (#38204)

 passerelle/apps/base_adresse/models.py |  51 ++++++++++++++-----------
 tests/data/update_streets_test.bz2     | Bin 956 -> 0 bytes
 tests/data/update_streets_test.csv.gz  | Bin 0 -> 694 bytes
 tests/test_base_adresse.py             |  46 ++++++++++++----------
 tests/test_jobs.py                     |   7 ++--
 5 files changed, 59 insertions(+), 45 deletions(-)
 delete mode 100644 tests/data/update_streets_test.bz2
 create mode 100644 tests/data/update_streets_test.csv.gz
passerelle/apps/base_adresse/models.py
1
import bz2
1
import csv
2 2
import datetime
3
import gzip
3 4

  
4 5
from jsonfield import JSONField
5 6
from requests import RequestException
......
10 11
from django.utils.translation import ugettext_lazy as _
11 12
from django.utils import timezone
12 13
from django.utils import six
14
from django.utils.six import StringIO
13 15
from django.utils.six.moves.urllib import parse as urlparse
14 16

  
15 17
from passerelle.base.models import BaseResource
16
from passerelle.compat import json_loads
17 18
from passerelle.utils.api import endpoint
18 19
from passerelle.utils.conversion import simplify
19 20
from passerelle.utils.jsonresponse import APIError
......
354 355
                departments.add(zipcode[:2])
355 356

  
356 357
        for department in departments:
357
            ban_file = self.requests.get(
358
                'http://bano.openstreetmap.fr/BAN_odbl/BAN_odbl_{}-json.bz2'.format(department))
359
            if ban_file.status_code != 200:
358
            ban_gz = self.requests.get(
359
                'https://adresse.data.gouv.fr/data/ban/adresses/latest/csv/adresses-{}.csv.gz'.format(department))
360
            if ban_gz.status_code != 200:
360 361
                continue
361 362

  
362
            line = _not_found = object()
363
            for line in bz2.decompress(ban_file.content).splitlines():
364
                street_info = json_loads(line)
365
                if street_info['type'] == 'street' and street_info['postcode'].startswith(zipcodes):
366
                    if type(street_info['citycode']) is list:
367
                        street_info['citycode'] = six.text_type(street_info['citycode'][0])
368
                    if type(street_info['name']) is list:
369
                        street_info['name'] = six.text_type(street_info['name'][0])
370
                    street = StreetModel.objects.get_or_create(citycode=street_info['citycode'],
371
                                                               name=street_info['name'][:150])
372
                    street[0].city = street_info['city']
373
                    street[0].name = street_info['name'][:150]
374
                    street[0].zipcode = street_info['postcode']
375
                    street[0].type = street_info['type']
376
                    street[0].citycode = street_info['citycode']
377
                    street[0].save()
378
            if line is _not_found:
363
            street_info = _not_found = object()
364
            if six.PY3:
365
                ban_file = StringIO(gzip.decompress(ban_gz.content).decode('utf-8'))
366
            else:
367
                ban_file = gzip.GzipFile(fileobj=StringIO(ban_gz.content))
368
            reader = csv.DictReader(ban_file, delimiter=';')
369
            for street_info in reader:
370
                if street_info['code_postal'].startswith(zipcodes):
371
                    defaults = {
372
                        'city': street_info['nom_commune'],
373
                        'zipcode': street_info['code_postal'],
374
                        'type': 'street',
375
                        'citycode': street_info['code_insee']
376
                    }
377
                    street, created = StreetModel.objects.get_or_create(
378
                        citycode=street_info['code_insee'],
379
                        name=street_info['nom_voie'][:150],
380
                        defaults=defaults)
381
                    if not created:
382
                        for k, v in defaults.items():
383
                            setattr(street, k, v)
384
                        street.save()
385
            if street_info is _not_found:
379 386
                raise Exception('bano file is empty')
380 387

  
381 388
        self.get_streets_queryset().filter(last_update__lt=start_update).delete()
tests/test_base_adresse.py
348 348
    assert resp.json['err'] == 0
349 349
    assert len(resp.json['data']) == 0
350 350

  
351

  
351 352
@pytest.mark.usefixtures('mock_update_api_geo')
352 353
@mock.patch('passerelle.utils.Request.get')
353 354
def test_base_adresse_command_update(mocked_get, db, base_adresse):
354
    filepath = os.path.join(os.path.dirname(__file__), 'data', 'update_streets_test.bz2')
355
    mocked_get.return_value = utils.FakedResponse(content=open(filepath, 'rb').read(), status_code=200)
355
    filepath = os.path.join(os.path.dirname(__file__), 'data', 'update_streets_test.csv.gz')
356
    with open(filepath, 'rb') as ban_file:
357
        mocked_get.return_value = utils.FakedResponse(content=ban_file.read(), status_code=200)
356 358
    call_command('cron', 'daily')
357
    mocked_get.assert_called_once_with('http://bano.openstreetmap.fr/BAN_odbl/BAN_odbl_73-json.bz2')
359
    mocked_get.assert_called_once_with('https://adresse.data.gouv.fr/data/ban/adresses/latest/csv/adresses-73.csv.gz')
358 360
    streets = StreetModel.objects.all()
359 361
    assert len(streets) == 3
360 362
    street = StreetModel.objects.order_by('id').first()
361
    assert street.name == 'Chemin de la Vie, LA GRANGE DU TRIEU'
363
    assert street.name == 'Chemin de la Vie'
362 364
    assert street.zipcode == '73610'
363 365
    assert street.type == 'street'
364 366
    assert street.city == 'Aiguebelette-le-Lac'
......
372 374
@mock.patch('passerelle.utils.Request.get')
373 375
def test_base_adresse_command_hourly_update(mocked_get, db, base_adresse):
374 376
    base_adresse.update_api_geo_data = lambda: None
375
    filepath = os.path.join(os.path.dirname(__file__), 'data', 'update_streets_test.bz2')
376
    mocked_get.return_value = utils.FakedResponse(content=open(filepath, 'rb').read(), status_code=200)
377
    filepath = os.path.join(os.path.dirname(__file__), 'data', 'update_streets_test.csv.gz')
378
    with open(filepath, 'rb') as ban_file:
379
        mocked_get.return_value = utils.FakedResponse(content=ban_file.read(), status_code=200)
377 380
    # check the first hourly job downloads streets
378 381
    call_command('cron', 'hourly')
379
    mocked_get.assert_called_once_with('http://bano.openstreetmap.fr/BAN_odbl/BAN_odbl_73-json.bz2')
382
    mocked_get.assert_called_once_with('https://adresse.data.gouv.fr/data/ban/adresses/latest/csv/adresses-73.csv.gz')
380 383
    assert StreetModel.objects.all().count() == 3
381 384
    # check a second call doesn't download anything
382 385
    call_command('cron', 'hourly')
......
387 390
@mock.patch('passerelle.utils.Request.get')
388 391
def test_base_adresse_command_update_97x(mocked_get, db, base_adresse_97x):
389 392
    base_adresse_97x.update_api_geo_data = lambda: None
390
    filepath = os.path.join(os.path.dirname(__file__), 'data', 'update_streets_test.bz2')
391
    mocked_get.return_value = utils.FakedResponse(content=open(filepath, 'rb').read(), status_code=200)
393
    filepath = os.path.join(os.path.dirname(__file__), 'data', 'update_streets_test.csv.gz')
394
    with open(filepath, 'rb') as ban_file:
395
        mocked_get.return_value = utils.FakedResponse(content=ban_file.read(), status_code=200)
392 396
    call_command('cron', 'daily')
393
    mocked_get.assert_called_once_with('http://bano.openstreetmap.fr/BAN_odbl/BAN_odbl_974-json.bz2')
397
    mocked_get.assert_called_once_with('https://adresse.data.gouv.fr/data/ban/adresses/latest/csv/adresses-974.csv.gz')
394 398
    assert StreetModel.objects.count() == 2
395 399

  
396 400

  
......
398 402
@mock.patch('passerelle.utils.Request.get')
399 403
def test_base_adresse_command_update_corsica(mocked_get, db, base_adresse_corsica):
400 404
    base_adresse_corsica.update_api_geo_data = lambda: None
401
    filepath = os.path.join(os.path.dirname(__file__), 'data', 'update_streets_test.bz2')
402
    mocked_get.return_value = utils.FakedResponse(content=open(filepath, 'rb').read(), status_code=200)
405
    filepath = os.path.join(os.path.dirname(__file__), 'data', 'update_streets_test.csv.gz')
406
    with open(filepath, 'rb') as ban_file:
407
        mocked_get.return_value = utils.FakedResponse(content=ban_file.read(), status_code=200)
403 408
    call_command('cron', 'daily')
404 409
    assert mocked_get.call_count == 2
405
    mocked_get.assert_any_call('http://bano.openstreetmap.fr/BAN_odbl/BAN_odbl_2A-json.bz2')
406
    mocked_get.assert_any_call('http://bano.openstreetmap.fr/BAN_odbl/BAN_odbl_2B-json.bz2')
410
    mocked_get.assert_any_call('https://adresse.data.gouv.fr/data/ban/adresses/latest/csv/adresses-2A.csv.gz')
411
    mocked_get.assert_any_call('https://adresse.data.gouv.fr/data/ban/adresses/latest/csv/adresses-2B.csv.gz')
407 412
    assert StreetModel.objects.count() == 0
408 413

  
409 414

  
......
411 416
@mock.patch('passerelle.utils.Request.get')
412 417
def test_base_adresse_command_update_multiple(mocked_get, db, base_adresse_multiple):
413 418
    base_adresse_multiple.update_api_geo_data = lambda: None
414
    filepath = os.path.join(os.path.dirname(__file__), 'data', 'update_streets_test.bz2')
415
    mocked_get.return_value = utils.FakedResponse(content=open(filepath, 'rb').read(), status_code=200)
419
    filepath = os.path.join(os.path.dirname(__file__), 'data', 'update_streets_test.csv.gz')
420
    with open(filepath, 'rb') as ban_file:
421
        mocked_get.return_value = utils.FakedResponse(content=ban_file.read(), status_code=200)
416 422
    call_command('cron', 'daily')
417 423
    assert mocked_get.call_count == 4
418
    mocked_get.assert_any_call('http://bano.openstreetmap.fr/BAN_odbl/BAN_odbl_73-json.bz2')
419
    mocked_get.assert_any_call('http://bano.openstreetmap.fr/BAN_odbl/BAN_odbl_974-json.bz2')
420
    mocked_get.assert_any_call('http://bano.openstreetmap.fr/BAN_odbl/BAN_odbl_2A-json.bz2')
421
    mocked_get.assert_any_call('http://bano.openstreetmap.fr/BAN_odbl/BAN_odbl_2B-json.bz2')
424
    mocked_get.assert_any_call('https://adresse.data.gouv.fr/data/ban/adresses/latest/csv/adresses-73.csv.gz')
425
    mocked_get.assert_any_call('https://adresse.data.gouv.fr/data/ban/adresses/latest/csv/adresses-974.csv.gz')
426
    mocked_get.assert_any_call('https://adresse.data.gouv.fr/data/ban/adresses/latest/csv/adresses-2A.csv.gz')
427
    mocked_get.assert_any_call('https://adresse.data.gouv.fr/data/ban/adresses/latest/csv/adresses-2B.csv.gz')
422 428
    assert StreetModel.objects.count() == 5
423 429

  
424 430

  
tests/test_jobs.py
18 18

  
19 19
@mock.patch('passerelle.utils.Request.get')
20 20
def test_jobs(mocked_get, app, base_adresse, freezer):
21
    filepath = os.path.join(os.path.dirname(__file__), 'data', 'update_streets_test.bz2')
22
    mocked_get.return_value = utils.FakedResponse(content=open(filepath, 'rb').read(), status_code=200)
21
    filepath = os.path.join(os.path.dirname(__file__), 'data', 'update_streets_test.csv.gz')
22
    with open(filepath, 'rb') as ban_file:
23
        mocked_get.return_value = utils.FakedResponse(content=ban_file.read(), status_code=200)
23 24

  
24 25
    freezer.move_to('2019-01-01 00:00:00')
25 26
    job = base_adresse.add_job('update_streets_data')
......
30 31
    assert StreetModel.objects.count() == 3
31 32

  
32 33
    # don't delete streets if bano file is empty
33
    mocked_get.return_value = utils.FakedResponse(content='', status_code=200)
34
    mocked_get.return_value = utils.FakedResponse(content=b'', status_code=200)
34 35
    freezer.move_to('2019-01-01 12:00:00')
35 36
    job = base_adresse.add_job('update_streets_data')
36 37
    assert job.status == 'registered'
37
-