From e70eeb86d51fb7cf96516ebd9d275cf05a4cf0f5 Mon Sep 17 00:00:00 2001 From: Thomas NOEL Date: Fri, 18 Dec 2015 12:35:39 +0100 Subject: [PATCH] csv: remove BOM if exists (#8727) --- passerelle/apps/csvdatasource/models.py | 3 +++ tests/test_csv_datasource.py | 9 +++++++++ 2 files changed, 12 insertions(+) diff --git a/passerelle/apps/csvdatasource/models.py b/passerelle/apps/csvdatasource/models.py index f03f820..28a1c92 100644 --- a/passerelle/apps/csvdatasource/models.py +++ b/passerelle/apps/csvdatasource/models.py @@ -56,6 +56,9 @@ class CsvDataSource(BaseResource): if not content: return None + # remove BOM + content = content.decode('utf-8-sig').encode('utf-8') + dialect = csv.Sniffer().sniff(content[:1024]) reader = csv.reader(content.splitlines(), dialect) diff --git a/tests/test_csv_datasource.py b/tests/test_csv_datasource.py index c05f770..bd83d25 100644 --- a/tests/test_csv_datasource.py +++ b/tests/test_csv_datasource.py @@ -27,6 +27,8 @@ data = """121;69981;DELANOUE;Eliot;H 216352;38;Dupont;Benoît;H """ +data_bom = data.decode('utf-8').encode('utf-8-sig') + from csvdatasource.models import CsvDataSource pytestmark = pytest.mark.django_db @@ -105,3 +107,10 @@ def test_unicode_case_insensitive_filter_data(): assert 'id' in item assert 'text' in item assert filter_criteria.lower() in item['text'].decode('utf-8') + +def test_data_bom(): + csv = CsvDataSource.objects.create(csv_file=File(StringIO(data_bom), 'data.csv'), + columns_keynames='fam,id,, text,sexe ') + result = csv.get_data('Eliot') + assert result[0] == {'id': '69981', 'text': 'Eliot', + 'fam': '121', 'sexe': 'H'} -- 2.6.4