From b1dc47637fc601844520da75147766e693c94f7c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fr=C3=A9d=C3=A9ric=20P=C3=A9ters?= Date: Wed, 17 Jan 2018 15:42:46 +0100 Subject: [PATCH] misc: also simplify dashes and underscores (#21227) --- tests/test_misc.py | 5 +++++ wcs/qommon/misc.py | 4 ++-- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/tests/test_misc.py b/tests/test_misc.py index 56e8828c..3a4c1e3a 100644 --- a/tests/test_misc.py +++ b/tests/test_misc.py @@ -91,6 +91,11 @@ def test_simplify_apostrophes(): assert simplify('test\'again') == 'test-again' assert simplify('test\'\'\'again') == 'test-again' +def test_simplify_dashes_and_underscores(): + assert simplify('8100-03_PT') == '8100-03-pt' + assert simplify('8100-03_PT', ' ') == '8100 03 pt' + assert simplify('8100-03_PT', '_') == '8100_03_pt' + def test_simplify_accented(): assert simplify(u'cliché') == 'cliche' assert simplify(u'cliché'.encode('iso-8859-1')) == 'cliche' diff --git a/wcs/qommon/misc.py b/wcs/qommon/misc.py index db1ff2c7..70a187f8 100644 --- a/wcs/qommon/misc.py +++ b/wcs/qommon/misc.py @@ -154,8 +154,8 @@ def simplify(s, space='-'): else: s = unicode(s, 'iso-8859-1', 'ignore') s = unicodedata.normalize('NFKD', s).encode('ascii', 'ignore') - s = re.sub(r'[^\w\s\'%s]' % space, '', s).strip().lower() - s = re.sub(r'[\s\'%s]+' % space, space, s) + s = re.sub(r'[^\w\s\'_\-%s]' % space, '', s).strip().lower() + s = re.sub(r'[\s\'_\-%s]+' % space, space, s) return s def get_datetime_language(): -- 2.15.1