From 126e9faec75fa8f1423eacb2ad8b41ed582f44d4 Mon Sep 17 00:00:00 2001 From: Benjamin Dauvergne Date: Fri, 22 Jan 2021 15:36:00 +0100 Subject: [PATCH] settings: decrease A2_DUPLICATES_THRESHOLD to 0.2 (#50445) The original limit of 0.7 is kept for the find-duplicates web-service API. --- src/authentic2/api_views.py | 2 +- src/authentic2/custom_user/managers.py | 6 +++--- tests/test_custom_user.py | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/authentic2/api_views.py b/src/authentic2/api_views.py index 66c9d16a..74b9da16 100644 --- a/src/authentic2/api_views.py +++ b/src/authentic2/api_views.py @@ -871,7 +871,7 @@ class UsersAPI(api_mixins.GetOrCreateMixinView, HookMixin, ExceptionHandlerMixin attributes = data.pop('attributes', {}) birthdate = attributes.get('birthdate') - qs = User.objects.find_duplicates(first_name, last_name, birthdate=birthdate) + qs = User.objects.find_duplicates(first_name, last_name, birthdate=birthdate, threshold=0.7) return Response({ 'data': DuplicateUserSerializer(qs, many=True).data, diff --git a/src/authentic2/custom_user/managers.py b/src/authentic2/custom_user/managers.py index 8c2ee6ac..6e97965f 100644 --- a/src/authentic2/custom_user/managers.py +++ b/src/authentic2/custom_user/managers.py @@ -77,7 +77,7 @@ class UserQuerySet(models.QuerySet): if qs.exists(): return qs - qs = self.find_duplicates(fullname=search, limit=None) + qs = self.find_duplicates(fullname=search, limit=None, threshold=0.2) extra_user_ids = set() attribute_values = AttributeValue.objects.filter(search_vector=SearchQuery(search), attribute__searchable=True) extra_user_ids.update(self.filter(attribute_values__in=attribute_values).values_list('id', flat=True)) @@ -92,10 +92,10 @@ class UserQuerySet(models.QuerySet): qs = qs.order_by('dist', 'last_name', 'first_name') return qs - def find_duplicates(self, first_name=None, last_name=None, fullname=None, birthdate=None, limit=5): + def find_duplicates(self, first_name=None, last_name=None, fullname=None, birthdate=None, limit=5, threshold=None): with connection.cursor() as cursor: cursor.execute( - "SET pg_trgm.similarity_threshold = %f" % app_settings.A2_DUPLICATES_THRESHOLD + "SET pg_trgm.similarity_threshold = %f" % (threshold or app_settings.A2_DUPLICATES_THRESHOLD) ) if fullname is not None: diff --git a/tests/test_custom_user.py b/tests/test_custom_user.py index dade33ea..f3b0b47c 100644 --- a/tests/test_custom_user.py +++ b/tests/test_custom_user.py @@ -127,12 +127,12 @@ def test_fts_trigram(fts): # dist attribute signals queryset from find_duplicates() assert hasattr(User.objects.free_text_search('darmettein')[0], 'dist') - assert User.objects.free_text_search('lea darmettein').count() == 1 + assert User.objects.free_text_search('lea darmettein').filter(dist__lte=0.3).count() == 1 assert hasattr(User.objects.free_text_search('darmettein')[0], 'dist') def test_fts_legacy(fts): - assert User.objects.free_text_search('rue des peupliers').count() == 2 + assert User.objects.free_text_search('rue des peupliers').count() == 3 def test_fts_legacy_and_trigram(fts): -- 2.29.2