From 6d7a01360f5caa3aa24a84412bd5f3489ddeddec Mon Sep 17 00:00:00 2001 From: Benjamin Dauvergne Date: Fri, 8 Jan 2021 12:00:46 +0100 Subject: [PATCH 2/2] custom_user: specialize free_text_search for common search terms (#49957) --- src/authentic2/custom_user/managers.py | 55 +++++++++++++++--- src/authentic2/utils/date.py | 33 +++++++++++ tests/test_custom_user.py | 78 ++++++++++++++++++++++++++ 3 files changed, 158 insertions(+), 8 deletions(-) create mode 100644 src/authentic2/utils/date.py diff --git a/src/authentic2/custom_user/managers.py b/src/authentic2/custom_user/managers.py index b1b9ffd3..cad3d68e 100644 --- a/src/authentic2/custom_user/managers.py +++ b/src/authentic2/custom_user/managers.py @@ -17,24 +17,66 @@ import datetime import logging import unicodedata +import uuid from django.contrib.contenttypes.models import ContentType from django.contrib.postgres.search import TrigramDistance +from django.core.exceptions import ValidationError from django.db import models, transaction, connection from django.db.models import F, Value, FloatField, Subquery, OuterRef from django.db.models.functions import Lower, Coalesce -from django.utils import six -from django.utils import timezone +from django.utils import timezone, six from django.contrib.auth.models import BaseUserManager from authentic2 import app_settings from authentic2.models import Attribute, AttributeValue, UserExternalId from authentic2.utils.lookups import Unaccent, ImmutableConcat +from authentic2.utils.date import parse_date +from authentic2.attribute_kinds import clean_number class UserQuerySet(models.QuerySet): def free_text_search(self, search): + search = search.strip() + + if '@' in search and len(search.split()) == 1: + return self.filter(email__icontains=search).order_by('email') + + try: + guid = uuid.UUID(search) + except ValueError: + pass + else: + return self.filter(uuid=guid.hex) + + try: + phone_number = clean_number(search) + except ValidationError: + pass + else: + attribute_values = AttributeValue.objects.filter( + content__contains=phone_number, attribute__kind='phone_number') + qs = self.filter(attribute_values__in=attribute_values).order_by('last_name', 'first_name') + if qs.exists(): + return qs + + try: + date = parse_date(search) + except ValueError: + pass + else: + attribute_values = AttributeValue.objects.filter( + content__contains=date.isoformat(), attribute__kind='birthdate') + qs = self.filter(attribute_values__in=attribute_values).order_by('last_name', 'first_name') + if qs.exists(): + return qs + + qs1 = self.find_duplicates(fullname=search, limit=None) + qs2 = self.free_text_search_attributes(search).annotate(dist=Value(1.0)) + return (qs1.distinct() | qs2).order_by('dist', 'last_name', 'first_name') + + def free_text_search_attributes(self, search): terms = search.split() if not terms: @@ -44,7 +86,6 @@ class UserQuerySet(models.QuerySet): queries = [] for term in terms: q = None - specific_queries = [] for a in searchable_attributes: kind = a.get_kind() @@ -62,9 +103,6 @@ class UserQuerySet(models.QuerySet): q = ( models.query.Q(username__icontains=term) - | models.query.Q(first_name__icontains=term) - | models.query.Q(last_name__icontains=term) - | models.query.Q(email__icontains=term) ) for a in searchable_attributes: if a.name in ('first_name', 'last_name'): @@ -78,7 +116,7 @@ class UserQuerySet(models.QuerySet): self = self.distinct() return self - def find_duplicates(self, first_name=None, last_name=None, fullname=None, birthdate=None): + def find_duplicates(self, first_name=None, last_name=None, fullname=None, birthdate=None, limit=5): with connection.cursor() as cursor: cursor.execute( "SET pg_trgm.similarity_threshold = %f" % app_settings.A2_DUPLICATES_THRESHOLD @@ -96,7 +134,8 @@ class UserQuerySet(models.QuerySet): qs = qs.filter(name__trigram_similar=name) qs = qs.annotate(dist=TrigramDistance('name', name)) qs = qs.order_by('dist') - qs = qs[:5] + if limit is not None: + qs = qs[:limit] # alter distance according to additionnal parameters if birthdate: diff --git a/src/authentic2/utils/date.py b/src/authentic2/utils/date.py new file mode 100644 index 00000000..40e9ff86 --- /dev/null +++ b/src/authentic2/utils/date.py @@ -0,0 +1,33 @@ +# authentic2 - versatile identity manager +# Copyright (C) 2010-2019 Entr'ouvert +# +# This program is free software: you can redistribute it and/or modify it +# under the terms of the GNU Affero General Public License as published +# by the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . + +from datetime import datetime + +from django.utils import formats + + +def parse_date(formatted_date): + parsed_date = None + for date_format in formats.get_format('DATE_INPUT_FORMATS'): + try: + parsed_date = datetime.strptime(formatted_date, date_format) + except ValueError: + continue + else: + break + if not parsed_date: + raise ValueError + return parsed_date.date() diff --git a/tests/test_custom_user.py b/tests/test_custom_user.py index 870baaca..f78a8d9a 100644 --- a/tests/test_custom_user.py +++ b/tests/test_custom_user.py @@ -14,10 +14,15 @@ # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see . +from datetime import date + from django.contrib.auth import get_user_model +from authentic2.models import Attribute from django_rbac.utils import get_permission_model, get_role_model +import pytest + Permission = get_permission_model() Role = get_role_model() User = get_user_model() @@ -59,3 +64,76 @@ def test_user_mark_as_deleted(db): User.objects.create(username='foo2', email='foo@example.net') assert len(User.objects.filter(email='foo@example.net')) == 2 assert len(User.objects.filter(email='foo@example.net', deleted__isnull=True)) == 1 + + +@pytest.fixture +def fts(db): + Attribute.objects.create(name='adresse', label='adresse', searchable=True, kind='string') + Attribute.objects.create(name='telephone', label='telephone', searchable=True, kind='phone_number') + Attribute.objects.create(name='dob', label='dob', searchable=True, kind='birthdate') + user1 = User.objects.create( + username='foo1234', + first_name='Jo', + last_name='darmettein', + email='jean.darmette@example.net' + ) + user2 = User.objects.create( + username='bar1234', + first_name='Lea', + last_name='darmettein', + email='micheline.darmette@example.net' + ) + user3 = User.objects.create( + first_name='', + last_name='peuplier', + ) + user1.attributes.adresse = '4 rue des peupliers 13001 MARSEILLE' + user2.attributes.adresse = '4 rue des peupliers 13001 MARSEILLE' + user1.attributes.telephone = '0601020304' + user2.attributes.telephone = '0601020305' + user1.attributes.dob = date(1970, 1, 1) + user2.attributes.dob = date(1972, 2, 2) + return locals() + + +def test_fts_uuid(fts): + assert User.objects.free_text_search(fts['user1'].uuid).count() == 1 + assert User.objects.free_text_search(fts['user2'].uuid).count() == 1 + + +def test_fts_phone(fts): + assert User.objects.free_text_search('01020304').count() == 1 + assert User.objects.free_text_search('06010203').count() == 2 + + +def test_fts_dob(fts): + assert User.objects.free_text_search('01/01/1970').count() == 1 + assert User.objects.free_text_search('02/02/1972').count() == 1 + assert User.objects.free_text_search('03/03/1973').count() == 0 + + +def test_fts_email(fts): + assert User.objects.free_text_search('jean.darmette@example.net').count() == 1 + assert User.objects.free_text_search('micheline.darmette@example.net').count() == 1 + + +def test_fts_username(fts): + assert User.objects.free_text_search('foo1234').count() == 1 + assert User.objects.free_text_search('bar1234').count() == 1 + + +def test_fts_trigram(fts): + assert User.objects.free_text_search('darmettein').count() == 2 + # dist attribute signals queryset from find_duplicates() + assert hasattr(User.objects.free_text_search('darmettein')[0], 'dist') + + assert User.objects.free_text_search('lea darmettein').count() == 1 + assert hasattr(User.objects.free_text_search('darmettein')[0], 'dist') + + +def test_fts_legacy(fts): + assert User.objects.free_text_search('rue des peupliers').count() == 2 + + +def test_fts_legacy_and_trigram(fts): + assert User.objects.free_text_search('peuplier').count() == 3 -- 2.29.2