From 891776f9c073a789c3392564c6a0143ff0bb34c3 Mon Sep 17 00:00:00 2001 From: Valentin Deniaud Date: Wed, 3 Nov 2021 17:44:01 +0100 Subject: [PATCH] search: index external links data independently (#58269) --- combo/apps/search/utils.py | 32 +++++++++++--------------------- tests/test_search.py | 29 +++++++++++++++++++++++++++++ 2 files changed, 40 insertions(+), 21 deletions(-) diff --git a/combo/apps/search/utils.py b/combo/apps/search/utils.py index faa7783c..2a2f22ee 100644 --- a/combo/apps/search/utils.py +++ b/combo/apps/search/utils.py @@ -48,7 +48,6 @@ def index_site(): # populate ContentType cache ContentType.objects.get_for_models(*cell_classes) IndexedCell.objects.all().delete() - external_urls = {} validity_info_list = list(ValidityInfo.objects.select_related('content_type')) pages_by_pk = { p.pk: p for p in (Page.objects.prefetch_related(Prefetch('groups', to_attr='prefetched_groups'))) @@ -90,26 +89,17 @@ def index_site(): for link_data in cell.get_external_links_data(): # index external links - indexed_cell = external_urls.get(link_data.get('url')) - if indexed_cell is None: - # create an entry for that link. 
- indexed_cell = IndexedCell( - cell_type=cell_type, - cell_pk=cell.id, - page_id=cell.page_id, - public_access=bool(cell.page.public and cell.public), - url=link_data['url'], - title=link_data['title'], - indexed_text=link_data.get('text') or '', - ) - indexed_cell.save() - set_cell_groups(indexed_cell, cell) - external_urls[indexed_cell.url] = indexed_cell - else: - # if that link already exists, add detailed texts - indexed_cell.indexed_text += ' ' + link_data['title'] - indexed_cell.indexed_text += ' ' + (link_data.get('text') or '') - indexed_cell.save() + indexed_cell = IndexedCell( + cell_type=cell_type, + cell_pk=cell.id, + page_id=cell.page_id, + public_access=bool(cell.page.public and cell.public), + url=link_data['url'], + title=link_data['title'], + indexed_text=link_data.get('text') or '', + ) + indexed_cell.save() + set_cell_groups(indexed_cell, cell) def search_site(request, query, pages=None, with_description=None): diff --git a/tests/test_search.py b/tests/test_search.py index 3cd0fc4c..e8d8b2f5 100644 --- a/tests/test_search.py +++ b/tests/test_search.py @@ -1430,3 +1430,32 @@ def test_search_by_page_title(app): assert hits[0]['text'] == page_of_interest.title assert hits[0]['url'] == f'/{page_of_interest.slug}/' assert hits[0]['rank'] > hits[1]['rank'] + + +def test_search_same_link_multiple_pages(settings, app): + settings.KNOWN_SERVICES = {} + + page = Page.objects.create(title='first page', slug='one') + LinkCell.objects.create( + title='foobar', url='http://example.net', page=page, placeholder='content', order=0 + ) + second_page = Page.objects.create(title='second page', slug='two') + LinkCell.objects.create( + title='barfoo', url='http://example.net', page=second_page, placeholder='content', order=0 + ) + index_site() + + cell = SearchCell.objects.create( + page=page, placeholder='content', _search_services={'data': ['_text_page_one']}, order=1 + ) + resp = app.get('/ajax/search/%s/_text_page_one/?q=foobar' % cell.pk, status=200) + assert 
resp.text.count('