Projet

Général

Profil

0005-search-better-queries-for-index_site-40252.patch

Lauréline Guérin, 27 mars 2020 18:02

Télécharger (5,89 ko)

Voir les différences:

Subject: [PATCH 5/7] search: better queries for index_site (#40252)

 combo/apps/search/utils.py | 65 ++++++++++++++++++++++++++------------
 tests/test_search.py       |  2 +-
 2 files changed, 46 insertions(+), 21 deletions(-)
combo/apps/search/utils.py
17 17
from django.conf import settings
18 18
from django.contrib.contenttypes.models import ContentType
19 19
from django.contrib.postgres.search import SearchQuery, SearchRank, SearchVector
20
from combo.data.models import CellBase
21 20
from django.db import connection
22
from django.db.models import Q
21
from django.db.models import Q, Prefetch
23 22
from django.db.transaction import atomic
24 23

  
24
from combo.data.models import Page, CellBase, ValidityInfo
25 25
from .models import IndexedCell
26 26

  
27 27

  
28
def set_cell_access(indexed_cell, cell):
29
    indexed_cell.public_access = bool(cell.page.public and cell.public)
30
    indexed_cell.excluded_groups.clear()
31
    indexed_cell.restricted_groups.clear()
28
def set_cell_groups(indexed_cell, cell):
29
    restricted_groups = []
30
    excluded_groups = []
32 31
    if not indexed_cell.public_access:
33
        indexed_cell.restricted_groups.set(cell.groups.all())
32
        restricted_groups = cell.prefetched_groups
34 33
        if cell.restricted_to_unlogged:
35
            indexed_cell.excluded_groups.set(cell.page.groups.all())
34
            excluded_groups = cell.page.prefetched_groups
36 35
        else:
37
            for group in cell.page.groups.all():
38
                indexed_cell.restricted_groups.add(group)
39
    indexed_cell.save()
36
            for group in cell.page.prefetched_groups:
37
                restricted_groups.append(group)
38
    if restricted_groups:
39
        indexed_cell.restricted_groups.add(*restricted_groups)
40
    if excluded_groups:
41
        indexed_cell.excluded_groups.add(*excluded_groups)
40 42

  
41 43

  
42 44
@atomic
43 45
def index_site():
46
    cell_classes = list(CellBase.get_cell_classes())
47
    # populate ContentType cache
48
    ContentType.objects.get_for_models(*cell_classes)
44 49
    IndexedCell.objects.all().delete()
45 50
    external_urls = {}
46
    for klass in CellBase.get_cell_classes():
47
        for cell in klass.objects.filter(page__snapshot__isnull=True, page__sub_slug='').exclude(placeholder__startswith='_'):
51
    validity_info_list = list(ValidityInfo.objects.select_related('content_type'))
52
    pages_by_pk = {
53
        p.pk: p for p in (
54
            Page.objects
55
            .prefetch_related(Prefetch('groups', to_attr='prefetched_groups')))}
56
    for klass in cell_classes:
57
        queryset = (
58
            klass.objects
59
            .filter(page__snapshot__isnull=True, page__sub_slug='')
60
            .exclude(placeholder__startswith='_')
61
            .prefetch_related(
62
                Prefetch('groups', to_attr='prefetched_groups')))
63
        for cell in queryset:
64
            cell.prefetched_validity_info = [
65
                v for v in validity_info_list
66
                if v.object_id == cell.pk and v.content_type.model_class() == cell.__class__]
67
            cell.page = pages_by_pk.get(cell.page_id)
48 68
            cell_type = ContentType.objects.get_for_model(cell)
49 69
            indexed_cell = IndexedCell(cell_type=cell_type, cell_pk=cell.id)
50 70
            try:
......
52 72
            except Exception:  # ignore rendering error
53 73
                continue
54 74
            if indexed_cell.indexed_text:
75
                indexed_cell.public_access = bool(cell.page.public and cell.public)
55 76
                indexed_cell.page_id = cell.page_id
56 77
                indexed_cell.url = cell.page.get_online_url()
57 78
                indexed_cell.title = cell.page.title
58 79
                indexed_cell.save()
59
                set_cell_access(indexed_cell, cell)
80
                set_cell_groups(indexed_cell, cell)
60 81

  
61 82
            for link_data in cell.get_external_links_data():
62 83
                # index external links
63 84
                indexed_cell = external_urls.get(indexed_cell.url)
64 85
                if indexed_cell is None:
65 86
                    # create an entry for that link.
66
                    indexed_cell = IndexedCell(cell_type=cell_type, cell_pk=cell.id)
87
                    indexed_cell = IndexedCell(
88
                        cell_type=cell_type,
89
                        cell_pk=cell.id,
90
                        public_access=bool(cell.page.public and cell.public),
91
                        url=link_data['url'],
92
                        title=link_data['title'],
93
                        indexed_text=link_data.get('text') or '',
94
                    )
67 95
                    indexed_cell.save()
68
                    set_cell_access(indexed_cell, cell)
69
                    indexed_cell.url = link_data['url']
70
                    indexed_cell.title = link_data['title']
71
                    indexed_cell.indexed_text = link_data.get('text') or ''
96
                    set_cell_groups(indexed_cell, cell)
72 97
                    external_urls[indexed_cell.url] = indexed_cell
73 98
                else:
74 99
                    # if that link already exists, add detailed texts
75 100
                    indexed_cell.indexed_text += ' ' + link_data['title']
76 101
                    indexed_cell.indexed_text += ' ' + link_data.get('text') or ''
77
                indexed_cell.save()
102
                    indexed_cell.save()
78 103

  
79 104

  
80 105
def search_site(request, query):
tests/test_search.py
544 544
    index_site()  # populate cache
545 545
    with CaptureQueriesContext(connection) as ctx:
546 546
        index_site()
547
        assert len(ctx.captured_queries) == 591
547
        assert len(ctx.captured_queries) == 195
548
-