0001-general-redo-full-text-search-using-querysets-33632.patch
combo/apps/search/__init__.py | ||
---|---|---|
15 | 15 |
# along with this program. If not, see <http://www.gnu.org/licenses/>. |
16 | 16 | |
17 | 17 |
import django.apps |
18 |
from django.core.urlresolvers import reverse |
|
18 | 19 |
from django.utils.translation import ugettext_lazy as _ |
19 | 20 | |
20 | 21 |
from .engines import engines |
... | ... | |
28 | 29 |
from . import urls |
29 | 30 |
return urls.urlpatterns |
30 | 31 | |
32 |
def hourly(self): |
|
33 |
from .utils import index_site |
|
34 |
index_site() |
|
35 | ||
36 |
def ready(self): |
|
37 |
# register built-in search engine for page contents |
|
38 |
engines.register(self.get_search_engines) |
|
39 | ||
40 |
def get_search_engines(self): |
|
41 |
from .utils import search_site |
|
42 |
return { |
|
43 |
'_text': { |
|
44 |
'function': search_site, |
|
45 |
'label': _('Page Contents'), |
|
46 |
} |
|
47 |
} |
|
48 | ||
49 | ||
31 | 50 |
default_app_config = 'combo.apps.search.AppConfig' |
combo/apps/search/management/commands/update_index.py | ||
---|---|---|
1 |
# combo - content management system |
|
2 |
# Copyright (C) 2017 Entr'ouvert |
|
3 |
# |
|
4 |
# This program is free software: you can redistribute it and/or modify it |
|
5 |
# under the terms of the GNU Affero General Public License as published |
|
6 |
# by the Free Software Foundation, either version 3 of the License, or |
|
7 |
# (at your option) any later version. |
|
8 |
# |
|
9 |
# This program is distributed in the hope that it will be useful, |
|
10 |
# but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
11 |
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
|
12 |
# GNU Affero General Public License for more details. |
|
13 |
# |
|
14 |
# You should have received a copy of the GNU Affero General Public License |
|
15 |
# along with this program. If not, see <http://www.gnu.org/licenses/>. |
|
16 | ||
17 |
from django.utils.timezone import now |
|
18 | ||
19 |
from haystack.management.commands.update_index import Command as UpdateIndexCommand |
|
20 | ||
21 |
from combo.data.models import Page, ExternalLinkSearchItem |
|
22 |
from combo.apps.search.models import SearchCell |
|
23 | ||
24 | ||
25 |
class Command(UpdateIndexCommand): |
|
26 | ||
27 |
def add_arguments(self, parser): |
|
28 |
super(Command, self).add_arguments(parser) |
|
29 |
parser.add_argument( |
|
30 |
'--skip-external-links-collection', action='store_true', default=False, |
|
31 |
dest='skip_external_links_collection') |
|
32 | ||
33 |
def handle(self, **options): |
|
34 |
if not any(SearchCell.get_cells_by_search_service('_text')): |
|
35 |
# do not index site if there's no matching search cell |
|
36 |
return |
|
37 |
if not options.get('skip_external_links_collection', False): |
|
38 |
self.collect_external_links(options) |
|
39 |
return super(Command, self).handle(**options) |
|
40 | ||
41 |
def collect_external_links(self, options): |
|
42 |
start_time = now() |
|
43 | ||
44 |
if options.get('remove'): |
|
45 |
ExternalLinkSearchItem.objects.all().delete() |
|
46 | ||
47 |
# assemble external links data |
|
48 |
links = {} |
|
49 |
for page in Page.objects.filter(sub_slug=''): |
|
50 |
if not page.is_visible(user=None): |
|
51 |
continue |
|
52 |
for cell in page.get_cells(): |
|
53 |
if not cell.is_visible(user=None): |
|
54 |
continue |
|
55 |
for link_data in cell.get_external_links_data(): |
|
56 |
if not link_data['url'] in links: |
|
57 |
# create an entry for that link. |
|
58 |
links[link_data['url']] = {} |
|
59 |
links[link_data['url']]['title'] = link_data['title'] |
|
60 |
links[link_data['url']]['all_texts'] = [] |
|
61 |
else: |
|
62 |
# if that link already exists, just keep the title as |
|
63 |
# text. |
|
64 |
links[link_data['url']]['all_texts'].append(link_data['title']) |
|
65 |
# additional texts will be assembled and indexed |
|
66 |
links[link_data['url']]['all_texts'].append(link_data.get('text') or '') |
|
67 | ||
68 |
# save data as ExternalLinkSearchItem objects |
|
69 |
for link_url, link_data in links.items(): |
|
70 |
link_object, created = ExternalLinkSearchItem.objects.get_or_create( |
|
71 |
url=link_url, |
|
72 |
defaults={'title': link_data['title']}) |
|
73 |
link_object.title = link_data['title'] |
|
74 |
link_object.text = '\n'.join(link_data['all_texts']) |
|
75 |
link_object.save() |
|
76 | ||
77 |
# remove obsolete objects |
|
78 |
ExternalLinkSearchItem.objects.filter(last_update_timestamp__lt=start_time).delete() |
combo/apps/search/migrations/0006_indexedcell.py | ||
---|---|---|
1 |
# -*- coding: utf-8 -*- |
|
2 |
# Generated by Django 1.11.17 on 2020-01-20 15:30 |
|
3 |
from __future__ import unicode_literals |
|
4 | ||
5 |
from django.db import migrations, models |
|
6 |
import django.db.models.deletion |
|
7 | ||
8 | ||
9 |
class Migration(migrations.Migration): |
|
10 | ||
11 |
dependencies = [ |
|
12 |
('data', '0041_delete_externallinksearchitem'), |
|
13 |
('auth', '0008_alter_user_username_max_length'), |
|
14 |
('contenttypes', '0002_remove_content_type_name'), |
|
15 |
('search', '0005_searchcell_autofocus'), |
|
16 |
] |
|
17 | ||
18 |
operations = [ |
|
19 |
migrations.CreateModel( |
|
20 |
name='IndexedCell', |
|
21 |
fields=[ |
|
22 |
('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), |
|
23 |
('cell_pk', models.PositiveIntegerField(null=True)), |
|
24 |
('url', models.CharField(blank=True, max_length=500, null=True)), |
|
25 |
('title', models.CharField(blank=True, max_length=500, null=True)), |
|
26 |
('indexed_text', models.TextField(blank=True, null=True)), |
|
27 |
('public_access', models.BooleanField(default=False)), |
|
28 |
('last_update_timestamp', models.DateTimeField(auto_now=True)), |
|
29 |
('cell_type', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='contenttypes.ContentType')), |
|
30 |
('excluded_groups', models.ManyToManyField(blank=True, related_name='_indexedcell_excluded_groups_+', to='auth.Group')), |
|
31 |
('page', models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.CASCADE, to='data.Page')), |
|
32 |
('restricted_groups', models.ManyToManyField(blank=True, related_name='_indexedcell_restricted_groups_+', to='auth.Group')), |
|
33 |
], |
|
34 |
), |
|
35 |
] |
combo/apps/search/models.py | ||
---|---|---|
16 | 16 | |
17 | 17 |
import os |
18 | 18 | |
19 |
from django.conf import settings |
|
19 |
from django.contrib.auth.models import Group |
|
20 |
from django.contrib.contenttypes import fields |
|
21 |
from django.contrib.contenttypes.models import ContentType |
|
20 | 22 |
from django.db import models |
21 | 23 |
from django.utils.translation import ugettext_lazy as _ |
22 | 24 |
from django import template |
23 | 25 |
from django.http import HttpResponse |
24 | 26 |
from django.core.exceptions import PermissionDenied |
25 |
from django.core.urlresolvers import reverse |
|
26 | 27 |
from django.utils.http import quote |
27 | 28 |
from django.template import RequestContext, Template |
28 | 29 | |
29 | 30 |
from jsonfield import JSONField |
30 |
from haystack import connections |
|
31 | 31 | |
32 | 32 |
from combo.utils import requests |
33 |
from combo.data.models import CellBase |
|
33 |
from combo.data.models import CellBase, Page
|
|
34 | 34 |
from combo.data.library import register_cell_class |
35 | 35 |
from combo.utils import get_templated_url |
36 | 36 | |
... | ... | |
69 | 69 |
services = [] |
70 | 70 |
for service_slug in self._search_services.get('data') or []: |
71 | 71 |
service = engines.get(service_slug) |
72 |
if service and service.get('url'):
|
|
72 |
if service and (service.get('url') or service.get('function')):
|
|
73 | 73 |
service['slug'] = service_slug |
74 | 74 |
services.append(service) |
75 | 75 |
return services |
... | ... | |
141 | 141 |
if not query: |
142 | 142 |
return render_response(service) |
143 | 143 | |
144 |
url = get_templated_url(service['url'], |
|
145 |
context={'request': request, 'q': query, 'search_service': service}) |
|
146 |
url = url % {'q': quote(query.encode('utf-8'))} # if url contains %(q)s |
|
147 |
if url.startswith('/'): |
|
148 |
url = request.build_absolute_uri(url) |
|
149 | ||
150 |
if not url: |
|
151 |
return render_response(service) |
|
152 | ||
153 |
kwargs = {} |
|
154 |
kwargs['cache_duration'] = service.get('cache_duration', 0) |
|
155 |
kwargs['remote_service'] = 'auto' if service.get('signature') else None |
|
156 |
# don't automatically add user info to query string, if required it can |
|
157 |
# be set explicitely in the URL template in the engine definition (via |
|
158 |
# {{user_nameid}} or {{user_email}}). |
|
159 |
kwargs['without_user'] = True |
|
160 |
# don't send error traces on HTTP errors |
|
161 |
kwargs['log_errors'] = 'warn' |
|
162 | ||
163 |
response = requests.get(url, **kwargs) |
|
164 |
try: |
|
165 |
results = response.json() |
|
166 |
except ValueError: |
|
167 |
return render_response(service) |
|
144 |
if service.get('function'): # internal search engine |
|
145 |
results = {'data': service['function'](request, query)} |
|
146 |
else: |
|
147 |
url = get_templated_url(service['url'], |
|
148 |
context={'request': request, 'q': query, 'search_service': service}) |
|
149 |
url = url % {'q': quote(query.encode('utf-8'))} # if url contains %(q)s |
|
150 |
if url.startswith('/'): |
|
151 |
url = request.build_absolute_uri(url) |
|
152 | ||
153 |
if not url: |
|
154 |
return render_response(service) |
|
155 | ||
156 |
kwargs = {} |
|
157 |
kwargs['cache_duration'] = service.get('cache_duration', 0) |
|
158 |
kwargs['remote_service'] = 'auto' if service.get('signature') else None |
|
159 |
# don't automatically add user info to query string, if required it can |
|
160 |
# be set explicitly in the URL template in the engine definition (via |
|
161 |
# {{user_nameid}} or {{user_email}}). |
|
162 |
kwargs['without_user'] = True |
|
163 |
# don't send error traces on HTTP errors |
|
164 |
kwargs['log_errors'] = 'warn' |
|
165 | ||
166 |
response = requests.get(url, **kwargs) |
|
167 |
try: |
|
168 |
results = response.json() |
|
169 |
except ValueError: |
|
170 |
return render_response(service) |
|
168 | 171 | |
169 | 172 |
if service.get('data_key'): |
170 | 173 |
results['data'] = results.get(service['data_key']) or [] |
... | ... | |
179 | 182 |
for hit in results.get('data') or []: |
180 | 183 |
for k, v in hit_templates.items(): |
181 | 184 |
hit[k] = v.render(RequestContext(request, hit)) |
185 | ||
182 | 186 |
return render_response(service, results) |
183 | 187 | |
184 | 188 |
def has_text_search_service(self): |
185 | 189 |
return '_text' in self._search_services.get('data', []) |
186 | 190 | |
187 | 191 |
def missing_index(self): |
188 |
return not os.path.exists(connections['default'].get_backend().path) |
|
192 |
return IndexedCell.objects.all().count() == 0 |
|
193 | ||
194 | ||
195 |
class IndexedCell(models.Model):
    # One row of the database-backed full-text index: either the rendered
    # text of a cell or an external link exposed by a cell, together with
    # the access information needed to filter search results per user.

    # generic reference (content type + primary key) to the indexed cell
    cell_type = models.ForeignKey(ContentType, on_delete=models.CASCADE)
    cell_pk = models.PositiveIntegerField(null=True)
    cell = fields.GenericForeignKey('cell_type', 'cell_pk')
    # page holding the cell; nullable, rows are removed with the page (CASCADE)
    page = models.ForeignKey(Page, on_delete=models.CASCADE, blank=True, null=True)
    # target and label returned for a search hit
    url = models.CharField(max_length=500, blank=True, null=True)
    title = models.CharField(max_length=500, blank=True, null=True)
    # text the search is run against (along with the title)
    indexed_text = models.TextField(blank=True, null=True)
    # True when the entry can be returned to anonymous users
    public_access = models.BooleanField(default=False)
    # group-based access filters; related_name='+' disables the reverse
    # accessors on Group
    restricted_groups = models.ManyToManyField(Group, blank=True, related_name='+')
    excluded_groups = models.ManyToManyField(Group, blank=True, related_name='+')
    # automatically refreshed on every save (auto_now)
    last_update_timestamp = models.DateTimeField(auto_now=True)
combo/apps/search/utils.py | ||
---|---|---|
1 |
# combo - content management system |
|
2 |
# Copyright (C) 2014-2020 Entr'ouvert |
|
3 |
# |
|
4 |
# This program is free software: you can redistribute it and/or modify it |
|
5 |
# under the terms of the GNU Affero General Public License as published |
|
6 |
# by the Free Software Foundation, either version 3 of the License, or |
|
7 |
# (at your option) any later version. |
|
8 |
# |
|
9 |
# This program is distributed in the hope that it will be useful, |
|
10 |
# but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
11 |
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
|
12 |
# GNU Affero General Public License for more details. |
|
13 |
# |
|
14 |
# You should have received a copy of the GNU Affero General Public License |
|
15 |
# along with this program. If not, see <http://www.gnu.org/licenses/>. |
|
16 | ||
17 |
from django.conf import settings |
|
18 |
from django.contrib.contenttypes.models import ContentType |
|
19 |
from django.contrib.postgres.search import SearchQuery, SearchRank, SearchVector |
|
20 |
from combo.data.models import CellBase |
|
21 |
from django.db import connection |
|
22 |
from django.db.models import Q |
|
23 |
from django.db.transaction import atomic |
|
24 | ||
25 |
from .models import IndexedCell |
|
26 | ||
27 | ||
28 |
def set_cell_access(indexed_cell, cell):
    """Copy the visibility rules of ``cell`` (and its page) onto ``indexed_cell``.

    The index entry is flagged as public only when both the page and the cell
    are public.  Otherwise the cell groups are stored as restricted groups,
    and the page groups are stored either as excluded groups (cell restricted
    to unlogged users) or as additional restricted groups.

    ``indexed_cell`` must already have been saved (many-to-many relations are
    modified); it is saved again before returning.
    """
    page = cell.page
    indexed_cell.public_access = bool(page.public and cell.public)

    # always start from empty group relations
    for relation in (indexed_cell.excluded_groups, indexed_cell.restricted_groups):
        relation.clear()

    if not indexed_cell.public_access:
        indexed_cell.restricted_groups.set(cell.groups.all())
        if cell.restricted_to_unlogged:
            indexed_cell.excluded_groups.set(page.groups.all())
        else:
            for page_group in page.groups.all():
                indexed_cell.restricted_groups.add(page_group)

    indexed_cell.save()
|
40 | ||
41 | ||
42 |
@atomic
def index_site():
    """Rebuild the whole search index from the current site contents.

    All existing ``IndexedCell`` rows are deleted, then every cell of every
    cell class is visited (cells of page snapshots and cells in technical
    placeholders, i.e. placeholders starting with ``_``, are left out):

    * when the cell renders to a non-empty text for search, an entry pointing
      to the cell page is created;
    * every external link advertised by the cell gets an entry of its own;
      links sharing the same URL are merged into a single entry, the title
      and text of later occurrences being appended to the indexed text.

    The function runs in a transaction (``@atomic``).
    """
    IndexedCell.objects.all().delete()
    external_urls = {}  # link URL -> IndexedCell already created for it
    for klass in CellBase.get_cell_classes():
        for cell in klass.objects.filter(page__snapshot__isnull=True).exclude(placeholder__startswith='_'):
            cell_type = ContentType.objects.get_for_model(cell)
            indexed_cell = IndexedCell(cell_type=cell_type, cell_pk=cell.id)
            try:
                indexed_cell.indexed_text = cell.render_for_search()
            except Exception:  # ignore rendering error
                continue
            if indexed_cell.indexed_text:
                indexed_cell.page_id = cell.page_id
                indexed_cell.url = cell.page.get_online_url()
                indexed_cell.title = cell.page.title
                # the entry must exist in database before group relations
                # can be set on it.
                indexed_cell.save()
                set_cell_access(indexed_cell, cell)

            for link_data in cell.get_external_links_data():
                # index external links
                link_url = link_data['url']
                link_text = link_data.get('text') or ''
                # look the link up by its own URL (not by the URL of the
                # page entry or of the previously handled link).
                link_cell = external_urls.get(link_url)
                if link_cell is None:
                    # create an entry for that link.
                    link_cell = IndexedCell(cell_type=cell_type, cell_pk=cell.id)
                    link_cell.save()
                    set_cell_access(link_cell, cell)
                    link_cell.url = link_url
                    link_cell.title = link_data['title']
                    link_cell.indexed_text = link_text
                    external_urls[link_url] = link_cell
                else:
                    # if that link already exists, add detailed texts
                    link_cell.indexed_text += ' ' + link_data['title']
                    link_cell.indexed_text += ' ' + link_text
                link_cell.save()
|
78 | ||
79 | ||
80 |
def search_site(request, query):
    """Search the site index and return the hits visible to the request user.

    On PostgreSQL a ranked full-text search is run on the title and indexed
    text, using the ``POSTGRESQL_FTS_SEARCH_CONFIG`` setting for both the
    search vectors and the search query; entries with a rank below 0.3 are
    dropped.  On other database backends a case-insensitive substring match
    is used instead.

    Anonymous users only get entries flagged as public; other users get
    entries without restricted groups or restricted to one of their groups,
    minus entries excluding one of their groups.

    Returns a list of at most 10 dictionaries with ``text`` (entry title),
    ``rank`` (``None`` when no ranking is available) and ``url`` keys; only
    the first hit is kept for a given URL.
    """
    if connection.vendor == 'postgresql':
        config = settings.POSTGRESQL_FTS_SEARCH_CONFIG
        vector = (
            SearchVector('title', config=config, weight='A')
            + SearchVector('indexed_text', config=config, weight='A'))
        # the query must be parsed with the same configuration as the
        # vectors, otherwise stemming and stop words do not match.
        search_query = SearchQuery(query, config=config)
        qs = IndexedCell.objects.annotate(
            rank=SearchRank(vector, search_query)).filter(rank__gte=0.3).order_by('-rank')
    else:
        qs = IndexedCell.objects.filter(
            Q(indexed_text__icontains=query) | Q(title__icontains=query))

    if request.user.is_anonymous:
        qs = qs.exclude(public_access=False)
    else:
        user_groups = request.user.groups.all()
        qs = qs.filter(
            Q(restricted_groups=None) |
            Q(restricted_groups__in=user_groups))
        qs = qs.exclude(excluded_groups__in=user_groups)

    hits = []
    seen_urls = set()
    for hit in qs:
        # the same URL can come up several times (several cells of a page,
        # or rows duplicated by the join on groups); keep the first one.
        if hit.url in seen_urls:
            continue
        hits.append({
            'text': hit.title,
            'rank': getattr(hit, 'rank', None),
            'url': hit.url,
        })
        seen_urls.add(hit.url)
        if len(hits) == 10:
            break

    return hits
combo/data/apps.py | ||
---|---|---|
15 | 15 |
# along with this program. If not, see <http://www.gnu.org/licenses/>. |
16 | 16 | |
17 | 17 |
from django.apps import AppConfig |
18 |
from django.core.urlresolvers import reverse |
|
19 |
from django.utils.translation import ugettext_lazy as _ |
|
20 | 18 | |
21 | 19 | |
22 | 20 |
class DataConfig(AppConfig): |
23 | 21 |
name = 'combo.data' |
24 | 22 |
verbose_name = 'data' |
25 | ||
26 |
def ready(self): |
|
27 |
# register built-in search engine for page contents |
|
28 |
from combo.apps.search import engines |
|
29 |
engines.register(self.get_search_engines) |
|
30 | ||
31 |
def get_search_engines(self): |
|
32 |
return { |
|
33 |
'_text': { |
|
34 |
'url': reverse('api-search') + '?q=%(q)s', |
|
35 |
'label': _('Page Contents'), |
|
36 |
} |
|
37 |
} |
combo/data/migrations/0041_delete_externallinksearchitem.py | ||
---|---|---|
1 |
# -*- coding: utf-8 -*- |
|
2 |
# Generated by Django 1.11.17 on 2020-01-20 15:30 |
|
3 |
from __future__ import unicode_literals |
|
4 | ||
5 |
from django.db import migrations |
|
6 | ||
7 | ||
8 |
class Migration(migrations.Migration): |
|
9 | ||
10 |
dependencies = [ |
|
11 |
('data', '0040_auto_20200119_1017'), |
|
12 |
] |
|
13 | ||
14 |
operations = [ |
|
15 |
migrations.DeleteModel( |
|
16 |
name='ExternalLinkSearchItem', |
|
17 |
), |
|
18 |
] |
combo/data/models.py | ||
---|---|---|
729 | 729 |
return '' |
730 | 730 |
if self.user_dependant: |
731 | 731 |
return '' |
732 |
if not self.page.is_visible(user=None): |
|
733 |
return '' |
|
734 |
if not self.is_visible(user=None): |
|
735 |
return '' |
|
736 | 732 |
request = RequestFactory().get(self.page.get_online_url()) |
737 | 733 |
request.user = None # compat |
738 | 734 |
context = { |
... | ... | |
1447 | 1443 |
return context |
1448 | 1444 | |
1449 | 1445 | |
1450 |
class ExternalLinkSearchItem(models.Model): |
|
1451 |
# Link to an external site. |
|
1452 |
# |
|
1453 |
# Those are automatically collected during by the "update_index" command, |
|
1454 |
# that calls get_external_links_data from all available cells, to be used |
|
1455 |
# by the general search engine. |
|
1456 |
title = models.CharField(_('Title'), max_length=150) |
|
1457 |
text = models.TextField(blank=True) |
|
1458 |
url = models.CharField(_('URL'), max_length=200, blank=True) |
|
1459 |
last_update_timestamp = models.DateTimeField(auto_now=True) |
|
1460 | ||
1461 | ||
1462 | 1446 |
@receiver(pre_save, sender=Page) |
1463 | 1447 |
def create_redirects(sender, instance, raw, **kwargs): |
1464 | 1448 |
if raw or not instance.id or instance.snapshot_id: |
combo/data/search_indexes.py | ||
---|---|---|
1 |
# combo - content management system |
|
2 |
# Copyright (C) 2014-2017 Entr'ouvert |
|
3 |
# |
|
4 |
# This program is free software: you can redistribute it and/or modify it |
|
5 |
# under the terms of the GNU Affero General Public License as published |
|
6 |
# by the Free Software Foundation, either version 3 of the License, or |
|
7 |
# (at your option) any later version. |
|
8 |
# |
|
9 |
# This program is distributed in the hope that it will be useful, |
|
10 |
# but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
11 |
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
|
12 |
# GNU Affero General Public License for more details. |
|
13 |
# |
|
14 |
# You should have received a copy of the GNU Affero General Public License |
|
15 |
# along with this program. If not, see <http://www.gnu.org/licenses/>. |
|
16 | ||
17 |
from haystack import indexes |
|
18 |
from haystack.exceptions import SkipDocument |
|
19 | ||
20 |
from .models import Page, CellBase, ExternalLinkSearchItem |
|
21 | ||
22 |
class PageIndex(indexes.SearchIndex, indexes.Indexable): |
|
23 |
title = indexes.CharField(model_attr='title', boost=1.5) |
|
24 |
text = indexes.CharField(document=True, use_template=True, |
|
25 |
template_name='combo/search/page.txt') |
|
26 |
url = indexes.CharField(indexed=False) |
|
27 | ||
28 |
def get_model(self): |
|
29 |
return Page |
|
30 | ||
31 |
def prepare_url(self, obj): |
|
32 |
return obj.get_online_url() |
|
33 | ||
34 |
def prepare(self, obj): |
|
35 |
if not obj.is_visible(user=None): |
|
36 |
raise SkipDocument() |
|
37 |
return super(PageIndex, self).prepare(obj) |
|
38 | ||
39 | ||
40 |
class ExternalLinkSearchIndex(indexes.SearchIndex, indexes.Indexable): |
|
41 |
title = indexes.CharField(model_attr='title', boost=1.5) |
|
42 |
text = indexes.CharField(model_attr='text', document=True) |
|
43 |
url = indexes.CharField(model_attr='url', indexed=False) |
|
44 | ||
45 |
def get_model(self): |
|
46 |
return ExternalLinkSearchItem |
combo/data/templates/combo/search/page.txt | ||
---|---|---|
1 |
{% autoescape off %} |
|
2 |
{% for cell in object.get_cells %} |
|
3 |
{% if cell.placeholder|first != '_' %} {# ignore technical placeholders #} |
|
4 |
{{ cell.render_for_search }} |
|
5 |
{% endif %} |
|
6 |
{% endfor %} |
|
7 |
{% endautoescape %} |
combo/public/urls.py | ||
---|---|---|
21 | 21 | |
22 | 22 |
urlpatterns = [ |
23 | 23 |
url(r'^api/menu-badges/$', views.menu_badges), |
24 |
url(r'^api/search/$', views.api_search, name='api-search'), |
|
25 | 24 |
url(r'^ajax/cell/(?P<page_pk>\w+)/(?P<cell_reference>[\w_-]+)/$', |
26 | 25 |
views.ajax_page_cell, name='combo-public-ajax-page-cell'), |
27 | 26 |
url(r'^snapshot/(?P<pk>\w+)/$', manager_required(views.snapshot), name='combo-snapshot-view'), |
combo/public/views.py | ||
---|---|---|
40 | 40 |
from django.utils.translation import ugettext as _ |
41 | 41 |
from django.forms.widgets import Media |
42 | 42 | |
43 |
from haystack.inputs import AutoQuery |
|
44 |
from haystack.query import SearchQuerySet, SQ |
|
45 | ||
46 | 43 |
if 'mellon' in settings.INSTALLED_APPS: |
47 | 44 |
from mellon.utils import get_idps |
48 | 45 |
else: |
... | ... | |
552 | 549 |
menu_badges.mellon_no_passive = True |
553 | 550 | |
554 | 551 | |
555 |
def api_search(request): |
|
556 |
for cell in SearchCell.get_cells_by_search_service('_text'): |
|
557 |
if not cell.is_visible(request.user): |
|
558 |
continue |
|
559 |
break |
|
560 |
else: |
|
561 |
raise Http404() |
|
562 |
query = request.GET.get('q') or '' |
|
563 |
sqs = SearchQuerySet().filter(SQ(content=AutoQuery(query)) | SQ(title=AutoQuery(query))) |
|
564 |
sqs = sqs.highlight() |
|
565 |
sqs.load_all() |
|
566 |
hits = [] |
|
567 |
for hit in sqs: |
|
568 |
description = None |
|
569 |
if hit.model_name == 'page' and hit.highlighted['text']: |
|
570 |
description = '<p>%s</p>' % hit.highlighted['text'][0] |
|
571 |
hits.append({ |
|
572 |
'text': hit.title, |
|
573 |
'url': hit.url, |
|
574 |
'description': description, |
|
575 |
}) |
|
576 | ||
577 |
return HttpResponse(json.dumps({'data': hits}), content_type='application/json') |
|
578 | ||
579 | ||
580 | 552 |
def snapshot(request, *args, **kwargs): |
581 | 553 |
snapshot = PageSnapshot.objects.get(id=kwargs['pk']) |
582 | 554 |
return publish_page(request, snapshot.get_page()) |
combo/settings.py | ||
---|---|---|
76 | 76 |
'combo.apps.calendar', |
77 | 77 |
'combo.apps.pwa', |
78 | 78 |
'combo.apps.gallery', |
79 |
'haystack', |
|
80 | 79 |
'xstatic.pkg.josefinsans', |
81 | 80 |
'xstatic.pkg.leaflet', |
82 | 81 |
'xstatic.pkg.opensans', |
... | ... | |
187 | 186 |
CKEDITOR_CONFIGS['small'] = copy.copy(CKEDITOR_CONFIGS['default']) |
188 | 187 |
CKEDITOR_CONFIGS['small']['height'] = 150 |
189 | 188 | |
190 |
HAYSTACK_CONNECTIONS = { |
|
191 |
'default': { |
|
192 |
'ENGINE': 'haystack.backends.whoosh_backend.WhooshEngine', |
|
193 |
'PATH': os.path.join(BASE_DIR, 'whoosh_index'), |
|
194 |
}, |
|
195 |
} |
|
196 | ||
197 | 189 |
# from solr.thumbnail -- https://sorl-thumbnail.readthedocs.io/en/latest/reference/settings.html |
198 | 190 |
THUMBNAIL_PRESERVE_FORMAT = True |
199 | 191 |
THUMBNAIL_FORCE_OVERWRITE = False |
... | ... | |
262 | 254 | |
263 | 255 |
# search services |
264 | 256 |
COMBO_SEARCH_SERVICES = {} |
257 |
POSTGRESQL_FTS_SEARCH_CONFIG = 'french' |
|
265 | 258 | |
266 | 259 |
# mapping of payment modes |
267 | 260 |
LINGO_NO_ONLINE_PAYMENT_REASONS = {} |
debian/combo.cron.hourly | ||
---|---|---|
2 | 2 | |
3 | 3 |
/sbin/runuser -u combo /usr/bin/combo-manage -- tenant_command cron --all-tenants |
4 | 4 |
/sbin/runuser -u combo /usr/bin/combo-manage -- tenant_command clearsessions --all-tenants |
5 |
# update_index cannot be used due to some bug in haystack/whoosh (#30509) |
|
6 |
/sbin/runuser -u combo /usr/bin/combo-manage -- tenant_command rebuild_index --noinput --all-tenants -v0 |
debian/control | ||
---|---|---|
20 | 20 |
python3-xstatic-opensans, |
21 | 21 |
python3-xstatic-roboto-fontface (>= 0.5.0.0), |
22 | 22 |
python3-eopayment (>= 1.35), |
23 |
python3-django-haystack (>= 2.4.0), |
|
24 | 23 |
python3-django-ratelimit, |
25 | 24 |
python3-sorl-thumbnail, |
26 | 25 |
python3-pil, |
27 | 26 |
python3-pywebpush, |
28 | 27 |
python3-pygal, |
29 | 28 |
python3-lxml |
30 |
Recommends: python3-django-mellon, python3-whoosh
|
|
29 |
Recommends: python3-django-mellon |
|
31 | 30 |
Conflicts: python-lingo |
32 | 31 |
Breaks: combo (<< 2.34.post2) |
33 | 32 |
Description: Portal Management System (Python module) |
requirements.txt | ||
---|---|---|
11 | 11 |
eopayment>=1.13 |
12 | 12 |
python-dateutil |
13 | 13 |
djangorestframework>=3.3, <3.7 |
14 |
django-haystack |
|
15 |
whoosh |
|
16 | 14 |
sorl-thumbnail |
17 | 15 |
pyproj |
setup.py | ||
---|---|---|
162 | 162 |
'eopayment>=1.41', |
163 | 163 |
'python-dateutil', |
164 | 164 |
'djangorestframework>=3.3, <3.7', |
165 |
'django-haystack', |
|
166 | 165 |
'django-ratelimit<3', |
167 |
'whoosh', |
|
168 | 166 |
'sorl-thumbnail', |
169 | 167 |
'Pillow', |
170 | 168 |
'pyproj', |
tests/settings.py | ||
---|---|---|
44 | 44 |
import tempfile |
45 | 45 |
MEDIA_ROOT = tempfile.mkdtemp('combo-test') |
46 | 46 | |
47 |
HAYSTACK_CONNECTIONS['default']['PATH'] = os.path.join( |
|
48 |
tempfile.mkdtemp('combo-test-whoosh')) |
|
49 | ||
50 | 47 |
if 'DISABLE_MIGRATIONS' in os.environ: |
51 | 48 |
class DisableMigrations(object): |
52 | 49 |
def __contains__(self, item): |
tests/test_search.py | ||
---|---|---|
6 | 6 |
import mock |
7 | 7 | |
8 | 8 |
from django.conf import settings |
9 |
from django.contrib.auth.models import AnonymousUser, User, Group |
|
9 | 10 |
from django.test import override_settings |
10 | 11 |
from django.test.client import RequestFactory |
11 | 12 |
from django.core.management import call_command |
12 | 13 |
from django.core.urlresolvers import reverse |
13 | 14 | |
14 |
from haystack.exceptions import SkipDocument |
|
15 | ||
16 | 15 |
from combo.apps.search.engines import engines |
17 |
from combo.apps.search.models import SearchCell |
|
16 |
from combo.apps.search.models import SearchCell, IndexedCell |
|
17 |
from combo.apps.search.utils import index_site, search_site |
|
18 | 18 |
from combo.data.models import Page, JsonCell, TextCell, MenuCell, LinkCell |
19 |
from combo.data.search_indexes import PageIndex |
|
20 | 19 | |
21 | 20 |
from .test_manager import login |
22 | 21 | |
... | ... | |
229 | 228 |
page = Page(title='example page', slug='example-page') |
230 | 229 |
page.save() |
231 | 230 | |
232 |
# no indexation of private cells (is_visible check)
|
|
231 |
# private cells are indexed
|
|
233 | 232 |
cell = TextCell(page=page, text='foobar', public=False, order=0) |
234 |
assert cell.render_for_search() == ''
|
|
233 |
assert cell.render_for_search().strip() == 'foobar'
|
|
235 | 234 | |
236 | 235 |
# no indexation of empty cells (is_relevant check) |
237 | 236 |
cell = TextCell(page=page, text='', order=0) |
... | ... | |
247 | 246 | |
248 | 247 |
def test_search_contents_index(): |
249 | 248 |
page = Page(title='example page', slug='example-page') |
249 |
page.public = True |
|
250 | 250 |
page.save() |
251 | 251 | |
252 |
page_index = PageIndex() |
|
253 |
assert page_index.get_model() is Page |
|
254 | ||
255 |
assert page_index.prepare_url(page) == '/example-page/' |
|
256 | ||
257 |
page_index.prepare(page) |
|
258 | ||
259 |
page.public = False |
|
260 |
with pytest.raises(SkipDocument): |
|
261 |
page_index.prepare(page) |
|
262 | ||
263 |
page.public = True |
|
264 | 252 |
cell = TextCell(page=page, text='<p>foobar</p>', order=0) |
265 | 253 |
cell.save() |
266 | 254 | |
267 |
prepared_data = page_index.prepare(page) |
|
268 |
assert 'foobar' in prepared_data['text'] |
|
255 |
request = RequestFactory().get('/') |
|
256 |
request.user = AnonymousUser() |
|
257 |
hits = search_site(request, 'foobar') |
|
258 |
assert len(hits) == 0 |
|
259 |
index_site() |
|
260 |
hits = search_site(request, 'foobar') |
|
261 |
assert len(hits) == 1 |
|
262 | ||
269 | 263 | |
270 | 264 |
def test_search_contents_technical_placeholder(): |
271 | 265 |
page = Page(title='example page', slug='example-page') |
... | ... | |
274 | 268 |
TextCell(page=page, text='<p>foobar</p>', order=0, placeholder='_off').save() |
275 | 269 |
TextCell(page=page, text='<p>barfoo</p>', order=0, placeholder='on').save() |
276 | 270 | |
277 |
page_index = PageIndex() |
|
278 |
prepared_data = page_index.prepare(page) |
|
279 |
assert 'barfoo' in prepared_data['text'] |
|
280 |
assert not 'foobar' in prepared_data['text'] |
|
271 |
request = RequestFactory().get('/') |
|
272 |
request.user = AnonymousUser() |
|
273 |
index_site() |
|
274 |
hits = search_site(request, 'foobar') |
|
275 |
assert len(hits) == 0 |
|
276 |
hits = search_site(request, 'barfoo') |
|
277 |
assert len(hits) == 1 |
|
278 | ||
281 | 279 | |
282 | 280 |
def test_search_api(app): |
283 | 281 |
page = Page(title='example page', slug='example-page') |
... | ... | |
291 | 289 | |
292 | 290 |
cell = TextCell(page=second_page, text='<p>other baz</p>', order=0) |
293 | 291 |
cell.save() |
294 | ||
295 |
page_index = PageIndex() |
|
296 |
page_index.reindex() |
|
297 | ||
298 |
resp = app.get('/api/search/?q=foobar', status=404) |
|
292 |
index_site() |
|
299 | 293 | |
300 | 294 |
cell = SearchCell(page=page, _search_services={'data': ['_text']}, order=0) |
301 | 295 |
cell.save() |
302 | 296 | |
303 |
resp = app.get('/api/search/?q=foobar', status=200)
|
|
304 |
assert len(resp.json['data']) == 1
|
|
305 |
assert resp.json['data'][0]['text'] == 'example page'
|
|
297 |
resp = app.get('/ajax/search/%s/_text/?q=foobar' % cell.id, status=200)
|
|
298 |
assert resp.text.count('<li') == 1
|
|
299 |
assert 'example page' in resp.text
|
|
306 | 300 | |
307 |
resp = app.get('/api/search/?q=other', status=200)
|
|
308 |
assert len(resp.json['data']) == 1
|
|
309 |
assert resp.json['data'][0]['text'] == 'second page'
|
|
301 |
resp = app.get('/ajax/search/%s/_text/?q=other' % cell.id, status=200)
|
|
302 |
assert resp.text.count('<li') == 1
|
|
303 |
assert 'second page' in resp.text
|
|
310 | 304 | |
311 |
resp = app.get('/api/search/?q=baz', status=200)
|
|
312 |
assert len(resp.json['data']) == 2
|
|
305 |
resp = app.get('/ajax/search/%s/_text/?q=baz' % cell.id, status=200)
|
|
306 |
assert resp.text.count('<li') == 2
|
|
313 | 307 | |
314 |
resp = app.get('/api/search/?q=quux', status=200)
|
|
315 |
assert len(resp.json['data']) == 0
|
|
308 |
resp = app.get('/ajax/search/%s/_text/?q=quux' % cell.id, status=200)
|
|
309 |
assert resp.text.count('<li') == 0
|
|
316 | 310 | |
317 |
def test_update_index_command(app): |
|
318 |
call_command('clear_index', interactive=False) |
|
319 |
call_command('update_index') # empty site |
|
320 | 311 | |
312 |
def test_search_external_links(app): |
|
321 | 313 |
page = Page(title='example page', slug='example-page') |
322 | 314 |
page.save() |
323 | 315 | |
324 | 316 |
cell = SearchCell(page=page, _search_services={'data': ['_text']}, order=0) |
325 | 317 |
cell.save() |
326 | 318 | |
327 |
call_command('update_index') |
|
328 |
resp = app.get('/api/search/?q=foobar', status=200) |
|
329 |
assert len(resp.json['data']) == 0 |
|
319 |
index_site() |
|
320 |
request = RequestFactory().get('/') |
|
321 |
request.user = AnonymousUser() |
|
322 |
hits = search_site(request, 'foobar') |
|
323 |
assert len(hits) == 0 |
|
330 | 324 | |
331 | 325 |
LinkCell(title='foobar', url='http://example.net', page=page, order=0).save() |
332 |
call_command('update_index')
|
|
326 |
index_site()
|
|
333 | 327 | |
334 |
resp = app.get('/api/search/?q=foobar', status=200) |
|
335 |
assert len(resp.json['data']) == 1 |
|
336 |
assert resp.json['data'][0]['text'] == 'foobar' |
|
337 |
assert resp.json['data'][0]['description'] is None |
|
338 |
assert resp.json['data'][0]['url'] == 'http://example.net' |
|
328 |
hits = search_site(request, 'foobar') |
|
329 |
assert len(hits) == 1 |
|
330 |
assert hits[0]['text'] == 'foobar' |
|
331 |
assert hits[0]['url'] == 'http://example.net' |
|
339 | 332 | |
333 |
# second link with same target |
|
340 | 334 |
LinkCell(title='baz', url='http://example.net', page=page, order=0).save() |
341 |
call_command('update_index') |
|
342 | ||
343 |
resp = app.get('/api/search/?q=baz', status=200) |
|
344 |
assert len(resp.json['data']) == 1 |
|
345 |
assert resp.json['data'][0]['url'] == 'http://example.net' |
|
335 |
index_site() |
|
346 | 336 | |
347 | 337 |
# add a second link with the same target |
348 |
LinkCell(title='bar', url='http://example.net', page=page, order=0).save() |
|
349 |
call_command('update_index') |
|
338 |
hits = search_site(request, 'baz') |
|
339 |
assert len(hits) == 1 |
|
340 |
assert hits[0]['text'] in ('foobar', 'baz') |
|
341 |
assert hits[0]['url'] == 'http://example.net' |
|
342 |
hits = search_site(request, 'foobar') |
|
343 |
assert len(hits) == 1 |
|
344 |
assert hits[0]['text'] in ('foobar', 'baz') |
|
345 |
assert hits[0]['url'] == 'http://example.net' |
|
350 | 346 | |
351 |
resp = app.get('/api/search/?q=baz', status=200) |
|
352 |
assert len(resp.json['data']) == 1 |
|
353 |
assert resp.json['data'][0]['url'] == 'http://example.net' |
|
354 | ||
355 |
resp = app.get('/api/search/?q=bar', status=200) |
|
356 |
assert len(resp.json['data']) == 1 |
|
357 |
assert resp.json['data'][0]['url'] == 'http://example.net' |
|
358 | 347 | |
359 | 348 |
def test_manager_search_cell(app, admin_user): |
360 | 349 |
Page.objects.all().delete() |
... | ... | |
399 | 388 | |
400 | 389 | |
401 | 390 |
def test_manager_waiting_index_message(app, admin_user): |
402 |
from haystack import connections |
|
403 |
shutil.rmtree(connections['default'].get_backend().path) |
|
404 | ||
405 | 391 |
Page.objects.all().delete() |
406 | 392 |
page = Page(title='One', slug='one', template_name='standard') |
407 | 393 |
page.save() |
... | ... | |
417 | 403 |
resp = resp.form.submit().follow() |
418 | 404 |
assert 'Content indexing has been scheduled' in resp.text |
419 | 405 | |
420 |
os.mkdir(connections['default'].get_backend().path) |
|
421 |
call_command('update_index') |
|
406 |
index_site() |
|
422 | 407 |
resp = app.get('/manage/pages/%s/' % page.id) |
423 | 408 |
assert 'Content indexing has been scheduled' not in resp.text |
424 | 409 | |
... | ... | |
455 | 440 |
page.save() |
456 | 441 |
search_engines = engines.get_engines() |
457 | 442 |
assert 'users' in search_engines.keys() |
443 | ||
444 | ||
445 |
def test_private_search(app): |
|
446 |
page = Page(title='example page', slug='example-page') |
|
447 |
page.save() |
|
448 | ||
449 |
TextCell(page=page, text='<p>foobar</p>', order=0, public=False).save() |
|
450 |
TextCell(page=page, text='<p>barfoo</p>', order=0, public=True).save() |
|
451 | ||
452 |
request = RequestFactory().get('/') |
|
453 |
request.user = AnonymousUser() |
|
454 |
index_site() |
|
455 |
hits = search_site(request, 'foobar') |
|
456 |
assert len(hits) == 0 |
|
457 |
hits = search_site(request, 'barfoo') |
|
458 |
assert len(hits) == 1 |
|
459 | ||
460 |
request.user = User.objects.create_user(username='normal-user') |
|
461 |
hits = search_site(request, 'foobar') |
|
462 |
assert len(hits) == 1 |
|
463 |
hits = search_site(request, 'barfoo') |
|
464 |
assert len(hits) == 1 |
|
465 | ||
466 | ||
467 |
def test_restricted_search(app): |
|
468 |
group = Group(name='plop') |
|
469 |
group.save() |
|
470 | ||
471 |
page = Page(title='example page', slug='example-page') |
|
472 |
page.save() |
|
473 | ||
474 |
cell = TextCell(page=page, text='<p>foobar</p>', order=0, public=False) |
|
475 |
cell.save() |
|
476 |
cell.groups.set([group]) |
|
477 |
TextCell(page=page, text='<p>barfoo</p>', order=0, public=False).save() |
|
478 |
index_site() |
|
479 | ||
480 |
# first cell is restricted, it's not found |
|
481 |
request = RequestFactory().get('/') |
|
482 |
request.user = User.objects.create_user(username='normal-user') |
|
483 |
hits = search_site(request, 'foobar') |
|
484 |
assert len(hits) == 0 |
|
485 |
hits = search_site(request, 'barfoo') |
|
486 |
assert len(hits) == 1 |
|
487 | ||
488 |
page.groups.set([group]) |
|
489 |
index_site() |
|
490 | ||
491 |
# page is restricted, no cell is found |
|
492 |
hits = search_site(request, 'foobar') |
|
493 |
assert len(hits) == 0 |
|
494 |
hits = search_site(request, 'barfoo') |
|
495 |
assert len(hits) == 0 |
|
496 | ||
497 |
# user is in group, gets a result |
|
498 |
request.user.groups.set([group]) |
|
499 |
hits = search_site(request, 'foobar') |
|
500 |
assert len(hits) == 1 |
|
501 |
hits = search_site(request, 'barfoo') |
|
502 |
assert len(hits) == 1 |
|
503 | ||
504 |
# cell is excluded from group view |
|
505 |
cell.restricted_to_unlogged = True |
|
506 |
cell.save() |
|
507 |
index_site() |
|
508 | ||
509 |
hits = search_site(request, 'foobar') |
|
510 |
assert len(hits) == 0 |
|
511 |
hits = search_site(request, 'barfoo') |
|
512 |
assert len(hits) == 1 |
|
458 |
- |