From a61bd6aee361e6b3addd21f86606278eb6b35876 Mon Sep 17 00:00:00 2001 From: Benjamin Dauvergne Date: Fri, 9 Dec 2022 16:24:00 +0100 Subject: [PATCH 2/2] misc: push cache of JSON content in get_json_from_url (#39723) It allows request_json_items, request_geojson_items and get_geojson_data to use the cache from get_json_from_url, and cache is then also used in AutocompleteDirectory. --- wcs/api.py | 8 ++++++- wcs/data_sources.py | 53 ++++++++++++++++++++------------------------- 2 files changed, 30 insertions(+), 31 deletions(-) diff --git a/wcs/api.py b/wcs/api.py index dedb6f2b7..7c1150874 100644 --- a/wcs/api.py +++ b/wcs/api.py @@ -1187,13 +1187,19 @@ class AutocompleteDirectory(Directory): if 'url' in info: named_data_source = None + cache_duration = 0 if info.get('data_source'): named_data_source = NamedDataSource.get(info['data_source']) + cache_duration = named_data_source.cache_duration url = info['url'] url += urllib.parse.quote(get_request().form.get('q', '')) url = sign_url_auto_orig(url) get_response().set_content_type('application/json') - entries = request_json_items(url, named_data_source and named_data_source.extended_data_source) + entries = request_json_items( + url, + named_data_source and named_data_source.extended_data_source, + cache_duration=cache_duration, + ) if entries is not None: return json.dumps({'err': 0, 'data': entries}) return json.dumps({'err': 1, 'data': []}) diff --git a/wcs/data_sources.py b/wcs/data_sources.py index 2b708a528..df4a7e5ae 100644 --- a/wcs/data_sources.py +++ b/wcs/data_sources.py @@ -241,13 +241,23 @@ def get_id_by_option_text(data_source, text_value): return str(option['id']) -def get_json_from_url(url, data_source=None, log_message_part='JSON data source', raise_request_error=False): +def get_json_from_url( + url, data_source=None, log_message_part='JSON data source', raise_request_error=False, cache_duration=0 +): url = sign_url_auto_orig(url) data_source = data_source or {} data_key = data_source.get('data_attribute') or 'data' geojson = data_source.get('type') == 'geojson' error_summary = None + if cache_duration: + cache_key = 'http-cache-%s' % force_str(hashlib.md5(force_bytes(url)).hexdigest()) + from django.core.cache import cache + + entries = cache.get(cache_key) + if entries is not None: + return entries + try: entries = misc.json_loads(misc.urlopen(url).read()) if not isinstance(entries, dict): @@ -273,6 +283,8 @@ def get_json_from_url(url, data_source=None, log_message_part='JSON data source' data = data[key] if not isinstance(data.get(keys[-1]), list): raise ValueError('not a json dict with a %s list attribute' % data_key) + if cache_duration: + cache.set(cache_key, entries, cache_duration) return entries except misc.ConnectionError as e: error_summary = 'Error loading %s (%s)' % (log_message_part, str(e)) @@ -292,8 +304,8 @@ def get_json_from_url(url, data_source=None, log_message_part='JSON data source' return None -def request_json_items(url, data_source): - entries = get_json_from_url(url, data_source) +def request_json_items(url, data_source, cache_duration=0): + entries = get_json_from_url(url, data_source, cache_duration=cache_duration) if entries is None: return None data_key = data_source.get('data_attribute') or 'data' @@ -321,8 +333,8 @@ def request_json_items(url, data_source): return items -def request_geojson_items(url, data_source): - entries = get_json_from_url(url, data_source) +def request_geojson_items(url, data_source, cache_duration=0): + entries = get_json_from_url(url, data_source, cache_duration=cache_duration) if entries is None: return None items = [] @@ -360,14 +372,13 @@ def get_structured_items(data_source, mode=None, include_disabled=True, raise_on def _get_structured_items(data_source, mode=None, raise_on_error=False): - cache_duration = 0 - if data_source.get('type') and data_source.get('type').startswith('carddef:'): # cards from wcs.carddef import CardDef return CardDef.get_data_source_items(data_source['type']) + cache_duration = 0 if data_source.get('type') not in ('json', 'jsonp', 'geojson', 'formula', 'wcs:users'): # named data source named_data_source = NamedDataSource.get_by_slug(data_source['type']) @@ -482,26 +493,16 @@ def _get_structured_items(data_source, mode=None, raise_on_error=False): if hasattr(request, 'datasources_cache') and url in request.datasources_cache: return request.datasources_cache[url] - if cache_duration: - cache_key = 'data-source-%s' % force_str(hashlib.md5(force_bytes(url)).hexdigest()) - from django.core.cache import cache - - items = cache.get(cache_key) - if items is not None: - return items - if geojson: - items = request_geojson_items(url, data_source) + items = request_geojson_items(url, data_source, cache_duration=cache_duration) else: - items = request_json_items(url, data_source) + items = request_json_items(url, data_source, cache_duration=cache_duration) if items is None: if raise_on_error: raise DataSourceError('datasource %s is unavailable' % url) return [] if hasattr(request, 'datasources_cache'): request.datasources_cache[url] = items - if cache_duration: - cache.set(cache_key, items, cache_duration) return items return [] @@ -856,15 +857,9 @@ class NamedDataSource(XmlStorableObject): if self.cache_duration: cache_duration = int(self.cache_duration) - if cache_duration: - cache_key = 'geojson-data-source-%s' % force_str(hashlib.md5(force_bytes(url)).hexdigest()) - from django.core.cache import cache - - data = cache.get(cache_key) - if data is not None: - return data - - data = get_json_from_url(url, self.data_source, raise_request_error=True) + data = get_json_from_url( + url, self.data_source, raise_request_error=True, cache_duration=cache_duration + ) id_property = self.id_property or 'id' label_template_property = self.label_template_property or '{{ text }}' @@ -881,8 +876,6 @@ class NamedDataSource(XmlStorableObject): if hasattr(request, 'datasources_cache'): request.datasources_cache[url] = data - if cache_duration: - cache.set(cache_key, data, cache_duration) return data -- 2.37.2