import datetime
import logging
import threading
from xml.etree import ElementTree as ET

import isodate
import requests

from django.core.files.base import ContentFile
from django.core.files.storage import default_storage
from django.utils.timezone import now
11 |
|
|
12 |
|
|
13 |
class MetadataException(RuntimeError):
    """Raised when metadata cannot be retrieved and no cached copy exists."""
|
|
15 |
|
|
16 |
|
|
17 |
class Metadata(object):
    """XML metadata document loaded from a URL, a file path, or literal content.

    Remote metadata is cached in two layers: a process-wide in-memory cache
    (``__cache``, shared by every instance) and a filesystem cache under
    Django's default storage, used as a fallback when the remote server is
    unreachable.  Stale in-memory entries are served immediately while a
    background thread refreshes them.
    """

    # Parsed XML root element of the metadata document.
    doc = None
    # Freshness window parsed from the document's cacheDuration attribute.
    cache_duration = None
    # Absolute expiry parsed from the document's validUntil attribute (or None).
    valid_until = None
    # Process-wide memory cache: url -> (timestamp, raw content bytes).
    __cache = {}
    url = None
    path = None
    # HTTP timeout, in seconds, for metadata retrieval.
    timeout = 10

    def __init__(self, url=None, path=None, content=None, timeout=None):
        """Load metadata from *url*, *path*, or literal *content*.

        Exactly one source is used: *url* takes precedence over *path*,
        and *path* over *content*.

        Raises:
            MetadataException: when *url* cannot be fetched and no
                filesystem-cached copy exists.
        """
        self.url = url or self.url
        self.path = path or self.path
        self.timeout = timeout or self.timeout

        if url:
            self.load_from_url()
        elif path:
            # NOTE(review): load_from_path is not defined in this file;
            # presumably provided elsewhere or by a subclass — verify.
            self.load_from_path()
        else:
            self.load_content(content)

    def load_content(self, content):
        """Parse raw XML *content* and extract the caching attributes."""
        self.doc = ET.fromstring(content)
        # cacheDuration holds an ISO 8601 duration.  Fall back to 30 seconds
        # when the attribute is absent (isodate raises TypeError on None) or
        # malformed (isodate's ISO8601Error is a ValueError subclass).
        try:
            self.cache_duration = isodate.parse_duration(
                self.doc.attrib.get('cacheDuration'))
        except (TypeError, ValueError):
            self.cache_duration = datetime.timedelta(seconds=30)
        # validUntil holds an ISO 8601 datetime and is optional.
        try:
            self.valid_until = isodate.parse_datetime(
                self.doc.attrib.get('validUntil'))
        except (TypeError, ValueError):
            self.valid_until = None

    @property
    def cache_path(self):
        """Storage name of the filesystem cache entry for this URL."""
        # Only '/' is substituted; other characters are kept as-is so that
        # existing cache entries created under this scheme remain reachable.
        return 'metadata_cache_' + self.url.replace('/', '_')

    @property
    def cache_exists(self):
        """Whether a filesystem-cached copy exists for this URL."""
        return default_storage.exists(self.cache_path)

    @property
    def cache_timestamp(self):
        """Creation time of the filesystem cache entry."""
        # NOTE(review): Storage.created_time was deprecated in Django 1.10
        # and removed in 2.0 in favour of get_created_time — confirm against
        # the Django version this project targets.
        return default_storage.created_time(self.cache_path)

    def retrieve_url(self):
        """Fetch metadata from ``self.url`` and refresh both cache layers.

        On network failure, falls back to the filesystem cache when one
        exists (returning ``(timestamp, content)`` and priming the memory
        cache); otherwise raises MetadataException.
        """
        logger = logging.getLogger(__name__)
        try:
            response = requests.get(self.url, timeout=self.timeout)
            response.raise_for_status()
        except requests.RequestException as e:
            if self.cache_exists:
                logger.warning(u'unable to retrieve metadata from %s, using filesystem cache: %s',
                               self.url, e)
                timestamp = self.cache_timestamp
                content = default_storage.open(self.cache_path).read()
                # Prime the memory cache so load_from_url() does not hit a
                # KeyError after this fallback path (it re-reads __cache).
                self.__cache[self.url] = timestamp, content
                return timestamp, content
            logger.error(u'unable to retrieve metadata from %s, and no filesystem cache: %s',
                         self.url, e)
            raise MetadataException('unable to retrieve uncached metadata from URL')
        else:
            content = response.content
            # Storage.save() never overwrites (it picks an alternate name),
            # so delete any stale entry first.
            if self.cache_exists:
                default_storage.delete(self.cache_path)
            # Storage.save() expects a File object, not raw bytes.
            default_storage.save(self.cache_path, ContentFile(content))
            self.load_content(content)
            self.__cache[self.url] = now(), content

    def should_update(self, timestamp):
        """Return True when the copy cached at *timestamp* needs refreshing."""
        if now() - timestamp < self.cache_duration:
            return False
        # Past cacheDuration: refresh unless validUntil is set and still holds.
        return not self.valid_until or now() > self.valid_until

    def background_update(self):
        """Refresh the metadata in a background thread (best effort)."""
        try:
            self.retrieve_url()
        except Exception:
            # Best-effort refresh: keep serving the cached copy and try
            # again on a later request, but record what went wrong.
            logging.getLogger(__name__).exception(
                u'background metadata update for %s failed', self.url)

    def load_from_url(self):
        """Load metadata for ``self.url``, serving from cache when possible.

        A memory-cache miss triggers a synchronous retrieval; a stale hit is
        served immediately while a background thread refreshes it.
        """
        if self.url not in self.__cache:
            # Fills the memory cache (or raises MetadataException).
            self.retrieve_url()
        timestamp, content = self.__cache[self.url]
        self.load_content(content)
        if self.should_update(timestamp):
            # Bump the cached timestamp ~3 request-timeouts into the recent
            # past so concurrent requests do not all spawn refresh threads.
            timestamp = (now() - self.cache_duration + 3 *
                         datetime.timedelta(seconds=self.timeout))
            self.__cache[self.url] = timestamp, content
            t = threading.Thread(target=self.background_update)
            t.start()
|