Projet

Général

Profil

0002-update-and-cache-metadata-from-URL-and-path-10196.patch

Benjamin Dauvergne, 07 juin 2019 18:07

Télécharger (19,6 ko)

Voir les différences:

Subject: [PATCH 2/2] update and cache metadata from URL and path (#10196)

 README                        |  34 +++++-
 debian/control                |   6 +-
 mellon/adapters.py            | 190 +++++++++++++++++++++++++++++-----
 mellon/app_settings.py        |   2 +
 mellon/utils.py               |   5 +-
 setup.py                      |   1 +
 tests/conftest.py             |  26 ++++-
 tests/test_default_adapter.py |  68 +++++++++++-
 tests/test_utils.py           |  13 +--
 tox.ini                       |   4 +-
 10 files changed, 305 insertions(+), 44 deletions(-)
README
76 76
MELLON_IDENTITY_PROVIDERS
77 77
-------------------------
78 78

  
79
A list of dictionaries, only one key is mandatory in those
80
dictionaries `METADATA` it should contain the UTF-8 content of the
81
metadata file of the identity provider or if it starts with a slash
82
the absolute path toward a metadata file. All other keys are override
83
of generic settings.
79
A list of dictionaries, they must contain at least one of the keys `METADATA`
80
(inline copy of the identity provider metadata), `METADATA_URL` URL of the IdP
81
metadata file, or `METADATA_PATH` an absolute path to the IdP metadata file.
82
All other keys are overrides of generic settings.
83

  
84
When using a URL, the metadata is automatically cached in the `MEDIA_ROOT`
85
directory of your application in the directory named `mellon_metadata_cache`.
86
If you restart the application and the URL is unavailable, the file cache will
87
be used. The cache will be refreshed every `MELLON_METADATA_CACHE_TIME` seconds.
88
If the HTTP retrieval of the metadata URL takes longer than
89
`MELLON_METADATA_HTTP_TIMEOUT` seconds, retrieval will be skipped.
90

  
91
When the cache is already loaded, retrievals are done in the background by a
92
thread.
93

  
94
When using a local absolute path, the metadata is reloaded each time the
95
modification time of the file is later than the last time it was loaded.
84 96

  
85 97
MELLON_PUBLIC_KEYS
86 98
------------------
......
261 273
Should be post or artifact. Default is post. You can refer to the SAML 2.0
262 274
specification to learn the difference.
263 275

  
276
MELLON_METADATA_CACHE_TIME
277
--------------------------
278

  
279
When using METADATA_URL to reference a metadata file, it's the duration in
280
seconds between refreshes of the metadata file. Default is 3600 seconds, 1 hour.
281

  
282
MELLON_METADATA_HTTP_TIMEOUT
283
----------------------------
284

  
285
Timeout in seconds for HTTP call made to retrieve metadata files. Default is 10
286
seconds.
287

  
264 288
Tests
265 289
=====
266 290

  
debian/control
15 15
    python (>= 2.7),
16 16
    python-django (>= 1.5),
17 17
    python-isodate,
18
    python-lasso
18
    python-lasso,
19
    python-atomicwrites
19 20
Breaks: python-hobo (<< 0.34.5)
20 21
Description: SAML authentication for Django
21 22

  
......
24 25
Depends: ${misc:Depends}, ${python:Depends},
25 26
    python3-django (>= 1.5),
26 27
    python3-isodate,
27
    python3-lasso
28
    python3-lasso,
29
    python3-atomicwrites
28 30
Description: SAML authentication for Django
mellon/adapters.py
13 13
# You should have received a copy of the GNU Affero General Public License
14 14
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
15 15

  
16
from xml.etree import ElementTree as ET
17
import hashlib
16 18
import logging
19
import os
20
import threading
21
import time
17 22
import uuid
18
from xml.etree import ElementTree as ET
19 23

  
20 24
import lasso
21 25
import requests
22 26
import requests.exceptions
27
from atomicwrites import atomic_write
23 28

  
24 29
from django.core.exceptions import PermissionDenied
30
from django.core.files.storage import default_storage
25 31
from django.contrib import auth
26 32
from django.contrib.auth.models import Group
27 33
from django.utils import six
28 34
from django.utils.encoding import force_text
35
from django.utils.six.moves.urllib.parse import urlparse
29 36

  
30 37
from . import utils, app_settings, models
31 38

  
39
logger = logging.getLogger(__name__)
40

  
32 41

  
33 42
class UserCreationError(Exception):
34 43
    pass
......
49 58

  
50 59
    def get_idps(self):
51 60
        for i, idp in enumerate(self.get_identity_providers_setting()):
52
            if 'METADATA_URL' in idp and 'METADATA' not in idp:
61
            if self.load_idp(idp, i):
62
                yield idp
63

  
64
    def load_metadata_path(self, idp, i):
65
        path = idp['METADATA_PATH']
66
        if not os.path.exists(path):
67
            logger.warning('metadata path %s does not exist', path)
68
            return
69
        last_update = idp.get('METADATA_PATH_LAST_UPDATE', 0)
70
        try:
71
            mtime = os.stat(path).st_mtime
72
        except OSError as e:
73
            logger.warning('metadata path %s : stat() call failed, %s', path, e)
74
            return
75
        if last_update == 0 or mtime >= last_update:
76
            idp['METADATA_PATH_LAST_UPDATE'] = time.time()
77
            try:
78
                with open(path) as fd:
79
                    metadata = fd.read()
80
            except OSError as e:
81
                logger.warning('metadata path %s : open()/read() call failed, %s', path, e)
82
                return
83
            entity_id = self.load_entity_id(metadata, i)
84
            if not entity_id:
85
                logger.error('invalid metadata file retrieved from %s', path)
86
                return
87
            if 'ENTITY_ID' in idp and idp['ENTITY_ID'] != entity_id:
88
                logger.error('metadata path %s : entityID changed %r != %r', path, entity_id, idp['ENTITY_ID'])
89
                del idp['ENTITY_ID']
90
            idp['METADATA'] = metadata
91

  
92
    def load_metadata_url(self, idp, i):
93
        url = idp['METADATA_URL']
94
        try:
95
            hostname = urlparse(url).hostname
96
        except (ValueError, TypeError) as e:
97
            logger.error('invalid METADATA_URL %r: %s', url, e)
98
            return
99
        if not hostname:
100
            logger.error('no hostname in METADATA_URL %r: %s', url)
101
            return
102
        last_update = idp.get('METADATA_URL_LAST_UPDATE', 0)
103
        metadata_cache_time = utils.get_setting(idp, 'METADATA_CACHE_TIME')
104
        timeout = utils.get_setting(idp, 'METADATA_HTTP_TIMEOUT')
105
        now = time.time()
106

  
107
        try:
108
            url_fingerprint = hashlib.md5(url.encode('ascii')).hexdigest()
109
            file_cache_key = '%s_%s.xml' % (hostname, url_fingerprint)
110
        except (UnicodeError, TypeError, ValueError):
111
            logger.exception('unable to compute file_cache_key')
112
            return
113

  
114
        cache_directory = default_storage.path('mellon_metadata_cache')
115
        file_cache_path = os.path.join(cache_directory, file_cache_key)
116

  
117
        if not os.path.exists(cache_directory):
118
            os.makedirs(cache_directory)
119

  
120
        if os.path.exists(file_cache_path) and 'METADATA' not in idp:
121
            try:
122
                with open(file_cache_path) as fd:
123
                    idp['METADATA'] = fd.read()
124
            except OSError:
125
                pass
126

  
127
        # fresh cache, skip loading
128
        if last_update and 'METADATA' in idp and (now - last_update) < metadata_cache_time:
129
            return
130

  
131
        def __http_get():
132
            try:
53 133
                verify_ssl_certificate = utils.get_setting(
54 134
                    idp, 'VERIFY_SSL_CERTIFICATE')
55 135
                try:
56
                    response = requests.get(idp['METADATA_URL'], verify=verify_ssl_certificate)
136
                    response = requests.get(url, verify=verify_ssl_certificate, timeout=timeout)
57 137
                    response.raise_for_status()
58 138
                except requests.exceptions.RequestException as e:
59 139
                    self.logger.error(
60 140
                        u'retrieval of metadata URL %r failed with error %s for %d-th idp',
61
                        idp['METADATA_URL'], e, i)
62
                    continue
141
                        url, e, i)
142
                    try:
143
                        with open(file_cache_path) as fd:
144
                            pass
145
                        idp['METADATA_PATH'] = file_cache_path
146
                        self.load_metadata_path()
147
                    except IOError:
148
                        pass
149
                    return
150
                entity_id = self.load_entity_id(response.text, i)
151
                if not entity_id:
152
                    logger.error('invalid metadata file retrieved from %s', url)
153
                    return
154
                if 'ENTITY_ID' in idp and idp['ENTITY_ID'] != entity_id:
155
                    logger.error('metadata url %s : entityID changed %r != %r', url, entity_id, idp['ENTITY_ID'])
156
                    del idp['ENTITY_ID']
63 157
                idp['METADATA'] = response.text
64
            elif 'METADATA' in idp:
65
                if idp['METADATA'].startswith('/'):
66
                    idp['METADATA'] = open(idp['METADATA']).read()
67
            else:
68
                self.logger.error(u'missing METADATA or METADATA_URL in %d-th idp', i)
69
                continue
158
                idp['METADATA_URL_LAST_UPDATE'] = now
159
                with atomic_write(file_cache_path, mode='wb', overwrite=True) as fd:
160
                    fd.write(response.text.encode('utf-8'))
161
                idp['METADATA_PATH'] = file_cache_path
162
                idp['METADATA_PATH_LAST_UPDATE'] = time.time() + 1
163
                idp.pop('METADATA_URL_UPDATE_THREAD', None)
164
                logger.debug('metadata url %s : update throught HTTP', url)
165
            finally:
166
                stale_timeout = 24 * metadata_cache_time
167
                if last_update and (now - idp['METADATA_URL_LAST_UPDATE']) > stale_timeout:
168
                    logger.error('metadata url %s : not updated since %.1f hours',
169
                                 stale_timeout / 3600.0)
170

  
171
        # we have cache, update in background
172
        if last_update and 'METADATA' in idp:
173
            t = threading.Thread(target=__http_get)
174
            t.start()
175
            # suspend updates for HTTP timeout + 5 seconds
176
            idp['METADATA_URL_UPDATE_THREAD'] = t
177
            idp['METADATA_URL_LAST_UPDATE'] = last_update + timeout + 5
178
        else:
179
            # synchronous update
180
            __http_get()
181

  
182
    def load_metadata(self, idp, i):
183
        # legacy support
184
        if 'METADATA' in idp and idp['METADATA'].startswith('/'):
185
            idp['METADATA_PATH'] = idp['METADATA']
186
            del idp['METADATA']
187

  
188
        if 'METADATA_PATH' in idp:
189
            self.load_metadata_path(idp, i)
190

  
191
        if 'METADATA_URL' in idp:
192
            self.load_metadata_url(idp, i)
193

  
194
        if 'METADATA' in idp:
70 195
            if 'ENTITY_ID' not in idp:
71
                try:
72
                    doc = ET.fromstring(idp['METADATA'])
73
                except (TypeError, ET.ParseError):
74
                    self.logger.error(u'METADATA of %d-th idp is invalid', i)
75
                    continue
76
                if doc.tag != '{%s}EntityDescriptor' % lasso.SAML2_METADATA_HREF:
77
                    self.logger.error(u'METADATA of %d-th idp has no EntityDescriptor root tag', i)
78
                    continue
79

  
80
                if 'entityID' not in doc.attrib:
81
                    self.logger.error(
82
                        u'METADATA of %d-th idp has no entityID attribute on its root tag', i)
83
                    continue
84
                idp['ENTITY_ID'] = doc.attrib['entityID']
85
            yield idp
196
                entity_id = self.load_entity_id(idp['METADATA'], i)
197
                if entity_id:
198
                    idp['ENTITY_ID'] = entity_id
199
            return idp['METADATA']
200

  
201
    def load_entity_id(self, metadata, i):
202
        try:
203
            doc = ET.fromstring(metadata)
204
        except (TypeError, ET.ParseError):
205
            self.logger.error(u'METADATA of %d-th idp is invalid', i)
206
            return None
207
        if doc.tag != '{%s}EntityDescriptor' % lasso.SAML2_METADATA_HREF:
208
            self.logger.error(u'METADATA of %d-th idp has no EntityDescriptor root tag', i)
209
            return None
210

  
211
        if 'entityID' not in doc.attrib:
212
            self.logger.error(
213
                u'METADATA of %d-th idp has no entityID attribute on its root tag', i)
214
            return None
215
        return doc.attrib['entityID']
216

  
217
    def load_idp(self, idp, i):
218
        metadata = self.load_metadata(idp, i)
219
        if not metadata:
220
            self.logger.error(u'missing METADATA or METADATA_URL in %d-th idp', i)
221
            return False
222

  
223
        return 'ENTITY_ID' in idp
86 224

  
87 225
    def authorize(self, idp, saml_attributes):
88 226
        if not idp:
mellon/app_settings.py
40 40
        'ARTIFACT_RESOLVE_TIMEOUT': 10.0,
41 41
        'LOGIN_HINTS': [],
42 42
        'SIGNATURE_METHOD': 'RSA-SHA256',
43
        'METADATA_CACHE_TIME': 3600,
44
        'METADATA_HTTP_TIMEOUT': 10,
43 45
    }
44 46

  
45 47
    @property
mellon/utils.py
276 276
        xml_encoding[0] = encoding
277 277
    parser = expat.ParserCreate()
278 278
    parser.XmlDeclHandler = xmlDeclHandler
279
    parser.Parse(content, True)
279
    try:
280
        parser.Parse(content, True)
281
    except expat.ExpatError as e:
282
        raise ValueError('invalid XML %s' % e)
280 283
    return xml_encoding[0]
281 284

  
282 285

  
setup.py
94 94
          'django>=1.5,<2.0',
95 95
          'requests',
96 96
          'isodate',
97
          'atomicwrites',
97 98
      ],
98 99
      setup_requires=[
99 100
          'django>=1.5,<2.0',
tests/conftest.py
1
import os
1 2
import logging
3

  
2 4
import pytest
3 5
import django_webtest
4 6

  
5 7

  
8
@pytest.fixture(autouse=True)
9
def settings(settings, tmpdir):
10
    settings.MEDIA_ROOT = str(tmpdir.mkdir('media'))
11
    return settings
12

  
13

  
6 14
@pytest.fixture
7
def app(request):
15
def app(request, settings):
8 16
    wtm = django_webtest.WebTestMixin()
9 17
    wtm._patch_settings()
10 18
    request.addfinalizer(wtm._unpatch_settings)
......
23 31

  
24 32

  
25 33
@pytest.fixture
26
def private_settings(request):
34
def private_settings(request, tmpdir):
27 35
    import django.conf
28 36
    from django.conf import UserSettingsHolder
29 37
    old = django.conf.settings._wrapped
......
42 50
    caplog.handler.stream = py.io.TextIO()
43 51
    caplog.handler.records = []
44 52
    return caplog
53

  
54

  
55
@pytest.fixture(scope='session')
56
def metadata():
57
    with open(os.path.join(os.path.dirname(__file__), 'metadata.xml')) as fd:
58
        yield fd.read()
59

  
60

  
61
@pytest.fixture
62
def metadata_path(tmpdir, metadata):
63
    metadata_path = tmpdir / 'metadata.xml'
64
    with metadata_path.open('w') as fd:
65
        fd.write(metadata)
66
    yield str(metadata_path)
tests/test_default_adapter.py
1
import pytest
1

  
2

  
3
import datetime
2 4
import re
3 5
import lasso
6
import time
4 7
from multiprocessing.pool import ThreadPool
5 8

  
9
import pytest
10

  
6 11
from django.contrib import auth
7 12
from django.db import connection
8 13

  
......
196 201
    user = adapter.lookup_user(idp, saml_attributes)
197 202
    assert user is None
198 203
    assert User.objects.count() == 0
204

  
205

  
206
@pytest.fixture
207
def adapter():
208
    return DefaultAdapter()
209

  
210

  
211
def test_load_metadata_simple(adapter, metadata):
212
    idp = {'METADATA': metadata}
213
    assert adapter.load_metadata(idp, 0) == metadata
214

  
215

  
216
def test_load_metadata_legacy(adapter, metadata_path, metadata):
217
    idp = {'METADATA': metadata_path}
218
    assert adapter.load_metadata(idp, 0) == metadata
219
    assert idp['METADATA'] == metadata
220

  
221

  
222
def test_load_metadata_path(adapter, metadata_path, metadata, freezer):
223
    now = time.time()
224
    idp = {'METADATA_PATH': str(metadata_path)}
225
    assert adapter.load_metadata(idp, 0) == metadata
226
    assert idp['METADATA'] == metadata
227
    assert idp['METADATA_PATH_LAST_UPDATE'] == now
228

  
229

  
230
def test_load_metadata_url(settings, adapter, metadata, httpserver, freezer, caplog):
231
    now = time.time()
232
    httpserver.serve_content(content=metadata, headers={'Content-Type': 'application/xml'})
233
    idp = {'METADATA_URL': httpserver.url}
234
    assert adapter.load_metadata(idp, 0) == metadata
235
    assert idp['METADATA'] == metadata
236
    assert idp['METADATA_URL_LAST_UPDATE'] == now
237
    assert 'METADATA_PATH' in idp
238
    assert idp['METADATA_PATH'].startswith(settings.MEDIA_ROOT)
239
    with open(idp['METADATA_PATH']) as fd:
240
        assert fd.read() == metadata
241
    assert idp['METADATA_PATH_LAST_UPDATE'] == now + 1
242
    httpserver.serve_content(content=metadata.replace('idp5', 'idp6'),
243
                             headers={'Content-Type': 'application/xml'})
244
    assert adapter.load_metadata(idp, 0) == metadata
245

  
246
    freezer.move_to(datetime.timedelta(seconds=3601))
247
    caplog.clear()
248
    assert adapter.load_metadata(idp, 0) == metadata
249
    # wait for update thread to finish
250
    try:
251
        idp['METADATA_URL_UPDATE_THREAD'].join()
252
    except KeyError:
253
        pass
254
    new_meta = adapter.load_metadata(idp, 0)
255
    assert new_meta != metadata
256
    assert new_meta == metadata.replace('idp5', 'idp6')
257
    assert 'entityID changed' in caplog.records[-1].message
258
    assert caplog.records[-1].levelname == 'ERROR'
259
    # test load from file cache
260
    del idp['METADATA']
261
    del idp['METADATA_PATH']
262
    del idp['METADATA_PATH_LAST_UPDATE']
263
    httpserver.serve_content(content='', headers={'Content-Type': 'application/xml'})
264
    assert adapter.load_metadata(idp, 0) == metadata.replace('idp5', 'idp6')
tests/test_utils.py
67 67
    assert len(server.providers) == 0
68 68

  
69 69

  
70
def test_create_server_good_metadata_file(mocker, rf, private_settings, caplog):
70
def test_create_server_good_metadata_file(mocker, rf, private_settings, tmpdir, caplog):
71
    path = tmpdir / 'metadata.xml'
72
    with path.open('w') as fd:
73
        fd.write(open('tests/metadata.xml').read())
74

  
71 75
    private_settings.MELLON_IDENTITY_PROVIDERS = [
72 76
        {
73
            'METADATA': '/xxx',
77
            'METADATA': str(path),
74 78
        }
75 79
    ]
76 80
    request = rf.get('/')
77
    with mock.patch(
78
        'mellon.adapters.open', mock.mock_open(read_data=open('tests/metadata.xml').read()),
79
            create=True):
80
        server = create_server(request)
81
    server = create_server(request)
81 82
    assert 'ERROR' not in caplog.text
82 83
    assert len(server.providers) == 1
83 84

  
tox.ini
1 1
[tox]
2
envlist = {coverage-,}py2-{dj18,dj111}-{pg,sqlite},py3-dj111-{pg,sqlite}
2
envlist = coverage-py2-{dj18,dj111}-{pg,sqlite},coverage-py3-dj111-{pg,sqlite}
3 3
toxworkdir = {env:TMPDIR:/tmp}/tox-{env:USER}/django-mellon/
4 4

  
5 5
[testenv]
......
24 24
  pytest-random
25 25
  pytest-mock
26 26
  pytest-django
27
  pytest-freezegun
28
  pytest-localserver
27 29
  pytz
28 30
  lxml
29 31
  cssselect
30
-