Projet

Général

Profil

0002-update-and-cache-metadata-from-URL-and-path-10196.patch

Benjamin Dauvergne, 07 juin 2019 14:50

Télécharger (18,1 ko)

Voir les différences:

Subject: [PATCH 2/2] update and cache metadata from URL and path (#10196)

 README                        |  12 +++
 debian/control                |   6 +-
 mellon/adapters.py            | 198 ++++++++++++++++++++++++++++------
 mellon/app_settings.py        |   2 +
 mellon/utils.py               |   5 +-
 setup.py                      |   1 +
 tests/conftest.py             |  26 ++++-
 tests/test_default_adapter.py |  68 +++++++++++-
 tests/test_utils.py           |  13 +--
 tox.ini                       |   4 +-
 10 files changed, 289 insertions(+), 46 deletions(-)
README
261 261
Should be post or artifact. Default is post. You can refer to the SAML 2.0
262 262
specification to learn the difference.
263 263

  
264
MELLON_METADATA_CACHE_TIME
265
--------------------------
266

  
267
When using METADATA_URL to reference a metadata file, it's the duration in
268
seconds between refresh of the metadata file. Default is 3600 seconds, 1 hour.
269

  
270
MELLON_METADATA_HTTP_TIMEOUT
271
----------------------------
272

  
273
Timeout in seconds for HTTP call made to retrieve metadata files. Default is 10
274
seconds.
275

  
264 276
Tests
265 277
=====
266 278

  
debian/control
15 15
    python (>= 2.7),
16 16
    python-django (>= 1.5),
17 17
    python-isodate,
18
    python-lasso
18
    python-lasso,
19
    python-atomicwrites
19 20
Breaks: python-hobo (<< 0.34.5)
20 21
Description: SAML authentication for Django
21 22

  
......
24 25
Depends: ${misc:Depends}, ${python:Depends},
25 26
    python3-django (>= 1.5),
26 27
    python3-isodate,
27
    python3-lasso
28
    python3-lasso,
29
    python3-atomicwrites
28 30
Description: SAML authentication for Django
mellon/adapters.py
13 13
# You should have received a copy of the GNU Affero General Public License
14 14
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
15 15

  
16
from xml.etree import ElementTree as ET
17
import hashlib
16 18
import logging
19
import os
20
import threading
21
import time
17 22
import uuid
18
from xml.etree import ElementTree as ET
19 23

  
20 24
import lasso
21 25
import requests
22 26
import requests.exceptions
27
from atomicwrites import atomic_write
23 28

  
24 29
from django.core.exceptions import PermissionDenied
30
from django.core.files.storage import default_storage
25 31
from django.contrib import auth
26 32
from django.contrib.auth.models import Group
27 33
from django.utils import six
28 34
from django.utils.encoding import force_text
35
from django.utils.six.moves.urllib.parse import urlparse
29 36

  
30 37
from . import utils, app_settings, models
31 38

  
39
logger = logging.getLogger(__name__)
40

  
32 41

  
33 42
class UserCreationError(Exception):
34 43
    pass
......
49 58

  
50 59
    def get_idps(self):
51 60
        for i, idp in enumerate(self.get_identity_providers_setting()):
52
            if 'METADATA_URL' in idp and 'METADATA' not in idp:
53
                verify_ssl_certificate = utils.get_setting(
54
                    idp, 'VERIFY_SSL_CERTIFICATE')
61
            if self.load_idp(idp, i):
62
                yield idp
63

  
64
    def load_metadata_path(self, idp, i):
65
        path = idp['METADATA_PATH']
66
        if not os.path.exists(path):
67
            logger.warning('metadata path %s does not exist', path)
68
            return
69
        last_update = idp.get('METADATA_PATH_LAST_UPDATE', 0)
70
        try:
71
            mtime = os.stat(path).st_mtime
72
        except OSError as e:
73
            logger.warning('metadata path %s : stat() call failed, %s', path, e)
74
            return
75
        if last_update == 0 or mtime >= last_update:
76
            idp['METADATA_PATH_LAST_UPDATE'] = time.time()
77
            try:
78
                with open(path) as fd:
79
                    metadata = fd.read()
80
            except OSError as e:
81
                logger.warning('metadata path %s : open()/read() call failed, %s', path, e)
82
                return
83
            entity_id = self.load_entity_id(metadata, i)
84
            if not entity_id:
85
                logger.error('invalid metadata file retrieved from %s', path)
86
                return
87
            if 'ENTITY_ID' in idp and idp['ENTITY_ID'] != entity_id:
88
                logger.error('metadata path %s : entityID changed %r != %r', path, entity_id, idp['ENTITY_ID'])
89
                del idp['ENTITY_ID']
90
            idp['METADATA'] = metadata
91

  
92
    def load_metadata_url(self, idp, i):
93
        url = idp['METADATA_URL']
94
        try:
95
            hostname = urlparse(url).hostname
96
        except (ValueError, TypeError) as e:
97
            logger.error('invalid METADATA_URL %r: %s', url, e)
98
            return
99
        if not hostname:
100
            logger.error('no hostname in METADATA_URL %r: %s', url)
101
            return
102
        last_update = idp.get('METADATA_URL_LAST_UPDATE', 0)
103
        metadata_cache_time = utils.get_setting(idp, 'METADATA_CACHE_TIME')
104
        timeout = utils.get_setting(idp, 'METADATA_HTTP_TIMEOUT')
105
        now = time.time()
106

  
107
        try:
108
            url_fingerprint = hashlib.md5(url.encode('ascii')).hexdigest()
109
            file_cache_key = '%s_%s.xml' % (hostname, url_fingerprint)
110
        except (UnicodeError, TypeError, ValueError):
111
            logger.exception('unable to compute file_cache_key')
112
            return
113

  
114
        cache_directory = default_storage.path('mellon_metadata_cache')
115
        file_cache_path = os.path.join(cache_directory, file_cache_key)
116

  
117
        if not os.path.exists(cache_directory):
118
            os.makedirs(cache_directory)
119

  
120
        if os.path.exists(file_cache_path) and 'METADATA' not in idp:
121
            try:
122
                with open(file_cache_path) as fd:
123
                    idp['METADATA'] = fd.read()
124
            except OSError:
125
                pass
126

  
127
        # fresh cache, skip loading
128
        if last_update and 'METADATA' in idp and (now - last_update) < metadata_cache_time:
129
            return
130

  
131
        def __http_get():
132
            verify_ssl_certificate = utils.get_setting(
133
                idp, 'VERIFY_SSL_CERTIFICATE')
134
            try:
135
                response = requests.get(url, verify=verify_ssl_certificate, timeout=timeout)
136
                response.raise_for_status()
137
            except requests.exceptions.RequestException as e:
138
                self.logger.error(
139
                    u'retrieval of metadata URL %r failed with error %s for %d-th idp',
140
                    url, e, i)
55 141
                try:
56
                    response = requests.get(idp['METADATA_URL'], verify=verify_ssl_certificate)
57
                    response.raise_for_status()
58
                except requests.exceptions.RequestException as e:
59
                    self.logger.error(
60
                        u'retrieval of metadata URL %r failed with error %s for %d-th idp',
61
                        idp['METADATA_URL'], e, i)
62
                    continue
63
                idp['METADATA'] = response.text
64
            elif 'METADATA' in idp:
65
                if idp['METADATA'].startswith('/'):
66
                    idp['METADATA'] = open(idp['METADATA']).read()
67
            else:
68
                self.logger.error(u'missing METADATA or METADATA_URL in %d-th idp', i)
69
                continue
142
                    with open(file_cache_path) as fd:
143
                        pass
144
                    idp['METADATA_PATH'] = file_cache_path
145
                    self.load_metadata_path()
146
                except IOError:
147
                    pass
148
                return
149
            entity_id = self.load_entity_id(response.text, i)
150
            if not entity_id:
151
                logger.error('invalid metadata file retrieved from %s', url)
152
                return
153
            if 'ENTITY_ID' in idp and idp['ENTITY_ID'] != entity_id:
154
                logger.error('metadata url %s : entityID changed %r != %r', url, entity_id, idp['ENTITY_ID'])
155
                del idp['ENTITY_ID']
156
            idp['METADATA'] = response.text
157
            idp['METADATA_URL_LAST_UPDATE'] = now
158
            with atomic_write(file_cache_path, mode='wb', overwrite=True) as fd:
159
                fd.write(response.text.encode('utf-8'))
160
            idp['METADATA_PATH'] = file_cache_path
161
            idp['METADATA_PATH_LAST_UPDATE'] = time.time() + 1
162
            idp.pop('METADATA_URL_UPDATE_THREAD', None)
163
            logger.debug('metadata url %s : update throught HTTP', url)
164

  
165
        # we have cache, update in background
166
        if last_update and 'METADATA' in idp:
167
            t = threading.Thread(target=__http_get)
168
            t.start()
169
            # suspend updates for HTTP timeout + 5 seconds
170
            idp['METADATA_URL_UPDATE_THREAD'] = t
171
            idp['METADATA_URL_LAST_UPDATE'] = last_update + timeout + 5
172
        else:
173
            # synchronous update
174
            __http_get()
175

  
176
    def load_metadata(self, idp, i):
177
        # legacy support
178
        if 'METADATA' in idp and idp['METADATA'].startswith('/'):
179
            idp['METADATA_PATH'] = idp['METADATA']
180
            del idp['METADATA']
181

  
182
        if 'METADATA_PATH' in idp:
183
            self.load_metadata_path(idp, i)
184

  
185
        if 'METADATA_URL' in idp:
186
            self.load_metadata_url(idp, i)
187

  
188
        if 'METADATA' in idp:
70 189
            if 'ENTITY_ID' not in idp:
71
                try:
72
                    doc = ET.fromstring(idp['METADATA'])
73
                except (TypeError, ET.ParseError):
74
                    self.logger.error(u'METADATA of %d-th idp is invalid', i)
75
                    continue
76
                if doc.tag != '{%s}EntityDescriptor' % lasso.SAML2_METADATA_HREF:
77
                    self.logger.error(u'METADATA of %d-th idp has no EntityDescriptor root tag', i)
78
                    continue
79

  
80
                if 'entityID' not in doc.attrib:
81
                    self.logger.error(
82
                        u'METADATA of %d-th idp has no entityID attribute on its root tag', i)
83
                    continue
84
                idp['ENTITY_ID'] = doc.attrib['entityID']
85
            yield idp
190
                entity_id = self.load_entity_id(idp['METADATA'], i)
191
                if entity_id:
192
                    idp['ENTITY_ID'] = entity_id
193
            return idp['METADATA']
194

  
195
    def load_entity_id(self, metadata, i):
196
        try:
197
            doc = ET.fromstring(metadata)
198
        except (TypeError, ET.ParseError):
199
            self.logger.error(u'METADATA of %d-th idp is invalid', i)
200
            return None
201
        if doc.tag != '{%s}EntityDescriptor' % lasso.SAML2_METADATA_HREF:
202
            self.logger.error(u'METADATA of %d-th idp has no EntityDescriptor root tag', i)
203
            return None
204

  
205
        if 'entityID' not in doc.attrib:
206
            self.logger.error(
207
                u'METADATA of %d-th idp has no entityID attribute on its root tag', i)
208
            return None
209
        return doc.attrib['entityID']
210

  
211
    def load_idp(self, idp, i):
212
        metadata = self.load_metadata(idp, i)
213
        if not metadata:
214
            self.logger.error(u'missing METADATA or METADATA_URL in %d-th idp', i)
215
            return False
216

  
217
        return 'ENTITY_ID' in idp
86 218

  
87 219
    def authorize(self, idp, saml_attributes):
88 220
        if not idp:
mellon/app_settings.py
40 40
        'ARTIFACT_RESOLVE_TIMEOUT': 10.0,
41 41
        'LOGIN_HINTS': [],
42 42
        'SIGNATURE_METHOD': 'RSA-SHA256',
43
        'METADATA_CACHE_TIME': 3600,
44
        'METADATA_HTTP_TIMEOUT': 10,
43 45
    }
44 46

  
45 47
    @property
mellon/utils.py
276 276
        xml_encoding[0] = encoding
277 277
    parser = expat.ParserCreate()
278 278
    parser.XmlDeclHandler = xmlDeclHandler
279
    parser.Parse(content, True)
279
    try:
280
        parser.Parse(content, True)
281
    except expat.ExpatError as e:
282
        raise ValueError('invalid XML %s' % e)
280 283
    return xml_encoding[0]
281 284

  
282 285

  
setup.py
94 94
          'django>=1.5,<2.0',
95 95
          'requests',
96 96
          'isodate',
97
          'atomicwrites',
97 98
      ],
98 99
      setup_requires=[
99 100
          'django>=1.5,<2.0',
tests/conftest.py
1
import os
1 2
import logging
3

  
2 4
import pytest
3 5
import django_webtest
4 6

  
5 7

  
8
@pytest.fixture(autouse=True)
9
def settings(settings, tmpdir):
10
    settings.MEDIA_ROOT = str(tmpdir.mkdir('media'))
11
    return settings
12

  
13

  
6 14
@pytest.fixture
7
def app(request):
15
def app(request, settings):
8 16
    wtm = django_webtest.WebTestMixin()
9 17
    wtm._patch_settings()
10 18
    request.addfinalizer(wtm._unpatch_settings)
......
23 31

  
24 32

  
25 33
@pytest.fixture
26
def private_settings(request):
34
def private_settings(request, tmpdir):
27 35
    import django.conf
28 36
    from django.conf import UserSettingsHolder
29 37
    old = django.conf.settings._wrapped
......
42 50
    caplog.handler.stream = py.io.TextIO()
43 51
    caplog.handler.records = []
44 52
    return caplog
53

  
54

  
55
@pytest.fixture(scope='session')
56
def metadata():
57
    with open(os.path.join(os.path.dirname(__file__), 'metadata.xml')) as fd:
58
        yield fd.read()
59

  
60

  
61
@pytest.fixture
62
def metadata_path(tmpdir, metadata):
63
    metadata_path = tmpdir / 'metadata.xml'
64
    with metadata_path.open('w') as fd:
65
        fd.write(metadata)
66
    yield str(metadata_path)
tests/test_default_adapter.py
1
import pytest
1

  
2

  
3
import datetime
2 4
import re
3 5
import lasso
6
import time
4 7
from multiprocessing.pool import ThreadPool
5 8

  
9
import pytest
10

  
6 11
from django.contrib import auth
7 12
from django.db import connection
8 13

  
......
196 201
    user = adapter.lookup_user(idp, saml_attributes)
197 202
    assert user is None
198 203
    assert User.objects.count() == 0
204

  
205

  
206
@pytest.fixture
207
def adapter():
208
    return DefaultAdapter()
209

  
210

  
211
def test_load_metadata_simple(adapter, metadata):
212
    idp = {'METADATA': metadata}
213
    assert adapter.load_metadata(idp, 0) == metadata
214

  
215

  
216
def test_load_metadata_legacy(adapter, metadata_path, metadata):
217
    idp = {'METADATA': metadata_path}
218
    assert adapter.load_metadata(idp, 0) == metadata
219
    assert idp['METADATA'] == metadata
220

  
221

  
222
def test_load_metadata_path(adapter, metadata_path, metadata, freezer):
223
    now = time.time()
224
    idp = {'METADATA_PATH': str(metadata_path)}
225
    assert adapter.load_metadata(idp, 0) == metadata
226
    assert idp['METADATA'] == metadata
227
    assert idp['METADATA_PATH_LAST_UPDATE'] == now
228

  
229

  
230
def test_load_metadata_url(settings, adapter, metadata, httpserver, freezer, caplog):
231
    now = time.time()
232
    httpserver.serve_content(content=metadata, headers={'Content-Type': 'application/xml'})
233
    idp = {'METADATA_URL': httpserver.url}
234
    assert adapter.load_metadata(idp, 0) == metadata
235
    assert idp['METADATA'] == metadata
236
    assert idp['METADATA_URL_LAST_UPDATE'] == now
237
    assert 'METADATA_PATH' in idp
238
    assert idp['METADATA_PATH'].startswith(settings.MEDIA_ROOT)
239
    with open(idp['METADATA_PATH']) as fd:
240
        assert fd.read() == metadata
241
    assert idp['METADATA_PATH_LAST_UPDATE'] == now + 1
242
    httpserver.serve_content(content=metadata.replace('idp5', 'idp6'),
243
                             headers={'Content-Type': 'application/xml'})
244
    assert adapter.load_metadata(idp, 0) == metadata
245

  
246
    freezer.move_to(datetime.timedelta(seconds=3601))
247
    caplog.clear()
248
    assert adapter.load_metadata(idp, 0) == metadata
249
    # wait for update thread to finish
250
    try:
251
        idp['METADATA_URL_UPDATE_THREAD'].join()
252
    except KeyError:
253
        pass
254
    new_meta = adapter.load_metadata(idp, 0)
255
    assert new_meta != metadata
256
    assert new_meta == metadata.replace('idp5', 'idp6')
257
    assert 'entityID changed' in caplog.records[-1].message
258
    assert caplog.records[-1].levelname == 'ERROR'
259
    # test load from file cache
260
    del idp['METADATA']
261
    del idp['METADATA_PATH']
262
    del idp['METADATA_PATH_LAST_UPDATE']
263
    httpserver.serve_content(content='', headers={'Content-Type': 'application/xml'})
264
    assert adapter.load_metadata(idp, 0) == metadata.replace('idp5', 'idp6')
tests/test_utils.py
67 67
    assert len(server.providers) == 0
68 68

  
69 69

  
70
def test_create_server_good_metadata_file(mocker, rf, private_settings, caplog):
70
def test_create_server_good_metadata_file(mocker, rf, private_settings, tmpdir, caplog):
71
    path = tmpdir / 'metadata.xml'
72
    with path.open('w') as fd:
73
        fd.write(open('tests/metadata.xml').read())
74

  
71 75
    private_settings.MELLON_IDENTITY_PROVIDERS = [
72 76
        {
73
            'METADATA': '/xxx',
77
            'METADATA': str(path),
74 78
        }
75 79
    ]
76 80
    request = rf.get('/')
77
    with mock.patch(
78
        'mellon.adapters.open', mock.mock_open(read_data=open('tests/metadata.xml').read()),
79
            create=True):
80
        server = create_server(request)
81
    server = create_server(request)
81 82
    assert 'ERROR' not in caplog.text
82 83
    assert len(server.providers) == 1
83 84

  
tox.ini
1 1
[tox]
2
envlist = {coverage-,}py2-{dj18,dj111}-{pg,sqlite},py3-dj111-{pg,sqlite}
2
envlist = coverage-py2-{dj18,dj111}-{pg,sqlite},coverage-py3-dj111-{pg,sqlite}
3 3
toxworkdir = {env:TMPDIR:/tmp}/tox-{env:USER}/django-mellon/
4 4

  
5 5
[testenv]
......
24 24
  pytest-random
25 25
  pytest-mock
26 26
  pytest-django
27
  pytest-freezegun
28
  pytest-localserver
27 29
  pytz
28 30
  lxml
29 31
  cssselect
30
-