From 4e09435be4d98aadb0676e1663c9765a13bfea88 Mon Sep 17 00:00:00 2001 From: Benjamin Dauvergne Date: Thu, 30 Jan 2020 21:03:06 +0100 Subject: [PATCH] migrate to python3 (#39430) --- debian/control | 10 +- debian/pydist-overrides | 6 +- debian/rules | 4 +- setup.py | 36 ++- tests/test_wcs.py | 40 +-- tox.ini | 11 +- wcs_olap/cmd.py | 40 ++- wcs_olap/feeder.py | 25 +- wcs_olap/signature.py | 30 ++- wcs_olap/tb.py | 55 ---- wcs_olap/wcs_api.py | 585 ++++++++++++++++++++++++++++++---------- 11 files changed, 563 insertions(+), 279 deletions(-) delete mode 100644 wcs_olap/tb.py diff --git a/debian/control b/debian/control index 898b82d..78b3c6c 100644 --- a/debian/control +++ b/debian/control @@ -2,13 +2,11 @@ Source: wcs-olap Section: python Priority: optional Maintainer: Benjamin Dauvergne -Build-Depends: python-setuptools (>= 0.6b3), python-all (>= 2.6), debhelper (>= 9), dh-python -Standards-Version: 3.9.1 -X-Python-Version: >= 2.7 -Homepage: http://dev.entrouvert.org/projects/publik-bi/ +Build-Depends: python3-setuptools, python3-all, debhelper (>= 9), dh-python +Standards-Version: 3.9.6 +Homepage: http://dev.entrouvert.org/projects/wcs-olap/ Package: wcs-olap Architecture: all -Depends: ${python:Depends} -XB-Python-Version: ${python:Versions} +Depends: ${python3:Depends} Description: Export w.c.s. datas into a snowflake schema built on PostgreSQL diff --git a/debian/pydist-overrides b/debian/pydist-overrides index c533f09..13865b3 100644 --- a/debian/pydist-overrides +++ b/debian/pydist-overrides @@ -1,3 +1,3 @@ -isodate python-isodate -psycopg2 python-psycopg2 -cached_property python-cached-property +isodate python3-isodate +psycopg2 python3-psycopg2 +cached_property python3-cached-property diff --git a/debian/rules b/debian/rules index 46f5b5d..c2e08fb 100755 --- a/debian/rules +++ b/debian/rules @@ -1,6 +1,8 @@ #!/usr/bin/make -f +export PYBUILD_NAME=wcs-olap + %: - dh $@ --with python2 + dh $@ --with python3 --buildsystem=pybuild diff --git a/setup.py b/setup.py index e7dd8d9..3c764a3 100644 --- a/setup.py +++ b/setup.py @@ -9,37 +9,39 @@ from setuptools.command.sdist import sdist class eo_sdist(sdist): def run(self): - print "creating VERSION file" if os.path.exists('VERSION'): os.remove('VERSION') version = get_version() - version_file = open('VERSION', 'w') - version_file.write(version) - version_file.close() + with open('VERSION', 'w') as fd: + fd.write(version) sdist.run(self) - print "removing VERSION file" if os.path.exists('VERSION'): os.remove('VERSION') def get_version(): '''Use the VERSION, if absent generates a version with git describe, if not - tag exists, take 0.0.0- and add the length of the commit log. + tag exists, take 0.0- and add the length of the commit log. 
''' if os.path.exists('VERSION'): with open('VERSION', 'r') as v: return v.read() if os.path.exists('.git'): - p = subprocess.Popen(['git', 'describe', '--dirty', '--match=v*'], stdout=subprocess.PIPE, - stderr=subprocess.PIPE) + p = subprocess.Popen( + ['git', 'describe', '--dirty=.dirty', '--match=v*'], + stdout=subprocess.PIPE, stderr=subprocess.PIPE) result = p.communicate()[0] if p.returncode == 0: - result = result.split()[0][1:] + result = result.decode('ascii').strip()[1:] # strip spaces/newlines and initial v + if '-' in result: # not a tagged version + real_number, commit_count, commit_hash = result.split('-', 2) + version = '%s.post%s+%s' % (real_number, commit_count, commit_hash) + else: + version = result + return version else: - result = '0.0.0-%s' % len(subprocess.check_output( - ['git', 'rev-list', 'HEAD']).splitlines()) - return result.replace('-', '.').replace('.g', '+g') - return '0.0.0' + return '0.0.post%s' % len(subprocess.check_output(['git', 'rev-list', 'HEAD']).splitlines()) + return '0.0' setup(name="wcs-olap", @@ -54,7 +56,13 @@ setup(name="wcs-olap", maintainer_email="bdauvergne@entrouvert.com", packages=find_packages(), include_package_data=True, - install_requires=['requests', 'psycopg2', 'isodate', 'six', 'cached-property'], + install_requires=[ + 'requests', + 'psycopg2', + 'isodate', + 'six', + 'cached-property' + ], entry_points={ 'console_scripts': ['wcs-olap=wcs_olap.cmd:main'], }, diff --git a/tests/test_wcs.py b/tests/test_wcs.py index a636dce..60c7962 100644 --- a/tests/test_wcs.py +++ b/tests/test_wcs.py @@ -1,12 +1,9 @@ -from __future__ import unicode_literals - import json - import pytest +import pathlib import requests -import pathlib2 -import mock +import httmock import utils @@ -90,34 +87,41 @@ def test_wcs_fixture(wcs, postgres_db, tmpdir, olap_cmd, caplog): # verify JSON schema with (olap_cmd.model_dir / 'olap.model').open() as fd, \ - (pathlib2.Path(__file__).parent / 'olap.model').open() as fd2: - json_schema = json.load(fd) - expected_json_schema = json.load(fd2) - expected_json_schema['pg_dsn'] = postgres_db.dsn - assert json_schema == expected_json_schema + (pathlib.Path(__file__).parent / 'olap.model').open() as fd2: + json_schema = json.load(fd) + expected_json_schema = json.load(fd2) + expected_json_schema['pg_dsn'] = postgres_db.dsn + assert json_schema == expected_json_schema def test_requests_exception(wcs, postgres_db, tmpdir, olap_cmd, caplog): - with mock.patch('requests.get', side_effect=requests.RequestException('wat!')): + @httmock.urlmatch() + def requests_raise(url, request): + raise requests.RequestException('wat!') + + with httmock.HTTMock(requests_raise): with pytest.raises(SystemExit): olap_cmd(no_log_errors=False) assert 'wat!' 
in caplog.text def test_requests_not_ok(wcs, postgres_db, tmpdir, olap_cmd, caplog): - with mock.patch('requests.get') as mocked_get: - mocked_get.return_value.ok = False - mocked_get.return_value.status_code = 401 - mocked_get.return_value.text = '{"err": 1, "err_desc": "invalid signature"}' + @httmock.urlmatch() + def return_401(url, request): + return {'status_code': 401, 'content': {"err": 1, "err_desc": "invalid signature"}} + + with httmock.HTTMock(return_401): with pytest.raises(SystemExit): olap_cmd(no_log_errors=False) assert 'invalid signature' in caplog.text def test_requests_not_json(wcs, postgres_db, tmpdir, olap_cmd, caplog): - with mock.patch('requests.get') as mocked_get: - mocked_get.return_value.ok = True - mocked_get.return_value.json.side_effect = ValueError('invalid JSON') + @httmock.urlmatch() + def return_invalid_json(url, request): + return 'x' + + with httmock.HTTMock(return_invalid_json): with pytest.raises(SystemExit): olap_cmd(no_log_errors=False) assert 'Invalid JSON content' in caplog.text diff --git a/tox.ini b/tox.ini index d683d71..6e57de7 100644 --- a/tox.ini +++ b/tox.ini @@ -5,7 +5,7 @@ [tox] toxworkdir = {env:TMPDIR:/tmp}/tox-{env:USER}/wcs-olap/{env:BRANCH_NAME:} -envlist = py2-coverage +envlist = py3-coverage [testenv] usedevelop = true @@ -17,14 +17,17 @@ deps = pytest pytest-cov pytest-random - quixote<3.0 + quixote>=3 psycopg2-binary vobject pyproj django-ratelimit<3 gadjo - mock + httmock django>=1.11,<1.12 commands = - ./get_wcs.sh + #./get_wcs.sh py.test {env:COVERAGE:} {posargs:--random-group tests} + +[pytest] +junit_family=xunit2 diff --git a/wcs_olap/cmd.py b/wcs_olap/cmd.py index d5c070d..9965891 100644 --- a/wcs_olap/cmd.py +++ b/wcs_olap/cmd.py @@ -1,14 +1,12 @@ -import sys import argparse -import ConfigParser -import os +import configparser +import locale import logging import logging.config -from . import wcs_api -from .feeder import WcsOlapFeeder -import locale +import os +import sys -from . import tb +from . import wcs_api, feeder def main(): @@ -16,13 +14,10 @@ def main(): main2() except SystemExit: raise - except: - tb.print_tb() - raise SystemExit(1) def get_config(path=None): - config = ConfigParser.ConfigParser() + config = configparser.ConfigParser() global_config_path = '/etc/wcs-olap/config.ini' user_config_path = os.path.expanduser('~/.wcs-olap.ini') if not path: @@ -60,14 +55,14 @@ def main2(): fake = args.fake config = get_config(path=args.config_path) # list all known urls - urls = [url for url in config.sections() if url.startswith('http://') or - url.startswith('https://')] + urls = [url for url in config.sections() if url.startswith('http://') + or url.startswith('https://')] defaults = {} if not args.all: try: url = args.url or urls[0] except IndexError: - print 'no url found' + print('no url found') raise SystemExit(1) urls = [url] if config.has_section(args.url): @@ -97,22 +92,23 @@ def main2(): pg_dsn = defaults['pg_dsn'] slugs = defaults.get('slugs', '').strip().split() or getattr(args, 'slug', []) batch_size = int(defaults.get('batch_size', 500)) - except KeyError, e: + except KeyError as e: failure = True logger.error('configuration incomplete for %s: %s', url, e) else: try: - api = wcs_api.WcsApi(url=url, orig=orig, key=key, slugs=slugs, - verify=defaults.get('verify', 'True') == 'True', - batch_size=batch_size) + api = wcs_api.WcsApi(url=url, orig=orig, key=key, + batch_size=batch_size, + verify=(defaults.get('verify', 'True') == 'True')) logger.info('starting synchronizing w.c.s. 
at %r with PostgreSQL at %s', url, pg_dsn) - feeder = WcsOlapFeeder(api=api, schema=schema, pg_dsn=pg_dsn, logger=logger, - config=defaults, do_feed=feed, fake=fake) - feeder.feed() + olap_feeder = feeder.WcsOlapFeeder( + api=api, schema=schema, pg_dsn=pg_dsn, logger=logger, + config=defaults, do_feed=feed, fake=fake, slugs=slugs) + olap_feeder.feed() logger.info('finished') feed_result = False - except: + except Exception: if args.no_log_errors: raise feed_result = True diff --git a/wcs_olap/feeder.py b/wcs_olap/feeder.py index e1cafb0..f2e2456 100644 --- a/wcs_olap/feeder.py +++ b/wcs_olap/feeder.py @@ -2,7 +2,7 @@ from __future__ import unicode_literals -from collections import OrderedDict, Counter +from collections import OrderedDict import datetime import six import copy @@ -10,7 +10,7 @@ import itertools import os import json import hashlib -from utils import Whatever +from .utils import Whatever import psycopg2 from cached_property import cached_property @@ -78,8 +78,9 @@ class WcsOlapFeeder(object): status_to_id = dict((c[1], c[0]) for c in channels) id_to_status = dict((c[0], c[1]) for c in channels) - def __init__(self, api, pg_dsn, schema, logger=None, config=None, do_feed=True, fake=False): + def __init__(self, api, pg_dsn, schema, logger=None, config=None, do_feed=True, fake=False, slugs=None): self.api = api + self.slugs = slugs self.fake = fake self.logger = logger or Whatever() self.schema = schema @@ -291,7 +292,7 @@ class WcsOlapFeeder(object): @cached_property def formdefs(self): - return self.api.formdefs + return [formdef for formdef in self.api.formdefs if not self.slugs or formdef.slug in self.slugs] @cached_property def roles(self): @@ -441,7 +442,7 @@ CREATE TABLE public.dates AS (SELECT if isinstance(o, six.string_types): return o.format(**ctx) elif isinstance(o, dict): - return dict((k, helper(v)) for k, v in o.iteritems()) + return dict((k, helper(v)) for k, v in o.items()) elif isinstance(o, list): return [helper(v) for v in o] elif isinstance(o, (bool, int, float)): @@ -466,8 +467,8 @@ CREATE TABLE public.dates AS (SELECT # categories tmp_cat_map = self.create_labeled_table( - 'category', enumerate(c.name for c in self.categories), comment='catégorie') - self.categories_mapping = dict((c.id, tmp_cat_map[c.name]) for c in self.categories) + 'category', enumerate(c.title for c in self.categories), comment='catégorie') + self.categories_mapping = dict((c.slug, tmp_cat_map[c.title]) for c in self.categories) self.create_labeled_table('hour', zip(range(0, 24), map(str, range(0, 24))), comment='heures') @@ -506,7 +507,7 @@ CREATE TABLE public.dates AS (SELECT 'geolocation_base': 'position géographique', } self.create_table('{generic_formdata_table}', self.columns) - for at, comment in self.comments.iteritems(): + for at, comment in self.comments.items(): self.ex('COMMENT ON COLUMN {generic_formdata_table}.%s IS %%s' % at, vars=(comment,)) self.ex('COMMENT ON TABLE {generic_formdata_table} IS %s', vars=('tous les formulaires',)) # evolutions @@ -663,7 +664,7 @@ class WcsFormdefFeeder(object): } # add function fields - for function, name in self.formdef.schema.workflow.functions.iteritems(): + for function, name in self.formdef.schema.workflow.functions.items(): at = 'function_%s' % slugify(function) columns[at] = { 'sql_col_name': at, @@ -746,7 +747,7 @@ class WcsFormdefFeeder(object): values = [] generic_evolution_values = [] evolution_values = [] - for data in self.formdef.datas: + for data in self.formdef.formdatas.anonymized.full: json_data = {} # ignore formdata 
without status @@ -818,7 +819,7 @@ class WcsFormdefFeeder(object): v = '(%.6f, %.6f)' % (v.get('lon'), v.get('lat')) row['geolocation_%s' % geolocation] = v # add function fields value - for function, name in self.formdef.schema.workflow.functions.iteritems(): + for function, name in self.formdef.schema.workflow.functions.items(): try: v = data.functions[function] except KeyError: @@ -949,7 +950,7 @@ class WcsFormdefFeeder(object): }) # add dimension for function - for function, name in self.formdef.schema.workflow.functions.iteritems(): + for function, name in self.formdef.schema.workflow.functions.items(): at = 'function_%s' % slugify(function) cube['joins'].append({ 'name': at, diff --git a/wcs_olap/signature.py b/wcs_olap/signature.py index 30124f9..b57e21f 100644 --- a/wcs_olap/signature.py +++ b/wcs_olap/signature.py @@ -1,12 +1,12 @@ +import urllib.parse as urlparse import datetime import base64 import hmac import hashlib -import urllib import random -import urlparse '''Simple signature scheme for query strings''' +# from http://repos.entrouvert.org/portail-citoyen.git/tree/portail_citoyen/apps/data_source_plugin/signature.py def sign_url(url, key, algo='sha256', timestamp=None, nonce=None): @@ -20,23 +20,25 @@ def sign_query(query, key, algo='sha256', timestamp=None, nonce=None): timestamp = datetime.datetime.utcnow() timestamp = timestamp.strftime('%Y-%m-%dT%H:%M:%SZ') if nonce is None: - nonce = hex(random.SystemRandom().getrandbits(128))[2:-1] + nonce = hex(random.getrandbits(128))[2:] new_query = query if new_query: new_query += '&' - new_query += urllib.urlencode(( + new_query += urlparse.urlencode(( ('algo', algo), ('timestamp', timestamp), ('nonce', nonce))) signature = base64.b64encode(sign_string(new_query, key, algo=algo)) - new_query += '&signature=' + urllib.quote(signature) + new_query += '&signature=' + urlparse.quote(signature) return new_query def sign_string(s, key, algo='sha256', timedelta=30): digestmod = getattr(hashlib, algo) - if isinstance(key, unicode): + if isinstance(key, str): key = key.encode('utf-8') + if isinstance(s, str): + s = s.encode('utf-8') hash = hmac.HMAC(key, digestmod=digestmod, msg=s) return hash.digest() @@ -48,12 +50,17 @@ def check_url(url, key, known_nonce=None, timedelta=30): def check_query(query, key, known_nonce=None, timedelta=30): parsed = urlparse.parse_qs(query) + if not ('signature' in parsed and 'algo' in parsed + and 'timestamp' in parsed and 'nonce' in parsed): + return False + unsigned_query, signature_content = query.split('&signature=', 1) + if '&' in signature_content: + return False # signature must be the last parameter signature = base64.b64decode(parsed['signature'][0]) algo = parsed['algo'][0] timestamp = parsed['timestamp'][0] timestamp = datetime.datetime.strptime(timestamp, '%Y-%m-%dT%H:%M:%SZ') nonce = parsed['nonce'] - unsigned_query = query.split('&signature=')[0] if known_nonce is not None and known_nonce(nonce): return False if abs(datetime.datetime.utcnow() - timestamp) > datetime.timedelta(seconds=timedelta): @@ -68,5 +75,12 @@ def check_string(s, signature, key, algo='sha256'): return False res = 0 for a, b in zip(signature, signature2): - res |= ord(a) ^ ord(b) + res |= a ^ b return res == 0 + + +if __name__ == '__main__': + key = '12345' + signed_query = sign_query('NameId=_12345&orig=montpellier', key) + assert check_query(signed_query, key, timedelta=0) is False + assert check_query(signed_query, key) is True diff --git a/wcs_olap/tb.py b/wcs_olap/tb.py deleted file mode 100644 index 1f7d293..0000000 
--- a/wcs_olap/tb.py +++ /dev/null @@ -1,55 +0,0 @@ -from StringIO import StringIO -import sys -import linecache - - -def print_tb(): - exc_type, exc_value, tb = sys.exc_info() - if exc_value: - exc_value = unicode(str(exc_value), errors='ignore') - error_file = StringIO() - - limit = None - if hasattr(sys, 'tracebacklimit'): - limit = sys.tracebacklimit - print >>error_file, "Exception:" - print >>error_file, " type = '%s', value = '%s'" % (exc_type, exc_value) - print >>error_file - - # format the traceback - print >>error_file, 'Stack trace (most recent call first):' - n = 0 - while tb is not None and (limit is None or n < limit): - frame = tb.tb_frame - function = frame.f_code.co_name - filename = frame.f_code.co_filename - exclineno = frame.f_lineno - locals = frame.f_locals.items() - - print >>error_file, ' File "%s", line %s, in %s' % (filename, exclineno, function) - linecache.checkcache(filename) - for lineno in range(exclineno - 2, exclineno + 3): - line = linecache.getline(filename, lineno, frame.f_globals) - if line: - if lineno == exclineno: - print >>error_file, '>%5s %s' % (lineno, line.rstrip()) - else: - print >>error_file, ' %5s %s' % (lineno, line.rstrip()) - print >>error_file - if locals: - print >>error_file, " locals: " - for key, value in locals: - print >>error_file, " %s =" % key, - try: - repr_value = repr(value) - if len(repr_value) > 10000: - repr_value = repr_value[:10000] + ' [...]' - print >>error_file, repr_value, - except: - print >>error_file, "", - print >>error_file - print >>error_file - tb = tb.tb_next - n = n + 1 - - print >>sys.stderr, error_file.getvalue() diff --git a/wcs_olap/wcs_api.py b/wcs_olap/wcs_api.py index 047f1e9..41a0c60 100644 --- a/wcs_olap/wcs_api.py +++ b/wcs_olap/wcs_api.py @@ -1,61 +1,80 @@ -import six +# wcs_olap +# Copyright (C) 2020 Entr'ouvert +# +# This program is free software: you can redistribute it and/or modify it +# under the terms of the GNU Affero General Public License as published +# by the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . + +import collections +import base64 +import copy +import logging +import datetime +import contextlib +import json + import requests -import urlparse -import urllib import isodate -import logging +import urllib.parse as urlparse from . 
import signature -logger = logging.getLogger(__name__) +class WcsApiError(Exception): + pass -def exception_to_text(e): - try: - return six.text_type(e) - except Exception: - pass +class JSONFile(object): + def __init__(self, d): + self.d = d - try: - return six.text_type(e.decode('utf8')) - except Exception: - pass + @property + def filename(self): + return self.d.get('filename', '') - try: - return six.text_type(repr(e)) - except Exception: - pass + @property + def content_type(self): + return self.d.get('content_type', 'application/octet-stream') - try: - args = e.args - try: - content = six.text_type(repr(args)) if args != [] else '' - except Exception: - content = '' - except AttributeError: - content = '' - return u'%s(%s)' % (e.__class__.__name__, content) + @property + def content(self): + return base64.b64decode(self.d['content']) -class WcsApiError(Exception): - def __init__(self, message, **kwargs): - super(WcsApiError, self).__init__(message) - self.kwargs = kwargs - - def __str__(self): - kwargs = self.kwargs.copy() - if 'exception' in kwargs: - kwargs['exception'] = exception_to_text(kwargs['exception']) - return '%s: %s' % (self.args[0], ' '.join('%s=%s' % (key, value) for key, value in kwargs.items())) +def to_dict(o): + if hasattr(o, 'to_dict'): + return o.to_dict() + elif isinstance(o, dict): + return {k: to_dict(v) for k, v in o.items()} + elif isinstance(o, (list, tuple)): + return [to_dict(v) for v in o] + else: + return o class BaseObject(object): def __init__(self, wcs_api, **kwargs): - self.__wcs_api = wcs_api + self._wcs_api = wcs_api self.__dict__.update(**kwargs) + def to_dict(self): + d = collections.OrderedDict() + for key, value in self.__dict__.items(): + if key[0] == '_': + continue + d[key] = to_dict(value) + return d + class FormDataWorkflow(BaseObject): status = None @@ -92,17 +111,24 @@ class Evolution(BaseObject): class FormData(BaseObject): geolocations = None evolution = None + submission = None + workflow = None + roles = None + with_files = False - def __init__(self, wcs_api, **kwargs): + def __init__(self, wcs_api, forms, **kwargs): + self.forms = forms super(FormData, self).__init__(wcs_api, **kwargs) self.receipt_time = isodate.parse_datetime(self.receipt_time) - self.submission = BaseObject(wcs_api, **self.submission) - self.workflow = FormDataWorkflow(wcs_api, **self.workflow) + if self.submission: + self.submission = BaseObject(wcs_api, **self.submission) + if self.workflow: + self.workflow = FormDataWorkflow(wcs_api, **self.workflow) self.evolution = [Evolution(wcs_api, **evo) for evo in self.evolution or []] self.functions = {} self.concerned_roles = [] self.action_roles = [] - for function in self.roles: + for function in self.roles or []: roles = [Role(wcs_api, **r) for r in self.roles[function]] if function == 'concerned': self.concerned_roles.extend(roles) @@ -113,11 +139,23 @@ class FormData(BaseObject): self.functions[function] = roles[0] except IndexError: self.functions[function] = None - del self.roles + if 'roles' in self.__dict__: + del self.roles - def __repr__(self): - return '<{klass} {display_id!r}>'.format(klass=self.__class__.__name__, - display_id=self.id) + def __str__(self): + return '{self.formdef} - {self.id}'.format(self=self) + + @property + def full(self): + if self.with_files: + return self + if not hasattr(self, '_full'): + self._full = self.forms[self.id] + return self._full + + @property + def anonymized(self): + return self.forms.anonymized[self.id] @property def endpoint_delay(self): @@ -140,6 +178,13 @@ class 
FormData(BaseObject): else: return + def __getitem__(self, key): + value = self.full.fields.get(key) + # unserialize files + if isinstance(value, dict) and 'content' in value: + return JSONFile(value) + return value + class Workflow(BaseObject): statuses = None @@ -148,11 +193,10 @@ class Workflow(BaseObject): def __init__(self, wcs_api, **kwargs): super(Workflow, self).__init__(wcs_api, **kwargs) self.statuses = [BaseObject(wcs_api, **v) for v in (self.statuses or [])] - if self.statuses: - assert not hasattr(self.statuses[0], 'startpoint'), 'startpoint is exported by w.c.s. FIXME' - for status in self.statuses: - status.startpoint = False - self.statuses[0].startpoint = True + assert not hasattr(self.statuses[0], 'startpoint'), 'startpoint is exported by w.c.s. FIXME' + for status in self.statuses: + status.startpoint = False + self.statuses[0].startpoint = True self.statuses_map = dict((s.id, s) for s in self.statuses) self.fields = [Field(wcs_api, **field) for field in (self.fields or [])] @@ -177,29 +221,269 @@ class Schema(BaseObject): self.geolocations = sorted((k, v) for k, v in (self.geolocations or {}).items()) +class FormDatas(object): + def __init__(self, wcs_api, formdef, full=False, anonymize=False, batch=1000): + self.wcs_api = wcs_api + self.formdef = formdef + self._full = full + self.anonymize = anonymize + self.batch = batch + + def __getitem__(self, slice_or_id): + # get batch of forms + if isinstance(slice_or_id, slice): + def helper(): + if slice_or_id.stop <= slice_or_id.start or slice_or_id.step: + raise ValueError('invalid slice %s' % slice_or_id) + offset = slice_or_id.start + limit = slice_or_id.stop - slice_or_id.start + + url_parts = ['api/forms/{self.formdef.slug}/list'.format(self=self)] + query = {} + query['full'] = 'on' if self._full else 'off' + if offset: + query['offset'] = str(offset) + if limit: + query['limit'] = str(limit) + if self.anonymize: + query['anonymise'] = 'on' + url_parts.append('?%s' % urlparse.urlencode(query)) + for d in self.wcs_api.get_json(*url_parts): + # w.c.s. 
had a bug where some formdata lost their draft status, skip them + if not d.get('receipt_time'): + continue + yield FormData(wcs_api=self.wcs_api, forms=self, formdef=self.formdef, **d) + return helper() + # or get one form + else: + url_parts = ['api/forms/{formdef.slug}/{id}/'.format(formdef=self.formdef, id=slice_or_id)] + if self.anonymize: + url_parts.append('?anonymise=true') + d = self.wcs_api.get_json(*url_parts) + return FormData(wcs_api=self.wcs_api, forms=self, formdef=self.formdef, with_files=True, **d) + + @property + def full(self): + forms = copy.copy(self) + forms._full = True + return forms + + @property + def anonymized(self): + forms = copy.copy(self) + forms.anonymize = True + return forms + + def batched(self, batch): + forms = copy.copy(self) + forms.batch = batch + return forms + + def __iter__(self): + start = 0 + while True: + empty = True + for formdef in self[start:start + self.batch]: + empty = False + yield formdef + if empty: + break + start += self.batch + + def __len__(self): + return len(list((o for o in self))) + + +class CancelSubmitError(Exception): + pass + + +class FormDefSubmit(object): + formdef = None + data = None + user_email = None + user_name_id = None + backoffice_submission = False + submission_channel = None + submission_context = None + draft = False + + def __init__(self, wcs_api, formdef, **kwargs): + self.wcs_api = wcs_api + self.formdef = formdef + self.data = {} + self.__dict__.update(kwargs) + + def payload(self): + d = { + 'data': self.data.copy(), + } + if self.draft: + d.setdefault('meta', {})['draft'] = True + if self.backoffice_submission: + d.setdefault('meta', {})['backoffice-submission'] = True + if self.submission_context: + d['context'] = self.submission_context + if self.submission_channel: + d.setdefault('context', {})['channel'] = self.submission_channel + if self.user_email: + d.setdefault('user', {})['email'] = self.user_email + if self.user_name_id: + d.setdefault('user', {})['NameID'] = self.user_name_id + return d + + def set(self, field, value, **kwargs): + if isinstance(field, Field): + varname = field.varname + if not varname: + raise ValueError('field has no varname, submit is impossible') + else: + varname = field + try: + field = [f for f in self.formdef.schema.fields if f.varname == varname][0] + except IndexError: + raise ValueError('no field for varname %s' % varname) + + if value is None or value == {} or value == []: + self.data.pop(varname, None) + elif hasattr(self, '_set_type_%s' % field.type): + getattr(self, '_set_type_%s' % field.type)( + varname=varname, + field=field, + value=value, **kwargs) + else: + self.data[varname] = value + + def _set_type_item(self, varname, field, value, **kwargs): + if isinstance(value, dict): + if not set(value).issuperset(set(['id', 'text'])): + raise ValueError('item field value must have id and text value') + # clean previous values + self.data.pop(varname, None) + self.data.pop(varname + '_raw', None) + self.data.pop(varname + '_structured', None) + if isinstance(value, dict): + # structured & display values + self.data[varname + '_raw'] = value['id'] + self.data[varname] = value['text'] + if len(value) > 2: + self.data[varname + '_structured'] = value + else: + # raw id in varname + self.data[varname] = value + + def _set_type_items(self, varname, field, value, **kwargs): + if not isinstance(value, list): + raise TypeError('%s is an ItemsField it needs a list as value' % varname) + + has_dict = False + for choice in value: + if isinstance(value, dict): + if not 
set(value).issuperset(set(['id', 'text'])): + raise ValueError('items field values must have id and text value') + has_dict = True + if has_dict: + if not all(isinstance(choice, dict) for choice in value): + raise ValueError('ItemsField value must be all structured or none') + # clean previous values + self.data.pop(varname, None) + self.data.pop(varname + '_raw', None) + self.data.pop(varname + '_structured', None) + if has_dict: + raw = self.data[varname + '_raw'] = [] + display = self.data[varname] = [] + structured = self.data[varname + '_structured'] = [] + for choice in value: + raw.append(choice['id']) + display.append(choice['text']) + structured.append(choice) + else: + self.data[varname] = value[:] + + def _set_type_file(self, varname, field, value, **kwargs): + filename = kwargs.get('filename') + content_type = kwargs.get('content_type', 'application/octet-stream') + if hasattr(value, 'read'): + content = base64.b64encode(value.read()).decode('ascii') + elif isinstance(value, bytes): + content = base64.b64encode(value).decode('ascii') + elif isinstance(value, dict): + if not set(value).issuperset(set(['filename', 'content'])): + raise ValueError('file field needs a dict value with filename and content') + content = value['content'] + filename = value['filename'] + content_type = value.get('content_type', content_type) + if not filename: + raise ValueError('missing filename') + self.data[varname] = { + 'filename': filename, + 'content': content, + 'content_type': content_type, + } + + def _set_type_date(self, varname, field, value): + if isinstance(value, str): + value = datetime.datetime.strptime(value, '%Y-%m-%d').date() + if isinstance(value, datetime.datetime): + value = value.date() + if isinstance(value, datetime.date): + value = value.strftime('%Y-%m-%d') + self.data[varname] = value + + def _set_type_map(self, varname, field, value): + if not isinstance(value, dict): + raise TypeError('value must be a dict for a map field') + if set(value) != set(['lat', 'lon']): + raise ValueError('map field expect keys lat and lon') + self.data[varname] = value + + def _set_type_bool(self, varname, field, value): + if isinstance(value, str): + value = value.lower().strip() in ['yes', 'true', 'on'] + if not isinstance(value, bool): + raise TypeError('value must be a boolean or a string true, yes, on, false, no, off') + self.data[varname] = value + + def cancel(self): + raise CancelSubmitError + + class FormDef(BaseObject): geolocations = None def __init__(self, wcs_api, **kwargs): - self.__wcs_api = wcs_api + self._wcs_api = wcs_api self.__dict__.update(**kwargs) - def __unicode__(self): + def __str__(self): return self.title @property - def datas(self): - datas = self.__wcs_api.get_formdata(self.slug) - for data in datas: - data.formdef = self - yield data + def formdatas(self): + return FormDatas(wcs_api=self._wcs_api, formdef=self) @property def schema(self): - return self.__wcs_api.get_schema(self.slug) - - def __repr__(self): - return '<{klass} {slug!r}>'.format(klass=self.__class__.__name__, slug=self.slug) + if not hasattr(self, '_schema'): + d = self._wcs_api.get_json('api/formdefs/{self.slug}/schema'.format(self=self)) + self._schema = Schema(self._wcs_api, **d) + return self._schema + + @contextlib.contextmanager + def submit(self, **kwargs): + submitter = FormDefSubmit( + wcs_api=self._wcs_api, + formdef=self, + **kwargs) + try: + yield submitter + except CancelSubmitError: + return + payload = submitter.payload() + d = self._wcs_api.post_json(payload, 
'api/formdefs/{self.slug}/submit'.format(self=self)) + if d['err'] != 0: + raise WcsApiError('submited returned an error: %s' % d) + submitter.result = BaseObject(self._wcs_api, **d['data']) class Role(BaseObject): @@ -210,99 +494,128 @@ class Category(BaseObject): pass +class WcsObjects(object): + url = None + object_class = None + + def __init__(self, wcs_api): + self.wcs_api = wcs_api + + def __getitem__(self, slug): + if isinstance(slug, self.object_class): + slug = slug.slug + for instance in self: + if instance.slug == slug: + return instance + raise KeyError('no instance with slug %r' % slug) + + def __iter__(self): + for d in self.wcs_api.get_json(self.url)['data']: + yield self.object_class(wcs_api=self.wcs_api, **d) + + def __len__(self): + return len(list((o for o in self))) + + +class Roles(WcsObjects): + # Paths are not coherent :/ + url = 'api/roles' + object_class = Role + + +class FormDefs(WcsObjects): + url = 'api/formdefs/?include-count=on' + object_class = FormDef + + +class Categories(WcsObjects): + url = 'api/categories/' + object_class = Category + + class WcsApi(object): - def __init__(self, url, orig, key, verify=True, slugs=None, batch_size=500): + def __init__(self, url, email=None, name_id=None, batch_size=1000, + session=None, logger=None, orig=None, key=None, verify=True): self.url = url + self.batch_size = batch_size + self.email = email + self.name_id = name_id + self.requests = session or requests.Session() + self.logger = logger or logging.getLogger(__name__) self.orig = orig self.key = key self.verify = verify - self.cache = {} - self.slugs = slugs or [] - self.batch_size = batch_size - @property - def formdefs_url(self): - return urlparse.urljoin(self.url, 'api/formdefs/') - - @property - def forms_url(self): - return urlparse.urljoin(self.url, 'api/forms/') - - @property - def roles_url(self): - return urlparse.urljoin(self.url, 'api/roles') + def _build_url(self, url_parts): + url = self.url + for url_part in url_parts: + url = urlparse.urljoin(url, url_part) + return url def get_json(self, *url_parts): - url = reduce(lambda x, y: urlparse.urljoin(x, y), url_parts) - params = {'orig': self.orig} - query_string = urllib.urlencode(params) - presigned_url = url + ('&' if '?' in url else '?') + query_string - if presigned_url in self.cache: - return self.cache[presigned_url] - signed_url = signature.sign_url(presigned_url, self.key) + url = self._build_url(url_parts) + params = {} + if self.email: + params['email'] = self.email + if self.name_id: + params['NameID'] = self.name_id + if self.orig: + params['orig'] = self.orig + query_string = urlparse.urlencode(params) + complete_url = url + ('&' if '?' 
in url else '?') + query_string + final_url = complete_url + if self.key: + final_url = signature.sign_url(final_url, self.key) try: - response = requests.get(signed_url, verify=self.verify) + response = self.requests.get(final_url, verify=self.verify) + response.raise_for_status() except requests.RequestException as e: - raise WcsApiError('GET request failed', url=signed_url, exception=e) + content = getattr(getattr(e, 'response', None), 'content', None) + raise WcsApiError('GET request failed', final_url, e, content) + else: + try: + return response.json() + except ValueError as e: + raise WcsApiError('Invalid JSON content', final_url, e) + + def post_json(self, data, *url_parts): + url = self._build_url(url_parts) + params = {} + if self.email: + params['email'] = self.email + if self.name_id: + params['NameID'] = self.name_id + if self.orig: + params['orig'] = self.orig + query_string = urlparse.urlencode(params) + complete_url = url + ('&' if '?' in url else '?') + query_string + final_url = complete_url + if self.key: + final_url = signature.sign_url(final_url, self.key) + try: + response = self.requests.post( + final_url, + data=json.dumps(data), + headers={'content-type': 'application/json'}, + verify=self.verify) + response.raise_for_status() + except requests.RequestException as e: + content = getattr(getattr(e, 'response', None), 'content', None) + raise WcsApiError('POST request failed', final_url, e, content) else: - if not response.ok: - try: - text = response.text - except UnicodeError: - text = '' + repr(response.content) - raise WcsApiError('GET response is not 200', - url=signed_url, - status_code=response.status_code, - content=text) try: - content = response.json() - self.cache[presigned_url] = content - return content + return response.json() except ValueError as e: - raise WcsApiError('Invalid JSON content', url=signed_url, exception=e) + raise WcsApiError('Invalid JSON content', final_url, e) @property def roles(self): - return [Role(wcs_api=self, **d) for d in self.get_json(self.roles_url)['data']] + return Roles(self) @property def formdefs(self): - result = self.get_json(self.formdefs_url + '?include-count=on') - if isinstance(result, dict): - if result['err'] == 0: - data = result['data'] - else: - logger.error(u'could not retrieve formdefs from %s, err_desc: %s', - self.formdefs_url, result.get('err_desc')) - return [] - else: - data = result - return [FormDef(wcs_api=self, **d) for d in data - if not self.slugs or d['slug'] in self.slugs] + return FormDefs(self) @property def categories(self): - d = {} - for f in self.formdefs: - if hasattr(f.schema, 'category'): - d[f.schema.category_id] = f.schema.category - return [Category(wcs_api=self, id=k, name=v) for k, v in d.items()] - - def get_formdata(self, slug): - offset = 0 - limit = self.batch_size - while True: - data = self.get_json(self.forms_url, - slug + '/list?anonymise&full=on&offset=%d&limit=%d' % (offset, limit)) - for d in data: - # w.c.s. had a bug where some formdata lost their draft status, skip them - if not d.get('receipt_time'): - continue - yield FormData(wcs_api=self, **d) - if len(data) < limit: - break - offset += limit - - def get_schema(self, slug): - json_schema = self.get_json(self.formdefs_url, slug + '/', 'schema?anonymise') - return Schema(wcs_api=self, **json_schema) + return Categories(self) -- 2.24.0
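
For reference, a minimal usage sketch of the Python 3 client API this patch introduces (WcsApi, FormDefs, FormDatas); the url, orig and key values below are hypothetical placeholders, and the iteration pattern simply mirrors what feeder.py does with formdef.formdatas.anonymized.full:

    from wcs_olap.wcs_api import WcsApi

    # placeholder connection parameters, not values taken from the patch
    api = WcsApi(url='https://wcs.example.net/', orig='olap', key='secret', batch_size=500)

    for formdef in api.formdefs:  # GET api/formdefs/?include-count=on
        print(formdef.slug, formdef.title)
        # anonymized, full formdata, fetched in batches under the hood
        for formdata in formdef.formdatas.anonymized.full:
            print(formdata.id, formdata.receipt_time)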