From 638d2bf44af9f00f95c56d90df92711a931cec92 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fr=C3=A9d=C3=A9ric=20P=C3=A9ters?= Date: Thu, 3 Mar 2016 18:33:07 +0100 Subject: [PATCH] sql: use materialized views if available, for statistics (#9553) --- tests/test_sql.py | 27 ++++++++++++++++++++++++--- wcs/publisher.py | 14 ++++++++++++++ wcs/qommon/publisher.py | 10 +++++++--- wcs/sql.py | 36 +++++++++++++++++++++++++++++++++++- 4 files changed, 80 insertions(+), 7 deletions(-) diff --git a/tests/test_sql.py b/tests/test_sql.py index 3670e4c..ba474a1 100644 --- a/tests/test_sql.py +++ b/tests/test_sql.py @@ -839,7 +839,7 @@ def test_migration_1_tracking_code(): def test_migration_2_formdef_id_in_views(): conn, cur = sql.get_connection_and_cursor() cur.execute('UPDATE wcs_meta SET value = 1 WHERE key = %s', ('sql_level',)) - cur.execute('DROP VIEW wcs_all_forms') + cur.execute('DROP VIEW wcs_all_forms CASCADE') # hack a formdef table the wrong way, to check it is reconstructed # properly before the views are created @@ -882,7 +882,7 @@ def test_migration_2_formdef_id_in_views(): def test_migration_6_actions_roles(): conn, cur = sql.get_connection_and_cursor() cur.execute('UPDATE wcs_meta SET value = 5 WHERE key = %s', ('sql_level',)) - cur.execute('DROP VIEW wcs_all_forms') + cur.execute('DROP VIEW wcs_all_forms CASCADE') # hack a formdef table the wrong way, to check it is reconstructed # properly before the views are created @@ -907,7 +907,7 @@ def test_migration_6_actions_roles(): def test_migration_10_submission_channel(): conn, cur = sql.get_connection_and_cursor() cur.execute('UPDATE wcs_meta SET value = 9 WHERE key = %s', ('sql_level',)) - cur.execute('DROP VIEW wcs_all_forms') + cur.execute('DROP VIEW wcs_all_forms CASCADE') # hack a formdef table the wrong way, to check it is reconstructed # properly before the views are created @@ -1330,3 +1330,24 @@ def test_views_submission_info(): cur.execute('''SELECT COUNT(*) FROM wcs_all_forms WHERE submission_channel = %s''', ('mail',)) assert bool(cur.fetchone()[0] == 1) + +@postgresql +def test_materialized_view(): + conn, cur = sql.get_connection_and_cursor() + if pub.pg_version < 94000: + pytest.skip('this requires PostgreSQL >= 9.4') + return + + sql.refresh_materialized_views() + test_last_update_time() + assert sql.get_yearly_totals() == [(str(datetime.date.today().year), 2)] + conn, cur = sql.get_connection_and_cursor() + + formdef = FormDef.select()[0] + formdata1 = formdef.data_class()() + formdata1.status = 'wf-st1' + formdata1.just_created() + formdata1.store() + assert sql.get_yearly_totals() == [(str(datetime.date.today().year), 2)] + sql.refresh_materialized_views() + assert sql.get_yearly_totals() == [(str(datetime.date.today().year), 3)] diff --git a/wcs/publisher.py b/wcs/publisher.py index 87fd1b3..28b1de3 100644 --- a/wcs/publisher.py +++ b/wcs/publisher.py @@ -16,6 +16,7 @@ import cPickle import os +import random import sys import zipfile @@ -39,6 +40,7 @@ from root import RootDirectory from backoffice import RootDirectory as BackofficeRootDirectory from admin import RootDirectory as AdminRootDirectory import sessions +from qommon.cron import CronJob from users import User from tracking_code import TrackingCode @@ -110,6 +112,12 @@ class WcsPublisher(StubWcsPublisher): "missing_appdir_redirect") configure = classmethod(configure) + @classmethod + def register_cronjobs(cls): + QommonPublisher.register_cronjobs() + cls.register_cronjob(CronJob(cls.refresh_materialized_views, + minutes=[random.randint(0, 59)])) + def is_using_postgresql(self): return bool(self.has_site_option('postgresql') and self.cfg.get('postgresql', {})) @@ -238,6 +246,12 @@ class WcsPublisher(StubWcsPublisher): conn.commit() cur.close() + def refresh_materialized_views(self): + if not self.is_using_postgresql(): + return + import sql + sql.refresh_materialized_views() + def migrate_sql(self): import sql sql.migrate() diff --git a/wcs/qommon/publisher.py b/wcs/qommon/publisher.py index 127fc0c..d8602a0 100644 --- a/wcs/qommon/publisher.py +++ b/wcs/qommon/publisher.py @@ -746,13 +746,17 @@ class QommonPublisher(Publisher): cls.etld = etld.etld(filename) load_effective_tld_names = classmethod(load_effective_tld_names) + @classmethod + def register_cronjobs(cls): + cls.register_cronjob(CronJob(cls.clean_sessions, minutes=range(0, 60, 5))) + cls.register_cronjob(CronJob(cls.clean_afterjobs, minutes=[random.randint(0, 59)])) + cls.register_cronjob(CronJob(cls.clean_tempfiles, minutes=[random.randint(0, 59)])) + def create_publisher(cls, register_cron=True, register_tld_names=True): cls.load_extra_dirs() cls.load_translations() if register_cron: - cls.register_cronjob(CronJob(cls.clean_sessions, minutes=range(0, 60, 5))) - cls.register_cronjob(CronJob(cls.clean_afterjobs, minutes=[random.randint(0, 59)])) - cls.register_cronjob(CronJob(cls.clean_tempfiles, minutes=[random.randint(0, 59)])) + cls.register_cronjobs() if register_tld_names: cls.load_effective_tld_names() diff --git a/wcs/sql.py b/wcs/sql.py index 50aaa0b..2634e12 100644 --- a/wcs/sql.py +++ b/wcs/sql.py @@ -229,6 +229,11 @@ def get_connection(new=False): if new: raise get_publisher().pgconn = None + else: + cur = get_publisher().pgconn.cursor() + cur.execute('SHOW server_version_num') + get_publisher().pg_version = cur.fetchone()[0] + cur.close() return get_publisher().pgconn def cleanup_connection(): @@ -646,6 +651,8 @@ def drop_global_views(conn, cur): for view_name in view_names: cur.execute('''DROP VIEW IF EXISTS %s''' % view_name) + if get_publisher().pg_version >= 94000: + cur.execute('''DROP MATERIALIZED VIEW IF EXISTS wcs_materialized_all_forms''') cur.execute('''DROP VIEW IF EXISTS wcs_all_forms''') def do_global_views(conn, cur): @@ -1621,9 +1628,32 @@ class AnyFormData(SqlMixin): return o +@guard_postgres +def create_materialized_view_if_not_exists(conn, cur): + if get_publisher().pg_version < 94000: + return + cur.execute('''CREATE MATERIALIZED VIEW IF NOT EXISTS wcs_materialized_all_forms + AS SELECT * from wcs_all_forms''') + +@guard_postgres +def refresh_materialized_views(): + conn, cur = get_connection_and_cursor() + if get_publisher().pg_version < 94000: + return + try: + cur.execute('REFRESH MATERIALIZED VIEW wcs_materialized_all_forms') + conn.commit() + except psycopg2.ProgrammingError: + conn.rollback() + create_materialized_view_if_not_exists(conn, cur) + cur.close() + def get_period_query(period_start=None, period_end=None, criterias=None, parameters=None): clause = [NotNull('receipt_time')] - table_name = 'wcs_all_forms' + if get_publisher().pg_version >= 94000: + table_name = 'wcs_materialized_all_forms' + else: + table_name = 'wcs_all_forms' if criterias: for criteria in criterias: if criteria.__class__.__name__ == 'Equal' and \ @@ -1647,6 +1677,7 @@ def get_period_query(period_start=None, period_end=None, criterias=None, paramet @guard_postgres def get_weekday_totals(period_start=None, period_end=None, criterias=None): conn, cur = get_connection_and_cursor() + create_materialized_view_if_not_exists(conn, cur) statement = '''SELECT DATE_PART('dow', receipt_time) AS weekday, COUNT(*)''' parameters = {} statement += get_period_query(period_start, period_end, criterias, parameters) @@ -1670,6 +1701,7 @@ def get_weekday_totals(period_start=None, period_end=None, criterias=None): @guard_postgres def get_hour_totals(period_start=None, period_end=None, criterias=None): conn, cur = get_connection_and_cursor() + create_materialized_view_if_not_exists(conn, cur) statement = '''SELECT DATE_PART('hour', receipt_time) AS hour, COUNT(*)''' parameters = {} statement += get_period_query(period_start, period_end, criterias, parameters) @@ -1694,6 +1726,7 @@ def get_hour_totals(period_start=None, period_end=None, criterias=None): @guard_postgres def get_monthly_totals(period_start=None, period_end=None, criterias=None): conn, cur = get_connection_and_cursor() + create_materialized_view_if_not_exists(conn, cur) statement = '''SELECT DATE_TRUNC('month', receipt_time) AS month, COUNT(*) ''' parameters = {} statement += get_period_query(period_start, period_end, criterias, parameters) @@ -1723,6 +1756,7 @@ def get_monthly_totals(period_start=None, period_end=None, criterias=None): @guard_postgres def get_yearly_totals(period_start=None, period_end=None, criterias=None): conn, cur = get_connection_and_cursor() + create_materialized_view_if_not_exists(conn, cur) statement = '''SELECT DATE_TRUNC('year', receipt_time) AS year, COUNT(*)''' parameters = {} statement += get_period_query(period_start, period_end, criterias, parameters) -- 2.7.0