From dbcbefe99c00b86fcc8a60b077b38a351c81967e Mon Sep 17 00:00:00 2001 From: Valentin Deniaud Date: Tue, 1 Feb 2022 16:39:02 +0100 Subject: [PATCH 2/2] statistics: group forms count by field (#60777) --- tests/api/test_statistics.py | 106 +++++++++++++++++++++++++++++++++-- wcs/sql.py | 65 ++++++++++++--------- wcs/statistics/views.py | 93 +++++++++++++++++++++++++----- 3 files changed, 221 insertions(+), 43 deletions(-) diff --git a/tests/api/test_statistics.py b/tests/api/test_statistics.py index e71bd6ba2..f0fed3ae1 100644 --- a/tests/api/test_statistics.py +++ b/tests/api/test_statistics.py @@ -254,8 +254,20 @@ def test_statistics_forms_count_subfilters(pub, formdef): resp = get_app(pub).get(sign_uri('/api/statistics/forms/count/?form=%s' % formdef.url_name)) - # check item field subfilter + # check group-by subfilter assert resp.json['data']['subfilters'][0] == { + 'id': 'group-by', + 'label': 'Group by', + 'options': [ + {'id': 'test-item', 'label': 'Test item'}, + {'id': 'test-items', 'label': 'Test items'}, + {'id': 'checkbox', 'label': 'Checkbox'}, + {'id': 'status', 'label': 'Status'}, + ], + } + + # check item field subfilter + assert resp.json['data']['subfilters'][1] == { 'id': 'filter-test-item', 'label': 'Test item', 'options': [{'id': 'baz', 'label': 'Baz'}, {'id': 'foo', 'label': 'Foo'}], @@ -263,7 +275,7 @@ def test_statistics_forms_count_subfilters(pub, formdef): } # check items field subfilter - assert resp.json['data']['subfilters'][1] == { + assert resp.json['data']['subfilters'][2] == { 'id': 'filter-test-items', 'label': 'Test items', 'options': [ @@ -275,7 +287,7 @@ def test_statistics_forms_count_subfilters(pub, formdef): } # check block boolean field subfilter - assert resp.json['data']['subfilters'][2] == { + assert resp.json['data']['subfilters'][3] == { 'id': 'filter-blockdata_bool', 'label': 'Bool', 'options': [{'id': 'true', 'label': 'Yes'}, {'id': 'false', 'label': 'No'}], @@ -283,7 +295,7 @@ def 
test_statistics_forms_count_subfilters(pub, formdef): } # check boolean backoffice field subfilter - assert resp.json['data']['subfilters'][3] == { + assert resp.json['data']['subfilters'][4] == { 'id': 'filter-checkbox', 'label': 'Checkbox', 'options': [{'id': 'true', 'label': 'Yes'}, {'id': 'false', 'label': 'No'}], @@ -451,3 +463,89 @@ def test_statistics_forms_count_subfilters_query(pub, formdef): # invalid filter resp = get_app(pub).get(sign_uri(url + '&filter-xxx=yyy')) assert resp.json['data']['series'][0]['data'] == [] + + +def test_statistics_forms_count_group_by(pub, formdef): + for i in range(20): + formdata = formdef.data_class()() + formdata.just_created() + formdata.receipt_time = datetime.datetime(2021, 1, 1, 0, 0).timetuple() + if i % 3: + formdata.data['1'] = True + formdata.data['2'] = 'foo' + formdata.data['3'] = ['bar', 'baz'] + elif i % 2: + formdata.data['1'] = False + formdata.data['2'] = 'baz' + formdata.data['3'] = ['baz'] + formdata.jump_status('2') + else: + formdata.receipt_time = datetime.datetime(2021, 3, 1, 2, 0).timetuple() + formdata.store() + + # group by item field + url = '/api/statistics/forms/count/?form=%s' % formdef.url_name + resp = get_app(pub).get(sign_uri(url + '&group-by=test-item')) + assert resp.json['data']['x_labels'] == ['2021-01', '2021-02', '2021-03'] + assert resp.json['data']['series'] == [ + {'data': [3, None, None], 'label': 'baz'}, + {'data': [13, None, None], 'label': 'foo'}, + {'data': [None, None, 4], 'label': 'None'}, + ] + + resp = get_app(pub).get(sign_uri(url + '&group-by=test-item&time_interval=year')) + assert resp.json['data']['x_labels'] == ['2021'] + assert resp.json['data']['series'] == [ + {'label': 'baz', 'data': [3]}, + {'label': 'foo', 'data': [13]}, + {'label': 'None', 'data': [4]}, + ] + + resp = get_app(pub).get(sign_uri(url + '&group-by=test-item&time_interval=hour')) + assert resp.json['data']['x_labels'] == list(range(24)) + assert resp.json['data']['series'][0]['data'][0] == 3 + assert 
resp.json['data']['series'][1]['data'][0] == 13 + assert resp.json['data']['series'][2]['data'][2] == 4 + + resp = get_app(pub).get(sign_uri(url + '&group-by=test-item&time_interval=weekday')) + assert len(resp.json['data']['x_labels']) == 7 + assert resp.json['data']['series'] == [ + {'label': 'baz', 'data': [None, None, None, None, 3, None, None]}, + {'label': 'foo', 'data': [None, None, None, None, 13, None, None]}, + {'label': 'None', 'data': [4, None, None, None, None, None, None]}, + ] + + # group by items field + url = '/api/statistics/forms/count/?form=%s' % formdef.url_name + resp = get_app(pub).get(sign_uri(url + '&group-by=test-items')) + assert resp.json['data']['x_labels'] == ['2021-01', '2021-02', '2021-03'] + assert resp.json['data']['series'] == [ + {'label': 'bar', 'data': [13, None, None]}, + {'label': 'baz', 'data': [16, None, None]}, + {'label': 'None', 'data': [None, None, 4]}, + ] + + # group by boolean field + resp = get_app(pub).get(sign_uri(url + '&group-by=checkbox')) + assert resp.json['data']['x_labels'] == ['2021-01', '2021-02', '2021-03'] + assert resp.json['data']['series'] == [ + {'data': [3, None, None], 'label': 'No'}, + {'data': [13, None, None], 'label': 'Yes'}, + {'data': [None, None, 4], 'label': 'None'}, + ] + + # group by status + resp = get_app(pub).get(sign_uri(url + '&group-by=status')) + assert resp.json['data']['x_labels'] == ['2021-01', '2021-02', '2021-03'] + assert resp.json['data']['series'] == [ + {'data': [3, None, None], 'label': 'End status'}, + {'data': [13, None, 4], 'label': 'New status'}, + ] + + # group by on block field is not supported + resp = get_app(pub).get(sign_uri(url + '&group-by=blockdata_bool')) + assert resp.json['data']['series'] == [{'data': [16, 0, 4], 'label': 'Forms Count'}] + + # invalid field + resp = get_app(pub).get(sign_uri(url + '&group-by=xxx')) + assert resp.json['data']['series'] == [{'data': [16, 0, 4], 'label': 'Forms Count'}] diff --git a/wcs/sql.py b/wcs/sql.py index 
82cb18346..96e1be8f2 100644 --- a/wcs/sql.py +++ b/wcs/sql.py @@ -3439,6 +3439,20 @@ def get_period_query( return statement +def get_time_aggregate_query(time_interval, query, group_by, function='DATE_TRUNC'): + statement = f"SELECT {function}('{time_interval}', receipt_time) AS {time_interval}, " + if group_by: + statement += '%s, ' % group_by + statement += 'COUNT(*) ' + statement += query + + aggregate_fields = time_interval + if group_by: + aggregate_fields += ', %s' % group_by + statement += f' GROUP BY {aggregate_fields} ORDER BY {aggregate_fields}' + return statement + + @guard_postgres def get_actionable_counts(user_roles): conn, cur = get_connection_and_cursor() @@ -3483,23 +3497,22 @@ def get_total_counts(user_roles): @guard_postgres -def get_weekday_totals(period_start=None, period_end=None, criterias=None): +def get_weekday_totals(period_start=None, period_end=None, criterias=None, group_by=None): conn, cur = get_connection_and_cursor() - statement = '''SELECT DATE_PART('dow', receipt_time) AS weekday, COUNT(*)''' parameters = {} - statement += get_period_query( + statement = get_period_query( period_start=period_start, period_end=period_end, criterias=criterias, parameters=parameters ) - statement += ' GROUP BY weekday ORDER BY weekday' '' + statement = get_time_aggregate_query('dow', statement, group_by, function='DATE_PART') cur.execute(statement, parameters) result = cur.fetchall() - result = [(int(x), y) for x, y in result] + result = [(int(x[0]), *x[1:]) for x in result] coverage = [x[0] for x in result] for weekday in range(7): if weekday not in coverage: result.append((weekday, 0)) - result.sort() + result.sort(key=lambda x: x[0]) # add labels, weekday_names = [ @@ -3511,7 +3524,7 @@ def get_weekday_totals(period_start=None, period_end=None, criterias=None): _('Friday'), _('Saturday'), ] - result = [(weekday_names[x], y) for (x, y) in result] + result = [(weekday_names[x[0]], *x[1:]) for x in result] # and move Sunday last result = result[1:] + 
[result[0]] @@ -3542,24 +3555,23 @@ def get_formdef_totals(period_start=None, period_end=None, criterias=None): @guard_postgres -def get_hour_totals(period_start=None, period_end=None, criterias=None): +def get_hour_totals(period_start=None, period_end=None, criterias=None, group_by=None): conn, cur = get_connection_and_cursor() - statement = '''SELECT DATE_PART('hour', receipt_time) AS hour, COUNT(*)''' parameters = {} - statement += get_period_query( + statement = get_period_query( period_start=period_start, period_end=period_end, criterias=criterias, parameters=parameters ) - statement += ' GROUP BY hour ORDER BY hour' + statement = get_time_aggregate_query('hour', statement, group_by, function='DATE_PART') cur.execute(statement, parameters) result = cur.fetchall() - result = [(int(x), y) for x, y in result] + result = [(int(x[0]), *x[1:]) for x in result] coverage = [x[0] for x in result] for hour in range(24): if hour not in coverage: result.append((hour, 0)) - result.sort() + result.sort(key=lambda x: x[0]) conn.commit() cur.close() @@ -3568,18 +3580,22 @@ def get_hour_totals(period_start=None, period_end=None, criterias=None): @guard_postgres -def get_monthly_totals(period_start=None, period_end=None, criterias=None): +def get_monthly_totals( + period_start=None, + period_end=None, + criterias=None, + group_by=None, +): conn, cur = get_connection_and_cursor() - statement = '''SELECT DATE_TRUNC('month', receipt_time) AS month, COUNT(*) ''' parameters = {} - statement += get_period_query( + statement = get_period_query( period_start=period_start, period_end=period_end, criterias=criterias, parameters=parameters ) - statement += ' GROUP BY month ORDER BY month' '' + statement = get_time_aggregate_query('month', statement, group_by) cur.execute(statement, parameters) raw_result = cur.fetchall() - result = [('%d-%02d' % x.timetuple()[:2], y) for x, y in raw_result] + result = [('%d-%02d' % x[0].timetuple()[:2], *x[1:]) for x in raw_result] if result: coverage = 
[x[0] for x in result] current_month = raw_result[0][0] @@ -3590,7 +3606,7 @@ def get_monthly_totals(period_start=None, period_end=None, criterias=None): result.append((label, 0)) current_month = current_month + datetime.timedelta(days=31) current_month = current_month - datetime.timedelta(days=current_month.day - 1) - result.sort() + result.sort(key=lambda x: x[0]) conn.commit() cur.close() @@ -3599,18 +3615,17 @@ def get_monthly_totals(period_start=None, period_end=None, criterias=None): @guard_postgres -def get_yearly_totals(period_start=None, period_end=None, criterias=None): +def get_yearly_totals(period_start=None, period_end=None, criterias=None, group_by=None): conn, cur = get_connection_and_cursor() - statement = '''SELECT DATE_TRUNC('year', receipt_time) AS year, COUNT(*)''' parameters = {} - statement += get_period_query( + statement = get_period_query( period_start=period_start, period_end=period_end, criterias=criterias, parameters=parameters ) - statement += ' GROUP BY year ORDER BY year' + statement = get_time_aggregate_query('year', statement, group_by) cur.execute(statement, parameters) raw_result = cur.fetchall() - result = [(str(x.year), y) for x, y in raw_result] + result = [(str(x[0].year), *x[1:]) for x in raw_result] if result: coverage = [x[0] for x in result] current_year = raw_result[0][0] @@ -3620,7 +3635,7 @@ def get_yearly_totals(period_start=None, period_end=None, criterias=None): if label not in coverage: result.append((label, 0)) current_year = current_year + datetime.timedelta(days=366) - result.sort() + result.sort(key=lambda x: x[0]) conn.commit() cur.close() diff --git a/wcs/statistics/views.py b/wcs/statistics/views.py index c17a33735..a3fd03b17 100644 --- a/wcs/statistics/views.py +++ b/wcs/statistics/views.py @@ -14,11 +14,14 @@ # You should have received a copy of the GNU General Public License # along with this program; if not, see <http://www.gnu.org/licenses/>.
+import collections + from django.http import HttpResponseBadRequest, HttpResponseForbidden, JsonResponse from django.urls import reverse from django.views.generic import View from quixote import get_publisher +from wcs import sql from wcs.api_utils import is_url_signed from wcs.backoffice.management import FormPage from wcs.categories import Category @@ -114,6 +117,7 @@ class FormsCountView(RestrictedView): } category_id = request.GET.get('category', '_all') formdef_slug = request.GET.get('form', '_all') + group_by = request.GET.get('group-by') subfilters = [] if formdef_slug != '_all': try: @@ -124,7 +128,8 @@ class FormsCountView(RestrictedView): totals_kwargs['criterias'].append(Equal('formdef_id', formdef.id)) totals_kwargs['criterias'].extend(self.get_filters_criterias(formdef, form_page)) - + if group_by: + totals_kwargs['group_by'] = self.get_group_by_column(form_page, group_by) subfilters = self.get_subfilters(form_page) elif category_id != '_all': totals_kwargs['criterias'].append(Equal('category_id', category_id)) @@ -140,20 +145,14 @@ class FormsCountView(RestrictedView): else: return HttpResponseBadRequest('invalid time_interval parameter') + if not totals_kwargs.get('group_by'): + x_labels = [x[0] for x in totals] + series = [{'label': _('Forms Count'), 'data': [x[1] for x in totals]}] + else: + x_labels, series = self.get_grouped_data(totals, group_by, formdef) + return JsonResponse( - { - 'data': { - 'x_labels': [x[0] for x in totals], - 'series': [ - { - 'label': _('Forms Count'), - 'data': [x[1] for x in totals], - } - ], - 'subfilters': subfilters, - }, - 'err': 0, - } + {'data': {'x_labels': x_labels, 'series': series, 'subfilters': subfilters}, 'err': 0} ) def get_filters_criterias(self, formdef, form_page): @@ -183,6 +182,7 @@ class FormsCountView(RestrictedView): @staticmethod def get_subfilters(form_page): subfilters = [] + field_choices = [] for field in form_page.get_formdef_fields(): if not getattr(field, 'include_in_statistics', False) 
or not field.contextual_varname: continue @@ -225,5 +225,70 @@ class FormsCountView(RestrictedView): filter_description['default'] = field.default_filter_value subfilters.append(filter_description) + if not hasattr(field, 'block_field'): + field_choices.append((field.contextual_varname, field.label)) + + if field_choices: + subfilters.insert( + 0, + { + 'id': 'group-by', + 'label': _('Group by'), + 'options': [{'id': x[0], 'label': x[1]} for x in field_choices], + }, + ) return subfilters + + def get_group_by_column(self, form_page, group_by): + if group_by == 'status': + return group_by + else: + fields = [ + x + for x in form_page.get_formdef_fields() + if getattr(x, 'contextual_varname', None) == group_by + ] + if fields: + field = fields[0] + if not hasattr(field, 'block_field'): # block fields are not supported + return sql.get_field_id(field) + + def get_grouped_data(self, totals, group_by, formdef): + totals_by_time = collections.OrderedDict( + # time1: {group1: total_11, group2: total_12}, + # time2: {group1: total_21} + ) + seen_group_values = set( + # group1, group2 (unordered: groups are sorted by label before building series) + ) + for total in totals: + totals_by_group = totals_by_time.setdefault(total[0], collections.Counter()) + if len(total) == 2: + # ignore empty value used to fill time gaps + continue + groups = total[1] + if not isinstance(groups, list): + groups = [groups] + for group in groups: + totals_by_group[group] += total[2] + seen_group_values.add(group) + + def get_label(group): + if group_by == 'status': + status_id = group.split('-', 1)[1] + status = formdef.workflow.get_status(status_id) + return status.name + group_labels = {True: _('Yes'), False: _('No'), None: _('None')} + return group_labels.get(group, group) + + totals_by_group = { + # group1: [total_11, total_21], + # group2: [total_12, None], + } + for group in sorted(seen_group_values, key=lambda x: (x is None, str(get_label(x)))): + totals_by_group[group] = [counts.get(group) for counts in totals_by_time.values()] + + x_labels = list(totals_by_time) + series = [{'label': get_label(group),
'data': data} for group, data in totals_by_group.items()] + return x_labels, series -- 2.30.2