Project

General

Profile

0002-statistics-group-forms-count-by-field-60777.patch

Valentin Deniaud, 03 February 2022 06:02 PM

Download (18.2 KB)

View differences:

Subject: [PATCH 2/2] statistics: group forms count by field (#60777)

 tests/api/test_statistics.py | 106 +++++++++++++++++++++++++++++++++--
 wcs/sql.py                   |  65 ++++++++++++---------
 wcs/statistics/views.py      |  93 +++++++++++++++++++++++++-----
 3 files changed, 221 insertions(+), 43 deletions(-)
tests/api/test_statistics.py
254 254

  
255 255
    resp = get_app(pub).get(sign_uri('/api/statistics/forms/count/?form=%s' % formdef.url_name))
256 256

  
257
    # check item field subfilter
257
    # check group-by subfilter
258 258
    assert resp.json['data']['subfilters'][0] == {
259
        'id': 'group-by',
260
        'label': 'Group by',
261
        'options': [
262
            {'id': 'test-item', 'label': 'Test item'},
263
            {'id': 'test-items', 'label': 'Test items'},
264
            {'id': 'checkbox', 'label': 'Checkbox'},
265
            {'id': 'status', 'label': 'Status'},
266
        ],
267
    }
268

  
269
    # check item field subfilter
270
    assert resp.json['data']['subfilters'][1] == {
259 271
        'id': 'filter-test-item',
260 272
        'label': 'Test item',
261 273
        'options': [{'id': 'baz', 'label': 'Baz'}, {'id': 'foo', 'label': 'Foo'}],
......
263 275
    }
264 276

  
265 277
    # check items field subfilter
266
    assert resp.json['data']['subfilters'][1] == {
278
    assert resp.json['data']['subfilters'][2] == {
267 279
        'id': 'filter-test-items',
268 280
        'label': 'Test items',
269 281
        'options': [
......
275 287
    }
276 288

  
277 289
    # check block boolean field subfilter
278
    assert resp.json['data']['subfilters'][2] == {
290
    assert resp.json['data']['subfilters'][3] == {
279 291
        'id': 'filter-blockdata_bool',
280 292
        'label': 'Bool',
281 293
        'options': [{'id': 'true', 'label': 'Yes'}, {'id': 'false', 'label': 'No'}],
......
283 295
    }
284 296

  
285 297
    # check boolean backoffice field subfilter
286
    assert resp.json['data']['subfilters'][3] == {
298
    assert resp.json['data']['subfilters'][4] == {
287 299
        'id': 'filter-checkbox',
288 300
        'label': 'Checkbox',
289 301
        'options': [{'id': 'true', 'label': 'Yes'}, {'id': 'false', 'label': 'No'}],
......
451 463
    # invalid filter
452 464
    resp = get_app(pub).get(sign_uri(url + '&filter-xxx=yyy'))
453 465
    assert resp.json['data']['series'][0]['data'] == []
466

  
467

  
468
def test_statistics_forms_count_group_by(pub, formdef):
469
    for i in range(20):
470
        formdata = formdef.data_class()()
471
        formdata.just_created()
472
        formdata.receipt_time = datetime.datetime(2021, 1, 1, 0, 0).timetuple()
473
        if i % 3:
474
            formdata.data['1'] = True
475
            formdata.data['2'] = 'foo'
476
            formdata.data['3'] = ['bar', 'baz']
477
        elif i % 2:
478
            formdata.data['1'] = False
479
            formdata.data['2'] = 'baz'
480
            formdata.data['3'] = ['baz']
481
            formdata.jump_status('2')
482
        else:
483
            formdata.receipt_time = datetime.datetime(2021, 3, 1, 2, 0).timetuple()
484
        formdata.store()
485

  
486
    # group by item field
487
    url = '/api/statistics/forms/count/?form=%s' % formdef.url_name
488
    resp = get_app(pub).get(sign_uri(url + '&group-by=test-item'))
489
    assert resp.json['data']['x_labels'] == ['2021-01', '2021-02', '2021-03']
490
    assert resp.json['data']['series'] == [
491
        {'data': [3, None, None], 'label': 'baz'},
492
        {'data': [13, None, None], 'label': 'foo'},
493
        {'data': [None, None, 4], 'label': 'None'},
494
    ]
495

  
496
    resp = get_app(pub).get(sign_uri(url + '&group-by=test-item&time_interval=year'))
497
    assert resp.json['data']['x_labels'] == ['2021']
498
    assert resp.json['data']['series'] == [
499
        {'label': 'baz', 'data': [3]},
500
        {'label': 'foo', 'data': [13]},
501
        {'label': 'None', 'data': [4]},
502
    ]
503

  
504
    resp = get_app(pub).get(sign_uri(url + '&group-by=test-item&time_interval=hour'))
505
    assert resp.json['data']['x_labels'] == list(range(24))
506
    assert resp.json['data']['series'][0]['data'][0] == 3
507
    assert resp.json['data']['series'][1]['data'][0] == 13
508
    assert resp.json['data']['series'][2]['data'][2] == 4
509

  
510
    resp = get_app(pub).get(sign_uri(url + '&group-by=test-item&time_interval=weekday'))
511
    assert len(resp.json['data']['x_labels']) == 7
512
    assert resp.json['data']['series'] == [
513
        {'label': 'baz', 'data': [None, None, None, None, 3, None, None]},
514
        {'label': 'foo', 'data': [None, None, None, None, 13, None, None]},
515
        {'label': 'None', 'data': [4, None, None, None, None, None, None]},
516
    ]
517

  
518
    # group by items field
519
    url = '/api/statistics/forms/count/?form=%s' % formdef.url_name
520
    resp = get_app(pub).get(sign_uri(url + '&group-by=test-items'))
521
    assert resp.json['data']['x_labels'] == ['2021-01', '2021-02', '2021-03']
522
    assert resp.json['data']['series'] == [
523
        {'label': 'bar', 'data': [13, None, None]},
524
        {'label': 'baz', 'data': [16, None, None]},
525
        {'label': 'None', 'data': [None, None, 4]},
526
    ]
527

  
528
    # group by boolean field
529
    resp = get_app(pub).get(sign_uri(url + '&group-by=checkbox'))
530
    assert resp.json['data']['x_labels'] == ['2021-01', '2021-02', '2021-03']
531
    assert resp.json['data']['series'] == [
532
        {'data': [3, None, None], 'label': 'No'},
533
        {'data': [13, None, None], 'label': 'Yes'},
534
        {'data': [None, None, 4], 'label': 'None'},
535
    ]
536

  
537
    # group by status
538
    resp = get_app(pub).get(sign_uri(url + '&group-by=status'))
539
    assert resp.json['data']['x_labels'] == ['2021-01', '2021-02', '2021-03']
540
    assert resp.json['data']['series'] == [
541
        {'data': [3, None, None], 'label': 'End status'},
542
        {'data': [13, None, 4], 'label': 'New status'},
543
    ]
544

  
545
    # group by on block field is not supported
546
    resp = get_app(pub).get(sign_uri(url + '&group-by=blockdata_bool'))
547
    assert resp.json['data']['series'] == [{'data': [16, 0, 4], 'label': 'Forms Count'}]
548

  
549
    # invalid field
550
    resp = get_app(pub).get(sign_uri(url + '&group-by=xxx'))
551
    assert resp.json['data']['series'] == [{'data': [16, 0, 4], 'label': 'Forms Count'}]
wcs/sql.py
3439 3439
    return statement
3440 3440

  
3441 3441

  
3442
def get_time_aggregate_query(time_interval, query, group_by, function='DATE_TRUNC'):
3443
    statement = f"SELECT {function}('{time_interval}', receipt_time) AS {time_interval}, "
3444
    if group_by:
3445
        statement += '%s, ' % group_by
3446
    statement += 'COUNT(*) '
3447
    statement += query
3448

  
3449
    aggregate_fields = time_interval
3450
    if group_by:
3451
        aggregate_fields += ', %s' % group_by
3452
    statement += f' GROUP BY {aggregate_fields} ORDER BY {aggregate_fields}'
3453
    return statement
3454

  
3455

  
3442 3456
@guard_postgres
3443 3457
def get_actionable_counts(user_roles):
3444 3458
    conn, cur = get_connection_and_cursor()
......
3483 3497

  
3484 3498

  
3485 3499
@guard_postgres
3486
def get_weekday_totals(period_start=None, period_end=None, criterias=None):
3500
def get_weekday_totals(period_start=None, period_end=None, criterias=None, group_by=None):
3487 3501
    conn, cur = get_connection_and_cursor()
3488
    statement = '''SELECT DATE_PART('dow', receipt_time) AS weekday, COUNT(*)'''
3489 3502
    parameters = {}
3490
    statement += get_period_query(
3503
    statement = get_period_query(
3491 3504
        period_start=period_start, period_end=period_end, criterias=criterias, parameters=parameters
3492 3505
    )
3493
    statement += ' GROUP BY weekday ORDER BY weekday' ''
3506
    statement = get_time_aggregate_query('dow', statement, group_by, function='DATE_PART')
3494 3507
    cur.execute(statement, parameters)
3495 3508

  
3496 3509
    result = cur.fetchall()
3497
    result = [(int(x), y) for x, y in result]
3510
    result = [(int(x[0]), *x[1:]) for x in result]
3498 3511
    coverage = [x[0] for x in result]
3499 3512
    for weekday in range(7):
3500 3513
        if weekday not in coverage:
3501 3514
            result.append((weekday, 0))
3502
    result.sort()
3515
    result.sort(key=lambda x: x[0])
3503 3516

  
3504 3517
    # add labels,
3505 3518
    weekday_names = [
......
3511 3524
        _('Friday'),
3512 3525
        _('Saturday'),
3513 3526
    ]
3514
    result = [(weekday_names[x], y) for (x, y) in result]
3527
    result = [(weekday_names[x[0]], *x[1:]) for x in result]
3515 3528
    # and move Sunday last
3516 3529
    result = result[1:] + [result[0]]
3517 3530

  
......
3542 3555

  
3543 3556

  
3544 3557
@guard_postgres
3545
def get_hour_totals(period_start=None, period_end=None, criterias=None):
3558
def get_hour_totals(period_start=None, period_end=None, criterias=None, group_by=None):
3546 3559
    conn, cur = get_connection_and_cursor()
3547
    statement = '''SELECT DATE_PART('hour', receipt_time) AS hour, COUNT(*)'''
3548 3560
    parameters = {}
3549
    statement += get_period_query(
3561
    statement = get_period_query(
3550 3562
        period_start=period_start, period_end=period_end, criterias=criterias, parameters=parameters
3551 3563
    )
3552
    statement += ' GROUP BY hour ORDER BY hour'
3564
    statement = get_time_aggregate_query('hour', statement, group_by, function='DATE_PART')
3553 3565
    cur.execute(statement, parameters)
3554 3566

  
3555 3567
    result = cur.fetchall()
3556
    result = [(int(x), y) for x, y in result]
3568
    result = [(int(x[0]), *x[1:]) for x in result]
3557 3569

  
3558 3570
    coverage = [x[0] for x in result]
3559 3571
    for hour in range(24):
3560 3572
        if hour not in coverage:
3561 3573
            result.append((hour, 0))
3562
    result.sort()
3574
    result.sort(key=lambda x: x[0])
3563 3575

  
3564 3576
    conn.commit()
3565 3577
    cur.close()
......
3568 3580

  
3569 3581

  
3570 3582
@guard_postgres
3571
def get_monthly_totals(period_start=None, period_end=None, criterias=None):
3583
def get_monthly_totals(
3584
    period_start=None,
3585
    period_end=None,
3586
    criterias=None,
3587
    group_by=None,
3588
):
3572 3589
    conn, cur = get_connection_and_cursor()
3573
    statement = '''SELECT DATE_TRUNC('month', receipt_time) AS month, COUNT(*) '''
3574 3590
    parameters = {}
3575
    statement += get_period_query(
3591
    statement = get_period_query(
3576 3592
        period_start=period_start, period_end=period_end, criterias=criterias, parameters=parameters
3577 3593
    )
3578
    statement += ' GROUP BY month ORDER BY month' ''
3594
    statement = get_time_aggregate_query('month', statement, group_by)
3579 3595
    cur.execute(statement, parameters)
3580 3596

  
3581 3597
    raw_result = cur.fetchall()
3582
    result = [('%d-%02d' % x.timetuple()[:2], y) for x, y in raw_result]
3598
    result = [('%d-%02d' % x[0].timetuple()[:2], *x[1:]) for x in raw_result]
3583 3599
    if result:
3584 3600
        coverage = [x[0] for x in result]
3585 3601
        current_month = raw_result[0][0]
......
3590 3606
                result.append((label, 0))
3591 3607
            current_month = current_month + datetime.timedelta(days=31)
3592 3608
            current_month = current_month - datetime.timedelta(days=current_month.day - 1)
3593
        result.sort()
3609
        result.sort(key=lambda x: x[0])
3594 3610

  
3595 3611
    conn.commit()
3596 3612
    cur.close()
......
3599 3615

  
3600 3616

  
3601 3617
@guard_postgres
3602
def get_yearly_totals(period_start=None, period_end=None, criterias=None):
3618
def get_yearly_totals(period_start=None, period_end=None, criterias=None, group_by=None):
3603 3619
    conn, cur = get_connection_and_cursor()
3604
    statement = '''SELECT DATE_TRUNC('year', receipt_time) AS year, COUNT(*)'''
3605 3620
    parameters = {}
3606
    statement += get_period_query(
3621
    statement = get_period_query(
3607 3622
        period_start=period_start, period_end=period_end, criterias=criterias, parameters=parameters
3608 3623
    )
3609
    statement += ' GROUP BY year ORDER BY year'
3624
    statement = get_time_aggregate_query('year', statement, group_by)
3610 3625
    cur.execute(statement, parameters)
3611 3626

  
3612 3627
    raw_result = cur.fetchall()
3613
    result = [(str(x.year), y) for x, y in raw_result]
3628
    result = [(str(x[0].year), *x[1:]) for x in raw_result]
3614 3629
    if result:
3615 3630
        coverage = [x[0] for x in result]
3616 3631
        current_year = raw_result[0][0]
......
3620 3635
            if label not in coverage:
3621 3636
                result.append((label, 0))
3622 3637
            current_year = current_year + datetime.timedelta(days=366)
3623
        result.sort()
3638
        result.sort(key=lambda x: x[0])
3624 3639

  
3625 3640
    conn.commit()
3626 3641
    cur.close()
wcs/statistics/views.py
14 14
# You should have received a copy of the GNU General Public License
15 15
# along with this program; if not, see <http://www.gnu.org/licenses/>.
16 16

  
17
import collections
18

  
17 19
from django.http import HttpResponseBadRequest, HttpResponseForbidden, JsonResponse
18 20
from django.urls import reverse
19 21
from django.views.generic import View
20 22
from quixote import get_publisher
21 23

  
24
from wcs import sql
22 25
from wcs.api_utils import is_url_signed
23 26
from wcs.backoffice.management import FormPage
24 27
from wcs.categories import Category
......
114 117
        }
115 118
        category_id = request.GET.get('category', '_all')
116 119
        formdef_slug = request.GET.get('form', '_all')
120
        group_by = request.GET.get('group-by')
117 121
        subfilters = []
118 122
        if formdef_slug != '_all':
119 123
            try:
......
124 128

  
125 129
            totals_kwargs['criterias'].append(Equal('formdef_id', formdef.id))
126 130
            totals_kwargs['criterias'].extend(self.get_filters_criterias(formdef, form_page))
127

  
131
            if group_by:
132
                totals_kwargs['group_by'] = self.get_group_by_column(form_page, group_by)
128 133
            subfilters = self.get_subfilters(form_page)
129 134
        elif category_id != '_all':
130 135
            totals_kwargs['criterias'].append(Equal('category_id', category_id))
......
140 145
        else:
141 146
            return HttpResponseBadRequest('invalid time_interval parameter')
142 147

  
148
        if not totals_kwargs.get('group_by'):
149
            x_labels = [x[0] for x in totals]
150
            series = [{'label': _('Forms Count'), 'data': [x[1] for x in totals]}]
151
        else:
152
            x_labels, series = self.get_grouped_data(totals, group_by, formdef)
153

  
143 154
        return JsonResponse(
144
            {
145
                'data': {
146
                    'x_labels': [x[0] for x in totals],
147
                    'series': [
148
                        {
149
                            'label': _('Forms Count'),
150
                            'data': [x[1] for x in totals],
151
                        }
152
                    ],
153
                    'subfilters': subfilters,
154
                },
155
                'err': 0,
156
            }
155
            {'data': {'x_labels': x_labels, 'series': series, 'subfilters': subfilters}, 'err': 0}
157 156
        )
158 157

  
159 158
    def get_filters_criterias(self, formdef, form_page):
......
183 182
    @staticmethod
184 183
    def get_subfilters(form_page):
185 184
        subfilters = []
185
        field_choices = []
186 186
        for field in form_page.get_formdef_fields():
187 187
            if not getattr(field, 'include_in_statistics', False) or not field.contextual_varname:
188 188
                continue
......
225 225
                filter_description['default'] = field.default_filter_value
226 226

  
227 227
            subfilters.append(filter_description)
228
            if not hasattr(field, 'block_field'):
229
                field_choices.append((field.contextual_varname, field.label))
230

  
231
        if field_choices:
232
            subfilters.insert(
233
                0,
234
                {
235
                    'id': 'group-by',
236
                    'label': _('Group by'),
237
                    'options': [{'id': x[0], 'label': x[1]} for x in field_choices],
238
                },
239
            )
228 240

  
229 241
        return subfilters
242

  
243
    def get_group_by_column(self, form_page, group_by):
244
        if group_by == 'status':
245
            return group_by
246
        else:
247
            fields = [
248
                x
249
                for x in form_page.get_formdef_fields()
250
                if getattr(x, 'contextual_varname', None) == group_by
251
            ]
252
            if fields:
253
                field = fields[0]
254
                if not hasattr(field, 'block_field'):  # block fields are not supported
255
                    return sql.get_field_id(field)
256

  
257
    def get_grouped_data(self, totals, group_by, formdef):
258
        totals_by_time = collections.OrderedDict(
259
            # time1: {group1: total_11, group2: total_12},
260
            # time2: {group1: total_21}
261
        )
262
        seen_group_values = set(
263
            # group1, group2
264
        )
265
        for total in totals:
266
            totals_by_group = totals_by_time.setdefault(total[0], collections.Counter())
267
            if len(total) == 2:
268
                # ignore empty value used to fill time gaps
269
                continue
270
            groups = total[1]
271
            if not isinstance(groups, list):
272
                groups = [groups]
273
            for group in groups:
274
                totals_by_group[group] += total[2]
275
                seen_group_values.add(group)
276

  
277
        totals_by_group = {
278
            # group1: [total_11, total_21],
279
            # group2: [total_12, None],
280
        }
281
        for group in seen_group_values:
282
            totals_by_group[group] = [totals.get(group) for totals in totals_by_time.values()]
283

  
284
        def get_label(group):
285
            if group_by == 'status':
286
                status_id = group.split('-')[1]
287
                status = formdef.workflow.get_status(status_id)
288
                return status.name
289
            group_labels = {True: _('Yes'), False: _('No'), None: _('None')}
290
            return group_labels.get(group, group)
291

  
292
        x_labels = list(totals_by_time)
293
        series = [{'label': get_label(group), 'data': data} for group, data in totals_by_group.items()]
294
        return x_labels, series
230
-