Project

General

Profile

0002-statistics-group-forms-count-by-field-60777.patch

Valentin Deniaud, 15 February 2022 05:27 PM

Download (18.5 KB)

View differences:

Subject: [PATCH 2/2] statistics: group forms count by field (#60777)

 tests/api/test_statistics.py | 106 +++++++++++++++++++++++++++++++++--
 wcs/sql.py                   |  65 ++++++++++++---------
 wcs/statistics/views.py      |  95 +++++++++++++++++++++++++------
 3 files changed, 221 insertions(+), 45 deletions(-)
tests/api/test_statistics.py
254 254

  
255 255
    resp = get_app(pub).get(sign_uri('/api/statistics/forms/count/?form=%s' % formdef.url_name))
256 256

  
257
    # check item field subfilter
257
    # check group-by subfilter
258 258
    assert resp.json['data']['subfilters'][0] == {
259
        'id': 'group-by',
260
        'label': 'Group by',
261
        'options': [
262
            {'id': 'test-item', 'label': 'Test item'},
263
            {'id': 'test-items', 'label': 'Test items'},
264
            {'id': 'checkbox', 'label': 'Checkbox'},
265
            {'id': 'status', 'label': 'Status'},
266
        ],
267
    }
268

  
269
    # check item field subfilter
270
    assert resp.json['data']['subfilters'][1] == {
259 271
        'id': 'filter-test-item',
260 272
        'label': 'Test item',
261 273
        'options': [{'id': 'baz', 'label': 'Baz'}, {'id': 'foo', 'label': 'Foo'}],
......
263 275
    }
264 276

  
265 277
    # check items field subfilter
266
    assert resp.json['data']['subfilters'][1] == {
278
    assert resp.json['data']['subfilters'][2] == {
267 279
        'id': 'filter-test-items',
268 280
        'label': 'Test items',
269 281
        'options': [
......
275 287
    }
276 288

  
277 289
    # check block boolean field subfilter
278
    assert resp.json['data']['subfilters'][2] == {
290
    assert resp.json['data']['subfilters'][3] == {
279 291
        'id': 'filter-blockdata_bool',
280 292
        'label': 'Bool',
281 293
        'options': [{'id': 'true', 'label': 'Yes'}, {'id': 'false', 'label': 'No'}],
......
283 295
    }
284 296

  
285 297
    # check boolean backoffice field subfilter
286
    assert resp.json['data']['subfilters'][3] == {
298
    assert resp.json['data']['subfilters'][4] == {
287 299
        'id': 'filter-checkbox',
288 300
        'label': 'Checkbox',
289 301
        'options': [{'id': 'true', 'label': 'Yes'}, {'id': 'false', 'label': 'No'}],
......
451 463
    # invalid filter
452 464
    resp = get_app(pub).get(sign_uri(url + '&filter-xxx=yyy'))
453 465
    assert resp.json['data']['series'][0]['data'] == []
466

  
467

  
468
def test_statistics_forms_count_group_by(pub, formdef):
469
    for i in range(20):
470
        formdata = formdef.data_class()()
471
        formdata.just_created()
472
        formdata.receipt_time = datetime.datetime(2021, 1, 1, 0, 0).timetuple()
473
        if i % 3:
474
            formdata.data['1'] = True
475
            formdata.data['2'] = 'foo'
476
            formdata.data['3'] = ['bar', 'baz']
477
        elif i % 2:
478
            formdata.data['1'] = False
479
            formdata.data['2'] = 'baz'
480
            formdata.data['3'] = ['baz']
481
            formdata.jump_status('2')
482
        else:
483
            formdata.receipt_time = datetime.datetime(2021, 3, 1, 2, 0).timetuple()
484
        formdata.store()
485

  
486
    # group by item field
487
    url = '/api/statistics/forms/count/?form=%s' % formdef.url_name
488
    resp = get_app(pub).get(sign_uri(url + '&group-by=test-item'))
489
    assert resp.json['data']['x_labels'] == ['2021-01', '2021-02', '2021-03']
490
    assert resp.json['data']['series'] == [
491
        {'data': [3, None, None], 'label': 'baz'},
492
        {'data': [13, None, None], 'label': 'foo'},
493
        {'data': [None, None, 4], 'label': 'None'},
494
    ]
495

  
496
    resp = get_app(pub).get(sign_uri(url + '&group-by=test-item&time_interval=year'))
497
    assert resp.json['data']['x_labels'] == ['2021']
498
    assert resp.json['data']['series'] == [
499
        {'label': 'baz', 'data': [3]},
500
        {'label': 'foo', 'data': [13]},
501
        {'label': 'None', 'data': [4]},
502
    ]
503

  
504
    resp = get_app(pub).get(sign_uri(url + '&group-by=test-item&time_interval=hour'))
505
    assert resp.json['data']['x_labels'] == list(range(24))
506
    assert resp.json['data']['series'][0]['data'][0] == 3
507
    assert resp.json['data']['series'][1]['data'][0] == 13
508
    assert resp.json['data']['series'][2]['data'][2] == 4
509

  
510
    resp = get_app(pub).get(sign_uri(url + '&group-by=test-item&time_interval=weekday'))
511
    assert len(resp.json['data']['x_labels']) == 7
512
    assert resp.json['data']['series'] == [
513
        {'label': 'baz', 'data': [None, None, None, None, 3, None, None]},
514
        {'label': 'foo', 'data': [None, None, None, None, 13, None, None]},
515
        {'label': 'None', 'data': [4, None, None, None, None, None, None]},
516
    ]
517

  
518
    # group by items field
519
    url = '/api/statistics/forms/count/?form=%s' % formdef.url_name
520
    resp = get_app(pub).get(sign_uri(url + '&group-by=test-items'))
521
    assert resp.json['data']['x_labels'] == ['2021-01', '2021-02', '2021-03']
522
    assert resp.json['data']['series'] == [
523
        {'label': 'bar', 'data': [13, None, None]},
524
        {'label': 'baz', 'data': [16, None, None]},
525
        {'label': 'None', 'data': [None, None, 4]},
526
    ]
527

  
528
    # group by boolean field
529
    resp = get_app(pub).get(sign_uri(url + '&group-by=checkbox'))
530
    assert resp.json['data']['x_labels'] == ['2021-01', '2021-02', '2021-03']
531
    assert resp.json['data']['series'] == [
532
        {'data': [3, None, None], 'label': 'No'},
533
        {'data': [13, None, None], 'label': 'Yes'},
534
        {'data': [None, None, 4], 'label': 'None'},
535
    ]
536

  
537
    # group by status
538
    resp = get_app(pub).get(sign_uri(url + '&group-by=status'))
539
    assert resp.json['data']['x_labels'] == ['2021-01', '2021-02', '2021-03']
540
    assert resp.json['data']['series'] == [
541
        {'data': [3, None, None], 'label': 'End status'},
542
        {'data': [13, None, 4], 'label': 'New status'},
543
    ]
544

  
545
    # group by on block field is not supported
546
    resp = get_app(pub).get(sign_uri(url + '&group-by=blockdata_bool'))
547
    assert resp.json['data']['series'] == [{'data': [16, 0, 4], 'label': 'Forms Count'}]
548

  
549
    # invalid field
550
    resp = get_app(pub).get(sign_uri(url + '&group-by=xxx'))
551
    assert resp.json['data']['series'] == [{'data': [16, 0, 4], 'label': 'Forms Count'}]
wcs/sql.py
3525 3525
    return statement
3526 3526

  
3527 3527

  
3528
def get_time_aggregate_query(time_interval, query, group_by, function='DATE_TRUNC'):
3529
    statement = f"SELECT {function}('{time_interval}', receipt_time) AS {time_interval}, "
3530
    if group_by:
3531
        statement += '%s, ' % group_by
3532
    statement += 'COUNT(*) '
3533
    statement += query
3534

  
3535
    aggregate_fields = time_interval
3536
    if group_by:
3537
        aggregate_fields += ', %s' % group_by
3538
    statement += f' GROUP BY {aggregate_fields} ORDER BY {aggregate_fields}'
3539
    return statement
3540

  
3541

  
3528 3542
@guard_postgres
3529 3543
def get_actionable_counts(user_roles):
3530 3544
    conn, cur = get_connection_and_cursor()
......
3569 3583

  
3570 3584

  
3571 3585
@guard_postgres
3572
def get_weekday_totals(period_start=None, period_end=None, criterias=None):
3586
def get_weekday_totals(period_start=None, period_end=None, criterias=None, group_by=None):
3573 3587
    conn, cur = get_connection_and_cursor()
3574
    statement = '''SELECT DATE_PART('dow', receipt_time) AS weekday, COUNT(*)'''
3575 3588
    parameters = {}
3576
    statement += get_period_query(
3589
    statement = get_period_query(
3577 3590
        period_start=period_start, period_end=period_end, criterias=criterias, parameters=parameters
3578 3591
    )
3579
    statement += ' GROUP BY weekday ORDER BY weekday' ''
3592
    statement = get_time_aggregate_query('dow', statement, group_by, function='DATE_PART')
3580 3593
    cur.execute(statement, parameters)
3581 3594

  
3582 3595
    result = cur.fetchall()
3583
    result = [(int(x), y) for x, y in result]
3596
    result = [(int(x[0]), *x[1:]) for x in result]
3584 3597
    coverage = [x[0] for x in result]
3585 3598
    for weekday in range(7):
3586 3599
        if weekday not in coverage:
3587 3600
            result.append((weekday, 0))
3588
    result.sort()
3601
    result.sort(key=lambda x: x[0])
3589 3602

  
3590 3603
    # add labels,
3591 3604
    weekday_names = [
......
3597 3610
        _('Friday'),
3598 3611
        _('Saturday'),
3599 3612
    ]
3600
    result = [(weekday_names[x], y) for (x, y) in result]
3613
    result = [(weekday_names[x[0]], *x[1:]) for x in result]
3601 3614
    # and move Sunday last
3602 3615
    result = result[1:] + [result[0]]
3603 3616

  
......
3628 3641

  
3629 3642

  
3630 3643
@guard_postgres
3631
def get_hour_totals(period_start=None, period_end=None, criterias=None):
3644
def get_hour_totals(period_start=None, period_end=None, criterias=None, group_by=None):
3632 3645
    conn, cur = get_connection_and_cursor()
3633
    statement = '''SELECT DATE_PART('hour', receipt_time) AS hour, COUNT(*)'''
3634 3646
    parameters = {}
3635
    statement += get_period_query(
3647
    statement = get_period_query(
3636 3648
        period_start=period_start, period_end=period_end, criterias=criterias, parameters=parameters
3637 3649
    )
3638
    statement += ' GROUP BY hour ORDER BY hour'
3650
    statement = get_time_aggregate_query('hour', statement, group_by, function='DATE_PART')
3639 3651
    cur.execute(statement, parameters)
3640 3652

  
3641 3653
    result = cur.fetchall()
3642
    result = [(int(x), y) for x, y in result]
3654
    result = [(int(x[0]), *x[1:]) for x in result]
3643 3655

  
3644 3656
    coverage = [x[0] for x in result]
3645 3657
    for hour in range(24):
3646 3658
        if hour not in coverage:
3647 3659
            result.append((hour, 0))
3648
    result.sort()
3660
    result.sort(key=lambda x: x[0])
3649 3661

  
3650 3662
    conn.commit()
3651 3663
    cur.close()
......
3654 3666

  
3655 3667

  
3656 3668
@guard_postgres
3657
def get_monthly_totals(period_start=None, period_end=None, criterias=None):
3669
def get_monthly_totals(
3670
    period_start=None,
3671
    period_end=None,
3672
    criterias=None,
3673
    group_by=None,
3674
):
3658 3675
    conn, cur = get_connection_and_cursor()
3659
    statement = '''SELECT DATE_TRUNC('month', receipt_time) AS month, COUNT(*) '''
3660 3676
    parameters = {}
3661
    statement += get_period_query(
3677
    statement = get_period_query(
3662 3678
        period_start=period_start, period_end=period_end, criterias=criterias, parameters=parameters
3663 3679
    )
3664
    statement += ' GROUP BY month ORDER BY month' ''
3680
    statement = get_time_aggregate_query('month', statement, group_by)
3665 3681
    cur.execute(statement, parameters)
3666 3682

  
3667 3683
    raw_result = cur.fetchall()
3668
    result = [('%d-%02d' % x.timetuple()[:2], y) for x, y in raw_result]
3684
    result = [('%d-%02d' % x[0].timetuple()[:2], *x[1:]) for x in raw_result]
3669 3685
    if result:
3670 3686
        coverage = [x[0] for x in result]
3671 3687
        current_month = raw_result[0][0]
......
3676 3692
                result.append((label, 0))
3677 3693
            current_month = current_month + datetime.timedelta(days=31)
3678 3694
            current_month = current_month - datetime.timedelta(days=current_month.day - 1)
3679
        result.sort()
3695
        result.sort(key=lambda x: x[0])
3680 3696

  
3681 3697
    conn.commit()
3682 3698
    cur.close()
......
3685 3701

  
3686 3702

  
3687 3703
@guard_postgres
3688
def get_yearly_totals(period_start=None, period_end=None, criterias=None):
3704
def get_yearly_totals(period_start=None, period_end=None, criterias=None, group_by=None):
3689 3705
    conn, cur = get_connection_and_cursor()
3690
    statement = '''SELECT DATE_TRUNC('year', receipt_time) AS year, COUNT(*)'''
3691 3706
    parameters = {}
3692
    statement += get_period_query(
3707
    statement = get_period_query(
3693 3708
        period_start=period_start, period_end=period_end, criterias=criterias, parameters=parameters
3694 3709
    )
3695
    statement += ' GROUP BY year ORDER BY year'
3710
    statement = get_time_aggregate_query('year', statement, group_by)
3696 3711
    cur.execute(statement, parameters)
3697 3712

  
3698 3713
    raw_result = cur.fetchall()
3699
    result = [(str(x.year), y) for x, y in raw_result]
3714
    result = [(str(x[0].year), *x[1:]) for x in raw_result]
3700 3715
    if result:
3701 3716
        coverage = [x[0] for x in result]
3702 3717
        current_year = raw_result[0][0]
......
3706 3721
            if label not in coverage:
3707 3722
                result.append((label, 0))
3708 3723
            current_year = current_year + datetime.timedelta(days=366)
3709
        result.sort()
3724
        result.sort(key=lambda x: x[0])
3710 3725

  
3711 3726
    conn.commit()
3712 3727
    cur.close()
wcs/statistics/views.py
14 14
# You should have received a copy of the GNU General Public License
15 15
# along with this program; if not, see <http://www.gnu.org/licenses/>.
16 16

  
17
import collections
18

  
17 19
from django.http import HttpResponseBadRequest, HttpResponseForbidden, JsonResponse
18 20
from django.urls import reverse
19 21
from django.views.generic import View
20 22
from quixote import get_publisher
21 23

  
24
from wcs import sql
22 25
from wcs.api_utils import is_url_signed
23 26
from wcs.backoffice.management import FormPage
24 27
from wcs.categories import Category
......
105 108

  
106 109
class FormsCountView(RestrictedView):
107 110
    def get(self, request, *args, **kwargs):
108
        from wcs import sql
109

  
110 111
        time_interval = request.GET.get('time_interval', 'month')
111 112
        totals_kwargs = {
112 113
            'period_start': request.GET.get('start'),
......
115 116
        }
116 117
        category_id = request.GET.get('category', '_all')
117 118
        formdef_slug = request.GET.get('form', '_all')
119
        group_by = request.GET.get('group-by')
118 120
        subfilters = []
119 121
        if formdef_slug != '_all':
120 122
            try:
......
125 127

  
126 128
            totals_kwargs['criterias'].append(Equal('formdef_id', formdef.id))
127 129
            totals_kwargs['criterias'].extend(self.get_filters_criterias(formdef, form_page))
128

  
130
            if group_by:
131
                totals_kwargs['group_by'] = self.get_group_by_column(form_page, group_by)
129 132
            subfilters = self.get_subfilters(form_page)
130 133
        elif category_id != '_all':
131 134
            totals_kwargs['criterias'].append(Equal('category_id', category_id))
......
141 144
        else:
142 145
            return HttpResponseBadRequest('invalid time_interval parameter')
143 146

  
147
        if not totals_kwargs.get('group_by'):
148
            x_labels = [x[0] for x in totals]
149
            series = [{'label': _('Forms Count'), 'data': [x[1] for x in totals]}]
150
        else:
151
            x_labels, series = self.get_grouped_data(totals, group_by, formdef)
152

  
144 153
        return JsonResponse(
145
            {
146
                'data': {
147
                    'x_labels': [x[0] for x in totals],
148
                    'series': [
149
                        {
150
                            'label': _('Forms Count'),
151
                            'data': [x[1] for x in totals],
152
                        }
153
                    ],
154
                    'subfilters': subfilters,
155
                },
156
                'err': 0,
157
            }
154
            {'data': {'x_labels': x_labels, 'series': series, 'subfilters': subfilters}, 'err': 0}
158 155
        )
159 156

  
160 157
    def get_filters_criterias(self, formdef, form_page):
......
184 181
    @staticmethod
185 182
    def get_subfilters(form_page):
186 183
        subfilters = []
184
        field_choices = []
187 185
        for field in form_page.get_formdef_fields():
188 186
            if not getattr(field, 'include_in_statistics', False) or not field.contextual_varname:
189 187
                continue
......
226 224
                filter_description['default'] = field.default_filter_value
227 225

  
228 226
            subfilters.append(filter_description)
227
            if not hasattr(field, 'block_field'):
228
                field_choices.append((field.contextual_varname, field.label))
229

  
230
        if field_choices:
231
            subfilters.insert(
232
                0,
233
                {
234
                    'id': 'group-by',
235
                    'label': _('Group by'),
236
                    'options': [{'id': x[0], 'label': x[1]} for x in field_choices],
237
                },
238
            )
229 239

  
230 240
        return subfilters
241

  
242
    def get_group_by_column(self, form_page, group_by):
243
        if group_by == 'status':
244
            return group_by
245
        else:
246
            fields = [
247
                x
248
                for x in form_page.get_formdef_fields()
249
                if getattr(x, 'contextual_varname', None) == group_by
250
            ]
251
            if fields:
252
                field = fields[0]
253
                if not hasattr(field, 'block_field'):  # block fields are not supported
254
                    return sql.get_field_id(field)
255

  
256
    def get_grouped_data(self, totals, group_by, formdef):
257
        totals_by_time = collections.OrderedDict(
258
            # time1: {group1: total_11, group2: total_12},
259
            # time2: {group1: total_21}
260
        )
261
        seen_group_values = set(
262
            # group1, group2
263
        )
264
        for total in totals:
265
            totals_by_group = totals_by_time.setdefault(total[0], collections.Counter())
266
            if len(total) == 2:
267
                # ignore empty value used to fill time gaps
268
                continue
269
            groups = total[1]
270
            if not isinstance(groups, list):
271
                groups = [groups]
272
            for group in groups:
273
                totals_by_group[group] += total[2]
274
                seen_group_values.add(group)
275

  
276
        totals_by_group = {
277
            # group1: [total_11, total_21],
278
            # group2: [total_12, None],
279
        }
280
        for group in seen_group_values:
281
            totals_by_group[group] = [totals.get(group) for totals in totals_by_time.values()]
282

  
283
        def get_label(group):
284
            if group_by == 'status':
285
                status_id = group.split('-')[1]
286
                status = formdef.workflow.get_status(status_id)
287
                return status.name
288
            group_labels = {True: _('Yes'), False: _('No'), None: _('None')}
289
            return group_labels.get(group, group)
290

  
291
        x_labels = list(totals_by_time)
292
        series = [{'label': get_label(group), 'data': data} for group, data in totals_by_group.items()]
293
        return x_labels, series
231
-