Projet

Général

Profil

0001-journal-add-event-type-statistics-47467.patch

Valentin Deniaud, 24 novembre 2020 15:55

Télécharger (19,2 ko)

Voir les différences:

Subject: [PATCH] journal: add event type statistics (#47467)

 src/authentic2/apps/journal/models.py |  46 +++++++-
 src/authentic2/apps/journal/utils.py  |  60 ++++++++++
 src/authentic2/journal_event_types.py |  85 +++++++++++---
 tests/test_journal.py                 | 160 ++++++++++++++++++++++++++
 4 files changed, 331 insertions(+), 20 deletions(-)
src/authentic2/apps/journal/models.py
23 23
from django.conf import settings
24 24
from django.contrib.auth import get_user_model
25 25
from django.contrib.postgres.fields import ArrayField, JSONField
26
from django.contrib.postgres.fields.jsonb import KeyTextTransform
26 27
from django.contrib.contenttypes.models import ContentType
27 28
from django.core.exceptions import ObjectDoesNotExist
28 29
from django.db import models
29
from django.db.models import QuerySet, Q, F, Value
30
from django.db.models import QuerySet, Q, F, Value, Count
31
from django.db.models.functions import Trunc
30 32
from django.utils.translation import ugettext_lazy as _
31 33
from django.utils.timezone import utc, now
32 34

  
......
108 110
    def get_message(self, event, context=None):
        """Return the message to display for ``event``.

        This implementation ignores both ``event`` and ``context`` and simply
        returns the label of the event type.
        """
        message = self.label
        return message
110 112

  
113
    @classmethod
    def get_statistics(
        cls,
        group_by_time,
        group_by_field=None,
        group_by_references=False,
        which_references=None,
        start=None,
        end=None,
    ):
        """Count the events of this type, grouped by time and optional extra keys.

        :param group_by_time: one of ``'timestamp'``, ``'day'``, ``'month'`` or
            ``'year'``; anything but ``'timestamp'`` truncates the event
            timestamp to that unit before grouping.
        :param group_by_field: optional key of the event ``data`` JSON to add
            to the grouping (extracted as text).
        :param group_by_references: when true, also group by ``reference_ids``.
        :param which_references: optional value handed to
            ``qs.which_references()`` to restrict the events.
        :param start: optional inclusive lower bound on ``timestamp``.
        :param end: optional inclusive upper bound on ``timestamp``.
        :returns: a values queryset ordered by ``group_by_time``; each row holds
            the grouping keys plus a ``count`` entry.
        :raises ValueError: if ``group_by_time`` is not a supported value.
        """
        if group_by_time not in ('timestamp', 'day', 'month', 'year'):
            # The message must interpolate the actual parameter; the former
            # code referenced an undefined name and failed with NameError.
            raise ValueError('Unsupported value for group_by_time: %s' % group_by_time)

        event_type = EventType.objects.get_for_name(cls.name)
        qs = Event.objects.filter(type=event_type)

        if start:
            qs = qs.filter(timestamp__gte=start)
        if end:
            qs = qs.filter(timestamp__lte=end)

        values = [group_by_time]
        if group_by_time != 'timestamp':
            # Expose the truncated timestamp under the name of the unit so it
            # can be used both as a grouping key and as the ordering column.
            qs = qs.annotate(
                **{group_by_time: Trunc('timestamp', kind=group_by_time, output_field=models.DateField())}
            )

        if group_by_field:
            # get field from JSONField
            qs = qs.annotate(**{group_by_field: KeyTextTransform(group_by_field, 'data')})
            values.append(group_by_field)

        # NOTE(review): this is a truthiness test, so an empty queryset or list
        # skips the filter instead of matching nothing - confirm this is the
        # intended behaviour for callers passing a filtered queryset.
        if which_references:
            qs = qs.which_references(which_references)

        if group_by_references:
            values.append('reference_ids')

        # values() followed by annotate() turns the query into a GROUP BY on
        # the selected keys.
        qs = qs.values(*values)
        qs = qs.annotate(count=Count('id'))
        return qs.order_by(group_by_time)
154

  
111 155
    def __repr__(self):
        """Return a debugging representation showing the type name and its label."""
        parts = (self.name, self.label)
        return '<EventTypeDefinition %r %s>' % parts
113 157

  
src/authentic2/apps/journal/utils.py
14 14
# You should have received a copy of the GNU Affero General Public License
15 15
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
16 16

  
17
from datetime import timedelta, date
18

  
19
from django.db.models import Min, Max, DateField
20

  
17 21

  
18 22
def _json_value(value):
19 23
    if isinstance(value, (dict, list, str, int, bool)) or value is None:
......
30 34
            old[key] = _json_value(old_value)
31 35
        new[key] = _json_value(form.cleaned_data.get(key))
32 36
    return {'old': old, 'new': new}
37

  
38

  
39
class Statistics:
    """Accumulate counts into named series sharing one time axis.

    The x axis is computed once from the queryset given to the constructor.
    For the ``'day'``, ``'month'`` and ``'year'`` intervals every step between
    the smallest and the largest value is present on the axis, so series have
    ``None`` where nothing was added.
    """

    # strftime patterns used to render the x labels of truncated intervals
    time_label_formats = {
        'year': '%Y',
        'month': '%Y-%m',
        'day': '%Y-%m-%d',
    }

    def __init__(self, qs, time_interval):
        """Build the x axis from ``qs`` for the given ``time_interval``.

        :param qs: queryset exposing ``time_interval`` as a field/annotation.
        :param time_interval: ``'timestamp'``, ``'day'``, ``'month'`` or ``'year'``.
        :raises ValueError: if ``time_interval`` is not supported.
        """
        self.time_interval = time_interval
        self.x_labels = self.build_x_labels(qs)
        # reverse mapping, label -> position on the x axis
        self._x_labels_indexes = {label: i for i, label in enumerate(self.x_labels)}
        self.series = {}

    def build_x_labels(self, qs):
        """Return the ordered list of x axis values for ``qs``.

        For ``'timestamp'`` the distinct values of the queryset are used as
        is.  For the other intervals a contiguous list of ``date`` objects is
        generated between the minimum and the maximum value; an empty
        queryset yields an empty axis.
        """
        if self.time_interval == 'timestamp':
            return list(qs.distinct().values_list(self.time_interval, flat=True))

        aggregate = qs.aggregate(min=Min(self.time_interval), max=Max(self.time_interval))
        if aggregate['min'] is None or aggregate['max'] is None:
            # Min/Max are None when there is no row: nothing to put on the axis.
            return []

        min_date, max_date = aggregate['min'].date(), aggregate['max'].date()
        if self.time_interval == 'day':
            day_count = (max_date - min_date).days + 1
            return [min_date + timedelta(days=i) for i in range(day_count)]
        if self.time_interval == 'year':
            return [date(year=i, month=1, day=1) for i in range(min_date.year, max_date.year + 1)]
        if self.time_interval == 'month':
            # Number months from year 0 so that one range covers year boundaries.
            first = min_date.year * 12 + min_date.month - 1
            last = max_date.year * 12 + max_date.month - 1
            return [date(year=i // 12, month=i % 12 + 1, day=1) for i in range(first, last + 1)]
        raise ValueError('Unsupported value for time_interval: %s' % self.time_interval)

    def add(self, x_label, y_label, value):
        """Add ``value`` to the serie ``y_label`` at position ``x_label``.

        Values added at the same position are summed.  Raises ``KeyError`` if
        ``x_label`` is not on the x axis.
        """
        serie = self.get_serie(y_label)
        index = self.x_index(x_label)
        serie[index] = (serie[index] or 0) + value

    def get_serie(self, label):
        """Return the data list of serie ``label``, creating it filled with ``None``."""
        return self.series.setdefault(label, [None] * len(self.x_labels))

    def x_index(self, x_label):
        """Return the position of ``x_label`` on the x axis."""
        return self._x_labels_indexes[x_label]

    def to_json(self, get_y_label=lambda x: x):
        """Return a dict with the formatted ``x_labels`` and the list of ``series``.

        ``get_y_label`` maps each internal serie key to the label to output.
        """
        series = [{'label': get_y_label(label), 'data': data} for label, data in self.series.items()]
        return {
            'x_labels': [self.format_x_label(label) for label in self.x_labels],
            'series': series,
        }

    def format_x_label(self, label):
        """Render an x axis value: ISO 8601 for timestamps, ``strftime`` otherwise."""
        if self.time_interval == 'timestamp':
            return label.isoformat()
        return label.strftime(self.time_label_formats[self.time_interval])
src/authentic2/journal_event_types.py
14 14
# You should have received a copy of the GNU Affero General Public License
15 15
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
16 16

  
17
from django.contrib.contenttypes.models import ContentType
17 18
from django.utils.translation import ugettext_lazy as _
18 19

  
19 20
from authentic2.custom_user.models import get_attributes_map
20
from authentic2.apps.journal.models import EventTypeDefinition
21
from authentic2.apps.journal.utils import form_to_old_new
21
from authentic2.apps.journal.models import EventTypeDefinition, n_2_pairing_rev
22
from authentic2.apps.journal.utils import form_to_old_new, Statistics
22 23
from authentic2.custom_user.models import User
23 24

  
24
from . import models
25
from .models import Service
25 26

  
26 27

  
27 28
class EventTypeWithService(EventTypeDefinition):
......
38 39

  
39 40
    @classmethod
40 41
    def get_service_name(self, event):
41
        (service,) = event.get_typed_references(models.Service)
42
        (service,) = event.get_typed_references(Service)
42 43
        if service is not None:
43 44
            return str(service)
44 45
        if 'service_name' in event.data:
......
46 47
        return ''
47 48

  
48 49

  
50
class EventTypeWithHow(EventTypeWithService):
    """Service event type whose data stores the method used under the ``how`` key."""

    @classmethod
    def record(cls, user, session, service, how):
        """Record the event with ``how`` stored in its data."""
        payload = {'how': how}
        super().record(user=user, session=session, service=service, data=payload)

    @classmethod
    def get_method_statistics(cls, group_by_time, service=None, ou=None, start=None, end=None):
        """Return event counts per ``how`` value, as produced by ``Statistics.to_json()``.

        When ``ou`` is given it takes precedence over ``service``: events are
        restricted to the services of that organizational unit.
        """
        if ou:
            service = Service.objects.filter(ou=ou)

        qs = cls.get_statistics(
            group_by_time=group_by_time,
            group_by_field='how',
            which_references=service,
            start=start,
            end=end,
        )
        stats = Statistics(qs, time_interval=group_by_time)
        for row in qs:
            stats.add(x_label=row[group_by_time], y_label=row['how'], value=row['count'])

        def method_label(how):
            # events recorded without a method have no 'how' value
            return _(login_method_label(how or ''))

        return stats.to_json(get_y_label=method_label)

    @classmethod
    def _get_method_statistics_by_reference(cls, group_by_time, reference, **kwargs):
        """Return event counts per referenced service (``'service'``) or per its OU (``'ou'``).

        Extra keyword arguments are forwarded to ``get_statistics()``.
        """
        qs = cls.get_statistics(group_by_time, group_by_references=True, **kwargs)
        stats = Statistics(qs, time_interval=group_by_time)

        # service pk -> label, so each service is fetched once per call
        labels_cache = {}

        def get_reference_label(instance_pk):
            cached = labels_cache.get(instance_pk)
            if cached:
                return cached
            # NOTE(review): presumably raises Service.DoesNotExist when the
            # referenced service has been deleted - confirm.
            service = Service.objects.get(pk=instance_pk)
            if reference == 'service':
                cached = str(service)
            elif reference == 'ou':
                cached = str(service.ou)
            labels_cache[instance_pk] = cached
            return cached

        service_ct_id = ContentType.objects.get_for_model(Service).pk
        no_service = object()
        for row in qs:
            # reference ids are decoded lazily, stopping at the first service found
            decoded = (n_2_pairing_rev(reference_id) for reference_id in row['reference_ids'] or [])
            service_pk = next((pk for ct_id, pk in decoded if ct_id == service_ct_id), no_service)
            if service_pk is no_service:
                reference_label = _('None')
            else:
                reference_label = get_reference_label(service_pk)
            stats.add(x_label=row[group_by_time], y_label=reference_label, value=row['count'])

        return stats.to_json()

    @classmethod
    def get_service_statistics(cls, group_by_time, start=None, end=None, **kwargs):
        """Return event counts per service; extra keyword arguments are ignored."""
        return cls._get_method_statistics_by_reference(
            group_by_time, reference='service', start=start, end=end
        )

    @classmethod
    def get_service_ou_statistics(cls, group_by_time, start=None, end=None, **kwargs):
        """Return event counts per service OU; extra keyword arguments are ignored."""
        return cls._get_method_statistics_by_reference(
            group_by_time, reference='ou', start=start, end=end
        )
106

  
107

  
49 108
def login_method_label(how):
50 109
    if how.startswith('password'):
51 110
        return _('password')
......
73 132
                yield name
74 133

  
75 134

  
76
class UserLogin(EventTypeWithService):
135
class UserLogin(EventTypeWithHow):
77 136
    name = 'user.login'
78 137
    label = _('login')
79 138

  
80
    @classmethod
81
    def record(cls, user, session, service, how):
82
        super().record(user=user, session=session, service=service, data={'how': how})
83

  
84 139
    @classmethod
85 140
    def get_message(cls, event, context):
86 141
        how = event.get_data('how')
......
115 170
        return _('registration request with email "%s"') % email
116 171

  
117 172

  
118
class UserRegistration(EventTypeWithService):
173
class UserRegistration(EventTypeWithHow):
119 174
    name = 'user.registration'
120 175
    label = _('registration')
121 176

  
122
    @classmethod
123
    def record(cls, user, session, service, how):
124
        super().record(user=user, session=session, service=service, data={'how': how})
125

  
126 177
    @classmethod
127 178
    def get_message(cls, event, context):
128 179
        how = event.get_data('how')
......
219 270
        super().record(user=user, session=session, service=service)
220 271

  
221 272

  
222
class UserServiceSSO(EventTypeWithService):
273
class UserServiceSSO(EventTypeWithHow):
223 274
    name = 'user.service.sso'
224 275
    label = _('service single sign on')
225 276

  
226
    @classmethod
227
    def record(cls, user, session, service, how):
228
        super().record(user=user, session=session, service=service, data={'how': how})
229

  
230 277
    @classmethod
231 278
    def get_message(cls, event, context):
232 279
        service_name = cls.get_service_name(event)
tests/test_journal.py
19 19

  
20 20
import mock
21 21
import pytest
22
import pytz
22 23

  
23 24
from django.contrib.auth import get_user_model
24 25
from django.core.management import call_command
25 26
from django.utils.timezone import make_aware, make_naive
26 27

  
28
from authentic2.a2_rbac.models import OrganizationalUnit as OU
29
from authentic2.a2_rbac.utils import get_default_ou
27 30
from authentic2.apps.journal.forms import JournalForm
28 31
from authentic2.apps.journal.journal import Journal
29 32
from authentic2.apps.journal.models import EventTypeDefinition, EventType, Event, clean_registry
......
443 446
    assert len(caplog.records) == 1
444 447
    assert caplog.records[0].levelname == 'ERROR'
445 448
    assert caplog.records[0].message == 'could not render message of event type "user.login"'
449

  
450

  
451
@pytest.mark.parametrize('event_type_name', ['user.login', 'user.registration'])
def test_statistics(db, event_type_name, freezer):
    """Check method, service and OU statistics over a small set of events."""

    def sorted_by_label(result):
        # sort the series in place so the comparison does not depend on their order
        result['series'].sort(key=lambda serie: serie['label'])
        return result

    john = User.objects.create(username='john.doe', email='john.doe@example.com')
    jane = User.objects.create(username='jane.doe', email='jane.doe@example.com')
    second_ou = OU.objects.create(name='Second OU')

    portal = Service.objects.create(name='portal', slug='portal', ou=second_ou)
    agendas = Service.objects.create(name='agendas', slug='agendas', ou=get_default_ou())
    forms = Service.objects.create(name='forms', slug='forms', ou=get_default_ou())

    password = {'how': 'password-on-https'}
    france_connect = {'how': 'fc'}

    event_type = EventType.objects.get_for_name(event_type_name)

    def create_event(**fields):
        Event.objects.create(type=event_type, **fields)

    freezer.move_to('2020-02-03 12:00')
    for who in (john, jane):
        create_event(references=[who, portal], user=who, data=password)

    freezer.move_to('2020-02-03 13:00')
    for who in (john, jane):
        create_event(references=[who, portal], user=who, data=france_connect)

    freezer.move_to('2020-03-03 12:00')
    for service in (portal, agendas, forms):
        create_event(references=[john, service], user=john, data=password)
    # one event without any service reference nor method
    create_event(user=john)

    definition = event_type.definition

    # every distinct timestamp gets its own x label
    assert sorted_by_label(definition.get_method_statistics('timestamp')) == {
        'x_labels': ['2020-02-03T12:00:00+00:00', '2020-02-03T13:00:00+00:00', '2020-03-03T12:00:00+00:00'],
        'series': [
            {'label': 'FranceConnect', 'data': [None, 2, None]},
            {'label': 'none', 'data': [None, None, 1]},
            {'label': 'password', 'data': [2, None, 3]},
        ],
    }

    # time window
    start = datetime(year=2020, month=2, day=3, hour=12, minute=30, tzinfo=pytz.UTC)
    end = datetime(year=2020, month=2, day=3, hour=13, minute=30, tzinfo=pytz.UTC)
    assert definition.get_method_statistics('timestamp', start=start, end=end) == {
        'x_labels': ['2020-02-03T13:00:00+00:00'],
        'series': [{'label': 'FranceConnect', 'data': [2]}],
    }

    assert sorted_by_label(definition.get_method_statistics('month')) == {
        'x_labels': ['2020-02', '2020-03'],
        'series': [
            {'label': 'FranceConnect', 'data': [2, None]},
            {'label': 'none', 'data': [None, 1]},
            {'label': 'password', 'data': [2, 3]},
        ],
    }

    # restriction to the services of an OU
    assert definition.get_method_statistics('month', ou=get_default_ou()) == {
        'x_labels': ['2020-03'],
        'series': [{'label': 'password', 'data': [2]}],
    }

    assert sorted_by_label(definition.get_method_statistics('month', ou=second_ou)) == {
        'x_labels': ['2020-02', '2020-03'],
        'series': [{'label': 'FranceConnect', 'data': [2, None]}, {'label': 'password', 'data': [2, 1]}],
    }

    # restriction to a single service
    assert sorted_by_label(definition.get_method_statistics('month', service=portal)) == {
        'x_labels': ['2020-02', '2020-03'],
        'series': [{'label': 'FranceConnect', 'data': [2, None]}, {'label': 'password', 'data': [2, 1]}],
    }

    assert sorted_by_label(definition.get_method_statistics('year')) == {
        'x_labels': ['2020'],
        'series': [
            {'label': 'FranceConnect', 'data': [2]},
            {'label': 'none', 'data': [1]},
            {'label': 'password', 'data': [5]},
        ],
    }

    assert sorted_by_label(definition.get_service_statistics('month')) == {
        'x_labels': ['2020-02', '2020-03'],
        'series': [
            {'label': 'None', 'data': [None, 1]},
            {'label': 'agendas', 'data': [None, 1]},
            {'label': 'forms', 'data': [None, 1]},
            {'label': 'portal', 'data': [4, 1]},
        ],
    }

    assert sorted_by_label(definition.get_service_ou_statistics('month')) == {
        'x_labels': ['2020-02', '2020-03'],
        'series': [
            {'label': 'Default organizational unit', 'data': [None, 2]},
            {'label': 'None', 'data': [None, 1]},
            {'label': 'Second OU', 'data': [4, 1]},
        ],
    }
565

  
566

  
567
def test_statistics_fill_date_gaps(db, freezer):
    """Check that x labels cover every day/month/year between the first and last event."""
    User.objects.create(username='john.doe', email='john.doe@example.com')
    password = {'how': 'password-on-https'}
    event_type = EventType.objects.get_for_name('user.login')

    def record_at(*moments):
        # create one login event at each of the given frozen times
        for moment in moments:
            freezer.move_to(moment)
            Event.objects.create(type=event_type, data=password)

    record_at('2020-12-29 12:00', '2021-01-02 13:00')

    definition = event_type.definition

    # days, across a year boundary
    assert definition.get_method_statistics('day') == {
        'x_labels': ['2020-12-29', '2020-12-30', '2020-12-31', '2021-01-01', '2021-01-02'],
        'series': [{'label': 'password', 'data': [1, None, None, None, 1]}],
    }

    # months, spanning three calendar years
    Event.objects.all().delete()
    record_at('2020-11-29 12:00', '2022-02-02 13:00')
    expected_months = ['2020-11', '2020-12'] + ['2021-%02d' % i for i in range(1, 13)] + ['2022-01', '2022-02']
    assert definition.get_method_statistics('month') == {
        'x_labels': expected_months,
        'series': [{'label': 'password', 'data': [1] + [None] * 14 + [1]}],
    }

    # years
    Event.objects.all().delete()
    record_at('2020-11-29 12:00', '2025-02-02 13:00')
    assert definition.get_method_statistics('year') == {
        'x_labels': ['2020', '2021', '2022', '2023', '2024', '2025'],
        'series': [{'label': 'password', 'data': [1, None, None, None, None, 1]}],
    }
446
-