From a2a101056da2ec1ff08e65a45b25cfb5b71d306a Mon Sep 17 00:00:00 2001
From: Valentin Deniaud
Date: Wed, 18 Nov 2020 17:23:21 +0100
Subject: [PATCH] journal: add event type statistics (#47467)

---
 src/authentic2/apps/journal/models.py |  55 ++++++++-
 src/authentic2/apps/journal/utils.py  |  71 ++++++++++
 src/authentic2/journal_event_types.py |  89 +++++++++++---
 tests/test_journal.py                 | 177 ++++++++++++++++++++++++++
 4 files changed, 372 insertions(+), 20 deletions(-)

diff --git a/src/authentic2/apps/journal/models.py b/src/authentic2/apps/journal/models.py
index de4314f3..1136c2b5 100644
--- a/src/authentic2/apps/journal/models.py
+++ b/src/authentic2/apps/journal/models.py
@@ -23,10 +23,12 @@ import re
 from django.conf import settings
 from django.contrib.auth import get_user_model
 from django.contrib.postgres.fields import ArrayField, JSONField
+from django.contrib.postgres.fields.jsonb import KeyTextTransform
 from django.contrib.contenttypes.models import ContentType
 from django.core.exceptions import ObjectDoesNotExist
 from django.db import models
-from django.db.models import QuerySet, Q, F, Value
+from django.db.models import QuerySet, Q, F, Value, Count
+from django.db.models.functions import Trunc
 from django.utils.translation import ugettext_lazy as _
 from django.utils.timezone import utc, now
 
@@ -108,6 +110,57 @@ class EventTypeDefinition(metaclass=EventTypeDefinitionMeta):
     def get_message(self, event, context=None):
         return self.label
 
+    @classmethod
+    def get_statistics(
+        cls,
+        group_by_time,
+        group_by_field=None,
+        group_by_references=False,
+        which_references=None,
+        start=None,
+        end=None,
+    ):
+        """Count the events of this type, grouped by time.
+
+        group_by_time is one of 'timestamp', 'day', 'month' or 'year'. Counts
+        are further split by a key of the event data if group_by_field is set,
+        and by references if group_by_references is true. which_references,
+        start and end (both inclusive) restrict the events which are counted.
+
+        Raise ValueError if group_by_time is not supported.
+        """
+        if group_by_time not in ('timestamp', 'day', 'month', 'year'):
+            raise ValueError('Unsupported value for group_by_time: %s' % group_by_time)
+
+        event_type = EventType.objects.get_for_name(cls.name)
+        qs = Event.objects.filter(type=event_type)
+
+        if start:
+            qs = qs.filter(timestamp__gte=start)
+        if end:
+            qs = qs.filter(timestamp__lte=end)
+
+        values = [group_by_time]
+        if group_by_time != 'timestamp':
+            qs = qs.annotate(
+                **{group_by_time: Trunc('timestamp', kind=group_by_time, output_field=models.DateField())}
+            )
+
+        if group_by_field:
+            # get field from JSONField
+            qs = qs.annotate(**{group_by_field: KeyTextTransform(group_by_field, 'data')})
+            values.append(group_by_field)
+
+        if which_references:
+            qs = qs.which_references(which_references)
+
+        if group_by_references:
+            values.append('reference_ids')
+
+        qs = qs.values(*values)
+        qs = qs.annotate(count=Count('id'))
+        return qs.order_by(group_by_time)
+
     def __repr__(self):
         return '' % (self.name, self.label)
 
diff --git a/src/authentic2/apps/journal/utils.py b/src/authentic2/apps/journal/utils.py
index 10fbb8de..5efa6801 100644
--- a/src/authentic2/apps/journal/utils.py
+++ b/src/authentic2/apps/journal/utils.py
@@ -14,6 +14,10 @@
 # You should have received a copy of the GNU Affero General Public License
 # along with this program. If not, see .
 
+from datetime import timedelta, date
+
+from django.db.models import Min, Max, DateField
+
 
 def _json_value(value):
     if isinstance(value, (dict, list, str, int, bool)) or value is None:
@@ -30,3 +34,70 @@ def form_to_old_new(form):
         old[key] = _json_value(old_value)
         new[key] = _json_value(form.cleaned_data.get(key))
     return {'old': old, 'new': new}
+
+
+class Statistics:
+    """Accumulate counts into series sharing the same time axis.
+
+    x_labels holds the points of the time axis; each serie is a list with one
+    slot per x label, a slot being None as long as no value was added to it.
+    """
+
+    time_label_formats = {
+        'year': '%Y',
+        'month': '%Y-%m',
+        'day': '%Y-%m-%d',
+    }
+
+    def __init__(self, qs, time_interval):
+        self.time_interval = time_interval
+        self.x_labels = self.build_x_labels(qs)
+        self._x_labels_indexes = {label: i for i, label in enumerate(self.x_labels)}
+        self.series = {}
+
+    def build_x_labels(self, qs):
+        """Return the x labels covering qs; for date intervals, periods without event are included."""
+        if self.time_interval == 'timestamp':
+            return list(qs.distinct().values_list(self.time_interval, flat=True))
+
+        aggregate = qs.aggregate(min=Min(self.time_interval), max=Max(self.time_interval))
+        if aggregate['min'] is None:
+            # no event matched: Min/Max are None and there is no range to fill
+            return []
+
+        min_date, max_date = aggregate['min'].date(), aggregate['max'].date()
+        if self.time_interval == 'day':
+            return [min_date + timedelta(days=i) for i in range((max_date - min_date).days + 1)]
+        if self.time_interval == 'year':
+            return [date(year=i, month=1, day=1) for i in range(min_date.year, max_date.year + 1)]
+        if self.time_interval == 'month':
+            x_labels = []
+            for year in range(min_date.year, max_date.year + 1):
+                start_month = 1 if year != min_date.year else min_date.month
+                end_month = 12 if year != max_date.year else max_date.month
+                for month in range(start_month, end_month + 1):
+                    x_labels.append(date(year=year, month=month, day=1))
+            return x_labels
+
+    def add(self, x_label, y_label, value):
+        serie = self.get_serie(y_label)
+        index = self.x_index(x_label)
+        serie[index] = (serie[index] or 0) + value
+
+    def get_serie(self, label):
+        return self.series.setdefault(label, [None] * len(self.x_labels))
+
+    def x_index(self, x_label):
+        return self._x_labels_indexes[x_label]
+
+    def to_json(self, get_y_label=lambda x: x):
+        series = [{'label': get_y_label(label), 'data': data} for label, data in self.series.items()]
+        return {
+            'x_labels': [self.format_x_label(label) for label in self.x_labels],
+            'series': series,
+        }
+
+    def format_x_label(self, label):
+        if self.time_interval == 'timestamp':
+            return label.isoformat()
+        return label.strftime(self.time_label_formats[self.time_interval])
diff --git a/src/authentic2/journal_event_types.py b/src/authentic2/journal_event_types.py
index 77391fe5..e2fcb260 100644
--- a/src/authentic2/journal_event_types.py
+++ b/src/authentic2/journal_event_types.py
@@ -14,14 +14,15 @@
 # You should have received a copy of the GNU Affero General Public License
 # along with this program. If not, see .
 
+from django.contrib.contenttypes.models import ContentType
 from django.utils.translation import ugettext_lazy as _
 
 from authentic2.custom_user.models import get_attributes_map
-from authentic2.apps.journal.models import EventTypeDefinition
-from authentic2.apps.journal.utils import form_to_old_new
+from authentic2.apps.journal.models import EventTypeDefinition, n_2_pairing_rev
+from authentic2.apps.journal.utils import form_to_old_new, Statistics
 from authentic2.custom_user.models import User
 
-from . import models
+from .models import Service
 
 
 class EventTypeWithService(EventTypeDefinition):
@@ -38,7 +39,7 @@ class EventTypeWithService(EventTypeDefinition):
 
     @classmethod
     def get_service_name(self, event):
-        (service,) = event.get_typed_references(models.Service)
+        (service,) = event.get_typed_references(Service)
         if service is not None:
             return str(service)
         if 'service_name' in event.data:
@@ -46,6 +47,68 @@ class EventTypeWithService(EventTypeDefinition):
         return ''
 
 
+class EventTypeWithHow(EventTypeWithService):
+    """Event type recording a 'how' value in its data and providing statistics on its events."""
+
+    @classmethod
+    def record(cls, user, session, service, how):
+        super().record(user=user, session=session, service=service, data={'how': how})
+
+    @classmethod
+    def get_method_statistics(cls, group_by_time, service=None, ou=None, start=None, end=None):
+        """Count events by time and 'how' value, optionally restricted to a service or an OU."""
+        if ou:
+            service = Service.objects.filter(ou=ou)
+
+        qs = cls.get_statistics(
+            group_by_time=group_by_time, group_by_field='how', which_references=service, start=start, end=end
+        )
+        stats = Statistics(qs, time_interval=group_by_time)
+
+        for stat in qs:
+            stats.add(x_label=stat[group_by_time], y_label=stat['how'], value=stat['count'])
+
+        return stats.to_json(get_y_label=lambda x: _(login_method_label(x or '')))
+
+    @classmethod
+    def _get_method_statistics_by_reference(cls, group_by_time, reference, **kwargs):
+        """Count events by time and by referenced service ('service') or by that service's OU ('ou')."""
+        qs = cls.get_statistics(group_by_time, group_by_references=True, **kwargs)
+        stats = Statistics(qs, time_interval=group_by_time)
+        labels_cache = {}
+
+        def get_reference_label(instance_pk):
+            if instance_pk not in labels_cache:
+                # the referenced service may have been deleted since the event was recorded
+                service = Service.objects.filter(pk=instance_pk).first()
+                if service is None:
+                    labels_cache[instance_pk] = _('None')
+                else:
+                    labels_cache[instance_pk] = str(service if reference == 'service' else service.ou)
+            return labels_cache[instance_pk]
+
+        service_ct_id = ContentType.objects.get_for_model(Service).pk
+        for stat in qs:
+            for reference_id in stat['reference_ids'] or []:
+                content_type_id, instance_pk = n_2_pairing_rev(reference_id)
+                if content_type_id == service_ct_id:
+                    reference_label = get_reference_label(instance_pk)
+                    break
+            else:
+                reference_label = _('None')
+            stats.add(x_label=stat[group_by_time], y_label=reference_label, value=stat['count'])
+
+        return stats.to_json()
+
+    @classmethod
+    def get_service_statistics(cls, group_by_time, start=None, end=None, **kwargs):
+        return cls._get_method_statistics_by_reference(group_by_time, 'service', start=start, end=end)
+
+    @classmethod
+    def get_service_ou_statistics(cls, group_by_time, start=None, end=None, **kwargs):
+        return cls._get_method_statistics_by_reference(group_by_time, 'ou', start=start, end=end)
+
+
 def login_method_label(how):
     if how.startswith('password'):
         return _('password')
@@ -73,14 +136,10 @@ def get_attributes_label(attributes_new_values):
                 yield name
 
 
-class UserLogin(EventTypeWithService):
+class UserLogin(EventTypeWithHow):
     name = 'user.login'
     label = _('login')
 
-    @classmethod
-    def record(cls, user, session, service, how):
-        super().record(user=user, session=session, service=service, data={'how': how})
-
     @classmethod
     def get_message(cls, event, context):
         how = event.get_data('how')
@@ -115,14 +174,10 @@ class UserRegistrationRequest(EventTypeDefinition):
         return _('registration request with email "%s"') % email
 
 
-class UserRegistration(EventTypeWithService):
+class UserRegistration(EventTypeWithHow):
     name = 'user.registration'
     label = _('registration')
 
-    @classmethod
-    def record(cls, user, session, service, how):
-        super().record(user=user, session=session, service=service, data={'how': how})
-
     @classmethod
     def get_message(cls, event, context):
         how = event.get_data('how')
@@ -219,14 +274,10 @@ class UserDeletion(EventTypeWithService):
         super().record(user=user, session=session, service=service)
 
 
-class UserServiceSSO(EventTypeWithService):
+class UserServiceSSO(EventTypeWithHow):
     name = 'user.service.sso'
     label = _('service single sign on')
 
-    @classmethod
-    def record(cls, user, session, service, how):
-        super().record(user=user, session=session, service=service, data={'how': how})
-
     @classmethod
     def get_message(cls, event, context):
         service_name = cls.get_service_name(event)
diff --git a/tests/test_journal.py b/tests/test_journal.py
index 34f37653..194472ce 100644
--- a/tests/test_journal.py
+++ b/tests/test_journal.py
@@ -19,11 +19,14 @@ import random
 
 import mock
 import pytest
+import pytz
 
 from django.contrib.auth import get_user_model
 from django.core.management import call_command
 from django.utils.timezone import make_aware, make_naive
 
+from authentic2.a2_rbac.models import OrganizationalUnit as OU
+from authentic2.a2_rbac.utils import get_default_ou
 from authentic2.apps.journal.forms import JournalForm
 from authentic2.apps.journal.journal import Journal
 from authentic2.apps.journal.models import EventTypeDefinition, EventType, Event, clean_registry
@@ -443,3 +446,177 @@ def test_message_in_context_exception_handling(db, some_event_types, caplog):
     assert len(caplog.records) == 1
     assert caplog.records[0].levelname == 'ERROR'
     assert caplog.records[0].message == 'could not render message of event type "user.login"'
+
+
+@pytest.mark.parametrize('event_type_name', ['user.login', 'user.registration'])
+def test_statistics(db, event_type_name, freezer):
+    user = User.objects.create(username='john.doe', email='john.doe@example.com')
+    user2 = User.objects.create(username='jane.doe', email='jane.doe@example.com')
+    ou = OU.objects.create(name='Second OU')
+
+    portal = Service.objects.create(name='portal', slug='portal', ou=ou)
+    agendas = Service.objects.create(name='agendas', slug='agendas', ou=get_default_ou())
+    forms = Service.objects.create(name='forms', slug='forms', ou=get_default_ou())
+
+    method = {'how': 'password-on-https'}
+    method2 = {'how': 'fc'}
+
+    event_type = EventType.objects.get_for_name(event_type_name)
+
+    freezer.move_to('2020-02-03 12:00')
+    event = Event.objects.create(type=event_type, references=[user, portal], user=user, data=method)
+    event = Event.objects.create(type=event_type, references=[user2, portal], user=user2, data=method)
+
+    freezer.move_to('2020-02-03 13:00')
+    event = Event.objects.create(type=event_type, references=[user, portal], user=user, data=method2)
+    event = Event.objects.create(type=event_type, references=[user2, portal], user=user2, data=method2)
+
+    freezer.move_to('2020-03-03 12:00')
+    event = Event.objects.create(type=event_type, references=[user, portal], user=user, data=method)
+    event = Event.objects.create(type=event_type, references=[user, agendas], user=user, data=method)
+    event = Event.objects.create(type=event_type, references=[user, forms], user=user, data=method)
+    event = Event.objects.create(type=event_type, user=user)
+
+    event_type_definition = event_type.definition
+
+    stats = event_type_definition.get_method_statistics('timestamp')
+    stats['series'].sort(key=lambda x: x['label'])
+    assert stats == {
+        'x_labels': ['2020-02-03T12:00:00+00:00', '2020-02-03T13:00:00+00:00', '2020-03-03T12:00:00+00:00'],
+        'series': [
+            {'label': 'FranceConnect', 'data': [None, 2, None]},
+            {'label': 'none', 'data': [None, None, 1]},
+            {'label': 'password', 'data': [2, None, 3]},
+        ],
+    }
+
+    start = datetime(year=2020, month=2, day=3, hour=12, minute=30, tzinfo=pytz.UTC)
+    end = datetime(year=2020, month=2, day=3, hour=13, minute=30, tzinfo=pytz.UTC)
+    stats = event_type_definition.get_method_statistics('timestamp', start=start, end=end)
+    assert stats == {
+        'x_labels': ['2020-02-03T13:00:00+00:00'],
+        'series': [{'label': 'FranceConnect', 'data': [2]},],
+    }
+
+    stats = event_type_definition.get_method_statistics('month')
+    stats['series'].sort(key=lambda x: x['label'])
+    assert stats == {
+        'x_labels': ['2020-02', '2020-03'],
+        'series': [
+            {'label': 'FranceConnect', 'data': [2, None]},
+            {'label': 'none', 'data': [None, 1]},
+            {'label': 'password', 'data': [2, 3]},
+        ],
+    }
+
+    stats = event_type_definition.get_method_statistics('month', ou=get_default_ou())
+    assert stats == {
+        'x_labels': ['2020-03'],
+        'series': [{'label': 'password', 'data': [2]},],
+    }
+
+    stats = event_type_definition.get_method_statistics('month', ou=ou)
+    stats['series'].sort(key=lambda x: x['label'])
+    assert stats == {
+        'x_labels': ['2020-02', '2020-03'],
+        'series': [{'label': 'FranceConnect', 'data': [2, None]}, {'label': 'password', 'data': [2, 1]}],
+    }
+
+    stats = event_type_definition.get_method_statistics('month', service=portal)
+    stats['series'].sort(key=lambda x: x['label'])
+    assert stats == {
+        'x_labels': ['2020-02', '2020-03'],
+        'series': [{'label': 'FranceConnect', 'data': [2, None]}, {'label': 'password', 'data': [2, 1]}],
+    }
+
+    stats = event_type_definition.get_method_statistics('year')
+    stats['series'].sort(key=lambda x: x['label'])
+    assert stats == {
+        'x_labels': ['2020'],
+        'series': [
+            {'label': 'FranceConnect', 'data': [2]},
+            {'label': 'none', 'data': [1]},
+            {'label': 'password', 'data': [5]},
+        ],
+    }
+
+    stats = event_type_definition.get_service_statistics('month')
+    stats['series'].sort(key=lambda x: x['label'])
+    assert stats == {
+        'x_labels': ['2020-02', '2020-03'],
+        'series': [
+            {'label': 'None', 'data': [None, 1]},
+            {'label': 'agendas', 'data': [None, 1]},
+            {'label': 'forms', 'data': [None, 1]},
+            {'label': 'portal', 'data': [4, 1]},
+        ],
+    }
+
+    stats = event_type_definition.get_service_ou_statistics('month')
+    stats['series'].sort(key=lambda x: x['label'])
+    assert stats == {
+        'x_labels': ['2020-02', '2020-03'],
+        'series': [
+            {'label': 'Default organizational unit', 'data': [None, 2]},
+            {'label': 'None', 'data': [None, 1]},
+            {'label': 'Second OU', 'data': [4, 1]},
+        ],
+    }
+
+
+def test_statistics_fill_date_gaps(db, freezer):
+    user = User.objects.create(username='john.doe', email='john.doe@example.com')
+    method = {'how': 'password-on-https'}
+    event_type = EventType.objects.get_for_name('user.login')
+
+    freezer.move_to('2020-12-29 12:00')
+    event = Event.objects.create(type=event_type, data=method)
+    freezer.move_to('2021-01-02 13:00')
+    event = Event.objects.create(type=event_type, data=method)
+
+    event_type_definition = event_type.definition
+
+    stats = event_type_definition.get_method_statistics('day')
+    assert stats == {
+        'x_labels': ['2020-12-29', '2020-12-30', '2020-12-31', '2021-01-01', '2021-01-02'],
+        'series': [{'label': 'password', 'data': [1, None, None, None, 1]}],
+    }
+
+    Event.objects.all().delete()
+    freezer.move_to('2020-11-29 12:00')
+    event = Event.objects.create(type=event_type, data=method)
+    freezer.move_to('2022-02-02 13:00')
+    event = Event.objects.create(type=event_type, data=method)
+    stats = event_type_definition.get_method_statistics('month')
+    assert stats == {
+        'x_labels': ['2020-11', '2020-12'] + ['2021-%02d' % i for i in range(1, 13)] + ['2022-01', '2022-02'],
+        'series': [{'label': 'password', 'data': [1] + [None] * 14 + [1]}],
+    }
+
+    Event.objects.all().delete()
+    freezer.move_to('2020-11-29 12:00')
+    event = Event.objects.create(type=event_type, data=method)
+    freezer.move_to('2025-02-02 13:00')
+    event = Event.objects.create(type=event_type, data=method)
+    stats = event_type_definition.get_method_statistics('year')
+    assert stats == {
+        'x_labels': ['2020', '2021', '2022', '2023', '2024', '2025'],
+        'series': [{'label': 'password', 'data': [1, None, None, None, None, 1]}],
+    }
+
+
+def test_statistics_no_matching_event(db, freezer):
+    event_type = EventType.objects.get_for_name('user.login')
+    freezer.move_to('2020-02-03 12:00')
+    Event.objects.create(type=event_type, data={'how': 'password-on-https'})
+
+    start = datetime(year=2021, month=1, day=1, tzinfo=pytz.UTC)
+    for group_by_time in ('timestamp', 'day', 'month', 'year'):
+        stats = event_type.definition.get_method_statistics(group_by_time, start=start)
+        assert stats == {'x_labels': [], 'series': []}
+
+
+def test_statistics_unsupported_group_by_time(db):
+    event_type = EventType.objects.get_for_name('user.login')
+    with pytest.raises(ValueError):
+        event_type.definition.get_statistics('week')
-- 
2.20.1