0002-log-errors-for-down-connectors-when-it-persists-3538.patch
passerelle/base/forms.py | ||
---|---|---|
1 | 1 |
from django import forms |
2 | 2 | |
3 |
from .models import ApiUser, AccessRight |
|
3 |
from .models import ApiUser, AccessRight, AvailabilityParameters, parse_notification_delays |
|
4 | ||
4 | 5 | |
5 | 6 |
class ApiUserForm(forms.ModelForm): |
6 | 7 |
class Meta: |
... | ... | |
17 | 18 |
'resource_type': forms.HiddenInput(), |
18 | 19 |
'resource_pk': forms.HiddenInput(), |
19 | 20 |
} |
21 | ||
22 | ||
23 |
class AvailabilityParametersForm(forms.ModelForm): |
|
24 |
class Meta: |
|
25 |
model = AvailabilityParameters |
|
26 |
fields = ['run_check', 'notification_delays'] |
|
27 |
widgets = { |
|
28 |
'notification_delays': forms.TextInput, |
|
29 |
} |
passerelle/base/migrations/0014_auto_20190820_0914.py | ||
---|---|---|
1 |
# -*- coding: utf-8 -*- |
|
2 |
# Generated by Django 1.11.20 on 2019-08-20 14:14 |
|
3 |
from __future__ import unicode_literals |
|
4 | ||
5 |
from django.db import migrations, models |
|
6 |
import passerelle.base.models |
|
7 | ||
8 | ||
9 |
class Migration(migrations.Migration): |
|
10 | ||
11 |
dependencies = [ |
|
12 |
('base', '0013_delete_templatevar'), |
|
13 |
] |
|
14 | ||
15 |
operations = [ |
|
16 |
migrations.AddField( |
|
17 |
model_name='availabilityparameters', |
|
18 |
name='notification_delays', |
|
19 |
field=models.TextField(default=b'0', validators=[passerelle.base.models.validate_notification_delays]), |
|
20 |
), |
|
21 |
migrations.AddField( |
|
22 |
model_name='resourcestatus', |
|
23 |
name='last_notification_timestamp', |
|
24 |
field=models.DateTimeField(blank=True, null=True), |
|
25 |
), |
|
26 |
] |
passerelle/base/models.py | ||
---|---|---|
8 | 8 |
import sys |
9 | 9 |
import traceback |
10 | 10 |
import base64 |
11 |
import itertools |
|
11 | 12 | |
12 | 13 |
from django.apps import apps |
13 | 14 |
from django.conf import settings |
... | ... | |
19 | 20 |
from django.utils.text import slugify |
20 | 21 |
from django.utils import timezone, six |
21 | 22 |
from django.utils.translation import ugettext_lazy as _ |
23 |
from django.utils.timezone import now |
|
22 | 24 |
from django.core.files.base import ContentFile |
23 | 25 | |
24 | 26 |
from django.contrib.contenttypes.models import ContentType |
... | ... | |
400 | 402 |
if not self.availability_parameters.run_check: |
401 | 403 |
return |
402 | 404 | |
403 |
currently_down = self.down() |
|
405 |
availability_parameters = self.availability_parameters |
|
406 | ||
404 | 407 |
try: |
405 | 408 |
self.check_status() |
406 | 409 |
status = 'up' |
... | ... | |
408 | 411 |
except NotImplementedError: |
409 | 412 |
return |
410 | 413 |
except Exception as e: |
414 |
from passerelle.utils.conversion import exception_to_text |
|
415 | ||
411 | 416 |
status = 'down' |
412 |
message = repr(e)[:500]
|
|
417 |
message = exception_to_text(e)[:500]
|
|
413 | 418 | |
414 | 419 |
resource_type = ContentType.objects.get_for_model(self) |
415 | 420 |
current_status = ResourceStatus.objects.filter( |
416 |
resource_type=resource_type,
|
|
417 |
resource_pk=self.pk).first()
|
|
421 |
resource_type=resource_type, |
|
422 |
resource_pk=self.pk).first() |
|
418 | 423 |
if not current_status or status != current_status.status: |
419 |
if status == 'down' and not currently_down: |
|
420 |
self.logger.error(u'connector "%s" (%s) is now down', self, self.__class__.__name__) |
|
424 |
if status == 'down' and not self.down(): # new downtime |
|
425 |
if availability_parameters.has_zero_delay(): |
|
426 |
self.logger.error(u'connector "%s" (%s) is now down: %s', self, self.__class__.__name__, message) |
|
427 |
else: |
|
428 |
self.logger.warning(u'connector "%s" (%s) is now down: %s', self, self.__class__.__name__, message) |
|
421 | 429 |
ResourceStatus( |
422 | 430 |
resource_type=resource_type, |
423 | 431 |
resource_pk=self.pk, |
424 | 432 |
status=status, |
425 | 433 |
message=message).save() |
426 |
if status == 'up' and currently_down:
|
|
434 |
if status == 'up' and current_status:
|
|
427 | 435 |
self.logger.info(u'connector "%s" (%s) is back up', self, self.__class__.__name__) |
428 | 436 |
elif status == 'down': |
429 |
current_status.message = message |
|
430 |
current_status.save() |
|
437 |
# check last_notification_downtime and current downtime to see if it matches a new notification delay |
|
438 |
last_notification_timestamp = current_status.last_notification_timestamp or current_status.start_timestamp |
|
439 | ||
440 |
current_time = now() |
|
441 |
downtime = (current_time - current_status.start_timestamp).total_seconds() // 60 |
|
442 |
last_notification_downtime = ( |
|
443 |
last_notification_timestamp - current_status.start_timestamp).total_seconds() // 60 |
|
444 | ||
445 |
for delay in availability_parameters.notification_delays_generator(): |
|
446 |
if not delay: |
|
447 |
continue |
|
448 |
if downtime >= delay: |
|
449 |
if last_notification_downtime < delay: |
|
450 |
days = downtime // 1440 |
|
451 |
hours = downtime // 60 |
|
452 |
if days > 1: |
|
453 |
human_duration = 'for %d days' % days |
|
454 |
elif hours > 1: |
|
455 |
human_duration = 'for %d hours' % hours |
|
456 |
else: |
|
457 |
human_duration = 'for %d minutes' % downtime |
|
458 |
self.logger.error(u'connector "%s" (%s) is still down %s: %s', |
|
459 |
self, self.__class__.__name__, |
|
460 |
human_duration, |
|
461 |
message, |
|
462 |
# when connector is down, logging is shutdown |
|
463 |
force=True) |
|
464 |
ResourceStatus.objects.filter(pk=current_status.pk).update( |
|
465 |
message=message, last_notification_timestamp=current_time) |
|
466 |
break |
|
467 |
else: |
|
468 |
ResourceStatus.objects.filter(pk=current_status.pk).update( |
|
469 |
message=message) |
|
470 |
break |
|
471 |
else: |
|
472 |
ResourceStatus.objects.filter(pk=current_status.pk).update( |
|
473 |
message=message) |
|
431 | 474 | |
432 | 475 |
def hourly(self): |
433 | 476 |
pass |
... | ... | |
536 | 579 |
unique_together = (('resource_type', 'resource_pk')) |
537 | 580 | |
538 | 581 | |
582 |
def parse_notification_delays(value): |
|
583 |
delays = [int(v.strip()) for v in value.split(',')] |
|
584 |
if not all(delay >= 0 for delay in delays): |
|
585 |
raise ValueError |
|
586 |
if delays != sorted(delays): |
|
587 |
raise ValueError |
|
588 |
if len(set(delays)) != len(delays): |
|
589 |
raise ValueError |
|
590 |
return delays |
|
591 | ||
592 | ||
593 |
def validate_notification_delays(value): |
|
594 |
try: |
|
595 |
if not value: |
|
596 |
raise ValueError |
|
597 |
parse_notification_delays(value) |
|
598 |
except ValueError: |
|
599 |
raise ValidationError(_('You must provide a list of increasing minutes delays')) |
|
600 | ||
601 | ||
539 | 602 |
class AvailabilityParameters(models.Model): |
540 | 603 |
resource_type = models.ForeignKey(ContentType) |
541 | 604 |
resource_pk = models.PositiveIntegerField() |
... | ... | |
543 | 606 |
run_check = models.BooleanField( |
544 | 607 |
default=True, verbose_name=_('Run regular availability checks'), |
545 | 608 |
help_text=_('Run an availability check every 5 minutes')) |
609 |
notification_delays = models.TextField( |
|
610 |
default='0', |
|
611 |
blank=False, |
|
612 |
validators=[validate_notification_delays], |
|
613 |
help_text=_('Increasing delay between error notifications in minutes, ex.: 0,5,10')) |
|
614 | ||
615 |
def has_zero_delay(self): |
|
616 |
return 0 in parse_notification_delays(self.notification_delays) |
|
617 | ||
618 |
def notification_delays_generator(self): |
|
619 |
notification_delays = parse_notification_delays(self.notification_delays) |
|
620 |
last_notification_delay = notification_delays[-1] |
|
621 |
if last_notification_delay > 1: |
|
622 |
notification_delays = itertools.chain( |
|
623 |
notification_delays, |
|
624 |
itertools.count(2 * last_notification_delay, last_notification_delay)) |
|
625 |
return notification_delays |
|
546 | 626 | |
547 | 627 |
class Meta: |
548 | 628 |
unique_together = (('resource_type', 'resource_pk')) |
... | ... | |
608 | 688 |
start_timestamp = models.DateTimeField(auto_now_add=True) |
609 | 689 |
status = models.CharField(max_length=20, choices=STATUS_CHOICES, default='unknown') |
610 | 690 |
message = models.CharField(max_length=500, blank=True) |
691 |
last_notification_timestamp = models.DateTimeField(blank=True, null=True) |
|
611 | 692 | |
612 | 693 |
class Meta: |
613 | 694 |
ordering = ['-start_timestamp'] |
... | ... | |
634 | 715 |
return self._logger.getEffectiveLevel() |
635 | 716 | |
636 | 717 |
def _log(self, levelname, message, *args, **kwargs): |
637 |
if self.connector.down(): |
|
718 |
force = kwargs.pop('force', False) |
|
719 |
if self.connector.down() and not force: |
|
638 | 720 |
# don't log if the connector is known to be down |
639 | 721 |
return |
640 | 722 |
levelno = getattr(logging, levelname) |
passerelle/base/views.py | ||
---|---|---|
15 | 15 |
# along with this program. If not, see <http://www.gnu.org/licenses/>. |
16 | 16 | |
17 | 17 |
from django.contrib.contenttypes.models import ContentType |
18 |
from django.contrib import messages |
|
18 | 19 |
from django.core.urlresolvers import reverse |
19 | 20 |
from django.core.exceptions import ObjectDoesNotExist, PermissionDenied |
20 | 21 |
from django.forms import models as model_forms |
21 | 22 |
from django.views.generic import ( |
22 | 23 |
DetailView, ListView, CreateView, UpdateView, DeleteView, FormView) |
23 | 24 |
from django.http import Http404 |
25 |
from django.utils.translation import ugettext_lazy as _ |
|
24 | 26 | |
25 |
from .models import ApiUser, AccessRight, LoggingParameters, AvailabilityParameters, ResourceStatus
|
|
26 |
from .forms import ApiUserForm, AccessRightForm |
|
27 |
from .models import ApiUser, AccessRight, LoggingParameters, ResourceStatus |
|
28 |
from .forms import ApiUserForm, AccessRightForm, AvailabilityParametersForm
|
|
27 | 29 |
from ..utils import get_trusted_services |
28 | 30 | |
29 | 31 | |
... | ... | |
155 | 157 |
return super(LoggingParametersUpdateView, self).form_valid(form) |
156 | 158 | |
157 | 159 | |
158 |
class ManageAvailabilityView(FormView):
|
|
160 |
class ManageAvailabilityView(UpdateView):
|
|
159 | 161 |
template_name = 'passerelle/manage/manage_availability_form.html' |
162 |
form_class = AvailabilityParametersForm |
|
163 | ||
164 |
def get_resource(self): |
|
165 |
if not hasattr(self, '_resource'): |
|
166 |
content_type = ContentType.objects.get_for_id(self.kwargs['resource_type']) |
|
167 |
self._resource = content_type.model_class().objects.get(pk=self.kwargs['resource_pk']) |
|
168 |
return self._resource |
|
169 | ||
170 |
def get_object(self, queryset=None): |
|
171 |
return self.get_resource().availability_parameters |
|
160 | 172 | |
161 | 173 |
def get_context_data(self, **kwargs): |
162 | 174 |
context = super(ManageAvailabilityView, self).get_context_data(**kwargs) |
163 |
connector = self.get_resource() |
|
164 |
context['connector'] = connector |
|
165 |
context['availability_status'] = connector.get_availability_status() |
|
175 |
context['connector'] = self.get_resource() |
|
176 |
context['availability_status'] = self.get_resource().get_availability_status() |
|
166 | 177 |
return context |
167 | 178 | |
168 |
def get_form_class(self): |
|
169 |
form_class = model_forms.modelform_factory( |
|
170 |
AvailabilityParameters, |
|
171 |
fields=['run_check']) |
|
172 |
return form_class |
|
173 | ||
174 |
def get_initial(self): |
|
175 |
d = self.initial.copy() |
|
176 |
d['resource_type'] = self.kwargs['resource_type'] |
|
177 |
d['resource_pk'] = self.kwargs['resource_pk'] |
|
178 |
d['run_check'] = self.get_resource().availability_parameters.run_check |
|
179 |
return d |
|
180 | ||
181 |
def get_resource(self): |
|
182 |
content_type = ContentType.objects.get_for_id(self.kwargs['resource_type']) |
|
183 |
return content_type.model_class().objects.get(pk=self.kwargs['resource_pk']) |
|
184 | ||
185 | 179 |
def get_success_url(self): |
186 | 180 |
return self.get_resource().get_absolute_url() |
187 | 181 | |
188 | 182 |
def form_valid(self, form): |
189 | 183 |
resource = self.get_resource() |
190 |
parameters = resource.availability_parameters |
|
191 |
run_check = form.cleaned_data['run_check'] |
|
192 | 184 | |
193 |
if not run_check and resource.down(): |
|
185 |
# if availability check is disabled resource is forced to up to activate logs |
|
186 |
if not form.instance.run_check and resource.down(): |
|
194 | 187 |
resource_type = ContentType.objects.get_for_model(resource) |
195 | 188 |
ResourceStatus( |
196 | 189 |
resource_type=resource_type, |
... | ... | |
198 | 191 |
status='up', |
199 | 192 |
message='').save() |
200 | 193 | |
201 |
if parameters.run_check != run_check: |
|
202 |
parameters.run_check = run_check |
|
203 |
parameters.save() |
|
204 |
resource.logger.info(u'availability checks %s', 'enabled' if run_check else 'disabled') |
|
194 |
# log changes to notification delays |
|
195 |
if 'notification_delays' in form.changed_data: |
|
196 |
resource.logger.info(u'availability checks delays set to %s', form.instance.notification_delays) |
|
197 | ||
198 |
# log changes to run_check, if enabled immediately check for availability |
|
199 |
if 'run_check' in form.changed_data: |
|
200 |
resource.logger.info(u'availability checks %s', 'enabled' if form.instance.run_check else 'disabled') |
|
201 |
if form.instance.run_check: |
|
202 |
resource.availability() |
|
205 | 203 | |
206 | 204 |
return super(ManageAvailabilityView, self).form_valid(form) |
tests/test_manager.py | ||
---|---|---|
248 | 248 |
assert not csv.down() |
249 | 249 |
status = csv.get_availability_status() |
250 | 250 |
assert status.status == 'up' |
251 | ||
252 |
# Notification delays |
|
253 |
resp = app.get(csv.get_absolute_url()) |
|
254 |
resp = resp.click('availability check') |
|
255 |
assert resp.form['notification_delays'].value == '0' |
|
256 |
resp.form['notification_delays'] = '0,5,100' |
|
257 |
resp = resp.form.submit().follow() |
|
258 |
assert not resp.pyquery('.messages .warning') |
|
259 |
assert csv.availability_parameters.notification_delays == '0,5,100' |
|
260 | ||
261 |
resp = app.get(csv.get_absolute_url()) |
|
262 |
resp = resp.click('availability check') |
|
263 |
resp.form['notification_delays'] = 'x' |
|
264 |
resp = resp.form.submit() |
|
265 |
assert len(resp.pyquery('#id_notification_delays_p .error')) |
tests/test_proxylogger.py | ||
---|---|---|
1 | 1 |
# -*- coding: utf-8 -*- |
2 | 2 | |
3 | 3 |
import logging |
4 |
import datetime |
|
5 |
import itertools |
|
4 | 6 | |
5 | 7 |
import pytest |
6 |
from httmock import all_requests, HTTMock |
|
8 |
from httmock import HTTMock |
|
9 | ||
10 |
from django.core.exceptions import ValidationError |
|
7 | 11 | |
8 | 12 |
from passerelle.base.models import ProxyLogger, ResourceLog |
9 | 13 |
from passerelle.apps.feeds.models import Feed |
... | ... | |
66 | 70 |
pr.debug(u'some message') |
67 | 71 |
assert len(ResourceLog.objects.all()) == 0 |
68 | 72 | |
69 |
def test_log_on_connector_availability_change(db, connector): |
|
73 | ||
74 |
def test_validate_notification_delays(db, connector): |
|
75 |
def take(iterator, count): |
|
76 |
return list(itertools.compress(iterator, range(1, count + 1))) |
|
77 | ||
78 |
availability_parameters = connector.availability_parameters |
|
79 |
assert availability_parameters.notification_delays == '0' |
|
80 | ||
81 |
availability_parameters.full_clean() |
|
82 |
assert availability_parameters.has_zero_delay() |
|
83 |
assert list(availability_parameters.notification_delays_generator()) == [0] |
|
84 | ||
85 |
availability_parameters.notification_delays = '0,5,100,1000' |
|
86 |
availability_parameters.full_clean() |
|
87 |
assert availability_parameters.has_zero_delay() |
|
88 |
assert take(availability_parameters.notification_delays_generator(), 6) == [0, 5, 100, 1000, 2000, 3000] |
|
89 | ||
90 |
availability_parameters.notification_delays = '5,100,1000' |
|
91 |
availability_parameters.full_clean() |
|
92 |
assert not availability_parameters.has_zero_delay() |
|
93 |
assert take(availability_parameters.notification_delays_generator(), 6) == [5, 100, 1000, 2000, 3000, 4000] |
|
94 | ||
95 |
availability_parameters.notification_delays = '5' |
|
96 |
availability_parameters.full_clean() |
|
97 |
assert not availability_parameters.has_zero_delay() |
|
98 |
assert take(availability_parameters.notification_delays_generator(), 3) == [5, 10, 15] |
|
99 | ||
100 |
# validation errors |
|
101 |
availability_parameters.notification_delays = '5,100,100' |
|
102 |
with pytest.raises(ValidationError): |
|
103 |
availability_parameters.full_clean() |
|
104 | ||
105 |
availability_parameters.notification_delays = '' |
|
106 |
with pytest.raises(ValidationError): |
|
107 |
availability_parameters.full_clean() |
|
108 | ||
109 |
availability_parameters.notification_delays = '1,x' |
|
110 |
with pytest.raises(ValidationError): |
|
111 |
availability_parameters.full_clean() |
|
112 | ||
113 |
availability_parameters.notification_delays = '10,1' |
|
114 |
with pytest.raises(ValidationError): |
|
115 |
availability_parameters.full_clean() |
|
116 | ||
117 | ||
118 |
@pytest.mark.parametrize('notification_delays', ['0', '0,5,100', '5,100']) |
|
119 |
def test_log_on_connector_availability_change(db, connector, freezer, notification_delays): |
|
70 | 120 |
connector.title = u'éléphant' |
121 |
availability_parameters = connector.availability_parameters |
|
122 |
availability_parameters.notification_delays = notification_delays |
|
123 |
availability_parameters.save() |
|
124 | ||
71 | 125 |
with HTTMock(up_mock): # set connector as up |
72 | 126 |
connector.availability() |
127 |
assert ResourceLog.objects.count() == 1 |
|
128 |
assert ResourceLog.objects.latest('id').level == 'info' |
|
129 |
assert ResourceLog.objects.latest('id').message == 'GET http://example.net/ (=> 200)' |
|
73 | 130 |
ResourceLog.objects.all().delete() |
131 | ||
132 |
# move 5 minutes in the future |
|
133 |
freezer.move_to(datetime.timedelta(seconds=60 * 5 + 1)) |
|
134 | ||
74 | 135 |
with HTTMock(down_mock): # set connector as down |
75 | 136 |
connector.availability() |
76 |
assert len(ResourceLog.objects.all()) == 2 |
|
137 |
assert connector.down() |
|
138 |
last_count1 = ResourceLog.objects.count() |
|
139 |
assert last_count1 == 2 |
|
77 | 140 |
assert ResourceLog.objects.all()[0].message == 'GET http://example.net/ (=> 404)' |
78 |
assert ResourceLog.objects.all()[1].level == 'error' |
|
79 |
assert ResourceLog.objects.all()[1].message == u'connector "éléphant" (Feed) is now down' |
|
141 |
assert ResourceLog.objects.all()[1].level == 'error' if notification_delays.startswith('0') else 'warning' |
|
142 |
assert (u'connector "éléphant" (Feed) is now down: 404 Client' |
|
143 |
in ResourceLog.objects.all()[1].message) |
|
144 | ||
145 |
# move 5 minutes in the future |
|
146 |
freezer.move_to(datetime.timedelta(seconds=60 * 5 + 1)) |
|
147 | ||
148 |
# second time log as error |
|
149 |
with HTTMock(down_mock): # connector is still down |
|
150 |
connector.availability() |
|
151 |
assert connector.down() |
|
152 |
last_count2 = ResourceLog.objects.count() |
|
153 |
if notification_delays == '0': |
|
154 |
assert last_count2 == last_count1 |
|
155 |
else: |
|
156 |
assert last_count2 == last_count1 + 1 |
|
157 |
assert ResourceLog.objects.all()[2].level == 'error' if notification_delays != '0' else 'warning' |
|
158 |
assert (u'connector "éléphant" (Feed) is still down for 5 minutes: 404' |
|
159 |
in ResourceLog.objects.all()[2].message) |
|
160 | ||
161 |
# move 3 minutes in the future |
|
162 |
freezer.move_to(datetime.timedelta(seconds=60 * 3 + 1)) |
|
163 | ||
164 |
# third time no log |
|
165 |
with HTTMock(down_mock): # connector is still down |
|
166 |
connector.availability() |
|
167 |
assert ResourceLog.objects.count() == last_count2 |
|
168 | ||
169 |
# move 3 minutes in the future |
|
170 |
freezer.move_to(datetime.timedelta(seconds=60 * 3 + 1)) |
|
171 | ||
172 |
with HTTMock(up_mock): # set connector as up |
|
173 |
connector.availability() |
|
174 |
assert not connector.down() |
|
175 |
last_count3 = ResourceLog.objects.count() |
|
176 |
assert last_count3 == last_count2 + 1 |
|
177 |
assert ResourceLog.objects.latest('id').level == 'info' |
|
178 |
assert ResourceLog.objects.latest('id').message == u'connector "éléphant" (Feed) is back up' |
|
179 | ||
180 |
# move 3 minutes in the future |
|
181 |
freezer.move_to(datetime.timedelta(seconds=60 * 3 + 1)) |
|
80 | 182 | |
183 |
# last message is a GET |
|
81 | 184 |
with HTTMock(up_mock): # set connector as up |
82 | 185 |
connector.availability() |
83 |
assert len(ResourceLog.objects.all()) == 3 |
|
84 |
assert ResourceLog.objects.all()[2].level == 'info' |
|
85 |
assert ResourceLog.objects.all()[2].message == u'connector "éléphant" (Feed) is back up' |
|
186 |
assert not connector.down() |
|
187 |
last_count4 = ResourceLog.objects.count() |
|
188 |
assert last_count4 == last_count3 + 1 |
|
189 |
assert ResourceLog.objects.latest('id').level == 'info' |
|
190 |
assert ResourceLog.objects.latest('id').message == 'GET http://example.net/ (=> 200)' |
tox.ini | ||
---|---|---|
35 | 35 |
http://quixote.python.ca/releases/Quixote-2.7b2.tar.gz |
36 | 36 |
vobject |
37 | 37 |
django-ratelimit |
38 |
pyquery |
|
38 | 39 |
commands = |
39 | 40 |
./get_wcs.sh |
40 | 41 |
django111: py.test {posargs: {env:FAST:} --junitxml=test_{envname}_results.xml --cov-report xml --cov-report html --cov=passerelle/ --cov-config .coveragerc tests/} |
41 |
- |