From 3ae5566f2979b08a676f9c88de6f86dddb004e88 Mon Sep 17 00:00:00 2001 From: Serghei Mihai Date: Wed, 28 Sep 2016 14:53:10 +0200 Subject: [PATCH] create announces from external RSS feed (#12919) --- corbo/forms.py | 4 +- corbo/migrations/0007_auto_20160928_1454.py | 24 +++++++++++ corbo/models.py | 47 +++++++++++++++++++++ corbo/templates/corbo/category_detail.html | 2 +- corbo/templates/corbo/category_form.html | 2 +- debian/control | 4 +- debian/corbo.cron.d | 1 + requirements.txt | 2 + setup.py | 2 + tests/test_announces.py | 63 +++++++++++++++++++++++++++++ tox.ini | 1 + 11 files changed, 147 insertions(+), 5 deletions(-) create mode 100644 corbo/migrations/0007_auto_20160928_1454.py create mode 100644 tests/test_announces.py diff --git a/corbo/forms.py b/corbo/forms.py index 3ec9b00..68ae4ea 100644 --- a/corbo/forms.py +++ b/corbo/forms.py @@ -8,7 +8,7 @@ class AnnounceForm(forms.ModelForm): class Meta: model = Announce - fields = '__all__' + exclude = ('identifier',) widgets = { 'publication_time': forms.TextInput(attrs={'class': 'datetimepicker', 'readonly': True}), @@ -26,5 +26,5 @@ class AnnounceForm(forms.ModelForm): class CategoryForm(forms.ModelForm): class Meta: - fields = ('name', ) + fields = ('name', 'rss_feed_url') model = Category diff --git a/corbo/migrations/0007_auto_20160928_1454.py b/corbo/migrations/0007_auto_20160928_1454.py new file mode 100644 index 0000000..1e7c73a --- /dev/null +++ b/corbo/migrations/0007_auto_20160928_1454.py @@ -0,0 +1,24 @@ +# -*- coding: utf-8 -*- +from __future__ import unicode_literals + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('corbo', '0006_auto_20160928_0833'), + ] + + operations = [ + migrations.AddField( + model_name='announce', + name='identifier', + field=models.CharField(max_length=256, null=True, blank=True), + ), + migrations.AddField( + model_name='category', + name='rss_feed_url', + field=models.URLField(help_text='if defined, announces will be automatically created from rss items', null=True, verbose_name='Feed URL', blank=True), + ), + ] diff --git a/corbo/models.py b/corbo/models.py index 6ece581..c8ef7ac 100644 --- a/corbo/models.py +++ b/corbo/models.py @@ -1,11 +1,15 @@ import os +import io import hashlib +from time import mktime from datetime import datetime import logging import urlparse from html2text import HTML2Text from emails.django import Message from lxml.etree import HTML as HTMLTree +import requests +import feedparser from django.utils import timezone from django.conf import settings @@ -39,6 +43,8 @@ def transform_image_src(src, **kwargs): class Category(models.Model): name = models.CharField(_('Name'), max_length=64, blank=False, null=False) + rss_feed_url = models.URLField(_('Feed URL'), blank=True, null=True, + help_text=_('if defined, announces will be automatically created from rss items')) ctime = models.DateTimeField(auto_now_add=True) def __unicode__(self): @@ -50,11 +56,52 @@ class Category(models.Model): def get_subscriptions_count(self): return self.subscription_set.all().count() + def save(self, *args, **kwargs): + super(Category, self).save(*args, **kwargs) + if not self.rss_feed_url: + return + feed_response = requests.get(self.rss_feed_url) + if feed_response.ok: + content = feedparser.parse(feed_response.content) + for entry in content.get('entries', []): + substitutions = [] + published = datetime.fromtimestamp(mktime(entry.published_parsed)) + html_tree = HTMLTree(entry['summary']) + storage = DefaultStorage() + for img in html_tree.xpath('//img/@src'): + image_name = os.path.basename(img) + r = requests.get(img) + new_content = r.content + if storage.exists(image_name): + old_content = storage.open(image_name).read() + old_hash = hashlib.md5(old_content).hexdigest() + new_hash = hashlib.md5(new_content).hexdigest() + substitutions.append((img, storage.url(image_name))) + if new_hash == old_hash: + continue + new_image_name = storage.save(image_name, io.BytesIO(new_content)) + substitutions.append((img, storage.url(new_image_name))) + + text = entry['summary'] + for old, new in substitutions: + text = text.replace(old, new) + + announce, created = Announce.objects.get_or_create(identifier=entry['id'], + category=self) + announce.title = entry['title'] + announce.text = text + announce.publication_time = published + announce.save() + + if created: + Broadcast.objects.get_or_create(announce=announce) + class Announce(models.Model): category = models.ForeignKey('Category', verbose_name=_('category')) title = models.CharField(_('title'), max_length=256, help_text=_('maximum 256 characters')) + identifier = models.CharField(max_length=256, null=True, blank=True) text = RichTextField(_('Content')) publication_time = models.DateTimeField(_('publication time'), blank=True, null=True) diff --git a/corbo/templates/corbo/category_detail.html b/corbo/templates/corbo/category_detail.html index 5b7b9b2..8fc7dce 100644 --- a/corbo/templates/corbo/category_detail.html +++ b/corbo/templates/corbo/category_detail.html @@ -11,7 +11,7 @@ {% block appbar %}

{{ object.name }}

{% trans 'Delete' %} -{% trans 'Rename' %} +{% trans 'Edit' %} {% trans 'New announce' %} {% endblock %} diff --git a/corbo/templates/corbo/category_form.html b/corbo/templates/corbo/category_form.html index 560ab33..23a77c9 100644 --- a/corbo/templates/corbo/category_form.html +++ b/corbo/templates/corbo/category_form.html @@ -3,7 +3,7 @@ {% block appbar %} {% if object %} -

{% trans "Modify Category" %}

+

{% trans "Edit Category" %}

{% else %}

{% trans "New Category" %}

{% endif %} diff --git a/debian/control b/debian/control index f2e2477..dbd88db 100644 --- a/debian/control +++ b/debian/control @@ -12,7 +12,9 @@ Depends: ${misc:Depends}, ${python:Depends}, python-django (>= 1.7), python-django-ckeditor, python-gadjo, - python-emails + python-emails, + python-requests, + python-feedparser Description: Announces Manager Package: corbo diff --git a/debian/corbo.cron.d b/debian/corbo.cron.d index 46d2506..19112e1 100644 --- a/debian/corbo.cron.d +++ b/debian/corbo.cron.d @@ -1,3 +1,4 @@ PATH=/usr/local/sbin:/usr/local/bin:/sbin:/bin:/usr/sbin:/usr/bin */5 * * * * corbo corbo-manage tenant_command send_announces --all-tenants +0 * * * * corbo corbo-manage tenant_command sync_external_feeds --all-tenants diff --git a/requirements.txt b/requirements.txt index 3b27fff..a08d428 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,4 +3,6 @@ django-ckeditor<4.5.3 djangorestframework>=3.3,<3.4 html2text emails +feedparser +requests -e git+http://repos.entrouvert.org/gadjo.git/#egg=gadjo diff --git a/setup.py b/setup.py index 4804c7d..2ea6ba7 100644 --- a/setup.py +++ b/setup.py @@ -100,6 +100,8 @@ setup( 'gadjo', 'emails', 'lxml', + 'feedparser', + 'requests' ], zip_safe=False, cmdclass={ diff --git a/tests/test_announces.py b/tests/test_announces.py new file mode 100644 index 0000000..68376f9 --- /dev/null +++ b/tests/test_announces.py @@ -0,0 +1,63 @@ +import os +import pytest +import mock +import feedparser + +from django.core.files.storage import DefaultStorage + +from corbo.models import Category + +pytestmark = pytest.mark.django_db + +CATEGORIES = ('Alerts',) + +ATOM_FEED = """ + + + tag:linuxfr.org,2005:/news + Sample RSS Feeds + 2016-09-16T10:29:46+02:00 + + tag:linuxfr.org,2005:News/37537 + 2016-09-16T10:29:46+02:00 + 2016-09-16T11:27:00+02:00 + Feed entry sample + + <img src="http://example.com/logo.png"> + Feed entry content + + + Foo Bar + + + +""" + +def mocked_request_get(*args, **kwargs): + storage = DefaultStorage() + class MockResponse: + + def __init__(self, content): + self.ok = True + self.content = content + + if args[0] == 'http://example.com/atom': + return MockResponse(ATOM_FEED) + else: + logo_path = os.path.join(os.path.dirname(__file__), 'media', 'logo.png') + print "RETURNING: %s" % logo_path + return MockResponse(file(logo_path).read()) + + +@mock.patch('corbo.models.requests.get', side_effect=mocked_request_get) +def test_announces_from_feed(mocked_get): + storage = DefaultStorage() + feed_content = feedparser.parse(ATOM_FEED) + for category in CATEGORIES: + c = Category.objects.create(name=category, rss_feed_url='http://example.com/atom') + assert c.announce_set.count() == len(feed_content['entries']) + for announce in c.announce_set.all(): + assert announce.title in [feed['title'] for feed in feed_content['entries']] + assert storage.url('logo.png') in announce.text + # cleanup uploaded images + os.unlink(storage.path('logo.png')) diff --git a/tox.ini b/tox.ini index ecf4da4..c51eb4f 100644 --- a/tox.ini +++ b/tox.ini @@ -19,5 +19,6 @@ deps = djangorestframework>=3.3,<3.4 pylint==1.4.0 astroid==1.3.2 + mock commands = py.test {env:COVERAGE:} {posargs:tests/} -- 2.9.3