htt-daemon/rss.py

48 lines
1.0 KiB
Python

#!/usr/bin/env python2
# -*- coding: utf-8 -*-
import os
import feedparser
from lxml import html
# File holding the already-processed URLs, one per line. The path is relative,
# so it is resolved against the process's current working directory.
URLS_LIST = 'urls.lst'
# Feed address handed to feedparser.parse() by get_new_items().
RSS_URL = 'https://htt-spirkelbach.de/feed/'
def load_old_urls(path=None):
    """Return the URLs recorded by earlier runs.

    Args:
        path: File to read, one URL per line. Defaults to ``URLS_LIST``.

    Returns:
        A list with one whitespace-stripped entry per line of the file, in
        file order. An empty list is returned when ``path`` is not an
        existing regular file.
    """
    if path is None:
        path = URLS_LIST
    if not os.path.isfile(path):
        return []
    # The context manager closes the handle even if reading fails part-way.
    with open(path, 'r') as urls_file:
        return [line.strip() for line in urls_file]
def save_old_urls(urls, path=None):
    """Overwrite the URL list file with ``urls``, one URL per line.

    Args:
        urls: Iterable of URL strings to persist.
        path: File to write. Defaults to ``URLS_LIST``.
    """
    if path is None:
        path = URLS_LIST
    # ``with`` guarantees the file is flushed and closed even when a write
    # raises, so a failure cannot leak the handle.
    with open(path, 'w') as urls_file:
        for url in urls:
            urls_file.write('%s\n' % url)
def get_new_items():
    """Fetch the feed and build announcements for entries not seen before.

    Entries are handled in ascending order of their ``published_parsed``
    value. For each entry whose link is not yet in the stored URL list, the
    HTML description is reduced to plain text with whitespace collapsed.
    If more than one character of text remains, an announcement is created
    and the link is added to the stored list. The list is written back with
    ``save_old_urls`` before returning.

    Returns:
        A list of unicode strings, each consisting of the first 80 characters
        of the entry text, a newline, and the entry URL.
    """
    old_urls = load_old_urls()
    # Set copy for O(1) membership tests; the list keeps the on-disk order.
    seen = set(old_urls)
    rss = feedparser.parse(RSS_URL)
    new_items = []
    # Entries without a parsed date sort first instead of aborting the run.
    entries = sorted(
        rss.entries,
        key=lambda entry: entry.get('published_parsed') or (),
    )
    for item in entries:
        url = item.get('link')
        if not url or url in seen:
            continue
        # A missing or blank description cannot be parsed as HTML; skip it.
        description = (item.get('description') or '').strip()
        if not description:
            continue
        raw_text = html.fromstring(description).text_content()
        # Collapse whitespace before the length check so whitespace-only
        # content does not produce an announcement with an empty title.
        text = ' '.join(raw_text.split())
        if len(text) > 1:
            new_items.append(u'%s\n%s' % (text[0:80], url))
            # NOTE(review): the URL is recorded only when an item is emitted;
            # confirm this matches the intended nesting of the original code.
            old_urls.append(url)
            seen.add(url)
    save_old_urls(old_urls)
    return new_items