48 lines
1.0 KiB
Python
48 lines
1.0 KiB
Python
#!/usr/bin/env python2
|
|
# -*- coding: utf-8 -*-
|
|
|
|
import os
|
|
|
|
import feedparser
|
|
from lxml import html
|
|
|
|
URLS_LIST = 'urls.lst'
|
|
RSS_URL = 'https://htt-spirkelbach.de/feed/'
|
|
|
|
def load_old_urls():
|
|
urls = []
|
|
|
|
if os.path.isfile(URLS_LIST):
|
|
for url in open(URLS_LIST, 'r'):
|
|
urls += [url.strip()]
|
|
|
|
return urls
|
|
|
|
|
|
def save_old_urls(urls):
|
|
urls_file = open(URLS_LIST, 'w')
|
|
for url in urls:
|
|
urls_file.write('%s\n' % url)
|
|
urls_file.close()
|
|
|
|
|
|
def get_new_items():
|
|
old_urls = load_old_urls()
|
|
rss = feedparser.parse(RSS_URL)
|
|
|
|
new_items = []
|
|
|
|
for item in sorted(rss.entries, key=lambda x: x.published_parsed):
|
|
url = item.link
|
|
if not (url in old_urls):
|
|
if len(item.description) > 0:
|
|
text = html.fromstring(item.description).text_content()
|
|
if len(text) > 1:
|
|
text = ' '.join(text.split())
|
|
new_items += [u'%s…\n%s' % (text[0:80], url)]
|
|
old_urls += [url]
|
|
|
|
save_old_urls(old_urls)
|
|
|
|
return new_items
|