From 13d5143e6510c7621f7420e9c8c818a1b7ba166f Mon Sep 17 00:00:00 2001 From: Adam Sampson Date: Mon, 5 May 2003 02:19:47 +0000 Subject: [PATCH] Move most of the code into rawdoglib.rawdog. --- rawdog | 365 +--------------------------------------- rawdoglib/__init__.py | 2 +- rawdoglib/rawdog.py | 379 ++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 385 insertions(+), 361 deletions(-) create mode 100644 rawdoglib/rawdog.py diff --git a/rawdog b/rawdog index 58a7db7..7db2277 100644 --- a/rawdog +++ b/rawdog @@ -17,366 +17,11 @@ # Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, # MA 02111-1307 USA, or see http://www.gnu.org/. -VERSION = "0.4" -import rawdoglib.rssparser as rssparser -import sys, pickle, os, fcntl, time, sha - -def format_time(secs, config): - """Format a time and date nicely.""" - t = time.localtime(secs) - return time.strftime(config["timeformat"], t) + ", " + time.strftime(config["dayformat"], t) - -class Feed: - """An RSS feed.""" - - def __init__(self, url, period): - self.url = url - self.period = period - self.etag = None - self.modified = None - self.title = None - self.link = None - self.last_update = 0 - - def update(self, articles, now): - if (now - self.last_update) < (self.period * 60): return - self.last_update = now - - try: - p = rssparser.parse(self.url, self.etag, - self.modified, "rawdog/" + VERSION) - except: - print "Error fetching " + self.url - return - - self.etag = p.get("etag") - self.modified = p.get("modified") - # In the event that the feed hasn't changed, then both channel - # and feed will be empty. - - channel = p["channel"] - if channel.has_key("title"): - self.title = channel["title"] - if channel.has_key("link"): - self.link = channel["link"] - - feed = self.url - for item in p["items"]: - title = item.get("title") - link = item.get("link") - if item.has_key("content_encoded"): - description = item["content_encoded"] - else: - description = item.get("description") - - article = Article(feed, title, link, description, now) - - if articles.has_key(article.hash): - articles[article.hash].last_seen = now - else: - articles[article.hash] = article - - def get_html_name(self): - if self.title is not None: - return self.title - elif self.link is not None: - return self.link - else: - return self.url - - def get_html_link(self): - s = self.get_html_name() - if self.link is not None: - return '' + s + '' - else: - return s - -class Article: - """An article retrieved from an RSS feed.""" - - def __init__(self, feed, title, link, description, now): - self.feed = feed - self.title = title - self.link = link - self.description = description - - s = str(feed) + str(title) + str(link) + str(description) - self.hash = sha.new(s).hexdigest() - - self.last_seen = now - self.added = now - - def can_expire(self, now): - return ((now - self.last_seen) > (24 * 60 * 60)) - -class DayWriter: - """Utility class for writing day sections into a series of articles.""" - - def __init__(self, file, config): - self.lasttime = [-1, -1, -1, -1, -1] - self.file = file - self.counter = 0 - self.config = config - - def start_day(self, tm): - print >>self.file, '
<div class="day">'
		day = time.strftime(self.config["dayformat"], tm)
		print >>self.file, '<h2>' + day + '</h2>'
		self.counter += 1

	def start_time(self, tm):
		print >>self.file, '<div class="time">'
		clock = time.strftime(self.config["timeformat"], tm)
		print >>self.file, '<h3>' + clock + '</h3>'
		self.counter += 1

	def time(self, s):
		tm = time.localtime(s)
		if tm[:3] != self.lasttime[:3]:
			self.close(0)
			self.start_day(tm)
		if tm[:6] != self.lasttime[:6]:
			self.close(1)
			self.start_time(tm)
		self.lasttime = tm

	def close(self, n = 0):
		while self.counter > n:
			print >>self.file, "</div>"
			self.counter -= 1

class Rawdog:
	"""The aggregator itself."""

	def __init__(self):
		self.feeds = {}
		self.articles = {}
		self.last_update = 0

	def list(self):
		for url in self.feeds.keys():
			feed = self.feeds[url]
			print url
			print " Title:", feed.title
			print " Link:", feed.link

	def update(self, config):
		now = time.time()

		seenfeeds = {}
		for (url, period) in config["feedslist"]:
			seenfeeds[url] = 1
			if not self.feeds.has_key(url):
				self.feeds[url] = Feed(url, period)
			else:
				self.feeds[url].period = period
		for url in self.feeds.keys():
			if not seenfeeds.has_key(url):
				del self.feeds[url]
			else:
				self.feeds[url].update(self.articles, now)

		for key in self.articles.keys():
			if self.articles[key].can_expire(now) or not self.feeds.has_key(self.articles[key].feed):
				del self.articles[key]

		self.last_update = now
		self.changed = 1

	def write(self, config):
		outputfile = config["outputfile"]
		now = time.time()

		f = open(outputfile + ".new", "w")

		refresh = 24 * 60
		for feed in self.feeds.values():
			if feed.period < refresh: refresh = feed.period

		print >>f, """<html>
<head>
"""
		if config["userefresh"]:
			print >>f, """<meta http-equiv="Refresh" content=\"""" + str(refresh * 60) + """\">"""
		print >>f, """<title>rawdog</title>
<link rel="stylesheet" href="style.css" type="text/css">
</head>
<body>
<div id="items">
"""

		dw = DayWriter(f, config)

		articles = self.articles.values()
		articles.sort(lambda a, b: cmp(b.added, a.added))
		articles = articles[:config["maxarticles"]]

		for article in articles:
			dw.time(article.added)

			feed = self.feeds[article.feed]
			f.write('<div class="item">\n')
			f.write('<p class="itemheader">\n')

			title = article.title
			link = article.link
			description = article.description
			if title is None:
				if link is None:
					title = "Article"
				else:
					title = "Link"

			f.write('<span class="itemtitle">')
			if link is not None: f.write('<a href="' + link + '">')
			f.write(title)
			if link is not None: f.write('</a>')
			f.write('</span>\n')

			f.write('[' + feed.get_html_link() + ']')

			f.write('</p>\n')

			if description is not None:
				f.write('<p class="itemdescription">' + description + '</p>\n')

			f.write('</div>\n')

		dw.close()
		print >>f, '</div>'

		if config["showfeeds"]:
			print >>f, """<div id="feedstats">
<h2>Feeds</h2>
<table>
<tr><th>Feed</th><th>RSS</th><th>Last update</th><th>Next update</th></tr>
"""
			feeds = self.feeds.values()
			feeds.sort(lambda a, b: cmp(a.get_html_name().lower(), b.get_html_name().lower()))
			for feed in feeds:
				print >>f, '<tr><td>' + feed.get_html_link() + '</td><td><a href="' + feed.url + '">XML</a></td><td>' + format_time(feed.last_update, config) + '</td><td>' + format_time(feed.last_update + 60 * feed.period, config) + '</td></tr>'
			print >>f, """</table>
</div>
""" - - print >>f, """ - -""" - - f.close() - os.rename(outputfile + ".new", outputfile) - -def main(argv): - """The command-line interface to the aggregator.""" - - if len(argv) < 1: - print "Usage: rawdog action [action ...]" - print "action can be list, update, write" - return 1 - - statedir = os.environ["HOME"] + "/.rawdog" - try: - os.chdir(statedir) - except OSError: - print "No ~/.rawdog directory" - return 1 - - try: - f = open("config", "r") - except IOError: - print "No config file" - return 1 - config = { - "feedslist" : [], - "outputfile" : "output.html", - "maxarticles" : 200, - "dayformat" : "%A, %d %B %Y", - "timeformat" : "%I:%M %p", - "userefresh" : 0, - "showfeeds" : 1, - } - for line in f.readlines(): - line = line.strip() - if line == "" or line[0] == "#": continue - l = line.split(" ", 1) - if len(l) != 2: - print "Bad line in config file: " + line - return 1 - if l[0] == "feed": - l = l[1].split(" ", 1) - if len(l) != 2: - print "Bad line in config file: "+ line - config["feedslist"].append((l[1], int(l[0]))) - elif l[0] == "outputfile": - config["outputfile"] = l[1] - elif l[0] == "maxarticles": - config["maxarticles"] = int(l[1]) - elif l[0] == "dayformat": - config["dayformat"] = l[1] - elif l[0] == "timeformat": - config["timeformat"] = l[1] - elif l[0] == "userefresh": - config["userefresh"] = int(l[1]) - elif l[0] == "showfeeds": - config["showfeeds"] = int(l[1]) - else: - print "Unknown config command: " + l[0] - return 1 - f.close() - - try: - f = open("state", "r+") - fcntl.lockf(f.fileno(), fcntl.LOCK_EX) - rawdog = pickle.load(f) - rawdog.changed = 0 - except IOError: - f = open("state", "w+") - fcntl.lockf(f.fileno(), fcntl.LOCK_EX) - rawdog = Rawdog() - rawdog.changed = 1 - - for action in argv: - if action == "list": - rawdog.list() - elif action == "update": - rawdog.update(config) - elif action == "write": - rawdog.write(config) - else: - print "Unknown action: " + action - return 1 - - if rawdog.changed: - f.seek(0) - f.truncate(0) - pickle.dump(rawdog, f) - f.close() - - return 0 +# Rawdog, Article and Feed are imported to allow loading of state from versions +# prior to 0.4. (This should be removed for 0.5, since pickle updates the +# module path correctly once the state has been pickled again.) +from rawdoglib.rawdog import main, Rawdog, Article, Feed +import sys if __name__ == "__main__": sys.exit(main(sys.argv[1:])) diff --git a/rawdoglib/__init__.py b/rawdoglib/__init__.py index 7e91484..0b2a0f1 100644 --- a/rawdoglib/__init__.py +++ b/rawdoglib/__init__.py @@ -1 +1 @@ -__all__ = ['rssparser', 'timeoutsocket'] +__all__ = ['rssparser', 'timeoutsocket', 'rawdog'] diff --git a/rawdoglib/rawdog.py b/rawdoglib/rawdog.py new file mode 100644 index 0000000..e031bdf --- /dev/null +++ b/rawdoglib/rawdog.py @@ -0,0 +1,379 @@ +# rawdog: RSS aggregator without delusions of grandeur. +# Copyright 2003 Adam Sampson +# +# rawdog is free software; you can redistribute and/or modify it +# under the terms of that license as published by the Free Software +# Foundation; either version 2 of the License, or (at your option) +# any later version. +# +# rawdog is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with rawdog; see the file COPYING. 
If not, write to the +# Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, +# MA 02111-1307 USA, or see http://www.gnu.org/. + +VERSION = "0.4" +import rssparser +import pickle, os, fcntl, time, sha + +def format_time(secs, config): + """Format a time and date nicely.""" + t = time.localtime(secs) + return time.strftime(config["timeformat"], t) + ", " + time.strftime(config["dayformat"], t) + +class Feed: + """An RSS feed.""" + + def __init__(self, url, period): + self.url = url + self.period = period + self.etag = None + self.modified = None + self.title = None + self.link = None + self.last_update = 0 + + def update(self, articles, now): + if (now - self.last_update) < (self.period * 60): return + self.last_update = now + + try: + p = rssparser.parse(self.url, self.etag, + self.modified, "rawdog/" + VERSION) + except: + print "Error fetching " + self.url + return + + self.etag = p.get("etag") + self.modified = p.get("modified") + # In the event that the feed hasn't changed, then both channel + # and feed will be empty. + + channel = p["channel"] + if channel.has_key("title"): + self.title = channel["title"] + if channel.has_key("link"): + self.link = channel["link"] + + feed = self.url + for item in p["items"]: + title = item.get("title") + link = item.get("link") + if item.has_key("content_encoded"): + description = item["content_encoded"] + else: + description = item.get("description") + + article = Article(feed, title, link, description, now) + + if articles.has_key(article.hash): + articles[article.hash].last_seen = now + else: + articles[article.hash] = article + + def get_html_name(self): + if self.title is not None: + return self.title + elif self.link is not None: + return self.link + else: + return self.url + + def get_html_link(self): + s = self.get_html_name() + if self.link is not None: + return '' + s + '' + else: + return s + +class Article: + """An article retrieved from an RSS feed.""" + + def __init__(self, feed, title, link, description, now): + self.feed = feed + self.title = title + self.link = link + self.description = description + + s = str(feed) + str(title) + str(link) + str(description) + self.hash = sha.new(s).hexdigest() + + self.last_seen = now + self.added = now + + def can_expire(self, now): + return ((now - self.last_seen) > (24 * 60 * 60)) + +class DayWriter: + """Utility class for writing day sections into a series of articles.""" + + def __init__(self, file, config): + self.lasttime = [-1, -1, -1, -1, -1] + self.file = file + self.counter = 0 + self.config = config + + def start_day(self, tm): + print >>self.file, '
<div class="day">'
		day = time.strftime(self.config["dayformat"], tm)
		print >>self.file, '<h2>' + day + '</h2>'
		self.counter += 1

	def start_time(self, tm):
		print >>self.file, '<div class="time">'
		clock = time.strftime(self.config["timeformat"], tm)
		print >>self.file, '<h3>' + clock + '</h3>'
		self.counter += 1

	def time(self, s):
		tm = time.localtime(s)
		if tm[:3] != self.lasttime[:3]:
			self.close(0)
			self.start_day(tm)
		if tm[:6] != self.lasttime[:6]:
			self.close(1)
			self.start_time(tm)
		self.lasttime = tm

	def close(self, n = 0):
		while self.counter > n:
			print >>self.file, "</div>"
			self.counter -= 1

class Rawdog:
	"""The aggregator itself."""

	def __init__(self):
		self.feeds = {}
		self.articles = {}
		self.last_update = 0

	def list(self):
		for url in self.feeds.keys():
			feed = self.feeds[url]
			print url
			print " Title:", feed.title
			print " Link:", feed.link

	def update(self, config):
		now = time.time()

		seenfeeds = {}
		for (url, period) in config["feedslist"]:
			seenfeeds[url] = 1
			if not self.feeds.has_key(url):
				self.feeds[url] = Feed(url, period)
			else:
				self.feeds[url].period = period
		for url in self.feeds.keys():
			if not seenfeeds.has_key(url):
				del self.feeds[url]
			else:
				self.feeds[url].update(self.articles, now)

		for key in self.articles.keys():
			if self.articles[key].can_expire(now) or not self.feeds.has_key(self.articles[key].feed):
				del self.articles[key]

		self.last_update = now
		self.changed = 1

	def write(self, config):
		outputfile = config["outputfile"]
		now = time.time()

		f = open(outputfile + ".new", "w")

		refresh = 24 * 60
		for feed in self.feeds.values():
			if feed.period < refresh: refresh = feed.period

		print >>f, """<html>
<head>
"""
		if config["userefresh"]:
			print >>f, """<meta http-equiv="Refresh" content=\"""" + str(refresh * 60) + """\">"""
		print >>f, """<title>rawdog</title>
<link rel="stylesheet" href="style.css" type="text/css">
</head>
<body>
<div id="items">
"""

		dw = DayWriter(f, config)

		articles = self.articles.values()
		articles.sort(lambda a, b: cmp(b.added, a.added))
		articles = articles[:config["maxarticles"]]

		for article in articles:
			dw.time(article.added)

			feed = self.feeds[article.feed]
			f.write('<div class="item">\n')
			f.write('<p class="itemheader">\n')

			title = article.title
			link = article.link
			description = article.description
			if title is None:
				if link is None:
					title = "Article"
				else:
					title = "Link"

			f.write('<span class="itemtitle">')
			if link is not None: f.write('<a href="' + link + '">')
			f.write(title)
			if link is not None: f.write('</a>')
			f.write('</span>\n')

			f.write('[' + feed.get_html_link() + ']')

			f.write('</p>\n')

			if description is not None:
				f.write('<p class="itemdescription">' + description + '</p>\n')

			f.write('</div>\n')

		dw.close()
		print >>f, '</div>'

		if config["showfeeds"]:
			print >>f, """<div id="feedstats">
<h2>Feeds</h2>
<table>
<tr><th>Feed</th><th>RSS</th><th>Last update</th><th>Next update</th></tr>
"""
			feeds = self.feeds.values()
			feeds.sort(lambda a, b: cmp(a.get_html_name().lower(), b.get_html_name().lower()))
			for feed in feeds:
				print >>f, '<tr><td>' + feed.get_html_link() + '</td><td><a href="' + feed.url + '">XML</a></td><td>' + format_time(feed.last_update, config) + '</td><td>' + format_time(feed.last_update + 60 * feed.period, config) + '</td></tr>'
			print >>f, """</table>
</div>
""" + + print >>f, """ + +""" + + f.close() + os.rename(outputfile + ".new", outputfile) + +def main(argv): + """The command-line interface to the aggregator.""" + + if len(argv) < 1: + print "Usage: rawdog action [action ...]" + print "action can be list, update, write" + return 1 + + statedir = os.environ["HOME"] + "/.rawdog" + try: + os.chdir(statedir) + except OSError: + print "No ~/.rawdog directory" + return 1 + + try: + f = open("config", "r") + except IOError: + print "No config file" + return 1 + config = { + "feedslist" : [], + "outputfile" : "output.html", + "maxarticles" : 200, + "dayformat" : "%A, %d %B %Y", + "timeformat" : "%I:%M %p", + "userefresh" : 0, + "showfeeds" : 1, + } + for line in f.readlines(): + line = line.strip() + if line == "" or line[0] == "#": continue + l = line.split(" ", 1) + if len(l) != 2: + print "Bad line in config file: " + line + return 1 + if l[0] == "feed": + l = l[1].split(" ", 1) + if len(l) != 2: + print "Bad line in config file: "+ line + config["feedslist"].append((l[1], int(l[0]))) + elif l[0] == "outputfile": + config["outputfile"] = l[1] + elif l[0] == "maxarticles": + config["maxarticles"] = int(l[1]) + elif l[0] == "dayformat": + config["dayformat"] = l[1] + elif l[0] == "timeformat": + config["timeformat"] = l[1] + elif l[0] == "userefresh": + config["userefresh"] = int(l[1]) + elif l[0] == "showfeeds": + config["showfeeds"] = int(l[1]) + else: + print "Unknown config command: " + l[0] + return 1 + f.close() + + try: + f = open("state", "r+") + fcntl.lockf(f.fileno(), fcntl.LOCK_EX) + rawdog = pickle.load(f) + rawdog.changed = 0 + except IOError: + f = open("state", "w+") + fcntl.lockf(f.fileno(), fcntl.LOCK_EX) + rawdog = Rawdog() + rawdog.changed = 1 + + for action in argv: + if action == "list": + rawdog.list() + elif action == "update": + rawdog.update(config) + elif action == "write": + rawdog.write(config) + else: + print "Unknown action: " + action + return 1 + + if rawdog.changed: + f.seek(0) + f.truncate(0) + pickle.dump(rawdog, f) + f.close() + + return 0 + -- 2.35.1