#!/usr/bin/env python
# rawdog: RSS aggregator without delusions of grandeur.
# Copyright 2003 Adam Sampson
#
# rawdog is free software; you can redistribute and/or modify it
# under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# rawdog is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with rawdog; see the file COPYING. If not, write to the
# Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
# MA 02111-1307 USA, or see http://www.gnu.org/.

VERSION = "0.3"

import rawdoglib.rssparser as rssparser
import sys, pickle, os, fcntl, time, sha

def hash_item(feed, title, link, description):
    """Return the hash value for an article."""
    s = str(feed) + str(title) + str(link) + str(description)
    return sha.new(s).hexdigest()

def maybe_get(hash, key):
    """If hash has a key called key, return its value, else return None."""
    if hash.has_key(key):
        return hash[key]
    return None

class Feed:
    """An RSS feed."""

    def __init__(self, url, period):
        self.url = url
        self.period = period
        self.etag = None
        self.modified = None
        self.title = None
        self.link = None
        self.last_update = 0

    def update(self, articles, now):
        # Don't refetch the feed before its update period has passed.
        if (now - self.last_update) < (self.period * 60):
            return
        self.last_update = now

        try:
            p = rssparser.parse(self.url, self.etag, self.modified,
                                "rawdog/" + VERSION)
        except:
            print "Error fetching " + self.url
            return

        self.etag = maybe_get(p, "etag")
        self.modified = maybe_get(p, "modified")

        # In the event that the feed hasn't changed, both the channel
        # and the items will be empty.
        channel = p["channel"]
        if channel.has_key("title"):
            self.title = channel["title"]
        if channel.has_key("link"):
            self.link = channel["link"]

        feed = self.url
        for item in p["items"]:
            title = maybe_get(item, "title")
            link = maybe_get(item, "link")
            if item.has_key("content_encoded"):
                description = item["content_encoded"]
            else:
                description = maybe_get(item, "description")

            # New articles are added; articles we already know about
            # just have their last-seen time refreshed.
            hash = hash_item(feed, title, link, description)
            if articles.has_key(hash):
                articles[hash].last_seen = now
            else:
                articles[hash] = Article(feed, title, link, description,
                                         hash, now)

    def get_html_name(self):
        if self.title is not None:
            return self.title
        elif self.link is not None:
            return self.link
        else:
            return self.url

    def get_html_link(self):
        s = self.get_html_name()
        if self.link is not None:
            return '<a href="' + self.link + '">' + s + '</a>'
        else:
            return s

class Article:
    """An article retrieved from an RSS feed."""

    def __init__(self, feed, title, link, description, hash, now):
        self.feed = feed
        self.title = title
        self.link = link
        self.description = description
        self.hash = hash
        self.last_seen = now
        self.added = now

    def can_expire(self, now):
        return ((now - self.last_seen) > (24 * 60 * 60))

class DayWriter:
    """Utility class for writing day sections into a series of articles."""

    def __init__(self, file, config):
        self.lasttime = [-1, -1, -1, -1, -1]
        self.file = file
        self.counter = 0
        self.config = config

    def start_day(self, tm):
        print >>self.file, '<div class="day">'
        day = time.strftime(self.config["dayformat"], tm)
        print >>self.file, '<div class="dayheader"><div class="daytitle">' + day + '</div></div>'
        self.counter += 1

    def start_time(self, tm):
        print >>self.file, '<div class="time">'
        clock = time.strftime(self.config["timeformat"], tm)
        print >>self.file, '<div class="timeheader"><div class="timetitle">' + clock + '</div></div>'
        self.counter += 1

    def time(self, s):
        # Open a new day section and a new time section whenever the
        # day or the time of the article being written changes.
        tm = time.localtime(s)
        if tm[:3] != self.lasttime[:3]:
            self.close(0)
            self.start_day(tm)
        if tm[:6] != self.lasttime[:6]:
            self.close(1)
            self.start_time(tm)
        self.lasttime = tm

    def close(self, n = 0):
        while self.counter > n:
            print >>self.file, "</div>"
            self.counter -= 1
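
# Taken together, DayWriter and Rawdog.write() below nest their output
# roughly as in this sketch (the CSS class names are simply the ones used
# in the markup strings in this file, and assume a style.css that defines
# them):
#
#   <div class="day">   ...day heading...
#     <div class="time">   ...time heading...
#       <div class="item">
#         ...title, feed link and description for one article...
#       </div>
#     </div>
#   </div>
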
" self.counter -= 1 class Rawdog: """The aggregator itself.""" def __init__(self): self.feeds = {} self.articles = {} self.last_update = 0 def list(self): for url in self.feeds.keys(): feed = self.feeds[url] print url print " Title:", feed.title print " Link:", feed.link def update(self, config): now = time.time() seenfeeds = {} for (url, period) in config["feedslist"]: seenfeeds[url] = 1 if not self.feeds.has_key(url): self.feeds[url] = Feed(url, period) else: self.feeds[url].period = period for url in self.feeds.keys(): if not seenfeeds.has_key(url): del self.feeds[url] else: self.feeds[url].update(self.articles, now) for key in self.articles.keys(): if self.articles[key].can_expire(now) or not self.feeds.has_key(self.articles[key].feed): del self.articles[key] self.last_update = now self.changed = 1 def write(self, config): outputfile = config["outputfile"] now = time.time() f = open(outputfile + ".new", "w") refresh = 24 * 60 for feed in self.feeds.values(): if feed.period < refresh: refresh = feed.period print >>f, """ """ if config["userefresh"]: print >>f, """""" print >>f, """ rawdog
""" dw = DayWriter(f, config) for article in articles: dw.time(article.added) feed = self.feeds[article.feed] f.write('
\n') f.write('

\n') title = article.title link = article.link description = article.description if title is None: if link is None: title = "Article" else: title = "Link" f.write('') if link is not None: f.write('') f.write(title) if link is not None: f.write('') f.write('\n') f.write('[' + feed.get_html_link() + ']') f.write('

\n') if description is not None: f.write('

' + description + '

\n') f.write('
\n') dw.close() print >>f, """
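
# main() below reads its configuration from ~/.rawdog/config, one
# directive per line. A rough example (the feed URL is a placeholder,
# and "feed" lines give an update period in minutes followed by the URL;
# the other values shown are just the built-in defaults):
#
#   feed 60 http://example.org/news.rss
#   outputfile output.html
#   maxarticles 200
#   dayformat %A, %d %B %Y
#   timeformat %I:%M %p
#   userefresh 0
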
""" f.close() os.rename(outputfile + ".new", outputfile) def main(argv): """The command-line interface to the aggregator.""" if len(argv) < 1: print "Usage: rawdog action [action ...]" print "action can be list, update, write" return 1 statedir = os.environ["HOME"] + "/.rawdog" try: os.chdir(statedir) except OSError: print "No ~/.rawdog directory" return 1 try: f = open("config", "r") except IOError: print "No config file" return 1 config = { "feedslist" : [], "outputfile" : "output.html", "maxarticles" : 200, "dayformat" : "%A, %d %B %Y", "timeformat" : "%I:%M %p", "userefresh" : 0, } for line in f.readlines(): line = line.strip() if line == "" or line[0] == "#": continue l = line.split(" ", 1) if len(l) != 2: print "Bad line in config file: " + line return 1 if l[0] == "feed": l = l[1].split(" ", 1) if len(l) != 2: print "Bad line in config file: "+ line config["feedslist"].append((l[1], int(l[0]))) elif l[0] == "outputfile": config["outputfile"] = l[1] elif l[0] == "maxarticles": config["maxarticles"] = int(l[1]) elif l[0] == "dayformat": config["dayformat"] = l[1] elif l[0] == "timeformat": config["timeformat"] = l[1] elif l[0] == "userefresh": config["userefresh"] = int(l[1]) else: print "Unknown config command: " + l[0] return 1 f.close() try: f = open("state", "r+") fcntl.lockf(f.fileno(), fcntl.LOCK_EX) rawdog = pickle.load(f) rawdog.changed = 0 except IOError: f = open("state", "w+") fcntl.lockf(f.fileno(), fcntl.LOCK_EX) rawdog = Rawdog() rawdog.changed = 1 for action in argv: if action == "list": rawdog.list() elif action == "update": rawdog.update(config) elif action == "write": rawdog.write(config) else: print "Unknown action: " + action return 1 if rawdog.changed: f.seek(0) f.truncate(0) pickle.dump(rawdog, f) f.close() return 0 if __name__ == "__main__": sys.exit(main(sys.argv[1:]))