From ce6b1b279f1a5b62367a16a1cd8086f9169cba34 Mon Sep 17 00:00:00 2001 From: Adam Sampson Date: Sat, 8 Oct 2005 15:29:33 +0000 Subject: [PATCH] Rework write() to make it easier for plugins to replace it. Update email addresses and copyright notices. --- NEWS | 7 + PLUGINS | 25 +++- README | 2 +- rawdog | 2 +- rawdoglib/persister.py | 2 +- rawdoglib/plugins.py | 2 +- rawdoglib/rawdog.py | 285 +++++++++++++++++++++++---------------- rawdoglib/upgrade_1_2.py | 2 +- setup.py | 2 +- style.css | 2 +- 10 files changed, 203 insertions(+), 128 deletions(-) diff --git a/NEWS b/NEWS index b1c9e52..9df3dfd 100644 --- a/NEWS +++ b/NEWS @@ -14,6 +14,13 @@ Foster). Remove some unnecessary imports found by pyflakes. +Add output_sorted_filter and output_write_files hooks, deprecating +the output_write hook (which wasn't very useful originally, and isn't +used by any of the plugins I've been sent). Restructure the "write" code +so that it should be far easier to write custom output plugins: there +are several new methods on Rawdog for doing different bits of the write +process. + - rawdog 2.4 Provide guid in item templates (suggested by Rick van Rein). diff --git a/PLUGINS b/PLUGINS index 9ee8ba5..01e9d76 100644 --- a/PLUGINS +++ b/PLUGINS @@ -102,8 +102,29 @@ write. * articles: the mutable list of Article objects -Called just before rawdog starts writing the HTML output. This hook can -be used to implement alternative output methods. +Called immediately before output_sorted_filter; this hook is here for +backwards compatibility, and should not be used in new plugins. + +### output_sorted_filter(rawdog, config, articles) + +* articles: the mutable list of Article objects + +Called after rawdog sorts the list of articles to write, but before it +removes duplicate and excessively old articles. This hook can be used to +implement alternate duplicate-filtering methods. If you return False +from this hook, then rawdog will not do its usual duplicate-removing +filter pass. + +### output_write_files(rawdog, config, articles, article_dates) + +* articles: the mutable list of Article objects +* article_dates: a dictionary mapping Article objects to the dates that + were used to sort them + +Called when rawdog is about to write its output to files. This hook can +be used to implement alternative output methods. If you return False +from this hook, then rawdog will not write any output itself (and the +later output_ hooks will thus not be called). ### output_items_begin(rawdog, config, f) diff --git a/README b/README index 9672d1d..42eb574 100644 --- a/README +++ b/README @@ -1,5 +1,5 @@ rawdog: RSS Aggregator Without Delusions Of Grandeur -Adam Sampson +Adam Sampson rawdog is an RSS (and other) feed aggregator, based on Mark Pilgrim's flexible feed parser. It's just an aggregator; it's not a weblog authoring tool, nor is diff --git a/rawdog b/rawdog index fd57082..03d070c 100644 --- a/rawdog +++ b/rawdog @@ -1,6 +1,6 @@ #!/usr/bin/env python # rawdog: RSS aggregator without delusions of grandeur. -# Copyright 2003 Adam Sampson +# Copyright 2003, 2004, 2005 Adam Sampson # # rawdog is free software; you can redistribute and/or modify it # under the terms of that license as published by the Free Software diff --git a/rawdoglib/persister.py b/rawdoglib/persister.py index 1b8d630..f021ca0 100644 --- a/rawdoglib/persister.py +++ b/rawdoglib/persister.py @@ -1,5 +1,5 @@ # persister: safe class persistance wrapper -# Copyright 2003, 2004 Adam Sampson +# Copyright 2003, 2004, 2005 Adam Sampson # # persister is free software; you can redistribute it and/or modify it # under the terms of the GNU Lesser General Public License as diff --git a/rawdoglib/plugins.py b/rawdoglib/plugins.py index 16acc6e..a0da0ac 100644 --- a/rawdoglib/plugins.py +++ b/rawdoglib/plugins.py @@ -1,5 +1,5 @@ # plugins: handle add-on modules for rawdog. -# Copyright 2004 Adam Sampson +# Copyright 2004, 2005 Adam Sampson # # rawdog is free software; you can redistribute and/or modify it # under the terms of that license as published by the Free Software diff --git a/rawdoglib/rawdog.py b/rawdoglib/rawdog.py index fd9fd4c..044a88a 100644 --- a/rawdoglib/rawdog.py +++ b/rawdoglib/rawdog.py @@ -1,5 +1,5 @@ # rawdog: RSS aggregator without delusions of grandeur. -# Copyright 2003, 2004 Adam Sampson +# Copyright 2003, 2004, 2005 Adam Sampson # # rawdog is free software; you can redistribute and/or modify it # under the terms of that license as published by the Free Software @@ -800,6 +800,7 @@ class Rawdog(Persistable): print >>sys.stderr, "Feed URL automatically changed." def list(self, config): + """List the configured feeds.""" for url in self.feeds.keys(): feed = self.feeds[url] feed_info = feed.feed_info @@ -810,6 +811,8 @@ class Rawdog(Persistable): print " Link:", feed_info.get("link") def sync_from_config(self, config): + """Update rawdog's internal state to match the + configuration.""" seenfeeds = {} for (url, period, args) in config["feedslist"]: seenfeeds[url] = 1 @@ -839,6 +842,8 @@ class Rawdog(Persistable): self.modified() def update(self, config, feedurl = None): + """Perform the update action: check feeds for new articles, and + expire old ones.""" config.log("Starting update") now = time.time() @@ -883,6 +888,7 @@ class Rawdog(Persistable): config.log("Finished update") def get_template(self, config): + """Get the main template.""" if config["template"] != "default": return load_file(config["template"]) @@ -917,13 +923,14 @@ __feeds__

Generated by rawdog version __version__ -by Adam Sampson.

+by Adam Sampson.

""" return template def get_itemtemplate(self, config): + """Get the item template.""" if config["itemtemplate"] != "default": return load_file(config["itemtemplate"]) @@ -941,66 +948,100 @@ __description__ return template def show_template(self, config): + """Show the configured main template.""" print self.get_template(config) def show_itemtemplate(self, config): + """Show the configured item template.""" print self.get_itemtemplate(config) - def write(self, config): - outputfile = config["outputfile"] - config.log("Starting write") - now = time.time() + def write_article(self, f, article, config): + """Write an article to the given file.""" + feed = self.feeds[article.feed] + feed_info = feed.feed_info + entry_info = article.entry_info - bits = { "version" : VERSION } - bits.update(config["defines"]) + link = entry_info.get("link") + if link == "": + link = None - refresh = config["expireage"] - for feed in self.feeds.values(): - if feed.period < refresh: refresh = feed.period + guid = entry_info.get("id") + if guid == "": + guid = None - bits["refresh"] = """""" + itembits = {} + for name, value in feed.args.items(): + if name.startswith("define_"): + itembits[name[7:]] = value - article_dates = {} - articles = self.articles.values() - for a in articles: - if config["sortbyfeeddate"]: - article_dates[a] = a.date or a.added + title = detail_to_html(entry_info.get("title_detail"), True, config) + + key = None + for k in ["content", "summary_detail"]: + if entry_info.has_key(k): + key = k + break + if key is None: + description = None + else: + force_preformatted = feed.args.has_key("format") and (feed.args["format"] == "text") + description = detail_to_html(entry_info[key], False, config, force_preformatted) + + date = article.date + if title is None: + if link is None: + title = "Article" else: - article_dates[a] = a.added - numarticles = len(articles) + title = "Link" - def compare(a, b): - """Compare two articles to decide how they - should be sorted. Sort by added date, then - by feed, then by sequence, then by hash.""" - i = cmp(article_dates[b], article_dates[a]) - if i != 0: - return i - i = cmp(a.feed, b.feed) - if i != 0: - return i - i = cmp(a.sequence, b.sequence) - if i != 0: - return i - return cmp(a.hash, b.hash) - plugins.call_hook("output_filter", self, config, articles) - articles.sort(compare) - plugins.call_hook("output_sort", self, config, articles) + itembits["title_no_link"] = title + if link is not None: + itembits["url"] = string_to_html(link, config) + else: + itembits["url"] = "" + if guid is not None: + itembits["guid"] = string_to_html(guid, config) + else: + itembits["guid"] = "" + if link is None: + itembits["title"] = title + else: + itembits["title"] = '' + title + '' - if config["maxarticles"] != 0: - articles = articles[:config["maxarticles"]] + itembits["feed_title_no_link"] = detail_to_html(feed_info.get("title_detail"), True, config) + itembits["feed_title"] = feed.get_html_link(config) + itembits["feed_url"] = string_to_html(feed.url, config) + itembits["feed_hash"] = short_hash(feed.url) + itembits["feed_id"] = feed.get_id(config) + itembits["hash"] = short_hash(article.hash) - plugins.call_hook("output_write", self, config, articles) + if description is not None: + itembits["description"] = description + else: + itembits["description"] = "" - f = StringIO() + author = author_to_html(entry_info, feed.url, config) + if author is not None: + itembits["author"] = author + else: + itembits["author"] = "" + + itembits["added"] = format_time(article.added, config) + if date is not None: + itembits["date"] = format_time(date, config) + else: + itembits["date"] = "" + + plugins.call_hook("output_item_bits", self, config, feed, article, itembits) itemtemplate = self.get_itemtemplate(config) - dw = DayWriter(f, config) - plugins.call_hook("output_items_begin", self, config, f) + f.write(fill_template(itemtemplate, itembits)) + def write_remove_dups(self, articles, config, now): + """Filter the list of articles to remove articles that are too + old or are duplicates.""" + kept_articles = [] seen_links = {} seen_guids = {} - - count = 0 dup_count = 0 for article in articles: age = now - article.added @@ -1008,7 +1049,6 @@ __description__ break feed = self.feeds[article.feed] - feed_info = feed.feed_info entry_info = article.entry_info link = entry_info.get("link") @@ -1036,82 +1076,20 @@ __description__ dup_count += 1 continue - count += 1 - if not plugins.call_hook("output_items_heading", self, config, f, article, article_dates[article]): - dw.time(article_dates[article]) - - itembits = {} - for name, value in feed.args.items(): - if name.startswith("define_"): - itembits[name[7:]] = value - - title = detail_to_html(entry_info.get("title_detail"), True, config) - - key = None - for k in ["content", "summary_detail"]: - if entry_info.has_key(k): - key = k - break - if key is None: - description = None - else: - force_preformatted = feed.args.has_key("format") and (feed.args["format"] == "text") - description = detail_to_html(entry_info[key], False, config, force_preformatted) - - date = article.date - if title is None: - if link is None: - title = "Article" - else: - title = "Link" - - itembits["title_no_link"] = title - if link is not None: - itembits["url"] = string_to_html(link, config) - else: - itembits["url"] = "" - if guid is not None: - itembits["guid"] = string_to_html(guid, config) - else: - itembits["guid"] = "" - if link is None: - itembits["title"] = title - else: - itembits["title"] = '' + title + '' - - itembits["feed_title_no_link"] = detail_to_html(feed_info.get("title_detail"), True, config) - itembits["feed_title"] = feed.get_html_link(config) - itembits["feed_url"] = string_to_html(feed.url, config) - itembits["feed_hash"] = short_hash(feed.url) - itembits["feed_id"] = feed.get_id(config) - itembits["hash"] = short_hash(article.hash) + kept_articles.append(article) + return (kept_articles, dup_count) - if description is not None: - itembits["description"] = description - else: - itembits["description"] = "" - - author = author_to_html(entry_info, feed.url, config) - if author is not None: - itembits["author"] = author - else: - itembits["author"] = "" - - itembits["added"] = format_time(article.added, config) - if date is not None: - itembits["date"] = format_time(date, config) - else: - itembits["date"] = "" - - plugins.call_hook("output_item_bits", self, config, feed, article, itembits) - f.write(fill_template(itemtemplate, itembits)) + def get_main_template_bits(self, config): + """Get the bits that are used in the default main template, + with the exception of items and num_items.""" + bits = { "version" : VERSION } + bits.update(config["defines"]) - dw.close() - plugins.call_hook("output_items_end", self, config, f) + refresh = config["expireage"] + for feed in self.feeds.values(): + if feed.period < refresh: refresh = feed.period - bits["items"] = f.getvalue() - bits["num_items"] = str(numarticles) - config.log("Selected ", count, " of ", numarticles, " articles to write; ignored ", dup_count, " duplicates") + bits["refresh"] = """""" f = StringIO() print >>f, """ @@ -1131,8 +1109,29 @@ __description__ bits["feeds"] = f.getvalue() bits["num_feeds"] = str(len(feeds)) + return bits + + def write_output_file(self, articles, article_dates, config): + """Write a regular rawdog HTML output file.""" + f = StringIO() + dw = DayWriter(f, config) + plugins.call_hook("output_items_begin", self, config, f) + + for article in articles: + if not plugins.call_hook("output_items_heading", self, config, f, article, article_dates[article]): + dw.time(article_dates[article]) + + self.write_article(f, article, config) + + dw.close() + plugins.call_hook("output_items_end", self, config, f) + + bits = self.get_main_template_bits(config) + bits["items"] = f.getvalue() + bits["num_items"] = str(len(self.articles.values())) plugins.call_hook("output_bits", self, config, bits) s = fill_template(self.get_template(config), bits) + outputfile = config["outputfile"] if outputfile == "-": print s else: @@ -1146,6 +1145,54 @@ __description__ f.close() os.rename(outputfile + ".new", outputfile) + def write(self, config): + """Perform the write action: write articles to the output + file.""" + config.log("Starting write") + now = time.time() + + article_dates = {} + articles = self.articles.values() + for a in articles: + if config["sortbyfeeddate"]: + article_dates[a] = a.date or a.added + else: + article_dates[a] = a.added + numarticles = len(articles) + + def compare(a, b): + """Compare two articles to decide how they + should be sorted. Sort by added date, then + by feed, then by sequence, then by hash.""" + i = cmp(article_dates[b], article_dates[a]) + if i != 0: + return i + i = cmp(a.feed, b.feed) + if i != 0: + return i + i = cmp(a.sequence, b.sequence) + if i != 0: + return i + return cmp(a.hash, b.hash) + plugins.call_hook("output_filter", self, config, articles) + articles.sort(compare) + plugins.call_hook("output_sort", self, config, articles) + + if config["maxarticles"] != 0: + articles = articles[:config["maxarticles"]] + + plugins.call_hook("output_write", self, config, articles) + + if not plugins.call_hook("output_sorted_filter", self, config, articles): + (articles, dup_count) = self.write_remove_dups(articles, config, now) + else: + dup_count = 0 + + config.log("Selected ", len(articles), " of ", numarticles, " articles to write; ignored ", dup_count, " duplicates") + + if not plugins.call_hook("output_write_files", self, config, articles, article_dates): + self.write_output_file(articles, article_dates, config) + config.log("Finished write") def usage(): @@ -1173,7 +1220,7 @@ Special actions (all other options are ignored if one of these is specified): --upgrade OLDDIR NEWDIR Import feed state from rawdog 1.x directory OLDDIR into rawdog 2.x directory NEWDIR -Report bugs to .""" +Report bugs to .""" def main(argv): """The command-line interface to the aggregator.""" diff --git a/rawdoglib/upgrade_1_2.py b/rawdoglib/upgrade_1_2.py index 174d8cc..7b7a6ef 100644 --- a/rawdoglib/upgrade_1_2.py +++ b/rawdoglib/upgrade_1_2.py @@ -1,5 +1,5 @@ # upgrade_1_2: import state from rawdog 1.x state files to rawdog 2.x -# Copyright 2003 Adam Sampson +# Copyright 2003, 2004, 2005 Adam Sampson # # rawdog is free software; you can redistribute and/or modify it # under the terms of that license as published by the Free Software diff --git a/setup.py b/setup.py index 0e40607..b8c5094 100644 --- a/setup.py +++ b/setup.py @@ -6,7 +6,7 @@ setup(name = "rawdog", version = "2.5rc1", description = "RSS Aggregator Without Delusions Of Grandeur", author = "Adam Sampson", - author_email = "azz@us-lot.org", + author_email = "ats@offog.org", url = "http://offog.org/code/rawdog.html", license = "GNU GPL v2 or later", scripts = ['rawdog'], diff --git a/style.css b/style.css index c0d6052..977f1e0 100644 --- a/style.css +++ b/style.css @@ -1,5 +1,5 @@ /* Default stylesheet for rawdog. Customise this as you like. - Adam Sampson */ + Adam Sampson */ .xmlbutton { /* From Dylan Greene's suggestion: http://www.dylangreene.com/blog.asp?blogID=91 */ -- 2.35.1