From: Adam Sampson Date: Sat, 1 Feb 2014 22:49:04 +0000 (+0000) Subject: Don't crash if feedparser.parse raises an exception. X-Git-Tag: v2.19~1 X-Git-Url: http://git.ozo.com/?a=commitdiff_plain;h=42d67584ca9fb708717dfd2c17f8fecff1dd5d7b;p=rawdog%2F.git Don't crash if feedparser.parse raises an exception. Also add a testcase for this, and some explicit comments in the source about where it has to handle incomplete responses. --- diff --git a/NEWS b/NEWS index 7be35f5..f37fffa 100644 --- a/NEWS +++ b/NEWS @@ -11,6 +11,10 @@ with the state file. Add some more comprehensive tests for the changeconfig option; in particular, test it more thoroughly with splitstate both on and off. +Don't crash if feedparser raises an exception during an update (i.e. +assume that any part of feedparser's response might be missing, until +we've checked that there wasn't an exception). + - rawdog 2.18 Be consistent about catching AttributeError when looking for attributes diff --git a/rawdoglib/rawdog.py b/rawdoglib/rawdog.py index 3661df2..16b6f8b 100644 --- a/rawdoglib/rawdog.py +++ b/rawdoglib/rawdog.py @@ -1,5 +1,5 @@ # rawdog: RSS aggregator without delusions of grandeur. -# Copyright 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2012, 2013 Adam Sampson +# Copyright 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2012, 2013, 2014 Adam Sampson # # rawdog is free software; you can redistribute and/or modify it # under the terms of that license as published by the Free Software @@ -457,10 +457,14 @@ class Feed: """Add new articles from a feed to the collection. Returns True if any articles were read, False otherwise.""" + # Note that feedparser might have thrown an exception -- + # so until we print the error message and return, we + # can't assume that p contains any particular field. + responses = p.get("rawdog_responses") if len(responses) > 0: last_status = responses[-1]["status"] - elif len(p["feed"]) != 0: + elif len(p.get("feed", [])) != 0: # Some protocol other than HTTP -- assume it's OK, # since we got some content. last_status = 200 @@ -541,7 +545,7 @@ class Feed: errors.append("If this condition persists, you should remove it from your config file.") errors.append("") fatal = True - elif version == "" and len(p["entries"]) == 0: + elif version == "" and len(p.get("entries", [])) == 0: # feedparser couldn't detect the type of this feed or # retrieve any entries from it. errors.append("The data retrieved from this URL could not be understood as a feed.") @@ -561,6 +565,9 @@ class Feed: if fatal: return False + # From here, we can assume that we've got a complete feedparser + # response. + p = ensure_unicode(p, p.get("encoding") or "UTF-8") # No entries means the feed hasn't changed, but for some reason diff --git a/test-rawdog b/test-rawdog index 2e86d7f..72c124e 100644 --- a/test-rawdog +++ b/test-rawdog @@ -642,6 +642,18 @@ EOF done done +begin "exception raised by feedparser" +make_rss20 $statedir/feed.rss +add "feed 0 feed.rss" +cat >$statedir/plugins/crash.py <