From 389b070a5cf0052a4a58a86aab90d4f1b6a7b3ec Mon Sep 17 00:00:00 2001
From: Adam Sampson
Date: Tue, 9 Oct 2018 20:24:16 +0100
Subject: [PATCH] Rework exception-handling code for current feedparser.

Errors from urllib2 used to be reported in bozo_exception. They are now
raised directly from feedparser.parse, which probably makes more sense.
rawdog now handles both cases, putting network-ish exceptions into
rawdog_exception either way; it also sets rawdog_timeout when a timeout
occurs, which should make it easier for plugins to detect this.

As a result, there's no longer a distinction between the "Error while
fetching feed" and "Error fetching or parsing feed" messages, and a
traceback can be printed for both if showtracebacks is on.
---
 NEWS                | 11 +++++++-
 rawdoglib/rawdog.py | 63 +++++++++++++++++++++++++++------------------
 2 files changed, 48 insertions(+), 26 deletions(-)

diff --git a/NEWS b/NEWS
index 5da3af2..38bb71d 100644
--- a/NEWS
+++ b/NEWS
@@ -18,7 +18,16 @@ default value.
 
 Support the current development version of feedparser (which will
 presumably be the 5.3 release eventually), which has been restructured
-into multiple modules.
+into multiple modules, and raises network exceptions (e.g. timeouts)
+directly from feedparser.parse rather than putting them in
+bozo_exception.
+
+rawdog's exception-handling code has been cleaned up as a result of the
+above, and should work consistently for old and new versions of
+feedparser. In the feedparser result dict, "bozo_exception" is now
+converted to "rawdog_exception" for network errors, and "rawdog_timeout"
+is set if a timeout exception occurred. For compatibility with existing
+plugins, an empty "feed" is also provided on a timeout.
 
 - rawdog 2.22
 
diff --git a/rawdoglib/rawdog.py b/rawdoglib/rawdog.py
index 23568ab..a781d70 100644
--- a/rawdoglib/rawdog.py
+++ b/rawdoglib/rawdog.py
@@ -363,6 +363,12 @@ def is_timeout_exception(exc):
 	"""Return True if the given exception object suggests that a
 	timeout occurred, else return False."""
 
+	if exc is None:
+		return False
+
+	if isinstance(exc, socket.timeout):
+		return True
+
 	# Since urlopen throws away the original exception object,
 	# we have to look at the stringified form to tell if it was a timeout.
 	# (We're in reasonable company here, since test_ssl.py in the Python
@@ -514,12 +520,30 @@ class Feed:
 
 		try:
 			result = feedparser.parse(url, **parse_args)
+
+			# Older versions of feedparser return some kinds of
+			# download errors in bozo_exception rather than raising
+			# them from feedparser.parse. Normalise this.
+			e = result.get("bozo_exception")
+			if is_timeout_exception(e):
+				result = {"rawdog_timeout": e}
+			elif isinstance(e, urllib2.URLError):
+				result = {"rawdog_exception": e}
 		except Exception, e:
-			result = {
-				"rawdog_exception": e,
-				"rawdog_traceback": sys.exc_info()[2],
-				}
+			if is_timeout_exception(e):
+				result = {"rawdog_timeout": e}
+			else:
+				result = {
+					"rawdog_exception": e,
+					"rawdog_traceback": sys.exc_info()[2],
+					}
 		result["rawdog_responses"] = logger.get_log()
+
+		# For compatibility with old hooks, include an empty "feed"
+		# if a timeout occurred.
+		if "rawdog_timeout" in result:
+			result["feed"] = []
+
 		return result
 
 	def update(self, rawdog, now, config, articles, p):
@@ -551,15 +575,6 @@ class Feed:
 		fatal = False
 		old_url = self.url
 
-		if "rawdog_exception" in p:
-			errors.append("Error fetching or parsing feed:")
-			errors.append(str(p["rawdog_exception"]))
-			if config["showtracebacks"]:
-				from traceback import format_tb
-				errors.append("".join(format_tb(p["rawdog_traceback"])))
-			errors.append("")
-			fatal = True
-
 		if len(responses) != 0 and responses[0]["status"] == 301:
 			# Permanent redirect(s). Find the new location.
 			i = 0
@@ -593,23 +608,21 @@ class Feed:
 			errors.append("You should update its entry in your config file.")
 			errors.append("")
 
-		bozo_exception = p.get("bozo_exception")
-		got_urlerror = isinstance(bozo_exception, urllib2.URLError)
-		got_timeout = isinstance(bozo_exception, socket.timeout)
-		if got_urlerror or got_timeout:
-			# urllib2 reported an error when fetching the feed.
-			# Check to see if it was a timeout.
-			if not (got_timeout or is_timeout_exception(bozo_exception)):
-				errors.append("Error while fetching feed:")
-				errors.append(str(bozo_exception))
-				errors.append("")
-				fatal = True
-			elif config["ignoretimeouts"]:
+		if "rawdog_timeout" in p:
+			if config["ignoretimeouts"]:
 				return False
 			else:
 				errors.append("Timeout while reading feed.")
 				errors.append("")
 				fatal = True
+		elif "rawdog_exception" in p:
+			errors.append("Error fetching or parsing feed:")
+			errors.append(str(p["rawdog_exception"]))
+			if config["showtracebacks"] and "rawdog_traceback" in p:
+				from traceback import format_tb
+				errors.append("".join(format_tb(p["rawdog_traceback"])))
+			errors.append("")
+			fatal = True
 		elif last_status == 304:
 			# The feed hasn't changed. Return False to indicate
 			# that we shouldn't do expiry.
-- 
2.35.1