From ec3a9e54e410c0bc6b056d6094de304111947636 Mon Sep 17 00:00:00 2001 From: Adam Sampson Date: Sun, 4 Aug 2013 12:41:00 +0000 Subject: [PATCH] Use urllib2, and factor URL-fetching out to a function. --- rawdoglib/feedscanner.py | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/rawdoglib/feedscanner.py b/rawdoglib/feedscanner.py index a9bc0cd..a690655 100644 --- a/rawdoglib/feedscanner.py +++ b/rawdoglib/feedscanner.py @@ -34,7 +34,7 @@ PERFORMANCE OF THIS SOFTWARE. import feedparser import re -import urllib +import urllib2 import urlparse import HTMLParser @@ -47,6 +47,21 @@ def is_feed(url): version = "" return (version != "") +def fetch_url(url): + """Fetch the given URL and return the data from it as a Unicode string.""" + + request = urllib2.Request(url) + + f = urllib2.urlopen(request) + data = f.read() + f.close() + + # Silently ignore encoding errors -- we don't need to go to the bother of + # detecting the encoding properly (like feedparser does). + data = data.decode("UTF-8", "ignore") + + return data + class FeedFinder(HTMLParser.HTMLParser): def __init__(self, base_uri): HTMLParser.HTMLParser.__init__(self) @@ -97,12 +112,7 @@ def feeds(page_url): if is_feed(page_url): return [page_url] - f = urllib.urlopen(page_url) - # Silently ignore encoding errors -- we don't need to go to the bother of - # detecting the encoding properly (like feedparser does). - data = f.read().decode("UTF-8", "ignore") - f.close() - + data = fetch_url(page_url) parser = FeedFinder(page_url) try: parser.feed(data) -- 2.35.1