From: Adam Sampson
Date: Sun, 4 Aug 2013 12:41:00 +0000 (+0000)
Subject: Use urllib2, and factor URL-fetching out to a function.
X-Git-Tag: v2.18~3
X-Git-Url: http://git.ozo.com/?a=commitdiff_plain;h=ec3a9e54e410c0bc6b056d6094de304111947636;p=rawdog%2F.git

Use urllib2, and factor URL-fetching out to a function.
---

diff --git a/rawdoglib/feedscanner.py b/rawdoglib/feedscanner.py
index a9bc0cd..a690655 100644
--- a/rawdoglib/feedscanner.py
+++ b/rawdoglib/feedscanner.py
@@ -34,7 +34,7 @@ PERFORMANCE OF THIS SOFTWARE.
 
 import feedparser
 import re
-import urllib
+import urllib2
 import urlparse
 import HTMLParser
 
@@ -47,6 +47,21 @@ def is_feed(url):
 		version = ""
 	return (version != "")
 
+def fetch_url(url):
+	"""Fetch the given URL and return the data from it as a Unicode string."""
+
+	request = urllib2.Request(url)
+
+	f = urllib2.urlopen(request)
+	data = f.read()
+	f.close()
+
+	# Silently ignore encoding errors -- we don't need to go to the bother of
+	# detecting the encoding properly (like feedparser does).
+	data = data.decode("UTF-8", "ignore")
+
+	return data
+
 class FeedFinder(HTMLParser.HTMLParser):
 	def __init__(self, base_uri):
 		HTMLParser.HTMLParser.__init__(self)
@@ -97,12 +112,7 @@ def feeds(page_url):
 	if is_feed(page_url):
 		return [page_url]
 
-	f = urllib.urlopen(page_url)
-	# Silently ignore encoding errors -- we don't need to go to the bother of
-	# detecting the encoding properly (like feedparser does).
-	data = f.read().decode("UTF-8", "ignore")
-	f.close()
-
+	data = fetch_url(page_url)
 	parser = FeedFinder(page_url)
 	try:
 		parser.feed(data)