Use urllib2, and factor URL-fetching out to a function.

author Adam Sampson <ats@offog.org>

Sun, 4 Aug 2013 12:41:00 +0000 (12:41 +0000)

committer Adam Sampson <ats@offog.org>

Sun, 4 Aug 2013 12:41:00 +0000 (12:41 +0000)
author Adam Sampson <ats@offog.org>
Sun, 4 Aug 2013 12:41:00 +0000 (12:41 +0000)
committer Adam Sampson <ats@offog.org>
Sun, 4 Aug 2013 12:41:00 +0000 (12:41 +0000)
diff --git a/rawdoglib/feedscanner.py b/rawdoglib/feedscanner.py

index a9bc0cddd1deb3e0b8b3f06e762d16c5d0c4dc6b..a690655a1669e2c2590ef307f15e12e4c6292a56 100644 (file)
--- a/rawdoglib/feedscanner.py
+++ b/rawdoglib/feedscanner.py
@@ -34,7 +34,7 @@ PERFORMANCE OF THIS SOFTWARE.
  
  import feedparser
  import re
-import urllib
+import urllib2
  import urlparse
  import HTMLParser
  
@@ -47,6 +47,21 @@ def is_feed(url):
          version = ""
      return (version != "")
  
+def fetch_url(url):
+    """Fetch the given URL and return the data from it as a Unicode string.
+
+    Errors from urllib2.urlopen() or read() propagate to the caller.
+    """
+    f = urllib2.urlopen(urllib2.Request(url))
+    try:
+        data = f.read()
+    finally:
+        # Close the response even if read() fails part-way through.
+        f.close()
+    # Silently ignore encoding errors -- we don't need to go to the bother of
+    # detecting the encoding properly (like feedparser does).
+    return data.decode("UTF-8", "ignore")
+
  class FeedFinder(HTMLParser.HTMLParser):
      def __init__(self, base_uri):
          HTMLParser.HTMLParser.__init__(self)
@@ -97,12 +112,7 @@ def feeds(page_url):
      if is_feed(page_url):
          return [page_url]
  
-    f = urllib.urlopen(page_url)
-    # Silently ignore encoding errors -- we don't need to go to the bother of
-    # detecting the encoding properly (like feedparser does).
-    data = f.read().decode("UTF-8", "ignore")
-    f.close()
-
+    data = fetch_url(page_url)
      parser = FeedFinder(page_url)
      try:
          parser.feed(data)
author	Adam Sampson <ats@offog.org>
	Sun, 4 Aug 2013 12:41:00 +0000 (12:41 +0000)
committer	Adam Sampson <ats@offog.org>
	Sun, 4 Aug 2013 12:41:00 +0000 (12:41 +0000)