import feedparser
import re
-import urllib
+import urllib2
import urlparse
import HTMLParser
version = ""
return (version != "")
+def fetch_url(url):
+ """Fetch the given URL and return the data from it as a Unicode string."""
+
+ request = urllib2.Request(url)
+
+ f = urllib2.urlopen(request)
+ data = f.read()
+ f.close()
+
+ # Silently ignore encoding errors -- we don't need to go to the bother of
+ # detecting the encoding properly (like feedparser does).
+ data = data.decode("UTF-8", "ignore")
+
+ return data
+
class FeedFinder(HTMLParser.HTMLParser):
def __init__(self, base_uri):
HTMLParser.HTMLParser.__init__(self)
if is_feed(page_url):
return [page_url]
- f = urllib.urlopen(page_url)
- # Silently ignore encoding errors -- we don't need to go to the bother of
- # detecting the encoding properly (like feedparser does).
- data = f.read().decode("UTF-8", "ignore")
- f.close()
-
+ data = fetch_url(page_url)
parser = FeedFinder(page_url)
try:
parser.feed(data)