From 20b35ecb79a44661576c502d7a26304baf2b2b17 Mon Sep 17 00:00:00 2001 From: Adam Sampson Date: Sat, 11 Oct 2003 17:01:21 +0000 Subject: [PATCH] Add HTTP basic authentication support. --- config | 7 ++++++- rawdoglib/feedparser.py | 19 +++++++++++++++--- rawdoglib/rawdog.py | 44 +++++++++++++++++++++++------------------ 3 files changed, 47 insertions(+), 23 deletions(-) diff --git a/config b/config index 4a6f637..a04a873 100644 --- a/config +++ b/config @@ -56,13 +56,18 @@ showfeeds 1 # errors and are on a slow connection, increase this. timeout 30 -# The feeds you want to watch, in the format "feed period url". +# The feeds you want to watch, in the format "feed period url [args]". # The period is the minimum time in minutes between updates; if less # than period minutes have passed, "rawdog update" will skip that feed. # Specifying a period less than 30 minutes is considered to be bad manners; it # is suggested that you make the period as long as possible. +# Arguments are optional, and are of the form "key=value"; possible arguments +# are: +# user User for HTTP basic authentication +# password Password for HTTP basic authentication # You can specify as many feeds as you like. feed 60 http://www.advogato.org/rss/articles.xml feed 30 http://news.bbc.co.uk/rss/newsonline_uk_edition/front_page/rss091.xml feed 180 http://diveintomark.org/xml/rss.xml +feed 180 http://secretfeed.example.com/secret.rss user=bob password=secret diff --git a/rawdoglib/feedparser.py b/rawdoglib/feedparser.py index 1ea72ef..d976be8 100644 --- a/rawdoglib/feedparser.py +++ b/rawdoglib/feedparser.py @@ -78,6 +78,9 @@ __history__ = """ inline and as used in some RSS 2.0 feeds 2.5.3 - 8/6/2003 - TvdV - patch to track whether we're inside an image or textInput, and also to return the character encoding (if specified) + +Modifications for rawdog by Adam Sampson + Added HTTP basic auth support. """ try: @@ -505,7 +508,7 @@ class FeedURLHandler(urllib2.HTTPRedirectHandler, urllib2.HTTPDefaultErrorHandle http_error_300 = http_error_302 http_error_307 = http_error_302 -def open_resource(source, etag=None, modified=None, agent=None, referrer=None): +def open_resource(source, etag=None, modified=None, agent=None, referrer=None, authinfo=None): """ URI, filename, or string --> stream @@ -528,6 +531,9 @@ def open_resource(source, etag=None, modified=None, agent=None, referrer=None): If the referrer argument is supplied, it will be used as the value of a Referer[sic] request header. + + If the authinfo argument is supplied, it will be used as a (user, password) + pair for HTTP basic authentication. """ if hasattr(source, "read"): @@ -550,6 +556,13 @@ def open_resource(source, etag=None, modified=None, agent=None, referrer=None): request.add_header("Referer", referrer) request.add_header("Accept-encoding", "gzip") opener = urllib2.build_opener(FeedURLHandler()) + if authinfo: + (user, password) = authinfo + class DummyPasswordMgr: + def add_password(self, realm, uri, user, passwd): pass + def find_user_password(self, realm, authuri): + return (user, password) + opener.add_handler(urllib2.HTTPBasicAuthHandler(DummyPasswordMgr())) opener.addheaders = [] # RMK - must clear so we only send our custom User-Agent try: return opener.open(request) @@ -653,9 +666,9 @@ def parse_http_date(date): # the month or weekday lookup probably failed indicating an invalid timestamp return None -def parse(uri, etag=None, modified=None, agent=None, referrer=None): +def parse(uri, etag=None, modified=None, agent=None, referrer=None, authinfo=None): r = FeedParser() - f = open_resource(uri, etag=etag, modified=modified, agent=agent, referrer=referrer) + f = open_resource(uri, etag=etag, modified=modified, agent=agent, referrer=referrer, authinfo=authinfo) data = f.read() if hasattr(f, "headers"): if f.headers.get('content-encoding', '') == 'gzip': diff --git a/rawdoglib/rawdog.py b/rawdoglib/rawdog.py index 32a7e95..31bc2a4 100644 --- a/rawdoglib/rawdog.py +++ b/rawdoglib/rawdog.py @@ -61,9 +61,10 @@ def encode_references(s): class Feed: """An RSS feed.""" - def __init__(self, url, period): + def __init__(self, url): self.url = url - self.period = period + self.period = 30 + self.args = {} self.etag = None self.modified = None self.title = None @@ -77,18 +78,17 @@ class Feed: if not force and (now - self.last_update) < (self.period * 60): return 0 + if self.args.has_key("user") and self.args.has_key("password"): + authinfo = (self.args["user"], self.args["password"]) + else: + authinfo = None + try: - # Kludge for inadequate authentication support in - # urllib2. - u = self.url - if u.startswith("https:"): - import urllib - u = urllib.urlopen(u) - - p = feedparser.parse(u, self.etag, - self.modified, "rawdog/" + VERSION) + p = feedparser.parse(self.url, self.etag, + self.modified, "rawdog/" + VERSION, + None, authinfo) status = p.get("status") - except: + except IOError: p = None status = None @@ -302,10 +302,16 @@ class Config: raise ConfigError("Bad line in config: " + line) if l[0] == "feed": - l = l[1].split(" ", 1) - if len(l) != 2: + l = l[1].split(" ") + if len(l) < 2: raise ConfigError("Bad line in config: " + line) - self["feedslist"].append((l[1], int(l[0]))) + args = {} + for a in l[2:]: + as = a.split("=", 1) + if len(as) != 2: + raise ConfigError("Bad feed argument in config: " + a) + args[as[0]] = as[1] + self["feedslist"].append((l[1], int(l[0]), args)) elif l[0] == "outputfile": self["outputfile"] = l[1] elif l[0] == "maxarticles": @@ -347,12 +353,12 @@ class Rawdog(Persistable): timeoutsocket.setDefaultSocketTimeout(config["timeout"]) seenfeeds = {} - for (url, period) in config["feedslist"]: + for (url, period, args) in config["feedslist"]: seenfeeds[url] = 1 if not self.feeds.has_key(url): - self.feeds[url] = Feed(url, period) - else: - self.feeds[url].period = period + self.feeds[url] = Feed(url) + self.feeds[url].period = period + self.feeds[url].args = args for url in self.feeds.keys(): if not seenfeeds.has_key(url): del self.feeds[url] -- 2.35.1