From: Adam Sampson Date: Tue, 26 Jul 2016 11:26:09 +0000 (+0100) Subject: Check for non-absolute Location headers in 301 redirects. X-Git-Tag: v2.22~6 X-Git-Url: http://git.ozo.com/?a=commitdiff_plain;h=7490b2c4215ce6d38c8e7ac93498c373e077bead;p=rawdog%2F.git Check for non-absolute Location headers in 301 redirects. --- diff --git a/NEWS b/NEWS index 0dc5b54..8bd8ca2 100644 --- a/NEWS +++ b/NEWS @@ -1,5 +1,10 @@ - rawdog 2.22 +When handling an HTTP 301 redirect response, check whether the new +location is an absolute URI (as the HTTP/1.1 specification says it +should be). Some broken servers return a relative path, or junk, and in +those cases rawdog shouldn't update the URL in the config file. + - rawdog 2.21 Don't crash when asked to show a non-existant template ("-s foo") -- and diff --git a/rawdoglib/rawdog.py b/rawdoglib/rawdog.py index 81ae64c..82b284a 100644 --- a/rawdoglib/rawdog.py +++ b/rawdoglib/rawdog.py @@ -41,6 +41,7 @@ import threading import time import types import urllib2 +import urlparse try: import tidylib @@ -538,7 +539,22 @@ class Feed: i += 1 location = responses[i - 1].get("location") - if location is None: + # According to RFC 2616, the Location header should be + # an absolute URI. This doesn't stop the occasional + # server sending something like "Location: /" or + # "Location: //foo/bar". It's usually a sign of + # brokenness, so fail rather than trying to interpret + # it liberally. + valid_uri = True + if location is not None: + parsed = urlparse.urlparse(location) + if parsed.scheme == "" or parsed.netloc == "": + valid_uri = False + + if not valid_uri: + errors.append("New URL: " + location) + errors.append("The feed returned a permanent redirect, but with an invalid new location.") + elif location is None: errors.append("The feed returned a permanent redirect, but without a new location.") else: errors.append("New URL: " + location) diff --git a/test-rawdog b/test-rawdog index 7d3a953..2ae7edc 100755 --- a/test-rawdog +++ b/test-rawdog @@ -1,6 +1,6 @@ #!/bin/sh # test-rawdog: run some basic tests to make sure rawdog's working. -# Copyright 2013, 2014, 2015 Adam Sampson +# Copyright 2013, 2014, 2015, 2016 Adam Sampson # # rawdog is free software; you can redistribute and/or modify it # under the terms of that license as published by the Free Software @@ -1570,6 +1570,14 @@ begin "HTTP 301 to nowhere" add "feed 0 $httpurl/301" rune "without a new location" -u +for badurl in "/" "http:" "://example.org" "://example.org:80,example.org/"; do + begin "HTTP 301 to invalid Location $badurl" + add "changeconfig true" + add "feed 0 $httpurl/301/=$badurl" + rune "invalid new location" -u + contains $statedir/config "$httpurl/301/=$badurl" +done + begin "HTTP 410 gone" add "feed 0 $httpurl/410" rune "You should remove it" -u diff --git a/testserver.py b/testserver.py index 2fe5a14..d78e05d 100644 --- a/testserver.py +++ b/testserver.py @@ -1,5 +1,5 @@ # testserver: servers for rawdog's test suite. -# Copyright 2013 Adam Sampson +# Copyright 2013, 2016 Adam Sampson # # rawdog is free software; you can redistribute and/or modify it # under the terms of that license as published by the Free Software @@ -104,9 +104,16 @@ class HTTPRequestHandler(SimpleHTTPServer.SimpleHTTPRequestHandler): if m: # Request for a particular response code. code = int(m.group(1)) + dest = m.group(2) self.send_response(code) - if m.group(2): - self.send_header("Location", self.server.base_url + m.group(2)) + if dest: + if dest.startswith("/="): + # Provide an exact value for Location (to simulate an + # invalid response). + dest = dest[2:] + else: + dest = self.server.base_url + dest + self.send_header("Location", dest) self.end_headers() return None