Check for non-absolute Location headers in 301 redirects.
author Adam Sampson <ats@offog.org>
Tue, 26 Jul 2016 11:26:09 +0000 (12:26 +0100)
committer Adam Sampson <ats@offog.org>
Tue, 26 Jul 2016 11:26:09 +0000 (12:26 +0100)
NEWS
rawdoglib/rawdog.py
test-rawdog
testserver.py

diff --git a/NEWS b/NEWS
index 0dc5b54296a62b0bbd08f618b13299dca09a8a56..8bd8ca291782f6ff7c6201145ed92b1fa24344d5 100644 (file)
--- a/NEWS
+++ b/NEWS
@@ -1,5 +1,10 @@
 - rawdog 2.22
 
+When handling an HTTP 301 redirect response, check whether the new
+location is an absolute URI (as the HTTP/1.1 specification says it
+should be). Some broken servers return a relative path, or junk, and in
+those cases rawdog shouldn't update the URL in the config file.
+
 - rawdog 2.21
 
 Don't crash when asked to show a non-existant template ("-s foo") -- and
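diff --git a/rawdoglib/rawdog.py b/rawdoglib/rawdog.py
index 81ae64c658221684ac2848a0def94c49a8a827a7..82b284a8ae20d09e253fc487f9a9628176eb9288 100644 (file)
--- a/rawdoglib/rawdog.py
+++ b/rawdoglib/rawdog.py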
@@ -41,6 +41,7 @@ import threading
 import time
 import types
 import urllib2
+import urlparse
 
 try:
        import tidylib
@@ -538,7 +539,22 @@ class Feed:
                                i += 1
                        location = responses[i - 1].get("location")
 
-                       if location is None:
+                       # According to RFC 2616, the Location header should be
+                       # an absolute URI. This doesn't stop the occasional
+                       # server sending something like "Location: /" or
+                       # "Location: //foo/bar". It's usually a sign of
+                       # brokenness, so fail rather than trying to interpret
+                       # it liberally.
+                       valid_uri = True
+                       if location is not None:
+                               parsed = urlparse.urlparse(location)
+                               if parsed.scheme == "" or parsed.netloc == "":
+                                       valid_uri = False
+
+                       if not valid_uri:
+                               errors.append("New URL:     " + location)
+                               errors.append("The feed returned a permanent redirect, but with an invalid new location.")
+                       elif location is None:
                                errors.append("The feed returned a permanent redirect, but without a new location.")
                        else:
                                errors.append("New URL:     " + location)
diff --git a/test-rawdog b/test-rawdog
index 7d3a953e98cb57299178bf830ea1f35d46eb9d43..2ae7edcca37c05d561ebfa5947b6f0588853ce83 100755 (executable)
--- a/test-rawdog
+++ b/test-rawdog
@@ -1,6 +1,6 @@
 #!/bin/sh
 # test-rawdog: run some basic tests to make sure rawdog's working.
-# Copyright 2013, 2014, 2015 Adam Sampson <ats@offog.org>
+# Copyright 2013, 2014, 2015, 2016 Adam Sampson <ats@offog.org>
 #
 # rawdog is free software; you can redistribute and/or modify it
 # under the terms of that license as published by the Free Software
@@ -1570,6 +1570,14 @@ begin "HTTP 301 to nowhere"
 add "feed 0 $httpurl/301"
 rune "without a new location" -u
 
+for badurl in "/" "http:" "://example.org" "://example.org:80,example.org/"; do
+       begin "HTTP 301 to invalid Location $badurl"
+       add "changeconfig true"
+       add "feed 0 $httpurl/301/=$badurl"
+       rune "invalid new location" -u
+       contains $statedir/config "$httpurl/301/=$badurl"
+done
+
 begin "HTTP 410 gone"
 add "feed 0 $httpurl/410"
 rune "You should remove it" -u
diff --git a/testserver.py b/testserver.py
index 2fe5a14ee3775885dd9bb6c3fa89cb569eee858c..d78e05d87b4a07fcca20de3b3f13f33344ba4961 100644 (file)
--- a/testserver.py
+++ b/testserver.py
@@ -1,5 +1,5 @@
 # testserver: servers for rawdog's test suite.
-# Copyright 2013 Adam Sampson <ats@offog.org>
+# Copyright 2013, 2016 Adam Sampson <ats@offog.org>
 #
 # rawdog is free software; you can redistribute and/or modify it
 # under the terms of that license as published by the Free Software
@@ -104,9 +104,16 @@ class HTTPRequestHandler(SimpleHTTPServer.SimpleHTTPRequestHandler):
         if m:
             # Request for a particular response code.
             code = int(m.group(1))
+            dest = m.group(2)
             self.send_response(code)
-            if m.group(2):
-                self.send_header("Location", self.server.base_url + m.group(2))
+            if dest:
+                if dest.startswith("/="):
+                    # Provide an exact value for Location (to simulate an
+                    # invalid response).
+                    dest = dest[2:]
+                else:
+                    dest = self.server.base_url + dest
+                self.send_header("Location", dest)
             self.end_headers()
             return None