reported that the enclosure plugin didn't do this, and having had a look
at the others it seems to be a common problem.
+Make feedscanner handle "Content-Encoding: gzip" in responses, as
+tumblr.com's web servers will send gzip-encoded responses even if the
+request explicitly refuses them.
+
- rawdog 2.17
Add a one-paragraph description of rawdog to the README file, for use by
PERFORMANCE OF THIS SOFTWARE.
"""
+import cStringIO
import feedparser
+import gzip
import re
import urllib2
import urlparse
"""Fetch the given URL and return the data from it as a Unicode string."""
request = urllib2.Request(url)
+ request.add_header("Accept-Encoding", "gzip")
f = urllib2.urlopen(request)
+ headers = f.info()
data = f.read()
f.close()
+ # We have to support gzip encoding because some servers will send
+ # gzip-encoded responses even if the request explicitly refuses them
+ # in Accept-Encoding.
+ encodings = headers.get("Content-Encoding", "")
+ encodings = [s.strip() for s in encodings.split(",")]
+ if "gzip" in encodings:
+ f = gzip.GzipFile(fileobj=cStringIO.StringIO(data))
+ data = f.read()
+ f.close()
+
# Silently ignore encoding errors -- we don't need to go to the bother of
# detecting the encoding properly (like feedparser does).
data = data.decode("UTF-8", "ignore")
rune "Adding feed" -a $httpurl/page.html
contains "$statedir/config" $httpurl/feed.atom
+begin "add feed, gzip-encoded response"
+make_rss20 $httpdir/feed.rss
+make_html_head $httpdir/page.html <<EOF
+<link rel="alternate" type="application/rss+xml" title="RSS" href="$httpurl/feed.rss">
+EOF
+rune "Adding feed" -a $httpurl/gzip/page.html
+contains "$statedir/config" $httpurl/feed.rss
+
+begin "add feed, gzip-encoded feed"
+make_rss20 $httpdir/feed.rss
+make_html_head $httpdir/page.html <<EOF
+<link rel="alternate" type="application/rss+xml" title="RSS" href="$httpurl/gzip/feed.rss">
+EOF
+rune "Adding feed" -a $httpurl/page.html
+contains "$statedir/config" $httpurl/gzip/feed.rss
+
begin "remove feed"
add "feed 3h $httpurl/0.rss"
add "feed 3h $httpurl/1.rss"
import SimpleHTTPServer
import SocketServer
import base64
+import cStringIO
+import gzip
import hashlib
import os
import re
self.end_headers()
return None
+ encoding = None
+ m = re.match(r'^/(gzip)(/.*)$', self.path)
+ if m:
+ # Request for a content encoding.
+ encoding = m.group(1)
+ self.path = m.group(2)
+
m = re.match(r'^/([^/]+)$', self.path)
if m:
# Request for a file.
mime_type = "text/html"
self.send_response(200)
+
+ if encoding:
+ self.send_header("Content-Encoding", encoding)
+ if encoding == "gzip":
+ data = f.read()
+ f.close()
+ f = cStringIO.StringIO()
+ g = gzip.GzipFile(fileobj=f, mode="wb")
+ g.write(data)
+ g.close()
+ size = f.tell()
+ f.seek(0)
+
self.send_header("Content-Length", size)
self.send_header("Content-Type", mime_type)
self.send_header("ETag", etag)