Rewrite encode_references to use regexps.

author Adam Sampson <ats@offog.org>

Wed, 28 Jan 2009 12:57:10 +0000 (12:57 +0000)

committer Adam Sampson <ats@offog.org>

Wed, 28 Jan 2009 12:57:10 +0000 (12:57 +0000)
author Adam Sampson <ats@offog.org>
Wed, 28 Jan 2009 12:57:10 +0000 (12:57 +0000)
committer Adam Sampson <ats@offog.org>
Wed, 28 Jan 2009 12:57:10 +0000 (12:57 +0000)
diff --git a/NEWS b/NEWS

index dbb839e5c0666efe343b1c8196e0370ecdb45eab..a81fbe31abd7b3f0fff7f90af4e00d3c5ff9f0ec 100644 (file)
--- a/NEWS
+++ b/NEWS
@@ -1,7 +1,8 @@
  - rawdog 2.12
  
-Cache the result of locale.getpreferredencoding(). This significantly
-speeds up writing output files.
+Cache the result of locale.getpreferredencoding(), and rewrite
+encode_references() to use regexps. This significantly speeds up writing
+output files.
  
  Update feedparser to revision 291, which fixes the handling of
  <media:title> elements (reported by Darren Griffith).
diff --git a/rawdoglib/rawdog.py b/rawdoglib/rawdog.py

index 3dba503bec37f28b8a28fe843986487d139553fd..79ab28b75621bb397ed4df411b2553e69b4ba2de 100644 (file)
--- a/rawdoglib/rawdog.py
+++ b/rawdoglib/rawdog.py
@@ -59,18 +59,12 @@ def format_time(secs, config):
                 format = config["timeformat"] + ", " + config["dayformat"]
         return safe_ftime(format, t)
  
+high_char_re = re.compile(r'[^\000-\177]')
  def encode_references(s):
         """Encode characters in a Unicode string using HTML references."""
-       r = StringIO()
-       for c in s:
-               n = ord(c)
-               if n >= 128:
-                       r.write("&#" + str(n) + ";")
-               else:
-                       r.write(c)
-       v = r.getvalue()
-       r.close()
-       return v
+       def encode(m):
+               return "&#" + str(ord(m.group(0))) + ";"
+       return high_char_re.sub(encode, s)
  
  # This list of block-level elements came from the HTML 4.01 specification.
  block_level_re = re.compile(r'^\s*<(p|h1|h2|h3|h4|h5|h6|ul|ol|pre|dl|div|noscript|blockquote|form|hr|table|fieldset|address)[^a-z]', re.I)
author	Adam Sampson <ats@offog.org>
	Wed, 28 Jan 2009 12:57:10 +0000 (12:57 +0000)
committer	Adam Sampson <ats@offog.org>
	Wed, 28 Jan 2009 12:57:10 +0000 (12:57 +0000)
NEWS		patch | blob | history
rawdoglib/rawdog.py		patch | blob | history