From: Adam Sampson Date: Mon, 11 Jun 2018 19:30:33 +0000 (+0100) Subject: Override pytidylib's BASE_OPTIONS explicitly. X-Git-Url: http://git.ozo.com/?a=commitdiff_plain;h=1ca4d2f1ad8c66d4235876b7eb39657a115ac2bb;p=rawdog%2F.git Override pytidylib's BASE_OPTIONS explicitly. This variable has moved from tidylib.BASE_OPTIONS to tidylib.tidy.BASE_OPTIONS as of pytidylib 0.3.2. --- diff --git a/NEWS b/NEWS index 2e7d5e2..161290b 100644 --- a/NEWS +++ b/NEWS @@ -7,6 +7,9 @@ of the feed in question). Add the --find option, which shows what feedscanner returns for a given URL (as --dump does for feedparser). +The location of BASE_OPTIONS has changed in pytidylib 0.3.2; rather than +trying to change it, override the corresponding options explicitly. + - rawdog 2.22 When handling an HTTP 301 redirect response, check whether the new diff --git a/rawdoglib/rawdog.py b/rawdoglib/rawdog.py index 5c4eaa8..5731593 100644 --- a/rawdoglib/rawdog.py +++ b/rawdoglib/rawdog.py @@ -134,20 +134,27 @@ def sanitise_html(html, baseurl, inline, config): html = "

" + html if config["tidyhtml"]: + # This must include: options where the default value in tidy + # has changed at some point, and pytidylib's BASE_OPTIONS which + # it would otherwise set if we didn't specify them. args = { "numeric_entities": 1, + # In tidy 0.99 these are ASCII; in tidy 5, UTF-8. "input_encoding": "ascii", "output_encoding": "ascii", "output_html": 1, "output_xhtml": 0, "output_xml": 0, + "indent": 0, + "tidy-mark": 0, + "alt-text": "", + "doctype": "strict", + "force-output": 1, "wrap": 0, } call_hook("mxtidy_args", config, args, baseurl, inline) call_hook("tidy_args", config, args, baseurl, inline) if tidylib is not None: - # Disable PyTidyLib's somewhat unhelpful defaults. - tidylib.BASE_OPTIONS = {} output = tidylib.tidy_document(html, args)[0] elif mxtidy is not None: output = mxtidy.tidy(html, None, None, **args)[2]