From f3e8b26227f9384b58ed6aa533d858acec86c342 Mon Sep 17 00:00:00 2001 From: Michael James Gratton Date: Sat, 28 Jul 2018 15:08:39 +1000 Subject: [PATCH] Fix MS mailers munging plain-text signatures in HTML body parts. Geary previously wrapped plain text sigs (i.e. those that did not contain any HTML elements) in a DIV styled to preserve whitespace. This is all perfectly fine, except that certain Microsoft products munged the email for presentation and destroy the whitespace present in the message body, causing the sig to be mis-rendered. This works around their shitty software by using BRs instead of the DIV, also escapes other control chars and sequential whitespace as well when using a plain text sig. It also stops trying to smart-escape quoted reply text, since we know it's going to be HTML from the conversation viewer, meaning smart escaping only needs to be used for sigs. See issue #29 --- src/client/accounts/add-edit-page.vala | 2 +- src/client/composer/composer-widget.vala | 2 +- src/engine/rfc822/rfc822-utils.vala | 10 +-- src/engine/util/util-html.vala | 82 ++++++++++++++++-------- test/engine/util-html-test.vala | 46 +++++++++---- 5 files changed, 98 insertions(+), 44 deletions(-) diff --git a/src/client/accounts/add-edit-page.vala b/src/client/accounts/add-edit-page.vala index 62e19566..66a05a95 100644 --- a/src/client/accounts/add-edit-page.vala +++ b/src/client/accounts/add-edit-page.vala @@ -603,7 +603,7 @@ public class AddEditPage : Gtk.Box { private void on_signature_stack_changed() { if (signature_stack.visible_child_name == "preview_window") - preview_webview.load_html(Geary.HTML.smart_escape(email_signature, true), null); + preview_webview.load_html(Geary.HTML.smart_escape(email_signature), null); } private uint16 get_default_smtp_port() { diff --git a/src/client/composer/composer-widget.vala b/src/client/composer/composer-widget.vala index 16843e8b..787a0331 100644 --- a/src/client/composer/composer-widget.vala +++ b/src/client/composer/composer-widget.vala @@ -2141,7 +2141,7 @@ public class ComposerWidget : Gtk.EventBox { } account_sig = (!Geary.String.is_empty_or_whitespace(account_sig)) - ? Geary.HTML.smart_escape(account_sig, true) + ? Geary.HTML.smart_escape(account_sig) : ""; } diff --git a/src/engine/rfc822/rfc822-utils.vala b/src/engine/rfc822/rfc822-utils.vala index 44acbc49..7087cb7f 100644 --- a/src/engine/rfc822/rfc822-utils.vala +++ b/src/engine/rfc822/rfc822-utils.vala @@ -302,12 +302,14 @@ public string quote_email_for_forward(Geary.Email email, string? quote, TextForm return quoted; } -private string quote_body(Geary.Email email, string? quote, bool use_quotes, TextFormat format) +private string quote_body(Geary.Email email, + string? html_quote, + bool use_quotes, + TextFormat format) throws Error { Message? message = email.get_message(); - bool preserve_whitespace = !message.has_html_body(); string? body_text = null; - if (quote == null) { + if (String.is_empty(html_quote)) { switch (format) { case TextFormat.HTML: body_text = message.has_html_body() @@ -322,7 +324,7 @@ private string quote_body(Geary.Email email, string? quote, bool use_quotes, Tex break; } } else { - body_text = Geary.HTML.smart_escape(quote, preserve_whitespace); + body_text = html_quote; } // Wrap the whole thing in a blockquote. diff --git a/src/engine/util/util-html.vala b/src/engine/util/util-html.vala index 522c1192..ce76b1d7 100644 --- a/src/engine/util/util-html.vala +++ b/src/engine/util/util-html.vala @@ -10,6 +10,8 @@ namespace Geary.HTML { // Originally from here: http://daringfireball.net/2010/07/improved_regex_for_matching_urls public const string URL_REGEX = "(?i)\\b((?:[a-z][\\w-]+:(?:/{1,3}|[a-z0-9%])|www\\d{0,3}[.]|[a-z0-9.\\-]+[.][a-z]{2,4}/)(?:[^\\s()<>]+|\\(([^\\s()<>]+|(\\([^\\s()<>]+\\)))*\\))+(?:\\(([^\\s()<>]+|(\\([^\\s()<>]+\\)))*\\)|[^\\s`!()\\[\\]{};:'\".,<>?«»“”‘’]))"; +private Regex WHITESPACE_REGEX; + private int init_count = 0; private Gee.HashSet? breaking_elements; private Gee.HashSet? spacing_elements; @@ -27,6 +29,12 @@ public void init() { return; init_element_sets(); + + try { + WHITESPACE_REGEX = new Regex("(\\R|\\t|[ ]+)"); + } catch (GLib.Error err) { + assert(true); + } } private void init_element_sets() { @@ -95,20 +103,58 @@ private void init_element_sets() { }); } +/** Converts plain text to HTML with reserved characters escaped. */ public inline string escape_markup(string? plain) { - return (!String.is_empty(plain) && plain.validate()) ? Markup.escape_text(plain) : ""; + return (!String.is_empty(plain) && plain.validate()) + ? Markup.escape_text(plain) : ""; } +/** Converts plain text to HTML with whitespace (SP, CR, LF) preserved. */ public string preserve_whitespace(string? text) { - if (String.is_empty(text)) - return ""; - - string output = text.replace(" ", " "); - output = output.replace("\r\n", "
"); - output = output.replace("\n", "
"); - output = output.replace("\r", "
"); + string preserved = ""; + if (!String.is_empty(text)) { + try { + preserved = WHITESPACE_REGEX.replace_eval( + text, -1, 0, 0, (info, result) => { + string match = info.fetch(0); + if (match[0] == ' ') { + result.append_c(' '); + for (int len = match.length - 1; len > 0; len--) { + result.append(" "); + } + } else if (match == "\t") { + result.append("    "); + } else { + result.append("
"); + } + return false; + }); + } catch (Error err) { + debug("Error preserving whitespace: %s", err.message); + } + } + return preserved; +} - return output; +/** + * Escape reserved HTML entities and preserves whitespace, if needed. + * + * Returns a string with reserved HTML entities escaped and + * whitespace preserved if the given string does not have HTML + * tags. + */ +public string smart_escape(string? text) { + string escaped = text ?? ""; + if (text != null) { + bool is_html = Regex.match_simple( + "<[A-Z]+ ?(?: [^>]*)?\\/?>", text, RegexCompileFlags.CASELESS + ); + if (!is_html) { + escaped = escape_markup(escaped); + escaped = preserve_whitespace(escaped); + } + } + return escaped; } /** @@ -168,22 +214,4 @@ private void recurse_html_nodes_for_text(Xml.Node? node, } } -// Escape reserved HTML entities if the string does not have HTML -// tags. If there are no tags, or if preserve_whitespace_in_html is -// true, wrap the string a div to preserve whitespace. -public string smart_escape(string? text, bool preserve_whitespace_in_html) { - if (text == null) - return text; - - string res = text; - if (!Regex.match_simple("<[A-Z]*(?: [^>]*)?\\/?>", res, - RegexCompileFlags.CASELESS)) { - res = Geary.HTML.escape_markup(res); - preserve_whitespace_in_html = true; - } - if (preserve_whitespace_in_html) - res = @"
$res
"; - return res; -} - } diff --git a/test/engine/util-html-test.vala b/test/engine/util-html-test.vala index 4f3b112f..b59230de 100644 --- a/test/engine/util-html-test.vala +++ b/test/engine/util-html-test.vala @@ -9,6 +9,7 @@ class Geary.HTML.UtilTest : TestCase { public UtilTest() { base("Geary.HTML.Util"); + add_test("preserve_whitespace", preserve_whitespace); add_test("smart_escape_div", smart_escape_div); add_test("smart_escape_no_closing_tag", smart_escape_no_closing_tag); add_test("smart_escape_img", smart_escape_img); @@ -19,39 +20,62 @@ class Geary.HTML.UtilTest : TestCase { add_test("remove_html_tags", remove_html_tags); } + public void preserve_whitespace() throws GLib.Error { + assert_string("some text", Geary.HTML.smart_escape("some text")); + assert_string("some  text", Geary.HTML.smart_escape("some text")); + assert_string("some   text", Geary.HTML.smart_escape("some text")); + assert_string("some    text", Geary.HTML.smart_escape("some\ttext")); + + assert_string("some
text", Geary.HTML.smart_escape("some\ntext")); + assert_string("some
text", Geary.HTML.smart_escape("some\rtext")); + assert_string("some
text", Geary.HTML.smart_escape("some\r\ntext")); + + assert_string("some

text", Geary.HTML.smart_escape("some\n\ntext")); + assert_string("some

text", Geary.HTML.smart_escape("some\r\rtext")); + assert_string("some

text", Geary.HTML.smart_escape("some\n\rtext")); + assert_string("some

text", Geary.HTML.smart_escape("some\r\n\r\ntext")); + } + public void smart_escape_div() throws Error { string html = "
ohhai
"; - assert(Geary.HTML.smart_escape(html, false) == html); + assert(Geary.HTML.smart_escape(html) == html); } public void smart_escape_no_closing_tag() throws Error { string html = "
ohhai"; - assert(Geary.HTML.smart_escape(html, false) == html); + assert(Geary.HTML.smart_escape(html) == html); } public void smart_escape_img() throws Error { string html = ""; - assert(Geary.HTML.smart_escape(html, false) == html); + assert(Geary.HTML.smart_escape(html) == html); } public void smart_escape_xhtml_img() throws Error { string html = ""; - assert(Geary.HTML.smart_escape(html, false) == html); + assert(Geary.HTML.smart_escape(html) == html); } public void smart_escape_mixed() throws Error { string html = "mixed
ohhai
text"; - assert(Geary.HTML.smart_escape(html, false) == html); + assert(Geary.HTML.smart_escape(html) == html); } - public void smart_escape_text() throws Error { - string text = "some text"; - assert(Geary.HTML.smart_escape(text, false) == "
some text
"); + public void smart_escape_text() throws GLib.Error { + assert_string("some text", Geary.HTML.smart_escape("some text")); + assert_string("<some text", Geary.HTML.smart_escape("")); } - public void smart_escape_text_url() throws Error { - string text = ""; - assert(Geary.HTML.smart_escape(text, false) == "
<http://example.com>
"); + public void smart_escape_text_url() throws GLib.Error { + assert_string( + "<http://example.com>", + Geary.HTML.smart_escape("") + ); + assert_string( + "<http://example.com>", + Geary.HTML.smart_escape("") + ); } public void remove_html_tags() throws Error {