When generating the preview, only the first 128 bytes of the first MIME part are fetched and used. If this part is text/html with a significant amount of embedded CSS, then there is a good chance the string passed to Geary.HTML::remove_html_tags() will be invalid, or be missing closing elements. Since that function uses regexes that require balanced tags to remove script and style blocks, it was very possible that in these cases it would fail to remove these blocks. To solve this, remove_html_tags() is removed and its call sites are replaced by calls to Geary.HTML::html_to_text(), which has been tidied up to produce a more human-readable result. Add unit tests to cover new html_to_text functionality and its call sites. * src/engine/util/util-html.vala: Remove remove_html_tags(). Update html_to_text() to not just insert line breaks, but also insert spaces and alt text, and ignore tags like HEAD, SCRIPT and STYLE, as appropriate. Add an optional param to also allow skipping BLOCKQUOTE elements, which we don't want in the preview.
76 lines
2.7 KiB
Vala
76 lines
2.7 KiB
Vala
/*
|
|
* Copyright 2016 Michael Gratton <mike@vee.net>
|
|
*
|
|
* This software is licensed under the GNU Lesser General Public License
|
|
* (version 2.1 or later). See the COPYING file in this distribution.
|
|
*/
|
|
|
|
class Geary.HTML.UtilTest : Gee.TestCase {
|
|
|
|
/**
 * Creates the suite and registers its test methods.
 *
 * The suite is named "Geary.HTML.Util" and currently holds a single
 * test, registered under the name "remove_html_tags".
 */
public UtilTest() {
    base("Geary.HTML.Util");

    // Sole case in this suite; see remove_html_tags() for what it checks.
    add_test("remove_html_tags", remove_html_tags);
}
|
|
|
|
/**
 * Checks the plain text produced by Geary.HTML.html_to_text().
 *
 * Despite its name, this test does not call a function named
 * remove_html_tags(); every assertion goes through html_to_text().
 * The fixtures cover a complete HTML message, a blockquote followed
 * by a paragraph, a complete style block, and a document that is cut
 * off part-way through a style block inside its head.
 */
public void remove_html_tags() {
    string blockquote_body = """<blockquote>hello</blockquote> <p>there</p>""";

    // A well-formed style element with nothing outside it.
    string style_complete = """<style>
.bodyblack { font-family: Verdana, Arial, Helvetica, sans-serif; font-size:
12px; }
td { font-size: 12px; }
.footer { font-family: Verdana, Arial, Helvetica, sans-serif; font-size: 10
px; }
</style>""";

    // Markup that stops in the middle of a CSS rule, so neither the
    // style nor the head element is ever closed.
    string style_truncated = """<html><head>
<meta http-equiv=Content-Type content="text/html; charset=utf-8">
<style>
.bodyblack { font-family: Verdana, """;

    // Full message: compared against the class-level expected fixture.
    assert(Geary.HTML.html_to_text(HTML_BODY_COMPLETE) == HTML_BODY_COMPLETE_EXPECTED);

    // With the default arguments the blockquote text is kept ...
    assert(Geary.HTML.html_to_text(blockquote_body) == "hello\n there\n");
    // ... while passing false as the second argument drops it.
    assert(Geary.HTML.html_to_text(blockquote_body, false) == " there\n");

    // Style rules must produce no text, whether the block is closed or
    // the markup ends before any closing tag is seen.
    assert(Geary.HTML.html_to_text(style_complete) == "");
    assert(Geary.HTML.html_to_text(style_truncated) == "");
}
|
|
|
|
/**
 * Complete HTML message used as input fixture.
 *
 * Contains a head with a meta tag and a style block, followed by a
 * body holding a table with text, line breaks and two links. It is
 * passed to Geary.HTML.html_to_text() in remove_html_tags(), which
 * expects the result to equal HTML_BODY_COMPLETE_EXPECTED.
 *
 * NOTE(review): the bare `|` lines and the absence of indentation in
 * this literal look like extraction artifacts rather than fixture
 * content -- confirm against the upstream file before relying on the
 * exact bytes.
 */
public static string HTML_BODY_COMPLETE = """<html><head>
|
|
<meta http-equiv=Content-Type content="text/html; charset=utf-8">
|
|
<style>
|
|
.bodyblack { font-family: Verdana, Arial, Helvetica, sans-serif; font-size: 12px; }
|
|
td { font-size: 12px; }
|
|
.footer { font-family: Verdana, Arial, Helvetica, sans-serif; font-size: 10px; }
|
|
</style>
|
|
</head>
|
|
<body><table cellSpacing="0" cellPadding="0" width="450" border="0" class="bodyblack"><tr><td>
|
|
<p><br />Hi Kenneth, <br /> <br /> We xxxxx xxxx xx xxx xxx xx xxxx x xxxxxxxx xxxxxxxx.
|
|
<br /> <br /> <br /> <br />Thank you, <br /> <br />XXXXX
|
|
X XXXXXX<br /><br />You can reply directly to this message or click the following link:<br /><a href="https://app.foobar.com/xxxxxxxx752a0ab01641966deff6c48623aba">https://app.foobar.com/xxxxxxxxxxxxxxxx1641966deff6c48623aba</a><br /><br />You can change your email preferences at:<br /><a href="https://app.foobar.com/xxxxxxxxxxxxx">https://app.foobar.com/xxxxxxxxxxx</a></p></td></tr>
|
|
</table></body></html>
|
|
""";
|
|
|
|
/**
 * Plain text expected from converting HTML_BODY_COMPLETE.
 *
 * remove_html_tags() asserts that Geary.HTML.html_to_text() returns
 * exactly this string for that input, so every character of the
 * literal, including line breaks, is significant.
 *
 * NOTE(review): the bare `|` lines here look like extraction
 * artifacts, and any blank or whitespace-only lines of the original
 * literal are not recoverable from this view -- confirm against the
 * upstream file before editing.
 */
public static string HTML_BODY_COMPLETE_EXPECTED = """
|
|
|
|
Hi Kenneth,
|
|
|
|
We xxxxx xxxx xx xxx xxx xx xxxx x xxxxxxxx xxxxxxxx.
|
|
|
|
|
|
|
|
|
|
Thank you,
|
|
|
|
XXXXX
|
|
X XXXXXX
|
|
|
|
You can reply directly to this message or click the following link:
|
|
https://app.foobar.com/xxxxxxxxxxxxxxxx1641966deff6c48623aba
|
|
|
|
You can change your email preferences at:
|
|
https://app.foobar.com/xxxxxxxxxxx
|
|
|
|
""";
|
|
|
|
}
|