Removes HTML from quoted messages and previews. Closes #4664 Fixes #4404

This commit is contained in:
Eric Gregory 2012-01-30 11:45:02 -08:00
parent e6848c6de5
commit 2173636457
5 changed files with 80 additions and 12 deletions

View file

@ -22,8 +22,8 @@ client_src = [
'ui/message-viewer.vala',
'ui/preferences-dialog.vala',
'util/util-email.vala',
'util/util-keyring.vala',
'util/util-email.vala'
]
gsettings_schemas = [

View file

@ -240,6 +240,7 @@ public class Geary.RFC822.PreviewText : Geary.RFC822.Text {
if (preview_header != null) {
string? charset = null;
string? encoding = null;
bool is_html = false;
// Parse the header.
GMime.Stream header_stream = new GMime.StreamMem.with_buffer(
@ -247,6 +248,8 @@ public class Geary.RFC822.PreviewText : Geary.RFC822.Text {
GMime.Parser parser = new GMime.Parser.with_stream(header_stream);
GMime.Part? part = parser.construct_part() as GMime.Part;
if (part != null) {
is_html = (part.get_content_type().to_string() == "text/html");
charset = part.get_content_type_parameter("charset");
encoding = part.get_header("Content-Transfer-Encoding");
}
@ -268,7 +271,8 @@ public class Geary.RFC822.PreviewText : Geary.RFC822.Text {
input_stream.write_to_stream(filter);
uint8[] data = output.data;
data += (uint8) '\0';
buffer = new Geary.Memory.StringBuffer((string) data);
buffer = new Geary.Memory.StringBuffer(is_html ? Geary.HTML.remove_html_tags(
(string) data) : (string) data);
}
base (buffer);

View file

@ -60,18 +60,25 @@ public string quote_email_for_forward(Geary.Email email) {
}
private string quote_body(Geary.Email email, bool line_start_char = true) {
string ret = "";
string body_text = "";
try {
string[] lines = email.get_message().get_first_mime_part_of_content_type("text/plain")
.to_utf8().split("\n");
for (int i = 0; i < lines.length; i++) {
if (line_start_char)
ret += "> ";
ret += lines[i];
}
body_text = email.get_message().get_first_mime_part_of_content_type("text/plain").to_utf8();
} catch (Error err) {
debug("Could not get message text. %s", err.message);
try {
body_text = Geary.HTML.remove_html_tags(email.get_message().
get_first_mime_part_of_content_type("text/html").to_utf8());
} catch (Error err2) {
debug("Could not get message text. %s", err2.message);
}
}
string ret = "";
string[] lines = body_text.split("\n");
for (int i = 0; i < lines.length; i++) {
if (line_start_char)
ret += "> ";
ret += lines[i];
}
return ret;

View file

@ -0,0 +1,56 @@
/* Copyright 2011-2012 Yorba Foundation
*
* This software is licensed under the GNU Lesser General Public License
* (version 2.1 or later). See the COPYING file in this distribution.
*/
namespace Geary.HTML {

// Strips HTML markup from the input and returns the remaining text.
//
// Processing happens in three steps:
//   1. Script and style elements are removed together with everything between their opening
//      and closing tags.
//   2. The '<' and '>' characters left in the text are counted.  If there is exactly one more
//      '<' than '>' the input is assumed to terminate in the middle of a tag, and a '>' is
//      appended so that the partial tag is removed as well.
//   3. Any text between '<' and '>' is removed.
//
// If the brackets are still unbalanced after step 2 the HTML is considered invalid and the
// original string is returned.  The original string is also returned if a regex fails.
public string remove_html_tags(string input) {
    try {
        // Script and style blocks must go before the brackets are counted: their contents are not
        // markup, yet routinely contain '<' and '>' (CSS child selectors such as "ul > li",
        // JavaScript comparisons such as "a < b").  Counting those would make valid HTML look
        // invalid, or trigger a bogus "truncated tag" repair.
        //
        // Based on regex here: http://stackoverflow.com/questions/116403/im-looking-for-a-regular-expression-to-remove-a-given-xhtml-tag-from-a-string
        Regex script = new Regex("<script[^>]*?>[\\s\\S]*?<\\/script>", RegexCompileFlags.CASELESS);
        string output = script.replace(input, -1, 0, "");

        // Same pattern as above, applied to style tags.
        Regex style = new Regex("<style[^>]*?>[\\s\\S]*?<\\/style>", RegexCompileFlags.CASELESS);
        output = style.replace(output, -1, 0, "");

        // Count the number of < and > characters in what is left.
        unichar c;
        uint64 less_than = 0;
        uint64 greater_than = 0;
        for (int i = 0; output.get_next_char(ref i, out c);) {
            if (c == '<')
                less_than++;
            else if (c == '>')
                greater_than++;
        }

        if (less_than == greater_than + 1) {
            output += ">"; // Close the truncated tag so the tag regex below can match it.
            greater_than++;
        }

        if (less_than != greater_than)
            return input; // Invalid HTML.

        // Removes remaining tags.  "[^>]*" spans newlines and stops at the first '>', so it
        // matches the same text as the lazy "(.|\n)*?" form without the per-character alternation.
        // Based on this regex:
        // http://osherove.com/blog/2003/5/13/strip-html-tags-from-a-string-using-regular-expressions.html
        Regex tags = new Regex("<[^>]*>");
        return tags.replace(output, -1, 0, "");
    } catch (RegexError e) {
        debug("Error stripping HTML tags: %s", e.message);
    }

    return input;
}

}

View file

@ -147,6 +147,7 @@ def build(bld):
'../engine/state/state-machine.vala',
'../engine/state/state-mapping.vala',
'../engine/util/util-html.vala',
'../engine/util/util-inet.vala',
'../engine/util/util-interfaces.vala',
'../engine/util/util-memory.vala',