Reenable composer attachment keyword checking.

* src/client/composer/composer-web-view.vala (ComposerWebView): Add
  ::contains_attachment_keywords that thunks to JS, remove uneeded
  ::get_block_quote_representation method.

* src/client/composer/composer-widget.vala (ComposerWidget): Remove
  attachment keywork checking related code, just call new method on
  editor to do the check.

* src/client/web-process/util-composer.vala,
  src/client/web-process/util-webkit.vala: Remove uneeded code.

* ui/composer-web-view.js (ComposerPageState): Implement new
  ::containsAttachmentKeyword method based on previous code. Add unit
  tests.
This commit is contained in:
Michael James Gratton 2017-01-26 13:31:08 +11:00
parent 3e36b5e961
commit e0bff994fa
6 changed files with 224 additions and 182 deletions

View file

@ -385,6 +385,21 @@ public class ComposerWebView : ClientWebView {
this.run_javascript.begin("geary.deleteQuotedMessage();", null);
}
/**
* Determines if the editor content contains an attachment keyword.
*/
public async bool contains_attachment_keywords(string keyword_spec, string subject) {
try {
return WebKitUtil.to_bool(
yield run_javascript("geary.containsAttachmentKeyword(\"%s\", \"%s\");"
.printf(keyword_spec, subject), null)
);
} catch (Error err) {
debug("Error checking or attchment keywords: %s", err.message);
return false;
}
}
/**
* Returns the editor content as an HTML string.
*/
@ -483,13 +498,6 @@ public class ComposerWebView : ClientWebView {
// XXX
}
/**
* ???
*/
public string get_block_quote_representation() {
return ""; // XXX
}
public override bool button_release_event(Gdk.EventButton event) {
// WebView seems to unconditionally consume button events, so
// to show a link popopver after the view has processed one,

View file

@ -160,14 +160,12 @@ public class ComposerWidget : Gtk.EventBox {
private const string URI_LIST_MIME_TYPE = "text/uri-list";
private const string FILE_URI_PREFIX = "file://";
public const string ATTACHMENT_KEYWORDS_SUFFIX = ".doc|.pdf|.xls|.ppt|.rtf|.pps";
// A list of keywords, separated by pipe ("|") characters, that suggest an attachment; since
// this is full-word checking, include all variants of each word. No spaces are allowed.
public const string ATTACHMENT_KEYWORDS_LOCALIZED = _("attach|attaching|attaches|attachment|attachments|attached|enclose|enclosed|enclosing|encloses|enclosure|enclosures");
private delegate bool CompareStringFunc(string key, string token);
// Translators: This is list of keywords, separated by pipe ("|")
// characters, that suggest an attachment; since this is full-word
// checking, include all variants of each word. No spaces are
// allowed.
private const string ATTACHMENT_KEYWORDS_LOCALIZED = _("attach|attaching|attaches|attachment|attachments|attached|enclose|enclosed|enclosing|encloses|enclosure|enclosures");
public Geary.Account account { get; private set; }
public Geary.RFC822.MailboxAddresses from { get; private set; }
@ -1218,82 +1216,6 @@ public class ComposerWidget : Gtk.EventBox {
return check_send_on_return(event) && base.key_press_event(event);
}
// compares all keys to all tokens according to user-supplied comparison function
// Returns true if found
private bool search_tokens(string[] keys, string[] tokens, CompareStringFunc cmp_func,
out string? found_key, out string? found_token) {
foreach (string key in keys) {
foreach (string token in tokens) {
if (cmp_func(key, token)) {
found_key = key;
found_token = token;
return true;
}
}
}
found_key = null;
found_token = null;
return false;
}
private bool email_contains_attachment_keywords() {
// Filter out all content contained in block quotes
string filtered = @"$subject\n";
filtered += this.editor.get_block_quote_representation();
Regex url_regex = null;
try {
// Prepare to ignore urls later
url_regex = new Regex(Geary.HTML.URL_REGEX, RegexCompileFlags.CASELESS);
} catch (Error error) {
debug("Error building regex in keyword checker: %s", error.message);
}
string[] suffix_keys = ATTACHMENT_KEYWORDS_SUFFIX.casefold().split("|");
string[] full_word_keys = ATTACHMENT_KEYWORDS_LOCALIZED.casefold().split("|");
foreach (string line in filtered.split("\n")) {
// Stop looking once we hit forwarded content
if (line.has_prefix("--")) {
break;
}
// casefold line, strip start and ending whitespace, then tokenize by whitespace
string folded = line.casefold().strip();
string[] tokens = folded.split_set(" \t");
// search for full-word matches
string? found_key, found_token;
bool found = search_tokens(full_word_keys, tokens, (key, token) => {
return key == token;
}, out found_key, out found_token);
// if not found, search for suffix matches
if (!found) {
found = search_tokens(suffix_keys, tokens, (key, token) => {
return token.has_suffix(key);
}, out found_key, out found_token);
}
if (found) {
try {
// Make sure the match isn't coming from a url
if (found_key in url_regex.replace(folded, -1, 0, "")) {
return true;
}
} catch (Error error) {
debug("Regex replacement error in keyword checker: %s", error.message);
return true;
}
}
}
return false;
}
private async bool should_send() {
bool has_subject = !Geary.String.is_empty(subject.strip());
bool has_attachment = this.attached_files.size > 0;
@ -1312,7 +1234,9 @@ public class ComposerWidget : Gtk.EventBox {
confirmation = _("Send message with an empty subject?");
} else if (!has_body && !has_attachment) {
confirmation = _("Send message with an empty body?");
} else if (!has_attachment && email_contains_attachment_keywords()) {
} else if (!has_attachment &&
yield this.editor.contains_attachment_keywords(
ATTACHMENT_KEYWORDS_LOCALIZED, this.subject)) {
confirmation = _("Send message without an attachment?");
}
if (confirmation != null) {

View file

@ -19,10 +19,6 @@ namespace Util.Composer {
private const string EDITING_DELETE_CONTAINER_ID = "WebKit-Editing-Delete-Container";
public string get_block_quote_representation(WebKit.WebPage page) {
return Util.DOM.get_text_representation(page.get_dom_document(), "blockquote");
}
public void linkify_document(WebKit.WebPage page) {
Util.DOM.linkify_document(page.get_dom_document());
}

View file

@ -8,91 +8,6 @@
public const string PROTOCOL_REGEX = "^(aim|apt|bitcoin|cvs|ed2k|ftp|file|finger|git|gtalk|http|https|irc|ircs|irc6|lastfm|ldap|ldaps|magnet|news|nntp|rsync|sftp|skype|smb|sms|svn|telnet|tftp|ssh|webcal|xmpp):";
namespace Util.DOM {
public WebKit.DOM.HTMLElement? select(WebKit.DOM.Node node, string selector) {
try {
if (node is WebKit.DOM.Document) {
return (node as WebKit.DOM.Document).query_selector(selector) as WebKit.DOM.HTMLElement;
} else {
return (node as WebKit.DOM.Element).query_selector(selector) as WebKit.DOM.HTMLElement;
}
} catch (Error error) {
debug("Error selecting element %s: %s", selector, error.message);
return null;
}
}
public WebKit.DOM.HTMLElement? clone_node(WebKit.DOM.Node node, bool deep = true) {
WebKit.DOM.HTMLElement? clone = null;
try {
clone = node.clone_node(deep) as WebKit.DOM.HTMLElement;
} catch (Error err) {
debug("Error selecting cloning node: %s", err.message);
}
return clone;
}
// Returns the text contained in the DOM document, after ignoring tags of type "exclude"
// and padding newlines where appropriate. Used to scan for attachment keywords.
public string get_text_representation(WebKit.DOM.Document doc, string exclude) {
WebKit.DOM.HTMLElement? copy = Util.DOM.clone_node(doc.get_body());
if (copy == null) {
return "";
}
// Keep deleting the next excluded element until there are none left
while (true) {
WebKit.DOM.HTMLElement? current = Util.DOM.select(copy, exclude);
if (current == null) {
break;
}
WebKit.DOM.Node parent = current.get_parent_node();
try {
parent.remove_child(current);
} catch (Error error) {
debug("Error removing blockquotes: %s", error.message);
break;
}
}
WebKit.DOM.NodeList node_list;
try {
node_list = copy.query_selector_all("br");
} catch (Error error) {
debug("Error finding <br>s: %s", error.message);
return copy.get_inner_text();
}
// Replace <br> tags with newlines
for (int i = 0; i < node_list.length; ++i) {
WebKit.DOM.Node br = node_list.item(i);
WebKit.DOM.Node parent = br.get_parent_node();
try {
parent.replace_child(doc.create_text_node("\n"), br);
} catch (Error error) {
debug("Error replacing <br>: %s", error.message);
}
}
try {
node_list = copy.query_selector_all("div");
} catch (Error error) {
debug("Error finding <div>s: %s", error.message);
return copy.get_inner_text();
}
// Pad each <div> with newlines
for (int i = 0; i < node_list.length; ++i) {
WebKit.DOM.Node div = node_list.item(i);
try {
div.insert_before(doc.create_text_node("\n"), div.first_child);
div.append_child(doc.create_text_node("\n"));
} catch (Error error) {
debug("Error padding <div> with newlines: %s", error.message);
}
}
return copy.get_inner_text();
}
// Linkifies plain text links in an HTML document.
public void linkify_document(WebKit.DOM.Document document) {

View file

@ -11,11 +11,13 @@ class ComposerPageStateTest : ClientWebViewTestCase<ComposerWebView> {
base("ComposerPageStateTest");
add_test("edit_context_font", edit_context_font);
add_test("edit_context_link", edit_context_link);
add_test("contains_attachment_keywords", contains_attachment_keywords);
add_test("get_html", get_html);
add_test("get_text", get_text);
add_test("get_text_with_quote", get_text_with_quote);
add_test("get_text_with_nested_quote", get_text_with_nested_quote);
add_test("resolve_nesting", resolve_nesting);
add_test("contains_keywords", contains_keywords);
add_test("quote_lines", quote_lines);
add_test("replace_non_breaking_space", replace_non_breaking_space);
}
@ -52,6 +54,43 @@ class ComposerPageStateTest : ClientWebViewTestCase<ComposerWebView> {
}
}
public void contains_attachment_keywords() {
load_body_fixture("""
<blockquote>inner quote</blockquote>
<p>some text</p>
some text
-- <br>sig
<p>outerquote text<p>
""");
try {
assert(WebKitUtil.to_bool(run_javascript(
@"geary.containsAttachmentKeyword(\"some\", \"subject text\");"
)));
assert(WebKitUtil.to_bool(run_javascript(
@"geary.containsAttachmentKeyword(\"subject\", \"subject text\");"
)));
assert(!WebKitUtil.to_bool(run_javascript(
@"geary.containsAttachmentKeyword(\"innerquote\", \"subject text\");"
)));
assert(!WebKitUtil.to_bool(run_javascript(
@"geary.containsAttachmentKeyword(\"sig\", \"subject text\");"
)));
assert(!WebKitUtil.to_bool(run_javascript(
@"geary.containsAttachmentKeyword(\"outerquote\", \"subject text\");"
)));
} catch (Geary.JS.Error err) {
print("Geary.JS.Error: %s\n", err.message);
assert_not_reached();
} catch (Error err) {
print("WKError: %s\n", err.message);
assert_not_reached();
}
}
public void get_html() {
string html = "<p>para</p>";
load_body_fixture(html);
@ -111,6 +150,59 @@ class ComposerPageStateTest : ClientWebViewTestCase<ComposerWebView> {
}
}
public void contains_keywords() {
load_body_fixture();
string complete_keys = """new Set(["keyword1", "keyword2"])""";
string suffix_keys = """new Set(["sf1", "sf2"])""";
try {
// Doesn't contain
assert(!WebKitUtil.to_bool(run_javascript(
@"ComposerPageState.containsKeywords('notcontained', $complete_keys, $suffix_keys);"
)));
assert(!WebKitUtil.to_bool(run_javascript(
@"ComposerPageState.containsKeywords('not contained', $complete_keys, $suffix_keys);"
)));
assert(!WebKitUtil.to_bool(run_javascript(
@"ComposerPageState.containsKeywords('not\tcontained', $complete_keys, $suffix_keys);"
)));
assert(!WebKitUtil.to_bool(run_javascript(
@"ComposerPageState.containsKeywords('http://www.keyword1.com', $complete_keys, $suffix_keys);"
)));
assert(!WebKitUtil.to_bool(run_javascript(
@"ComposerPageState.containsKeywords('http://www.something.com/something.sf1', $complete_keys, $suffix_keys);"
)));
assert(!WebKitUtil.to_bool(run_javascript(
@"ComposerPageState.containsKeywords('sf1', $complete_keys, $suffix_keys);"
)));
assert(!WebKitUtil.to_bool(run_javascript(
@"ComposerPageState.containsKeywords('.sf1', $complete_keys, $suffix_keys);"
)));
// Does contain
assert(WebKitUtil.to_bool(run_javascript(
@"ComposerPageState.containsKeywords('keyword1', $complete_keys, $suffix_keys);"
)));
assert(WebKitUtil.to_bool(run_javascript(
@"ComposerPageState.containsKeywords('keyword2 contained', $complete_keys, $suffix_keys);"
)));
assert(WebKitUtil.to_bool(run_javascript(
@"ComposerPageState.containsKeywords('keyword2\tcontained', $complete_keys, $suffix_keys);"
)));
assert(WebKitUtil.to_bool(run_javascript(
@"ComposerPageState.containsKeywords('something.sf1', $complete_keys, $suffix_keys);"
)));
assert(WebKitUtil.to_bool(run_javascript(
@"ComposerPageState.containsKeywords('something.something.sf2', $complete_keys, $suffix_keys);"
)));
} catch (Geary.JS.Error err) {
print("Geary.JS.Error: %s\n", err.message);
assert_not_reached();
} catch (Error err) {
print("WKError: %s\n", err.message);
assert_not_reached();
}
}
public void resolve_nesting() {
load_body_fixture();
unichar q_marker = Geary.RFC822.Utils.QUOTE_MARKER;

View file

@ -13,9 +13,13 @@ let ComposerPageState = function() {
this.init.apply(this, arguments);
};
ComposerPageState.BODY_ID = "message-body";
ComposerPageState.KEYWORD_SPLIT_REGEX = /[\s]+/g;
ComposerPageState.QUOTE_START = "‘";
ComposerPageState.QUOTE_END = "’";
ComposerPageState.QUOTE_MARKER = "\x7f";
// Taken from Geary.HTML.URL_REGEX, without the inline modifier (?x)
// at the start, which is unsupported in JS
ComposerPageState.URL_REGEX = new RegExp("\\b((?:[a-z][\\w-]+:(?:/{1,3}|[a-z0-9%])|www\\d{0,3}[.]|[a-z0-9.\\-]+[.][a-z]{2,4}/)(?:[^\\s()<>]+|\\(([^\\s()<>]+|(\\([^\\s()<>]+\\)))*\\))+(?:\\(([^\\s()<>]+|(\\([^\\s()<>]+\\)))*\\)|[^\\s`!()\\[\\]{};:'\".,<>?«»“”‘’]))", "i");
ComposerPageState.prototype = {
__proto__: PageState.prototype,
@ -161,6 +165,76 @@ ComposerPageState.prototype = {
// XXX need mark the quote somehow so we can find it, select
// it and delete it using execCommand
},
/**
* Determines if subject or body content refers to attachments.
*/
containsAttachmentKeyword: function(keywordSpec, subject) {
// XXX this could also use a structured representation of the
// message body so we don't need to text to check
let ATTACHMENT_KEYWORDS_SUFFIX = "doc|pdf|xls|ppt|rtf|pps";
let completeKeys = new Set(keywordSpec.toLocaleLowerCase().split("|"));
let suffixKeys = new Set(ATTACHMENT_KEYWORDS_SUFFIX.split("|"));
// Check the subject line
if (ComposerPageState.containsKeywords(subject, completeKeys, suffixKeys)) {
return true;
}
// Check interesting body text
let breakingElements = new Set([
"BR", "P", "DIV", "BLOCKQUOTE", "TABLE", "OL", "UL", "HR"
]);
let content = this.messageBody.firstChild;
let found = false;
let done = false;
let textContent = [];
while (content != null && !done) {
if (content.nodeType == Node.TEXT_NODE) {
textContent.push(content.textContent);
} else if (content.nodeType == Node.ELEMENT_NODE) {
let isBreaking = breakingElements.has(content.nodeName);
if (isBreaking) {
textContent.push("\n");
}
// Always exclude quoted text
if (content.nodeName != "BLOCKQUOTE") {
textContent.push(content.innerText);
}
if (isBreaking || content.nextSibling == null) {
for (let line of textContent.join("").split("\n")) {
// Ignore everything after a sig or a
// forwarded message.
// XXX This breaks if the user types this at
// the start of a line, also, WK's innerText
// impl strips trailing whitespace, so can't
// test for 'line == "-- "' :(
if (line.startsWith("--")) {
done = true;
break;
}
line = line.trim();
if (line != "") {
if (ComposerPageState.containsKeywords(line, completeKeys, suffixKeys)) {
found = true;
done = true;
break;
}
}
}
textContent = [];
}
}
content = content.nextSibling;
}
return found;
},
tabOut: function() {
document.execCommand(
"inserthtml", false, "<span style='white-space: pre-wrap'>\t</span>"
@ -233,6 +307,39 @@ ComposerPageState.prototype = {
}
};
/**
* Determines if any keywords are present in a string.
*/
ComposerPageState.containsKeywords = function(line, completeKeys, suffixKeys) {
let tokens = new Set(
line.toLocaleLowerCase().split(ComposerPageState.KEYWORD_SPLIT_REGEX)
);
for (let key of completeKeys) {
if (tokens.has(key)) {
return true;
}
}
let urlRegex = ComposerPageState.URL_REGEX;
// XXX assuming all suffixes have length = 3 here.
let extLen = 3;
for (let token of tokens) {
let extDelim = token.length - (extLen + 1);
// We do care about "a.pdf", but not ".pdf"
if (token.length >= extLen + 2 && token.charAt(extDelim) == ".") {
let suffix = token.substring(extDelim + 1);
if (suffixKeys.has(suffix)) {
if (token.match(urlRegex) == null) {
return true;
}
}
}
}
return false;
};
/**
* Convert a HTML DOM tree to plain text with delineated quotes.
*
@ -405,7 +512,7 @@ let SelectionUtil = {
getCursorElement: function() {
let selection = window.getSelection();
let node = selection.focusNode;
if (node != null && node.nodeType != Node.ELEMENT_TYPE) {
if (node != null && node.nodeType != Node.ELEMENT_NODE) {
node = node.parentNode;
}
return node;