Merge branch 'mjog/589-attachment-keyword-check' into 'mainline'

Attachment keyword fixes

Closes #589

See merge request GNOME/geary!334
This commit is contained in:
Michael Gratton 2019-10-08 22:32:52 +00:00
commit 647ef90cfc
3 changed files with 174 additions and 105 deletions

View file

@ -161,11 +161,17 @@ public class ComposerWidget : Gtk.EventBox, Geary.BaseInterface {
private const string URI_LIST_MIME_TYPE = "text/uri-list";
private const string FILE_URI_PREFIX = "file://";
// Keep these in sync with the next const below.
private const string ATTACHMENT_KEYWORDS =
"attach|attaching|attaches|attachment|attachments|attached|enclose|enclosed|enclosing|encloses|enclosure|enclosures";
// Translators: This is list of keywords, separated by pipe ("|")
// characters, that suggest an attachment; since this is full-word
// checking, include all variants of each word. No spaces are
// allowed.
private const string ATTACHMENT_KEYWORDS_LOCALIZED = _("attach|attaching|attaches|attachment|attachments|attached|enclose|enclosed|enclosing|encloses|enclosure|enclosures");
// checking, include all variants of each word. No spaces are
// allowed. The words will be converted to lower case based on
// locale and English versions included automatically.
private const string ATTACHMENT_KEYWORDS_LOCALISED =
_("attach|attaching|attaches|attachment|attachments|attached|enclose|enclosed|enclosing|encloses|enclosure|enclosures");
public Geary.Account account { get; private set; }
private Gee.Map<string, Geary.AccountInformation> accounts;
@ -1334,7 +1340,12 @@ public class ComposerWidget : Gtk.EventBox, Geary.BaseInterface {
confirmation = _("Send message with an empty body?");
} else if (!has_attachment &&
yield this.editor.contains_attachment_keywords(
ATTACHMENT_KEYWORDS_LOCALIZED, this.subject)) {
string.join(
"|",
ATTACHMENT_KEYWORDS,
ATTACHMENT_KEYWORDS_LOCALISED
),
this.subject)) {
confirmation = _("Send message without an attachment?");
}
if (confirmation != null) {

View file

@ -13,17 +13,19 @@ class ComposerPageStateTest : ClientWebViewTestCase<ComposerWebView> {
public ComposerPageStateTest() {
base("ComposerPageStateTest");
add_test("html_to_text", html_to_text);
add_test("html_to_text_with_quote", html_to_text_with_quote);
add_test("html_to_text_with_nested_quote", html_to_text_with_nested_quote);
add_test("html_to_text_with_blacklist", html_to_text_with_blacklist);
add_test("edit_context_font", edit_context_font);
add_test("edit_context_link", edit_context_link);
add_test("indent_line", indent_line);
add_test("contains_attachment_keywords", contains_attachment_keywords);
add_test("clean_content", clean_content);
add_test("get_html", get_html);
add_test("get_text", get_text);
add_test("get_text_with_quote", get_text_with_quote);
add_test("get_text_with_nested_quote", get_text_with_nested_quote);
add_test("contains_keywords", contains_keywords);
// Depends contains_keywords and html_to_text_with_blacklist
add_test("contains_attachment_keywords", contains_attachment_keywords);
add_test("replace_non_breaking_space", replace_non_breaking_space);
try {
@ -33,6 +35,84 @@ class ComposerPageStateTest : ClientWebViewTestCase<ComposerWebView> {
}
}
public void html_to_text() throws Error {
load_body_fixture("<p>para</p>");
try {
assert(
Util.JS.to_string(
run_javascript(
@"ComposerPageState.htmlToText(window.document.body);"
).get_js_value()
) == "para\n\n\n\n"
);
} catch (Util.JS.Error err) {
print("Util.JS.Error: %s\n", err.message);
assert_not_reached();
} catch (Error err) {
print("WKError: %s\n", err.message);
assert_not_reached();
}
}
public void html_to_text_with_quote() throws Error {
unichar q_marker = Geary.RFC822.Utils.QUOTE_MARKER;
load_body_fixture("<p>pre</p> <blockquote><p>quote</p></blockquote> <p>post</p>");
try {
assert(
Util.JS.to_string(
run_javascript(
"ComposerPageState.htmlToText(window.document.body);"
).get_js_value()
) == @"pre\n\n$(q_marker)quote\n$(q_marker)\npost\n\n\n\n"
);
} catch (Util.JS.Error err) {
print("Util.JS.Error: %s", err.message);
assert_not_reached();
} catch (Error err) {
print("WKError: %s", err.message);
assert_not_reached();
}
}
public void html_to_text_with_nested_quote() throws Error {
unichar q_marker = Geary.RFC822.Utils.QUOTE_MARKER;
load_body_fixture("<p>pre</p> <blockquote><p>quote1</p> <blockquote><p>quote2</p></blockquote></blockquote> <p>post</p>");
try {
assert(
Util.JS.to_string(
run_javascript(
"ComposerPageState.htmlToText(window.document.body)"
).get_js_value()
) == @"pre\n\n$(q_marker)quote1\n$(q_marker)\n$(q_marker)$(q_marker)quote2\n$(q_marker)$(q_marker)\npost\n\n\n\n"
);
} catch (Util.JS.Error err) {
print("Util.JS.Error: %s\n", err.message);
assert_not_reached();
} catch (Error err) {
print("WKError: %s\n", err.message);
assert_not_reached();
}
}
public void html_to_text_with_blacklist() throws Error {
load_body_fixture("<p>pre</p> <blockquote><p>quote1</p> <blockquote><p>quote2</p></blockquote></blockquote> <p>post</p>");
try {
assert(
Util.JS.to_string(
run_javascript(
"ComposerPageState.htmlToText(window.document.body, [\"blockquote\"])"
).get_js_value()
) == @"pre\n\npost\n\n\n\n"
);
} catch (Util.JS.Error err) {
print("Util.JS.Error: %s\n", err.message);
assert_not_reached();
} catch (Error err) {
print("WKError: %s\n", err.message);
assert_not_reached();
}
}
public void edit_context_link() throws Error {
string html = "<a id=\"test\" href=\"url\">para</a>";
load_body_fixture(html);
@ -222,44 +302,6 @@ unknown://example6.com
}
}
public void get_text_with_quote() throws Error {
unichar q_marker = Geary.RFC822.Utils.QUOTE_MARKER;
load_body_fixture("<p>pre</p> <blockquote><p>quote</p></blockquote> <p>post</p>");
try {
assert(
Util.JS.to_string(
run_javascript(@"window.geary.getText();")
.get_js_value()
) == @"pre\n\n$(q_marker)quote\n$(q_marker)\npost\n\n\n\n"
);
} catch (Util.JS.Error err) {
print("Util.JS.Error: %s", err.message);
assert_not_reached();
} catch (Error err) {
print("WKError: %s", err.message);
assert_not_reached();
}
}
public void get_text_with_nested_quote() throws Error {
unichar q_marker = Geary.RFC822.Utils.QUOTE_MARKER;
load_body_fixture("<p>pre</p> <blockquote><p>quote1</p> <blockquote><p>quote2</p></blockquote></blockquote> <p>post</p>");
try {
assert(
Util.JS.to_string(
run_javascript(@"window.geary.getText();")
.get_js_value()
) == @"pre\n\n$(q_marker)quote1\n$(q_marker)\n$(q_marker)$(q_marker)quote2\n$(q_marker)$(q_marker)\npost\n\n\n\n"
);
} catch (Util.JS.Error err) {
print("Util.JS.Error: %s\n", err.message);
assert_not_reached();
} catch (Error err) {
print("WKError: %s\n", err.message);
assert_not_reached();
}
}
public void contains_keywords() throws Error {
load_body_fixture();
string complete_keys = """new Set(["keyword1", "keyword2"])""";
@ -317,6 +359,11 @@ unknown://example6.com
).get_js_value()
));
assert(Util.JS.to_bool(run_javascript(
@"ComposerPageState.containsKeywords('keyword1.', $complete_keys, $suffix_keys);"
).get_js_value()
));
assert(Util.JS.to_bool(run_javascript(
@"ComposerPageState.containsKeywords('something.sf1', $complete_keys, $suffix_keys);"
).get_js_value()
@ -326,6 +373,12 @@ unknown://example6.com
@"ComposerPageState.containsKeywords('something.something.sf2', $complete_keys, $suffix_keys);"
).get_js_value()
));
assert(!Util.JS.to_bool(run_javascript(
@"ComposerPageState.containsKeywords('http://something/esle.sf2', $complete_keys, $suffix_keys);"
).get_js_value()
));
} catch (Util.JS.Error err) {
print("Util.JS.Error: %s\n", err.message);
assert_not_reached();

View file

@ -12,7 +12,8 @@
let ComposerPageState = function() {
this.init.apply(this, arguments);
};
ComposerPageState.KEYWORD_SPLIT_REGEX = /[\s]+/g;
ComposerPageState.SPACE_CHAR_REGEX = /[\s]/i;
ComposerPageState.WORD_CHAR_REGEX = /[\s\\'!"#$%&()*+,\-.\/:;<=>?@\[\]^_`{|}~\u2000-\u206F\u2E00-\u2E7F]/i;
ComposerPageState.QUOTE_MARKER = "\x7f"; // delete
ComposerPageState.PROTOCOL_REGEX = /^(aim|apt|bitcoin|cvs|ed2k|ftp|file|finger|git|gtalk|http|https|irc|ircs|irc6|lastfm|ldap|ldaps|magnet|news|nntp|rsync|sftp|skype|smb|sms|svn|telnet|tftp|ssh|webcal|xmpp):/i;
// Taken from Geary.HTML.URL_REGEX, without the inline modifier (?x)
@ -238,30 +239,12 @@ ComposerPageState.prototype = {
return true;
}
// Check interesting body text
let node = this.bodyPart.firstChild;
let content = [];
let breakingElements = new Set([
"BR", "P", "DIV", "BLOCKQUOTE", "TABLE", "OL", "UL", "HR"
]);
while (node != null) {
if (node.nodeType == Node.TEXT_NODE) {
content.push(node.textContent);
} else if (content.nodeType == Node.ELEMENT_NODE) {
let isBreaking = breakingElements.has(node.nodeName);
if (isBreaking) {
content.push("\n");
}
// Only include non-quoted text
if (content.nodeName != "BLOCKQUOTE") {
content.push(content.textContent);
}
}
node = node.nextSibling;
}
// Check the body text
let content = ComposerPageState.htmlToText(
this.bodyPart, ["blockquote"]
);
return ComposerPageState.containsKeywords(
content.join(""), completeKeys, suffixKeys
content, completeKeys, suffixKeys
);
},
tabOut: function() {
@ -403,33 +386,50 @@ ComposerPageState.prototype = {
/**
* Determines if any keywords are present in a string.
*/
ComposerPageState.containsKeywords = function(line, completeKeys, suffixKeys) {
let tokens = new Set(
line.toLocaleLowerCase().split(ComposerPageState.KEYWORD_SPLIT_REGEX)
);
for (let key of completeKeys) {
if (tokens.has(key)) {
return true;
}
}
ComposerPageState.containsKeywords = function(line, wordKeys, suffixKeys) {
let urlRegex = ComposerPageState.URL_REGEX;
// XXX assuming all suffixes have length = 3 here.
let extLen = 3;
for (let token of tokens) {
let extDelim = token.length - (extLen + 1);
// We do care about "a.pdf", but not ".pdf"
if (token.length >= extLen + 2 && token.charAt(extDelim) == ".") {
let suffix = token.substring(extDelim + 1);
if (suffixKeys.has(suffix)) {
if (token.match(urlRegex) == null) {
return true;
let lastToken = -1;
let lastSpace = -1;
for (var i = 0; i <= line.length; i++) {
let char = (i < line.length) ? line[i] : " ";
if (char.match(ComposerPageState.WORD_CHAR_REGEX)) {
if (lastToken + 1 < i) {
let wordToken = line.substring(lastToken + 1, i).toLocaleLowerCase();
let isWordMatch = wordKeys.has(wordToken);
let isSuffixMatch = suffixKeys.has(wordToken);
if (isWordMatch || isSuffixMatch) {
let spaceToken = line.substring(lastSpace + 1, i);
let isUrl = (spaceToken.match(ComposerPageState.URL_REGEX) != null);
// Matches a token if it is a word that isn't in a
// URL. I.e. this gets "some attachment." but not
// "http://attachment.com"
if (isWordMatch && !isUrl) {
return true;
}
// Matches a token if it is a suffix that isn't a
// URL and such that the space-delimited token
// ends with ".SUFFIX". I.e. this matches "see
// attachment.pdf." but not
// "http://example.com/attachment.pdf" or "see the
// pdf."
if (isSuffixMatch &&
!isUrl &&
spaceToken.length != (1 + wordToken.length) &&
spaceToken.endsWith("." + wordToken)) {
return true;
}
}
}
lastToken = i;
if (char.match(ComposerPageState.SPACE_CHAR_REGEX)) {
lastSpace = i;
}
}
}
return false;
};
@ -460,11 +460,16 @@ ComposerPageState.cleanPart = function(part, removeIfEmpty) {
* `ComposerPageState.QUOTE_MARKER`, where the number of markers indicates
* the depth of nesting of the quote.
*/
ComposerPageState.htmlToText = function(root) {
ComposerPageState.htmlToText = function(root, blacklist = []) {
let parentStyle = window.getComputedStyle(root);
let text = "";
for (let node of (root.childNodes || [])) {
let nodeName = node.nodeName.toLowerCase();
if (blacklist.includes(nodeName)) {
continue;
}
let isBlock = (
node instanceof Element
&& window.getComputedStyle(node).display == "block"
@ -476,7 +481,7 @@ ComposerPageState.htmlToText = function(root) {
text += "\n";
}
}
switch (node.nodeName.toLowerCase()) {
switch (nodeName) {
case "#text":
let nodeText = node.nodeValue;
switch (parentStyle.whiteSpace) {
@ -500,24 +505,24 @@ ComposerPageState.htmlToText = function(root) {
break;
case "a":
if (node.closest("body.plain")) {
text += ComposerPageState.htmlToText(node);
text += ComposerPageState.htmlToText(node, blacklist);
} else if (node.textContent == node.href) {
text += "<" + node.href + ">";
} else {
text += ComposerPageState.htmlToText(node);
text += ComposerPageState.htmlToText(node, blacklist);
text += " <" + node.href + ">";
}
break;
case "b":
case "strong":
if (node.closest("body.plain")) {
text += ComposerPageState.htmlToText(node);
text += ComposerPageState.htmlToText(node, blacklist);
} else {
text += "*" + ComposerPageState.htmlToText(node) + "*";
text += "*" + ComposerPageState.htmlToText(node, blacklist) + "*";
}
break;
case "blockquote":
let bqText = ComposerPageState.htmlToText(node);
let bqText = ComposerPageState.htmlToText(node, blacklist);
// If there is a newline at the end of the quote, remove it
// After this switch we ensure that there is a newline after the quote
bqText = bqText.replace(/\n$/, "");
@ -532,23 +537,23 @@ ComposerPageState.htmlToText = function(root) {
case "i":
case "em":
if (node.closest("body.plain")) {
text += ComposerPageState.htmlToText(node);
text += ComposerPageState.htmlToText(node, blacklist);
} else {
text += "/" + ComposerPageState.htmlToText(node) + "/";
text += "/" + ComposerPageState.htmlToText(node, blacklist) + "/";
}
break;
case "u":
if (node.closest("body.plain")) {
text += ComposerPageState.htmlToText(node);
text += ComposerPageState.htmlToText(node, blacklist);
} else {
text += "_" + ComposerPageState.htmlToText(node) + "_";
text += "_" + ComposerPageState.htmlToText(node, blacklist) + "_";
}
break;
case "#comment":
case "style":
case "style":
break;
default:
text += ComposerPageState.htmlToText(node);
text += ComposerPageState.htmlToText(node, blacklist);
break;
}
if (isBlock) {