From 0112c8192cae57588747c51441e91f3e6d069697 Mon Sep 17 00:00:00 2001 From: Michael Gratton Date: Mon, 16 Dec 2019 23:17:55 +1100 Subject: [PATCH] Geary.SearchQuery: Allow client apps to build search queries Adds classes that allow building arbitrary query expressions and requires an instance to be provided to Geary.SearchQuery, to be set as a property. This enables query expressions to be parsed by clients instead of the engine, in whichever way they choose. --- .../application/application-main-window.vala | 7 +- .../conversation-viewer.vala | 7 +- src/client/util/util-email.vala | 578 +++++++++++++++++- src/engine/api/geary-account.vala | 22 +- src/engine/api/geary-search-query.vala | 284 ++++++++- src/engine/imap-db/imap-db-search-query.vala | 6 +- .../imap-engine-generic-account.vala | 11 +- test/client/util/util-email-test.vala | 354 ++++++++++- test/mock/mock-account.vala | 12 +- test/mock/mock-search-query.vala | 5 +- 10 files changed, 1229 insertions(+), 57 deletions(-) diff --git a/src/client/application/application-main-window.vala b/src/client/application/application-main-window.vala index d5255a41..c4f93431 100644 --- a/src/client/application/application-main-window.vala +++ b/src/client/application/application-main-window.vala @@ -981,11 +981,14 @@ public class Application.MainWindow : this.previous_non_search_folder = this.selected_folder; } - var strategy = this.application.config.get_search_strategy(); try { + var expr_factory = new Util.Email.SearchExpressionFactory( + this.application.config.get_search_strategy(), + context.account.information + ); var query = yield context.account.new_search_query( + expr_factory.parse_query(query_text), query_text, - strategy, cancellable ); this.folder_list.set_search( diff --git a/src/client/conversation-viewer/conversation-viewer.vala b/src/client/conversation-viewer/conversation-viewer.vala index 74706f8c..09d25020 100644 --- a/src/client/conversation-viewer/conversation-viewer.vala +++ 
b/src/client/conversation-viewer/conversation-viewer.vala @@ -429,10 +429,13 @@ public class ConversationViewer : Gtk.Stack, Geary.BaseInterface { // opening every message in the conversation as soon as // the user presses a key if (text.length >= 2) { - var strategy = this.config.get_search_strategy(); + var expr_factory = new Util.Email.SearchExpressionFactory( + this.config.get_search_strategy(), + account.information + ); query = yield account.new_search_query( + expr_factory.parse_query(text), text, - strategy, cancellable ); } diff --git a/src/client/util/util-email.vala b/src/client/util/util-email.vala index 4a1754a7..554dbe72 100644 --- a/src/client/util/util-email.vala +++ b/src/client/util/util-email.vala @@ -1,7 +1,9 @@ -/* Copyright 2016 Software Freedom Conservancy Inc. +/* + * Copyright 2016 Software Freedom Conservancy Inc. + * Copyright 2019 Michael Gratton * * This software is licensed under the GNU Lesser General Public License - * (version 2.1 or later). See the COPYING file in this distribution. + * (version 2.1 or later). See the COPYING file in this distribution. */ namespace Util.Email { @@ -312,3 +314,575 @@ namespace Util.Email { } } + + +/** + * Parses a human-entered email query string as a query expression. + * + * @see Geary.SearchQuery.Term + */ +public class Util.Email.SearchExpressionFactory : Geary.BaseObject { + + + private const unichar OPERATOR_SEPARATOR = ':'; + private const string OPERATOR_TEMPLATE = "%s:%s"; + + + private delegate Geary.SearchQuery.Term? OperatorFactory( + string value, + bool is_quoted + ); + + + private class FactoryContext { + + + public unowned OperatorFactory factory; + + + public FactoryContext(OperatorFactory factory) { + this.factory = factory; + } + + } + + + private class Tokeniser { + + + // These characters are chosen for being commonly used to + // continue a single word (such as extended last names, + // i.e. "Lars-Eric") or in terms commonly searched for in an + // email client, i.e. 
unadorned mailbox addresses. Note that + // characters commonly used for wildcards or that would be + // interpreted as wildcards by SQLite are not included here. + private const unichar[] CONTINUATION_CHARS = { + '-', '_', '.', '@' + }; + + public bool has_next { + get { return (this.current_pos < this.query.length); } + } + + public bool is_at_word { + get { return (this.attrs[this.current_c].is_word_start == 1); } + } + + public bool is_at_quote { + get { return (this.c == '"'); } + } + + public unichar current_character { get { return this.c; } } + + + private string query; + private int current_pos = -1; + private int next_pos = 0; + + private unichar c = 0; + private int current_c = -1; + private Pango.LogAttr[] attrs; + + + public Tokeniser(string query, Pango.Language language) { + this.query = query; + + // Break up search string into individual words and/or + // operators. Can't simply break on space or non-alphanumeric + // chars since some languages don't use spaces, so use Pango + // for its support for the Unicode UAX #29 word boundary spec. 
+ this.attrs = new Pango.LogAttr[query.char_count() + 1]; + Pango.get_log_attrs( + query, query.length, -1, language, this.attrs + ); + + consume_char(); + } + + public void consume_char() { + var current_pos = this.next_pos; + if (this.query.get_next_char(ref this.next_pos, out this.c)) { + this.current_c++; + } + this.current_pos = current_pos; + } + + public void skip_to_next() { + while (this.has_next && !this.is_at_quote && !this.is_at_word) { + consume_char(); + } + } + + public string consume_word() { + var start = this.current_pos; + // the attr.is_word_end value applies to the first char + // after the end of a word, so need to move one past the + // end of the current word to determine where it ends + consume_char(); + while (this.has_next && + (this.c in CONTINUATION_CHARS || + this.attrs[this.current_c].is_word_end != 1)) { + consume_char(); + } + return this.query.slice(start, this.current_pos); + } + + public string consume_quote() { + consume_char(); // skip the leading quote + var start = this.current_pos; + unichar last_c = 0; // previous char, so a backslash-escaped quote does not end the quote + for (; this.has_next && (this.c != '"' || last_c == '\\'); consume_char()) { + last_c = this.c; + } + var quote = this.query.slice(start, this.current_pos); + consume_char(); // skip the trailing quote + return quote; + } + + } + + + public Geary.SearchQuery.Strategy default_strategy { get; private set; } + + public Geary.AccountInformation account { get; private set; } + + public Pango.Language language { + get; set; default = Pango.Language.get_default(); + } + + // Maps of localised search operator names and values to their + // internal forms + private Gee.Map text_operators = + new Gee.HashMap(); + private Gee.Map boolean_operators = + new Gee.HashMap(); + private Gee.Set search_op_to_me = new Gee.HashSet(); + private Gee.Set search_op_from_me = new Gee.HashSet(); + + + public SearchExpressionFactory(Geary.SearchQuery.Strategy default_strategy, + Geary.AccountInformation account) { + this.default_strategy = default_strategy; + 
this.account = account; + construct_factories(); + } + + /** Constructs a search expression from the given query string. */ + public Gee.List parse_query(string query) { + var operands = new Gee.LinkedList(); + var tokens = new Tokeniser(query, this.language); + while (tokens.has_next) { + if (tokens.is_at_word) { + Geary.SearchQuery.Term? op = null; + var word = tokens.consume_word(); + if (tokens.current_character == OPERATOR_SEPARATOR && + tokens.has_next) { + op = new_extended_operator(word, tokens); + } + if (op == null) { + op = new_text_all_operator(word, false); + } + operands.add(op); + } else if (tokens.is_at_quote) { + operands.add( + new_text_all_operator(tokens.consume_quote(), true) + ); + } else { + tokens.skip_to_next(); + } + } + + return operands; + } + + private Geary.SearchQuery.Term? new_extended_operator(string name, + Tokeniser tokens) { + Geary.SearchQuery.Term? op = null; + + // consume the ':' + tokens.consume_char(); + + bool is_quoted = false; + string? value = null; + if (tokens.is_at_word) { + value = tokens.consume_word(); + } else if (tokens.is_at_quote) { + value = tokens.consume_quote(); + is_quoted = true; + } + + FactoryContext? context = null; + if (value != null) { + context = this.text_operators[name]; + if (context == null) { + context = this.boolean_operators[ + OPERATOR_TEMPLATE.printf(name, value) + ]; + } + } + + if (context != null) { + op = context.factory(value, is_quoted); + } + + if (op == null) { + // Still no operator, so the name or value must have been + // invalid. Repair by treating each as separate ops, if + // present. + var term = ( + value == null + ? "%s:".printf(name) + : "%s:%s".printf(name, value) + ); + op = new_text_all_operator(term, false); + } + + return op; + } + + private inline Geary.SearchQuery.Strategy get_matching_strategy(bool is_quoted) { + return ( + is_quoted + ? Geary.SearchQuery.Strategy.EXACT + : this.default_strategy + ); + } + + private Gee.List get_account_addresses() { + Gee.List? 
mailboxes = + this.account.sender_mailboxes; + var addresses = new Gee.LinkedList(); + if (mailboxes != null) { + foreach (var mailbox in mailboxes) { + addresses.add(mailbox.address); + } + } + return addresses; + } + + private void construct_factories() { + // Maps of possibly translated search operator names and values + // to English/internal names and values. We include the + // English version anyway so that when translations provide a + // localised version of the operator names but have not also + // translated the user manual, the English version in the + // manual still works. + + // Text operators + /////////////////////////////////////////////////////////// + + FactoryContext attachment_name = new FactoryContext( + this.new_text_attachment_name_operator + ); + this.text_operators.set("attachment", attachment_name); + /// Translators: Can be typed in the search box like + /// "attachment:file.txt" to find messages with attachments + /// with a particular name. + /// + /// The translated string must be a single word (use '-', '_' + /// or similar to combine words into one), should be short, + /// and also match the translation in "search.page" of the + /// Geary User Guide. + this.text_operators.set(C_("Search operator", "attachment"), + attachment_name); + + FactoryContext bcc = new FactoryContext(this.new_text_bcc_operator); + this.text_operators.set("bcc", bcc); + /// Translators: Can be typed in the search box like + /// "bcc:johndoe@example.com" to find messages bcc'd to a + /// particular person. + /// + /// The translated string must be a single word (use '-', '_' + /// or similar to combine words into one), should be short, + /// and also match the translation in "search.page" of the + /// Geary User Guide. 
+ this.text_operators.set(C_("Search operator", "bcc"), bcc); + + FactoryContext body = new FactoryContext(this.new_text_body_operator); + this.text_operators.set("body", body); + /// Translators: Can be typed in the search box like + /// "body:word" to find "word" only if it occurs in the body + /// of a message. + /// + /// The translated string must be a single word (use '-', '_' + /// or similar to combine words into one), should be short, + /// and also match the translation in "search.page" of the + /// Geary User Guide. + this.text_operators.set(C_("Search operator", "body"), body); + + FactoryContext cc = new FactoryContext(this.new_text_cc_operator); + this.text_operators.set("cc", cc); + /// Translators: Can be typed in the search box like + /// "cc:johndoe@example.com" to find messages cc'd to a + /// particular person. + /// + /// The translated string must be a single word (use '-', '_' + /// or similar to combine words into one), should be short, + /// and also match the translation in "search.page" of the + /// Geary User Guide. + this.text_operators.set(C_("Search operator", "cc"), cc); + + FactoryContext from = new FactoryContext(this.new_text_from_operator); + this.text_operators.set("from", from); + /// Translators: Can be typed in the search box like + /// "from:johndoe@example.com" to find messages from a + /// particular sender. + /// + /// The translated string must be a single word (use '-', '_' + /// or similar to combine words into one), should be short, + /// and also match the translation in "search.page" of the + /// Geary User Guide. + this.text_operators.set(C_("Search operator", "from"), from); + + FactoryContext subject = new FactoryContext( + this.new_text_subject_operator + ); + this.text_operators.set("subject", subject); + /// Translators: Can be typed in the search box like + /// "subject:word" to find "word" only if it occurs in the + /// subject of a message. 
+ /// + /// The translated string must be a single word (use '-', '_' + /// or similar to combine words into one), should be short, + /// and also match the translation in "search.page" of the + /// Geary User Guide. + this.text_operators.set(C_("Search operator", "subject"), subject); + + FactoryContext to = new FactoryContext(this.new_text_to_operator); + this.text_operators.set("to", to); + /// Translators: Can be typed in the search box like + /// "to:johndoe@example.com" to find messages received by a + /// particular person. + /// + /// The translated string must be a single word (use '-', '_' + /// or similar to combine words into one), should be short, + /// and also match the translation in "search.page" of the + /// Geary User Guide. + this.text_operators.set(C_("Search operator", "to"), to); + + /// Translators: Can be typed in the search box after "to:", + /// "cc:" and "bcc:" e.g.: "to:me". Matches conversations that + /// are addressed to the user. + /// + /// The translated string must be a single word (use '-', '_' + /// or similar to combine words into one), should be short, + /// and also match the translation in "search.page" of the + /// Geary User Guide. + this.search_op_to_me.add( + C_("Search operator value - mail addressed to the user", "me") + ); + this.search_op_to_me.add("me"); + + /// Translators: Can be typed in the search box after "from:" + /// i.e.: "from:me". Matches conversations that were sent by the + /// user. + /// + /// The translated string must be a single word (use '-', '_' + /// or similar to combine words into one), should be short, + /// and also match the translation in "search.page" of the + /// Geary User Guide. 
+ this.search_op_from_me.add( + C_("Search operator value - mail sent by the user", "me") + ); + this.search_op_from_me.add("me"); + + // Boolean operators + /////////////////////////////////////////////////////////// + + /// Translators: Can be typed in the search box like + /// "is:unread" to find messages that are read, unread, or + /// starred. + /// + /// The translated string must be a single word (use '-', '_' + /// or similar to combine words into one), should be short, + /// and also match the translation in "search.page" of the + /// Geary User Guide. + string bool_is_name = C_("Search operator", "is"); + + /// Translators: Can be typed in the search box after "is:" + /// i.e.: "is:unread". Matches conversations that are flagged + /// unread. + /// + /// The translated string must be a single word (use '-', '_' + /// or similar to combine words into one), should be short, + /// and also match the translation in "search.page" of the + /// Geary User Guide. + string bool_is_unread_value = C_("'is:' search operator value", "unread"); + + /// Translators: Can be typed in the search box after "is:" + /// i.e.: "is:read". Matches conversations that are flagged as + /// read. + /// + /// The translated string must be a single word (use '-', '_' + /// or similar to combine words into one), should be short, + /// and also match the translation in "search.page" of the + /// Geary User Guide. + string bool_is_read_value = C_("'is:' search operator value", "read"); + + /// Translators: Can be typed in the search box after "is:" + /// i.e.: "is:starred". Matches conversations that are flagged + /// as starred. + /// + /// The translated string must be a single word (use '-', '_' + /// or similar to combine words into one), should be short, + /// and also match the translation in "search.page" of the + /// Geary User Guide. 
+ string bool_is_starred_value = C_("'is:' search operator value", "starred"); + + FactoryContext is_unread = new FactoryContext( + this.new_boolean_unread_operator + ); + this.boolean_operators.set("is:unread", is_unread); + this.boolean_operators.set( + OPERATOR_TEMPLATE.printf( + bool_is_name, bool_is_unread_value + ), is_unread + ); + + FactoryContext is_read = new FactoryContext( + this.new_boolean_read_operator + ); + this.boolean_operators.set("is:read", is_read); + this.boolean_operators.set( + OPERATOR_TEMPLATE.printf( + bool_is_name, bool_is_read_value + ), is_read + ); + + FactoryContext is_starred = new FactoryContext( + this.new_boolean_starred_operator + ); + this.boolean_operators.set("is:starred", is_starred); + this.boolean_operators.set( + OPERATOR_TEMPLATE.printf( + bool_is_name, bool_is_starred_value + ), is_starred + ); + } + + private Geary.SearchQuery.Term? new_text_all_operator( + string value, bool is_quoted + ) { + return new Geary.SearchQuery.EmailTextTerm( + ALL, get_matching_strategy(is_quoted), value + ); + } + + private Geary.SearchQuery.Term? new_text_attachment_name_operator( + string value, bool is_quoted + ) { + return new Geary.SearchQuery.EmailTextTerm( + ATTACHMENT_NAME, get_matching_strategy(is_quoted), value + ); + } + + private Geary.SearchQuery.Term? new_text_bcc_operator( + string value, bool is_quoted + ) { + Geary.SearchQuery.Term? op = null; + if (!is_quoted && value in this.search_op_to_me) { + op = new Geary.SearchQuery.EmailTextTerm.disjunction( + BCC, EXACT, get_account_addresses() + ); + } else { + op = new Geary.SearchQuery.EmailTextTerm( + BCC, EXACT, value + ); + } + return op; + } + + private Geary.SearchQuery.Term? new_text_body_operator( + string value, bool is_quoted + ) { + return new Geary.SearchQuery.EmailTextTerm( + BODY, get_matching_strategy(is_quoted), value + ); + } + + private Geary.SearchQuery.Term? new_text_cc_operator( + string value, bool is_quoted + ) { + Geary.SearchQuery.Term? 
op = null; + if (!is_quoted && value in this.search_op_to_me) { + op = new Geary.SearchQuery.EmailTextTerm.disjunction( + CC, EXACT, get_account_addresses() + ); + } else { + op = new Geary.SearchQuery.EmailTextTerm( + CC, get_matching_strategy(is_quoted), value + ); + } + return op; + } + + private Geary.SearchQuery.Term? new_text_from_operator( + string value, bool is_quoted + ) { + Geary.SearchQuery.Term? op = null; + if (!is_quoted && value in this.search_op_from_me) { + op = new Geary.SearchQuery.EmailTextTerm.disjunction( + FROM, EXACT, get_account_addresses() + ); + } else { + op = new Geary.SearchQuery.EmailTextTerm(FROM, EXACT, value); + } + return op; + } + + private Geary.SearchQuery.Term? new_text_subject_operator( + string value, bool is_quoted + ) { + return new Geary.SearchQuery.EmailTextTerm( + SUBJECT, get_matching_strategy(is_quoted), value + ); + } + + private Geary.SearchQuery.Term? new_text_to_operator( + string value, bool is_quoted + ) { + Geary.SearchQuery.Term? op = null; + if (!is_quoted && value in this.search_op_to_me) { + op = new Geary.SearchQuery.EmailTextTerm.disjunction( + TO, EXACT, get_account_addresses() + ); + } else { + op = new Geary.SearchQuery.EmailTextTerm( + TO, EXACT, value + ); + } + return op; + } + + private Geary.SearchQuery.Term? new_boolean_unread_operator( + string value, bool is_quoted + ) { + Geary.SearchQuery.Term? op = null; + if (!is_quoted) { + op = new Geary.SearchQuery.EmailFlagTerm(Geary.EmailFlags.UNREAD); + } + return op; + } + + private Geary.SearchQuery.Term? new_boolean_read_operator( + string value, bool is_quoted + ) { + Geary.SearchQuery.Term? op = null; + if (!is_quoted) { + op = new Geary.SearchQuery.EmailFlagTerm(Geary.EmailFlags.UNREAD); + op.is_negated = true; + } + return op; + } + + private Geary.SearchQuery.Term? new_boolean_starred_operator( + string value, bool is_quoted + ) { + Geary.SearchQuery.Term? 
op = null; + if (!is_quoted) { + op = new Geary.SearchQuery.EmailFlagTerm(Geary.EmailFlags.FLAGGED); + } + return op; + } + +} diff --git a/src/engine/api/geary-account.vala b/src/engine/api/geary-account.vala index d81661ca..6bba6b90 100644 --- a/src/engine/api/geary-account.vala +++ b/src/engine/api/geary-account.vala @@ -512,23 +512,13 @@ public abstract class Geary.Account : BaseObject, Logging.Source { ) throws GLib.Error; /** - * Create a new {@link SearchQuery} for this {@link Account}. - * - * See {@link Geary.SearchQuery.Strategy} for more information about how its interpreted by the - * Engine. In particular, note that it's an advisory parameter only and may have no effect, - * especially on server searches. However, it may also have a dramatic effect on what search - * results are returned and so should be used with some caution. Whether this parameter is - * user-configurable, available through GSettings or another configuration mechanism, or simply - * baked into the caller's code is up to the caller. CONSERVATIVE is designed to be a good - * default. - * - * The resulting object can only be used with calls into this - * account instance. + * Create a new search query for this account. */ - public abstract async SearchQuery new_search_query(string query, - SearchQuery.Strategy strategy, - GLib.Cancellable? cancellable) - throws GLib.Error; + public abstract async SearchQuery new_search_query( + Gee.List expression, + string text, + GLib.Cancellable? cancellable + ) throws GLib.Error; /** * Performs a search with the given query. Optionally, a list of folders not to search diff --git a/src/engine/api/geary-search-query.vala b/src/engine/api/geary-search-query.vala index 0a15ff2e..84f686b9 100644 --- a/src/engine/api/geary-search-query.vala +++ b/src/engine/api/geary-search-query.vala @@ -1,6 +1,6 @@ /* - * Copyright 2016 Software Freedom Conservancy Inc. - * Copyright 2019 Michael Gratton + * Copyright © 2016 Software Freedom Conservancy Inc. 
+ * Copyright © 2019-2020 Michael Gratton * * This software is licensed under the GNU Lesser General Public License * (version 2.1 or later). See the COPYING file in this distribution. @@ -13,6 +13,16 @@ * Account.new_search_query} and then passed to search methods on * {@link Account} or {@link App.SearchFolder}. * + * Actual search queries are specified by the given {@link + * expression}, which is a list of {@link Term}. The expression + * denotes the conjunction of all given terms, that is, each term is + * combined by a Boolean AND function. While the order of the terms is + * not important, the expression should attempt to reflect the + * free-text search query it was built from (if any). A more + * expressive language is not supported since it is designed to work + * with both the Engine's built-in full text search system as well as + * other server-based systems, including IMAP. + * * @see Account.new_search_query * @see Account.local_search_async * @see Account.get_search_matches_async @@ -54,36 +64,272 @@ public abstract class Geary.SearchQuery : BaseObject { /** * Search for all textual variants, i.e. "the sky's the limit." */ - HORIZON + HORIZON; + + + /** Determines if stemming may be used for an operator. */ + internal bool is_stemming_enabled() { + return this != EXACT; + } + + /** + * The minimum term length before stemming is allowed. + * + * This prevents short words that might be stemmed from being stemmed. + */ + internal int get_min_term_length_for_stemming() { + var min = 0; + switch (this) { + case EXACT: + min = int.MAX; + break; + case CONSERVATIVE: + min = 6; + break; + case AGGRESSIVE: + min = 4; + break; + case HORIZON: + min = 0; + break; + } + return min; + } + + /** + * Maximum difference in lengths between term and stemmed variant. + * + * This prevents long words from being stemmed to much shorter + * words (which creates opportunities for greedy matching). 
+ */ + internal int get_max_difference_term_stem_lengths() { + var max = 0; + switch (this) { + case EXACT: + max = 0; + break; + case CONSERVATIVE: + max = 2; + break; + case AGGRESSIVE: + max = 4; + break; + case HORIZON: + max =int.MAX; + break; + } + return max; + } + + /** + * Maximum difference in lengths between a matched word and the stemmed variant it matched + * against. + * + * This prevents long words being matched to short stem + * variants (which creates opportunities for greedy matching). + */ + internal int get_max_difference_match_stem_lengths() { + var max = 0; + switch (this) { + case EXACT: + max = 0; + break; + case CONSERVATIVE: + max = 2; + break; + case AGGRESSIVE: + max = 3; + break; + case HORIZON: + max = int.MAX; + break; + } + return max; + } + } - /** The account that owns this query. */ - public Account owner { get; private set; } + /** + * Parent class for terms that make up a search query's expression. + * + * @see SearchQuery.expression + */ + public abstract class Term : BaseObject { + + /** Determines if the opposite of the term is matched. */ + public bool is_negated { get; set; default = false; } + + /** Returns a string representation, for debugging. */ + public abstract string to_string(); + + } /** - * The original search text. + * A term that matches text properties of an email. + */ + public class EmailTextTerm : Term { + + + /** + * Supported text email properties that can be queried. + * + * @see EmailTextTerm + */ + public enum Property { + /** Search for a term in all supported properties. */ + ALL, + + /** Search for a term in the To field. */ + TO, + + /** Search for a term in the Cc field. */ + CC, + + /** Search for a term in the Bcc field. */ + BCC, + + /** Search for a term in the From field. */ + FROM, + + /** Search for a term in the email subject. */ + SUBJECT, + + /** Search for a term in the email body. */ + BODY, + + /** Search for a term in email attachment names. 
*/ + ATTACHMENT_NAME; + } + + + /** The email property this term applies to. */ + public Property target { get; private set; } + + /** The strategy used for matching the given terms. */ + public Strategy matching_strategy { get; private set; } + + /** + * The strings to match against the given target. + * + * If more than one term is given, they are treated as the + * disjunction of all, that is they are combined using the + * Boolean OR function. + */ + public Gee.List terms { + get; private set; default = new Gee.ArrayList(); + } + + + public EmailTextTerm(Property target, + Strategy matching_strategy, + string term) { + this.target = target; + this.matching_strategy = matching_strategy; + this.terms.add(term); + } + + public EmailTextTerm.disjunction(Property target, + Strategy matching_strategy, + Gee.List terms) { + this.target = target; + this.matching_strategy = matching_strategy; + this.terms.add_all(terms); + } + + public override string to_string() { + var builder = new GLib.StringBuilder(); + if (this.is_negated) { + builder.append_c('!'); + } + + builder.append( + ObjectUtils.to_enum_nick( + typeof(Property), this.target).up() + ); + builder.append_c(':'); + builder.append( + ObjectUtils.to_enum_nick( + typeof(Strategy), this.matching_strategy + ).up() + ); + builder.append_c('('); + + var iter = this.terms.iterator(); + if (iter.next()) { + builder.append(iter.get().to_string()); + } + while (iter.next()) { + builder.append_c(','); + builder.append(iter.get().to_string()); + } + builder.append_c(')'); + return builder.str; + } + + } + + + /** + * A term that matches a given flag in an email. + */ + public class EmailFlagTerm : Term { + + + public NamedFlag value { get; private set; } + + + public EmailFlagTerm(NamedFlag value) { + this.value = value; + } + + public override string to_string() { + return "%s(%s)".printf( + this.is_negated ? "!" : "", + this.value.to_string() + ); + } + + } + + + /** + * A read-only list of search terms to be evaluated. 
+ * + * Each given term is used in a conjunction, that is combined + * using a Boolean `AND` operator. + */ + public Gee.List expression { get; private set; } + private Gee.List _rw_expression = new Gee.ArrayList(); + + /** + * The original search text, if any. * * This is used mostly for debugging. */ public string raw { get; private set; } - /** - * The selected {@link Strategy} quality. - */ - public Strategy strategy { get; private set; } - - protected SearchQuery(Account owner, - string raw, - Strategy strategy) { - this.owner = owner; + protected SearchQuery(Gee.Collection expression, + string raw) { + this._rw_expression.add_all(expression); + this.expression = this._rw_expression.read_only_view; this.raw = raw; - this.strategy = strategy; } public string to_string() { - return "\"%s\" (%s)".printf(this.raw, this.strategy.to_string()); - } -} + var builder = new GLib.StringBuilder(); + builder.append_printf("\"%s\": ", this.raw); + var iter = this.expression.iterator(); + if (iter.next()) { + builder.append(iter.get().to_string()); + } + while (iter.next()) { + builder.append_c(','); + builder.append(iter.get().to_string()); + } + return builder.str; + } + +} diff --git a/src/engine/imap-db/imap-db-search-query.vala b/src/engine/imap-db/imap-db-search-query.vala index 78f4eda3..21dee990 100644 --- a/src/engine/imap-db/imap-db-search-query.vala +++ b/src/engine/imap-db/imap-db-search-query.vala @@ -99,6 +99,7 @@ private class Geary.ImapDB.SearchQuery : Geary.SearchQuery { } } + private Geary.SearchQuery.Strategy strategy; // Maps of localised search operator names and values to their // internal forms @@ -323,10 +324,11 @@ private class Geary.ImapDB.SearchQuery : Geary.SearchQuery { public async SearchQuery(Geary.Account owner, ImapDB.Account local, - string query, + Gee.Collection expression, + string raw, Geary.SearchQuery.Strategy strategy, GLib.Cancellable? 
cancellable) { - base(owner, query, strategy); + base(expression, raw); this.account = local; this.stemmer = new SnowBall.Stemmer(find_appropriate_search_stemmer()); diff --git a/src/engine/imap-engine/imap-engine-generic-account.vala b/src/engine/imap-engine/imap-engine-generic-account.vala index ef1ba7b4..07024658 100644 --- a/src/engine/imap-engine/imap-engine-generic-account.vala +++ b/src/engine/imap-engine/imap-engine-generic-account.vala @@ -572,12 +572,13 @@ private abstract class Geary.ImapEngine.GenericAccount : Geary.Account { } /** {@inheritDoc} */ - public override async SearchQuery new_search_query(string query, - SearchQuery.Strategy strategy, - GLib.Cancellable? cancellable) - throws GLib.Error { + public override async SearchQuery new_search_query( + Gee.List expression, + string text, + GLib.Cancellable? cancellable + ) throws GLib.Error { return yield new ImapDB.SearchQuery( - this, local, query, strategy, cancellable + this, this.local, expression, text, EXACT, cancellable ); } diff --git a/test/client/util/util-email-test.vala b/test/client/util/util-email-test.vala index fb3c365f..01605480 100644 --- a/test/client/util/util-email-test.vala +++ b/test/client/util/util-email-test.vala @@ -7,14 +7,45 @@ public class Util.Email.Test : TestCase { + + private Application.Configuration? config = null; + private Geary.AccountInformation? 
account = null; + + public Test() { - base("UtilEmailTest"); + base("Util.Email.Test"); add_test("null_originator", null_originator); add_test("from_originator", from_originator); add_test("sender_originator", sender_originator); add_test("reply_to_originator", reply_to_originator); add_test("reply_to_via_originator", reply_to_via_originator); add_test("plain_via_originator", plain_via_originator); + add_test("empty_search_query", empty_search_query); + add_test("plain_search_terms", plain_search_terms); + add_test("continuation_search_terms", continuation_search_terms); + add_test("i18n_search_terms", i18n_search_terms); + add_test("multiple_search_terms", multiple_search_terms); + add_test("quoted_search_terms", quoted_search_terms); + add_test("text_op_terms", text_op_terms); + add_test("text_op_single_me_terms", text_op_single_me_terms); + add_test("text_op_multiple_me_terms", text_op_multiple_me_terms); + add_test("boolean_op_terms", boolean_op_terms); + add_test("invalid_op_terms", invalid_op_terms); + } + + public override void set_up() { + this.config = new Application.Configuration(Application.Client.SCHEMA_ID); + this.account = new Geary.AccountInformation( + "test", + OTHER, + new Mock.CredentialsMediator(), + new Geary.RFC822.MailboxAddress("test", "test@example.com") + ); + } + + public override void tear_down() { + this.config = null; + this.account = null; } public void null_originator() throws GLib.Error { @@ -95,6 +126,327 @@ public class Util.Email.Test : TestCase { assert_equal(originator.address, "bot@example.com"); } + public void empty_search_query() throws GLib.Error { + var test_article = new SearchExpressionFactory( + this.config.get_search_strategy(), this.account + ); + assert_collection(test_article.parse_query("")).is_empty(); + } + + public void plain_search_terms() throws GLib.Error { + var test_article = new SearchExpressionFactory( + this.config.get_search_strategy(), this.account + ); + + var simple1 = 
test_article.parse_query("hello"); + assert_collection(simple1).size(1); + assert_true(simple1[0] is Geary.SearchQuery.EmailTextTerm); + var text1 = simple1[0] as Geary.SearchQuery.EmailTextTerm; + assert_true(text1.target == ALL); + assert_true(text1.matching_strategy == CONSERVATIVE); + assert_collection(text1.terms).size(1).contains("hello"); + + var simple2 = test_article.parse_query("h"); + assert_collection(simple2).size(1); + assert_true(simple2[0] is Geary.SearchQuery.EmailTextTerm); + var text2 = simple2[0] as Geary.SearchQuery.EmailTextTerm; + assert_collection(text2.terms).size(1).contains("h"); + + var simple3 = test_article.parse_query(" h"); + assert_collection(simple3).size(1); + assert_true(simple3[0] is Geary.SearchQuery.EmailTextTerm); + var text3 = simple3[0] as Geary.SearchQuery.EmailTextTerm; + assert_collection(text3.terms).size(1).contains("h"); + + var simple4 = test_article.parse_query("h "); + assert_collection(simple4).size(1); + assert_true(simple4[0] is Geary.SearchQuery.EmailTextTerm); + var text4 = simple4[0] as Geary.SearchQuery.EmailTextTerm; + assert_collection(text4.terms).size(1).contains("h"); + } + + public void continuation_search_terms() throws GLib.Error { + var test_article = new SearchExpressionFactory( + this.config.get_search_strategy(), + this.account + ); + + var simple1 = test_article.parse_query("hello-there"); + assert_collection(simple1).size(1); + assert_true(simple1[0] is Geary.SearchQuery.EmailTextTerm); + var text1 = simple1[0] as Geary.SearchQuery.EmailTextTerm; + assert_collection(text1.terms).size(1).contains("hello-there"); + + var simple2 = test_article.parse_query("hello-"); + assert_collection(simple2).size(1); + assert_true(simple2[0] is Geary.SearchQuery.EmailTextTerm); + var text2 = simple2[0] as Geary.SearchQuery.EmailTextTerm; + assert_collection(text2.terms).size(1).contains("hello-"); + + var simple3 = test_article.parse_query("test@example.com"); + assert_collection(simple3).size(1); + 
assert_true(simple3[0] is Geary.SearchQuery.EmailTextTerm); + var text3 = simple3[0] as Geary.SearchQuery.EmailTextTerm; + assert_collection(text3.terms).size(1).contains("test@example.com"); + } + + public void i18n_search_terms() throws GLib.Error { + var test_article = new SearchExpressionFactory( + this.config.get_search_strategy(), + this.account + ); + test_article.language = Pango.Language.from_string("th"); + + var multiple = test_article.parse_query("ภาษาไทย"); + assert_collection(multiple).size(2); + assert_true(multiple[0] is Geary.SearchQuery.EmailTextTerm); + assert_true(multiple[1] is Geary.SearchQuery.EmailTextTerm); + assert_collection( + ((Geary.SearchQuery.EmailTextTerm) multiple[0]).terms + ).size(1).contains("ภาษา"); + assert_collection( + ((Geary.SearchQuery.EmailTextTerm) multiple[1]).terms + ).size(1).contains("ไทย"); + } + + public void multiple_search_terms() throws GLib.Error { + var test_article = new SearchExpressionFactory( + this.config.get_search_strategy(), this.account + ); + + var multiple = test_article.parse_query("hello there"); + assert_collection(multiple).size(2); + assert_true(multiple[0] is Geary.SearchQuery.EmailTextTerm); + assert_true(multiple[1] is Geary.SearchQuery.EmailTextTerm); + assert_collection( + ((Geary.SearchQuery.EmailTextTerm) multiple[0]).terms + ).size(1).contains("hello"); + assert_collection( + ((Geary.SearchQuery.EmailTextTerm) multiple[1]).terms + ).size(1).contains("there"); + } + + public void quoted_search_terms() throws GLib.Error { + var test_article = new SearchExpressionFactory( + this.config.get_search_strategy(), this.account + ); + + var simple1 = test_article.parse_query("\"hello\""); + assert_collection(simple1).size(1); + assert_true(simple1[0] is Geary.SearchQuery.EmailTextTerm); + var text1 = simple1[0] as Geary.SearchQuery.EmailTextTerm; + assert_true(text1.target == ALL); + assert_true(text1.matching_strategy == EXACT); + assert_collection(text1.terms).size(1).contains("hello"); + + 
var simple2 = test_article.parse_query("\"h\""); + assert_collection(simple2).size(1); + assert_true(simple2[0] is Geary.SearchQuery.EmailTextTerm); + var text2 = simple2[0] as Geary.SearchQuery.EmailTextTerm; + assert_collection(text2.terms).size(1).contains("h"); + + var simple3 = test_article.parse_query(" \"h\""); + assert_collection(simple3).size(1); + assert_true(simple3[0] is Geary.SearchQuery.EmailTextTerm); + var text3 = simple3[0] as Geary.SearchQuery.EmailTextTerm; + assert_collection(text3.terms).size(1).contains("h"); + + var simple4 = test_article.parse_query("\"h"); + assert_collection(simple4).size(1); + assert_true(simple4[0] is Geary.SearchQuery.EmailTextTerm); + var text4 = simple4[0] as Geary.SearchQuery.EmailTextTerm; + assert_collection(text4.terms).size(1).contains("h"); + + var simple5 = test_article.parse_query("\"h\" "); + assert_collection(simple5).size(1); + assert_true(simple5[0] is Geary.SearchQuery.EmailTextTerm); + var text5 = simple5[0] as Geary.SearchQuery.EmailTextTerm; + assert_collection(text5.terms).size(1).contains("h"); + + var simple6 = test_article.parse_query("\"hello there\""); + assert_collection(simple6).size(1); + assert_true(simple6[0] is Geary.SearchQuery.EmailTextTerm); + var text6 = simple6[0] as Geary.SearchQuery.EmailTextTerm; + assert_collection(text6.terms).size(1).contains("hello there"); + } + + public void text_op_terms() throws GLib.Error { + var test_article = new SearchExpressionFactory( + this.config.get_search_strategy(), this.account + ); + + var simple_body = test_article.parse_query("body:hello"); + assert_collection(simple_body).size(1); + assert_true(simple_body[0] is Geary.SearchQuery.EmailTextTerm); + var text_body = simple_body[0] as Geary.SearchQuery.EmailTextTerm; + assert_true(text_body.target == BODY); + assert_true(text_body.matching_strategy == CONSERVATIVE); + assert_collection(text_body.terms).size(1).contains("hello"); + + var simple_body_quoted = 
test_article.parse_query("body:\"hello\""); + assert_collection(simple_body_quoted).size(1); + assert_true(simple_body_quoted[0] is Geary.SearchQuery.EmailTextTerm); + var text_body_quoted = simple_body_quoted[0] as Geary.SearchQuery.EmailTextTerm; + assert_true(text_body_quoted.target == BODY); + assert_true(text_body_quoted.matching_strategy == EXACT); + assert_collection(text_body_quoted.terms).size(1).contains("hello"); + + var simple_attach_name = test_article.parse_query("attachment:hello"); + assert_collection(simple_attach_name).size(1); + assert_true(simple_attach_name[0] is Geary.SearchQuery.EmailTextTerm); + var text_attch_name = simple_attach_name[0] as Geary.SearchQuery.EmailTextTerm; + assert_true(text_attch_name.target == ATTACHMENT_NAME); + + var simple_bcc = test_article.parse_query("bcc:hello"); + assert_collection(simple_bcc).size(1); + assert_true(simple_bcc[0] is Geary.SearchQuery.EmailTextTerm); + var text_bcc = simple_bcc[0] as Geary.SearchQuery.EmailTextTerm; + assert_true(text_bcc.target == BCC); + + var simple_cc = test_article.parse_query("cc:hello"); + assert_collection(simple_cc).size(1); + assert_true(simple_cc[0] is Geary.SearchQuery.EmailTextTerm); + var text_cc = simple_cc[0] as Geary.SearchQuery.EmailTextTerm; + assert_true(text_cc.target == CC); + + var simple_from = test_article.parse_query("from:hello"); + assert_collection(simple_from).size(1); + assert_true(simple_from[0] is Geary.SearchQuery.EmailTextTerm); + var text_from = simple_from[0] as Geary.SearchQuery.EmailTextTerm; + assert_true(text_from.target == FROM); + + var simple_subject = test_article.parse_query("subject:hello"); + assert_collection(simple_subject).size(1); + assert_true(simple_subject[0] is Geary.SearchQuery.EmailTextTerm); + var text_subject = simple_subject[0] as Geary.SearchQuery.EmailTextTerm; + assert_true(text_subject.target == SUBJECT); + + var simple_to = test_article.parse_query("to:hello"); + assert_collection(simple_to).size(1); + 
assert_true(simple_to[0] is Geary.SearchQuery.EmailTextTerm); + var text_to = simple_to[0] as Geary.SearchQuery.EmailTextTerm; + assert_true(text_to.target == TO); + } + + public void text_op_single_me_terms() throws GLib.Error { + var test_article = new SearchExpressionFactory( + this.config.get_search_strategy(), this.account + ); + + var simple_to = test_article.parse_query("to:me"); + assert_collection(simple_to).size(1); + assert_true(simple_to[0] is Geary.SearchQuery.EmailTextTerm); + var text_to = simple_to[0] as Geary.SearchQuery.EmailTextTerm; + assert_true(text_to.target == TO); + assert_true(text_to.matching_strategy == EXACT); + assert_collection(text_to.terms).size(1).contains("test@example.com"); + + var simple_cc = test_article.parse_query("cc:me"); + assert_collection(simple_cc).size(1); + assert_true(simple_cc[0] is Geary.SearchQuery.EmailTextTerm); + var text_cc = simple_cc[0] as Geary.SearchQuery.EmailTextTerm; + assert_true(text_cc.target == CC); + assert_true(text_cc.matching_strategy == EXACT); + assert_collection(text_cc.terms).size(1).contains("test@example.com"); + + var simple_bcc = test_article.parse_query("bcc:me"); + assert_collection(simple_bcc).size(1); + assert_true(simple_bcc[0] is Geary.SearchQuery.EmailTextTerm); + var text_bcc = simple_bcc[0] as Geary.SearchQuery.EmailTextTerm; + assert_true(text_bcc.target == BCC); + assert_true(text_bcc.matching_strategy == EXACT); + assert_collection(text_bcc.terms).size(1).contains("test@example.com"); + + var simple_from = test_article.parse_query("from:me"); + assert_collection(simple_from).size(1); + assert_true(simple_from[0] is Geary.SearchQuery.EmailTextTerm); + var text_from = simple_from[0] as Geary.SearchQuery.EmailTextTerm; + assert_true(text_from.target == FROM); + assert_true(text_from.matching_strategy == EXACT); + assert_collection(text_from.terms).size(1).contains("test@example.com"); + } + + public void text_op_multiple_me_terms() throws GLib.Error { + 
this.account.append_sender( + new Geary.RFC822.MailboxAddress("test2", "test2@example.com") + ); + var test_article = new SearchExpressionFactory( + this.config.get_search_strategy(), this.account + ); + + var to = test_article.parse_query("to:me"); + assert_collection(to).size(1); + assert_true(to[0] is Geary.SearchQuery.EmailTextTerm); + var text_to = to[0] as Geary.SearchQuery.EmailTextTerm; + assert_true(text_to.target == TO); + assert_true(text_to.matching_strategy == EXACT); + assert_collection(text_to.terms).size(2).contains( + "test@example.com" + ).contains( + "test2@example.com" + ); + } + + public void boolean_op_terms() throws GLib.Error { + var test_article = new SearchExpressionFactory( + this.config.get_search_strategy(), this.account + ); + + var simple_unread = test_article.parse_query("is:unread"); + assert_true(simple_unread[0] is Geary.SearchQuery.EmailFlagTerm); + var bool_unread = simple_unread[0] as Geary.SearchQuery.EmailFlagTerm; + assert_true( + bool_unread.value.equal_to(Geary.EmailFlags.UNREAD), "unread flag" + ); + assert_false(bool_unread.is_negated, "unread negated"); + + var simple_read = test_article.parse_query("is:read"); + assert_true(simple_read[0] is Geary.SearchQuery.EmailFlagTerm); + var bool_read = simple_read[0] as Geary.SearchQuery.EmailFlagTerm; + assert_true( + bool_read.value.equal_to(Geary.EmailFlags.UNREAD), "read flag" + ); + assert_true(bool_read.is_negated, "read negated"); + + var simple_starred = test_article.parse_query("is:starred"); + assert_true(simple_starred[0] is Geary.SearchQuery.EmailFlagTerm); + var bool_starred = simple_starred[0] as Geary.SearchQuery.EmailFlagTerm; + assert_true( + bool_starred.value.equal_to(Geary.EmailFlags.FLAGGED), "starred flag" + ); + assert_false(bool_starred.is_negated, "starred negated"); + } + + public void invalid_op_terms() throws GLib.Error { + var test_article = new SearchExpressionFactory( + this.config.get_search_strategy(), this.account + ); + + var simple1 = 
test_article.parse_query("body:"); + assert_collection(simple1).size(1); + assert_true(simple1[0] is Geary.SearchQuery.EmailTextTerm); + var text1 = simple1[0] as Geary.SearchQuery.EmailTextTerm; + assert_true(text1.target == ALL); + assert_true(text1.matching_strategy == CONSERVATIVE); + assert_collection(text1.terms).size(1).contains("body:"); + + var simple2 = test_article.parse_query("blarg:"); + assert_collection(simple2).size(1); + assert_true(simple2[0] is Geary.SearchQuery.EmailTextTerm); + var text2 = simple2[0] as Geary.SearchQuery.EmailTextTerm; + assert_true(text2.target == ALL); + assert_true(text2.matching_strategy == CONSERVATIVE); + assert_collection(text2.terms).size(1).contains("blarg:"); + + var simple3 = test_article.parse_query("blarg:hello"); + assert_collection(simple3).size(1); + assert_true(simple3[0] is Geary.SearchQuery.EmailTextTerm); + var text3 = simple3[0] as Geary.SearchQuery.EmailTextTerm; + assert_true(text3.target == ALL); + assert_true(text3.matching_strategy == CONSERVATIVE); + assert_collection(text3.terms).size(1).contains("blarg:hello"); + } + private Geary.Email new_email(Geary.RFC822.MailboxAddress? from, Geary.RFC822.MailboxAddress? sender, Geary.RFC822.MailboxAddress? reply_to) diff --git a/test/mock/mock-account.vala b/test/mock/mock-account.vala index 173b2eca..2d08314d 100644 --- a/test/mock/mock-account.vala +++ b/test/mock/mock-account.vala @@ -222,12 +222,12 @@ public class Mock.Account : Geary.Account, ); } - public override async Geary.SearchQuery - new_search_query(string raw, - Geary.SearchQuery.Strategy strategy, - GLib.Cancellable? cancellable) - throws GLib.Error { - return new SearchQuery(this, raw); + public override async Geary.SearchQuery new_search_query( + Gee.List expression, + string raw, + GLib.Cancellable? cancellable + ) throws GLib.Error { + return new SearchQuery(expression, raw); } public override async Gee.Collection? 
diff --git a/test/mock/mock-search-query.vala b/test/mock/mock-search-query.vala index 6653f96d..310cde7e 100644 --- a/test/mock/mock-search-query.vala +++ b/test/mock/mock-search-query.vala @@ -7,8 +7,9 @@ public class Mock.SearchQuery : Geary.SearchQuery { - internal SearchQuery(Geary.Account owner, string raw) { - base(owner, raw, Geary.SearchQuery.Strategy.EXACT); + internal SearchQuery(Gee.List<Geary.SearchQuery.Term> expression, + string raw) { + base(expression, raw); } }