Improved search experience: Bug #720361
This introduces a new full-text search algorithm that attempts to curb the effects of overstemming in the Porter Snowball stemmer. The FTS table will be regenerated with this update. The crux of this new algorithm is a configurable heuristic that reduces stemmed matching. The configuration is not available via the UI (I suspect it will only confuse users) but can be changed by power users via GSettings. More information is available at: https://wiki.gnome.org/Apps/Geary/FullTextSearchStrategy
This commit is contained in:
parent
068b3abcfc
commit
533ab75ee3
16 changed files with 742 additions and 156 deletions
|
|
@ -74,6 +74,13 @@
|
||||||
<summary>whether to compose emails in HTML</summary>
|
<summary>whether to compose emails in HTML</summary>
|
||||||
<description>True to compose emails in HTML; false for plain text.</description>
|
<description>True to compose emails in HTML; false for plain text.</description>
|
||||||
</key>
|
</key>
|
||||||
|
|
||||||
|
<key name="search-strategy" type="s">
|
||||||
|
<default>"conservative"</default>
|
||||||
|
<summary>Advisory strategy for full-text searching</summary>
|
||||||
|
<description>Acceptable values are EXACT, CONSERVATIVE, AGGRESSIVE, and HORIZON.</description>
|
||||||
|
</key>
|
||||||
|
|
||||||
</schema>
|
</schema>
|
||||||
|
|
||||||
</schemalist>
|
</schemalist>
|
||||||
|
|
|
||||||
|
|
@ -22,3 +22,4 @@ install(FILES version-019.sql DESTINATION ${SQL_DEST})
|
||||||
install(FILES version-020.sql DESTINATION ${SQL_DEST})
|
install(FILES version-020.sql DESTINATION ${SQL_DEST})
|
||||||
install(FILES version-021.sql DESTINATION ${SQL_DEST})
|
install(FILES version-021.sql DESTINATION ${SQL_DEST})
|
||||||
install(FILES version-022.sql DESTINATION ${SQL_DEST})
|
install(FILES version-022.sql DESTINATION ${SQL_DEST})
|
||||||
|
install(FILES version-023.sql DESTINATION ${SQL_DEST})
|
||||||
|
|
|
||||||
21
sql/version-023.sql
Normal file
21
sql/version-023.sql
Normal file
|
|
@ -0,0 +1,21 @@
|
||||||
|
--
|
||||||
|
-- Database upgrade to add FTS tokenize virtual table, which allows for querying the tokenizer
|
||||||
|
-- directly for stemmed words, and to replace the stemmed FTS table with an unstemmed one. We now
|
||||||
|
-- use the stemmer manually to generate search queries.
|
||||||
|
--
|
||||||
|
|
||||||
|
DROP TABLE MessageSearchTable;
|
||||||
|
|
||||||
|
CREATE VIRTUAL TABLE MessageSearchTable USING fts4(
|
||||||
|
body,
|
||||||
|
attachment,
|
||||||
|
subject,
|
||||||
|
from_field,
|
||||||
|
receivers,
|
||||||
|
cc,
|
||||||
|
bcc,
|
||||||
|
|
||||||
|
tokenize=simple,
|
||||||
|
prefix="2,4,6,8,10"
|
||||||
|
);
|
||||||
|
|
||||||
|
|
@ -170,6 +170,8 @@ engine/imap-db/imap-db-email-identifier.vala
|
||||||
engine/imap-db/imap-db-folder.vala
|
engine/imap-db/imap-db-folder.vala
|
||||||
engine/imap-db/imap-db-message-addresses.vala
|
engine/imap-db/imap-db-message-addresses.vala
|
||||||
engine/imap-db/imap-db-message-row.vala
|
engine/imap-db/imap-db-message-row.vala
|
||||||
|
engine/imap-db/imap-db-search-query.vala
|
||||||
|
engine/imap-db/imap-db-search-term.vala
|
||||||
engine/imap-db/imap-db-search-email-identifier.vala
|
engine/imap-db/imap-db-search-email-identifier.vala
|
||||||
engine/imap-db/outbox/smtp-outbox-email-identifier.vala
|
engine/imap-db/outbox/smtp-outbox-email-identifier.vala
|
||||||
engine/imap-db/outbox/smtp-outbox-email-properties.vala
|
engine/imap-db/outbox/smtp-outbox-email-properties.vala
|
||||||
|
|
|
||||||
|
|
@ -135,5 +135,43 @@ public class Configuration {
|
||||||
if (!settings.set_boolean(name, value))
|
if (!settings.set_boolean(name, value))
|
||||||
message("Unable to set configuration value %s = %s", name, value.to_string());
|
message("Unable to set configuration value %s = %s", name, value.to_string());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public Geary.SearchQuery.Strategy get_search_strategy() {
|
||||||
|
switch (settings.get_string("search-strategy").down()) {
|
||||||
|
case "exact":
|
||||||
|
return Geary.SearchQuery.Strategy.EXACT;
|
||||||
|
|
||||||
|
case "aggressive":
|
||||||
|
return Geary.SearchQuery.Strategy.AGGRESSIVE;
|
||||||
|
|
||||||
|
case "horizon":
|
||||||
|
return Geary.SearchQuery.Strategy.HORIZON;
|
||||||
|
|
||||||
|
case "conservative":
|
||||||
|
default:
|
||||||
|
return Geary.SearchQuery.Strategy.CONSERVATIVE;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public void set_search_strategy(Geary.SearchQuery.Strategy strategy) {
|
||||||
|
switch (strategy) {
|
||||||
|
case Geary.SearchQuery.Strategy.EXACT:
|
||||||
|
settings.set_string("search-strategy", "exact");
|
||||||
|
break;
|
||||||
|
|
||||||
|
case Geary.SearchQuery.Strategy.AGGRESSIVE:
|
||||||
|
settings.set_string("search-strategy", "aggressive");
|
||||||
|
break;
|
||||||
|
|
||||||
|
case Geary.SearchQuery.Strategy.HORIZON:
|
||||||
|
settings.set_string("search-strategy", "horizon");
|
||||||
|
break;
|
||||||
|
|
||||||
|
case Geary.SearchQuery.Strategy.CONSERVATIVE:
|
||||||
|
default:
|
||||||
|
settings.set_string("search-strategy", "conservative");
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -81,7 +81,7 @@ public class GearyController : Geary.BaseObject {
|
||||||
private const string MOVE_MESSAGE_TOOLTIP_MULTIPLE = _("Move conversations");
|
private const string MOVE_MESSAGE_TOOLTIP_MULTIPLE = _("Move conversations");
|
||||||
|
|
||||||
private const int SELECT_FOLDER_TIMEOUT_USEC = 100 * 1000;
|
private const int SELECT_FOLDER_TIMEOUT_USEC = 100 * 1000;
|
||||||
private const int SEARCH_TIMEOUT_MSEC = 100;
|
private const int SEARCH_TIMEOUT_MSEC = 250;
|
||||||
|
|
||||||
private const string PROP_ATTEMPT_OPEN_ACCOUNT = "attempt-open-account";
|
private const string PROP_ATTEMPT_OPEN_ACCOUNT = "attempt-open-account";
|
||||||
|
|
||||||
|
|
@ -2512,7 +2512,8 @@ public class GearyController : Geary.BaseObject {
|
||||||
|
|
||||||
cancel_search(); // Stop any search in progress.
|
cancel_search(); // Stop any search in progress.
|
||||||
|
|
||||||
folder.set_search_query(search_text, cancellable_search);
|
folder.search(search_text, GearyApplication.instance.config.get_search_strategy(),
|
||||||
|
cancellable_search);
|
||||||
|
|
||||||
main_window.folder_list.set_search(folder);
|
main_window.folder_list.set_search(folder);
|
||||||
search_text_changed(main_window.main_toolbar.search_text);
|
search_text_changed(main_window.main_toolbar.search_text);
|
||||||
|
|
@ -2523,7 +2524,8 @@ public class GearyController : Geary.BaseObject {
|
||||||
// search after a quick delay when they finish typing.
|
// search after a quick delay when they finish typing.
|
||||||
if (search_timeout_id != 0)
|
if (search_timeout_id != 0)
|
||||||
Source.remove(search_timeout_id);
|
Source.remove(search_timeout_id);
|
||||||
search_timeout_id = Timeout.add(SEARCH_TIMEOUT_MSEC, on_search_timeout);
|
|
||||||
|
search_timeout_id = Timeout.add(SEARCH_TIMEOUT_MSEC, on_search_timeout, Priority.LOW);
|
||||||
}
|
}
|
||||||
|
|
||||||
private bool on_search_timeout() {
|
private bool on_search_timeout() {
|
||||||
|
|
|
||||||
|
|
@ -457,11 +457,28 @@ public class ConversationViewer : Gtk.Box {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private void on_search_text_changed(string? query) {
|
private void on_search_text_changed(Geary.SearchQuery? query) {
|
||||||
if (query != null)
|
if (query != null)
|
||||||
highlight_search_terms.begin();
|
highlight_search_terms.begin();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// This applies a fudge-factor set of matches when the database results
|
||||||
|
// aren't entirely satisfactory, such as when you search for an email
|
||||||
|
// address and the database tokenizes out the @ and ., etc. It's not meant
|
||||||
|
// to be comprehensive, just a little extra highlighting applied to make
|
||||||
|
// the results look a little closer to what you typed.
|
||||||
|
private void add_literal_matches(string raw_query, Gee.Set<string>? search_matches) {
|
||||||
|
foreach (string word in raw_query.split(" ")) {
|
||||||
|
if (word.has_suffix("\""))
|
||||||
|
word = word.substring(0, word.length - 1);
|
||||||
|
if (word.has_prefix("\""))
|
||||||
|
word = word.substring(1);
|
||||||
|
|
||||||
|
if (!Geary.String.is_empty_or_whitespace(word))
|
||||||
|
search_matches.add(word);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
private async void highlight_search_terms() {
|
private async void highlight_search_terms() {
|
||||||
if (search_folder == null)
|
if (search_folder == null)
|
||||||
return;
|
return;
|
||||||
|
|
@ -475,8 +492,13 @@ public class ConversationViewer : Gtk.Box {
|
||||||
ids.add(email.id);
|
ids.add(email.id);
|
||||||
|
|
||||||
try {
|
try {
|
||||||
Gee.Collection<string>? search_matches = yield search_folder.get_search_matches_async(
|
Gee.Set<string>? search_matches = yield search_folder.get_search_matches_async(
|
||||||
ids, cancellable_fetch);
|
ids, cancellable_fetch);
|
||||||
|
if (search_matches == null)
|
||||||
|
search_matches = new Gee.HashSet<string>();
|
||||||
|
|
||||||
|
if (search_folder.search_query != null)
|
||||||
|
add_literal_matches(search_folder.search_query.raw, search_matches);
|
||||||
|
|
||||||
// Webkit's highlighting is ... weird. In order to actually see
|
// Webkit's highlighting is ... weird. In order to actually see
|
||||||
// all the highlighting you're applying, it seems necessary to
|
// all the highlighting you're applying, it seems necessary to
|
||||||
|
|
@ -484,7 +506,6 @@ public class ConversationViewer : Gtk.Box {
|
||||||
// seems that shorter strings will overwrite longer ones, and
|
// seems that shorter strings will overwrite longer ones, and
|
||||||
// you're left with incomplete highlighting.
|
// you're left with incomplete highlighting.
|
||||||
Gee.ArrayList<string> ordered_matches = new Gee.ArrayList<string>();
|
Gee.ArrayList<string> ordered_matches = new Gee.ArrayList<string>();
|
||||||
if (search_matches != null)
|
|
||||||
ordered_matches.add_all(search_matches);
|
ordered_matches.add_all(search_matches);
|
||||||
ordered_matches.sort((a, b) => a.length - b.length);
|
ordered_matches.sort((a, b) => a.length - b.length);
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -118,11 +118,13 @@ public abstract class Geary.AbstractAccount : BaseObject, Geary.Account {
|
||||||
public abstract async Geary.Email local_fetch_email_async(Geary.EmailIdentifier email_id,
|
public abstract async Geary.Email local_fetch_email_async(Geary.EmailIdentifier email_id,
|
||||||
Geary.Email.Field required_fields, Cancellable? cancellable = null) throws Error;
|
Geary.Email.Field required_fields, Cancellable? cancellable = null) throws Error;
|
||||||
|
|
||||||
|
public abstract Geary.SearchQuery open_search(string query, Geary.SearchQuery.Strategy strategy);
|
||||||
|
|
||||||
public abstract async Gee.Collection<Geary.EmailIdentifier>? local_search_async(Geary.SearchQuery query,
|
public abstract async Gee.Collection<Geary.EmailIdentifier>? local_search_async(Geary.SearchQuery query,
|
||||||
int limit = 100, int offset = 0, Gee.Collection<Geary.FolderPath?>? folder_blacklist = null,
|
int limit = 100, int offset = 0, Gee.Collection<Geary.FolderPath?>? folder_blacklist = null,
|
||||||
Gee.Collection<Geary.EmailIdentifier>? search_ids = null, Cancellable? cancellable = null) throws Error;
|
Gee.Collection<Geary.EmailIdentifier>? search_ids = null, Cancellable? cancellable = null) throws Error;
|
||||||
|
|
||||||
public abstract async Gee.Collection<string>? get_search_matches_async(Geary.SearchQuery query,
|
public abstract async Gee.Set<string>? get_search_matches_async(Geary.SearchQuery query,
|
||||||
Gee.Collection<Geary.EmailIdentifier> ids, Cancellable? cancellable = null) throws Error;
|
Gee.Collection<Geary.EmailIdentifier> ids, Cancellable? cancellable = null) throws Error;
|
||||||
|
|
||||||
public abstract async Gee.MultiMap<Geary.EmailIdentifier, Geary.FolderPath>? get_containing_folders_async(
|
public abstract async Gee.MultiMap<Geary.EmailIdentifier, Geary.FolderPath>? get_containing_folders_async(
|
||||||
|
|
|
||||||
|
|
@ -322,6 +322,23 @@ public interface Geary.Account : BaseObject {
|
||||||
public abstract async Geary.Email local_fetch_email_async(Geary.EmailIdentifier email_id,
|
public abstract async Geary.Email local_fetch_email_async(Geary.EmailIdentifier email_id,
|
||||||
Geary.Email.Field required_fields, Cancellable? cancellable = null) throws Error;
|
Geary.Email.Field required_fields, Cancellable? cancellable = null) throws Error;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create a new {@link SearchQuery} for this {@link Account}.
|
||||||
|
*
|
||||||
|
* See {@link Geary.SearchQuery.Strategy} for more information about how it's interpreted by the
|
||||||
|
* Engine. In particular, note that it's an advisory parameter only and may have no effect,
|
||||||
|
* especially on server searches. However, it may also have a dramatic effect on what search
|
||||||
|
* results are returned and so should be used with some caution. Whether this parameter is
|
||||||
|
* user-configurable, available through GSettings or another configuration mechanism, or simply
|
||||||
|
* baked into the caller's code is up to the caller. CONSERVATIVE is designed to be a good
|
||||||
|
* default.
|
||||||
|
*
|
||||||
|
* The SearchQuery object can only be used with calls into this Account.
|
||||||
|
*
|
||||||
|
* Dropping the last reference to the SearchQuery will close it.
|
||||||
|
*/
|
||||||
|
public abstract Geary.SearchQuery open_search(string query, Geary.SearchQuery.Strategy strategy);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Performs a search with the given query. Optionally, a list of folders not to search
|
* Performs a search with the given query. Optionally, a list of folders not to search
|
||||||
* can be passed as well as a list of email identifiers to restrict the search to only those messages.
|
* can be passed as well as a list of email identifiers to restrict the search to only those messages.
|
||||||
|
|
@ -335,9 +352,9 @@ public interface Geary.Account : BaseObject {
|
||||||
Gee.Collection<Geary.EmailIdentifier>? search_ids = null, Cancellable? cancellable = null) throws Error;
|
Gee.Collection<Geary.EmailIdentifier>? search_ids = null, Cancellable? cancellable = null) throws Error;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Given a list of mail IDs, returns a list of words that match for the query.
|
* Given a list of mail IDs, returns a set of casefolded words that match for the query.
|
||||||
*/
|
*/
|
||||||
public abstract async Gee.Collection<string>? get_search_matches_async(Geary.SearchQuery query,
|
public abstract async Gee.Set<string>? get_search_matches_async(Geary.SearchQuery query,
|
||||||
Gee.Collection<Geary.EmailIdentifier> ids, Cancellable? cancellable = null) throws Error;
|
Gee.Collection<Geary.EmailIdentifier> ids, Cancellable? cancellable = null) throws Error;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
||||||
|
|
@ -49,6 +49,8 @@ public class Geary.SearchFolder : Geary.AbstractLocalFolder, Geary.FolderSupport
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public Geary.SearchQuery? search_query { get; private set; default = null; }
|
||||||
|
|
||||||
private Gee.HashSet<Geary.FolderPath?> exclude_folders = new Gee.HashSet<Geary.FolderPath?>();
|
private Gee.HashSet<Geary.FolderPath?> exclude_folders = new Gee.HashSet<Geary.FolderPath?>();
|
||||||
private Geary.SpecialFolderType[] exclude_types = {
|
private Geary.SpecialFolderType[] exclude_types = {
|
||||||
Geary.SpecialFolderType.SPAM,
|
Geary.SpecialFolderType.SPAM,
|
||||||
|
|
@ -56,7 +58,6 @@ public class Geary.SearchFolder : Geary.AbstractLocalFolder, Geary.FolderSupport
|
||||||
Geary.SpecialFolderType.DRAFTS,
|
Geary.SpecialFolderType.DRAFTS,
|
||||||
// Orphan emails (without a folder) are also excluded; see ctor.
|
// Orphan emails (without a folder) are also excluded; see ctor.
|
||||||
};
|
};
|
||||||
private Geary.SearchQuery? search_query = null;
|
|
||||||
private Gee.TreeSet<ImapDB.SearchEmailIdentifier> search_results;
|
private Gee.TreeSet<ImapDB.SearchEmailIdentifier> search_results;
|
||||||
private Geary.Nonblocking.Mutex result_mutex = new Geary.Nonblocking.Mutex();
|
private Geary.Nonblocking.Mutex result_mutex = new Geary.Nonblocking.Mutex();
|
||||||
|
|
||||||
|
|
@ -64,7 +65,7 @@ public class Geary.SearchFolder : Geary.AbstractLocalFolder, Geary.FolderSupport
|
||||||
* Fired when the search query has changed. This signal is fired *after* the search
|
* Fired when the search query has changed. This signal is fired *after* the search
|
||||||
* has completed.
|
* has completed.
|
||||||
*/
|
*/
|
||||||
public signal void search_query_changed(string? query);
|
public signal void search_query_changed(Geary.SearchQuery? query);
|
||||||
|
|
||||||
public SearchFolder(Account account) {
|
public SearchFolder(Account account) {
|
||||||
base();
|
base();
|
||||||
|
|
@ -203,8 +204,8 @@ public class Geary.SearchFolder : Geary.AbstractLocalFolder, Geary.FolderSupport
|
||||||
/**
|
/**
|
||||||
* Sets the keyword string for this search.
|
* Sets the keyword string for this search.
|
||||||
*/
|
*/
|
||||||
public void set_search_query(string query, Cancellable? cancellable = null) {
|
public void search(string query, SearchQuery.Strategy strategy, Cancellable? cancellable = null) {
|
||||||
set_search_query_async.begin(query, cancellable, on_set_search_query_complete);
|
set_search_query_async.begin(query, strategy, cancellable, on_set_search_query_complete);
|
||||||
}
|
}
|
||||||
|
|
||||||
private void on_set_search_query_complete(Object? source, AsyncResult result) {
|
private void on_set_search_query_complete(Object? source, AsyncResult result) {
|
||||||
|
|
@ -215,8 +216,9 @@ public class Geary.SearchFolder : Geary.AbstractLocalFolder, Geary.FolderSupport
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private async void set_search_query_async(string query, Cancellable? cancellable = null) throws Error {
|
private async void set_search_query_async(string query, SearchQuery.Strategy strategy,
|
||||||
Geary.SearchQuery search_query = new Geary.SearchQuery(query);
|
Cancellable? cancellable) throws Error {
|
||||||
|
Geary.SearchQuery search_query = account.open_search(query, strategy);
|
||||||
|
|
||||||
int result_mutex_token = yield result_mutex.claim_async();
|
int result_mutex_token = yield result_mutex.claim_async();
|
||||||
|
|
||||||
|
|
@ -230,7 +232,7 @@ public class Geary.SearchFolder : Geary.AbstractLocalFolder, Geary.FolderSupport
|
||||||
result_mutex.release(ref result_mutex_token);
|
result_mutex.release(ref result_mutex_token);
|
||||||
|
|
||||||
this.search_query = search_query;
|
this.search_query = search_query;
|
||||||
search_query_changed(search_query.raw);
|
search_query_changed(search_query);
|
||||||
|
|
||||||
if (error != null)
|
if (error != null)
|
||||||
throw error;
|
throw error;
|
||||||
|
|
@ -425,13 +427,14 @@ public class Geary.SearchFolder : Geary.AbstractLocalFolder, Geary.FolderSupport
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Given a list of mail IDs, returns a list of words that match for the current
|
* Given a list of mail IDs, returns a set of casefolded words that match for the current
|
||||||
* search query.
|
* search query.
|
||||||
*/
|
*/
|
||||||
public async Gee.Collection<string>? get_search_matches_async(
|
public async Gee.Set<string>? get_search_matches_async(
|
||||||
Gee.Collection<Geary.EmailIdentifier> ids, Cancellable? cancellable = null) throws Error {
|
Gee.Collection<Geary.EmailIdentifier> ids, Cancellable? cancellable = null) throws Error {
|
||||||
if (search_query == null)
|
if (search_query == null)
|
||||||
return null;
|
return null;
|
||||||
|
|
||||||
return yield account.get_search_matches_async(search_query, ids, cancellable);
|
return yield account.get_search_matches_async(search_query, ids, cancellable);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -6,39 +6,63 @@
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* An object to hold state for various search subsystems that might need to
|
* An object to hold state for various search subsystems that might need to
|
||||||
* parse the same text string different ways. The only interaction the API
|
* parse the same text string different ways.
|
||||||
* user should have with this is creating new ones and then passing them off to
|
|
||||||
* the search methods in the engine.
|
|
||||||
*
|
*
|
||||||
* TODO: support anything other than ImapDB.Account's search methods.
|
* The only interaction the API user should have with this is creating new ones and then passing
|
||||||
|
* them to the search methods in the Engine.
|
||||||
|
*
|
||||||
|
* @see Geary.Account.open_search
|
||||||
|
*/
|
||||||
|
|
||||||
|
public abstract class Geary.SearchQuery : BaseObject {
|
||||||
|
/**
|
||||||
|
* An advisory parameter regarding search quality, scope, and breadth.
|
||||||
|
*
|
||||||
|
* The Engine can perform searches based on (unspecified, uncontracted) textual variations of
|
||||||
|
* a query's search terms. Some of those variations may produce undesirable results due to
|
||||||
|
* "greedy" matching of terms. The Strategy parameter allows for an advisory to the Engine
|
||||||
|
* about how to use those textual variants, if any at all.
|
||||||
|
*
|
||||||
|
* This may be respected or ignored by the Engine. In particular, there's no guarantee it will
|
||||||
|
* have any effect on server search.
|
||||||
|
*/
|
||||||
|
public enum Strategy {
|
||||||
|
/**
|
||||||
|
* Only return exact matches, perform no searches for textual variants.
|
||||||
|
*
|
||||||
|
* Note that Geary's search syntax does prefix-matching for unquoted strings. EXACT means
|
||||||
|
* exact ''prefix-''matching in this case.
|
||||||
|
*/
|
||||||
|
EXACT,
|
||||||
|
/**
|
||||||
|
* Allow for searching for a small set of textual variants and small differences in search
|
||||||
|
* terms. This is a good default.
|
||||||
|
*/
|
||||||
|
CONSERVATIVE,
|
||||||
|
/**
|
||||||
|
* Allow for searching for a broad set of textual variants and larger differences in
|
||||||
|
* search terms.
|
||||||
|
*/
|
||||||
|
AGGRESSIVE,
|
||||||
|
/**
|
||||||
|
* Search for all textual variants, i.e. "the sky's the limit."
|
||||||
|
*/
|
||||||
|
HORIZON
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The original user search text.
|
||||||
*/
|
*/
|
||||||
public class Geary.SearchQuery : BaseObject {
|
|
||||||
public string raw { get; private set; }
|
public string raw { get; private set; }
|
||||||
public bool parsed { get; internal set; default = false; }
|
|
||||||
|
|
||||||
// Not using a MultiMap because we (might) need a guarantee of order.
|
/**
|
||||||
private Gee.HashMap<string?, Gee.ArrayList<string>> field_map
|
* The selected {@link Strategy} quality.
|
||||||
= new Gee.HashMap<string?, Gee.ArrayList<string>>();
|
*/
|
||||||
|
public Strategy strategy { get; private set; }
|
||||||
|
|
||||||
public SearchQuery(string query) {
|
protected SearchQuery(string raw, Strategy strategy) {
|
||||||
raw = query;
|
this.raw = raw;
|
||||||
}
|
this.strategy = strategy;
|
||||||
|
|
||||||
internal void add_token(string? field, string token) {
|
|
||||||
if (!field_map.has_key(field))
|
|
||||||
field_map.set(field, new Gee.ArrayList<string>());
|
|
||||||
|
|
||||||
field_map.get(field).add(token);
|
|
||||||
}
|
|
||||||
|
|
||||||
internal Gee.Collection<string?> get_fields() {
|
|
||||||
return field_map.keys;
|
|
||||||
}
|
|
||||||
|
|
||||||
internal Gee.List<string>? get_tokens(string? field) {
|
|
||||||
if (!field_map.has_key(field))
|
|
||||||
return null;
|
|
||||||
|
|
||||||
return field_map.get(field);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -17,18 +17,6 @@ private class Geary.ImapDB.Account : BaseObject {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private class SearchOffset {
|
|
||||||
public int column; // Column in search table
|
|
||||||
public int byte_offset; // Offset (in bytes) of search term in string
|
|
||||||
public int size; // Size (in bytes) of the search term in string
|
|
||||||
|
|
||||||
public SearchOffset(string[] offset_string) {
|
|
||||||
column = int.parse(offset_string[0]);
|
|
||||||
byte_offset = int.parse(offset_string[2]);
|
|
||||||
size = int.parse(offset_string[3]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
public signal void email_sent(Geary.RFC822.Message rfc822);
|
public signal void email_sent(Geary.RFC822.Message rfc822);
|
||||||
|
|
||||||
// Only available when the Account is opened
|
// Only available when the Account is opened
|
||||||
|
|
@ -61,6 +49,14 @@ private class Geary.ImapDB.Account : BaseObject {
|
||||||
throw new EngineError.OPEN_REQUIRED("Database not open");
|
throw new EngineError.OPEN_REQUIRED("Database not open");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private ImapDB.SearchQuery check_search_query(Geary.SearchQuery q) throws Error {
|
||||||
|
ImapDB.SearchQuery? query = q as ImapDB.SearchQuery;
|
||||||
|
if (query == null || query.account != this)
|
||||||
|
throw new EngineError.BAD_PARAMETERS("Geary.SearchQuery not associated with %s", name);
|
||||||
|
|
||||||
|
return query;
|
||||||
|
}
|
||||||
|
|
||||||
public static void get_imap_db_storage_locations(File user_data_dir, out File db_file,
|
public static void get_imap_db_storage_locations(File user_data_dir, out File db_file,
|
||||||
out File attachments_dir) {
|
out File attachments_dir) {
|
||||||
db_file = ImapDB.Database.get_db_file(user_data_dir);
|
db_file = ImapDB.Database.get_db_file(user_data_dir);
|
||||||
|
|
@ -715,7 +711,87 @@ private class Geary.ImapDB.Account : BaseObject {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
private void prepare_search_query(Geary.SearchQuery query) {
|
/**
|
||||||
|
* This method is used to convert an unquoted user-entered search term into a stemmed search
|
||||||
|
* term.
|
||||||
|
*
|
||||||
|
* Prior experience with the Unicode Snowball stemmer indicates it's too aggressive for our
|
||||||
|
* tastes when coupled with prefix-matching of all unquoted terms (see
|
||||||
|
* https://bugzilla.gnome.org/show_bug.cgi?id=713179). This method is part of a larger strategy
|
||||||
|
* designed to dampen that aggressiveness without losing the benefits of stemming entirely.
|
||||||
|
*
|
||||||
|
* Database upgrade 23 removes the old Snowball-stemmed FTS table and replaces it with one
|
||||||
|
* with no stemming (using only SQLite's "simple" tokenizer). It also creates a "magic" SQLite
|
||||||
|
* table called TokenizerTable which allows for uniform queries to the Snowball stemmer, which
|
||||||
|
* is still installed in Geary. Thus, we are now in the position to search for the original
|
||||||
|
* term and its stemmed variant, then do post-search processing to strip results which are
|
||||||
|
* too "greedy" due to prefix-matching the stemmed variant.
|
||||||
|
*
|
||||||
|
* Some heuristics are in place simply to determine if stemming should occur:
|
||||||
|
*
|
||||||
|
* # If stemming is disallowed, no stemming occurs.
|
||||||
|
* # If the term is < min. term length for stemming, no stemming occurs.
|
||||||
|
* # If the stemmer returns a stem that is the same as the original term, no stemming occurs.
|
||||||
|
* # If the difference between the stemmed word and the original term is more than
|
||||||
|
* maximum allowed, no stemming occurs. This works under the assumption that if
|
||||||
|
* the user has typed a long word, they do not want to "go back" to searching for a much
|
||||||
|
* shorter version of it. (For example, "accountancies" stems to "account").
|
||||||
|
*
|
||||||
|
* Otherwise, the stem for the term is returned.
|
||||||
|
*/
|
||||||
|
private string? stem_search_term(ImapDB.SearchQuery query, string term) {
|
||||||
|
if (!query.allow_stemming)
|
||||||
|
return null;
|
||||||
|
|
||||||
|
int term_length = term.length;
|
||||||
|
if (term_length < query.min_term_length_for_stemming)
|
||||||
|
return null;
|
||||||
|
|
||||||
|
string? stemmed = null;
|
||||||
|
try {
|
||||||
|
Db.Statement stmt = db.prepare("""
|
||||||
|
SELECT token
|
||||||
|
FROM TokenizerTable
|
||||||
|
WHERE input=?
|
||||||
|
""");
|
||||||
|
stmt.bind_string(0, term);
|
||||||
|
|
||||||
|
// get stemmed string; if no result, fall through
|
||||||
|
Db.Result result = stmt.exec();
|
||||||
|
if (!result.finished)
|
||||||
|
stemmed = result.string_at(0);
|
||||||
|
else
|
||||||
|
debug("No stemmed term returned for \"%s\"", term);
|
||||||
|
} catch (Error err) {
|
||||||
|
debug("Unable to query tokenizer table for stemmed term for \"%s\": %s", term, err.message);
|
||||||
|
|
||||||
|
// fall-through
|
||||||
|
}
|
||||||
|
|
||||||
|
if (String.is_empty(stemmed)) {
|
||||||
|
debug("Empty stemmed term returned for \"%s\"", term);
|
||||||
|
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
// If same term returned, treat as non-stemmed
|
||||||
|
if (stemmed == term)
|
||||||
|
return null;
|
||||||
|
|
||||||
|
// Don't search for stemmed words that are significantly shorter than the user's search term
|
||||||
|
if (term_length - stemmed.length > query.max_difference_term_stem_lengths) {
|
||||||
|
debug("Stemmed \"%s\" dropped searching for \"%s\": too much distance in terms",
|
||||||
|
stemmed, term);
|
||||||
|
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
debug("Search processing: term -> stem is \"%s\" -> \"%s\"", term, stemmed);
|
||||||
|
|
||||||
|
return stemmed;
|
||||||
|
}
|
||||||
|
|
||||||
|
private void prepare_search_query(ImapDB.SearchQuery query) {
|
||||||
if (query.parsed)
|
if (query.parsed)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
|
|
@ -753,17 +829,32 @@ private class Geary.ImapDB.Account : BaseObject {
|
||||||
--quotes;
|
--quotes;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
SearchTerm? term;
|
||||||
if (in_quote) {
|
if (in_quote) {
|
||||||
// HACK: this helps prevent a syntax error when the user types
|
// HACK: this helps prevent a syntax error when the user types
|
||||||
// something like from:"somebody". If we ever properly support
|
// something like from:"somebody". If we ever properly support
|
||||||
// quotes after : we can get rid of this.
|
// quotes after : we can get rid of this.
|
||||||
s = s.replace(":", " ");
|
term = new SearchTerm(s, s, null, s.replace(":", " "), null);
|
||||||
} else {
|
} else {
|
||||||
|
string original = s;
|
||||||
|
|
||||||
|
// some common search phrases we don't respect and therefore don't want to fall
|
||||||
|
// through to search results
|
||||||
string lower = s.down();
|
string lower = s.down();
|
||||||
if (lower == "" || lower == "and" || lower == "or" || lower == "not" || lower == "near"
|
switch (lower) {
|
||||||
|| lower.has_prefix("near/"))
|
case "":
|
||||||
|
case "and":
|
||||||
|
case "or":
|
||||||
|
case "not":
|
||||||
|
case "near":
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
|
default:
|
||||||
|
if (lower.has_prefix("near/"))
|
||||||
|
continue;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
if (s.has_prefix("-"))
|
if (s.has_prefix("-"))
|
||||||
s = s.substring(1);
|
s = s.substring(1);
|
||||||
|
|
||||||
|
|
@ -775,13 +866,29 @@ private class Geary.ImapDB.Account : BaseObject {
|
||||||
if (parts.length > 1)
|
if (parts.length > 1)
|
||||||
field = extract_field_from_token(parts, ref s);
|
field = extract_field_from_token(parts, ref s);
|
||||||
|
|
||||||
s = "\"" + s + "*\"";
|
// SQL MATCH syntax for parsed term
|
||||||
|
string? sql_s = "%s*".printf(s);
|
||||||
|
|
||||||
|
// stem the word, but if stemmed and stem is simply shorter version of original
|
||||||
|
// term, only prefix-match search for it (i.e. avoid searching for
|
||||||
|
// [archive* OR archiv*] when that's the same as [archiv*]), otherwise search for
|
||||||
|
// both
|
||||||
|
string? stemmed = stem_search_term(query, s);
|
||||||
|
|
||||||
|
string? sql_stemmed = null;
|
||||||
|
if (stemmed != null) {
|
||||||
|
sql_stemmed = "%s*".printf(stemmed);
|
||||||
|
if (s.has_prefix(stemmed))
|
||||||
|
sql_s = null;
|
||||||
|
}
|
||||||
|
|
||||||
|
term = new SearchTerm(original, s, stemmed, sql_s, sql_stemmed);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (in_quote && quotes % 2 != 0)
|
if (in_quote && quotes % 2 != 0)
|
||||||
in_quote = false;
|
in_quote = false;
|
||||||
|
|
||||||
query.add_token(field, s);
|
query.add_search_term(field, term);
|
||||||
}
|
}
|
||||||
|
|
||||||
assert(!in_quote);
|
assert(!in_quote);
|
||||||
|
|
@ -790,28 +897,53 @@ private class Geary.ImapDB.Account : BaseObject {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Return a map of column -> phrase, to use as WHERE column MATCH 'phrase'.
//
// The key is the field name, or "MessageSearchTable" for terms that have no field.  Fields whose
// terms yield no SQL text at all are left out of the map, so an empty MATCH phrase is never
// produced.
private Gee.HashMap<string, string> get_query_phrases(ImapDB.SearchQuery query) {
    prepare_search_query(query);
    
    Gee.HashMap<string, string> phrases = new Gee.HashMap<string, string>();
    foreach (string? field in query.get_fields()) {
        Gee.List<SearchTerm>? terms = query.get_search_terms(field);
        if (terms == null || terms.size == 0)
            continue;
        
        // Each SearchTerm is an AND but the SQL texts within it are OR ... this allows for
        // each user term to be AND but the variants of each term are OR.  So, if terms are
        // [party] and [eventful] and stems are [parti] and [event], the search would be:
        //
        // (party* OR parti*) AND (eventful* OR event*)
        //
        // Obviously with stemming there's the possibility of the stemmed variant being nothing
        // but a broader search of the original term (such as event* and eventful*) but do both
        // to determine from each hit result which term caused the hit, and if it's too greedy
        // a match of the stemmed variant, it can be stripped from the results.
        //
        // Note that this uses SQLite's "standard" query syntax for MATCH, where AND is implied
        // (and would be treated as search term if included), parentheses are not allowed, and
        // OR has a higher precedence than AND.  So the above example in standard syntax is:
        //
        // party* OR parti* eventful* OR event*
        StringBuilder builder = new StringBuilder();
        foreach (SearchTerm term in terms) {
            if (term.sql.size == 0)
                continue;
            
            // Always emit the SQL-ready variants, never term.parsed: for quoted (exact) terms
            // the SQL text is the one that has had ":" replaced to avoid a MATCH syntax error,
            // while term.parsed still carries the raw text.  An exact term has a single
            // variant, so no OR is emitted for it.
            bool is_first_sql = true;
            foreach (string sql in term.sql) {
                if (!is_first_sql)
                    builder.append(" OR ");
                
                builder.append_printf("%s ", sql);
                is_first_sql = false;
            }
        }
        
        // Drop the trailing separator and skip fields that ended up with nothing to match
        string phrase = builder.str.strip();
        if (!Geary.String.is_empty(phrase))
            phrases.set(field ?? "MessageSearchTable", phrase);
    }
    
    return phrases;
}
|
||||||
|
|
||||||
|
|
@ -865,19 +997,39 @@ private class Geary.ImapDB.Account : BaseObject {
|
||||||
return sql.str;
|
return sql.str;
|
||||||
}
|
}
|
||||||
|
|
||||||
public async Gee.Collection<Geary.EmailIdentifier>? search_async(Geary.SearchQuery query,
|
public async Gee.Collection<Geary.EmailIdentifier>? search_async(Geary.SearchQuery q,
|
||||||
int limit = 100, int offset = 0, Gee.Collection<Geary.FolderPath?>? folder_blacklist = null,
|
int limit = 100, int offset = 0, Gee.Collection<Geary.FolderPath?>? folder_blacklist = null,
|
||||||
Gee.Collection<Geary.EmailIdentifier>? search_ids = null, Cancellable? cancellable = null) throws Error {
|
Gee.Collection<Geary.EmailIdentifier>? search_ids = null, Cancellable? cancellable = null)
|
||||||
|
throws Error {
|
||||||
check_open();
|
check_open();
|
||||||
|
ImapDB.SearchQuery query = check_search_query(q);
|
||||||
|
|
||||||
Gee.HashMap<string, string> query_phrases = get_query_phrases(query);
|
Gee.HashMap<string, string> query_phrases = get_query_phrases(query);
|
||||||
if (query_phrases.size == 0)
|
if (query_phrases.size == 0)
|
||||||
return null;
|
return null;
|
||||||
|
|
||||||
Gee.ArrayList<ImapDB.SearchEmailIdentifier> search_results
|
// Do this outside of transaction to catch invalid search ids up-front
|
||||||
= new Gee.ArrayList<ImapDB.SearchEmailIdentifier>();
|
|
||||||
|
|
||||||
string? search_ids_sql = get_search_ids_sql(search_ids);
|
string? search_ids_sql = get_search_ids_sql(search_ids);
|
||||||
|
|
||||||
|
// for some searches, results are stripped if they're too "greedy", but this requires
|
||||||
|
// examining the matched text, which has an expense to fetch, so avoid doing so unless
|
||||||
|
// necessary
|
||||||
|
bool strip_results = true;
|
||||||
|
|
||||||
|
// HORIZON strategy is configured in such a way to allow all stemmed variants to match,
|
||||||
|
// so don't do any stripping in that case
|
||||||
|
//
|
||||||
|
// If any of the search terms is exact-match (no prefix matching) or none have stemmed
|
||||||
|
// variants, then don't do stripping of "greedy" stemmed matching (because in both cases,
|
||||||
|
// there are none)
|
||||||
|
if (query.strategy == Geary.SearchQuery.Strategy.HORIZON)
|
||||||
|
strip_results = false;
|
||||||
|
else if (traverse<SearchTerm>(query.get_all_terms()).any(term => term.stemmed == null || term.is_exact))
|
||||||
|
strip_results = false;
|
||||||
|
|
||||||
|
Gee.Set<ImapDB.EmailIdentifier> unstripped_ids = new Gee.HashSet<ImapDB.EmailIdentifier>();
|
||||||
|
Gee.Map<ImapDB.EmailIdentifier, Gee.Set<string>>? search_results = null;
|
||||||
|
|
||||||
yield db.exec_transaction_async(Db.TransactionType.RO, (cx) => {
|
yield db.exec_transaction_async(Db.TransactionType.RO, (cx) => {
|
||||||
string blacklisted_ids_sql = do_get_blacklisted_message_ids_sql(
|
string blacklisted_ids_sql = do_get_blacklisted_message_ids_sql(
|
||||||
folder_blacklist, cx, cancellable);
|
folder_blacklist, cx, cancellable);
|
||||||
|
|
@ -919,95 +1071,117 @@ private class Geary.ImapDB.Account : BaseObject {
|
||||||
stmt.bind_int(bind_index++, offset);
|
stmt.bind_int(bind_index++, offset);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Gee.HashMap<int64?, ImapDB.EmailIdentifier> id_map = new Gee.HashMap<int64?, ImapDB.EmailIdentifier>(
|
||||||
|
Collection.int64_hash_func, Collection.int64_equal_func);
|
||||||
|
|
||||||
Db.Result result = stmt.exec(cancellable);
|
Db.Result result = stmt.exec(cancellable);
|
||||||
while (!result.finished) {
|
while (!result.finished) {
|
||||||
int64 id = result.int64_at(0);
|
int64 message_id = result.int64_at(0);
|
||||||
int64 internaldate_time_t = result.int64_at(1);
|
int64 internaldate_time_t = result.int64_at(1);
|
||||||
DateTime? internaldate = (internaldate_time_t == -1
|
DateTime? internaldate = (internaldate_time_t == -1
|
||||||
? null : new DateTime.from_unix_local(internaldate_time_t));
|
? null : new DateTime.from_unix_local(internaldate_time_t));
|
||||||
search_results.add(new ImapDB.SearchEmailIdentifier(id, internaldate));
|
|
||||||
|
ImapDB.EmailIdentifier id = new ImapDB.SearchEmailIdentifier(message_id, internaldate);
|
||||||
|
|
||||||
|
unstripped_ids.add(id);
|
||||||
|
id_map.set(message_id, id);
|
||||||
|
|
||||||
result.next(cancellable);
|
result.next(cancellable);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (!strip_results)
|
||||||
|
return Db.TransactionOutcome.DONE;
|
||||||
|
|
||||||
|
search_results = do_get_search_matches(cx, query, id_map, cancellable);
|
||||||
|
|
||||||
return Db.TransactionOutcome.DONE;
|
return Db.TransactionOutcome.DONE;
|
||||||
}, cancellable);
|
}, cancellable);
|
||||||
|
|
||||||
return (search_results.size == 0 ? null : search_results);
|
if (unstripped_ids == null || unstripped_ids.size == 0)
|
||||||
}
|
|
||||||
|
|
||||||
// This applies a fudge-factor set of matches when the database results
|
|
||||||
// aren't entirely satisfactory, such as when you search for an email
|
|
||||||
// address and the database tokenizes out the @ and ., etc. It's not meant
|
|
||||||
// to be comprehensive, just a little extra highlighting applied to make
|
|
||||||
// the results look a little closer to what you typed.
|
|
||||||
private void add_literal_matches(string raw_query, Gee.Set<string> search_matches) {
|
|
||||||
foreach (string word in raw_query.split(" ")) {
|
|
||||||
if (word.has_suffix("\""))
|
|
||||||
word = word.substring(0, word.length - 1);
|
|
||||||
if (word.has_prefix("\""))
|
|
||||||
word = word.substring(1);
|
|
||||||
|
|
||||||
if (!String.is_empty_or_whitespace(word))
|
|
||||||
search_matches.add(word);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
public async Gee.Collection<string>? get_search_matches_async(Geary.SearchQuery query,
|
|
||||||
Gee.Collection<ImapDB.EmailIdentifier> ids, Cancellable? cancellable = null) throws Error {
|
|
||||||
check_open();
|
|
||||||
|
|
||||||
Gee.HashMap<string, string> query_phrases = get_query_phrases(query);
|
|
||||||
if (query_phrases.size == 0)
|
|
||||||
return null;
|
return null;
|
||||||
|
|
||||||
Gee.Set<string> search_matches = new Gee.HashSet<string>();
|
if (!strip_results)
|
||||||
|
return unstripped_ids;
|
||||||
|
|
||||||
yield db.exec_transaction_async(Db.TransactionType.RO, (cx) => {
|
// at this point, there should be some "full" search results to strip from
|
||||||
StringBuilder sql = new StringBuilder();
|
assert(search_results != null && search_results.size > 0);
|
||||||
sql.append("""
|
|
||||||
SELECT offsets(MessageSearchTable), *
|
|
||||||
FROM MessageSearchTable
|
|
||||||
WHERE docid IN (
|
|
||||||
""");
|
|
||||||
sql_append_ids(sql,
|
|
||||||
Geary.traverse<ImapDB.EmailIdentifier>(ids).map<int64?>(id => id.message_id).to_gee_iterable());
|
|
||||||
sql.append(")");
|
|
||||||
sql_add_query_phrases(sql, query_phrases);
|
|
||||||
|
|
||||||
Db.Statement stmt = cx.prepare(sql.str);
|
strip_greedy_results(query, search_results);
|
||||||
sql_bind_query_phrases(stmt, 0, query_phrases);
|
|
||||||
|
|
||||||
Db.Result result = stmt.exec(cancellable);
|
return search_results.size == 0 ? null : search_results.keys;
|
||||||
while (!result.finished) {
|
}
|
||||||
// Build a list of search offsets.
|
|
||||||
string[] offset_array = result.nonnull_string_at(0).split(" ");
|
// Removes from search_results (in place) every message whose matched strings are all "greedy"
// hits, i.e. none of them prefix-matches a parsed term and none is within the query's allowed
// length difference of a stemmed variant it prefix-matches.
private void strip_greedy_results(ImapDB.SearchQuery query,
    Gee.Map<ImapDB.EmailIdentifier, Gee.Set<string>> search_results) {
    int count_before_strip = search_results.size;
    
    Gee.MapIterator<ImapDB.EmailIdentifier, Gee.Set<string>> results_iter = search_results.map_iterator();
    while (results_iter.next()) {
        // A message is kept as soon as a single matched string is judged acceptable for a
        // single term
        bool keep_message = false;
        foreach (string matched_text in results_iter.get_value()) {
            foreach (SearchTerm term in query.get_all_terms()) {
                // a prefix-match on the straight-up parsed term is always acceptable
                bool parsed_hit = matched_text.has_prefix(term.parsed);
                
                // a prefix-match on the stemmed variant is acceptable only if the matched
                // text isn't too much longer than the stem (otherwise it's a greedy match)
                bool stem_hit = false;
                if (!parsed_hit && term.stemmed != null && matched_text.has_prefix(term.stemmed)) {
                    int length_diff = matched_text.length - term.stemmed.length;
                    stem_hit = (length_diff <= query.max_difference_match_stem_lengths);
                }
                
                if (parsed_hit || stem_hit) {
                    keep_message = true;
                    
                    break;
                }
            }
            
            if (keep_message)
                break;
        }
        
        if (!keep_message)
            results_iter.unset();
    }
    
    debug("Stripped %d emails from search for [%s] due to greedy stem matching",
        count_before_strip - search_results.size, query.raw);
}
|
||||||
|
|
||||||
|
// Returns the set of lower-cased strings that caused the supplied messages to match the query,
// after greedy stemmed matches have been stripped, or null if there is nothing to report (no
// matches found, or every match was stripped).
public async Gee.Set<string>? get_search_matches_async(Geary.SearchQuery q,
    Gee.Collection<ImapDB.EmailIdentifier> ids, Cancellable? cancellable = null) throws Error {
    check_open();
    ImapDB.SearchQuery query = check_search_query(q);
    
    Gee.Set<string>? search_matches = null;
    yield db.exec_transaction_async(Db.TransactionType.RO, (cx) => {
        // do_get_search_matches() looks messages up by their database row id
        Gee.HashMap<int64?, ImapDB.EmailIdentifier> id_map = new Gee.HashMap<
            int64?, ImapDB.EmailIdentifier>(Collection.int64_hash_func, Collection.int64_equal_func);
        foreach (ImapDB.EmailIdentifier id in ids)
            id_map.set(id.message_id, id);
        
        Gee.Map<ImapDB.EmailIdentifier, Gee.Set<string>>? match_map =
            do_get_search_matches(cx, query, id_map, cancellable);
        if (match_map == null || match_map.size == 0)
            return Db.TransactionOutcome.DONE;
        
        strip_greedy_results(query, match_map);
        
        // flatten the per-message match sets into a single set
        search_matches = new Gee.HashSet<string>();
        foreach (Gee.Set<string> matches in match_map.values)
            search_matches.add_all(matches);
        
        return Db.TransactionOutcome.DONE;
    }, cancellable);
    
    // Stripping may have removed every message; report "nothing" as null rather than as an
    // empty set so all no-result paths look the same to the caller
    return (search_matches == null || search_matches.size == 0) ? null : search_matches;
}
|
||||||
|
|
||||||
public async Geary.Email fetch_email_async(ImapDB.EmailIdentifier email_id,
|
public async Geary.Email fetch_email_async(ImapDB.EmailIdentifier email_id,
|
||||||
|
|
@ -1561,5 +1735,69 @@ private class Geary.ImapDB.Account : BaseObject {
|
||||||
unread_change.get(path));
|
unread_change.get(path));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Not using a MultiMap because when traversing want to process all values at once per iteration,
// not per key-value
//
// For each message in id_map (keyed by database row id) that matches the query, returns the set
// of lower-cased strings in the search table that produced the match.  Returns null if id_map
// is empty, the query yields no phrases, or no rows match.
public Gee.Map<ImapDB.EmailIdentifier, Gee.Set<string>>? do_get_search_matches(Db.Connection cx,
    ImapDB.SearchQuery query, Gee.Map<int64?, ImapDB.EmailIdentifier> id_map, Cancellable? cancellable)
    throws Error {
    if (id_map.size == 0)
        return null;
    
    Gee.HashMap<string, string> query_phrases = get_query_phrases(query);
    if (query_phrases.size == 0)
        return null;
    
    StringBuilder sql = new StringBuilder();
    sql.append("""
        SELECT docid, offsets(MessageSearchTable), *
        FROM MessageSearchTable
        WHERE docid IN (
    """);
    sql_append_ids(sql, id_map.keys);
    sql.append(")");
    sql_add_query_phrases(sql, query_phrases);
    
    Db.Statement stmt = cx.prepare(sql.str);
    sql_bind_query_phrases(stmt, 0, query_phrases);
    
    Gee.Map<ImapDB.EmailIdentifier, Gee.Set<string>> search_matches = new Gee.HashMap<
        ImapDB.EmailIdentifier, Gee.Set<string>>();
    
    Db.Result result = stmt.exec(cancellable);
    while (!result.finished) {
        int64 docid = result.rowid_at(0);
        assert(id_map.has_key(docid));
        ImapDB.EmailIdentifier id = id_map.get(docid);
        
        // offsets() function returns a list of 4 strings that are ints indicating position
        // and length of match string in search table corpus
        string[] offset_array = result.nonnull_string_at(1).split(" ");
        
        Gee.Set<string> matches = new Gee.HashSet<string>();
        
        // Walk the offsets one complete group of four at a time; an empty or truncated list
        // is simply not read past its end
        for (int j = 0; j + 3 < offset_array.length; j += 4) {
            int column = int.parse(offset_array[j]);
            int byte_offset = int.parse(offset_array[j + 2]);
            int size = int.parse(offset_array[j + 3]);
            
            // result columns 0 and 1 are docid and offsets(), so the search table's own
            // columns start at index 2
            unowned string text = result.nonnull_string_at(column + 2);
            matches.add(text[byte_offset : byte_offset + size].down());
        }
        
        search_matches.set(id, matches);
        
        result.next(cancellable);
    }
    
    return search_matches.size > 0 ? search_matches : null;
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -107,7 +107,11 @@ private class Geary.ImapDB.Database : Geary.Db.VersionedDatabase {
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case 22:
|
case 22:
|
||||||
post_rebuild_attachments();
|
post_upgrade_rebuild_attachments();
|
||||||
|
break;
|
||||||
|
|
||||||
|
case 23:
|
||||||
|
post_upgrade_add_tokenizer_table();
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -407,7 +411,7 @@ private class Geary.ImapDB.Database : Geary.Db.VersionedDatabase {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Version 22
|
// Version 22
|
||||||
private void post_rebuild_attachments() {
|
private void post_upgrade_rebuild_attachments() {
|
||||||
try {
|
try {
|
||||||
exec_transaction(Db.TransactionType.RW, (cx) => {
|
exec_transaction(Db.TransactionType.RW, (cx) => {
|
||||||
Db.Statement stmt = cx.prepare("""
|
Db.Statement stmt = cx.prepare("""
|
||||||
|
|
@ -471,6 +475,25 @@ private class Geary.ImapDB.Database : Geary.Db.VersionedDatabase {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Version 23
//
// Creates the TokenizerTable virtual table, configured with the stemmer chosen by
// find_appropriate_search_stemmer().  Any failure is reported through error().
private void post_upgrade_add_tokenizer_table() {
    try {
        string stemmer_name = find_appropriate_search_stemmer();
        debug("Creating tokenizer table using %s stemmer", stemmer_name);
        
        // This statement can't live in the .sql file because part of its schema (the stemmer
        // algorithm) is only known at runtime.
        string create_table_sql = """
            CREATE VIRTUAL TABLE TokenizerTable USING fts3tokenize(
                unicodesn,
                "stemmer=%s"
            );
        """.printf(stemmer_name);
        
        exec(create_table_sql);
    } catch (Error err) {
        error("Error creating tokenizer table: %s", err.message);
    }
}
|
||||||
|
|
||||||
private void on_prepare_database_connection(Db.Connection cx) throws Error {
|
private void on_prepare_database_connection(Db.Connection cx) throws Error {
|
||||||
cx.set_busy_timeout_msec(Db.Connection.RECOMMENDED_BUSY_TIMEOUT_MSEC);
|
cx.set_busy_timeout_msec(Db.Connection.RECOMMENDED_BUSY_TIMEOUT_MSEC);
|
||||||
cx.set_foreign_keys(true);
|
cx.set_foreign_keys(true);
|
||||||
|
|
|
||||||
121
src/engine/imap-db/imap-db-search-query.vala
Normal file
121
src/engine/imap-db/imap-db-search-query.vala
Normal file
|
|
@ -0,0 +1,121 @@
|
||||||
|
/* Copyright 2014 Yorba Foundation
|
||||||
|
*
|
||||||
|
* This software is licensed under the GNU Lesser General Public License
|
||||||
|
* (version 2.1 or later). See the COPYING file in this distribution.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/**
 * Internal implementation of {@link Geary.SearchQuery}.
 *
 * Carries the stemming limits selected by the query's strategy along with the
 * {@link SearchTerm}s that have been added to it, both per field and as one flat list.
 */

private class Geary.ImapDB.SearchQuery : Geary.SearchQuery {
    /**
     * The {@link ImapDB.Account} this query is associated with.
     */
    public weak ImapDB.Account account { get; private set; }
    
    /**
     * True once the query has been parsed and processed prior to search submission.
     */
    public bool parsed { get; set; default = false; }
    
    /**
     * Whether stemming is permitted at all.
     *
     * Determined by {@link strategy}.
     */
    public bool allow_stemming { get; private set; }
    
    /**
     * Shortest term length for which stemming is allowed.
     *
     * This keeps short words that could be stemmed from being stemmed.
     *
     * Overridden by {@link allow_stemming}.  Determined by {@link strategy}.
     */
    public int min_term_length_for_stemming { get; private set; }
    
    /**
     * Largest allowed difference in length between a term and its stemmed variant.
     *
     * This keeps long words from being stemmed to much shorter words (which creates
     * opportunities for greedy matching).
     *
     * Overridden by {@link allow_stemming}.  Determined by {@link strategy}.
     */
    public int max_difference_term_stem_lengths { get; private set; }
    
    /**
     * Largest allowed difference in length between a matched word and the stemmed variant it
     * matched against.
     *
     * This keeps long words from being matched to short stem variants (which creates
     * opportunities for greedy matching).
     *
     * Overridden by {@link allow_stemming}.  Determined by {@link strategy}.
     */
    public int max_difference_match_stem_lengths { get; private set; }
    
    // Not using a MultiMap because we (might) need a guarantee of order.
    private Gee.HashMap<string?, Gee.ArrayList<SearchTerm>> terms_by_field
        = new Gee.HashMap<string?, Gee.ArrayList<SearchTerm>>();
    private Gee.ArrayList<SearchTerm> every_term = new Gee.ArrayList<SearchTerm>();
    
    public SearchQuery(ImapDB.Account account, string query, Geary.SearchQuery.Strategy strategy) {
        base (query, strategy);
        
        this.account = account;
        
        // arguments: allow stemming, min. term length, max. term/stem diff, max. match/stem diff
        switch (strategy) {
            case Strategy.EXACT:
                apply_stemming_limits(false, int.MAX, 0, 0);
            break;
            
            case Strategy.CONSERVATIVE:
                apply_stemming_limits(true, 6, 2, 2);
            break;
            
            case Strategy.AGGRESSIVE:
                apply_stemming_limits(true, 4, 4, 3);
            break;
            
            case Strategy.HORIZON:
                apply_stemming_limits(true, 0, int.MAX, int.MAX);
            break;
            
            default:
                assert_not_reached();
        }
    }
    
    // Stores one strategy's worth of stemming configuration in the corresponding properties.
    private void apply_stemming_limits(bool stemming_allowed, int min_term_length,
        int max_term_stem_diff, int max_match_stem_diff) {
        allow_stemming = stemming_allowed;
        min_term_length_for_stemming = min_term_length;
        max_difference_term_stem_lengths = max_term_stem_diff;
        max_difference_match_stem_lengths = max_match_stem_diff;
    }
    
    /**
     * Appends the term to the list for the field (which may be null) and to the list of all
     * terms.
     */
    public void add_search_term(string? field, SearchTerm term) {
        Gee.ArrayList<SearchTerm>? terms_for_field = terms_by_field.get(field);
        if (terms_for_field == null) {
            terms_for_field = new Gee.ArrayList<SearchTerm>();
            terms_by_field.set(field, terms_for_field);
        }
        
        terms_for_field.add(term);
        every_term.add(term);
    }
    
    /**
     * Returns the fields that terms have been added for.
     */
    public Gee.Collection<string?> get_fields() {
        return terms_by_field.keys;
    }
    
    /**
     * Returns the terms added for the field, or null if none were ever added for it.
     */
    public Gee.List<SearchTerm>? get_search_terms(string? field) {
        if (!terms_by_field.has_key(field))
            return null;
        
        return terms_by_field.get(field);
    }
    
    /**
     * Returns every term added to the query, in the order they were added.
     */
    public Gee.List<SearchTerm>? get_all_terms() {
        return every_term;
    }
}
|
||||||
|
|
||||||
62
src/engine/imap-db/imap-db-search-term.vala
Normal file
62
src/engine/imap-db/imap-db-search-term.vala
Normal file
|
|
@ -0,0 +1,62 @@
|
||||||
|
/* Copyright 2014 Yorba Foundation
|
||||||
|
*
|
||||||
|
* This software is licensed under the GNU Lesser General Public License
|
||||||
|
* (version 2.1 or later). See the COPYING file in this distribution.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/**
 * State associated with a single term of an {@link ImapDB.SearchQuery}.
 */

private class Geary.ImapDB.SearchTerm : BaseObject {
    /**
     * The term as originally tokenized, with only minimal processing applied.
     *
     * For example, punctuation might be removed, but no casefolding has occurred.
     */
    public string original { get; private set; }
    
    /**
     * The term after parsing.
     *
     * Casefolding and other normalizing text operations have been performed.
     */
    public string parsed { get; private set; }
    
    /**
     * The stemmed form of the term, if any.
     *
     * Only used if stemming is being done ''and'' the stem is different than the {@link parsed}
     * term.
     */
    public string? stemmed { get; private set; }
    
    /**
     * The texts to bind to an SQLite statement for this term.
     *
     * These should include prefix operators and quotes (i.e. ["party"] or [party*]).  None of
     * the texts is null or an empty string.
     */
    public Gee.List<string> sql { get; private set; default = new Gee.ArrayList<string>(); }
    
    /**
     * True if the {@link parsed} term is exact-match only (i.e. starts with quotes) and there
     * is no {@link stemmed} variant.
     */
    public bool is_exact {
        get {
            if (stemmed != null)
                return false;
            
            return parsed.has_prefix("\"");
        }
    }
    
    public SearchTerm(string original, string parsed, string? stemmed, string? sql_parsed, string? sql_stemmed) {
        this.original = original;
        this.parsed = parsed;
        this.stemmed = stemmed;
        
        // for now, only two variations: the stemmed string and the parsed one; the stem goes
        // first since it is usually shorter (and will be first in the OR statement).  Null or
        // empty variations are left out.
        string?[] variations = { sql_stemmed, sql_parsed };
        foreach (string? variation in variations) {
            if (!String.is_empty(variation))
                sql.add(variation);
        }
    }
}
|
||||||
|
|
||||||
|
|
@ -824,6 +824,10 @@ private abstract class Geary.ImapEngine.GenericAccount : Geary.AbstractAccount {
|
||||||
return yield local.fetch_email_async(check_id(email_id), required_fields, cancellable);
|
return yield local.fetch_email_async(check_id(email_id), required_fields, cancellable);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Creates the ImapDB-backed search query for this account's local store.
public override Geary.SearchQuery open_search(string query, SearchQuery.Strategy strategy) {
    var search_query = new ImapDB.SearchQuery(local, query, strategy);
    
    return search_query;
}
|
||||||
|
|
||||||
public override async Gee.Collection<Geary.EmailIdentifier>? local_search_async(Geary.SearchQuery query,
|
public override async Gee.Collection<Geary.EmailIdentifier>? local_search_async(Geary.SearchQuery query,
|
||||||
int limit = 100, int offset = 0, Gee.Collection<Geary.FolderPath?>? folder_blacklist = null,
|
int limit = 100, int offset = 0, Gee.Collection<Geary.FolderPath?>? folder_blacklist = null,
|
||||||
Gee.Collection<Geary.EmailIdentifier>? search_ids = null, Cancellable? cancellable = null) throws Error {
|
Gee.Collection<Geary.EmailIdentifier>? search_ids = null, Cancellable? cancellable = null) throws Error {
|
||||||
|
|
@ -833,7 +837,7 @@ private abstract class Geary.ImapEngine.GenericAccount : Geary.AbstractAccount {
|
||||||
return yield local.search_async(query, limit, offset, folder_blacklist, search_ids, cancellable);
|
return yield local.search_async(query, limit, offset, folder_blacklist, search_ids, cancellable);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Validates the identifiers with check_ids() and hands the lookup off to the local store.
public override async Gee.Set<string>? get_search_matches_async(Geary.SearchQuery query,
    Gee.Collection<Geary.EmailIdentifier> ids, Cancellable? cancellable = null) throws Error {
    var checked_ids = check_ids(ids);
    
    return yield local.get_search_matches_async(query, checked_ids, cancellable);
}
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue