diff --git a/BUILDING.md b/BUILDING.md index ff2c7e46..f8ca45ae 100644 --- a/BUILDING.md +++ b/BUILDING.md @@ -74,8 +74,10 @@ Building Geary requires the following major libraries and tools: See the `meson.build` file in the top-level directory for the complete list of required dependencies and minimum versions. -Geary also requires SQLite to be built with the compiler flag -`-DSQLITE_ENABLE_FTS3`. +Geary requires SQLite to be built with both FTS3 and FTS5 support. Ensure +`--enable-fts5`, `-DSQLITE_ENABLE_FTS3` and +`-DSQLITE_ENABLE_FTS3_PARENTHESIS` are passed to the SQLite configure +script. All required libraries and tools are available from major Linux distribution's package repositories: diff --git a/meson.build b/meson.build index 03533ec1..ce279f55 100644 --- a/meson.build +++ b/meson.build @@ -158,8 +158,10 @@ web_extensions_dir = client_lib_dir / 'web-extensions' # Ensure SQLite was built correctly if not cc.has_header_symbol('sqlite3.h', 'SQLITE_DBCONFIG_ENABLE_FTS3_TOKENIZER', dependencies: sqlite) - error('SQLite3 is missing FTS3 tokenizer support. Please compile it with -DSQLITE_ENABLE_FTS3.\n' - + 'See https://bugzilla.gnome.org/show_bug.cgi?id=763203 for details.') + error('SQLite3 was not built with FTS3 support. See BUILDING.md for details.') +endif +if not cc.has_header_symbol('sqlite3.h', 'Fts5ExtensionApi', dependencies: sqlite) + error('SQLite3 was not built with FTS5 support. 
See BUILDING.md for details.') endif # diff --git a/sql/meson.build b/sql/meson.build index 6bf30dca..c4797227 100644 --- a/sql/meson.build +++ b/sql/meson.build @@ -28,6 +28,7 @@ sql_files = [ 'version-027.sql', 'version-028.sql', 'version-029.sql', + 'version-030.sql', ] install_data(sql_files, diff --git a/sql/version-030.sql b/sql/version-030.sql new file mode 100644 index 00000000..48af04df --- /dev/null +++ b/sql/version-030.sql @@ -0,0 +1,19 @@ +-- +-- Convert full-text search from FTS3/4 to FTS5 +-- + +DROP TABLE IF EXISTS MessageSearchTable; + +CREATE VIRTUAL TABLE MessageSearchTable USING fts5( + body, + attachments, + subject, + "from", + receivers, + cc, + bcc, + flags, + + tokenize="unicode61 remove_diacritics 2", + prefix="2,4,6,8,10" +) diff --git a/src/engine/imap-db/imap-db-account.vala b/src/engine/imap-db/imap-db-account.vala index 54522b90..7514aeba 100644 --- a/src/engine/imap-db/imap-db-account.vala +++ b/src/engine/imap-db/imap-db-account.vala @@ -670,11 +670,11 @@ private class Geary.ImapDB.Account : BaseObject { if (query_phrases.size != 0) { sql.append(""" WHERE id IN ( - SELECT docid + SELECT rowid FROM MessageSearchTable WHERE 1=1 """); - sql_add_query_phrases(sql, query_phrases, "INTERSECT", "docid", ""); + sql_add_query_phrases(sql, query_phrases, "INTERSECT", "rowid", ""); sql.append(")"); } else sql.append(" WHERE 1=1"); @@ -980,7 +980,7 @@ private class Geary.ImapDB.Account : BaseObject { // the order of seconds, so manually perform the operation var result = cx.prepare( - "SELECT docid FROM MessageSearchTable" + "SELECT rowid FROM MessageSearchTable" ).exec(cancellable); while (!result.finished) { search_ids.add(result.rowid_at(0)); @@ -1061,7 +1061,8 @@ private class Geary.ImapDB.Account : BaseObject { Email.REQUIRED_FOR_MESSAGE | Email.Field.ORIGINATORS | Email.Field.RECEIVERS | - Email.Field.SUBJECT + Email.Field.SUBJECT | + Email.Field.FLAGS ); Email.Field db_fields; @@ -1417,30 +1418,30 @@ private class Geary.ImapDB.Account : 
BaseObject { StringBuilder sql = new StringBuilder(); sql.append(""" - SELECT docid, offsets(MessageSearchTable), * + SELECT rowid, geary_matches(MessageSearchTable), * FROM MessageSearchTable - WHERE docid IN ( + WHERE rowid IN ( """); sql_append_ids(sql, id_map.keys); sql.append(")"); - StringBuilder condition = new StringBuilder("AND docid IN ("); + StringBuilder condition = new StringBuilder("AND rowid IN ("); sql_append_ids(condition, id_map.keys); condition.append(")"); - sql_add_query_phrases(sql, query_phrases, "UNION", "docid, offsets(MessageSearchTable), *", + sql_add_query_phrases(sql, query_phrases, "UNION", "rowid, geary_matches(MessageSearchTable), *", condition.str); Db.Statement stmt = cx.prepare(sql.str); sql_bind_query_phrases(stmt, 0, query_phrases); - Gee.Map> search_matches = new Gee.HashMap< - ImapDB.EmailIdentifier, Gee.Set>(); + var search_matches = + new Gee.HashMap>(); Db.Result result = stmt.exec(cancellable); while (!result.finished) { - int64 docid = result.rowid_at(0); - assert(id_map.has_key(docid)); - ImapDB.EmailIdentifier id = id_map.get(docid); + int64 rowid = result.rowid_at(0); + assert(id_map.has_key(rowid)); + ImapDB.EmailIdentifier id = id_map.get(rowid); // XXX Avoid a crash when "database disk image is // malformed" error occurs. 
Remove this when the SQLite @@ -1451,30 +1452,12 @@ private class Geary.ImapDB.Account : BaseObject { continue; } - // offsets() function returns a list of 4 strings that are ints indicating position - // and length of match string in search table corpus - string[] offset_array = result.nonnull_string_at(1).split(" "); + var matches = new Gee.HashSet(); + matches.add_all_array(result.nonnull_string_at(1).split(",")); - Gee.Set matches = new Gee.HashSet(); - - int j = 0; - while (true) { - unowned string[] offset_string = offset_array[j:j+4]; - - int column = int.parse(offset_string[0]); - int byte_offset = int.parse(offset_string[2]); - int size = int.parse(offset_string[3]); - - unowned string text = result.nonnull_string_at(column + 2); - matches.add(text[byte_offset : byte_offset + size].down()); - - j += 4; - if (j >= offset_array.length) - break; - } - - if (search_matches.has_key(id)) + if (search_matches.has_key(id)) { matches.add_all(search_matches.get(id)); + } search_matches.set(id, matches); result.next(cancellable); diff --git a/src/engine/imap-db/imap-db-database.vala b/src/engine/imap-db/imap-db-database.vala index cd428c89..9365f876 100644 --- a/src/engine/imap-db/imap-db-database.vala +++ b/src/engine/imap-db/imap-db-database.vala @@ -7,6 +7,7 @@ [CCode (cname = "g_utf8_collate_key")] extern string utf8_collate_key(string data, ssize_t len); +extern int sqlite3_register_fts5_matches(Sqlite.Database db); extern int sqlite3_register_legacy_tokenizer(Sqlite.Database db); private class Geary.ImapDB.Database : Geary.Db.VersionedDatabase { @@ -629,6 +630,10 @@ private class Geary.ImapDB.Database : Geary.Db.VersionedDatabase { sqlite3_register_legacy_tokenizer(cx.db); } + // Register custom `geary_matches()` FTS5 function to obtain + // matching tokens from FTS queries. 
+ sqlite3_register_fts5_matches(cx.db); + if (cx.db.create_function( UTF8_CASE_INSENSITIVE_FN, 1, // n args diff --git a/src/engine/imap-db/imap-db-folder.vala b/src/engine/imap-db/imap-db-folder.vala index 2c2d4015..46550808 100644 --- a/src/engine/imap-db/imap-db-folder.vala +++ b/src/engine/imap-db/imap-db-folder.vala @@ -1044,7 +1044,7 @@ private class Geary.ImapDB.Folder : BaseObject, Geary.ReferenceSemantics { sql = new StringBuilder(); sql.append(""" DELETE FROM MessageSearchTable - WHERE docid IN ( + WHERE rowid IN ( """); sql.append(message_ids_sql_sublist.str); sql.append(")"); @@ -1712,6 +1712,7 @@ private class Geary.ImapDB.Folder : BaseObject, Geary.ReferenceSemantics { string? from = email.from != null ? email.from.to_searchable_string() : null; string? cc = email.cc != null ? email.cc.to_searchable_string() : null; string? bcc = email.bcc != null ? email.bcc.to_searchable_string() : null; + string? flags = email.email_flags != null ? email.email_flags.serialise() : null; if (!Geary.String.is_empty(body) || !Geary.String.is_empty(attachments) || @@ -1719,12 +1720,13 @@ private class Geary.ImapDB.Folder : BaseObject, Geary.ReferenceSemantics { !Geary.String.is_empty(from) || !Geary.String.is_empty(recipients) || !Geary.String.is_empty(cc) || - !Geary.String.is_empty(bcc)) { + !Geary.String.is_empty(bcc) || + !Geary.String.is_empty(flags)) { Db.Statement stmt = cx.prepare(""" INSERT INTO MessageSearchTable - (docid, body, attachment, subject, from_field, receivers, cc, bcc) - VALUES (?, ?, ?, ?, ?, ?, ?, ?) + (rowid, body, attachments, subject, "from", receivers, cc, bcc, flags) + VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?) 
"""); stmt.bind_rowid(0, message_id); stmt.bind_string(1, body); @@ -1734,6 +1736,7 @@ private class Geary.ImapDB.Folder : BaseObject, Geary.ReferenceSemantics { stmt.bind_string(5, recipients); stmt.bind_string(6, cc); stmt.bind_string(7, bcc); + stmt.bind_string(8, flags); stmt.exec_insert(cancellable); } @@ -1741,7 +1744,7 @@ private class Geary.ImapDB.Folder : BaseObject, Geary.ReferenceSemantics { private static bool do_check_for_message_search_row(Db.Connection cx, int64 message_id, Cancellable? cancellable) throws Error { - Db.Statement stmt = cx.prepare("SELECT 'TRUE' FROM MessageSearchTable WHERE docid=?"); + Db.Statement stmt = cx.prepare("SELECT 'TRUE' FROM MessageSearchTable WHERE rowid=?"); stmt.bind_rowid(0, message_id); Db.Result result = stmt.exec(cancellable); @@ -1923,10 +1926,17 @@ private class Geary.ImapDB.Folder : BaseObject, Geary.ReferenceSemantics { // TODO: Unroll loop private void do_set_email_flags(Db.Connection cx, Gee.Map map, Cancellable? cancellable) throws Error { - Db.Statement update_stmt = cx.prepare( - "UPDATE MessageTable SET flags=?, fields = fields | ? WHERE id=?"); + Db.Statement update_message = cx.prepare( + "UPDATE MessageTable SET flags = ?, fields = fields | ? WHERE id = ?" + ); + Db.Statement update_search = cx.prepare(""" + UPDATE MessageSearchTable SET flags = ? WHERE rowid = ? + """ + ); foreach (ImapDB.EmailIdentifier id in map.keys) { + // Find the email location + LocationIdentifier? location = do_get_location_for_id( cx, id, @@ -1940,6 +1950,8 @@ private class Geary.ImapDB.Folder : BaseObject, Geary.ReferenceSemantics { ); } + // Update MessageTable + Geary.Imap.EmailFlags? 
flags = map.get(id) as Geary.Imap.EmailFlags; if (flags == null) { throw new EngineError.BAD_PARAMETERS( @@ -1947,12 +1959,18 @@ private class Geary.ImapDB.Folder : BaseObject, Geary.ReferenceSemantics { ); } - update_stmt.reset(Db.ResetScope.CLEAR_BINDINGS); - update_stmt.bind_string(0, flags.message_flags.serialize()); - update_stmt.bind_int(1, Geary.Email.Field.FLAGS); - update_stmt.bind_rowid(2, id.message_id); + update_message.reset(Db.ResetScope.CLEAR_BINDINGS); + update_message.bind_string(0, flags.message_flags.serialize()); + update_message.bind_int(1, Geary.Email.Field.FLAGS); + update_message.bind_rowid(2, id.message_id); + update_message.exec(cancellable); - update_stmt.exec(cancellable); + // Update MessageSearchTable + + update_search.reset(Db.ResetScope.CLEAR_BINDINGS); + update_search.bind_string(0, flags.serialise()); + update_search.bind_rowid(1, id.message_id); + update_search.exec_insert(cancellable); } } @@ -2119,9 +2137,9 @@ private class Geary.ImapDB.Folder : BaseObject, Geary.ReferenceSemantics { // existing data, then do a DELETE and INSERT. See Bug 772522. Db.Statement select = cx.prepare(""" - SELECT body, attachment, subject, from_field, receivers, cc, bcc + SELECT body, attachments, subject, "from", receivers, cc, bcc, flags FROM MessageSearchTable - WHERE docid=? + WHERE rowid=? """); select.bind_rowid(0, message_id); Db.Result row = select.exec(cancellable); @@ -2133,6 +2151,7 @@ private class Geary.ImapDB.Folder : BaseObject, Geary.ReferenceSemantics { string? recipients = row.string_at(4); string? cc = row.string_at(5); string? bcc = row.string_at(6); + string? 
flags = row.string_at(7); if (new_fields.is_any_set(Geary.Email.REQUIRED_FOR_MESSAGE) && email.fields.is_all_set(Geary.Email.REQUIRED_FOR_MESSAGE)) { @@ -2165,16 +2184,22 @@ private class Geary.ImapDB.Folder : BaseObject, Geary.ReferenceSemantics { bcc = email.bcc.to_searchable_string(); } + if (new_fields.is_any_set(Geary.Email.Field.FLAGS)) { + if (email.email_flags != null) { + flags = email.email_flags.serialise(); + } + } + Db.Statement del = cx.prepare( - "DELETE FROM MessageSearchTable WHERE docid=?" + "DELETE FROM MessageSearchTable WHERE rowid=?" ); del.bind_rowid(0, message_id); del.exec(cancellable); Db.Statement insert = cx.prepare(""" INSERT INTO MessageSearchTable - (docid, body, attachment, subject, from_field, receivers, cc, bcc) - VALUES (?, ?, ?, ?, ?, ?, ?, ?) + (rowid, body, attachments, subject, "from", receivers, cc, bcc, flags) + VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?) """); insert.bind_rowid(0, message_id); insert.bind_string(1, body); @@ -2184,6 +2209,7 @@ private class Geary.ImapDB.Folder : BaseObject, Geary.ReferenceSemantics { insert.bind_string(5, recipients); insert.bind_string(6, cc); insert.bind_string(7, bcc); + insert.bind_string(8, flags); insert.exec_insert(cancellable); } @@ -2230,7 +2256,6 @@ private class Geary.ImapDB.Folder : BaseObject, Geary.ReferenceSemantics { ); post_fields |= Geary.Email.Field.FLAGS; - } } diff --git a/src/engine/imap-db/imap-db-fts5-matches.c b/src/engine/imap-db/imap-db-fts5-matches.c new file mode 100644 index 00000000..4dff4f32 --- /dev/null +++ b/src/engine/imap-db/imap-db-fts5-matches.c @@ -0,0 +1,167 @@ +/* + * Copyright (C) 2011 Nokia + * + * Author: Carlos Garnacho + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. 
+ * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301 USA + */ + +/* + * Borrowed from the Tracker project (see: tracker-fts-tokenizer.c) + * and adapted for Geary by Michael Gratton . + */ + +#include +#include + +typedef struct { + int start; + int end; +} Offset; + + +static int +offsets_tokenizer_func (void *data, + int flags, + const char *token, + int n_token, + int start, + int end) +{ + GArray *offsets = data; + Offset offset = { 0 }; + offset.start = start; + offset.end = end; + g_array_append_val(offsets, offset); + return SQLITE_OK; +} + +static void +geary_matches (const Fts5ExtensionApi *api, + Fts5Context *fts_ctx, + sqlite3_context *ctx, + int n_args, + sqlite3_value **args) +{ + GString *str; + int rc, n_hits, i; + GArray *offsets = NULL; + gint cur_col = -1; + gboolean first = TRUE; + + if (n_args > 0) { + sqlite3_result_error(ctx, "Invalid argument count", -1); + return; + } + + rc = api->xInstCount(fts_ctx, &n_hits); + if (rc != SQLITE_OK) { + sqlite3_result_null(ctx); + return; + } + + str = g_string_new(NULL); + + for (i = 0; i < n_hits; i++) { + int phrase, col, n_token; + const char *text; + int length; + Offset offset; + + rc = api->xInst(fts_ctx, i, &phrase, &col, &n_token); + if (rc != SQLITE_OK) + break; + + if (first || cur_col != col) { + if (offsets) { + g_array_free(offsets, TRUE); + } + + rc = api->xColumnText(fts_ctx, col, &text, &length); + if (rc != SQLITE_OK) + break; + + offsets = g_array_new(FALSE, FALSE, sizeof(Offset)); + rc = api->xTokenize(fts_ctx, + text, + length, + offsets, + 
&offsets_tokenizer_func); + if (rc != SQLITE_OK) { + break; + } + + cur_col = col; + } + + first = FALSE; + + if (str->len != 0) { + g_string_append_c(str, ','); + } + + offset = g_array_index(offsets, Offset, n_token); + g_string_append_len(str, text + offset.start, offset.end - offset.start); + } + + if (offsets) { + g_array_free (offsets, TRUE); + } + + if (rc == SQLITE_OK) { + sqlite3_result_text (ctx, str->str, str->len, g_free); + g_string_free (str, FALSE); + } else { + sqlite3_result_error_code (ctx, rc); + g_string_free (str, TRUE); + } +} + +static fts5_api *get_fts5_api (sqlite3 *db) { + int rc = SQLITE_OK; + sqlite3_stmt *stmt; + fts5_api *api = NULL; + + rc = sqlite3_prepare_v2(db, "SELECT fts5(?1)", + -1, &stmt, 0); + if (rc != SQLITE_OK) { + return NULL; + } + + sqlite3_bind_pointer(stmt, 1, (void*) &api, "fts5_api_ptr", NULL); + sqlite3_step(stmt); + sqlite3_finalize(stmt); + + return api; +} + +gboolean sqlite3_register_fts5_matches(sqlite3 *db) { + fts5_api *api; + int rc = SQLITE_OK; + + api = get_fts5_api(db); + if (!api) { + return FALSE; + } + + rc = api->xCreateFunction(api, + "geary_matches", + NULL, + &geary_matches, + NULL); + + return (rc == SQLITE_OK) ? TRUE : FALSE; +} diff --git a/src/engine/imap-db/imap-db-gc.vala b/src/engine/imap-db/imap-db-gc.vala index dd090ea8..44d268a5 100644 --- a/src/engine/imap-db/imap-db-gc.vala +++ b/src/engine/imap-db/imap-db-gc.vala @@ -433,7 +433,7 @@ private class Geary.ImapDB.GC { stmt = cx.prepare(""" DELETE FROM MessageSearchTable - WHERE docid = ? + WHERE rowid = ? 
"""); stmt.bind_rowid(0, message_id); diff --git a/src/engine/imap-db/imap-db-search-query.vala b/src/engine/imap-db/imap-db-search-query.vala index 968ac6ea..78f4eda3 100644 --- a/src/engine/imap-db/imap-db-search-query.vala +++ b/src/engine/imap-db/imap-db-search-query.vala @@ -18,11 +18,11 @@ private class Geary.ImapDB.SearchQuery : Geary.SearchQuery { private const unichar[] SEARCH_TERM_CONTINUATION_CHARS = { '-', '_', '.', '@' }; // Search operator field names, eg: "to:foo@example.com" or "is:unread" - private const string SEARCH_OP_ATTACHMENT = "attachment"; + private const string SEARCH_OP_ATTACHMENT = "attachments"; private const string SEARCH_OP_BCC = "bcc"; private const string SEARCH_OP_BODY = "body"; private const string SEARCH_OP_CC = "cc"; - private const string SEARCH_OP_FROM = "from_field"; + private const string SEARCH_OP_FROM = "\"from\""; private const string SEARCH_OP_IS = "is"; private const string SEARCH_OP_SUBJECT = "subject"; private const string SEARCH_OP_TO = "receivers"; diff --git a/src/engine/meson.build b/src/engine/meson.build index 992053ea..1e9eecf1 100644 --- a/src/engine/meson.build +++ b/src/engine/meson.build @@ -176,6 +176,7 @@ engine_vala_sources = files( 'imap-db/imap-db-database.vala', 'imap-db/imap-db-email-identifier.vala', 'imap-db/imap-db-folder.vala', + 'imap-db/imap-db-fts5-matches.c', 'imap-db/imap-db-gc.vala', 'imap-db/imap-db-message-row.vala', 'imap-db/imap-db-search-query.vala', diff --git a/src/meson.build b/src/meson.build index 7cbfa80a..1ce6681c 100644 --- a/src/meson.build +++ b/src/meson.build @@ -49,8 +49,6 @@ geary_c_args = [ '-DGCK_API_SUBJECT_TO_CHANGE', '-DGCR_API_SUBJECT_TO_CHANGE', '-DGOA_API_IS_SUBJECT_TO_CHANGE', - '-DSQLITE_ENABLE_FTS4', - '-DSQLITE_ENABLE_FTS4_UNICODE61' ] subdir('engine') diff --git a/test/engine/imap-db/imap-db-database-test.vala b/test/engine/imap-db/imap-db-database-test.vala index 9d69ac19..60c69ef4 100644 --- a/test/engine/imap-db/imap-db-database-test.vala +++ 
b/test/engine/imap-db/imap-db-database-test.vala @@ -106,7 +106,7 @@ class Geary.ImapDB.DatabaseTest : TestCase { ); db.open.end(async_result()); - assert_equal(db.get_schema_version(), 29, "Post-upgrade version"); + assert_equal(db.get_schema_version(), 30, "Post-upgrade version"); // Since schema v22 deletes the re-creates all attachments, // attachment 12 should no longer exist on the file system and