diff --git a/src/engine/common/common-contact-store-impl.vala b/src/engine/common/common-contact-store-impl.vala index 433213bd..912fee2e 100644 --- a/src/engine/common/common-contact-store-impl.vala +++ b/src/engine/common/common-contact-store-impl.vala @@ -98,19 +98,19 @@ internal class Geary.ContactStoreImpl : BaseObject, Geary.ContactStore { GLib.Cancellable? cancellable) throws GLib.Error { Gee.Collection contacts = new Gee.LinkedList(); - string normalised_query = query.make_valid().normalize().down(); + string normalised_query = Geary.Db.normalise_case_insensitive_query(query); if (!String.is_empty(normalised_query)) { normalised_query = normalised_query + "%"; Db.Statement stmt = cx.prepare(""" SELECT * FROM ContactTable WHERE highest_importance >= ? AND ( - real_name LIKE ? COLLATE UTF8ICASE OR - normalized_email LIKE ? COLLATE UTF8ICASE + UTF8FOLD(real_name) LIKE ? OR + UTF8FOLD(email) LIKE ? ) ORDER BY highest_importance DESC, real_name IS NULL, - real_name COLLATE UTF8ICASE, - email COLLATE UTF8ICASE + real_name COLLATE UTF8COLL, + email COLLATE UTF8COLL LIMIT ? """); stmt.bind_uint(0, min_importance); diff --git a/src/engine/db/db.vala b/src/engine/db/db.vala index ff362496..b062ee9a 100644 --- a/src/engine/db/db.vala +++ b/src/engine/db/db.vala @@ -81,6 +81,17 @@ public bool set_shared_cache_mode(bool enabled) { return sqlite3_enable_shared_cache(enabled ? 1 : 0) == Sqlite.OK; } +/** Standard transformation for case-insensitive string values. */ +public inline string normalise_case_insensitive_query(string text) { + // This would be a place to do transliteration to improve query + // results, for example normalising `á` to `a`. The built-in GLib + // method `string.to_ascii()` does this but is too strong: It will + // convert e.g. CJK chars to `?`. The `string.tokenize_and_fold` + // function may work better but the calling interface is all + // wrong. + return text.normalize().casefold(); +} + private void check_cancelled(string? method, Cancellable? cancellable) throws IOError { if (cancellable != null && cancellable.is_cancelled()) throw new IOError.CANCELLED("%s cancelled", !String.is_empty(method) ? method : "Operation"); diff --git a/src/engine/imap-db/imap-db-database.vala b/src/engine/imap-db/imap-db-database.vala index cbf65d65..91e2f01c 100644 --- a/src/engine/imap-db/imap-db-database.vala +++ b/src/engine/imap-db/imap-db-database.vala @@ -5,22 +5,45 @@ * (version 2.1 or later). See the COPYING file in this distribution. */ -[CCode (cname = "g_utf8_casefold")] -extern string utf8_casefold(string data, ssize_t len); +[CCode (cname = "g_utf8_collate_key")] +extern string utf8_collate_key(string data, ssize_t len); extern int sqlite3_unicodesn_register_tokenizer(Sqlite.Database db); private class Geary.ImapDB.Database : Geary.Db.VersionedDatabase { - /** Name of UTF-8 case-sensitive SQLite collation function name. */ - public const string UTF8_CASE_INSENSITIVE_COLLATION = "UTF8ICASE"; - private static int case_insensitive_collation(int a_len, void* a_bytes, - int b_len, void* b_bytes) { - string a_str = utf8_casefold((string) a_bytes, a_len).collate_key(); - string b_str = utf8_casefold((string) b_bytes, b_len).collate_key(); - return strcmp(a_str, b_str); + /** SQLite UTF-8 case-insensitive, transliterating function name. */ + public const string UTF8_CASE_INSENSITIVE_FN = "UTF8FOLD"; + + /** SQLite UTF-8 collation name. */ + public const string UTF8_COLLATE = "UTF8COLL"; + + + private static void utf8_transliterate_fold(Sqlite.Context context, + Sqlite.Value[] values) { + string? text = values[0].to_text(); + if (text != null) { + context.result_text(Geary.Db.normalise_case_insensitive_query(text)); + } else { + context.result_value(values[0]); + } } + private static int utf8_collate(int a_len, void* a_bytes, + int b_len, void* b_bytes) { + // Don't need to normalise, collate_key() will do it for us + string? a_str = null; + if (a_bytes != null) { + a_str = utf8_collate_key((string) a_bytes, a_len); + } + string? b_str = null; + if (b_bytes != null) { + b_str = utf8_collate_key((string) b_bytes, b_len); + } + return GLib.strcmp(a_str, b_str); + } + + internal GLib.File attachments_path; private const int OPEN_PUMP_EVENT_LOOP_MSEC = 100; @@ -598,14 +621,29 @@ private class Geary.ImapDB.Database : Geary.Db.VersionedDatabase { cx.set_recursive_triggers(true); cx.set_synchronous(Db.SynchronousMode.NORMAL); sqlite3_unicodesn_register_tokenizer(cx.db); - if (cx.db.create_collation( - UTF8_CASE_INSENSITIVE_COLLATION, + + if (cx.db.create_function( + UTF8_CASE_INSENSITIVE_FN, + 1, // n args Sqlite.UTF8, - Database.case_insensitive_collation + null, + Database.utf8_transliterate_fold, + null, + null ) != Sqlite.OK) { throw new DatabaseError.GENERAL( - "Failed to register collation function %s", - UTF8_CASE_INSENSITIVE_COLLATION + "Failed to register function %s", + UTF8_CASE_INSENSITIVE_FN + ); + } + + if (cx.db.create_collation( + UTF8_COLLATE, + Sqlite.UTF8, + Database.utf8_collate + ) != Sqlite.OK) { + throw new DatabaseError.GENERAL( + "Failed to register collation %s", UTF8_COLLATE ); } } diff --git a/test/engine/common/common-contact-store-impl-test.vala b/test/engine/common/common-contact-store-impl-test.vala index 99530c87..b0a323ae 100644 --- a/test/engine/common/common-contact-store-impl-test.vala +++ b/test/engine/common/common-contact-store-impl-test.vala @@ -20,6 +20,8 @@ class Geary.ContactStoreImplTest : TestCase { add_test("search_no_match", search_no_match); add_test("search_email_match", search_email_match); add_test("search_name_match", search_name_match); + add_test("search_utf8_latin_names", search_utf8_latin_names); + add_test("search_utf8_multi_byte_names", search_utf8_multi_byte_names); add_test("update_new_contact", update_new_contact); add_test("update_existing_contact", update_existing_contact); } @@ -45,20 +47,20 @@ class Geary.ContactStoreImplTest : TestCase { this.db.open.end(async_result()); this.db.exec(""" -INSERT INTO ContactTable ( - id, - normalized_email, - real_name, - email, - highest_importance -) VALUES ( - 1, - 'test@example.com', - 'Test Name', - 'Test@example.com', - 50 -); -"""); + INSERT INTO ContactTable ( + id, + normalized_email, + real_name, + email, + highest_importance + ) VALUES ( + 1, + 'test@example.com', + 'Test Name', + 'Test@example.com', + 50 + ); + """); this.test_article = new ContactStoreImpl(this.db); } @@ -152,6 +154,67 @@ INSERT INTO ContactTable ( assert_false(search_hit.flags.always_load_remote_images(), "Existing flags"); } + public void search_utf8_latin_names() throws GLib.Error { + this.db.exec(""" + INSERT INTO ContactTable ( + real_name, + email, + normalized_email, + highest_importance + ) VALUES ( + 'Germán', + 'latin@example.com', + 'latin@example.com', + 50 + ); + """); + test_article.search.begin( + "germá", + 0, + 10, + null, + (obj, ret) => { async_complete(ret); } + ); + Gee.Collection results = test_article.search.end( + async_result() + ); + assert_int(1, results.size, "results.size"); + + Contact search_hit = Collection.get_first(results); + assert_string("Germán", search_hit.real_name, "Existing real_name"); + } + + public void search_utf8_multi_byte_names() throws GLib.Error { + this.db.exec(""" + INSERT INTO ContactTable ( + real_name, + email, + normalized_email, + highest_importance + ) VALUES ( + '年収1億円目指せ', + 'cjk@example.com', + 'cjk@example.com', + 50 + ); + """); + + test_article.search.begin( + "年収", + 0, + 10, + null, + (obj, ret) => { async_complete(ret); } + ); + Gee.Collection results = test_article.search.end( + async_result() + ); + assert_int(1, results.size, "results.size"); + + Contact search_hit = Collection.get_first(results); + assert_string("年収1億円目指せ", search_hit.real_name, "Existing real_name"); + } + public void update_new_contact() throws GLib.Error { Contact not_persisted = new Contact( "New@example.com", diff --git a/test/engine/imap-db/imap-db-database-test.vala b/test/engine/imap-db/imap-db-database-test.vala index f937456e..44a4845f 100644 --- a/test/engine/imap-db/imap-db-database-test.vala +++ b/test/engine/imap-db/imap-db-database-test.vala @@ -147,10 +147,10 @@ class Geary.ImapDB.DatabaseTest : TestCase { INSERT INTO Test (test_str) VALUES ('BB'); INSERT INTO Test (test_str) VALUES ('🤯'); """); - string[] expected = { "🤯", "BB", "B", "a" }; + string[] expected = { "a", "BB", "B", "🤯" }; Db.Result result = db.query( - "SELECT test_str FROM Test ORDER BY test_str COLLATE UTF8ICASE DESC" + "SELECT test_str FROM Test ORDER BY test_str COLLATE UTF8COLL DESC" ); int i = 0;