Ensure Geary.ContactStoreImpl handles non-English searches
Fix the DB implementation so that it actually does UTF-8 case-insensitive search matching. Add some unit tests.
This commit is contained in:
parent
788a06144f
commit
4e1aa32514
5 changed files with 147 additions and 35 deletions
|
|
@ -98,19 +98,19 @@ internal class Geary.ContactStoreImpl : BaseObject, Geary.ContactStore {
|
|||
GLib.Cancellable? cancellable)
|
||||
throws GLib.Error {
|
||||
Gee.Collection<Contact> contacts = new Gee.LinkedList<Contact>();
|
||||
string normalised_query = query.make_valid().normalize().down();
|
||||
string normalised_query = Geary.Db.normalise_case_insensitive_query(query);
|
||||
if (!String.is_empty(normalised_query)) {
|
||||
normalised_query = normalised_query + "%";
|
||||
Db.Statement stmt = cx.prepare("""
|
||||
SELECT * FROM ContactTable
|
||||
WHERE highest_importance >= ? AND (
|
||||
real_name LIKE ? COLLATE UTF8ICASE OR
|
||||
normalized_email LIKE ? COLLATE UTF8ICASE
|
||||
UTF8FOLD(real_name) LIKE ? OR
|
||||
UTF8FOLD(email) LIKE ?
|
||||
)
|
||||
ORDER BY highest_importance DESC,
|
||||
real_name IS NULL,
|
||||
real_name COLLATE UTF8ICASE,
|
||||
email COLLATE UTF8ICASE
|
||||
real_name COLLATE UTF8COLL,
|
||||
email COLLATE UTF8COLL
|
||||
LIMIT ?
|
||||
""");
|
||||
stmt.bind_uint(0, min_importance);
|
||||
|
|
|
|||
|
|
@ -81,6 +81,17 @@ public bool set_shared_cache_mode(bool enabled) {
|
|||
return sqlite3_enable_shared_cache(enabled ? 1 : 0) == Sqlite.OK;
|
||||
}
|
||||
|
||||
/**
 * Standard transformation for case-insensitive string values.
 *
 * The text is repaired to valid UTF-8, Unicode-normalised and then
 * case-folded, so that strings differing only in case produce the
 * same result.
 *
 * @param text the string to transform
 * @return the normalised, case-folded form of //text//
 */
public inline string normalise_case_insensitive_query(string text) {
    // This would be a place to do transliteration to improve query
    // results, for example normalising `á` to `a`. The built-in GLib
    // method `string.to_ascii()` does this but is too strong: It will
    // convert e.g. CJK chars to `?`. The `string.tokenize_and_fold`
    // function may work better but the calling interface is all
    // wrong.
    //
    // The text must be made valid first: `string.normalize()` returns
    // null when given invalid UTF-8, which would otherwise be passed
    // straight on to `casefold()`.
    return text.make_valid().normalize().casefold();
}
|
||||
|
||||
private void check_cancelled(string? method, Cancellable? cancellable) throws IOError {
|
||||
if (cancellable != null && cancellable.is_cancelled())
|
||||
throw new IOError.CANCELLED("%s cancelled", !String.is_empty(method) ? method : "Operation");
|
||||
|
|
|
|||
|
|
@ -5,22 +5,45 @@
|
|||
* (version 2.1 or later). See the COPYING file in this distribution.
|
||||
*/
|
||||
|
||||
[CCode (cname = "g_utf8_casefold")]
|
||||
extern string utf8_casefold(string data, ssize_t len);
|
||||
[CCode (cname = "g_utf8_collate_key")]
|
||||
extern string utf8_collate_key(string data, ssize_t len);
|
||||
extern int sqlite3_unicodesn_register_tokenizer(Sqlite.Database db);
|
||||
|
||||
private class Geary.ImapDB.Database : Geary.Db.VersionedDatabase {
|
||||
|
||||
/** Name of the UTF-8 case-insensitive SQLite collation function. */
|
||||
public const string UTF8_CASE_INSENSITIVE_COLLATION = "UTF8ICASE";
|
||||
|
||||
private static int case_insensitive_collation(int a_len, void* a_bytes,
|
||||
int b_len, void* b_bytes) {
|
||||
string a_str = utf8_casefold((string) a_bytes, a_len).collate_key();
|
||||
string b_str = utf8_casefold((string) b_bytes, b_len).collate_key();
|
||||
return strcmp(a_str, b_str);
|
||||
/** Name of the SQLite function that normalises and case-folds UTF-8 text (transliteration is not yet implemented). */
|
||||
public const string UTF8_CASE_INSENSITIVE_FN = "UTF8FOLD";
|
||||
|
||||
/** SQLite UTF-8 collation name. */
|
||||
public const string UTF8_COLLATE = "UTF8COLL";
|
||||
|
||||
|
||||
/**
 * SQLite scalar function callback that folds its single text argument.
 *
 * The first argument is read as text and run through
 * Geary.Db.normalise_case_insensitive_query(), and that result
 * becomes the function's result. If the argument has no text
 * representation (presumably an SQL NULL), the argument value is
 * handed back unchanged.
 *
 * @param context the SQLite call context that receives the result
 * @param values the call arguments; only the first one is used
 */
private static void utf8_transliterate_fold(Sqlite.Context context,
                                            Sqlite.Value[] values) {
    string? input = values[0].to_text();
    if (input == null) {
        // Nothing to fold, so return the argument as it was given
        context.result_value(values[0]);
        return;
    }

    string folded = Geary.Db.normalise_case_insensitive_query(input);
    context.result_text(folded);
}
|
||||
|
||||
/**
 * SQLite collation callback that orders two UTF-8 strings.
 *
 * Each non-null operand is converted to a collation key via
 * g_utf8_collate_key() and the two keys are then compared with
 * GLib.strcmp(). A null operand is compared as a null key.
 *
 * @param a_len length in bytes of the first operand
 * @param a_bytes bytes of the first operand, may be null
 * @param b_len length in bytes of the second operand
 * @param b_bytes bytes of the second operand, may be null
 * @return the result of comparing the two collation keys
 */
private static int utf8_collate(int a_len, void* a_bytes,
                                int b_len, void* b_bytes) {
    // No explicit normalisation step is needed here, generating the
    // collation key takes care of that
    string? lhs_key = (a_bytes != null)
        ? utf8_collate_key((string) a_bytes, a_len)
        : null;
    string? rhs_key = (b_bytes != null)
        ? utf8_collate_key((string) b_bytes, b_len)
        : null;
    return GLib.strcmp(lhs_key, rhs_key);
}
|
||||
|
||||
|
||||
internal GLib.File attachments_path;
|
||||
|
||||
private const int OPEN_PUMP_EVENT_LOOP_MSEC = 100;
|
||||
|
|
@ -598,14 +621,29 @@ private class Geary.ImapDB.Database : Geary.Db.VersionedDatabase {
|
|||
cx.set_recursive_triggers(true);
|
||||
cx.set_synchronous(Db.SynchronousMode.NORMAL);
|
||||
sqlite3_unicodesn_register_tokenizer(cx.db);
|
||||
if (cx.db.create_collation(
|
||||
UTF8_CASE_INSENSITIVE_COLLATION,
|
||||
|
||||
if (cx.db.create_function(
|
||||
UTF8_CASE_INSENSITIVE_FN,
|
||||
1, // n args
|
||||
Sqlite.UTF8,
|
||||
Database.case_insensitive_collation
|
||||
null,
|
||||
Database.utf8_transliterate_fold,
|
||||
null,
|
||||
null
|
||||
) != Sqlite.OK) {
|
||||
throw new DatabaseError.GENERAL(
|
||||
"Failed to register collation function %s",
|
||||
UTF8_CASE_INSENSITIVE_COLLATION
|
||||
"Failed to register function %s",
|
||||
UTF8_CASE_INSENSITIVE_FN
|
||||
);
|
||||
}
|
||||
|
||||
if (cx.db.create_collation(
|
||||
UTF8_COLLATE,
|
||||
Sqlite.UTF8,
|
||||
Database.utf8_collate
|
||||
) != Sqlite.OK) {
|
||||
throw new DatabaseError.GENERAL(
|
||||
"Failed to register collation %s", UTF8_COLLATE
|
||||
);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -20,6 +20,8 @@ class Geary.ContactStoreImplTest : TestCase {
|
|||
add_test("search_no_match", search_no_match);
|
||||
add_test("search_email_match", search_email_match);
|
||||
add_test("search_name_match", search_name_match);
|
||||
add_test("search_utf8_latin_names", search_utf8_latin_names);
|
||||
add_test("search_utf8_multi_byte_names", search_utf8_multi_byte_names);
|
||||
add_test("update_new_contact", update_new_contact);
|
||||
add_test("update_existing_contact", update_existing_contact);
|
||||
}
|
||||
|
|
@ -45,20 +47,20 @@ class Geary.ContactStoreImplTest : TestCase {
|
|||
this.db.open.end(async_result());
|
||||
|
||||
this.db.exec("""
|
||||
INSERT INTO ContactTable (
|
||||
id,
|
||||
normalized_email,
|
||||
real_name,
|
||||
email,
|
||||
highest_importance
|
||||
) VALUES (
|
||||
1,
|
||||
'test@example.com',
|
||||
'Test Name',
|
||||
'Test@example.com',
|
||||
50
|
||||
);
|
||||
""");
|
||||
INSERT INTO ContactTable (
|
||||
id,
|
||||
normalized_email,
|
||||
real_name,
|
||||
email,
|
||||
highest_importance
|
||||
) VALUES (
|
||||
1,
|
||||
'test@example.com',
|
||||
'Test Name',
|
||||
'Test@example.com',
|
||||
50
|
||||
);
|
||||
""");
|
||||
|
||||
this.test_article = new ContactStoreImpl(this.db);
|
||||
}
|
||||
|
|
@ -152,6 +154,67 @@ INSERT INTO ContactTable (
|
|||
assert_false(search_hit.flags.always_load_remote_images(), "Existing flags");
|
||||
}
|
||||
|
||||
/**
 * Checks that a search matches a name containing a non-ASCII Latin character.
 *
 * Inserts a contact named 'Germán', searches for the lower-case
 * prefix "germá" and expects exactly that one contact back, so the
 * match must be both case-insensitive and correct for the accented
 * character.
 */
public void search_utf8_latin_names() throws GLib.Error {
|
||||
// Fixture row: name starts with an upper-case letter and contains `á`
this.db.exec("""
|
||||
INSERT INTO ContactTable (
|
||||
real_name,
|
||||
email,
|
||||
normalized_email,
|
||||
highest_importance
|
||||
) VALUES (
|
||||
'Germán',
|
||||
'latin@example.com',
|
||||
'latin@example.com',
|
||||
50
|
||||
);
|
||||
""");
|
||||
// Query differs from the stored name in case only; the numeric
// arguments are presumably the minimum importance and result limit
// -- TODO confirm against ContactStore.search()
test_article.search.begin(
|
||||
"germá",
|
||||
0,
|
||||
10,
|
||||
null,
|
||||
(obj, ret) => { async_complete(ret); }
|
||||
);
|
||||
Gee.Collection<Contact> results = test_article.search.end(
|
||||
async_result()
|
||||
);
|
||||
assert_int(1, results.size, "results.size");
|
||||
|
||||
Contact search_hit = Collection.get_first(results);
|
||||
// The stored name must come back unmodified, not folded
assert_string("Germán", search_hit.real_name, "Existing real_name");
|
||||
}
|
||||
|
||||
/**
 * Checks that a search matches a name made up of multi-byte (CJK) characters.
 *
 * Inserts a contact named '年収1億円目指せ', searches for the prefix
 * "年収" and expects exactly that one contact back with its name
 * intact.
 */
public void search_utf8_multi_byte_names() throws GLib.Error {
|
||||
// Fixture row: name consists of multi-byte UTF-8 characters
this.db.exec("""
|
||||
INSERT INTO ContactTable (
|
||||
real_name,
|
||||
email,
|
||||
normalized_email,
|
||||
highest_importance
|
||||
) VALUES (
|
||||
'年収1億円目指せ',
|
||||
'cjk@example.com',
|
||||
'cjk@example.com',
|
||||
50
|
||||
);
|
||||
""");
|
||||
|
||||
// Query is the first two characters of the stored name; the numeric
// arguments are presumably the minimum importance and result limit
// -- TODO confirm against ContactStore.search()
test_article.search.begin(
|
||||
"年収",
|
||||
0,
|
||||
10,
|
||||
null,
|
||||
(obj, ret) => { async_complete(ret); }
|
||||
);
|
||||
Gee.Collection<Contact> results = test_article.search.end(
|
||||
async_result()
|
||||
);
|
||||
assert_int(1, results.size, "results.size");
|
||||
|
||||
Contact search_hit = Collection.get_first(results);
|
||||
// The stored name must come back unmodified
assert_string("年収1億円目指せ", search_hit.real_name, "Existing real_name");
|
||||
}
|
||||
|
||||
public void update_new_contact() throws GLib.Error {
|
||||
Contact not_persisted = new Contact(
|
||||
"New@example.com",
|
||||
|
|
|
|||
|
|
@ -147,10 +147,10 @@ class Geary.ImapDB.DatabaseTest : TestCase {
|
|||
INSERT INTO Test (test_str) VALUES ('BB');
|
||||
INSERT INTO Test (test_str) VALUES ('🤯');
|
||||
""");
|
||||
string[] expected = { "🤯", "BB", "B", "a" };
|
||||
string[] expected = { "a", "BB", "B", "🤯" };
|
||||
|
||||
Db.Result result = db.query(
|
||||
"SELECT test_str FROM Test ORDER BY test_str COLLATE UTF8ICASE DESC"
|
||||
"SELECT test_str FROM Test ORDER BY test_str COLLATE UTF8COLL DESC"
|
||||
);
|
||||
|
||||
int i = 0;
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue