Geary.ImapDb.SearchQuery: Require stemmer to be passed in to ctor

Since constructing a libstemmer object is non-trivial, this allows a single
per-account instance to be created just once, and it improves testability.
This commit is contained in:
Michael Gratton 2020-11-04 19:01:48 +11:00 committed by Michael James Gratton
parent 0112c8192c
commit e0396c322e
2 changed files with 50 additions and 48 deletions

View file

@ -319,18 +319,19 @@ private class Geary.ImapDB.SearchQuery : Geary.SearchQuery {
// A list of all search terms, regardless of search op field name
private Gee.ArrayList<Term> all = new Gee.ArrayList<Term>();
private SnowBall.Stemmer stemmer;
private unowned SnowBall.Stemmer stemmer;
public async SearchQuery(Geary.Account owner,
ImapDB.Account local,
Gee.Collection<Geary.SearchQuery.Term> expression,
string raw,
SnowBall.Stemmer stemmer,
Geary.SearchQuery.Strategy strategy,
GLib.Cancellable? cancellable) {
base(expression, raw);
this.account = local;
this.stemmer = new SnowBall.Stemmer(find_appropriate_search_stemmer());
this.stemmer = stemmer;
switch (strategy) {
case Strategy.EXACT:
@ -674,49 +675,4 @@ private class Geary.ImapDB.SearchQuery : Geary.SearchQuery {
return stemmed;
}
// Picks the name of the stemming algorithm to use for search terms.
//
// Walks the language names reported by Intl.get_language_names() in
// order and returns the stemmer name for the first one that has a
// known stemmer. Returns "english" if none match.
private string find_appropriate_search_stemmer() {
    // Unfortunately, the stemmer library only accepts the full language
    // name for the stemming algorithm. This translates between the user's
    // preferred language ISO 639-1 code and our available stemmers.
    // FIXME: the available list here is determined by what's included in
    // src/sqlite3-unicodesn/CMakeLists.txt. We should pass that list in
    // instead of hardcoding it here.
    foreach (string code in Intl.get_language_names()) {
        switch (code) {
            case "ar": return "arabic";
            case "eu": return "basque";
            case "ca": return "catalan";
            case "da": return "danish";
            case "nl": return "dutch";
            case "en": return "english";
            case "fi": return "finnish";
            case "fr": return "french";
            case "de": return "german";
            case "el": return "greek";
            case "hi": return "hindi";
            case "hu": return "hungarian";
            case "id": return "indonesian";
            case "ga": return "irish";
            case "it": return "italian";
            case "lt": return "lithuanian";
            case "ne": return "nepali";
            // Norwegian locales are normally named "nb" (Bokmål) or
            // "nn" (Nynorsk) rather than the macrolanguage code "no",
            // so accept all three for the one Norwegian stemmer.
            case "nb":
            case "nn":
            case "no":
                return "norwegian";
            case "pt": return "portuguese";
            case "ro": return "romanian";
            case "ru": return "russian";
            case "sr": return "serbian";
            case "es": return "spanish";
            case "sv": return "swedish";
            case "ta": return "tamil";
            case "tr": return "turkish";
        }
    }

    // Default to English because it seems to be on average the language
    // most likely to be present in emails, regardless of the user's
    // language setting. This is not an exact science, and search results
    // should be ok either way in most cases.
    return "english";
}
}

View file

@ -60,6 +60,8 @@ private abstract class Geary.ImapEngine.GenericAccount : Geary.Account {
private Gee.Map<Folder.SpecialUse,Gee.List<string>> special_search_names =
new Gee.HashMap<Folder.SpecialUse,Gee.List<string>>();
private SnowBall.Stemmer stemmer;
protected GenericAccount(AccountInformation config,
ImapDB.Account local,
@ -107,6 +109,7 @@ private abstract class Geary.ImapEngine.GenericAccount : Geary.Account {
this.db_vacuum_monitor = local.vacuum_monitor;
compile_special_search_names();
this.stemmer = new SnowBall.Stemmer(find_appropriate_search_stemmer());
}
/** {@inheritDoc} */
@ -578,7 +581,7 @@ private abstract class Geary.ImapEngine.GenericAccount : Geary.Account {
GLib.Cancellable? cancellable
) throws GLib.Error {
return yield new ImapDB.SearchQuery(
this, this.local, expression, text, EXACT, cancellable
this, this.local, expression, text, this.stemmer, EXACT, cancellable
);
}
@ -1064,6 +1067,49 @@ private abstract class Geary.ImapEngine.GenericAccount : Geary.Account {
throw new EngineError.OPEN_REQUIRED("Account %s not opened", to_string());
}
// Picks the name of the stemming algorithm passed to the
// SnowBall.Stemmer constructor.
//
// Walks the language names reported by Intl.get_language_names() in
// order and returns the stemmer name for the first one that has a
// known stemmer. Returns "english" if none match.
private string find_appropriate_search_stemmer() {
    // Unfortunately, the stemmer library only accepts the full
    // language name for the stemming algorithm. This translates
    // between the desktop session's preferred language ISO 639-1
    // code and the available stemmers.
    //
    // FIXME: the available list here is determined by what's
    // included in libstemmer. We should pass that list in instead
    // of hardcoding it here.
    foreach (string code in Intl.get_language_names()) {
        switch (code) {
            case "ar": return "arabic";
            case "eu": return "basque";
            case "ca": return "catalan";
            case "da": return "danish";
            case "nl": return "dutch";
            case "en": return "english";
            case "fi": return "finnish";
            case "fr": return "french";
            case "de": return "german";
            case "el": return "greek";
            case "hi": return "hindi";
            case "hu": return "hungarian";
            case "id": return "indonesian";
            case "ga": return "irish";
            case "it": return "italian";
            case "lt": return "lithuanian";
            case "ne": return "nepali";
            // Norwegian locales are normally named "nb" (Bokmål) or
            // "nn" (Nynorsk) rather than the macrolanguage code "no",
            // so accept all three for the one Norwegian stemmer.
            case "nb":
            case "nn":
            case "no":
                return "norwegian";
            case "pt": return "portuguese";
            case "ro": return "romanian";
            case "ru": return "russian";
            case "sr": return "serbian";
            case "es": return "spanish";
            case "sv": return "swedish";
            case "ta": return "tamil";
            case "tr": return "turkish";
        }
    }

    // None of the session's languages has a stemmer in the list
    // above, so fall back to the English one.
    return "english";
}
// Error callback for an account operation (going by its name): hands
// the error to notify_service_problem() along with this account's
// incoming service information. The operation argument is not used.
private void on_operation_error(AccountOperation op, Error error) {
    var incoming = this.information.incoming;
    notify_service_problem(incoming, error);
}