Better duplicate detection in local database
This incorporates much better duplicate detection in the local database, using both the RFC-822 Message-ID and IMAP metadata (internaldate, RFC822 size) to determine if a message is already stored in the database. This is very useful when a message is stored in multiple folders, or when an already-downloaded message is returned to a folder it originated in (e.g. INBOX). It also includes some minor fixes to listing email by EmailIdentifier, which save a roundtrip to the server in certain edge cases.
This commit is contained in:
parent
d8fe58bf46
commit
f5b7d29a8c
11 changed files with 214 additions and 239 deletions
|
|
@ -223,10 +223,11 @@ private class Geary.EngineFolder : Geary.AbstractFolder {
|
|||
replay_queue.schedule(new ReplayAppend(this, total));
|
||||
}
|
||||
|
||||
// Need to prefetch PROPERTIES (or, in the future NONE or LOCATION) fields to create a
|
||||
// Need to prefetch at least an EmailIdentifier (and duplicate detection fields) to create a
|
||||
// normalized placeholder in the local database of the message, so all positions are
|
||||
// properly relative to the end of the message list; once this is done, notify user of new
|
||||
// messages.
|
||||
// messages. If duplicates, create_email_async() will fall through to an updated merge,
|
||||
// which is exactly what we want.
|
||||
//
|
||||
// This MUST only be called from ReplayAppend.
|
||||
private async void do_replay_appended_messages(int new_remote_count) {
|
||||
|
|
@ -247,7 +248,7 @@ private class Geary.EngineFolder : Geary.AbstractFolder {
|
|||
// normalize starting at the message *after* the highest position of the local store,
|
||||
// which has now changed
|
||||
Gee.List<Geary.Email>? list = yield remote_folder.list_email_async(remote_count + 1, -1,
|
||||
Geary.Email.Field.PROPERTIES, Geary.Folder.ListFlags.NONE, null);
|
||||
local_folder.get_duplicate_detection_fields(), Geary.Folder.ListFlags.NONE, null);
|
||||
assert(list != null && list.size > 0);
|
||||
|
||||
foreach (Geary.Email email in list) {
|
||||
|
|
@ -351,6 +352,10 @@ private class Geary.EngineFolder : Geary.AbstractFolder {
|
|||
flags.is_any_set(Folder.ListFlags.FAST));
|
||||
}
|
||||
|
||||
// TODO: A great optimization would be to fetch message "fragments" from the local database
|
||||
// (retrieve all stored fields that match required_fields, although not all of required_fields
|
||||
// are present) and only fetch the missing parts from the remote; to do this right, requests
|
||||
// would have to be parallelized.
|
||||
private async void do_list_email_async(int low, int count, Geary.Email.Field required_fields,
|
||||
Gee.List<Geary.Email>? accumulator, EmailCallback? cb, Cancellable? cancellable,
|
||||
bool local_only) throws Error {
|
||||
|
|
@ -706,11 +711,18 @@ private class Geary.EngineFolder : Geary.AbstractFolder {
|
|||
high = initial_position;
|
||||
}
|
||||
|
||||
int actual_count = (high - low + 1);
|
||||
// low should never be -1, so don't need to check for that
|
||||
low = low.clamp(1, int.MAX);
|
||||
|
||||
debug("do_list_email_by_id_async: initial_id=%s initial_position=%d count=%d actual_count=%d low=%d high=%d local_count=%d remote_count=%d",
|
||||
int actual_count = ((high - low) + 1);
|
||||
|
||||
// one more check for exclusive listing
|
||||
if (actual_count == 0 || (excluding_id && actual_count == 1))
|
||||
return;
|
||||
|
||||
debug("do_list_email_by_id_async: initial_id=%s initial_position=%d count=%d actual_count=%d low=%d high=%d local_count=%d remote_count=%d excl=%s",
|
||||
initial_id.to_string(), initial_position, count, actual_count, low, high, local_count,
|
||||
remote_count);
|
||||
remote_count, excluding_id.to_string());
|
||||
|
||||
yield do_list_email_async(low, actual_count, required_fields, accumulator, cb, cancellable,
|
||||
local_only);
|
||||
|
|
@ -724,6 +736,11 @@ private class Geary.EngineFolder : Geary.AbstractFolder {
|
|||
|
||||
debug("Background fetching %d emails for %s", needed_by_position.length, to_string());
|
||||
|
||||
// Always get the flags for normalization and whatever the local store requires for duplicate
|
||||
// detection
|
||||
Geary.Email.Field full_fields =
|
||||
required_fields | Geary.Email.Field.PROPERTIES | local_folder.get_duplicate_detection_fields();
|
||||
|
||||
Gee.List<Geary.Email> full = new Gee.ArrayList<Geary.Email>();
|
||||
|
||||
int index = 0;
|
||||
|
|
@ -738,52 +755,16 @@ private class Geary.EngineFolder : Geary.AbstractFolder {
|
|||
list = needed_by_position;
|
||||
}
|
||||
|
||||
// Always get the flags, and the generic end-user won't know to ask for them until they
|
||||
// need them
|
||||
Gee.List<Geary.Email>? remote_list = yield remote_folder.list_email_sparse_async(
|
||||
list, required_fields | Geary.Email.Field.PROPERTIES, Geary.Folder.ListFlags.NONE,
|
||||
cancellable);
|
||||
list, full_fields, Geary.Folder.ListFlags.NONE, cancellable);
|
||||
|
||||
if (remote_list == null || remote_list.size == 0)
|
||||
break;
|
||||
|
||||
// if any were fetched, store locally
|
||||
// TODO: Bulk writing
|
||||
foreach (Geary.Email email in remote_list) {
|
||||
bool exists_in_system = false;
|
||||
if (email.message_id != null) {
|
||||
int count;
|
||||
exists_in_system = yield local.has_message_id_async(email.message_id, out count,
|
||||
cancellable);
|
||||
}
|
||||
|
||||
bool exists_in_folder = yield local_folder.is_email_associated_async(email,
|
||||
cancellable);
|
||||
|
||||
// NOTE: Although this looks redundant, this is a complex decision case and laying
|
||||
// it out like this helps explain the logic. Also, this code relies on the fact
|
||||
// that update_email_async() is a powerful call which might be broken down in the
|
||||
// future (requiring a duplicate email be manually associated with the folder,
|
||||
// for example), and so would like to keep this around to facilitate that.
|
||||
if (!exists_in_system && !exists_in_folder) {
|
||||
// This case indicates the email is new to the local store OR has no
|
||||
// Message-ID and so a new copy must be stored.
|
||||
yield local_folder.create_email_async(email, cancellable);
|
||||
} else if (exists_in_system && !exists_in_folder) {
|
||||
// This case indicates the email has been (partially) stored previously but
|
||||
// was not associated with this folder; update it (which implies association)
|
||||
yield local_folder.update_email_async(email, false, cancellable);
|
||||
} else if (!exists_in_system && exists_in_folder) {
|
||||
// This case indicates the message doesn't have a Message-ID and can only be
|
||||
// identified by a folder-specific ID, so it can be updated in the folder
|
||||
// (This may result in multiple copies of the message stored locally.)
|
||||
yield local_folder.update_email_async(email, true, cancellable);
|
||||
} else if (exists_in_system && exists_in_folder) {
|
||||
// This indicates the message is in the local store and was previously
|
||||
// associated with this folder, so merely update the local store
|
||||
yield local_folder.update_email_async(email, false, cancellable);
|
||||
}
|
||||
}
|
||||
foreach (Geary.Email email in remote_list)
|
||||
yield local_folder.create_email_async(email, cancellable);
|
||||
|
||||
if (cb != null)
|
||||
cb(remote_list, null);
|
||||
|
|
@ -824,7 +805,7 @@ private class Geary.EngineFolder : Geary.AbstractFolder {
|
|||
Geary.Email email = yield remote_folder.fetch_email_async(id, fields, cancellable);
|
||||
|
||||
// save to local store
|
||||
yield local_folder.update_email_async(email, false, cancellable);
|
||||
yield local_folder.create_email_async(email, cancellable);
|
||||
|
||||
return email;
|
||||
}
|
||||
|
|
@ -869,13 +850,10 @@ private class Geary.EngineFolder : Geary.AbstractFolder {
|
|||
debug("prefetching %d (%d) for %s (local_low=%d)", high, prefetch_count, to_string(),
|
||||
local_low);
|
||||
|
||||
// Use PROPERTIES as they're the most useful information for certain actions (such as
|
||||
// finding duplicates when we start using INTERNALDATE and RFC822.SIZE) and cheap to fetch
|
||||
//
|
||||
// TODO: Consider only fetching their UID; would need Geary.Email.Field.LOCATION (or
|
||||
// perhaps NONE is considered a call for just the UID).
|
||||
// Normalize the local folder by fetching EmailIdentifiers for all missing email as well
|
||||
// as fields for duplicate detection
|
||||
Gee.List<Geary.Email>? list = yield remote_folder.list_email_async(high, prefetch_count,
|
||||
Geary.Email.Field.PROPERTIES, Geary.Folder.ListFlags.NONE, cancellable);
|
||||
local_folder.get_duplicate_detection_fields(), Geary.Folder.ListFlags.NONE, cancellable);
|
||||
if (list == null || list.size != prefetch_count) {
|
||||
throw new EngineError.BAD_PARAMETERS("Unable to prefetch %d email starting at %d in %s",
|
||||
count, low, to_string());
|
||||
|
|
|
|||
|
|
@ -91,7 +91,8 @@ private class Geary.GenericImapFolder : Geary.EngineFolder {
|
|||
cancellable);
|
||||
|
||||
if (newest != null && newest.size > 0) {
|
||||
debug("saving %d newest emails in %s", newest.size, to_string());
|
||||
debug("saving %d newest emails starting at %s in %s", newest.size, uid_start.to_string(),
|
||||
to_string());
|
||||
foreach (Geary.Email email in newest) {
|
||||
try {
|
||||
yield local_folder.create_email_async(email, cancellable);
|
||||
|
|
@ -159,8 +160,7 @@ private class Geary.GenericImapFolder : Geary.EngineFolder {
|
|||
if (remote_uid.value == local_uid.value) {
|
||||
// same, update flags and move on
|
||||
try {
|
||||
yield imap_local_folder.update_email_async(old_remote[remote_ctr], true,
|
||||
cancellable);
|
||||
yield local_folder.create_email_async(old_remote[remote_ctr], cancellable);
|
||||
} catch (Error update_err) {
|
||||
debug("Unable to update old email in %s: %s", to_string(), update_err.message);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -10,23 +10,19 @@ private interface Geary.LocalAccount : Object, Geary.Account {
|
|||
|
||||
public abstract async void update_folder_async(Geary.Folder folder, Cancellable? cancellable = null)
|
||||
throws Error;
|
||||
|
||||
/**
|
||||
* Returns true if the email (identified by its Message-ID) already exists in the account's
|
||||
* local store, no matter the folder.
|
||||
*
|
||||
* Note that there are no guarantees of the uniqueness of a Message-ID, or even that a message
|
||||
* will have one. Because of this situation the method can return the number of messages
|
||||
* found with that ID.
|
||||
*/
|
||||
public async abstract bool has_message_id_async(Geary.RFC822.MessageID message_id,
|
||||
out int count, Cancellable? cancellable = null) throws Error;
|
||||
}
|
||||
|
||||
private interface Geary.LocalFolder : Object, Geary.Folder {
|
||||
public async abstract bool is_email_present_async(Geary.EmailIdentifier id,
|
||||
out Geary.Email.Field available_fields, Cancellable? cancellable = null) throws Error;
|
||||
|
||||
|
||||
/**
|
||||
* Returns the Geary.Email.Field bitfield of all email fields that must be requested from the
|
||||
* remote folder in order to do proper duplicate detection within the local folder. May
|
||||
* return Geary.Email.Field.NONE if no duplicate detection is available.
|
||||
*/
|
||||
public abstract Geary.Email.Field get_duplicate_detection_fields();
|
||||
|
||||
/**
|
||||
* Converts an EmailIdentifier into positional addressing in the Folder. This call relies on
|
||||
* the fact that when a Folder is fully opened, the local stores' tail list of messages (the
|
||||
|
|
@ -41,47 +37,5 @@ private interface Geary.LocalFolder : Object, Geary.Folder {
|
|||
*/
|
||||
public async abstract int get_id_position_async(Geary.EmailIdentifier id, Cancellable? cancellable)
|
||||
throws Error;
|
||||
|
||||
/**
|
||||
* Geary allows for a single message to exist in multiple folders. This method checks if the
|
||||
* email is associated with this folder. It may rely on a Message-ID being present, in which
|
||||
* case if it's not the method will throw an EngineError.INCOMPLETE_MESSAGE.
|
||||
*
|
||||
* If the email is not in the local store, this method returns false.
|
||||
*/
|
||||
public async abstract bool is_email_associated_async(Geary.Email email, Cancellable? cancellable = null)
|
||||
throws Error;
|
||||
|
||||
/**
|
||||
* Geary allows for a single message to exist in multiple folders. It also allows for partial
|
||||
* email information to be stored and updated, building the local store as more information is
|
||||
* downloaded from the server.
|
||||
*
|
||||
* update_email_async() updates the email's information in the local store, adding any new
|
||||
* fields not already present. If the email has fields already stored, the local version *will*
|
||||
* be overwritten with this new information. However, if the email has fewer fields than the
|
||||
* local version, the old information will not be lost. In this sense this is a merge
|
||||
* operation.
|
||||
*
|
||||
* update_email_async() will also attempt to associate an email existing in the system with this
|
||||
* folder. If the message has folder-specific properties that identify it, those will be used;
|
||||
* if not, update_email_async() will attempt to use the Message-ID. If the Message-ID is not
|
||||
* available in the email, it will throw EngineError.INCOMPLETE_MESSAGE unless
|
||||
* duplicate_okay is true, which confirms that it's okay to not attempt the linkage (which
|
||||
* should be done if the message simply lacks a Message-ID).
|
||||
* TODO: Examine other fields in the email and attempt to match it with existing messages.
|
||||
*
|
||||
* The EmailLocation field is used to position the email in the folder's ordering.
|
||||
* If another email exists at the same EmailLocation.position, EngineError.ALREADY_EXISTS
|
||||
* will be thrown.
|
||||
*
|
||||
* If the email does not exist in the local store OR the email has no Message-ID and
|
||||
* duplicate_okay is true OR multiple messages are found in the system with the same
|
||||
* Message-ID, update_email_async() will see if there's any indication of the email being
|
||||
* associated with the folder. If so, it will merge in the new information. If not, this
|
||||
* method will fall-through to create_email_async().
|
||||
*/
|
||||
public async abstract void update_email_async(Geary.Email email, bool duplicate_okay,
|
||||
Cancellable? cancellable = null) throws Error;
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue