From f5b7d29a8c0e040edda191ee0e9a80030dc894df Mon Sep 17 00:00:00 2001 From: Jim Nelson Date: Mon, 14 Nov 2011 12:09:52 -0800 Subject: [PATCH] Better duplicate detection in local database This incorporates much better duplicate detection in the local database, using both RFC-822 Message-ID as well as IMAP metadata (internaldate, RFC822 size) to determine if a message is already stored in the database. Very useful when a message is stored in multiple folders, or an already-downloaded message is returned to a folder it originated in (i.e. INBOX). Also some minor fixes to listing email by EmailIdentifier which save a roundtrip to the server for certain edge cases. --- src/client/ui/main-window.vala | 4 +- src/engine/api/geary-folder.vala | 19 +- src/engine/imap/transport/imap-mailbox.vala | 14 +- src/engine/impl/geary-engine-folder.vala | 82 +++---- .../impl/geary-generic-imap-folder.vala | 6 +- src/engine/impl/geary-local-interfaces.vala | 62 +---- src/engine/sqlite/api/sqlite-account.vala | 7 - src/engine/sqlite/api/sqlite-folder.vala | 213 +++++++++--------- .../sqlite/email/sqlite-message-row.vala | 2 +- .../sqlite/email/sqlite-message-table.vala | 2 + .../sqlite-imap-message-properties-table.vala | 42 ++++ 11 files changed, 214 insertions(+), 239 deletions(-) diff --git a/src/client/ui/main-window.vala b/src/client/ui/main-window.vala index f5175553..cd040676 100644 --- a/src/client/ui/main-window.vala +++ b/src/client/ui/main-window.vala @@ -251,9 +251,11 @@ public class MainWindow : Gtk.Window { message_list_view.enable_load_more = false; Geary.EmailIdentifier? low_id = message_list_store.get_email_id_lowest(); + if (low_id == null) + return; current_conversations.load_by_id_async.begin(low_id, - FETCH_EMAIL_CHUNK_COUNT, - Geary.Folder.ListFlags.NONE, cancellable_folder, on_load_more_completed); + Geary.Folder.ListFlags.EXCLUDING_ID, cancellable_folder, on_load_more_completed); } private void on_load_more_completed(Object? source, AsyncResult result) { diff --git a/src/engine/api/geary-folder.vala b/src/engine/api/geary-folder.vala index 1d071007..c4cc3947 100644 --- a/src/engine/api/geary-folder.vala +++ b/src/engine/api/geary-folder.vala @@ -179,11 +179,10 @@ public interface Geary.Folder : Object { * get_required_fields_for_writing() to determine which fields must be present to create the * email. * - * This method will throw EngineError.ALREADY_EXISTS if the email already exists in the folder - * *and* the backing medium allows for checking prior to creation (which is not necessarily - * the case with network folders). Use LocalFolder.update_email_async() to update fields on - * an existing message in the local store. Saving an email on the server will be available - * later. + * If the Folder supports duplicate detection, it may merge in additional fields from this Email + * and associate the revised Email with this Folder. See LocalFolder for specific calls that + * deal with this. Callers from outside the Engine don't need to worry about this; it's taken + * care of under the covers. * * The Folder must be opened prior to attempting this operation. */ @@ -287,11 +286,6 @@ public interface Geary.Folder : Object { * only the Email with the specified initial_id will be listed, making this method operate * like fetch_email_async(). * - * There is no guarantee that a message with the initial_id will be returned however. - * (It is up to the implementation to deal with spans starting from a non-existant or - * unavailable EmailIdentifier.) To fetch email exclusive of the initial_id, use - * EmailIdentifier.next() or EmailIdentifier.previous(). - * * If count is positive, initial_id is the *lowest* identifier and the returned list is going * up the stack (toward the most recently added). If the count is negative, initial_id is * the *highest* identifier and the returned list is going down the stack (toward the earliest @@ -304,8 +298,9 @@ public interface Geary.Folder : Object { * some times desirable to list messages excluding the specified EmailIdentifier, callers may * use ListFlags.EXCLUDING_ID (which is a flag only recognized by this method and * lazy_list_email_by_id()). This ListFlag *must* be supported by all Folders and will not - * necessarily be returned by get_supported_flags(). Note that this flag doesn't make sense - * when count is zero or one and will be ignored. + * necessarily be returned by get_supported_flags(). If the count is zero or one (or + * the number of messages remaining on the stack from the initial ID's position is zero or one) + * *and* this flag is set, no messages will be returned. * * There's no guarantee of the returned messages' order. * diff --git a/src/engine/imap/transport/imap-mailbox.vala b/src/engine/imap/transport/imap-mailbox.vala index 2ddb53c1..9e2dd31c 100644 --- a/src/engine/imap/transport/imap-mailbox.vala +++ b/src/engine/imap/transport/imap-mailbox.vala @@ -298,14 +298,13 @@ public class Geary.Imap.Mailbox : Geary.SmartReference { RFC822.Header headers = new RFC822.Header(body_data[0]); // DATE - if (!email.fields.is_all_set(Geary.Email.Field.DATE)) { + if (!email.fields.is_all_set(Geary.Email.Field.DATE) && fields.require(Geary.Email.Field.DATE)) { string? value = headers.get_header("Date"); - if (!String.is_empty(value)) - email.set_send_date(new RFC822.Date(value)); + email.set_send_date(!String.is_empty(value) ? new RFC822.Date(value) : null); } // ORIGINATORS - if (!email.fields.is_all_set(Geary.Email.Field.ORIGINATORS)) { + if (!email.fields.is_all_set(Geary.Email.Field.ORIGINATORS) && fields.require(Geary.Email.Field.ORIGINATORS)) { RFC822.MailboxAddresses? from = null; RFC822.MailboxAddresses? sender = null; RFC822.MailboxAddresses? reply_to = null; @@ -326,7 +325,7 @@ public class Geary.Imap.Mailbox : Geary.SmartReference { } // RECEIVERS - if (!email.fields.is_all_set(Geary.Email.Field.RECEIVERS)) { + if (!email.fields.is_all_set(Geary.Email.Field.RECEIVERS) && fields.require(Geary.Email.Field.RECEIVERS)) { RFC822.MailboxAddresses? to = null; RFC822.MailboxAddresses? cc = null; RFC822.MailboxAddresses? bcc = null; @@ -369,10 +368,9 @@ public class Geary.Imap.Mailbox : Geary.SmartReference { } // SUBJECT - if (!email.fields.is_all_set(Geary.Email.Field.SUBJECT)) { + if (!email.fields.is_all_set(Geary.Email.Field.SUBJECT) && fields.require(Geary.Email.Field.SUBJECT)) { string? value = headers.get_header("Subject"); - if (!String.is_empty(value)) - email.set_message_subject(new RFC822.Subject(value)); + email.set_message_subject(!String.is_empty(value) ? new RFC822.Subject(value) : null); } } diff --git a/src/engine/impl/geary-engine-folder.vala b/src/engine/impl/geary-engine-folder.vala index 7dc68d81..94c80438 100644 --- a/src/engine/impl/geary-engine-folder.vala +++ b/src/engine/impl/geary-engine-folder.vala @@ -223,10 +223,11 @@ private class Geary.EngineFolder : Geary.AbstractFolder { replay_queue.schedule(new ReplayAppend(this, total)); } - // Need to prefetch PROPERTIES (or, in the future NONE or LOCATION) fields to create a + // Need to prefetch at least an EmailIdentifier (and duplicate detection fields) to create a // normalized placeholder in the local database of the message, so all positions are // properly relative to the end of the message list; once this is done, notify user of new - // messages. + // messages. If duplicates, create_email_async() will fall through to an updated merge, + // which is exactly what we want. // // This MUST only be called from ReplayAppend. private async void do_replay_appended_messages(int new_remote_count) { @@ -247,7 +248,7 @@ private class Geary.EngineFolder : Geary.AbstractFolder { // normalize starting at the message *after* the highest position of the local store, // which has now changed Gee.List? list = yield remote_folder.list_email_async(remote_count + 1, -1, - Geary.Email.Field.PROPERTIES, Geary.Folder.ListFlags.NONE, null); + local_folder.get_duplicate_detection_fields(), Geary.Folder.ListFlags.NONE, null); assert(list != null && list.size > 0); foreach (Geary.Email email in list) { @@ -351,6 +352,10 @@ private class Geary.EngineFolder : Geary.AbstractFolder { flags.is_any_set(Folder.ListFlags.FAST)); } + // TODO: A great optimization would be to fetch message "fragments" from the local database + // (retrieve all stored fields that match required_fields, although not all of required_fields + // are present) and only fetch the missing parts from the remote; to do this right, requests + // would have to be parallelized. private async void do_list_email_async(int low, int count, Geary.Email.Field required_fields, Gee.List? accumulator, EmailCallback? cb, Cancellable? cancellable, bool local_only) throws Error { @@ -706,11 +711,18 @@ private class Geary.EngineFolder : Geary.AbstractFolder { high = initial_position; } - int actual_count = (high - low + 1); + // low should never be -1, so don't need to check for that + low = low.clamp(1, int.MAX); - debug("do_list_email_by_id_async: initial_id=%s initial_position=%d count=%d actual_count=%d low=%d high=%d local_count=%d remote_count=%d", + int actual_count = ((high - low) + 1); + + // one more check for exclusive listing + if (actual_count == 0 || (excluding_id && actual_count == 1)) + return; + + debug("do_list_email_by_id_async: initial_id=%s initial_position=%d count=%d actual_count=%d low=%d high=%d local_count=%d remote_count=%d excl=%s", initial_id.to_string(), initial_position, count, actual_count, low, high, local_count, - remote_count); + remote_count, excluding_id.to_string()); yield do_list_email_async(low, actual_count, required_fields, accumulator, cb, cancellable, local_only); @@ -724,6 +736,11 @@ private class Geary.EngineFolder : Geary.AbstractFolder { debug("Background fetching %d emails for %s", needed_by_position.length, to_string()); + // Always get the flags for normalization and whatever the local store requires for duplicate + // detection + Geary.Email.Field full_fields = + required_fields | Geary.Email.Field.PROPERTIES | local_folder.get_duplicate_detection_fields(); + Gee.List full = new Gee.ArrayList(); int index = 0; @@ -738,52 +755,16 @@ private class Geary.EngineFolder : Geary.AbstractFolder { list = needed_by_position; } - // Always get the flags, and the generic end-user won't know to ask for them until they - // need them Gee.List? remote_list = yield remote_folder.list_email_sparse_async( - list, required_fields | Geary.Email.Field.PROPERTIES, Geary.Folder.ListFlags.NONE, - cancellable); + list, full_fields, Geary.Folder.ListFlags.NONE, cancellable); if (remote_list == null || remote_list.size == 0) break; // if any were fetched, store locally // TODO: Bulk writing - foreach (Geary.Email email in remote_list) { - bool exists_in_system = false; - if (email.message_id != null) { - int count; - exists_in_system = yield local.has_message_id_async(email.message_id, out count, - cancellable); - } - - bool exists_in_folder = yield local_folder.is_email_associated_async(email, - cancellable); - - // NOTE: Although this looks redundant, this is a complex decision case and laying - // it out like this helps explain the logic. Also, this code relies on the fact - // that update_email_async() is a powerful call which might be broken down in the - // future (requiring a duplicate email be manually associated with the folder, - // for example), and so would like to keep this around to facilitate that. - if (!exists_in_system && !exists_in_folder) { - // This case indicates the email is new to the local store OR has no - // Message-ID and so a new copy must be stored. - yield local_folder.create_email_async(email, cancellable); - } else if (exists_in_system && !exists_in_folder) { - // This case indicates the email has been (partially) stored previously but - // was not associated with this folder; update it (which implies association) - yield local_folder.update_email_async(email, false, cancellable); - } else if (!exists_in_system && exists_in_folder) { - // This case indicates the message doesn't have a Message-ID and can only be - // identified by a folder-specific ID, so it can be updated in the folder - // (This may result in multiple copies of the message stored locally.) - yield local_folder.update_email_async(email, true, cancellable); - } else if (exists_in_system && exists_in_folder) { - // This indicates the message is in the local store and was previously - // associated with this folder, so merely update the local store - yield local_folder.update_email_async(email, false, cancellable); - } - } + foreach (Geary.Email email in remote_list) + yield local_folder.create_email_async(email, cancellable); if (cb != null) cb(remote_list, null); @@ -824,7 +805,7 @@ private class Geary.EngineFolder : Geary.AbstractFolder { Geary.Email email = yield remote_folder.fetch_email_async(id, fields, cancellable); // save to local store - yield local_folder.update_email_async(email, false, cancellable); + yield local_folder.create_email_async(email, cancellable); return email; } @@ -869,13 +850,10 @@ private class Geary.EngineFolder : Geary.AbstractFolder { debug("prefetching %d (%d) for %s (local_low=%d)", high, prefetch_count, to_string(), local_low); - // Use PROPERTIES as they're the most useful information for certain actions (such as - // finding duplicates when we start using INTERNALDATE and RFC822.SIZE) and cheap to fetch - // - // TODO: Consider only fetching their UID; would need Geary.Email.Field.LOCATION (or - // perhaps NONE is considered a call for just the UID). + // Normalize the local folder by fetching EmailIdentifiers for all missing email as well + // as fields for duplicate detection Gee.List? list = yield remote_folder.list_email_async(high, prefetch_count, - Geary.Email.Field.PROPERTIES, Geary.Folder.ListFlags.NONE, cancellable); + local_folder.get_duplicate_detection_fields(), Geary.Folder.ListFlags.NONE, cancellable); if (list == null || list.size != prefetch_count) { throw new EngineError.BAD_PARAMETERS("Unable to prefetch %d email starting at %d in %s", count, low, to_string()); diff --git a/src/engine/impl/geary-generic-imap-folder.vala b/src/engine/impl/geary-generic-imap-folder.vala index 7a718b50..f7bd7a81 100644 --- a/src/engine/impl/geary-generic-imap-folder.vala +++ b/src/engine/impl/geary-generic-imap-folder.vala @@ -91,7 +91,8 @@ private class Geary.GenericImapFolder : Geary.EngineFolder { cancellable); if (newest != null && newest.size > 0) { - debug("saving %d newest emails in %s", newest.size, to_string()); + debug("saving %d newest emails starting at %s in %s", newest.size, uid_start.to_string(), + to_string()); foreach (Geary.Email email in newest) { try { yield local_folder.create_email_async(email, cancellable); @@ -159,8 +160,7 @@ private class Geary.GenericImapFolder : Geary.EngineFolder { if (remote_uid.value == local_uid.value) { // same, update flags and move on try { - yield imap_local_folder.update_email_async(old_remote[remote_ctr], true, - cancellable); + yield local_folder.create_email_async(old_remote[remote_ctr], cancellable); } catch (Error update_err) { debug("Unable to update old email in %s: %s", to_string(), update_err.message); } diff --git a/src/engine/impl/geary-local-interfaces.vala b/src/engine/impl/geary-local-interfaces.vala index 6317b31e..dfeb720a 100644 --- a/src/engine/impl/geary-local-interfaces.vala +++ b/src/engine/impl/geary-local-interfaces.vala @@ -10,23 +10,19 @@ private interface Geary.LocalAccount : Object, Geary.Account { public abstract async void update_folder_async(Geary.Folder folder, Cancellable? cancellable = null) throws Error; - - /** - * Returns true if the email (identified by its Message-ID) already exists in the account's - * local store, no matter the folder. - * - * Note that there are no guarantees of the uniqueness of a Message-ID, or even that a message - * will have one. Because of this situation the method can return the number of messages - * found with that ID. - */ - public async abstract bool has_message_id_async(Geary.RFC822.MessageID message_id, - out int count, Cancellable? cancellable = null) throws Error; } private interface Geary.LocalFolder : Object, Geary.Folder { public async abstract bool is_email_present_async(Geary.EmailIdentifier id, out Geary.Email.Field available_fields, Cancellable? cancellable = null) throws Error; - + + /** + * Returns the Geary.Email.Field bitfield of all email fields that must be requested from the + * remote folder in order to do proper duplicate detection within the local folder. May + * return Geary.Email.Field.NONE if no duplicate detection is available. + */ + public abstract Geary.Email.Field get_duplicate_detection_fields(); + /** * Converts an EmailIdentifier into positional addressing in the Folder. This call relies on * the fact that when a Folder is fully opened, the local stores' tail list of messages (the @@ -41,47 +37,5 @@ private interface Geary.LocalFolder : Object, Geary.Folder { */ public async abstract int get_id_position_async(Geary.EmailIdentifier id, Cancellable? cancellable) throws Error; - - /** - * Geary allows for a single message to exist in multiple folders. This method checks if the - * email is associated with this folder. It may rely on a Message-ID being present, in which - * case if it's not the method will throw an EngineError.INCOMPLETE_MESSAGE. - * - * If the email is not in the local store, this method returns false. - */ - public async abstract bool is_email_associated_async(Geary.Email email, Cancellable? cancellable = null) - throws Error; - - /** - * Geary allows for a single message to exist in multiple folders. It also allows for partial - * email information to be stored and updated, building the local store as more information is - * downloaded from the server. - * - * update_email_async() updates the email's information in the local store, adding any new - * fields not already present. If the email has fields already stored, the local version *will* - * be overwritten with this new information. However, if the email has fewer fields than the - * local version, the old information will not be lost. In this sense this is a merge - * operation. - * - * update_email_async() will also attempt to associate an email existing in the system with this - * folder. If the message has folder-specific properties that identify it, those will be used; - * if not, update_email_async() will attempt to use the Message-ID. If the Message-ID is not - * available in the email, it will throw EngineError.INCOMPLETE_MESSAGE unless - * duplicate_okay is true, which confirms that it's okay to not attempt the linkage (which - * should be done if the message simply lacks a Message-ID). - * TODO: Examine other fields in the email and attempt to match it with existing messages. - * - * The EmailLocation field is used to position the email in the folder's ordering. - * If another email exists at the same EmailLocation.position, EngineError.ALREADY_EXISTS - * will be thrown. - * - * If the email does not exist in the local store OR the email has no Message-ID and - * no_incomplete_error is true OR multiple messages are found in the system with the same - * Message-ID, update_email-async() will see if there's any indication of the email being - * associated with the folder. If so, it will merge in the new information. If not, this - * method will fall-through to create_email_async(). - */ - public async abstract void update_email_async(Geary.Email email, bool duplicate_okay, - Cancellable? cancellable = null) throws Error; } diff --git a/src/engine/sqlite/api/sqlite-account.vala b/src/engine/sqlite/api/sqlite-account.vala index b7ae6c06..2e164f2e 100644 --- a/src/engine/sqlite/api/sqlite-account.vala +++ b/src/engine/sqlite/api/sqlite-account.vala @@ -186,13 +186,6 @@ private class Geary.Sqlite.Account : Geary.AbstractAccount, Geary.LocalAccount { (properties != null) ? properties.get_imap_folder_properties() : null, path); } - public async bool has_message_id_async(Geary.RFC822.MessageID message_id, out int count, - Cancellable? cancellable = null) throws Error { - count = yield message_table.search_message_id_count_async(null, message_id, cancellable); - - return (count > 0); - } - private Geary.Sqlite.Folder? get_sqlite_folder(Geary.FolderPath path) { FolderReference? folder_ref = folder_refs.get(path); diff --git a/src/engine/sqlite/api/sqlite-folder.vala b/src/engine/sqlite/api/sqlite-folder.vala index 36f6fdea..c7213545 100644 --- a/src/engine/sqlite/api/sqlite-folder.vala +++ b/src/engine/sqlite/api/sqlite-folder.vala @@ -8,6 +8,9 @@ // the future, to support other email services, will need to break this up. private class Geary.Sqlite.Folder : Geary.AbstractFolder, Geary.LocalFolder, Geary.ReferenceSemantics { + private const Geary.Email.Field REQUIRED_FOR_DUPLICATE_DETECTION = + Geary.Email.Field.REFERENCES | Geary.Email.Field.PROPERTIES; + protected int manual_ref_count { get; protected set; } private ImapDatabase db; @@ -49,6 +52,10 @@ private class Geary.Sqlite.Folder : Geary.AbstractFolder, Geary.LocalFolder, Gea return Geary.Folder.ListFlags.NONE; } + public Geary.Email.Field get_duplicate_detection_fields() { + return REQUIRED_FOR_DUPLICATE_DETECTION; + } + internal void update_properties(Geary.Imap.FolderProperties? properties) { this.properties = properties; } @@ -100,6 +107,90 @@ private class Geary.Sqlite.Folder : Geary.AbstractFolder, Geary.LocalFolder, Gea yield atomic_create_email_async(null, email, cancellable); } + // TODO: Need to break out IMAP-specific functionality + private async int64 search_for_duplicate_async(Transaction transaction, Geary.Email email, + Cancellable? cancellable) throws Error { + // if fields not present, then no duplicate can reliably be found + if (!email.fields.is_all_set(REQUIRED_FOR_DUPLICATE_DETECTION)) + return Sqlite.Row.INVALID_ID; + + // what's more, actually need all those fields to be available, not merely attempted, + // to err on the side of safety + if (email.message_id == null) + return Sqlite.Row.INVALID_ID; + + Imap.EmailProperties? imap_properties = (Imap.EmailProperties) email.properties; + string? internaldate = (imap_properties != null && imap_properties.internaldate != null) + ? imap_properties.internaldate.original : null; + long rfc822_size = (imap_properties != null) ? imap_properties.rfc822_size.value : -1; + + if (String.is_empty(internaldate) || rfc822_size < 0) + return Sqlite.Row.INVALID_ID; + + // See if it already exists; first by UID (which is only guaranteed to be unique in a folder, + // not account-wide) + int64 message_id; + if (yield location_table.does_ordering_exist_async(transaction, folder_row.id, + email.id.ordering, out message_id, cancellable)) { + return message_id; + } + + // reset + message_id = Sqlite.Row.INVALID_ID; + + // look for duplicate via Message-ID + Gee.List? list = yield message_table.search_message_id_async(transaction, + email.message_id, cancellable); + + // only a duplicate candidate if exactly one found, otherwise err on the side of safety + if (list != null && list.size == 1) + message_id = list[0]; + + // look for duplicate in IMAP message properties + Gee.List? duplicate_ids = yield imap_message_properties_table.search_for_duplicates_async( + transaction, internaldate, rfc822_size, cancellable); + if (duplicate_ids != null && duplicate_ids.size > 0) { + // if a message_id was found via Message-ID, search for a match; else if one duplicate + // was found via IMAP properties, use that, otherwise err on the side of safety + if (message_id != Sqlite.Row.INVALID_ID) { + int64 match_id = Sqlite.Row.INVALID_ID; + foreach (int64 duplicate_id in duplicate_ids) { + if (message_id == duplicate_id) { + match_id = duplicate_id; + + break; + } + } + + // use the matched ID, which if not found, invalidates the discovered ID + message_id = match_id; + } else if (duplicate_ids.size == 1) { + message_id = duplicate_ids[0]; + } else { + message_id = Sqlite.Row.INVALID_ID; + } + } + + return message_id; + } + + // Returns false if the message already exists at the specified position + private async bool associate_with_folder_async(Transaction transaction, int64 message_id, + Geary.Email email, Cancellable? cancellable) throws Error { + // see if an email exists at this position + MessageLocationRow? location_row = yield location_table.fetch_async(transaction, + folder_row.id, email.position, cancellable); + if (location_row != null) + return false; + + // insert email at supplied position + location_row = new MessageLocationRow(location_table, Row.INVALID_ID, message_id, + folder_row.id, email.id.ordering, email.position); + yield location_table.create_async(transaction, location_row, cancellable); + + return true; + } + private async void atomic_create_email_async(Transaction? supplied_transaction, Geary.Email email, Cancellable? cancellable) throws Error { check_open(); @@ -107,18 +198,29 @@ private class Geary.Sqlite.Folder : Geary.AbstractFolder, Geary.LocalFolder, Gea Transaction transaction = supplied_transaction ?? yield db.begin_transaction_async( "Folder.atomic_create_email_async", cancellable); - // See if it already exists; first by UID (which is only guaranteed to be unique in a folder, - // not account-wide) + // See if this Email is already associated with the folder int64 message_id; - if (yield location_table.does_ordering_exist_async(transaction, folder_row.id, - email.id.ordering, out message_id, cancellable)) { - throw new EngineError.ALREADY_EXISTS("Email with ID %s already exists in %s", - email.id.to_string(), to_string()); + bool associated = yield location_table.does_ordering_exist_async(transaction, folder_row.id, + email.id.ordering, out message_id, cancellable); + + // if duplicate found, associate this email with this folder and merge in any new details + if (!associated || message_id == Sqlite.Row.INVALID_ID) + message_id = yield search_for_duplicate_async(transaction, email, cancellable); + + // if already associated or a duplicate, associated + if (message_id != Sqlite.Row.INVALID_ID) { + if (!associated) + yield associate_with_folder_async(transaction, message_id, email, cancellable); + + yield merge_email_async(transaction, message_id, email, cancellable); + + if (supplied_transaction == null) + yield transaction.commit_if_required_async(cancellable); + + return; } - // TODO: Also check by Message-ID (and perhaps other EmailProperties) to link an existing - // message in the database to this Folder - + // not found, so create and associate with this folder message_id = yield message_table.create_async(transaction, new MessageRow.from_email(message_table, email), cancellable); @@ -382,97 +484,6 @@ private class Geary.Sqlite.Folder : Geary.AbstractFolder, Geary.LocalFolder, Gea out available_fields, cancellable); } - public async bool is_email_associated_async(Geary.Email email, Cancellable? cancellable = null) - throws Error { - check_open(); - - int64 message_id; - return yield location_table.does_ordering_exist_async(null, folder_row.id, - ((Geary.Imap.EmailIdentifier) email.id).uid.value, out message_id, cancellable); - } - - public async void update_email_async(Geary.Email email, bool duplicate_okay, - Cancellable? cancellable = null) throws Error { - check_open(); - - Transaction transaction = yield db.begin_transaction_async("Folder.update_email_async", - cancellable); - - // See if the message can be identified in the folder (which both reveals association and - // a message_id that can be used for a merge; note that this works without a Message-ID) - int64 message_id; - bool associated = yield location_table.does_ordering_exist_async(transaction, folder_row.id, - email.id.ordering, out message_id, cancellable); - - // If working around the lack of a Message-ID and not associated with this folder, treat - // this operation as a create; otherwise, since a folder-association is determined, do - // a merge - if (email.message_id == null) { - if (!associated) { - if (!duplicate_okay) - throw new EngineError.INCOMPLETE_MESSAGE("No Message-ID"); - - yield atomic_create_email_async(transaction, email, cancellable); - } else { - yield merge_email_async(transaction, message_id, email, cancellable); - } - - yield transaction.commit_if_required_async(cancellable); - - return; - } - - // If not associated, find message with matching Message-ID - if (!associated) { - Gee.List? list = yield message_table.search_message_id_async(transaction, - email.message_id, cancellable); - - // If none found, this operation is a create - if (list == null || list.size == 0) { - yield atomic_create_email_async(transaction, email, cancellable); - - yield transaction.commit_if_required_async(cancellable); - - return; - } - - // Too many found turns this operation into a create - if (list.size != 1) { - yield atomic_create_email_async(transaction, email, cancellable); - - yield transaction.commit_if_required_async(cancellable); - - return; - } - - message_id = list[0]; - } - - // Found a message. If not associated with this folder, associate now. - // TODO: Need to lock the database during this operation, as these steps should be atomic. - if (!associated) { - // see if an email exists at this position - MessageLocationRow? location_row = yield location_table.fetch_async(transaction, - folder_row.id, email.position, cancellable); - if (location_row != null) { - throw new EngineError.ALREADY_EXISTS("Email already exists at position %d in %s", - email.position, to_string()); - } - - // insert email at supplied position - location_row = new MessageLocationRow(location_table, Row.INVALID_ID, message_id, - folder_row.id, email.id.ordering, email.position); - yield location_table.create_async(transaction, location_row, cancellable); - } - - // Merge any new information with the existing message in the local store - yield merge_email_async(transaction, message_id, email, cancellable); - - yield transaction.commit_if_required_async(cancellable); - - // Done. - } - private async void merge_email_async(Transaction transaction, int64 message_id, Geary.Email email, Cancellable? cancellable = null) throws Error { assert(message_id != Row.INVALID_ID); @@ -486,7 +497,7 @@ private class Geary.Sqlite.Folder : Geary.AbstractFolder, Geary.LocalFolder, Gea cancellable); assert(message_row != null); - message_row.merge_from_network(email); + message_row.merge_from_remote(email); // possible nothing has changed or been added if (message_row.fields != Geary.Email.Field.NONE) diff --git a/src/engine/sqlite/email/sqlite-message-row.vala b/src/engine/sqlite/email/sqlite-message-row.vala index 5c3aec78..a6e6f834 100644 --- a/src/engine/sqlite/email/sqlite-message-row.vala +++ b/src/engine/sqlite/email/sqlite-message-row.vala @@ -117,7 +117,7 @@ public class Geary.Sqlite.MessageRow : Geary.Sqlite.Row { return email; } - public void merge_from_network(Geary.Email email) { + public void merge_from_remote(Geary.Email email) { foreach (Geary.Email.Field field in Geary.Email.Field.all()) { if ((email.fields & field) != 0) set_from_email(field, email); diff --git a/src/engine/sqlite/email/sqlite-message-table.vala b/src/engine/sqlite/email/sqlite-message-table.vala index 5ad938ab..ffbf9295 100644 --- a/src/engine/sqlite/email/sqlite-message-table.vala +++ b/src/engine/sqlite/email/sqlite-message-table.vala @@ -70,6 +70,8 @@ public class Geary.Sqlite.MessageTable : Geary.Sqlite.Table { return id; } + // TODO: This could be improved greatly, in particular making this a single SQL command or + // parallelizing the commands. public async void merge_async(Transaction? transaction, MessageRow row, Cancellable? cancellable) throws Error { Transaction locked = yield obtain_lock_async(transaction, "MessageTable.merge_async", diff --git a/src/engine/sqlite/imap/sqlite-imap-message-properties-table.vala b/src/engine/sqlite/imap/sqlite-imap-message-properties-table.vala index 37489aca..05fd51f8 100644 --- a/src/engine/sqlite/imap/sqlite-imap-message-properties-table.vala +++ b/src/engine/sqlite/imap/sqlite-imap-message-properties-table.vala @@ -75,5 +75,47 @@ public class Geary.Sqlite.ImapMessagePropertiesTable : Geary.Sqlite.Table { yield release_lock_async(transaction, locked, cancellable); } + + public async Gee.List? search_for_duplicates_async(Transaction? transaction, string? internaldate, + long rfc822_size, Cancellable? cancellable) throws Error { + bool has_internaldate = !String.is_empty(internaldate); + bool has_size = rfc822_size >= 0; + + // at least one parameter must be available + if (!has_internaldate && !has_size) + throw new EngineError.BAD_PARAMETERS("Cannot search for IMAP duplicates without a valid parameter"); + + Transaction locked = yield obtain_lock_async(transaction, "ImapMessagePropertiesTable.search_for_duplicates", + cancellable); + + SQLHeavy.Query query; + if (has_internaldate && has_size) { + query = locked.prepare( + "SELECT message_id FROM ImapMessagePropertiesTable WHERE internaldate=? AND rfc822_size=?"); + query.bind_string(0, internaldate); + query.bind_int64(1, rfc822_size); + } else if (has_internaldate) { + query = locked.prepare( + "SELECT message_id FROM ImapMessagePropertiesTable WHERE internaldate=?"); + query.bind_string(0, internaldate); + } else { + assert(has_size); + query = locked.prepare( + "SELECT message_id FROM ImapMessagePropertiesTable WHERE rfc822_size=?"); + query.bind_int64(0, rfc822_size); + } + + SQLHeavy.QueryResult result = yield query.execute_async(cancellable); + if (result.finished) + return null; + + Gee.List list = new Gee.ArrayList(); + do { + list.add(result.fetch_int64(0)); + yield result.next_async(cancellable); + } while (!result.finished); + + return list; + } }