Implement new contact harvester for the engine

Replaces the harvesting code in ImapDB so that it is stand-alone and
reusable. Implements a harvesting policy that only harvests from a set
of whitelisted special folder types (so junk and trash are not
harvested) and only harvests valid, non-spoofed addresses.
This commit is contained in:
Michael Gratton 2019-06-10 08:43:40 +10:00 committed by Michael James Gratton
parent 96cbb83f83
commit 65ab37938f
7 changed files with 323 additions and 0 deletions

View file

@ -164,6 +164,7 @@ src/engine/app/email-store/app-copy-operation.vala
src/engine/app/email-store/app-fetch-operation.vala
src/engine/app/email-store/app-list-operation.vala
src/engine/app/email-store/app-mark-operation.vala
src/engine/common/common-contact-harvester.vala
src/engine/common/common-contact-store-impl.vala
src/engine/common/common-message-data.vala
src/engine/db/db.vala

View file

@ -0,0 +1,106 @@
/*
* Copyright 2016 Software Freedom Conservancy Inc.
* Copyright 2019 Michael Gratton <mike@vee.net>
*
* This software is licensed under the GNU Lesser General Public License
* (version 2.1 or later). See the COPYING file in this distribution.
*/
/** Denotes objects that extract contacts from email messages. */
internal interface Geary.ContactHarvester : GLib.Object {
/**
 * Extracts contacts from the given collection of email messages.
 *
 * What is done with the extracted contacts is up to the implementation.
 *
 * @param messages the email messages to harvest contacts from
 * @param cancellable optional object for cancelling the operation
 * @throws GLib.Error if the implementation fails while harvesting
 */
public abstract async void harvest_from_email(Gee.Collection<Email> messages,
GLib.Cancellable? cancellable)
throws GLib.Error;
}
/**
 * Default harvester that saves contacts in the contact store.
 *
 * Contacts are only harvested when the folder type given at
 * construction is in the folder whitelist, and only from addresses
 * that report themselves as valid and not spoofed.
 */
internal class Geary.ContactHarvesterImpl : BaseObject, ContactHarvester {

    // Messages that do not fulfil both of these fields are skipped
    private const Email.Field REQUIRED_FIELDS = ORIGINATORS | RECEIVERS;

    // Special folder types that contacts may be harvested from
    private const SpecialFolderType[] FOLDER_WHITELIST = {
        INBOX,
        ARCHIVE,
        SENT,
        NONE
    };

    private ContactStore store;
    private Gee.Collection<RFC822.MailboxAddress> owner_mailboxes;
    private SpecialFolderType location;
    // Cached result of looking up `location` in FOLDER_WHITELIST
    private bool is_whitelisted;

    /**
     * Constructs a new harvester for a folder of the given type.
     *
     * @param store the store that harvested contacts are saved to
     * @param location the special type of the folder being harvested
     * @param owners addresses compared against receiver addresses
     * when choosing a contact's importance
     */
    public ContactHarvesterImpl(ContactStore store,
                                SpecialFolderType location,
                                Gee.Collection<RFC822.MailboxAddress> owners) {
        this.store = store;
        this.owner_mailboxes = owners;
        this.location = location;
        this.is_whitelisted = (location in FOLDER_WHITELIST);
    }

    /**
     * Harvests contacts from the given messages and saves them.
     *
     * Does nothing if the folder type is not whitelisted or if the
     * collection is empty. Messages that do not fulfil
     * REQUIRED_FIELDS are skipped. Contacts are de-duplicated by
     * normalised email address before being passed to the store in a
     * single update.
     *
     * @param messages the email messages to harvest contacts from
     * @param cancellable passed through to the contact store update
     * @throws GLib.Error if updating the contact store fails
     */
    public async void harvest_from_email(Gee.Collection<Email> messages,
                                         GLib.Cancellable? cancellable)
        throws GLib.Error {
        if (this.is_whitelisted && !messages.is_empty) {
            Gee.Map<string,Contact> contacts = new Gee.HashMap<string,Contact>();

            // Base importance for every address found: SEEN, or
            // SENT_TO when harvesting from the sent folder
            int importance = Contact.Importance.SEEN;
            if (this.location == SENT) {
                importance = Contact.Importance.SENT_TO;
            }

            Email.Field type = 0;
            foreach (Email message in messages) {
                if (message.fields.fulfills(REQUIRED_FIELDS)) {
                    type = Email.Field.ORIGINATORS;
                    add_contacts(contacts, message.from, type, importance);
                    if (message.sender != null) {
                        add_contact(contacts, message.sender, type, importance);
                    }
                    // Bcc is a receiver field and is harvested once,
                    // below. It was previously also harvested here as
                    // an originator, which bypassed the owner check in
                    // add_contact for bcc addresses.
                    // NOTE(review): the third originator field may
                    // have been intended here instead; confirm whether
                    // it should be harvested.

                    type = Email.Field.RECEIVERS;
                    add_contacts(contacts, message.to, type, importance);
                    add_contacts(contacts, message.cc, type, importance);
                    add_contacts(contacts, message.bcc, type, importance);
                }
            }

            yield this.store.update_contacts(contacts.values, cancellable);
        }
    }

    /**
     * Adds each address in a possibly-null address list.
     *
     * @param contacts map of harvested contacts, keyed by normalised
     * email address
     * @param addresses the addresses to add, ignored if null
     * @param type the email field the addresses were taken from
     * @param importance the base importance for new contacts
     */
    private void add_contacts(Gee.Map<string, Contact> contacts,
                              RFC822.MailboxAddresses? addresses,
                              Email.Field type,
                              int importance) {
        if (addresses != null) {
            foreach (RFC822.MailboxAddress address in addresses) {
                add_contact(contacts, address, type, importance);
            }
        }
    }

    /**
     * Adds a single address if it is valid and not spoofed.
     *
     * A receiver address that is one of the owner mailboxes is given
     * RECEIVED_FROM importance instead of the base importance. If the
     * map already has a contact for the same normalised address, it
     * is only replaced by one of strictly higher importance.
     *
     * @param contacts map of harvested contacts, keyed by normalised
     * email address
     * @param address the address to add
     * @param type the email field the address was taken from
     * @param importance the base importance for the new contact
     */
    private inline void add_contact(Gee.Map<string, Contact> contacts,
                                    RFC822.MailboxAddress address,
                                    Email.Field type,
                                    int importance) {
        if (address.is_valid() && !address.is_spoofed()) {
            // Only affects this call, since the parameter is a copy
            if (type == RECEIVERS && address in this.owner_mailboxes) {
                importance = Contact.Importance.RECEIVED_FROM;
            }

            Contact contact = new Contact.from_rfc822_address(
                address, importance
            );
            Contact? existing = contacts[contact.normalized_email];
            if (existing == null ||
                existing.highest_importance < contact.highest_importance) {
                contacts[contact.normalized_email] = contact;
            }
        }
    }

}

View file

@ -66,6 +66,7 @@ geary_engine_vala_sources = files(
'app/email-store/app-list-operation.vala',
'app/email-store/app-mark-operation.vala',
'common/common-contact-harvester.vala',
'common/common-contact-store-impl.vala',
'common/common-message-data.vala',

View file

@ -0,0 +1,26 @@
/*
* Copyright 2019 Michael Gratton <mike@vee.net>
*
* This software is licensed under the GNU Lesser General Public License
* (version 2.1 or later). See the COPYING file in this distribution.
*/
/**
 * Mock contact store for unit tests.
 *
 * Each store method forwards its name and arguments to the mock-object
 * call helpers rather than doing any real work.
 */
internal class Geary.ContactStoreMock : ContactStore, MockObject, GLib.Object {
// Queue of expected calls, initially an empty linked list
protected Gee.Queue<ExpectedCall> expected {
get; set; default = new Gee.LinkedList<ExpectedCall>();
}
/**
 * Forwards to object_call() under the name "get_by_rfc822".
 *
 * Only the address is passed as a call argument, the cancellable is
 * not. NOTE(review): the trailing null is presumably the default
 * return value; confirm against MockObject.
 */
public async Contact? get_by_rfc822(Geary.RFC822.MailboxAddress address,
GLib.Cancellable? cancellable)
throws GLib.Error {
return object_call<Contact?>("get_by_rfc822", { address }, null);
}
/**
 * Forwards to void_call() under the name "update_contacts".
 *
 * Both the updated collection (index 0) and the cancellable (index 1)
 * are passed as call arguments.
 */
public async void update_contacts(Gee.Collection<Contact> updated,
GLib.Cancellable? cancellable)
throws GLib.Error {
void_call("update_contacts", { updated, cancellable });
}
}

View file

@ -0,0 +1,186 @@
/*
* Copyright 2019 Michael Gratton <mike@vee.net>
*
* This software is licensed under the GNU Lesser General Public License
* (version 2.1 or later). See the COPYING file in this distribution.
*/
/**
 * Unit tests for ContactHarvesterImpl.
 *
 * Each test harvests a single email through a harvester backed by a
 * ContactStoreMock, then inspects the arguments the mock store was
 * called with.
 */
class Geary.ContactHarvesterImplTest : TestCase {

    private ContactStoreMock? store = null;
    private Email? email = null;
    private RFC822.MailboxAddress test_address = null;
    private RFC822.MailboxAddress sender_address = null;
    // Passed to the harvester as its owner mailboxes
    private Gee.Collection<RFC822.MailboxAddress> senders = null;

    public ContactHarvesterImplTest() {
        base("Geary.ContactHarvesterImplTest");
        add_test("whitelisted_folder_type", whitelisted_folder_type);
        add_test("blacklisted_folder_type", blacklisted_folder_type);
        add_test("seen_priority", seen_priority);
        add_test("sent_priority", sent_priority);
        add_test("received_priority", received_priority);
    }

    public override void set_up() throws GLib.Error {
        this.store = new ContactStoreMock();
        this.email = new Email(
            new ImapDB.EmailIdentifier.no_message_id(new Imap.UID(1))
        );
        // Set the originator and receiver fields, even if empty, so
        // the email carries the fields the harvester requires
        this.email.set_originators(null, null, null);
        this.email.set_receivers(null, null, null);
        this.test_address = new RFC822.MailboxAddress(
            "Test", "test@example.com"
        );
        this.sender_address = new RFC822.MailboxAddress(
            "Sender", "sender@example.com"
        );
        this.senders = Collection.single(this.sender_address);
    }

    public override void tear_down() throws GLib.Error {
        this.store = null;
        this.email = null;
        this.test_address = null;
        this.sender_address = null;
        this.senders = null;
    }

    /** An inbox harvester should pass the receiver to the store. */
    public void whitelisted_folder_type() throws GLib.Error {
        ContactHarvesterImpl whitelisted = new ContactHarvesterImpl(
            this.store,
            SpecialFolderType.INBOX,
            this.senders
        );
        ExpectedCall call = this.store.expect_call("update_contacts");

        harvest_with_receiver(whitelisted, this.test_address);
        this.store.assert_expectations();

        Gee.Collection<Contact> contacts = call.called_arg<Gee.Collection<Contact>>(0);
        assert_int(1, contacts.size, "contacts length");
        Contact? created = Collection.get_first<Contact>(contacts) as Contact;
        assert_non_null(created, "contacts contents");
        assert_string("Test", created.real_name);
        assert_string("test@example.com", created.email);
        assert_string("test@example.com", created.normalized_email);
    }

    /** A spam folder harvester should not call the store at all. */
    public void blacklisted_folder_type() throws GLib.Error {
        ContactHarvesterImpl blacklisted = new ContactHarvesterImpl(
            this.store,
            SpecialFolderType.SPAM,
            this.senders
        );

        // No call is registered as expected here.
        // NOTE(review): presumably the mock fails the test on an
        // unexpected update_contacts call; confirm against MockObject.
        harvest_with_receiver(blacklisted, this.test_address);
        this.store.assert_expectations();
    }

    /** Contacts harvested from the inbox should be marked SEEN. */
    public void seen_priority() throws GLib.Error {
        ContactHarvesterImpl whitelisted = new ContactHarvesterImpl(
            this.store,
            SpecialFolderType.INBOX,
            this.senders
        );
        ExpectedCall call = this.store.expect_call("update_contacts");

        harvest_with_receiver(whitelisted, this.test_address);
        this.store.assert_expectations();

        Contact? created = first_harvested(call);
        assert_int(
            Contact.Importance.SEEN,
            created.highest_importance,
            "call contact importance"
        );
    }

    /** Contacts harvested from the sent folder should be SENT_TO. */
    public void sent_priority() throws GLib.Error {
        ContactHarvesterImpl whitelisted = new ContactHarvesterImpl(
            this.store,
            SpecialFolderType.SENT,
            this.senders
        );
        ExpectedCall call = this.store.expect_call("update_contacts");

        harvest_with_receiver(whitelisted, this.test_address);
        this.store.assert_expectations();

        Contact? created = first_harvested(call);
        assert_int(
            Contact.Importance.SENT_TO,
            created.highest_importance,
            "call contact importance"
        );
    }

    /** An owner address as receiver should be RECEIVED_FROM. */
    public void received_priority() throws GLib.Error {
        ContactHarvesterImpl whitelisted = new ContactHarvesterImpl(
            this.store,
            SpecialFolderType.SENT,
            this.senders
        );
        ExpectedCall call = this.store.expect_call("update_contacts");

        harvest_with_receiver(whitelisted, this.sender_address);
        this.store.assert_expectations();

        Contact? created = first_harvested(call);
        assert_int(
            Contact.Importance.RECEIVED_FROM,
            created.highest_importance,
            "call contact importance"
        );
    }

    // Sets the given address as the test email's sole receiver, then
    // runs the harvester over that email and waits for it to finish.
    private void harvest_with_receiver(ContactHarvesterImpl harvester,
                                       RFC822.MailboxAddress receiver)
        throws GLib.Error {
        this.email.set_receivers(
            new RFC822.MailboxAddresses.single(receiver), null, null
        );
        harvester.harvest_from_email.begin(
            Collection.single(this.email), null,
            (obj, ret) => { async_complete(ret); }
        );
        harvester.harvest_from_email.end(async_result());
    }

    // Returns the first contact passed to the recorded store call,
    // asserting it exists so a missing contact fails the test cleanly
    // instead of being dereferenced as null.
    private Contact? first_harvested(ExpectedCall call) throws GLib.Error {
        Gee.Collection<Contact> contacts = call.called_arg<Gee.Collection<Contact>>(0);
        Contact? created = Collection.get_first<Contact>(contacts) as Contact;
        assert_non_null(created, "contacts contents");
        return created;
    }

}

View file

@ -14,6 +14,7 @@ geary_test_engine_sources = [
# geary-engine_internal.vapi, which leads to duplicate symbols when
# linking
'engine/api/geary-account-mock.vala',
'engine/api/geary-contact-store-mock.vala',
'engine/api/geary-credentials-mediator-mock.vala',
'engine/api/geary-email-identifier-mock.vala',
'engine/api/geary-email-properties-mock.vala',
@ -29,6 +30,7 @@ geary_test_engine_sources = [
'engine/app/app-conversation-monitor-test.vala',
'engine/app/app-conversation-set-test.vala',
'engine/common/common-contact-store-impl-test.vala',
'engine/common/common-contact-harvester-test.vala',
'engine/db/db-database-test.vala',
'engine/db/db-versioned-database-test.vala',
'engine/imap/command/imap-create-command-test.vala',

View file

@ -24,6 +24,7 @@ int main(string[] args) {
engine.add_suite(new Geary.AccountInformationTest().get_suite());
engine.add_suite(new Geary.AttachmentTest().get_suite());
engine.add_suite(new Geary.ContactHarvesterImplTest().get_suite());
engine.add_suite(new Geary.EngineTest().get_suite());
engine.add_suite(new Geary.FolderPathTest().get_suite());
engine.add_suite(new Geary.IdleManagerTest().get_suite());