Ensure encoded mailbox addresses are decoded correctly.

Both RFC mailbox address names and mailboxes/local-names may by RFC 2047
be Quoted-Printable or Base64 encoded. This patch ensures these parts are
correctly decoded when parsing a RFC 822 message, so that they are
displayed to the user in human-readable form.

Part 2 of Mailsploit mitigation.

* src/engine/rfc822/rfc822-message.vala (Message): Since GMime.Message's
  convenience properties for accessing header values such as senders,
  recipients, etc. in string form are presented as human-readable, not
  RFC822 compliant strings, we can't re-parse them for use by this class
  when it is being constructed from a GMime-based source. Instead,
  iterate over all headers to get the raw values and parse those we are
  interested in instead. Add unit tests.

* src/engine/rfc822/rfc822-mailbox-address.vala (BaseObject): Add gmime
  constructor so we can handle construction and decoding from a GMime
  InternetAddressMailbox object in a consistent way. Ensure both names
  are decoded correctly, and mailboxes are decoded at all, from both
  GMime and IMAP sources. If a GMime source's address has no @-symbol,
  try to decode the whole thing first it in case the whole address is
  encoded. Add unit tests.

* src/engine/rfc822/rfc822-mailbox-addresses.vala (MailboxAddresses):
  Add append method and handle group addresses here instead of in Message
  to simplify updated Message implementation.

* src/engine/rfc822/rfc822-message-data.vala (MessageData): Add append
  method to simplify updated Message implementation.
This commit is contained in:
Michael James Gratton 2018-01-31 11:41:03 +10:30
parent 8810e95753
commit 8a1906fa96
7 changed files with 379 additions and 147 deletions

View file

@ -1376,7 +1376,7 @@ namespace GMime {
[CCode (cheader_filename = "gmime/gmime.h", cname = "g_mime_utils_structured_header_fold")]
public static string utils_structured_header_fold (string header);
[CCode (cheader_filename = "gmime/gmime.h", cname = "g_mime_utils_text_is_8bit")]
public static bool utils_text_is_8bit (uint text, size_t len);
public static bool utils_text_is_8bit (string text, size_t len);
[CCode (cheader_filename = "gmime/gmime.h", cname = "g_mime_utils_unquote_string")]
public static void utils_unquote_string (string str);
[CCode (cheader_filename = "gmime/gmime.h", cname = "g_mime_utils_unstructured_header_fold")]

View file

@ -34,6 +34,47 @@ public class Geary.RFC822.MailboxAddress : Geary.MessageData.SearchableMessageDa
}
}
private static string decode_name(string name) {
return GMime.utils_header_decode_phrase(prepare_header_text_part(name));
}
private static string decode_address_part(string mailbox) {
return GMime.utils_header_decode_text(prepare_header_text_part(mailbox));
}
private static string prepare_header_text_part(string part) {
// Borrowed liberally from GMime's internal
// _internet_address_decode_name() function.
// see if a broken mailer has sent raw 8-bit information
string text = GMime.utils_text_is_8bit(part, part.length)
? part : GMime.utils_decode_8bit(part, part.length);
// unquote the string then decode the text
GMime.utils_unquote_string(text);
// Sometimes quoted printables contain unencoded spaces which trips up GMime, so we want to
// encode them all here.
int offset = 0;
int start;
while ((start = text.index_of("=?", offset)) != -1) {
// Find the closing marker.
int end = text.index_of("?=", start + 2) + 2;
if (end == -1) {
end = text.length;
}
// Replace any spaces inside the encoded string.
string encoded = text.substring(start, end - start);
if (encoded.contains("\x20")) {
text = text.replace(encoded, encoded.replace("\x20", "_"));
}
offset = end;
}
return text;
}
internal delegate string ListToStringDelegate(MailboxAddress address);
@ -84,26 +125,24 @@ public class Geary.RFC822.MailboxAddress : Geary.MessageData.SearchableMessageDa
public MailboxAddress(string? name, string address) {
this.name = name;
this.source_route = null;
this.address = address;
source_route = null;
int atsign = address.last_index_of_char('@');
if (atsign > 0) {
mailbox = address.slice(0, atsign);
domain = address.slice(atsign + 1, address.length);
this.mailbox = address[0:atsign];
this.domain = address[atsign + 1:address.length];
} else {
mailbox = "";
domain = "";
this.mailbox = "";
this.domain = "";
}
}
public MailboxAddress.imap(string? name, string? source_route, string mailbox, string domain) {
this.name = (name != null) ? decode_name(name) : null;
this.source_route = source_route;
this.mailbox = mailbox;
this.mailbox = decode_address_part(mailbox);
this.domain = domain;
this.address = "%s@%s".printf(mailbox, domain);
}
@ -119,41 +158,39 @@ public class Geary.RFC822.MailboxAddress : Geary.MessageData.SearchableMessageDa
// TODO: Handle group lists
InternetAddressMailbox? mbox_addr = addr as InternetAddressMailbox;
if (mbox_addr != null) {
this(mbox_addr.get_name(), mbox_addr.get_addr());
this.gmime(mbox_addr);
return;
}
}
throw new RFC822Error.INVALID("Could not parse RFC822 address: %s", rfc822);
}
// Borrowed liberally from GMime's internal _internet_address_decode_name() function.
private static string decode_name(string name) {
// see if a broken mailer has sent raw 8-bit information
string text = name.validate() ? name : GMime.utils_decode_8bit(name, name.length);
// unquote the string and decode the text
GMime.utils_unquote_string(text);
// Sometimes quoted printables contain unencoded spaces which trips up GMime, so we want to
// encode them all here.
int offset = 0;
int start;
while ((start = text.index_of("=?", offset)) != -1) {
// Find the closing marker.
int end = text.index_of("?=", start + 2) + 2;
if (end == -1) {
end = text.length;
}
// Replace any spaces inside the encoded string.
string encoded = text.substring(start, end - start);
if (encoded.contains("\x20")) {
text = text.replace(encoded, encoded.replace("\x20", "_"));
}
offset = end;
public MailboxAddress.gmime(InternetAddressMailbox mailbox) {
// GMime strips source route for us, so the address part
// should only ever contain a single '@'
string? name = mailbox.get_name();
if (name != null) {
this.name = decode_name(name);
}
return GMime.utils_header_decode_text(text);
string address = mailbox.get_addr();
int atsign = address.last_index_of_char('@');
if (atsign == -1) {
// No @ detected, try decoding in case a mailer (wrongly)
// encoded the whole thing and re-try
address = decode_address_part(address);
atsign = address.last_index_of_char('@');
}
if (atsign >= 0) {
this.mailbox = decode_address_part(address[0:atsign]);
this.domain = address[atsign + 1:address.length];
this.address = "%s@%s".printf(this.mailbox, this.domain);
} else {
this.mailbox = "";
this.domain = "";
this.address = address;
}
}
/**

View file

@ -4,6 +4,14 @@
* (version 2.1 or later). See the COPYING file in this distribution.
*/
/**
* An immutable representation an RFC 822 address list.
*
* This would typically be found as the value of the To, CC, BCC and
* other headers fields.
*
* See [[https://tools.ietf.org/html/rfc5322#section-3.4]]
*/
public class Geary.RFC822.MailboxAddresses : Geary.MessageData.AbstractMessageData,
Geary.MessageData.SearchableMessageData, Geary.RFC822.MessageData, Gee.Hashable<MailboxAddresses> {
@ -27,13 +35,26 @@ public class Geary.RFC822.MailboxAddresses : Geary.MessageData.AbstractMessageDa
int length = addrlist.length();
for (int ctr = 0; ctr < length; ctr++) {
InternetAddress? addr = addrlist.get_address(ctr);
// TODO: Handle group lists
InternetAddressMailbox? mbox_addr = addr as InternetAddressMailbox;
if (mbox_addr == null)
continue;
addrs.add(new MailboxAddress(mbox_addr.get_name(), mbox_addr.get_addr()));
if (mbox_addr != null) {
this.addrs.add(new MailboxAddress.gmime(mbox_addr));
} else {
// XXX this is pretty bad - we just flatten the
// group's addresses into this list, merging lists and
// losing the group names.
InternetAddressGroup? mbox_group = addr as InternetAddressGroup;
if (mbox_group != null) {
InternetAddressList group_list = mbox_group.get_members();
for (int i = 0; i < group_list.length(); i++) {
InternetAddressMailbox? group_addr =
addrlist.get_address(i) as InternetAddressMailbox;
if (group_addr != null) {
this.addrs.add(new MailboxAddress.gmime(group_addr));
}
}
}
}
}
}
@ -73,7 +94,16 @@ public class Geary.RFC822.MailboxAddresses : Geary.MessageData.AbstractMessageDa
return false;
}
/**
* Returns a new list with the given addresses appended to this list's.
*/
public MailboxAddresses append(MailboxAddresses others) {
MailboxAddresses new_addrs = new MailboxAddresses(this.addrs);
new_addrs.addrs.add_all(others.addrs);
return new_addrs;
}
/**
* Returns the addresses suitable for insertion into an RFC822 message. RFC822 quoting is
* performed if required.

View file

@ -142,7 +142,17 @@ public class Geary.RFC822.MessageIDList : Geary.MessageData.AbstractMessageData,
// don't assert that list.size > 0; even though this method should generated a decoded ID
// from any non-empty string, an empty Message-ID (i.e. "<>") won't.
}
/**
* Returns a new list with the given messages ids appended to this list's.
*/
public MessageIDList append(MessageIDList others) {
MessageIDList new_ids = new MessageIDList();
new_ids.list.add_all(this.list);
new_ids.list.add_all(others.list);
return new_ids;
}
public override string to_string() {
return "MessageIDList (%d)".printf(list.size);
}

View file

@ -416,106 +416,6 @@ public class Geary.RFC822.Message : BaseObject {
return null;
}
private void stock_from_gmime() {
// GMime calls the From address the "sender"
string? message_sender = message.get_sender();
if (message_sender != null) {
this.from = new RFC822.MailboxAddresses.from_rfc822_string(message_sender);
}
// And it doesn't provide a convenience method for Sender header
if (!String.is_empty(message.get_header(HEADER_SENDER))) {
string sender = GMime.utils_header_decode_text(message.get_header(HEADER_SENDER));
try {
this.sender = new RFC822.MailboxAddress.from_rfc822_string(sender);
} catch (RFC822Error e) {
debug("Invalid RDC822 Sender address: %s", sender);
}
}
if (!String.is_empty(message.get_reply_to()))
this.reply_to = new RFC822.MailboxAddresses.from_rfc822_string(message.get_reply_to());
Gee.List<RFC822.MailboxAddress>? converted = convert_gmime_address_list(
message.get_recipients(GMime.RecipientType.TO));
if (converted != null && converted.size > 0)
to = new RFC822.MailboxAddresses(converted);
converted = convert_gmime_address_list(message.get_recipients(GMime.RecipientType.CC));
if (converted != null && converted.size > 0)
cc = new RFC822.MailboxAddresses(converted);
converted = convert_gmime_address_list(message.get_recipients(GMime.RecipientType.BCC));
if (converted != null && converted.size > 0)
bcc = new RFC822.MailboxAddresses(converted);
if (!String.is_empty(message.get_header(HEADER_IN_REPLY_TO)))
in_reply_to = new RFC822.MessageIDList.from_rfc822_string(message.get_header(HEADER_IN_REPLY_TO));
if (!String.is_empty(message.get_header(HEADER_REFERENCES)))
references = new RFC822.MessageIDList.from_rfc822_string(message.get_header(HEADER_REFERENCES));
if (!String.is_empty(message.get_subject()))
subject = new RFC822.Subject.decode(message.get_subject());
if (!String.is_empty(message.get_header(HEADER_MAILER)))
mailer = message.get_header(HEADER_MAILER);
if (!String.is_empty(message.get_date_as_string())) {
try {
date = new Geary.RFC822.Date(message.get_date_as_string());
} catch (Error error) {
debug("Could not get date from message: %s", error.message);
}
}
}
private Gee.List<RFC822.MailboxAddress>? convert_gmime_address_list(InternetAddressList? addrlist,
int depth = 0) {
if (addrlist == null || addrlist.length() == 0)
return null;
Gee.List<RFC822.MailboxAddress>? converted = new Gee.ArrayList<RFC822.MailboxAddress>();
int length = addrlist.length();
for (int ctr = 0; ctr < length; ctr++) {
InternetAddress addr = addrlist.get_address(ctr);
InternetAddressMailbox? mbox_addr = addr as InternetAddressMailbox;
if (mbox_addr != null) {
converted.add(new RFC822.MailboxAddress(mbox_addr.get_name(), mbox_addr.get_addr()));
continue;
}
// Two problems here:
//
// First, GMime crashes when parsing a malformed group list (the case seen in the
// wild is -- weirdly enough -- a date appended to the end of a cc: list on a spam
// email. GMime interprets it as a group list but segfaults when destroying the
// InterneAddresses it generated from it. See:
// https://bugzilla.gnome.org/show_bug.cgi?id=695319
//
// Second, RFC 822 6.2.6: "This standard does not permit recursive specification
// of groups within groups." So don't do it.
InternetAddressGroup? group = addr as InternetAddressGroup;
if (group != null) {
if (depth == 0) {
Gee.List<RFC822.MailboxAddress>? grouplist = convert_gmime_address_list(
group.get_members(), depth + 1);
if (grouplist != null)
converted.add_all(grouplist);
}
continue;
}
warning("Unknown InternetAddress in list: %s", addr.get_type().name());
}
return (converted.size > 0) ? converted : null;
}
public Gee.List<RFC822.MailboxAddress>? get_recipients() {
Gee.List<RFC822.MailboxAddress> addrs = new Gee.ArrayList<RFC822.MailboxAddress>();
@ -873,7 +773,86 @@ public class Geary.RFC822.Message : BaseObject {
get_attachments_recursively(attachments, message.get_mime_part(), disposition);
return attachments;
}
private void stock_from_gmime() {
this.message.get_header_list().foreach((name, value) => {
switch (name.down()) {
case "from":
this.from = append_address(this.from, value);
break;
case "sender":
try {
this.sender = new RFC822.MailboxAddress.from_rfc822_string(value);
} catch (Error err) {
debug("Could parse subject: %s", err.message);
}
break;
case "reply-to":
this.reply_to = append_address(this.reply_to, value);
break;
case "to":
this.to = append_address(this.to, value);
break;
case "cc":
this.cc = append_address(this.cc, value);
break;
case "bcc":
this.bcc = append_address(this.bcc, value);
break;
case "subject":
this.subject = new RFC822.Subject.decode(value);
break;
case "date":
try {
this.date = new Geary.RFC822.Date(value);
} catch (Error err) {
debug("Could not parse date: %s", err.message);
}
break;
case "in-reply-to":
this.in_reply_to = append_message_id(this.in_reply_to, value);
break;
case "references":
this.references = append_message_id(this.references, value);
break;
case "x-mailer":
this.mailer = GMime.utils_header_decode_text(value);
break;
default:
break;
}
});
}
private MailboxAddresses append_address(MailboxAddresses? existing,
string header_value) {
MailboxAddresses addresses = new MailboxAddresses.from_rfc822_string(header_value);
if (existing != null) {
addresses = existing.append(addresses);
}
return addresses;
}
private MessageIDList append_message_id(MessageIDList? existing,
string header_value) {
MessageIDList ids = new MessageIDList.from_rfc822_string(header_value);
if (existing != null) {
ids = existing.append(ids);
}
return ids;
}
private void get_attachments_recursively(Gee.List<GMime.Part> attachments, GMime.Object root,
Mime.DispositionType requested_disposition) throws RFC822Error {
// If this is a multipart container, dive into each of its children.

View file

@ -11,6 +11,7 @@ class Geary.RFC822.MailboxAddressTest : Gee.TestCase {
base("Geary.RFC822.MailboxAddressTest");
add_test("is_valid_address", is_valid_address);
add_test("unescaped_constructor", unescaped_constructor);
add_test("from_rfc822_string_encoded", from_rfc822_string_encoded);
add_test("is_spoofed", is_spoofed);
add_test("has_distinct_name", has_distinct_name);
add_test("to_full_display", to_full_display);
@ -66,6 +67,87 @@ class Geary.RFC822.MailboxAddressTest : Gee.TestCase {
assert(addr5.domain == "");
}
public void from_rfc822_string_encoded() {
try {
MailboxAddress addr = new MailboxAddress.from_rfc822_string("test@example.com");
assert(addr.name == null);
assert(addr.mailbox == "test");
assert(addr.domain == "example.com");
addr = new MailboxAddress.from_rfc822_string("\"test\"@example.com");
assert(addr.name == null);
assert(addr.address == "test@example.com");
assert(addr.mailbox == "test");
assert(addr.domain == "example.com");
addr = new MailboxAddress.from_rfc822_string("=?UTF-8?b?dGVzdA==?=@example.com");
assert(addr.name == null);
assert(addr.address == "test@example.com");
assert(addr.mailbox == "test");
assert(addr.domain == "example.com");
addr = new MailboxAddress.from_rfc822_string("\"=?UTF-8?b?dGVzdA==?=\"@example.com");
assert(addr.name == null);
assert(addr.address == "test@example.com");
assert(addr.mailbox == "test");
assert(addr.domain == "example.com");
addr = new MailboxAddress.from_rfc822_string("<test@example.com>");
assert(addr.name == null);
assert(addr.address == "test@example.com");
assert(addr.mailbox == "test");
assert(addr.domain == "example.com");
addr = new MailboxAddress.from_rfc822_string("<\"test\"@example.com>");
assert(addr.name == null);
assert(addr.address == "test@example.com");
assert(addr.mailbox == "test");
assert(addr.domain == "example.com");
addr = new MailboxAddress.from_rfc822_string("Test 1 <test2@example.com>");
assert(addr.name == "Test 1");
assert(addr.address == "test2@example.com");
assert(addr.mailbox == "test2");
assert(addr.domain == "example.com");
addr = new MailboxAddress.from_rfc822_string("\"Test 1\" <test2@example.com>");
assert(addr.name == "Test 1");
assert(addr.address == "test2@example.com");
assert(addr.mailbox == "test2");
assert(addr.domain == "example.com");
addr = new MailboxAddress.from_rfc822_string("Test 1 <\"test2\"@example.com>");
assert(addr.name == "Test 1");
assert(addr.address == "test2@example.com");
assert(addr.mailbox == "test2");
assert(addr.domain == "example.com");
addr = new MailboxAddress.from_rfc822_string("=?UTF-8?b?VGVzdCAx?= <test2@example.com>");
assert(addr.name == "Test 1");
assert(addr.address == "test2@example.com");
assert(addr.mailbox == "test2");
assert(addr.domain == "example.com");
addr = new MailboxAddress.from_rfc822_string("\"=?UTF-8?b?VGVzdCAx?=\" <test2@example.com>");
assert(addr.name == "Test 1");
assert(addr.address == "test2@example.com");
assert(addr.mailbox == "test2");
assert(addr.domain == "example.com");
// Courtesy Mailsploit https://www.mailsploit.com
addr = new MailboxAddress.from_rfc822_string("\"=?utf-8?b?dGVzdCIgPHBvdHVzQHdoaXRlaG91c2UuZ292Pg==?==?utf-8?Q?=00=0A?=\" <demo@mailsploit.com>");
assert(addr.name == "test <potus@whitehouse.gov>?\n");
assert(addr.address == "demo@mailsploit.com");
// Courtesy Mailsploit https://www.mailsploit.com
addr = new MailboxAddress.from_rfc822_string("\"=?utf-8?Q?=42=45=47=49=4E=20=2F=20=28=7C=29=7C=3C=7C=3E=7C=40=7C=2C=7C=3B=7C=3A=7C=5C=7C=22=7C=2F=7C=5B=7C=5D=7C=3F=7C=2E=7C=3D=20=2F=20=00=20=50=41=53=53=45=44=20=4E=55=4C=4C=20=42=59=54=45=20=2F=20=0D=0A=20=50=41=53=53=45=44=20=43=52=4C=46=20=2F=20?==?utf-8?b?RU5E=?=\"");
assert(addr.name == null);
assert(addr.address == "BEGIN / (|)|<|>|@|,|;|:|\\|\"|/|[|]|?|.|= / ? PASSED NULL BYTE / \r\n PASSED CRLF / END");
} catch (Error err) {
assert_not_reached();
}
}
public void is_spoofed() {
assert(new MailboxAddress(null, "example@example.com").is_spoofed() == false);
assert(new MailboxAddress("", "example@example.com").is_spoofed() == false);

File diff suppressed because one or more lines are too long