diff --git a/src/engine/rfc822/rfc822-message-data.vala b/src/engine/rfc822/rfc822-message-data.vala index b114b2d7..d144e7db 100644 --- a/src/engine/rfc822/rfc822-message-data.vala +++ b/src/engine/rfc822/rfc822-message-data.vala @@ -30,13 +30,43 @@ public class Geary.RFC822.MessageIDList : Geary.Common.StringMessageData, Geary. string[] ids = value.split_set(" \n\r\t"); foreach (string id in ids) { - id = id.strip(); - if (!String.is_empty(id)) { - if (list == null) - list = new Gee.ArrayList(); + if (String.is_empty(id)) + continue; + + // Have seen some mailers use commas between Message-IDs, meaning that the standard + // whitespace tokenizer is not sufficient; however, can't add the comma (or every other + // delimiter that mailers dream up) because it may be used within a Message-ID. The + // only guarantee made of a Message-ID is that it's surrounded by angle brackets, so + // mark anything not an angle bracket as a space and strip + // + // NOTE: Seen at least one spamfilter mailer that imaginatively uses parens instead of + // angle brackets for its Message-IDs; accounting for that as well here. + int start = id.index_of_char('<'); + if (start < 0) + start = id.index_of_char('('); + + int end = id.last_index_of_char('>'); + if (end < 0) + end = id.last_index_of_char(')'); + + // if either end not found or the end comes before the beginning, invalid Message-ID + if (start < 0 || end < 0 || (start >= end)) { + debug("Invalid Message-ID found: \"%s\"", id); - list.add(new MessageID(id)); + continue; } + + // take out the valid slice of the string + string valid = id.slice(start, end + 1); + assert(!String.is_empty(valid)); + + if (id != valid) + debug("Corrected Message-ID: \"%s\" -> \"%s\"", id, valid); + + if (list == null) + list = new Gee.ArrayList(); + + list.add(new MessageID(valid)); } } }