From 755d5cfa39c72e1d2a9cbd495cdd8fc482054aeb Mon Sep 17 00:00:00 2001 From: Jim Nelson Date: Wed, 4 Jan 2012 11:18:38 -0800 Subject: [PATCH] Fixes problems discovered with Message-ID list parsing This morning I discovered that Geary had crashed. When I attempted to rerun it, it kept asserting in the Conversations logic. I discovered that there was a problem with associating an email's Message-ID with its own Message-ID processed in a prior email's References header field. Turns out that there's a mailer out there that uses commas to separate Message-IDs (a no-no in the spec). There's also a spam filtering mailer out there that uses parentheses instead of angle brackets for its Message-IDs (another no-no). This commit strengthens the Message-ID parsing to handle these situations. --- src/engine/rfc822/rfc822-message-data.vala | 40 +++++++++++++++++++--- 1 file changed, 35 insertions(+), 5 deletions(-) diff --git a/src/engine/rfc822/rfc822-message-data.vala b/src/engine/rfc822/rfc822-message-data.vala index b114b2d7..d144e7db 100644 --- a/src/engine/rfc822/rfc822-message-data.vala +++ b/src/engine/rfc822/rfc822-message-data.vala @@ -30,13 +30,43 @@ public class Geary.RFC822.MessageIDList : Geary.Common.StringMessageData, Geary. string[] ids = value.split_set(" \n\r\t"); foreach (string id in ids) { - id = id.strip(); - if (!String.is_empty(id)) { - if (list == null) - list = new Gee.ArrayList(); + if (String.is_empty(id)) + continue; + + // Have seen some mailers use commas between Message-IDs, meaning that the standard + // whitespace tokenizer is not sufficient; however, can't add the comma (or every other + // delimiter that mailers dream up) because it may be used within a Message-ID. The + // only guarantee made of a Message-ID is that it's surrounded by angle brackets, so + // mark anything not an angle bracket as a space and strip + // + // NOTE: Seen at least one spamfilter mailer that imaginatively uses parens instead of + // angle brackets for its Message-IDs; accounting for that as well here. + int start = id.index_of_char('<'); + if (start < 0) + start = id.index_of_char('('); + + int end = id.last_index_of_char('>'); + if (end < 0) + end = id.last_index_of_char(')'); + + // if either end not found or the end comes before the beginning, invalid Message-ID + if (start < 0 || end < 0 || (start >= end)) { + debug("Invalid Message-ID found: \"%s\"", id); - list.add(new MessageID(id)); + continue; } + + // take out the valid slice of the string + string valid = id.slice(start, end + 1); + assert(!String.is_empty(valid)); + + if (id != valid) + debug("Corrected Message-ID: \"%s\" -> \"%s\"", id, valid); + + if (list == null) + list = new Gee.ArrayList(); + + list.add(new MessageID(valid)); } } }