From c306c2c5faee6f7c58baea1e802e6bb4c9c8364c Mon Sep 17 00:00:00 2001 From: Michael Gratton Date: Wed, 24 Apr 2019 20:38:11 +1000 Subject: [PATCH] Correctly handle escaping UTF-8 characters in RFC822 mailboxes Use string.get_next_char() to handle iterating over an RFC822.MailboxAddress mailbox local part, and explicitly allow UTF-8 2, 3, and 4 byte chars without quoting, per RFC 5322. This lets us fix the test cases that use UTF-8 chars in the local part. --- src/engine/rfc822/rfc822-mailbox-address.vala | 25 +++++++++++++------ test/engine/rfc822-mailbox-address-test.vala | 10 ++++++-- 2 files changed, 25 insertions(+), 10 deletions(-) diff --git a/src/engine/rfc822/rfc822-mailbox-address.vala b/src/engine/rfc822/rfc822-mailbox-address.vala index a1864baf..eda28fe1 100644 --- a/src/engine/rfc822/rfc822-mailbox-address.vala +++ b/src/engine/rfc822/rfc822-mailbox-address.vala @@ -21,7 +21,7 @@ public class Geary.RFC822.MailboxAddress : Gee.Hashable, BaseObject { - private static char[] ATEXT = { + private static unichar[] ATEXT = { '!', '#', '$', '%', '&', '\'', '*', '+', '-', '/', '=', '?', '^', '_', '`', '{', '|', '}', '~' }; @@ -64,17 +64,26 @@ public class Geary.RFC822.MailboxAddress : if (!String.is_empty(local_part)) { int index = 0; for (;;) { - char ch = local_part[index++]; - if (ch == String.EOS) + unichar ch; + if (!local_part.get_next_char(ref index, out ch)) { break; + } is_dot = (ch == '.'); - if (!(ch >= 0x41 && ch <= 0x5A) && // A-Z - !(ch >= 0x61 && ch <= 0x7A) && // a-z - !(ch >= 0x30 && ch <= 0x39) && // 0-9 - !(ch in ATEXT) && - !(is_dot && index > 1)) { // no leading dots + if (!( + // RFC 5322 ASCII + (ch >= 0x61 && ch <= 0x7A) || // a-z + (ch >= 0x41 && ch <= 0x5A) || // A-Z + (ch >= 0x30 && ch <= 0x39) || // 0-9 + // RFC 6532 UTF8 + (ch >= 0x80 && ch <= 0x07FF) || // UTF-8 2 byte + (ch >= 0x800 && ch <= 0xFFFF) || // UTF-8 3 byte + (ch >= 0x10000 && ch <= 0x10FFFF) || // UTF-8 4 byte + // RFC 5322 atext + (ch in ATEXT) || + // RFC 5322 
dot-atom (no leading dots) + (is_dot && index > 1))) { needs_quote = true; break; } diff --git a/test/engine/rfc822-mailbox-address-test.vala b/test/engine/rfc822-mailbox-address-test.vala index 761cf796..d65a5869 100644 --- a/test/engine/rfc822-mailbox-address-test.vala +++ b/test/engine/rfc822-mailbox-address-test.vala @@ -228,6 +228,7 @@ class Geary.RFC822.MailboxAddressTest : TestCase { // "\"test\" test\"@example.com"); //assert(new MailboxAddress(null, "test\"test@example.com").to_rfc822_address() == // "\"test\"test\"@example.com"); + assert_string( "$test@example.com", new MailboxAddress(null, "$test@example.com").to_rfc822_address() @@ -236,14 +237,19 @@ class Geary.RFC822.MailboxAddressTest : TestCase { "\"test@test\"@example.com", new MailboxAddress(null, "test@test@example.com").to_rfc822_address() ); + + // Likewise, Unicode chars should be passed through. Note that + // these can only be sent if a UTF8 connection is negotiated + // with the SMTP server assert_string( - "=?iso-8859-1?b?qQ==?=@example.com", + "©@example.com", new MailboxAddress(null, "©@example.com").to_rfc822_address() ); assert_string( - "=?UTF-8?b?8J+YuA==?=@example.com", + "😸@example.com", new MailboxAddress(null, "😸@example.com").to_rfc822_address() ); + } public void to_rfc822_string() throws Error {