From 2cb0a83342bcdab3375b866fd549ec5b5c9dd744 Mon Sep 17 00:00:00 2001 From: Michael Gratton Date: Thu, 18 Jul 2019 14:38:38 +1000 Subject: [PATCH] Don't munge Base64 encoded text parts when saving to disk Avoid doing character encoding conversion and CRLF stripping when saving text parts to disk. This prevents UTF-8 encoded text attachments from being broken when base64 encoded and no encoding is specified. See #362 --- .../conversation-message.vala | 5 +- src/engine/imap-db/imap-db-attachment.vala | 2 +- src/engine/rfc822/rfc822-message-data.vala | 4 +- src/engine/rfc822/rfc822-message.vala | 1 + src/engine/rfc822/rfc822-part.vala | 63 +++++++++++++++---- src/engine/rfc822/rfc822-utils.vala | 30 --------- test/engine/rfc822-part-test.vala | 16 ++++- 7 files changed, 73 insertions(+), 48 deletions(-) diff --git a/src/client/conversation-viewer/conversation-message.vala b/src/client/conversation-viewer/conversation-message.vala index c8fef70b..a52b6ff8 100644 --- a/src/client/conversation-viewer/conversation-message.vala +++ b/src/client/conversation-viewer/conversation-message.vala @@ -943,7 +943,10 @@ public class ConversationMessage : Gtk.Grid, Geary.BaseInterface { } try { - this.web_view.add_internal_resource(id, part.write_to_buffer()); + this.web_view.add_internal_resource( + id, + part.write_to_buffer(Geary.RFC822.Part.EncodingConversion.UTF8) + ); } catch (Geary.RFC822Error err) { debug("Failed to get inline buffer: %s", err.message); return null; diff --git a/src/engine/imap-db/imap-db-attachment.vala b/src/engine/imap-db/imap-db-attachment.vala index 540222d7..f2aacc25 100644 --- a/src/engine/imap-db/imap-db-attachment.vala +++ b/src/engine/imap-db/imap-db-attachment.vala @@ -183,7 +183,7 @@ private class Geary.ImapDB.Attachment : Geary.Attachment { stream, GMime.StreamBufferMode.BLOCK_WRITE ); - part.write_to_stream(stream); + part.write_to_stream(stream, RFC822.Part.EncodingConversion.NONE); // Using the stream's length is a bit of a hack, but at // least on one system we are getting 0 back for the file diff --git a/src/engine/rfc822/rfc822-message-data.vala b/src/engine/rfc822/rfc822-message-data.vala index 9476ce78..f5eeb628 100644 --- a/src/engine/rfc822/rfc822-message-data.vala +++ b/src/engine/rfc822/rfc822-message-data.vala @@ -414,7 +414,9 @@ public class Geary.RFC822.PreviewText : Geary.RFC822.Text { output_stream.set_owner(false); try { - part.write_to_stream(output_stream); + part.write_to_stream( + output_stream, Part.EncodingConversion.NONE + ); uint8[] data = output.data; data += (uint8) '\0'; diff --git a/src/engine/rfc822/rfc822-message.vala b/src/engine/rfc822/rfc822-message.vala index 9eba825b..e140fad3 100644 --- a/src/engine/rfc822/rfc822-message.vala +++ b/src/engine/rfc822/rfc822-message.vala @@ -596,6 +596,7 @@ public class Geary.RFC822.Message : BaseObject, EmailHeaderSet { if (content_type.is_type("text", text_subtype)) { body = part.write_to_buffer( + Part.EncodingConversion.UTF8, to_html ? Part.BodyFormatting.HTML : Part.BodyFormatting.NONE ).to_string(); } else if (replacer != null && diff --git a/src/engine/rfc822/rfc822-part.vala b/src/engine/rfc822/rfc822-part.vala index c7d941fd..cfbdde90 100644 --- a/src/engine/rfc822/rfc822-part.vala +++ b/src/engine/rfc822/rfc822-part.vala @@ -16,7 +16,17 @@ public class Geary.RFC822.Part : Object { - /** Specifies a format to apply to body data when writing it. */ + /** Specifies a character set encoding when writing text bodies. */ + public enum EncodingConversion { + + /** No conversion will be applied. */ + NONE, + + /** Plain text bodies will be converted to UTF-8. */ + UTF8; + } + + /** Specifies a format to apply when writing text bodies. */ public enum BodyFormatting { /** No formatting will be applied. */ @@ -133,18 +143,20 @@ public class Geary.RFC822.Part : Object { return filename; } - public Memory.Buffer write_to_buffer(BodyFormatting format = BodyFormatting.NONE) + public Memory.Buffer write_to_buffer(EncodingConversion conversion, + BodyFormatting format = BodyFormatting.NONE) throws RFC822Error { ByteArray byte_array = new ByteArray(); GMime.StreamMem stream = new GMime.StreamMem.with_byte_array(byte_array); stream.set_owner(false); - write_to_stream(stream, format); + write_to_stream(stream, conversion, format); return new Geary.Memory.ByteBuffer.from_byte_array(byte_array); } internal void write_to_stream(GMime.Stream destination, + EncodingConversion conversion, BodyFormatting format = BodyFormatting.NONE) throws RFC822Error { GMime.DataWrapper? wrapper = (this.source_part != null) @@ -156,22 +168,40 @@ public class Geary.RFC822.Part : Object { ); } - Mime.ContentType content_type = this.get_effective_content_type(); - if (content_type.is_type("text", Mime.ContentType.WILDCARD)) { - // Assume encoded text, convert to unencoded UTF-8 + if (this.content_type.is_type("text", Mime.ContentType.WILDCARD)) { GMime.StreamFilter filter = new GMime.StreamFilter(destination); - string? charset = content_type.params.get_value("charset"); - filter.add( - Geary.RFC822.Utils.create_utf8_filter_charset(charset) - ); + + // Do charset conversion if needed + string? charset = this.content_type.params.get_value("charset"); + if (String.is_empty(charset)) { + // Fallback charset per RFC 2045, Section 5.2 + charset = "US-ASCII"; + } + if (conversion == UTF8 && !is_utf_8(charset)) { + print("\nconverting!\n"); + GMime.FilterCharset? filter_charset = new GMime.FilterCharset( + charset, Geary.RFC822.UTF8_CHARSET + ); + if (filter_charset == null) { + // Source charset not supported, so assume + // US-ASCII + filter_charset = new GMime.FilterCharset( + "US-ASCII", Geary.RFC822.UTF8_CHARSET + ); + } + filter.add(filter_charset); + } bool flowed = content_type.params.has_value_ci("format", "flowed"); bool delsp = content_type.params.has_value_ci("DelSp", "yes"); // Remove the CR's in any CRLF sequence since they are // effectively a wire encoding, unless the format requires - // them. - if (!(content_type.media_subtype in CR_PRESERVING_TEXT_TYPES)) { + // them or the content encoding is Base64 (being a binary + // format) + if ((this.source_part == null || + this.source_part.encoding != BASE64) && + !(content_type.media_subtype in CR_PRESERVING_TEXT_TYPES)) { filter.add(new GMime.FilterCRLF(false, false)); } @@ -207,3 +237,12 @@ public class Geary.RFC822.Part : Object { } } + +private inline bool is_utf_8(string charset) { + return ( + charset == "UTF-8" || + charset == "utf-8" || + charset == "UTF8" || + charset == "utf8" + ); +} \ No newline at end of file diff --git a/src/engine/rfc822/rfc822-utils.vala b/src/engine/rfc822/rfc822-utils.vala index 7672d8ae..dfa4180e 100644 --- a/src/engine/rfc822/rfc822-utils.vala +++ b/src/engine/rfc822/rfc822-utils.vala @@ -11,36 +11,6 @@ namespace Geary.RFC822.Utils { // in UTF-8, and is unmolested by GMime.FilterHTML. public const char QUOTE_MARKER = '\x7f'; -/** - * Charset to use when it is otherwise missing or invalid - * - * Per RFC 2045, Section 5.2. - */ -public const string DEFAULT_MIME_CHARSET = "us-ascii"; - -/** - * Creates a filter to convert a MIME charset to UTF-8. - * - * Param `from_charset` may be null, empty or invalid, in which case - * `DEFAULT_MIME_CHARSET` will be used instead. - */ -public GMime.FilterCharset create_utf8_filter_charset(string? from_charset) { - string actual_charset = from_charset != null ? from_charset.strip() : ""; - if (Geary.String.is_empty(actual_charset)) { - actual_charset = DEFAULT_MIME_CHARSET; - } - GMime.FilterCharset? filter_charset = new GMime.FilterCharset( - actual_charset, Geary.RFC822.UTF8_CHARSET - ); - if (filter_charset == null) { - debug("Unknown charset: %s; using RFC 2045 default instead", from_charset); - filter_charset = new GMime.FilterCharset( - DEFAULT_MIME_CHARSET, Geary.RFC822.UTF8_CHARSET - ); - assert(filter_charset != null); - } - return filter_charset; -} /** * Uses the best-possible transfer of bytes from the Memory.Buffer to the GMime.StreamMem object. diff --git a/test/engine/rfc822-part-test.vala b/test/engine/rfc822-part-test.vala index ab5f026e..3af30ec0 100644 --- a/test/engine/rfc822-part-test.vala +++ b/test/engine/rfc822-part-test.vala @@ -10,6 +10,7 @@ class Geary.RFC822.PartTest : TestCase { private const string CR_BODY = "This is an attachment.\n"; private const string CRLF_BODY = "This is an attachment.\r\n"; private const string ICAL_BODY = "BEGIN:VCALENDAR\r\nEND:VCALENDAR\r\n"; + private const string UTF8_BODY = "Тест."; public PartTest() { @@ -19,6 +20,7 @@ class Geary.RFC822.PartTest : TestCase { add_test("write_to_buffer_plain", write_to_buffer_plain); add_test("write_to_buffer_plain_crlf", write_to_buffer_plain_crlf); add_test("write_to_buffer_plain_ical", write_to_buffer_plain_ical); + add_test("write_to_buffer_plain_utf8", write_to_buffer_plain_utf8); } public void new_from_minimal_mime_part() throws Error { @@ -56,7 +58,7 @@ class Geary.RFC822.PartTest : TestCase { public void write_to_buffer_plain() throws Error { Part test = new Part(new_part("text/plain", CR_BODY.data)); - Memory.Buffer buf = test.write_to_buffer(); + Memory.Buffer buf = test.write_to_buffer(Part.EncodingConversion.NONE); assert_string(CR_BODY, buf.to_string()); } @@ -64,7 +66,7 @@ class Geary.RFC822.PartTest : TestCase { public void write_to_buffer_plain_crlf() throws Error { Part test = new Part(new_part("text/plain", CRLF_BODY.data)); - Memory.Buffer buf = test.write_to_buffer(); + Memory.Buffer buf = test.write_to_buffer(Part.EncodingConversion.NONE); // CRLF should be stripped assert_string(CR_BODY, buf.to_string()); @@ -73,12 +75,20 @@ class Geary.RFC822.PartTest : TestCase { public void write_to_buffer_plain_ical() throws Error { Part test = new Part(new_part("text/calendar", ICAL_BODY.data)); - Memory.Buffer buf = test.write_to_buffer(); + Memory.Buffer buf = test.write_to_buffer(Part.EncodingConversion.NONE); // CRLF should not be stripped assert_string(ICAL_BODY, buf.to_string()); } + public void write_to_buffer_plain_utf8() throws GLib.Error { + Part test = new Part(new_part("text/plain", UTF8_BODY.data)); + + Memory.Buffer buf = test.write_to_buffer(Part.EncodingConversion.NONE); + + assert_string(UTF8_BODY, buf.to_string()); + } + private GMime.Part new_part(string? mime_type, uint8[] body, GMime.ContentEncoding encoding = GMime.ContentEncoding.DEFAULT) {