Don't munge Base64 encoded text parts when saving to disk

Avoid doing character encoding conversion and CRLF stripping when saving
text parts to disk. This prevents UTF-8 encoded text attachments from
being broken when base64 encoded and no encoding is specified.

See #362
This commit is contained in:
Michael Gratton 2019-07-18 14:38:38 +10:00
parent faf84db9f4
commit 2cb0a83342
7 changed files with 73 additions and 48 deletions

View file

@ -943,7 +943,10 @@ public class ConversationMessage : Gtk.Grid, Geary.BaseInterface {
}
try {
this.web_view.add_internal_resource(id, part.write_to_buffer());
this.web_view.add_internal_resource(
id,
part.write_to_buffer(Geary.RFC822.Part.EncodingConversion.UTF8)
);
} catch (Geary.RFC822Error err) {
debug("Failed to get inline buffer: %s", err.message);
return null;

View file

@ -183,7 +183,7 @@ private class Geary.ImapDB.Attachment : Geary.Attachment {
stream, GMime.StreamBufferMode.BLOCK_WRITE
);
part.write_to_stream(stream);
part.write_to_stream(stream, RFC822.Part.EncodingConversion.NONE);
// Using the stream's length is a bit of a hack, but at
// least on one system we are getting 0 back for the file

View file

@ -414,7 +414,9 @@ public class Geary.RFC822.PreviewText : Geary.RFC822.Text {
output_stream.set_owner(false);
try {
part.write_to_stream(output_stream);
part.write_to_stream(
output_stream, Part.EncodingConversion.NONE
);
uint8[] data = output.data;
data += (uint8) '\0';

View file

@ -596,6 +596,7 @@ public class Geary.RFC822.Message : BaseObject, EmailHeaderSet {
if (content_type.is_type("text", text_subtype)) {
body = part.write_to_buffer(
Part.EncodingConversion.UTF8,
to_html ? Part.BodyFormatting.HTML : Part.BodyFormatting.NONE
).to_string();
} else if (replacer != null &&

View file

@ -16,7 +16,17 @@
public class Geary.RFC822.Part : Object {
/** Specifies a format to apply to body data when writing it. */
/** Specifies a character set encoding when writing text bodies. */
public enum EncodingConversion {
/** No conversion will be applied. */
NONE,
/** Plain text bodies will be converted to UTF-8. */
UTF8;
}
/** Specifies a format to apply when writing text bodies. */
public enum BodyFormatting {
/** No formatting will be applied. */
@ -133,18 +143,20 @@ public class Geary.RFC822.Part : Object {
return filename;
}
public Memory.Buffer write_to_buffer(BodyFormatting format = BodyFormatting.NONE)
public Memory.Buffer write_to_buffer(EncodingConversion conversion,
BodyFormatting format = BodyFormatting.NONE)
throws RFC822Error {
ByteArray byte_array = new ByteArray();
GMime.StreamMem stream = new GMime.StreamMem.with_byte_array(byte_array);
stream.set_owner(false);
write_to_stream(stream, format);
write_to_stream(stream, conversion, format);
return new Geary.Memory.ByteBuffer.from_byte_array(byte_array);
}
internal void write_to_stream(GMime.Stream destination,
EncodingConversion conversion,
BodyFormatting format = BodyFormatting.NONE)
throws RFC822Error {
GMime.DataWrapper? wrapper = (this.source_part != null)
@ -156,22 +168,40 @@ public class Geary.RFC822.Part : Object {
);
}
Mime.ContentType content_type = this.get_effective_content_type();
if (content_type.is_type("text", Mime.ContentType.WILDCARD)) {
// Assume encoded text, convert to unencoded UTF-8
if (this.content_type.is_type("text", Mime.ContentType.WILDCARD)) {
GMime.StreamFilter filter = new GMime.StreamFilter(destination);
string? charset = content_type.params.get_value("charset");
filter.add(
Geary.RFC822.Utils.create_utf8_filter_charset(charset)
);
// Do charset conversion if needed
string? charset = this.content_type.params.get_value("charset");
if (String.is_empty(charset)) {
// Fallback charset per RFC 2045, Section 5.2
charset = "US-ASCII";
}
if (conversion == UTF8 && !is_utf_8(charset)) {
print("\nconverting!\n");
GMime.FilterCharset? filter_charset = new GMime.FilterCharset(
charset, Geary.RFC822.UTF8_CHARSET
);
if (filter_charset == null) {
// Source charset not supported, so assume
// US-ASCII
filter_charset = new GMime.FilterCharset(
"US-ASCII", Geary.RFC822.UTF8_CHARSET
);
}
filter.add(filter_charset);
}
bool flowed = content_type.params.has_value_ci("format", "flowed");
bool delsp = content_type.params.has_value_ci("DelSp", "yes");
// Remove the CR's in any CRLF sequence since they are
// effectively a wire encoding, unless the format requires
// them.
if (!(content_type.media_subtype in CR_PRESERVING_TEXT_TYPES)) {
// them or the content encoding is Base64 (being a binary
// format)
if ((this.source_part == null ||
this.source_part.encoding != BASE64) &&
!(content_type.media_subtype in CR_PRESERVING_TEXT_TYPES)) {
filter.add(new GMime.FilterCRLF(false, false));
}
@ -207,3 +237,12 @@ public class Geary.RFC822.Part : Object {
}
}
private inline bool is_utf_8(string charset) {
return (
charset == "UTF-8" ||
charset == "utf-8" ||
charset == "UTF8" ||
charset == "utf8"
);
}

View file

@ -11,36 +11,6 @@ namespace Geary.RFC822.Utils {
// in UTF-8, and is unmolested by GMime.FilterHTML.
public const char QUOTE_MARKER = '\x7f';
/**
* Charset to use when it is otherwise missing or invalid
*
* Per RFC 2045, Section 5.2.
*/
public const string DEFAULT_MIME_CHARSET = "us-ascii";
/**
* Creates a filter to convert a MIME charset to UTF-8.
*
* Param `from_charset` may be null, empty or invalid, in which case
* `DEFAULT_MIME_CHARSET` will be used instead.
*/
public GMime.FilterCharset create_utf8_filter_charset(string? from_charset) {
string actual_charset = from_charset != null ? from_charset.strip() : "";
if (Geary.String.is_empty(actual_charset)) {
actual_charset = DEFAULT_MIME_CHARSET;
}
GMime.FilterCharset? filter_charset = new GMime.FilterCharset(
actual_charset, Geary.RFC822.UTF8_CHARSET
);
if (filter_charset == null) {
debug("Unknown charset: %s; using RFC 2045 default instead", from_charset);
filter_charset = new GMime.FilterCharset(
DEFAULT_MIME_CHARSET, Geary.RFC822.UTF8_CHARSET
);
assert(filter_charset != null);
}
return filter_charset;
}
/**
* Uses the best-possible transfer of bytes from the Memory.Buffer to the GMime.StreamMem object.

View file

@ -10,6 +10,7 @@ class Geary.RFC822.PartTest : TestCase {
private const string CR_BODY = "This is an attachment.\n";
private const string CRLF_BODY = "This is an attachment.\r\n";
private const string ICAL_BODY = "BEGIN:VCALENDAR\r\nEND:VCALENDAR\r\n";
private const string UTF8_BODY = "Тест.";
public PartTest() {
@ -19,6 +20,7 @@ class Geary.RFC822.PartTest : TestCase {
add_test("write_to_buffer_plain", write_to_buffer_plain);
add_test("write_to_buffer_plain_crlf", write_to_buffer_plain_crlf);
add_test("write_to_buffer_plain_ical", write_to_buffer_plain_ical);
add_test("write_to_buffer_plain_utf8", write_to_buffer_plain_utf8);
}
public void new_from_minimal_mime_part() throws Error {
@ -56,7 +58,7 @@ class Geary.RFC822.PartTest : TestCase {
public void write_to_buffer_plain() throws Error {
Part test = new Part(new_part("text/plain", CR_BODY.data));
Memory.Buffer buf = test.write_to_buffer();
Memory.Buffer buf = test.write_to_buffer(Part.EncodingConversion.NONE);
assert_string(CR_BODY, buf.to_string());
}
@ -64,7 +66,7 @@ class Geary.RFC822.PartTest : TestCase {
public void write_to_buffer_plain_crlf() throws Error {
Part test = new Part(new_part("text/plain", CRLF_BODY.data));
Memory.Buffer buf = test.write_to_buffer();
Memory.Buffer buf = test.write_to_buffer(Part.EncodingConversion.NONE);
// CRLF should be stripped
assert_string(CR_BODY, buf.to_string());
@ -73,12 +75,20 @@ class Geary.RFC822.PartTest : TestCase {
public void write_to_buffer_plain_ical() throws Error {
Part test = new Part(new_part("text/calendar", ICAL_BODY.data));
Memory.Buffer buf = test.write_to_buffer();
Memory.Buffer buf = test.write_to_buffer(Part.EncodingConversion.NONE);
// CRLF should not be stripped
assert_string(ICAL_BODY, buf.to_string());
}
public void write_to_buffer_plain_utf8() throws GLib.Error {
Part test = new Part(new_part("text/plain", UTF8_BODY.data));
Memory.Buffer buf = test.write_to_buffer(Part.EncodingConversion.NONE);
assert_string(UTF8_BODY, buf.to_string());
}
private GMime.Part new_part(string? mime_type,
uint8[] body,
GMime.ContentEncoding encoding = GMime.ContentEncoding.DEFAULT) {