Don't munge Base64 encoded text parts when saving to disk
Avoid doing character encoding conversion and CRLF stripping when saving text parts to disk. This prevents UTF-8 encoded text attachments from being broken when base64 encoded and no encoding is specified. See #362
This commit is contained in:
parent
faf84db9f4
commit
2cb0a83342
7 changed files with 73 additions and 48 deletions
|
|
@ -943,7 +943,10 @@ public class ConversationMessage : Gtk.Grid, Geary.BaseInterface {
|
|||
}
|
||||
|
||||
try {
|
||||
this.web_view.add_internal_resource(id, part.write_to_buffer());
|
||||
this.web_view.add_internal_resource(
|
||||
id,
|
||||
part.write_to_buffer(Geary.RFC822.Part.EncodingConversion.UTF8)
|
||||
);
|
||||
} catch (Geary.RFC822Error err) {
|
||||
debug("Failed to get inline buffer: %s", err.message);
|
||||
return null;
|
||||
|
|
|
|||
|
|
@ -183,7 +183,7 @@ private class Geary.ImapDB.Attachment : Geary.Attachment {
|
|||
stream, GMime.StreamBufferMode.BLOCK_WRITE
|
||||
);
|
||||
|
||||
part.write_to_stream(stream);
|
||||
part.write_to_stream(stream, RFC822.Part.EncodingConversion.NONE);
|
||||
|
||||
// Using the stream's length is a bit of a hack, but at
|
||||
// least on one system we are getting 0 back for the file
|
||||
|
|
|
|||
|
|
@ -414,7 +414,9 @@ public class Geary.RFC822.PreviewText : Geary.RFC822.Text {
|
|||
output_stream.set_owner(false);
|
||||
|
||||
try {
|
||||
part.write_to_stream(output_stream);
|
||||
part.write_to_stream(
|
||||
output_stream, Part.EncodingConversion.NONE
|
||||
);
|
||||
uint8[] data = output.data;
|
||||
data += (uint8) '\0';
|
||||
|
||||
|
|
|
|||
|
|
@ -596,6 +596,7 @@ public class Geary.RFC822.Message : BaseObject, EmailHeaderSet {
|
|||
|
||||
if (content_type.is_type("text", text_subtype)) {
|
||||
body = part.write_to_buffer(
|
||||
Part.EncodingConversion.UTF8,
|
||||
to_html ? Part.BodyFormatting.HTML : Part.BodyFormatting.NONE
|
||||
).to_string();
|
||||
} else if (replacer != null &&
|
||||
|
|
|
|||
|
|
@ -16,7 +16,17 @@
|
|||
public class Geary.RFC822.Part : Object {
|
||||
|
||||
|
||||
/** Specifies a format to apply to body data when writing it. */
|
||||
/** Specifies a character set encoding when writing text bodies. */
|
||||
public enum EncodingConversion {
|
||||
|
||||
/** No conversion will be applied. */
|
||||
NONE,
|
||||
|
||||
/** Plain text bodies will be converted to UTF-8. */
|
||||
UTF8;
|
||||
}
|
||||
|
||||
/** Specifies a format to apply when writing text bodies. */
|
||||
public enum BodyFormatting {
|
||||
|
||||
/** No formatting will be applied. */
|
||||
|
|
@ -133,18 +143,20 @@ public class Geary.RFC822.Part : Object {
|
|||
return filename;
|
||||
}
|
||||
|
||||
public Memory.Buffer write_to_buffer(BodyFormatting format = BodyFormatting.NONE)
|
||||
public Memory.Buffer write_to_buffer(EncodingConversion conversion,
|
||||
BodyFormatting format = BodyFormatting.NONE)
|
||||
throws RFC822Error {
|
||||
ByteArray byte_array = new ByteArray();
|
||||
GMime.StreamMem stream = new GMime.StreamMem.with_byte_array(byte_array);
|
||||
stream.set_owner(false);
|
||||
|
||||
write_to_stream(stream, format);
|
||||
write_to_stream(stream, conversion, format);
|
||||
|
||||
return new Geary.Memory.ByteBuffer.from_byte_array(byte_array);
|
||||
}
|
||||
|
||||
internal void write_to_stream(GMime.Stream destination,
|
||||
EncodingConversion conversion,
|
||||
BodyFormatting format = BodyFormatting.NONE)
|
||||
throws RFC822Error {
|
||||
GMime.DataWrapper? wrapper = (this.source_part != null)
|
||||
|
|
@ -156,22 +168,40 @@ public class Geary.RFC822.Part : Object {
|
|||
);
|
||||
}
|
||||
|
||||
Mime.ContentType content_type = this.get_effective_content_type();
|
||||
if (content_type.is_type("text", Mime.ContentType.WILDCARD)) {
|
||||
// Assume encoded text, convert to unencoded UTF-8
|
||||
if (this.content_type.is_type("text", Mime.ContentType.WILDCARD)) {
|
||||
GMime.StreamFilter filter = new GMime.StreamFilter(destination);
|
||||
string? charset = content_type.params.get_value("charset");
|
||||
filter.add(
|
||||
Geary.RFC822.Utils.create_utf8_filter_charset(charset)
|
||||
);
|
||||
|
||||
// Do charset conversion if needed
|
||||
string? charset = this.content_type.params.get_value("charset");
|
||||
if (String.is_empty(charset)) {
|
||||
// Fallback charset per RFC 2045, Section 5.2
|
||||
charset = "US-ASCII";
|
||||
}
|
||||
if (conversion == UTF8 && !is_utf_8(charset)) {
|
||||
print("\nconverting!\n");
|
||||
GMime.FilterCharset? filter_charset = new GMime.FilterCharset(
|
||||
charset, Geary.RFC822.UTF8_CHARSET
|
||||
);
|
||||
if (filter_charset == null) {
|
||||
// Source charset not supported, so assume
|
||||
// US-ASCII
|
||||
filter_charset = new GMime.FilterCharset(
|
||||
"US-ASCII", Geary.RFC822.UTF8_CHARSET
|
||||
);
|
||||
}
|
||||
filter.add(filter_charset);
|
||||
}
|
||||
|
||||
bool flowed = content_type.params.has_value_ci("format", "flowed");
|
||||
bool delsp = content_type.params.has_value_ci("DelSp", "yes");
|
||||
|
||||
// Remove the CR's in any CRLF sequence since they are
|
||||
// effectively a wire encoding, unless the format requires
|
||||
// them.
|
||||
if (!(content_type.media_subtype in CR_PRESERVING_TEXT_TYPES)) {
|
||||
// them or the content encoding is Base64 (being a binary
|
||||
// format)
|
||||
if ((this.source_part == null ||
|
||||
this.source_part.encoding != BASE64) &&
|
||||
!(content_type.media_subtype in CR_PRESERVING_TEXT_TYPES)) {
|
||||
filter.add(new GMime.FilterCRLF(false, false));
|
||||
}
|
||||
|
||||
|
|
@ -207,3 +237,12 @@ public class Geary.RFC822.Part : Object {
|
|||
}
|
||||
|
||||
}
|
||||
|
||||
private inline bool is_utf_8(string charset) {
|
||||
return (
|
||||
charset == "UTF-8" ||
|
||||
charset == "utf-8" ||
|
||||
charset == "UTF8" ||
|
||||
charset == "utf8"
|
||||
);
|
||||
}
|
||||
|
|
@ -11,36 +11,6 @@ namespace Geary.RFC822.Utils {
|
|||
// in UTF-8, and is unmolested by GMime.FilterHTML.
|
||||
public const char QUOTE_MARKER = '\x7f';
|
||||
|
||||
/**
|
||||
* Charset to use when it is otherwise missing or invalid
|
||||
*
|
||||
* Per RFC 2045, Section 5.2.
|
||||
*/
|
||||
public const string DEFAULT_MIME_CHARSET = "us-ascii";
|
||||
|
||||
/**
|
||||
* Creates a filter to convert a MIME charset to UTF-8.
|
||||
*
|
||||
* Param `from_charset` may be null, empty or invalid, in which case
|
||||
* `DEFAULT_MIME_CHARSET` will be used instead.
|
||||
*/
|
||||
public GMime.FilterCharset create_utf8_filter_charset(string? from_charset) {
|
||||
string actual_charset = from_charset != null ? from_charset.strip() : "";
|
||||
if (Geary.String.is_empty(actual_charset)) {
|
||||
actual_charset = DEFAULT_MIME_CHARSET;
|
||||
}
|
||||
GMime.FilterCharset? filter_charset = new GMime.FilterCharset(
|
||||
actual_charset, Geary.RFC822.UTF8_CHARSET
|
||||
);
|
||||
if (filter_charset == null) {
|
||||
debug("Unknown charset: %s; using RFC 2045 default instead", from_charset);
|
||||
filter_charset = new GMime.FilterCharset(
|
||||
DEFAULT_MIME_CHARSET, Geary.RFC822.UTF8_CHARSET
|
||||
);
|
||||
assert(filter_charset != null);
|
||||
}
|
||||
return filter_charset;
|
||||
}
|
||||
|
||||
/**
|
||||
* Uses the best-possible transfer of bytes from the Memory.Buffer to the GMime.StreamMem object.
|
||||
|
|
|
|||
|
|
@ -10,6 +10,7 @@ class Geary.RFC822.PartTest : TestCase {
|
|||
private const string CR_BODY = "This is an attachment.\n";
|
||||
private const string CRLF_BODY = "This is an attachment.\r\n";
|
||||
private const string ICAL_BODY = "BEGIN:VCALENDAR\r\nEND:VCALENDAR\r\n";
|
||||
private const string UTF8_BODY = "Тест.";
|
||||
|
||||
|
||||
public PartTest() {
|
||||
|
|
@ -19,6 +20,7 @@ class Geary.RFC822.PartTest : TestCase {
|
|||
add_test("write_to_buffer_plain", write_to_buffer_plain);
|
||||
add_test("write_to_buffer_plain_crlf", write_to_buffer_plain_crlf);
|
||||
add_test("write_to_buffer_plain_ical", write_to_buffer_plain_ical);
|
||||
add_test("write_to_buffer_plain_utf8", write_to_buffer_plain_utf8);
|
||||
}
|
||||
|
||||
public void new_from_minimal_mime_part() throws Error {
|
||||
|
|
@ -56,7 +58,7 @@ class Geary.RFC822.PartTest : TestCase {
|
|||
public void write_to_buffer_plain() throws Error {
|
||||
Part test = new Part(new_part("text/plain", CR_BODY.data));
|
||||
|
||||
Memory.Buffer buf = test.write_to_buffer();
|
||||
Memory.Buffer buf = test.write_to_buffer(Part.EncodingConversion.NONE);
|
||||
|
||||
assert_string(CR_BODY, buf.to_string());
|
||||
}
|
||||
|
|
@ -64,7 +66,7 @@ class Geary.RFC822.PartTest : TestCase {
|
|||
public void write_to_buffer_plain_crlf() throws Error {
|
||||
Part test = new Part(new_part("text/plain", CRLF_BODY.data));
|
||||
|
||||
Memory.Buffer buf = test.write_to_buffer();
|
||||
Memory.Buffer buf = test.write_to_buffer(Part.EncodingConversion.NONE);
|
||||
|
||||
// CRLF should be stripped
|
||||
assert_string(CR_BODY, buf.to_string());
|
||||
|
|
@ -73,12 +75,20 @@ class Geary.RFC822.PartTest : TestCase {
|
|||
public void write_to_buffer_plain_ical() throws Error {
|
||||
Part test = new Part(new_part("text/calendar", ICAL_BODY.data));
|
||||
|
||||
Memory.Buffer buf = test.write_to_buffer();
|
||||
Memory.Buffer buf = test.write_to_buffer(Part.EncodingConversion.NONE);
|
||||
|
||||
// CRLF should not be stripped
|
||||
assert_string(ICAL_BODY, buf.to_string());
|
||||
}
|
||||
|
||||
public void write_to_buffer_plain_utf8() throws GLib.Error {
|
||||
Part test = new Part(new_part("text/plain", UTF8_BODY.data));
|
||||
|
||||
Memory.Buffer buf = test.write_to_buffer(Part.EncodingConversion.NONE);
|
||||
|
||||
assert_string(UTF8_BODY, buf.to_string());
|
||||
}
|
||||
|
||||
private GMime.Part new_part(string? mime_type,
|
||||
uint8[] body,
|
||||
GMime.ContentEncoding encoding = GMime.ContentEncoding.DEFAULT) {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue