Fix high-bit char corruption when sent as ISO-8859-1.
Commit 0c31607 made it possible for Geary to send messages as both
US-ASCII and ISO-8859-1, as part of the workaround for Bug 769137.
This introduced a bug however where if a message contained 8-bit
ISO-8859-1 chars but didn't include ant UTF-8 chars, the UTF-8 message
body would be encoded as if it was ISO-8859-1, and hence the high-bit
chars would be corrupted.
The solution is to re-encode the body in whatever charset it will be sent
as, before sending it.
* src/engine/rfc822/rfc822-message.vala (Message): Break out the common
parts of generating a message body part from
Message::from_composed_email into the new ::body_data_to_part method.
Ensure that the body data is conveted into US-ASCII or ISO-8859-1
before attempting to guess what transfer encoding should be used or
setting it as the content of the new body part.
* src/engine/rfc822/rfc822-utils.vala (Geary.RFC822.Utils): Replace
get_best() with get_best_charset(), since we can't guess both charset
and transfer encoding at the same time any more. Ensure we reset the
source stream after guessing (and in get_best_encoding) so that
subsequent uses don't simply get an EOS.
This commit is contained in:
parent
da2d2c9840
commit
048fa15c7b
2 changed files with 88 additions and 52 deletions
|
|
@ -15,8 +15,8 @@ public class Geary.RFC822.Message : BaseObject {
|
|||
*/
|
||||
public delegate string? InlinePartReplacer(string filename, Mime.ContentType? content_type,
|
||||
Mime.ContentDisposition? disposition, string? content_id, Geary.Memory.Buffer buffer);
|
||||
|
||||
private const string DEFAULT_ENCODING = "UTF8";
|
||||
|
||||
private const string DEFAULT_CHARSET = "UTF-8";
|
||||
|
||||
private const string HEADER_SENDER = "Sender";
|
||||
private const string HEADER_IN_REPLY_TO = "In-Reply-To";
|
||||
|
|
@ -152,53 +152,29 @@ public class Geary.RFC822.Message : BaseObject {
|
|||
this.message.set_header(HEADER_MAILER, email.mailer);
|
||||
}
|
||||
|
||||
// Body: text format (optional)
|
||||
string? charset = null;
|
||||
GMime.ContentEncoding? encoding = null;
|
||||
// Share the body charset and encoding between plain and HTML
|
||||
// parts, so we don't need to work it out twice.
|
||||
string? body_charset = null;
|
||||
GMime.ContentEncoding? body_encoding = null;
|
||||
|
||||
// Body: text format (optional)
|
||||
GMime.Part? body_text = null;
|
||||
if (email.body_text != null) {
|
||||
GMime.Stream stream = new GMime.StreamMem.with_buffer(email.body_text.data);
|
||||
Geary.RFC822.Utils.get_best(stream, out charset, out encoding);
|
||||
if (encoding == GMime.ContentEncoding.BASE64) {
|
||||
// Base64-encoded text needs to have CR's added after
|
||||
// LF's before encoding, otherwise it breaks
|
||||
// format=flowed. See bug 753528.
|
||||
GMime.StreamFilter filter_stream = new GMime.StreamFilter(stream);
|
||||
filter_stream.add(new GMime.FilterCRLF(true, false));
|
||||
stream = filter_stream;
|
||||
}
|
||||
GMime.DataWrapper content = new GMime.DataWrapper.with_stream(
|
||||
stream, GMime.ContentEncoding.DEFAULT
|
||||
);
|
||||
body_text = new GMime.Part();
|
||||
body_text.set_content_type(
|
||||
new GMime.ContentType.from_string(
|
||||
"text/plain; charset=%s; format=flowed".printf(charset)
|
||||
)
|
||||
);
|
||||
body_text.set_content_object(content);
|
||||
body_text.set_content_encoding(encoding);
|
||||
body_text = body_data_to_part(email.body_text.data,
|
||||
ref body_charset,
|
||||
ref body_encoding,
|
||||
"text/plain",
|
||||
true);
|
||||
}
|
||||
|
||||
// Body: HTML format (also optional)
|
||||
GMime.Part? body_html = null;
|
||||
if (email.body_html != null) {
|
||||
GMime.StreamMem stream = new GMime.StreamMem.with_buffer(email.body_html.data);
|
||||
if (charset == null) {
|
||||
Geary.RFC822.Utils.get_best(stream, out charset, out encoding);
|
||||
}
|
||||
GMime.DataWrapper content = new GMime.DataWrapper.with_stream(stream,
|
||||
GMime.ContentEncoding.DEFAULT);
|
||||
|
||||
body_html = new GMime.Part();
|
||||
body_html.set_content_type(
|
||||
new GMime.ContentType.from_string(
|
||||
"text/html; charset=%s".printf(charset)
|
||||
)
|
||||
);
|
||||
body_html.set_content_object(content);
|
||||
body_html.set_content_encoding(encoding);
|
||||
body_html = body_data_to_part(email.body_html.data,
|
||||
ref body_charset,
|
||||
ref body_encoding,
|
||||
"text/html",
|
||||
false);
|
||||
}
|
||||
|
||||
// Build the message's mime part.
|
||||
|
|
@ -921,12 +897,12 @@ public class Geary.RFC822.Message : BaseObject {
|
|||
GMime.StreamMem stream = new GMime.StreamMem.with_byte_array(byte_array);
|
||||
stream.set_owner(false);
|
||||
|
||||
// Convert encoding to UTF-8.
|
||||
// Convert to UTF-8.
|
||||
GMime.StreamFilter stream_filter = new GMime.StreamFilter(stream);
|
||||
if (to_utf8) {
|
||||
string? charset = (content_type != null) ? content_type.params.get_value("charset") : null;
|
||||
if (String.is_empty(charset))
|
||||
charset = DEFAULT_ENCODING;
|
||||
charset = DEFAULT_CHARSET;
|
||||
stream_filter.add(Geary.RFC822.Utils.create_utf8_filter_charset(charset));
|
||||
}
|
||||
|
||||
|
|
@ -957,5 +933,67 @@ public class Geary.RFC822.Message : BaseObject {
|
|||
public string to_string() {
|
||||
return message.to_string();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a MIME part for some body content.
|
||||
*
|
||||
* Determining the appropriate body charset and encoding is
|
||||
* unfortunately a multi-step process that involves reading it
|
||||
* completely, several times:
|
||||
*
|
||||
* 1. Guess the best charset by scanning the complete body.
|
||||
* 2. Convert the body into the preferred charset, essential
|
||||
* to avoid e.g. guessing Base64 encoding for ISO-8859-1
|
||||
* because of the 0x0's present in UTF bytes with high-bit
|
||||
* chars.
|
||||
* 3. Determine, given the correctly encoded charset
|
||||
* what the appropriate encoding is by scanning the
|
||||
* complete, encoded body.
|
||||
*
|
||||
* This applies to both text/plain and text/html parts, but we
|
||||
* don't need to do it repeatedly for each, since HTML is 7-bit
|
||||
* clean ASCII. So if we have guessed both already for a plain
|
||||
* text body, it will still apply for any HTML part.
|
||||
*/
|
||||
private GMime.Part body_data_to_part(uint8[] content,
|
||||
ref string? charset,
|
||||
ref GMime.ContentEncoding? encoding,
|
||||
string content_type,
|
||||
bool is_flowed) {
|
||||
GMime.Stream content_stream = new GMime.StreamMem.with_buffer(content);
|
||||
if (charset == null) {
|
||||
charset = Geary.RFC822.Utils.get_best_charset(content_stream);
|
||||
}
|
||||
GMime.StreamFilter filter_stream = new GMime.StreamFilter(content_stream);
|
||||
if (charset != DEFAULT_CHARSET) {
|
||||
filter_stream.add(new GMime.FilterCharset(DEFAULT_CHARSET, charset));
|
||||
}
|
||||
if (encoding == null) {
|
||||
encoding = Geary.RFC822.Utils.get_best_encoding(filter_stream);
|
||||
}
|
||||
if (is_flowed && encoding == GMime.ContentEncoding.BASE64) {
|
||||
// Base64-encoded text needs to have CR's added after LF's
|
||||
// before encoding, otherwise it breaks format=flowed. See
|
||||
// Bug 753528.
|
||||
filter_stream.add(new GMime.FilterCRLF(true, false));
|
||||
}
|
||||
|
||||
GMime.ContentType complete_type =
|
||||
new GMime.ContentType.from_string(content_type);
|
||||
complete_type.set_parameter("charset", charset);
|
||||
if (is_flowed) {
|
||||
complete_type.set_parameter("format", "flowed");
|
||||
}
|
||||
|
||||
GMime.DataWrapper body = new GMime.DataWrapper.with_stream(
|
||||
filter_stream, GMime.ContentEncoding.DEFAULT
|
||||
);
|
||||
|
||||
GMime.Part body_part = new GMime.Part();
|
||||
body_part.set_content_type(complete_type);
|
||||
body_part.set_content_object(body);
|
||||
body_part.set_content_encoding(encoding);
|
||||
return body_part;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -322,24 +322,21 @@ public bool comp_char_arr_slice(char[] array, uint start, string comp) {
|
|||
}
|
||||
|
||||
/**
|
||||
* Uses a GMime.FilterBest to determine the best charset and encoding.
|
||||
* Uses a GMime.FilterBest to determine the best charset.
|
||||
*
|
||||
* WARNING: This call does not perform async I/O, meaning it will loop on the
|
||||
* stream without relinquishing control to the event loop. Use with
|
||||
* caution.
|
||||
*/
|
||||
public void get_best(GMime.Stream in_stream,
|
||||
out string charset,
|
||||
out GMime.ContentEncoding encoding) {
|
||||
public string get_best_charset(GMime.Stream in_stream) {
|
||||
GMime.FilterBest filter = new GMime.FilterBest(
|
||||
GMime.FilterBestFlags.CHARSET |
|
||||
GMime.FilterBestFlags.ENCODING
|
||||
GMime.FilterBestFlags.CHARSET
|
||||
);
|
||||
GMime.StreamFilter out_stream = new GMime.StreamFilter(new GMime.StreamNull());
|
||||
out_stream.add(filter);
|
||||
in_stream.write_to_stream(out_stream);
|
||||
charset = filter.charset();
|
||||
encoding = filter.encoding(GMime.EncodingConstraint.7BIT);
|
||||
in_stream.reset();
|
||||
return filter.charset();
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -356,6 +353,7 @@ public GMime.ContentEncoding get_best_encoding(GMime.Stream in_stream) {
|
|||
GMime.StreamFilter out_stream = new GMime.StreamFilter(new GMime.StreamNull());
|
||||
out_stream.add(filter);
|
||||
in_stream.write_to_stream(out_stream);
|
||||
in_stream.reset();
|
||||
return filter.encoding(GMime.EncodingConstraint.7BIT);
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue