Don't always use UTF-8 as the charset, use ASCII or 8859-1 if possible.

Previously, Geary would simply assume that UTF-8 is a reasonable charset
to use when sending both plain text and HTML parts. It is; however,
Mailman will re-encode content sent as UTF-8 using Base64, and break
format=flowed in the process.

This works around the problem for messages containing US-ASCII or
ISO-8859-1 chars, at least. No solution yet for UTF-8 except maybe
getting Mailman fixed.

Bug 769137

* src/engine/rfc822/rfc822-message.vala (Message): Guess the best charset
  for a message's content rather than assuming UTF-8. Set the charset for
  both plain text and HTML parts, but only guess it and the encoding at
  max once for both.

* src/engine/rfc822/rfc822-utils.vala: Replace get_best_content_encoding
  with get_best for determining both charset and encoding, and
  get_best_encoding for just guessing encoding, both using
  GMime.FilterBest instead of our own custom code.
This commit is contained in:
Michael James Gratton 2016-08-01 23:21:28 +10:00
parent f4d19831ee
commit 0c3160779e
2 changed files with 52 additions and 71 deletions

View file

@ -153,12 +153,13 @@ public class Geary.RFC822.Message : BaseObject {
} }
// Body: text format (optional) // Body: text format (optional)
string? charset = null;
GMime.ContentEncoding? encoding = null;
GMime.Part? body_text = null; GMime.Part? body_text = null;
if (email.body_text != null) { if (email.body_text != null) {
GMime.Stream stream = new GMime.StreamMem.with_buffer(email.body_text.data); GMime.Stream stream = new GMime.StreamMem.with_buffer(email.body_text.data);
GMime.ContentEncoding encoding = Geary.RFC822.Utils.get_best_content_encoding( Geary.RFC822.Utils.get_best(stream, out charset, out encoding);
stream, GMime.EncodingConstraint.7BIT
);
if (encoding == GMime.ContentEncoding.BASE64) { if (encoding == GMime.ContentEncoding.BASE64) {
// Base64-encoded text needs to have CR's added after // Base64-encoded text needs to have CR's added after
// LF's before encoding, otherwise it breaks // LF's before encoding, otherwise it breaks
@ -171,7 +172,11 @@ public class Geary.RFC822.Message : BaseObject {
stream, GMime.ContentEncoding.DEFAULT stream, GMime.ContentEncoding.DEFAULT
); );
body_text = new GMime.Part(); body_text = new GMime.Part();
body_text.set_content_type(new GMime.ContentType.from_string("text/plain; charset=utf-8; format=flowed")); body_text.set_content_type(
new GMime.ContentType.from_string(
"text/plain; charset=%s; format=flowed".printf(charset)
)
);
body_text.set_content_object(content); body_text.set_content_object(content);
body_text.set_content_encoding(encoding); body_text.set_content_encoding(encoding);
} }
@ -180,16 +185,22 @@ public class Geary.RFC822.Message : BaseObject {
GMime.Part? body_html = null; GMime.Part? body_html = null;
if (email.body_html != null) { if (email.body_html != null) {
GMime.StreamMem stream = new GMime.StreamMem.with_buffer(email.body_html.data); GMime.StreamMem stream = new GMime.StreamMem.with_buffer(email.body_html.data);
if (charset == null) {
Geary.RFC822.Utils.get_best(stream, out charset, out encoding);
}
GMime.DataWrapper content = new GMime.DataWrapper.with_stream(stream, GMime.DataWrapper content = new GMime.DataWrapper.with_stream(stream,
GMime.ContentEncoding.DEFAULT); GMime.ContentEncoding.DEFAULT);
body_html = new GMime.Part(); body_html = new GMime.Part();
body_html.set_content_type(new GMime.ContentType.from_string("text/html; charset=utf-8")); body_html.set_content_type(
new GMime.ContentType.from_string(
"text/html; charset=%s".printf(charset)
)
);
body_html.set_content_object(content); body_html.set_content_object(content);
body_html.set_content_encoding(Geary.RFC822.Utils.get_best_content_encoding(stream, body_html.set_content_encoding(encoding);
GMime.EncodingConstraint.7BIT));
} }
// Build the message's mime part. // Build the message's mime part.
Gee.List<GMime.Object> main_parts = new Gee.LinkedList<GMime.Object>(); Gee.List<GMime.Object> main_parts = new Gee.LinkedList<GMime.Object>();
@ -283,11 +294,7 @@ public class Geary.RFC822.Message : BaseObject {
GMime.StreamGIO stream = new GMime.StreamGIO(file); GMime.StreamGIO stream = new GMime.StreamGIO(file);
stream.set_owner(false); stream.set_owner(false);
part.set_content_object(new GMime.DataWrapper.with_stream(stream, GMime.ContentEncoding.BINARY)); part.set_content_object(new GMime.DataWrapper.with_stream(stream, GMime.ContentEncoding.BINARY));
part.set_content_encoding(Geary.RFC822.Utils.get_best_encoding(stream));
// This encoding is the "Content-Transfer-Encoding", which GMime automatically converts to.
part.set_content_encoding(Geary.RFC822.Utils.get_best_content_encoding(stream,
GMime.EncodingConstraint.7BIT));
return part; return part;
} }

View file

@ -321,68 +321,42 @@ public bool comp_char_arr_slice(char[] array, uint start, string comp) {
return true; return true;
} }
/* /**
* This function is adapted from the GMimeFilterBest source in the GMime * Uses a GMime.FilterBest to determine the best charset and encoding.
* library (gmime-filter-best.c) by Jeffrey Stedfast, LGPL 2.1.
* *
* WARNING: This call does not perform async I/O, meaning it will loop on the * WARNING: This call does not perform async I/O, meaning it will loop on the
* stream without relinquishing control to the event loop. Use with * stream without relinquishing control to the event loop. Use with
* caution. * caution.
*/ */
public GMime.ContentEncoding get_best_content_encoding(GMime.Stream stream, public void get_best(GMime.Stream in_stream,
GMime.EncodingConstraint constraint) { out string charset,
int count0 = 0, count8 = 0, linelen = 0, maxline = 0; out GMime.ContentEncoding encoding) {
size_t total = 0, readlen; GMime.FilterBest filter = new GMime.FilterBest(
// TODO: Increase buffer size? GMime.FilterBestFlags.CHARSET |
uint8[] buffer = new uint8[1024]; GMime.FilterBestFlags.ENCODING
);
while ((readlen = stream.read(buffer)) > 0) { GMime.StreamFilter out_stream = new GMime.StreamFilter(new GMime.StreamNull());
total += readlen; out_stream.add(filter);
for(int i = 0; i < readlen; i++) { in_stream.write_to_stream(out_stream);
char c = (char) buffer[i]; charset = filter.charset();
if (c == '\n') { encoding = filter.encoding(GMime.EncodingConstraint.7BIT);
maxline = maxline > linelen ? maxline : linelen; }
linelen = 0;
} else { /**
linelen++; * Uses a GMime.FilterBest to determine the best encoding.
if (c == 0) *
count0++; * WARNING: This call does not perform async I/O, meaning it will loop on the
else if ((c & 0x80) != 0) * stream without relinquishing control to the event loop. Use with
count8++; * caution.
} */
} public GMime.ContentEncoding get_best_encoding(GMime.Stream in_stream) {
} GMime.FilterBest filter = new GMime.FilterBest(
maxline = maxline > linelen ? maxline : linelen; GMime.FilterBestFlags.ENCODING
);
GMime.ContentEncoding encoding = GMime.ContentEncoding.DEFAULT; GMime.StreamFilter out_stream = new GMime.StreamFilter(new GMime.StreamNull());
switch (constraint) { out_stream.add(filter);
case GMime.EncodingConstraint.7BIT: in_stream.write_to_stream(out_stream);
if (count0 > 0) { return filter.encoding(GMime.EncodingConstraint.7BIT);
encoding = GMime.ContentEncoding.BASE64;
} else if (count8 > 0) {
if (count8 > (int) (total * 0.17))
encoding = GMime.ContentEncoding.BASE64;
else
encoding = GMime.ContentEncoding.QUOTEDPRINTABLE;
} else if (maxline > 998) {
encoding = GMime.ContentEncoding.QUOTEDPRINTABLE;
}
break;
case GMime.EncodingConstraint.8BIT:
if (count0 > 0)
encoding = GMime.ContentEncoding.BASE64;
else if (maxline > 998)
encoding = GMime.ContentEncoding.QUOTEDPRINTABLE;
break;
case GMime.EncodingConstraint.BINARY:
if (count0 + count8 > 0)
encoding = GMime.ContentEncoding.BINARY;
break;
}
return encoding;
} }
public string get_clean_attachment_filename(GMime.Part part) { public string get_clean_attachment_filename(GMime.Part part) {