Don't always use UTF-8 as the charset, use ASCII or ISO-8859-1 if possible.

Previously, Geary would simply assume that UTF-8 is a reasonable charset
to use when sending both plain text and HTML parts. It is; however,
Mailman will re-encode content sent as UTF-8 using Base64, and break
format=flowed in the process.

This works around the problem for messages containing US-ASCII or
ISO-8859-1 chars, at least. No solution yet for UTF-8 except maybe
getting Mailman fixed.

Bug 769137

* src/engine/rfc822/rfc822-message.vala (Message): Guess the best charset
  for a message's content rather than assuming UTF-8. Set the charset for
  both plain text and HTML parts, but only guess it and the encoding at
  max once for both.

* src/engine/rfc822/rfc822-utils.vala: Replace get_best_content_encoding
  with get_best for determining both charset and encoding, and
  get_best_encoding for just guessing encoding, both using
  GMime.FilterBest instead of our own custom code.
This commit is contained in:
Michael James Gratton 2016-08-01 23:21:28 +10:00
parent f4d19831ee
commit 0c3160779e
2 changed files with 52 additions and 71 deletions

View file

@ -153,12 +153,13 @@ public class Geary.RFC822.Message : BaseObject {
}
// Body: text format (optional)
string? charset = null;
GMime.ContentEncoding? encoding = null;
GMime.Part? body_text = null;
if (email.body_text != null) {
GMime.Stream stream = new GMime.StreamMem.with_buffer(email.body_text.data);
GMime.ContentEncoding encoding = Geary.RFC822.Utils.get_best_content_encoding(
stream, GMime.EncodingConstraint.7BIT
);
Geary.RFC822.Utils.get_best(stream, out charset, out encoding);
if (encoding == GMime.ContentEncoding.BASE64) {
// Base64-encoded text needs to have CR's added after
// LF's before encoding, otherwise it breaks
@ -171,7 +172,11 @@ public class Geary.RFC822.Message : BaseObject {
stream, GMime.ContentEncoding.DEFAULT
);
body_text = new GMime.Part();
body_text.set_content_type(new GMime.ContentType.from_string("text/plain; charset=utf-8; format=flowed"));
body_text.set_content_type(
new GMime.ContentType.from_string(
"text/plain; charset=%s; format=flowed".printf(charset)
)
);
body_text.set_content_object(content);
body_text.set_content_encoding(encoding);
}
@ -180,16 +185,22 @@ public class Geary.RFC822.Message : BaseObject {
GMime.Part? body_html = null;
if (email.body_html != null) {
GMime.StreamMem stream = new GMime.StreamMem.with_buffer(email.body_html.data);
if (charset == null) {
Geary.RFC822.Utils.get_best(stream, out charset, out encoding);
}
GMime.DataWrapper content = new GMime.DataWrapper.with_stream(stream,
GMime.ContentEncoding.DEFAULT);
body_html = new GMime.Part();
body_html.set_content_type(new GMime.ContentType.from_string("text/html; charset=utf-8"));
body_html.set_content_type(
new GMime.ContentType.from_string(
"text/html; charset=%s".printf(charset)
)
);
body_html.set_content_object(content);
body_html.set_content_encoding(Geary.RFC822.Utils.get_best_content_encoding(stream,
GMime.EncodingConstraint.7BIT));
body_html.set_content_encoding(encoding);
}
// Build the message's mime part.
Gee.List<GMime.Object> main_parts = new Gee.LinkedList<GMime.Object>();
@ -283,11 +294,7 @@ public class Geary.RFC822.Message : BaseObject {
GMime.StreamGIO stream = new GMime.StreamGIO(file);
stream.set_owner(false);
part.set_content_object(new GMime.DataWrapper.with_stream(stream, GMime.ContentEncoding.BINARY));
// This encoding is the "Content-Transfer-Encoding", which GMime automatically converts to.
part.set_content_encoding(Geary.RFC822.Utils.get_best_content_encoding(stream,
GMime.EncodingConstraint.7BIT));
part.set_content_encoding(Geary.RFC822.Utils.get_best_encoding(stream));
return part;
}

View file

@ -321,68 +321,42 @@ public bool comp_char_arr_slice(char[] array, uint start, string comp) {
return true;
}
/*
* This function is adapted from the GMimeFilterBest source in the GMime
* library (gmime-filter-best.c) by Jeffrey Stedfast, LGPL 2.1.
/**
* Uses a GMime.FilterBest to determine the best charset and encoding.
*
* WARNING: This call does not perform async I/O, meaning it will loop on the
* stream without relinquishing control to the event loop. Use with
* caution.
*/
public GMime.ContentEncoding get_best_content_encoding(GMime.Stream stream,
GMime.EncodingConstraint constraint) {
int count0 = 0, count8 = 0, linelen = 0, maxline = 0;
size_t total = 0, readlen;
// TODO: Increase buffer size?
uint8[] buffer = new uint8[1024];
while ((readlen = stream.read(buffer)) > 0) {
total += readlen;
for(int i = 0; i < readlen; i++) {
char c = (char) buffer[i];
if (c == '\n') {
maxline = maxline > linelen ? maxline : linelen;
linelen = 0;
} else {
linelen++;
if (c == 0)
count0++;
else if ((c & 0x80) != 0)
count8++;
}
}
}
maxline = maxline > linelen ? maxline : linelen;
GMime.ContentEncoding encoding = GMime.ContentEncoding.DEFAULT;
switch (constraint) {
case GMime.EncodingConstraint.7BIT:
if (count0 > 0) {
encoding = GMime.ContentEncoding.BASE64;
} else if (count8 > 0) {
if (count8 > (int) (total * 0.17))
encoding = GMime.ContentEncoding.BASE64;
else
encoding = GMime.ContentEncoding.QUOTEDPRINTABLE;
} else if (maxline > 998) {
encoding = GMime.ContentEncoding.QUOTEDPRINTABLE;
}
break;
case GMime.EncodingConstraint.8BIT:
if (count0 > 0)
encoding = GMime.ContentEncoding.BASE64;
else if (maxline > 998)
encoding = GMime.ContentEncoding.QUOTEDPRINTABLE;
break;
case GMime.EncodingConstraint.BINARY:
if (count0 + count8 > 0)
encoding = GMime.ContentEncoding.BINARY;
break;
}
return encoding;
public void get_best(GMime.Stream in_stream,
                     out string charset,
                     out GMime.ContentEncoding encoding) {
    // One filter instance is asked to track both charset and encoding.
    GMime.FilterBestFlags wanted =
        GMime.FilterBestFlags.CHARSET | GMime.FilterBestFlags.ENCODING;
    GMime.FilterBest best = new GMime.FilterBest(wanted);

    // Push the whole input through the filter; the filtered output goes
    // to a GMime.StreamNull since only the filter's findings are used.
    GMime.StreamFilter sink = new GMime.StreamFilter(new GMime.StreamNull());
    sink.add(best);
    in_stream.write_to_stream(sink);

    // Report what the filter settled on, with the encoding chosen
    // under the 7BIT constraint.
    charset = best.charset();
    encoding = best.encoding(GMime.EncodingConstraint.7BIT);
}
/**
 * Determines the best content encoding for a stream's content.
 *
 * The stream is written through a GMime.FilterBest that only tracks
 * encoding, and the encoding it reports under the 7BIT constraint is
 * returned.
 *
 * WARNING: This call does not perform async I/O, meaning it will loop on the
 * stream without relinquishing control to the event loop. Use with
 * caution.
 */
public GMime.ContentEncoding get_best_encoding(GMime.Stream in_stream) {
    GMime.FilterBest best = new GMime.FilterBest(GMime.FilterBestFlags.ENCODING);

    // The filtered output goes to a GMime.StreamNull; only the filter's
    // findings are used.
    GMime.StreamFilter sink = new GMime.StreamFilter(new GMime.StreamNull());
    sink.add(best);
    in_stream.write_to_stream(sink);

    GMime.ContentEncoding suggested = best.encoding(GMime.EncodingConstraint.7BIT);
    return suggested;
}
public string get_clean_attachment_filename(GMime.Part part) {