Workaround Mailman/Python/others sending F=F without CRLF line endings.

Despite RFC 3676 defining format=flowed lines as ending in CRLF, some software doesn't do that. Workaround this when parsing F=F by always converting CRLF to just LF, then just looking for LF alone as line endings. Bug 769137 * src/engine/rfc822/rfc822-message.vala (Message::mime_part_to_memory_buffer): Unconditionally convert CRLF to just LF, do so before applying the F=F filter. * src/engine/rfc822/rfc822-gmime-filter-flowed.vala: Re-write to not expect CRLF sequences, or treat CR any differently. As a bonus, also check for signature lines without requiring the whole sig line to be in the bufer.
2016-08-04 11:05:26 +10:00 · 2016-08-04 11:05:26 +10:00 · 254297cd59
commit 254297cd59
parent 9c31401606
2 changed files with 108 additions and 128 deletions
--- a/src/engine/rfc822/rfc822-gmime-filter-flowed.vala
+++ b/src/engine/rfc822/rfc822-gmime-filter-flowed.vala
@ -1,175 +1,152 @@
-/* Copyright 2016 Software Freedom Conservancy Inc.
-*
-* This software is licensed under the GNU Lesser General Public License
-* (version 2.1 or later).  See the COPYING file in this distribution. 
-*/
+/*
+ * Copyright 2016 Software Freedom Conservancy Inc.
+ * Copyright 2016 Michael James Gratton <mike@vee.net>
+ *
+ * This software is licensed under the GNU Lesser General Public License
+ * (version 2.1 or later). See the COPYING file in this distribution.
+ */

-// Filter to correctly handle flowed text as described in RFC 2646.
+/**
+ * Filter to correctly handle flowed text as described in RFC 3676.
+ *
+ * This class assumes that CRLF sequences have been replaced with a
+ * single LF character.
+ *
+ * The character `Geary.RFC822.Utils.QUOTE_MARKER` will be output as the
+ * quote character instead of `>` if the `to_html` constructor
+ * argument is set to `true`.
+ */
 private class Geary.RFC822.FilterFlowed : GMime.Filter {
+
    private char quote_marker;
    private bool delsp;
-    
-    // Invariant: True iff the last character seen was a space OR the penultimate character seen
-    // was a space and the last character seen was a \r.
-    private bool saw_space;
-    
-    // Invariant: True iff the last character seen was a \r.
-    private bool saw_cr;
-    
-    // Invariant: True iff the last \r\n encountered was preceded by a space.
-    private bool last_line_was_flowed;
-    
-    // Invariant: True iff we are in the prefix for the first line.
-    private bool in_first_prefix;
-    
-    // Invariant: True iff we are either at the beginning of a line, or all characters seen so far
-    // have been '>'s.
-    private bool in_prefix;
-    
-    // Invariant: The quote depth of the last complete line seen, or 0 if we have not yet seen a
-    // complete line.
-    private uint last_quote_level;
-    
-    // Invariant: The number of '>'s seen so far if we are parsing the prefix, or 0 if we are not
-    // parsing the prefix.
-    private uint current_quote_level;
-    
+
+    // Invariant: True iff the last character seen was a space.
+    private bool saw_space = false;
+
+    // Invariant: True iff we are either at the beginning of a line,
+    // or all characters seen so far have been '>'s.
+    private bool in_prefix = false;
+
+    // Invariant: The number of '>'s seen after parsing the prefix.
+    private uint quote_level = 0;
+
+    // Invariant: The number of consecutive '-'s seen.
+    private uint sig_level = 0;
+
    public FilterFlowed(bool to_html, bool delsp) {
-        quote_marker = to_html ? Geary.RFC822.Utils.QUOTE_MARKER : '>';
+        this.quote_marker = to_html ? Geary.RFC822.Utils.QUOTE_MARKER : '>';
        this.delsp = delsp;
-        reset();
    }
-    
+
    public override void reset() {
-        saw_space = false;
-        saw_cr = false;
-        last_line_was_flowed = false;
-        in_first_prefix = true;
-        in_prefix = true;
-        last_quote_level = 0;
-        current_quote_level = 0;
+        this.saw_space = false;
+        this.in_prefix = true;
+        this.quote_level = 0;
+        this.sig_level = 0;
    }
-    
+
    public override GMime.Filter copy() {
        FilterFlowed new_filter = new FilterFlowed(quote_marker == Geary.RFC822.Utils.QUOTE_MARKER, delsp);
-        
-        new_filter.saw_space = saw_space;
-        new_filter.saw_cr = saw_cr;
-        new_filter.last_line_was_flowed = last_line_was_flowed;
-        new_filter.in_first_prefix = in_first_prefix;
-        new_filter.in_prefix = in_prefix;
-        new_filter.last_quote_level = last_quote_level;
-        new_filter.current_quote_level = current_quote_level;
-        
+
+        new_filter.saw_space = this.saw_space;
+        new_filter.in_prefix = this.in_prefix;
+        new_filter.quote_level = this.quote_level;
+        new_filter.sig_level = this.sig_level;
+
        return new_filter;
    }
-    
+
    public override void filter(char[] inbuf, size_t prespace, out unowned char[] processed_buffer,
        out size_t outprespace) {
-        
-        // Worst-case scenario: We are about to leave the prefix, resulting in an extra
-        // (current_quote_level + 2) characters being written.
-        set_size(inbuf.length + current_quote_level + 2, false);
-        
+
+        // Worst-case scenario: We are about to leave the prefix,
+        // resulting in an extra (quote_level + 2) characters being
+        // written.
+        set_size(inbuf.length + this.quote_level + 2, false);
+
        uint out_index = 0;
        for (uint i = 0; i < inbuf.length; i++) {
            char c = inbuf[i];
-            
-            if (in_prefix) {
+
+            if (this.in_prefix) {
                if (c == '>') {
-                    // Don't write the prefix right away, because we don't want to write it if the
-                    // last line was flowed.
-                    current_quote_level++;
+                    // Don't write the prefix right away, because we
+                    // don't want to write it if the last line was
+                    // flowed.
+                    this.quote_level++;
                    continue;
                }
-                
-                if (in_first_prefix) {
-                    for (uint j = 0; j < current_quote_level; j++)
-                        outbuf[out_index++] = quote_marker;
-                } else if (!last_line_was_flowed || current_quote_level != last_quote_level ||
-                    (out_index > 3 && Geary.RFC822.Utils.comp_char_arr_slice(outbuf, out_index - 4, "\n-- "))) {
-                    // We encountered a non-flowed line-break, so insert a CRLF.
-                    outbuf[out_index++] = '\r';
-                    outbuf[out_index++] = '\n';
-                    
-                    // We haven't been writing the quote prefix as we've scanned it, so we need to
-                    // write it now.
-                    for (uint j = 0; j < current_quote_level; j++)
-                        outbuf[out_index++] = quote_marker;
-                }
-                
-                // We saw a character other than '>', so we're done scanning the prefix.
-                in_first_prefix = false;
-                in_prefix = false;
-                last_quote_level = current_quote_level;
-                current_quote_level = 0;
-                
+
+                // Found something other than a '>', so we are out of
+                // the prefix and need to write out an appropriate
+                // number of quote chars.
+
+                for (uint j = 0; j < this.quote_level; j++)
+                    outbuf[out_index++] = this.quote_marker;
+
+                this.in_prefix = false;
+
                // A single space following the prefix is space stuffed
                if (c == ' ')
                    continue;
            }
-            
+
            switch (c) {
                case ' ':
-                    if (saw_space)
+                    if (this.saw_space) {
+                        // Two spaces in a row, so output the last
+                        // space and clear the signature test
                        outbuf[out_index++] = ' ';
-                    saw_space = true;
-                    saw_cr = false;
-                    // We can't write the space yet, since it might be removed if DelSp is true.
-                break;
-                
-                case '\r':
-                    if (saw_cr) {
-                        // The last 2 charcters were '\r\r', so we can't have '\r\n'.
-                        if (saw_space)
-                            outbuf[out_index++] = ' ';
-                        saw_space = false;
-                        // We didn't write the preceding CR when we saw it, so we write it now.
-                        outbuf[out_index++] = '\r';
+                        this.sig_level = 0;
                    }
-                    
-                    saw_cr = true;
-                    // We can't write the CR until we know it isn't part of a flowed line.
+                    // We can't write the space yet, since it might be
+                    // removed if DelSp is true. Don't clear the
+                    // signature test since we might be in one.
+                    this.saw_space = true;
                break;
-                
+
                case '\n':
-                    if (saw_cr) {
-                        // If the last 3 charcters were ' \r\n', the line was flowed.
-                        last_line_was_flowed = saw_space;
-                        if (saw_space && !delsp)
+                    if (this.saw_space && this.sig_level != 2) {
+                        // We have a SP+LF sequence that wasn't part
+                        // of a signature, so treat as a soft break
+                        // and flow line on to the next one.
+                        if (!this.delsp)
                            outbuf[out_index++] = ' ';
-                        
-                        // We are done with this line, so we are in the prefix of the next line
-                        // (and have not yet seen any '>' charcacters in the next line).
-                        in_prefix = true;
-                        current_quote_level = 0;
                    } else {
-                        // The LF wasn't part of a CRLF, so just write it.
-                        if (saw_space)
+                        // Else this is a hard break.
+                        if (this.saw_space) {
                            outbuf[out_index++] = ' ';
+                        }
                        outbuf[out_index++] = c;
                    }
-                    
-                    saw_space = false;
-                    saw_cr = false;
+
+                    // We are done with this line, so we are in the
+                    // prefix of the next line (and have not yet seen
+                    // any '>' characters in the next line).
+                    this.in_prefix = true;
+                    this.quote_level = 0;
+                    this.saw_space = false;
+                    this.sig_level = 0;
                break;
-                
+
                default:
-                    // We cannot be in a ' \r\n' sequence, so just write the character.
-                    if (saw_space)
+                    // We cannot be in a ' \n' sequence, so just write
+                    // the character.
+                    if (this.saw_space)
                        outbuf[out_index++] = ' ';
-                    saw_space = false;
-                    saw_cr = false;
                    outbuf[out_index++] = c;
+                    this.sig_level = (c == '-') ? this.sig_level + 1 : 0;
+                    this.saw_space = false;
                break;
            }
        }
-        
+
        // Slicing the buffer is important, because the buffer is not null-terminated,
        processed_buffer = outbuf[0:out_index];
        outprespace = this.outpre;
    }
-    
+
    public override void complete(char[] inbuf, size_t prespace, out unowned char[] processed_buffer,
        out size_t outprespace) {
        filter(inbuf, prespace, out processed_buffer, out outprespace);
--- a/src/engine/rfc822/rfc822-message.vala
+++ b/src/engine/rfc822/rfc822-message.vala
@ -932,14 +932,17 @@ public class Geary.RFC822.Message : BaseObject {
        
        bool flowed = (content_type != null) ? content_type.params.has_value_ci("format", "flowed") : false;
        bool delsp = (content_type != null) ? content_type.params.has_value_ci("DelSp", "yes") : false;
+
+        // Unconditionally remove the CR's in any CRLF sequence, since
+        // they are effectively a wire encoding.
+        stream_filter.add(new GMime.FilterCRLF(false, false));
+
        if (flowed)
            stream_filter.add(new Geary.RFC822.FilterFlowed(to_html, delsp));
-        
+
        if (to_html) {
            if (!flowed)
                stream_filter.add(new Geary.RFC822.FilterPlain());
-            // HTML filter does stupid stuff to \r, so get rid of them.
-            stream_filter.add(new GMime.FilterCRLF(false, false));
            stream_filter.add(new GMime.FilterHTML(
                GMime.FILTER_HTML_CONVERT_URLS | GMime.FILTER_HTML_CONVERT_ADDRESSES, 0));
            stream_filter.add(new Geary.RFC822.FilterBlockquotes());