Skip to content

Commit 8499acb

Browse files
committed
Added BinaryQuotedPrintableDecoder
Some mail software will attach pdf files using the quoted-printable encoding. On Linux/macOS, because MimeKit saves messages with the Unix line endings, decoding these pdfs can produce corrupted output when the pdf contains \r's of significance. These same pdfs typically decode correctly on Windows because it just so happens that unencoded \r's aren't stripped on that platform. BinaryQuotedPrintableDecoder re-inserts the \r's as needed during the decoding process to overcome this problem. Note: The current unit test does not properly test this scenario and so needs some work. Unfortunately, none of the pdfs that I have received that illustrate this problem can be made public.
1 parent 27be1bf commit 8499acb

File tree

8 files changed

+753
-9
lines changed

8 files changed

+753
-9
lines changed
Lines changed: 269 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,269 @@
1+
//
2+
// BinaryQuotedPrintableDecoder.cs
3+
//
4+
// Author: Jeffrey Stedfast <[email protected]>
5+
//
6+
// Copyright (c) 2013-2022 .NET Foundation and Contributors
7+
//
8+
// Permission is hereby granted, free of charge, to any person obtaining a copy
9+
// of this software and associated documentation files (the "Software"), to deal
10+
// in the Software without restriction, including without limitation the rights
11+
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
12+
// copies of the Software, and to permit persons to whom the Software is
13+
// furnished to do so, subject to the following conditions:
14+
//
15+
// The above copyright notice and this permission notice shall be included in
16+
// all copies or substantial portions of the Software.
17+
//
18+
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19+
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20+
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21+
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22+
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
23+
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
24+
// THE SOFTWARE.
25+
//
26+
27+
using System;
28+
29+
using MimeKit.Utils;
30+
31+
namespace MimeKit.Encodings {
32+
/// <summary>
33+
/// Incrementally decodes binary content encoded with the quoted-printable encoding.
34+
/// </summary>
35+
/// <remarks>
36+
/// Quoted-Printable is an encoding often used in MIME to encode textual content outside
37+
/// of the ASCII range in order to ensure that the text remains intact when sent
38+
/// via 7bit transports such as SMTP.
39+
/// </remarks>
40+
class BinaryQuotedPrintableDecoder : IMimeDecoder
41+
{
42+
// States of the incremental quoted-printable parser (kept in the `state` field between Decode calls):
//   PassThrough - literal bytes are copied to the output until a '=' is consumed.
//   EqualSign   - a '=' was consumed; the next byte selects a hex escape, a soft line break, or an invalid sequence.
//   SoftBreak   - "=\r" was consumed; a following '\n' completes the soft line break.
//   DecodeByte  - '=' plus one hex digit (held in `saved`) was consumed; the second hex digit is expected next.
enum QpDecoderState : byte {
43+
PassThrough,
44+
EqualSign,
45+
SoftBreak,
46+
DecodeByte
47+
}
48+
49+
QpDecoderState state;
50+
byte saved;
51+
52+
/// <summary>
/// Initialize a new instance of the <see cref="BinaryQuotedPrintableDecoder"/> class.
/// </summary>
/// <remarks>
/// Creates a new binary quoted-printable decoder in its initial state.
/// </remarks>
public BinaryQuotedPrintableDecoder ()
{
	// Nothing to set up: the fields keep their default values
	// (the first enum member, PassThrough, and a saved byte of 0).
}
61+
62+
/// <summary>
/// Clone the <see cref="BinaryQuotedPrintableDecoder"/> with its current state.
/// </summary>
/// <remarks>
/// Creates a new <see cref="BinaryQuotedPrintableDecoder"/> whose parser state and
/// saved byte are copied from the current decoder.
/// </remarks>
/// <returns>A new <see cref="BinaryQuotedPrintableDecoder"/> with identical state.</returns>
public IMimeDecoder Clone ()
{
	return new BinaryQuotedPrintableDecoder {
		state = state,
		saved = saved
	};
}
78+
79+
/// <summary>
/// Get the encoding.
/// </summary>
/// <remarks>
/// Gets the encoding that the decoder supports, which is always
/// <see cref="ContentEncoding.QuotedPrintable"/>.
/// </remarks>
/// <value>The encoding.</value>
public ContentEncoding Encoding => ContentEncoding.QuotedPrintable;
89+
90+
/// <summary>
/// Estimate the length of the output.
/// </summary>
/// <remarks>
/// Estimates the number of bytes needed to decode the specified number of input bytes,
/// taking the decoder's current state into account.
/// </remarks>
/// <returns>The estimated output length.</returns>
/// <param name="inputLength">The input length.</param>
public int EstimateOutputLength (int inputLength)
{
	// Twice the input length: Decode may emit a '\r' in front of an unencoded '\n'.
	int estimate = inputLength * 2;

	// NOTE(review): one more byte is reserved when the saved byte is '\r';
	// the reason is not evident from this file.
	if (saved == '\r')
		estimate++;

	if (state == QpDecoderState.PassThrough)
		return estimate;

	// one extra byte in case the pending '=' is not the start of a valid hex sequence
	if (state == QpDecoderState.EqualSign)
		return estimate + 1;

	// two extra bytes in case the pending =X sequence is not the start of a valid hex sequence
	return estimate + 2;
}
108+
109+
/// <summary>
/// Validate the arguments of the array-based <c>Decode</c> overload.
/// </summary>
/// <remarks>
/// Checks are made in a fixed order: null input, start index, length, null output and
/// finally the output capacity as reported by <see cref="EstimateOutputLength"/>.
/// </remarks>
void ValidateArguments (byte[] input, int startIndex, int length, byte[] output)
{
	if (input is null)
		throw new ArgumentNullException (nameof (input));

	if (startIndex < 0 || startIndex > input.Length)
		throw new ArgumentOutOfRangeException (nameof (startIndex));

	int available = input.Length - startIndex;

	if (length < 0 || length > available)
		throw new ArgumentOutOfRangeException (nameof (length));

	if (output is null)
		throw new ArgumentNullException (nameof (output));

	int required = EstimateOutputLength (length);

	if (required > output.Length)
		throw new ArgumentException ("The output buffer is not large enough to contain the decoded input.", nameof (output));
}
126+
127+
/// <summary>
/// Decode the specified input into the output buffer.
/// </summary>
/// <remarks>
/// <para>Decodes the specified input into the output buffer. An unencoded line feed
/// that does not directly follow a carriage return in the output has a carriage
/// return inserted in front of it.</para>
/// <para>Parser state is kept between calls, so an encoded sequence may be split
/// across consecutive input buffers.</para>
/// <para>The output buffer should be large enough to hold all of the
/// decoded input. For estimating the size needed for the output buffer,
/// see <see cref="EstimateOutputLength"/>.</para>
/// </remarks>
/// <returns>The number of bytes written to the output buffer.</returns>
/// <param name="input">A pointer to the beginning of the input buffer.</param>
/// <param name="length">The length of the input buffer.</param>
/// <param name="output">A pointer to the beginning of the output buffer.</param>
public unsafe int Decode (byte* input, int length, byte* output)
{
	byte* inend = input + length;
	byte* outptr = output;
	byte* inptr = input;
	byte c;

	// Note: outside of the DecodeByte state (where it temporarily holds the first hex
	// digit), `saved` is the last byte written to the output. Every path that emits
	// bytes must keep it current, otherwise the '\r' insertion below is decided
	// against a stale byte.
	while (inptr < inend) {
		switch (state) {
		case QpDecoderState.PassThrough:
			while (inptr < inend) {
				c = *inptr++;

				if (c == '=') {
					state = QpDecoderState.EqualSign;
					break;
				}

				// re-insert the '\r' in front of a bare '\n'
				if (c == (byte) '\n' && saved != (byte) '\r')
					*outptr++ = (byte) '\r';

				*outptr++ = c;
				saved = c;
			}
			break;
		case QpDecoderState.EqualSign:
			c = *inptr++;

			if (c.IsXDigit ()) {
				// first hex digit of an =XX sequence; hold it until the second digit arrives
				state = QpDecoderState.DecodeByte;
				saved = c;
			} else if (c == '=') {
				// invalid encoded sequence - pass the first '=' through undecoded and
				// stay in this state so the second '=' can begin a new sequence
				*outptr++ = (byte) '=';
				saved = (byte) '=';
			} else if (c == '\r') {
				state = QpDecoderState.SoftBreak;
			} else if (c == '\n') {
				// "=\n" soft line break: nothing is emitted
				state = QpDecoderState.PassThrough;
			} else {
				// invalid encoded sequence - pass it through undecoded
				state = QpDecoderState.PassThrough;
				*outptr++ = (byte) '=';
				*outptr++ = c;
				saved = c;
			}
			break;
		case QpDecoderState.SoftBreak:
			state = QpDecoderState.PassThrough;
			c = *inptr++;

			if (c != '\n') {
				// invalid encoded sequence - pass it through undecoded
				*outptr++ = (byte) '=';
				*outptr++ = (byte) '\r';
				*outptr++ = c;
				saved = c;
			}
			break;
		case QpDecoderState.DecodeByte:
			c = *inptr++;
			if (c.IsXDigit ()) {
				saved = saved.ToXDigit ();
				c = c.ToXDigit ();

				// the decoded byte is written as-is (no '\r' insertion) and becomes
				// the new "last byte written"
				saved = *outptr++ = (byte) ((saved << 4) | c);
			} else {
				// invalid encoded sequence - pass it through undecoded
				*outptr++ = (byte) '=';
				*outptr++ = saved;
				*outptr++ = c;
				saved = c;
			}

			state = QpDecoderState.PassThrough;
			break;
		}
	}

	return (int) (outptr - output);
}
217+
218+
/// <summary>
/// Decode the specified input into the output buffer.
/// </summary>
/// <remarks>
/// <para>Validates the arguments and then decodes <paramref name="length"/> bytes of
/// <paramref name="input"/>, starting at <paramref name="startIndex"/>, into the
/// beginning of <paramref name="output"/>.</para>
/// <para>The output buffer should be large enough to hold all of the
/// decoded input. For estimating the size needed for the output buffer,
/// see <see cref="EstimateOutputLength"/>.</para>
/// </remarks>
/// <returns>The number of bytes written to the output buffer.</returns>
/// <param name="input">The input buffer.</param>
/// <param name="startIndex">The starting index of the input buffer.</param>
/// <param name="length">The length of the input buffer.</param>
/// <param name="output">The output buffer.</param>
/// <exception cref="System.ArgumentNullException">
/// <para><paramref name="input"/> is <c>null</c>.</para>
/// <para>-or-</para>
/// <para><paramref name="output"/> is <c>null</c>.</para>
/// </exception>
/// <exception cref="System.ArgumentOutOfRangeException">
/// <paramref name="startIndex"/> and <paramref name="length"/> do not specify
/// a valid range in the <paramref name="input"/> byte array.
/// </exception>
/// <exception cref="System.ArgumentException">
/// <para><paramref name="output"/> is not large enough to contain the decoded content.</para>
/// <para>Use the <see cref="EstimateOutputLength"/> method to properly determine the
/// necessary length of the <paramref name="output"/> byte array.</para>
/// </exception>
public int Decode (byte[] input, int startIndex, int length, byte[] output)
{
	ValidateArguments (input, startIndex, length, output);

	unsafe {
		fixed (byte* source = input, destination = output) {
			byte* start = source + startIndex;

			return Decode (start, length, destination);
		}
	}
}
256+
257+
/// <summary>
/// Reset the decoder.
/// </summary>
/// <remarks>
/// Returns the decoder to its initial state: the saved byte is cleared and the
/// parser goes back to passing bytes through.
/// </remarks>
public void Reset ()
{
	saved = 0;
	state = QpDecoderState.PassThrough;
}
268+
}
269+
}

MimeKit/MimeContent.cs

Lines changed: 31 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -30,8 +30,10 @@
3030
using System.Threading;
3131
using System.Threading.Tasks;
3232

33+
using MimeKit;
3334
using MimeKit.IO;
3435
using MimeKit.IO.Filters;
36+
using MimeKit.Encodings;
3537

3638
namespace MimeKit {
3739
/// <summary>
@@ -126,6 +128,17 @@ public ContentEncoding Encoding {
126128
/// <value>The new-line format, if known.</value>
127129
public NewLineFormat? NewLineFormat { get; set; }
128130

131+
/// <summary>
132+
/// Get or set whether the content is text-based.
133+
/// </summary>
134+
/// <remarks>
135+
/// <para>This property is typically only set by the <see cref="MimeParser"/> as it parses
136+
/// the content of a <see cref="MimePart"/> and is only used as a hint when decoding
137+
/// quoted-printable as to whether to canonicalize the content stream to DOS format
138+
/// before decoding.</para>
139+
/// </remarks>
140+
// Read by CreateFilteredStream: when this is false and the encoding is quoted-printable,
// a BinaryQuotedPrintableDecoder is used instead of the default decoder filter.
// The TextPart.Content setter sets it to true for MimeContent values.
internal bool IsText { get; set; }
141+
129142
/// <summary>
130143
/// Get the content stream.
131144
/// </summary>
@@ -137,6 +150,21 @@ public Stream Stream {
137150
get; private set;
138151
}
139152

153+
// Wraps the given stream in a FilteredStream that decodes this content's encoding.
// Non-text quoted-printable content gets a BinaryQuotedPrintableDecoder; everything
// else gets the filter returned by DecoderFilter.Create.
FilteredStream CreateFilteredStream (Stream stream)
{
	var result = new FilteredStream (stream);
	bool binaryQuotedPrintable = !IsText && Encoding == ContentEncoding.QuotedPrintable;

	IMimeFilter decoder = binaryQuotedPrintable
		? new DecoderFilter (new BinaryQuotedPrintableDecoder ())
		: DecoderFilter.Create (Encoding);

	result.Add (decoder);

	return result;
}
167+
140168
/// <summary>
141169
/// Open the decoded content stream.
142170
/// </summary>
@@ -154,10 +182,7 @@ public Stream Open ()
154182

155183
Stream.Seek (0, SeekOrigin.Begin);
156184

157-
var filtered = new FilteredStream (Stream);
158-
filtered.Add (DecoderFilter.Create (Encoding));
159-
160-
return filtered;
185+
return CreateFilteredStream (Stream);
161186
}
162187

163188
/// <summary>
@@ -320,8 +345,7 @@ public void DecodeTo (Stream stream, CancellationToken cancellationToken = defau
320345

321346
CheckDisposed ();
322347

323-
using (var filtered = new FilteredStream (stream)) {
324-
filtered.Add (DecoderFilter.Create (Encoding));
348+
using (var filtered = CreateFilteredStream (stream)) {
325349
WriteTo (filtered, cancellationToken);
326350
filtered.Flush (cancellationToken);
327351
}
@@ -360,8 +384,7 @@ public async Task DecodeToAsync (Stream stream, CancellationToken cancellationTo
360384

361385
CheckDisposed ();
362386

363-
using (var filtered = new FilteredStream (stream)) {
364-
filtered.Add (DecoderFilter.Create (Encoding));
387+
using (var filtered = CreateFilteredStream (stream)) {
365388
await WriteToAsync (filtered, cancellationToken).ConfigureAwait (false);
366389
await filtered.FlushAsync (cancellationToken).ConfigureAwait (false);
367390
}

MimeKit/MimePart.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -444,7 +444,7 @@ public string FileName {
444444
/// <code language="c#" source="Examples\AttachmentExamples.cs" region="SaveAttachments" />
445445
/// </example>
446446
/// <value>The MIME content.</value>
447-
public IMimeContent Content {
447+
public virtual IMimeContent Content {
448448
get; set;
449449
}
450450

MimeKit/TextPart.cs

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -204,6 +204,26 @@ void CheckDisposed ()
204204
CheckDisposed (nameof (TextPart));
205205
}
206206

207+
/// <summary>
/// Get or set the MIME content.
/// </summary>
/// <remarks>
/// Gets or sets the MIME content. When the assigned value is a <see cref="MimeContent"/>,
/// it is flagged as text-based before being stored.
/// </remarks>
/// <example>
/// <code language="c#" source="Examples\AttachmentExamples.cs" region="SaveAttachments" />
/// </example>
/// <value>The MIME content.</value>
public override IMimeContent Content {
	get => base.Content;
	set {
		// the type pattern never matches null, so no separate null check is needed
		if (value is MimeContent content)
			content.IsText = true;

		base.Content = value;
	}
}
226+
207227
/// <summary>
208228
/// Get the text format of the content.
209229
/// </summary>

0 commit comments

Comments
 (0)