From b37f6f5e2f1a29d7645d4b7e92588f49650f1465 Mon Sep 17 00:00:00 2001 From: Sina Saeidi Date: Mon, 18 May 2020 18:29:48 +0200 Subject: [PATCH 1/3] use case-insensitive string to compare the encoding fields like Content-Transfer-Encoding --- parsemail.go | 6 ++++-- parsemail_test.go | 22 +++++++++++----------- 2 files changed, 15 insertions(+), 13 deletions(-) diff --git a/parsemail.go b/parsemail.go index 6a60192..e716ae3 100644 --- a/parsemail.go +++ b/parsemail.go @@ -345,6 +345,8 @@ func decodeAttachment(part *multipart.Part) (at Attachment, err error) { } func decodeContent(content io.Reader, encoding string) (io.Reader, error) { + encoding = strings.ToLower(encoding) + switch encoding { case "base64": decoded := base64.NewDecoder(base64.StdEncoding, content) @@ -483,11 +485,11 @@ type Email struct { ResentMessageID string ContentType string - Content io.Reader + Content io.Reader HTMLBody string TextBody string Attachments []Attachment EmbeddedFiles []EmbeddedFile -} \ No newline at end of file +} diff --git a/parsemail_test.go b/parsemail_test.go index 109e734..6375e9f 100644 --- a/parsemail_test.go +++ b/parsemail_test.go @@ -274,7 +274,7 @@ So, "Hello".`, messageID: "1234@local.machine.example", date: parseDate("Fri, 21 Nov 1997 09:55:06 -0600"), contentType: `image/jpeg; x-unix-mode=0644; name="image.gif"`, - content: `GIF89a;`, + content: `GIF89a;`, }, 9: { contentType: `multipart/mixed; boundary="0000000000007e2bb40587e36196"`, @@ -372,15 +372,15 @@ So, "Hello".`, htmlBody: "

", attachments: []attachmentData{ { - filename: "unencoded.csv", - contentType: "application/csv", - data: fmt.Sprintf("\n"+`"%s", "%s", "%s", "%s", "%s"`+"\n"+`"%s", "%s", "%s", "%s", "%s"`+"\n", "Some", "Data", "In", "Csv", "Format", "Foo", "Bar", "Baz", "Bum", "Poo"), + filename: "unencoded.csv", + contentType: "application/csv", + data: fmt.Sprintf("\n"+`"%s", "%s", "%s", "%s", "%s"`+"\n"+`"%s", "%s", "%s", "%s", "%s"`+"\n", "Some", "Data", "In", "Csv", "Format", "Foo", "Bar", "Baz", "Bum", "Poo"), }, }, }, 13: { contentType: "multipart/related; boundary=\"000000000000ab2e2205a26de587\"", - mailData: multipartRelatedExample, + mailData: multipartRelatedExample, subject: "Saying Hello", from: []mail.Address{ { @@ -389,7 +389,7 @@ So, "Hello".`, }, }, sender: mail.Address{ - Name: "Michael Jones", + Name: "Michael Jones", Address: "mjones@machine.example", }, to: []mail.Address{ @@ -401,7 +401,7 @@ So, "Hello".`, messageID: "1234@local.machine.example", date: parseDate("Fri, 21 Nov 1997 09:55:06 -0600"), htmlBody: "
Time for the egg.



", - textBody: "Time for the egg.", + textBody: "Time for the egg.", }, } @@ -595,9 +595,9 @@ func parseDate(in string) time.Time { } type attachmentData struct { - filename string - contentType string - data string + filename string + contentType string + data string } type embeddedFileData struct { @@ -879,7 +879,7 @@ Message-ID: <1234@local.machine.example> Content-Type: image/jpeg; x-unix-mode=0644; name="image.gif" -Content-Transfer-Encoding: base64 +Content-Transfer-Encoding: Base64 R0lGODlhAQE7` From 7ce522d87710974ef389c6f0cd966cfb18140070 Mon Sep 17 00:00:00 2001 From: Ryoto Saito Date: Mon, 21 Sep 2020 02:47:33 +0900 Subject: [PATCH 2/3] Decode text/plain and text/html body with encoding fixes #23 --- parsemail.go | 56 +++++++++++++++++++++++++++++++++++++++++++---- parsemail_test.go | 49 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 101 insertions(+), 4 deletions(-) diff --git a/parsemail.go b/parsemail.go index 6a60192..b157cc0 100644 --- a/parsemail.go +++ b/parsemail.go @@ -46,9 +46,31 @@ func Parse(r io.Reader) (email Email, err error) { email.TextBody, email.HTMLBody, email.EmbeddedFiles, err = parseMultipartRelated(msg.Body, params["boundary"]) case contentTypeTextPlain: message, _ := ioutil.ReadAll(msg.Body) + var reader io.Reader + reader, err = decodeContent(strings.NewReader(string(message[:])), msg.Header.Get("Content-Transfer-Encoding")) + if err != nil { + return + } + + message, err = ioutil.ReadAll(reader) + if err != nil { + return + } + email.TextBody = strings.TrimSuffix(string(message[:]), "\n") case contentTypeTextHtml: message, _ := ioutil.ReadAll(msg.Body) + var reader io.Reader + reader, err = decodeContent(strings.NewReader(string(message[:])), msg.Header.Get("Content-Transfer-Encoding")) + if err != nil { + return + } + + message, err = ioutil.ReadAll(reader) + if err != nil { + return + } + email.HTMLBody = strings.TrimSuffix(string(message[:]), "\n") default: email.Content, err = decodeContent(msg.Body, 
msg.Header.Get("Content-Transfer-Encoding")) @@ -121,14 +143,26 @@ func parseMultipartRelated(msg io.Reader, boundary string) (textBody, htmlBody s switch contentType { case contentTypeTextPlain: - ppContent, err := ioutil.ReadAll(part) + message, _ := ioutil.ReadAll(part) + reader, err := decodeContent(strings.NewReader(string(message[:])), part.Header.Get("Content-Transfer-Encoding")) + if err != nil { + return textBody, htmlBody, embeddedFiles, err + } + + ppContent, err := ioutil.ReadAll(reader) if err != nil { return textBody, htmlBody, embeddedFiles, err } textBody += strings.TrimSuffix(string(ppContent[:]), "\n") case contentTypeTextHtml: - ppContent, err := ioutil.ReadAll(part) + message, _ := ioutil.ReadAll(part) + reader, err := decodeContent(strings.NewReader(string(message[:])), part.Header.Get("Content-Transfer-Encoding")) + if err != nil { + return textBody, htmlBody, embeddedFiles, err + } + + ppContent, err := ioutil.ReadAll(reader) if err != nil { return textBody, htmlBody, embeddedFiles, err } @@ -178,14 +212,26 @@ func parseMultipartAlternative(msg io.Reader, boundary string) (textBody, htmlBo switch contentType { case contentTypeTextPlain: - ppContent, err := ioutil.ReadAll(part) + message, _ := ioutil.ReadAll(part) + reader, err := decodeContent(strings.NewReader(string(message[:])), part.Header.Get("Content-Transfer-Encoding")) + if err != nil { + return textBody, htmlBody, embeddedFiles, err + } + + ppContent, err := ioutil.ReadAll(reader) if err != nil { return textBody, htmlBody, embeddedFiles, err } textBody += strings.TrimSuffix(string(ppContent[:]), "\n") case contentTypeTextHtml: - ppContent, err := ioutil.ReadAll(part) + message, _ := ioutil.ReadAll(part) + reader, err := decodeContent(strings.NewReader(string(message[:])), part.Header.Get("Content-Transfer-Encoding")) + if err != nil { + return textBody, htmlBody, embeddedFiles, err + } + + ppContent, err := ioutil.ReadAll(reader) if err != nil { return textBody, htmlBody, 
embeddedFiles, err } @@ -361,6 +407,8 @@ func decodeContent(content io.Reader, encoding string) (io.Reader, error) { } return bytes.NewReader(dd), nil + case "8bit": + return content, nil case "": return content, nil default: diff --git a/parsemail_test.go b/parsemail_test.go index 109e734..a57533f 100644 --- a/parsemail_test.go +++ b/parsemail_test.go @@ -403,6 +403,31 @@ So, "Hello".`, htmlBody: "
Time for the egg.



", textBody: "Time for the egg.", }, + 14: { + contentType: "multipart/alternative; boundary=\"000000000000ab2e1f05a26de586\"", + mailData: base64Content, + subject: "Saying Hello", + from: []mail.Address{ + { + Name: "John Doe", + Address: "jdoe@machine.example", + }, + }, + sender: mail.Address{ + Name: "Michael Jones", + Address: "mjones@machine.example", + }, + to: []mail.Address{ + { + Name: "Mary Smith", + Address: "mary@example.net", + }, + }, + messageID: "1234@local.machine.example", + date: parseDate("Fri, 21 Nov 1997 09:55:06 -0600"), + htmlBody: "
<div dir=\"ltr\">👍</div>
", + textBody: "👍", + }, } for index, td := range testData { @@ -946,3 +971,27 @@ Content-Disposition: attachment; --f403045f1dcc043a44054c8e6bbf-- ` + +var base64Content = `MIME-Version: 1.0 +From: John Doe +Sender: Michael Jones +To: Mary Smith +Subject: Saying Hello +Date: Fri, 21 Nov 1997 09:55:06 -0600 +Message-ID: <1234@local.machine.example> +Content-Type: multipart/alternative; boundary="000000000000ab2e1f05a26de586" + +--000000000000ab2e1f05a26de586 +Content-Type: text/plain; charset="UTF-8" +Content-Transfer-Encoding: base64 + +8J+RjQo= + +--000000000000ab2e1f05a26de586 +Content-Type: text/html; charset="UTF-8" +Content-Transfer-Encoding: base64 + +PGRpdiBkaXI9Imx0ciI+8J+RjTwvZGl2Pgo= + +--000000000000ab2e1f05a26de586-- +` From 9e2f5b154f9f57ad498f129d454c3bf2e305e54c Mon Sep 17 00:00:00 2001 From: marcospgmelo Date: Wed, 23 Sep 2020 15:56:14 -0300 Subject: [PATCH 3/3] Find Attachments, dont matter the content-type msg --- parsemail.go | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/parsemail.go b/parsemail.go index 6a60192..b46cba7 100644 --- a/parsemail.go +++ b/parsemail.go @@ -232,6 +232,14 @@ func parseMultipartMixed(msg io.Reader, boundary string) (textBody, htmlBody str return textBody, htmlBody, attachments, embeddedFiles, err } + if isAttachment(part) { + at, err := decodeAttachment(part) + if err != nil { + return textBody, htmlBody, attachments, embeddedFiles, err + } + attachments = append(attachments, at) + } + if contentType == contentTypeMultipartAlternative { textBody, htmlBody, embeddedFiles, err = parseMultipartAlternative(part, params["boundary"]) if err != nil { @@ -256,13 +264,6 @@ func parseMultipartMixed(msg io.Reader, boundary string) (textBody, htmlBody str } htmlBody += strings.TrimSuffix(string(ppContent[:]), "\n") - } else if isAttachment(part) { - at, err := decodeAttachment(part) - if err != nil { - return textBody, htmlBody, attachments, embeddedFiles, err - } - - attachments = 
append(attachments, at) } else { return textBody, htmlBody, attachments, embeddedFiles, fmt.Errorf("Unknown multipart/mixed nested mime type: %s", contentType) } @@ -483,11 +484,11 @@ type Email struct { ResentMessageID string ContentType string - Content io.Reader + Content io.Reader HTMLBody string TextBody string Attachments []Attachment EmbeddedFiles []EmbeddedFile -} \ No newline at end of file +}