From 77f5863bdbfa14210e261dbbee3f5d141f8e610b Mon Sep 17 00:00:00 2001 From: ArkaGPL Date: Sun, 27 Sep 2020 11:33:22 +0200 Subject: [PATCH 1/2] handle lower encoding, add quoteprintable, fix attachment handling, fix html/plain decoding --- parsemail.go | 86 ++++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 73 insertions(+), 13 deletions(-) diff --git a/parsemail.go b/parsemail.go index 6a60192..8b34aff 100644 --- a/parsemail.go +++ b/parsemail.go @@ -8,6 +8,7 @@ import ( "io/ioutil" "mime" "mime/multipart" + "mime/quotedprintable" "net/mail" "strings" "time" @@ -46,9 +47,31 @@ func Parse(r io.Reader) (email Email, err error) { email.TextBody, email.HTMLBody, email.EmbeddedFiles, err = parseMultipartRelated(msg.Body, params["boundary"]) case contentTypeTextPlain: message, _ := ioutil.ReadAll(msg.Body) + var reader io.Reader + reader, err = decodeContent(strings.NewReader(string(message[:])), msg.Header.Get("Content-Transfer-Encoding")) + if err != nil { + return + } + + message, err = ioutil.ReadAll(reader) + if err != nil { + return + } + email.TextBody = strings.TrimSuffix(string(message[:]), "\n") case contentTypeTextHtml: message, _ := ioutil.ReadAll(msg.Body) + var reader io.Reader + reader, err = decodeContent(strings.NewReader(string(message[:])), msg.Header.Get("Content-Transfer-Encoding")) + if err != nil { + return + } + + message, err = ioutil.ReadAll(reader) + if err != nil { + return + } + email.HTMLBody = strings.TrimSuffix(string(message[:]), "\n") default: email.Content, err = decodeContent(msg.Body, msg.Header.Get("Content-Transfer-Encoding")) @@ -121,14 +144,22 @@ func parseMultipartRelated(msg io.Reader, boundary string) (textBody, htmlBody s switch contentType { case contentTypeTextPlain: - ppContent, err := ioutil.ReadAll(part) + decoded, err := decodeContent(part, part.Header.Get("Content-Transfer-Encoding")) + if err != nil { + return textBody, htmlBody, embeddedFiles, err + } + ppContent, err := ioutil.ReadAll(decoded) if err != nil { return textBody, htmlBody, embeddedFiles, err } textBody += strings.TrimSuffix(string(ppContent[:]), "\n") case contentTypeTextHtml: - ppContent, err := ioutil.ReadAll(part) + decoded, err := decodeContent(part, part.Header.Get("Content-Transfer-Encoding")) + if err != nil { + return textBody, htmlBody, embeddedFiles, err + } + ppContent, err := ioutil.ReadAll(decoded) if err != nil { return textBody, htmlBody, embeddedFiles, err } @@ -178,14 +209,22 @@ func parseMultipartAlternative(msg io.Reader, boundary string) (textBody, htmlBo switch contentType { case contentTypeTextPlain: - ppContent, err := ioutil.ReadAll(part) + decoded, err := decodeContent(part, part.Header.Get("Content-Transfer-Encoding")) + if err != nil { + return textBody, htmlBody, embeddedFiles, err + } + ppContent, err := ioutil.ReadAll(decoded) if err != nil { return textBody, htmlBody, embeddedFiles, err } textBody += strings.TrimSuffix(string(ppContent[:]), "\n") case contentTypeTextHtml: - ppContent, err := ioutil.ReadAll(part) + decoded, err := decodeContent(part, part.Header.Get("Content-Transfer-Encoding")) + if err != nil { + return textBody, htmlBody, embeddedFiles, err + } + ppContent, err := ioutil.ReadAll(decoded) if err != nil { return textBody, htmlBody, embeddedFiles, err } @@ -232,6 +271,14 @@ func parseMultipartMixed(msg io.Reader, boundary string) (textBody, htmlBody str return textBody, htmlBody, attachments, embeddedFiles, err } + if isAttachment(part) { + at, err := decodeAttachment(part) + if err != nil { + return textBody, htmlBody, attachments, embeddedFiles, err + } + attachments = append(attachments, at) + } + if contentType == contentTypeMultipartAlternative { textBody, htmlBody, embeddedFiles, err = parseMultipartAlternative(part, params["boundary"]) if err != nil { @@ -243,26 +290,27 @@ func parseMultipartMixed(msg io.Reader, boundary string) (textBody, htmlBody str return textBody, htmlBody, attachments, embeddedFiles, err } } else if contentType == contentTypeTextPlain { - ppContent, err := ioutil.ReadAll(part) + decoded, err := decodeContent(part, part.Header.Get("Content-Transfer-Encoding")) + if err != nil { + return textBody, htmlBody, attachments, embeddedFiles, err + } + ppContent, err := ioutil.ReadAll(decoded) if err != nil { return textBody, htmlBody, attachments, embeddedFiles, err } textBody += strings.TrimSuffix(string(ppContent[:]), "\n") } else if contentType == contentTypeTextHtml { - ppContent, err := ioutil.ReadAll(part) + decoded, err := decodeContent(part, part.Header.Get("Content-Transfer-Encoding")) if err != nil { return textBody, htmlBody, attachments, embeddedFiles, err } - - htmlBody += strings.TrimSuffix(string(ppContent[:]), "\n") - } else if isAttachment(part) { - at, err := decodeAttachment(part) + ppContent, err := ioutil.ReadAll(decoded) if err != nil { return textBody, htmlBody, attachments, embeddedFiles, err } - attachments = append(attachments, at) + htmlBody += strings.TrimSuffix(string(ppContent[:]), "\n") } else { return textBody, htmlBody, attachments, embeddedFiles, fmt.Errorf("Unknown multipart/mixed nested mime type: %s", contentType) } @@ -345,6 +393,7 @@ func decodeAttachment(part *multipart.Part) (at Attachment, err error) { } func decodeContent(content io.Reader, encoding string) (io.Reader, error) { + encoding = strings.ToLower(encoding) switch encoding { case "base64": decoded := base64.NewDecoder(base64.StdEncoding, content) @@ -353,6 +402,14 @@ func decodeContent(content io.Reader, encoding string) (io.Reader, error) { return nil, err } + return bytes.NewReader(b), nil + case "quoted-printable": + decoded := quotedprintable.NewReader(content) + b, err := ioutil.ReadAll(decoded) + if err != nil { + return nil, err + } + return bytes.NewReader(b), nil case "7bit": dd, err := ioutil.ReadAll(content) @@ -361,6 +418,8 @@ func decodeContent(content io.Reader, encoding string) (io.Reader, error) { } return bytes.NewReader(dd), nil + case "8bit": + return content, nil case "": return content, nil default: @@ -483,11 +542,12 @@ type Email struct { ResentMessageID string ContentType string - Content io.Reader + Content io.Reader HTMLBody string TextBody string Attachments []Attachment EmbeddedFiles []EmbeddedFile -} \ No newline at end of file +} + From adfc39325219455fb8c660cc22fadd1b409daabc Mon Sep 17 00:00:00 2001 From: ArkaGPL Date: Tue, 29 Sep 2020 21:50:36 +0200 Subject: [PATCH 2/2] fix go.mod --- go.mod | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/go.mod b/go.mod index 5b91a53..fbbedef 100644 --- a/go.mod +++ b/go.mod @@ -1,3 +1,3 @@ -module github.com/DusanKasan/parsemail +module github.com/ArkaGPL/parsemail -go 1.12 \ No newline at end of file +go 1.12