diff --git a/parsemail.go b/parsemail.go index bc19e93..6289f70 100644 --- a/parsemail.go +++ b/parsemail.go @@ -8,6 +8,7 @@ import ( "io/ioutil" "mime" "mime/multipart" + "mime/quotedprintable" "net/mail" "strings" "time" @@ -38,6 +39,8 @@ func Parse(r io.Reader) (email Email, err error) { return } + cte := msg.Header.Get("Content-Transfer-Encoding") + switch contentType { case contentTypeMultipartMixed: email.TextBody, email.HTMLBody, email.Attachments, email.EmbeddedFiles, err = parseMultipartMixed(msg.Body, params["boundary"]) @@ -47,14 +50,36 @@ func Parse(r io.Reader) (email Email, err error) { email.TextBody, email.HTMLBody, email.EmbeddedFiles, err = parseMultipartRelated(msg.Body, params["boundary"]) case contentTypeTextPlain: message, _ := ioutil.ReadAll(msg.Body) + var reader io.Reader + reader, err = decodeContent(strings.NewReader(string(message[:])), cte) + if err != nil { + return + } + + message, err = ioutil.ReadAll(reader) + if err != nil { + return + } + email.TextBody = strings.TrimSuffix(string(message[:]), "\n") case contentTypeTextHtml: message, _ := ioutil.ReadAll(msg.Body) + var reader io.Reader + reader, err = decodeContent(strings.NewReader(string(message[:])), cte) + if err != nil { + return + } + + message, err = ioutil.ReadAll(reader) + if err != nil { + return + } + email.HTMLBody = strings.TrimSuffix(string(message[:]), "\n") case contentTypeOctetStream: email.Attachments, err = parseAttachmentOnlyEmail(msg.Body, msg.Header) default: - email.Content, err = decodeContent(msg.Body, msg.Header.Get("Content-Transfer-Encoding")) + email.Content, err = decodeContent(msg.Body, cte) } return @@ -134,7 +159,7 @@ func parseAttachmentOnlyEmail(body io.Reader, header mail.Header) (attachments [ func parseMultipartRelated(msg io.Reader, boundary string) (textBody, htmlBody string, embeddedFiles []EmbeddedFile, err error) { pmr := multipart.NewReader(msg, boundary) for { - part, err := pmr.NextPart() + part, err := pmr.NextRawPart() if err == 
io.EOF { break @@ -142,6 +167,8 @@ func parseMultipartRelated(msg io.Reader, boundary string) (textBody, htmlBody s return textBody, htmlBody, embeddedFiles, err } + cte := part.Header.Get("Content-Transfer-Encoding") + contentType, params, err := mime.ParseMediaType(part.Header.Get("Content-Type")) if err != nil { return textBody, htmlBody, embeddedFiles, err @@ -149,14 +176,22 @@ func parseMultipartRelated(msg io.Reader, boundary string) (textBody, htmlBody s switch contentType { case contentTypeTextPlain: - ppContent, err := ioutil.ReadAll(part) + decoded, err := decodeContent(part, cte) + if err != nil { + return textBody, htmlBody, embeddedFiles, err + } + ppContent, err := ioutil.ReadAll(decoded) if err != nil { return textBody, htmlBody, embeddedFiles, err } textBody += strings.TrimSuffix(string(ppContent[:]), "\n") case contentTypeTextHtml: - ppContent, err := ioutil.ReadAll(part) + decoded, err := decodeContent(part, cte) + if err != nil { + return textBody, htmlBody, embeddedFiles, err + } + ppContent, err := ioutil.ReadAll(decoded) if err != nil { return textBody, htmlBody, embeddedFiles, err } @@ -191,7 +226,7 @@ func parseMultipartRelated(msg io.Reader, boundary string) (textBody, htmlBody s func parseMultipartAlternative(msg io.Reader, boundary string) (textBody, htmlBody string, embeddedFiles []EmbeddedFile, err error) { pmr := multipart.NewReader(msg, boundary) for { - part, err := pmr.NextPart() + part, err := pmr.NextRawPart() if err == io.EOF { break @@ -199,6 +234,8 @@ func parseMultipartAlternative(msg io.Reader, boundary string) (textBody, htmlBo return textBody, htmlBody, embeddedFiles, err } + cte := part.Header.Get("Content-Transfer-Encoding") + contentType, params, err := mime.ParseMediaType(part.Header.Get("Content-Type")) if err != nil { return textBody, htmlBody, embeddedFiles, err @@ -206,14 +243,22 @@ func parseMultipartAlternative(msg io.Reader, boundary string) (textBody, htmlBo switch contentType { case contentTypeTextPlain: - 
ppContent, err := ioutil.ReadAll(part) + decoded, err := decodeContent(part, cte) + if err != nil { + return textBody, htmlBody, embeddedFiles, err + } + ppContent, err := ioutil.ReadAll(decoded) if err != nil { return textBody, htmlBody, embeddedFiles, err } textBody += strings.TrimSuffix(string(ppContent[:]), "\n") case contentTypeTextHtml: - ppContent, err := ioutil.ReadAll(part) + decoded, err := decodeContent(part, cte) + if err != nil { + return textBody, htmlBody, embeddedFiles, err + } + ppContent, err := ioutil.ReadAll(decoded) if err != nil { return textBody, htmlBody, embeddedFiles, err } @@ -248,7 +293,7 @@ func parseMultipartAlternative(msg io.Reader, boundary string) (textBody, htmlBo func parseMultipartMixed(msg io.Reader, boundary string) (textBody, htmlBody string, attachments []Attachment, embeddedFiles []EmbeddedFile, err error) { mr := multipart.NewReader(msg, boundary) for { - part, err := mr.NextPart() + part, err := mr.NextRawPart() if err == io.EOF { break } else if err != nil { @@ -265,11 +310,21 @@ func parseMultipartMixed(msg io.Reader, boundary string) (textBody, htmlBody str continue } + cte := part.Header.Get("Content-Transfer-Encoding") + contentType, params, err := mime.ParseMediaType(part.Header.Get("Content-Type")) if err != nil { return textBody, htmlBody, attachments, embeddedFiles, err } + if isAttachment(part) { + at, err := decodeAttachment(part) + if err != nil { + return textBody, htmlBody, attachments, embeddedFiles, err + } + attachments = append(attachments, at) + } + if contentType == contentTypeMultipartAlternative { textBody, htmlBody, embeddedFiles, err = parseMultipartAlternative(part, params["boundary"]) if err != nil { @@ -281,14 +336,22 @@ func parseMultipartMixed(msg io.Reader, boundary string) (textBody, htmlBody str return textBody, htmlBody, attachments, embeddedFiles, err } } else if contentType == contentTypeTextPlain { - ppContent, err := ioutil.ReadAll(part) + decoded, err := decodeContent(part, cte) + if 
err != nil { + return textBody, htmlBody, attachments, embeddedFiles, err + } + ppContent, err := ioutil.ReadAll(decoded) if err != nil { return textBody, htmlBody, attachments, embeddedFiles, err } textBody += strings.TrimSuffix(string(ppContent[:]), "\n") } else if contentType == contentTypeTextHtml { - ppContent, err := ioutil.ReadAll(part) + decoded, err := decodeContent(part, cte) + if err != nil { + return textBody, htmlBody, attachments, embeddedFiles, err + } + ppContent, err := ioutil.ReadAll(decoded) if err != nil { return textBody, htmlBody, attachments, embeddedFiles, err } @@ -383,17 +446,24 @@ func decodeContent(content io.Reader, encoding string) (io.Reader, error) { if err != nil { return nil, err } - return bytes.NewReader(b), nil - case "7bit", "8bit", "binary": - dd, err := ioutil.ReadAll(content) + case "quoted-printable": + decoded := quotedprintable.NewReader(content) + b, err := ioutil.ReadAll(decoded) if err != nil { return nil, err } - - return bytes.NewReader(dd), nil - case "": - return content, nil + return bytes.NewReader(b), nil + // The values "8bit", "7bit", and "binary" (and an absent header) all imply that NO encoding has been performed, so the data is read as raw bytes and must not be passed through any decoder. + // "7bit" means that the data is all represented as short lines of US-ASCII data. + // "8bit" means that the lines are short, but there may be non-ASCII characters (octets with the high-order bit set). + // "Binary" means that not only may non-ASCII characters be present, but also that the lines are not necessarily short enough for SMTP transport. + case "", "7bit", "8bit", "binary": + b, err := ioutil.ReadAll(content) + if err != nil { + return nil, err + } + return bytes.NewReader(b), nil default: return nil, fmt.Errorf("unknown encoding: %s", encoding) } diff --git a/parsemail_test.go b/parsemail_test.go index cb088aa..973efa7 100644 --- a/parsemail_test.go +++ b/parsemail_test.go @@ -372,15 +372,15 @@ So, "Hello".`, htmlBody: "

", attachments: []attachmentData{ { - filename: "unencoded.csv", - contentType: "application/csv", - data: fmt.Sprintf("\n"+`"%s", "%s", "%s", "%s", "%s"`+"\n"+`"%s", "%s", "%s", "%s", "%s"`+"\n", "Some", "Data", "In", "Csv", "Format", "Foo", "Bar", "Baz", "Bum", "Poo"), + filename: "unencoded.csv", + contentType: "application/csv", + data: fmt.Sprintf("\n"+`"%s", "%s", "%s", "%s", "%s"`+"\n"+`"%s", "%s", "%s", "%s", "%s"`+"\n", "Some", "Data", "In", "Csv", "Format", "Foo", "Bar", "Baz", "Bum", "Poo"), }, }, }, 13: { contentType: "multipart/related; boundary=\"000000000000ab2e2205a26de587\"", - mailData: multipartRelatedExample, + mailData: multipartRelatedExample, subject: "Saying Hello", from: []mail.Address{ { @@ -389,7 +389,7 @@ So, "Hello".`, }, }, sender: mail.Address{ - Name: "Michael Jones", + Name: "Michael Jones", Address: "mjones@machine.example", }, to: []mail.Address{ @@ -401,7 +401,7 @@ So, "Hello".`, messageID: "1234@local.machine.example", date: parseDate("Fri, 21 Nov 1997 09:55:06 -0600"), htmlBody: "
Time for the egg.



", - textBody: "Time for the egg.", + textBody: "Time for the egg.", }, 14: { mailData: data3, @@ -563,10 +563,14 @@ So, "Hello".`, t.Error(err) } - if ra.Filename == ad.filename && string(b) == ad.data && ra.ContentType == ad.contentType { + if ra.Filename == ad.filename && ra.ContentType == ad.contentType { found = true attachs = append(attachs[:i], attachs[i+1:]...) } + + if string(b) != ad.data { + t.Errorf("[Test Case %v] Bad data for attachment: \nEXPECTED:\n%s\nHAVE:\n%s", index, ad.data, string(b)) + } } if !found { @@ -623,9 +627,9 @@ func parseDate(in string) time.Time { } type attachmentData struct { - filename string - contentType string - data string + filename string + contentType string + data string } type embeddedFileData struct { @@ -869,8 +873,8 @@ Message-ID: <5678.21-Nov-1997@example.com> Hi everyone. ` -//todo: not yet implemented in net/mail -//once there is support for this, add it +// todo: not yet implemented in net/mail +// once there is support for this, add it var rfc5322exampleA13 = `From: Pete To: A Group:Ed Jones ,joe@where.test,John ; Cc: Undisclosed recipients:; @@ -880,7 +884,7 @@ Message-ID: Testing. ` -//we skipped the first message bcause it's the same as A 1.1 +// we skipped the first message bcause it's the same as A 1.1 var rfc5322exampleA2a = `From: Mary Smith To: John Doe Reply-To: "Mary Smith: Personal Account"