Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
59 changes: 42 additions & 17 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,7 @@ fn find_from_u8(line: &[u8], ix_start: usize, key: &[u8]) -> Option<usize> {
}
let ix_end = line.len() - key.len();
if ix_start <= ix_end {
for i in ix_start..ix_end {
for i in ix_start..=ix_end {
if line[i] == key[0] {
let mut success = true;
for j in 1..key.len() {
Expand All @@ -151,7 +151,8 @@ fn test_find_from_u8() {
assert_eq!(find_from_u8(b"hello world", 4, b"o"), Some(4));
assert_eq!(find_from_u8(b"hello world", 5, b"o"), Some(7));
assert_eq!(find_from_u8(b"hello world", 8, b"o"), None);
assert_eq!(find_from_u8(b"hello world", 10, b"d"), None);
assert_eq!(find_from_u8(b"hello world", 10, b"d"), Some(10));
assert_eq!(find_from_u8(b"hello world", 0, b"world"), Some(6));
}

// Like find_from_u8, but additionally filters such that `key` is at the start
Expand All @@ -176,6 +177,10 @@ fn test_find_from_u8_line_prefix() {
assert_eq!(find_from_u8_line_prefix(b"hello\nworld", 0, b"wo"), Some(6));
assert_eq!(find_from_u8_line_prefix(b"hello\nworld", 6, b"wo"), Some(6));
assert_eq!(find_from_u8_line_prefix(b"hello\nworld", 7, b"wo"), None);
assert_eq!(
find_from_u8_line_prefix(b"hello\nworld", 0, b"world"),
Some(6)
);
}

impl<'a> MailHeader<'a> {
Expand Down Expand Up @@ -922,7 +927,7 @@ impl<'a> Iterator for PartsIterator<'a> {
/// Some("This is a test email".to_string()));
/// assert_eq!(parsed.subparts.len(), 2);
/// assert_eq!(parsed.subparts[0].get_body().unwrap(),
/// "This is the plaintext version, in utf-8. Proof by Euro: \u{20AC}\r\n");
/// "This is the plaintext version, in utf-8. Proof by Euro: \u{20AC}");
/// assert_eq!(parsed.subparts[1].headers[1].get_value(), "base64");
/// assert_eq!(parsed.subparts[1].ctype.mimetype, "text/html");
/// assert!(parsed.subparts[1].get_body().unwrap().starts_with("<html>"));
Expand All @@ -932,6 +937,20 @@ pub fn parse_mail(raw_data: &[u8]) -> Result<ParsedMail, MailParseError> {
parse_mail_recursive(raw_data, false)
}

/// Strips LF or CRLF if there is one at the end of the string raw_data[ix_start..ix].
/// This is used to ensure that CRLF just before a boundary is treated as part of the
/// boundary, not the body part that was before the boundary. See discussion in
/// https://github.com/staktrace/mailparse/issues/127.
fn strip_trailing_crlf(raw_data: &[u8], ix_start: usize, mut ix: usize) -> usize {
if ix > ix_start && raw_data[ix - 1] == b'\n' {
ix -= 1;
if ix > ix_start && raw_data[ix - 1] == b'\r' {
ix -= 1;
}
}
ix
}

fn parse_mail_recursive(
raw_data: &[u8],
in_multipart_digest: bool,
Expand All @@ -956,23 +975,29 @@ fn parse_mail_recursive(
{
let in_multipart_digest = result.ctype.mimetype == "multipart/digest";
let boundary = String::from("--") + &result.ctype.params["boundary"];
if let Some(ix_body_end) = find_from_u8_line_prefix(raw_data, ix_body, boundary.as_bytes())
if let Some(ix_boundary_start) =
find_from_u8_line_prefix(raw_data, ix_body, boundary.as_bytes())
{
let ix_body_end = strip_trailing_crlf(raw_data, ix_body, ix_boundary_start);
result.body_bytes = &raw_data[ix_body..ix_body_end];
let mut ix_boundary_end = ix_body_end + boundary.len();
let mut ix_boundary_end = ix_boundary_start + boundary.len();
while let Some(ix_part_start) =
find_from_u8(raw_data, ix_boundary_end, b"\n").map(|v| v + 1)
{
// if there is no terminating boundary, assume the part end is the end of the email
let ix_part_end =
find_from_u8_line_prefix(raw_data, ix_part_start, boundary.as_bytes())
.unwrap_or(raw_data.len());
let ix_part_boundary_start =
find_from_u8_line_prefix(raw_data, ix_part_start, boundary.as_bytes());
let ix_part_end = ix_part_boundary_start
.map(|x| strip_trailing_crlf(raw_data, ix_part_start, x))
// if there is no terminating boundary, assume the part end is the end of the email
.unwrap_or(raw_data.len());

result.subparts.push(parse_mail_recursive(
&raw_data[ix_part_start..ix_part_end],
in_multipart_digest,
)?);
ix_boundary_end = ix_part_end + boundary.len();
ix_boundary_end = ix_part_boundary_start
.map(|x| x + boundary.len())
.unwrap_or(raw_data.len());
if ix_boundary_end + 2 > raw_data.len()
|| (raw_data[ix_boundary_end] == b'-' && raw_data[ix_boundary_end + 1] == b'-')
{
Expand Down Expand Up @@ -1648,7 +1673,7 @@ mod tests {
.as_bytes(),
)
.unwrap();
assert_eq!(mail.subparts[0].get_body().unwrap(), "part0\r\n");
assert_eq!(mail.subparts[0].get_body().unwrap(), "part0");
assert_eq!(mail.subparts[1].get_body().unwrap(), "part1\r\n");
}

Expand Down Expand Up @@ -2016,20 +2041,20 @@ mod tests {
part = parts.next().unwrap(); // mail.subparts[0]
assert_eq!(part.headers.len(), 0);
assert_eq!(part.ctype.mimetype, "text/plain");
assert_eq!(part.get_body_raw().unwrap(), b"blah blah blah\n");
assert_eq!(part.get_body_raw().unwrap(), b"blah blah blah");

part = parts.next().unwrap(); // mail.subparts[1]
assert_eq!(part.ctype.mimetype, "multipart/digest");

part = parts.next().unwrap(); // mail.subparts[1].subparts[0]
assert_eq!(part.headers.len(), 0);
assert_eq!(part.ctype.mimetype, "message/rfc822");
assert_eq!(part.get_body_raw().unwrap(), b"nested default part\n");
assert_eq!(part.get_body_raw().unwrap(), b"nested default part");

part = parts.next().unwrap(); // mail.subparts[1].subparts[1]
assert_eq!(part.headers.len(), 1);
assert_eq!(part.ctype.mimetype, "text/html");
assert_eq!(part.get_body_raw().unwrap(), b"nested html part\n");
assert_eq!(part.get_body_raw().unwrap(), b"nested html part");

part = parts.next().unwrap(); // mail.subparts[1].subparts[2]
assert_eq!(part.headers.len(), 1);
Expand All @@ -2038,7 +2063,7 @@ mod tests {
part = parts.next().unwrap(); // mail.subparts[1].subparts[2].subparts[0]
assert_eq!(part.headers.len(), 0);
assert_eq!(part.ctype.mimetype, "text/plain");
assert_eq!(part.get_body_raw().unwrap(), b"inside part\n");
assert_eq!(part.get_body_raw().unwrap(), b"inside part");

assert!(parts.next().is_none());
}
Expand Down Expand Up @@ -2083,13 +2108,13 @@ mod tests {
part = parts.next().unwrap(); // mail.subparts[0].subparts[0]
assert_eq!(part.headers.len(), 1);
assert_eq!(part.ctype.mimetype, "text/html");
assert_eq!(part.get_body_raw().unwrap(), b"<em>Good evening!</em>\n");
assert_eq!(part.get_body_raw().unwrap(), b"<em>Good evening!</em>");
assert_eq!(part.subparts.len(), 0);

part = parts.next().unwrap(); // mail.subparts[0].subparts[1]
assert_eq!(part.headers.len(), 1);
assert_eq!(part.ctype.mimetype, "text/plain");
assert_eq!(part.get_body_raw().unwrap(), b"Good evening!\n");
assert_eq!(part.get_body_raw().unwrap(), b"Good evening!");
assert_eq!(part.subparts.len(), 0);

assert!(parts.next().is_none());
Expand Down