Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve error enum naming #36

Closed
wants to merge 13 commits into from
16 changes: 16 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,22 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## [Unreleased]

- Renaming the following enum values:
- `LexerError::ParseBytesError` => `LexerError::InvalidBytes`
- `LexerError::ParseEncodedWordTooLongError` => `LexerError::EncodedWordTooLong`

- `ParserError::ParseEncodingTooBigError` => `ParserError::EncodingTooBig`
- `ParserError::ParseEncodingEmptyError` => `ParserError::EncodingEmpty`
- `ParserError::ParseEncodingError` => `ParserError::UnexpectedEncoding`

- `EvaluatorError::DecodeUtf8Error` => `EvaluatorError::DecodeUtf8`
- `EvaluatorError::DecodeBase64Error` => `EvaluatorError::DecodeBase64`
- `EvaluatorError::DecodeQuotedPrintableError` => `EvaluatorError::DecodeQuotedPrintable`

## [1.0.1] - 2023-09-17
- extending docs about some structs
- fixing `Display` implementation of `TooLongEncodedWord`

## [1.0.0] - 2023-09-16

### Changed
Expand Down
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
[package]
name = "rfc2047-decoder"
description = "Rust library for decoding RFC 2047 MIME Message Headers."
version = "1.0.0" # do not forget html_root_url
version = "1.0.1" # do not forget html_root_url
authors = ["soywod <[email protected]>", "TornaxO7 <[email protected]>"]
edition = "2018"
repository = "https://github.com/soywod/rfc2047-decoder"
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ fn main() {
// using the decoder builder (custom options)
assert_eq!(
rfc2047_decoder::Decoder::new()
.skip_encoded_word_length(true)
.too_long_encoded_word_strategy(rfc2047_decoder::RecoverStrategy::Skip)
.decode(encoded_str.as_bytes())
.unwrap(),
decoded_str
Expand Down
13 changes: 8 additions & 5 deletions src/decoder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,9 @@ type Result<T> = result::Result<T, Error>;
///
/// let decoder = Decoder::new()
/// .too_long_encoded_word_strategy(RecoverStrategy::Skip);
/// let decoded_str = decoder.decode("=?UTF-8?B?c3Ry?=");
/// let decoded_str = decoder.decode("=?UTF-8?B?c3Ry?=").unwrap();
///
/// assert_eq!(decoded_str, "str");
/// ```
#[derive(Debug, Clone, Eq, PartialEq)]
pub struct Decoder {
Expand Down Expand Up @@ -117,16 +119,17 @@ impl Decoder {
///
/// ```rust
/// use rfc2047_decoder::{Decoder, RecoverStrategy, Error::{self, Lexer}};
/// use rfc2047_decoder::LexerError::ParseEncodedWordTooLongError;
/// use rfc2047_decoder::LexerError::EncodedWordTooLong;
/// use rfc2047_decoder::TooLongEncodedWords;
///
/// let message = "=?utf-8?B?TG9yZW0gaXBzdW0gZG9sb3Igc2l0IGFtZXQsIGNvbnNlY3RldHVyIGFkaXBpc2NpbmcgZWxpdC4gVXQgaW50ZXJkdW0gcXVhbSBldSBmYWNpbGlzaXMgb3JuYXJlLg==?=";
/// let message = "=?utf-8?B?TG9yZW0gaXBzdW0gZG9sb3Igc2l0IGFtZXQsIGNvbnNlY3RldHVyIGFkaXBpc2NpbmcgZWxpdC4gVXQgaW50ZXJkdW0gcXVhbSBldSBmYWNpbGlzaXMgb3JuYXJlLg==?="
/// .to_string();
/// // `RecoverStrategy::Abort` is the default strategy
/// let decoder = Decoder::new();
///
/// let parsed = decoder.decode(message);
/// let parsed = decoder.decode(message.clone());
///
/// assert_eq!(parsed, Err(Lexer(ParseEncodedWordTooLongError(TooLongEncodedWords(vec!["=?utf-8?TG9yZW0gaXBzdW0gZG9sb3Igc2l0IGFtZXQsIGNvbnNlY3RldHVyIGFkaXBpc2NpbmcgZWxpdC4gVXQgaW50ZXJkdW0gcXVhbSBldSBmYWNpbGlzaXMgb3JuYXJlLg==?B?=".to_string()])))));
/// assert_eq!(parsed, Err(Lexer(EncodedWordTooLong(TooLongEncodedWords(vec![message])))));
/// ```
pub fn too_long_encoded_word_strategy(mut self, strategy: RecoverStrategy) -> Self {
self.too_long_encoded_word = strategy;
Expand Down
9 changes: 5 additions & 4 deletions src/evaluator.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
use base64::{
alphabet,
engine::{GeneralPurpose, GeneralPurposeConfig}, Engine,
engine::{GeneralPurpose, GeneralPurposeConfig},
Engine,
};
use charset::Charset;
use std::{result, string};
Expand All @@ -12,11 +13,11 @@ use crate::parser::{ClearText, Encoding, ParsedEncodedWord, ParsedEncodedWords};
#[derive(Error, Debug, PartialEq)]
pub enum Error {
#[error(transparent)]
DecodeUtf8Error(#[from] string::FromUtf8Error),
DecodeUtf8(#[from] string::FromUtf8Error),
#[error(transparent)]
DecodeBase64Error(#[from] base64::DecodeError),
DecodeBase64(#[from] base64::DecodeError),
#[error(transparent)]
DecodeQuotedPrintableError(#[from] quoted_printable::QuotedPrintableError),
DecodeQuotedPrintable(#[from] quoted_printable::QuotedPrintableError),
}

type Result<T> = result::Result<T, Error>;
Expand Down
4 changes: 2 additions & 2 deletions src/lexer/encoded_word.rs
Original file line number Diff line number Diff line change
Expand Up @@ -55,8 +55,8 @@ impl EncodedWord {
impl Display for EncodedWord {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
let charset = String::from_utf8(self.charset.clone()).unwrap();
let encoding = String::from_utf8(self.encoded_text.clone()).unwrap();
let encoded_text = String::from_utf8(self.encoding.clone()).unwrap();
let encoding = String::from_utf8(self.encoding.clone()).unwrap();
let encoded_text = String::from_utf8(self.encoded_text.clone()).unwrap();

write!(f, "=?{}?{}?{}?=", charset, encoding, encoded_text)
}
Expand Down
33 changes: 27 additions & 6 deletions src/lexer/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,27 @@ const SPACE: u8 = b' ';

/// A helper struct which implements [std::fmt::Display] for `Vec<String>` and
/// which contains the encoded words which are too long as a `String`.
///
/// # Example
/// ```
/// use rfc2047_decoder::{self, decode, RecoverStrategy, LexerError};
///
/// // the first string and the third string are more than 75 characters, hence
/// // they are actually invalid encoded words
/// let message = concat![
/// "=?utf-8?B?bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb==?=",
/// "among us",
/// "=?utf-8?B?aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa==?=",
/// ];

/// let result = decode(message).unwrap_err();
/// if let rfc2047_decoder::Error::Lexer(LexerError::EncodedWordTooLong(invalid_encoded_words)) = result {
/// assert_eq!(invalid_encoded_words.0[0], "=?utf-8?B?bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb==?=");
/// assert_eq!(invalid_encoded_words.0[1], "=?utf-8?B?aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa==?=");
/// } else {
/// assert!(false);
/// }
/// ```
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct TooLongEncodedWords(pub Vec<String>);

Expand Down Expand Up @@ -42,9 +63,9 @@ impl Display for TooLongEncodedWords {
#[derive(Error, Debug, Clone, PartialEq)]
pub enum Error {
#[error("cannot parse bytes into tokens")]
ParseBytesError(Vec<Simple<u8>>),
InvalidBytes(Vec<Simple<u8>>),
#[error("Cannot parse the following encoded words, because they are too long: {0}")]
ParseEncodedWordTooLongError(TooLongEncodedWords),
EncodedWordTooLong(TooLongEncodedWords),
}

type Result<T> = result::Result<T, Error>;
Expand All @@ -70,7 +91,7 @@ impl Token {
pub fn run(encoded_bytes: &[u8], decoder: Decoder) -> Result<Tokens> {
let tokens = get_parser(&decoder)
.parse(encoded_bytes)
.map_err(Error::ParseBytesError)?;
.map_err(Error::InvalidBytes)?;

validate_tokens(tokens, &decoder)
}
Expand Down Expand Up @@ -150,7 +171,7 @@ fn get_especials() -> HashSet<u8> {

fn validate_tokens(tokens: Tokens, decoder: &Decoder) -> Result<Tokens> {
if let Some(too_long_encoded_words) = get_too_long_encoded_words(&tokens, decoder) {
return Err(Error::ParseEncodedWordTooLongError(too_long_encoded_words));
return Err(Error::EncodedWordTooLong(too_long_encoded_words));
}

Ok(tokens)
Expand Down Expand Up @@ -178,7 +199,7 @@ fn get_too_long_encoded_words(tokens: &Tokens, decoder: &Decoder) -> Option<TooL
#[cfg(test)]
mod tests {
use crate::{
lexer::{encoded_word::EncodedWord, Token, run},
lexer::{encoded_word::EncodedWord, run, Token},
Decoder,
};

Expand Down Expand Up @@ -351,7 +372,7 @@ mod tests {

assert_eq!(
parsed,
Err(Error::ParseEncodedWordTooLongError(
Err(Error::EncodedWordTooLong(
TooLongEncodedWords::new(vec![EncodedWord {
charset: "ISO-8859-1".as_bytes().to_vec(),
encoding: "Q".as_bytes().to_vec(),
Expand Down
33 changes: 29 additions & 4 deletions src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,18 +1,43 @@
#![doc(html_root_url = "https://docs.rs/rfc2047-decoder/1.0.0")]
#![doc(html_root_url = "https://docs.rs/rfc2047-decoder/1.0.1")]
//! # Introduction
//! This crate provides a [Decoder] and the function [decode], in order to
//! decode encoded words as described in the [RFC 2047].
//!
//! [RFC 2047]: https://datatracker.ietf.org/doc/html/rfc2047
//!
//! # Where to look
//! You will likely want to start looking into [Decoder] and/or the [decode]
//! function to use this crate.

mod decoder;
pub use decoder::{Decoder, RecoverStrategy, Error};
pub use decoder::{Decoder, Error, RecoverStrategy};

mod evaluator;
mod lexer;
mod parser;

pub use lexer::{TooLongEncodedWords, Error as LexerError};
pub use parser::Error as ParserError;
pub use evaluator::Error as EvaluatorError;
pub use lexer::{Error as LexerError, TooLongEncodedWords};
pub use parser::Error as ParserError;

/// Decodes the given RFC 2047 MIME Message Header encoded string
/// using a default decoder.
///
/// This function equals doing `Decoder::new().decode`.
///
/// # Example
/// ```
/// use rfc2047_decoder::{decode, Decoder};
///
/// let encoded_message = "=?ISO-8859-1?Q?hello_there?=".as_bytes();
/// let decoded_message = "hello there";
///
/// // This ...
/// assert_eq!(decode(encoded_message).unwrap(), decoded_message);
///
/// // ... equals this:
/// assert_eq!(Decoder::new().decode(encoded_message).unwrap(), decoded_message);
/// ```
pub fn decode<T: AsRef<[u8]>>(encoded_str: T) -> Result<String, Error> {
Decoder::new().decode(encoded_str)
}
18 changes: 10 additions & 8 deletions src/parser.rs
Original file line number Diff line number Diff line change
@@ -1,17 +1,17 @@
use charset::Charset;
use std::{convert::TryFrom, result};

use crate::lexer::{Token, Tokens, encoded_word};
use crate::lexer::{encoded_word, Token, Tokens};

/// All errors which the parser can throw.
#[derive(thiserror::Error, Debug, Clone, PartialEq)]
pub enum Error {
#[error("cannot parse encoding: encoding is bigger than a char")]
ParseEncodingTooBigError,
EncodingTooBig,
#[error("cannot parse encoding: encoding is empty")]
ParseEncodingEmptyError,
EncodingEmpty,
#[error("cannot parse encoding {0}: B or Q is expected")]
ParseEncodingError(char),
UnexpectedEncoding(char),
}

type Result<T> = result::Result<T, Error>;
Expand All @@ -36,16 +36,16 @@ impl TryFrom<Vec<u8>> for Encoding {

fn try_from(token: Vec<u8>) -> Result<Self> {
if token.len() > Self::MAX_LENGTH {
return Err(Error::ParseEncodingTooBigError);
return Err(Error::EncodingTooBig);
}

let encoding = token.first().ok_or(Error::ParseEncodingEmptyError)?;
let encoding = token.first().ok_or(Error::EncodingEmpty)?;
let encoding = *encoding as char;

match encoding.to_ascii_lowercase() {
Encoding::Q_CHAR => Ok(Self::Q),
Encoding::B_CHAR => Ok(Self::B),
_ => Err(Error::ParseEncodingError(encoding)),
_ => Err(Error::UnexpectedEncoding(encoding)),
}
}
}
Expand Down Expand Up @@ -83,7 +83,9 @@ fn convert_tokens_to_encoded_words(tokens: Tokens) -> Result<ParsedEncodedWords>
.into_iter()
.map(|token: Token| match token {
Token::ClearText(clear_text) => Ok(ParsedEncodedWord::ClearText(clear_text)),
Token::EncodedWord(encoded_word) => ParsedEncodedWord::convert_encoded_word(encoded_word),
Token::EncodedWord(encoded_word) => {
ParsedEncodedWord::convert_encoded_word(encoded_word)
}
})
.collect()
}
Expand Down
Loading