diff --git a/harper-core/src/currency.rs b/harper-core/src/currency.rs new file mode 100644 index 00000000..7f7532ee --- /dev/null +++ b/harper-core/src/currency.rs @@ -0,0 +1,87 @@ +use is_macro::Is; +use serde::{Deserialize, Serialize}; + +use crate::NumberSuffix; + +#[derive(Debug, Is, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, PartialOrd, Hash)] +pub enum Currency { + // $ + Dollar, + // ¢ + Cent, + // € + Euro, + // ₽ + Ruble, + // ₺ + Lira, + // £ + Pound, + // ¥ + Yen, + // ฿ + Baht, + // ₩ + Won, + // ₭, + Kip, +} + +impl Currency { + pub fn from_char(c: char) -> Option { + let cur = match c { + '$' => Self::Dollar, + '¢' => Self::Cent, + '€' => Self::Euro, + '₽' => Self::Ruble, + '₺' => Self::Lira, + '£' => Self::Pound, + '¥' => Self::Yen, + '฿' => Self::Baht, + '₩' => Self::Won, + '₭' => Self::Kip, + _ => return None, + }; + + Some(cur) + } + + pub fn to_char(&self) -> char { + match self { + Self::Dollar => '$', + Self::Cent => '¢', + Self::Euro => '€', + Self::Ruble => '₽', + Self::Lira => '₺', + Self::Pound => '£', + Self::Yen => '¥', + Self::Baht => '฿', + Self::Won => '₩', + Self::Kip => '₭', + } + } + + /// Format an amount of the specific currency. + pub fn format_amount(&self, value: f64, suffix: Option) -> String { + let c = self.to_char(); + + let mut amount = value.to_string(); + + if let Some(suffix) = suffix { + amount.extend(suffix.to_chars()); + } + + match self { + Currency::Dollar => format!("{}{amount}", c), + Currency::Cent => format!("{amount}{}", c), + Currency::Euro => format!("{}{amount}", c), + Currency::Ruble => format!("{amount} {}", c), + Currency::Lira => format!("{amount} {}", c), + Currency::Pound => format!("{}{amount}", c), + Currency::Yen => format!("{} {amount}", c), + Currency::Baht => format!("{amount} {}", c), + Currency::Won => format!("{} {amount}", c), + Currency::Kip => format!("{}{amount}", c), + } + } +} diff --git a/harper-core/src/document.rs b/harper-core/src/document.rs index a3f1da1b..8e10d07c 100644 --- a/harper-core/src/document.rs +++ b/harper-core/src/document.rs @@ -510,6 +510,7 @@ impl TokenStringExt for Document { create_fns_on_doc!(paragraph_break); create_fns_on_doc!(chunk_terminator); create_fns_on_doc!(punctuation); + create_fns_on_doc!(currency); create_fns_on_doc!(likely_homograph); fn first_sentence_word(&self) -> Option { diff --git a/harper-core/src/lib.rs b/harper-core/src/lib.rs index c3a71c80..37801a97 100644 --- a/harper-core/src/lib.rs +++ b/harper-core/src/lib.rs @@ -3,6 +3,7 @@ mod char_ext; mod char_string; +mod currency; mod document; mod fat_token; pub mod language_detection; @@ -24,6 +25,7 @@ mod word_metadata; use std::collections::VecDeque; pub use char_string::{CharString, CharStringExt}; +pub use currency::Currency; pub use document::Document; pub use fat_token::FatToken; use linting::Lint; diff --git a/harper-core/src/linting/currency_placement.rs b/harper-core/src/linting/currency_placement.rs new file mode 100644 index 00000000..e80dc7f5 --- /dev/null +++ b/harper-core/src/linting/currency_placement.rs @@ -0,0 +1,144 @@ +use itertools::Itertools; + +use crate::{remove_overlaps, Document, Token, TokenStringExt}; + +use super::{Lint, LintKind, Linter, Suggestion}; + +#[derive(Debug, Default)] +pub struct CurrencyPlacement {} + +impl Linter for CurrencyPlacement { + fn lint(&mut self, document: &Document) -> Vec { + let mut lints = Vec::new(); + + for chunk in document.iter_chunks() { + for (a, b) in chunk.iter().tuple_windows() { + lints.extend(generate_lint_for_tokens(*a, *b, document)); + } + + for (a, b, c) in chunk.iter().tuple_windows() { + if !b.kind.is_whitespace() { + continue; + } + + lints.extend(generate_lint_for_tokens(*a, *c, document)); + } + } + + remove_overlaps(&mut lints); + + lints + } + + fn description(&self) -> &str { + "The location of currency symbols varies by country. The rule looks for and corrects improper positioning." + } +} + +// Given two tokens that may have an error, check if they do and create a [`Lint`]. +fn generate_lint_for_tokens(a: Token, b: Token, document: &Document) -> Option { + let matched_tokens = [a, b]; + + let punct = matched_tokens + .first_punctuation()? + .kind + .expect_punctuation(); + let currency = punct.as_currency()?; + + let (value, suffix) = matched_tokens.first_number()?.kind.expect_number(); + + let span = matched_tokens.span().unwrap(); + + let correct: Vec<_> = currency + .format_amount(value.into(), suffix) + .chars() + .collect(); + let actual = document.get_span_content(span); + + if correct != actual { + Some(Lint { + span, + lint_kind: LintKind::Formatting, + suggestions: vec![Suggestion::ReplaceWith(correct)], + message: "The position of the currency symbol matters.".to_string(), + priority: 63, + }) + } else { + None + } +} + +#[cfg(test)] +mod tests { + use crate::linting::tests::{assert_lint_count, assert_suggestion_result}; + + use super::CurrencyPlacement; + + #[test] + fn eof() { + assert_suggestion_result( + "It was my last bill worth more than 4$.", + CurrencyPlacement::default(), + "It was my last bill worth more than $4.", + ); + } + + #[test] + fn blog_title_allows_correct() { + assert_lint_count("The Best $25 I Ever Spent", CurrencyPlacement::default(), 0); + } + + #[test] + fn blog_title() { + assert_suggestion_result( + "The Best 25$ I Ever Spent", + CurrencyPlacement::default(), + "The Best $25 I Ever Spent", + ); + } + + #[test] + fn blog_title_cents() { + assert_suggestion_result( + "The Best ¢25 I Ever Spent", + CurrencyPlacement::default(), + "The Best 25¢ I Ever Spent", + ); + } + + #[test] + fn blog_title_with_space() { + assert_suggestion_result( + "The Best 25 $ I Ever Spent", + CurrencyPlacement::default(), + "The Best $25 I Ever Spent", + ); + } + + #[test] + fn multiple_dollar() { + assert_suggestion_result( + "They were either 25$ 24$ or 23$.", + CurrencyPlacement::default(), + "They were either $25 $24 or $23.", + ); + } + + #[test] + fn multiple_pound() { + assert_suggestion_result( + "They were either 25£ 24£ or 23£.", + CurrencyPlacement::default(), + "They were either £25 £24 or £23.", + ); + } + + #[test] + fn suffix() { + assert_suggestion_result( + "It was my 20th$.", + CurrencyPlacement::default(), + "It was my $20th.", + ); + } +} diff --git a/harper-core/src/linting/lint_group.rs b/harper-core/src/linting/lint_group.rs index 2c1fc5ea..906a49c6 100644 --- a/harper-core/src/linting/lint_group.rs +++ b/harper-core/src/linting/lint_group.rs @@ -30,7 +30,7 @@ use super::that_which::ThatWhich; use super::unclosed_quotes::UnclosedQuotes; use super::use_genitive::UseGenitive; use super::wrong_quotes::WrongQuotes; -use super::{Lint, Linter, OxfordComma}; +use super::{CurrencyPlacement, Lint, Linter, OxfordComma}; use crate::{Dictionary, Document}; macro_rules! create_lint_group_config { @@ -186,7 +186,8 @@ create_lint_group_config!( MergeWords => true, PluralConjugate => false, OxfordComma => true, - PronounContraction => true + PronounContraction => true, + CurrencyPlacement => true ); impl Default for LintGroup { diff --git a/harper-core/src/linting/mod.rs b/harper-core/src/linting/mod.rs index 94fad8d5..addc123c 100644 --- a/harper-core/src/linting/mod.rs +++ b/harper-core/src/linting/mod.rs @@ -3,6 +3,7 @@ mod avoid_curses; mod boring_words; mod capitalize_personal_pronouns; mod correct_number_suffix; +mod currency_placement; mod dashes; mod dot_initialisms; mod ellipsis_length; @@ -36,6 +37,7 @@ pub use avoid_curses::AvoidCurses; pub use boring_words::BoringWords; pub use capitalize_personal_pronouns::CapitalizePersonalPronouns; pub use correct_number_suffix::CorrectNumberSuffix; +pub use currency_placement::CurrencyPlacement; pub use dot_initialisms::DotInitialisms; pub use ellipsis_length::EllipsisLength; pub use linking_verbs::LinkingVerbs; diff --git a/harper-core/src/patterns/sequence_pattern.rs b/harper-core/src/patterns/sequence_pattern.rs index e581c180..c1eb7ac5 100644 --- a/harper-core/src/patterns/sequence_pattern.rs +++ b/harper-core/src/patterns/sequence_pattern.rs @@ -11,6 +11,7 @@ pub struct SequencePattern { token_patterns: Vec>, } +/// Generate a `then_*` method from an available `is_*` function on [`TokenKind`]. macro_rules! gen_then_from_is { ($quality:ident) => { paste! { @@ -53,6 +54,7 @@ impl SequencePattern { gen_then_from_is!(conjunction); gen_then_from_is!(comma); gen_then_from_is!(period); + gen_then_from_is!(number); gen_then_from_is!(case_separator); gen_then_from_is!(adverb); gen_then_from_is!(adjective); diff --git a/harper-core/src/punctuation.rs b/harper-core/src/punctuation.rs index 767b809e..a625e80a 100644 --- a/harper-core/src/punctuation.rs +++ b/harper-core/src/punctuation.rs @@ -1,6 +1,8 @@ use is_macro::Is; use serde::{Deserialize, Serialize}; +use crate::Currency; + #[derive( Debug, Is, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, PartialOrd, Default, Hash, )] @@ -69,8 +71,7 @@ pub enum Punctuation { Carrot, /// `+` Plus, - /// `$` - Dollar, + Currency(Currency), /// `|` Pipe, /// `_` @@ -111,10 +112,9 @@ impl Punctuation { '…' => Punctuation::Ellipsis, '^' => Punctuation::Carrot, '+' => Punctuation::Plus, - '$' => Punctuation::Dollar, '|' => Punctuation::Pipe, '_' => Punctuation::Underscore, - _ => return None, + _ => Punctuation::Currency(Currency::from_char(c)?), }; Some(punct) diff --git a/harper-core/src/token.rs b/harper-core/src/token.rs index f5e66c0e..06ab11ce 100644 --- a/harper-core/src/token.rs +++ b/harper-core/src/token.rs @@ -93,6 +93,7 @@ pub trait TokenStringExt { create_decl_for!(paragraph_break); create_decl_for!(chunk_terminator); create_decl_for!(punctuation); + create_decl_for!(currency); create_decl_for!(likely_homograph); fn iter_linking_verb_indices(&self) -> impl Iterator + '_; @@ -133,6 +134,7 @@ impl TokenStringExt for [Token] { create_fns_for!(sentence_terminator); create_fns_for!(paragraph_break); create_fns_for!(chunk_terminator); + create_fns_for!(currency); create_fns_for!(likely_homograph); fn first_non_whitespace(&self) -> Option { diff --git a/harper-core/src/token_kind.rs b/harper-core/src/token_kind.rs index c3145849..93df3a30 100644 --- a/harper-core/src/token_kind.rs +++ b/harper-core/src/token_kind.rs @@ -103,6 +103,10 @@ impl TokenKind { } } + pub fn is_currency(&self) -> bool { + matches!(self, TokenKind::Punctuation(Punctuation::Currency(..))) + } + pub fn is_article(&self) -> bool { matches!(self, TokenKind::Word(WordMetadata { article: true, .. })) }