|
| 1 | +use crate::{ |
| 2 | + spell::DictWord, Document, Lint, LintKind, Linter, Punctuation, Span, Suggestion, Token, |
| 3 | + TokenKind, |
| 4 | +}; |
| 5 | + |
| 6 | +#[derive(Debug, PartialEq, PartialOrd, Clone)] |
| 7 | +struct PatternToken { |
| 8 | + kind: TokenKind, |
| 9 | + content: Option<DictWord>, |
| 10 | +} |
| 11 | + |
| 12 | +impl PatternToken { |
| 13 | + fn from_token(token: Token, document: &Document) -> Self { |
| 14 | + if token.kind.is_word() { |
| 15 | + Self { |
| 16 | + kind: token.kind, |
| 17 | + content: Some(document.get_span_content(token.span).into()), |
| 18 | + } |
| 19 | + } else { |
| 20 | + Self { |
| 21 | + kind: token.kind, |
| 22 | + content: None, |
| 23 | + } |
| 24 | + } |
| 25 | + } |
| 26 | +} |
| 27 | + |
/// Expands a string literal into a collection of its `char`s.
///
/// The collection type is inferred at the call site (for example `Vec<char>`).
macro_rules! vecword {
    ($lit:literal) => {
        ::std::iter::FromIterator::from_iter($lit.chars())
    };
}
| 33 | + |
/// Builds `PatternToken`s and whole rule tables.
///
/// * `pt!("word")` — a word token that must spell exactly `word`.
/// * `pt!(Hyphen)` — a hyphen punctuation token.
/// * `pt!(Space)` — a whitespace token of kind `TokenKind::Space(1)`.
/// * `pt! { "a","b" => "ab", "c","d" => "cd" }` — a `Vec<Rule>` with one rule
///   per `=>` entry. The words on the left become word tokens separated by
///   `pt!(Space)` tokens; the literal on the right becomes the replacement
///   text. A trailing comma after the last entry is not accepted.
macro_rules! pt {
    ($str:literal) => {
        PatternToken {
            kind: TokenKind::Word,
            content: Some($str.chars().collect()),
        }
    };
    (Hyphen) => {
        PatternToken {
            kind: TokenKind::Punctuation(Punctuation::Hyphen),
            content: None,
        }
    };
    (Space) => {
        PatternToken {
            kind: TokenKind::Space(1),
            content: None,
        }
    };
    ( $($($str:literal),* => $repl:literal),*) => {
        vec![
            $(
                {
                    let mut pattern = Vec::new();

                    $(
                        // Separators go only *between* words, so nothing has
                        // to be removed again once the pattern is complete.
                        if !pattern.is_empty() {
                            pattern.push(pt!(Space));
                        }
                        pattern.push(pt!($str));
                    )*

                    Rule {
                        pattern,
                        replace_with: $repl.chars().collect(),
                    }
                },
            )*
        ]
    };
}
| 75 | + |
| 76 | +struct Rule { |
| 77 | + pattern: Vec<PatternToken>, |
| 78 | + replace_with: Vec<char>, |
| 79 | +} |
| 80 | + |
| 81 | +/// A linter that uses a variety of curated pattern matches to find and fix common |
| 82 | +/// grammatical issues. |
| 83 | +pub struct Matcher { |
| 84 | + triggers: Vec<Rule>, |
| 85 | +} |
| 86 | + |
| 87 | +impl Matcher { |
| 88 | + pub fn new() -> Self { |
| 89 | + let mut triggers = pt! { |
| 90 | + "There","fore" => "Therefore", |
| 91 | + "south","America" => "South America", |
| 92 | + "South","america" => "South America", |
| 93 | + "south","america" => "South America", |
| 94 | + "North","america" => "North America", |
| 95 | + "north","America" => "North America", |
| 96 | + "north","america" => "North America", |
| 97 | + "fatal","outcome" => "death", |
| 98 | + "geiger","counter" => "Geiger counter", |
| 99 | + "veterans","day" => "Veterans Day", |
| 100 | + "presidents","day" => "Presidents' Day", |
| 101 | + "president's","day" => "Presidents' Day", |
| 102 | + "valentines","day" => "Valentine's Day", |
| 103 | + "world","war","2" => "World War II", |
| 104 | + "World","war","ii" => "World War II", |
| 105 | + "world","War","ii" => "World War II", |
| 106 | + "World","War","Ii" => "World War II", |
| 107 | + "World","War","iI" => "World War II", |
| 108 | + "black","sea" => "Black Sea", |
| 109 | + "I","a","m" => "I am", |
| 110 | + "We","a","re" => "We are", |
| 111 | + "The","re" => "There", |
| 112 | + "my","french" => "my French", |
| 113 | + "It","cam" => "It can", |
| 114 | + "can","be","seem" => "can be seen", |
| 115 | + "mu","house" => "my house", |
| 116 | + "kid","regards" => "kind regards", |
| 117 | + "miss","understand" => "misunderstand", |
| 118 | + "miss","use" => "misuse", |
| 119 | + "miss","used" => "misused", |
| 120 | + "bee","there" => "been there", |
| 121 | + "want","be" => "won't be", |
| 122 | + "more","then" => "more than", |
| 123 | + "gong","to" => "going to", |
| 124 | + "then","others" => "than others", |
| 125 | + "then","before" => "than before", |
| 126 | + "then","last","week" => "than last week", |
| 127 | + "then","her" => "than her", |
| 128 | + "then","hers" => "than hers", |
| 129 | + "then","him" => "than him", |
| 130 | + "then","his" => "than his" |
| 131 | + }; |
| 132 | + |
| 133 | + triggers.push(Rule { |
| 134 | + pattern: vec![pt!("break"), pt!(Hyphen), pt!("up")], |
| 135 | + replace_with: vecword!("break-up"), |
| 136 | + }); |
| 137 | + |
| 138 | + Self { triggers } |
| 139 | + } |
| 140 | +} |
| 141 | + |
| 142 | +impl Default for Matcher { |
| 143 | + fn default() -> Self { |
| 144 | + Self::new() |
| 145 | + } |
| 146 | +} |
| 147 | + |
| 148 | +impl Linter for Matcher { |
| 149 | + fn lint(&mut self, document: &Document) -> Vec<Lint> { |
| 150 | + let mut lints = Vec::new(); |
| 151 | + |
| 152 | + for (index, _) in document.tokens().enumerate() { |
| 153 | + for trigger in &self.triggers { |
| 154 | + let mut match_tokens = Vec::new(); |
| 155 | + |
| 156 | + for (p_index, pattern) in trigger.pattern.iter().enumerate() { |
| 157 | + let Some(token) = document.get_token(index + p_index) else { |
| 158 | + break; |
| 159 | + }; |
| 160 | + |
| 161 | + let t_pattern = PatternToken::from_token(token, document); |
| 162 | + |
| 163 | + if t_pattern != *pattern { |
| 164 | + break; |
| 165 | + } |
| 166 | + |
| 167 | + match_tokens.push(token); |
| 168 | + } |
| 169 | + |
| 170 | + if match_tokens.len() == trigger.pattern.len() && !match_tokens.is_empty() { |
| 171 | + let span = Span::new( |
| 172 | + match_tokens.first().unwrap().span.start, |
| 173 | + match_tokens.last().unwrap().span.end, |
| 174 | + ); |
| 175 | + |
| 176 | + lints.push(Lint { |
| 177 | + span, |
| 178 | + lint_kind: LintKind::Miscellaneous, |
| 179 | + suggestions: vec![Suggestion::ReplaceWith(trigger.replace_with.to_owned())], |
| 180 | + message: format!( |
| 181 | + "Did you mean “{}”?", |
| 182 | + trigger.replace_with.iter().collect::<String>() |
| 183 | + ), |
| 184 | + }) |
| 185 | + } |
| 186 | + } |
| 187 | + } |
| 188 | + |
| 189 | + lints |
| 190 | + } |
| 191 | +} |
| 192 | + |
#[cfg(test)]
mod tests {
    use super::Matcher;
    use crate::{Document, Linter};

    /// The split form "There fore" must be reported exactly once.
    #[test]
    fn matches_therefore() {
        let document = Document::new_plain_english("There fore.");
        let lints = Matcher::new().lint(&document);

        assert_eq!(lints.len(), 1);
    }
}
0 commit comments