From 748d9bb6a8f10e1c2e21c3e22a184b91a2a444e8 Mon Sep 17 00:00:00 2001
From: Grant Lemons
Date: Tue, 12 Nov 2024 13:58:12 -0700
Subject: [PATCH] feat(#253): remove requirement that word be in list to lint for repetition

---
 harper-core/src/linting/repeated_words.rs | 111 ++++++++++------------
 1 file changed, 49 insertions(+), 62 deletions(-)

diff --git a/harper-core/src/linting/repeated_words.rs b/harper-core/src/linting/repeated_words.rs
index 4271442a..d377c0b2 100644
--- a/harper-core/src/linting/repeated_words.rs
+++ b/harper-core/src/linting/repeated_words.rs
@@ -1,66 +1,53 @@
-use super::{Lint, LintKind, PatternLinter, Suggestion};
-use crate::patterns::{Pattern, SequencePattern, WordPatternGroup};
-use crate::token::{Token, TokenStringExt};
-
-pub struct RepeatedWords {
-    pattern: Box<dyn Pattern>,
-}
-
-impl RepeatedWords {
-    pub fn new() -> Self {
-        Self::default()
-    }
-}
-
-impl Default for RepeatedWords {
-    fn default() -> Self {
-        let words = [
-            "the", "be", "to", "of", "and", "a", "in", "that", "have", "I", "it", "for", "not",
-            "on", "with", "he", "as", "you", "do", "at", "this", "is", "but", "his", "by", "from",
-            "they", "we", "say", "her", "she", "or", "an", "will", "my", "one", "all", "would",
-            "there", "their", "what", "so", "up", "out", "if", "about", "who", "get", "which",
-            "go", "me", "when", "make", "can", "like", "time", "no", "just", "him", "know", "take",
-            "people", "into", "year", "your", "good", "some", "could", "them", "see", "other",
-            "than", "then", "now", "look", "only", "come", "its", "over", "think", "also", "back",
-            "after", "use", "two", "how", "our", "work", "first", "well", "way", "even", "new",
-            "want", "because", "any", "these", "give", "day", "most", "us", "are",
-        ];
-
-        let mut pattern = WordPatternGroup::default();
-
-        for word in words {
-            pattern.add(
-                word,
-                Box::new(
-                    SequencePattern::default()
-                        .then_exact_word(word)
-                        .then_whitespace()
-                        .then_exact_word(word),
-                ),
-            );
+use super::{Lint, LintKind, Linter, Suggestion};
+use crate::token::{Token, TokenKind, TokenStringExt};
+use crate::{Document, Span};
+
+#[derive(Debug, Clone, Default)]
+pub struct RepeatedWords;
+
+impl Linter for RepeatedWords {
+    fn lint(&mut self, document: &Document) -> Vec<Lint> {
+        let mut lints = Vec::new();
+
+        for chunk in document.iter_chunks() {
+            let mut iter = chunk.iter_word_indices().zip(chunk.iter_words()).peekable();
+
+            while let (Some((idx_a, tok_a)), Some((idx_b, tok_b))) = (iter.next(), iter.peek()) {
+                let word_a = document.get_span_content(tok_a.span);
+                let word_b = document.get_span_content(tok_b.span);
+
+                if word_a == word_b {
+                    let intervening_tokens = &chunk[idx_a + 1..*idx_b];
+
+                    if intervening_tokens.iter().any(|t| !t.kind.is_whitespace()) {
+                        continue;
+                    }
+
+                    // Detect and remove the whitespace between the repetitions.
+                    let remove_end = tok_b.span.end;
+
+                    let remove_start = if let Some(Token {
+                        span,
+                        kind: TokenKind::Space(_),
+                    }) = intervening_tokens.last()
+                    {
+                        span.start
+                    } else {
+                        tok_b.span.start
+                    };
+
+                    lints.push(Lint {
+                        span: Span::new(remove_start, remove_end),
+                        lint_kind: LintKind::Repetition,
+                        suggestions: vec![Suggestion::Remove],
+                        message: "Did you mean to repeat this word?".to_string(),
+                        ..Default::default()
+                    })
+                }
+            }
         }
 
-        Self {
-            pattern: Box::new(pattern),
-        }
-    }
-}
-
-impl PatternLinter for RepeatedWords {
-    fn pattern(&self) -> &dyn Pattern {
-        self.pattern.as_ref()
-    }
-
-    fn match_to_lint(&self, matched_tokens: &[Token], source: &[char]) -> Lint {
-        Lint {
-            span: matched_tokens.span().unwrap(),
-            lint_kind: LintKind::Repetition,
-            suggestions: vec![Suggestion::ReplaceWith(
-                matched_tokens[0].span.get_content(source).to_vec(),
-            )],
-            message: "Did you mean to repeat this word?".to_string(),
-            ..Default::default()
-        }
+        lints
     }
 }
 
@@ -71,6 +58,6 @@ mod tests {
 
     #[test]
     fn catches_basic() {
-        assert_lint_count("I wanted the the banana.", RepeatedWords::new(), 1)
+        assert_lint_count("I wanted the the banana.", RepeatedWords::default(), 1)
     }
 }