Skip to content

Commit

Permalink
feat: refactored sequence patterns
Browse files Browse the repository at this point in the history
  • Loading branch information
elijah-potter committed Sep 2, 2024
1 parent e3bf6be commit 0778d5a
Show file tree
Hide file tree
Showing 9 changed files with 172 additions and 101 deletions.
16 changes: 14 additions & 2 deletions harper-core/src/linting/multiple_sequential_pronouns.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
use hashbrown::HashSet;

use super::pattern_linter::PatternLinter;
use super::Suggestion;
use crate::linting::LintKind;
use crate::patterns::{Pattern, SequencePattern};
use crate::{Lint, Lrc, Token, TokenStringExt};
Expand Down Expand Up @@ -42,13 +43,24 @@ impl PatternLinter for MultipleSequentialPronouns {
self.pattern.as_ref()
}

fn match_to_lint(&self, matched_tokens: &[Token], _source: &[char]) -> Lint {
fn match_to_lint(&self, matched_tokens: &[Token], source: &[char]) -> Lint {
let mut suggestions = Vec::new();

if matched_tokens.len() == 3 {
suggestions.push(Suggestion::ReplaceWith(
matched_tokens[0].span.get_content(source).to_vec()
));
suggestions.push(Suggestion::ReplaceWith(
matched_tokens[2].span.get_content(source).to_vec()
));
}

Lint {
span: matched_tokens.span().unwrap(),
lint_kind: LintKind::Repetition,
message: "There are too many personal pronouns in sequence here.".to_owned(),
priority: 63,
..Default::default()
suggestions
}
}
}
Expand Down
10 changes: 6 additions & 4 deletions harper-core/src/linting/repeated_words.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use super::{Lint, LintKind, Linter, PatternLinter, Suggestion};
use super::{Lint, LintKind, PatternLinter, Suggestion};
use crate::patterns::{Pattern, SequencePattern, WordPatternGroup};
use crate::token::{Token, TokenStringExt};

Expand Down Expand Up @@ -48,11 +48,13 @@ impl PatternLinter for RepeatedWords {
self.pattern.as_ref()
}

fn match_to_lint(&self, matched_tokens: &[Token], _source: &[char]) -> Lint {
fn match_to_lint(&self, matched_tokens: &[Token], source: &[char]) -> Lint {
Lint {
span: matched_tokens[1..].span().unwrap(),
span: matched_tokens.span().unwrap(),
lint_kind: LintKind::Repetition,
suggestions: vec![Suggestion::Remove],
suggestions: vec![Suggestion::ReplaceWith(
matched_tokens[0].span.get_content(source).to_vec()
)],
message: "Did you mean to repeat this word?".to_string(),
..Default::default()
}
Expand Down
14 changes: 14 additions & 0 deletions harper-core/src/patterns/any_pattern.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
use super::Pattern;

/// A [`Pattern`] that will match any single token.
pub struct AnyPattern;

impl Pattern for AnyPattern {
    /// Consumes exactly one token whenever at least one is available.
    ///
    /// Returns `1` for a non-empty `tokens` slice and `0` for an empty one.
    /// Neither the token itself nor `_source` is inspected.
    fn matches(&self, tokens: &[crate::Token], _source: &[char]) -> usize {
        // `false` converts to 0 and `true` to 1.
        usize::from(!tokens.is_empty())
    }
}
51 changes: 47 additions & 4 deletions harper-core/src/patterns/mod.rs
Original file line number Diff line number Diff line change
@@ -1,17 +1,19 @@
use crate::Token;

mod any_pattern;
mod naive_pattern_group;
mod repeating_pattern;
mod sequence_pattern;
mod token_kind_pattern_group;
mod token_pattern;
mod token_sequence_pattern;
mod whitespace_pattern;
mod word_pattern_group;

pub use any_pattern::AnyPattern;
pub use naive_pattern_group::NaivePatternGroup;
pub use repeating_pattern::RepeatingPattern;
pub use sequence_pattern::SequencePattern;
pub use token_kind_pattern_group::TokenKindPatternGroup;
pub use token_pattern::TokenPattern;
pub use token_sequence_pattern::SequencePattern;
pub use whitespace_pattern::WhitespacePattern;
pub use word_pattern_group::WordPatternGroup;

#[cfg(not(feature = "concurrent"))]
Expand All @@ -23,3 +25,44 @@ pub trait Pattern {
pub trait Pattern: Send + Sync {
fn matches(&self, tokens: &[Token], source: &[char]) -> usize;
}

/// Lets any `Send + Sync` predicate of the form `Fn(&Token, &[char]) -> bool`
/// be used directly as a single-token [`Pattern`].
///
/// Only the first token of the slice is handed to the predicate.
#[cfg(feature = "concurrent")]
impl<F> Pattern for F
where
    F: Fn(&Token, &[char]) -> bool + Send + Sync
{
    /// Returns `1` when the first token satisfies the predicate, and `0` when
    /// it does not or when `tokens` is empty.
    fn matches(&self, tokens: &[Token], source: &[char]) -> usize {
        match tokens.first() {
            Some(head) if self(head, source) => 1,
            _ => 0
        }
    }
}

/// Lets any predicate of the form `Fn(&Token, &[char]) -> bool` be used
/// directly as a single-token [`Pattern`].
///
/// Only the first token of the slice is handed to the predicate.
#[cfg(not(feature = "concurrent"))]
impl<F> Pattern for F
where
    F: Fn(&Token, &[char]) -> bool
{
    /// Returns `1` when the first token satisfies the predicate, and `0` when
    /// it does not or when `tokens` is empty.
    fn matches(&self, tokens: &[Token], source: &[char]) -> usize {
        match tokens.first() {
            Some(head) if self(head, source) => 1,
            _ => 0
        }
    }
}
8 changes: 5 additions & 3 deletions harper-core/src/patterns/repeating_pattern.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,13 +33,15 @@ impl Pattern for RepeatingPattern {
#[cfg(test)]
mod tests {
use super::RepeatingPattern;
use crate::patterns::{Pattern, TokenPattern};
use crate::patterns::{AnyPattern, Pattern};
use crate::Document;

#[test]
fn matches_anything() {
let doc = Document::new_plain_english_curated("This matcher will match anything!");
let pat = RepeatingPattern::new(Box::new(TokenPattern::Any));
let doc = Document::new_plain_english_curated(
"This matcher will match the entirety of any document!"
);
let pat = RepeatingPattern::new(Box::new(AnyPattern));

assert_eq!(
pat.matches(doc.get_tokens(), doc.get_source()),
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
use hashbrown::HashSet;
use paste::paste;

use super::token_pattern::TokenPattern;
use super::whitespace_pattern::WhitespacePattern;
use super::{Pattern, RepeatingPattern};
use crate::{Lrc, Token, TokenKind};

Expand All @@ -10,39 +11,81 @@ pub struct SequencePattern {
token_patterns: Vec<Box<dyn Pattern>>
}

/// Generates a `then_<quality>` builder method inside `impl SequencePattern`.
///
/// The generated method appends a single-token pattern that matches when
/// `tok.kind.is_<quality>()` returns `true`. To stay consistent with the
/// hand-written `then_*` builders in the same impl, the method is `pub` and
/// returns `&mut Self` so that calls can be chained.
macro_rules! gen_then_from_is {
    ($quality:ident) => {
        paste! {
            pub fn [< then_$quality >] (&mut self) -> &mut Self {
                self.token_patterns.push(Box::new(|tok: &Token, _source: &[char]| {
                    tok.kind.[< is_$quality >]()
                }));
                self
            }
        }
    };
}

impl SequencePattern {
gen_then_from_is!(noun);
gen_then_from_is!(verb);
gen_then_from_is!(linking_verb);

pub fn then_exact_word(&mut self, word: &'static str) -> &mut Self {
self.token_patterns
.push(Box::new(TokenPattern::WordExact(word)));
.push(Box::new(|tok: &Token, source: &[char]| {
if !tok.kind.is_word() {
return false;
}

let tok_chars = tok.span.get_content(source);

let mut w_char_count = 0;
for (i, w_char) in word.chars().enumerate() {
w_char_count += 1;

if tok_chars.get(i).cloned() != Some(w_char) {
return false;
}
}

w_char_count == tok_chars.len()
}));
self
}

pub fn then_loose(&mut self, kind: TokenKind) -> &mut Self {
self.token_patterns
.push(Box::new(TokenPattern::KindLoose(kind)));
.push(Box::new(move |tok: &Token, _source: &[char]| {
kind.with_default_data() == tok.kind.with_default_data()
}));

self
}

pub fn then_any_word(&mut self) -> &mut Self {
self.token_patterns
.push(Box::new(TokenPattern::KindLoose(TokenKind::blank_word())));
.push(Box::new(|tok: &Token, _source: &[char]| tok.kind.is_word()));
self
}

pub fn then_strict(&mut self, kind: TokenKind) -> &mut Self {
self.token_patterns
.push(Box::new(TokenPattern::KindStrict(kind)));
.push(Box::new(move |tok: &Token, _source: &[char]| {
tok.kind == kind
}));
self
}

pub fn then_whitespace(&mut self) -> &mut Self {
self.token_patterns.push(Box::new(TokenPattern::WhiteSpace));
self.token_patterns.push(Box::new(WhitespacePattern));
self
}

pub fn then_any_word_in(&mut self, word_set: Lrc<HashSet<&'static str>>) -> &mut Self {
self.token_patterns
.push(Box::new(TokenPattern::WordInSet(word_set)));
.push(Box::new(move |tok: &Token, source: &[char]| {
let tok_chars = tok.span.get_content(source);
let word: String = tok_chars.iter().collect();
word_set.contains(word.as_str())
}));
self
}

Expand Down
81 changes: 0 additions & 81 deletions harper-core/src/patterns/token_pattern.rs

This file was deleted.

12 changes: 12 additions & 0 deletions harper-core/src/patterns/whitespace_pattern.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
use super::Pattern;

/// A [`Pattern`] that matches the run of consecutive whitespace tokens at the
/// start of the token slice (possibly zero tokens).
pub struct WhitespacePattern;

impl Pattern for WhitespacePattern {
    /// Counts how many tokens at the front of `tokens` are whitespace.
    ///
    /// Counting stops at the first non-whitespace token. If every token is
    /// whitespace the full slice length is returned, and an empty slice
    /// yields `0`. `_source` is not consulted.
    fn matches(&self, tokens: &[crate::Token], _source: &[char]) -> usize {
        tokens
            .iter()
            .take_while(|tok| tok.kind.is_whitespace())
            .count()
    }
}
24 changes: 24 additions & 0 deletions harper-core/src/token.rs
Original file line number Diff line number Diff line change
Expand Up @@ -204,6 +204,30 @@ impl TokenKind {
matches!(self, TokenKind::Punctuation(Punctuation::At))
}

/// Checks whether the token is a word whose metadata reports `is_verb()`.
///
/// Every non-word token kind yields `false`.
pub fn is_verb(&self) -> bool {
    match self {
        TokenKind::Word(metadata) => metadata.is_verb(),
        _ => false
    }
}

/// Checks whether the token is a word whose metadata reports
/// `is_linking_verb()`.
///
/// Every non-word token kind yields `false`.
pub fn is_linking_verb(&self) -> bool {
    match self {
        TokenKind::Word(metadata) => metadata.is_linking_verb(),
        _ => false
    }
}

/// Checks whether the token is a word whose metadata reports `is_noun()`.
///
/// Every non-word token kind yields `false`.
pub fn is_noun(&self) -> bool {
    match self {
        TokenKind::Word(metadata) => metadata.is_noun(),
        _ => false
    }
}

/// Checks whether the token is whitespace.
pub fn is_whitespace(&self) -> bool {
matches!(self, TokenKind::Space(_) | TokenKind::Newline(_))
Expand Down

0 comments on commit 0778d5a

Please sign in to comment.