Skip to content

Commit

Permalink
refactor(core): CompoundWords is more accurately called MergeWords
Browse files Browse the repository at this point in the history
  • Loading branch information
elijah-potter committed Jan 6, 2025
1 parent 91adb6e commit 6cc4509
Show file tree
Hide file tree
Showing 5 changed files with 104 additions and 166 deletions.
162 changes: 0 additions & 162 deletions harper-core/src/linting/compound_words.rs

This file was deleted.

4 changes: 2 additions & 2 deletions harper-core/src/linting/lint_group.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,13 @@ use super::an_a::AnA;
use super::avoid_curses::AvoidCurses;
use super::boring_words::BoringWords;
use super::capitalize_personal_pronouns::CapitalizePersonalPronouns;
use super::compound_words::CompoundWords;
use super::correct_number_suffix::CorrectNumberSuffix;
use super::dot_initialisms::DotInitialisms;
use super::ellipsis_length::EllipsisLength;
use super::linking_verbs::LinkingVerbs;
use super::long_sentences::LongSentences;
use super::matcher::Matcher;
use super::merge_words::MergeWords;
use super::multiple_sequential_pronouns::MultipleSequentialPronouns;
use super::number_suffix_capitalization::NumberSuffixCapitalization;
use super::plural_conjugate::PluralConjugate;
Expand Down Expand Up @@ -182,7 +182,7 @@ create_lint_group_config!(
MicrosoftNames => true,
AppleNames => true,
AzureNames => true,
CompoundWords => true,
MergeWords => true,
PluralConjugate => false
);

Expand Down
1 change: 0 additions & 1 deletion harper-core/src/linting/matcher.rs
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,6 @@ impl Matcher {
"that","s" => "that is",
"That","s" => "that is",
"ms" => "milliseconds",
"t","he" => "the",
"the","hing" => "the thing",
"The","hing" => "The thing",
"need","helps" => "need help",
Expand Down
100 changes: 100 additions & 0 deletions harper-core/src/linting/merge_words.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
use std::sync::Arc;

use itertools::Itertools;

use crate::{CharString, CharStringExt, Dictionary, Document, FstDictionary, Span};

use super::{Lint, LintKind, Linter, Suggestion};

/// A linter that looks for two words separated by whitespace that the
/// dictionary recognizes as a single word once they are joined together.
pub struct MergeWords {
/// Dictionary used to decide whether a word (or a merged pair of words) exists.
dict: Arc<FstDictionary>,
}

impl MergeWords {
    /// Builds a linter backed by the dictionary returned from
    /// `FstDictionary::curated()`.
    pub fn new() -> Self {
        let dict = FstDictionary::curated();
        Self { dict }
    }
}

impl Default for MergeWords {
fn default() -> Self {
Self::new()
}
}

impl Linter for MergeWords {
    /// Flags every `word, whitespace, word` token triple whose two words, once
    /// joined, form a dictionary word while at least one of the two halves is
    /// not itself a dictionary word.
    ///
    /// The dictionary lookup for the joined word is performed on lowercased
    /// text, but the suggested replacement keeps the characters exactly as they
    /// were written. Accepting the suggestion therefore only removes the stray
    /// whitespace and never changes capitalization (a sentence-initial
    /// "The refore" becomes "Therefore", not "therefore").
    fn lint(&mut self, document: &Document) -> Vec<Lint> {
        let mut lints = Vec::new();

        // Scratch buffer for the lowercased lookup key, reused across iterations
        // to avoid allocating a fresh buffer for every candidate pair.
        let mut merged_word = CharString::new();

        for (a, w, b) in document.tokens().tuple_windows() {
            // Only consider two words separated by exactly one whitespace token.
            if !a.kind.is_word() || !w.kind.is_whitespace() || !b.kind.is_word() {
                continue;
            }

            let a_chars = document.get_span_content(a.span);
            let b_chars = document.get_span_content(b.span);

            // Not super helpful in this case, so we skip it: a lone "a" next to
            // another word is not treated as a split-word candidate.
            if matches!(a_chars, ['a']) || matches!(b_chars, ['a']) {
                continue;
            }

            merged_word.clear();
            merged_word.extend_from_slice(&a_chars.to_lower());
            merged_word.extend_from_slice(&b_chars.to_lower());

            // If both halves are already valid words the space is assumed to be
            // intentional, so only lint when at least one half is unknown.
            let merged_is_word = self.dict.contains_word(&merged_word);
            let has_unknown_half =
                !self.dict.contains_word(a_chars) || !self.dict.contains_word(b_chars);

            if merged_is_word && has_unknown_half {
                // Build the replacement from the text as written rather than
                // from the lowercased lookup key, so the user's capitalization
                // survives the fix.
                let mut replacement = Vec::with_capacity(a_chars.len() + b_chars.len());
                replacement.extend_from_slice(a_chars);
                replacement.extend_from_slice(b_chars);

                lints.push(Lint {
                    span: Span::new(a.span.start, b.span.end),
                    lint_kind: LintKind::Spelling,
                    suggestions: vec![Suggestion::ReplaceWith(replacement)],
                    message: "These two words are often combined to form a closed compound word."
                        .to_owned(),
                    priority: 63,
                });
            }
        }

        lints
    }

    /// Human-readable explanation of what this rule checks for.
    fn description(&self) -> &str {
        "Accidentally inserting a space inside a word is common. This rule looks for valid words that are split by whitespace."
    }
}

#[cfg(test)]
mod tests {
    use super::MergeWords;
    use crate::linting::tests::assert_lint_count;

    /// A sentence with no accidentally split words must not be flagged.
    #[test]
    fn clean() {
        let text =
            "When referring to the political party, make sure to treat them as a proper noun.";
        assert_lint_count(text, MergeWords::default(), 0);
    }

    /// A word split in the middle of a sentence is flagged exactly once.
    #[test]
    fn heretofore() {
        let text = "This is a her etofore unseen problem.";
        assert_lint_count(text, MergeWords::default(), 1);
    }

    /// A capitalized split word at the start of the text is flagged exactly once.
    #[test]
    fn therefore() {
        let text = "The refore";
        assert_lint_count(text, MergeWords::default(), 1);
    }
}
3 changes: 2 additions & 1 deletion harper-core/src/linting/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@ mod an_a;
mod avoid_curses;
mod boring_words;
mod capitalize_personal_pronouns;
mod compound_words;
mod correct_number_suffix;
mod dashes;
mod dot_initialisms;
Expand All @@ -12,6 +11,7 @@ mod lint;
mod lint_group;
mod long_sentences;
mod matcher;
mod merge_words;
mod multiple_sequential_pronouns;
mod number_suffix_capitalization;
mod pattern_linter;
Expand Down Expand Up @@ -40,6 +40,7 @@ pub use lint::{Lint, LintKind, Suggestion};
pub use lint_group::{LintGroup, LintGroupConfig};
pub use long_sentences::LongSentences;
pub use matcher::Matcher;
pub use merge_words::MergeWords;
pub use multiple_sequential_pronouns::MultipleSequentialPronouns;
pub use number_suffix_capitalization::NumberSuffixCapitalization;
pub use pattern_linter::PatternLinter;
Expand Down

0 comments on commit 6cc4509

Please sign in to comment.