Skip to content

Commit 9227913

Browse files
committed
fix(Automattic#228): add stricter guards to ensure the text being linted is a complete sentence
1 parent 2314e6d commit 9227913

File tree

3 files changed

+107
-21
lines changed

3 files changed

+107
-21
lines changed

harper-core/src/document.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -500,6 +500,7 @@ impl TokenStringExt for Document {
500500
create_fns_on_doc!(ellipsis);
501501
create_fns_on_doc!(unlintable);
502502
create_fns_on_doc!(sentence_terminator);
503+
create_fns_on_doc!(paragraph_break);
503504
create_fns_on_doc!(chunk_terminator);
504505
create_fns_on_doc!(punctuation);
505506
create_fns_on_doc!(likely_homograph);
@@ -528,6 +529,10 @@ impl TokenStringExt for Document {
528529
self.tokens.iter_chunks()
529530
}
530531

    /// Get an iterator over the paragraphs of this document, by delegating
    /// to the equivalent [`TokenStringExt::iter_paragraphs`] on the
    /// document's underlying token buffer.
    fn iter_paragraphs(&self) -> impl Iterator<Item = &'_ [Token]> + '_ {
        self.tokens.iter_paragraphs()
    }
531536
fn iter_sentences(&self) -> impl Iterator<Item = &'_ [Token]> + '_ {
532537
self.tokens.iter_sentences()
533538
}

harper-core/src/linting/sentence_capitalization.rs

Lines changed: 72 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ use itertools::Itertools;
33
use super::lint::Suggestion;
44
use super::{Lint, LintKind, Linter};
55
use crate::document::Document;
6-
use crate::TokenStringExt;
6+
use crate::{Token, TokenKind, TokenStringExt};
77

88
#[derive(Debug, Clone, Copy, Default)]
99
pub struct SentenceCapitalization;
@@ -14,26 +14,45 @@ impl Linter for SentenceCapitalization {
1414
fn lint(&mut self, document: &Document) -> Vec<Lint> {
1515
let mut lints = Vec::new();
1616

17-
for sentence in document.iter_sentences() {
18-
if let Some(first_word) = sentence.first_non_whitespace() {
19-
if !first_word.kind.is_word() {
17+
for paragraph in document.iter_paragraphs() {
18+
// Allows short, label-like comments in code.
19+
if paragraph.iter_sentences().count() == 1 {
20+
let only_sentence = paragraph.iter_sentences().next().unwrap();
21+
22+
if !only_sentence
23+
.iter_chunks()
24+
.map(|c| c.iter_words().count())
25+
.any(|c| c > 5)
26+
{
2027
continue;
2128
}
29+
}
30+
31+
for sentence in paragraph.iter_sentences() {
32+
if !is_full_sentence(sentence) {
33+
continue;
34+
}
35+
36+
if let Some(first_word) = sentence.first_non_whitespace() {
37+
if !first_word.kind.is_word() {
38+
continue;
39+
}
2240

23-
let letters = document.get_span_content(first_word.span);
24-
25-
if let Some(first_letter) = letters.first() {
26-
if first_letter.is_alphabetic() && !first_letter.is_uppercase() {
27-
lints.push(Lint {
28-
span: first_word.span.with_len(1),
29-
lint_kind: LintKind::Capitalization,
30-
suggestions: vec![Suggestion::ReplaceWith(
31-
first_letter.to_uppercase().collect_vec(),
32-
)],
33-
priority: 31,
34-
message: "This sentence does not start with a capital letter"
35-
.to_string(),
36-
})
41+
let letters = document.get_span_content(first_word.span);
42+
43+
if let Some(first_letter) = letters.first() {
44+
if first_letter.is_alphabetic() && !first_letter.is_uppercase() {
45+
lints.push(Lint {
46+
span: first_word.span.with_len(1),
47+
lint_kind: LintKind::Capitalization,
48+
suggestions: vec![Suggestion::ReplaceWith(
49+
first_letter.to_uppercase().collect_vec(),
50+
)],
51+
priority: 31,
52+
message: "This sentence does not start with a capital letter"
53+
.to_string(),
54+
})
55+
}
3756
}
3857
}
3958
}
@@ -43,25 +62,52 @@ impl Linter for SentenceCapitalization {
4362
}
4463
}
4564

65+
fn is_full_sentence(toks: &[Token]) -> bool {
66+
let mut has_noun = false;
67+
let mut has_verb = false;
68+
69+
for tok in toks {
70+
if let TokenKind::Word(metadata) = tok.kind {
71+
if metadata.is_noun() {
72+
has_noun = true;
73+
}
74+
75+
if metadata.is_verb() {
76+
has_verb = true;
77+
}
78+
}
79+
}
80+
81+
has_noun && has_verb
82+
}
83+
4684
#[cfg(test)]
4785
mod tests {
4886
use super::super::tests::assert_lint_count;
4987
use super::SentenceCapitalization;
5088

5189
#[test]
5290
fn catches_basic() {
53-
assert_lint_count("there is no way.", SentenceCapitalization, 1)
91+
assert_lint_count(
92+
"there is no way she is not guilty.",
93+
SentenceCapitalization,
94+
1,
95+
)
5496
}
5597

5698
#[test]
5799
fn no_period() {
58-
assert_lint_count("there is no way", SentenceCapitalization, 1)
100+
assert_lint_count(
101+
"there is no way she is not guilty",
102+
SentenceCapitalization,
103+
1,
104+
)
59105
}
60106

61107
#[test]
62108
fn two_sentence() {
63109
assert_lint_count(
64-
"i have complete conviction. she is guilty",
110+
"i have complete conviction in this. she is absolutely guilty",
65111
SentenceCapitalization,
66112
2,
67113
)
@@ -111,4 +157,9 @@ mod tests {
111157
1,
112158
)
113159
}
160+
161+
#[test]
162+
fn issue_228_allows_labels() {
163+
assert_lint_count("python lsp (fork of pyright)", SentenceCapitalization, 0)
164+
}
114165
}

harper-core/src/token.rs

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -403,6 +403,7 @@ pub trait TokenStringExt {
403403
create_decl_for!(ellipsis);
404404
create_decl_for!(unlintable);
405405
create_decl_for!(sentence_terminator);
406+
create_decl_for!(paragraph_break);
406407
create_decl_for!(chunk_terminator);
407408
create_decl_for!(punctuation);
408409
create_decl_for!(likely_homograph);
@@ -420,6 +421,10 @@ pub trait TokenStringExt {
420421
/// ```
421422
fn iter_chunks(&self) -> impl Iterator<Item = &'_ [Token]> + '_;
422423

424+
/// Get an iterator over token slices that represent the individual
425+
/// paragraphs in a document.
426+
fn iter_paragraphs(&self) -> impl Iterator<Item = &'_ [Token]> + '_;
427+
423428
/// Get an iterator over token slices that represent the individual
424429
/// sentences in a document.
425430
fn iter_sentences(&self) -> impl Iterator<Item = &'_ [Token]> + '_;
@@ -437,6 +442,7 @@ impl TokenStringExt for [Token] {
437442
create_fns_for!(ellipsis);
438443
create_fns_for!(unlintable);
439444
create_fns_for!(sentence_terminator);
445+
create_fns_for!(paragraph_break);
440446
create_fns_for!(chunk_terminator);
441447
create_fns_for!(likely_homograph);
442448

@@ -501,6 +507,30 @@ impl TokenStringExt for [Token] {
501507
first_chunk.into_iter().chain(rest).chain(last)
502508
}
503509

510+
fn iter_paragraphs(&self) -> impl Iterator<Item = &'_ [Token]> + '_ {
511+
let first_pg = self
512+
.iter_paragraph_break_indices()
513+
.next()
514+
.map(|first_term| &self[0..=first_term]);
515+
516+
let rest = self
517+
.iter_paragraph_break_indices()
518+
.tuple_windows()
519+
.map(move |(a, b)| &self[a + 1..=b]);
520+
521+
let last_pg = if let Some(last_i) = self.last_paragraph_break_index() {
522+
if last_i + 1 < self.len() {
523+
Some(&self[last_i + 1..])
524+
} else {
525+
None
526+
}
527+
} else {
528+
Some(self)
529+
};
530+
531+
first_pg.into_iter().chain(rest).chain(last_pg)
532+
}
533+
504534
fn iter_sentences(&self) -> impl Iterator<Item = &'_ [Token]> + '_ {
505535
let first_sentence = self
506536
.iter_sentence_terminator_indices()

0 commit comments

Comments
 (0)