Skip to content

Commit 5e7ce90

Browse files
authored
Improve doc comment code language tag parsing, don't use a full parser (#15967)
Now only tokenizes the contents of code blocks instead of spawning a thread for a full parser The language tag handling now correctly picks up things like `rust, ignore` `no_test` (#10491) is no longer specially handled, rustdoc considers `rust,no_test` to be a regular test changelog: [`needless_doctest_main`], [`test_attr_in_doctest`]: now handles whitespace in language tags
2 parents d05f74e + 0ff3a38 commit 5e7ce90

11 files changed

+198
-298
lines changed

clippy_lints/src/doc/mod.rs

Lines changed: 80 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,6 @@ use rustc_resolve::rustdoc::{
2222
};
2323
use rustc_session::impl_lint_pass;
2424
use rustc_span::Span;
25-
use rustc_span::edition::Edition;
2625
use std::ops::Range;
2726
use url::Url;
2827

@@ -36,6 +35,7 @@ mod markdown;
3635
mod missing_headers;
3736
mod needless_doctest_main;
3837
mod suspicious_doc_comments;
38+
mod test_attr_in_doctest;
3939
mod too_long_first_doc_paragraph;
4040

4141
declare_clippy_lint! {
@@ -900,8 +900,6 @@ fn check_attrs(cx: &LateContext<'_>, valid_idents: &FxHashSet<String>, attrs: &[
900900
))
901901
}
902902

903-
const RUST_CODE: &[&str] = &["rust", "no_run", "should_panic", "compile_fail"];
904-
905903
enum Container {
906904
Blockquote,
907905
List(usize),
@@ -966,6 +964,70 @@ fn check_for_code_clusters<'a, Events: Iterator<Item = (pulldown_cmark::Event<'a
966964
}
967965
}
968966

967+
#[derive(Clone, Copy)]
968+
#[expect(clippy::struct_excessive_bools)]
969+
struct CodeTags {
970+
no_run: bool,
971+
ignore: bool,
972+
compile_fail: bool,
973+
974+
rust: bool,
975+
}
976+
977+
impl Default for CodeTags {
978+
fn default() -> Self {
979+
Self {
980+
no_run: false,
981+
ignore: false,
982+
compile_fail: false,
983+
984+
rust: true,
985+
}
986+
}
987+
}
988+
989+
impl CodeTags {
990+
/// Based on <https://github.com/rust-lang/rust/blob/1.90.0/src/librustdoc/html/markdown.rs#L1169>
991+
fn parse(lang: &str) -> Self {
992+
let mut tags = Self::default();
993+
994+
let mut seen_rust_tags = false;
995+
let mut seen_other_tags = false;
996+
for item in lang.split([',', ' ', '\t']) {
997+
match item.trim() {
998+
"" => {},
999+
"rust" => {
1000+
tags.rust = true;
1001+
seen_rust_tags = true;
1002+
},
1003+
"ignore" => {
1004+
tags.ignore = true;
1005+
seen_rust_tags = !seen_other_tags;
1006+
},
1007+
"no_run" => {
1008+
tags.no_run = true;
1009+
seen_rust_tags = !seen_other_tags;
1010+
},
1011+
"should_panic" => seen_rust_tags = !seen_other_tags,
1012+
"compile_fail" => {
1013+
tags.compile_fail = true;
1014+
seen_rust_tags = !seen_other_tags || seen_rust_tags;
1015+
},
1016+
"test_harness" | "standalone_crate" => {
1017+
seen_rust_tags = !seen_other_tags || seen_rust_tags;
1018+
},
1019+
_ if item.starts_with("ignore-") => seen_rust_tags = true,
1020+
_ if item.starts_with("edition") => {},
1021+
_ => seen_other_tags = true,
1022+
}
1023+
}
1024+
1025+
tags.rust &= seen_rust_tags || !seen_other_tags;
1026+
1027+
tags
1028+
}
1029+
}
1030+
9691031
/// Checks parsed documentation.
9701032
/// This walks the "events" (think sections of markdown) produced by `pulldown_cmark`,
9711033
/// so lints here will generally access that information.
@@ -981,14 +1043,10 @@ fn check_doc<'a, Events: Iterator<Item = (pulldown_cmark::Event<'a>, Range<usize
9811043
) -> DocHeaders {
9821044
// true if a safety header was found
9831045
let mut headers = DocHeaders::default();
984-
let mut in_code = false;
1046+
let mut code = None;
9851047
let mut in_link = None;
9861048
let mut in_heading = false;
9871049
let mut in_footnote_definition = false;
988-
let mut is_rust = false;
989-
let mut no_test = false;
990-
let mut ignore = false;
991-
let mut edition = None;
9921050
let mut ticks_unbalanced = false;
9931051
let mut text_to_check: Vec<(CowStr<'_>, Range<usize>, isize)> = Vec::new();
9941052
let mut paragraph_range = 0..0;
@@ -1048,31 +1106,12 @@ fn check_doc<'a, Events: Iterator<Item = (pulldown_cmark::Event<'a>, Range<usize
10481106
containers.pop();
10491107
},
10501108
Start(CodeBlock(ref kind)) => {
1051-
in_code = true;
1052-
if let CodeBlockKind::Fenced(lang) = kind {
1053-
for item in lang.split(',') {
1054-
if item == "ignore" {
1055-
is_rust = false;
1056-
break;
1057-
} else if item == "no_test" {
1058-
no_test = true;
1059-
} else if item == "no_run" || item == "compile_fail" {
1060-
ignore = true;
1061-
}
1062-
if let Some(stripped) = item.strip_prefix("edition") {
1063-
is_rust = true;
1064-
edition = stripped.parse::<Edition>().ok();
1065-
} else if item.is_empty() || RUST_CODE.contains(&item) {
1066-
is_rust = true;
1067-
}
1068-
}
1069-
}
1070-
},
1071-
End(TagEnd::CodeBlock) => {
1072-
in_code = false;
1073-
is_rust = false;
1074-
ignore = false;
1109+
code = Some(match kind {
1110+
CodeBlockKind::Indented => CodeTags::default(),
1111+
CodeBlockKind::Fenced(lang) => CodeTags::parse(lang),
1112+
});
10751113
},
1114+
End(TagEnd::CodeBlock) => code = None,
10761115
Start(Link { dest_url, .. }) => in_link = Some(dest_url),
10771116
End(TagEnd::Link) => in_link = None,
10781117
Start(Heading { .. } | Paragraph | Item) => {
@@ -1182,7 +1221,7 @@ fn check_doc<'a, Events: Iterator<Item = (pulldown_cmark::Event<'a>, Range<usize
11821221
paragraph_range.end = range.end;
11831222
let range_ = range.clone();
11841223
ticks_unbalanced |= text.contains('`')
1185-
&& !in_code
1224+
&& code.is_none()
11861225
&& doc[range.clone()].bytes().enumerate().any(|(i, c)| {
11871226
// scan the markdown source code bytes for backquotes that aren't preceded by backslashes
11881227
// - use bytes, instead of chars, to avoid utf8 decoding overhead (special chars are ascii)
@@ -1205,10 +1244,14 @@ fn check_doc<'a, Events: Iterator<Item = (pulldown_cmark::Event<'a>, Range<usize
12051244
headers.safety |= in_heading && trimmed_text == "Implementation Safety";
12061245
headers.errors |= in_heading && trimmed_text == "Errors";
12071246
headers.panics |= in_heading && trimmed_text == "Panics";
1208-
if in_code {
1209-
if is_rust && !no_test {
1210-
let edition = edition.unwrap_or_else(|| cx.tcx.sess.edition());
1211-
needless_doctest_main::check(cx, &text, edition, range.clone(), fragments, ignore);
1247+
1248+
if let Some(tags) = code {
1249+
if tags.rust && !tags.compile_fail && !tags.ignore {
1250+
needless_doctest_main::check(cx, &text, range.start, fragments);
1251+
1252+
if !tags.no_run {
1253+
test_attr_in_doctest::check(cx, &text, range.start, fragments);
1254+
}
12121255
}
12131256
} else {
12141257
if in_link.is_some() {
Lines changed: 53 additions & 150 deletions
Original file line numberDiff line numberDiff line change
@@ -1,160 +1,63 @@
1-
use std::ops::Range;
2-
use std::sync::Arc;
3-
use std::{io, thread};
4-
5-
use crate::doc::{NEEDLESS_DOCTEST_MAIN, TEST_ATTR_IN_DOCTEST};
1+
use super::Fragments;
2+
use crate::doc::NEEDLESS_DOCTEST_MAIN;
63
use clippy_utils::diagnostics::span_lint;
7-
use rustc_ast::{CoroutineKind, Fn, FnRetTy, Item, ItemKind};
8-
use rustc_errors::emitter::HumanEmitter;
9-
use rustc_errors::{Diag, DiagCtxt};
4+
use clippy_utils::tokenize_with_text;
5+
use rustc_lexer::TokenKind;
106
use rustc_lint::LateContext;
11-
use rustc_parse::lexer::StripTokens;
12-
use rustc_parse::new_parser_from_source_str;
13-
use rustc_parse::parser::ForceCollect;
14-
use rustc_session::parse::ParseSess;
15-
use rustc_span::edition::Edition;
16-
use rustc_span::source_map::{FilePathMapping, SourceMap};
17-
use rustc_span::{FileName, Ident, Pos, sym};
18-
19-
use super::Fragments;
20-
21-
fn get_test_spans(item: &Item, ident: Ident, test_attr_spans: &mut Vec<Range<usize>>) {
22-
test_attr_spans.extend(
23-
item.attrs
24-
.iter()
25-
.find(|attr| attr.has_name(sym::test))
26-
.map(|attr| attr.span.lo().to_usize()..ident.span.hi().to_usize()),
27-
);
28-
}
29-
30-
pub fn check(
31-
cx: &LateContext<'_>,
32-
text: &str,
33-
edition: Edition,
34-
range: Range<usize>,
35-
fragments: Fragments<'_>,
36-
ignore: bool,
37-
) {
38-
// return whether the code contains a needless `fn main` plus a vector of byte position ranges
39-
// of all `#[test]` attributes in not ignored code examples
40-
fn check_code_sample(code: String, edition: Edition, ignore: bool) -> (bool, Vec<Range<usize>>) {
41-
rustc_driver::catch_fatal_errors(|| {
42-
rustc_span::create_session_globals_then(edition, &[], None, || {
43-
let mut test_attr_spans = vec![];
44-
let filename = FileName::anon_source_code(&code);
45-
46-
let translator = rustc_driver::default_translator();
47-
let emitter = HumanEmitter::new(Box::new(io::sink()), translator);
48-
let dcx = DiagCtxt::new(Box::new(emitter)).disable_warnings();
49-
#[expect(clippy::arc_with_non_send_sync)] // `Arc` is expected by with_dcx
50-
let sm = Arc::new(SourceMap::new(FilePathMapping::empty()));
51-
let psess = ParseSess::with_dcx(dcx, sm);
52-
53-
let mut parser =
54-
match new_parser_from_source_str(&psess, filename, code, StripTokens::ShebangAndFrontmatter) {
55-
Ok(p) => p,
56-
Err(errs) => {
57-
errs.into_iter().for_each(Diag::cancel);
58-
return (false, test_attr_spans);
59-
},
60-
};
61-
62-
let mut relevant_main_found = false;
63-
let mut eligible = true;
64-
loop {
65-
match parser.parse_item(ForceCollect::No) {
66-
Ok(Some(item)) => match &item.kind {
67-
ItemKind::Fn(box Fn {
68-
ident,
69-
sig,
70-
body: Some(block),
71-
..
72-
}) if ident.name == sym::main => {
73-
if !ignore {
74-
get_test_spans(&item, *ident, &mut test_attr_spans);
75-
}
76-
77-
let is_async = matches!(sig.header.coroutine_kind, Some(CoroutineKind::Async { .. }));
78-
let returns_nothing = match &sig.decl.output {
79-
FnRetTy::Default(..) => true,
80-
FnRetTy::Ty(ty) if ty.kind.is_unit() => true,
81-
FnRetTy::Ty(_) => false,
82-
};
83-
84-
if returns_nothing && !is_async && !block.stmts.is_empty() {
85-
// This main function should be linted, but only if there are no other functions
86-
relevant_main_found = true;
87-
} else {
88-
// This main function should not be linted, we're done
89-
eligible = false;
90-
}
91-
},
92-
// Another function was found; this case is ignored for needless_doctest_main
93-
ItemKind::Fn(fn_) => {
94-
eligible = false;
95-
if ignore {
96-
// If ignore is active invalidating one lint,
97-
// and we already found another function thus
98-
// invalidating the other one, we have no
99-
// business continuing.
100-
return (false, test_attr_spans);
101-
}
102-
get_test_spans(&item, fn_.ident, &mut test_attr_spans);
103-
},
104-
// Tests with one of these items are ignored
105-
ItemKind::Static(..)
106-
| ItemKind::Const(..)
107-
| ItemKind::ExternCrate(..)
108-
| ItemKind::ForeignMod(..) => {
109-
eligible = false;
110-
},
111-
_ => {},
112-
},
113-
Ok(None) => break,
114-
Err(e) => {
115-
// See issue #15041. When calling `.cancel()` on the `Diag`, Clippy will unexpectedly panic
116-
// when the `Diag` is unwinded. Meanwhile, we can just call `.emit()`, since the `DiagCtxt`
117-
// is just a sink, nothing will be printed.
118-
e.emit();
119-
return (false, test_attr_spans);
120-
},
121-
}
122-
}
123-
124-
(relevant_main_found & eligible, test_attr_spans)
125-
})
126-
})
127-
.ok()
128-
.unwrap_or_default()
7+
use rustc_span::InnerSpan;
8+
9+
fn returns_unit<'a>(mut tokens: impl Iterator<Item = (TokenKind, &'a str, InnerSpan)>) -> bool {
10+
let mut next = || tokens.next().map_or(TokenKind::Whitespace, |(kind, ..)| kind);
11+
12+
match next() {
13+
// {
14+
TokenKind::OpenBrace => true,
15+
// - > ( ) {
16+
TokenKind::Minus => {
17+
next() == TokenKind::Gt
18+
&& next() == TokenKind::OpenParen
19+
&& next() == TokenKind::CloseParen
20+
&& next() == TokenKind::OpenBrace
21+
},
22+
_ => false,
12923
}
24+
}
13025

131-
let trailing_whitespace = text.len() - text.trim_end().len();
132-
133-
// We currently only test for "fn main". Checking for the real
134-
// entrypoint (with tcx.entry_fn(())) in each block would be unnecessarily
135-
// expensive, as those are probably intended and relevant. Same goes for
136-
// macros and other weird ways of declaring a main function.
137-
//
138-
// Also, as we only check for attribute names and don't do macro expansion,
139-
// we can check only for #[test]
140-
141-
if !((text.contains("main") && text.contains("fn")) || text.contains("#[test]")) {
26+
pub fn check(cx: &LateContext<'_>, text: &str, offset: usize, fragments: Fragments<'_>) {
27+
if !text.contains("main") {
14228
return;
14329
}
14430

145-
// Because of the global session, we need to create a new session in a different thread with
146-
// the edition we need.
147-
let text = text.to_owned();
148-
let (has_main, test_attr_spans) = thread::spawn(move || check_code_sample(text, edition, ignore))
149-
.join()
150-
.expect("thread::spawn failed");
151-
if has_main && let Some(span) = fragments.span(cx, range.start..range.end - trailing_whitespace) {
152-
span_lint(cx, NEEDLESS_DOCTEST_MAIN, span, "needless `fn main` in doctest");
153-
}
154-
for span in test_attr_spans {
155-
let span = (range.start + span.start)..(range.start + span.end);
156-
if let Some(span) = fragments.span(cx, span) {
157-
span_lint(cx, TEST_ATTR_IN_DOCTEST, span, "unit tests in doctest are not executed");
31+
let mut tokens = tokenize_with_text(text).filter(|&(kind, ..)| {
32+
!matches!(
33+
kind,
34+
TokenKind::Whitespace | TokenKind::BlockComment { .. } | TokenKind::LineComment { .. }
35+
)
36+
});
37+
if let Some((TokenKind::Ident, "fn", fn_span)) = tokens.next()
38+
&& let Some((TokenKind::Ident, "main", main_span)) = tokens.next()
39+
&& let Some((TokenKind::OpenParen, ..)) = tokens.next()
40+
&& let Some((TokenKind::CloseParen, ..)) = tokens.next()
41+
&& returns_unit(&mut tokens)
42+
{
43+
let mut depth = 1;
44+
for (kind, ..) in &mut tokens {
45+
match kind {
46+
TokenKind::OpenBrace => depth += 1,
47+
TokenKind::CloseBrace => {
48+
depth -= 1;
49+
if depth == 0 {
50+
break;
51+
}
52+
},
53+
_ => {},
54+
}
55+
}
56+
57+
if tokens.next().is_none()
58+
&& let Some(span) = fragments.span(cx, fn_span.start + offset..main_span.end + offset)
59+
{
60+
span_lint(cx, NEEDLESS_DOCTEST_MAIN, span, "needless `fn main` in doctest");
15861
}
15962
}
16063
}

0 commit comments

Comments
 (0)