diff --git a/compiler/rustc_parse/src/lexer/mod.rs b/compiler/rustc_parse/src/lexer/mod.rs
index 173a92fdbf759..2510eaa36921c 100644
--- a/compiler/rustc_parse/src/lexer/mod.rs
+++ b/compiler/rustc_parse/src/lexer/mod.rs
@@ -1031,9 +1031,88 @@ impl<'psess, 'src> Lexer<'psess, 'src> {
                 );
         }
 
+        // If this is a doc comment, check whether any `/*` or `*/` appears
+        // inside backtick-delimited code spans. The lexer doesn't parse
+        // Markdown, so such occurrences still change the nesting depth.
+        if doc_style.is_some() {
+            let content = self.str_from(start);
+            if Self::has_comment_marker_in_backticks(content) {
+                err.note(
+                    "`/*` and `*/` inside a backtick code span still \
+                     open and close nested block comments; \
+                     the lexer does not parse Markdown",
+                );
+                err.help(concat!(
+                    "consider removing the `/*` from the code ",
+                    "span, closing it with a matching `*/`, or ",
+                    "using a raw string doc attribute: ",
+                    r#"`#[doc = r"..."]`"#,
+                ));
+            }
+        }
+
         err.emit();
     }
 
+    /// Reports whether the text of a block comment contains `/*` or `*/`
+    /// inside a backtick-delimited Markdown code span.
+    ///
+    /// This is a best-effort heuristic, not a Markdown parser. A code span
+    /// opens with a run of one or more backticks and closes at the next run
+    /// of exactly the same length; shorter or longer runs in between belong
+    /// to the span's content. An opening run that is never closed is treated
+    /// as literal text: scanning resumes right after it, so well-formed
+    /// spans later in the comment are still inspected.
+    fn has_comment_marker_in_backticks(content: &str) -> bool {
+        let bytes = content.as_bytes();
+        // Length of the run of consecutive backticks starting at `from`.
+        let run_len = |from: usize| bytes[from..].iter().take_while(|&&b| b == b'`').count();
+
+        let mut i = 0;
+        while i < bytes.len() {
+            if bytes[i] != b'`' {
+                i += 1;
+                continue;
+            }
+
+            let open_len = run_len(i);
+            let span_start = i + open_len;
+
+            // The span ends at the first later run of the same length.
+            let mut span_end = None;
+            let mut j = span_start;
+            while j < bytes.len() {
+                if bytes[j] != b'`' {
+                    j += 1;
+                    continue;
+                }
+                let close_len = run_len(j);
+                if close_len == open_len {
+                    span_end = Some(j);
+                    break;
+                }
+                j += close_len;
+            }
+
+            match span_end {
+                Some(end) => {
+                    let has_marker = bytes[span_start..end]
+                        .windows(2)
+                        .any(|pair| matches!(pair, [b'/', b'*'] | [b'*', b'/']));
+                    if has_marker {
+                        return true;
+                    }
+                    i = end + open_len;
+                }
+                // Unclosed opener: it is literal text, so resume right after
+                // it instead of discarding the rest of the comment.
+                None => i = span_start,
+            }
+        }
+
+        false
+    }
+
     // RFC 3101 introduced the idea of (reserved) prefixes. As of Rust 2021,
     // using a (unknown) prefix is an error. In earlier editions, however, they
     // only result in a (allowed by default) lint, and are treated as regular
diff --git a/tests/ui/lexer/unterminated-block-comment-backtick.rs b/tests/ui/lexer/unterminated-block-comment-backtick.rs
new file mode 100644
index 0000000000000..f6468b60bea93
--- /dev/null
+++ b/tests/ui/lexer/unterminated-block-comment-backtick.rs
@@ -0,0 +1,4 @@
+/** This doc comment has `/*` inside backticks */
+//~^ ERROR E0758
+
+fn main() {}
diff --git a/tests/ui/lexer/unterminated-block-comment-backtick.stderr b/tests/ui/lexer/unterminated-block-comment-backtick.stderr
new file mode 100644
index 0000000000000..4b459f104224d
--- /dev/null
+++ b/tests/ui/lexer/unterminated-block-comment-backtick.stderr
@@ -0,0 +1,19 @@
+error[E0758]: unterminated block doc-comment
+  --> $DIR/unterminated-block-comment-backtick.rs:1:1
+   |
+LL |   /** This doc comment has `/*` inside backticks */
+   |   ^-                        --                   -- ...and last nested comment terminates here.
+   |   |                         |
+   |   |                         ...as last nested comment starts here, maybe you want to close this instead?
+   |  _unterminated block doc-comment
+   | |
+...  |
+LL | | fn main() {}
+   | |_____________^
+   |
+   = note: `/*` and `*/` inside a backtick code span still open and close nested block comments; the lexer does not parse Markdown
+   = help: consider removing the `/*` from the code span, closing it with a matching `*/`, or using a raw string doc attribute: `#[doc = r"..."]`
+
+error: aborting due to 1 previous error
+
+For more information about this error, try `rustc --explain E0758`.