diff --git a/CHANGELOG.md b/CHANGELOG.md index 9209d11feb34..cbbbc8bef3cc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5571,6 +5571,7 @@ Released 2018-09-13 [`disallowed_type`]: https://rust-lang.github.io/rust-clippy/master/index.html#disallowed_type [`disallowed_types`]: https://rust-lang.github.io/rust-clippy/master/index.html#disallowed_types [`diverging_sub_expression`]: https://rust-lang.github.io/rust-clippy/master/index.html#diverging_sub_expression +[`doc_broken_link`]: https://rust-lang.github.io/rust-clippy/master/index.html#doc_broken_link [`doc_comment_double_space_linebreaks`]: https://rust-lang.github.io/rust-clippy/master/index.html#doc_comment_double_space_linebreaks [`doc_include_without_cfg`]: https://rust-lang.github.io/rust-clippy/master/index.html#doc_include_without_cfg [`doc_lazy_continuation`]: https://rust-lang.github.io/rust-clippy/master/index.html#doc_lazy_continuation diff --git a/clippy_lints/src/declared_lints.rs b/clippy_lints/src/declared_lints.rs index b9f9f4e4fe0b..540c4031140a 100644 --- a/clippy_lints/src/declared_lints.rs +++ b/clippy_lints/src/declared_lints.rs @@ -139,6 +139,7 @@ pub static LINTS: &[&crate::LintInfo] = &[ crate::disallowed_names::DISALLOWED_NAMES_INFO, crate::disallowed_script_idents::DISALLOWED_SCRIPT_IDENTS_INFO, crate::disallowed_types::DISALLOWED_TYPES_INFO, + crate::doc::DOC_BROKEN_LINK_INFO, crate::doc::DOC_COMMENT_DOUBLE_SPACE_LINEBREAKS_INFO, crate::doc::DOC_INCLUDE_WITHOUT_CFG_INFO, crate::doc::DOC_LAZY_CONTINUATION_INFO, diff --git a/clippy_lints/src/doc/broken_link.rs b/clippy_lints/src/doc/broken_link.rs new file mode 100644 index 000000000000..a97b807e605f --- /dev/null +++ b/clippy_lints/src/doc/broken_link.rs @@ -0,0 +1,83 @@ +use clippy_utils::diagnostics::span_lint; +use pulldown_cmark::BrokenLink as PullDownBrokenLink; +use rustc_lint::LateContext; +use rustc_resolve::rustdoc::{DocFragment, source_span_for_markdown_range}; +use rustc_span::{BytePos, Pos, Span}; + +use super::DOC_BROKEN_LINK; + +/// Scan and report broken link on documents. +/// It ignores false positives detected by `pulldown_cmark`, and only +/// warns users when the broken link is consider a URL. +// NOTE: We don't check these other cases because +// rustdoc itself will check and warn about it: +// - When a link url is broken across multiple lines in the URL path part +// - When a link tag is missing the close parenthesis character at the end. +// - When a link has whitespace within the url link. +pub fn check(cx: &LateContext<'_>, bl: &PullDownBrokenLink<'_>, doc: &str, fragments: &[DocFragment]) { + warn_if_broken_link(cx, bl, doc, fragments); +} + +fn warn_if_broken_link(cx: &LateContext<'_>, bl: &PullDownBrokenLink<'_>, doc: &str, fragments: &[DocFragment]) { + if let Some(span) = source_span_for_markdown_range(cx.tcx, doc, &bl.span, fragments) { + let mut len = 0; + + // grab raw link data + let (_, raw_link) = doc.split_at(bl.span.start); + + // strip off link text part + let raw_link = match raw_link.split_once(']') { + None => return, + Some((prefix, suffix)) => { + len += prefix.len() + 1; + suffix + }, + }; + + let raw_link = match raw_link.split_once('(') { + None => return, + Some((prefix, suffix)) => { + if !prefix.is_empty() { + // there is text between ']' and '(' chars, so it is not a valid link + return; + } + len += prefix.len() + 1; + suffix + }, + }; + + if raw_link.starts_with("(http") { + // reduce chances of false positive reports + // by limiting this checking only to http/https links. + return; + } + + for c in raw_link.chars() { + if c == ')' { + // it is a valid link + return; + } + + if c == '\n' { + report_broken_link(cx, span, len); + break; + } + + len += 1; + } + } +} + +fn report_broken_link(cx: &LateContext<'_>, frag_span: Span, offset: usize) { + let start = frag_span.lo(); + let end = start + BytePos::from_usize(offset); + + let span = Span::new(start, end, frag_span.ctxt(), frag_span.parent()); + + span_lint( + cx, + DOC_BROKEN_LINK, + span, + "possible broken doc link: broken across multiple lines", + ); +} diff --git a/clippy_lints/src/doc/mod.rs b/clippy_lints/src/doc/mod.rs index ab77edf1147c..5e2f64e8f095 100644 --- a/clippy_lints/src/doc/mod.rs +++ b/clippy_lints/src/doc/mod.rs @@ -25,6 +25,7 @@ use rustc_span::edition::Edition; use std::ops::Range; use url::Url; +mod broken_link; mod doc_comment_double_space_linebreaks; mod include_in_doc_without_cfg; mod lazy_continuation; @@ -292,6 +293,34 @@ declare_clippy_lint! { "possible typo for an intra-doc link" } +declare_clippy_lint! { + /// ### What it does + /// Checks the doc comments have unbroken links, mostly caused + /// by bad formatted links such as broken across multiple lines. + /// + /// ### Why is this bad? + /// Because documentation generated by rustdoc will be broken + /// since expected links won't be links and just text. + /// + /// ### Examples + /// This link is broken: + /// ```no_run + /// /// [example of a bad link](https:// + /// /// github.com/rust-lang/rust-clippy/) + /// pub fn do_something() {} + /// ``` + /// + /// It shouldn't be broken across multiple lines to work: + /// ```no_run + /// /// [example of a good link](https://github.com/rust-lang/rust-clippy/) + /// pub fn do_something() {} + /// ``` + #[clippy::version = "1.84.0"] + pub DOC_BROKEN_LINK, + pedantic, + "broken document link" +} + declare_clippy_lint! { /// ### What it does /// Checks for the doc comments of publicly visible @@ -625,6 +654,7 @@ impl Documentation { impl_lint_pass!(Documentation => [ DOC_LINK_CODE, DOC_LINK_WITH_QUOTES, + DOC_BROKEN_LINK, DOC_MARKDOWN, DOC_NESTED_REFDEFS, MISSING_SAFETY_DOC, @@ -751,9 +781,9 @@ struct DocHeaders { /// back in the various late lint pass methods if they need the final doc headers, like "Safety" or /// "Panics" sections. fn check_attrs(cx: &LateContext<'_>, valid_idents: &FxHashSet, attrs: &[Attribute]) -> Option { - /// We don't want the parser to choke on intra doc links. Since we don't - /// actually care about rendering them, just pretend that all broken links - /// point to a fake address. + // We don't want the parser to choke on intra doc links. Since we don't + // actually care about rendering them, just pretend that all broken links + // point to a fake address. #[expect(clippy::unnecessary_wraps)] // we're following a type signature fn fake_broken_link_callback<'a>(_: BrokenLink<'_>) -> Option<(CowStr<'a>, CowStr<'a>)> { Some(("fake".into(), "fake".into())) @@ -793,14 +823,12 @@ fn check_attrs(cx: &LateContext<'_>, valid_idents: &FxHashSet, attrs: &[ return Some(DocHeaders::default()); } - let mut cb = fake_broken_link_callback; - check_for_code_clusters( cx, pulldown_cmark::Parser::new_with_broken_link_callback( &doc, main_body_opts() - Options::ENABLE_SMART_PUNCTUATION, - Some(&mut cb), + Some(&mut fake_broken_link_callback), ) .into_offset_iter(), &doc, @@ -810,9 +838,17 @@ fn check_attrs(cx: &LateContext<'_>, valid_idents: &FxHashSet, attrs: &[ }, ); + // NOTE: check_doc uses it own cb function, + // to avoid causing duplicated diagnostics for the broken link checker. + let mut full_fake_broken_link_callback = |bl: BrokenLink<'_>| -> Option<(CowStr<'_>, CowStr<'_>)> { + broken_link::check(cx, &bl, &doc, &fragments); + Some(("fake".into(), "fake".into())) + }; + // disable smart punctuation to pick up ['link'] more easily let opts = main_body_opts() - Options::ENABLE_SMART_PUNCTUATION; - let parser = pulldown_cmark::Parser::new_with_broken_link_callback(&doc, opts, Some(&mut cb)); + let parser = + pulldown_cmark::Parser::new_with_broken_link_callback(&doc, opts, Some(&mut full_fake_broken_link_callback)); Some(check_doc( cx, diff --git a/tests/ui/doc_broken_link.rs b/tests/ui/doc_broken_link.rs new file mode 100644 index 000000000000..7d9c0ef13b3c --- /dev/null +++ b/tests/ui/doc_broken_link.rs @@ -0,0 +1,72 @@ +#![warn(clippy::doc_broken_link)] + +fn main() {} + +pub struct FakeType {} + +/// This might be considered a link false positive +/// and should be ignored by this lint rule: +/// Example of referencing some code with brackets [FakeType]. +pub fn doc_ignore_link_false_positive_1() {} + +/// This might be considered a link false positive +/// and should be ignored by this lint rule: +/// [`FakeType`]. Continue text after brackets, +/// then (something in +/// parenthesis). +pub fn doc_ignore_link_false_positive_2() {} + +/// Test valid link, whole link single line. +/// [doc valid link](https://test.fake/doc_valid_link) +pub fn doc_valid_link() {} + +/// Test valid link, whole link single line but it has special chars such as brackets and +/// parenthesis. [doc invalid link url invalid char](https://test.fake/doc_valid_link_url_invalid_char?foo[bar]=1&bar(foo)=2) +pub fn doc_valid_link_url_invalid_char() {} + +/// Test valid link, text tag broken across multiple lines. +/// [doc valid link broken +/// text](https://test.fake/doc_valid_link_broken_text) +pub fn doc_valid_link_broken_text() {} + +/// Test valid link, url tag broken across multiple lines, but +/// the whole url part in a single line. +/// [doc valid link broken url tag two lines first](https://test.fake/doc_valid_link_broken_url_tag_two_lines_first +/// ) +pub fn doc_valid_link_broken_url_tag_two_lines_first() {} + +/// Test valid link, url tag broken across multiple lines, but +/// the whole url part in a single line. +/// [doc valid link broken url tag two lines second]( +/// https://test.fake/doc_valid_link_broken_url_tag_two_lines_second) +pub fn doc_valid_link_broken_url_tag_two_lines_second() {} + +/// Test valid link, url tag broken across multiple lines, but +/// the whole url part in a single line, but the closing pharentesis +/// in a third line. +/// [doc valid link broken url tag three lines]( +/// https://test.fake/doc_valid_link_broken_url_tag_three_lines +/// ) +pub fn doc_valid_link_broken_url_tag_three_lines() {} + +/// Test invalid link, url part broken across multiple lines. +/// [doc invalid link broken url scheme part](https:// +/// test.fake/doc_invalid_link_broken_url_scheme_part) +//~^^ ERROR: possible broken doc link: broken across multiple lines +pub fn doc_invalid_link_broken_url_scheme_part() {} + +/// Test invalid link, url part broken across multiple lines. +/// [doc invalid link broken url host part](https://test +/// .fake/doc_invalid_link_broken_url_host_part) +//~^^ ERROR: possible broken doc link: broken across multiple lines +pub fn doc_invalid_link_broken_url_host_part() {} + +/// Test invalid link, for multiple urls in the same block of comment. +/// There is a [fist link - invalid](https://test +/// .fake) then it continues +//~^^ ERROR: possible broken doc link: broken across multiple lines +/// with a [second link - valid](https://test.fake/doc_valid_link) and another [third link - invalid](https://test +/// .fake). It ends with another +//~^^ ERROR: possible broken doc link: broken across multiple lines +/// line of comment. +pub fn doc_multiple_invalid_link_broken_url() {} diff --git a/tests/ui/doc_broken_link.stderr b/tests/ui/doc_broken_link.stderr new file mode 100644 index 000000000000..179ed97635ee --- /dev/null +++ b/tests/ui/doc_broken_link.stderr @@ -0,0 +1,29 @@ +error: possible broken doc link: broken across multiple lines + --> tests/ui/doc_broken_link.rs:53:5 + | +LL | /// [doc invalid link broken url scheme part](https:// + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + | + = note: `-D clippy::doc-broken-link` implied by `-D warnings` + = help: to override `-D warnings` add `#[allow(clippy::doc_broken_link)]` + +error: possible broken doc link: broken across multiple lines + --> tests/ui/doc_broken_link.rs:59:5 + | +LL | /// [doc invalid link broken url host part](https://test + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +error: possible broken doc link: broken across multiple lines + --> tests/ui/doc_broken_link.rs:65:16 + | +LL | /// There is a [fist link - invalid](https://test + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +error: possible broken doc link: broken across multiple lines + --> tests/ui/doc_broken_link.rs:68:80 + | +LL | /// with a [second link - valid](https://test.fake/doc_valid_link) and another [third link - invalid](https://test + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +error: aborting due to 4 previous errors +