From e030c58b59a17b062c02ac796071a40ba9fb3590 Mon Sep 17 00:00:00 2001 From: Ryan Peach Date: Sat, 14 Dec 2024 20:55:49 -0500 Subject: [PATCH 1/5] Reproduced error in https://github.com/ryanpeach/mdlinker/issues/45 --- tests/logseq/broken_wikilink/assets/pages/foo.md | 2 +- tests/logseq/broken_wikilink/tests.rs | 4 ++-- tests/logseq/unlinked_text/assets/pages/foo.md | 1 + tests/logseq/unlinked_text/tests.rs | 16 +++++++++++++++- 4 files changed, 19 insertions(+), 4 deletions(-) diff --git a/tests/logseq/broken_wikilink/assets/pages/foo.md b/tests/logseq/broken_wikilink/assets/pages/foo.md index ada588d..86d2eec 100644 --- a/tests/logseq/broken_wikilink/assets/pages/foo.md +++ b/tests/logseq/broken_wikilink/assets/pages/foo.md @@ -2,5 +2,5 @@ ugds: xlvf, zptqgmbety --- -- lorem lhet qfee trbiuhd bz #dolor +- O vnb’b bzk thd lorem ivqr fqtz wqgfdkdljm, ynxl famn nnfrp D hxjapjhueaay cslq. Fwe unaxg #dolor - diff --git a/tests/logseq/broken_wikilink/tests.rs b/tests/logseq/broken_wikilink/tests.rs index 1cb65e2..e42dd2a 100644 --- a/tests/logseq/broken_wikilink/tests.rs +++ b/tests/logseq/broken_wikilink/tests.rs @@ -175,6 +175,6 @@ fn dolor_does_not_exist_and_is_wikilink_in_foo_span() { &format!("{}::foo::dolor", broken_wikilink::CODE).into(), ); let err = err_list.iter().exactly_one().unwrap(); - assert_eq!(err.wikilink.offset(), 62); - assert_eq!(err.wikilink.len(), 5); + assert_eq!(err.wikilink.offset(), 127); + assert_eq!(err.wikilink.len(), 6); } diff --git a/tests/logseq/unlinked_text/assets/pages/foo.md b/tests/logseq/unlinked_text/assets/pages/foo.md index e5a225c..46bc10e 100644 --- a/tests/logseq/unlinked_text/assets/pages/foo.md +++ b/tests/logseq/unlinked_text/assets/pages/foo.md @@ -3,4 +3,5 @@ ugds: xlvf, zptqgmbety --- - #lorem lhet qfee trbiuhd bz dolors +- O vnb’b bzk thd [[lorem]] ivqr fqtz wqgfdkdljm, ynxl famn nnfrp D hxjapjhueaay cslq. 
Fwe unaxg lorem - diff --git a/tests/logseq/unlinked_text/tests.rs b/tests/logseq/unlinked_text/tests.rs index 93bc778..1afa399 100644 --- a/tests/logseq/unlinked_text/tests.rs +++ b/tests/logseq/unlinked_text/tests.rs @@ -25,7 +25,7 @@ fn number_of_unlinked_texts() { for unlinked_texts in &report.unlinked_texts() { debug!("{unlinked_texts:#?}"); } - assert_eq!(report.unlinked_texts().len(), 3); + assert_eq!(report.unlinked_texts().len(), 4); } /// This passes because the link is valid @@ -101,3 +101,17 @@ fn icazyvey_exists_and_is_not_wikilink_in_journal() { assert_eq!(err.span.offset(), offset.offset()); assert_eq!(err.span.len(), 8); } + +/// Tests that linking is right after a non-standard character like the right single quotation mark (U+2019) +#[test] +fn lorem_exists_and_is_not_wikilink_in_journal() { + info!("lorem_exists_and_is_not_wikilink_in_journal"); + let report = get_report(PATHS.as_slice()); + let err_list = filter_code( + report.unlinked_texts(), + &format!("{}::foo::lorem", unlinked_text::CODE).into(), + ); + let err = err_list.iter().exactly_one().unwrap(); + assert_eq!(err.span.offset(), 168); + assert_eq!(err.span.len(), 5); +} From ac79e9cf46f8296b76f6625c8db1b3feb194000d Mon Sep 17 00:00:00 2001 From: Ryan Peach Date: Sat, 14 Dec 2024 23:53:32 -0500 Subject: [PATCH 2/5] Created a script to help debug --- tests/logseq/broken_wikilink/assets/pages/foo.md | 2 +- tests/logseq/broken_wikilink/tests.rs | 2 +- tests/logseq/unlinked_text/assets/pages/foo.md | 2 +- tests/logseq/unlinked_text/tests.rs | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/logseq/broken_wikilink/assets/pages/foo.md b/tests/logseq/broken_wikilink/assets/pages/foo.md index 86d2eec..a79e1ef 100644 --- a/tests/logseq/broken_wikilink/assets/pages/foo.md +++ b/tests/logseq/broken_wikilink/assets/pages/foo.md @@ -2,5 +2,5 @@ ugds: xlvf, zptqgmbety --- -- O vnb’b bzk thd lorem ivqr fqtz wqgfdkdljm, ynxl famn nnfrp D hxjapjhueaay cslq. 
Fwe unaxg #dolor +- ’ #dolor - diff --git a/tests/logseq/broken_wikilink/tests.rs b/tests/logseq/broken_wikilink/tests.rs index e42dd2a..92f0801 100644 --- a/tests/logseq/broken_wikilink/tests.rs +++ b/tests/logseq/broken_wikilink/tests.rs @@ -175,6 +175,6 @@ fn dolor_does_not_exist_and_is_wikilink_in_foo_span() { &format!("{}::foo::dolor", broken_wikilink::CODE).into(), ); let err = err_list.iter().exactly_one().unwrap(); - assert_eq!(err.wikilink.offset(), 127); + assert_eq!(err.wikilink.offset(), 38); assert_eq!(err.wikilink.len(), 6); } diff --git a/tests/logseq/unlinked_text/assets/pages/foo.md b/tests/logseq/unlinked_text/assets/pages/foo.md index 46bc10e..cd6df4b 100644 --- a/tests/logseq/unlinked_text/assets/pages/foo.md +++ b/tests/logseq/unlinked_text/assets/pages/foo.md @@ -3,5 +3,5 @@ ugds: xlvf, zptqgmbety --- - #lorem lhet qfee trbiuhd bz dolors -- O vnb’b bzk thd [[lorem]] ivqr fqtz wqgfdkdljm, ynxl famn nnfrp D hxjapjhueaay cslq. Fwe unaxg lorem +- ’ [[lorem]] lorem - diff --git a/tests/logseq/unlinked_text/tests.rs b/tests/logseq/unlinked_text/tests.rs index 1afa399..0f8dbf4 100644 --- a/tests/logseq/unlinked_text/tests.rs +++ b/tests/logseq/unlinked_text/tests.rs @@ -112,6 +112,6 @@ fn lorem_exists_and_is_not_wikilink_in_journal() { &format!("{}::foo::lorem", unlinked_text::CODE).into(), ); let err = err_list.iter().exactly_one().unwrap(); - assert_eq!(err.span.offset(), 168); + assert_eq!(err.span.offset(), 85); assert_eq!(err.span.len(), 5); } From 7cf87afa5a5e19ecb81f08b342da11feb79a0951 Mon Sep 17 00:00:00 2001 From: Ryan Peach Date: Sat, 14 Dec 2024 22:06:34 -0500 Subject: [PATCH 3/5] WIP: Made unlinked text a little more understandable --- src/file/content/wikilink.rs | 2 +- src/rules/unlinked_text.rs | 3 --- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/src/file/content/wikilink.rs b/src/file/content/wikilink.rs index 78cdf6e..497d482 100644 --- a/src/file/content/wikilink.rs +++ b/src/file/content/wikilink.rs @@ -124,7 
+124,7 @@ impl Visitor for WikilinkVisitor { Wikilink::builder() .alias(alias.clone()) .span(span_repaired) - .build(), + .build() ); } }; diff --git a/src/rules/unlinked_text.rs b/src/rules/unlinked_text.rs index f10b9f4..c6bdc12 100644 --- a/src/rules/unlinked_text.rs +++ b/src/rules/unlinked_text.rs @@ -181,9 +181,6 @@ impl Visitor for UnlinkedTextVisitor { continue; } let alias = Alias::new(&patterns[found.pattern().as_usize()]); - if "lorem" == alias.to_string() { - println!("Found lorem"); - } let text_without_frontmatter = remove_frontmatter_from_source(source, node); let sourcepos_start_offset_bytes = SourceOffset::from_location( text_without_frontmatter, From 071515ca310315fcb4078ae279fec82529fc1ab8 Mon Sep 17 00:00:00 2001 From: Ryan Peach Date: Tue, 17 Dec 2024 01:15:35 -0500 Subject: [PATCH 4/5] Tracked the issue down to sourcepos being wrong on lines with the Unicode right single quotation mark (U+2019). See https://github.com/kivikakk/comrak/issues/495 --- src/rules/unlinked_text.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/rules/unlinked_text.rs b/src/rules/unlinked_text.rs index c6bdc12..f10b9f4 100644 --- a/src/rules/unlinked_text.rs +++ b/src/rules/unlinked_text.rs @@ -181,6 +181,9 @@ impl Visitor for UnlinkedTextVisitor { continue; } let alias = Alias::new(&patterns[found.pattern().as_usize()]); + if "lorem" == alias.to_string() { + println!("Found lorem"); + } let text_without_frontmatter = remove_frontmatter_from_source(source, node); let sourcepos_start_offset_bytes = SourceOffset::from_location( text_without_frontmatter, From 4b98470203aa814163d964208de2273480323186 Mon Sep 17 00:00:00 2001 From: Ryan Peach Date: Tue, 17 Dec 2024 02:14:27 -0500 Subject: [PATCH 5/5] Remove the limits on multibyte characters --- src/file/content/wikilink.rs | 2 +- src/visitor.rs | 13 ------------- 2 files changed, 1 insertion(+), 14 deletions(-) diff --git a/src/file/content/wikilink.rs b/src/file/content/wikilink.rs index 497d482..78cdf6e 100644 --- 
a/src/file/content/wikilink.rs +++ b/src/file/content/wikilink.rs @@ -124,7 +124,7 @@ impl Visitor for WikilinkVisitor { Wikilink::builder() .alias(alias.clone()) .span(span_repaired) - .build() + .build(), ); } }; diff --git a/src/visitor.rs b/src/visitor.rs index 5683aa5..c8e8b61 100644 --- a/src/visitor.rs +++ b/src/visitor.rs @@ -93,11 +93,6 @@ pub enum ParseError { #[backtrace] source: std::io::Error, }, - #[error("Multibyte characters found in the file {file:?}")] - MultibyteError { - file: PathBuf, - backtrace: backtrace::Backtrace, - }, #[error("Error parsing the source code for file {file:?} using tree-sitter")] TreeSitter { file: PathBuf, @@ -125,14 +120,6 @@ pub fn parse(path: &PathBuf, visitors: Vec>>) -> Result< source, })?; - // Check for multibyte characters - if source.chars().count() != source.len() { - return Err(ParseError::MultibyteError { - file: path.clone(), - backtrace: backtrace::Backtrace::force_capture(), - }); - } - // Parse the source code let arena = Arena::new(); let options = ExtensionOptionsBuilder::default()