diff --git a/ConvertOneNote2MarkDown-v2.Tests.ps1 b/ConvertOneNote2MarkDown-v2.Tests.ps1 index c09485c..553690e 100644 --- a/ConvertOneNote2MarkDown-v2.Tests.ps1 +++ b/ConvertOneNote2MarkDown-v2.Tests.ps1 @@ -1190,6 +1190,7 @@ Describe 'New-SectionGroupConversionConfig' -Tag 'Unit' { $fakeMarkdownContent = @" hello world$( [char]0x00A0 ) + - foo - foo1 @@ -1198,9 +1199,29 @@ hello world$( [char]0x00A0 ) - bar1 +- baz + + - baz1 + > > > some other text + +1. foo + + 1. foo1 + +2. bar + + 1. bar1 + +3. baz + + 1. baz1 + +some new paragraph +some more + "@ -replace "`r", '' # On some Windows Powershell 5 versions, a here-string will contain `\r`, so let's ensure that doesn't happen. foreach ($pageCfg in $result) { @@ -1212,19 +1233,33 @@ hello world$( [char]0x00A0 ) } } - # Should remove newlines between bullets, and remove non-breaking spaces. Ignore first 8 lines for page header + # Should remove extra newlines between unordered and ordered list items, remove non-breaking spaces, and remove '>' after unordered lists. Ignore first 8 lines for page header $split = $mutated -split "`n" $expectedBody = $split[8..($split.Count - 1)] -join "`n" $expectedBody | Should -Be $( @" hello world + - foo - foo1 - bar - bar1 +- baz + - baz1 some other text + +1. foo + 1. foo1 +2. bar + 1. bar1 +3. baz + 1. baz1 + +some new paragraph +some more + "@ -replace "`r", '') # On some Windows Powershell 5 versions, a here-string will contain `\r`, so let's ensure that doesn't happen. } @@ -1244,23 +1279,10 @@ some other text } } - # Should keep newlines between bullets, and keep non-breaking spaces. Ignore first 8 lines for page header + # Should keep extra newlines between unordered and ordered list items, keep non-breaking spaces, and keep `>` after unordered lists. 
Ignore first 8 lines for page header $split = $mutated -split "`n" $expectedBody = $split[8..($split.Count - 1)] -join "`n" - $expectedBody | Should -Be $( @" -hello world$( [char]0x00A0 ) -- foo - - - foo1 - -- bar - - - bar1 - -> -> -> some other text -"@ -replace "`r", '') # On some Windows Powershell 5 versions, a here-string will contain `\r`, so let's ensure that doesn't happen. + $expectedBody | Should -Be $fakeMarkdownContent # `\r` was already stripped from $fakeMarkdownContent where it is defined, so the unmodified body can be compared against it as-is. } } diff --git a/ConvertOneNote2MarkDown-v2.ps1 b/ConvertOneNote2MarkDown-v2.ps1 index 2f783f8..ac8b727 100644 --- a/ConvertOneNote2MarkDown-v2.ps1 +++ b/ConvertOneNote2MarkDown-v2.ps1 @@ -152,9 +152,9 @@ Whether to include page timestamp and separator at top of document } keepspaces = @{ description = @' -Whether to clear double spaces between bullets, non-breaking spaces from blank lines, and '>` after bullet lists -1: Clear double spaces in bullets - Default -2: Keep double spaces +Whether to clear extra newlines between unordered (bullet) and ordered (numbered) list items, non-breaking spaces from blank lines, and `>` after unordered lists +1: Clear - Default +2: Don't clear '@ default = 1 value = 1 @@ -928,18 +928,24 @@ Function New-SectionGroupConversionConfig { } if ($config['keepspaces']['value'] -eq 1 ) { @{ - description = 'Clear double spaces from bullets and non-breaking spaces spaces from blank lines' + description = 'Clear extra newlines between unordered (bullet) and ordered (numbered) list items, non-breaking spaces from blank lines, and `>` after unordered lists' replacements = @( + # Remove non-breaking spaces @{ searchRegex = [regex]::Escape([char]0x00A0) replacement = '' } - # Remove a newline between each occurrence of '- some list item' + # Remove an extra newline between each occurrence of '- some unordered list item' @{ - searchRegex = '\r*\n\r*\n(\s*)- ' - replacement = "`n`$1- " + searchRegex = 
'(\s*)- ([^\r\n]*)\r*\n\r*\n(?=\s*-)' + replacement = "`$1- `$2`n" } - # Remove all '>' occurrences immediately following bullet lists + # Remove an extra newline between each occurrence of '1. some ordered list item' + @{ + searchRegex = '(\s*)(\d+\.) ([^\r\n]*)\r*\n\r*\n(?=\s*\d+\.)' + replacement = "`$1`$2 `$3`n" + } + # Remove all '>' occurrences immediately following unordered lists @{ searchRegex = '\n>[ ]*' replacement = "`n" diff --git a/README.md b/README.md index d9ee4be..84c1f25 100644 --- a/README.md +++ b/README.md @@ -29,7 +29,7 @@ The powershell script `ConvertOneNote2MarkDown-v2.ps1` will utilize the OneNote * `markdown_phpextra` (PHP Markdown Extra) * `markdown_strict` (original unextended Markdown) * Improved headers, with title now as a `#` heading, standardized `DateTime` format for created and modified dates, and horizontal line to separate from rest of document -* Choose whether to remove double spaces between bullet points, non-breaking spaces from blank lines, and `>` after bullet lists, which are created when converting with Pandoc +* Choose whether to clear extra newlines between unordered (bullet) and ordered (numbered) list items, non-breaking spaces from blank lines, and `>` after unordered lists, which are created when converting with Pandoc * Choose whether to remove `\` escape symbol that are created when converting with Pandoc * Choose whether to use Line Feed (`LF`) or Carriage Return + Line Feed (`CRLF`) for new lines * Choose whether to include a `.pdf` export alongside the `.md` file. `.md` does not preserve `InkDrawing` (i.e. overlayed drawings, highlights, pen marks) absolute positions within a page, but a `.pdf` export is a complete page snapshot that preserves `InkDrawing` absolute positions within a page. 
diff --git a/config.example.ps1 b/config.example.ps1 index 18603f6..6077bdf 100644 --- a/config.example.ps1 +++ b/config.example.ps1 @@ -68,9 +68,9 @@ $conversion = 'markdown-simple_tables-multiline_tables-grid_tables+pipe_tables' # 2: Don't include $headerTimestampEnabled = 1 -# Whether to clear double spaces between bullets, non-breaking spaces from blank lines, and '>` after bullet lists -# 1: Clear double spaces in bullets - Default -# 2: Keep double spaces +# Whether to clear extra newlines between unordered (bullet) and ordered (numbered) list items, non-breaking spaces from blank lines, and `>` after unordered lists +# 1: Clear - Default +# 2: Don't clear $keepspaces = 1 # Whether to clear escape symbols from md files. See: https://pandoc.org/MANUAL.html#backslash-escapes