From 7aa5a3203dd52650b5d186fa2bf3368057c286e3 Mon Sep 17 00:00:00 2001 From: Mikkel Kjeldsen Date: Thu, 21 Sep 2023 20:58:18 +0200 Subject: [PATCH] Preserve block quotes Teach commitmsgfmt to recognize Usenet block quotes and preserve them as-is, thereby enabling quotes to safely appear outside of a literal context. Users that wish to reflow a quote are advised to do so via another tool, like fmt(1) or par(1). References: https://en.wikipedia.org/wiki/Usenet_quoting#Canonical_quoting References: https://gitlab.com/mkjeldsen/commitmsgfmt/-/issues/6 --- CHANGELOG.md | 4 ++ README.md | 6 +- doc/commitmsgfmt.1.adoc | 45 ++++++++++++++- src/commitmsgfmt.rs | 72 +++++++++++++++++++++++- src/main.rs | 3 +- src/parser.rs | 122 +++++++++++++++++++++++++++++++++++++++- 6 files changed, 245 insertions(+), 7 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2016df9..2c10f7e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,10 @@ understanding of patterns often seen in commit messages. ## Unreleased +- #6: Recognize lines that begin with `>` as _block quotes_ and preserve them + in their entirety, and allow them to follow a preceding paragraph without the + empty line that is otherwise usually required. + - If `--width` is specified multiple times, ignore all but the last occurrence. 
## 1.5.0 - 2022-07-30 diff --git a/README.md b/README.md index 0cbad16..29021a0 100644 --- a/README.md +++ b/README.md @@ -31,13 +31,13 @@ In summary, `commitmsgfmt` paragraph; - reflows and wraps all other prose at the specified max width, defaulting to - 72 characters based on Time Pope's recommendation from 2008 [[tpope]]; + 72 characters based on Tim Pope's recommendation from 2008 [[tpope]]; - properly indents continuation lines in numbered and unnumbered lists, recognizing several different list styles; -- exempts comments; text indented at least 4 spaces or 1 tab; and "trailers" - (`Signed-off-by:`); +- exempts comments; text indented at least 4 spaces or 1 tab; "trailers" + (`Signed-off-by:`); and block quotes; - assumes UTF-8 encoded input but can gracefully degrade to ISO-8859-1 ("latin1") which has been observed in the Linux kernel; diff --git a/doc/commitmsgfmt.1.adoc b/doc/commitmsgfmt.1.adoc index 13f1ce5..6eec6e5 100644 --- a/doc/commitmsgfmt.1.adoc +++ b/doc/commitmsgfmt.1.adoc @@ -93,7 +93,11 @@ those cases and avoid them by preventing wrapping: it will * join a sequence of footnote references to their preceding word, so the references both preserve their context and don't degenerate into _list - items_; + items_. + +_Block quotes_ are exempt from the requirement of surrounding blank lines and +will never be considered to belong to a paragraph. A block quote embedded +inside a paragraph has the same effect on that paragraph as an empty line has. === Subject line @@ -195,6 +199,39 @@ literals. A line starting with one tab or four spaces is considered a _literal_. Literals are printed verbatim, making them suitable for listings and tables. +=== Block quote + +A line starting with a greater-than sign (*>*) is considered a _block quote_: + +---- +Git's Web site claims: +> Git is easy to learn and has a tiny footprint with lightning fast +> performance. 
It outclasses SCM tools like Subversion, CVS, Perforce, and +> ClearCase with features like cheap local branching, convenient staging areas, +> and multiple workflows. +---- + +Block quotes are printed verbatim; they are not wrapped, nor are quote markers +in any way normalized or aligned. + +[TIP] +==== +If you wish to reflow a block quote, Vim's *gq* command does a decent job. +Alternatively, consider delegating to *fmt*(1). For example, the following Vim +Normal mode command instructs *fmt*(1) to reflow every line starting with *>* +in the cursor's paragraph to 72 columns: + +---- +vip:!fmt -w72 -p'>' +---- +==== + +Unlike other constructs, a block quote may be embedded inside a _paragraph_ with +no preceding or following blank line; the block quote will not be folded into +the paragraph and the paragraph will otherwise observe standard behavior. This +enables a common pattern of immediately preceding the block quote with an +author attribution, illustrated above. + +=== Comment A line starting with the *core.commentChar* character, or a hash sign (*#*) @@ -269,6 +306,9 @@ Given input subject foo baar -- baz qux wupwupwup [1][2] [wup] +hex: +> 0 1 2 3 4 5 6 7 8 9 a b c d e f + - foo 1. foo bar baz @@ -289,6 +329,9 @@ baar -- baz qux wupwupwup [1][2] [wup] +hex: +> 0 1 2 3 4 5 6 7 8 9 a b c d e f + - foo 1.
foo bar baz diff --git a/src/commitmsgfmt.rs b/src/commitmsgfmt.rs index 4645a99..7b4d814 100644 --- a/src/commitmsgfmt.rs +++ b/src/commitmsgfmt.rs @@ -37,7 +37,8 @@ impl CommitMsgFmt { fn reflow_into(&self, buf: &mut String, msg: &[Token]) { for tok in msg { match *tok { - Comment(ref s) | Literal(ref s) | Scissored(ref s) | Trailer(ref s) => { + BlockQuote(ref s) | Comment(ref s) | Literal(ref s) | Scissored(ref s) + | Trailer(ref s) => { buf.push_str(s); } ListItem(ref indent, ref li, ref s) => { @@ -214,6 +215,75 @@ foo assert_eq!(filter(72, &input), expected); } + #[test] + fn preserves_block_quote() { + let input = " +foo + +> block quote +paragraph +"; + + let expected = input; + + assert_eq!(filter(72, &input), expected); + } + + #[test] + fn preserves_block_quote_with_attribution() { + let input = " +foo + +author wrote: +> block quote +paragraph +"; + + let expected = input; + + assert_eq!(filter(72, &input), expected); + } + + #[test] + fn preserves_multiline_block_quote() { + let input = " +xx-xxxxxx xxxx xxxxxxx xxxxxxxxxxxxxx + +xxxx xxxxxx xxxxxxx xxxxx xx xxx xxx -x xxxxxx xxxx xxxx-xx-xxxxx, xxxxx +xxxxxxxxx xxxx xxxxxxx xxxxxxxxxxxxxx. xxxx xxx xxxxxxx: + +> ``` +> -x xx --xx-xxxx +> xxxxxxxx xxxxxxx xxx xxxxxxx xxxxxxxxxxxxxx xxx xxxxxxxxxxxxxxxx +> xxxxxxx xx xxx xxxxxxxx. xxxx xx +> ``` + +xxx xxxxxxx xxxxxxxx xx `xxxx` xx xx xxxxxx xxxxxxx xxxxxxxxxxxxxx. +"; + + let expected = " +xx-xxxxxx xxxx xxxxxxx xxxxxxxxxxxxxx + +xxxx xxxxxx xxxxxxx xxxxx xx +xxx xxx -x xxxxxx xxxx +xxxx-xx-xxxxx, xxxxx xxxxxxxxx +xxxx xxxxxxx xxxxxxxxxxxxxx. +xxxx xxx xxxxxxx: + +> ``` +> -x xx --xx-xxxx +> xxxxxxxx xxxxxxx xxx xxxxxxx xxxxxxxxxxxxxx xxx xxxxxxxxxxxxxxxx +> xxxxxxx xx xxx xxxxxxxx. xxxx xx +> ``` + +xxx xxxxxxx xxxxxxxx xx `xxxx` +xx xx xxxxxx xxxxxxx +xxxxxxxxxxxxxx. 
+"; + + assert_eq!(filter(30, &input), expected); + } + #[test] fn formats_footnotes() { let msg = " diff --git a/src/main.rs b/src/main.rs index 6811851..f693721 100644 --- a/src/main.rs +++ b/src/main.rs @@ -146,7 +146,8 @@ Some text is exempt from wrapping: behaviour necessitates a laxer limit on its length to avoid rejecting too many valid subjects. -- Text indented at least 4 spaces or 1 tab, and trailers, are printed unchanged."#, +- Text indented at least 4 spaces or 1 tab; trailers; and block quotes are + printed unchanged."#, ) .value_name("WIDTH"), ) diff --git a/src/parser.rs b/src/parser.rs index c9b6b3d..df32f21 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -23,6 +23,7 @@ pub enum Token<'input> { Subject(&'input str), Scissored(&'input str), Trailer(&'input str), + BlockQuote(&'input str), VerticalSpace, } @@ -63,7 +64,9 @@ pub fn parse(input: &str, comment_char: char) -> Vec { toks.push(Token::Trailer(line)); } else if let Some(y) = match toks.last_mut() { Some(&mut Token::Footnote(_, ref mut b)) => extend_prose_buffer_with_line(b, line), - Some(&mut Token::Paragraph(ref mut b)) => extend_prose_buffer_with_line(b, line), + Some(&mut Token::Paragraph(ref mut b)) => { + line_as_line_block_quote(line).or_else(|| extend_prose_buffer_with_line(b, line)) + } Some(&mut Token::ListItem(_, _, ref mut b)) => { line_as_list_item(line).or_else(|| extend_prose_buffer_with_line(b, line)) } @@ -72,6 +75,8 @@ pub fn parse(input: &str, comment_char: char) -> Vec { Some(tok) } else if is_line_indented(line) { Some(Token::Literal(line)) + } else if let Some(tok) = line_as_line_block_quote(line) { + Some(tok) } else { px = false; Some(Token::Paragraph(line.trim().into())) @@ -313,6 +318,14 @@ fn line_as_list_item(line: &str) -> Option { }) } +fn line_as_line_block_quote(line: &str) -> Option { + if line.starts_with('>') { + Some(Token::BlockQuote(line)) + } else { + None + } +} + #[cfg(test)] mod tests { use super::Token::*; @@ -611,6 +624,113 @@ some other paragraph 
); } + #[test] + fn parses_block_quote_verbatim() { + assert_eq!( + parse( + " +some subject + +some paragraph + +> some block quote + +some other paragraph +" + ), + [ + VerticalSpace, + Subject("some subject"), + VerticalSpace, + Paragraph("some paragraph".into()), + VerticalSpace, + BlockQuote("> some block quote"), + VerticalSpace, + Paragraph("some other paragraph".into()), + ], + ); + } + + #[test] + fn parses_nested_block_quotes_verbatim() { + assert_eq!( + parse( + " +some subject + +some paragraph + +> > some block quote + +some other paragraph +" + ), + [ + VerticalSpace, + Subject("some subject"), + VerticalSpace, + Paragraph("some paragraph".into()), + VerticalSpace, + BlockQuote("> > some block quote"), + VerticalSpace, + Paragraph("some other paragraph".into()), + ], + ); + } + + #[test] + fn parses_nested_block_quotes_ignoring_quote_marker_spacing_and_quote_levels() { + assert_eq!( + parse( + " +some subject + +some paragraph + +>>>> >>> >> some block quote + +some other paragraph +" + ), + [ + VerticalSpace, + Subject("some subject"), + VerticalSpace, + Paragraph("some paragraph".into()), + VerticalSpace, + BlockQuote(">>>> >>> >> some block quote"), + VerticalSpace, + Paragraph("some other paragraph".into()), + ], + ); + } + + #[test] + fn parses_block_quote_with_immediately_preceding_paragraph_as_attribution_leaving_no_vertical_space( + ) { + assert_eq!( + parse( + " +some subject + +some attribution paragraph +> some block quote + +some other paragraph +" + ), + [ + VerticalSpace, + Subject("some subject"), + VerticalSpace, + Paragraph("some attribution paragraph".into()), + BlockQuote("> some block quote"), + VerticalSpace, + Paragraph("some other paragraph".into()), + ], + ); + } + #[test] fn parses_trailers() { // Trailers look like HTTP or email headers but are not formally