Skip to content

Commit bb54a53

Browse files
committed
pkg/md: Add a Codec for smart punctuations.
1 parent e16ffe7 commit bb54a53

File tree

2 files changed

+153
-0
lines changed

2 files changed

+153
-0
lines changed

pkg/md/smart_puncts.go

Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
package md
2+
3+
import (
4+
"strings"
5+
"unicode"
6+
)
7+
8+
// SmartPunctsCodec wraps another codec, converting certain ASCII punctuations to
9+
// nicer Unicode counterparts:
10+
//
11+
// - A straight double quote (") is converted to a left double quote (“) when
12+
// it follows a whitespace, or a right double quote (”) when it follows a
13+
// non-whitespace.
14+
//
15+
// - A straight single quote (') is converted to a left single quote (‘) when
16+
// it follows a whitespace, or a right single quote or apostrophe (’) when
17+
// it follows a non-whitespace.
18+
//
19+
// - A run of two dashes (--) is converted to an en-dash (–).
20+
//
21+
// - A run of three dashes (---) is converted to an em-dash (—).
22+
//
23+
// - A run of three dot (...) is converted to an ellipsis (…).
24+
//
25+
// Start of lines are considered to be whitespaces.
26+
type SmartPunctsCodec struct{ Inner Codec }
27+
28+
// Do applies the smart punctuation conversion to op's inline content and
// forwards the converted op to c.Inner.
func (c SmartPunctsCodec) Do(op Op) {
	converted := applySmartPunctsToOp(op)
	c.Inner.Do(converted)
}
29+
30+
func applySmartPunctsToOp(op Op) Op {
31+
for i := range op.Content {
32+
inlineOp := &op.Content[i]
33+
switch inlineOp.Type {
34+
case OpText, OpLinkStart, OpLinkEnd, OpImage:
35+
inlineOp.Text = applySmartPuncts(inlineOp.Text)
36+
if inlineOp.Type == OpImage {
37+
inlineOp.Alt = applySmartPuncts(inlineOp.Alt)
38+
}
39+
}
40+
}
41+
return op
42+
}
43+
44+
// applySimpleSmartPuncts replaces runs of ASCII dashes and dots with their
// Unicode counterparts: "---" with an em-dash (—), "--" with an en-dash (–)
// and "..." with an ellipsis (…).
//
// strings.Replacer compares the old strings in argument order at each
// position, so "---" must be listed before "--". With the opposite order,
// "---" is matched as "--" followed by a stray "-" and never becomes an
// em-dash.
var applySimpleSmartPuncts = strings.NewReplacer(
	"---", "—", "--", "–", "...", "…").Replace
46+
47+
func applySmartPuncts(s string) string {
48+
return applySimpleSmartPuncts(applySmartQuotes(s))
49+
}
50+
51+
// applySmartQuotes replaces straight quotes in s with curly quotes. A quote
// that follows whitespace becomes an opening quote (“ or ‘); a quote that
// follows any other character becomes a closing quote (” or ’). The start of s
// counts as whitespace. If s contains no straight quotes it is returned
// unchanged.
func applySmartQuotes(s string) string {
	if !strings.ContainsAny(s, `'"`) {
		return s
	}
	var out strings.Builder
	// Tracks whether the previous rune was whitespace; true initially because
	// the start of the text is considered to be whitespace.
	afterSpace := true
	for _, r := range s {
		switch {
		case r == '"' && afterSpace:
			out.WriteRune('“')
		case r == '"':
			out.WriteRune('”')
		case r == '\'' && afterSpace:
			out.WriteRune('‘')
		case r == '\'':
			out.WriteRune('’')
		default:
			out.WriteRune(r)
		}
		afterSpace = unicode.IsSpace(r)
	}
	return out.String()
}

pkg/md/smart_puncts_test.go

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
package md_test
2+
3+
import (
4+
"testing"
5+
6+
"github.com/google/go-cmp/cmp"
7+
. "src.elv.sh/pkg/md"
8+
)
9+
10+
var smartPunctsTestCases = []testCase{
11+
{
12+
Name: "Simple smart punctuations",
13+
Markdown: `a -- b --- c...`,
14+
HTML: dedent(`
15+
<p>a – b –- c…</p>
16+
`),
17+
},
18+
{
19+
Name: "Smart quotes",
20+
Markdown: `It's "foo" and 'bar'.`,
21+
HTML: dedent(`
22+
<p>It’s “foo” and ‘bar’.</p>
23+
`),
24+
},
25+
{
26+
Name: "Link and image title",
27+
Markdown: dedent(`
28+
[link text](a.html "--")
29+
![img alt](a.png "--")
30+
`),
31+
HTML: dedent(`
32+
<p><a href="a.html" title="–">link text</a>
33+
<img src="a.png" alt="img alt" title="–" /></p>
34+
`),
35+
},
36+
{
37+
Name: "Link alt",
38+
Markdown: `![img -- alt](a.png)`,
39+
HTML: dedent(`
40+
<p><img src="a.png" alt="img – alt" /></p>
41+
`),
42+
},
43+
{
44+
Name: "Code span is unchanged",
45+
Markdown: "`a -- b`",
46+
HTML: dedent(`
47+
<p><code>a -- b</code></p>
48+
`),
49+
},
50+
{
51+
Name: "Non-inline content is unchanged",
52+
Markdown: dedent(`
53+
~~~
54+
a -- b
55+
~~~
56+
`),
57+
HTML: dedent(`
58+
<pre><code>a -- b
59+
</code></pre>
60+
`),
61+
},
62+
}
63+
64+
func TestSmartPuncts(t *testing.T) {
65+
for _, tc := range smartPunctsTestCases {
66+
t.Run(tc.Name, func(t *testing.T) {
67+
var htmlCodec HTMLCodec
68+
Render(tc.Markdown, SmartPunctsCodec{&htmlCodec})
69+
got := htmlCodec.String()
70+
if diff := cmp.Diff(tc.HTML, got); diff != "" {
71+
t.Errorf("input:\n%s\ndiff (-want +got):\n%s",
72+
hr+"\n"+tc.Markdown+hr, diff)
73+
}
74+
})
75+
}
76+
}

0 commit comments

Comments
 (0)