Skip to content

Commit

Permalink
add IsPunctuation2() that also works on unicode punctuation (fixes #309)
Browse files Browse the repository at this point in the history
  • Loading branch information
kjk committed Jun 26, 2024
1 parent a4f7c57 commit 2eda941
Show file tree
Hide file tree
Showing 3 changed files with 19 additions and 5 deletions.
5 changes: 1 addition & 4 deletions inline_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,7 @@ func TestEmphasis(t *testing.T) {
doTestsInlineParam(t, tests, TestParams{})
}

// TODO: to fix this we would have to update IsPunctuation() to handle
// `—` which looks like `-` but is a unicode 3-byte thingy and
// currently IsPunctuation() only handles 1-byte ascii
func Disabled_TestBug309(t *testing.T) {
func TestBug309(t *testing.T) {
var tests = []string{
`*f*—`,
"<p><em>f</em>—</p>\n",
Expand Down
3 changes: 2 additions & 1 deletion parser/inline.go
Original file line number Diff line number Diff line change
Expand Up @@ -1208,7 +1208,8 @@ func helperEmphasis(p *Parser, data []byte, c byte) (int, ast.Node) {
if data[i] == c && !IsSpace(data[i-1]) {

if p.extensions&NoIntraEmphasis != 0 {
if !(i+1 == len(data) || IsSpace(data[i+1]) || IsPunctuation(data[i+1])) {
rest := data[i+1:]
if !(len(rest) == 0 || IsSpace(rest[0]) || IsPunctuation2(rest)) {
continue
}
}
Expand Down
16 changes: 16 additions & 0 deletions parser/parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ import (
"fmt"
"strconv"
"strings"
"unicode"
"unicode/utf8"

"github.com/gomarkdown/markdown/ast"
)
Expand Down Expand Up @@ -727,6 +729,20 @@ func IsPunctuation(c byte) bool {
return false
}

// IsPunctuation2 reports whether d starts with a punctuation character.
//
// The first byte is checked with IsPunctuation; if that does not match, the
// leading bytes of d are decoded as a UTF-8 rune and checked with
// unicode.IsPunct, so multi-byte punctuation such as `—` is recognized too.
// It returns false for an empty slice and when decoding yields
// utf8.RuneError.
func IsPunctuation2(d []byte) bool {
	switch {
	case len(d) == 0:
		return false
	case IsPunctuation(d[0]):
		// Single-byte match; no need to decode a rune.
		return true
	}
	first, _ := utf8.DecodeRune(d)
	return first != utf8.RuneError && unicode.IsPunct(first)
}

// IsSpace returns true if c is a white-space character
func IsSpace(c byte) bool {
return c == ' ' || c == '\t' || c == '\n' || c == '\r' || c == '\f' || c == '\v'
Expand Down

0 comments on commit 2eda941

Please sign in to comment.