Skip to content

Commit

Permalink
internal/frontend: limit heading ids to ASCII characters
Browse files Browse the repository at this point in the history
It seems that bluemonday intended to allow only ASCII IDs. Limit the
heading ids produced to ASCII characters, in keeping with what we believe
the intended behavior to be.

Change-Id: Ifa9aaad5fcc5308d9efcaa75fafb65547839fde2
Reviewed-on: https://go-review.googlesource.com/c/pkgsite/+/544356
LUCI-TryBot-Result: Go LUCI <[email protected]>
kokoro-CI: kokoro <[email protected]>
Reviewed-by: Jonathan Amsterdam <[email protected]>
  • Loading branch information
matloob committed Nov 30, 2023
1 parent e743b59 commit 044caf5
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 3 deletions.
5 changes: 2 additions & 3 deletions internal/frontend/goldmark.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@ import (
"fmt"
"regexp"
"strings"
"unicode"

"github.com/yuin/goldmark/ast"
"github.com/yuin/goldmark/parser"
Expand Down Expand Up @@ -180,7 +179,7 @@ func newIDs() parser.IDs {
}

// Generate turns heading content from a markdown document into a heading id.
// First HTML markup and markdown images are stripped then unicode letters
// First HTML markup and markdown images are stripped then ASCII letters
// and numbers are used to generate the final result. Finally, all heading ids
// are prefixed with "readme-" to avoid name collisions with other ids on the
// unit page. Duplicated heading ids are given an incremental suffix. See
Expand All @@ -190,7 +189,7 @@ func (s *ids) Generate(value []byte, kind ast.NodeKind) []byte {
r := regexp.MustCompile(`(<[^<>]+>|\[\!\[[^\]]+]\([^\)]+\)\]\([^\)]+\))`)
str := r.ReplaceAllString(string(value), "")
f := func(c rune) bool {
return !unicode.IsLetter(c) && !unicode.IsNumber(c)
return !('a' <= c && c <= 'z') && !('A' <= c && c <= 'Z') && !('0' <= c && c <= '9')
}
str = strings.Join(strings.FieldsFunc(str, f), "-")
str = strings.ToLower(str)
Expand Down
10 changes: 10 additions & 0 deletions internal/frontend/readme_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,16 @@ func TestReadme(t *testing.T) {
{Level: 1, Text: "One", ID: "readme-one-1"},
},
},
{
name: "Non-ASCII Heading",
unit: unit,
readme: &internal.Readme{
Filepath: sample.ReadmeFilePath,
Contents: "# 中文¹",
},
wantHTML: "<h3 class=\"h1\" id=\"readme-heading\">中文¹</h3>",
wantOutline: []*Heading{{Level: 1, Text: "中文¹", ID: "readme-heading"}},
},
{
name: "Github markdown emoji markup is properly rendered",
unit: unit,
Expand Down

0 comments on commit 044caf5

Please sign in to comment.