Skip to content

Commit 47024e5

Browse files
committed
internal/godoc/dochtml/internal/render: unique headings redux
This CL takes an alternative approach to generate unique headings. It builds on the work of https://go.dev/cl/573595, which was a fix for https://go.dev/issue/64582. - It takes a more direct approach to constructing a unique string from an ast.Decl. - The function that does that is tested separately, reducing the test cases needed for formatDocHTML. - It saves the generated headings, so the HTML doesn't have to be reparsed. NOTE: This will break all links to headings that are not in the package comment. Happily, such headings are rare: only 40 of the top thousand packages have one. It might seem we could avoid any breakage by only applying a suffix to duplicate headings. But then at any time in the future, a unique heading could become a duplicate, causing a break. Better one break now than an unending stream of them. Change-Id: I379712b54c6bc9c6a9343d0006639085a40e23d9 Reviewed-on: https://go-review.googlesource.com/c/pkgsite/+/608035 kokoro-CI: kokoro <[email protected]> LUCI-TryBot-Result: Go LUCI <[email protected]> Reviewed-by: Hyang-Ah Hana Kim <[email protected]>
1 parent 46f6a4c commit 47024e5

File tree

12 files changed

+383
-384
lines changed

12 files changed

+383
-384
lines changed

internal/godoc/dochtml/internal/render/linkify.go

Lines changed: 153 additions & 101 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,6 @@ import (
2323
safe "github.com/google/safehtml"
2424
"github.com/google/safehtml/legacyconversions"
2525
"github.com/google/safehtml/template"
26-
"golang.org/x/net/html"
2726
"golang.org/x/pkgsite/internal/log"
2827
)
2928

@@ -53,10 +52,7 @@ const (
5352
rfcRx = `RFC\s+(\d{3,5})(,?\s+[Ss]ection\s+(\d+(\.\d+)*))?`
5453
)
5554

56-
var (
57-
matchRx = regexp.MustCompile(urlRx + `|` + rfcRx)
58-
badAnchorRx = regexp.MustCompile(`[^a-zA-Z0-9]`)
59-
)
55+
var matchRx = regexp.MustCompile(urlRx + `|` + rfcRx)
6056

6157
type link struct {
6258
Class string
@@ -65,19 +61,23 @@ type link struct {
6561
}
6662

6763
type heading struct {
68-
ID safe.Identifier
64+
ID safe.Identifier // if empty, the title is not linked
6965
Title safe.HTML
7066
}
7167

7268
var (
7369
// tocTemplate expects a []heading.
7470
tocTemplate = template.Must(template.New("toc").Parse(`<div role="navigation" aria-label="Table of Contents">
7571
<ul class="Documentation-toc{{if gt (len .) 5}} Documentation-toc-columns{{end}}">
76-
{{range . -}}
72+
{{- range .}}
7773
<li class="Documentation-tocItem">
78-
<a href="#{{.ID}}">{{.Title}}</a>
74+
{{- if .ID.String -}}
75+
<a href="#{{.ID}}">{{.Title}}</a>
76+
{{- else -}}
77+
{{.Title}}
78+
{{- end -}}
7979
</li>
80-
{{end -}}
80+
{{- end}}
8181
</ul>
8282
</div>
8383
`))
@@ -88,8 +88,13 @@ var (
8888

8989
paraTemplate = template.Must(template.New("para").Parse("<p>{{.}}\n</p>"))
9090

91-
headingTemplate = template.Must(template.New("heading").Parse(
92-
`<h4 id="{{.ID}}">{{.Title}} <a class="Documentation-idLink" href="#{{.ID}}" aria-label="Go to {{.Title}}">¶</a></h4>`))
91+
// expects a heading
92+
headingTemplate = template.Must(template.New("heading").Parse(`
93+
{{- if .ID.String -}}
94+
<h4 id="{{.ID}}">{{.Title}} <a class="Documentation-idLink" href="#{{.ID}}" aria-label="Go to {{.Title}}">¶</a></h4>
95+
{{- else -}}
96+
<h4>{{.Title}}</h4>
97+
{{- end}}`))
9398

9499
linkTemplate = template.Must(template.New("link").Parse(
95100
`<a{{with .Class}}class="{{.}}" {{end}} href="{{.Href}}">{{.Text}}</a>`))
@@ -119,57 +124,14 @@ func (r *Renderer) formatDocHTML(text string, decl ast.Decl, extractLinks bool)
119124
if extractLinks {
120125
r.removeLinks(doc)
121126
}
122-
123-
h := r.blocksToHTML(doc.Content, decl, true, extractLinks)
124-
if headings := extractHeadings(h); len(headings) > 0 {
125-
h = safe.HTMLConcat(ExecuteToHTML(tocTemplate, headings), h)
127+
hscope := newHeadingScope(headingIDSuffix(decl))
128+
h := r.blocksToHTML(doc.Content, true, hscope)
129+
if len(hscope.headings) > 0 {
130+
h = safe.HTMLConcat(ExecuteToHTML(tocTemplate, hscope.headings), h)
126131
}
127-
128132
return h
129133
}
130134

131-
func extractHeadings(h safe.HTML) []heading {
132-
var headings []heading
133-
134-
doc, err := html.Parse(strings.NewReader(h.String()))
135-
if err != nil {
136-
return nil
137-
}
138-
139-
var f func(*html.Node)
140-
f = func(n *html.Node) {
141-
for _, a := range n.Attr {
142-
if a.Key == "id" && strings.HasPrefix(a.Val, "hdr-") {
143-
if tn := firstTextNode(n); tn != nil {
144-
title := strings.TrimSpace(tn.Data)
145-
hdrName := strings.TrimPrefix(a.Val, "hdr-")
146-
id := safe.IdentifierFromConstantPrefix("hdr", hdrName)
147-
headings = append(headings, heading{id, safe.HTMLEscaped(title)})
148-
}
149-
break
150-
}
151-
}
152-
for c := n.FirstChild; c != nil; c = c.NextSibling {
153-
f(c)
154-
}
155-
}
156-
f(doc)
157-
158-
return headings
159-
}
160-
161-
func firstTextNode(n *html.Node) *html.Node {
162-
if n.Type == html.TextNode {
163-
return n
164-
}
165-
for c := n.FirstChild; c != nil; c = c.NextSibling {
166-
if r := firstTextNode(c); r != nil {
167-
return r
168-
}
169-
}
170-
return nil
171-
}
172-
173135
// removeLinks removes the "Links" section from doc.
174136
// Pkgsite has a convention where a "Links" heading in a doc comment provides links
175137
// that are rendered in a separate place in the UI.
@@ -263,13 +225,13 @@ func parseLink(line string) *Link {
263225
}
264226
}
265227

266-
func (r *Renderer) blocksToHTML(bs []comment.Block, decl ast.Decl, useParagraph, extractLinks bool) safe.HTML {
228+
func (r *Renderer) blocksToHTML(bs []comment.Block, useParagraph bool, hscope *headingScope) safe.HTML {
267229
return concatHTML(bs, func(b comment.Block) safe.HTML {
268-
return r.blockToHTML(b, decl, useParagraph, extractLinks)
230+
return r.blockToHTML(b, useParagraph, hscope)
269231
})
270232
}
271233

272-
func (r *Renderer) blockToHTML(b comment.Block, decl ast.Decl, useParagraph, extractLinks bool) safe.HTML {
234+
func (r *Renderer) blockToHTML(b comment.Block, useParagraph bool, hscope *headingScope) safe.HTML {
273235
switch b := b.(type) {
274236
case *comment.Paragraph:
275237
th := r.textsToHTML(b.Text)
@@ -282,7 +244,7 @@ func (r *Renderer) blockToHTML(b comment.Block, decl ast.Decl, useParagraph, ext
282244
return ExecuteToHTML(codeTemplate, b.Text)
283245

284246
case *comment.Heading:
285-
return ExecuteToHTML(headingTemplate, r.newHeading(b, decl))
247+
return ExecuteToHTML(headingTemplate, r.newHeading(b, hscope))
286248

287249
case *comment.List:
288250
var items []safe.HTML
@@ -291,7 +253,7 @@ func (r *Renderer) blockToHTML(b comment.Block, decl ast.Decl, useParagraph, ext
291253
items = append(items, ExecuteToHTML(listItemTemplate, struct {
292254
Number string
293255
Content safe.HTML
294-
}{item.Number, r.blocksToHTML(item.Content, decl, useParagraph, false)}))
256+
}{item.Number, r.blocksToHTML(item.Content, useParagraph, hscope)}))
295257
}
296258
t := oListTemplate
297259
if b.Items[0].Number == "" {
@@ -303,53 +265,143 @@ func (r *Renderer) blockToHTML(b comment.Block, decl ast.Decl, useParagraph, ext
303265
}
304266
}
305267

306-
// headingIDs keeps track of used heading ids to prevent duplicates.
307-
type headingIDs map[safe.Identifier]bool
268+
/*
269+
We want to convert headings in doc comments to unique and stable HTML IDs.
308270
309-
// headingID returns a unique identifier for a *comment.Heading & ast.Decl pair.
310-
func (r *Renderer) headingID(h *comment.Heading, decl ast.Decl) safe.Identifier {
311-
s := textsToString(h.Text)
312-
hdrTitle := badAnchorRx.ReplaceAllString(s, "_")
313-
id := safe.IdentifierFromConstantPrefix("hdr", hdrTitle)
314-
if !r.headingIDs[id] {
315-
r.headingIDs[id] = true
316-
return id
317-
}
271+
The IDs in an HTML page should be unique, so URLs with fragments can link to
272+
them. The headings in the doc comments of a Go package (which corresponds to an
273+
HTML page for us) need not be unique. The same heading can be used in the doc
274+
comment of several symbols, and in the package comment as well.
318275
319-
// The id is not unique. Attempt to generate an identifier using the decl.
320-
for _, v := range generateAnchorPoints(decl) {
321-
if v.Kind == "field" {
322-
continue
323-
}
324-
hdrTitle = badAnchorRx.ReplaceAllString(v.ID.String()+"_"+s, "_")
325-
if v.Kind == "constant" || v.Kind == "variable" {
326-
if specs := decl.(*ast.GenDecl).Specs; len(specs) > 1 {
327-
// Grouped consts and vars cannot be deterministically identified,
328-
// so treat them as a single section. e.g. "hdr-constant-Title"
329-
hdrTitle = badAnchorRx.ReplaceAllString(v.Kind+"_"+s, "_")
330-
}
331-
}
332-
if v.Kind != "method" {
333-
// Continue iterating until we find a higher precedence kind.
334-
break
335-
}
276+
If uniqueness were the only criterion, we could just append a number. But we
277+
also want the IDs to be stable. The page for the latest version of a package
278+
has no version in the URL, so that a saved link will always reference the latest
279+
version. We do the same for other links on the page, like links to symbols. We
280+
want to do so for heading links too. That means that a heading link, which uses
281+
the ID as a fragment, may visit pages with different content at different times.
282+
We want that saved link to return to the same heading every time, if the heading
283+
still exists.
284+
285+
If we numbered the IDs from top to bottom, a heading ID would change whenever headings
286+
were added above it. Instead, we try to construct a unique suffix from the declaration that the
287+
doc comment refers to. (The suffix is empty for the package comment.) For ungrouped declarations,
288+
we use the symbol name, which must be unique in the package. For grouped declarations, like
289+
290+
const (
291+
A = 1
292+
B = 2
293+
)
294+
295+
or
296+
297+
var x, Y int
298+
299+
there is no good way to pick a suffix, so we don't link headings at all. Evidence suggests
300+
that such headings are very rare.
301+
302+
After constructing the unique suffix, we may still have duplicate IDs because the same doc comment
303+
can repeat headings. So we append a number that is unique for the given suffix.
304+
305+
This approach will always produce unique, stable IDs when it produces IDs at all.
306+
*/
307+
308+
// A headingScope is a collection of headings that arise from the documentation of
309+
// a single element, either the package itself or a symbol within it.
310+
// A headingScope ensures that heading IDs are unique, and keeps a list of headings as they appear in the scope.
311+
type headingScope struct {
312+
suffix string // ID suffix; unique for the package; empty for package doc
313+
createIDs bool // create IDs for the headings
314+
headings []heading // headings for this scope, in order
315+
ids map[string]int // count of seen IDs
316+
}
317+
318+
func newHeadingScope(suffix string, createIDs bool) *headingScope {
319+
return &headingScope{
320+
suffix: badAnchorRx.ReplaceAllString(suffix, "_"),
321+
createIDs: createIDs,
322+
ids: map[string]int{},
336323
}
324+
}
337325

338-
for {
339-
id = safe.IdentifierFromConstantPrefix("hdr", hdrTitle)
340-
if !r.headingIDs[id] {
341-
r.headingIDs[id] = true
342-
break
343-
}
344-
// The id is still not unique. Append _ until unique.
345-
hdrTitle = hdrTitle + "_"
326+
// newHeading constructs a heading from the comment.Heading and remembers it.
327+
func (r *Renderer) newHeading(h *comment.Heading, hs *headingScope) heading {
328+
return hs.addHeading(h, r.textsToHTML(h.Text))
329+
}
330+
331+
// addHeading constructs a heading and adds it to hs.
332+
func (hs *headingScope) addHeading(ch *comment.Heading, html safe.HTML) heading {
333+
h := heading{Title: html}
334+
if hs.createIDs {
335+
h.ID = safe.IdentifierFromConstantPrefix("hdr", hs.newHeadingID(ch))
346336
}
337+
hs.headings = append(hs.headings, h)
338+
return h
339+
}
347340

341+
var badAnchorRx = regexp.MustCompile(`[^a-zA-Z0-9-]`)
342+
343+
// newHeadingID constructs a unique heading ID from the argument, which is the text
344+
// of a heading.
345+
func (hs *headingScope) newHeadingID(ch *comment.Heading) string {
346+
// Start with the heading's default ID.
347+
// We can only construct a [safe.Identifier] by adding a prefix, so remove the
348+
// default's prefix.
349+
id := strings.TrimPrefix(ch.DefaultID(), "hdr-")
350+
if hs.suffix != "" {
351+
id += "-" + hs.suffix
352+
}
353+
n := hs.ids[id]
354+
hs.ids[id]++
355+
if n > 0 {
356+
id += "-" + strconv.Itoa(n)
357+
}
348358
return id
349359
}
350360

351-
func (r *Renderer) newHeading(h *comment.Heading, decl ast.Decl) heading {
352-
return heading{r.headingID(h, decl), r.textsToHTML(h.Text)}
361+
// headingIDSuffix returns a unique string from the decl, to be
362+
// used as the suffix in heading IDs.
363+
// If a unique string can be created, the second return value is true.
364+
// If not (the declaration is a group), headingIDSuffix returns "", false.
365+
func headingIDSuffix(decl ast.Decl) (string, bool) {
366+
if decl == nil {
367+
return "", true
368+
}
369+
switch decl := decl.(type) {
370+
case *ast.FuncDecl:
371+
// functionName, or recvTypeName_methodName
372+
if decl.Recv == nil || len(decl.Recv.List) == 0 {
373+
return decl.Name.Name, true
374+
}
375+
return recvTypeString(decl.Recv.List[0].Type) + "_" + decl.Name.Name, true
376+
case *ast.GenDecl:
377+
// We can only pick a stable name when there is a single decl.
378+
if len(decl.Specs) == 1 {
379+
switch spec := decl.Specs[0].(type) {
380+
case *ast.TypeSpec:
381+
return spec.Name.Name, true
382+
case *ast.ValueSpec:
383+
if len(spec.Names) == 1 {
384+
return spec.Names[0].Name, true
385+
}
386+
}
387+
}
388+
}
389+
return "", false
390+
}
391+
392+
// recvTypeString returns a string for the symbol in a type expression
393+
// for a method receiver.
394+
func recvTypeString(t ast.Expr) string {
395+
switch t := t.(type) {
396+
case *ast.Ident:
397+
return t.Name
398+
case *ast.IndexExpr:
399+
return t.X.(*ast.Ident).Name
400+
case *ast.StarExpr:
401+
return recvTypeString(t.X)
402+
default: // should never happen
403+
return "unknown"
404+
}
353405
}
354406

355407
func (r *Renderer) textsToHTML(ts []comment.Text) safe.HTML {

0 commit comments

Comments
 (0)