diff --git a/modules/html/html.go b/modules/html/html.go deleted file mode 100644 index b1ebd584c6b6d..0000000000000 --- a/modules/html/html.go +++ /dev/null @@ -1,25 +0,0 @@ -// Copyright 2022 The Gitea Authors. All rights reserved. -// SPDX-License-Identifier: MIT - -package html - -// ParseSizeAndClass get size and class from string with default values -// If present, "others" expects the new size first and then the classes to use -func ParseSizeAndClass(defaultSize int, defaultClass string, others ...any) (int, string) { - size := defaultSize - if len(others) >= 1 { - if v, ok := others[0].(int); ok && v != 0 { - size = v - } - } - class := defaultClass - if len(others) >= 2 { - if v, ok := others[1].(string); ok && v != "" { - if class != "" { - class += " " - } - class += v - } - } - return size, class -} diff --git a/modules/htmlutil/html.go b/modules/htmlutil/html.go new file mode 100644 index 0000000000000..9b5f5a92d89a8 --- /dev/null +++ b/modules/htmlutil/html.go @@ -0,0 +1,48 @@ +// Copyright 2022 The Gitea Authors. All rights reserved. +// SPDX-License-Identifier: MIT + +package htmlutil + +import ( + "fmt" + "html/template" + "slices" +) + +// ParseSizeAndClass get size and class from string with default values +// If present, "others" expects the new size first and then the classes to use +func ParseSizeAndClass(defaultSize int, defaultClass string, others ...any) (int, string) { + size := defaultSize + if len(others) >= 1 { + if v, ok := others[0].(int); ok && v != 0 { + size = v + } + } + class := defaultClass + if len(others) >= 2 { + if v, ok := others[1].(string); ok && v != "" { + if class != "" { + class += " " + } + class += v + } + } + return size, class +} + +func HTMLFormat(s string, rawArgs ...any) template.HTML { + args := slices.Clone(rawArgs) + for i, v := range args { + switch v := v.(type) { + case nil, bool, int, int8, int16, int32, int64, uint, uint8, uint16, uint32, uint64, float32, float64, template.HTML: + // for most basic types (including template.HTML which is safe), just do nothing and use it + case string: + args[i] = template.HTMLEscapeString(v) + case fmt.Stringer: + args[i] = template.HTMLEscapeString(v.String()) + default: + args[i] = template.HTMLEscapeString(fmt.Sprint(v)) + } + } + return template.HTML(fmt.Sprintf(s, args...)) +} diff --git a/modules/htmlutil/html_test.go b/modules/htmlutil/html_test.go new file mode 100644 index 0000000000000..5ff05d75b36cc --- /dev/null +++ b/modules/htmlutil/html_test.go @@ -0,0 +1,15 @@ +// Copyright 2024 The Gitea Authors. All rights reserved. +// SPDX-License-Identifier: MIT + +package htmlutil + +import ( + "html/template" + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestHTMLFormat(t *testing.T) { + assert.Equal(t, template.HTML("< < 1"), HTMLFormat("%s %s %d", "<", template.HTML("<"), 1)) +} diff --git a/modules/markup/html.go b/modules/markup/html.go index 16ccd4b40672f..ce085aaaa4059 100644 --- a/modules/markup/html.go +++ b/modules/markup/html.go @@ -25,9 +25,6 @@ const ( IssueNameStyleRegexp = "regexp" ) -// CSS class for action keywords (e.g. "closes: #1") -const keywordClass = "issue-keyword" - type globalVarsType struct { hashCurrentPattern *regexp.Regexp shortLinkPattern *regexp.Regexp @@ -39,6 +36,7 @@ type globalVarsType struct { emojiShortCodeRegex *regexp.Regexp issueFullPattern *regexp.Regexp filesChangedFullPattern *regexp.Regexp + codePreviewPattern *regexp.Regexp tagCleaner *regexp.Regexp nulCleaner *strings.Replacer @@ -88,6 +86,9 @@ var globalVars = sync.OnceValue[*globalVarsType](func() *globalVarsType { // example: https://domain/org/repo/pulls/27/files#hash v.filesChangedFullPattern = regexp.MustCompile(`https?://(?:\S+/)[\w_.-]+/[\w_.-]+/pulls/((?:\w{1,10}-)?[1-9][0-9]*)/files([\?|#](\S+)?)?\b`) + // codePreviewPattern matches "http://domain/.../{owner}/{repo}/src/commit/{commit}/{filepath}#L10-L20" + v.codePreviewPattern = regexp.MustCompile(`https?://\S+/([^\s/]+)/([^\s/]+)/src/commit/([0-9a-f]{7,64})(/\S+)#(L\d+(-L\d+)?)`) + v.tagCleaner = regexp.MustCompile(`<((?:/?\w+/\w+)|(?:/[\w ]+/)|(/?[hH][tT][mM][lL]\b)|(/?[hH][eE][aA][dD]\b))`) v.nulCleaner = strings.NewReplacer("\000", "") return v @@ -164,11 +165,7 @@ var defaultProcessors = []processor{ // emails with HTML links, parsing shortlinks in the format of [[Link]], like // MediaWiki, linking issues in the format #ID, and mentions in the format // @user, and others. -func PostProcess( - ctx *RenderContext, - input io.Reader, - output io.Writer, -) error { +func PostProcess(ctx *RenderContext, input io.Reader, output io.Writer) error { return postProcess(ctx, defaultProcessors, input, output) } @@ -189,10 +186,7 @@ var commitMessageProcessors = []processor{ // RenderCommitMessage will use the same logic as PostProcess, but will disable // the shortLinkProcessor and will add a defaultLinkProcessor if defaultLink is // set, which changes every text node into a link to the passed default link. -func RenderCommitMessage( - ctx *RenderContext, - content string, -) (string, error) { +func RenderCommitMessage(ctx *RenderContext, content string) (string, error) { procs := commitMessageProcessors return renderProcessString(ctx, procs, content) } @@ -219,10 +213,7 @@ var emojiProcessors = []processor{ // RenderCommitMessage, but will disable the shortLinkProcessor and // emailAddressProcessor, will add a defaultLinkProcessor if defaultLink is set, // which changes every text node into a link to the passed default link. -func RenderCommitMessageSubject( - ctx *RenderContext, - defaultLink, content string, -) (string, error) { +func RenderCommitMessageSubject(ctx *RenderContext, defaultLink, content string) (string, error) { procs := slices.Clone(commitMessageSubjectProcessors) procs = append(procs, func(ctx *RenderContext, node *html.Node) { ch := &html.Node{Parent: node, Type: html.TextNode, Data: node.Data} @@ -236,10 +227,7 @@ func RenderCommitMessageSubject( } // RenderIssueTitle to process title on individual issue/pull page -func RenderIssueTitle( - ctx *RenderContext, - title string, -) (string, error) { +func RenderIssueTitle(ctx *RenderContext, title string) (string, error) { // do not render other issue/commit links in an issue's title - which in most cases is already a link. return renderProcessString(ctx, []processor{ emojiShortCodeProcessor, @@ -257,10 +245,7 @@ func renderProcessString(ctx *RenderContext, procs []processor, content string) // RenderDescriptionHTML will use similar logic as PostProcess, but will // use a single special linkProcessor. -func RenderDescriptionHTML( - ctx *RenderContext, - content string, -) (string, error) { +func RenderDescriptionHTML(ctx *RenderContext, content string) (string, error) { return renderProcessString(ctx, []processor{ descriptionLinkProcessor, emojiShortCodeProcessor, @@ -270,10 +255,7 @@ func RenderDescriptionHTML( // RenderEmoji for when we want to just process emoji and shortcodes // in various places it isn't already run through the normal markdown processor -func RenderEmoji( - ctx *RenderContext, - content string, -) (string, error) { +func RenderEmoji(ctx *RenderContext, content string) (string, error) { return renderProcessString(ctx, emojiProcessors, content) } @@ -333,6 +315,17 @@ func postProcess(ctx *RenderContext, procs []processor, input io.Reader, output return nil } +func isEmojiNode(node *html.Node) bool { + if node.Type == html.ElementNode && node.Data == atom.Span.String() { + for _, attr := range node.Attr { + if (attr.Key == "class" || attr.Key == "data-attr-class") && strings.Contains(attr.Val, "emoji") { + return true + } + } + } + return false +} + func visitNode(ctx *RenderContext, procs []processor, node *html.Node) *html.Node { // Add user-content- to IDs and "#" links if they don't already have them for idx, attr := range node.Attr { @@ -346,47 +339,27 @@ func visitNode(ctx *RenderContext, procs []processor, node *html.Node) *html.Nod if attr.Key == "href" && strings.HasPrefix(attr.Val, "#") && notHasPrefix { node.Attr[idx].Val = "#user-content-" + val } - - if attr.Key == "class" && attr.Val == "emoji" { - procs = nil - } } switch node.Type { case html.TextNode: - processTextNodes(ctx, procs, node) + for _, proc := range procs { + proc(ctx, node) // it might add siblings + } + case html.ElementNode: - if node.Data == "code" || node.Data == "pre" { - // ignore code and pre nodes + if isEmojiNode(node) { + // TextNode emoji will be converted to ``, then the next iteration will visit the "span" + // if we don't stop it, it will go into the TextNode again and create an infinite recursion return node.NextSibling + } else if node.Data == "code" || node.Data == "pre" { + return node.NextSibling // ignore code and pre nodes } else if node.Data == "img" { return visitNodeImg(ctx, node) } else if node.Data == "video" { return visitNodeVideo(ctx, node) } else if node.Data == "a" { - // Restrict text in links to emojis - procs = emojiProcessors - } else if node.Data == "i" { - for _, attr := range node.Attr { - if attr.Key != "class" { - continue - } - classes := strings.Split(attr.Val, " ") - for i, class := range classes { - if class == "icon" { - classes[0], classes[i] = classes[i], classes[0] - attr.Val = strings.Join(classes, " ") - - // Remove all children of icons - child := node.FirstChild - for child != nil { - node.RemoveChild(child) - child = node.FirstChild - } - break - } - } - } + procs = emojiProcessors // Restrict text in links to emojis } for n := node.FirstChild; n != nil; { n = visitNode(ctx, procs, n) @@ -396,22 +369,17 @@ func visitNode(ctx *RenderContext, procs []processor, node *html.Node) *html.Nod return node.NextSibling } -// processTextNodes runs the passed node through various processors, in order to handle -// all kinds of special links handled by the post-processing. -func processTextNodes(ctx *RenderContext, procs []processor, node *html.Node) { - for _, p := range procs { - p(ctx, node) - } -} - // createKeyword() renders a highlighted version of an action keyword -func createKeyword(content string) *html.Node { +func createKeyword(ctx *RenderContext, content string) *html.Node { + // CSS class for action keywords (e.g. "closes: #1") + const keywordClass = "issue-keyword" + span := &html.Node{ Type: html.ElementNode, Data: atom.Span.String(), Attr: []html.Attribute{}, } - span.Attr = append(span.Attr, html.Attribute{Key: "class", Val: keywordClass}) + span.Attr = append(span.Attr, ctx.RenderInternal.NodeSafeAttr("class", keywordClass)) text := &html.Node{ Type: html.TextNode, @@ -422,7 +390,7 @@ func createKeyword(content string) *html.Node { return span } -func createLink(href, content, class string) *html.Node { +func createLink(ctx *RenderContext, href, content, class string) *html.Node { a := &html.Node{ Type: html.ElementNode, Data: atom.A.String(), @@ -432,7 +400,7 @@ func createLink(href, content, class string) *html.Node { a.Attr = append(a.Attr, html.Attribute{Key: "data-markdown-generated-content"}) } if class != "" { - a.Attr = append(a.Attr, html.Attribute{Key: "class", Val: class}) + a.Attr = append(a.Attr, ctx.RenderInternal.NodeSafeAttr("class", class)) } text := &html.Node{ diff --git a/modules/markup/html_codepreview.go b/modules/markup/html_codepreview.go index 5ab9290b3e42f..5c88481d769ef 100644 --- a/modules/markup/html_codepreview.go +++ b/modules/markup/html_codepreview.go @@ -6,7 +6,6 @@ package markup import ( "html/template" "net/url" - "regexp" "strconv" "strings" @@ -16,9 +15,6 @@ import ( "golang.org/x/net/html" ) -// codePreviewPattern matches "http://domain/.../{owner}/{repo}/src/commit/{commit}/{filepath}#L10-L20" -var codePreviewPattern = regexp.MustCompile(`https?://\S+/([^\s/]+)/([^\s/]+)/src/commit/([0-9a-f]{7,64})(/\S+)#(L\d+(-L\d+)?)`) - type RenderCodePreviewOptions struct { FullURL string OwnerName string @@ -30,7 +26,7 @@ type RenderCodePreviewOptions struct { } func renderCodeBlock(ctx *RenderContext, node *html.Node) (urlPosStart, urlPosStop int, htm template.HTML, err error) { - m := codePreviewPattern.FindStringSubmatchIndex(node.Data) + m := globalVars().codePreviewPattern.FindStringSubmatchIndex(node.Data) if m == nil { return 0, 0, "", nil } @@ -66,8 +62,8 @@ func codePreviewPatternProcessor(ctx *RenderContext, node *html.Node) { node = node.NextSibling continue } - urlPosStart, urlPosEnd, h, err := renderCodeBlock(ctx, node) - if err != nil || h == "" { + urlPosStart, urlPosEnd, renderedCodeBlock, err := renderCodeBlock(ctx, node) + if err != nil || renderedCodeBlock == "" { if err != nil { log.Error("Unable to render code preview: %v", err) } @@ -84,7 +80,8 @@ func codePreviewPatternProcessor(ctx *RenderContext, node *html.Node) { // then it is resolved as: "

{TextBefore}

{TextAfter}

", // so unless it could correctly replace the parent "p/li" node, it is very difficult to eliminate the "TextBefore" empty node. node.Data = textBefore - node.Parent.InsertBefore(&html.Node{Type: html.RawNode, Data: string(h)}, next) + renderedCodeNode := &html.Node{Type: html.RawNode, Data: string(ctx.RenderInternal.ProtectSafeAttrs(renderedCodeBlock))} + node.Parent.InsertBefore(renderedCodeNode, next) if textAfter != "" { node.Parent.InsertBefore(&html.Node{Type: html.TextNode, Data: textAfter}, next) } diff --git a/modules/markup/html_email.go b/modules/markup/html_email.go index 32d0285eb4c46..cbfae8b82940e 100644 --- a/modules/markup/html_email.go +++ b/modules/markup/html_email.go @@ -15,7 +15,7 @@ func emailAddressProcessor(ctx *RenderContext, node *html.Node) { } mail := node.Data[m[2]:m[3]] - replaceContent(node, m[2], m[3], createLink("mailto:"+mail, mail, "mailto")) + replaceContent(node, m[2], m[3], createLink(ctx, "mailto:"+mail, mail, "" /*mailto*/)) node = node.NextSibling.NextSibling } } diff --git a/modules/markup/html_emoji.go b/modules/markup/html_emoji.go index 6eacb2067f028..c63806542524c 100644 --- a/modules/markup/html_emoji.go +++ b/modules/markup/html_emoji.go @@ -13,15 +13,13 @@ import ( "golang.org/x/net/html/atom" ) -func createEmoji(content, class, name string) *html.Node { +func createEmoji(ctx *RenderContext, content, name string) *html.Node { span := &html.Node{ Type: html.ElementNode, Data: atom.Span.String(), Attr: []html.Attribute{}, } - if class != "" { - span.Attr = append(span.Attr, html.Attribute{Key: "class", Val: class}) - } + span.Attr = append(span.Attr, ctx.RenderInternal.NodeSafeAttr("class", "emoji")) if name != "" { span.Attr = append(span.Attr, html.Attribute{Key: "aria-label", Val: name}) } @@ -35,13 +33,13 @@ func createEmoji(content, class, name string) *html.Node { return span } -func createCustomEmoji(alias string) *html.Node { +func createCustomEmoji(ctx *RenderContext, alias string) *html.Node { span := &html.Node{ Type: html.ElementNode, Data: atom.Span.String(), Attr: []html.Attribute{}, } - span.Attr = append(span.Attr, html.Attribute{Key: "class", Val: "emoji"}) + span.Attr = append(span.Attr, ctx.RenderInternal.NodeSafeAttr("class", "emoji")) span.Attr = append(span.Attr, html.Attribute{Key: "aria-label", Val: alias}) img := &html.Node{ @@ -77,7 +75,7 @@ func emojiShortCodeProcessor(ctx *RenderContext, node *html.Node) { if converted == nil { // check if this is a custom reaction if _, exist := setting.UI.CustomEmojisMap[alias]; exist { - replaceContent(node, m[0], m[1], createCustomEmoji(alias)) + replaceContent(node, m[0], m[1], createCustomEmoji(ctx, alias)) node = node.NextSibling.NextSibling start = 0 continue @@ -85,7 +83,7 @@ func emojiShortCodeProcessor(ctx *RenderContext, node *html.Node) { continue } - replaceContent(node, m[0], m[1], createEmoji(converted.Emoji, "emoji", converted.Description)) + replaceContent(node, m[0], m[1], createEmoji(ctx, converted.Emoji, converted.Description)) node = node.NextSibling.NextSibling start = 0 } @@ -107,7 +105,7 @@ func emojiProcessor(ctx *RenderContext, node *html.Node) { start = m[1] val := emoji.FromCode(codepoint) if val != nil { - replaceContent(node, m[0], m[1], createEmoji(codepoint, "emoji", val.Description)) + replaceContent(node, m[0], m[1], createEmoji(ctx, codepoint, val.Description)) node = node.NextSibling.NextSibling start = 0 } diff --git a/modules/markup/html_issue.go b/modules/markup/html_issue.go index 2acf154ad2ad7..7341af7eb697b 100644 --- a/modules/markup/html_issue.go +++ b/modules/markup/html_issue.go @@ -57,10 +57,10 @@ func fullIssuePatternProcessor(ctx *RenderContext, node *html.Node) { matchRepo := linkParts[len(linkParts)-3] if matchOrg == ctx.Metas["user"] && matchRepo == ctx.Metas["repo"] { - replaceContent(node, m[0], m[1], createLink(link, text, "ref-issue")) + replaceContent(node, m[0], m[1], createLink(ctx, link, text, "ref-issue")) } else { text = matchOrg + "/" + matchRepo + text - replaceContent(node, m[0], m[1], createLink(link, text, "ref-issue")) + replaceContent(node, m[0], m[1], createLink(ctx, link, text, "ref-issue")) } node = node.NextSibling.NextSibling } @@ -129,16 +129,16 @@ func issueIndexPatternProcessor(ctx *RenderContext, node *html.Node) { log.Error("unable to expand template vars for ref %s, err: %v", ref.Issue, err) } - link = createLink(res, reftext, "ref-issue ref-external-issue") + link = createLink(ctx, res, reftext, "ref-issue ref-external-issue") } else { // Path determines the type of link that will be rendered. It's unknown at this point whether // the linked item is actually a PR or an issue. Luckily it's of no real consequence because // Gitea will redirect on click as appropriate. issuePath := util.Iif(ref.IsPull, "pulls", "issues") if ref.Owner == "" { - link = createLink(util.URLJoin(ctx.Links.Prefix(), ctx.Metas["user"], ctx.Metas["repo"], issuePath, ref.Issue), reftext, "ref-issue") + link = createLink(ctx, util.URLJoin(ctx.Links.Prefix(), ctx.Metas["user"], ctx.Metas["repo"], issuePath, ref.Issue), reftext, "ref-issue") } else { - link = createLink(util.URLJoin(ctx.Links.Prefix(), ref.Owner, ref.Name, issuePath, ref.Issue), reftext, "ref-issue") + link = createLink(ctx, util.URLJoin(ctx.Links.Prefix(), ref.Owner, ref.Name, issuePath, ref.Issue), reftext, "ref-issue") } } @@ -151,7 +151,7 @@ func issueIndexPatternProcessor(ctx *RenderContext, node *html.Node) { // Decorate action keywords if actionable var keyword *html.Node if references.IsXrefActionable(ref, hasExtTrackFormat) { - keyword = createKeyword(node.Data[ref.ActionLocation.Start:ref.ActionLocation.End]) + keyword = createKeyword(ctx, node.Data[ref.ActionLocation.Start:ref.ActionLocation.End]) } else { keyword = &html.Node{ Type: html.TextNode, @@ -177,7 +177,7 @@ func commitCrossReferencePatternProcessor(ctx *RenderContext, node *html.Node) { } reftext := ref.Owner + "/" + ref.Name + "@" + base.ShortSha(ref.CommitSha) - link := createLink(util.URLJoin(ctx.Links.Prefix(), ref.Owner, ref.Name, "commit", ref.CommitSha), reftext, "commit") + link := createLink(ctx, util.URLJoin(ctx.Links.Prefix(), ref.Owner, ref.Name, "commit", ref.CommitSha), reftext, "commit") replaceContent(node, ref.RefLocation.Start, ref.RefLocation.End, link) node = node.NextSibling.NextSibling diff --git a/modules/markup/html_link.go b/modules/markup/html_link.go index b7562d0aa6d26..dc40677c37ec4 100644 --- a/modules/markup/html_link.go +++ b/modules/markup/html_link.go @@ -195,7 +195,7 @@ func linkProcessor(ctx *RenderContext, node *html.Node) { } uri := node.Data[m[0]:m[1]] - replaceContent(node, m[0], m[1], createLink(uri, uri, "link")) + replaceContent(node, m[0], m[1], createLink(ctx, uri, uri, "" /*link*/)) node = node.NextSibling.NextSibling } } diff --git a/modules/markup/html_mention.go b/modules/markup/html_mention.go index 3f0692e05f55e..f7e2ad50f139f 100644 --- a/modules/markup/html_mention.go +++ b/modules/markup/html_mention.go @@ -33,7 +33,7 @@ func mentionProcessor(ctx *RenderContext, node *html.Node) { if ok && strings.Contains(mention, "/") { mentionOrgAndTeam := strings.Split(mention, "/") if mentionOrgAndTeam[0][1:] == ctx.Metas["org"] && strings.Contains(teams, ","+strings.ToLower(mentionOrgAndTeam[1])+",") { - replaceContent(node, loc.Start, loc.End, createLink(util.URLJoin(ctx.Links.Prefix(), "org", ctx.Metas["org"], "teams", mentionOrgAndTeam[1]), mention, "mention")) + replaceContent(node, loc.Start, loc.End, createLink(ctx, util.URLJoin(ctx.Links.Prefix(), "org", ctx.Metas["org"], "teams", mentionOrgAndTeam[1]), mention, "" /*mention*/)) node = node.NextSibling.NextSibling start = 0 continue @@ -44,7 +44,7 @@ func mentionProcessor(ctx *RenderContext, node *html.Node) { mentionedUsername := mention[1:] if DefaultProcessorHelper.IsUsernameMentionable != nil && DefaultProcessorHelper.IsUsernameMentionable(ctx.Ctx, mentionedUsername) { - replaceContent(node, loc.Start, loc.End, createLink(util.URLJoin(ctx.Links.Prefix(), mentionedUsername), mention, "mention")) + replaceContent(node, loc.Start, loc.End, createLink(ctx, util.URLJoin(ctx.Links.Prefix(), mentionedUsername), mention, "" /*mention*/)) node = node.NextSibling.NextSibling start = 0 } else { diff --git a/modules/markup/internal/finalprocessor.go b/modules/markup/internal/finalprocessor.go new file mode 100644 index 0000000000000..14d46a161f0b8 --- /dev/null +++ b/modules/markup/internal/finalprocessor.go @@ -0,0 +1,30 @@ +// Copyright 2024 The Gitea Authors. All rights reserved. +// SPDX-License-Identifier: MIT + +package internal + +import ( + "bytes" + "io" +) + +type finalProcessor struct { + renderInternal *RenderInternal + + output io.Writer + buf bytes.Buffer +} + +func (p *finalProcessor) Write(data []byte) (int, error) { + p.buf.Write(data) + return len(data), nil +} + +func (p *finalProcessor) Close() error { + // TODO: reading the whole markdown isn't a problem at the moment, + // because "postProcess" already does so. In the future we could optimize the code to process data on the fly. + buf := p.buf.Bytes() + buf = bytes.ReplaceAll(buf, []byte(` data-attr-class="`+p.renderInternal.secureIDPrefix), []byte(` class="`)) + _, err := p.output.Write(buf) + return err +} diff --git a/modules/markup/internal/internal_test.go b/modules/markup/internal/internal_test.go new file mode 100644 index 0000000000000..cd811777d4f34 --- /dev/null +++ b/modules/markup/internal/internal_test.go @@ -0,0 +1,53 @@ +// Copyright 2024 The Gitea Authors. All rights reserved. +// SPDX-License-Identifier: MIT + +package internal + +import ( + "bytes" + "html/template" + "io" + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestRenderInternal(t *testing.T) { + cases := []struct { + input, protected, recovered string + }{ + { + input: `
class="content"
`, + protected: `
class="content"
`, + recovered: `
class="content"
`, + }, + { + input: "
", + protected: `
`, + recovered: `
`, + }, + } + for _, c := range cases { + var r RenderInternal + out := &bytes.Buffer{} + in := r.init("sec", out) + protected := r.ProtectSafeAttrs(template.HTML(c.input)) + assert.EqualValues(t, c.protected, protected) + _, _ = io.WriteString(in, string(protected)) + _ = in.Close() + assert.EqualValues(t, c.recovered, out.String()) + } + + var r1, r2 RenderInternal + protected := r1.ProtectSafeAttrs(`
`) + assert.EqualValues(t, `
`, protected, "non-initialized RenderInternal should not protect any attributes") + _ = r1.init("sec", nil) + protected = r1.ProtectSafeAttrs(`
`) + assert.EqualValues(t, `
`, protected) + + out2 := &bytes.Buffer{} + in2 := r2.init("sec-other", out2) + _, _ = io.WriteString(in2, string(protected)) + _ = in2.Close() + assert.EqualValues(t, `
`, out2.String(), "different secureID should not recover the value") +} diff --git a/modules/markup/internal/renderinternal.go b/modules/markup/internal/renderinternal.go new file mode 100644 index 0000000000000..3f293fbc4e072 --- /dev/null +++ b/modules/markup/internal/renderinternal.go @@ -0,0 +1,82 @@ +// Copyright 2024 The Gitea Authors. All rights reserved. +// SPDX-License-Identifier: MIT + +package internal + +import ( + "crypto/rand" + "encoding/base64" + "html/template" + "io" + "regexp" + "strings" + "sync" + + "code.gitea.io/gitea/modules/htmlutil" + + "golang.org/x/net/html" +) + +var reAttrClass = sync.OnceValue[*regexp.Regexp](func() *regexp.Regexp { + // TODO: it isn't a problem at the moment because our HTML contents are always well constructed + return regexp.MustCompile(`(<[^>]+)\s+class="([^"]+)"([^>]*>)`) +}) + +// RenderInternal also works without initialization +// If no initialization (no secureID), it will not protect any attributes and return the original name&value +type RenderInternal struct { + secureID string + secureIDPrefix string +} + +func (r *RenderInternal) Init(output io.Writer) io.WriteCloser { + buf := make([]byte, 12) + _, err := rand.Read(buf) + if err != nil { + panic("unable to generate secure id") + } + return r.init(base64.URLEncoding.EncodeToString(buf), output) +} + +func (r *RenderInternal) init(secID string, output io.Writer) io.WriteCloser { + r.secureID = secID + r.secureIDPrefix = r.secureID + ":" + return &finalProcessor{renderInternal: r, output: output} +} + +func (r *RenderInternal) RecoverProtectedValue(v string) (string, bool) { + if !strings.HasPrefix(v, r.secureIDPrefix) { + return "", false + } + return v[len(r.secureIDPrefix):], true +} + +func (r *RenderInternal) SafeAttr(name string) string { + if r.secureID == "" { + return name + } + return "data-attr-" + name +} + +func (r *RenderInternal) SafeValue(val string) string { + if r.secureID == "" { + return val + } + return r.secureID + ":" + val +} + +func (r *RenderInternal) NodeSafeAttr(attr, val string) html.Attribute { + return html.Attribute{Key: r.SafeAttr(attr), Val: r.SafeValue(val)} +} + +func (r *RenderInternal) ProtectSafeAttrs(content template.HTML) template.HTML { + if r.secureID == "" { + return content + } + return template.HTML(reAttrClass().ReplaceAllString(string(content), `$1 data-attr-class="`+r.secureIDPrefix+`$2"$3`)) +} + +func (r *RenderInternal) FormatWithSafeAttrs(w io.StringWriter, fmt string, a ...any) error { + _, err := w.WriteString(string(r.ProtectSafeAttrs(htmlutil.HTMLFormat(fmt, a...)))) + return err +} diff --git a/modules/markup/markdown/goldmark.go b/modules/markup/markdown/goldmark.go index c837b21e7804c..df32502f9ec63 100644 --- a/modules/markup/markdown/goldmark.go +++ b/modules/markup/markdown/goldmark.go @@ -10,6 +10,7 @@ import ( "code.gitea.io/gitea/modules/container" "code.gitea.io/gitea/modules/markup" + "code.gitea.io/gitea/modules/markup/internal" "code.gitea.io/gitea/modules/setting" "github.com/yuin/goldmark/ast" @@ -23,11 +24,13 @@ import ( // ASTTransformer is a default transformer of the goldmark tree. type ASTTransformer struct { + renderInternal *internal.RenderInternal attentionTypes container.Set[string] } -func NewASTTransformer() *ASTTransformer { +func NewASTTransformer(renderInternal *internal.RenderInternal) *ASTTransformer { return &ASTTransformer{ + renderInternal: renderInternal, attentionTypes: container.SetOf("note", "tip", "important", "warning", "caution"), } } @@ -111,10 +114,11 @@ func (g *ASTTransformer) Transform(node *ast.Document, reader text.Reader, pc pa // NewHTMLRenderer creates a HTMLRenderer to render // in the gitea form. -func NewHTMLRenderer(opts ...html.Option) renderer.NodeRenderer { +func NewHTMLRenderer(renderInternal *internal.RenderInternal, opts ...html.Option) renderer.NodeRenderer { r := &HTMLRenderer{ - Config: html.NewConfig(), - reValidName: regexp.MustCompile("^[a-z ]+$"), + renderInternal: renderInternal, + Config: html.NewConfig(), + reValidName: regexp.MustCompile("^[a-z ]+$"), } for _, opt := range opts { opt.SetHTMLOption(&r.Config) @@ -126,7 +130,8 @@ func NewHTMLRenderer(opts ...html.Option) renderer.NodeRenderer { // renders gitea specific features. type HTMLRenderer struct { html.Config - reValidName *regexp.Regexp + reValidName *regexp.Regexp + renderInternal *internal.RenderInternal } // RegisterFuncs implements renderer.NodeRenderer.RegisterFuncs. @@ -219,7 +224,8 @@ func (r *HTMLRenderer) renderIcon(w util.BufWriter, source []byte, node ast.Node return ast.WalkContinue, nil } - _, err := w.WriteString(fmt.Sprintf(``, name)) + // FIXME: the "icon xxx" is from Fomantic UI, it's really questionable whether it still works correctly + err := r.renderInternal.FormatWithSafeAttrs(w, ``, name) if err != nil { return ast.WalkStop, err } diff --git a/modules/markup/markdown/markdown.go b/modules/markup/markdown/markdown.go index 6af0deb27bb2b..263ce5ff85f35 100644 --- a/modules/markup/markdown/markdown.go +++ b/modules/markup/markdown/markdown.go @@ -9,7 +9,6 @@ import ( "html/template" "io" "strings" - "sync" "code.gitea.io/gitea/modules/log" "code.gitea.io/gitea/modules/markup" @@ -29,11 +28,6 @@ import ( "github.com/yuin/goldmark/util" ) -var ( - specMarkdown goldmark.Markdown - specMarkdownOnce sync.Once -) - var ( renderContextKey = parser.NewContextKey() renderConfigKey = parser.NewContextKey() @@ -68,85 +62,95 @@ func newParserContext(ctx *markup.RenderContext) parser.Context { return pc } +type GlodmarkRender struct { + ctx *markup.RenderContext + + goldmarkMarkdown goldmark.Markdown +} + +func (r *GlodmarkRender) Convert(source []byte, writer io.Writer, opts ...parser.ParseOption) error { + return r.goldmarkMarkdown.Convert(source, writer, opts...) +} + +func (r *GlodmarkRender) Renderer() renderer.Renderer { + return r.goldmarkMarkdown.Renderer() +} + +func (r *GlodmarkRender) highlightingRenderer(w util.BufWriter, c highlighting.CodeBlockContext, entering bool) { + if entering { + language, _ := c.Language() + if language == nil { + language = []byte("text") + } + + languageStr := string(language) + + preClasses := []string{"code-block"} + if languageStr == "mermaid" || languageStr == "math" { + preClasses = append(preClasses, "is-loading") + } + + err := r.ctx.RenderInternal.FormatWithSafeAttrs(w, `
`, strings.Join(preClasses, " "))
+		if err != nil {
+			return
+		}
+
+		// include language-x class as part of commonmark spec
+		// the "display" class is used by "js/markup/math.js" to render the code element as a block
+		err = r.ctx.RenderInternal.FormatWithSafeAttrs(w, ``, string(language))
+		if err != nil {
+			return
+		}
+	} else {
+		_, err := w.WriteString("
") + if err != nil { + return + } + } +} + // SpecializedMarkdown sets up the Gitea specific markdown extensions -func SpecializedMarkdown() goldmark.Markdown { - specMarkdownOnce.Do(func() { - specMarkdown = goldmark.New( - goldmark.WithExtensions( - extension.NewTable( - extension.WithTableCellAlignMethod(extension.TableCellAlignAttribute)), - extension.Strikethrough, - extension.TaskList, - extension.DefinitionList, - common.FootnoteExtension, - highlighting.NewHighlighting( - highlighting.WithFormatOptions( - chromahtml.WithClasses(true), - chromahtml.PreventSurroundingPre(true), - ), - highlighting.WithWrapperRenderer(func(w util.BufWriter, c highlighting.CodeBlockContext, entering bool) { - if entering { - language, _ := c.Language() - if language == nil { - language = []byte("text") - } - - languageStr := string(language) - - preClasses := []string{"code-block"} - if languageStr == "mermaid" || languageStr == "math" { - preClasses = append(preClasses, "is-loading") - } - - _, err := w.WriteString(`
`)
-							if err != nil {
-								return
-							}
-
-							// include language-x class as part of commonmark spec
-							// the "display" class is used by "js/markup/math.js" to render the code element as a block
-							_, err = w.WriteString(``)
-							if err != nil {
-								return
-							}
-						} else {
-							_, err := w.WriteString("
") - if err != nil { - return - } - } - }), - ), - math.NewExtension( - math.Enabled(setting.Markdown.EnableMath), - ), - meta.Meta, - ), - goldmark.WithParserOptions( - parser.WithAttribute(), - parser.WithAutoHeadingID(), - parser.WithASTTransformers( - util.Prioritized(NewASTTransformer(), 10000), +func SpecializedMarkdown(ctx *markup.RenderContext) *GlodmarkRender { + // TODO: it could use a pool to cache the renderers to reuse them with different contexts + // at the moment it is fast enough (see the benchmarks) + r := &GlodmarkRender{ctx: ctx} + r.goldmarkMarkdown = goldmark.New( + goldmark.WithExtensions( + extension.NewTable(extension.WithTableCellAlignMethod(extension.TableCellAlignAttribute)), + extension.Strikethrough, + extension.TaskList, + extension.DefinitionList, + common.FootnoteExtension, + highlighting.NewHighlighting( + highlighting.WithFormatOptions( + chromahtml.WithClasses(true), + chromahtml.PreventSurroundingPre(true), ), + highlighting.WithWrapperRenderer(r.highlightingRenderer), ), - goldmark.WithRendererOptions( - html.WithUnsafe(), - ), - ) - - // Override the original Tasklist renderer! - specMarkdown.Renderer().AddOptions( - renderer.WithNodeRenderers( - util.Prioritized(NewHTMLRenderer(), 10), - ), - ) - }) - return specMarkdown + math.NewExtension(&ctx.RenderInternal, math.Enabled(setting.Markdown.EnableMath)), + meta.Meta, + ), + goldmark.WithParserOptions( + parser.WithAttribute(), + parser.WithAutoHeadingID(), + parser.WithASTTransformers(util.Prioritized(NewASTTransformer(&ctx.RenderInternal), 10000)), + ), + goldmark.WithRendererOptions(html.WithUnsafe()), + ) + + // Override the original Tasklist renderer! + r.goldmarkMarkdown.Renderer().AddOptions( + renderer.WithNodeRenderers(util.Prioritized(NewHTMLRenderer(&ctx.RenderInternal), 10)), + ) + + return r } -// actualRender renders Markdown to HTML without handling special links. -func actualRender(ctx *markup.RenderContext, input io.Reader, output io.Writer) error { - converter := SpecializedMarkdown() +// render calls goldmark render to convert Markdown to HTML +// NOTE: The output of this method MUST get sanitized!!! +func render(ctx *markup.RenderContext, input io.Reader, output io.Writer) error { + converter := SpecializedMarkdown(ctx) lw := &limitWriter{ w: output, limit: setting.UI.MaxDisplayFileSize * 3, @@ -160,8 +164,8 @@ func actualRender(ctx *markup.RenderContext, input io.Reader, output io.Writer) } log.Warn("Unable to render markdown due to panic in goldmark: %v", err) - if log.IsDebug() { - log.Debug("Panic in markdown: %v\n%s", err, log.Stack(2)) + if !setting.IsProd && !setting.IsInTesting { + log.Error("Panic in markdown: %v\n%s", err, log.Stack(2)) } }() @@ -200,26 +204,6 @@ func actualRender(ctx *markup.RenderContext, input io.Reader, output io.Writer) return nil } -// Note: The output of this method must get sanitized. -func render(ctx *markup.RenderContext, input io.Reader, output io.Writer) error { - defer func() { - err := recover() - if err == nil { - return - } - - log.Warn("Unable to render markdown due to panic in goldmark - will return raw bytes") - if log.IsDebug() { - log.Debug("Panic in markdown: %v\n%s", err, log.Stack(2)) - } - _, err = io.Copy(output, input) - if err != nil { - log.Error("io.Copy failed: %v", err) - } - }() - return actualRender(ctx, input, output) -} - // MarkupName describes markup's name var MarkupName = "markdown" diff --git a/modules/markup/markdown/markdown_test.go b/modules/markup/markdown/markdown_test.go index 780df8727f0c8..35a549deebd67 100644 --- a/modules/markup/markdown/markdown_test.go +++ b/modules/markup/markdown/markdown_test.go @@ -1051,3 +1051,17 @@ func TestAttention(t *testing.T) { // legacy GitHub style test(`> **warning**`, renderAttention("warning", "octicon-alert")+"\n") } + +func BenchmarkSpecializedMarkdown(b *testing.B) { + // 187785 5990 ns/op + for i := 0; i < b.N; i++ { + markdown.SpecializedMarkdown(nil) + } +} + +func BenchmarkMarkdownRender(b *testing.B) { + // 24698 48585 ns/op + for i := 0; i < b.N; i++ { + markdown.RenderString(&markup.RenderContext{Ctx: context.Background()}, "https://example.com\n- a\n- b\n") + } +} diff --git a/modules/markup/markdown/math/block_renderer.go b/modules/markup/markdown/math/block_renderer.go index 84817ef1e4a51..0d2a966102e95 100644 --- a/modules/markup/markdown/math/block_renderer.go +++ b/modules/markup/markdown/math/block_renderer.go @@ -4,17 +4,21 @@ package math import ( + "code.gitea.io/gitea/modules/markup/internal" + gast "github.com/yuin/goldmark/ast" "github.com/yuin/goldmark/renderer" "github.com/yuin/goldmark/util" ) // BlockRenderer represents a renderer for math Blocks -type BlockRenderer struct{} +type BlockRenderer struct { + renderInternal *internal.RenderInternal +} // NewBlockRenderer creates a new renderer for math Blocks -func NewBlockRenderer() renderer.NodeRenderer { - return &BlockRenderer{} +func NewBlockRenderer(renderInternal *internal.RenderInternal) renderer.NodeRenderer { + return &BlockRenderer{renderInternal: renderInternal} } // RegisterFuncs registers the renderer for math Blocks @@ -33,7 +37,7 @@ func (r *BlockRenderer) writeLines(w util.BufWriter, source []byte, n gast.Node) func (r *BlockRenderer) renderBlock(w util.BufWriter, source []byte, node gast.Node, entering bool) (gast.WalkStatus, error) { n := node.(*Block) if entering { - _, _ = w.WriteString(`
`)
+		_ = r.renderInternal.FormatWithSafeAttrs(w, `
`)
 		r.writeLines(w, source, n)
 	} else {
 		_, _ = w.WriteString(`
` + "\n") diff --git a/modules/markup/markdown/math/inline_renderer.go b/modules/markup/markdown/math/inline_renderer.go index 96848099cce23..0cff4f1e74e11 100644 --- a/modules/markup/markdown/math/inline_renderer.go +++ b/modules/markup/markdown/math/inline_renderer.go @@ -6,17 +6,21 @@ package math import ( "bytes" + "code.gitea.io/gitea/modules/markup/internal" + "github.com/yuin/goldmark/ast" "github.com/yuin/goldmark/renderer" "github.com/yuin/goldmark/util" ) // InlineRenderer is an inline renderer -type InlineRenderer struct{} +type InlineRenderer struct { + renderInternal *internal.RenderInternal +} // NewInlineRenderer returns a new renderer for inline math -func NewInlineRenderer() renderer.NodeRenderer { - return &InlineRenderer{} +func NewInlineRenderer(renderInternal *internal.RenderInternal) renderer.NodeRenderer { + return &InlineRenderer{renderInternal: renderInternal} } func (r *InlineRenderer) renderInline(w util.BufWriter, source []byte, n ast.Node, entering bool) (ast.WalkStatus, error) { @@ -25,7 +29,7 @@ func (r *InlineRenderer) renderInline(w util.BufWriter, source []byte, n ast.Nod if _, ok := n.(*InlineBlock); ok { extraClass = "display " } - _, _ = w.WriteString(``) + _ = r.renderInternal.FormatWithSafeAttrs(w, ``, extraClass) for c := n.FirstChild(); c != nil; c = c.NextSibling() { segment := c.(*ast.Text).Segment value := util.EscapeHTML(segment.Value(source)) diff --git a/modules/markup/markdown/math/math.go b/modules/markup/markdown/math/math.go index 3d9f376bc60e4..7e8defcd4a1e7 100644 --- a/modules/markup/markdown/math/math.go +++ b/modules/markup/markdown/math/math.go @@ -4,6 +4,8 @@ package math import ( + "code.gitea.io/gitea/modules/markup/internal" + "github.com/yuin/goldmark" "github.com/yuin/goldmark/parser" "github.com/yuin/goldmark/renderer" @@ -12,6 +14,7 @@ import ( // Extension is a math extension type Extension struct { + renderInternal *internal.RenderInternal enabled bool parseDollarInline bool parseDollarBlock bool @@ -39,38 +42,10 @@ func Enabled(enable ...bool) Option { }) } -// WithInlineDollarParser enables or disables the parsing of $...$ -func WithInlineDollarParser(enable ...bool) Option { - value := true - if len(enable) > 0 { - value = enable[0] - } - return extensionFunc(func(e *Extension) { - e.parseDollarInline = value - }) -} - -// WithBlockDollarParser enables or disables the parsing of $$...$$ -func WithBlockDollarParser(enable ...bool) Option { - value := true - if len(enable) > 0 { - value = enable[0] - } - return extensionFunc(func(e *Extension) { - e.parseDollarBlock = value - }) -} - -// Math represents a math extension with default rendered delimiters -var Math = &Extension{ - enabled: true, - parseDollarBlock: true, - parseDollarInline: true, -} - // NewExtension creates a new math extension with the provided options -func NewExtension(opts ...Option) *Extension { +func NewExtension(renderInternal *internal.RenderInternal, opts ...Option) *Extension { r := &Extension{ + renderInternal: renderInternal, enabled: true, parseDollarBlock: true, parseDollarInline: true, @@ -102,7 +77,7 @@ func (e *Extension) Extend(m goldmark.Markdown) { m.Parser().AddOptions(parser.WithInlineParsers(inlines...)) m.Renderer().AddOptions(renderer.WithNodeRenderers( - util.Prioritized(NewBlockRenderer(), 501), - util.Prioritized(NewInlineRenderer(), 502), + util.Prioritized(NewBlockRenderer(e.renderInternal), 501), + util.Prioritized(NewInlineRenderer(e.renderInternal), 502), )) } diff --git a/modules/markup/markdown/transform_blockquote.go b/modules/markup/markdown/transform_blockquote.go index 92dc500e69625..2651d44a69ff9 100644 --- a/modules/markup/markdown/transform_blockquote.go +++ b/modules/markup/markdown/transform_blockquote.go @@ -32,7 +32,8 @@ func (r *HTMLRenderer) renderAttention(w util.BufWriter, source []byte, node ast default: // including "note" octiconName = "info" } - _, _ = w.WriteString(string(svg.RenderHTML("octicon-"+octiconName, 16, "attention-icon attention-"+n.AttentionType))) + svgHTML := svg.RenderHTML("octicon-"+octiconName, 16, "attention-icon attention-"+n.AttentionType) + _, _ = w.WriteString(string(r.renderInternal.ProtectSafeAttrs(svgHTML))) } return ast.WalkContinue, nil } @@ -128,13 +129,13 @@ func (g *ASTTransformer) transformBlockquote(v *ast.Blockquote, reader text.Read } // color the blockquote - v.SetAttributeString("class", []byte("attention-header attention-"+attentionType)) + v.SetAttributeString(g.renderInternal.SafeAttr("class"), []byte(g.renderInternal.SafeValue("attention-header attention-"+attentionType))) // create an emphasis to make it bold attentionParagraph := ast.NewParagraph() g.applyElementDir(attentionParagraph) emphasis := ast.NewEmphasis(2) - emphasis.SetAttributeString("class", []byte("attention-"+attentionType)) + emphasis.SetAttributeString(g.renderInternal.SafeAttr("class"), []byte(g.renderInternal.SafeValue("attention-"+attentionType))) attentionAstString := ast.NewString([]byte(cases.Title(language.English).String(attentionType))) diff --git a/modules/markup/markdown/transform_codespan.go b/modules/markup/markdown/transform_codespan.go index ff7d24eec97c9..bccc43aad2510 100644 --- a/modules/markup/markdown/transform_codespan.go +++ b/modules/markup/markdown/transform_codespan.go @@ -5,7 +5,6 @@ package markdown import ( "bytes" - "fmt" "strings" "code.gitea.io/gitea/modules/markup" @@ -40,7 +39,7 @@ func (r *HTMLRenderer) renderCodeSpan(w util.BufWriter, source []byte, n ast.Nod r.Writer.RawWrite(w, value) } case *ColorPreview: - _, _ = w.WriteString(fmt.Sprintf(``, string(v.Color))) + _ = r.renderInternal.FormatWithSafeAttrs(w, ``, string(v.Color)) } } return ast.WalkSkipChildren, nil diff --git a/modules/markup/render.go b/modules/markup/render.go index 1977dc73f55ef..0cce5c421d084 100644 --- a/modules/markup/render.go +++ b/modules/markup/render.go @@ -9,14 +9,15 @@ import ( "io" "net/url" "strings" - "sync" "code.gitea.io/gitea/modules/git" "code.gitea.io/gitea/modules/gitrepo" + "code.gitea.io/gitea/modules/markup/internal" "code.gitea.io/gitea/modules/setting" "code.gitea.io/gitea/modules/util" "github.com/yuin/goldmark/ast" + "golang.org/x/sync/errgroup" ) type RenderMetaMode string @@ -65,6 +66,8 @@ type RenderContext struct { SidebarTocNode ast.Node RenderMetaAs RenderMetaMode InStandalonePage bool // used by external render. the router "/org/repo/render/..." will output the rendered content in a standalone page + + RenderInternal internal.RenderInternal } // Cancel runs any cleanup functions that have been registered for this Ctx @@ -156,59 +159,52 @@ sandbox="allow-scripts" return err } -func render(ctx *RenderContext, renderer Renderer, input io.Reader, output io.Writer) error { - var wg sync.WaitGroup - var err error +func pipes() (io.ReadCloser, io.WriteCloser, func()) { pr, pw := io.Pipe() - defer func() { + return pr, pw, func() { _ = pr.Close() _ = pw.Close() - }() - - var pr2 io.ReadCloser - var pw2 io.WriteCloser - - var sanitizerDisabled bool - if r, ok := renderer.(ExternalRenderer); ok { - sanitizerDisabled = r.SanitizerDisabled() } +} - if !sanitizerDisabled { - pr2, pw2 = io.Pipe() - defer func() { - _ = pr2.Close() - _ = pw2.Close() - }() - - wg.Add(1) - go func() { - err = SanitizeReader(pr2, renderer.Name(), output) - _ = pr2.Close() - wg.Done() - }() - } else { - pw2 = util.NopCloser{Writer: output} +func render(ctx *RenderContext, renderer Renderer, input io.Reader, output io.Writer) error { + finalProcessor := ctx.RenderInternal.Init(output) + defer finalProcessor.Close() + + // input -> pw1 -> pr1 -> renderer -> pw2 -> pr2 -> SanitizeReader -> finalProcessor -> output + pr1, pw1, close1 := pipes() + defer close1() + + eg, _ := errgroup.WithContext(ctx.Ctx) + var pw2 io.WriteCloser = util.NopCloser{Writer: finalProcessor} + + if r, ok := renderer.(ExternalRenderer); !ok || !r.SanitizerDisabled() { + var pr2 io.ReadCloser + var close2 func() + pr2, pw2, close2 = pipes() + defer close2() + eg.Go(func() error { + defer pr2.Close() + return SanitizeReader(pr2, renderer.Name(), finalProcessor) + }) } - wg.Add(1) - go func() { + eg.Go(func() (err error) { if r, ok := renderer.(PostProcessRenderer); ok && r.NeedPostProcess() { - err = PostProcess(ctx, pr, pw2) + err = PostProcess(ctx, pr1, pw2) } else { - _, err = io.Copy(pw2, pr) + _, err = io.Copy(pw2, pr1) } - _ = pr.Close() - _ = pw2.Close() - wg.Done() - }() + _, _ = pr1.Close(), pw2.Close() + return err + }) - if err1 := renderer.Render(ctx, input, pw); err1 != nil { - return err1 + if err := renderer.Render(ctx, input, pw1); err != nil { + return err } - _ = pw.Close() + _ = pw1.Close() - wg.Wait() - return err + return eg.Wait() } // Init initializes the render global variables diff --git a/modules/markup/sanitizer_default.go b/modules/markup/sanitizer_default.go index 476ae5e26f0c1..9a588658ec6c4 100644 --- a/modules/markup/sanitizer_default.go +++ b/modules/markup/sanitizer_default.go @@ -16,37 +16,15 @@ import ( func (st *Sanitizer) createDefaultPolicy() *bluemonday.Policy { policy := bluemonday.UGCPolicy() - // For JS code copy and Mermaid loading state - policy.AllowAttrs("class").Matching(regexp.MustCompile(`^code-block( is-loading)?$`)).OnElements("pre") - - // For code preview - policy.AllowAttrs("class").Matching(regexp.MustCompile(`^code-preview-[-\w]+( file-content)?$`)).Globally() - policy.AllowAttrs("class").Matching(regexp.MustCompile(`^lines-num$`)).OnElements("td") - policy.AllowAttrs("data-line-number").OnElements("span") - policy.AllowAttrs("class").Matching(regexp.MustCompile(`^lines-code chroma$`)).OnElements("td") - policy.AllowAttrs("class").Matching(regexp.MustCompile(`^code-inner$`)).OnElements("div") - - // For code preview (unicode escape) - policy.AllowAttrs("class").Matching(regexp.MustCompile(`^file-view( unicode-escaped)?$`)).OnElements("table") - policy.AllowAttrs("class").Matching(regexp.MustCompile(`^lines-escape$`)).OnElements("td") - policy.AllowAttrs("class").Matching(regexp.MustCompile(`^toggle-escape-button btn interact-bg$`)).OnElements("a") // don't use button, button might submit a form - policy.AllowAttrs("class").Matching(regexp.MustCompile(`^(ambiguous-code-point|escaped-code-point|broken-code-point)$`)).OnElements("span") - policy.AllowAttrs("class").Matching(regexp.MustCompile(`^char$`)).OnElements("span") - policy.AllowAttrs("data-tooltip-content", "data-escaped").OnElements("span") - - // For color preview - policy.AllowAttrs("class").Matching(regexp.MustCompile(`^color-preview$`)).OnElements("span") - - // For attention - policy.AllowAttrs("class").Matching(regexp.MustCompile(`^attention-header attention-\w+$`)).OnElements("blockquote") - policy.AllowAttrs("class").Matching(regexp.MustCompile(`^attention-\w+$`)).OnElements("strong") - policy.AllowAttrs("class").Matching(regexp.MustCompile(`^attention-icon attention-\w+ svg octicon-[\w-]+$`)).OnElements("svg") + // NOTICE: DO NOT add special rules here anymore, use RenderInternal.SafeAttr instead + + // Safe class attributes + policy.AllowAttrs("data-attr-class").Globally() + + // General safe SVG attributes policy.AllowAttrs("viewBox", "width", "height", "aria-hidden").OnElements("svg") policy.AllowAttrs("fill-rule", "d").OnElements("path") - // For Chroma markdown plugin - policy.AllowAttrs("class").Matching(regexp.MustCompile(`^(chroma )?language-[\w-]+( display)?( is-loading)?$`)).OnElements("code") - // Checkboxes policy.AllowAttrs("type").Matching(regexp.MustCompile(`^checkbox$`)).OnElements("input") policy.AllowAttrs("checked", "disabled", "data-source-position").OnElements("input") @@ -66,24 +44,12 @@ func (st *Sanitizer) createDefaultPolicy() *bluemonday.Policy { policy.AllowURLSchemeWithCustomPolicy("data", disallowScheme) } - // Allow classes for anchors - policy.AllowAttrs("class").Matching(regexp.MustCompile(`ref-issue( ref-external-issue)?`)).OnElements("a") - // Allow classes for task lists - policy.AllowAttrs("class").Matching(regexp.MustCompile(`task-list-item`)).OnElements("li") + policy.AllowAttrs("class").Matching(regexp.MustCompile(`^task-list-item$`)).OnElements("li") // Allow classes for org mode list item status. policy.AllowAttrs("class").Matching(regexp.MustCompile(`^(unchecked|checked|indeterminate)$`)).OnElements("li") - // Allow icons - policy.AllowAttrs("class").Matching(regexp.MustCompile(`^icon(\s+[\p{L}\p{N}_-]+)+$`)).OnElements("i") - - // Allow classes for emojis - policy.AllowAttrs("class").Matching(regexp.MustCompile(`emoji`)).OnElements("img") - - // Allow icons, emojis, chroma syntax and keyword markup on span - policy.AllowAttrs("class").Matching(regexp.MustCompile(`^((icon(\s+[\p{L}\p{N}_-]+)+)|(emoji)|(language-math display)|(language-math inline))$|^([a-z][a-z0-9]{0,2})$|^` + keywordClass + `$`)).OnElements("span") - // Allow 'color' and 'background-color' properties for the style attribute on text elements. policy.AllowStyles("color", "background-color").OnElements("span", "p") diff --git a/modules/markup/sanitizer_default_test.go b/modules/markup/sanitizer_default_test.go index 20370509c134f..c5c43695ea08a 100644 --- a/modules/markup/sanitizer_default_test.go +++ b/modules/markup/sanitizer_default_test.go @@ -19,7 +19,6 @@ func TestSanitizer(t *testing.T) { // Code highlighting class ``, ``, ``, ``, - ``, ``, // Input checkbox ``, ``, @@ -38,10 +37,8 @@ func TestSanitizer(t *testing.T) { // tags `Ctrl + C`, `Ctrl + C`, `NAUGHTY`, `NAUGHTY`, - ``, ``, `unchecked`, `unchecked`, `NAUGHTY`, `NAUGHTY`, - `contents`, `contents`, // Color property `Hello World`, `Hello World`, diff --git a/modules/svg/svg.go b/modules/svg/svg.go index 8132978caca99..fded9d0873744 100644 --- a/modules/svg/svg.go +++ b/modules/svg/svg.go @@ -9,7 +9,7 @@ import ( "path" "strings" - gitea_html "code.gitea.io/gitea/modules/html" + gitea_html "code.gitea.io/gitea/modules/htmlutil" "code.gitea.io/gitea/modules/log" "code.gitea.io/gitea/modules/public" ) diff --git a/modules/templates/helper.go b/modules/templates/helper.go index 3ef11772dc7d7..d5b32358da752 100644 --- a/modules/templates/helper.go +++ b/modules/templates/helper.go @@ -10,12 +10,12 @@ import ( "html/template" "net/url" "reflect" - "slices" "strings" "time" user_model "code.gitea.io/gitea/models/user" "code.gitea.io/gitea/modules/base" + "code.gitea.io/gitea/modules/htmlutil" "code.gitea.io/gitea/modules/markup" "code.gitea.io/gitea/modules/setting" "code.gitea.io/gitea/modules/svg" @@ -39,7 +39,7 @@ func NewFuncMap() template.FuncMap { "Iif": iif, "Eval": evalTokens, "SafeHTML": safeHTML, - "HTMLFormat": HTMLFormat, + "HTMLFormat": htmlutil.HTMLFormat, "HTMLEscape": htmlEscape, "QueryEscape": queryEscape, "JSEscape": jsEscapeSafe, @@ -184,23 +184,6 @@ func NewFuncMap() template.FuncMap { } } -func HTMLFormat(s string, rawArgs ...any) template.HTML { - args := slices.Clone(rawArgs) - for i, v := range args { - switch v := v.(type) { - case nil, bool, int, int8, int16, int32, int64, uint, uint8, uint16, uint32, uint64, float32, float64, template.HTML: - // for most basic types (including template.HTML which is safe), just do nothing and use it - case string: - args[i] = template.HTMLEscapeString(v) - case fmt.Stringer: - args[i] = template.HTMLEscapeString(v.String()) - default: - args[i] = template.HTMLEscapeString(fmt.Sprint(v)) - } - } - return template.HTML(fmt.Sprintf(s, args...)) -} - // safeHTML render raw as HTML func safeHTML(s any) template.HTML { switch v := s.(type) { diff --git a/modules/templates/helper_test.go b/modules/templates/helper_test.go index b9fabb7016460..3e17e86c66256 100644 --- a/modules/templates/helper_test.go +++ b/modules/templates/helper_test.go @@ -61,10 +61,6 @@ func TestJSEscapeSafe(t *testing.T) { assert.EqualValues(t, `\u0026\u003C\u003E\'\"`, jsEscapeSafe(`&<>'"`)) } -func TestHTMLFormat(t *testing.T) { - assert.Equal(t, template.HTML("< < 1"), HTMLFormat("%s %s %d", "<", template.HTML("<"), 1)) -} - func TestSanitizeHTML(t *testing.T) { assert.Equal(t, template.HTML(`link xss
inline
`), SanitizeHTML(`link xss
inline
`)) } diff --git a/modules/templates/util_avatar.go b/modules/templates/util_avatar.go index afc10915163bc..f7dd408ee2133 100644 --- a/modules/templates/util_avatar.go +++ b/modules/templates/util_avatar.go @@ -14,7 +14,7 @@ import ( "code.gitea.io/gitea/models/organization" repo_model "code.gitea.io/gitea/models/repo" user_model "code.gitea.io/gitea/models/user" - gitea_html "code.gitea.io/gitea/modules/html" + gitea_html "code.gitea.io/gitea/modules/htmlutil" "code.gitea.io/gitea/modules/setting" ) diff --git a/modules/templates/util_render.go b/modules/templates/util_render.go index 8e443446bd69c..5776eefced961 100644 --- a/modules/templates/util_render.go +++ b/modules/templates/util_render.go @@ -16,6 +16,7 @@ import ( issues_model "code.gitea.io/gitea/models/issues" "code.gitea.io/gitea/modules/emoji" + "code.gitea.io/gitea/modules/htmlutil" "code.gitea.io/gitea/modules/log" "code.gitea.io/gitea/modules/markup" "code.gitea.io/gitea/modules/markup/markdown" @@ -140,7 +141,7 @@ func (ut *RenderUtils) RenderLabel(label *issues_model.Label) template.HTML { if labelScope == "" { // Regular label - return HTMLFormat(`
%s
`, + return htmlutil.HTMLFormat(`
%s
`, extraCSSClasses, textColor, label.Color, descriptionText, ut.RenderEmoji(label.Name)) } @@ -174,7 +175,7 @@ func (ut *RenderUtils) RenderLabel(label *issues_model.Label) template.HTML { itemColor := "#" + hex.EncodeToString(itemBytes) scopeColor := "#" + hex.EncodeToString(scopeBytes) - return HTMLFormat(``+ + return htmlutil.HTMLFormat(``+ `
%s
`+ `
%s
`+ `
`, diff --git a/modules/templates/util_render_test.go b/modules/templates/util_render_test.go index 529507e7eab82..cf6d839cbf686 100644 --- a/modules/templates/util_render_test.go +++ b/modules/templates/util_render_test.go @@ -113,34 +113,34 @@ func TestRenderCommitBody(t *testing.T) { } expected := `/just/a/path.bin -https://example.com/file.bin +https://example.com/file.bin [local link](file.bin) -[remote link](https://example.com) +[remote link](https://example.com) [[local link|file.bin]] -[[remote link|https://example.com]] +[[remote link|https://example.com]] ![local image](image.jpg) -![remote image](https://example.com/image.jpg) +![remote image](https://example.com/image.jpg) [[local image|image.jpg]] -[[remote link|https://example.com/image.jpg]] +[[remote link|https://example.com/image.jpg]] 88fc37a3c0...12fc37a3c0 (hash) com 88fc37a3c0a4dda553bdcfc80c178a58247f42fb...12fc37a3c0a4dda553bdcfc80c178a58247f42fb pare 88fc37a3c0 com 88fc37a3c0a4dda553bdcfc80c178a58247f42fb mit 👍 -mail@domain.com -@mention-user test +mail@domain.com +@mention-user test #123 space` assert.EqualValues(t, expected, string(newTestRenderUtils().RenderCommitBody(testInput(), testMetas))) } func TestRenderCommitMessage(t *testing.T) { - expected := `space @mention-user ` + expected := `space @mention-user ` assert.EqualValues(t, expected, newTestRenderUtils().RenderCommitMessage(testInput(), testMetas)) } func TestRenderCommitMessageLinkSubject(t *testing.T) { - expected := `space @mention-user` + expected := `space @mention-user` assert.EqualValues(t, expected, newTestRenderUtils().RenderCommitMessageLinkSubject(testInput(), "https://example.com/link", testMetas)) } diff --git a/routers/web/repo/wiki.go b/routers/web/repo/wiki.go index 13f6b69493e09..2732a67e714fd 100644 --- a/routers/web/repo/wiki.go +++ b/routers/web/repo/wiki.go @@ -326,7 +326,7 @@ func renderViewPage(ctx *context.Context) (*git.Repository, *git.TreeEntry) { if rctx.SidebarTocNode != nil { sb := &strings.Builder{} - err = markdown.SpecializedMarkdown().Renderer().Render(sb, nil, rctx.SidebarTocNode) + err = markdown.SpecializedMarkdown(rctx).Renderer().Render(sb, nil, rctx.SidebarTocNode) if err != nil { log.Error("Failed to render wiki sidebar TOC: %v", err) } else {