From 6fdfe493740f24d52f70f9e952a35aef04138f35 Mon Sep 17 00:00:00 2001 From: Nicolas Garnier Date: Mon, 29 Apr 2019 15:39:56 +0200 Subject: [PATCH] Remove potential line breaks from getting into codelab IDs and titles. --- claat/parser/gdoc/css.go | 2 +- claat/parser/gdoc/html.go | 19 +++++++++++++------ claat/parser/gdoc/parse.go | 26 +++++++++++++------------- 3 files changed, 27 insertions(+), 20 deletions(-) diff --git a/claat/parser/gdoc/css.go b/claat/parser/gdoc/css.go index 675c8a94a..351a41c69 100644 --- a/claat/parser/gdoc/css.go +++ b/claat/parser/gdoc/css.go @@ -47,7 +47,7 @@ func parseStyle(doc *html.Node) (cssStyle, error) { if node == nil { return style, nil } - css := stringifyNode(node, true) + css := stringifyNode(node, true, true) var skip bool var sel []string diff --git a/claat/parser/gdoc/html.go b/claat/parser/gdoc/html.go index e6903dbbb..1ad9842d5 100644 --- a/claat/parser/gdoc/html.go +++ b/claat/parser/gdoc/html.go @@ -226,8 +226,8 @@ func nodeAttr(n *html.Node, name string) string { } // stringifyNode extracts and concatenates all text nodes starting with root. -// Line breaks are inserted at
<br> and any non-<span> elements. -func stringifyNode(root *html.Node, trim bool) string { +// Line breaks are inserted at
<br> and any non-<span> elements if requested. +func stringifyNode(root *html.Node, trim bool, lineBreak bool) string { if root.Type == html.TextNode { s := textCleaner.Replace(root.Data) if !trim { @@ -236,7 +236,10 @@ func stringifyNode(root *html.Node, trim bool) string { return strings.TrimSpace(s) } if root.DataAtom == atom.Br && !trim { - return "\n" + if lineBreak { + return "\n" + } + return "" } var buf bytes.Buffer for c := root.FirstChild; c != nil; c = c.NextSibling { @@ -248,7 +251,9 @@ func stringifyNode(root *html.Node, trim bool) string { } } if c.DataAtom == atom.Br { - buf.WriteRune('\n') + if lineBreak { + buf.WriteRune('\n') + } continue } if c.Type == html.TextNode { @@ -256,9 +261,11 @@ func stringifyNode(root *html.Node, trim bool) string { continue } if c.DataAtom != atom.Span && c.DataAtom != atom.A { - buf.WriteRune('\n') + if lineBreak { + buf.WriteRune('\n') + } } - buf.WriteString(stringifyNode(c, false)) + buf.WriteString(stringifyNode(c, false, lineBreak)) } s := textCleaner.Replace(buf.String()) if !trim { diff --git a/claat/parser/gdoc/parse.go b/claat/parser/gdoc/parse.go index 0c6db6426..e9e4dd327 100644 --- a/claat/parser/gdoc/parse.go +++ b/claat/parser/gdoc/parse.go @@ -207,7 +207,7 @@ func parseDoc(doc *html.Node) (*types.Codelab, error) { } switch { case hasClass(ds.cur, "title") && ds.step == nil: - if v := stringifyNode(ds.cur, true); v != "" { + if v := stringifyNode(ds.cur, true, false); v != "" { ds.clab.Title = v } if ds.clab.ID == "" { @@ -361,7 +361,7 @@ func parseNode(ds *docState) (types.Node, bool) { // newStep creates a new codelab step from ds.cur // and finalizes nodes of the previous step. 
func newStep(ds *docState) { - t := stringifyNode(ds.cur, true) + t := stringifyNode(ds.cur, true, false) if t == "" { return } @@ -376,8 +376,8 @@ func metaTable(ds *docState) { if tr.FirstChild == nil || tr.FirstChild.NextSibling == nil { continue } - s := stringifyNode(tr.FirstChild.NextSibling, true) - switch strings.ToLower(stringifyNode(tr.FirstChild, true)) { + s := stringifyNode(tr.FirstChild.NextSibling, true, false) + switch strings.ToLower(stringifyNode(tr.FirstChild, true, false)) { case "id", "url": ds.clab.ID = s case "author", "authors": @@ -385,7 +385,7 @@ func metaTable(ds *docState) { case "badge", "badge id": ds.clab.BadgeID = s case "summary": - ds.clab.Summary = s + ds.clab.Summary = stringifyNode(tr.FirstChild.NextSibling, true, true) case "category", "categories": ds.clab.Categories = util.Unique(stringSlice(s)) case "environment", "environments", "tags": @@ -411,7 +411,7 @@ func metaTable(ds *docState) { func metaStep(ds *docState) { var text string for { - text += stringifyNode(ds.cur, false) + text += stringifyNode(ds.cur, false, false) if ds.cur.NextSibling == nil || !isMeta(ds.css, ds.cur.NextSibling) { break } @@ -463,7 +463,7 @@ func header(ds *docState) types.Node { return nil } n := types.NewHeaderNode(headerLevel[ds.cur.DataAtom], nodes...) 
- switch strings.ToLower(stringifyNode(ds.cur, true)) { + switch strings.ToLower(stringifyNode(ds.cur, true, false)) { case headerLearn, headerCover: n.MutateType(types.NodeHeaderCheck) case headerFAQ: @@ -556,7 +556,7 @@ func survey(ds *docState) types.Node { opt, next := surveyOpt(c.NextSibling) if len(opt) > 0 { gg = append(gg, &types.SurveyGroup{ - Name: stringifyNode(c, true), + Name: stringifyNode(c, true, false), Options: opt, }) } @@ -583,7 +583,7 @@ func surveyOpt(hn *html.Node) ([]string, *html.Node) { if li.DataAtom != atom.Li { continue } - opt = append(opt, stringifyNode(li, true)) + opt = append(opt, stringifyNode(li, true, true)) } } return opt, nil @@ -598,7 +598,7 @@ func code(ds *docState, term bool) types.Node { return text(ds) } // block code or terminal - v := stringifyNode(ds.cur, false) + v := stringifyNode(ds.cur, false, true) if v == "" { if countDirect(ds.cur.Parent) > 1 { return nil @@ -700,7 +700,7 @@ func button(ds *docState) types.Node { return nil } - s := strings.ToLower(stringifyNode(a, true)) + s := strings.ToLower(stringifyNode(a, true, false)) dl := strings.HasPrefix(s, "download ") btn := types.NewButtonNode(true, true, dl, nodes...) @@ -719,7 +719,7 @@ func link(ds *docState) types.Node { return nil } - text := stringifyNode(ds.cur, false) + text := stringifyNode(ds.cur, false, true) if strings.TrimSpace(text) == "" { return nil } @@ -776,7 +776,7 @@ func text(ds *docState) types.Node { } } - v := stringifyNode(ds.cur, false) + v := stringifyNode(ds.cur, false, true) n := types.NewTextNode(v) n.Bold = bold n.Italic = italic