Allow non-alphanum characters in tag content

The tag content should allow non-alphanumeric characters. See the spec for section tag content [1]: "These tags' content MUST be a non-whitespace character sequence NOT containing the current closing delimiter; ..." Of the 3 added test cases, #1 and #3 fail today, and #2 triggers a panic. The code change fixes them. The expected behavior can also be verified on http://mustache.github.io/#demo, with Mustache: 1: {{#key*}}{{.}}{{/key*}} 2: {{#key}}{{*}}{{/key}} 3: {{#key}}{{*}*}}{{/key}} and JSON: { "key*": "value*", "key": "value", "*": "star", "*}*": "fish" } We get the output: 1: value* 2: star 3: fish [1] https://github.com/mustache/spec/blob/b1329a25e6d265ff360267d23f7c6327bbf59f52/specs/sections.yml#L5
xueweiz · Apr 5, 2021 · b7d834c · b7d834c
1 parent 8bb9cfc
commit b7d834c
Show file tree

Hide file tree

Showing 2 changed files with 49 additions and 11 deletions.
diff --git a/lex.go b/lex.go
@@ -7,7 +7,6 @@ import (
 	"bytes"
 	"fmt"
 	"strings"
-	"unicode"
 	"unicode/utf8"
 )
 
@@ -30,7 +29,7 @@ type tokenType int
 const (
 	tokenError tokenType = iota // error occurred; value is text of error
 	tokenEOF
-	tokenIdentifier     // alphanumeric identifier
+	tokenIdentifier     // tag identifier: non-whitespace characters NOT containing closing delimiter
 	tokenLeftDelim      // {{ left action delimiter
 	tokenRightDelim     // }} right action delimiter
 	tokenText           // plain text
@@ -279,22 +278,21 @@ func stateTag(l *lexer) stateFn {
 		l.emit(tokenPartial)
 	case r == '{':
 		l.emit(tokenRawStart)
-	case alphanum(r):
+	default:
 		l.backup()
 		return stateIdent
-	default:
-		return l.errorf("unrecognized character in action: %#U", r)
 	}
 	return stateTag
 }
 
-// stateIdent scans an alphanumeric or field.
+// stateIdent scans a tag identifier: non-whitespace characters up to the right delimiter.
 func stateIdent(l *lexer) stateFn {
 Loop:
 	for {
+
 		switch r := l.next(); {
-		case alphanum(r):
-			// absorb.
+		case !whitespace(r) && !isRightDelim(l, r):
+			// absorb
 		default:
 			l.backup()
 			l.emit(tokenIdentifier)
@@ -366,7 +364,17 @@ func whitespace(r rune) bool {
 	return false
 }
 
-// alphanum reports whether r is an alphabetic, digit, or underscore.
-func alphanum(r rune) bool {
-	return r == '_' || r == '.' || unicode.IsLetter(r) || unicode.IsDigit(r)
+// isRightDelim reports whether the right delimiter starts at r (using the next rune if needed).
+func isRightDelim(l *lexer, r rune) bool {
+	length := len(l.rightDelim)
+	// Only delimiters of byte length 1 or 2 are matched: two characters as in the default "}}"
+	// (r plus the peeked next rune), or a single character when a custom delimiter is set.
+	firstRightDelim, _ := utf8.DecodeRuneInString(l.rightDelim)
+	secondRightDelim, _ := utf8.DecodeRuneInString(l.rightDelim[1:])
+	if length == 1 && r == firstRightDelim {
+		return true
+	} else if length == 2 && r == firstRightDelim && l.peek() == secondRightDelim {
+		return true
+	}
+	return false
 }
diff --git a/parse_test.go b/parse_test.go
@@ -56,6 +56,36 @@ func TestParser(t *testing.T) {
 				}},
 			},
 		},
+		{
+			"{{#*}}({{.}}){{/*}}",
+			[]node{
+				&sectionNode{"*", false, []node{
+					textNode("("),
+					&varNode{".", true},
+					textNode(")"),
+				}},
+			},
+		},
+		{
+			"{{#list}}({{*}}){{/list}}",
+			[]node{
+				&sectionNode{"list", false, []node{
+					textNode("("),
+					&varNode{"*", true},
+					textNode(")"),
+				}},
+			},
+		},
+		{
+			"{{#list}}({{a}a}}){{/list}}",
+			[]node{
+				&sectionNode{"list", false, []node{
+					textNode("("),
+					&varNode{"a}a", true},
+					textNode(")"),
+				}},
+			},
+		},
 	} {
 		parser := newParser(newLexer(test.template, "{{", "}}"))
 		elems, err := parser.parse()