@@ -70,12 +70,6 @@ source: []const u8,
index: u32 = 0,
/// Current line number (0 indexed).
line: u32 = 0,
-/// When `true`, the tokenizer will attempt to parse any remaining input
-/// starting with '/' (that isn't a comment starter) as a regex literal (e.g: '/[a-zA-Z0-9]/').
-/// Otherwise, '/' (when not starting a comment) is assumed to be either the '/' or '/=' operator.
-/// This property is used to dis-ambiguate between division operators and regex literals.
-assume_bslash_starts_regex: bool = false,
-
/// When `true`, the tokenizer assumes that a '}' character is the part of a template
/// literal after an interpolated expression, and not a "}" token.
/// e.g:
@@ -120,12 +114,38 @@ pub fn next(self: *Self) Error!Token {
    return token;
}

-/// Returns the next JSX token.
-/// A JSX token is one of: '<', '>', '{', '}', or JSX text.
-/// To tokenize JS expressions inside JSX (e.g: prop values), the 'next' function should be used instead.
-/// The caller (parser) must know when to call `nextJsx` or `next` based on surrounding context.
+/// Regex literals are ambiguous with "/" and "/=" from the tokenizer's perspective,
+/// as "/a/g" can mean either ['/', 'a', '/', 'g'] or ['/a/g' (regex)].
+///
+/// When the parser sees a "/" or "/=" token but expects an expression, it should
+/// call this function to re-scan the source code starting from the '/' character.
+///
+/// [div_token]: The token that was previously returned by the tokenizer when it saw the '/' character
+/// (must be a "/" or "/=" token).
+pub fn reScanRegexLiteral(self: *Self, div_token: *const Token) Error!Token {
+    assert(div_token.tag == .@"/" or div_token.tag == .@"/=");
+    self.rewind(div_token.start, div_token.line);
+    return self.regexLiteral();
+}
+
+/// Inside template literals, a "}" should be treated as part of a template string,
+/// instead of a lone '}' token.
+/// So '`foo${bar}baz`' should be tokenized as: '`foo${', 'bar', '}baz`'.
///
-/// [is_inside_jsx_tags] is `true` when the tokenizer is inside a '<' and '>' pair.
+/// When the parser receives a '}' token after 'bar', it should call this function
+/// to re-scan the source code starting from the '}' character, and tokenize it as a template part.
+///
+/// [rbrace_token]: The token that was previously returned by the tokenizer when it saw the '}' character.
+pub fn reScanTemplatePart(self: *Self, rbrace_token: *const Token) Error!Token {
+    assert(rbrace_token.tag == .@"}");
+    self.rewind(rbrace_token.start, rbrace_token.line);
+    return self.templateAfterInterpolation();
+}
+
+/// Returns the next token that starts a JSX child.
+/// The token returned is one of: '<', '{', or JSX text.
+/// To tokenize JS expressions inside JSX (e.g: prop values), the 'next' function should be used instead.
+/// The caller (parser) must know when to call `nextJsxChild` or `next` based on surrounding context.
pub fn nextJsxChild(self: *Self) Error!Token {
    const byte = self.peekByte() orelse {
        return Token{
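For context, the intended call pattern for the new re-scan API is parser-driven. A minimal sketch of that pattern, assuming hypothetical `Parser` and `Node` types and a `parsePrimary` function that are not part of this patch:

    // Hypothetical parser-side dispatch; `Parser`, `Node`, and `parsePrimary`
    // are illustrative names only, not part of this diff.
    fn parsePrimary(p: *Parser) Error!Node {
        var token = try p.tokenizer.next();
        // An expression is expected here, so a "/" or "/=" token cannot be
        // division; re-scan it as a regex literal instead.
        if (token.tag == .@"/" or token.tag == .@"/=")
            token = try p.tokenizer.reScanRegexLiteral(&token);
        // ... build an expression node from `token` ...
    }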
@@ -248,17 +268,6 @@ fn consumeToken(self: *Self) Error!Token {
        '/' => {
            if (try self.comment()) |tok|
                return tok;
-
-            // Parsing regex literals is awkward.
-            // A '/abc' can either be the start of a regex literal,
-            // or a '/' (division) token followed by an 'abc' (identifier) token.
-            //
-            // The parser has to tell the tokenizer what it expects
-            // to see next. If it expects to see a literal, then
-            // we want to try tokenizing a regex literal.
-            // Otherwise, we look for '/' or '/='.
-            if (self.assume_bslash_starts_regex)
-                return try self.regexLiteral();
            return try self.punctuator();
        },
        ' ',
@@ -270,9 +279,6 @@ fn consumeToken(self: *Self) Error!Token {
        '\u{000C}',
        => return self.whiteSpaces(),
        '}' => {
-            if (self.assume_rbrace_is_template_part)
-                return try self.templateAfterInterpolation();
-
            self.index += 1;
            return Token{
                .start = self.index - 1,
@@ -1875,19 +1881,16 @@ test Self {

    {
        var tokenizer = try Self.init(" /a\\(bc[some_character_class]/g //foo", .{});
-        tokenizer.assume_bslash_starts_regex = true; // '/' is now interpreted as regex literal start marker.
-        try t.expectEqual(Token.Tag.whitespace, (try tokenizer.next()).tag);
-        try t.expectEqual(Token.Tag.regex_literal, (try tokenizer.next()).tag);
        try t.expectEqual(Token.Tag.whitespace, (try tokenizer.next()).tag);
-        try t.expectEqual(Token.Tag.comment, (try tokenizer.next()).tag);
-        try t.expectEqual(Token.Tag.eof, (try tokenizer.next()).tag);
-    }

-    {
-        var tokenizer = try Self.init(" /a\\(bc[some_character_class]/g //foo", .{});
-        tokenizer.assume_bslash_starts_regex = true; // '/' is now interpreted as regex literal start marker.
-        try t.expectEqual(Token.Tag.whitespace, (try tokenizer.next()).tag);
-        try t.expectEqual(Token.Tag.regex_literal, (try tokenizer.next()).tag);
+        // By default, '/' is interpreted as a division operator.
+        const div_token = try tokenizer.next();
+        try t.expectEqual(.@"/", div_token.tag);
+
+        // Then it can be re-scanned as a regex literal.
+        const regex_token = try tokenizer.reScanRegexLiteral(&div_token);
+        try t.expectEqual(.regex_literal, regex_token.tag);
+
        try t.expectEqual(Token.Tag.whitespace, (try tokenizer.next()).tag);
        try t.expectEqual(Token.Tag.comment, (try tokenizer.next()).tag);
        try t.expectEqual(Token.Tag.eof, (try tokenizer.next()).tag);
@@ -1897,9 +1900,10 @@ test Self {
        var tokenizer = try Self.init("`hello ${'world'}`", .{});
        try t.expectEqual(.template_literal_part, (try tokenizer.next()).tag);
        try t.expectEqual(.string_literal, (try tokenizer.next()).tag);
-        tokenizer.assume_rbrace_is_template_part = true;
-        try t.expectEqual(.template_literal_part, (try tokenizer.next()).tag);
-        tokenizer.assume_rbrace_is_template_part = false;
+        const rb_token = try tokenizer.next();
+        try t.expectEqual(.@"}", rb_token.tag);
+        const template_part = try tokenizer.reScanTemplatePart(&rb_token);
+        try t.expectEqual(.template_literal_part, template_part.tag);
        try t.expectEqual(.eof, (try tokenizer.next()).tag);
    }
}
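Both re-scan helpers lean on `rewind`, which is not shown in this diff. A minimal sketch of what it is assumed to do, based on the `index` and `line` fields declared at the top of the file (the real implementation may differ):

    // Assumed shape of `rewind` (not part of this diff): reset the scan
    // cursor so the next scan starts over at the given token's position.
    fn rewind(self: *Self, index: u32, line: u32) void {
        self.index = index;
        self.line = line;
    }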