Skip to content

Commit 6ebef86

Browse files
committed
Lexer/Parser: Fix break/continue/return parsing
There were a number of issues around parsing these keywords when the code doesn't contain semi-colons. For example, this valid code fragment would fail to parse: function f (x) { if (x) return x.foo() } The solution was to modify the lexer so that it inserts an AutoSemiToken after break/continue/return if and only if the white space immediately following those keywords contains a newline. Closes: erikd#30 Closes: erikd#36
1 parent c305e01 commit 6ebef86

File tree

4 files changed

+33
-8
lines changed

4 files changed

+33
-8
lines changed

runtests.hs

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,13 +16,12 @@ main = defaultMain
1616
, parserSuite
1717
-- ++AZ++temporary++ , commentSuite
1818
, commentPrintSuite
19-
-- , pendingSuite
19+
, pendingSuite
2020
]
2121

2222
pendingSuite :: Test
2323
pendingSuite = testGroup "Pending"
24-
[ testCase "AutoSemi1" (testProg "if (true) return\nfoo();" "Right (JSSourceElementsTop [JSIf (JSExpression [JSLiteral \"true\"]) ([JSReturn [] JSLiteral \"\"]) ([]),JSExpression [JSIdentifier \"foo\",JSArguments []],JSLiteral \";\"])")
25-
, testCase "AutoSemi2" (testProg "if (true) break\nfoo();" "Right")
24+
[
2625
]
2726

2827
lexerSuite:: Test
@@ -331,6 +330,9 @@ parserSuite = testGroup "Parser"
331330
-- https://github.com/alanz/language-javascript/issues/14
332331
, testCase "issue14" (testProg "var z = x[i] / y;" "Right (JSSourceElementsTop [JSVariables JSLiteral \"var\" [JSVarDecl (JSIdentifier \"z\") [JSLiteral \"=\",JSExpressionBinary \"/\" [JSMemberSquare [JSIdentifier \"x\"] (JSExpression [JSIdentifier \"i\"])] [JSIdentifier \"y\"]]],JSLiteral \"\"])")
333332

333+
, testCase "AutoSemiBreak" (testProg "if(true)break \nfoo();" "Right (JSSourceElementsTop [JSIf (JSExpression [JSLiteral \"true\"]) ([JSBreak [] JSLiteral \"\"]) ([]),JSExpression [JSIdentifier \"foo\",JSArguments []],JSLiteral \";\",JSLiteral \"\"])")
334+
, testCase "AutoSemiContinue" (testProg "if(true)continue \nfoo();" "Right (JSSourceElementsTop [JSIf (JSExpression [JSLiteral \"true\"]) ([JSContinue [] JSLiteral \"\"]) ([]),JSExpression [JSIdentifier \"foo\",JSArguments []],JSLiteral \";\",JSLiteral \"\"])")
335+
-- 'return' followed by white space containing a newline must be terminated by an automatically inserted semi-colon.
, testCase "AutoSemiReturn" (testProg "if(true)return \nfoo();" "Right (JSSourceElementsTop [JSIf (JSExpression [JSLiteral \"true\"]) ([JSReturn [] JSLiteral \"\"]) ([]),JSExpression [JSIdentifier \"foo\",JSArguments []],JSLiteral \";\",JSLiteral \"\"])")
334336
]
335337

336338
caseHelloWorld :: Assertion

src/Language/JavaScript/Parser/Grammar5.y

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,7 @@ import qualified Language.JavaScript.Parser.AST as AST
7171
')' { RightParenToken {} }
7272
'@*/' { CondcommentEndToken {} }
7373

74+
'autosemi' { AutoSemiToken {} }
7475
'break' { BreakToken {} }
7576
'case' { CaseToken {} }
7677
'catch' { CatchToken {} }
@@ -123,12 +124,19 @@ import qualified Language.JavaScript.Parser.AST as AST
123124
%%
124125

125126
-- ---------------------------------------------------------------------
126-
-- Sort out automatically inserted semi-colons
127+
-- Sort out automatically inserted semi-colons.
128+
-- A MaybeSemi is an actual semi-colon or nothing.
129+
-- An AutoSemi is either an actual semi-colon or a 'virtual' semi-colon inserted
130+
-- by the Alex lexer.
127131

128132
MaybeSemi :: { AST.JSNode }
129133
MaybeSemi : ';' { AST.NT (AST.JSLiteral ";") (ss $1) (gc $1)}
130134
| { AST.NT (AST.JSLiteral "") tokenPosnEmpty []}
131135

136+
AutoSemi :: { AST.JSNode }
137+
AutoSemi : ';' { AST.NT (AST.JSLiteral ";") (ss $1) (gc $1)}
138+
| 'autosemi' { AST.NT (AST.JSLiteral "") (ss $1) (gc $1)}
139+
132140
-- ---------------------------------------------------------------------
133141

134142
-- Helpers
@@ -962,21 +970,21 @@ IterationStatement : Do Statement While LParen Expression RParen MaybeSemi
962970
-- continue [no LineTerminator here] Identifieropt ;
963971
-- TODO: deal with [no LineTerminator here]
964972
ContinueStatement :: { AST.JSNode }
965-
ContinueStatement : Continue MaybeSemi { fp (AST.NN (AST.JSContinue $1 [] $2)) }
973+
ContinueStatement : Continue AutoSemi { fp (AST.NN (AST.JSContinue $1 [] $2)) }
966974
| Continue Identifier MaybeSemi { fp (AST.NN (AST.JSContinue $1 [$2] $3)) }
967975

968976
-- BreakStatement : See 12.8
969977
-- break [no LineTerminator here] Identifieropt ;
970978
-- TODO: deal with [no LineTerminator here]
971979
BreakStatement :: { AST.JSNode }
972-
BreakStatement : Break MaybeSemi { fp (AST.NN (AST.JSBreak $1 [] $2)) }
980+
BreakStatement : Break AutoSemi { fp (AST.NN (AST.JSBreak $1 [] $2)) }
973981
| Break Identifier MaybeSemi { fp (AST.NN (AST.JSBreak $1 [$2] $3)) }
974982

975983
-- ReturnStatement : See 12.9
976984
-- return [no LineTerminator here] Expressionopt ;
977985
-- TODO: deal with [no LineTerminator here]
978986
ReturnStatement :: { AST.JSNode }
979-
ReturnStatement : Return MaybeSemi { fp (AST.NN (AST.JSReturn $1 [] $2)) }
987+
ReturnStatement : Return AutoSemi { fp (AST.NN (AST.JSReturn $1 [] $2)) }
980988
| Return Expression MaybeSemi { fp (AST.NN (AST.JSReturn $1 [$2] $3)) }
981989

982990
-- WithStatement : See 12.10

src/Language/JavaScript/Parser/Lexer.x

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -385,13 +385,27 @@ lexCont cont = do
385385
lexLoop
386386
WsToken {} -> do
387387
addComment tok
388-
lexLoop
388+
ltok <- getLastToken
389+
case ltok of
390+
BreakToken {} -> maybeAutoSemi tok
391+
ContinueToken {} -> maybeAutoSemi tok
392+
ReturnToken {} -> maybeAutoSemi tok
393+
_otherwise -> lexLoop
389394
_other -> do
390395
cs <- getComment
391396
let tok' = tok{ tokenComment=(toCommentAnnotation cs) }
392397
setComment []
393398
cont tok'
394399

400+
-- If the token is a WsToken and it contains a newline, convert it to an
401+
-- AutoSemiToken and call the continuation, otherwise, just lexLoop.
402+
maybeAutoSemi ws@(WsToken sp tl cmt) =
403+
if any (== '\n') tl
404+
then cont $ AutoSemiToken sp tl cmt
405+
else lexLoop
406+
maybeAutoSemi _ = lexLoop
407+
408+
395409
toCommentAnnotation :: [Token] -> [CommentAnnotation]
396410
toCommentAnnotation [] = [NoComment]
397411

src/Language/JavaScript/Parser/Token.hs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,7 @@ data Token
9494

9595
-- Delimiters
9696
-- Operators
97+
| AutoSemiToken { tokenSpan :: !TokenPosn, tokenLiteral :: !String, tokenComment :: ![CommentAnnotation] }
9798
| SemiColonToken { tokenSpan :: !TokenPosn, tokenComment :: ![CommentAnnotation] }
9899
| CommaToken { tokenSpan :: !TokenPosn, tokenComment :: ![CommentAnnotation] }
99100
| HookToken { tokenSpan :: !TokenPosn, tokenComment :: ![CommentAnnotation] }

0 commit comments

Comments
 (0)