Merge pull request #1111 from lark-parser/iterate_parse
InteractiveParser: Added iter_parse() method, for easier instrumentation
erezsh authored Feb 8, 2022
2 parents e965da6 + 4a4e1d2 commit 113007a
Showing 2 changed files with 31 additions and 5 deletions.
25 changes: 20 additions & 5 deletions lark/parsers/lalr_interactive_parser.py
@@ -1,5 +1,6 @@
 # This module provides a LALR interactive parser, which is used for debugging and error handling
 
+from typing import Iterator, List
 from copy import copy
 
 from lark.exceptions import UnexpectedToken
@@ -15,20 +16,34 @@ def __init__(self, parser, parser_state, lexer_state):
         self.parser = parser
         self.parser_state = parser_state
         self.lexer_state = lexer_state
+        self.result = None
 
-    def feed_token(self, token):
+    def feed_token(self, token: Token):
         """Feed the parser with a token, and advance it to the next state, as if it received it from the lexer.
         Note that ``token`` has to be an instance of ``Token``.
         """
         return self.parser_state.feed_token(token, token.type == '$END')
 
+    def iter_parse(self) -> Iterator[Token]:
+        """Step through the different stages of the parse, by reading tokens from the lexer
+        and feeding them to the parser, one per iteration.
+        Returns an iterator of the tokens it encounters.
+        When the parse is over, the resulting tree can be found in ``InteractiveParser.result``.
+        """
+        for token in self.lexer_state.lex(self.parser_state):
+            yield token
+            self.result = self.feed_token(token)
+
-    def exhaust_lexer(self):
+    def exhaust_lexer(self) -> List[Token]:
         """Try to feed the rest of the lexer state into the interactive parser.
-        Note that this modifies the instance in place and does not feed an '$END' Token"""
-        for token in self.lexer_state.lex(self.parser_state):
-            self.parser_state.feed_token(token)
+        Note that this modifies the instance in place and does not feed an '$END' Token
+        """
+        return list(self.iter_parse())
 
 
     def feed_eof(self, last_token=None):
         """Feed a '$END' Token. Borrows from 'last_token' if given."""
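For context, iter_parse() is the new instrumentation hook: it yields each token as it is read from the lexer and fed to the parser, so a caller can observe or log the parse step by step. A minimal usage sketch, assuming the public Lark API (Lark, parse_interactive, feed_eof); the grammar and the print call are illustrative only:

    from lark import Lark

    # Illustrative LALR grammar; '!' keeps the matched terminals in the tree.
    parser = Lark('!start: "a"* "b"*', parser="lalr")

    ip = parser.parse_interactive("aaabb")
    for token in ip.iter_parse():
        # By the time a token is yielded it has already been fed to the parser,
        # so the parser state can be inspected between steps.
        print(token.type, repr(token.value))

    tree = ip.feed_eof()  # feed '$END' explicitly; for LALR this returns the finished parse tree
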
11 changes: 11 additions & 0 deletions tests/test_parser.py
@@ -2567,6 +2567,17 @@ def ignore_errors(e):
         s = "[0 1, 2,@, 3,,, 4, 5 6 ]$"
         tree = g.parse(s, on_error=ignore_errors)
 
+    @unittest.skipIf(PARSER!='lalr', "interactive_parser error handling only works with LALR for now")
+    def test_iter_parse(self):
+        ab_grammar = '!start: "a"* "b"*'
+        parser = Lark(ab_grammar, parser="lalr")
+        ip = parser.parse_interactive("aaabb")
+        i = ip.iter_parse()
+        assert next(i) == 'a'
+        assert next(i) == 'a'
+        assert next(i) == 'a'
+        assert next(i) == 'b'
+
     @unittest.skipIf(PARSER!='lalr', "Tree-less mode is only supported in lalr")
     def test_default_in_treeless_mode(self):
         grammar = r"""
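A related consequence, visible in the diff above: exhaust_lexer() is now just list(self.iter_parse()), so it returns the tokens it consumed instead of nothing. A small sketch under the same assumptions as the earlier example (the input string is illustrative):

    ip = parser.parse_interactive("aaabb")
    tokens = ip.exhaust_lexer()  # now returns the list of Tokens fed to the parser
    assert [t.value for t in tokens] == ['a', 'a', 'a', 'b', 'b']
    tree = ip.feed_eof()  # '$END' is still not fed automatically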
