forked from chrisdew/pl1-language-tools
-
Notifications
You must be signed in to change notification settings - Fork 0
/
pl1_lexer.py
executable file
·110 lines (85 loc) · 1.96 KB
/
pl1_lexer.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
#!/usr/bin/env python
import sys
import ply.lex as lex
# program = block "." .
#
# block = [ "const" ident "=" number {"," ident "=" number} ";"]
# [ "var" ident {"," ident} ";"]
# { "procedure" ident ";" block ";" } statement .
#
# statement = [ ident ":=" expression | "call" ident |
# "begin" statement {";" statement } "end" |
# "if" condition "then" statement |
# "while" condition "do" statement ].
#
# condition = "odd" expression |
# expression ("="|"#"|"<"|"<="|">"|">=") expression .
#
# expression = [ "+"|"-"] term { ("+"|"-") term}.
#
# term = factor {("*"|"/") factor}.
#
# factor = ident | number | "(" expression ")".
# Reserved words. t_NAME upper-cases an identifier and, when the result is
# listed here, emits it as a token of that type instead of NAME.
keywords = [
    'ODD',
    'CALL',
    'BEGIN',
    'END',
    'IF',
    'THEN',
    'WHILE',
    'DO',
    'CONST',
    'VAR',
    'PROCEDURE',
    'WRITE',
    'WRITELN',
]

# PLY looks up the module-level name 'tokens' to learn every token type the
# lexer may produce: all keywords plus the punctuation/operator/literal types.
tokens = keywords + [
    # '.', ';' and ':='
    'DOT', 'EOS', 'UPDATE',
    # ',', '(' and ')'
    'COMMA', 'LPAREN', 'RPAREN',
    # '+', '-', '*', '/', '=' and '!'
    'PLUS', 'MINUS', 'TIMES', 'DIVIDE', 'ASSIGN', 'PRINT',
    # '<', '<=', '>', '>=', '==' and '!='
    'LT', 'LTE', 'GT', 'GTE', 'E', 'NE',
    # identifiers and integer literals
    'NAME', 'NUMBER',
]

# Characters skipped between tokens: space and tab.
t_ignore = ' \t'
# Identifier / keyword rule.
# NOTE: the string literal that opens the body is the regular expression PLY
# uses for this rule, not documentation -- do not edit or replace it.
def t_NAME(t):
    r'[a-zA-Z_][a-zA-Z0-9_]*'
    upper = t.value.upper()
    if upper in keywords:
        # Keywords match case-insensitively: both the token value and its
        # type become the upper-case keyword (e.g. 'begin' -> BEGIN).
        t.value = upper
        t.type = upper
    # Anything else keeps its original spelling and the rule's own type.
    return t
# Line-tracking rule: consumes a run of newlines and emits no token.
# NOTE: the string literal that opens the body is the rule's regular
# expression (PLY reads it from the docstring), not documentation.
def t_newline(t):
    r'\n+'
    # The match consists only of '\n' characters, so its length is the
    # number of lines to advance the lexer's line counter by.
    t.lexer.lineno += len(t.value)
# Comment rule: '#' and everything after it up to (not including) the end of
# the line is matched and thrown away.
# NOTE: the string literal that opens the body is the rule's regular
# expression (PLY reads it from the docstring), not documentation.
def t_COMMENT(t):
    r'\#.*'
    # Deliberately returns nothing: a rule that returns None discards the
    # matched text instead of producing a token.
# Simple tokens defined as plain regular-expression strings.
# PLY orders string rules by decreasing pattern length, so the two-character
# operators ('<=', '>=', '==', '!=', ':=') are tried before their
# one-character prefixes regardless of the order they are written in here.

# Punctuation
t_DOT = r'\.'
t_EOS = r';'
t_COMMA = r','
t_LPAREN = r'\('
t_RPAREN = r'\)'

# Assignment-like operators
t_UPDATE = r':='
t_ASSIGN = r'='

# Comparison operators
t_E = r'=='
t_NE = r'!='
t_LT = r'<'
t_LTE = r'<='
t_GT = r'>'
t_GTE = r'>='

# Arithmetic operators
t_PLUS = r'\+'
t_MINUS = r'\-'
t_TIMES = r'\*'
t_DIVIDE = r'/'

# Output operator
t_PRINT = r'!'

# NOTE(review): PLY tries function rules such as t_NAME before any string
# rule, and t_NAME already matches the text 'ODD', so this pattern looks
# unreachable -- confirm before relying on or removing it.
t_ODD = r'ODD'
# Integer literal rule: one or more decimal digits.
# NOTE: the string literal that opens the body is the rule's regular
# expression (PLY reads it from the docstring), not documentation.
def t_NUMBER(t):
    r'\d+'
    # Replace the matched text with its integer value so consumers of the
    # token stream receive an int rather than a string.
    t.value = int(t.value)
    return t
# Error handling rule: called by PLY when no token rule matches at the
# current position. Here t.value holds the remaining input, so t.value[0]
# is the offending character.
def t_error(t):
    # Single-argument print(...) produces the same output under Python 2
    # (parenthesised print statement) and Python 3 (print function).
    print("Illegal character '%s'" % t.value[0])
    # Skip just the bad character and resume lexing right after it.
    t.lexer.skip(1)
# Build the module-level lexer from the token rules defined in this module.
# Keep this statement below the rule definitions: it executes at import time,
# so a rule defined after this point would not exist yet when it runs.
lexer = lex.lex()
def create():
    """Return a clone of the module-level ``lexer``.

    Each call hands back a new object produced by ``lexer.clone()``, so
    callers get their own lexer instead of sharing the module-level one.
    """
    return lexer.clone()
if __name__ == "__main__":
    # Command-line driver: tokenize everything read from standard input and
    # print one token per line.
    code = sys.stdin.read()
    # Drive the lexer object built above directly rather than going through
    # the module-level ply.lex input()/token() aliases.
    lexer.input(code)
    while True:
        tok = lexer.token()
        if not tok:
            # token() returns None once the input is exhausted.
            break
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(tok)