-
Notifications
You must be signed in to change notification settings - Fork 1
/
parser.a68
427 lines (386 loc) · 10.7 KB
/
parser.a68
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
PR include "ast.a68" PR
PR include "util.a68" PR
PR include "astdump.a68" PR
PR include "astprint.a68" PR
PROC parse input = (REF FILE source)DECLS:
CO
this routine transforms the input string into lexical elements (read token)
and subsequently parses this stream of tokens into a abstract syntax tree
CO
BEGIN
HEAP[0]EXPR no args;
MODE TOKEN = UNION(INT, SYMBOL, IDENT, VOID);
INT line := 1;
INT col := 0;
[2]INT pos;
PROC end the misery = BOOL: halt;
PROC parse error = (STRING why)BOOL:
(complain(why,pos); end the misery);
CHAR cur char := " ";
PROC next char = CHAR:
BEGIN
get(source, cur char);
9DEBUG (VOID: print(("'", cur char)));
col +:= 1; cur char
END;
PROC skip white space = VOID:
BEGIN
on line end(source, (REF FILE f)BOOL: (get(f, new line); line +:= 1; col := 0; cur char := " "; TRUE));
WHILE is space (cur char) DO next char OD
END;
PROC is reserved = (STRING word)BOOL:
BEGIN
word MEMBER reserved words
END;
PROC as integer = (STRING word)INT:
BEGIN
INT val := 0;
FOR i TO UPB word DO
IF NOT is digit (word[i]) THEN
parse error ("not a valid integer constant")
FI;
val := 10*val + (ABS word[i] - ABS "0")
OD;
val
END;
PROC read token = TOKEN:
CO
this is the lexicalization step, processing raw input characters
into usable tokens for the parsing fase (which happens concurrently)
CO
BEGIN
STRING tok := "";
on file end(source, (REF FILE f)BOOL: (cur char:=" ";leave));
read token:
skip white space;
on line end(source, (REF FILE f)BOOL: (cur char:=" ";leave));
pos := (line, col);
tok := cur char;
#IF is alnum(cur char) THEN# #for reification#
IF is alnum(cur char) OR cur char = "_" THEN
WHILE CHAR ch = next char; is alnum(ch) OR ch = "_" DO tok +:= ch OD
ELSE next char FI;
leave:
IF tok = "" THEN
8DEBUG (VOID: write(("<~>", new line)));
EMPTY
#ELIF is alpha (tok[1]) THEN# #for reification#
ELIF is alpha (tok[1]) OR tok[1] = "_" THEN
8DEBUG (VOID: write(("<IDWORD:", tok, ">", new line)));
(is reserved (tok) | SYMBOL(tok,pos) | IDENT(tok, pos, HEAP DECLINFO))
ELIF is digit (tok[1]) THEN
8DEBUG (VOID: write(("<INT:", tok, ">", new line)));
as integer(tok)
ELSE
IF tok[1] = "/" THEN
IF cur char = "/" THEN
# comment to end of line #
on line end(source, (REF FILE f)BOOL: (cur char := " "; read token));
DO next char OD
ELIF cur char = "*" THEN
# C-style comment #
skip white space;
WHILE (WHILE next char /= "*" DO ~ OD; next char) /= "/" DO ~ OD;
next char;
read token
FI
ELIF char in string (tok[1], NIL, "<>!=") AND cur char = "=" THEN
tok +:= cur char;
next char
ELIF char in string (tok[1], NIL, "&|") AND tok[1] = cur char THEN
tok +:= cur char;
next char
FI;
8DEBUG (VOID: write(("<SYMBOL:", tok, ">", new line)));
SYMBOL(tok, pos)
FI
END;
SYMBOL matched symbol;
TOKEN token;
PROC shift = BOOL: (token := read token; TRUE);
shift;
OP MATCH = (CHAR symbol)BOOL:
( token | (SYMBOL k): (matched symbol:=k)=symbol ANDTH shift | FALSE );
OP MATCH = ([]CHAR symbol)BOOL:
( token | (SYMBOL k): (matched symbol:=k)=symbol ANDTH shift | FALSE );
OP MATCH = ([]STRING symbol)BOOL:
( token | (SYMBOL k): repr OF (matched symbol:=k) MEMBER symbol ANDTH shift | FALSE );
OP MATCH = (REF INT obj)BOOL:
( token | (INT i): (obj := i; shift) | FALSE );
OP MATCH = (REF IDENT obj)BOOL:
( token | (IDENT id): (obj := id; shift) | FALSE );
PROC parse arguments = REF[]EXPR:
BEGIN
REF[]EXPR arg list := no args;
IF NOT MATCH ")" THEN
VOID (MATCH arg list);
REQUIRE ")"
FI;
arg list
END;
OP MATCH = (REF EXPR obj)BOOL:
BEGIN
[]STRING units = ("True", "False");
[]STRING monop = ("!","-");
[][]STRING dyop =
(("*", "/", "%"), ("+","-"),
(">","<","<=",">="), ("==", "!="),
("&&"), ("||"), (":"));
PROC match primary = (REF EXPR obj)BOOL:
BEGIN
SYMBOL sym;
INT val;
IDENT id;
IF MATCH val THEN
obj := CONST(val, pos#!#); TRUE
ELIF MATCH "(" THEN
REQUIRE obj;
IF MATCH "," THEN
HEAP EXPR lhs := obj, rhs;
REQUIRE rhs;
obj := TUPLE(lhs, rhs, pos#!#)
FI;
REQUIRE ")"; TRUE
ELIF MATCH "[" THEN
obj := SYMBOL("[]", pos OF matched symbol);
REQUIRE "]"; TRUE
ELIF MATCH units THEN
obj := matched symbol; TRUE
ELIF MATCH monop THEN
SYMBOL op := matched symbol;
IF match primary (obj) THEN
obj := MONAD(op, HEAP EXPR:=obj); TRUE
ELSE
FALSE
FI
ELIF MATCH id THEN
IF MATCH "(" THEN
obj := FUNCALL(id, parse arguments); TRUE
ELSE
obj := id; TRUE
FI
ELSE
FALSE
FI
END;
PROC missing expression = BOOL:
(parse error ("illegal operator"));
# parse dyadic formulas using their precedence grouping #
PROC default = (REF EXPR e)EXPR: e;
PROC match priority = (REF EXPR obj, INT level, PROC(REF EXPR)EXPR left extend)BOOL:
BEGIN
IF level = 0 THEN
match primary (obj)
ELIF match priority (obj, level-1, default) THEN
EXPR lhs = left extend(obj);
EXPR rhs;
IF MATCH dyop[level] THEN
SYMBOL op = matched symbol;
IF op = ":" THEN
# right-associative #
match priority (rhs, level, default) OREL missing expression;
obj := DYAD(op, HEAP EXPR:=lhs, HEAP EXPR:=rhs)
ELSE
# left-associative #
match priority (obj, level, (REF EXPR rhs)EXPR:
rhs := DYAD(op, HEAP EXPR:=lhs, HEAP EXPR:=rhs)) OREL missing expression
FI
FI; TRUE
ELSE
FALSE
FI
END;
match priority (obj, UPB dyop, default)
END;
OP MATCH = (REF REF[]EXPR args)BOOL:
BEGIN
# lets perform a neat trick to avoid resizing arrays #
PROC recurse = (INT i)VOID:
BEGIN
EXPR arg;
REQUIRE arg;
IF MATCH "," THEN recurse(i+1) ELSE args := HEAP [i] EXPR FI;
args[i] := arg
END;
recurse(1); TRUE
END;
OP MATCH = (REF TYPE spec)BOOL:
BEGIN
[]STRING prim types = ("Int", "Bool");
IDENT id;
IF MATCH id THEN
spec := id; TRUE
ELIF MATCH prim types THEN
spec := matched symbol; TRUE
ELIF MATCH "[" THEN
REQUIRE spec;
spec := LISTT(HEAP TYPE:=spec, ~);
REQUIRE "]"; TRUE
ELIF MATCH "(" THEN
HEAP TYPE lt, rt;
REQUIRE lt;
REQUIRE ",";
REQUIRE rt;
spec := PAIRT(lt, rt);
REQUIRE ")"; TRUE
ELSE
FALSE
FI
END;
OP MATCH = (REF STM stm)BOOL:
BEGIN
TYPE type;
IDENT id;
EXPR expr;
IF MATCH "{" THEN
stm := parse statements; TRUE
ELIF MATCH "return" THEN
stm := RETURN((MATCH expr | expr | EMPTY), pos#!#);
REQUIRE ";"; TRUE
ELIF MATCH "if" THEN
REQUIRE "("; REQUIRE expr; REQUIRE ")";
STM then, else; REQUIRE then;
IF MATCH "else" THEN
REQUIRE else;
stm := IFSTM(expr, HEAP STM:=then, HEAP STM:=else); TRUE
ELSE
stm := IFSTM(expr, HEAP STM:=then, NIL); TRUE
FI
ELIF MATCH "while" THEN
REQUIRE "("; REQUIRE expr; REQUIRE ")";
HEAP STM body; REQUIRE body;
stm := WHILESTM(expr, body); TRUE
ELIF MATCH id THEN
IF MATCH "=" THEN
REQUIRE expr;
stm := ASSIGN(id, expr)
ELIF MATCH "(" THEN
stm := FUNCALL(id, parse arguments)
ELSE
IDENT name; REQUIRE name; REQUIRE "="; REQUIRE expr;
# auto-extension; prev: no HEAP TYPE:= #
stm := DECLSTM(HEAP TYPE:=id,name,expr)
FI; REQUIRE ";"; TRUE
# auto-extension; prev: ELIF MATCH type THEN #
ELIF type:=37; MATCH type OREL MATCH "Auto" THEN
IDENT name; REQUIRE name; REQUIRE "="; REQUIRE expr;
# auto-extension; prev: no HEAP TYPE:= #
stm := DECLSTM(HEAP TYPE:=type,name,expr);
REQUIRE ";"; TRUE
ELSE
FALSE
FI
END;
# iterative -- to avoid running out of stack #
PROC parse statements = REF STMLIST:
BEGIN
MODE LINK = REF STMLIST;
LINK start;
REF LINK next := start;
WHILE NOT MATCH "}" DO
STM stm; REQUIRE stm;
REF LINK(next) := HEAP STMLIST:=(stm, ~);
next := tail OF next
OD;
REF LINK(next) := NIL;
start
COMMENT
IF MATCH "}" THEN NIL ELSE
STM stm; REQUIRE stm;
HEAP STMLIST:=(stm, parse statements)
FI
COMMENT
END;
OP MATCH = (REF DECL decl)BOOL:
# auto extension; HEAP TYPE #
IF HEAP TYPE ret type := SYMBOL("Void", pos); MATCH ret type OR MATCH "Void" THEN
IDENT id; REQUIRE id;
IF MATCH "=" THEN
IF (ret type|(SYMBOL s): s="Void"|FALSE) THEN
complain ("argument list expected", pos OF matched symbol); end the misery
FI;
EXPR expr; REQUIRE expr; REQUIRE ";";
decl := DECLSTM(ret type, id, expr)
ELSE
PARAMS formal args;
PROC gather formal args = (INT i)VOID:
BEGIN
HEAP TYPE type; REQUIRE type;
IDENT id; REQUIRE id;
IF MATCH "," THEN gather formal args(i+1) ELSE formal args := HEAP [0:i] PARAM FI;
formal args[i] := (type, id)
END;
REQUIRE "(";
IF NOT MATCH ")" THEN
gather formal args(1); REQUIRE ")"
ELSE
formal args := HEAP[0:0] PARAM
FI;
REQUIRE "{";
# auto extension; HEAP TYPE:= removed #
formal args[0] := (ret type, id);
decl := DECLFUN(formal args, parse statements, HEAP INT, HEAP FLEX[0]PARAM)
FI; TRUE
ELSE
FALSE
FI;
# iterative version - otherwise we run out of stack...#
OP MATCH = (REF DECLS prog)BOOL:
BEGIN
MODE LINK = REF DECLS;
LINK start; REF LINK next := start;
WHILE DECL head; MATCH head DO
REF LINK(next) := HEAP DECLS:=(head, ~);
next := tail OF next;
preemptive sweep
OD;
REF LINK(next) := NIL;
IF LINK(start) ISNT NIL THEN
prog := start; TRUE
ELSE
FALSE
FI
COMMENT
DECL head;
IF MATCH head THEN
prog := DECLS(head, (MATCH prog|HEAP DECLS:=prog|NIL));
TRUE
ELSE
FALSE
FI
COMMENT
END;
#-----------------------------------------#
DECLS program;
IF MATCH program THEN
7DEBUG (VOID: DUMP( program) )
ELSE
parse error ("cowardly refusing to compile an empty program")
FI;
IF (token|(VOID):FALSE|TRUE) THEN
parse error ("stray junk at end of program: "+
(token|(IDENT id): name OF id,
(SYMBOL s): repr OF s,
(INT i): whole(i,0)))
FI;
# syntax error messages #
OP REQUIRE = ([] CHAR sym)VOID:
(MATCH sym | TRUE | parse error ("expecting `"+sym+"'"));
OP REQUIRE = (CHAR sym)VOID:
(MATCH sym | TRUE | parse error ("expecting `"+sym+"'"));
OP REQUIRE = (REF TYPE type)VOID:
(MATCH type | TRUE | parse error ("invalid type specifier"));
OP REQUIRE = (REF STM stm)VOID:
(MATCH stm | TRUE | parse error ("statement expected"));
OP REQUIRE = (REF EXPR expr)VOID:
(MATCH expr | TRUE | parse error ("illegal expression"));
OP REQUIRE = (REF IDENT id)VOID:
(MATCH id | TRUE | parse error ("identifier expected"));
program
END;
PROC parse file = (STRING file name)DECLS:
BEGIN
FILE src;
open(src, file name, standin channel);
parse input(src)
END;