-
Notifications
You must be signed in to change notification settings - Fork 1
/
analysis.a68
422 lines (372 loc) · 12.8 KB
/
analysis.a68
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
PR include "ast.a68" PR
PR include "dict.a68" PR
PR include "prelude.a68" PR
COMMENT
type sanity - how strict should type instantiation be?
3 - don't use return type for derivation
2 - disallow all ambiguous calls
1 - disallow infinite and empty types
0 - allow improper types
type deferral - use a more powerful type unification
* you should NOT need this with sanity = 3
type voiding - discard function call results?
* if FALSE, only Void functions can be statements
COMMENT
# TODO: should make a copy of the type signature for every call #
INT type sanity = 3;
BOOL type voiding = TRUE;
BOOL type deferral = FALSE;
BOOL type chaining = FALSE;
PROC semantic analysis = (REF DECLS ast)VOID:
BEGIN
TYPE void := SYMBOL("Void", ~);
TYPE integral := SYMBOL("Int", ~);
TYPE boolean := SYMBOL("Bool", ~);
HEAP TYPE dont care;
# cleverly using the dictionary to reason about scope #
DICT symbols := new dictionary (64);
[LWB symbols:UPB symbols]INT cur scope;
[]INT global scope = cur scope := mark(symbols);
add prelude(symbols);
PROC make symbol = (IDENT id, UNION(DECL,PARAM) decl, INT info)VOID:
IF retrieve (slice(symbols,cur scope), name OF id, LOC DECLINFO) THEN
complain ("`"+name OF id+"' already defined", pos OF id)
ELSE
assert (symbols, name OF id, info OF id:=DECLINFO(decl,info))
FI;
PROC get symbol = (IDENT id)DECLINFO:
IF DECLINFO found; retrieve (symbols, name OF id, found) THEN
found
ELSE
complain("unknown identifier: `"+name OF id+"'", pos OF id);
DECLSTM stm = (HEAP TYPE:=0, id, SYMBOL("?",~));
DECLINFO(stm, ~)
FI;
PROC get var type = (IDENT id)REF TYPE:
BEGIN
DECLINFO var = get symbol (id);
info OF id := var;
CASE decl OF var IN
(DECLSTM decl): type OF decl,
(PARAM arg): type OF arg
OUT
complain("function `"+name OF id+"' used where a variable expected", pos OF id);
HEAP TYPE:=0
ESAC
END;
PROC get fun signature = (FUNCALL call)DECLFUN:
(decl OF get symbol(id OF call) | (DECLFUN fun): fun |
complain("`"+name OF id OF call+"' is not a function", pos OF id OF call);
HEAP [0:UPB args OF call]PARAM fake param;
FOR i FROM 0 TO UPB args OF call DO fake param[i] := PARAM(HEAP TYPE:=0, id OF call) OD;
DECLFUN(fake param, NIL, NIL, HEAP FLEX[0]PARAM));
INT frame size;
INT frame offset;
PROC new offset = INT:
(IF (frame offset+:=1) > frame size THEN frame size := frame offset FI;
frame offset);
PROC new scope = (PROC VOID whatever)VOID:
([]INT outer scope = cur scope;
[]INT restore = cur scope := mark(symbols);
INT offset := frame offset;
whatever;
frame offset := offset;
release(symbols, restore);
cur scope := outer scope);
# normalize the sub statements of if/then/else and while statements #
PROC sub statement = (REF STM stm)REF STMLIST:
IF stm IS NIL THEN
NIL
ELSE CASE stm IN
(DECLSTM d):
(complain("declaration not allowed here", pos OF id OF d);
HEAP STMLIST:=(stm, NIL)),
(REF STMLIST bloc):
bloc
OUT
HEAP STMLIST:=(stm, NIL)
ESAC FI;
# replace type variables with shared structures; do this just once #
INT ucount := 0;
PROC substitute type vars = (DECLFUN fun)VOID:
BEGIN
DICT types := new dictionary(1);
INT count := 0;
FOR i FROM 0 TO UPB args OF fun DO
REF REF TYPE cur = type OF (args OF fun)[i];
cur := edit type(cur);
PROC edit type = (REF TYPE type)REF TYPE:
CASE type IN
(LISTT t): HEAP TYPE:=LISTT(edit type (lt OF t),0),
(PAIRT t): HEAP TYPE:=PAIRT(edit type (lt OF t), edit type (rt OF t)),
(IDENT id):
IF DECLINFO found; retrieve (types, name OF id, found) THEN
(decl OF found| (PARAM v): type OF v | fatal error ("cosmic rays!?"); ~)
ELSE
PARAM var := PARAM(HEAP TYPE:=ucount+:=1, id);
assert (types, name OF id, DECLINFO(var, count+:=1));
type OF var
FI
OUT type
ESAC;
~
OD;
[count]PARAM type vars;
for each entry (types, (REF DICTENTRY entry)VOID:
type vars[pos OF content OF entry] := (decl OF content OF entry|(PARAM v):v|~));
unifiers OF fun := type vars
END;
# process the ast; check types & match identifiers in one pass.
we can do this since we require declarations to preceed usage #
for each decl(ast, (REF DECL this)VOID:
CASE this IN
(DECLSTM decl):
( cur scope := global scope;
value OF decl => type OF decl;
make symbol (id OF decl, this, 0) ),
(DECLFUN function):
( cur scope := global scope;
make symbol (function id (function), this, 0);
frame size := frame offset := 0;
[]INT restore point = cur scope := mark (symbols);
# record formal signature, then substitute #
TYPE return type := ret type(function);
FOR i TO UPB args OF function DO
PARAM parm = (args OF function)[i];
make symbol (id OF parm, parm, i)
OD;
substitute type vars (function);
for each stm(body OF function, process stm);
PROC process stm = (REF STM stm)VOID:
CASE stm IN
(RETURN v):
(value OF v|(EXPR e): e => return type,
(VOID) : type identify(void, return type, pos OF v)),
(FUNCALL e):
IF NOT type voiding THEN
e => void
ELSE
e => (dont care:=-1);
IF type sanity > 1 THEN
(dont care|(UNIFIER i): complain("result needed for type deduction", pos OF id OF e) | SKIP)
FI
FI,
(ASSIGN a):
value OF a => get var type (id OF a),
(REF STMLIST bloc):
new scope(VOID: for each stm(bloc, process stm)),
(IFSTM if):
( cond OF if => boolean;
REF STMLIST then = sub statement(then OF if);
REF STMLIST else = sub statement(else OF if);
then OF if := then;
IF else OF if ISNT NIL THEN else OF if := else FI;
new scope(VOID: for each stm(then, process stm));
new scope(VOID: for each stm(else, process stm)) ),
(WHILESTM wh):
( cond OF wh => boolean;
REF STMLIST body = sub statement(body OF wh);
body OF wh := body;
new scope(VOID: for each stm(body, process stm)) ),
(DECLSTM decl):
( value OF decl => type OF decl;
# auto-extenion; reset, and check if unified #
IF (type OF decl|(UNIFIER i): TRUE|FALSE) THEN
complain("could not deduce type of variable "+name OF id OF decl, pos OF id OF decl)
FI;
make symbol (id OF decl, decl, new offset) )
OUT
fatal error ("compiler is incomplete!")
ESAC;
frame size OF function := frame size;
release (symbols, restore point)
)
ESAC
);
PROC check function call = (FUNCALL call, REF TYPE target)VOID:
BEGIN
DECLFUN decl = get fun signature (call);
PARAMS sign = args OF decl;
PROC type check arguments = VOID:
IF UPB args OF call /= UPB sign THEN
complain ("wrong number of arguments in call", pos OF id OF call)
ELSE
FOR i TO UPB args OF call DO
(args OF call)[i] %> type OF sign[i]
OD
FI;
PROC all types instantiated = VOID:
FOR i TO UPB unifiers OF decl DO
PARAM unifier = (unifiers OF decl)[i];
(type OF unifier | (UNIFIER i):
complain("could not deduce type for type variable "+name OF id OF unifier+" of `"+name OF id OF call+"'", pos OF id OF call))
OD;
PROC detect improper types = VOID:
FOR i TO UPB unifiers OF decl DO
PARAM unifier = (unifiers OF decl)[i];
cycle check (type OF unifier);
PROC cycle check = ([]REF TYPE types)VOID:
BEGIN
[UPB types+1]REF TYPE seen; seen[2:] := types;
FOR i FROM 2 TO UPB types DO IF types[i] IS types[1] THEN
TYPE inf = types[1]; types[1] := id OF unifier;
complain ("attempt to instantiate cyclic type "+name OF id OF unifier+" = "+REPR inf, pos OF id OF call); exit
FI OD;
CASE types[1] IN
(SYMBOL s): IF s="Void" THEN complain ("attempt to instantiate "+name OF id OF unifier+" = Void", pos OF id OF call) FI,
(LISTT t): cycle check((seen[1]:=lt OF t; seen)),
(PAIRT t): (cycle check((seen[1]:=lt OF t; seen));
cycle check((seen[1]:=rt OF t; seen))),
(TYPELNK k): cycle check((seen[1]:=alias OF k; seen))
ESAC
END;
exit: SKIP
OD;
CASE type sanity IN
(type check arguments;
type identify(type OF sign[0], target, pos OF id OF call);
detect improper types),
(type identify(type OF sign[0], target, pos OF id OF call);
detect improper types; # prevent recursive error messages #
type check arguments;
all types instantiated;
detect improper types),
(type check arguments;
all types instantiated;
detect improper types;
type identify(type OF sign[0], target, pos OF id OF call))
OUT
type identify(type OF sign[0], target, pos OF id OF call);
type check arguments
ESAC;
IF NOT type chaining THEN FOR i TO UPB unifiers OF decl DO
PARAM unifier = (unifiers OF decl)[i];
type OF unifier := i
OD FI
END;
PROC get unifiers = (FUNCALL call)REF[]PARAM:
IF DECLINFO found; retrieve (symbols, name OF id OF call, found) THEN
(decl OF found | (DECLFUN fun): unifiers OF fun | HEAP[0]PARAM)
ELSE
HEAP[0]PARAM
FI;
PROC reset unifiers = (EXPR e)VOID:
CASE e IN
(MONAD m): (reset unifiers (lhs OF m)),
(DYAD d): (reset unifiers (lhs OF d); reset unifiers (rhs OF d)),
(TUPLE d): (reset unifiers (lhs OF d); reset unifiers (rhs OF d)),
(FUNCALL f): (FOR i TO UPB args OF f DO reset unifiers ((args OF f)[i]) OD;
[]PARAM unifiers = get unifiers (f);
FOR i TO UPB unifiers DO REF TYPE(type OF (unifiers)[i]) := i OD)
ESAC;
PRIO => = 5;
OP => = (EXPR expr, REF TYPE type)VOID:
IF NOT type chaining THEN
#expr %> (LOC TYPE:=type)#
expr %> type
ELSE
expr %> type;
reset unifiers (expr)
FI;
PRIO %> = 5;
OP %> = (EXPR expr, REF TYPE type)VOID:
CASE expr IN
(SYMBOL s):
IF s = "True" OR s = "False" THEN
type identify (boolean, type, pos OF s)
ELIF s = "[]" THEN
# a crucial case! #
TYPE any list := LISTT(HEAP TYPE:=42,0);
type identify (any list, type, pos OF s)
ELSE
fatal error ("illegal symbol?!")
FI,
(CONST i):
type identify (integral, type, pos OF i),
CO
what follows is actually a hardcoded library of operators ....
we could make this cleaner and allow for operator
overloading at the same time!
CO
(MONAD m):
IF op OF m = "-" THEN
type identify (integral, type, pos OF op OF m);
lhs OF m %> integral
ELIF op OF m = "!" THEN
type identify (boolean, type, pos OF op OF m);
lhs OF m %> boolean
ELSE
fatal error ("illegal operator?!")
FI,
(DYAD d):
IF char in string ((repr OF op OF d)[1], NIL, "&|") THEN
type identify (boolean, type, pos OF op OF d);
lhs OF d %> boolean;
rhs OF d %> boolean
ELIF char in string ((repr OF op OF d)[1], NIL, "!=<>") THEN
type identify (boolean, type, pos OF op OF d);
lhs OF d %> integral;
rhs OF d %> integral
ELIF op OF d = ":" THEN
HEAP TYPE elem := 23;
HEAP TYPE any list := LISTT(elem, 0);
lhs OF d %> elem;
rhs OF d %> any list;
type identify (any list, type, pos OF op OF d)
ELSE
type identify (integral, type, pos OF op OF d);
rhs OF d %> integral;
lhs OF d %> integral
FI,
(TUPLE t):
BEGIN
HEAP TYPE u := 23, v := 23;
HEAP TYPE any tuple := PAIRT(u, v);
lhs OF t %> u;
rhs OF t %> v;
type identify (any tuple, type, pos OF t)
END,
(IDENT id):
type identify (get var type (id), type, pos OF id),
(FUNCALL f):
check function call(f, type)
ESAC;
PROC type identify = (REF TYPE src, tgt, []INT pos)VOID:
(src = tgt | ~ | unification error(src, tgt, pos));
PROC unification error = (TYPE src, tgt, []INT pos)VOID:
complain ("cannot unify types: `"+REPR src+"' found but `"+REPR tgt+"' expected", pos);
OP = = (REF TYPE a, b)BOOL:
IF a IS b THEN TRUE ELSE
CASE b IN
# TODO further analysis needed -- when is this called? #
(TYPELNK k): a = alias OF k,
(UNIFIER i): (6DEBUG (VOID: print((REPR b,":=",REPR a,new line)));
CASE a IN
(UNIFIER j):
IF type deferral THEN
a := TYPELNK(b,~); TRUE
ELSE FALSE FI
OUT b := a; TRUE
ESAC)
COMMENT
we shouldn't unify unifiers; in normal circumstances the linking
behaviour is not necessary since arguments should be fully
instantiated.
COMMENT
OUSE a IN
(SYMBOL sa): (b|(SYMBOL sb):repr OF sa=repr OF sb|FALSE),
(LISTT la): (b|(LISTT lb):lt OF la = lt OF lb|FALSE),
(PAIRT pa): (b|(PAIRT pb):lt OF pa = lt OF pb AND
rt OF pa = rt OF pb|FALSE),
(IDENT ia): (b|(IDENT ib):name OF ia = name OF ib|FALSE),
(TYPELNK k): alias OF k = b,
(UNIFIER i): (6DEBUG (VOID: print((REPR b,"=:",REPR a,new line)));
a := b; TRUE)
COMMENT
sanity >= 2: this case should only validly apply in case of [];
any other use of a generic return type is forbidden elsewhere
COMMENT
OUT fatal error ("types types types!?"); ~
ESAC FI;
~
END;