|
1 | 1 | module CSTParser |
2 | | -global debug = true |
3 | 2 |
|
4 | 3 | using Tokenize |
5 | | -import Base: length, first, last, getindex, setindex! |
6 | 4 | import Tokenize.Tokens |
7 | 5 | import Tokenize.Tokens: RawToken, AbstractToken, iskeyword, isliteral, isoperator, untokenize |
8 | | -import Tokenize.Lexers: Lexer, peekchar, iswhitespace |
| 6 | +import Tokenize.Lexers: Lexer, peekchar, iswhitespace, readchar, emit, emit_error, accept_batch, eof |
9 | 7 |
|
10 | | -export ParseState, parse_expression |
| 8 | +include("packagedef.jl") |
11 | 9 |
|
12 | | -include("lexer.jl") |
13 | | -include("spec.jl") |
14 | | -include("utils.jl") |
15 | | -include("recovery.jl") |
16 | | -include("components/internals.jl") |
17 | | -include("components/keywords.jl") |
18 | | -include("components/lists.jl") |
19 | | -include("components/operators.jl") |
20 | | -include("components/strings.jl") |
21 | | -include("conversion.jl") |
22 | | -include("display.jl") |
23 | | -include("interface.jl") |
24 | | -include("iterate.jl") |
25 | | - |
"""
    parse_expression(ps::ParseState, esc_on_error = false)

Parse one expression, continuing until `closer(ps) == true`. On entry `ps` is expected
to sit on the token *before* the beginning of the expression; on exit it sits on the
expression's last token.

When `esc_on_error` is `true` and the upcoming token has kind `Tokens.ERROR`, a
zero-width `:errortoken` is returned and no token is consumed.

Acceptable starting tokens are:
+ A keyword.
+ An opening parenthesis, square bracket or brace.
+ An operator.
+ An instance (e.g. identifier, number, etc.).
+ An `@`.
"""
function parse_expression(ps::ParseState, esc_on_error = false)
    upcoming = kindof(ps.nt)

    # Nothing left to read: report it without consuming a token.
    if upcoming === Tokens.ENDMARKER
        return mErrorToken(ps, UnexpectedToken)
    end

    # The caller asked to bail out as soon as an error token is next.
    if esc_on_error && ps.nt.kind == Tokens.ERROR
        return EXPR(:errortoken, 0, 0)
    end

    # A terminating token where an expression should start. `end` is exempt
    # while the `square` closer flag is set.
    if upcoming ∈ term_c && !(upcoming === Tokens.END && ps.closer.square)
        if ps.closer.square && upcoming === Tokens.RSQUARE
            return mErrorToken(ps, UnexpectedToken)
        end
        # Otherwise consume the offending token and wrap it in the error.
        return mErrorToken(ps, EXPR(next(ps)), UnexpectedToken)
    end

    next(ps)
    current = kindof(ps.t)
    if iskeyword(current) && current != Tokens.DO
        node = parse_kw(ps)
    elseif current === Tokens.LPAREN
        node = parse_paren(ps)
    elseif current === Tokens.LSQUARE
        node = @closer ps :for_generator @default ps parse_array(ps)
    elseif current === Tokens.LBRACE
        node = @default ps @closebrace ps parse_braces(ps)
    elseif isinstance(ps.t) || isoperator(ps.t)
        if both_symbol_and_op(ps.t)
            node = EXPR(:IDENTIFIER, ps)
        else
            @static if VERSION < v"1.6"
                # https://github.com/JuliaLang/julia/pull/37583
                node = INSTANCE(ps)
            else
                if ps.t.dotop && closer(ps) && !isassignmentop(ps.t)
                    # Split dotted operator into dot-call
                    opname = val(ps.t, ps)[2:end]
                    dotnode = EXPR(:OPERATOR, 1, 1, ".")
                    opnode = EXPR(
                        :OPERATOR,
                        ps.nt.startbyte - ps.t.startbyte - 1,
                        ps.t.endbyte - ps.t.startbyte,
                        opname,
                    )
                    node = EXPR(dotnode, EXPR[opnode], nothing)
                else
                    node = INSTANCE(ps)
                end
            end
        end
        if is_colon(node) && !(iscomma(ps.nt) || kindof(ps.ws) == SemiColonWS)
            node = parse_unary(ps, node)
        elseif isoperator(node) && assign_prec(valof(node)) && !isunaryop(node)
            node = mErrorToken(ps, node, UnexpectedAssignmentOp)
        end
    elseif current === Tokens.AT_SIGN
        node = parse_macrocall(ps)
    else
        node = mErrorToken(ps, INSTANCE(ps), UnexpectedToken)
    end

    # Keep extending the leading piece until a closer is reached.
    return parse_compound_recur(ps, node)
end
95 | | - |
"""
    parse_compound_recur(ps, ret)

Repeatedly extend `ret` with `parse_compound` until `closer(ps)` is true, then
return the accumulated expression.
"""
function parse_compound_recur(ps, ret)
    closer(ps) && return ret
    return parse_compound_recur(ps, parse_compound(ps, ret))
end
99 | | - |
"""
    parse_compound(ps::ParseState, ret::EXPR)

Attempt to parse one compound expression given the preceding expression `ret`, and
return the combined expression. The form is selected from the upcoming token
(`ps.nt`), the whitespace in `ps.ws` and the shape of `ret`. When no form applies,
`ps.errored` is set and an `:errortoken` containing `ret` and whatever follows is
returned.
"""
function parse_compound(ps::ParseState, ret::EXPR)
    if kindof(ps.nt) === Tokens.FOR
        return parse_generator(ps, ret)
    end

    if kindof(ps.nt) === Tokens.DO
        doblock = @default ps @closer ps :block parse_do(ps, ret)
        return doblock
    end

    if isajuxtaposition(ps, ret)
        # Juxtaposition is an implicit multiplication; flag disallowed numeric forms.
        lhs = disallowednumberjuxt(ret) ? mErrorToken(ps, ret, CannotJuxtapose) : ret
        return parse_operator(ps, lhs, EXPR(:OPERATOR, 0, 0, "*"))
    end

    if issuffixableliteral(ps, ret)
        # The suffix is appended to `ret` in place.
        if isnumberliteral(ps.nt)
            suffix = mLITERAL(next(ps))
            push!(ret, suffix)
        else
            suffix = EXPR(:IDENTIFIER, next(ps))
            push!(ret, EXPR(:STRING, suffix.fullspan, suffix.span, val(ps.t, ps)))
        end
        return ret
    end

    if (isidentifier(ret) || is_getfield(ret)) && isemptyws(ps.ws) && isprefixableliteral(ps.nt)
        return parse_prefixed_string_cmd(ps, ret)
    end

    if kindof(ps.nt) === Tokens.LPAREN
        # Record the whitespace state before the call is parsed.
        has_ws = !isemptyws(ps.ws)
        call = @closer ps :for_generator @closeparen ps parse_call(ps, ret)
        if has_ws && !isunarycall(call)
            return mErrorToken(ps, call, UnexpectedWhiteSpace)
        end
        return call
    end

    if kindof(ps.nt) === Tokens.LBRACE
        tight = isemptyws(ps.ws)
        curly = @default ps @nocloser ps :inwhere @closebrace ps parse_curly(ps, ret)
        return tight ? curly : mErrorToken(ps, curly, UnexpectedWhiteSpace)
    end

    if kindof(ps.nt) === Tokens.LSQUARE && isemptyws(ps.ws) && !isoperator(ret)
        ref = @closer ps :for_generator @default ps @nocloser ps :block parse_ref(ps, ret)
        return ref
    end

    if iscomma(ps.nt)
        return parse_tuple(ps, ret)
    end

    if isunaryop(ret) && kindof(ps.nt) != Tokens.EQ
        return parse_unary(ps, ret)
    end

    if isoperator(ps.nt)
        return parse_operator(ps, ret, EXPR(:OPERATOR, next(ps)))
    end

    if is_prime(ret.head)
        # prime operator followed by an identifier has an implicit multiplication
        rhs = @precedence ps TimesOp parse_expression(ps)
        return EXPR(:call, EXPR[EXPR(:OPERATOR, 0, 0, "*"), ret, rhs], nothing)
    end

    # ###############################################################################
    # Everything below here is an error
    # ###############################################################################
    ps.errored = true
    if kindof(ps.nt) in (Tokens.RPAREN, Tokens.RSQUARE, Tokens.RBRACE)
        follow = mErrorToken(ps, EXPR(next(ps)), Unknown)
    else
        follow = try
            parse_expression(ps)
        catch err
            # A stack overflow is reported with the parser state attached; any
            # other failure is turned into an error token.
            if err isa StackOverflowError
                error(string(ps, "\nsize: ", ps.l.io.size))
            end
            mErrorToken(ps, ret, Unknown)
        end
    end
    return EXPR(:errortoken, EXPR[ret, follow], nothing)
end
171 | | - |
"""
    parse_paren(ps::ParseState)

Parse an expression starting with a `(`. The current token is recorded as the
opening trivia and the comma-separated contents are collected. The result is a
`:brackets` expression when there is a single qualifying argument, otherwise a
`:tuple`.
"""
function parse_paren(ps::ParseState)
    args = EXPR[]
    trivia = EXPR[EXPR(ps)]
    @closeparen ps @default ps @nocloser ps :inwhere parse_comma_sep(ps, args, trivia, false, true, true, insert_params_at = 1)

    # Classify before the closing paren is accepted: the checks read `ps.ws`.
    is_single = length(args) == 1
    as_brackets = is_single && length(trivia) == 1 &&
        ((kindof(ps.ws) !== SemiColonWS || headof(args[1]) === :block) &&
         headof(args[1]) !== :parameters)
    legacy_empty_params = !as_brackets && VERSION < v"1.5" && is_single &&
        args[1].head === :parameters && isempty(args[1].args)

    accept_rparen(ps, trivia)

    if as_brackets
        return EXPR(:brackets, args, trivia)
    elseif legacy_empty_params
        # Before Julia 1.5 a lone empty `:parameters` is replaced by an empty block.
        pop!(args)
        push!(args, EXPR(:block, EXPR[], nothing))
        return EXPR(:brackets, args, trivia)
    end
    return EXPR(:tuple, args, trivia)
end
195 | | - |
"""
    parse(str, cont = false)

Parse the passed string and return the resulting expression. If `cont` is true,
parsing continues until the end of the string and the resulting expressions are
returned together in a single `:file` expression.
"""
function parse(str::String, cont=false)
    # The `ParseState` method returns `(expression, state)`; only the expression is kept.
    parsed = parse(ParseState(str), cont)
    return parsed[1]
end
206 | | - |
"""
    parse_doc(ps::ParseState)

Used for top-level parsing - attaches documentation (such as this) to expressions.

If the upcoming token is a string or triple-quoted string and `ps.nws` is not empty
whitespace, the string is read as a candidate docstring. It is returned on its own
when the end marker, an `end`, or a gap of more than one line follows. It is parsed
as the left-hand side of a compound expression when a binary operator follows.
Otherwise it is attached to the following expression in a `:macrocall`. Any other
input is parsed as a plain expression.
"""
function parse_doc(ps::ParseState)
    upcoming = kindof(ps.nt)
    starts_with_string = upcoming === Tokens.STRING || upcoming === Tokens.TRIPLE_STRING

    if starts_with_string && !isemptyws(ps.nws)
        doc = mLITERAL(next(ps))
        after = kindof(ps.nt)
        if after === Tokens.ENDMARKER || after === Tokens.END || ps.t.endpos[1] + 1 < ps.nt.startpos[1]
            # Nothing to document: the string stands alone.
            ret = doc
        elseif isbinaryop(ps.nt) && !closer(ps)
            # The string is an operand, not documentation.
            ret = parse_compound_recur(ps, doc)
        else
            target = parse_expression(ps)
            ret = EXPR(:macrocall, EXPR[EXPR(:globalrefdoc, 0, 0), EXPR(:NOTHING, 0, 0), doc, target], nothing)
        end
    else
        ret = parse_expression(ps)
    end

    # A short `@doc` macro call takes the expression on the next line as an argument.
    _continue_doc_parse(ps, ret) && push!(ret, parse_expression(ps))
    return ret
end
231 | | - |
"""
    parse(ps::ParseState, cont = false)

Parse from `ps` and return the tuple `(expression, ps)`.

With `cont == false` a single top-level item is parsed; items that follow on the
same line after a semicolon are gathered into a `:toplevel` expression. With
`cont == true` parsing continues to the end marker and every item is collected in a
`:file` expression. For empty input the expression is `nothing`, or an empty `:file`
when `cont` is true.
"""
function parse(ps::ParseState, cont=false)
    if ps.l.io.size == 0
        empty_result = cont ? EXPR(:file, EXPR[]) : nothing
        return empty_result, ps
    end

    if !cont
        lead = kindof(ps.nt)
        if lead === Tokens.WHITESPACE || lead === Tokens.COMMENT
            next(ps)
            top = EXPR(:NOTHING, ps.nt.startbyte, ps.nt.startbyte, "")
        elseif ps.done || lead === Tokens.ENDMARKER
            top = EXPR(:errortoken, EXPR[], nothing, 0, 0)
        else
            start_line = current_line(ps)
            if ps.nt.kind === Tokens.SEMICOLON
                next(ps)
                top = EXPR(:toplevel, EXPR[EXPR(:NOTHING, ps.nt.startbyte, ps.nt.startbyte, "")])
            else
                top = parse_doc(ps)
            end
            if kindof(ps.ws) == SemiColonWS
                # Gather the remaining semicolon-separated items on this line.
                top = EXPR(:toplevel, EXPR[top], nothing)
                checkpoint = position(ps)
                while kindof(ps.ws) == SemiColonWS && current_line(ps) == start_line && kindof(ps.nt) != Tokens.ENDMARKER
                    start_line = current_line(ps)
                    item = parse_doc(ps)
                    push!(top, item)
                    checkpoint = loop_check(ps, checkpoint)
                end
            end
        end
        return top, ps
    end

    root = EXPR(:file, EXPR[], nothing)
    lead = kindof(ps.nt)
    if lead === Tokens.WHITESPACE || lead === Tokens.COMMENT
        next(ps)
        push!(root, EXPR(:NOTHING, ps.nt.startbyte, ps.nt.startbyte, ""))
    elseif lead === Tokens.SEMICOLON
        next(ps)
        push!(root, EXPR(:toplevel, EXPR[EXPR(:NOTHING, ps.nt.startbyte, ps.nt.startbyte, "")]))
    end

    prev_line = 0
    checkpoint = position(ps)
    while kindof(ps.nt) !== Tokens.ENDMARKER
        this_line = ps.nt.startpos[1]
        item = parse_doc(ps)
        # join semicolon sep items
        if this_line == prev_line && headof(last(root.args)) === :toplevel
            push!(last(root.args), item)
            root.fullspan += item.fullspan
            root.span = root.fullspan - (item.fullspan - item.span)
        elseif kindof(ps.ws) == SemiColonWS
            push!(root, EXPR(:toplevel, EXPR[item]))
        else
            push!(root, item)
        end
        prev_line = this_line
        kindof(ps.nt) === Tokens.ENDMARKER && break # don't do loop check if eof
        checkpoint = loop_check(ps, checkpoint)
    end

    return root, ps
end
296 | | - |
"""
    _continue_doc_parse(ps::ParseState, x::EXPR)

Return `true` when `x` is a `:macrocall` whose first argument has the value `"@doc"`,
it has fewer than four arguments, the end marker has not been reached, and the
upcoming token starts on the line directly after the current token ends.
"""
function _continue_doc_parse(ps::ParseState, x::EXPR)
    kindof(ps.nt) === Tokens.ENDMARKER && return false
    headof(x) === :macrocall || return false
    valof(x.args[1]) == "@doc" || return false
    length(x.args) < 4 || return false
    return ps.t.endpos[1] + 1 == ps.nt.startpos[1]
end
304 | | - |
305 | | -include("precompile.jl") |
306 | | -_precompile() |
307 | 10 | end |
0 commit comments