
Commit

feat(parser,ast): decl statements for token replacement
JaDogg committed Jul 13, 2024
1 parent aec58c5 commit 0c433a6
Showing 14 changed files with 425 additions and 98 deletions.
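The feature in this commit is a `decl` statement, a token-level alias. Per the comments in the diffs below, `decl red console.red` records that every later `NAME` token `red` should be replaced by the tokens `console`, `.`, `red`; the parser applies the replacement in a dedicated pass before the token soup is parsed and again after macro expansion. The standalone C++ sketch below illustrates only the substitution idea; the string-based token representation, the `expand_aliases` helper, and the sample program are assumptions made for this illustration, not the compiler's code (see `parser::step_replace_decl_alias` in the parser.cpp diff for the real implementation).

// Illustrative sketch only: a decl alias treated as pure token-text substitution.
#include <iostream>
#include <string>
#include <unordered_map>
#include <vector>

// One decl alias maps a single name to a sequence of replacement tokens.
using AliasTable = std::unordered_map<std::string, std::vector<std::string>>;

// Replace every token that matches an alias with its replacement tokens.
std::vector<std::string> expand_aliases(const std::vector<std::string> &tokens,
                                        const AliasTable &aliases) {
  std::vector<std::string> out;
  out.reserve(tokens.size());
  for (const auto &t : tokens) {
    auto it = aliases.find(t);
    if (it == aliases.end()) {
      out.push_back(t);
    } else {
      out.insert(out.end(), it->second.begin(), it->second.end());
    }
  }
  return out;
}

int main() {
  // decl red console.red  -->  alias "red" expands to the tokens: console . red
  AliasTable aliases{{"red", {"console", ".", "red"}}};
  // Yaksha-like source after the decl line:  red("hello")
  std::vector<std::string> tokens{"red", "(", "\"hello\"", ")"};
  for (const auto &t : expand_aliases(tokens, aliases)) { std::cout << t << ' '; }
  std::cout << '\n';  // prints: console . red ( "hello" )
  return 0;
}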
26 changes: 15 additions & 11 deletions compiler/scripts/update_ast.py
@@ -70,7 +70,7 @@
# Assign to a variable
# We can promote an assignment to a let statement; if so, promoted is set to true
("assign", (("token*", "name"), ("token*", "opr"), ("expr*", "right"), ("bool", "promoted"),
("ykdatatype*", "promoted_data_type"))),
("yk_datatype*", "promoted_data_type"))),
# Assign to a member
("assign_member", (("expr*", "set_oper"), ("token*", "opr"), ("expr*", "right"))),
# Assign to array object
@@ -138,14 +138,18 @@
"import": "import_token_",
"runtimefeature": "runtimefeature_token_",
"compins": "name_",
"decl": "name_",
}
IGNORE_VISITS_STMT = {"elif", "macros", "token_soup", "dsl_macro"}
IGNORE_VISITS_STMT = {"elif", "macros", "token_soup", "dsl_macro", "decl"}
# Different kinds of statements
STMTS = sorted([
# Directives
# directive (os="windows|linux")? (defines="X=Y|A=B")? (no_runtime|no_main|c_include_path|c_link_path|c_link|c_file|ccode) ("STR")?
("directive", (("token*", "directive_token"), ("std::vector<parameter>", "values"), ("token*", "directive_type"),
("token*", "directive_val"))),
# Declare
# decl red console.red
("decl", (("token*", "decl_token"), ("token*", "name"), ("std::vector<token*>", "replacement"),)),
# Define macros in current file.
# Parser should check that for a single invocation of parse(), there must be only one macros section.
# -- macros code will be validated and parsed the same way.
Expand All @@ -161,7 +165,7 @@
# Why?
# this allows us to store arbitrary tokens in order in statement list
("token_soup", (("std::vector<token*>", "soup"),)),
("return", (("token*", "return_keyword"), ("expr*", "expression"), ("ykdatatype*", "result_type"))),
("return", (("token*", "return_keyword"), ("expr*", "expression"), ("yk_datatype*", "result_type"))),
# defer statement works just like how we use string deletions.
("defer", (("token*", "defer_keyword"), ("expr*", "expression"), ("stmt*", "del_statement"))),
# del statement
@@ -183,9 +187,9 @@
# ## or
# for x in expr:
# println(x)
("foreach", (("token*", "for_keyword"), ("token*", "name"), ("ykdatatype*", "data_type"),
("foreach", (("token*", "for_keyword"), ("token*", "name"), ("yk_datatype*", "data_type"),
("token*", "in_keyword"), ("expr*", "expression"), ("stmt*", "for_body"),
("ykdatatype*", "expr_datatype"),)),
("yk_datatype*", "expr_datatype"),)),
# For loop - endless loop
# for:
# println("endless")
@@ -209,17 +213,17 @@
("continue", (("token*", "continue_token"),)),
("break", (("token*", "break_token"),)),
# Let statements
("let", (("token*", "name"), ("ykdatatype*", "data_type"), ("expr*", "expression"))),
("const", (("token*", "name"), ("ykdatatype*", "data_type"), ("expr*", "expression"), ("bool", "is_global"))),
("let", (("token*", "name"), ("yk_datatype*", "data_type"), ("expr*", "expression"))),
("const", (("token*", "name"), ("yk_datatype*", "data_type"), ("expr*", "expression"), ("bool", "is_global"))),
# Native constant statement
# `ITEM: Const[int] = ccode """1 + 1"""`
("nativeconst", (("token*", "name"), ("ykdatatype*", "data_type"),
("nativeconst", (("token*", "name"), ("yk_datatype*", "data_type"),
("token*", "ccode_keyword"), ("token*", "code_str"), ("bool", "is_global"))),
# Function declarations
# Make sure we always say the return type
# `def abc(a: int) -> None:`
("def", (("token*", "name"), ("std::vector<parameter>", "params"),
("stmt*", "function_body"), ("ykdatatype*", "return_type"), ("annotations", "annotations"))),
("stmt*", "function_body"), ("yk_datatype*", "return_type"), ("annotations", "annotations"))),
("class", (("token*", "name"), ("std::vector<parameter>", "members"), ("annotations", "annotations"))),
("enum", (("token*", "name"), ("std::vector<parameter>", "members"), ("annotations", "annotations"))),
# import io [as io]
@@ -230,7 +234,7 @@
("runtimefeature", (("token*", "runtimefeature_token"), ("token*", "feature"))),
# ------------ Hidden special instructions for compiler --------------
("compins", (
("token*", "name"), ("ykdatatype*", "data_type"), ("token*", "meta1"), ("ykdatatype*", "meta2"),
("token*", "name"), ("yk_datatype*", "data_type"), ("token*", "meta1"), ("yk_datatype*", "meta2"),
("void*", "meta3"))
)
], key=lambda x: x[0])
@@ -364,7 +368,7 @@
*/
struct parameter {
token* name_;
ykdatatype* data_type_;
yk_datatype* data_type_;
token* enum_val_override_;
};
/**
5 changes: 4 additions & 1 deletion compiler/scripts/update_tokens.py
@@ -60,7 +60,10 @@
"and", "continue", "for", "try",
"as", "def", "from", "while",
"assert", "del", "not",
"elif", "if", "or", "defer", "ccode", "runtimefeature", "in", "struct", "macros", "directive", "enum"])
"elif", "if", "or", "defer",
"ccode", "runtimefeature", "in",
"struct", "macros", "directive",
"enum", "decl"])
TOKENS = sorted([
"NAME", "AT", "DOUBLE_NUMBER", "FLOAT_NUMBER", "INDENT", "BA_INDENT", "BA_DEDENT", "NEW_LINE", "COLON", "SEMICOLON",
"COMMENT",
51 changes: 51 additions & 0 deletions compiler/src/ast/ast.cpp
@@ -1,3 +1,41 @@
// ==============================================================================================
// ╦ ┬┌─┐┌─┐┌┐┌┌─┐┌─┐ Yaksha Programming Language
// ║ ││ ├┤ │││└─┐├┤ is Licensed with GPLv3 + extra terms. Please see below.
// ╩═╝┴└─┘└─┘┘└┘└─┘└─┘
// Note: libs - MIT license, runtime/3rd - various
// ==============================================================================================
// GPLv3:
//
// Yaksha - Programming Language.
// Copyright (C) 2020 - 2024 Bhathiya Perera
//
// This program is free software: you can redistribute it and/or modify it under the terms
// of the GNU General Public License as published by the Free Software Foundation,
// either version 3 of the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
// or FITNESS FOR A PARTICULAR PURPOSE.
// See the GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License along with this program.
// If not, see https://www.gnu.org/licenses/.
//
// ==============================================================================================
// Additional Terms:
//
// Please note that any commercial use of the programming language's compiler source code
// (everything except compiler/runtime, compiler/libs and compiler/3rd) require a written agreement
// with author of the language (Bhathiya Perera).
//
// If you are using it for an open source project, please give credits.
// Your own project must use GPLv3 license with these additional terms.
//
// You may use programs written in Yaksha/YakshaLisp for any legal purpose
// (commercial, open-source, closed-source, etc) as long as it agrees
// to the licenses of linked runtime libraries (see compiler/runtime/README.md).
//
// ==============================================================================================
// ast.cpp
// generated by update_ast.py
#include "ast/ast.h"
Expand Down Expand Up @@ -513,6 +551,19 @@ stmt *ast_pool::c_while_stmt(token *while_keyword, expr *expression,
cleanup_stmt_.push_back(o);
return o;
}
decl_stmt::decl_stmt(token *decl_token, token *name,
std::vector<token *> replacement)
: decl_token_(decl_token), name_(name),
replacement_(std::move(replacement)) {}
ast_type decl_stmt::get_type() { return ast_type::STMT_DECL; }
void decl_stmt::accept(stmt_visitor *v) {}
token *decl_stmt::locate() { return name_; }
stmt *ast_pool::c_decl_stmt(token *decl_token, token *name,
std::vector<token *> replacement) {
auto o = new decl_stmt(decl_token, name, std::move(replacement));
cleanup_stmt_.push_back(o);
return o;
}
dsl_macro_stmt::dsl_macro_stmt(token *name, token *name2, token *not_symbol_tok,
token *curly_open,
std::vector<token *> internal_soup,
51 changes: 51 additions & 0 deletions compiler/src/ast/ast.h
@@ -1,3 +1,41 @@
// ==============================================================================================
// ╦ ┬┌─┐┌─┐┌┐┌┌─┐┌─┐ Yaksha Programming Language
// ║ ││ ├┤ │││└─┐├┤ is Licensed with GPLv3 + extra terms. Please see below.
// ╩═╝┴└─┘└─┘┘└┘└─┘└─┘
// Note: libs - MIT license, runtime/3rd - various
// ==============================================================================================
// GPLv3:
//
// Yaksha - Programming Language.
// Copyright (C) 2020 - 2024 Bhathiya Perera
//
// This program is free software: you can redistribute it and/or modify it under the terms
// of the GNU General Public License as published by the Free Software Foundation,
// either version 3 of the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
// or FITNESS FOR A PARTICULAR PURPOSE.
// See the GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License along with this program.
// If not, see https://www.gnu.org/licenses/.
//
// ==============================================================================================
// Additional Terms:
//
// Please note that any commercial use of the programming language's compiler source code
// (everything except compiler/runtime, compiler/libs and compiler/3rd) require a written agreement
// with author of the language (Bhathiya Perera).
//
// If you are using it for an open source project, please give credits.
// Your own project must use GPLv3 license with these additional terms.
//
// You may use programs written in Yaksha/YakshaLisp for any legal purpose
// (commercial, open-source, closed-source, etc) as long as it agrees
// to the licenses of linked runtime libraries (see compiler/runtime/README.md).
//
// ==============================================================================================
// ast.h
// generated by update_ast.py
#ifndef AST_H
Expand Down Expand Up @@ -36,6 +74,7 @@ namespace yaksha {
struct compins_stmt;
struct const_stmt;
struct continue_stmt;
struct decl_stmt;
struct def_stmt;
struct defer_stmt;
struct del_stmt;
@@ -82,6 +121,7 @@ namespace yaksha {
STMT_COMPINS,
STMT_CONST,
STMT_CONTINUE,
STMT_DECL,
STMT_DEF,
STMT_DEFER,
STMT_DEL,
@@ -397,6 +437,15 @@ namespace yaksha {
token *locate() override;
token *continue_token_;
};
struct decl_stmt : stmt {
decl_stmt(token *decl_token, token *name, std::vector<token *> replacement);
void accept(stmt_visitor *v) override;
ast_type get_type() override;
token *locate() override;
token *decl_token_;
token *name_;
std::vector<token *> replacement_;
};
struct def_stmt : stmt {
def_stmt(token *name, std::vector<parameter> params, stmt *function_body,
yk_datatype *return_type, annotations annotations);
@@ -637,6 +686,8 @@ namespace yaksha {
stmt *c_const_stmt(token *name, yk_datatype *data_type, expr *expression,
bool is_global);
stmt *c_continue_stmt(token *continue_token);
stmt *c_decl_stmt(token *decl_token, token *name,
std::vector<token *> replacement);
stmt *c_def_stmt(token *name, std::vector<parameter> params,
stmt *function_body, yk_datatype *return_type,
annotations annotations);
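As a usage sketch of the node declared above: the hypothetical helper below (not part of the commit) reads the `decl_stmt` fields from this header (`name_`, `replacement_`) together with the `token_` text field of `token` (visible in the parser.cpp diff below) to render a decl back as text. It assumes the compiler's include paths and that `token::token_` is a `std::string`.

// Hypothetical helper, not in the commit: render a decl_stmt back as text.
#include "ast/ast.h"

#include <string>

std::string describe_decl(const yaksha::decl_stmt *d) {
  // name_ is the alias token; replacement_ is the recorded token sequence.
  std::string out = "decl " + d->name_->token_ + " ->";
  for (const auto *tk : d->replacement_) { out += " " + tk->token_; }
  return out;  // e.g. "decl red -> console . red"
}

Note that, as the diffs above suggest, `decl_stmt::accept` is a no-op and `decl` is listed in `IGNORE_VISITS_STMT`, so these nodes are consumed by the parser's replacement pass rather than visited by later compiler stages; a helper like this would only be reachable from that pass or from debugging code.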
120 changes: 120 additions & 0 deletions compiler/src/ast/parser.cpp
@@ -50,6 +50,7 @@
using namespace yaksha;
#ifdef YAKSHA_DEBUG
#define PRINT_PREPROCESSOR_OUTPUT
#define DUMP_TOKENS_ON_MACRO_ERROR
#endif
parser::parser(std::string filepath, std::vector<token *> &tokens,
yk_datatype_pool *pool)
@@ -1157,6 +1158,42 @@ void parser::step_4_expand_macros(macro_processor *mp,
// Validate tokens
for (auto tk : tokens_) {
if (!is_valid(tk->token_, tk->type_)) {
#ifdef DUMP_TOKENS_ON_MACRO_ERROR
{
std::cerr << "// Invalid token: [[" << tk->token_
<< "]] type: " << token_to_str(tk->type_) << " at "
<< tk->file_ << ":" << tk->line_ << ":" << tk->pos_
<< std::endl;
std::cerr << "// ----------- Preprocessor output for " << filepath_
<< "-------------------" << std::endl;
std::cerr << "/* " << std::endl;
token *prev = nullptr;
for (auto ttk : tokens_) {
// add a space if it is not jammed
if (prev != nullptr && (prev->line_ == ttk->line_) &&
(prev->pos_ + prev->token_.size() != ttk->pos_)) {
std::cerr << " ";
}
if (ttk->type_ == token_type::NEW_LINE) {
std::cerr << " [NL]" << std::endl;
} else if (ttk->type_ == token_type::BA_INDENT) {
std::cerr << "`{" << std::endl;
} else if (ttk->type_ == token_type::BA_DEDENT) {
std::cerr << "}`" << std::endl;
} else if (ttk->type_ == token_type::END_OF_FILE) {
std::cerr << "[EOF]" << std::endl;
} else if (ttk->type_ == token_type::STRING) {
std::cerr << "\"" << ttk->token_ << "\"";
} else {
std::cerr << ttk->token_;
}
prev = ttk;
}
std::cerr << "*/" << std::endl;
std::cerr << "// ---------------- end ------------------ "
<< std::endl;
}
#endif
throw error(tk, "Invalid token");
}
}
@@ -1165,6 +1202,12 @@ void parser::step_4_expand_macros(macro_processor *mp,
errors_.emplace_back(err.message_, err.tok_.file_, err.tok_.line_,
err.tok_.pos_);
}
// Execute decl replacement a second time, after macro expansion
if (!errors_.empty()) { return; }
step_replace_decl_alias();
if (tokens_.empty() || tokens_.back()->type_ != token_type::END_OF_FILE) {
tokens_.emplace_back(original_tokens_.back());
}
}
void parser::step_3_execute_macros(macro_processor *mp) {
try {
@@ -1183,6 +1226,11 @@ void parser::step_3_execute_macros(macro_processor *mp) {
}
}
void parser::step_1_parse_token_soup() {
step_replace_decl_alias();
if (tokens_.empty() || tokens_.back()->type_ != token_type::END_OF_FILE) {
tokens_.emplace_back(original_tokens_.back());
}
if (!errors_.empty()) { return; }
try {
std::vector<token *> tokens_buffer{};
parse_token_soup(soup_statements_, tokens_buffer);
@@ -1393,4 +1441,76 @@ std::vector<parameter> parser::parse_enum_members(token *name_token) {
consume(token_type::BA_DEDENT, "Expected dedent");
return values;
}
void parser::step_replace_decl_alias() {
// TODO verify that decl aliases start on an empty line
try {
// -------- Parse all the DECL statements ---------
std::vector<stmt *> decl_soup{};
std::vector<token *> tokens_buffer{};
tokens_buffer.reserve(30);
std::unordered_map<std::string, decl_stmt *> decls{};
std::vector<token *> new_tokens{};
new_tokens.reserve(tokens_.size());
while (!is_at_end_of_stream()) {
// decl red console.red
// decl_keyword alias replacement_tokens
if (match({token_type::KEYWORD_DECL})) {
if (!tokens_buffer.empty()) {
decl_soup.emplace_back(pool_.c_token_soup_stmt(tokens_buffer));
tokens_buffer.clear();
}
auto decl_kw = previous();
auto alias = consume(token_type::NAME, "Alias must be present");
// get all tokens until newline for replacement
while (!check(token_type::NEW_LINE)) {
tokens_buffer.emplace_back(advance());
}
consume(token_type::NEW_LINE, "Expect new line after replacement");
// note: tokens_buffer is copied here (passed by value), not taken by reference
decls[alias->token_] = dynamic_cast<decl_stmt *>(
pool_.c_decl_stmt(decl_kw, alias, tokens_buffer));
tokens_buffer.clear();
} else {
tokens_buffer.emplace_back(advance());
}
}
if (!tokens_buffer.empty()) {
decl_soup.emplace_back(pool_.c_token_soup_stmt(tokens_buffer));
tokens_buffer.clear();
}
// --------------- Replace all names that match an alias ------------
if (decls.empty()) {
// No decl statements, so no need to replace anything
current_ = 0;
return;
}
for (stmt *soup : decl_soup) {
if (soup->get_type() == ast_type::STMT_TOKEN_SOUP) {
auto tokens = dynamic_cast<token_soup_stmt *>(soup);
for (token *tk : tokens->soup_) {
if (tk->type_ == token_type::NAME) {
auto decl = decls.find(tk->token_);
if (decl != decls.end()) {
auto replacement = decl->second->replacement_;
for (token *rt : replacement) {
rt->line_ = tk->line_;
rt->pos_ = tk->pos_;
new_tokens.emplace_back(rt);
}
} else {
new_tokens.emplace_back(tk);
}
} else {
new_tokens.emplace_back(tk);
}
}
}
}
current_ = 0;
tokens_ = new_tokens;
} catch (parsing_error &err) {
errors_.emplace_back(err.message_, err.tok_.file_, err.tok_.line_,
err.tok_.pos_);
}
}
#pragma clang diagnostic pop
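To make the shape of `step_replace_decl_alias` above easier to follow, here is a minimal standalone sketch of the same two phases: collect `decl` lines into a map, then rebuild the token stream, stamping each replacement token with the use site's line and position (mirroring the `rt->line_`/`rt->pos_` assignments above). The simplified `tok` struct, the line-based input, and `replace_decl_alias` are assumptions for illustration only, not the compiler's types or exact logic.

// Standalone illustration of the decl replacement pass (simplified types).
#include <iostream>
#include <string>
#include <unordered_map>
#include <vector>

struct tok {           // stand-in for the compiler's token
  std::string text;
  int line;
  int pos;
  bool is_name;        // true for NAME tokens (only these match aliases)
};

// Phase 1: strip "decl <alias> <replacement...>" lines and record them.
// Phase 2: rebuild the stream, substituting alias uses and copying the
//          use site's line/pos onto every replacement token.
std::vector<tok> replace_decl_alias(const std::vector<std::vector<tok>> &lines) {
  std::unordered_map<std::string, std::vector<tok>> decls;
  std::vector<tok> out;
  for (const auto &line : lines) {
    if (!line.empty() && line.front().text == "decl" && line.size() >= 3) {
      decls[line[1].text] = std::vector<tok>(line.begin() + 2, line.end());
      continue;  // the decl line itself is dropped from the output stream
    }
    for (const auto &t : line) {
      auto it = t.is_name ? decls.find(t.text) : decls.end();
      if (it == decls.end()) {
        out.push_back(t);
      } else {
        for (tok r : it->second) {  // copy, then stamp the use-site location
          r.line = t.line;
          r.pos = t.pos;
          out.push_back(r);
        }
      }
    }
  }
  return out;
}

int main() {
  std::vector<std::vector<tok>> lines{
      {{"decl", 1, 0}, {"red", 1, 5, true}, {"console", 1, 9, true}, {".", 1, 16}, {"red", 1, 17, true}},
      {{"red", 2, 0, true}, {"(", 2, 3}, {"\"hi\"", 2, 4}, {")", 2, 8}},
  };
  for (const auto &t : replace_decl_alias(lines)) {
    std::cout << t.text << "@" << t.line << ":" << t.pos << " ";
  }
  std::cout << "\n";  // console@2:0 .@2:0 red@2:0 (@2:3 "hi"@2:4 )@2:8
  return 0;
}

As in the loop above, replacement tokens are emitted as-is in a single pass, so an alias is not recursively re-expanded within the same run.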