Skip to content

Commit

Permalink
feat(ic2c): further ic2c implementation
Browse files Browse the repository at this point in the history
  • Loading branch information
JaDogg committed Jun 18, 2023
1 parent 0b4938a commit 3da613c
Show file tree
Hide file tree
Showing 20 changed files with 634 additions and 89 deletions.
2 changes: 1 addition & 1 deletion compiler/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ include_directories(tests)
include_directories(${UTF8_DIR})

set(YAKSHA_SOURCE_FILES
src/ast/ast.h src/ast/ast_printer.h src/ast/ast_vis.h src/ast/codefiles.h src/ast/environment.h src/ast/environment_stack.h src/ast/parser.h src/builtins/builtin.h src/builtins/builtins.h src/compiler/compiler.h src/compiler/compiler_utils.h src/compiler/datatype_compiler.h src/compiler/datatype_parser.h src/compiler/def_class_visitor.h src/compiler/delete_stack.h src/compiler/delete_stack_stack.h src/compiler/desugaring_compiler.h src/compiler/entry_struct_func_compiler.h src/compiler/function_datatype_extractor.h src/compiler/multifile_compiler.h src/compiler/slot_matcher.h src/compiler/statement_writer.h src/compiler/type_checker.h src/file_formats/ic_tokens_file.h src/file_formats/tokens_file.h src/ic2c/ic2c.h src/ic2c/ic_ast.h src/ic2c/ic_compiler.h src/ic2c/ic_level2_parser.h src/ic2c/ic_level2_tokenizer.h src/ic2c/ic_optimizer.h src/ic2c/ic_parser.h src/ic2c/ic_preprocessor.h src/ic2c/ic_token.h src/ic2c/ic_tokenizer.h src/tokenizer/block_analyzer.h src/tokenizer/string_utils.h src/tokenizer/token.h src/tokenizer/tokenizer.h src/utilities/annotation.h src/utilities/annotations.h src/utilities/defer_stack.h src/utilities/defer_stack_stack.h src/utilities/error_printer.h src/utilities/ykdatatype.h src/utilities/ykdt_pool.h src/utilities/ykobject.h src/ast/ast.cpp src/ast/ast_printer.cpp src/ast/ast_vis.cpp src/ast/codefiles.cpp src/ast/environment.cpp src/ast/environment_stack.cpp src/ast/parser.cpp src/builtins/builtins.cpp src/compiler/compiler.cpp src/compiler/compiler_utils.cpp src/compiler/def_class_visitor.cpp src/compiler/delete_stack.cpp src/compiler/delete_stack_stack.cpp src/compiler/desugaring_compiler.cpp src/compiler/entry_struct_func_compiler.cpp src/compiler/multifile_compiler.cpp src/compiler/type_checker.cpp src/file_formats/ic_tokens_file.cpp src/file_formats/tokens_file.cpp src/ic2c/ic2c.cpp src/ic2c/ic_ast.cpp src/ic2c/ic_compiler.cpp src/ic2c/ic_level2_parser.cpp src/ic2c/ic_level2_tokenizer.cpp src/ic2c/ic_optimizer.cpp 
src/ic2c/ic_parser.cpp src/ic2c/ic_preprocessor.cpp src/ic2c/ic_tokenizer.cpp src/tokenizer/block_analyzer.cpp src/tokenizer/string_utils.cpp src/tokenizer/tokenizer.cpp src/utilities/annotation.cpp src/utilities/annotations.cpp src/utilities/defer_stack.cpp src/utilities/defer_stack_stack.cpp src/utilities/error_printer.cpp src/utilities/ykdatatype.cpp src/utilities/ykdt_pool.cpp src/utilities/ykobject.cpp) # update_makefile.py SRC
src/ast/ast.h src/ast/ast_printer.h src/ast/ast_vis.h src/ast/codefiles.h src/ast/environment.h src/ast/environment_stack.h src/ast/parser.h src/builtins/builtin.h src/builtins/builtins.h src/compiler/compiler.h src/compiler/compiler_utils.h src/compiler/datatype_compiler.h src/compiler/datatype_parser.h src/compiler/def_class_visitor.h src/compiler/delete_stack.h src/compiler/delete_stack_stack.h src/compiler/desugaring_compiler.h src/compiler/entry_struct_func_compiler.h src/compiler/function_datatype_extractor.h src/compiler/multifile_compiler.h src/compiler/slot_matcher.h src/compiler/statement_writer.h src/compiler/type_checker.h src/file_formats/ic_tokens_file.h src/file_formats/tokens_file.h src/ic2c/ic2c.h src/ic2c/ic_ast.h src/ic2c/ic_compiler.h src/ic2c/ic_level2_parser.h src/ic2c/ic_level2_tokenizer.h src/ic2c/ic_line_splicer.h src/ic2c/ic_optimizer.h src/ic2c/ic_parser.h src/ic2c/ic_peek_ahead_iter.h src/ic2c/ic_preprocessor.h src/ic2c/ic_simple_character_iter.h src/ic2c/ic_token.h src/ic2c/ic_tokenizer.h src/ic2c/ic_trigraph_translater.h src/tokenizer/block_analyzer.h src/tokenizer/string_utils.h src/tokenizer/token.h src/tokenizer/tokenizer.h src/utilities/annotation.h src/utilities/annotations.h src/utilities/defer_stack.h src/utilities/defer_stack_stack.h src/utilities/error_printer.h src/utilities/ykdatatype.h src/utilities/ykdt_pool.h src/utilities/ykobject.h src/ast/ast.cpp src/ast/ast_printer.cpp src/ast/ast_vis.cpp src/ast/codefiles.cpp src/ast/environment.cpp src/ast/environment_stack.cpp src/ast/parser.cpp src/builtins/builtins.cpp src/compiler/compiler.cpp src/compiler/compiler_utils.cpp src/compiler/def_class_visitor.cpp src/compiler/delete_stack.cpp src/compiler/delete_stack_stack.cpp src/compiler/desugaring_compiler.cpp src/compiler/entry_struct_func_compiler.cpp src/compiler/multifile_compiler.cpp src/compiler/type_checker.cpp src/file_formats/ic_tokens_file.cpp src/file_formats/tokens_file.cpp src/ic2c/ic2c.cpp src/ic2c/ic_ast.cpp 
src/ic2c/ic_compiler.cpp src/ic2c/ic_level2_parser.cpp src/ic2c/ic_level2_tokenizer.cpp src/ic2c/ic_line_splicer.cpp src/ic2c/ic_optimizer.cpp src/ic2c/ic_parser.cpp src/ic2c/ic_peek_ahead_iter.cpp src/ic2c/ic_preprocessor.cpp src/ic2c/ic_tokenizer.cpp src/ic2c/ic_trigraph_translater.cpp src/tokenizer/block_analyzer.cpp src/tokenizer/string_utils.cpp src/tokenizer/tokenizer.cpp src/utilities/annotation.cpp src/utilities/annotations.cpp src/utilities/defer_stack.cpp src/utilities/defer_stack_stack.cpp src/utilities/error_printer.cpp src/utilities/ykdatatype.cpp src/utilities/ykdt_pool.cpp src/utilities/ykobject.cpp) # update_makefile.py SRC

set(YAKSHA_TEST_FILES
tests/btest.h tests/test_block_analyzer.cpp tests/test_compiler.cpp tests/test_ic_tokenizer.cpp tests/test_parser.cpp tests/test_string_utils.cpp tests/test_tokenizer.cpp tests/test_type_checker.cpp) # update_makefile.py TESTS
Expand Down
2 changes: 1 addition & 1 deletion compiler/hammer.toml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ args_c=["-std=c99"]
include_paths=["src", "3rd/utfcpp/source", "runtime"]
# .c or .cpp files that get compiled to .o files
# so this is all except the .c/.cpp file with main()
sources=["src/ast/ast.cpp", "src/ast/ast_printer.cpp", "src/ast/ast_vis.cpp", "src/ast/codefiles.cpp", "src/ast/environment.cpp", "src/ast/environment_stack.cpp", "src/ast/parser.cpp", "src/builtins/builtins.cpp", "src/compiler/compiler.cpp", "src/compiler/compiler_utils.cpp", "src/compiler/def_class_visitor.cpp", "src/compiler/delete_stack.cpp", "src/compiler/delete_stack_stack.cpp", "src/compiler/desugaring_compiler.cpp", "src/compiler/entry_struct_func_compiler.cpp", "src/compiler/multifile_compiler.cpp", "src/compiler/type_checker.cpp", "src/file_formats/ic_tokens_file.cpp", "src/file_formats/tokens_file.cpp", "src/ic2c/ic2c.cpp", "src/ic2c/ic_ast.cpp", "src/ic2c/ic_compiler.cpp", "src/ic2c/ic_level2_parser.cpp", "src/ic2c/ic_level2_tokenizer.cpp", "src/ic2c/ic_optimizer.cpp", "src/ic2c/ic_parser.cpp", "src/ic2c/ic_preprocessor.cpp", "src/ic2c/ic_tokenizer.cpp", "src/tokenizer/block_analyzer.cpp", "src/tokenizer/string_utils.cpp", "src/tokenizer/tokenizer.cpp", "src/utilities/annotation.cpp", "src/utilities/annotations.cpp", "src/utilities/defer_stack.cpp", "src/utilities/defer_stack_stack.cpp", "src/utilities/error_printer.cpp", "src/utilities/ykdatatype.cpp", "src/utilities/ykdt_pool.cpp", "src/utilities/ykobject.cpp"] # update_makefile.py HAMMER_CPP
sources=["src/ast/ast.cpp", "src/ast/ast_printer.cpp", "src/ast/ast_vis.cpp", "src/ast/codefiles.cpp", "src/ast/environment.cpp", "src/ast/environment_stack.cpp", "src/ast/parser.cpp", "src/builtins/builtins.cpp", "src/compiler/compiler.cpp", "src/compiler/compiler_utils.cpp", "src/compiler/def_class_visitor.cpp", "src/compiler/delete_stack.cpp", "src/compiler/delete_stack_stack.cpp", "src/compiler/desugaring_compiler.cpp", "src/compiler/entry_struct_func_compiler.cpp", "src/compiler/multifile_compiler.cpp", "src/compiler/type_checker.cpp", "src/file_formats/ic_tokens_file.cpp", "src/file_formats/tokens_file.cpp", "src/ic2c/ic2c.cpp", "src/ic2c/ic_ast.cpp", "src/ic2c/ic_compiler.cpp", "src/ic2c/ic_level2_parser.cpp", "src/ic2c/ic_level2_tokenizer.cpp", "src/ic2c/ic_line_splicer.cpp", "src/ic2c/ic_optimizer.cpp", "src/ic2c/ic_parser.cpp", "src/ic2c/ic_peek_ahead_iter.cpp", "src/ic2c/ic_preprocessor.cpp", "src/ic2c/ic_tokenizer.cpp", "src/ic2c/ic_trigraph_translater.cpp", "src/tokenizer/block_analyzer.cpp", "src/tokenizer/string_utils.cpp", "src/tokenizer/tokenizer.cpp", "src/utilities/annotation.cpp", "src/utilities/annotations.cpp", "src/utilities/defer_stack.cpp", "src/utilities/defer_stack_stack.cpp", "src/utilities/error_printer.cpp", "src/utilities/ykdatatype.cpp", "src/utilities/ykdt_pool.cpp", "src/utilities/ykobject.cpp"] # update_makefile.py HAMMER_CPP
temp_out_dir="bin/hammer"
binaries=["yaksha"]
disable_parallel=false
Expand Down
73 changes: 73 additions & 0 deletions compiler/scripts/output_tester.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
import subprocess
import tkinter as tk
import tkinter.messagebox
from tkinter.scrolledtext import ScrolledText
import os

# Timeout, in seconds, handed to Popen.communicate() in execute(); a command
# that is still running after this long is killed.
MAX_EXECUTION_TIME_SEC = 30
# Absolute path of the directory that contains this script; the temporary
# input file is created here.
ROOT = os.path.dirname(os.path.abspath(__file__))


def execute(arg: str) -> (str, str, int):
    """
    Run ``arg`` through the shell and capture everything it prints.

    :param arg: complete command line; it is interpreted by the shell
    :return: ``(stdout, stderr, return code)`` where both streams are decoded
        as UTF-8 and stripped of surrounding whitespace. If the command is
        still running after ``MAX_EXECUTION_TIME_SEC`` seconds it is killed
        and ``("", "", -1)`` is returned instead.
    """
    child = subprocess.Popen(
        arg,
        shell=True,
        encoding="utf-8",
        universal_newlines=True,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE)
    try:
        captured_out, captured_err = child.communicate(
            timeout=MAX_EXECUTION_TIME_SEC)
        exit_code = child.returncode
    except subprocess.TimeoutExpired:
        # Stop the runaway process, then drain its pipes so it can be reaped.
        child.kill()
        child.communicate()
        return "", "", -1
    return captured_out.strip(), captured_err.strip(), exit_code


def get_text(w):
    """Return everything held by text widget ``w``, from index "1.0" to END."""
    first_index = "1.0"
    return w.get(first_index, tkinter.END)


def set_text(w, txt):
    """Replace the whole content of text widget ``w`` with ``txt``."""
    first_index, last_index = '1.0', tkinter.END
    # Clear the old content first, then insert at the insertion cursor.
    w.delete(first_index, last_index)
    w.insert(tk.INSERT, txt)


class OutputTester:
    """
    Tiny Tk window for looking at the output of an external tool.

    The window has one button and two scrolled text areas: the upper one is
    the input, the lower one receives the tool's stdout followed by its
    stderr. Widgets are created in ``start()``; until then the attributes
    are ``None``.
    """

    def __init__(self):
        # Output text area, input text area, the "run" button and the Tk root.
        self.out = None
        self.inp = None
        self.run_pp = None
        self.root = None

    def _run_pp(self):
        """Button callback: feed the input text to ``zig cc -E``."""
        self._check_output("temp_file_please_delete.c", "zig cc -E %TEMP%")

    def _check_output(self, temp_file_name, command):
        """
        Write the input area to a temporary file, run ``command`` on it and
        show the result in the output area.

        :param temp_file_name: name of the temporary file, created in ROOT
        :param command: shell command; every ``%TEMP%`` in it is replaced by
            the path of the temporary file
        """
        text = get_text(self.inp)
        i_file = os.path.join(ROOT, temp_file_name)
        command_f = command.replace("%TEMP%", i_file)
        try:
            with open(i_file, "w", encoding="utf-8") as h:
                h.write(text)
            so, se, _ = execute(command_f)
            set_text(self.out, so + se)
        finally:
            # Always remove the temporary file, even when writing it or
            # running the command raised; it may not exist if open() failed.
            if os.path.exists(i_file):
                os.unlink(i_file)

    def start(self):
        """Build the window and block in the Tk main loop until it closes."""
        self.root = tk.Tk()
        self.root.title("Output tester")
        self.root.resizable(False, False)
        self.run_pp = tk.Button(self.root, text="Run real C preprocessor", command=self._run_pp)
        self.inp = ScrolledText(self.root)
        self.out = ScrolledText(self.root)
        self.run_pp.pack()
        self.inp.pack()
        self.out.pack()
        self.root.mainloop()


# Open the tester window only when this file is run as a script.
if __name__ == "__main__":
    OutputTester().start()
4 changes: 2 additions & 2 deletions compiler/src/ic2c/ic_ast.h
Original file line number Diff line number Diff line change
Expand Up @@ -303,8 +303,8 @@ namespace yaksha {
std::vector<ic_stmt *> cleanup_stmt_;
};
/**
* Parameter for a user defined function declaration
*/
* Parameter for a user defined function declaration
*/
struct ic_parameter {
ic_token *name_;
std::vector<ic_token *> data_type_;
Expand Down
49 changes: 49 additions & 0 deletions compiler/src/ic2c/ic_line_splicer.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
// ic_line_splicer.cpp
#include "ic_line_splicer.h"
using namespace yaksha;
ic_line_splicer::ic_line_splicer(ic_trigraph_translater &tt)
: tt_(tt), current_(0), consumed_extra_(false) {
read();
}
/**
 * Move to the next character.
 * If the previous read() already stepped the underlying iterator one
 * character too far (consumed_extra_), that character is simply picked up;
 * otherwise the underlying iterator is advanced first, unless it reports
 * that it has reached the end.
 */
void ic_line_splicer::next() {
  const bool must_step = !consumed_extra_ && !reached_end();
  if (must_step) { tt_.next(); }
  read();
}
/** @return whatever the underlying trigraph translater reports as its end state */
[[nodiscard]] bool ic_line_splicer::reached_end() {
  const bool underlying_done = tt_.reached_end();
  return underlying_done;
}
[[nodiscard]] uint32_t ic_line_splicer::get_current() { return current_; }
/**
 * @return column of the current character. When read() has stepped the
 * underlying iterator one character past it (consumed_extra_), the
 * underlying column is corrected by one.
 */
[[nodiscard]] int ic_line_splicer::get_column() {
  const int underlying_column = tt_.get_column();
  if (consumed_extra_) { return underlying_column - 1; }
  return underlying_column;
}
/** @return the line number reported by the underlying trigraph translater */
[[nodiscard]] int ic_line_splicer::get_line() {
  const int underlying_line = tt_.get_line();
  return underlying_line;
}
// Defaulted destructor: the constructor only stores a reference to the
// translater it is given, so there is nothing to release here.
ic_line_splicer::~ic_line_splicer() = default;
/**
 * Load the character at the underlying iterator's position into current_,
 * removing line splices on the way.
 *
 * A line splice is a backslash immediately followed by "\r\n", "\n" or a
 * lone "\r". Each splice is skipped entirely, and the character after it is
 * examined again, so several splices in a row are all removed (the C
 * translation phase 2 behaviour). Previously only the first splice of such a
 * run was removed and the following backslash/newline leaked through.
 *
 * Outcomes:
 *  - current_ is an ordinary character, or
 *  - current_ is '\\' and consumed_extra_ is true: the backslash was not part
 *    of a splice and the underlying iterator already sits on the character
 *    after it (next() and get_column() compensate for that), or
 *  - current_ is '\\' and the underlying iterator reported its end while
 *    positioned on the backslash, or
 *  - current_ is 0: the underlying iterator reported its end while looking
 *    at what follows a backslash.
 */
void ic_line_splicer::read() {
  consumed_extra_ = false;
  // Every pass either returns or has just skipped one complete splice, in
  // which case the character after the splice is inspected by the next pass.
  while (true) {
    current_ = tt_.get_current();
    if (current_ != '\\') return;
    if (tt_.reached_end()) return;
    tt_.next();
    const auto following = tt_.get_current();
    if (following == '\r' && !tt_.reached_end()) {
      tt_.next();
      if (tt_.get_current() == '\n') {
        if (tt_.reached_end()) {
          current_ = 0;
          return;
        }
        tt_.next();// step over the '\n' of a "\r\n" line ending
      }
      continue;
    }
    if (following == '\n' && !tt_.reached_end()) {
      tt_.next();
      continue;
    }
    if (tt_.reached_end()) {
      current_ = 0;
      return;
    }
    // Plain backslash: the underlying iterator is now one character ahead.
    consumed_extra_ = true;
    return;
  }
}
23 changes: 23 additions & 0 deletions compiler/src/ic2c/ic_line_splicer.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
// ic_line_splicer.h
#ifndef IC_LINE_SPLICER_H
#define IC_LINE_SPLICER_H
#include "ic_simple_character_iter.h"
#include "ic_trigraph_translater.h"
namespace yaksha {
/**
 * Character iterator layered on top of an ic_trigraph_translater that hides
 * line splices (a backslash directly followed by a line ending) from its
 * consumer.
 */
struct ic_line_splicer : public ic_simple_character_iter {
  /**
   * @param tt underlying character source; stored as a reference, so it must
   *           stay alive for as long as this object is used
   */
  explicit ic_line_splicer(ic_trigraph_translater &tt);
  /** Move to the next character. */
  void next() override;
  /** @return end state as reported by the underlying translater */
  [[nodiscard]] bool reached_end() override;
  /** @return the current character */
  [[nodiscard]] uint32_t get_current() override;
  /** @return column of the current character */
  [[nodiscard]] int get_column() override;
  /** @return line of the current character, as reported by the translater */
  [[nodiscard]] int get_line() override;
  ~ic_line_splicer() override;

private:
  /** Load current_ from tt_, skipping a line splice if one starts here. */
  void read();
  // Underlying character source (not owned).
  ic_trigraph_translater &tt_;
  // Character handed out by get_current().
  utf8::uint32_t current_;
  // True when read() moved tt_ one character past a backslash that turned
  // out not to start a line splice.
  bool consumed_extra_;
};
}// namespace yaksha
#endif
147 changes: 146 additions & 1 deletion compiler/src/ic2c/ic_parser.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,150 @@
// ic_parser.cpp
#include "ic_parser.h"
#include <algorithm>
#include <cassert>
using namespace yaksha;
// Defaulted default constructor.
ic_parser::ic_parser() = default;
// TODO: consider adding a dummy token for each comment, so that a comment is
//  guaranteed to be replaced by a single space. This can be skipped unless it
//  turns out to be really required; it probably is not, as line_splicer takes
//  care of this, and the newline after a comment is preserved as well.
// Defaulted destructor.
ic_parser::~ic_parser() = default;
ic_parser::ic_parser(std::vector<ic_token> &tokens, ic_ast_pool *ast_pool)
: tokens_(tokens), ast_pool_(ast_pool), errors_(), current_(0) {}
void ic_parser::parse() {
while (!is_at_end()) {
try {
statements_.emplace_back(preprocessor_statement());
} catch (ic_parsing_error &ex) {
// synchronize
while (peek()->type_ != ic_token_type::NEWLINE && !is_at_end()) {
advance();
}
}
}
}
/**
 * Consume the current token unless it is TC_EOF.
 * @return the token just before the (possibly unchanged) position
 */
ic_token *ic_parser::advance() {
  if (is_at_end()) { return previous(); }
  ++current_;
  return previous();
}
/**
 * Step back by one token, unless already at the first token.
 * @return the token at the resulting position
 */
ic_token *ic_parser::recede() {
  const bool can_step_back = current_ != 0;
  if (can_step_back) { --current_; }
  return peek();
}
bool ic_parser::is_at_end() { return peek()->type_ == ic_token_type::TC_EOF; }
/** @return pointer to the current token, without consuming it */
ic_token *ic_parser::peek() {
  ic_token &upcoming = tokens_[current_];
  return &upcoming;
}
/**
 * @return pointer to the token just before the current position.
 * Must not be called at the very first token (asserted).
 */
ic_token *ic_parser::previous() {
  assert(current_ > 0);
  const auto last_consumed = current_ - 1;
  return &tokens_[last_consumed];
}
bool ic_parser::match(std::initializer_list<ic_token_type> types) {
return std::any_of(types.begin(), types.end(), [this](ic_token_type t) {
if (this->check(t)) {
this->advance();
return true;
}
return false;
});
}
/**
 * @return true when the current token has type `t`; always false once the
 * TC_EOF token has been reached
 */
bool ic_parser::check(ic_token_type t) {
  return !is_at_end() && peek()->type_ == t;
}
/**
 * Consume the current token, which must have type `t`.
 * @param message error text used when the type does not match
 * @return the consumed token
 * @throws ic_parsing_error (also recorded through error()) on a mismatch
 */
ic_token *ic_parser::consume(ic_token_type t, const std::string &message) {
  if (!check(t)) { throw error(peek(), message); }
  return advance();
}
/**
 * Build a parsing error for `tok`, remember it in errors_ and hand it back
 * so the caller can throw it.
 * @param tok token the error refers to
 * @param message human readable description
 */
ic_parsing_error ic_parser::error(ic_token *tok, const std::string &message) {
  ic_parsing_error failure{message, tok};
  errors_.push_back(failure);
  return failure;
}
/**
 * Like consume(), but reaching the TC_EOF token is accepted as well.
 * @return the consumed token, or the TC_EOF token when at the end
 * @throws ic_parsing_error (also recorded through error()) when the current
 *         token is neither of type `t` nor TC_EOF
 */
ic_token *ic_parser::consume_or_eof(ic_token_type t,
                                    const std::string &message) {
  if (check(t)) { return advance(); }
  if (!is_at_end()) { throw error(peek(), message); }
  return peek();
}
/**
 * Parse one statement. A line starting with '#' is dispatched on the
 * identifier that follows it; anything else is handed to code_line().
 *
 * Errors are raised through error(), exactly like consume() does, so they
 * are recorded in errors_ before parse() catches them. Throwing
 * ic_parsing_error directly would lose the error, because parse() only
 * resynchronizes and does not record anything itself.
 *
 * @return the parsed statement
 * @throws ic_parsing_error when '#' is not followed by an identifier, or the
 *         identifier is not a directive handled here
 */
ic_stmt *ic_parser::preprocessor_statement() {
  if (peek()->type_ != ic_token_type::HASH) { return code_line(); }
  auto hash_t = advance();
  if (peek()->type_ != ic_token_type::IDENTIFIER) {
    throw error(hash_t, "Invalid token after #");
  }
  // The directive name is only compared, so refer to it instead of copying.
  const auto &identifier = peek()->token_;
  if (identifier == "define") { return define_st(hash_t); }
  if (identifier == "include") { return include_st(hash_t); }
  if (identifier == "line") { return line_st(hash_t); }
  if (identifier == "undef") { return undef_st(hash_t); }
  if (identifier == "error") { return error_st(hash_t); }
  if (identifier == "warning") { return warning_st(hash_t); }
  if (identifier == "pragma") { return pragma_st(hash_t); }
  if (identifier == "if" || identifier == "ifdef" || identifier == "ifndef") {
    return if_st(hash_t);
  }
  throw error(hash_t, "Unknown preprocessor statement");
}
/**
 * Parse a #define directive; the current token must be the `define`
 * identifier.
 *
 * Two forms are recognized:
 *  - `#define NAME ( a, b ) tokens...` when NAME is directly followed by an
 *    OPEN_PAREN token (function-like macro),
 *  - `#define NAME tokens...` otherwise.
 * Both must be terminated by a NEWLINE token.
 *
 * @param hash_t the '#' token that started the directive
 * @return the statement node created through ast_pool_
 * @throws ic_parsing_error (recorded through error()) when the macro name,
 *         the closing ')' or the trailing newline is missing
 */
ic_stmt *ic_parser::define_st(ic_token *hash_t) {
  auto define_tok = advance();
  if (!check(ic_token_type::IDENTIFIER)) {
    // Go through error() so the failure ends up in errors_.
    throw error(hash_t, "Expected a valid identifier after #define");
  }
  auto identifier_tok = advance();
  if (check(ic_token_type::OPEN_PAREN)) {
    // Function-like macro: collect the comma separated parameter tokens.
    auto paren_open = advance();
    std::vector<ic_token *> args{};
    if (!check(ic_token_type::CLOSE_PAREN)) {
      do {
        args.emplace_back(advance());
      } while (match({ic_token_type::COMMA}));
    }
    auto paren_close = consume(ic_token_type::CLOSE_PAREN,
                               "#define macro must have a valid ')'");
    std::vector<ic_token *> tok_string = token_string();
    consume(ic_token_type::NEWLINE, "new line is expected after #define");
    return ast_pool_->ic_c_pp_define_function_stmt(
        hash_t, define_tok, identifier_tok, paren_open, args, paren_close,
        tok_string);
  }
  // Object-like macro.
  std::vector<ic_token *> tok_string = token_string();
  consume(ic_token_type::NEWLINE, "new line is expected after #define");
  return ast_pool_->ic_c_pp_define_normal_stmt(hash_t, define_tok,
                                               identifier_tok, tok_string);
}
/**
 * Parse `#include "path"`; the current token must be the `include`
 * identifier. Only a STRING_LITERAL path followed by a NEWLINE is accepted.
 *
 * @param hash_t the '#' token that started the directive
 * @return the statement node created through ast_pool_
 * @throws ic_parsing_error (recorded through error()) when the path or the
 *         trailing newline is missing
 */
ic_stmt *ic_parser::include_st(ic_token *hash_t) {
  auto include_tok = advance();
  // The message used to be a copy of the #line one ("integer constant after
  // #line"), which pointed the user at the wrong directive.
  auto path_spec = consume(ic_token_type::STRING_LITERAL,
                           "Expected a path string after #include");
  consume(ic_token_type::NEWLINE, "New line expected after #include");
  return ast_pool_->ic_c_pp_include_stmt(hash_t, include_tok, path_spec);
}
/**
 * Parse a #line directive; the current token must be the `line` identifier.
 * Two STRING_LITERAL tokens followed by a NEWLINE are required.
 *
 * NOTE(review): the first message talks about an integer constant, but the
 * token type that is consumed is STRING_LITERAL - confirm which of the two
 * is intended.
 *
 * @param hash_t the '#' token that started the directive
 * @return the statement node created through ast_pool_
 * @throws ic_parsing_error (recorded through error()) when any of the three
 *         expected tokens is missing
 */
ic_stmt *ic_parser::line_st(ic_token *hash_t) {
  ic_token *const directive = advance();
  ic_token *const line_number = consume(
      ic_token_type::STRING_LITERAL, "Expected an integer constant after #line");
  ic_token *const file_path = consume(
      ic_token_type::STRING_LITERAL, "Expected a valid path token after #line");
  consume(ic_token_type::NEWLINE, "New line expected after #line");
  return ast_pool_->ic_c_pp_line_stmt(hash_t, directive, line_number,
                                      file_path);
}
/**
 * Parse `#undef NAME`; the current token must be the `undef` identifier.
 *
 * @param hash_t the '#' token that started the directive
 * @return the statement node created through ast_pool_
 * @throws ic_parsing_error (recorded through error()) when the identifier or
 *         the trailing newline is missing
 */
ic_stmt *ic_parser::undef_st(ic_token *hash_t) {
  ic_token *const directive = advance();
  ic_token *const macro_name =
      consume(ic_token_type::IDENTIFIER, "Expected an identifier after #undef");
  consume(ic_token_type::NEWLINE, "New line expected after #undef");
  return ast_pool_->ic_c_pp_undef_stmt(hash_t, directive, macro_name);
}
// The statement parsers below are placeholders: each returns nullptr and
// consumes no tokens.
// #error directive - not implemented yet.
ic_stmt *ic_parser::error_st(ic_token *hash_t) { return nullptr; }
// #warning directive - not implemented yet.
ic_stmt *ic_parser::warning_st(ic_token *hash_t) { return nullptr; }
// #pragma directive - not implemented yet.
ic_stmt *ic_parser::pragma_st(ic_token *hash_t) { return nullptr; }
// Line that does not start with '#' - not implemented yet. Because no token
// is consumed here, parse() makes no progress on such a line.
ic_stmt *ic_parser::code_line() { return nullptr; }
// Block of statements - not implemented yet.
ic_stmt *ic_parser::block(ic_token *hash_t) { return nullptr; }
// #if / #ifdef / #ifndef directives - not implemented yet.
ic_stmt *ic_parser::if_st(ic_token *hash_t) { return nullptr; }
/**
 * Placeholder for collecting the tokens that follow a directive; currently
 * consumes nothing.
 * @return always an empty vector
 */
std::vector<ic_token *> ic_parser::token_string() {
  return {};
}
Loading

0 comments on commit 3da613c

Please sign in to comment.