-
Notifications
You must be signed in to change notification settings - Fork 12
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat(ic2c): further ic2c implementation
- Loading branch information
Showing
20 changed files
with
634 additions
and
89 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,73 @@ | ||
import os
import subprocess
import tkinter as tk
import tkinter.messagebox
from tkinter.scrolledtext import ScrolledText
from typing import Tuple
|
||
# Upper bound, in seconds, on how long a spawned command is allowed to run.
MAX_EXECUTION_TIME_SEC = 30
# Absolute path of the directory that contains this script.
ROOT = os.path.split(os.path.abspath(__file__))[0]
|
||
|
||
def execute(arg: str) -> Tuple[str, str, int]:
    """Run ``arg`` through the system shell and capture what it prints.

    The command gets at most ``MAX_EXECUTION_TIME_SEC`` seconds to finish.

    :param arg: complete command line; it is interpreted by the shell
        (``shell=True``), so it must not contain untrusted text.
    :return: ``(stdout, stderr, return_code)``. Both streams are decoded as
        UTF-8 and stripped of leading/trailing whitespace. If the time limit
        is exceeded the process is killed and ``("", "", -1)`` is returned.
    """
    proc = subprocess.Popen(
        arg,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        # Passing ``encoding`` already opens the pipes in text mode, so the
        # former ``universal_newlines=True`` was redundant.
        encoding="utf-8",
        shell=True)
    try:
        so, se = proc.communicate(timeout=MAX_EXECUTION_TIME_SEC)
    except subprocess.TimeoutExpired:
        proc.kill()
        # Drain the pipes and reap the killed process.
        proc.communicate()
        return "", "", -1
    return so.strip(), se.strip(), proc.returncode
|
||
|
||
def get_text(w):
    """Return everything held by text widget ``w``, from index 1.0 to END."""
    first_index, last_index = "1.0", tkinter.END
    return w.get(first_index, last_index)
|
||
|
||
def set_text(w, txt):
    """Replace the whole content of text widget ``w`` with ``txt``."""
    first_index = '1.0'
    # Clear the widget first, then write the new text at the insert mark.
    w.delete(first_index, tkinter.END)
    w.insert(tk.INSERT, txt)
|
||
|
||
class OutputTester:
    """Small Tk window for comparing against a real C preprocessor.

    Text typed into the input pane is written to a temporary file, an
    external command is run on that file, and the command's stdout followed
    by its stderr is shown in the output pane.
    """

    def __init__(self):
        # All widgets are created in start(); until then they are None.
        self.out = None     # output pane
        self.inp = None     # input pane
        self.run_pp = None  # "Run real C preprocessor" button
        self.root = None    # Tk root window

    def _run_pp(self):
        """Button callback: preprocess the input text with ``zig cc -E``."""
        self._check_output("temp_file_please_delete.c", "zig cc -E %TEMP%")

    def _check_output(self, temp_file_name, command):
        """Run ``command`` on the input pane's text and display the result.

        :param temp_file_name: name of the scratch file created inside ROOT.
        :param command: command template; every ``%TEMP%`` is replaced with
            the scratch file's full path.
        """
        text = get_text(self.inp)
        i_file = os.path.join(ROOT, temp_file_name)
        command_f = command.replace("%TEMP%", i_file)
        try:
            with open(i_file, "w", encoding="utf-8") as h:
                h.write(text)
            so, se, _ = execute(command_f)
            set_text(self.out, so + se)
        finally:
            # Always remove the scratch file, even when running the command
            # or updating the widget fails.
            try:
                os.unlink(i_file)
            except FileNotFoundError:
                pass

    def start(self):
        """Build the window and block in the Tk main loop until it closes."""
        self.root = tk.Tk()
        self.root.title("Output tester")
        self.root.resizable(False, False)
        self.run_pp = tk.Button(self.root, text="Run real C preprocessor", command=self._run_pp)
        self.inp = ScrolledText(self.root)
        self.out = ScrolledText(self.root)
        # Stacked top to bottom: button, input pane, output pane.
        self.run_pp.pack()
        self.inp.pack()
        self.out.pack()
        self.root.mainloop()
|
||
|
||
# Open the window only when this file is executed as a script.
if __name__ == "__main__":
    tester = OutputTester()
    tester.start()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,49 @@ | ||
// ic_line_splicer.cpp | ||
#include "ic_line_splicer.h" | ||
using namespace yaksha; | ||
ic_line_splicer::ic_line_splicer(ic_trigraph_translater &tt) | ||
: tt_(tt), current_(0), consumed_extra_(false) { | ||
read(); | ||
} | ||
void ic_line_splicer::next() { | ||
if (consumed_extra_) { | ||
read(); | ||
return; | ||
} | ||
if (!reached_end()) { tt_.next(); } | ||
read(); | ||
} | ||
/// Reports whatever the wrapped trigraph translater reports as end-of-input.
[[nodiscard]] bool ic_line_splicer::reached_end() {
  const bool underlying_at_end = tt_.reached_end();
  return underlying_at_end;
}
/// Returns the character selected by the most recent read().
[[nodiscard]] uint32_t ic_line_splicer::get_current() {
  return current_;
}
[[nodiscard]] int ic_line_splicer::get_column() { | ||
return consumed_extra_ ? tt_.get_column() - 1 : tt_.get_column(); | ||
} | ||
/// Returns the line number reported by the wrapped trigraph translater.
[[nodiscard]] int ic_line_splicer::get_line() {
  return tt_.get_line();
}
/// Nothing to release: tt_ is a reference, not an owned object.
ic_line_splicer::~ic_line_splicer() = default;
/// Loads current_ from tt_, skipping a backslash that is directly followed by
/// a line break ("\n", "\r\n" or a lone "\r").
/// NOTE(review): a splice immediately followed by another splice is not
/// collapsed here -- confirm whether that is intended.
void ic_line_splicer::read() {
  consumed_extra_ = false;
  current_ = tt_.get_current();
  // Only a backslash with more input behind it can start a splice.
  if (current_ != '\\' || tt_.reached_end()) { return; }

  tt_.next();
  const auto following = tt_.get_current();
  const bool more_input = !tt_.reached_end();

  if (following == '\n' && more_input) {
    // backslash + LF: deliver the character after the line break.
    tt_.next();
    current_ = tt_.get_current();
    return;
  }
  if (following == '\r' && more_input) {
    tt_.next();
    if (tt_.get_current() != '\n') {
      // backslash + lone CR: the character after CR is delivered.
      current_ = tt_.get_current();
      return;
    }
    if (tt_.reached_end()) {
      // backslash + CR LF with tt_ reporting end: deliver 0.
      current_ = 0;
      return;
    }
    // backslash + CR LF: deliver the character after the line break.
    tt_.next();
    current_ = tt_.get_current();
    return;
  }
  if (!more_input) {
    // tt_ reports end right after the backslash: deliver 0.
    current_ = 0;
    return;
  }
  // Plain backslash: current_ stays '\\' and tt_ is now one character ahead.
  consumed_extra_ = true;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
// ic_line_splicer.h | ||
#ifndef IC_LINE_SPLICER_H | ||
#define IC_LINE_SPLICER_H | ||
#include "ic_simple_character_iter.h" | ||
#include "ic_trigraph_translater.h" | ||
namespace yaksha { | ||
struct ic_line_splicer : public ic_simple_character_iter { | ||
explicit ic_line_splicer(ic_trigraph_translater &tt); | ||
void next() override; | ||
[[nodiscard]] bool reached_end() override; | ||
[[nodiscard]] uint32_t get_current() override; | ||
[[nodiscard]] int get_column() override; | ||
[[nodiscard]] int get_line() override; | ||
~ic_line_splicer() override; | ||
|
||
private: | ||
void read(); | ||
ic_trigraph_translater &tt_; | ||
utf8::uint32_t current_; | ||
bool consumed_extra_; | ||
}; | ||
}// namespace yaksha | ||
#endif |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,150 @@ | ||
// ic_parser.cpp | ||
#include "ic_parser.h" | ||
#include <algorithm> | ||
#include <cassert> | ||
using namespace yaksha; | ||
/// Default construction; members take their default state.
ic_parser::ic_parser() = default;
// TODO: consider adding a dummy token for comments so that each comment is
// (at least in theory) replaced by a single space.
// This can be skipped if it turns out not to be required; it probably is not,
// because line_splicer already takes care of this.
// The newline after a comment is also preserved.
/// Compiler-generated destruction is sufficient for this definition.
ic_parser::~ic_parser() = default;
/// Creates a parser over `tokens` that allocates AST nodes from `ast_pool`.
/// Parsing starts at token index 0 with an empty error list.
ic_parser::ic_parser(std::vector<ic_token> &tokens, ic_ast_pool *ast_pool)
    : tokens_(tokens),
      ast_pool_(ast_pool),
      errors_(),
      current_(0) {
}
void ic_parser::parse() { | ||
while (!is_at_end()) { | ||
try { | ||
statements_.emplace_back(preprocessor_statement()); | ||
} catch (ic_parsing_error &ex) { | ||
// synchronize | ||
while (peek()->type_ != ic_token_type::NEWLINE && !is_at_end()) { | ||
advance(); | ||
} | ||
} | ||
} | ||
} | ||
/// Steps past the current token (unless it is EOF) and returns the token
/// just before the new position.
ic_token *ic_parser::advance() {
  const bool can_step = !is_at_end();
  if (can_step) { ++current_; }
  return previous();
}
/// Steps back one token (never before index 0) and returns the token that
/// is current afterwards.
ic_token *ic_parser::recede() {
  if (current_ == 0) { return peek(); }
  --current_;
  return peek();
}
bool ic_parser::is_at_end() { return peek()->type_ == ic_token_type::TC_EOF; } | ||
/// Returns the current token without consuming it.
ic_token *ic_parser::peek() {
  return &tokens_[current_];
}
/// Returns the token just before the current one.
/// Asserts that the parser is not at index 0.
ic_token *ic_parser::previous() {
  assert(current_ > 0);
  const auto prior_index = current_ - 1;
  return &tokens_[prior_index];
}
bool ic_parser::match(std::initializer_list<ic_token_type> types) { | ||
return std::any_of(types.begin(), types.end(), [this](ic_token_type t) { | ||
if (this->check(t)) { | ||
this->advance(); | ||
return true; | ||
} | ||
return false; | ||
}); | ||
} | ||
/// True when the parser is not at EOF and the current token has type `t`.
/// Nothing is consumed.
bool ic_parser::check(ic_token_type t) {
  return !is_at_end() && peek()->type_ == t;
}
/// Consumes and returns the current token when it has type `t`.
/// Otherwise records `message` via error() and throws the resulting
/// ic_parsing_error.
ic_token *ic_parser::consume(ic_token_type t, const std::string &message) {
  if (!check(t)) { throw error(peek(), message); }
  return advance();
}
ic_parsing_error ic_parser::error(ic_token *tok, const std::string &message) { | ||
auto err = ic_parsing_error{message, tok}; | ||
errors_.push_back(err); | ||
return err; | ||
} | ||
/// Like consume(), but also accepts EOF: at EOF the EOF token is returned
/// without being consumed. Any other token records `message` and throws.
ic_token *ic_parser::consume_or_eof(ic_token_type t,
                                    const std::string &message) {
  if (check(t)) { return advance(); }
  if (!is_at_end()) { throw error(peek(), message); }
  return peek();
}
/// Parses one statement. A line starting with `#` is dispatched on the
/// directive name that follows; anything else is handed to code_line().
/// The directive identifier itself is left for the handler to consume.
/// Throws ic_parsing_error when `#` is not followed by an identifier or the
/// directive name is not recognised.
ic_stmt *ic_parser::preprocessor_statement() {
  if (peek()->type_ != ic_token_type::HASH) { return code_line(); }
  auto hash_t = advance();
  if (peek()->type_ != ic_token_type::IDENTIFIER) {
    // Go through error() so the failure lands in errors_; parse() swallows
    // the exception, so a bare throw would lose the diagnostic.
    throw error(hash_t, "Invalid token after #");
  }
  // Bind by reference: the token outlives this call, no copy is needed.
  const auto &identifier = peek()->token_;
  if (identifier == "define") { return define_st(hash_t); }
  if (identifier == "include") { return include_st(hash_t); }
  if (identifier == "line") { return line_st(hash_t); }
  if (identifier == "undef") { return undef_st(hash_t); }
  if (identifier == "error") { return error_st(hash_t); }
  if (identifier == "warning") { return warning_st(hash_t); }
  if (identifier == "pragma") { return pragma_st(hash_t); }
  if (identifier == "if" || identifier == "ifdef" || identifier == "ifndef") {
    return if_st(hash_t);
  }
  throw error(hash_t, "Unknown preprocessor statement");
}
/// Parses `#define NAME tokens...` or `#define NAME(params) tokens...`.
/// `hash_t` is the already consumed `#`; the current token is `define`.
/// Returns the matching AST node from ast_pool_.
/// Throws ic_parsing_error when the macro name, the closing `)` or the
/// terminating NEWLINE is missing.
ic_stmt *ic_parser::define_st(ic_token *hash_t) {
  auto define_tok = advance();
  if (!check(ic_token_type::IDENTIFIER)) {
    // Recorded through error() so it is not lost when parse() catches it.
    throw error(hash_t, "Expected a valid identifier after #define");
  }
  auto identifier_tok = advance();
  if (check(ic_token_type::OPEN_PAREN)) {
    // Function-like macro: collect the comma separated parameter tokens.
    auto paren_open = advance();
    std::vector<ic_token *> args{};
    if (!check(ic_token_type::CLOSE_PAREN)) {
      do {
        args.emplace_back(advance());
      } while (match({ic_token_type::COMMA}));
    }
    auto paren_close = consume(ic_token_type::CLOSE_PAREN,
                               "#define macro must have a valid ')'");
    std::vector<ic_token *> tok_string = token_string();
    consume(ic_token_type::NEWLINE, "new line is expected after #define");
    return ast_pool_->ic_c_pp_define_function_stmt(
        hash_t, define_tok, identifier_tok, paren_open, args, paren_close,
        tok_string);
  }
  // Object-like macro.
  std::vector<ic_token *> tok_string = token_string();
  consume(ic_token_type::NEWLINE, "new line is expected after #define");
  return ast_pool_->ic_c_pp_define_normal_stmt(hash_t, define_tok,
                                               identifier_tok, tok_string);
}
/// Parses `#include "path"` followed by NEWLINE.
/// `hash_t` is the already consumed `#`; the current token is `include`.
/// Throws ic_parsing_error when the string literal or the NEWLINE is missing.
ic_stmt *ic_parser::include_st(ic_token *hash_t) {
  auto include_tok = advance();
  // The diagnostic used to be the one copied from #line.
  auto path_spec = consume(ic_token_type::STRING_LITERAL,
                           "Expected a string literal path after #include");
  consume(ic_token_type::NEWLINE, "New line expected after #include");
  return ast_pool_->ic_c_pp_include_stmt(hash_t, include_tok, path_spec);
}
/// Parses `#line <number> <path>` followed by NEWLINE.
/// `hash_t` is the already consumed `#`; the current token is `line`.
/// NOTE(review): the line number is matched as STRING_LITERAL although the
/// message asks for an integer constant -- confirm the intended token type.
ic_stmt *ic_parser::line_st(ic_token *hash_t) {
  ic_token *line_tok = advance();
  ic_token *integer_const_tok = consume(
      ic_token_type::STRING_LITERAL, "Expected an integer constant after #line");
  ic_token *path_tok = consume(
      ic_token_type::STRING_LITERAL, "Expected a valid path token after #line");
  consume(ic_token_type::NEWLINE, "New line expected after #line");
  return ast_pool_->ic_c_pp_line_stmt(hash_t, line_tok, integer_const_tok,
                                      path_tok);
}
/// Parses `#undef NAME` followed by NEWLINE.
/// `hash_t` is the already consumed `#`; the current token is `undef`.
ic_stmt *ic_parser::undef_st(ic_token *hash_t) {
  ic_token *undef_tok = advance();
  ic_token *identifier_tok = consume(ic_token_type::IDENTIFIER,
                                     "Expected an identifier after #undef");
  consume(ic_token_type::NEWLINE, "New line expected after #undef");
  return ast_pool_->ic_c_pp_undef_stmt(hash_t, undef_tok, identifier_tok);
}
/// TODO: #error is not implemented yet; consumes no tokens, yields nullptr.
ic_stmt *ic_parser::error_st(ic_token *hash_t) {
  return nullptr;
}
/// TODO: #warning is not implemented yet; consumes no tokens, yields nullptr.
ic_stmt *ic_parser::warning_st(ic_token *hash_t) {
  return nullptr;
}
/// TODO: #pragma is not implemented yet; consumes no tokens, yields nullptr.
ic_stmt *ic_parser::pragma_st(ic_token *hash_t) {
  return nullptr;
}
/// TODO: non-directive lines are not implemented yet; consumes no tokens,
/// yields nullptr.
ic_stmt *ic_parser::code_line() {
  return nullptr;
}
/// TODO: block parsing is not implemented yet; consumes no tokens, yields
/// nullptr.
ic_stmt *ic_parser::block(ic_token *hash_t) {
  return nullptr;
}
/// TODO: #if / #ifdef / #ifndef are not implemented yet; consumes no tokens,
/// yields nullptr.
ic_stmt *ic_parser::if_st(ic_token *hash_t) {
  return nullptr;
}
/// TODO: not implemented yet; consumes no tokens and always yields an empty
/// token list.
std::vector<ic_token *> ic_parser::token_string() {
  std::vector<ic_token *> collected{};
  return collected;
}
Oops, something went wrong.