diff --git a/.gitignore b/.gitignore
index 0eb3e7a..8b132d3 100644
--- a/.gitignore
+++ b/.gitignore
@@ -8,6 +8,7 @@ scratch/
 **/GazpreaPackage/**
 **/GeneratorPackage/**
 **/VCalcPackage/**
+**/VCalc24Package/**
 
 gazpreaConfig.json
 GeneratorConfig.json
diff --git a/dragon_runner/cli.py b/dragon_runner/cli.py
index c6b4b73..5a5e156 100644
--- a/dragon_runner/cli.py
+++ b/dragon_runner/cli.py
@@ -2,12 +2,13 @@
 import os
 from typing import NamedTuple
 
-class CLIArgs(NamedTuple): 
+class CLIArgs(NamedTuple):
     config_file: str
-    grade_file: str
+    output_file: str
     failure_log: str
+    debug_package: str
+    mode: str
     timeout: float
-    debug_package: str
     time: bool
     verbosity: int
     verify: bool
@@ -16,11 +17,12 @@ def __repr__(self) -> str:
         return (
             "Parsed CLI Arguments:\n"
             f" Config File: {self.config_file}\n"
-            f" Grade File: {self.grade_file}\n"
+            f" Mode: {self.mode}\n"
             f" Failure Log: {self.failure_log}\n"
             f" Timeout: {self.timeout}\n"
             f" Debug Package: {self.debug_package}\n"
             f" Time: {self.time}\n"
+            f" Output file: {self.output_file}\n"
             f" Verbosity: {self.verbosity}"
             f" Verify: {self.verify}"
         )
@@ -28,29 +30,48 @@ def __repr__(self) -> str:
 def parse_cli_args() -> CLIArgs:
     parser = argparse.ArgumentParser(description="CMPUT 415 testing utility")
-    parser.add_argument("config_file", help="Path to the tester JSON configuration file.")
-    parser.add_argument("--grade", dest="grade_file", help="Perform grading analysis and output to this file")
-    parser.add_argument("--log-failures", dest="failure_log", help="Log the testcases the solution compiler fails.")
-    parser.add_argument("--timeout", type=float, default=2.0, help="Specify timeout length for EACH command in a toolchain.")
-    parser.add_argument("--verify", action="store_true", default=False, help="Verify that config and tests are configured correctly")
-    parser.add_argument("--debug-package", help="Provide a sub-path to run the tester on.")
-    parser.add_argument("-t", "--time", action="store_true", help="Include the timings (seconds) of each test in the output.")
-    parser.add_argument("-v", "--verbosity", action="count", default=0, help="Increase verbosity level")
+    parser.add_argument("config_file",
+                        help="Path to the tester JSON configuration file.")
+
+    parser.add_argument("--mode", dest="mode", default="regular",
+                        help="run in regular, grade or script mode")
+
+    parser.add_argument("--fail-log", dest="failure_log",
+                        help="Log the testcases the solution compiler fails.")
+
+    parser.add_argument("--timeout", type=float, default=2.0,
+                        help="Specify timeout length for EACH command in a toolchain.")
+
+    parser.add_argument("--verify", action="store_true", default=False,
+                        help="Verify that config and tests are configured correctly")
+
+    parser.add_argument("--debug-package",
+                        help="Provide a sub-path to run the tester on.")
+
+    parser.add_argument("-t", "--time", action="store_true",
+                        help="Include the timings (seconds) of each test in the output.")
+
+    parser.add_argument("-v", "--verbosity", action="count", default=0,
+                        help="Increase verbosity level")
+    parser.add_argument("-o", "--output", action="store_true",
+                        help="Direct the output of dragon-runner to a file")
+
     args = parser.parse_args()
 
     if not os.path.isfile(args.config_file):
         parser.error(f"The config file {args.config_file} does not exist.")
-    if bool(args.grade_file) != bool(args.failure_log):
-        parser.error("Both --grade and --log-failures must be provided together.")
+    if args.mode == "grade" and not bool(args.failure_log):
+        parser.error("Failure log must be supplied when using grade mode.")
 
     if args.verbosity > 0:
         os.environ["DEBUG"] = str(args.verbosity)
 
     return CLIArgs(
         config_file = args.config_file,
-        grade_file = args.grade_file,
+        mode = args.mode,
         failure_log = args.failure_log,
         timeout = args.timeout,
         debug_package = args.debug_package,
+        output_file = args.output,
         time = args.time,
         verbosity = args.verbosity,
         verify = args.verify
diff --git a/dragon_runner/config.py b/dragon_runner/config.py
index ead5ba0..0179406 100644
--- a/dragon_runner/config.py
+++ b/dragon_runner/config.py
@@ -1,6 +1,5 @@
 import json
 import os
-import pathlib
 import sys
 from pathlib import Path
 from typing import Dict, List, Optional
@@ -213,6 +212,7 @@ def log_test_info(self):
         """
         Prints a simple formatted table of test information.
         """
+        log("\nPackages:", level=1)
         for pkg in self.packages:
             log(f"-- ({pkg.name})", level=1)
             for spkg in pkg.subpackages:
diff --git a/dragon_runner/harness.py b/dragon_runner/harness.py
index 96adbf7..c91b3c0 100644
--- a/dragon_runner/harness.py
+++ b/dragon_runner/harness.py
@@ -1,6 +1,6 @@
 import csv
 from colorama import Fore
-from typing import List, Dict
+from typing import List, Dict, Optional
 from dragon_runner.cli import CLIArgs
 from dragon_runner.config import Config, Executable, Package
 from dragon_runner.log import log
@@ -24,7 +24,7 @@ def run_regular(self) -> bool:
         Iterate over all tested executables, toolchains, subpackages and tests.
         Return True is all pass, false otherwise.
         """
-        sucecss = True
+        success = True
         for exe in self.config.executables:
             log("Running executable:\t", exe.id)
             exe.source_env()
@@ -44,12 +44,15 @@
                 sp_pass_count = 0
                 sp_test_count = 0
                 for test in spkg.tests:
-                    test_result: TestResult = tc_runner.run(test, exe)
-                    test_result.log(args=self.cli_args)
-                    if test_result.did_pass:
+                    test_result: Optional[TestResult] = tc_runner.run(test, exe)
+                    if not test_result:
+                        success=False
+                        continue
+                    elif test_result.did_pass:
                         sp_pass_count += 1
                     else:
                         self.failures.append(test_result)
+                    test_result.log(args=self.cli_args)
                     sp_test_count +=1
                 log("Subpackage Passed: ", sp_pass_count, "/", sp_test_count, indent=2)
                 pkg_pass_count += sp_pass_count
@@ -62,14 +65,14 @@
            exe_test_count += tc_test_count
            log("Executable Passed: ", exe_pass_count, "/", exe_test_count)
            if exe_pass_count != exe_test_count:
-               sucecss = False
-        return sucecss
+               success = False
+        return success
 
     def run(self) -> bool:
         """
        decide wether to run in regular mode or grade mode based on the CLI args
         """
-        if self.cli_args.grade_file:
+        if self.cli_args.mode == "grade":
             assert self.cli_args.failure_log is not None, "Need to supply failure log!"
             print("Running Dragon Runner in grade mode")
             return self.run_grader_json()
@@ -85,15 +88,19 @@ def trim_bytes(data: bytes, max_bytes: int = 512) -> bytes:
             if len(data) > max_bytes:
                 trimmed += b"\n... (output trimmed to %d bytes)" % max_bytes
             return trimmed
-
+
         with open(file, 'a+') as feedback_file:
             if not result.did_pass:
+                test_contents = file_to_str(result.test.path)
+                exp_out = trim_bytes(x) if isinstance(x := result.test.expected_out, bytes) else ""
+                gen_out = trim_bytes(x) if isinstance(x := result.gen_output, bytes) else ""
+
                 feedback_file.write(
-                    f"Test: {result.test.file}\n"\
-                    + "Test contents:\n" + '-'*40 + '\n' + file_to_str(
-                        result.test.path, max_bytes=512) + '\n' + '-'*40 + '\n'\
-                    + "Expected Output: " + str(trim_bytes(result.test.expected_out)) + '\n'\
-                    + "Generated Output: " + str(trim_bytes(result.gen_output)) + '\n'
+                    f"""Test: {result.test.file}\n
+                    Test contents: {test_contents}\n
+                    Expected Output: {exp_out}\n
+                    Generated Output: {gen_out}
+                    """
                 )
                 if result.error_msg:
                     feedback_file.write(f"Error Message: {result.error_msg}\n")
@@ -125,30 +132,32 @@ def run_grader_json(self) -> bool:
         attacking_pkgs = sorted(self.config.packages, key=lambda pkg: pkg.name.lower())
         defending_exes = sorted(self.config.executables, key=lambda exe: exe.id.lower())
         solution_exe = self.config.solution_exe
-
+
+        # track grader internal errors
+        exit_status = True
+
         with open(self.cli_args.failure_log, 'w') as sol_fail_log, \
-            open(self.cli_args.grade_file, 'w', newline='') as grade_csv:
-
+            open(self.cli_args.output_file, 'w', newline='') as grade_csv:
             csv_writer = csv.writer(grade_csv)
 
             for toolchain in self.config.toolchains:
                 tc_runner = ToolChainRunner(toolchain, self.cli_args.timeout)
                 tc_table = self.create_tc_dataframe(toolchain.name, defending_exes, attacking_pkgs)
-                print(f"\nToolchain: {toolchain.name}")
-
+                print(f"\nToolchain: {toolchain.name}")
                 for def_exe in defending_exes:
                     def_exe.source_env()
-                    def_feedback_file = f"{def_exe.id}-{toolchain.name}feedback.txt"
-
+                    def_feedback_file = f"{def_exe.id}-{toolchain.name}feedback.txt"
                     for a_pkg in attacking_pkgs:
                         pass_count = 0
                         test_count = a_pkg.n_tests
-                        print(f"\n {a_pkg.name:<12} --> {def_exe.id:<12}", end='')
-
+                        print(f"\n {a_pkg.name:<12} --> {def_exe.id:<12}", end='')
                         for a_spkg in a_pkg.subpackages:
                             for test in a_spkg.tests:
-                                test_result: TestResult = tc_runner.run(test, def_exe)
-                                if test_result.did_pass:
+                                test_result: Optional[TestResult] = tc_runner.run(test, def_exe)
+                                if not test_result:
+                                    log(f"Failed to run test {test.stem}")
+                                    exit_status=False
+                                elif test_result.did_pass:
                                     print(Fore.GREEN + '.' + Fore.RESET, end='')
                                     pass_count += 1
                                 else:
@@ -162,4 +171,5 @@ def run_grader_json(self) -> bool:
                 for exe in defending_exes:
                     csv_writer.writerow([exe.id] + [tc_table[exe.id][pkg.name] for pkg in attacking_pkgs])
                 csv_writer.writerow([]) # empty row for separation
-        return True
+
+        return exit_status
diff --git a/dragon_runner/main.py b/dragon_runner/main.py
index 26c1525..d0e37d9 100644
--- a/dragon_runner/main.py
+++ b/dragon_runner/main.py
@@ -10,6 +10,7 @@ def main():
 
     # parse and verify the CLI arguments
     args: CLIArgs = parse_cli_args()
+    log(args, level=1)
 
     # parse and verify the config
     config = load_config(args.config_file, args)
diff --git a/dragon_runner/testfile.py b/dragon_runner/testfile.py
index 8bb3bc1..089bbdc 100644
--- a/dragon_runner/testfile.py
+++ b/dragon_runner/testfile.py
@@ -10,7 +10,7 @@ def __init__(self, test_path, input_dir="input", input_stream_dir="input-stream"
                  output_dir="output", comment_syntax="//"):
         self.path = test_path
         self.stem, self.extension = os.path.splitext(os.path.basename(test_path))
-        self.file = self.stem + self.extension
+        self.file:str = self.stem + self.extension
         self.input_dir = input_dir
         self.input_stream_dir = input_stream_dir
         self.output_dir = output_dir
@@ -38,7 +38,7 @@ def get_content(self, inline_directive: str, file_directive: str) -> Union[bytes
         file_contents = self._get_directive_contents(file_directive)
 
         if inline_contents and file_contents:
-            return TestFileError(f"Directive Conflict: Supplied both\
+            return TestFileError(f"Directive Conflict for test {self.file}: Supplied both\
                                    {inline_directive} and {file_directive}")
         elif inline_contents:
diff --git a/tests/configs/VCalcCompileConfig.json b/tests/configs/VCalcCompileConfig.json
new file mode 100644
index 0000000..efd665b
--- /dev/null
+++ b/tests/configs/VCalcCompileConfig.json
@@ -0,0 +1,39 @@
+{
+  "testDir": "/home/justin/school/415/vcalc-marking/testfiles",
+  "testedExecutablePaths": {
+    "solution": "/home/justin/CDOL/Solutions/VCalc24Solution/bin/vcalc"
+  },
+  "runtimes": {
+    "solution": "/home/justin/CDOL/Solutions/VCalc24Solution/bin/libvcalcrt.so"
+  },
+  "solutionExecutable": "solution",
+  "toolchains": {
+    "vcalc-llc": [
+      {
+        "stepName": "vcalc",
+        "executablePath": "$EXE",
+        "arguments": ["$INPUT", "$OUTPUT"],
+        "output": "vcalc.ll"
+      },
+      {
+        "stepName": "llc",
+        "executablePath": "/home/justin/install/llvm/llvm-18/bin/llc",
+        "arguments": ["-filetype=obj", "-relocation-model=pic", "$INPUT", "-o", "$OUTPUT"],
+        "output": "vcalc.o"
+      },
+      {
+        "stepName": "clang",
+        "executablePath": "/usr/bin/clang",
+        "arguments": ["$INPUT", "-o", "$OUTPUT", "-L$RT_PATH", "-l$RT_LIB"],
+        "output": "vcalc"
+      },
+      {
+        "stepName": "run",
+        "executablePath": "$INPUT",
+        "arguments": [],
+        "usesInStr": true,
+        "usesRuntime": true
+      }
+    ]
+  }
+}
diff --git a/tests/conftest.py b/tests/conftest.py
index 3e23576..6c2022e 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -1,4 +1,5 @@
 import pytest
+from typing import Optional
 from pathlib import Path
 from dragon_runner.cli import CLIArgs
 from dragon_runner.config import load_config, Config
@@ -6,20 +7,21 @@
 def get_config_path(config_name: str) -> Path:
     return Path(__file__).parent / "configs" / config_name
 
-def create_config(config_name: str) -> Config:
+def create_config(config_name: str) -> Optional[Config]:
     config_path = get_config_path(config_name)
     return load_config(str(config_path))
 
 def create_cli_args(**kwargs) -> CLIArgs:
     return CLIArgs(
-        kwargs.get('config_file', None),
-        kwargs.get('grade_file', None),
-        kwargs.get('failure_file', None),
-        kwargs.get('timeout', None),
-        kwargs.get('debug-package', None),
-        kwargs.get('time', None),
-        kwargs.get('verbosity', None),
-        kwargs.get('verify', None)
+        config_file = kwargs.get('config_file', None),
+        output_file = kwargs.get('output_file', None),
+        failure_log = kwargs.get('failure_log', None),
+        debug_package = kwargs.get('debug_package', None),
+        mode = kwargs.get('mode', None),
+        timeout = kwargs.get('timeout', None),
+        time = kwargs.get('time', None),
+        verbosity = kwargs.get('verbosity', None),
+        verify = kwargs.get('verify', None)
     )
 
 @pytest.fixture(scope="session")
diff --git a/tests/packages/CPackage/RegularPass/valid_tests/018_no_conflict.c b/tests/packages/CPackage/RegularPass/valid_tests/018_no_conflict.c
new file mode 100644
index 0000000..79653ba
--- /dev/null
+++ b/tests/packages/CPackage/RegularPass/valid_tests/018_no_conflict.c
@@ -0,0 +1,15 @@
+#include <stdio.h>
+
+// INPUT:a
+// INPUT:a
+
+int main() {
+
+    char c;
+    scanf("%c", &c);
+    printf("%c", c);
+
+    return 0;
+}
+
+//CHECK:a
diff --git a/tests/run_tests.py b/tests/run_tests.py
new file mode 100644
index 0000000..c219dc3
--- /dev/null
+++ b/tests/run_tests.py
@@ -0,0 +1,17 @@
+#
+# Quick script
+#
+#
+
+import os
+import subprocess
+from pathlib import Path
+
+if __name__ == "__main__":
+
+    script_dir = Path(__file__).parent.absolute()
+    for file in os.listdir(script_dir):
+        if "test_" in file:
+            print(file)
+            subprocess.run(f"pytest {os.path.join(script_dir, file)}", shell=True)
+
diff --git a/tests/scripts/grading-scripts/tournament.py b/tests/scripts/grading-scripts/tournament.py
index 96e9b11..49a8f93 100644
--- a/tests/scripts/grading-scripts/tournament.py
+++ b/tests/scripts/grading-scripts/tournament.py
@@ -1,9 +1,10 @@
+
 import csv
 from fractions import Fraction
 
 DEFENSIVE_PTS = 2
 OFFENSIVE_PTS = 1
-COHERENCE_PTS = 1
+COHERENCE_PTS = 10
 COMPETITIVE_WEIGHT = 0.2
 TA_WEIGHT = 0.5
 SOLUTION = "solution"
@@ -22,11 +23,15 @@ def normalize_competetive_scores(tc_table):
     """
     n_rows = len(tc_table)
     n_cols = len(tc_table[0])
+
-    raw_competitive_scores = tc_table[n_rows-1][1:]
-    max_score = max(raw_competitive_scores)
+    print("TC_TABLE", tc_table)
+    print("COMPETITIVE ROW:", tc_table[-2][1:])
+    raw_competitive_scores = tc_table[-2][1:]
+    max_score = max(raw_competitive_scores)
+    print("MAX SCORE: ", max_score)
 
     norm_competitive_scores = [
-        round(COMPETITIVE_WEIGHT * (score / max_score), 3)
+        round(COMPETITIVE_WEIGHT * (float(score) / float(max_score)), 3)
         for score in raw_competitive_scores
     ]
     norm_scores_row = ["Normalized Points (20% Weight)"] + norm_competitive_scores
@@ -45,7 +50,7 @@
     avg_table = [row[:] for row in tables[0]]
     avg_table[0][0] = "toolchain summary"
 
-    for i in range(1, n_rows):
+    for i in range(1, n_cols):
         for j in range(1, n_cols):
             avg = 0
             for tc in tables:
@@ -61,7 +66,9 @@
     and coherence points. Not yet normalized to highest score.
     """
     n_cols = len(table[0])
-
+    print("N_COLS:", n_cols)
+    print("N_ROWS:", len(table))
+
     # Declare new rows
     ta_points_row = ["TA Testing Score (50% Weight)"] + [0] * (n_cols - 1)
     defensive_row = ["Defensive Points"] + [0] * (n_cols - 1)
@@ -77,23 +84,25 @@
         c_score = 0 # coherence score
         c_score += COHERENCE_PTS if to_float(table[j][j]) == 1 else 0
 
-        for i in range(1, len(table)):
+        # defender = table[1][0]
+        for i in range(1, n_cols):
             defender = table[i][0]
+            print(f"i: {i}", defender)
             if defender == SOLUTION:
                 # look at the transpose position to determine TA score
                 ta_score += to_float(table[j][i])
 
-            o_score += (1 - to_float(table[i][j]))
-            d_score += (2 if to_float(table[j][i]) == 1 else 0)
+            o_score += (OFFENSIVE_PTS * (1 - to_float(table[i][j])))
+            d_score += (DEFENSIVE_PTS * to_float(table[j][i]))
 
         # print(f"attacker: {attacker}\n oscore: {o_score} \ndscore: {d_score}\n cscore: {c_score}")
 
         # Populate the new rows
-        ta_points_row[j] = round(ta_score * TA_WEIGHT, 3)
-        defensive_row[j] = round(d_score * DEFENSIVE_PTS, 2)
-        offensive_row[j] = round(o_score * OFFENSIVE_PTS, 2)
+        ta_points_row[j] = str(round(ta_score * TA_WEIGHT, 3))
+        defensive_row[j] = str(round(d_score, 2))
+        offensive_row[j] = str(round(o_score, 2))
         coherence_row[j] = round(c_score, 3)
-        total_row[j] = defensive_row[j] + offensive_row[j] + coherence_row[j]
+        total_row[j] = str(float(defensive_row[j]) + float(offensive_row[j]) + float(coherence_row[j]))
 
         # Append new rows to the table
         table.append(defensive_row)
@@ -106,22 +115,23 @@
 
 if __name__ == "__main__":
 
-    input_files = ['interpreter.csv', 'riscv.csv', 'x86.csv', 'arm.csv']
+    input_files = ['Grades.csv']
     tc_tables = []
     for file in input_files:
        with open(file, 'r') as f:
            reader = csv.reader(f)
            tc_table = list(reader)
            tc_tables.append(add_competitive_rows(tc_table))
-
+
+    print(tc_tables)
     tc_avg = average_toolchain_tables(tc_tables)
     normalize_competetive_scores(tc_avg)
     print(tc_avg)
 
-    output_file = './scalc-grades.csv'
+    output_file = './vcalc-grades.csv'
     with open(output_file, 'w') as f:
         writer = csv.writer(f)
         for table in tc_tables:
            writer.writerows(table)
            writer.writerow([]) # newline
-        writer.writerows(tc_avg)
\ No newline at end of file
+        writer.writerows(tc_avg)
diff --git a/tests/test_grader.py b/tests/test_grader.py
index e706ab8..cbb053e 100644
--- a/tests/test_grader.py
+++ b/tests/test_grader.py
@@ -7,7 +7,7 @@
 def test_grader_config(config_factory, cli_factory):
     config : Config = config_factory("ConfigGrade.json")
     args : CLIArgs = cli_factory(**{
-        "grade_file": "Grades.csv",
+        "output_file": "Grades.csv",
         "failure_file": "Failures.txt",
         "timeout": 1
     })
@@ -16,4 +16,4 @@
     harness = RegressionTestHarness(config, args)
     assert harness is not None
     harness.run()
-    assert os.path.exists(args.grade_file)
+    assert os.path.exists(args.output_file)