From 605de1779e3ffc35684c12b809b5e3b0ad48d784 Mon Sep 17 00:00:00 2001 From: Justin Date: Wed, 13 Nov 2024 12:10:56 -0700 Subject: [PATCH] Refactor command history --- dragon_runner/harness.py | 5 +- dragon_runner/runner.py | 167 ++++++++++++++++++++------------ dragon_runner/scripts/loader.py | 1 - dragon_runner/testfile.py | 9 ++ 4 files changed, 119 insertions(+), 63 deletions(-) diff --git a/dragon_runner/harness.py b/dragon_runner/harness.py index c91b3c0..03fb806 100644 --- a/dragon_runner/harness.py +++ b/dragon_runner/harness.py @@ -47,12 +47,13 @@ def run_regular(self) -> bool: test_result: Optional[TestResult] = tc_runner.run(test, exe) if not test_result: success=False - continue + log(f"Failed to receive test result for: {test.stem}") elif test_result.did_pass: sp_pass_count += 1 + test_result.log(args=self.cli_args) else: self.failures.append(test_result) - test_result.log(args=self.cli_args) + test_result.log(args=self.cli_args) sp_test_count +=1 log("Subpackage Passed: ", sp_pass_count, "/", sp_test_count, indent=2) pkg_pass_count += sp_pass_count diff --git a/dragon_runner/runner.py b/dragon_runner/runner.py index 8b37730..cd642a4 100644 --- a/dragon_runner/runner.py +++ b/dragon_runner/runner.py @@ -28,61 +28,91 @@ def __repr__(self): @dataclass class Command: + cmd: str args: List[str] - def log(self, level:int=0): - log("Command: ", ' '.join(self.args), indent=4, level=level) @dataclass class CommandResult: - subprocess: Optional[CompletedProcess] - exit_status: int + cmd:str + subprocess: Optional[CompletedProcess]=None + exit_status: int=0 time: float=0 timed_out: bool=False - def log(self, level:int=0): + def log(self, level:int=0, indent=0): if self.subprocess: stdout = self.subprocess.stdout stderr = self.subprocess.stderr - log(f"stdout ({len(stdout)} bytes):", truncated_bytes(stdout, max_bytes=512), indent=4, level=level) - log(f"stderr ({len(stderr)} bytes):", truncated_bytes(stderr, max_bytes=512), indent=4, level=level) - log(f"exit code: {self.exit_status}", indent=4, level=level) + + log(f"==> {self.cmd} (exit {self.exit_status})", indent=indent) -@dataclass -class TestResult: - __test__ = False # pytest gets confused when classes start with 'Test' - test: TestFile # test result is derived from - did_pass: bool=False # did expected out match generated - error_test: bool=False # did test return with non-zero exit - did_panic: bool=False # did test cause the toolchain to panic - time: Optional[float]=None # time test took on the final step - diff: Optional[str]=None # diff if the test failed gracefully - error_msg: Optional[str]=None # error message if test did not fail gracefully - failing_step: Optional[str]=None # step the TC failed on - gen_output: Optional[bytes]=b'' # output of the test + log(f"stdout ({len(stdout)} bytes):", truncated_bytes(stdout, max_bytes=512), + indent=indent+2, level=level) + + log(f"stderr ({len(stderr)} bytes):", truncated_bytes(stderr, max_bytes=512), + indent=indent+2, level=level) + +class TestResult: + """ + Represents the result of running a test case, including pass/fail status, + execution time, and error information. + """ + __test__ = False # pytest gets confused when classes start with 'Test' + def __init__( + self, + test: 'TestFile', + did_pass: bool = False, + error_test: bool = False, + did_panic: bool = False, + time: Optional[float] = None, + diff: Optional[str] = None, + error_msg: Optional[str] = None, + failing_step: Optional[str] = None, + gen_output: Optional[bytes] = b'', + ): + self.test = test # test result is derived from + self.did_pass = did_pass # did expected out match generated + self.error_test = error_test # did test return with non-zero exit + self.did_panic = did_panic # did test cause the toolchain to panic + self.time = time # time test took on the final step + self.diff = diff # diff if the test failed gracefully + self.error_msg = error_msg # error message if test did not fail gracefully + self.failing_step = failing_step # step the TC failed on + self.gen_output = gen_output # output of the test + self.command_history = [] # list of command histories for this test - def log(self, file=sys.stdout, args: Union[CLIArgs, None]=None): + def log(self, file=sys.stdout, args: Union['CLIArgs', None]=None): + pass_msg = "[E-PASS] " if self.error_test else "[PASS] " + fail_msg = "[E-FAIL] " if self.error_test else "[FAIL] " + test_name = f"{self.test.file:<50}" + show_time = args and args.time and self.time is not None + + # Log test result if self.did_pass: - pass_msg = "[E-PASS] " if self.error_test else "[PASS] " - test_name = f"{self.test.file:<50}" - if args and args.time and self.time is not None: + time_display = "" + if show_time: time_str = f"{self.time:.4f}" - time_with_unit = f"{time_str:>10} (s)" - else: - time_with_unit = "" - log_msg = f"{Fore.GREEN}{pass_msg}{Fore.RESET}{test_name}{time_with_unit}" + time_display = f"{time_str:>10} (s)" + log_msg = f"{Fore.GREEN}{pass_msg}{Fore.RESET}{test_name}{time_display}" log(log_msg, indent=3, file=file) else: - fail_msg = "[E-FAIL] " if self.error_test else "[FAIL] " - log(Fore.RED + fail_msg + Fore.RESET + f"{self.test.file}", indent=3, file=file) - + log(Fore.RED + fail_msg + Fore.RESET + f"{test_name}", indent=3, file=file) + + # Log the command history level = 3 if self.did_pass else 2 - # if not self.test.expected_out: - # return - log(f"==> Expected Out ({len(self.test.expected_out)} bytes):", indent=5, level=level) - log_multiline(self.test.expected_out, level=level, indent=6) - log(f"==> Generated Out ({len(self.gen_output)} bytes):", indent=5, level=level) - log_multiline(self.gen_output, level=level, indent=6) - + log(f"==> Command History", indent=5) + for cmd in self.command_history: + cmd.log(level=level, indent=7) + + # Log test expected and generated + expected_out = self.test.get_expected_out() + generated_out = x if (x := self.gen_output) else b'' + + log(f"==> Expected Out ({len(expected_out)} bytes):", indent=5, level=level) + log(str(expected_out), level=level, indent=6) + log(f"==> Generated Out ({len(generated_out)} bytes):", indent=5, level=level) + log(str(generated_out), level=level, indent=6) + def __repr__(self): return "PASS" if self.did_pass else "FAIL" @@ -99,15 +129,23 @@ def run_command(self, command: Command, stdin: bytes) -> CommandResult: env = os.environ.copy() stdout = subprocess.PIPE stderr = subprocess.PIPE - start_time = time.time() + + cr = CommandResult(cmd=command.cmd) try: result = subprocess.run(command.args, env=env, input=stdin, stdout=stdout, stderr=stderr, check=False, timeout=self.timeout) wall_time = time.time() - start_time - return CommandResult(subprocess=result, exit_status=result.returncode, time=wall_time, timed_out=False) + cr.subprocess=result + cr.exit_status=result.returncode + cr.time=wall_time + except TimeoutExpired: - return CommandResult(subprocess=None, exit_status=255, time=0, timed_out=True) + cr.time=0 + cr.timed_out=True + cr.exit_status=255 + + return cr def resolve_output_file(self, step: Step) -> Optional[str]: """ @@ -121,7 +159,7 @@ def resolve_command(self, step: Step, params: MagicParams) -> Command: """ replace magic parameters with real arguments """ - command = Command([step.exe_path] + step.arguments) + command = Command(step.exe_path, [step.exe_path] + step.arguments) command = self.replace_magic_args(command, params) command = self.replace_env_vars(command) exe = command.args[0] @@ -136,19 +174,17 @@ def run(self, test: TestFile, exe: Executable) -> Optional[TestResult]: input_file = test.path expected = test.expected_out if isinstance(test.expected_out, bytes) else b'' tr = TestResult(test=test) - for index, step in enumerate(self.tc): last_step = index == len(self.tc) - 1 input_stream = test.input_stream if step.uses_ins and isinstance(test.input_stream, bytes) else b'' output_file = self.resolve_output_file(step) - command : Command = self.resolve_command(step, MagicParams(exe.exe_path, input_file, output_file)) - command_result : CommandResult = self.run_command(command, input_stream) - - # Log command results for -vvv - command.log(level=3) - command_result.log(level=3) - + command = self.resolve_command(step, MagicParams(exe.exe_path, input_file, output_file)) + command_result = self.run_command(command, input_stream) + + # save command history for logging + tr.command_history.append(command_result) + child_process = command_result.subprocess if not child_process: """ @@ -167,8 +203,14 @@ def run(self, test: TestFile, exe: Executable) -> Optional[TestResult]: A step timed out based on the max timeout specified by CLI arg. """ timeout_msg = f"Toolchain timed out for test: {test.file}" - return TestResult(test=test, did_pass=False, did_panic=True, error_test=False, - gen_output=b'', failing_step=step.name, error_msg=timeout_msg) + tr.did_pass=False; + tr.did_panic=True; + tr.failing_step=step.name; + tr.error_msg=timeout_msg; + + return tr + # return TestResult(test=test, did_pass=False, did_panic=True, error_test=False, + # gen_output=b'', failing_step=step.name, error_msg=timeout_msg) elif child_process.returncode != 0: """ @@ -176,9 +218,12 @@ def run(self, test: TestFile, exe: Executable) -> Optional[TestResult]: is specified in the config, we can perform a lenient diff based on CompileTime or RuntimeError message rules. Otherwise, we abort the toolchain. """ - tr = TestResult(test=test, gen_output=step_stderr, failing_step=step.name, - error_test=True) - + # tr = TestResult(test=test, gen_output=step_stderr, failing_step=step.name, + # error_test=True) + tr.gen_output=step_stderr + tr.failing_step=step.name + tr.error_test=True + # fail by default if errors are not explicitly allowed in config if not step.allow_error: tr.did_pass = False @@ -210,8 +255,10 @@ def run(self, test: TestFile, exe: Executable) -> Optional[TestResult]: output_file_contents = file_to_bytes(output_file) step_stdout = output_file_contents - tr = TestResult(test=test, time=step_time, gen_output=step_stdout) - + # tr = TestResult(test=test, time=step_time, gen_output=step_stdout) + tr.time=step_time + tr.gen_output=step_stdout + # Diff the produced and expected outputs diff = precise_diff(child_process.stdout, expected) if not diff: @@ -226,7 +273,7 @@ def run(self, test: TestFile, exe: Executable) -> Optional[TestResult]: If $OUTPUT is not supplied, we create a temporary pipe. """ input_file = output_file or make_tmp_file(child_process.stdout) - + @staticmethod def replace_env_vars(cmd: Command) -> Command: """ @@ -245,7 +292,7 @@ def replace_env_vars(cmd: Command) -> Command: resolved.append(arg) else: resolved.append(arg) - return Command(resolved) + return Command(cmd.cmd, resolved) @staticmethod def replace_magic_args(command: Command, params: MagicParams) -> Command: @@ -262,7 +309,7 @@ def replace_magic_args(command: Command, params: MagicParams) -> Command: resolved.append(params.output_file) else: resolved.append(arg) - return Command(resolved) + return Command(command.cmd, resolved) def diff_bytes(s1: bytes, s2: bytes) -> str: """ diff --git a/dragon_runner/scripts/loader.py b/dragon_runner/scripts/loader.py index d44915f..7358df3 100644 --- a/dragon_runner/scripts/loader.py +++ b/dragon_runner/scripts/loader.py @@ -18,7 +18,6 @@ def run(self): Select the script to run from the mode argument passed through dragon-runner CLI. """ - def unknown_script(): print(f"script: {self.script} did not match any registered script.") diff --git a/dragon_runner/testfile.py b/dragon_runner/testfile.py index 089bbdc..8e0646a 100644 --- a/dragon_runner/testfile.py +++ b/dragon_runner/testfile.py @@ -17,6 +17,15 @@ def __init__(self, test_path, input_dir="input", input_stream_dir="input-stream" self.comment_syntax = comment_syntax # default C99 // self.expected_out: Union[bytes, TestFileError] = self.get_content("CHECK:", "CHECK_FILE:") self.input_stream: Union[bytes, TestFileError] = self.get_content("INPUT:", "INPUT_FILE:") + + def get_expected_out(self) -> bytes: + """ + Get expected output for logging purposes only. Using this method for test diffs + can result in false retreival of empty output when test is ill-defined. + """ + if isinstance(self.expected_out, bytes): + return self.expected_out + return b'' def verify(self) -> ErrorCollection: """