From 605de1779e3ffc35684c12b809b5e3b0ad48d784 Mon Sep 17 00:00:00 2001
From: Justin <meimar@ualberta.ca>
Date: Wed, 13 Nov 2024 12:10:56 -0700
Subject: [PATCH] Refactor command history

---
 dragon_runner/harness.py        |   5 +-
 dragon_runner/runner.py         | 167 ++++++++++++++++++++------------
 dragon_runner/scripts/loader.py |   1 -
 dragon_runner/testfile.py       |   9 ++
 4 files changed, 119 insertions(+), 63 deletions(-)

diff --git a/dragon_runner/harness.py b/dragon_runner/harness.py
index c91b3c0..03fb806 100644
--- a/dragon_runner/harness.py
+++ b/dragon_runner/harness.py
@@ -47,12 +47,13 @@ def run_regular(self) -> bool:
                             test_result: Optional[TestResult] = tc_runner.run(test, exe)
                             if not test_result:
                                 success=False
-                                continue
+                                log(f"Failed to receive test result for: {test.stem}")
                             elif test_result.did_pass:
                                 sp_pass_count += 1     
+                                test_result.log(args=self.cli_args)
                             else:
                                 self.failures.append(test_result) 
-                            test_result.log(args=self.cli_args) 
+                                test_result.log(args=self.cli_args)
                             sp_test_count +=1 
                         log("Subpackage Passed: ", sp_pass_count, "/", sp_test_count, indent=2)
                         pkg_pass_count += sp_pass_count
diff --git a/dragon_runner/runner.py b/dragon_runner/runner.py
index 8b37730..cd642a4 100644
--- a/dragon_runner/runner.py
+++ b/dragon_runner/runner.py
@@ -28,61 +28,91 @@ def __repr__(self):
 
 @dataclass
 class Command:
+    cmd: str
     args: List[str] 
-    def log(self, level:int=0):
-        log("Command: ", ' '.join(self.args), indent=4, level=level)
 
 @dataclass
 class CommandResult:
-    subprocess: Optional[CompletedProcess]
-    exit_status: int 
+    cmd:str
+    subprocess: Optional[CompletedProcess]=None
+    exit_status: int=0 
     time: float=0
     timed_out: bool=False
 
-    def log(self, level:int=0):
+    def log(self, level:int=0, indent=0):
         if self.subprocess:
             stdout = self.subprocess.stdout
             stderr = self.subprocess.stderr
-            log(f"stdout ({len(stdout)} bytes):", truncated_bytes(stdout, max_bytes=512), indent=4, level=level)
-            log(f"stderr ({len(stderr)} bytes):", truncated_bytes(stderr, max_bytes=512), indent=4, level=level)
-            log(f"exit code: {self.exit_status}", indent=4, level=level)
+             
+            log(f"==> {self.cmd} (exit {self.exit_status})", indent=indent)
 
-@dataclass
-class TestResult: 
-    __test__ = False                    # pytest gets confused when classes start with 'Test'
-    test: TestFile                      # test result is derived from 
-    did_pass: bool=False                # did expected out match generated
-    error_test: bool=False              # did test return with non-zero exit
-    did_panic: bool=False               # did test cause the toolchain to panic
-    time: Optional[float]=None          # time test took on the final step
-    diff: Optional[str]=None            # diff if the test failed gracefully
-    error_msg: Optional[str]=None       # error message if test did not fail gracefully
-    failing_step: Optional[str]=None    # step the TC failed on
-    gen_output: Optional[bytes]=b''     # output of the test
+            log(f"stdout ({len(stdout)} bytes):", truncated_bytes(stdout, max_bytes=512),
+                indent=indent+2, level=level)
+            
+            log(f"stderr ({len(stderr)} bytes):", truncated_bytes(stderr, max_bytes=512),
+                indent=indent+2, level=level)
+
+class TestResult:
+    """
+    Represents the result of running a test case, including pass/fail status,
+    execution time, and error information.
+    """
+    __test__ = False  # pytest gets confused when classes start with 'Test' 
+    def __init__(
+        self,
+        test: 'TestFile',
+        did_pass: bool = False,
+        error_test: bool = False,
+        did_panic: bool = False,
+        time: Optional[float] = None,
+        diff: Optional[str] = None,
+        error_msg: Optional[str] = None,
+        failing_step: Optional[str] = None,
+        gen_output: Optional[bytes] = b'',
+    ):
+        self.test = test                # test result is derived from
+        self.did_pass = did_pass        # did expected out match generated
+        self.error_test = error_test    # did test return with non-zero exit
+        self.did_panic = did_panic      # did test cause the toolchain to panic
+        self.time = time                # time test took on the final step
+        self.diff = diff                # diff if the test failed gracefully
+        self.error_msg = error_msg      # error message if test did not fail gracefully
+        self.failing_step = failing_step # step the TC failed on
+        self.gen_output = gen_output    # output of the test
+        self.command_history = []       # list of command histories for this test
 
-    def log(self, file=sys.stdout, args: Union[CLIArgs, None]=None):
+    def log(self, file=sys.stdout, args: Union['CLIArgs', None]=None):
+        pass_msg = "[E-PASS] " if self.error_test else "[PASS] "
+        fail_msg = "[E-FAIL] " if self.error_test else "[FAIL] " 
+        test_name = f"{self.test.file:<50}"    
+        show_time = args and args.time and self.time is not None
+        
+        # Log test result
         if self.did_pass:
-            pass_msg = "[E-PASS] " if self.error_test else "[PASS] "
-            test_name = f"{self.test.file:<50}"     
-            if args and args.time and self.time is not None:
+            time_display = "" 
+            if show_time:
                 time_str = f"{self.time:.4f}"
-                time_with_unit = f"{time_str:>10} (s)" 
-            else:
-                time_with_unit = "" 
-            log_msg = f"{Fore.GREEN}{pass_msg}{Fore.RESET}{test_name}{time_with_unit}"
+                time_display = f"{time_str:>10} (s)" 
+            log_msg = f"{Fore.GREEN}{pass_msg}{Fore.RESET}{test_name}{time_display}"
             log(log_msg, indent=3, file=file)
         else:
-            fail_msg = "[E-FAIL] " if self.error_test else "[FAIL] "
-            log(Fore.RED + fail_msg + Fore.RESET + f"{self.test.file}", indent=3, file=file)
-
+            log(Fore.RED + fail_msg + Fore.RESET + f"{test_name}", indent=3, file=file)
+        
+        # Log the command history
         level = 3 if self.did_pass else 2
-        # if not self.test.expected_out:
-            # return
-        log(f"==> Expected Out ({len(self.test.expected_out)} bytes):", indent=5, level=level)
-        log_multiline(self.test.expected_out, level=level, indent=6)
-        log(f"==> Generated Out ({len(self.gen_output)} bytes):", indent=5, level=level)
-        log_multiline(self.gen_output, level=level, indent=6)
-
+        log(f"==> Command History", indent=5)
+        for cmd in self.command_history:
+            cmd.log(level=level, indent=7)
+        
+        # Log test expected and generated
+        expected_out = self.test.get_expected_out()
+        generated_out = x if (x := self.gen_output) else b''
+ 
+        log(f"==> Expected Out ({len(expected_out)} bytes):", indent=5, level=level)
+        log(str(expected_out), level=level, indent=6)
+        log(f"==> Generated Out ({len(generated_out)} bytes):", indent=5, level=level)
+        log(str(generated_out), level=level, indent=6) 
+        
     def __repr__(self):
         return "PASS" if self.did_pass else "FAIL"
 
@@ -99,15 +129,23 @@ def run_command(self, command: Command, stdin: bytes) -> CommandResult:
         env = os.environ.copy()
         stdout = subprocess.PIPE
         stderr = subprocess.PIPE
-
         start_time = time.time()
+
+        cr = CommandResult(cmd=command.cmd)
         try:
             result = subprocess.run(command.args, env=env, input=stdin, stdout=stdout,
                                     stderr=stderr, check=False, timeout=self.timeout)
             wall_time = time.time() - start_time
-            return CommandResult(subprocess=result, exit_status=result.returncode, time=wall_time, timed_out=False)
+            cr.subprocess=result
+            cr.exit_status=result.returncode
+            cr.time=wall_time
+
         except TimeoutExpired:
-            return CommandResult(subprocess=None, exit_status=255, time=0, timed_out=True)
+            cr.time=0
+            cr.timed_out=True
+            cr.exit_status=255
+        
+        return cr
         
     def resolve_output_file(self, step: Step) -> Optional[str]:
         """
@@ -121,7 +159,7 @@ def resolve_command(self, step: Step, params: MagicParams) -> Command:
         """
         replace magic parameters with real arguments
         """
-        command = Command([step.exe_path] + step.arguments)
+        command = Command(step.exe_path, [step.exe_path] + step.arguments)
         command = self.replace_magic_args(command, params)
         command = self.replace_env_vars(command)
         exe = command.args[0]
@@ -136,19 +174,17 @@ def run(self, test: TestFile, exe: Executable) -> Optional[TestResult]:
         input_file = test.path
         expected = test.expected_out if isinstance(test.expected_out, bytes) else b'' 
         tr = TestResult(test=test)
-
         for index, step in enumerate(self.tc):
 
             last_step       = index == len(self.tc) - 1
             input_stream    = test.input_stream if step.uses_ins and isinstance(test.input_stream, bytes) else b'' 
             output_file     = self.resolve_output_file(step) 
-            command : Command   = self.resolve_command(step, MagicParams(exe.exe_path, input_file, output_file))
-            command_result : CommandResult = self.run_command(command, input_stream) 
-            
-            # Log command results for -vvv
-            command.log(level=3)
-            command_result.log(level=3)
-             
+            command         = self.resolve_command(step, MagicParams(exe.exe_path, input_file, output_file))
+            command_result  = self.run_command(command, input_stream) 
+           
+            # save command history for logging
+            tr.command_history.append(command_result)
+
             child_process = command_result.subprocess
             if not child_process:
                 """
@@ -167,8 +203,14 @@ def run(self, test: TestFile, exe: Executable) -> Optional[TestResult]:
                 A step timed out based on the max timeout specified by CLI arg.
                 """
                 timeout_msg = f"Toolchain timed out for test: {test.file}"
-                return TestResult(test=test, did_pass=False, did_panic=True, error_test=False,
-                                  gen_output=b'', failing_step=step.name, error_msg=timeout_msg)
+                tr.did_pass=False;
+                tr.did_panic=True;
+                tr.failing_step=step.name;
+                tr.error_msg=timeout_msg;
+
+                return tr
+                # return TestResult(test=test, did_pass=False, did_panic=True, error_test=False,
+                #                   gen_output=b'', failing_step=step.name, error_msg=timeout_msg)
             
             elif child_process.returncode != 0:
                 """
@@ -176,9 +218,12 @@ def run(self, test: TestFile, exe: Executable) -> Optional[TestResult]:
                 is specified in the config, we can perform a lenient diff based on CompileTime
                 or RuntimeError message rules. Otherwise, we abort the toolchain.
                 """
-                tr = TestResult(test=test, gen_output=step_stderr, failing_step=step.name,
-                                                                   error_test=True)
-                
+                # tr = TestResult(test=test, gen_output=step_stderr, failing_step=step.name,
+                                                                   # error_test=True)
+                tr.gen_output=step_stderr
+                tr.failing_step=step.name
+                tr.error_test=True
+
                 # fail by default if errors are not explicitly allowed in config
                 if not step.allow_error:
                     tr.did_pass = False
@@ -210,8 +255,10 @@ def run(self, test: TestFile, exe: Executable) -> Optional[TestResult]:
                     output_file_contents = file_to_bytes(output_file)
                     step_stdout = output_file_contents 
                 
-                tr = TestResult(test=test, time=step_time, gen_output=step_stdout)
-                
+                # tr = TestResult(test=test, time=step_time, gen_output=step_stdout)
+                tr.time=step_time
+                tr.gen_output=step_stdout
+
                 # Diff the produced and expected outputs
                 diff = precise_diff(child_process.stdout, expected)
                 if not diff:
@@ -226,7 +273,7 @@ def run(self, test: TestFile, exe: Executable) -> Optional[TestResult]:
                 If $OUTPUT is not supplied, we create a temporary pipe.
                 """
                 input_file = output_file or make_tmp_file(child_process.stdout)
-   
+
     @staticmethod
     def replace_env_vars(cmd: Command) -> Command:
         """
@@ -245,7 +292,7 @@ def replace_env_vars(cmd: Command) -> Command:
                 resolved.append(arg)
             else:
                 resolved.append(arg)
-        return Command(resolved)
+        return Command(cmd.cmd, resolved)
 
     @staticmethod
     def replace_magic_args(command: Command, params: MagicParams) -> Command:
@@ -262,7 +309,7 @@ def replace_magic_args(command: Command, params: MagicParams) -> Command:
                 resolved.append(params.output_file) 
             else:
                 resolved.append(arg)
-        return Command(resolved)
+        return Command(command.cmd, resolved)
 
 def diff_bytes(s1: bytes, s2: bytes) -> str:
     """
diff --git a/dragon_runner/scripts/loader.py b/dragon_runner/scripts/loader.py
index d44915f..7358df3 100644
--- a/dragon_runner/scripts/loader.py
+++ b/dragon_runner/scripts/loader.py
@@ -18,7 +18,6 @@ def run(self):
         Select the script to run from the mode argument passed through
         dragon-runner CLI.
         """
-
         def unknown_script():
             print(f"script: {self.script} did not match any registered script.")
 
diff --git a/dragon_runner/testfile.py b/dragon_runner/testfile.py
index 089bbdc..8e0646a 100644
--- a/dragon_runner/testfile.py
+++ b/dragon_runner/testfile.py
@@ -17,6 +17,15 @@ def __init__(self, test_path, input_dir="input", input_stream_dir="input-stream"
         self.comment_syntax = comment_syntax # default C99 //
         self.expected_out: Union[bytes, TestFileError] = self.get_content("CHECK:", "CHECK_FILE:")
         self.input_stream: Union[bytes, TestFileError] = self.get_content("INPUT:", "INPUT_FILE:")
+    
+    def get_expected_out(self) -> bytes:
+        """
+        Get expected output for logging purposes only. Using this method for test diffs
+        can result in false retreival of empty output when test is ill-defined.
+        """
+        if isinstance(self.expected_out, bytes):
+            return self.expected_out
+        return b''
 
     def verify(self) -> ErrorCollection:
         """