Rework CLI args for grade scripting
JustinMeimar committed Nov 12, 2024
1 parent 829bf72 commit 438b214
Showing 12 changed files with 188 additions and 72 deletions.
1 change: 1 addition & 0 deletions .gitignore
@@ -8,6 +8,7 @@ scratch/
**/GazpreaPackage/**
**/GeneratorPackage/**
**/VCalcPackage/**
**/VCalc24Package/**

gazpreaConfig.json
GeneratorConfig.json
51 changes: 36 additions & 15 deletions dragon_runner/cli.py
@@ -2,12 +2,13 @@
import os
from typing import NamedTuple

class CLIArgs(NamedTuple):
class CLIArgs(NamedTuple):
config_file: str
grade_file: str
output_file: str
failure_log: str
debug_package: str
mode: str
timeout: float
debug_package: str
time: bool
verbosity: int
verify: bool
@@ -16,41 +16,61 @@ def __repr__(self) -> str:
return (
"Parsed CLI Arguments:\n"
f" Config File: {self.config_file}\n"
f" Grade File: {self.grade_file}\n"
f" Mode: {self.mode}\n"
f" Failure Log: {self.failure_log}\n"
f" Timeout: {self.timeout}\n"
f" Debug Package: {self.debug_package}\n"
f" Time: {self.time}\n"
f" Output file: {self.output_file}\n"
f" Verbosity: {self.verbosity}"
f" Verify: {self.verify}"
)

def parse_cli_args() -> CLIArgs:
parser = argparse.ArgumentParser(description="CMPUT 415 testing utility")

parser.add_argument("config_file", help="Path to the tester JSON configuration file.")
parser.add_argument("--grade", dest="grade_file", help="Perform grading analysis and output to this file")
parser.add_argument("--log-failures", dest="failure_log", help="Log the testcases the solution compiler fails.")
parser.add_argument("--timeout", type=float, default=2.0, help="Specify timeout length for EACH command in a toolchain.")
parser.add_argument("--verify", action="store_true", default=False, help="Verify that config and tests are configured correctly")
parser.add_argument("--debug-package", help="Provide a sub-path to run the tester on.")
parser.add_argument("-t", "--time", action="store_true", help="Include the timings (seconds) of each test in the output.")
parser.add_argument("-v", "--verbosity", action="count", default=0, help="Increase verbosity level")
parser.add_argument("config_file",
help="Path to the tester JSON configuration file.")

parser.add_argument("--mode", dest="mode", default="regular",
help="run in regular, grade or script mode")

parser.add_argument("--fail-log", dest="failure_log",
help="Log the testcases the solution compiler fails.")

parser.add_argument("--timeout", type=float, default=2.0,
help="Specify timeout length for EACH command in a toolchain.")

parser.add_argument("--verify", action="store_true", default=False,
help="Verify that config and tests are configured correctly")

parser.add_argument("--debug-package",
help="Provide a sub-path to run the tester on.")

parser.add_argument("-t", "--time", action="store_true",
help="Include the timings (seconds) of each test in the output.")

parser.add_argument("-v", "--verbosity", action="count", default=0,
help="Increase verbosity level")

parser.add_argument("-o", "--output", action="store_true",
help="Direct the output of dragon-runner to a file")

args = parser.parse_args()
if not os.path.isfile(args.config_file):
parser.error(f"The config file {args.config_file} does not exist.")
if bool(args.grade_file) != bool(args.failure_log):
parser.error("Both --grade and --log-failures must be provided together.")
if args.mode == "grade" and not bool(args.failure_log):
parser.error("Failure log must be supplied when using grade mode.")
if args.verbosity > 0:
os.environ["DEBUG"] = str(args.verbosity)

return CLIArgs(
config_file = args.config_file,
grade_file = args.grade_file,
mode = args.mode,
failure_log = args.failure_log,
timeout = args.timeout,
debug_package = args.debug_package,
output_file = args.output,
time = args.time,
verbosity = args.verbosity,
verify = args.verify
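For reference, a minimal sketch of driving the reworked flags from Python. This is not part of the commit; the config and log paths are placeholders, and the config file must actually exist for parsing to succeed:

import sys
from dragon_runner.cli import parse_cli_args

# Hypothetical invocation of the reworked CLI; "Config.json" and "failures.log"
# are placeholder paths chosen for illustration.
sys.argv = [
    "dragon-runner", "Config.json",
    "--mode", "grade",
    "--fail-log", "failures.log",
    "--timeout", "5.0",
    "-v",
]
args = parse_cli_args()   # exits with a parser error if Config.json does not exist
print(args)               # CLIArgs.__repr__ lists Mode, Failure Log, Timeout, ...
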
2 changes: 1 addition & 1 deletion dragon_runner/config.py
@@ -1,6 +1,5 @@
import json
import os
import pathlib
import sys
from pathlib import Path
from typing import Dict, List, Optional
@@ -213,6 +212,7 @@ def log_test_info(self):
"""
Prints a simple formatted table of test information.
"""
log("\nPackages:", level=1)
for pkg in self.packages:
log(f"-- ({pkg.name})", level=1)
for spkg in pkg.subpackages:
62 changes: 36 additions & 26 deletions dragon_runner/harness.py
@@ -1,6 +1,6 @@
import csv
from colorama import Fore
from typing import List, Dict
from typing import List, Dict, Optional
from dragon_runner.cli import CLIArgs
from dragon_runner.config import Config, Executable, Package
from dragon_runner.log import log
@@ -24,7 +24,7 @@ def run_regular(self) -> bool:
Iterate over all tested executables, toolchains, subpackages and tests.
Return True is all pass, false otherwise.
"""
sucecss = True
success = True
for exe in self.config.executables:
log("Running executable:\t", exe.id)
exe.source_env()
@@ -44,12 +44,15 @@ def run_regular(self) -> bool:
sp_pass_count = 0
sp_test_count = 0
for test in spkg.tests:
test_result: TestResult = tc_runner.run(test, exe)
test_result.log(args=self.cli_args)
if test_result.did_pass:
test_result: Optional[TestResult] = tc_runner.run(test, exe)
if not test_result:
success=False
continue
elif test_result.did_pass:
sp_pass_count += 1
else:
self.failures.append(test_result)
test_result.log(args=self.cli_args)
sp_test_count +=1
log("Subpackage Passed: ", sp_pass_count, "/", sp_test_count, indent=2)
pkg_pass_count += sp_pass_count
@@ -62,14 +65,14 @@ def run_regular(self) -> bool:
exe_test_count += tc_test_count
log("Executable Passed: ", exe_pass_count, "/", exe_test_count)
if exe_pass_count != exe_test_count:
sucecss = False
return sucecss
success = False
return success

def run(self) -> bool:
"""
decide wether to run in regular mode or grade mode based on the CLI args
"""
if self.cli_args.grade_file:
if self.cli_args.mode == "grade":
assert self.cli_args.failure_log is not None, "Need to supply failure log!"
print("Running Dragon Runner in grade mode")
return self.run_grader_json()
@@ -85,15 +88,19 @@ def trim_bytes(data: bytes, max_bytes: int = 512) -> bytes:
if len(data) > max_bytes:
trimmed += b"\n... (output trimmed to %d bytes)" % max_bytes
return trimmed

with open(file, 'a+') as feedback_file:
if not result.did_pass:
test_contents = file_to_str(result.test.path)
exp_out = trim_bytes(x) if isinstance(x := result.test.expected_out, bytes) else ""
gen_out = trim_bytes(x) if isinstance(x := result.gen_output, bytes) else ""

feedback_file.write(
f"Test: {result.test.file}\n"\
+ "Test contents:\n" + '-'*40 + '\n' + file_to_str(
result.test.path, max_bytes=512) + '\n' + '-'*40 + '\n'\
+ "Expected Output: " + str(trim_bytes(result.test.expected_out)) + '\n'\
+ "Generated Output: " + str(trim_bytes(result.gen_output)) + '\n'
f"""Test: {result.test.file}\n
Test contents: {test_contents}\n
Expected Output: {exp_out}\n
Generated Output: {gen_out}
"""
)
if result.error_msg:
feedback_file.write(f"Error Message: {result.error_msg}\n")
@@ -125,30 +132,32 @@ def run_grader_json(self) -> bool:
attacking_pkgs = sorted(self.config.packages, key=lambda pkg: pkg.name.lower())
defending_exes = sorted(self.config.executables, key=lambda exe: exe.id.lower())
solution_exe = self.config.solution_exe


# track grader internal errors
exit_status = True

with open(self.cli_args.failure_log, 'w') as sol_fail_log, \
open(self.cli_args.grade_file, 'w', newline='') as grade_csv:

open(self.cli_args.output_file, 'w', newline='') as grade_csv:
csv_writer = csv.writer(grade_csv)

for toolchain in self.config.toolchains:
tc_runner = ToolChainRunner(toolchain, self.cli_args.timeout)
tc_table = self.create_tc_dataframe(toolchain.name, defending_exes, attacking_pkgs)
print(f"\nToolchain: {toolchain.name}")

print(f"\nToolchain: {toolchain.name}")
for def_exe in defending_exes:
def_exe.source_env()
def_feedback_file = f"{def_exe.id}-{toolchain.name}feedback.txt"

def_feedback_file = f"{def_exe.id}-{toolchain.name}feedback.txt"
for a_pkg in attacking_pkgs:
pass_count = 0
test_count = a_pkg.n_tests
print(f"\n {a_pkg.name:<12} --> {def_exe.id:<12}", end='')

print(f"\n {a_pkg.name:<12} --> {def_exe.id:<12}", end='')
for a_spkg in a_pkg.subpackages:
for test in a_spkg.tests:
test_result: TestResult = tc_runner.run(test, def_exe)
if test_result.did_pass:
test_result: Optional[TestResult] = tc_runner.run(test, def_exe)
if not test_result:
log(f"Failed to run test {test.stem}")
exit_status=False
elif test_result.did_pass:
print(Fore.GREEN + '.' + Fore.RESET, end='')
pass_count += 1
else:
@@ -162,4 +171,5 @@ def run_grader_json(self) -> bool:
for exe in defending_exes:
csv_writer.writerow([exe.id] + [tc_table[exe.id][pkg.name] for pkg in attacking_pkgs])
csv_writer.writerow([]) # empty row for separation
return True

return exit_status
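
The grading loop above builds one table per toolchain, with defending executables as rows and attacking packages as columns. A standalone sketch of the CSV shape it appears to emit, using made-up ids and pass counts (an illustration, not dragon-runner code):

import csv

# Illustrative table: defender id -> attacking package -> result cell.
tc_table = {
    "teamA": {"pkg1": "5/5", "pkg2": "3/5"},
    "teamB": {"pkg1": "4/5", "pkg2": "5/5"},
}
attacking_pkgs = ["pkg1", "pkg2"]

with open("grades.csv", "w", newline="") as grade_csv:
    writer = csv.writer(grade_csv)
    writer.writerow(["toolchain: vcalc-llc"])        # assumed header row
    writer.writerow([""] + attacking_pkgs)           # assumed column labels
    for exe_id, row in tc_table.items():
        writer.writerow([exe_id] + [row[pkg] for pkg in attacking_pkgs])
    writer.writerow([])                              # blank separator row, as in the diff
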
1 change: 1 addition & 0 deletions dragon_runner/main.py
@@ -10,6 +10,7 @@
def main():
# parse and verify the CLI arguments
args: CLIArgs = parse_cli_args()
log(args, level=1)

# parse and verify the config
config = load_config(args.config_file, args)
4 changes: 2 additions & 2 deletions dragon_runner/testfile.py
@@ -10,7 +10,7 @@ def __init__(self, test_path, input_dir="input", input_stream_dir="input-stream",
output_dir="output", comment_syntax="//"):
self.path = test_path
self.stem, self.extension = os.path.splitext(os.path.basename(test_path))
self.file = self.stem + self.extension
self.file:str = self.stem + self.extension
self.input_dir = input_dir
self.input_stream_dir = input_stream_dir
self.output_dir = output_dir
@@ -38,7 +38,7 @@ def get_content(self, inline_directive: str, file_directive: str) -> Union[bytes
file_contents = self._get_directive_contents(file_directive)

if inline_contents and file_contents:
return TestFileError(f"Directive Conflict: Supplied both\
return TestFileError(f"Directive Conflict for test {self.file}: Supplied both\
{inline_directive} and {file_directive}")

elif inline_contents:
39 changes: 39 additions & 0 deletions tests/configs/VCalcCompileConfig.json
@@ -0,0 +1,39 @@
{
"testDir": "/home/justin/school/415/vcalc-marking/testfiles",
"testedExecutablePaths": {
"solution": "/home/justin/CDOL/Solutions/VCalc24Solution/bin/vcalc"
},
"runtimes": {
"solution": "/home/justin/CDOL/Solutions/VCalc24Solution/bin/libvcalcrt.so"
},
"solutionExecutable": "solution",
"toolchains": {
"vcalc-llc": [
{
"stepName": "vcalc",
"executablePath": "$EXE",
"arguments": ["$INPUT", "$OUTPUT"],
"output": "vcalc.ll"
},
{
"stepName": "llc",
"executablePath": "/home/justin/install/llvm/llvm-18/bin/llc",
"arguments": ["-filetype=obj", "-relocation-model=pic", "$INPUT", "-o", "$OUTPUT"],
"output": "vcalc.o"
},
{
"stepName": "clang",
"executablePath": "/usr/bin/clang",
"arguments": ["$INPUT", "-o", "$OUTPUT", "-L$RT_PATH", "-l$RT_LIB"],
"output": "vcalc"
},
{
"stepName": "run",
"executablePath": "$INPUT",
"arguments": [],
"usesInStr": true,
"usesRuntime": true
}
]
}
}
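
The $EXE, $INPUT, $OUTPUT, $RT_PATH and $RT_LIB tokens in the toolchain steps above are placeholders the tester fills in at run time. A rough standalone sketch of that kind of substitution, not dragon-runner's actual implementation:

# Hypothetical resolver for the $-tokens used in toolchain step arguments.
def resolve_step_args(arguments, values):
    resolved = []
    for arg in arguments:
        for token, concrete in values.items():
            arg = arg.replace(f"${token}", concrete)
        resolved.append(arg)
    return resolved

step_args = ["$INPUT", "-o", "$OUTPUT", "-L$RT_PATH", "-l$RT_LIB"]
print(resolve_step_args(step_args, {
    "INPUT": "vcalc.o",               # output of the previous llc step
    "OUTPUT": "vcalc",
    "RT_PATH": "/path/to/runtime",    # placeholder runtime directory
    "RT_LIB": "vcalcrt",              # placeholder runtime library name
}))
# -> ['vcalc.o', '-o', 'vcalc', '-L/path/to/runtime', '-lvcalcrt']
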
20 changes: 11 additions & 9 deletions tests/conftest.py
@@ -1,25 +1,27 @@
import pytest
from typing import Optional
from pathlib import Path
from dragon_runner.cli import CLIArgs
from dragon_runner.config import load_config, Config

def get_config_path(config_name: str) -> Path:
return Path(__file__).parent / "configs" / config_name

def create_config(config_name: str) -> Config:
def create_config(config_name: str) -> Optional[Config]:
config_path = get_config_path(config_name)
return load_config(str(config_path))

def create_cli_args(**kwargs) -> CLIArgs:
return CLIArgs(
kwargs.get('config_file', None),
kwargs.get('grade_file', None),
kwargs.get('failure_file', None),
kwargs.get('timeout', None),
kwargs.get('debug-package', None),
kwargs.get('time', None),
kwargs.get('verbosity', None),
kwargs.get('verify', None)
config_file = kwargs.get('config_file', None),
output_file = kwargs.get('output_file', None),
failure_log = kwargs.get('failure_log', None),
debug_package = kwargs.get('debug_package', None),
mode = kwargs.get('mode', None),
timeout = kwargs.get('timeout', None),
time = kwargs.get('time', None),
verbosity = kwargs.get('verbosity', None),
verify = kwargs.get('verify', None)
)

@pytest.fixture(scope="session")
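A hypothetical test using the updated helper, with keyword names matching the new CLIArgs fields (the test name and config path are made up):

from conftest import create_cli_args   # assumes pytest puts tests/ on sys.path

def test_create_cli_args_defaults():
    args = create_cli_args(
        config_file="ConfigRegular.json",   # placeholder config name
        mode="regular",
        timeout=2.0,
        verbosity=0,
    )
    assert args.mode == "regular"
    assert args.timeout == 2.0
    assert args.failure_log is None         # unset kwargs default to None
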
15 changes: 15 additions & 0 deletions tests/packages/CPackage/RegularPass/valid_tests/018_no_conflict.c
@@ -0,0 +1,15 @@
#include <stdio.h>

// INPUT:a
// INPUT:a

int main() {

char c;
scanf("%c", &c);
printf("%c", c);

return 0;
}

//CHECK:a
17 changes: 17 additions & 0 deletions tests/run_tests.py
@@ -0,0 +1,17 @@
#
# Quick script to run each pytest test file in this directory.
#

import os
import subprocess
from pathlib import Path

if __name__ == "__main__":

script_dir = Path(__file__).parent.absolute()
for file in os.listdir(script_dir):
if "test_" in file:
print(file)
subprocess.run(f"pytest {os.path.join(script_dir, file)}", shell=True)
