Implement the performance testing grading script.
(Not me procrastinating this until the night Gazprea 2 is due...)
JustinMeimar committed Dec 10, 2024
1 parent a938b00 commit 11a8549
Showing 6 changed files with 173 additions and 13 deletions.
68 changes: 60 additions & 8 deletions dragon_runner/harness.py
@@ -4,6 +4,7 @@
from dragon_runner.cli import CLIArgs
from dragon_runner.config import Config, Executable, Package
from dragon_runner.log import log
from dragon_runner.testfile import TestFile
from dragon_runner.runner import TestResult, ToolChainRunner
from dragon_runner.utils import file_to_str

@@ -15,12 +16,8 @@ def __init__(self, config: Config, cli_args: CLIArgs):
self.cli_args: CLIArgs = cli_args
self.failures: List[TestResult] = []

def post_run_log(self):
pass

def process_test_result(self, test_result: Optional[TestResult], counters: Dict[str, int]):
"""
Process each test result.
Subclasses should override this method to handle test result processing and update counts.
"""
raise NotImplementedError("Subclasses must implement this method")
@@ -33,7 +30,7 @@ def post_subpackage_hook(self, counters: Dict[str, int]):
"""Hook to run after iterating through a subpackage."""
pass

def pre_executable_hook(self):
def pre_executable_hook(self, exe):
"""Hook to runb efore iterating through an executable."""
pass

@@ -43,14 +40,21 @@ def post_executable_hook(self):
log(f"Failure Summary: ({len(self.failures)} tests)")
for result in self.failures:
result.log()

def post_run_hook(self):
pass

def pre_run_hook(self):
pass

def iterate(self):
"""
Basic structure to record which tests pass and fail. Additional functionality
can be implemented by overriding default hooks.
"""
self.pre_run_hook()
for exe in self.config.executables:
self.pre_executable_hook()
self.pre_executable_hook(exe.id)
log(f"Running executable: {exe.id}", indent=0)
exe.source_env()
exe_pass_count = 0
@@ -83,6 +87,7 @@ def iterate(self):
exe_test_count += tc_test_count
log("Executable Passed: ", exe_pass_count, "/", exe_test_count)
self.post_executable_hook()
self.post_run_hook()

def run(self) -> bool:
"""Default run implementation."""
@@ -278,18 +283,65 @@ def process_test_result(self, test_result: Optional[TestResult], counters: Dict[
counters["pass_count"] += 1
else:
self.failures.append(test_result)


from itertools import zip_longest

class PerformanceTestingHarness(TestHarness):

def __init__(self, config: Config, cli_args: CLIArgs):
super().__init__(config, cli_args)
self.csv_cols = []
self.cur_col = []
self.testfile_col = ["Test"]
self.first_exec = True

def create_tc_dataframe(defenders: List[Executable],
attackers: List[TestFile]) -> Dict[str, Dict[str, str]]:
"""
Create an empty toolchain table with labels for defenders and attackers
"""
df = {exe.id: {pkg.name: '' for pkg in attackers} for exe in defenders}
print(df)
return df

def process_test_result(self, test_result: Optional[TestResult], counters: Dict[str, int]):
"""
Override the hook with a perf-specific implementation that records test times rather than only counting passes
"""
if test_result.error_test:
raise RuntimeError("Can not run perf mode on error tests")

# only construct a column for the test file names once
if self.first_exec:
self.testfile_col.append(test_result.test.file)

if test_result.did_pass:
counters["pass_count"] += 1
test_result.log(args=self.cli_args)
self.cur_col.append(test_result.time)

else:
self.cur_col.append(self.cli_args.timeout)
self.failures.append(test_result)
test_result.log(args=self.cli_args)
counters["test_count"] += 1


def pre_executable_hook(self, exe):
self.cur_col.append(exe)

def post_executable_hook(self):
if self.first_exec:
self.csv_cols.append(self.testfile_col)
self.first_exec = False

self.csv_cols.append(self.cur_col)
self.cur_col = []

def post_run_hook(self):
# transpose the columns into rows for writing
csv_rows = zip_longest(*self.csv_cols, fillvalue='')

with open('perf.csv', 'w', newline='') as file:
writer = csv.writer(file)
writer.writerows(csv_rows)
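
For reference, a minimal standalone sketch of the column-accumulate-then-transpose step above (toy data and made-up executable ids; the real harness fills the columns from TestResult objects):

import csv
from itertools import zip_longest

# One column of test names plus one column per executable, as the harness builds them.
testfile_col = ["Test", "t1.in", "t2.in", "t3.in"]
exe_cols = [
    ["gcc1", 0.12, 0.34, 0.08],
    ["gcc2", 0.15, 0.31, 0.09],
]

csv_cols = [testfile_col] + exe_cols
csv_rows = zip_longest(*csv_cols, fillvalue='')  # transpose columns into rows

with open("perf.csv", "w", newline="") as f:
    csv.writer(f).writerows(csv_rows)

# Resulting perf.csv:
# Test,gcc1,gcc2
# t1.in,0.12,0.15
# t2.in,0.34,0.31
# t3.in,0.08,0.09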

18 changes: 15 additions & 3 deletions dragon_runner/scripts/grade.py
@@ -1,9 +1,21 @@
"""
============================== 415 Grading Script ==============================
============================== 415 Grading Script ================================
Author: Justin Meimar
Name: grade.py
Desc:
================================================================================
Desc: As a prerequisite to running this script, the following should have occurred:
A config including all the student executables *and* the solution executable
has been run with --mode=tournament, producing a csv for each toolchain.
The csv toolchain outputs are inputs for this script, which will apply
the 415 competitive testing algorithm to output a complete CSV.
Note: The solution executable in the original config should be named "solution",
which this script uses as a reserved keyword.
==================================================================================
"""
import argparse
import csv
68 changes: 68 additions & 0 deletions dragon_runner/scripts/grade_perf.py
@@ -0,0 +1,68 @@
"""
============================== 415 Grading Script ==============================
Author: Justin Meimar
Name: grade_perf.py
Desc: Running dragon-runner with --mode=perf, on a config that points to the
performance tests and includes an executable for each compiler under
test, produces a perf.csv file.
This script takes perf.csv as its input and runs the performance testing
grading algorithm, returning a single CSV row that indicates the perf
score for each team.
The intention is that the single row be manually copied and pasted into
the output of the grade.py script.
================================================================================
"""
import argparse
import csv
import numpy as np
from pathlib import Path

def grade_perf(*args):
"""
Read the perf.csv produced by --mode=perf, score each executable on every
test as fastest_time / time, and average the per-test scores to obtain a
final perf score for each team.
"""

if len(args) < 2:
print("Must supply two arguments: <perf_csv> <output_csv>")
return 1

with open(args[0], "r") as perf_csv:
reader = csv.reader(perf_csv)
headers = next(reader)
test_data = list(reader)

# test_names = [row[0] for row in test_data]
raw_times = np.array([[float(x) for x in row[1:]] for row in test_data])

scores = []
for times in raw_times:
fastest_time = min(times)
test_scores = [fastest_time / time for time in times]
scores.append(test_scores)
total_scores = np.mean(scores, axis=0)

print(headers[1:])
print(total_scores)

if __name__ == "__main__":

parser = argparse.ArgumentParser()
parser.add_argument(
"perf_csv",
type=Path,
nargs="+",
help="Path to one or more csv files generated from grade mode"
)
parser.add_argument(
"output_csv",
type=Path,
help="Path to final output csv with grades"
)

args = parser.parse_args()
grade_perf(args.perf_csv, args.output_csv)
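
A worked example of the scoring rule with made-up timings, written as an equivalent vectorized form of the loop above (illustration only, not part of the commit):

import numpy as np

# Hypothetical timings (seconds): rows are tests, columns are compilers gcc1..gcc3.
raw_times = np.array([[1.0, 2.0, 4.0],
                      [2.0, 2.0, 8.0]])

# Per test: score = fastest_time / time, so the fastest compiler on that test gets 1.0.
scores = raw_times.min(axis=1, keepdims=True) / raw_times

# The final perf score per compiler is the mean down each column.
print(scores.mean(axis=0))  # [1.0, 0.75, 0.25]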

4 changes: 3 additions & 1 deletion dragon_runner/scripts/loader.py
@@ -4,6 +4,7 @@
from dragon_runner.scripts.grade import grade
from dragon_runner.scripts.gather import gather
from dragon_runner.scripts.gen_config import main as gen_config
from dragon_runner.scripts.grade_perf import grade_perf

class Loader:
"""
@@ -25,9 +26,10 @@ def unknown_script():

script_dispatch = {
"build": lambda: build(*self.args),
"grade": lambda: grade(*self.args),
"gather": lambda: gather(*self.args),
"gen-config": lambda: gen_config(*self.args),
"grade": lambda: grade(*self.args),
"grade-perf": lambda: grade_perf(*self.args),
"anon-tests": lambda: print("TODO"),
"anon-csv": lambda: print("TODO"),
"preview": lambda: print("TODO")
26 changes: 26 additions & 0 deletions tests/configs/perfConfig.json
@@ -0,0 +1,26 @@
{
"testDir": "../packages/CPackage/RegularPass/",
"testedExecutablePaths": {
"gcc1": "/usr/bin/gcc",
"gcc2": "/usr/bin/gcc",
"gcc3": "/usr/bin/gcc"
},
"toolchains": {
"GCC-toolchain": [
{
"stepName": "compile",
"executablePath": "$EXE",
"arguments": ["$INPUT", "-o", "$OUTPUT"],
"output": "/tmp/test.o",
"allowError": true
},
{
"stepName": "run",
"executablePath": "$INPUT",
"arguments": [],
"usesInStr": true,
"allowError": true
}
]
}
}
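
The toolchain above appears to compile each test with the tested gcc and then run the produced binary. A rough sketch of how one step's placeholders might expand, assuming $EXE is the tested executable, $INPUT the step's input file, and $OUTPUT the declared output path (assumed semantics, not dragon_runner's actual resolver; the test file name is hypothetical):

def expand_step(step, exe_path, input_path):
    # Substitute the placeholder variables in a single toolchain step.
    sub = {"$EXE": exe_path, "$INPUT": input_path, "$OUTPUT": step.get("output", "")}
    resolve = lambda tok: sub.get(tok, tok)
    return [resolve(step["executablePath"])] + [resolve(arg) for arg in step["arguments"]]

compile_step = {
    "stepName": "compile",
    "executablePath": "$EXE",
    "arguments": ["$INPUT", "-o", "$OUTPUT"],
    "output": "/tmp/test.o",
}
print(expand_step(compile_step, "/usr/bin/gcc", "tests/loop.c"))
# ['/usr/bin/gcc', 'tests/loop.c', '-o', '/tmp/test.o']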
@@ -21,4 +21,4 @@ int main() {
return 0;
}

//CHECK:DivideByZeroError:
//CHECK:DivideByZeroError:
