Implement the performance testing grading script.
(Not me procrastinating this until the night Gazprea 2 is due...)
JustinMeimar committed Dec 10, 2024
1 parent a938b00 commit 11a8549
Showing 6 changed files with 173 additions and 13 deletions.
68 changes: 60 additions & 8 deletions dragon_runner/harness.py
@@ -4,6 +4,7 @@
from dragon_runner.cli import CLIArgs
from dragon_runner.config import Config, Executable, Package
from dragon_runner.log import log
from dragon_runner.testfile import TestFile
from dragon_runner.runner import TestResult, ToolChainRunner
from dragon_runner.utils import file_to_str

@@ -15,12 +16,8 @@ def __init__(self, config: Config, cli_args: CLIArgs):
self.cli_args: CLIArgs = cli_args
self.failures: List[TestResult] = []

def post_run_log(self):
pass

def process_test_result(self, test_result: Optional[TestResult], counters: Dict[str, int]):
"""
Process each test result.
Subclasses should override this method to handle test result processing and update counts.
"""
raise NotImplementedError("Subclasses must implement this method")
@@ -33,7 +30,7 @@ def post_subpackage_hook(self, counters: Dict[str, int]):
"""Hook to run after iterating through a subpackage."""
pass

def pre_executable_hook(self):
def pre_executable_hook(self, exe):
"""Hook to runb efore iterating through an executable."""
pass

@@ -43,14 +40,21 @@ def post_executable_hook(self):
log(f"Failure Summary: ({len(self.failures)} tests)")
for result in self.failures:
result.log()

def post_run_hook(self):
pass

def pre_run_hook(self):
pass

def iterate(self):
"""
Basic structure to record which tests pass and fail. Additional functionality
can be implemented by overriding default hooks.
"""
self.pre_run_hook()
for exe in self.config.executables:
self.pre_executable_hook()
self.pre_executable_hook(exe.id)
log(f"Running executable: {exe.id}", indent=0)
exe.source_env()
exe_pass_count = 0
@@ -83,6 +87,7 @@ def iterate(self):
exe_test_count += tc_test_count
log("Executable Passed: ", exe_pass_count, "/", exe_test_count)
self.post_executable_hook()
self.post_run_hook()

def run(self) -> bool:
"""Default run implementation."""
@@ -278,18 +283,65 @@ def process_test_result(self, test_result: Optional[TestResult], counters: Dict[
counters["pass_count"] += 1
else:
self.failures.append(test_result)


from itertools import zip_longest

class PerformanceTestingHarness(TestHarness):

def __init__(self, config: Config, cli_args: CLIArgs):
super().__init__(config, cli_args)
self.csv_cols = []
self.cur_col = []
self.testfile_col = ["Test"]
self.first_exec = True

def create_tc_dataframe(defenders: List[Executable],
attackers: List[TestFile]) -> Dict[str, Dict[str, str]]:
"""
Create an empty toolchain table with labels for defenders and attackers
"""
df = {exe.id: {pkg.name: '' for pkg in attackers} for exe in defenders}
print(df)
return df

def process_test_result(self, test_result: Optional[TestResult], counters: Dict[str, int]):
"""
Override the hook with a perf-specific implementation that records test times rather than only counting passes
"""
if test_result.error_test:
raise RuntimeError("Can not run perf mode on error tests")

# only construct a column for the test file names once
if self.first_exec:
self.testfile_col.append(test_result.test.file)

if test_result.did_pass:
counters["pass_count"] += 1
test_result.log(args=self.cli_args)
self.cur_col.append(test_result.time)

else:
self.cur_col.append(self.cli_args.timeout)
self.failures.append(test_result)
test_result.log(args=self.cli_args)
counters["test_count"] += 1


def pre_executable_hook(self, exe):
self.cur_col.append(exe)

def post_executable_hook(self):
if self.first_exec:
self.csv_cols.append(self.testfile_col)
self.first_exec = False

self.csv_cols.append(self.cur_col)
self.cur_col = []

def post_run_hook(self):
# transpose the columns into rows for writing
csv_rows = zip_longest(*self.csv_cols, fillvalue='')

with open('perf.csv', 'w', newline='') as file:
writer = csv.writer(file)
writer.writerows(csv_rows)
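
For reference, a minimal standalone sketch of the column-accumulate-then-transpose step above (toy data and made-up executable ids; the real harness fills the columns from TestResult objects):

import csv
from itertools import zip_longest

# One column of test names plus one column per executable, as the harness builds them.
testfile_col = ["Test", "t1.in", "t2.in", "t3.in"]
exe_cols = [
    ["gcc1", 0.12, 0.34, 0.08],
    ["gcc2", 0.15, 0.31, 0.09],
]

csv_cols = [testfile_col] + exe_cols
csv_rows = zip_longest(*csv_cols, fillvalue='')  # transpose columns into rows

with open("perf.csv", "w", newline="") as f:
    csv.writer(f).writerows(csv_rows)

# Resulting perf.csv:
# Test,gcc1,gcc2
# t1.in,0.12,0.15
# t2.in,0.34,0.31
# t3.in,0.08,0.09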

18 changes: 15 additions & 3 deletions dragon_runner/scripts/grade.py
@@ -1,9 +1,21 @@
"""
============================== 415 Grading Script ==============================
============================== 415 Grading Script ================================
Author: Justin Meimar
Name: grade.py
Desc:
================================================================================
Desc: As a prerequisite to running this script, the following should have occurred:
A config including all the student executables *and* the solution executable
has been run with --mode=tournament, producing a csv for each toolchain.
The csv toolchain outputs are inputs for this script, which will apply
the 415 competitive testing algorithm to output a complete CSV.
Note: The solution executable in the original config should be named "solution",
which this script uses as a reserved keyword.
==================================================================================
"""
import argparse
import csv
68 changes: 68 additions & 0 deletions dragon_runner/scripts/grade_perf.py
@@ -0,0 +1,68 @@
"""
============================== 415 Grading Script ==============================
Author: Justin Meimar
Name: grade_perf.py
Desc: Running dragon-runner with --mode=perf, on a config that points to the
performance tests and includes an executable for each compiler under
test, produces a perf.csv file.
This script takes perf.csv as its input and runs the performance testing
grading algorithm, returning a single CSV row that indicates the perf
score for each team.
The intention is that the single row be manually copied and pasted into
the output of the grade.py script.
================================================================================
"""
import argparse
import csv
import numpy as np
from pathlib import Path

def grade_perf(*args):
"""
Read the perf.csv produced by --mode=perf, score each executable on every
test as fastest_time / time, and average the per-test scores to obtain a
final perf score for each team.
"""

if len(args) < 2:
print("Must supply two arguments: <perf_csv> <output_csv>")
return 1

with open(args[0], "r") as perf_csv:
reader = csv.reader(perf_csv)
headers = next(reader)
test_data = list(reader)

# test_names = [row[0] for row in test_data]
raw_times = np.array([[float(x) for x in row[1:]] for row in test_data])

scores = []
for times in raw_times:
fastest_time = min(times)
test_scores = [fastest_time / time for time in times]
scores.append(test_scores)
total_scores = np.mean(scores, axis=0)

print(headers[1:])
print(total_scores)

if __name__ == "__main__":

parser = argparse.ArgumentParser()
parser.add_argument(
"perf_csv",
type=Path,
nargs="+",
help="Path to one or more csv files generated from grade mode"
)
parser.add_argument(
"output_csv",
type=Path,
help="Path to final output csv with grades"
)

args = parser.parse_args()
grade_perf(args.perf_csv, args.output_csv)
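
A worked example of the scoring rule with made-up timings, written as an equivalent vectorized form of the loop above (illustration only, not part of the commit):

import numpy as np

# Hypothetical timings (seconds): rows are tests, columns are compilers gcc1..gcc3.
raw_times = np.array([[1.0, 2.0, 4.0],
                      [2.0, 2.0, 8.0]])

# Per test: score = fastest_time / time, so the fastest compiler on that test gets 1.0.
scores = raw_times.min(axis=1, keepdims=True) / raw_times

# The final perf score per compiler is the mean down each column.
print(scores.mean(axis=0))  # [1.0, 0.75, 0.25]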

4 changes: 3 additions & 1 deletion dragon_runner/scripts/loader.py
@@ -4,6 +4,7 @@
from dragon_runner.scripts.grade import grade
from dragon_runner.scripts.gather import gather
from dragon_runner.scripts.gen_config import main as gen_config
from dragon_runner.scripts.grade_perf import grade_perf

class Loader:
"""
@@ -25,9 +26,10 @@ def unknown_script():

script_dispatch = {
"build": lambda: build(*self.args),
"grade": lambda: grade(*self.args),
"gather": lambda: gather(*self.args),
"gen-config": lambda: gen_config(*self.args),
"grade": lambda: grade(*self.args),
"grade-perf": lambda: grade_perf(*self.args),
"anon-tests": lambda: print("TODO"),
"anon-csv": lambda: print("TODO"),
"preview": lambda: print("TODO")
26 changes: 26 additions & 0 deletions tests/configs/perfConfig.json
@@ -0,0 +1,26 @@
{
"testDir": "../packages/CPackage/RegularPass/",
"testedExecutablePaths": {
"gcc1": "/usr/bin/gcc",
"gcc2": "/usr/bin/gcc",
"gcc3": "/usr/bin/gcc"
},
"toolchains": {
"GCC-toolchain": [
{
"stepName": "compile",
"executablePath": "$EXE",
"arguments": ["$INPUT", "-o", "$OUTPUT"],
"output": "/tmp/test.o",
"allowError": true
},
{
"stepName": "run",
"executablePath": "$INPUT",
"arguments": [],
"usesInStr": true,
"allowError": true
}
]
}
}
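
The toolchain above appears to compile each test with the tested gcc and then run the produced binary. A rough sketch of how one step's placeholders might expand, assuming $EXE is the tested executable, $INPUT the step's input file, and $OUTPUT the declared output path (assumed semantics, not dragon_runner's actual resolver; the test file name is hypothetical):

def expand_step(step, exe_path, input_path):
    # Substitute the placeholder variables in a single toolchain step.
    sub = {"$EXE": exe_path, "$INPUT": input_path, "$OUTPUT": step.get("output", "")}
    resolve = lambda tok: sub.get(tok, tok)
    return [resolve(step["executablePath"])] + [resolve(arg) for arg in step["arguments"]]

compile_step = {
    "stepName": "compile",
    "executablePath": "$EXE",
    "arguments": ["$INPUT", "-o", "$OUTPUT"],
    "output": "/tmp/test.o",
}
print(expand_step(compile_step, "/usr/bin/gcc", "tests/loop.c"))
# ['/usr/bin/gcc', 'tests/loop.c', '-o', '/tmp/test.o']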
@@ -21,4 +21,4 @@ int main() {
return 0;
}

//CHECK:DivideByZeroError:
//CHECK:DivideByZeroError:
