From d3513a21dd185fd5d0ade516b291d5214069720e Mon Sep 17 00:00:00 2001 From: Edmund Higham Date: Wed, 30 Oct 2024 17:30:33 -0400 Subject: [PATCH] fixes for running benchmarks with no arguments --- .../benchmark/hail/benchmark_shuffle.py | 13 +++++++ hail/python/benchmark/hail/conftest.py | 34 +++++++++++-------- hail/python/benchmark/hail/fixtures.py | 4 +-- hail/python/hail/backend/backend.py | 10 ++++++ hail/python/hail/backend/py4j_backend.py | 4 +-- 5 files changed, 47 insertions(+), 18 deletions(-) diff --git a/hail/python/benchmark/hail/benchmark_shuffle.py b/hail/python/benchmark/hail/benchmark_shuffle.py index 13ee24ef40a2..3f01aba791e1 100644 --- a/hail/python/benchmark/hail/benchmark_shuffle.py +++ b/hail/python/benchmark/hail/benchmark_shuffle.py @@ -1,7 +1,20 @@ +import pytest + import hail as hl from benchmark.tools import benchmark +@pytest.fixture(autouse=True) +def new_query_tmpdir(tmp_path): + backend = hl.current_backend() + old = backend.local_tmpdir + backend.local_tmpdir = str(tmp_path) + try: + yield + finally: + backend.local_tmpdir = old + + @benchmark() def benchmark_shuffle_key_rows_by_mt(profile25_mt): mt = hl.read_matrix_table(str(profile25_mt)) diff --git a/hail/python/benchmark/hail/conftest.py b/hail/python/benchmark/hail/conftest.py index a8a10df208db..a59e951010c2 100644 --- a/hail/python/benchmark/hail/conftest.py +++ b/hail/python/benchmark/hail/conftest.py @@ -111,6 +111,10 @@ def init_hail(run_config): context = pytest.StashKey[Literal['burn_in', 'benchmark']]() +def prune(kvs: dict) -> dict: + return {k: v for k, v in kvs.items() if v is not None} + + @pytest.hookimpl(tryfirst=True) def pytest_runtest_protocol(item, nextitem): run_config = item.session.config.run_config @@ -184,14 +188,16 @@ def pytest_pyfunc_call(pyfuncitem): is_burn_in = s[context] == 'burn_in' - s[runs].append({ - 'iteration': s[iteration], - 'time': time, - 'is_burn_in': is_burn_in, - 'timed_out': timed_out, - 'failure': traceback, - 'task_memory': get_peak_task_memory(Env.hc()._log), - }) + s[runs].append( + prune({ + 'iteration': s[iteration], + 'time': time, + 'is_burn_in': is_burn_in, + 'timed_out': timed_out, + 'failure': traceback, + 'task_memory': get_peak_task_memory(Env.hc()._log), + }) + ) logging.info(f'{"(burn in) " if is_burn_in else ""}iteration {s[iteration]}, time: {time}s') @@ -210,15 +216,15 @@ def open_file_or_stdout(file): @pytest.hookimpl def pytest_sessionfinish(session): - if not session.config.option.collectonly: + if hasattr(session, 'items') and len(session.items) > 0 and not session.config.option.collectonly: run_config = session.config.run_config meta = { 'uname': platform.uname()._asdict(), 'version': hl.__version__, - **({'batch_id': batch} if (batch := os.getenv('HAIL_BATCH_ID')) else {}), - **({'job_id': job} if (job := os.getenv('HAIL_JOB_ID')) else {}), - **({'trial': trial} if (trial := os.getenv('BENCHMARK_TRIAL_ID')) else {}), + 'batch_id': os.getenv('HAIL_BATCH_ID'), + 'job_id': os.getenv('HAIL_JOB_ID'), + 'trial': os.getenv('BENCHMARK_TRIAL_ID'), } now = datetime.now(timezone.utc).isoformat() @@ -226,14 +232,14 @@ def pytest_sessionfinish(session): for item in session.items: path, _, name = item.location json.dump( - { + prune({ 'path': path, 'name': name, **meta, 'start': item.stash[start], 'end': item.stash.get(end, now), 'runs': item.stash[runs], - }, + }), out, ) out.write('\n') diff --git a/hail/python/benchmark/hail/fixtures.py b/hail/python/benchmark/hail/fixtures.py index 2387d9bff5c3..2686f8fd62a9 100644 --- a/hail/python/benchmark/hail/fixtures.py +++ b/hail/python/benchmark/hail/fixtures.py @@ -15,7 +15,7 @@ def resource_dir(request, tmpdir_factory): resource_dir = Path(run_config.data_dir) resource_dir.mkdir(parents=True, exist_ok=True) else: - resource_dir = tmpdir_factory.mktemp('hail_benchmark_resources') + resource_dir = Path(tmpdir_factory.mktemp('hail_benchmark_resources')) return resource_dir @@ -35,7 +35,7 @@ def __download(data_dir, filename): def localize(path: Path): if not path.exists(): - path.parent.mkdir(exist_ok=True) + path.parent.mkdir(parents=True, exist_ok=True) __download(path.parent, path.name) return path diff --git a/hail/python/hail/backend/backend.py b/hail/python/hail/backend/backend.py index dd35b670e660..8d8e00926299 100644 --- a/hail/python/hail/backend/backend.py +++ b/hail/python/hail/backend/backend.py @@ -398,7 +398,17 @@ def requires_lowering(self): def local_tmpdir(self) -> str: pass + @local_tmpdir.setter + @abc.abstractmethod + def local_tmpdir(self, dir: str) -> None: + pass + @property @abc.abstractmethod def remote_tmpdir(self) -> str: pass + + @remote_tmpdir.setter + @abc.abstractmethod + def remote_tmpdir(self, dir: str) -> None: + pass diff --git a/hail/python/hail/backend/py4j_backend.py b/hail/python/hail/backend/py4j_backend.py index c34ca1d6b986..dfdcf5a685a8 100644 --- a/hail/python/hail/backend/py4j_backend.py +++ b/hail/python/hail/backend/py4j_backend.py @@ -332,7 +332,7 @@ def local_tmpdir(self) -> str: return self._local_tmpdir @local_tmpdir.setter - def local_tmpdir(self, tmpdir) -> str: + def local_tmpdir(self, tmpdir: str) -> None: self._local_tmpdir = tmpdir self._jbackend.pySetLocalTmp(tmpdir) @@ -341,6 +341,6 @@ def remote_tmpdir(self) -> str: return self._remote_tmpdir @remote_tmpdir.setter - def remote_tmpdir(self, tmpdir) -> str: + def remote_tmpdir(self, tmpdir: str) -> None: self._remote_tmpdir = tmpdir self._jbackend.pySetRemoteTmp(tmpdir)