diff --git a/hail/python/benchmark/hail/benchmark_shuffle.py b/hail/python/benchmark/hail/benchmark_shuffle.py index 13ee24ef40a2..3f01aba791e1 100644 --- a/hail/python/benchmark/hail/benchmark_shuffle.py +++ b/hail/python/benchmark/hail/benchmark_shuffle.py @@ -1,7 +1,20 @@ +import pytest + import hail as hl from benchmark.tools import benchmark +@pytest.fixture(autouse=True) +def new_query_tmpdir(tmp_path): + backend = hl.current_backend() + old = backend.local_tmpdir + backend.local_tmpdir = str(tmp_path) + try: + yield + finally: + backend.local_tmpdir = old + + @benchmark() def benchmark_shuffle_key_rows_by_mt(profile25_mt): mt = hl.read_matrix_table(str(profile25_mt)) diff --git a/hail/python/benchmark/hail/conftest.py b/hail/python/benchmark/hail/conftest.py index a8a10df208db..a59e951010c2 100644 --- a/hail/python/benchmark/hail/conftest.py +++ b/hail/python/benchmark/hail/conftest.py @@ -111,6 +111,10 @@ def init_hail(run_config): context = pytest.StashKey[Literal['burn_in', 'benchmark']]() +def prune(kvs: dict) -> dict: + return {k: v for k, v in kvs.items() if v is not None} + + @pytest.hookimpl(tryfirst=True) def pytest_runtest_protocol(item, nextitem): run_config = item.session.config.run_config @@ -184,14 +188,16 @@ def pytest_pyfunc_call(pyfuncitem): is_burn_in = s[context] == 'burn_in' - s[runs].append({ - 'iteration': s[iteration], - 'time': time, - 'is_burn_in': is_burn_in, - 'timed_out': timed_out, - 'failure': traceback, - 'task_memory': get_peak_task_memory(Env.hc()._log), - }) + s[runs].append( + prune({ + 'iteration': s[iteration], + 'time': time, + 'is_burn_in': is_burn_in, + 'timed_out': timed_out, + 'failure': traceback, + 'task_memory': get_peak_task_memory(Env.hc()._log), + }) + ) logging.info(f'{"(burn in) " if is_burn_in else ""}iteration {s[iteration]}, time: {time}s') @@ -210,15 +216,15 @@ def open_file_or_stdout(file): @pytest.hookimpl def pytest_sessionfinish(session): - if not session.config.option.collectonly: + if hasattr(session, 'items') and len(session.items) > 0 and not session.config.option.collectonly: run_config = session.config.run_config meta = { 'uname': platform.uname()._asdict(), 'version': hl.__version__, - **({'batch_id': batch} if (batch := os.getenv('HAIL_BATCH_ID')) else {}), - **({'job_id': job} if (job := os.getenv('HAIL_JOB_ID')) else {}), - **({'trial': trial} if (trial := os.getenv('BENCHMARK_TRIAL_ID')) else {}), + 'batch_id': os.getenv('HAIL_BATCH_ID'), + 'job_id': os.getenv('HAIL_JOB_ID'), + 'trial': os.getenv('BENCHMARK_TRIAL_ID'), } now = datetime.now(timezone.utc).isoformat() @@ -226,14 +232,14 @@ def pytest_sessionfinish(session): for item in session.items: path, _, name = item.location json.dump( - { + prune({ 'path': path, 'name': name, **meta, 'start': item.stash[start], 'end': item.stash.get(end, now), 'runs': item.stash[runs], - }, + }), out, ) out.write('\n') diff --git a/hail/python/benchmark/hail/fixtures.py b/hail/python/benchmark/hail/fixtures.py index 2387d9bff5c3..2686f8fd62a9 100644 --- a/hail/python/benchmark/hail/fixtures.py +++ b/hail/python/benchmark/hail/fixtures.py @@ -15,7 +15,7 @@ def resource_dir(request, tmpdir_factory): resource_dir = Path(run_config.data_dir) resource_dir.mkdir(parents=True, exist_ok=True) else: - resource_dir = tmpdir_factory.mktemp('hail_benchmark_resources') + resource_dir = Path(tmpdir_factory.mktemp('hail_benchmark_resources')) return resource_dir @@ -35,7 +35,7 @@ def __download(data_dir, filename): def localize(path: Path): if not path.exists(): - path.parent.mkdir(exist_ok=True) + path.parent.mkdir(parents=True, exist_ok=True) __download(path.parent, path.name) return path diff --git a/hail/python/hail/backend/backend.py b/hail/python/hail/backend/backend.py index dd35b670e660..8d8e00926299 100644 --- a/hail/python/hail/backend/backend.py +++ b/hail/python/hail/backend/backend.py @@ -398,7 +398,17 @@ def requires_lowering(self): def local_tmpdir(self) -> str: pass + @local_tmpdir.setter + @abc.abstractmethod + def local_tmpdir(self, dir: str) -> None: + pass + @property @abc.abstractmethod def remote_tmpdir(self) -> str: pass + + @remote_tmpdir.setter + @abc.abstractmethod + def remote_tmpdir(self, dir: str) -> None: + pass diff --git a/hail/python/hail/backend/py4j_backend.py b/hail/python/hail/backend/py4j_backend.py index c34ca1d6b986..dfdcf5a685a8 100644 --- a/hail/python/hail/backend/py4j_backend.py +++ b/hail/python/hail/backend/py4j_backend.py @@ -332,7 +332,7 @@ def local_tmpdir(self) -> str: return self._local_tmpdir @local_tmpdir.setter - def local_tmpdir(self, tmpdir) -> str: + def local_tmpdir(self, tmpdir: str) -> None: self._local_tmpdir = tmpdir self._jbackend.pySetLocalTmp(tmpdir) @@ -341,6 +341,6 @@ def remote_tmpdir(self) -> str: return self._remote_tmpdir @remote_tmpdir.setter - def remote_tmpdir(self, tmpdir) -> str: + def remote_tmpdir(self, tmpdir: str) -> None: self._remote_tmpdir = tmpdir self._jbackend.pySetRemoteTmp(tmpdir) diff --git a/hail/python/hail/context.py b/hail/python/hail/context.py index 4ffc0f8b9ede..49914ac6e5fb 100644 --- a/hail/python/hail/context.py +++ b/hail/python/hail/context.py @@ -67,8 +67,6 @@ def create( log: str, quiet: bool, append: bool, - tmpdir: str, - local_tmpdir: str, default_reference: str, global_seed: Optional[int], backend: Backend, @@ -77,25 +75,17 @@ def create( log=log, quiet=quiet, append=append, - tmpdir=tmpdir, - local_tmpdir=local_tmpdir, global_seed=global_seed, backend=backend, ) hc.initialize_references(default_reference) return hc - @typecheck_method( - log=str, quiet=bool, append=bool, tmpdir=str, local_tmpdir=str, global_seed=nullable(int), backend=Backend - ) - def __init__(self, log, quiet, append, tmpdir, local_tmpdir, global_seed, backend): + @typecheck_method(log=str, quiet=bool, append=bool, global_seed=nullable(int), backend=Backend) + def __init__(self, log, quiet, append, global_seed, backend: Backend): assert not Env._hc self._log = log - - self._tmpdir = tmpdir - self._local_tmpdir = local_tmpdir - self._backend = backend self._warn_cols_order = True @@ -138,6 +128,14 @@ def initialize_references(self, default_reference): else: self._default_ref = ReferenceGenome.read(default_reference) + @property + def _tmpdir(self) -> str: + return self._backend.remote_tmpdir + + @property + def _local_tmpdir(self) -> str: + return self._backend.local_tmpdir + @property def default_reference(self) -> ReferenceGenome: assert self._default_ref is not None, '_default_ref should have been initialized in HailContext.create' @@ -500,7 +498,7 @@ def init_spark( if not backend.fs.exists(tmpdir): backend.fs.mkdir(tmpdir) - HailContext.create(log, quiet, append, tmpdir, local_tmpdir, default_reference, global_seed, backend) + HailContext.create(log, quiet, append, default_reference, global_seed, backend) if not quiet: connect_logger(backend._utils_package_object, 'localhost', 12888) @@ -571,7 +569,7 @@ async def init_batch( tmpdir = os.path.join(backend.remote_tmpdir, 'tmp/hail', secret_alnum_string()) local_tmpdir = _get_local_tmpdir(local_tmpdir) - HailContext.create(log, quiet, append, tmpdir, local_tmpdir, default_reference, global_seed, backend) + HailContext.create(log, quiet, append, default_reference, global_seed, backend) @typecheck( @@ -623,7 +621,7 @@ def init_local( if not backend.fs.exists(tmpdir): backend.fs.mkdir(tmpdir) - HailContext.create(log, quiet, append, tmpdir, tmpdir, default_reference, global_seed, backend) + HailContext.create(log, quiet, append, default_reference, global_seed, backend) if not quiet: connect_logger(backend._utils_package_object, 'localhost', 12888)