Skip to content

Commit

Permalink
fixes for running benchmarks with no arguments
Browse files Browse the repository at this point in the history
  • Loading branch information
ehigham committed Nov 20, 2024
1 parent 117db4f commit 7965c8f
Show file tree
Hide file tree
Showing 6 changed files with 60 additions and 33 deletions.
13 changes: 13 additions & 0 deletions hail/python/benchmark/hail/benchmark_shuffle.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,20 @@
import pytest

import hail as hl
from benchmark.tools import benchmark


@pytest.fixture(autouse=True)
def new_query_tmpdir(tmp_path):
backend = hl.current_backend()
old = backend.local_tmpdir
backend.local_tmpdir = str(tmp_path)
try:
yield
finally:
backend.local_tmpdir = old


@benchmark()
def benchmark_shuffle_key_rows_by_mt(profile25_mt):
mt = hl.read_matrix_table(str(profile25_mt))
Expand Down
34 changes: 20 additions & 14 deletions hail/python/benchmark/hail/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,10 @@ def init_hail(run_config):
context = pytest.StashKey[Literal['burn_in', 'benchmark']]()


def prune(kvs: dict) -> dict:
return {k: v for k, v in kvs.items() if v is not None}


@pytest.hookimpl(tryfirst=True)
def pytest_runtest_protocol(item, nextitem):
run_config = item.session.config.run_config
Expand Down Expand Up @@ -184,14 +188,16 @@ def pytest_pyfunc_call(pyfuncitem):

is_burn_in = s[context] == 'burn_in'

s[runs].append({
'iteration': s[iteration],
'time': time,
'is_burn_in': is_burn_in,
'timed_out': timed_out,
'failure': traceback,
'task_memory': get_peak_task_memory(Env.hc()._log),
})
s[runs].append(
prune({
'iteration': s[iteration],
'time': time,
'is_burn_in': is_burn_in,
'timed_out': timed_out,
'failure': traceback,
'task_memory': get_peak_task_memory(Env.hc()._log),
})
)

logging.info(f'{"(burn in) " if is_burn_in else ""}iteration {s[iteration]}, time: {time}s')

Expand All @@ -210,30 +216,30 @@ def open_file_or_stdout(file):

@pytest.hookimpl
def pytest_sessionfinish(session):
if not session.config.option.collectonly:
if hasattr(session, 'items') and len(session.items) > 0 and not session.config.option.collectonly:
run_config = session.config.run_config

meta = {
'uname': platform.uname()._asdict(),
'version': hl.__version__,
**({'batch_id': batch} if (batch := os.getenv('HAIL_BATCH_ID')) else {}),
**({'job_id': job} if (job := os.getenv('HAIL_JOB_ID')) else {}),
**({'trial': trial} if (trial := os.getenv('BENCHMARK_TRIAL_ID')) else {}),
'batch_id': os.getenv('HAIL_BATCH_ID'),
'job_id': os.getenv('HAIL_JOB_ID'),
'trial': os.getenv('BENCHMARK_TRIAL_ID'),
}

now = datetime.now(timezone.utc).isoformat()
with open_file_or_stdout(run_config.output) as out:
for item in session.items:
path, _, name = item.location
json.dump(
{
prune({
'path': path,
'name': name,
**meta,
'start': item.stash[start],
'end': item.stash.get(end, now),
'runs': item.stash[runs],
},
}),
out,
)
out.write('\n')
Expand Down
4 changes: 2 additions & 2 deletions hail/python/benchmark/hail/fixtures.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ def resource_dir(request, tmpdir_factory):
resource_dir = Path(run_config.data_dir)
resource_dir.mkdir(parents=True, exist_ok=True)
else:
resource_dir = tmpdir_factory.mktemp('hail_benchmark_resources')
resource_dir = Path(tmpdir_factory.mktemp('hail_benchmark_resources'))

return resource_dir

Expand All @@ -35,7 +35,7 @@ def __download(data_dir, filename):

def localize(path: Path):
if not path.exists():
path.parent.mkdir(exist_ok=True)
path.parent.mkdir(parents=True, exist_ok=True)
__download(path.parent, path.name)

return path
Expand Down
10 changes: 10 additions & 0 deletions hail/python/hail/backend/backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -398,7 +398,17 @@ def requires_lowering(self):
def local_tmpdir(self) -> str:
pass

@local_tmpdir.setter
@abc.abstractmethod
def local_tmpdir(self, dir: str) -> None:
pass

@property
@abc.abstractmethod
def remote_tmpdir(self) -> str:
pass

@remote_tmpdir.setter
@abc.abstractmethod
def remote_tmpdir(self, dir: str) -> None:
pass
4 changes: 2 additions & 2 deletions hail/python/hail/backend/py4j_backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -332,7 +332,7 @@ def local_tmpdir(self) -> str:
return self._local_tmpdir

@local_tmpdir.setter
def local_tmpdir(self, tmpdir) -> str:
def local_tmpdir(self, tmpdir: str) -> None:
self._local_tmpdir = tmpdir
self._jbackend.pySetLocalTmp(tmpdir)

Expand All @@ -341,6 +341,6 @@ def remote_tmpdir(self) -> str:
return self._remote_tmpdir

@remote_tmpdir.setter
def remote_tmpdir(self, tmpdir) -> str:
def remote_tmpdir(self, tmpdir: str) -> None:
self._remote_tmpdir = tmpdir
self._jbackend.pySetRemoteTmp(tmpdir)
28 changes: 13 additions & 15 deletions hail/python/hail/context.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,8 +67,6 @@ def create(
log: str,
quiet: bool,
append: bool,
tmpdir: str,
local_tmpdir: str,
default_reference: str,
global_seed: Optional[int],
backend: Backend,
Expand All @@ -77,25 +75,17 @@ def create(
log=log,
quiet=quiet,
append=append,
tmpdir=tmpdir,
local_tmpdir=local_tmpdir,
global_seed=global_seed,
backend=backend,
)
hc.initialize_references(default_reference)
return hc

@typecheck_method(
log=str, quiet=bool, append=bool, tmpdir=str, local_tmpdir=str, global_seed=nullable(int), backend=Backend
)
def __init__(self, log, quiet, append, tmpdir, local_tmpdir, global_seed, backend):
@typecheck_method(log=str, quiet=bool, append=bool, global_seed=nullable(int), backend=Backend)
def __init__(self, log, quiet, append, global_seed, backend: Backend):
assert not Env._hc

self._log = log

self._tmpdir = tmpdir
self._local_tmpdir = local_tmpdir

self._backend = backend

self._warn_cols_order = True
Expand Down Expand Up @@ -138,6 +128,14 @@ def initialize_references(self, default_reference):
else:
self._default_ref = ReferenceGenome.read(default_reference)

@property
def _tmpdir(self) -> str:
return self._backend.remote_tmpdir

@property
def _local_tmpdir(self) -> str:
return self._backend.local_tmpdir

@property
def default_reference(self) -> ReferenceGenome:
assert self._default_ref is not None, '_default_ref should have been initialized in HailContext.create'
Expand Down Expand Up @@ -500,7 +498,7 @@ def init_spark(
if not backend.fs.exists(tmpdir):
backend.fs.mkdir(tmpdir)

HailContext.create(log, quiet, append, tmpdir, local_tmpdir, default_reference, global_seed, backend)
HailContext.create(log, quiet, append, default_reference, global_seed, backend)
if not quiet:
connect_logger(backend._utils_package_object, 'localhost', 12888)

Expand Down Expand Up @@ -571,7 +569,7 @@ async def init_batch(
tmpdir = os.path.join(backend.remote_tmpdir, 'tmp/hail', secret_alnum_string())
local_tmpdir = _get_local_tmpdir(local_tmpdir)

HailContext.create(log, quiet, append, tmpdir, local_tmpdir, default_reference, global_seed, backend)
HailContext.create(log, quiet, append, default_reference, global_seed, backend)


@typecheck(
Expand Down Expand Up @@ -623,7 +621,7 @@ def init_local(
if not backend.fs.exists(tmpdir):
backend.fs.mkdir(tmpdir)

HailContext.create(log, quiet, append, tmpdir, tmpdir, default_reference, global_seed, backend)
HailContext.create(log, quiet, append, default_reference, global_seed, backend)
if not quiet:
connect_logger(backend._utils_package_object, 'localhost', 12888)

Expand Down

0 comments on commit 7965c8f

Please sign in to comment.