Skip to content

Commit

Permalink
Merge branch 'main' into add-terraform
Browse files Browse the repository at this point in the history
  • Loading branch information
joshmoore authored Feb 25, 2021
2 parents 8e59d1d + b8b6547 commit dbd96cf
Show file tree
Hide file tree
Showing 21 changed files with 2,466 additions and 148 deletions.
2 changes: 2 additions & 0 deletions .dockerignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
data
.mypy_cache
12 changes: 12 additions & 0 deletions .env
Original file line number Diff line number Diff line change
@@ -1,2 +1,14 @@
MINIO_ACCESS_KEY=minioadmin
MINIO_SECRET_KEY=minioadmin
HOST=nginx
XY=32768
Z=1
C=32
T=1
XC=256
ZC=1
ROOT=/uod/idr-scratch/ngff-latency-benchmark
DIR=${ROOT}/${XY}-Z-${Z}-T-${T}-C-${C}-XYC-${XC}-ZC-${ZC}
BASE=IMS_XY-${XY}-Z-${Z}-T-${T}-C-${C}-XYC-${XC}-ZC-${ZC}
ROUNDS=5
TEST_REPEATS=10
4 changes: 3 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,6 @@ __pycache__
data/*.ims
data/*.tiff
data/*.zarr
*.swp
data/*.bfmemo
data/tmp
.swp
2 changes: 1 addition & 1 deletion .isort.cfg
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
[settings]
known_third_party = boto3,botocore,fsspec,h5py,pytest,requests,s3fs,tifffile
known_third_party = h5py,matplotlib,numpy,pytest,requests,s3fs,tifffile,zarr
50 changes: 50 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
# How to use this repository

To get started, clone this repository locally:
```
git clone https://github.com/ome/ngff-latency-benchmark.git
cd ngff-latency-benchmark
```

## Generate sample data

You will likely want to adjust the parameters in `.env` first, then run:

```
./generate.sh
```

which will run several docker-compose commands in a row. This could take
a substantial amount of time depending on your parameters.

## Or, alternatively download a sample file

```
mkdir data
cd data
../retina.sh
```

If you choose to use `retina_large`, you will also need to adjust the parameters in `.env`.


## Then, start S3 and upload the data

Start the various Docker containers in the background ("detached" mode):
```
docker-compose up -d
```

Once the containers are up, run:
```
docker-compose run --rm upload
```

## Finally, run the benchmark

```
docker-compose run --rm benchmark -sv
```

This will store both the benchmarking results (one `<i>_benchmark_data.json` file per repeat) and a plotted graph
(`benchmark_plot.png`) in the directory along with the input data.
17 changes: 17 additions & 0 deletions benchmark/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
FROM continuumio/miniconda3
COPY environment.yml /tmp/environment.yml
RUN conda update -n base -c defaults conda
RUN conda env create -n benchmark -f /tmp/environment.yml

# Note: docker-compose.yml mounts this file on each run
# to prevent stale copies.
COPY benchmark.sh /benchmark/benchmark.sh
COPY benchmark.py /benchmark/benchmark.py
# Fix: target was "/benchmarks/" (typo). benchmark.sh does `cd /benchmark`
# and then `python plot_results.py`, so the script must live in /benchmark.
COPY plot_results.py /benchmark/plot_results.py

# see https://github.com/zarr-developers/zarr-python/pull/699
RUN conda run -n benchmark pip install git+https://github.com/joshmoore/zarr-python@key-sep#egg=zarr
RUN conda run -n benchmark pip install pytest-profiling
RUN conda run -n benchmark pip install seaborn

ENTRYPOINT ["conda", "run", "--no-capture-output", "-n", "benchmark", "bash", "/benchmark/benchmark.sh"]
170 changes: 170 additions & 0 deletions benchmark/benchmark.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,170 @@
import time
import fsspec
import h5py
import pytest
import random
import requests
import s3fs
import tifffile
import zarr
from copy import deepcopy
from os import environ

# for product
from functools import reduce # Required in Python 3
import operator

# Benchmark configuration, overridable via the environment (see .env).
DIR = environ.get("DIR", "data")  # directory holding the generated sample data
BASE = environ.get("BASE", "retina_large")  # base name of the sample image files
HOST = environ.get("HOST", "localhost")  # host serving HTTP (:8000) and S3 (:9000)
ROUNDS = int(environ.get("ROUNDS", 10))  # benchmark rounds == chunks sampled

# Passed to every fsspec filesystem so repeated runs construct fresh instances.
fsspec_default_args = {
    "skip_instance_cache": False,
    # "use_listings_cache": False  — disabled; kept for reference
}


class ChunkChoices:
    """Pre-computed random sample of chunk coordinates to benchmark.

    Dimensions and chunk sizes are read from the environment (see .env).
    Coordinates are 1-based ``(t, c, z, y, x)`` tuples; ``ROUNDS`` of them
    are drawn without replacement from the full chunk grid.
    """

    def __init__(self):
        self.z = int(environ.get("Z"))
        self.t = int(environ.get("T"))
        self.zc = int(environ.get("ZC"))
        self.xy = int(environ.get("XY"))
        self.c = int(environ.get("C"))
        self.xc = int(environ.get("XC"))
        tiles = self.xy // self.xc    # chunk count along X and along Y
        planes = self.z // self.zc    # chunk count along Z
        all_indexes = [
            (it + 1, ic + 1, iz + 1, iy + 1, ix + 1)
            for ix in range(tiles)
            for iy in range(tiles)
            for iz in range(planes)
            for ic in range(self.c)
            for it in range(self.t)
        ]
        self.chunk_choices = random.sample(all_indexes, ROUNDS)

    def pop(self):
        """Return and consume the next sampled chunk coordinate."""
        return self.chunk_choices.pop()


CHOICES = ChunkChoices()


class Fixture:
    """Base class for the pytest-benchmark fixtures.

    Subclasses override ``setup``/``run``; constructing the fixture
    immediately drives ``benchmark.pedantic`` for ``ROUNDS`` rounds,
    popping one pre-sampled chunk coordinate per round.
    """

    def __init__(self, benchmark):
        # Own copy so every parametrized test sees the same chunk sequence.
        self.choices = deepcopy(CHOICES)
        benchmark.pedantic(self.run, setup=self.setup, rounds=ROUNDS)

    def prod(self, seq):
        """Return the product of all elements of *seq* (1 if empty)."""
        return reduce(operator.mul, seq, 1)

    def load(self, data, chunk_shape, chunk_index):
        """Read one chunk of *data*; *chunk_index* is 1-based.

        NOTE(review): ``len`` on the slice measures only the first axis,
        so for multi-dimensional chunks the comparison is False; callers
        ignore the return value — the read itself is what is timed.
        """
        extents = []
        for shape, index in zip(chunk_shape, chunk_index):  # zarr=5, HDF5=3
            extents.append(slice(shape * (index - 1), shape * index))
        return len(data[tuple(extents)]) == self.prod(chunk_shape)

    @classmethod
    def methods(cls):
        """The three access methods being compared."""
        return (cls.local, cls.http, cls.s3)

    @staticmethod
    def local(filename: str):
        """Return (path, filesystem) for direct local-disk access."""
        # Fix: the path previously ignored the filename argument.
        return f"{DIR}/{filename}", fsspec.filesystem(
            'file', **fsspec_default_args)

    @staticmethod
    def http(filename: str):
        """Return (url, filesystem) for plain HTTP access (nginx)."""
        return f"http://{HOST}:8000/{filename}", fsspec.filesystem(
            'http', **fsspec_default_args)

    @staticmethod
    def s3(filename: str):
        """Return (url, filesystem) for S3 access (minio)."""
        return f"s3://data/{filename}", s3fs.S3FileSystem(
            anon=True,
            client_kwargs={"endpoint_url": f"http://{HOST}:9000"},
            **fsspec_default_args)

    def setup(self):
        """Per-round setup hook; subclasses override as needed."""
        pass

    def run(self):
        """Per-round timed work; must be overridden.

        Fix: was ``raise NotImplemented()`` — ``NotImplemented`` is a
        sentinel value, not an exception, and calling it raises TypeError.
        """
        raise NotImplementedError()


@pytest.mark.parametrize("method", Fixture.methods())
def test_1_byte_overhead(benchmark, method):
    """Baseline: the per-request overhead of fetching a 1-byte object."""

    path, filesystem = method("1-byte")

    class OneByteFixture(Fixture):
        def setup(self):
            # Re-opened every round; only the read itself is timed.
            self.f = filesystem.open(path)

        def run(self):
            self.f.read()

    OneByteFixture(benchmark)


@pytest.mark.parametrize("method", Fixture.methods())
def test_zarr_chunk(benchmark, method):
    """Time loading one random chunk from the OME-Zarr fileset."""

    path, filesystem = method(f"{BASE}.ome.zarr")

    class ZarrFixture(Fixture):
        def setup(self):
            # key_separator="/" needs the patched zarr (see Dockerfile).
            fs_store = zarr.storage.FSStore(
                path, key_separator="/", **filesystem.storage_options)
            self.group = zarr.group(store=fs_store)

        def run(self):
            dataset = self.group["0"]
            self.load(dataset, dataset.chunks, self.choices.pop())

    ZarrFixture(benchmark)


@pytest.mark.parametrize("method", Fixture.methods())
def test_tiff_tile(benchmark, method):
    """Time loading one random tile from the OME-TIFF via its zarr view."""

    path, filesystem = method(f"{BASE}.ome.tiff")

    class TiffFixture(Fixture):
        def setup(self):
            self.f = filesystem.open(path)

        def run(self):
            # The TIFF is re-parsed each round; aszarr exposes the tiles
            # through the zarr API so load() works unchanged.
            with tifffile.TiffFile(self.f) as tif:
                group = zarr.group(store=tif.aszarr())
                dataset = group["0"]
                self.load(dataset, dataset.chunks, self.choices.pop())

    TiffFixture(benchmark)


@pytest.mark.parametrize("method", Fixture.methods())
def test_hdf5_chunk(benchmark, method):
    """Time loading one random chunk from the Imaris (HDF5) file."""

    path, filesystem = method(f"{BASE}.ims")

    class HDF5Fixture(Fixture):
        def setup(self):
            self.f = filesystem.open(path)
            self.file = h5py.File(self.f)

        def run(self):
            # T and C select the HDF5 group; the remaining (z, y, x)
            # coordinates index the chunk within the 3-D dataset.
            t, c, *zyx = self.choices.pop()
            level = self.file["DataSet"]["ResolutionLevel 0"]
            dataset = level[f"TimePoint {t-1}"][f"Channel {c-1}"]["Data"]
            self.load(dataset, dataset.chunks, zyx)

    HDF5Fixture(benchmark)
18 changes: 18 additions & 0 deletions benchmark/benchmark.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
#!/usr/bin/env bash
# Run the latency benchmark TEST_REPEATS times, then plot the results.
# Intended to run inside the benchmark container (see Dockerfile).

# Where the input data lives and where results/plot are written.
DIR=${DIR:-./data}
export BENCHMARK_DATA=${DIR}
export BENCHMARK_PLOT=${DIR}/benchmark_plot.png

set -e
set -u
set -x

cd /benchmark # TODO: should work without docker

# One JSON results file per repeat: <i>_benchmark_data.json.
# NOTE(review): with `set -u`, TEST_REPEATS must be set (comes from .env).
for (( i=0; i<$TEST_REPEATS; i++ ))
do
pytest benchmark.py "$@" --benchmark-json=${BENCHMARK_DATA}/${i}_benchmark_data.json
done

python plot_results.py
3 changes: 0 additions & 3 deletions environment.yml → benchmark/environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@ channels:
- defaults
dependencies:
- python == 3.9.0
- napari
- ipython
- wheel
- requests
Expand All @@ -15,8 +14,6 @@ dependencies:
- scipy
- xarray
- zarr >= 2.4.0
- bioformats2raw
- raw2ometiff
- go-mc
- pip
- pip:
Expand Down
Loading

0 comments on commit dbd96cf

Please sign in to comment.