Merge branch 'main' into run_notebooks

joshmoore · web-flow · commit 1128d1f18d9e · 2021-04-16T22:54:01.000+02:00
diff --git a/.env b/.env
@@ -3,13 +3,17 @@ AWS_SECRET_ACCESS_KEY=minioadmin
 BUCKET=ngff-latency-benchmark
 HOST=nginx
 
-XY=32768
-Z=1
-C=32
-T=1
-XC=256
-ZC=1
+XY=1024
+Z=1024
+C=1
+T=100
+XC=32
+ZC=32
+ROOT=/var/lib/docker
 ROOT=/uod/idr-scratch/ngff-latency-benchmark
+ROOT=/uod/idr/scratch/idr1-slot2/ngff-latency-benchmark
+ROOT=/home/ubuntu/ngff-latency-benchmark
+
 NAME=${XY}-Z-${Z}-T-${T}-C-${C}-XYC-${XC}-ZC-${ZC}
 DIR=${ROOT}/${NAME}
 BASE=IMS_XY-${NAME}
diff --git a/.env-gha b/.env-gha
@@ -0,0 +1,19 @@
+AWS_ACCESS_KEY_ID=minioadmin
+AWS_SECRET_ACCESS_KEY=minioadmin
+BUCKET=ngff-latency-benchmark
+HOST=nginx
+
+XY=32
+Z=1
+C=1
+T=1
+XC=1
+ZC=1
+ROOT=/tmp
+NAME=${XY}-Z-${Z}-T-${T}-C-${C}-XYC-${XC}-ZC-${ZC}
+DIR=${ROOT}/${NAME}
+BASE=IMS_XY-${NAME}
+ROUNDS=20
+TEST_REPEATS=1
+
+S3ARGS={"anon": true, "client_kwargs": {"endpoint_url": "http://${HOST}:9000"}}
diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
@@ -0,0 +1,28 @@
+---
+name: Build
+
+on: 
+  push:
+  pull_request:
+  schedule:
+    - cron: '0 0 * * 0'
+
+jobs:
+  build:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v2
+      - name: Set env
+        run: |
+          rm .env
+          mv .env-gha .env
+      - name: Build
+        run: |
+          docker-compose pull && docker-compose build
+      - name: Generate
+        run: ./generate.sh
+      - name: Start Upload and Run benchmark
+        run: ./test.sh
+      - name: Check logs
+        run: docker-compose logs
+        if: ${{ failure() }}
diff --git a/README.md b/README.md
@@ -1,7 +1,8 @@
+[![DOI](https://zenodo.org/badge/329595844.svg)](https://zenodo.org/badge/latestdoi/329595844)
 [![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/ome/ngff-latency-benchmark/master?filepath=notebooks)
 [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/ome/ngff-latency-benchmark/)
 
-# How to use this repository
+# OME-NGFF: latency benchmark
 
 To get started, clone this repository locally:
 ```
@@ -20,16 +21,6 @@ You will likely want to adjust the parameters in `.env` first, then run:
 which will run several docker-compose commands in a row. This could take
 a substantial amount of time depending on your parameters.
 
-## Or, alternatively download a sample file
-
-```
-mkdir data
-cd data
-../retina.sh
-```
-
-If you choose to use `retina_large`, you will also need to adjust the parameters in `.env`.
-
 
 ## Then, start S3 and upload the data
 
diff --git a/benchmark/Dockerfile b/benchmark/Dockerfile
@@ -2,15 +2,14 @@ FROM continuumio/miniconda3
 COPY environment.yml /tmp/environment.yml
 RUN conda update -n base -c defaults conda
 RUN conda env create -n benchmark -f /tmp/environment.yml
+RUN conda list -n benchmark
 
 # Note: docker-compose.yml mounts this file on each run
 #       to prevent stale copies.
 COPY benchmark.sh /benchmark/benchmark.sh
 COPY benchmark.py /benchmark/benchmark.py
-COPY plot_results.py /benchmarks/plot_results.py
+COPY plot_results.py /benchmark/plot_results.py
 
-# see https://github.com/zarr-developers/zarr-python/pull/699
-RUN conda run -n benchmark pip install git+https://github.com/zarr-developers/zarr-python@master#egg=zarr
 RUN conda run -n benchmark pip install pytest-profiling
 RUN conda run -n benchmark pip install seaborn
 RUN conda run -n benchmark pip install pytest-csv
diff --git a/benchmark/benchmark.py b/benchmark/benchmark.py
@@ -183,7 +183,7 @@ def run(self, chunk_index):
                     try:
                         group = zarr.group(store=store)
                         data = group["0"]
-                    except KeyError:
+                    except (KeyError, ValueError):
                         # This likely happens due to dim
                         data = zarr.open(store, mode="r")
                     chunks = data.chunks
diff --git a/benchmark/environment.yml b/benchmark/environment.yml
@@ -13,7 +13,7 @@ dependencies:
   - scikit-image
   - scipy
   - xarray
-  - zarr >= 2.4.0
+  - zarr >= 2.7.0
   - go-mc
   - pip
   - pip:
diff --git a/benchmark/plot_results.py b/benchmark/plot_results.py
@@ -5,68 +5,26 @@
 import pandas as pd
 import seaborn as sns
 
-json_path = os.environ.get("BENCHMARK_DATA", "benchmark_data")
+data_path = os.environ.get("BENCHMARK_DATA", "benchmark_data")
 plot_path = os.environ.get("BENCHMARK_PLOT", "benchmark_plot.png")
-
 xy = os.environ.get("XY", "unknown")
 
-# s3+hdf5, s3+tiff, s3+zarr, remote+hdf5, remote+… so I’d color by tiff/hdf5/zarr
-
-print("base_path", json_path)
-
-
-three_col = []
-two_col = []
-
-test_repeats = int(os.getenv("TEST_REPEATS", "1"))
-json_files = ["%s_benchmark_data.json" % r for r in range(test_repeats)]
-print("json_files", json_files)
-
-for root, dirs, files in os.walk(json_path):
-    for file_name in files:
-        if file_name in json_files:
-            path = os.path.join(root, file_name)
-            print("json path", path)
-            with open(path) as json_file:
-                benchmarks = json.load(json_file)["benchmarks"]
+csv = pd.read_csv(f"{data_path}/0_benchmark_data.csv")
 
-                for bm in benchmarks:
-                    m = re.match(r"test_(1_byte|\w+)_(\w+)\[(\w+)\]", bm["name"])
-                    if not m:
-                        raise Exception(bm["name"])
-                    typ = m.group(1).replace("1_byte", "overhead")
-                    src = m.group(3)
-
-                    if typ == "overhead" and src == "local":
-                        # 10e-5 skews the view.
-                        continue
-
-                    if test_repeats == 1:
-                        # Ran tests once: plot every data point
-                        vals = bm["stats"]["data"]
-                        for run, val in enumerate(vals):
-                            three_col.append(
-                                {"type": typ, "source": src, "seconds": val}
-                            )
-                            two_col.append({"name": f"{typ}-{src}", "seconds": val})
-                    else:
-                        # Repeats: take mean value from each
-                        val = bm["stats"]["mean"]
-                        three_col.append({"type": typ, "source": src, "seconds": val})
-                        two_col.append({"name": f"{typ}-{src}", "seconds": val})
-
-df3 = pd.DataFrame.from_dict(three_col)
-df2 = pd.DataFrame.from_dict(two_col)
-
-types = ("overhead", "zarr", "tiff", "hdf5")
+types = ("Overhead", "Zarr", "TIFF", "HDF5")
 sources = ("local", "http", "s3")
 orders = {"type": types, "source": sources}
 
 pal_points = "colorblind"
 pal_violins = "pastel"
 
 g = sns.FacetGrid(
-    df3, col="source", col_order=sources, sharey=False, height=5, aspect=0.6,
+    csv,
+    col="source",
+    col_order=sources,
+    sharey=False,
+    height=5,
+    aspect=0.6,
 )
 
 g = g.map(
@@ -83,7 +41,7 @@
 
 g = g.map(
     sns.stripplot,
-    "type",
+    "type", # was type
     "seconds",
     dodge=True,
     order=types,
@@ -93,7 +51,7 @@
 )
 
 g.despine(left=True)
-g.set(yscale="log", ylim=(0.0009, 1))
+g.set(yscale ='log', ylim=(0.0009, 1))
 
 # Set axis labels & ticks #
 for ax in g.fig.get_axes():
@@ -105,7 +63,11 @@
         # Remove outline of violins
         col.set_edgecolor("white")
 
+# Only show the y-axis label and left spine on the far-left plot
 g.fig.get_axes()[0].set_ylabel("Seconds")
 g.fig.get_axes()[0].spines["left"].set_visible(True)
 
+# Add annotations
+g.fig.get_axes()[0].text(0.0001, 0.001, "off-\nscale")
+
 g.savefig(plot_path, dpi=600)
diff --git a/convert/Dockerfile b/convert/Dockerfile
@@ -5,7 +5,7 @@ RUN conda install -c ome bioformats2raw raw2ometiff
 # Temporary
 #
 RUN conda install -y -c anaconda -c conda-forge maven git
-RUN git clone -b nested --depth=1 git://github.com/joshmoore/bioformats2raw /tmp/bioformats2raw
+RUN git clone -b nested-re-re --depth=1 git://github.com/joshmoore/bioformats2raw /tmp/bioformats2raw
 RUN git clone -b nested --depth=1 git://github.com/joshmoore/raw2ometiff /tmp/raw2ometiff
 RUN git clone -b nested --depth=1 git://github.com/joshmoore/jzarr /tmp/jzarr
 RUN cd /tmp/jzarr \
diff --git a/docker-compose.yml b/docker-compose.yml
@@ -32,9 +32,10 @@ services:
     environment:
       - BASE
       - DIR
+      - NAME
       - HOST
-      - MINIO_ACCESS_KEY="${AWS_ACCESS_KEY_ID}"
-      - MINIO_SECRET_KEY="${AWS_SECRET_ACCESS_KEY}"
+      - MINIO_ACCESS_KEY=${AWS_ACCESS_KEY_ID}
+      - MINIO_SECRET_KEY=${AWS_SECRET_ACCESS_KEY}
 
   benchmark:
     build: benchmark
@@ -76,8 +77,8 @@ services:
     expose:
       - "9000"
     environment:
-      - MINIO_ACCESS_KEY="${AWS_ACCESS_KEY_ID}"
-      - MINIO_SECRET_KEY="${AWS_SECRET_ACCESS_KEY}"
+      - MINIO_ACCESS_KEY=${AWS_ACCESS_KEY_ID}
+      - MINIO_SECRET_KEY=${AWS_SECRET_ACCESS_KEY}
     command: server http://minio{1...4}/data{1...2}
     healthcheck:
       test: ["CMD", "curl", "-f", "http://localhost:9000/minio/health/live"]
@@ -93,8 +94,8 @@ services:
     expose:
       - "9000"
     environment:
-      - MINIO_ACCESS_KEY="${AWS_ACCESS_KEY_ID}"
-      - MINIO_SECRET_KEY="${AWS_SECRET_ACCESS_KEY}"
+      - MINIO_ACCESS_KEY=${AWS_ACCESS_KEY_ID}
+      - MINIO_SECRET_KEY=${AWS_SECRET_ACCESS_KEY}
     command: server http://minio{1...4}/data{1...2}
     healthcheck:
       test: ["CMD", "curl", "-f", "http://localhost:9000/minio/health/live"]
@@ -110,8 +111,8 @@ services:
     expose:
       - "9000"
     environment:
-      - MINIO_ACCESS_KEY="${AWS_ACCESS_KEY_ID}"
-      - MINIO_SECRET_KEY="${AWS_SECRET_ACCESS_KEY}"
+      - MINIO_ACCESS_KEY=${AWS_ACCESS_KEY_ID}
+      - MINIO_SECRET_KEY=${AWS_SECRET_ACCESS_KEY}
     command: server http://minio{1...4}/data{1...2}
     healthcheck:
       test: ["CMD", "curl", "-f", "http://localhost:9000/minio/health/live"]
@@ -127,8 +128,8 @@ services:
     expose:
       - "9000"
     environment:
-      - MINIO_ACCESS_KEY="${AWS_ACCESS_KEY_ID}"
-      - MINIO_SECRET_KEY="${AWS_SECRET_ACCESS_KEY}"
+      - MINIO_ACCESS_KEY=${AWS_ACCESS_KEY_ID}
+      - MINIO_SECRET_KEY=${AWS_SECRET_ACCESS_KEY}
     command: server http://minio{1...4}/data{1...2}
     healthcheck:
       test: ["CMD", "curl", "-f", "http://localhost:9000/minio/health/live"]
diff --git a/generate.sh b/generate.sh
@@ -15,19 +15,23 @@ TIFF=IMS_XY-${XY}-Z-${Z}-T-${T}-C-${C}-XYC-${XC}-ZC-${ZC}.ome.tiff
 BF2RAW=/tmp/bioformats2raw/build/install/bioformats2raw/bin/bioformats2raw
 RAW2OMETIFF=/tmp/raw2ometiff/build/install/raw2ometiff/bin/raw2ometiff
 
-docker-compose run ${CLEAN} -v $DIR:$DIR \
+echo IMS
+time docker-compose run ${CLEAN} -v $DIR:$DIR \
 	generate $DIR \
 	$XY $Z $C $T $XC $ZC
 
-docker-compose run ${CLEAN} -v $DIR:$DIR \
+echo ZARR
+time docker-compose run ${CLEAN} -v $DIR:$DIR \
 	convert $BF2RAW \
 		--nested \
+		--chunk_depth $ZC \
 		--compression=raw \
 		$DIR/$IMS \
 		$DIR/out \
 	-w $XC -h $XC
 
-docker-compose run ${CLEAN} -v $DIR:$DIR \
+echo TIFF
+time docker-compose run ${CLEAN} -v $DIR:$DIR \
 	convert $RAW2OMETIFF \
 		--compression=raw \
 		$DIR/out \
diff --git a/generate/imarisWriter-Generate.sh b/generate/imarisWriter-Generate.sh
@@ -8,6 +8,7 @@ set -x
 OUTPUT=${1:-ImarisFiles}
 THREADS=${THREADS:-8}
 BITS=${BITS:-16bit}
+COMPRESSION=${COMPRESSION:-0}
 echo ********************************************************************
 echo Writing ${BITS} data with ${THREADS} threads to: $OUTPUT
 echo ********************************************************************
@@ -21,4 +22,7 @@ c=$4
 t=$5
 xc=$6
 zc=$7
-./ImarisWriterTestRelease -sizex $x -sizey $x -sizez $z -sizet $t -sizec $c -chunkx $xc -chunky $xc -chunkz $zc -type ${BITS} -threads ${THREADS} -outputpath ${OUTPUT} IMS_XY-$x-Z-$z-T-$t-C-$c-XYC-$xc-ZC-$zc.ims
+./ImarisWriterTestRelease \
+	-sizex $x -sizey $x -sizez $z -sizet $t -sizec $c -chunkx $xc -chunky $xc -chunkz $zc \
+	-compression ${COMPRESSION} -type ${BITS} -threads ${THREADS} -outputpath ${OUTPUT} \
+	IMS_XY-$x-Z-$z-T-$t-C-$c-XYC-$xc-ZC-$zc.ims
diff --git a/generate/imarisWriter-Setup.sh b/generate/imarisWriter-Setup.sh
@@ -34,20 +34,23 @@ cd lz4-dev
 make DESTDIR=../lz4-install install
 cd ..
 
+
 #imarisWriter
-wget -N -O ImarisWriter.zip https://github.com/dgault/ImarisWriter/archive/master.zip
+BRANCH=2021-04-07
+wget -N -O ImarisWriter.zip https://github.com/ome/ImarisWriter/archive/${BRANCH}.zip
 unzip ImarisWriter.zip
-mv ImarisWriter-master ImarisWriter
+mv ImarisWriter-${BRANCH} ImarisWriter
 cd ImarisWriter
 mkdir release
 cd release
 cmake -DHDF5_ROOT:PATH="../CMake-hdf5-1.12.0/HDF_Group/HDF5/1.12.0" -DZLIB_ROOT:PATH="../zlib-1.2.11/zlibInstall" -DLZ4_ROOT:PATH="../lz4-dev/lz4-install/usr/local" ..
 make install
 cd ../..
 
+
 #imarisWriterTest
-BRANCH=ngff-benchmark-gen
-wget -N -O ImarisWriterTest.zip https://github.com/dgault/ImarisWriterTest/archive/${BRANCH}.zip
+BRANCH=2021-04-08
+wget -N -O ImarisWriterTest.zip https://github.com/ome/ImarisWriterTest/archive/${BRANCH}.zip
 unzip ImarisWriterTest.zip
 mv ImarisWriterTest-${BRANCH} ImarisWriterTest
 cd ImarisWriterTest/application
diff --git a/test.sh b/test.sh
@@ -0,0 +1,9 @@
+#!/bin/sh
+set -eu
+
+# start
+docker-compose up -d
+# upload
+docker-compose run --rm upload
+# run benchmark
+docker-compose run --rm benchmark -sv
diff --git a/upload.sh b/upload.sh

Original file line number	Diff line number	Diff line change
`@@ -5,7 +5,7 @@ RUN conda install -c ome bioformats2raw raw2ometiff`
`5`	`5`	`# Temporary`
`6`	`6`	`#`
`7`	`7`	`RUN conda install -y -c anaconda -c conda-forge maven git`
`8`		`-RUN git clone -b nested --depth=1 git://github.com/joshmoore/bioformats2raw /tmp/bioformats2raw`
	`8`	`+RUN git clone -b nested-re-re --depth=1 git://github.com/joshmoore/bioformats2raw /tmp/bioformats2raw`
`9`	`9`	`RUN git clone -b nested --depth=1 git://github.com/joshmoore/raw2ometiff /tmp/raw2ometiff`
`10`	`10`	`RUN git clone -b nested --depth=1 git://github.com/joshmoore/jzarr /tmp/jzarr`
`11`	`11`	`RUN cd /tmp/jzarr \`