Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add more tests for /data/ endpoint #90

Merged
merged 2 commits into from
Mar 11, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
73 changes: 53 additions & 20 deletions test/base_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,10 +52,9 @@ def test_attr_on_root(self, server):
retrieved_attributes = decode_response(response)
assert retrieved_attributes == nx_attributes

@pytest.mark.parametrize("format_arg", ("json", "npy"))
@pytest.mark.parametrize("flatten", (False, True))
def test_data_on_array(self, server, format_arg, flatten):
"""Test /data/ endpoint on array dataset in a group"""
@pytest.mark.parametrize("format_arg", ("json", "bin", "npy", "csv", "tiff"))
def test_data_on_array_with_format(self, server, format_arg):
"""Test /data/ endpoint on array dataset"""
Comment on lines +55 to +57
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This one now tests all the supported formats without setting the other parameters. This means we no longer need to test flatten: False and dtype: 'origin' in subsequent tests.

# Test condition
tested_h5entity_path = "/entry/image"
data = np.random.random((128, 128))
Expand All @@ -65,40 +64,40 @@ def test_data_on_array(self, server, format_arg, flatten):
h5file[tested_h5entity_path] = data

response = server.get(
f"/data/?{urlencode({'file': filename, 'path': tested_h5entity_path, 'format': format_arg, 'flatten': flatten})}"
f"/data/?{urlencode({'file': filename, 'path': tested_h5entity_path, 'format': format_arg})}"
)
retrieved_data = decode_array_response(
response, format_arg, data.dtype.str, data.shape
)
retrieved_data = np.array(decode_response(response, format_arg))

assert np.array_equal(retrieved_data, data.flatten() if flatten else data)
assert np.array_equal(retrieved_data, data)

@pytest.mark.parametrize("format_arg", ("npy", "bin"))
@pytest.mark.parametrize("dtype_arg", ("origin", "safe"))
def test_data_on_array_with_dtype(self, server, format_arg, dtype_arg):
"""Test /data/ endpoint on array dataset with dtype"""
def test_data_on_array_with_dtype_safe(
self,
server,
format_arg,
):
"""Test /data/ endpoint on array dataset with dtype=safe"""
# Test condition
tested_h5entity_path = "/entry/image"
data = np.random.random((128, 128)).astype(">f2")
# No Float16Array in JS => converted to float32
ref_dtype = "<f4" if dtype_arg == "safe" else ">f2"

filename = "test.h5"
with h5py.File(server.served_directory / filename, mode="w") as h5file:
h5file[tested_h5entity_path] = data

response = server.get(
f"/data/?{urlencode({'file': filename, 'path': tested_h5entity_path, 'format': format_arg, 'dtype': dtype_arg})}"
)

retrieved_data = decode_array_response(
response, format_arg, ref_dtype, data.shape
f"/data/?{urlencode({'file': filename, 'path': tested_h5entity_path, 'format': format_arg, 'dtype': 'safe'})}"
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Now focusing on testing dtype: 'safe' in this test.

)

retrieved_data = decode_array_response(response, format_arg, "<f4", data.shape)
assert np.array_equal(retrieved_data, data)

@pytest.mark.parametrize("format_arg", ("json", "npy"))
@pytest.mark.parametrize("flatten", (False, True))
def test_data_on_slice(self, server, format_arg, flatten):
"""Test /data/ endpoint on array dataset in a group"""
def test_data_on_slice_with_format_and_flatten(self, server, format_arg):
"""Test /data/ endpoint on array dataset with flatten"""
# Test condition
tested_h5entity_path = "/entry/image"
data = np.random.random((128, 128))
Expand All @@ -108,12 +107,31 @@ def test_data_on_slice(self, server, format_arg, flatten):
h5file[tested_h5entity_path] = data

response = server.get(
f"/data/?{urlencode({'file': filename, 'path': tested_h5entity_path, 'selection': '100,0', 'format': format_arg, 'flatten': flatten})}"
f"/data/?{urlencode({'file': filename, 'path': tested_h5entity_path, 'selection': '100,0', 'format': format_arg, 'flatten': True})}"
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Now focusing on testing flatten: True in this test.

)
retrieved_data = np.array(decode_response(response, format_arg))

assert retrieved_data - data[100, 0] < 1e-8

def test_data_on_opaque(self, server):
    """Test /data/ endpoint on opaque dataset with format=bin

    Writes a one-byte opaque value to a fresh HDF5 file, requests it with
    format=bin, and checks both the response content type and that the
    returned bytes compare equal to the stored value.
    """
    filename = "test.h5"
    tested_h5entity_path = "/opaque"
    expected = np.void(b"\x00")

    # Create the file inside the directory served by the test server
    with h5py.File(server.served_directory / filename, mode="w") as h5file:
        h5file[tested_h5entity_path] = expected

    query = urlencode(
        {"file": filename, "path": tested_h5entity_path, "format": "bin"}
    )
    response = server.get(f"/data/?{query}")

    # The content type is checked before the payload, as in the other tests
    assert response.find_header_value("content-type") == "application/octet-stream"

    # Wrap the raw response bytes in np.void to compare with the stored value
    assert np.array_equal(np.void(response.content), expected)

def test_meta_on_chunked_compressed_dataset(self, server):
"""Test /meta/ endpoint on a chunked and compressed dataset"""
filename = "test.h5"
Expand Down Expand Up @@ -517,6 +535,21 @@ def test_422_on_dtype_safe_with_non_numeric_data(self, server):

server.assert_error_code(f"/data/?file={filename}&path={path}&dtype=safe", 422)

@pytest.mark.parametrize(
"format_arg",
("csv", "npy", "tiff"),
)
def test_422_on_format_incompatible_with_non_numeric_data(self, server, format_arg):
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

To cover the condition that was moved in #89.

filename = "test.h5"
path = "/data"

with h5py.File(server.served_directory / filename, mode="w") as h5file:
h5file[path] = "I am not numeric"

server.assert_error_code(
f"/data/?file={filename}&path={path}&format={format_arg}", 422
)

def test_422_on_invalid_query_arg(self, server):
filename = "test.h5"
path = "/data"
Expand Down
10 changes: 9 additions & 1 deletion test/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
import numpy as np
from typing import List, NamedTuple, Tuple

import tifffile

from h5grove.utils import hdf_path_join


Expand Down Expand Up @@ -40,14 +42,20 @@ def decode_response(response: Response, format: str = "json"):
if format == "npy":
assert content_type == "application/octet-stream"
return np.load(io.BytesIO(response.content))
if format == "csv":
assert content_type == "text/csv"
return np.genfromtxt(response.content.splitlines(), delimiter=",")
if format == "tiff":
assert content_type == "image/tiff"
return tifffile.imread(io.BytesIO(response.content))
raise ValueError(f"Unsupported format: {format}")


def decode_array_response(
response: Response,
format: str,
dtype: str,
shape: Tuple[int],
shape: Tuple[int, ...],
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Matching numpy's internal _Shape type.

) -> np.ndarray:
"""Decode data array response content according to given information"""
content_type = response.find_header_value("content-type")
Expand Down
Loading