From 09c262a7a21afc50825883f590dbf74a31068e91 Mon Sep 17 00:00:00 2001 From: Loic Huder Date: Fri, 2 Aug 2024 11:07:53 +0200 Subject: [PATCH 1/4] Allow copy for scalar and nested sequences when converting data to numpy arrays --- h5grove/content.py | 2 +- h5grove/encoders.py | 2 +- test/base_test.py | 23 ++++++++++++++++++++++- test/utils.py | 2 +- 4 files changed, 25 insertions(+), 4 deletions(-) diff --git a/h5grove/content.py b/h5grove/content.py index 459d1d6..2b28ccc 100644 --- a/h5grove/content.py +++ b/h5grove/content.py @@ -190,7 +190,7 @@ def data_stats( return get_array_stats(data) def _get_finite_data(self, selection: Selection) -> np.ndarray: - data = np.array(self.data(selection), copy=False) # So it works with scalars + data = np.asarray(self.data(selection)) # So it works with scalars if not np.issubdtype(data.dtype, np.floating): return data diff --git a/h5grove/encoders.py b/h5grove/encoders.py index b9a3316..a2ad5ea 100644 --- a/h5grove/encoders.py +++ b/h5grove/encoders.py @@ -111,7 +111,7 @@ def encode(content: Any, encoding: Optional[str] = "json") -> Response: headers={"Content-Type": "application/json"}, ) - content_array = np.array(content, copy=False) + content_array = np.asarray(content) if encoding == "bin": return Response( diff --git a/test/base_test.py b/test/base_test.py index 525006f..b3bc66d 100644 --- a/test/base_test.py +++ b/test/base_test.py @@ -77,6 +77,27 @@ def test_data_on_array_with_format(self, server, format_arg): assert np.array_equal(retrieved_data, data) + # TODO: What should we do for csv, tiff + @pytest.mark.parametrize("format_arg", ("json", "bin", "npy")) + def test_data_on_scalar_with_format(self, server, format_arg): + """Test /data/ endpoint on scalar dataset""" + # Test condition + tested_h5entity_path = "/entry/scalar" + data = 5 + + filename = "test.h5" + with h5py.File(server.served_directory / filename, mode="w") as h5file: + dset = h5file.create_dataset(tested_h5entity_path, data=data) + dtype = dset.dtype + shape = dset.shape + + response = server.get( + f"/data/?{urlencode({'file': filename, 'path': tested_h5entity_path, 'format': format_arg})}" + ) + retrieved_data = decode_array_response(response, format_arg, dtype.str, shape) + + assert np.array_equal(retrieved_data, data) + @pytest.mark.parametrize("format_arg", ("npy", "bin")) def test_data_on_array_with_dtype_safe( self, @@ -114,7 +135,7 @@ def test_data_on_slice_with_format_and_flatten(self, server, format_arg): response = server.get( f"/data/?{urlencode({'file': filename, 'path': tested_h5entity_path, 'selection': '100,0', 'format': format_arg, 'flatten': True})}" ) - retrieved_data = np.array(decode_response(response, format_arg)) + retrieved_data = np.asarray(decode_response(response, format_arg)) assert retrieved_data - data[100, 0] < 1e-8 diff --git a/test/utils.py b/test/utils.py index 301941e..fcedcf2 100644 --- a/test/utils.py +++ b/test/utils.py @@ -64,7 +64,7 @@ def decode_array_response( assert content_type == "application/octet-stream" return np.frombuffer(response.content, dtype=dtype).reshape(shape) - return np.array(decode_response(response, format), copy=False) + return np.asarray(decode_response(response, format)) def assert_error_response(response: Response, error_code: int): From 9524ada2b861d8b339bdde2816efe01e0a1e145d Mon Sep 17 00:00:00 2001 From: Loic Huder Date: Fri, 2 Aug 2024 14:58:11 +0200 Subject: [PATCH 2/4] Replace types-pkg-resources with types-setuptools --- setup.cfg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.cfg b/setup.cfg index 7d4d8d6..14e7687 100644 --- a/setup.cfg +++ b/setup.cfg @@ -61,7 +61,7 @@ dev = types-contextvars types-dataclasses types-orjson - types-pkg-resources + types-setuptools # E501 (line too long) ignored for now # E203 and W503 incompatible with black formatting (https://black.readthedocs.io/en/stable/compatible_configs.html#flake8) From 7ffbcf53a4bc75e25c4038eca556c3658cede8c2 Mon Sep 17 00:00:00 2001 From: Loic Huder Date: Mon, 26 Aug 2024 12:01:37 +0200 Subject: [PATCH 3/4] Raise 422 errors when requesting scalar datasets with tiff or csv format --- h5grove/encoders.py | 21 +++++++++++++-------- test/base_test.py | 18 +++++++++++++++++- 2 files changed, 30 insertions(+), 9 deletions(-) diff --git a/h5grove/encoders.py b/h5grove/encoders.py index a2ad5ea..12a1b44 100644 --- a/h5grove/encoders.py +++ b/h5grove/encoders.py @@ -126,21 +126,26 @@ def encode(content: Any, encoding: Optional[str] = "json") -> Response: f"Unsupported encoding {encoding} for non-numeric content" ) - if encoding == "csv": + if encoding == "npy": return Response( - csv_encode(content_array), + npy_encode(content_array), headers={ - "Content-Type": "text/csv", - "Content-Disposition": 'attachment; filename="data.csv"', + "Content-Type": "application/octet-stream", + "Content-Disposition": 'attachment; filename="data.npy"', }, ) - if encoding == "npy": + if content_array.ndim == 0: + raise QueryArgumentError( + f"Unsupported encoding {encoding} for empty and scalar datasets" + ) + + if encoding == "csv": return Response( - npy_encode(content_array), + csv_encode(content_array), headers={ - "Content-Type": "application/octet-stream", - "Content-Disposition": 'attachment; filename="data.npy"', + "Content-Type": "text/csv", + "Content-Disposition": 'attachment; filename="data.csv"', }, ) diff --git a/test/base_test.py b/test/base_test.py index b3bc66d..154d161 100644 --- a/test/base_test.py +++ b/test/base_test.py @@ -77,7 +77,6 @@ def test_data_on_array_with_format(self, server, format_arg): assert np.array_equal(retrieved_data, data) - # TODO: What should we do for csv, tiff @pytest.mark.parametrize("format_arg", ("json", "bin", "npy")) def test_data_on_scalar_with_format(self, server, format_arg): """Test /data/ endpoint on scalar dataset""" @@ -596,3 +595,20 @@ def test_422_on_invalid_query_arg(self, server): f"/meta/?file={filename}&path={path}&resolve_links={invalid_link_resolution}", 422, ) + + @pytest.mark.parametrize("format_arg", ("csv", "tiff")) + def test_422_on_format_incompatible_with_empty_or_scalar_datasets( + self, server, format_arg + ): + filename = "test.h5" + + with h5py.File(server.served_directory / filename, mode="w") as h5file: + h5file["scalar"] = 55 + h5file["empty"] = h5py.Empty(dtype="<4f") + + server.assert_error_code( + f"/data/?file={filename}&path=/scalar&format={format_arg}", 422 + ) + server.assert_error_code( + f"/data/?file={filename}&path=/empty&format={format_arg}", 422 + ) From 0f31eb2caf229ed53a03830b54150f04eff04f2d Mon Sep 17 00:00:00 2001 From: Loic Huder Date: Mon, 26 Aug 2024 16:23:59 +0200 Subject: [PATCH 4/4] Fix file not closed when resolve_links cannot be parsed --- h5grove/content.py | 1 + 1 file changed, 1 insertion(+) diff --git a/h5grove/content.py b/h5grove/content.py index 2b28ccc..72c2206 100644 --- a/h5grove/content.py +++ b/h5grove/content.py @@ -288,6 +288,7 @@ def get_content_from_file( fallback=LinkResolution.ONLY_VALID, ) except QueryArgumentError as e: + f.close() raise create_error(422, str(e)) try: