From 09c262a7a21afc50825883f590dbf74a31068e91 Mon Sep 17 00:00:00 2001
From: Loic Huder <loic.huder@esrf.fr>
Date: Fri, 2 Aug 2024 11:07:53 +0200
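Subject: [PATCH 1/4] Allow copy for scalar and nested sequences when
 converting data to numpy arrays

Since NumPy 2.0, `np.array(obj, copy=False)` raises a ValueError whenever
a copy cannot be avoided (e.g. for Python scalars and nested sequences)
instead of silently copying. `np.asarray(obj)` keeps the previous
behaviour of copying only when needed.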

---
 h5grove/content.py  |  2 +-
 h5grove/encoders.py |  2 +-
 test/base_test.py   | 23 ++++++++++++++++++++++-
 test/utils.py       |  2 +-
 4 files changed, 25 insertions(+), 4 deletions(-)

diff --git a/h5grove/content.py b/h5grove/content.py
index 459d1d6..2b28ccc 100644
--- a/h5grove/content.py
+++ b/h5grove/content.py
@@ -190,7 +190,7 @@ def data_stats(
         return get_array_stats(data)
 
     def _get_finite_data(self, selection: Selection) -> np.ndarray:
-        data = np.array(self.data(selection), copy=False)  # So it works with scalars
+        data = np.asarray(self.data(selection))  # So it works with scalars
 
         if not np.issubdtype(data.dtype, np.floating):
             return data
diff --git a/h5grove/encoders.py b/h5grove/encoders.py
index b9a3316..a2ad5ea 100644
--- a/h5grove/encoders.py
+++ b/h5grove/encoders.py
@@ -111,7 +111,7 @@ def encode(content: Any, encoding: Optional[str] = "json") -> Response:
             headers={"Content-Type": "application/json"},
         )
 
-    content_array = np.array(content, copy=False)
+    content_array = np.asarray(content)
 
     if encoding == "bin":
         return Response(
diff --git a/test/base_test.py b/test/base_test.py
index 525006f..b3bc66d 100644
--- a/test/base_test.py
+++ b/test/base_test.py
@@ -77,6 +77,27 @@ def test_data_on_array_with_format(self, server, format_arg):
 
         assert np.array_equal(retrieved_data, data)
 
+    # TODO: What should we do for csv, tiff
+    @pytest.mark.parametrize("format_arg", ("json", "bin", "npy"))
+    def test_data_on_scalar_with_format(self, server, format_arg):
+        """Test /data/ endpoint on scalar dataset"""
+        # Test condition
+        tested_h5entity_path = "/entry/scalar"
+        data = 5
+
+        filename = "test.h5"
+        with h5py.File(server.served_directory / filename, mode="w") as h5file:
+            dset = h5file.create_dataset(tested_h5entity_path, data=data)
+            dtype = dset.dtype
+            shape = dset.shape
+
+        response = server.get(
+            f"/data/?{urlencode({'file': filename, 'path': tested_h5entity_path, 'format': format_arg})}"
+        )
+        retrieved_data = decode_array_response(response, format_arg, dtype.str, shape)
+
+        assert np.array_equal(retrieved_data, data)
+
     @pytest.mark.parametrize("format_arg", ("npy", "bin"))
     def test_data_on_array_with_dtype_safe(
         self,
@@ -114,7 +135,7 @@ def test_data_on_slice_with_format_and_flatten(self, server, format_arg):
         response = server.get(
             f"/data/?{urlencode({'file': filename, 'path': tested_h5entity_path, 'selection': '100,0', 'format': format_arg, 'flatten': True})}"
         )
-        retrieved_data = np.array(decode_response(response, format_arg))
+        retrieved_data = np.asarray(decode_response(response, format_arg))
 
         assert retrieved_data - data[100, 0] < 1e-8
 
diff --git a/test/utils.py b/test/utils.py
index 301941e..fcedcf2 100644
--- a/test/utils.py
+++ b/test/utils.py
@@ -64,7 +64,7 @@ def decode_array_response(
         assert content_type == "application/octet-stream"
         return np.frombuffer(response.content, dtype=dtype).reshape(shape)
 
-    return np.array(decode_response(response, format), copy=False)
+    return np.asarray(decode_response(response, format))
 
 
 def assert_error_response(response: Response, error_code: int):

From 9524ada2b861d8b339bdde2816efe01e0a1e145d Mon Sep 17 00:00:00 2001
From: Loic Huder <loic.huder@esrf.fr>
Date: Fri, 2 Aug 2024 14:58:11 +0200
Subject: [PATCH 2/4] Replace types-pkg-resources with types-setuptools

---
 setup.cfg | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.cfg b/setup.cfg
index 7d4d8d6..14e7687 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -61,7 +61,7 @@ dev =
 	types-contextvars
 	types-dataclasses
 	types-orjson
-	types-pkg-resources
+	types-setuptools
 
 # E501 (line too long) ignored for now
 # E203 and W503 incompatible with black formatting (https://black.readthedocs.io/en/stable/compatible_configs.html#flake8)

From 7ffbcf53a4bc75e25c4038eca556c3658cede8c2 Mon Sep 17 00:00:00 2001
From: Loic Huder <loic.huder@esrf.fr>
Date: Mon, 26 Aug 2024 12:01:37 +0200
Subject: [PATCH 3/4] Raise 422 errors when requesting scalar datasets with
 tiff or csv format

---
 h5grove/encoders.py | 21 +++++++++++++--------
 test/base_test.py   | 18 +++++++++++++++++-
 2 files changed, 30 insertions(+), 9 deletions(-)

diff --git a/h5grove/encoders.py b/h5grove/encoders.py
index a2ad5ea..12a1b44 100644
--- a/h5grove/encoders.py
+++ b/h5grove/encoders.py
@@ -126,21 +126,26 @@ def encode(content: Any, encoding: Optional[str] = "json") -> Response:
             f"Unsupported encoding {encoding} for non-numeric content"
         )
 
-    if encoding == "csv":
+    if encoding == "npy":
         return Response(
-            csv_encode(content_array),
+            npy_encode(content_array),
             headers={
-                "Content-Type": "text/csv",
-                "Content-Disposition": 'attachment; filename="data.csv"',
+                "Content-Type": "application/octet-stream",
+                "Content-Disposition": 'attachment; filename="data.npy"',
             },
         )
 
-    if encoding == "npy":
+    if content_array.ndim == 0:
+        raise QueryArgumentError(
+            f"Unsupported encoding {encoding} for empty and scalar datasets"
+        )
+
+    if encoding == "csv":
         return Response(
-            npy_encode(content_array),
+            csv_encode(content_array),
             headers={
-                "Content-Type": "application/octet-stream",
-                "Content-Disposition": 'attachment; filename="data.npy"',
+                "Content-Type": "text/csv",
+                "Content-Disposition": 'attachment; filename="data.csv"',
             },
         )
 
diff --git a/test/base_test.py b/test/base_test.py
index b3bc66d..154d161 100644
--- a/test/base_test.py
+++ b/test/base_test.py
@@ -77,7 +77,6 @@ def test_data_on_array_with_format(self, server, format_arg):
 
         assert np.array_equal(retrieved_data, data)
 
-    # TODO: What should we do for csv, tiff
     @pytest.mark.parametrize("format_arg", ("json", "bin", "npy"))
     def test_data_on_scalar_with_format(self, server, format_arg):
         """Test /data/ endpoint on scalar dataset"""
@@ -596,3 +595,20 @@ def test_422_on_invalid_query_arg(self, server):
             f"/meta/?file={filename}&path={path}&resolve_links={invalid_link_resolution}",
             422,
         )
+
+    @pytest.mark.parametrize("format_arg", ("csv", "tiff"))
+    def test_422_on_format_incompatible_with_empty_or_scalar_datasets(
+        self, server, format_arg
+    ):
+        filename = "test.h5"
+
+        with h5py.File(server.served_directory / filename, mode="w") as h5file:
+            h5file["scalar"] = 55
+            h5file["empty"] = h5py.Empty(dtype="<4f")
+
+        server.assert_error_code(
+            f"/data/?file={filename}&path=/scalar&format={format_arg}", 422
+        )
+        server.assert_error_code(
+            f"/data/?file={filename}&path=/empty&format={format_arg}", 422
+        )

From 0f31eb2caf229ed53a03830b54150f04eff04f2d Mon Sep 17 00:00:00 2001
From: Loic Huder <loic.huder@esrf.fr>
Date: Mon, 26 Aug 2024 16:23:59 +0200
Subject: [PATCH 4/4] Fix file not closed when resolve_links cannot be parsed

---
 h5grove/content.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/h5grove/content.py b/h5grove/content.py
index 2b28ccc..72c2206 100644
--- a/h5grove/content.py
+++ b/h5grove/content.py
@@ -288,6 +288,7 @@ def get_content_from_file(
             fallback=LinkResolution.ONLY_VALID,
         )
     except QueryArgumentError as e:
+        f.close()
         raise create_error(422, str(e))
 
     try: