Provide more HDF5 type metadata
axelboc committed Jan 24, 2024
1 parent 6455788 commit 0f21787
Showing 6 changed files with 113 additions and 25 deletions.
2 changes: 1 addition & 1 deletion CONTRIBUTING.md
@@ -6,7 +6,7 @@
pip install -e .[dev]
```

will install `h5grove` in editable mode with all the linting/formating/testing packages. This will also [flask](https://flask.palletsprojects.com/en/) and [tornado](https://www.tornadoweb.org/en/stable/) packages as they are needed to build the documentation.
will install `h5grove` in editable mode with all the linting/formating/testing packages. This will also install the [flask](https://flask.palletsprojects.com/en/) and [tornado](https://www.tornadoweb.org/en/stable/) packages as they are needed to build the documentation.

## Linting

30 changes: 15 additions & 15 deletions h5grove/content.py
@@ -18,7 +18,7 @@
get_array_stats,
open_file_with_error_fallback,
parse_link_resolution_arg,
stringify_dtype,
get_type_metadata,
get_filters,
get_entity_from_file,
hdf_path_join,
@@ -30,17 +30,17 @@
class EntityContent:
"""Base content for an entity."""

type = "other"
kind = "other"

def __init__(self, path: str):
self._path = path

def metadata(self) -> Dict[str, str]:
"""Entity metadata
:returns: {"name": str, "type": str}
:returns: {"name": str, "kind": str}
"""
return {"name": self.name, "type": self.type}
return {"name": self.name, "kind": self.kind}

@property
def name(self) -> str:
@@ -54,7 +54,7 @@ def path(self) -> str:


class ExternalLinkContent(EntityContent):
type = "external_link"
kind = "external_link"

def __init__(self, path: str, link: h5py.ExternalLink):
super().__init__(path)
@@ -64,7 +64,7 @@ def __init__(self, path: str, link: h5py.ExternalLink):
def metadata(self, depth=None):
"""External link metadata
:returns: {"name": str, "target_file": str, "target_path": str, "type": str}
:returns: {"name": str, "target_file": str, "target_path": str, "kind": str}
"""
return sorted_dict(
("target_file", self._target_file),
@@ -84,7 +84,7 @@ def target_path(self) -> str:


class SoftLinkContent(EntityContent):
type = "soft_link"
kind = "soft_link"

def __init__(self, path: str, link: h5py.SoftLink) -> None:
super().__init__(path)
@@ -93,7 +93,7 @@ def __init__(self, path: str, link: h5py.SoftLink) -> None:

def metadata(self, depth=None):
"""
:returns: {"name": str, "target_path": str, "type": str}
:returns: {"name": str, "target_path": str, "kind": str}
"""
return sorted_dict(
("target_path", self._target_path), *super().metadata().items()
@@ -125,7 +125,7 @@ def attributes(self, attr_keys: Optional[Sequence[str]] = None):

def metadata(self, depth=None):
"""
:returns: {"attributes": AttributeMetadata, "name": str, "type": str}
:returns: {"attributes": AttributeMetadata, "name": str, "kind": str}
"""
attribute_names = sorted(self._h5py_entity.attrs.keys())
return sorted_dict(
@@ -141,17 +141,17 @@ def metadata(self, depth=None):


class DatasetContent(ResolvedEntityContent[h5py.Dataset]):
type = "dataset"
kind = "dataset"

def metadata(self, depth=None):
"""
:returns: {"attributes": AttributeMetadata, chunks": tuple, "dtype": str, "filters": tuple, "shape": tuple, "name": str, "type": str}
:returns: {"attributes": AttributeMetadata, chunks": tuple, "filters": tuple, "kind": str, "name": str, "shape": tuple, "type": TypeMetadata}
"""
return sorted_dict(
("chunks", self._h5py_entity.chunks),
("dtype", stringify_dtype(self._h5py_entity.dtype)),
("filters", get_filters(self._h5py_entity)),
("shape", self._h5py_entity.shape),
("type", get_type_metadata(self._h5py_entity.id.get_type())),
*super().metadata().items(),
)
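
For orientation, here is a hedged sketch of what the JSON-encoded dataset metadata might now look like for a chunked, compressed float64 dataset: the nested `type` object replaces the former top-level `dtype` string, and the entity class moves from `type` to `kind`. The numeric codes are illustrative h5py/HDF5 constants, not values taken from this commit.

```python
# Hypothetical /meta/ payload for a chunked float64 dataset of shape (10, 10).
{
    "attributes": [],
    "chunks": [5, 5],
    "filters": [{"id": 2, "name": "shuffle"}, {"id": 1, "name": "deflate"}],
    "kind": "dataset",
    "name": "data",
    "shape": [10, 10],
    "type": {
        "class": 1,   # h5py.h5t.FLOAT
        "dtype": "<f8",
        "size": 8,    # bytes per element
        "order": 0,   # h5py.h5t.ORDER_LE
    },
}
```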

@@ -203,7 +203,7 @@ def _get_finite_data(self, selection: Selection) -> np.ndarray:


class GroupContent(ResolvedEntityContent[h5py.Group]):
type = "group"
kind = "group"

def __init__(self, path: str, h5py_entity: h5py.Group, h5file: h5py.File):
super().__init__(path, h5py_entity)
@@ -222,7 +222,7 @@ def metadata(self, depth: int = 1):
"""Metadata of the group. Recursively includes child metadata if depth > 0.
:parameter depth: The level of child metadata resolution.
:returns: {"attributes": AttributeMetadata, "children": ChildMetadata, "name": str, "type": str}
:returns: {"attributes": AttributeMetadata, "children": ChildMetadata, "name": str, "kind": str}
"""
if depth <= 0:
return super().metadata()
@@ -269,7 +269,7 @@ def create_content(
if isinstance(entity, h5py.Datatype):
return ResolvedEntityContent(path, entity)

raise TypeError(f"h5py type {type(entity)} not supported")
raise TypeError(f"h5py entity {type(entity)} not supported")


@contextlib.contextmanager
20 changes: 20 additions & 0 deletions h5grove/models.py
@@ -1,5 +1,6 @@
from enum import Enum
from typing import Dict, Tuple, Union
from typing_extensions import TypedDict
import h5py

H5pyEntity = Union[
@@ -19,3 +20,22 @@ class LinkResolution(str, Enum):

# Recursive types not supported by mypy: https://github.com/python/mypy/issues/731
StrDtype = Union[str, Dict[str, "StrDtype"]] # type: ignore

# https://api.h5py.org/h5t.html
TypeMetadata = TypedDict(
"TypeMetadata",
{
"class": int, # HDF5 class code
"dtype": StrDtype, # Numpy-style dtype
"size": int, # all (but most relevant for int, float, string)
"order": int, # int, float, bitfield
"sign": int, # int
"cset": int, # string
"vlen": bool, # string
"tag": str, # opaque
"dims": Tuple[int, ...], # array
"members": Union[Dict[str, "TypeMetadata"], Dict[str, int]], # compound, enum
"base": "TypeMetadata", # array, enum, vlen
},
total=False,
)
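
As an illustration of how a non-scalar type could fill these optional fields, here is a hypothetical `TypeMetadata` value for a compound type with an int32 and a float64 member (not part of the commit; the class, order and sign codes follow the HDF5 conventions linked above):

```python
from h5grove.models import TypeMetadata

# Hypothetical description of a packed compound type {"count": "<i4", "temperature": "<f8"}.
compound_type: TypeMetadata = {
    "class": 6,  # h5py.h5t.COMPOUND
    "dtype": {"count": "<i4", "temperature": "<f8"},
    "size": 12,  # total size in bytes (packed)
    "members": {
        "count": {"class": 0, "dtype": "<i4", "size": 4, "order": 0, "sign": 1},
        "temperature": {"class": 1, "dtype": "<f8", "size": 8, "order": 0},
    },
}
```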
73 changes: 70 additions & 3 deletions h5grove/utils.py
@@ -5,7 +5,7 @@
import numpy as np
from typing import Any, Callable, Dict, List, Optional, Tuple, TypeVar, Union

from .models import H5pyEntity, LinkResolution, Selection, StrDtype
from .models import H5pyEntity, LinkResolution, Selection, StrDtype, TypeMetadata


class NotFoundError(Exception):
@@ -43,9 +43,9 @@ def attr_metadata(entity_attrs: h5py.AttributeManager, attr_name: str) -> dict:
attrId = get_attr_id(entity_attrs, attr_name)

return {
"dtype": stringify_dtype(attrId.dtype),
"name": attr_name,
"shape": attrId.shape,
"type": get_type_metadata(attrId.get_type()),
}
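
Attribute metadata follows the same pattern: the flat `dtype` string gives way to a full `type` description. A rough, illustrative example for a scalar float64 attribute (the attribute name and codes are hypothetical):

```python
# Hypothetical attr_metadata() result for a scalar float64 attribute.
{
    "name": "scale_factor",
    "shape": (),  # scalar attribute
    "type": {"class": 1, "dtype": "<f8", "size": 8, "order": 0},
}
```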


@@ -123,7 +123,74 @@ def parse_slice_member(slice_member: str) -> Union[slice, int]:


def sorted_dict(*args: Tuple[str, Any]):
return dict(sorted(args))
return dict(sorted(args, key=lambda entry: entry[0]))
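
With an explicit key function, the sort depends only on the entry names, so the values (which may be `None`, tuples or nested dicts) are never compared. A minimal illustration:

```python
# Keys come out in alphabetical order; only the names are compared.
sorted_dict(("shape", (10, 10)), ("chunks", None), ("kind", "dataset"))
# -> {"chunks": None, "kind": "dataset", "shape": (10, 10)}
```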


def get_type_metadata(type_id: h5py.h5t.TypeID) -> TypeMetadata:
base_metadata: TypeMetadata = {
"class": type_id.get_class(),
"dtype": stringify_dtype(type_id.dtype),
"size": type_id.get_size(),
}
members = {}

if isinstance(type_id, h5py.h5t.TypeIntegerID):
return {
**base_metadata,
"order": type_id.get_order(),
"sign": type_id.get_sign(),
}

if isinstance(type_id, h5py.h5t.TypeFloatID):
return {
**base_metadata,
"order": type_id.get_order(),
}

if isinstance(type_id, h5py.h5t.TypeStringID):
return {
**base_metadata,
"cset": type_id.get_cset(),
"vlen": type_id.is_variable_str(),
}

if isinstance(type_id, h5py.h5t.TypeBitfieldID):
return {**base_metadata, "order": type_id.get_order()}

if isinstance(type_id, h5py.h5t.TypeOpaqueID):
return {**base_metadata, "tag": type_id.get_tag()}

if isinstance(type_id, h5py.h5t.TypeCompoundID):
for i in range(0, type_id.get_nmembers()):
members[type_id.get_member_name(i).decode("utf-8")] = get_type_metadata(
type_id.get_member_type(i)
)

return {**base_metadata, "members": members}

if isinstance(type_id, h5py.h5t.TypeEnumID):
for i in range(0, type_id.get_nmembers()):
members[
type_id.get_member_name(i).decode("utf-8")
] = type_id.get_member_value(i)

return {
**base_metadata,
"members": members,
"base": get_type_metadata(type_id.get_super()),
}

if isinstance(type_id, h5py.h5t.TypeVlenID):
return {**base_metadata, "base": get_type_metadata(type_id.get_super())}

if isinstance(type_id, h5py.h5t.TypeArrayID):
return {
**base_metadata,
"dims": type_id.get_array_dims(),
"base": get_type_metadata(type_id.get_super()),
}

return base_metadata
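
A minimal usage sketch, assuming `get_type_metadata` is imported from `h5grove.utils` and using a throwaway file (the file and dataset names are hypothetical, and the expected codes are illustrative):

```python
import h5py
import numpy as np

from h5grove.utils import get_type_metadata

# Hypothetical round trip: create a float64 dataset, then inspect its HDF5 type.
with h5py.File("example.h5", "w") as h5file:
    h5file.create_dataset("data", data=np.arange(10, dtype="<f8"))
    type_id = h5file["data"].id.get_type()  # low-level h5py.h5t.TypeID
    print(get_type_metadata(type_id))
    # Roughly: {"class": 1, "dtype": "<f8", "size": 8, "order": 0}
```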


def _sanitize_dtype(dtype: np.dtype) -> np.dtype:
3 changes: 2 additions & 1 deletion setup.cfg
@@ -48,6 +48,7 @@ dev =
myst-parser
# Needed for fastapi tests. Could be removed by setting fastapi[all] as dep in the future.
httpx >= 0.23
pydantic_settings
pytest
pytest-benchmark
pytest-cov
@@ -70,7 +71,7 @@ per-file-ignores =
__init__.py: F401

[mypy]
python_version = 3.6
python_version = 3.8

[mypy-h5py.*]
ignore_missing_imports = True
10 changes: 5 additions & 5 deletions test/base_test.py
@@ -133,12 +133,12 @@ def test_meta_on_chunked_compressed_dataset(self, server):

assert content == {
"attributes": [],
"filters": [{"id": 2, "name": "shuffle"}, {"id": 1, "name": "deflate"}],
"chunks": [5, 5],
"name": "data",
"dtype": "<f8",
"filters": [{"id": 2, "name": "shuffle"}, {"id": 1, "name": "deflate"}],
"kind": "dataset",
"name": "data",
"shape": [10, 10],
"type": "dataset",
}

def test_meta_on_compound_dataset(self, server):
@@ -212,20 +212,20 @@ def test_meta_on_valid_ext_link(self, server, resolve_links):
# Valid link is not resolved only if link resolution is 'none'.
if resolve_links == LinkResolution.NONE:
assert content == {
"kind": "external_link",
"name": "ext_link",
"target_file": "source.h5",
"target_path": "data",
"type": "external_link",
}
else:
assert content == {
"attributes": [],
"chunks": None,
"dtype": "<f8",
"filters": None,
"kind": "dataset",
"name": "ext_link",
"shape": [10],
"type": "dataset",
}

def test_stats_on_negative_scalar(self, server):
