Skip to content

Commit

Permalink
Define metadata return types
Browse files Browse the repository at this point in the history
  • Loading branch information
axelboc committed Aug 29, 2024
1 parent 922dc0f commit 298a6f1
Show file tree
Hide file tree
Showing 3 changed files with 116 additions and 49 deletions.
88 changes: 47 additions & 41 deletions h5grove/content.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,16 @@
import contextlib
from pathlib import Path
from typing import Any, Callable, Dict, Generic, Optional, Sequence, TypeVar, Union
from typing import (
Any,
Callable,
Dict,
Generic,
Optional,
Sequence,
TypeVar,
Union,
cast,
)
import h5py
import numpy as np

Expand All @@ -9,7 +19,19 @@
except ImportError:
pass

from .models import LinkResolution, Selection
from .models import (
LinkResolution,
Selection,
EntityMetadata,
ExternalLinkMetadata,
SoftLinkMetadata,
AttributeMetadata,
ResolvedEntityMetadata,
GroupMetadata,
DatasetMetadata,
DatatypeMetadata,
Stats,
)
from .utils import (
NotFoundError,
QueryArgumentError,
Expand All @@ -35,21 +57,18 @@ class EntityContent:
def __init__(self, path: str):
self._path = path

def metadata(self) -> EntityMetadata:
    """Entity metadata

    :returns: Mapping holding the entity's ``name`` and ``kind``
    """
    return {"name": self.name, "kind": self.kind}

@property
def name(self) -> str:
    """Entity name (last path segment)"""
    # Only the text after the final "/" is needed, so split once from the right.
    return self._path.rsplit("/", 1)[-1]

@property
def path(self) -> str:
    """Path in the file"""
    return self._path


Expand All @@ -61,11 +80,8 @@ def __init__(self, path: str, link: h5py.ExternalLink):
self._target_file = link.filename
self._target_path = link.path

def metadata(self, depth=None):
"""External link metadata
:returns: {"name": str, "target_file": str, "target_path": str, "kind": str}
"""
def metadata(self, depth=None) -> ExternalLinkMetadata:
"""External link metadata"""
return sorted_dict(
("target_file", self._target_file),
("target_path", self._target_path),
Expand All @@ -89,12 +105,10 @@ class SoftLinkContent(EntityContent):
def __init__(self, path: str, link: h5py.SoftLink) -> None:
    super().__init__(path)
    self._target_path = link.path
    """The target path of the link"""

def metadata(self, depth=None) -> SoftLinkMetadata:
    """Soft link metadata

    :param depth: Not used by this method.
    :returns: The parent class metadata entries plus ``target_path``, assembled by ``sorted_dict``
    """
    return sorted_dict(
        ("target_path", self._target_path), *super().metadata().items()
    )
Expand All @@ -114,19 +128,19 @@ class ResolvedEntityContent(EntityContent, Generic[T]):
def __init__(self, path: str, h5py_entity: T):
    super().__init__(path)
    self._h5py_entity = h5py_entity
    """Resolved h5py entity"""

def attributes(
    self, attr_keys: Optional[Sequence[str]] = None
) -> Dict[str, Any]:
    """Attributes of the h5py entity. Can be filtered by keys.

    :param attr_keys: Names of the attributes to return. All attributes are returned when ``None``.
    :returns: Mapping of attribute name to attribute value
    """
    # The values are the attribute values themselves, not name/shape/type
    # metadata, hence the `Any` value type.
    attrs = self._h5py_entity.attrs
    if attr_keys is None:
        return dict(attrs.items())

    return {key: attrs[key] for key in attr_keys}

def metadata(self, depth=None):
"""
:returns: {"attributes": AttributeMetadata, "name": str, "kind": str}
"""
def metadata(self, depth=None) -> ResolvedEntityMetadata:
"""Resolved entity metadata"""
attribute_names = sorted(self._h5py_entity.attrs.keys())
return sorted_dict(
(
Expand All @@ -143,10 +157,8 @@ def metadata(self, depth=None):
class DatasetContent(ResolvedEntityContent[h5py.Dataset]):
kind = "dataset"

def metadata(self, depth=None):
"""
:returns: {"attributes": AttributeMetadata, chunks": tuple, "filters": tuple, "kind": str, "name": str, "shape": tuple, "type": TypeMetadata}
"""
def metadata(self, depth=None) -> DatasetMetadata:
"""Dataset metadata"""
return sorted_dict(
("chunks", self._h5py_entity.chunks),
("filters", get_filters(self._h5py_entity)),
Expand Down Expand Up @@ -177,13 +189,10 @@ def data(

return result

def data_stats(
self, selection: Selection = None
) -> Dict[str, Union[float, int, None]]:
def data_stats(self, selection: Selection = None) -> Stats:
"""Statistics on the data. Providing a selection will compute stats only on the selected slice.
:param selection: NumPy-like indexing to define a selection as a slice
:returns: {"strict_positive_min": number | None, "positive_min": number | None, "min": number | None, "max": number | None, "mean": number | None, "std": number | None}
"""
data = self._get_finite_data(selection)

Expand All @@ -208,7 +217,7 @@ class GroupContent(ResolvedEntityContent[h5py.Group]):
def __init__(self, path: str, h5py_entity: h5py.Group, h5file: h5py.File):
    super().__init__(path, h5py_entity)
    self._h5file = h5file
    """File in which the entity was resolved. This is needed to resolve child entities."""

def _get_child_metadata_content(self, depth=0):
return [
Expand All @@ -218,14 +227,13 @@ def _get_child_metadata_content(self, depth=0):
for child_path in self._h5py_entity.keys()
]

def metadata(self, depth: int = 1):
def metadata(self, depth: int = 1) -> GroupMetadata:
"""Metadata of the group. Recursively includes child metadata if depth > 0.
:parameter depth: The level of child metadata resolution.
:returns: {"attributes": AttributeMetadata, "children": ChildMetadata, "name": str, "kind": str}
"""
if depth <= 0:
return super().metadata()
return cast(GroupMetadata, super().metadata())

return sorted_dict(
("children", self._get_child_metadata_content(depth - 1)),
Expand All @@ -236,10 +244,8 @@ def metadata(self, depth: int = 1):
class DatatypeContent(ResolvedEntityContent[h5py.Datatype]):
kind = "datatype"

def metadata(self, depth=None):
"""
:returns: {"attributes": AttributeMetadata, "kind": str, "name": str, "type": TypeMetadata}
"""
def metadata(self, depth=None) -> DatatypeMetadata:
"""Datatype metadata"""
return sorted_dict(
("type", get_type_metadata(self._h5py_entity.id)),
*super().metadata().items(),
Expand Down
55 changes: 53 additions & 2 deletions h5grove/models.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from enum import Enum
from typing import Dict, Tuple, Union
from typing_extensions import TypedDict
from typing import Dict, Tuple, Union, List
from typing_extensions import TypedDict, NotRequired, Optional
import h5py

H5pyEntity = Union[
Expand Down Expand Up @@ -40,3 +40,54 @@ class LinkResolution(str, Enum):
},
total=False,
)


class EntityMetadata(TypedDict):
    """Base metadata mapping: the ``name`` and ``kind`` of an entity."""

    name: str
    kind: str


class ExternalLinkMetadata(EntityMetadata):
    """Entity metadata extended with the ``target_file`` and ``target_path`` keys."""

    target_file: str
    target_path: str


class SoftLinkMetadata(EntityMetadata):
    """Entity metadata extended with the ``target_path`` key."""

    target_path: str


class AttributeMetadata(TypedDict):
    """Metadata mapping of a single attribute: its ``name``, ``shape`` and ``type``."""

    name: str
    shape: tuple
    type: TypeMetadata


class ResolvedEntityMetadata(EntityMetadata):
    """Entity metadata extended with the list of attribute metadata."""

    attributes: List[AttributeMetadata]


class GroupMetadata(ResolvedEntityMetadata):
    """Resolved entity metadata with an optional ``children`` list."""

    # `NotRequired`: the key may be absent from the mapping altogether.
    children: NotRequired[List[EntityMetadata]]


class DatasetMetadata(ResolvedEntityMetadata):
    """Resolved entity metadata extended with ``chunks``, ``filters``, ``shape`` and ``type``."""

    # h5py reports `None` for `Dataset.chunks` when chunked storage is not used.
    chunks: Optional[tuple]
    # NOTE(review): confirm whether the filters helper can also yield `None`.
    filters: tuple
    shape: tuple
    type: TypeMetadata


class DatatypeMetadata(ResolvedEntityMetadata):
    """Resolved entity metadata extended with the ``type`` key."""

    type: TypeMetadata


class Stats(TypedDict):
    """Statistics mapping; every value is an int, a float or ``None``."""

    strict_positive_min: Optional[Union[int, float]]
    positive_min: Optional[Union[int, float]]
    min: Optional[Union[int, float]]
    max: Optional[Union[int, float]]
    mean: Optional[Union[int, float]]
    std: Optional[Union[int, float]]
22 changes: 16 additions & 6 deletions h5grove/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,15 @@
import numpy as np
from typing import Any, Callable, Dict, List, Optional, Tuple, TypeVar, Union

from .models import H5pyEntity, LinkResolution, Selection, StrDtype, TypeMetadata
from .models import (
H5pyEntity,
LinkResolution,
Selection,
StrDtype,
TypeMetadata,
Stats,
AttributeMetadata,
)


class NotFoundError(Exception):
Expand Down Expand Up @@ -39,7 +47,9 @@ def _legacy_get_attr_id(entity_attrs: h5py.AttributeManager, attr_name: str):
)


def attr_metadata(entity_attrs: h5py.AttributeManager, attr_name: str) -> dict:
def attr_metadata(
entity_attrs: h5py.AttributeManager, attr_name: str
) -> AttributeMetadata:
attrId = get_attr_id(entity_attrs, attr_name)

return {
Expand Down Expand Up @@ -171,9 +181,9 @@ def get_type_metadata(type_id: h5py.h5t.TypeID) -> TypeMetadata:

if isinstance(type_id, h5py.h5t.TypeEnumID):
for i in range(0, type_id.get_nmembers()):
members[type_id.get_member_name(i).decode("utf-8")] = (
type_id.get_member_value(i)
)
members[
type_id.get_member_name(i).decode("utf-8")
] = type_id.get_member_value(i)

return {
**base_metadata,
Expand Down Expand Up @@ -248,7 +258,7 @@ def is_numeric_data(data: Union[np.ndarray, np.number, np.bool_, bytes]) -> bool
return np.issubdtype(data.dtype, np.number) or np.issubdtype(data.dtype, np.bool_)


def get_array_stats(data: np.ndarray) -> Dict[str, Union[float, int, None]]:
def get_array_stats(data: np.ndarray) -> Stats:
if data.size == 0:
return {
"strict_positive_min": None,
Expand Down

0 comments on commit 298a6f1

Please sign in to comment.