Skip to content

Commit

Permalink
[python/r] Expose shape-related accessors to Python/R bindings (#2953)
Browse files Browse the repository at this point in the history
* [python/r] Expose shape-related accessors from C++ to bindings

* code-review feedback

* code-review feedback
  • Loading branch information
johnkerl authored Sep 5, 2024
1 parent 79ec0c8 commit dd47db9
Show file tree
Hide file tree
Showing 22 changed files with 732 additions and 8 deletions.
22 changes: 22 additions & 0 deletions apis/python/src/tiledbsoma/_common_nd_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,28 @@ def shape(self) -> Tuple[int, ...]:
"""
return cast(Tuple[int, ...], tuple(self._handle.shape))

@property
def maxshape(self) -> Tuple[int, ...]:
    """Return the per-dimension upper bound to which this array can be resized.

    The result is a tuple with one entry per dimension (length ``ndim``),
    built from the underlying handle's ``maxshape``. These limits are the
    ceiling for ``resize`` and need not coincide with the bounds of the
    cells actually occupied in the array.

    Lifecycle:
        Maturing.
    """
    limits = tuple(self._handle.maxshape)
    return cast(Tuple[int, ...], limits)

@property
def has_upgraded_shape(self) -> bool:
    """Report whether this array carries the upgraded resizeable-shape feature.

    The feature dates from TileDB-SOMA 1.14. The answer is true when the
    array was created with that support, or when ``.upgrade_shape`` has
    been applied to it. The value is read directly from the underlying
    handle.

    Lifecycle:
        Maturing.
    """
    upgraded: bool = self._handle.has_upgraded_shape
    return upgraded

@classmethod
def _dim_capacity_and_extent(
cls,
Expand Down
34 changes: 34 additions & 0 deletions apis/python/src/tiledbsoma/_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -317,6 +317,40 @@ def count(self) -> int:
# if is it in read open mode, then it is a DataFrameWrapper
return cast(DataFrameWrapper, self._handle).count

@property
def _maybe_soma_joinid_shape(self) -> Optional[int]:
    """Internal helper: shape value along the ``soma_joinid`` index column.

    Returns the value reported by the underlying handle when the
    ``DataFrame`` has ``soma_joinid`` as an index column, and ``None``
    otherwise.

    Lifecycle:
        Experimental.
    """
    joinid_shape = self._handle.maybe_soma_joinid_shape
    return joinid_shape

@property
def _maybe_soma_joinid_maxshape(self) -> Optional[int]:
    """Internal helper: maxshape value along the ``soma_joinid`` index column.

    Returns the value reported by the underlying handle when the
    ``DataFrame`` has ``soma_joinid`` as an index column, and ``None``
    otherwise.

    Lifecycle:
        Experimental.
    """
    joinid_maxshape = self._handle.maybe_soma_joinid_maxshape
    return joinid_maxshape

@property
def has_upgraded_domain(self) -> bool:
    """Report whether this array carries the upgraded resizeable-domain feature.

    The feature dates from TileDB-SOMA 1.14. The answer is true when the
    array was created with that support, or when ``.upgrade_domain`` has
    been applied to it. The value is read directly from the underlying
    handle.

    Lifecycle:
        Maturing.
    """
    upgraded: bool = self._handle.has_upgraded_domain
    return upgraded

def __len__(self) -> int:
    """Return the dataframe's row count; equivalent to ``df.count``."""
    row_total = self.count
    return row_total
Expand Down
84 changes: 80 additions & 4 deletions apis/python/src/tiledbsoma/_tdb_handles.py
Original file line number Diff line number Diff line change
Expand Up @@ -406,7 +406,33 @@ def dim_names(self) -> Tuple[str, ...]:

@property
def shape(self) -> Tuple[int, ...]:
    """Return the underlying handle's ``shape`` as a tuple of ints.

    Not implemented for DataFrame.
    """
    # Single return path: the superseded bare ``return tuple(...)`` that
    # preceded the docstring is dropped so the docstring is a real docstring
    # and the typed return is reachable.
    return cast(Tuple[int, ...], tuple(self._handle.shape))

@property
def maxshape(self) -> Tuple[int, ...]:
    """Return the underlying handle's ``maxshape`` as a tuple of ints.

    Not implemented for DataFrame.
    """
    limits = tuple(self._handle.maxshape)
    return cast(Tuple[int, ...], limits)

@property
def maybe_soma_joinid_shape(self) -> Optional[int]:
    """Placeholder accessor; only implemented for DataFrame.

    Raises:
        NotImplementedError: Always, in this definition.
    """
    raise NotImplementedError()

@property
def maybe_soma_joinid_maxshape(self) -> Optional[int]:
    """Placeholder accessor; only implemented for DataFrame.

    Raises:
        NotImplementedError: Always, in this definition.
    """
    raise NotImplementedError()

@property
def has_upgraded_shape(self) -> bool:
    """Placeholder accessor; not implemented for DataFrame.

    Raises:
        NotImplementedError: Always, in this definition.
    """
    raise NotImplementedError()

@property
def has_upgraded_domain(self) -> bool:
    """Placeholder accessor; only implemented for DataFrame.

    Raises:
        NotImplementedError: Always, in this definition.
    """
    raise NotImplementedError()


class DataFrameWrapper(SOMAArrayWrapper[clib.SOMADataFrame]):
Expand All @@ -422,16 +448,55 @@ def write(self, values: pa.RecordBatch) -> None:
self._handle.write(values)

@property
def shape(self) -> Tuple[int, ...]:
# Shape is not implemented for DataFrames
raise NotImplementedError
def maybe_soma_joinid_shape(self) -> Optional[int]:
    """Return the shape slot for the ``soma_joinid`` dim, if the array has one.

    Serves as a test-point and developer-internal access-point, notably
    for the tiledbsoma-io experiment-level resizer.

    Lifecycle:
        Maturing.
    """
    slot = self._handle.maybe_soma_joinid_shape
    return cast(Optional[int], slot)

@property
def maybe_soma_joinid_maxshape(self) -> Optional[int]:
    """Return the maxshape slot for the ``soma_joinid`` dim, if the array has one.

    Serves as a test-point and developer-internal access-point, notably
    for the tiledbsoma-io experiment-level resizer.

    Lifecycle:
        Maturing.
    """
    slot = self._handle.maybe_soma_joinid_maxshape
    return cast(Optional[int], slot)

@property
def has_upgraded_domain(self) -> bool:
    """Report whether the array carries the upgraded resizeable-domain feature.

    The feature dates from TileDB-SOMA 1.14. The answer is true when the
    array was created with that support, or when ``.upgrade_domain`` has
    been applied to it.

    Lifecycle:
        Maturing.
    """
    flag = self._handle.has_upgraded_domain
    return cast(bool, flag)


class DenseNDArrayWrapper(SOMAArrayWrapper[clib.SOMADenseNDArray]):
    """Handle wrapper for a Pybind11 ``clib.SOMADenseNDArray`` object."""

    _ARRAY_WRAPPED_TYPE = clib.SOMADenseNDArray

    @property
    def has_upgraded_shape(self) -> bool:
        """Report whether the array carries the upgraded resizeable-shape feature.

        The feature dates from TileDB-SOMA 1.14. The answer is true when the
        array was created with that support, or when ``.upgrade_shape`` has
        been applied to it.

        Lifecycle:
            Maturing.
        """
        flag = self._handle.has_upgraded_shape
        return cast(bool, flag)


class SparseNDArrayWrapper(SOMAArrayWrapper[clib.SOMASparseNDArray]):
"""Wrapper around a Pybind11 SparseNDArrayWrapper handle."""
Expand All @@ -442,6 +507,17 @@ class SparseNDArrayWrapper(SOMAArrayWrapper[clib.SOMASparseNDArray]):
def nnz(self) -> int:
    """Return the result of the handle's ``nnz()`` call, coerced to ``int``."""
    stored = self._handle.nnz()
    return int(stored)

@property
def has_upgraded_shape(self) -> bool:
    """Report whether the array carries the upgraded resizeable-shape feature.

    The feature dates from TileDB-SOMA 1.14. The answer is true when the
    array was created with that support, or when ``.upgrade_shape`` has
    been applied to it.

    Lifecycle:
        Maturing.
    """
    flag = self._handle.has_upgraded_shape
    return cast(bool, flag)


class _DictMod(enum.Enum):
"""State machine to keep track of modifications to a dictionary.
Expand Down
10 changes: 9 additions & 1 deletion apis/python/src/tiledbsoma/soma_dataframe.cc
Original file line number Diff line number Diff line change
Expand Up @@ -144,9 +144,17 @@ void load_soma_dataframe(py::module& m) {
.def_static("exists", &SOMADataFrame::exists)
.def_property_readonly(
"index_column_names", &SOMADataFrame::index_column_names)

.def_property_readonly(
"count",
&SOMADataFrame::count,
py::call_guard<py::gil_scoped_release>());
py::call_guard<py::gil_scoped_release>())
.def_property_readonly(
"maybe_soma_joinid_shape", &SOMADataFrame::maybe_soma_joinid_shape)
.def_property_readonly(
"maybe_soma_joinid_maxshape",
&SOMADataFrame::maybe_soma_joinid_maxshape)
.def_property_readonly(
"has_upgraded_domain", &SOMAArray::has_current_domain);
}
} // namespace libtiledbsomacpp
7 changes: 6 additions & 1 deletion apis/python/src/tiledbsoma/soma_dense_ndarray.cc
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,11 @@ void load_soma_dense_ndarray(py::module& m) {

.def_static("exists", &SOMADenseNDArray::exists)

.def("write", write);
.def("write", write)

.def_property_readonly("shape", &SOMADenseNDArray::shape)
.def_property_readonly("maxshape", &SOMADenseNDArray::maxshape)
.def_property_readonly(
"has_upgraded_shape", &SOMAArray::has_current_domain);
}
} // namespace libtiledbsomacpp
7 changes: 6 additions & 1 deletion apis/python/src/tiledbsoma/soma_sparse_ndarray.cc
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,11 @@ void load_soma_sparse_ndarray(py::module& m) {
"result_order"_a = ResultOrder::automatic,
"timestamp"_a = py::none())

.def_static("exists", &SOMASparseNDArray::exists);
.def_static("exists", &SOMASparseNDArray::exists)

.def_property_readonly("shape", &SOMASparseNDArray::shape)
.def_property_readonly("maxshape", &SOMASparseNDArray::maxshape)
.def_property_readonly(
"has_upgraded_shape", &SOMAArray::has_current_domain);
}
} // namespace libtiledbsomacpp
7 changes: 7 additions & 0 deletions apis/python/tests/test_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,9 +90,16 @@ def test_dataframe(tmp_path, arrow_schema):
assert sdf.count == 5
assert len(sdf) == 5

# More to come on https://github.com/single-cell-data/TileDB-SOMA/issues/2407
assert not sdf.has_upgraded_domain

with pytest.raises(AttributeError):
assert sdf.shape is None

# soma_joinid is not a dim here
assert sdf._maybe_soma_joinid_shape is None
assert sdf._maybe_soma_joinid_maxshape is None

# Read all
table = sdf.read().concat()
assert table.num_rows == 5
Expand Down
Loading

0 comments on commit dd47db9

Please sign in to comment.