Skip to content

Commit

Permalink
Merge branch 'main' into bkmartinjr/experimentdatapipe
Browse files Browse the repository at this point in the history
  • Loading branch information
bkmartinjr committed Sep 6, 2024
2 parents ce6426b + 796c168 commit de44410
Show file tree
Hide file tree
Showing 33 changed files with 2,001 additions and 441 deletions.
4 changes: 4 additions & 0 deletions apis/python/src/tiledbsoma/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,10 @@
tiledbsoma_stats_enable,
tiledbsoma_stats_reset,
)
from .stats import (
tiledbsoma_stats_json,
tiledbsoma_stats_as_py,
)

__version__ = get_implementation_version()

Expand Down
22 changes: 22 additions & 0 deletions apis/python/src/tiledbsoma/_common_nd_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,28 @@ def shape(self) -> Tuple[int, ...]:
"""
return cast(Tuple[int, ...], tuple(self._handle.shape))

@property
def maxshape(self) -> Tuple[int, ...]:
    """Maximum size each dimension of the array can be resized to.

    The result is a tuple with one entry per dimension (length ``ndim``),
    built from the underlying handle's ``maxshape``. It is the upper limit
    for ``resize`` on the array and will not necessarily match the bounds
    of the cells that are currently occupied.

    Lifecycle:
        Maturing.
    """
    per_dim_limits = tuple(self._handle.maxshape)
    return cast(Tuple[int, ...], per_dim_limits)

@property
def has_upgraded_shape(self) -> bool:
    """Whether the array has the resizeable-shape feature of TileDB-SOMA 1.14.

    True when the array was created with this support, or when it has had
    ``.upgrade_shape`` applied to it. The value is read from the underlying
    handle.

    Lifecycle:
        Maturing.
    """
    upgraded = self._handle.has_upgraded_shape
    return upgraded

@classmethod
def _dim_capacity_and_extent(
cls,
Expand Down
34 changes: 34 additions & 0 deletions apis/python/src/tiledbsoma/_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -317,6 +317,40 @@ def count(self) -> int:
# if is it in read open mode, then it is a DataFrameWrapper
return cast(DataFrameWrapper, self._handle).count

@property
def _maybe_soma_joinid_shape(self) -> Optional[int]:
    """Internal helper: the shape value along the ``soma_joinid`` index column.

    Returns the value reported by the underlying handle if the
    ``DataFrame`` has a ``soma_joinid`` index column, else ``None``.

    Lifecycle:
        Experimental.
    """
    joinid_shape = self._handle.maybe_soma_joinid_shape
    return joinid_shape

@property
def _maybe_soma_joinid_maxshape(self) -> Optional[int]:
    """Internal helper: the maxshape value along the ``soma_joinid`` index column.

    Returns the value reported by the underlying handle if the
    ``DataFrame`` has a ``soma_joinid`` index column, else ``None``.

    Lifecycle:
        Experimental.
    """
    joinid_maxshape = self._handle.maybe_soma_joinid_maxshape
    return joinid_maxshape

@property
def has_upgraded_domain(self) -> bool:
    """Whether the array has the resizeable-domain feature of TileDB-SOMA 1.14.

    True when the array was created with this support, or when it has had
    ``.upgrade_domain`` applied to it. The value is read from the underlying
    handle.

    Lifecycle:
        Maturing.
    """
    upgraded = self._handle.has_upgraded_domain
    return upgraded

def __len__(self) -> int:
    """Number of rows in the dataframe; equivalent to ``df.count``."""
    row_count = self.count
    return row_count
Expand Down
2 changes: 1 addition & 1 deletion apis/python/src/tiledbsoma/_experiment.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ class Experiment( # type: ignore[misc] # __eq__ false positive
... obs_df = exp.obs
...
... # the primary use case is to run queries on the experiment data.
... q = exp.query(
... q = exp.axis_query(
... "mtdna",
... obs_query=tiledbsoma.AxisQuery(value_filter="tissue == 'lung'"),
... var_query=tiledbsoma.AxisQuery(coords=(slice(50, 100),)),
Expand Down
84 changes: 80 additions & 4 deletions apis/python/src/tiledbsoma/_tdb_handles.py
Original file line number Diff line number Diff line change
Expand Up @@ -406,7 +406,33 @@ def dim_names(self) -> Tuple[str, ...]:

@property
def shape(self) -> Tuple[int, ...]:
return tuple(self._handle.shape)
"""Not implemented for DataFrame."""
return cast(Tuple[int, ...], tuple(self._handle.shape))

@property
def maxshape(self) -> Tuple[int, ...]:
    """Tuple built from the wrapped handle's ``maxshape``.

    Not implemented for DataFrame.
    """
    dims = tuple(self._handle.maxshape)
    return cast(Tuple[int, ...], dims)

@property
def maybe_soma_joinid_shape(self) -> Optional[int]:
    """Placeholder that always raises; only implemented for DataFrame.

    Raises:
        NotImplementedError: whenever this implementation is reached.
    """
    raise NotImplementedError()

@property
def maybe_soma_joinid_maxshape(self) -> Optional[int]:
    """Placeholder that always raises; only implemented for DataFrame.

    Raises:
        NotImplementedError: whenever this implementation is reached.
    """
    raise NotImplementedError()

@property
def has_upgraded_shape(self) -> bool:
    """Placeholder that always raises; not implemented for DataFrame.

    Raises:
        NotImplementedError: whenever this implementation is reached.
    """
    raise NotImplementedError()

@property
def has_upgraded_domain(self) -> bool:
    """Placeholder that always raises; only implemented for DataFrame.

    Raises:
        NotImplementedError: whenever this implementation is reached.
    """
    raise NotImplementedError()


class DataFrameWrapper(SOMAArrayWrapper[clib.SOMADataFrame]):
Expand All @@ -422,16 +448,55 @@ def write(self, values: pa.RecordBatch) -> None:
self._handle.write(values)

@property
def shape(self) -> Tuple[int, ...]:
# Shape is not implemented for DataFrames
raise NotImplementedError
def maybe_soma_joinid_shape(self) -> Optional[int]:
"""Return the shape slot for the soma_joinid dim, if the array has one.
This is an important test-point and dev-internal access-point,
in particular, for the tiledbsoma-io experiment-level resizer.
Lifecycle:
Maturing.
"""
return cast(Optional[int], self._handle.maybe_soma_joinid_shape)

@property
def maybe_soma_joinid_maxshape(self) -> Optional[int]:
    """The maxshape slot for the ``soma_joinid`` dim, if the array has one.

    This is an important test-point and dev-internal access-point, in
    particular for the tiledbsoma-io experiment-level resizer.

    Lifecycle:
        Maturing.
    """
    slot = self._handle.maybe_soma_joinid_maxshape
    return cast(Optional[int], slot)

@property
def has_upgraded_domain(self) -> bool:
    """Whether the array has the resizeable-domain feature of TileDB-SOMA 1.14.

    True when the array was created with this support, or when it has had
    ``.upgrade_domain`` applied to it.

    Lifecycle:
        Maturing.
    """
    flag = self._handle.has_upgraded_domain
    return cast(bool, flag)


class DenseNDArrayWrapper(SOMAArrayWrapper[clib.SOMADenseNDArray]):
    """Wrapper around a Pybind11 ``SOMADenseNDArray`` handle."""

    # Handle type wrapped by this class.
    _ARRAY_WRAPPED_TYPE = clib.SOMADenseNDArray

    @property
    def has_upgraded_shape(self) -> bool:
        """Whether the array has the resizeable-shape feature of TileDB-SOMA 1.14.

        True when the array was created with this support, or when it has
        had ``.upgrade_shape`` applied to it.

        Lifecycle:
            Maturing.
        """
        flag = self._handle.has_upgraded_shape
        return cast(bool, flag)


class SparseNDArrayWrapper(SOMAArrayWrapper[clib.SOMASparseNDArray]):
"""Wrapper around a Pybind11 SparseNDArrayWrapper handle."""
Expand All @@ -442,6 +507,17 @@ class SparseNDArrayWrapper(SOMAArrayWrapper[clib.SOMASparseNDArray]):
def nnz(self) -> int:
return int(self._handle.nnz())

@property
def has_upgraded_shape(self) -> bool:
    """Whether the array has the resizeable-shape feature of TileDB-SOMA 1.14.

    True when the array was created with this support, or when it has had
    ``.upgrade_shape`` applied to it.

    Lifecycle:
        Maturing.
    """
    flag = self._handle.has_upgraded_shape
    return cast(bool, flag)


class _DictMod(enum.Enum):
"""State machine to keep track of modifications to a dictionary.
Expand Down
7 changes: 7 additions & 0 deletions apis/python/src/tiledbsoma/pytiledbsoma.cc
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,13 @@ PYBIND11_MODULE(pytiledbsoma, m) {
py::print(stats);
},
"Print TileDB internal statistics. Lifecycle: experimental.");
// Hand the statistics dump back to Python as a string. Unlike the binding
// registered just before this one, nothing is printed here, so the docstring
// says "Return" rather than "Print".
m.def(
    "tiledbsoma_stats_string",
    []() -> std::string { return tiledbsoma::stats::dump(); },
    "Return TileDB internal statistics as a string. Lifecycle: "
    "experimental.");

py::class_<PlatformConfig>(m, "PlatformConfig")
.def(py::init<>())
Expand Down
10 changes: 9 additions & 1 deletion apis/python/src/tiledbsoma/soma_dataframe.cc
Original file line number Diff line number Diff line change
Expand Up @@ -144,9 +144,17 @@ void load_soma_dataframe(py::module& m) {
.def_static("exists", &SOMADataFrame::exists)
.def_property_readonly(
"index_column_names", &SOMADataFrame::index_column_names)

.def_property_readonly(
"count",
&SOMADataFrame::count,
py::call_guard<py::gil_scoped_release>());
py::call_guard<py::gil_scoped_release>())
.def_property_readonly(
"maybe_soma_joinid_shape", &SOMADataFrame::maybe_soma_joinid_shape)
.def_property_readonly(
"maybe_soma_joinid_maxshape",
&SOMADataFrame::maybe_soma_joinid_maxshape)
.def_property_readonly(
"has_upgraded_domain", &SOMAArray::has_current_domain);
}
} // namespace libtiledbsomacpp
7 changes: 6 additions & 1 deletion apis/python/src/tiledbsoma/soma_dense_ndarray.cc
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,11 @@ void load_soma_dense_ndarray(py::module& m) {

.def_static("exists", &SOMADenseNDArray::exists)

.def("write", write);
.def("write", write)

.def_property_readonly("shape", &SOMADenseNDArray::shape)
.def_property_readonly("maxshape", &SOMADenseNDArray::maxshape)
.def_property_readonly(
"has_upgraded_shape", &SOMAArray::has_current_domain);
}
} // namespace libtiledbsomacpp
7 changes: 6 additions & 1 deletion apis/python/src/tiledbsoma/soma_sparse_ndarray.cc
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,11 @@ void load_soma_sparse_ndarray(py::module& m) {
"result_order"_a = ResultOrder::automatic,
"timestamp"_a = py::none())

.def_static("exists", &SOMASparseNDArray::exists);
.def_static("exists", &SOMASparseNDArray::exists)

.def_property_readonly("shape", &SOMASparseNDArray::shape)
.def_property_readonly("maxshape", &SOMASparseNDArray::maxshape)
.def_property_readonly(
"has_upgraded_shape", &SOMAArray::has_current_domain);
}
} // namespace libtiledbsomacpp
22 changes: 22 additions & 0 deletions apis/python/src/tiledbsoma/stats.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# Copyright (c) 2024 TileDB, Inc.
#
# Licensed under the MIT License.

import json
from typing import Dict, List, Literal, Union, cast

from .pytiledbsoma import tiledbsoma_stats_string

ParsedStats = List[Dict[Literal["counters", "timers"], Dict[str, Union[float, int]]]]


def tiledbsoma_stats_json() -> str:
    """Return the tiledbsoma stats as a JSON string."""
    raw_stats = tiledbsoma_stats_string()
    # The value comes from the pybind11 binding, hence the explicit cast.
    return cast(str, raw_stats)


def tiledbsoma_stats_as_py() -> ParsedStats:
    """Return the tiledbsoma stats parsed from JSON into Python objects.

    The stats string is decoded with ``json.loads`` and the result is typed
    as ``ParsedStats``.
    """
    stats_text = tiledbsoma_stats_string()
    decoded = json.loads(stats_text)
    # The value originates from the pybind11 binding, hence the explicit cast.
    return cast(ParsedStats, decoded)
7 changes: 7 additions & 0 deletions apis/python/tests/test_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,9 +90,16 @@ def test_dataframe(tmp_path, arrow_schema):
assert sdf.count == 5
assert len(sdf) == 5

# More to come on https://github.com/single-cell-data/TileDB-SOMA/issues/2407
assert not sdf.has_upgraded_domain

with pytest.raises(AttributeError):
assert sdf.shape is None

# soma_joinid is not a dim here
assert sdf._maybe_soma_joinid_shape is None
assert sdf._maybe_soma_joinid_maxshape is None

# Read all
table = sdf.read().concat()
assert table.num_rows == 5
Expand Down
Loading

0 comments on commit de44410

Please sign in to comment.