Skip to content

Commit

Permalink
[python] Update a confusing filename (#2776)
Browse files: browse the repository at this point in the history
* [python] Update a confusing filename

* lint
  • Loading branch information
johnkerl committed Jul 8, 2024
1 parent 94f4457 commit 13b171c
Show file tree
Hide file tree
Showing 10 changed files with 53 additions and 39 deletions.
2 changes: 1 addition & 1 deletion apis/python/src/tiledbsoma/_common_nd_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
from .options._soma_tiledb_context import (
SOMATileDBContext,
)
from .options._tiledb_create_options import TileDBCreateOptions
from .options._tiledb_create_write_options import TileDBCreateOptions


class NDArray(SOMAArray, somacore.NDArray):
Expand Down
9 changes: 6 additions & 3 deletions apis/python/src/tiledbsoma/_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,10 @@
from ._types import NPFloating, NPInteger, OpenTimestamp, Slice, is_slice_of
from .options import SOMATileDBContext
from .options._soma_tiledb_context import _validate_soma_tiledb_context
from .options._tiledb_create_options import TileDBCreateOptions, TileDBWriteOptions
from .options._tiledb_create_write_options import (
TileDBCreateOptions,
TileDBWriteOptions,
)

_UNBATCHED = options.BatchSize()
AxisDomain = Union[None, Tuple[Any, Any], List[Any]]
Expand Down Expand Up @@ -773,7 +776,7 @@ def _fill_out_slot_domain(

def _find_extent_for_domain(
index_column_name: str,
tiledb_create_options: TileDBCreateOptions,
tiledb_create_write_options: TileDBCreateOptions,
dtype: Any,
slot_domain: Tuple[Any, Any],
) -> Any:
Expand All @@ -783,7 +786,7 @@ def _find_extent_for_domain(
"""

# Default 2048 mods to 0 for 8-bit types and 0 is an invalid extent
extent = tiledb_create_options.dim_tile(index_column_name)
extent = tiledb_create_write_options.dim_tile(index_column_name)
if isinstance(dtype, np.dtype) and dtype.itemsize == 1:
extent = 64

Expand Down
6 changes: 3 additions & 3 deletions apis/python/src/tiledbsoma/_dense_nd_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
SOMATileDBContext,
_validate_soma_tiledb_context,
)
from .options._tiledb_create_options import TileDBCreateOptions
from .options._tiledb_create_write_options import TileDBCreateOptions


class DenseNDArray(NDArray, somacore.DenseNDArray):
Expand Down Expand Up @@ -287,10 +287,10 @@ def write(
self._set_reader_coords(clib_dense_array, new_coords)
clib_dense_array.write(input)

tiledb_create_options = TileDBCreateOptions.from_platform_config(
tiledb_create_write_options = TileDBCreateOptions.from_platform_config(
platform_config
)
if tiledb_create_options.consolidate_and_vacuum:
if tiledb_create_write_options.consolidate_and_vacuum:
clib_dense_array.consolidate_and_vacuum()
return self

Expand Down
5 changes: 4 additions & 1 deletion apis/python/src/tiledbsoma/_sparse_nd_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,10 @@
SOMATileDBContext,
_validate_soma_tiledb_context,
)
from .options._tiledb_create_options import TileDBCreateOptions, TileDBWriteOptions
from .options._tiledb_create_write_options import (
TileDBCreateOptions,
TileDBWriteOptions,
)

_UNBATCHED = options.BatchSize()

Expand Down
2 changes: 1 addition & 1 deletion apis/python/src/tiledbsoma/_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@

from . import pytiledbsoma as clib
from ._types import OpenTimestamp, Slice, is_slice_of
from .options._tiledb_create_options import (
from .options._tiledb_create_write_options import (
TileDBCreateOptions,
_ColumnConfig,
_DictFilterSpec,
Expand Down
62 changes: 35 additions & 27 deletions apis/python/src/tiledbsoma/io/ingest.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@
)
from ..options import SOMATileDBContext
from ..options._soma_tiledb_context import _validate_soma_tiledb_context
from ..options._tiledb_create_options import TileDBCreateOptions
from ..options._tiledb_create_write_options import TileDBCreateOptions
from . import conversions
from ._common import (
_DATAFRAME_ORIGINAL_INDEX_NAME_JSON,
Expand Down Expand Up @@ -1105,19 +1105,19 @@ def _extract_new_values_for_append(
def _write_arrow_table(
arrow_table: pa.Table,
handle: Union[DataFrame, SparseNDArray],
tiledb_create_options: TileDBCreateOptions,
tiledb_create_write_options: TileDBCreateOptions,
) -> None:
"""Handles num-bytes capacity for remote object stores."""
cap = tiledb_create_options.remote_cap_nbytes
cap = tiledb_create_write_options.remote_cap_nbytes
if arrow_table.nbytes > cap:
n = len(arrow_table)
if n < 2:
raise SOMAError(
"single table row nbytes {arrow_table.nbytes} exceeds cap nbytes {cap}"
)
m = n // 2
_write_arrow_table(arrow_table[:m], handle, tiledb_create_options)
_write_arrow_table(arrow_table[m:], handle, tiledb_create_options)
_write_arrow_table(arrow_table[:m], handle, tiledb_create_write_options)
_write_arrow_table(arrow_table[m:], handle, tiledb_create_write_options)
else:
logging.log_io(
None,
Expand Down Expand Up @@ -1234,10 +1234,12 @@ def _write_dataframe_impl(
add_metadata(soma_df, additional_metadata)
return soma_df

tiledb_create_options = TileDBCreateOptions.from_platform_config(platform_config)
tiledb_create_write_options = TileDBCreateOptions.from_platform_config(
platform_config
)

if arrow_table:
_write_arrow_table(arrow_table, soma_df, tiledb_create_options)
_write_arrow_table(arrow_table, soma_df, tiledb_create_write_options)

# Save the original index name for outgest. We use JSON for elegant indication of index name
# being None (in Python anyway).
Expand Down Expand Up @@ -1334,7 +1336,7 @@ def _create_from_matrix(
_write_matrix_to_denseNDArray(
soma_ndarray,
matrix,
tiledb_create_options=TileDBCreateOptions.from_platform_config(
tiledb_create_write_options=TileDBCreateOptions.from_platform_config(
platform_config
),
ingestion_params=ingestion_params,
Expand All @@ -1344,7 +1346,7 @@ def _create_from_matrix(
_write_matrix_to_sparseNDArray(
soma_ndarray,
matrix,
tiledb_create_options=TileDBCreateOptions.from_platform_config(
tiledb_create_write_options=TileDBCreateOptions.from_platform_config(
platform_config
),
ingestion_params=ingestion_params,
Expand Down Expand Up @@ -1513,7 +1515,9 @@ def _update_dataframe(
add_keys = new_keys.difference(old_keys)
common_keys = old_keys.intersection(new_keys)

tiledb_create_options = TileDBCreateOptions.from_platform_config(platform_config)
tiledb_create_write_options = TileDBCreateOptions.from_platform_config(
platform_config
)

msgs = []
for key in common_keys:
Expand Down Expand Up @@ -1550,7 +1554,9 @@ def _update_dataframe(
)
)

filters = tiledb_create_options.attr_filters_tiledb(add_key, ["ZstdFilter"])
filters = tiledb_create_write_options.attr_filters_tiledb(
add_key, ["ZstdFilter"]
)

# An update can create (or drop) columns, or mutate existing ones. A
# brand-new column might have nulls in it -- or it might not. And a
Expand Down Expand Up @@ -1656,7 +1662,7 @@ def update_matrix(
_write_matrix_to_denseNDArray(
soma_ndarray,
new_data,
tiledb_create_options=TileDBCreateOptions.from_platform_config(
tiledb_create_write_options=TileDBCreateOptions.from_platform_config(
platform_config
),
ingestion_params=ingestion_params,
Expand All @@ -1666,7 +1672,7 @@ def update_matrix(
_write_matrix_to_sparseNDArray(
soma_ndarray,
new_data,
tiledb_create_options=TileDBCreateOptions.from_platform_config(
tiledb_create_write_options=TileDBCreateOptions.from_platform_config(
platform_config
),
ingestion_params=ingestion_params,
Expand Down Expand Up @@ -1781,7 +1787,7 @@ def add_matrix_to_collection(
def _write_matrix_to_denseNDArray(
soma_ndarray: DenseNDArray,
matrix: Union[Matrix, h5py.Dataset],
tiledb_create_options: TileDBCreateOptions,
tiledb_create_write_options: TileDBCreateOptions,
ingestion_params: IngestionParams,
additional_metadata: AdditionalMetadata = None,
) -> None:
Expand Down Expand Up @@ -1815,7 +1821,7 @@ def _write_matrix_to_denseNDArray(
return

# Write all at once?
if not tiledb_create_options.write_X_chunked:
if not tiledb_create_write_options.write_X_chunked:
if not isinstance(matrix, np.ndarray):
matrix = matrix.toarray()
soma_ndarray.write((slice(None),), pa.Tensor.from_numpy(matrix))
Expand All @@ -1838,7 +1844,9 @@ def _write_matrix_to_denseNDArray(
# it controls how much is read into client RAM from the backing store on each chunk.
# * The remote_cap_nbytes is an older parameter.
# * Compute chunk sizes for both and take the minimum.
chunk_size_using_nnz = int(math.ceil(tiledb_create_options.goal_chunk_nnz / ncol))
chunk_size_using_nnz = int(
math.ceil(tiledb_create_write_options.goal_chunk_nnz / ncol)
)

try:
# not scipy csr/csc
Expand All @@ -1849,7 +1857,7 @@ def _write_matrix_to_denseNDArray(

total_nbytes = matrix.size * itemsize
nbytes_num_chunks = math.ceil(
total_nbytes / tiledb_create_options.remote_cap_nbytes
total_nbytes / tiledb_create_write_options.remote_cap_nbytes
)
nbytes_num_chunks = min(1, nbytes_num_chunks)
chunk_size_using_nbytes = math.floor(nrow / nbytes_num_chunks)
Expand Down Expand Up @@ -2174,7 +2182,7 @@ def _find_sparse_chunk_size_backed(
def _write_matrix_to_sparseNDArray(
soma_ndarray: SparseNDArray,
matrix: Matrix,
tiledb_create_options: TileDBCreateOptions,
tiledb_create_write_options: TileDBCreateOptions,
ingestion_params: IngestionParams,
additional_metadata: AdditionalMetadata,
axis_0_mapping: AxisIDMapping,
Expand Down Expand Up @@ -2234,7 +2242,7 @@ def _coo_to_table(
add_metadata(soma_ndarray, additional_metadata)

# Write all at once?
if not tiledb_create_options.write_X_chunked:
if not tiledb_create_write_options.write_X_chunked:
soma_ndarray.write(
_coo_to_table(sp.coo_matrix(matrix), axis_0_mapping, axis_1_mapping)
)
Expand All @@ -2253,7 +2261,7 @@ def _coo_to_table(
dim_max_size = matrix.shape[stride_axis]

eta_tracker = eta.Tracker()
goal_chunk_nnz = tiledb_create_options.goal_chunk_nnz
goal_chunk_nnz = tiledb_create_write_options.goal_chunk_nnz
mean_nnz = _find_mean_nnz(matrix, stride_axis)

coords = [slice(None), slice(None)]
Expand Down Expand Up @@ -2321,19 +2329,19 @@ def _coo_to_table(
# send them off, with simplified logic.
num_tries = 0
max_tries = 20
while chunk_coo.nnz > tiledb_create_options.goal_chunk_nnz:
while chunk_coo.nnz > tiledb_create_write_options.goal_chunk_nnz:
num_tries += 1
# The logger we use doesn't have a TRACE level. If it did, we'd use it here.
# logging.logger.trace(
# f"Adapt: {num_tries}/{max_tries} {chunk_coo.nnz}/{tiledb_create_options.goal_chunk_nnz}"
# f"Adapt: {num_tries}/{max_tries} {chunk_coo.nnz}/{tiledb_create_write_options.goal_chunk_nnz}"
# )
if num_tries > max_tries:
raise SOMAError(
f"Unable to accommodate goal_chunk_nnz {goal_chunk_nnz}. "
"This may be reduced in TileDBCreateOptions."
)

ratio = chunk_coo.nnz / tiledb_create_options.goal_chunk_nnz
ratio = chunk_coo.nnz / tiledb_create_write_options.goal_chunk_nnz
chunk_size = int(math.floor(0.9 * (i2 - i) / ratio))
if chunk_size < 1:
raise SOMAError(
Expand Down Expand Up @@ -2363,7 +2371,7 @@ def _coo_to_table(
dim_max_size,
chunk_percent,
chunk_coo.nnz,
tiledb_create_options.goal_chunk_nnz,
tiledb_create_write_options.goal_chunk_nnz,
),
)
i = i2
Expand All @@ -2379,14 +2387,14 @@ def _coo_to_table(
dim_max_size,
chunk_percent,
chunk_coo.nnz,
tiledb_create_options.goal_chunk_nnz,
tiledb_create_write_options.goal_chunk_nnz,
),
)

arrow_table = _coo_to_table(
chunk_coo, axis_0_mapping, axis_1_mapping, stride_axis, i
)
_write_arrow_table(arrow_table, soma_ndarray, tiledb_create_options)
_write_arrow_table(arrow_table, soma_ndarray, tiledb_create_write_options)

t2 = time.time()
chunk_seconds = t2 - t1
Expand Down Expand Up @@ -2808,7 +2816,7 @@ def _ingest_uns_ndarray(
_write_matrix_to_denseNDArray(
soma_arr,
value,
tiledb_create_options=TileDBCreateOptions.from_platform_config(
tiledb_create_write_options=TileDBCreateOptions.from_platform_config(
platform_config
),
ingestion_params=ingestion_params,
Expand Down
2 changes: 1 addition & 1 deletion apis/python/src/tiledbsoma/options/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from ._soma_tiledb_context import SOMATileDBContext
from ._tiledb_create_options import TileDBCreateOptions, TileDBWriteOptions
from ._tiledb_create_write_options import TileDBCreateOptions, TileDBWriteOptions

__all__ = [
"SOMATileDBContext",
Expand Down
2 changes: 1 addition & 1 deletion apis/python/tests/test_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import tiledbsoma as soma
import tiledbsoma.io as somaio
from tiledbsoma import _factory
from tiledbsoma.options._tiledb_create_options import TileDBCreateOptions
from tiledbsoma.options._tiledb_create_write_options import TileDBCreateOptions


@pytest.fixture
Expand Down
2 changes: 1 addition & 1 deletion apis/python/tests/test_platform_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

import tiledbsoma
import tiledbsoma.io
import tiledbsoma.options._tiledb_create_options as tco
import tiledbsoma.options._tiledb_create_write_options as tco
from tiledbsoma._util import verify_obs_and_var_eq
import tiledb

Expand Down

0 comments on commit 13b171c

Please sign in to comment.