From 13b171c2f22be4ea88d986948b10774fa30c062a Mon Sep 17 00:00:00 2001 From: John Kerl Date: Mon, 8 Jul 2024 10:02:15 -0400 Subject: [PATCH] [python] Update a confusing filename (#2776) * [python] Update a confusing filename * lint --- .../python/src/tiledbsoma/_common_nd_array.py | 2 +- apis/python/src/tiledbsoma/_dataframe.py | 9 ++- apis/python/src/tiledbsoma/_dense_nd_array.py | 6 +- .../python/src/tiledbsoma/_sparse_nd_array.py | 5 +- apis/python/src/tiledbsoma/_util.py | 2 +- apis/python/src/tiledbsoma/io/ingest.py | 62 +++++++++++-------- .../python/src/tiledbsoma/options/__init__.py | 2 +- ...ons.py => _tiledb_create_write_options.py} | 0 apis/python/tests/test_io.py | 2 +- apis/python/tests/test_platform_config.py | 2 +- 10 files changed, 53 insertions(+), 39 deletions(-) rename apis/python/src/tiledbsoma/options/{_tiledb_create_options.py => _tiledb_create_write_options.py} (100%) diff --git a/apis/python/src/tiledbsoma/_common_nd_array.py b/apis/python/src/tiledbsoma/_common_nd_array.py index fe7fc786e6..58859d418b 100644 --- a/apis/python/src/tiledbsoma/_common_nd_array.py +++ b/apis/python/src/tiledbsoma/_common_nd_array.py @@ -17,7 +17,7 @@ from .options._soma_tiledb_context import ( SOMATileDBContext, ) -from .options._tiledb_create_options import TileDBCreateOptions +from .options._tiledb_create_write_options import TileDBCreateOptions class NDArray(SOMAArray, somacore.NDArray): diff --git a/apis/python/src/tiledbsoma/_dataframe.py b/apis/python/src/tiledbsoma/_dataframe.py index 5bfca2a92c..951234aeb5 100644 --- a/apis/python/src/tiledbsoma/_dataframe.py +++ b/apis/python/src/tiledbsoma/_dataframe.py @@ -27,7 +27,10 @@ from ._types import NPFloating, NPInteger, OpenTimestamp, Slice, is_slice_of from .options import SOMATileDBContext from .options._soma_tiledb_context import _validate_soma_tiledb_context -from .options._tiledb_create_options import TileDBCreateOptions, TileDBWriteOptions +from .options._tiledb_create_write_options import ( + TileDBCreateOptions, + TileDBWriteOptions, +) _UNBATCHED = options.BatchSize() AxisDomain = Union[None, Tuple[Any, Any], List[Any]] @@ -773,7 +776,7 @@ def _fill_out_slot_domain( def _find_extent_for_domain( index_column_name: str, - tiledb_create_options: TileDBCreateOptions, + tiledb_create_write_options: TileDBCreateOptions, dtype: Any, slot_domain: Tuple[Any, Any], ) -> Any: @@ -783,7 +786,7 @@ def _find_extent_for_domain( """ # Default 2048 mods to 0 for 8-bit types and 0 is an invalid extent - extent = tiledb_create_options.dim_tile(index_column_name) + extent = tiledb_create_write_options.dim_tile(index_column_name) if isinstance(dtype, np.dtype) and dtype.itemsize == 1: extent = 64 diff --git a/apis/python/src/tiledbsoma/_dense_nd_array.py b/apis/python/src/tiledbsoma/_dense_nd_array.py index b222ed6b87..a64573afbe 100644 --- a/apis/python/src/tiledbsoma/_dense_nd_array.py +++ b/apis/python/src/tiledbsoma/_dense_nd_array.py @@ -27,7 +27,7 @@ SOMATileDBContext, _validate_soma_tiledb_context, ) -from .options._tiledb_create_options import TileDBCreateOptions +from .options._tiledb_create_write_options import TileDBCreateOptions class DenseNDArray(NDArray, somacore.DenseNDArray): @@ -287,10 +287,10 @@ def write( self._set_reader_coords(clib_dense_array, new_coords) clib_dense_array.write(input) - tiledb_create_options = TileDBCreateOptions.from_platform_config( + tiledb_create_write_options = TileDBCreateOptions.from_platform_config( platform_config ) - if tiledb_create_options.consolidate_and_vacuum: + if tiledb_create_write_options.consolidate_and_vacuum: clib_dense_array.consolidate_and_vacuum() return self diff --git a/apis/python/src/tiledbsoma/_sparse_nd_array.py b/apis/python/src/tiledbsoma/_sparse_nd_array.py index 44f10d5bc1..ddca9d6596 100644 --- a/apis/python/src/tiledbsoma/_sparse_nd_array.py +++ b/apis/python/src/tiledbsoma/_sparse_nd_array.py @@ -47,7 +47,10 @@ SOMATileDBContext, _validate_soma_tiledb_context, ) -from .options._tiledb_create_options import TileDBCreateOptions, TileDBWriteOptions +from .options._tiledb_create_write_options import ( + TileDBCreateOptions, + TileDBWriteOptions, +) _UNBATCHED = options.BatchSize() diff --git a/apis/python/src/tiledbsoma/_util.py b/apis/python/src/tiledbsoma/_util.py index df19260738..a55b223a78 100644 --- a/apis/python/src/tiledbsoma/_util.py +++ b/apis/python/src/tiledbsoma/_util.py @@ -19,7 +19,7 @@ from . import pytiledbsoma as clib from ._types import OpenTimestamp, Slice, is_slice_of -from .options._tiledb_create_options import ( +from .options._tiledb_create_write_options import ( TileDBCreateOptions, _ColumnConfig, _DictFilterSpec, diff --git a/apis/python/src/tiledbsoma/io/ingest.py b/apis/python/src/tiledbsoma/io/ingest.py index 8c3cf27600..d2fa1ad95e 100644 --- a/apis/python/src/tiledbsoma/io/ingest.py +++ b/apis/python/src/tiledbsoma/io/ingest.py @@ -78,7 +78,7 @@ ) from ..options import SOMATileDBContext from ..options._soma_tiledb_context import _validate_soma_tiledb_context -from ..options._tiledb_create_options import TileDBCreateOptions +from ..options._tiledb_create_write_options import TileDBCreateOptions from . import conversions from ._common import ( _DATAFRAME_ORIGINAL_INDEX_NAME_JSON, @@ -1105,10 +1105,10 @@ def _extract_new_values_for_append( def _write_arrow_table( arrow_table: pa.Table, handle: Union[DataFrame, SparseNDArray], - tiledb_create_options: TileDBCreateOptions, + tiledb_create_write_options: TileDBCreateOptions, ) -> None: """Handles num-bytes capacity for remote object stores.""" - cap = tiledb_create_options.remote_cap_nbytes + cap = tiledb_create_write_options.remote_cap_nbytes if arrow_table.nbytes > cap: n = len(arrow_table) if n < 2: @@ -1116,8 +1116,8 @@ def _write_arrow_table( "single table row nbytes {arrow_table.nbytes} exceeds cap nbytes {cap}" ) m = n // 2 - _write_arrow_table(arrow_table[:m], handle, tiledb_create_options) - _write_arrow_table(arrow_table[m:], handle, tiledb_create_options) + _write_arrow_table(arrow_table[:m], handle, tiledb_create_write_options) + _write_arrow_table(arrow_table[m:], handle, tiledb_create_write_options) else: logging.log_io( None, @@ -1234,10 +1234,12 @@ def _write_dataframe_impl( add_metadata(soma_df, additional_metadata) return soma_df - tiledb_create_options = TileDBCreateOptions.from_platform_config(platform_config) + tiledb_create_write_options = TileDBCreateOptions.from_platform_config( + platform_config + ) if arrow_table: - _write_arrow_table(arrow_table, soma_df, tiledb_create_options) + _write_arrow_table(arrow_table, soma_df, tiledb_create_write_options) # Save the original index name for outgest. We use JSON for elegant indication of index name # being None (in Python anyway). @@ -1334,7 +1336,7 @@ def _create_from_matrix( _write_matrix_to_denseNDArray( soma_ndarray, matrix, - tiledb_create_options=TileDBCreateOptions.from_platform_config( + tiledb_create_write_options=TileDBCreateOptions.from_platform_config( platform_config ), ingestion_params=ingestion_params, @@ -1344,7 +1346,7 @@ def _create_from_matrix( _write_matrix_to_sparseNDArray( soma_ndarray, matrix, - tiledb_create_options=TileDBCreateOptions.from_platform_config( + tiledb_create_write_options=TileDBCreateOptions.from_platform_config( platform_config ), ingestion_params=ingestion_params, @@ -1513,7 +1515,9 @@ def _update_dataframe( add_keys = new_keys.difference(old_keys) common_keys = old_keys.intersection(new_keys) - tiledb_create_options = TileDBCreateOptions.from_platform_config(platform_config) + tiledb_create_write_options = TileDBCreateOptions.from_platform_config( + platform_config + ) msgs = [] for key in common_keys: @@ -1550,7 +1554,9 @@ def _update_dataframe( ) ) - filters = tiledb_create_options.attr_filters_tiledb(add_key, ["ZstdFilter"]) + filters = tiledb_create_write_options.attr_filters_tiledb( + add_key, ["ZstdFilter"] + ) # An update can create (or drop) columns, or mutate existing ones. A # brand-new column might have nulls in it -- or it might not. And a @@ -1656,7 +1662,7 @@ def update_matrix( _write_matrix_to_denseNDArray( soma_ndarray, new_data, - tiledb_create_options=TileDBCreateOptions.from_platform_config( + tiledb_create_write_options=TileDBCreateOptions.from_platform_config( platform_config ), ingestion_params=ingestion_params, @@ -1666,7 +1672,7 @@ def update_matrix( _write_matrix_to_sparseNDArray( soma_ndarray, new_data, - tiledb_create_options=TileDBCreateOptions.from_platform_config( + tiledb_create_write_options=TileDBCreateOptions.from_platform_config( platform_config ), ingestion_params=ingestion_params, @@ -1781,7 +1787,7 @@ def add_matrix_to_collection( def _write_matrix_to_denseNDArray( soma_ndarray: DenseNDArray, matrix: Union[Matrix, h5py.Dataset], - tiledb_create_options: TileDBCreateOptions, + tiledb_create_write_options: TileDBCreateOptions, ingestion_params: IngestionParams, additional_metadata: AdditionalMetadata = None, ) -> None: @@ -1815,7 +1821,7 @@ def _write_matrix_to_denseNDArray( return # Write all at once? - if not tiledb_create_options.write_X_chunked: + if not tiledb_create_write_options.write_X_chunked: if not isinstance(matrix, np.ndarray): matrix = matrix.toarray() soma_ndarray.write((slice(None),), pa.Tensor.from_numpy(matrix)) @@ -1838,7 +1844,9 @@ def _write_matrix_to_denseNDArray( # it controls how much is read into client RAM from the backing store on each chunk. # * The remote_cap_nbytes is an older parameter. # * Compute chunk sizes for both and take the minimum. - chunk_size_using_nnz = int(math.ceil(tiledb_create_options.goal_chunk_nnz / ncol)) + chunk_size_using_nnz = int( + math.ceil(tiledb_create_write_options.goal_chunk_nnz / ncol) + ) try: # not scipy csr/csc @@ -1849,7 +1857,7 @@ def _write_matrix_to_denseNDArray( total_nbytes = matrix.size * itemsize nbytes_num_chunks = math.ceil( - total_nbytes / tiledb_create_options.remote_cap_nbytes + total_nbytes / tiledb_create_write_options.remote_cap_nbytes ) nbytes_num_chunks = min(1, nbytes_num_chunks) chunk_size_using_nbytes = math.floor(nrow / nbytes_num_chunks) @@ -2174,7 +2182,7 @@ def _find_sparse_chunk_size_backed( def _write_matrix_to_sparseNDArray( soma_ndarray: SparseNDArray, matrix: Matrix, - tiledb_create_options: TileDBCreateOptions, + tiledb_create_write_options: TileDBCreateOptions, ingestion_params: IngestionParams, additional_metadata: AdditionalMetadata, axis_0_mapping: AxisIDMapping, @@ -2234,7 +2242,7 @@ def _coo_to_table( add_metadata(soma_ndarray, additional_metadata) # Write all at once? - if not tiledb_create_options.write_X_chunked: + if not tiledb_create_write_options.write_X_chunked: soma_ndarray.write( _coo_to_table(sp.coo_matrix(matrix), axis_0_mapping, axis_1_mapping) ) @@ -2253,7 +2261,7 @@ def _coo_to_table( dim_max_size = matrix.shape[stride_axis] eta_tracker = eta.Tracker() - goal_chunk_nnz = tiledb_create_options.goal_chunk_nnz + goal_chunk_nnz = tiledb_create_write_options.goal_chunk_nnz mean_nnz = _find_mean_nnz(matrix, stride_axis) coords = [slice(None), slice(None)] @@ -2321,11 +2329,11 @@ def _coo_to_table( # send them off, with simplified logic. num_tries = 0 max_tries = 20 - while chunk_coo.nnz > tiledb_create_options.goal_chunk_nnz: + while chunk_coo.nnz > tiledb_create_write_options.goal_chunk_nnz: num_tries += 1 # The logger we use doesn't have a TRACE level. If it did, we'd use it here. # logging.logger.trace( - # f"Adapt: {num_tries}/{max_tries} {chunk_coo.nnz}/{tiledb_create_options.goal_chunk_nnz}" + # f"Adapt: {num_tries}/{max_tries} {chunk_coo.nnz}/{tiledb_create_write_options.goal_chunk_nnz}" # ) if num_tries > max_tries: raise SOMAError( @@ -2333,7 +2341,7 @@ def _coo_to_table( "This may be reduced in TileDBCreateOptions." ) - ratio = chunk_coo.nnz / tiledb_create_options.goal_chunk_nnz + ratio = chunk_coo.nnz / tiledb_create_write_options.goal_chunk_nnz chunk_size = int(math.floor(0.9 * (i2 - i) / ratio)) if chunk_size < 1: raise SOMAError( @@ -2363,7 +2371,7 @@ def _coo_to_table( dim_max_size, chunk_percent, chunk_coo.nnz, - tiledb_create_options.goal_chunk_nnz, + tiledb_create_write_options.goal_chunk_nnz, ), ) i = i2 @@ -2379,14 +2387,14 @@ def _coo_to_table( dim_max_size, chunk_percent, chunk_coo.nnz, - tiledb_create_options.goal_chunk_nnz, + tiledb_create_write_options.goal_chunk_nnz, ), ) arrow_table = _coo_to_table( chunk_coo, axis_0_mapping, axis_1_mapping, stride_axis, i ) - _write_arrow_table(arrow_table, soma_ndarray, tiledb_create_options) + _write_arrow_table(arrow_table, soma_ndarray, tiledb_create_write_options) t2 = time.time() chunk_seconds = t2 - t1 @@ -2808,7 +2816,7 @@ def _ingest_uns_ndarray( _write_matrix_to_denseNDArray( soma_arr, value, - tiledb_create_options=TileDBCreateOptions.from_platform_config( + tiledb_create_write_options=TileDBCreateOptions.from_platform_config( platform_config ), ingestion_params=ingestion_params, diff --git a/apis/python/src/tiledbsoma/options/__init__.py b/apis/python/src/tiledbsoma/options/__init__.py index c3292f82a5..7c60d27130 100644 --- a/apis/python/src/tiledbsoma/options/__init__.py +++ b/apis/python/src/tiledbsoma/options/__init__.py @@ -1,5 +1,5 @@ from ._soma_tiledb_context import SOMATileDBContext -from ._tiledb_create_options import TileDBCreateOptions, TileDBWriteOptions +from ._tiledb_create_write_options import TileDBCreateOptions, TileDBWriteOptions __all__ = [ "SOMATileDBContext", diff --git a/apis/python/src/tiledbsoma/options/_tiledb_create_options.py b/apis/python/src/tiledbsoma/options/_tiledb_create_write_options.py similarity index 100% rename from apis/python/src/tiledbsoma/options/_tiledb_create_options.py rename to apis/python/src/tiledbsoma/options/_tiledb_create_write_options.py diff --git a/apis/python/tests/test_io.py b/apis/python/tests/test_io.py index 100c619163..efd8497937 100644 --- a/apis/python/tests/test_io.py +++ b/apis/python/tests/test_io.py @@ -7,7 +7,7 @@ import tiledbsoma as soma import tiledbsoma.io as somaio from tiledbsoma import _factory -from tiledbsoma.options._tiledb_create_options import TileDBCreateOptions +from tiledbsoma.options._tiledb_create_write_options import TileDBCreateOptions @pytest.fixture diff --git a/apis/python/tests/test_platform_config.py b/apis/python/tests/test_platform_config.py index 5985ba05e3..a3c084d6ef 100644 --- a/apis/python/tests/test_platform_config.py +++ b/apis/python/tests/test_platform_config.py @@ -5,7 +5,7 @@ import tiledbsoma import tiledbsoma.io -import tiledbsoma.options._tiledb_create_options as tco +import tiledbsoma.options._tiledb_create_write_options as tco from tiledbsoma._util import verify_obs_and_var_eq import tiledb