From 18df8bb37adfa9960e6996b5989a3d384f535033 Mon Sep 17 00:00:00 2001 From: Dirk Eddelbuettel Date: Tue, 16 Jul 2024 07:16:19 -0500 Subject: [PATCH] [r] Support `SparseNDArray` writes case (#2755) * [r] Support sparse nd array writes via libtiledbsoma * Additional test tweak * Another test tweak with as-needed datatype recovery * Micro-cleanup of test file There was another change here that we were able to revert so also reverting this one remaining changed line --- apis/r/R/SOMASparseNDArray.R | 31 ++++++++++++++++++++----------- apis/r/src/arrow.cpp | 5 ++++- 2 files changed, 24 insertions(+), 12 deletions(-) diff --git a/apis/r/R/SOMASparseNDArray.R b/apis/r/R/SOMASparseNDArray.R index a3adbe3b32..50e20852ce 100644 --- a/apis/r/R/SOMASparseNDArray.R +++ b/apis/r/R/SOMASparseNDArray.R @@ -177,9 +177,6 @@ SOMASparseNDArray <- R6::R6Class( self$set_metadata(bbox_flat) private$.write_coo_dataframe(coo) - # tiledb-r always closes the array after a write operation so we need to - # manually reopen it until close-on-write is optional - self$open("WRITE", internal_use_only = "allowed_use") invisible(self) }, @@ -227,14 +224,26 @@ SOMASparseNDArray <- R6::R6Class( if (!is.null(private$tiledb_timestamp)) { arr@timestamp <- private$tiledb_timestamp } - ## spdl::debug("[SOMASparseNDArray] '.write_coo_dataframe' layout '{}' is_sparse '{}' ", - ## tiledb::query_layout(arr), private$.is_sparse) - ## if (!private$.is_sparse && tiledb::query_layout(arr) == "UNORDERED") { - ## tiledb::query_layout(arr) <- "GLOBAL_ORDER" - ## cat("*********", tiledb::query_layout(arr), "*****\n") - ## print(arr) - ## } - arr[] <- values + nms <- colnames(values) + + ## the 'soma_data' data type may not have been cached, and if so we need to fetch it + if (is.null(private$.type)) { + ## TODO: replace with a libtiledbsoma accessor as discussed + tpstr <- tiledb::datatype(tiledb::attrs(tiledb::schema(self$uri))[["soma_data"]]) + arstr <- arrow_type_from_tiledb_type(tpstr) + private$.type <- arstr + } + + arrsch <- arrow::schema(arrow::field(nms[1], arrow::int64()), + arrow::field(nms[2], arrow::int64()), + arrow::field(nms[3], private$.type)) + + tbl <- arrow::arrow_table(values, schema = arrsch) + spdl::debug("[SOMASparseNDArray::write] array created, writing to {}", self$uri) + naap <- nanoarrow::nanoarrow_allocate_array() + nasp <- nanoarrow::nanoarrow_allocate_schema() + arrow::as_record_batch(tbl)$export_to_c(naap, nasp) + writeArrayFromArrow(self$uri, naap, nasp, "SOMASparseNDArray") }, # Internal marking of one or zero based matrices for iterated reads diff --git a/apis/r/src/arrow.cpp b/apis/r/src/arrow.cpp index a077ee8490..061ad46c13 100644 --- a/apis/r/src/arrow.cpp +++ b/apis/r/src/arrow.cpp @@ -152,10 +152,13 @@ void writeArrayFromArrow(const std::string& uri, naxpArray naap, naxpSchema nasp } else if (arraytype == "SOMADenseNDArray") { arrup = tdbs::SOMADenseNDArray::open(OpenMode::write, uri, somactx, "unnamed", {}, "auto", ResultOrder::colmajor); + } else if (arraytype == "SOMASparseNDArray") { + arrup = tdbs::SOMASparseNDArray::open(OpenMode::write, uri, somactx); + } else { // not reached + Rcpp::stop(tfm::format("Unexpected array type '%s'", arraytype)); } arrup.get()->set_array_data(std::move(schema), std::move(array)); arrup.get()->write(); arrup.get()->close(); - }