Skip to content

Commit

Permalink
[r] Support SparseNDArray writes case (#2755)
Browse files Browse the repository at this point in the history
* [r] Support sparse nd array writes via libtiledbsoma

* Additional test tweak

* Another test tweak with as-needed datatype recovery

* Micro-cleanup of test file

There was another change here that we were able to revert, so this also
reverts the one remaining changed line.
  • Loading branch information
eddelbuettel committed Jul 16, 2024
1 parent 1a474dc commit 18df8bb
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 12 deletions.
31 changes: 20 additions & 11 deletions apis/r/R/SOMASparseNDArray.R
Original file line number Diff line number Diff line change
Expand Up @@ -177,9 +177,6 @@ SOMASparseNDArray <- R6::R6Class(
self$set_metadata(bbox_flat)
private$.write_coo_dataframe(coo)

# tiledb-r always closes the array after a write operation so we need to
# manually reopen it until close-on-write is optional
self$open("WRITE", internal_use_only = "allowed_use")
invisible(self)
},

Expand Down Expand Up @@ -227,14 +224,26 @@ SOMASparseNDArray <- R6::R6Class(
if (!is.null(private$tiledb_timestamp)) {
arr@timestamp <- private$tiledb_timestamp
}
## spdl::debug("[SOMASparseNDArray] '.write_coo_dataframe' layout '{}' is_sparse '{}' ",
## tiledb::query_layout(arr), private$.is_sparse)
## if (!private$.is_sparse && tiledb::query_layout(arr) == "UNORDERED") {
## tiledb::query_layout(arr) <- "GLOBAL_ORDER"
## cat("*********", tiledb::query_layout(arr), "*****\n")
## print(arr)
## }
arr[] <- values
nms <- colnames(values)

## the 'soma_data' data type may not have been cached yet; if it has not, we need to fetch it
if (is.null(private$.type)) {
## TODO: replace with a libtiledbsoma accessor as discussed
tpstr <- tiledb::datatype(tiledb::attrs(tiledb::schema(self$uri))[["soma_data"]])
arstr <- arrow_type_from_tiledb_type(tpstr)
private$.type <- arstr
}

arrsch <- arrow::schema(arrow::field(nms[1], arrow::int64()),
arrow::field(nms[2], arrow::int64()),
arrow::field(nms[3], private$.type))

tbl <- arrow::arrow_table(values, schema = arrsch)
spdl::debug("[SOMASparseNDArray::write] array created, writing to {}", self$uri)
naap <- nanoarrow::nanoarrow_allocate_array()
nasp <- nanoarrow::nanoarrow_allocate_schema()
arrow::as_record_batch(tbl)$export_to_c(naap, nasp)
writeArrayFromArrow(self$uri, naap, nasp, "SOMASparseNDArray")
},

# Internal marking of one or zero based matrices for iterated reads
Expand Down
5 changes: 4 additions & 1 deletion apis/r/src/arrow.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -152,10 +152,13 @@ void writeArrayFromArrow(const std::string& uri, naxpArray naap, naxpSchema nasp
} else if (arraytype == "SOMADenseNDArray") {
arrup = tdbs::SOMADenseNDArray::open(OpenMode::write, uri, somactx,
"unnamed", {}, "auto", ResultOrder::colmajor);
} else if (arraytype == "SOMASparseNDArray") {
arrup = tdbs::SOMASparseNDArray::open(OpenMode::write, uri, somactx);
} else { // not reached
Rcpp::stop(tfm::format("Unexpected array type '%s'", arraytype));
}

arrup.get()->set_array_data(std::move(schema), std::move(array));
arrup.get()->write();
arrup.get()->close();

}

0 comments on commit 18df8bb

Please sign in to comment.