From b5e7e017035e840e2a61317e7563b7c6cdae0117 Mon Sep 17 00:00:00 2001 From: Paul Hoffman Date: Mon, 21 Oct 2024 17:13:41 -0400 Subject: [PATCH] [r] Backport sparse COO writer to 1.14 (#3217) --- apis/r/NAMESPACE | 11 ++ apis/r/NEWS.md | 7 + apis/r/R/SOMASparseNDArray.R | 151 ++++++++++++------ apis/r/R/utils-arrow.R | 94 ++++++++++- apis/r/R/utils.R | 76 +++++++++ apis/r/man/SOMADenseNDArray.Rd | 2 + apis/r/man/SOMANDArrayBase.Rd | 48 ++++++ apis/r/man/SOMASparseNDArray.Rd | 50 ++---- apis/r/man/r_type_from_arrow_type.Rd | 47 ++++++ .../r/tests/testthat/test-SOMASparseNDArray.R | 104 +++++++++++- apis/r/tests/testthat/test-r-arrow-types.R | 93 +++++++++++ apis/r/tests/testthat/test-utils.R | 95 ++++++++++- 12 files changed, 687 insertions(+), 91 deletions(-) create mode 100644 apis/r/man/r_type_from_arrow_type.Rd create mode 100644 apis/r/tests/testthat/test-r-arrow-types.R diff --git a/apis/r/NAMESPACE b/apis/r/NAMESPACE index 522816de8a..83db784869 100644 --- a/apis/r/NAMESPACE +++ b/apis/r/NAMESPACE @@ -2,15 +2,25 @@ S3method("[[",MappingBase) S3method("[[<-",MappingBase) +S3method(.is_integerish,Array) +S3method(.is_integerish,ChunkedArray) +S3method(.is_integerish,DataType) +S3method(.is_integerish,Field) +S3method(.is_integerish,default) +S3method(.is_integerish,integer64) S3method(.read_soma_joinids,SOMADataFrame) S3method(.read_soma_joinids,SOMASparseNDArray) S3method(as.list,CoordsStrider) S3method(as.list,MappingBase) +S3method(as.logical,Scalar) S3method(iterators::nextElem,CoordsStrider) S3method(itertools::hasNext,CoordsStrider) S3method(length,CoordsStrider) S3method(length,MappingBase) S3method(names,MappingBase) +S3method(r_type_from_arrow_type,DataType) +S3method(r_type_from_arrow_type,Field) +S3method(r_type_from_arrow_type,Schema) S3method(write_soma,Assay) S3method(write_soma,DataFrame) S3method(write_soma,DimReduc) @@ -84,6 +94,7 @@ export(has_metadata) export(list_datasets) export(load_dataset) export(matrixZeroBasedView) +export(r_type_from_arrow_type) export(set_log_level) export(set_metadata) export(show_package_versions) diff --git a/apis/r/NEWS.md b/apis/r/NEWS.md index 1067665c76..d197a9c477 100644 --- a/apis/r/NEWS.md +++ b/apis/r/NEWS.md @@ -1,3 +1,10 @@ +# Unreleased + +## Changes + +* Add new Arrow-to-R type mapper +* Expose block/random writer for sparse arrays [#3204](https://github.com/single-cell-data/TileDB-SOMA/pull/3204) + # tiledbsoma 1.14.3 ## Changes diff --git a/apis/r/R/SOMASparseNDArray.R b/apis/r/R/SOMASparseNDArray.R index 4bbeb22b92..251c9cca91 100644 --- a/apis/r/R/SOMASparseNDArray.R +++ b/apis/r/R/SOMASparseNDArray.R @@ -66,12 +66,14 @@ SOMASparseNDArray <- R6::R6Class( #' @param bbox A vector of integers describing the upper bounds of each #' dimension of `values`. Generally should be `NULL`. 
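#'
#' A minimal usage sketch (illustrative values; assumes the array was
#' created with \code{SOMASparseNDArrayCreate()} and is still open for
#' writing):
#'
#'     arr <- SOMASparseNDArrayCreate(tempfile(), arrow::float64(), shape = c(100L, 100L))
#'     mat <- Matrix::rsparsematrix(100L, 100L, density = 0.05)
#'     arr$write(mat)  # bbox = NULL: no bounding box is recorded
#'     # arr$write(mat, bbox = c(100L, 100L))  # or supply per-dimension upper bounds
#'     arr$close()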
#' + #' @return Invisibly returns \code{self} + #' write = function(values, bbox = NULL) { stopifnot( "'values' must be a matrix" = is_matrix(values), "'bbox' must contain two entries" = is.null(bbox) || length(bbox) == length(dim(values)), "'bbox' must be a vector of two integers or a list with each entry containg two integers" = is.null(bbox) || - (is_integerish(bbox) || bit64::is.integer64(bbox)) || + .is_integerish(bbox) || (is.list(bbox) && all(vapply_lgl(bbox, function(x, n) length(x) == 2L))) ) # coerce to a TsparseMatrix, which uses 0-based COO indexing @@ -81,6 +83,12 @@ SOMASparseNDArray <- R6::R6Class( j = bit64::as.integer64(values@j), x = values@x ) + if (!is.null(private$.type)) { + rt <- r_type_from_arrow_type(private$.type) + if (rt == 'integer' && rlang::is_integerish(coo$x)) { + coo$x <- as.integer(coo$x) + } + } dnames <- self$dimnames() colnames(coo) <- c(dnames, self$attrnames()) ranges <- sapply( @@ -181,9 +189,9 @@ SOMASparseNDArray <- R6::R6Class( self$tiledb_timestamp %||% "now" ) - private$.write_coo_dataframe(coo) + self$.write_coordinates(coo) - invisible(self) + return(invisible(self)) }, #' @description Retrieve number of non-zero elements (lifecycle: maturing) @@ -224,6 +232,96 @@ SOMASparseNDArray <- R6::R6Class( ) # Checking slotwise new shape >= old shape, and <= max_shape, is already done in libtiledbsoma tiledbsoma_upgrade_shape(self$uri, shape, private$.soma_context) + }, + + #' @description Write a COO table to the array + #' + #' @param values A \code{data.frame} or \code{\link[arrow:Table]{Arrow::Table}} + #' with data in COO format; must be named with the dimension and attribute + #' labels of the array + #' + #' @return Invisibly returns \code{self} + #' + .write_coordinates = function(values) { + private$check_open_for_write() + dnames <- self$dimnames() + attrn <- self$attrnames() + + stopifnot( + "'values' must be a data frame or Arrow Table" = is.data.frame(values) || + inherits(values, what = 'Table'), + "'values' must have one column for each dimension and the data" = ncol(values) == length(dnames) + 1L, + "'values' must be named with the dimension and attribute labels" = is.null(names(values)) || + identical(names(values), c(dnames, attrn)) + ) + + # Arrow Tables cannot have NULL names, so this only applies to dataframes + if (is.null(names(values))) { + spdl::warn("[SOMASparseNDArray$.write_coordinates] no names on input data frame, assuming order") + names(values) <- c(dnames, attrn) + } + + # Check dimensions + spdl::debug("[SOMASparseNDArray$.write_coordinates] checking dimension values") + for (i in seq_along(dnames)) { + dn <- dnames[i] + offending <- sprintf("(offending column: '%s')", dn) + if (!.is_integerish(values[[dn]])) { + stop("All dimension columns must be integerish ", offending) + } + if (as.logical(min(values[[dn]]) < 0L)) { + stop("Dimension columns cannot contain negative values ", offending) + } + if (as.logical(max(values[[dn]]) >= as.numeric(self$shape()[i]))) { + stop("Dimension columns cannot exceed the shape of the array ", offending) + } + } + + # Check attribute + spdl::debug("[SOMASparseNDArray$.write_coordinates] checking data values") + if (is.null(private$.type)) { + tt <- self$schema()[attrn]$type + if (is.null(tt)) { + tt <- if (is.data.frame(values)) { + arrow::infer_type(values[[attrn]]) + } else { + values[[attrn]]$type + } + } + private$.type <- tt + } + vt <- if (is.data.frame(values)) { + arrow::infer_type(values[[attrn]]) + } else { + values[[attrn]]$type + } + if ((vrt <- r_type_from_arrow_type(vt)) 
!= (rt <- r_type_from_arrow_type(private$.type))) { + stop("The data column must be of type '", rt, "', got '", vrt, "'") + } + + # Build our Arrow table and schema + fields <- c( + lapply(dnames, arrow::field, type = arrow::int64()), + arrow::field(attrn, private$.type) + ) + sch <- do.call(arrow::schema, fields) + tbl <- arrow::as_arrow_table(values, schema = sch) + + # Write via libtiledbsoma + spdl::debug("[SOMASparseNDArray$.write_coordinates] writing arrow table") + naap <- nanoarrow::nanoarrow_allocate_array() + nasp <- nanoarrow::nanoarrow_allocate_schema() + arrow::as_record_batch(tbl)$export_to_c(naap, nasp) + writeArrayFromArrow( + uri = self$uri, + naap = naap, + nasp = nasp, + ctxxp = private$.soma_context, + arraytype = "SOMASparseNDArray", + config = NULL, + tsvec = self$.tiledb_timestamp_range + ) + return(invisible(self)) } ), @@ -252,53 +350,6 @@ SOMASparseNDArray <- R6::R6Class( out }, - # @description Ingest COO-formatted dataframe into the TileDB array. - # (lifecycle: maturing) - # @param values A [`data.frame`]. - .write_coo_dataframe = function(values) { - private$check_open_for_write() - - stopifnot(is.data.frame(values)) - # private$log_array_ingestion() - #arr <- self$object - #if (!is.null(self$tiledb_timestamp)) { - # # arr@timestamp <- self$tiledb_timestamp - # arr@timestamp_end <- self$tiledb_timestamp - #} - nms <- colnames(values) - - ## the 'soma_data' data type may not have been cached, and if so we need to fetch it - if (is.null(private$.type)) { - ## TODO: replace with a libtiledbsoma accessor as discussed - tpstr <- tiledb::datatype(tiledb::attrs(tiledb::schema(self$uri))[["soma_data"]]) - arstr <- arrow_type_from_tiledb_type(tpstr) - private$.type <- arstr - } - - arrsch <- arrow::schema(arrow::field(nms[1], arrow::int64()), - arrow::field(nms[2], arrow::int64()), - arrow::field(nms[3], private$.type)) - - tbl <- arrow::arrow_table(values, schema = arrsch) - spdl::debug( - "[SOMASparseNDArray$write] array created, writing to {} at ({})", - self$uri, - self$tiledb_timestamp %||% "now" - ) - naap <- nanoarrow::nanoarrow_allocate_array() - nasp <- nanoarrow::nanoarrow_allocate_schema() - arrow::as_record_batch(tbl)$export_to_c(naap, nasp) - writeArrayFromArrow( - uri = self$uri, - naap = naap, - nasp = nasp, - ctxxp = private$.soma_context, - arraytype = "SOMASparseNDArray", - config = NULL, - tsvec = self$.tiledb_timestamp_range - ) - }, - # Internal marking of one or zero based matrices for iterated reads zero_based = NA diff --git a/apis/r/R/utils-arrow.R b/apis/r/R/utils-arrow.R index 7ee2ebc2ae..c64099a692 100644 --- a/apis/r/R/utils-arrow.R +++ b/apis/r/R/utils-arrow.R @@ -34,6 +34,11 @@ is_arrow_dictionary <- function(x) { is_arrow_object(x) && inherits(x, "Field") && inherits(x$type, "DictionaryType") } +#' @method as.logical Scalar +#' @export +#' +as.logical.Scalar <- \(x, ...) as.logical(x$as_vector(), ...) + #' Convert Arrow types to supported TileDB type #' List of TileDB types supported in R: https://github.com/TileDB-Inc/TileDB-R/blob/8014da156b5fee5b4cc221d57b4aa7d388abc968/inst/tinytest/test_dim.R#L97-L121 #' Note: TileDB attrs may be UTF-8; TileDB dims may not. @@ -114,6 +119,92 @@ arrow_type_from_tiledb_type <- function(x) { ) } +#' Get the \R Type from an Arrow Type +#' +#' Get an \R \link[base:typeof]{type} from an Arrow type. This function is +#' equivalent to \code{\link[base]{typeof}()} rather than +#' \code{\link[base]{mode}()} or \code{\link[base]{class}()}, and returns the +#' equivalent \strong{type}. 
For example, the equivalent \R type to an Arrow +#' \link[arrow]{dictionary} is \dQuote{\code{integer}}, not +#' \dQuote{\code{factor}}; likewise, the equivalent \R type to an Arrow 64-bit +#' integer is \dQuote{\code{double}} +#' +#' @param x An \CRANpkg{Arrow} \link[arrow:Schema]{schema}, +#' \link[arrow:Field]{field}, or \link[arrow:infer_type]{data type} +#' +#' @return If \code{x} is a \link[arrow:infer_type]{data type}, a single +#' character value giving the \R \link[base:typeof]{type} of \code{x}; if no +#' corresponding \R type, returns the \CRANpkg{Arrow} type name +#' +#' @return If \code{x} is a \link[arrow:Field]{field}, a single named character +#' vector with the name being the field name and the value being the \R +#' \link[base:typeof]{type} +#' +#' @return If \code{x} is a \link[arrow:Schema]{schema}, a named vector where +#' the names are field names and the values are the \R \link[base:typeof]{types} +#' of each field +#' +#' @keywords internal +#' +#' @export +#' +#' @seealso \code{\link[base]{typeof}()} +#' +r_type_from_arrow_type <- function(x) UseMethod('r_type_from_arrow_type') + +#' @rdname r_type_from_arrow_type +#' +#' @method r_type_from_arrow_type Schema +#' @export +#' +r_type_from_arrow_type.Schema <- function(x) { + return(vapply( + X = x$names, + FUN = function(f) r_type_from_arrow_type(x[[f]]), + FUN.VALUE = character(1L), + USE.NAMES = TRUE + )) +} + +#' @rdname r_type_from_arrow_type +#' +#' @method r_type_from_arrow_type Field +#' @export +#' +r_type_from_arrow_type.Field <- function(x) { + tt <- r_type_from_arrow_type(x$type) + names(x = tt) <- x$name + return(tt) +} + +#' @rdname r_type_from_arrow_type +#' +#' @method r_type_from_arrow_type DataType +#' @export +#' +r_type_from_arrow_type.DataType <- function(x) { + # Types are equivalent to `typeof()`, not `mode()` or `class()` + return(switch( + EXPR = x$name, + int8 = , + int16 = , + int32 = , + dictionary = , + uint8 = , + uint16 = , + uint32 = 'integer', + int64 = , + uint64 = , + date32 = , + timestamp = , + float = 'double', + bool = 'logical', + utf8 = , + large_utf8 = 'character', + x$name + )) +} + #' Retrieve limits for Arrow types #' @importFrom bit64 lim.integer64 #' @noRd @@ -174,10 +265,9 @@ arrow_field_from_tiledb_dim <- function(x) { ## With a nod to Kevin Ushey #' @noRd yoink <- function(package, symbol) { - do.call(":::", list(package, symbol)) + do.call(":::", list(package, symbol)) } - #' Create an Arrow field from a TileDB attribute #' @noRd arrow_field_from_tiledb_attr <- function(x, arrptr=NULL) { diff --git a/apis/r/R/utils.R b/apis/r/R/utils.R index dec2f8aa38..1459fad5a4 100644 --- a/apis/r/R/utils.R +++ b/apis/r/R/utils.R @@ -98,6 +98,82 @@ uns_hint <- function(type = c('1d', '2d')) { }) } +#' Is an Object Integerish +#' +#' @inheritParams rlang::is_integerish +#' +#' @return \code{TRUE} if \code{x} is integerish, otherwise \code{FALSE} +#' +#' @keywords internal +#' +#' @noRd +#' +.is_integerish <- function(x, n = NULL, finite = NULL) { + UseMethod(generic = '.is_integerish', object = x) +} + +#' @method .is_integerish default +#' @export +#' +.is_integerish.default <- function(x, n = NULL, finite = NULL) { + return(rlang::is_integerish(x = x, n = n, finite = finite)) +} + +#' @method .is_integerish integer64 +#' @export +#' +.is_integerish.integer64 <- function(x, n = NULL, finite = NULL) { + res <- if (!is.null(x = n)) { + stopifnot( + "'n' must be a single integerish value" = .is_integerish(x = n) && + length(x = n) == 1L && + is.finite(x = n) + ) + length(x = x) == n 
+ } else { + TRUE + } + res <- res && if (!is.null(x = finite)) { + stopifnot(isTRUE(x = finite) || isFALSE(x = finite)) + # In `rlang::is_integerish()`, + # `finite = TRUE`: all values are finite + # `finite = FALSE`: at least one value is infinite + # `bit64::is.infinite()` returns FALSE for NA + ifelse( + test = finite, + yes = all(is.finite(x = x)), + no = any(is.infinite(x = x) | is.na(x = x)) + ) + } else { + TRUE + } + return(res) +} + +#' @method .is_integerish Field +#' @export +#' +.is_integerish.Field <-function(x, n = NULL, finite = NULL) { + return(.is_integerish(x = x$type, n = n, finite = finite)) +} + +#' @method .is_integerish Array +#' @export +#' +.is_integerish.Array <- .is_integerish.Field + +#' @method .is_integerish ChunkedArray +#' @export +#' +.is_integerish.ChunkedArray <- .is_integerish.Field + +#' @method .is_integerish DataType +#' @export +#' +.is_integerish.DataType <-function(x, n = NULL, finite = NULL) { + return(grepl(pattern = '^[u]?int[[:digit:]]{1,2}$', x = x$name)) +} + .maybe_muffle <- function(w, cond = getOption('verbose', default = FALSE)) { if (isTRUE(x = cond)) { warning(warningCondition( diff --git a/apis/r/man/SOMADenseNDArray.Rd b/apis/r/man/SOMADenseNDArray.Rd index e2156ec384..20aef8bbab 100644 --- a/apis/r/man/SOMADenseNDArray.Rd +++ b/apis/r/man/SOMADenseNDArray.Rd @@ -66,8 +66,10 @@ The \code{write} method is currently limited to writing from 2-d matrices.
  • tiledbsoma::TileDBArray$tiledb_schema()
  • tiledbsoma::TileDBArray$used_shape()
  • tiledbsoma::SOMANDArrayBase$create()
+  • tiledbsoma::SOMANDArrayBase$resize()
  • tiledbsoma::SOMANDArrayBase$set_data_type()
  • tiledbsoma::SOMANDArrayBase$tiledbsoma_has_upgraded_shape()
+  • tiledbsoma::SOMANDArrayBase$tiledbsoma_upgrade_shape()
  • }} diff --git a/apis/r/man/SOMANDArrayBase.Rd b/apis/r/man/SOMANDArrayBase.Rd index 1f43ce1306..0722a2eb3e 100644 --- a/apis/r/man/SOMANDArrayBase.Rd +++ b/apis/r/man/SOMANDArrayBase.Rd @@ -17,6 +17,8 @@ Adds NDArray-specific functionality to the \code{\link{SOMAArrayBase}} class. \item \href{#method-SOMANDArrayBase-create}{\code{SOMANDArrayBase$create()}} \item \href{#method-SOMANDArrayBase-set_data_type}{\code{SOMANDArrayBase$set_data_type()}} \item \href{#method-SOMANDArrayBase-tiledbsoma_has_upgraded_shape}{\code{SOMANDArrayBase$tiledbsoma_has_upgraded_shape()}} +\item \href{#method-SOMANDArrayBase-resize}{\code{SOMANDArrayBase$resize()}} +\item \href{#method-SOMANDArrayBase-tiledbsoma_upgrade_shape}{\code{SOMANDArrayBase$tiledbsoma_upgrade_shape()}} \item \href{#method-SOMANDArrayBase-clone}{\code{SOMANDArrayBase$clone()}} } } @@ -119,6 +121,52 @@ Logical } } \if{html}{\out{
    }} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-SOMANDArrayBase-resize}{}}} +\subsection{Method \code{resize()}}{ +Increases the shape of the array as specified. Raises an error +if the new shape is less than the current shape in any dimension. Raises +an error if the new shape exceeds maxshape in any dimension. Raises an +error if the array doesn't already have a shape: in that case please call +tiledbsoma_upgrade_shape. +\subsection{Usage}{ +\if{html}{\out{
    }}\preformatted{SOMANDArrayBase$resize(new_shape)}\if{html}{\out{
    }} +} + +\subsection{Arguments}{ +\if{html}{\out{
    }} +\describe{ +\item{\code{new_shape}}{A vector of integerish, of the same length as the array's \code{ndim}.} +} +\if{html}{\out{
    }} +} +\subsection{Returns}{ +No return value +} +} +\if{html}{\out{
    }} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-SOMANDArrayBase-tiledbsoma_upgrade_shape}{}}} +\subsection{Method \code{tiledbsoma_upgrade_shape()}}{ +Allows the array to have a resizeable shape as described in the +TileDB-SOMA 1.15 release notes. Raises an error if the shape exceeds maxshape in any +dimension. Raises an error if the array already has a shape. +\subsection{Usage}{ +\if{html}{\out{
    }}\preformatted{SOMANDArrayBase$tiledbsoma_upgrade_shape(shape)}\if{html}{\out{
    }} +} + +\subsection{Arguments}{ +\if{html}{\out{
    }} +\describe{ +\item{\code{shape}}{A vector of integerish, of the same length as the array's \code{ndim}.} +} +\if{html}{\out{
    }} +} +\subsection{Returns}{ +No return value +} +} +\if{html}{\out{
    }} \if{html}{\out{}} \if{latex}{\out{\hypertarget{method-SOMANDArrayBase-clone}{}}} \subsection{Method \code{clone()}}{ diff --git a/apis/r/man/SOMASparseNDArray.Rd b/apis/r/man/SOMASparseNDArray.Rd index 7a030c04f0..326736a42f 100644 --- a/apis/r/man/SOMASparseNDArray.Rd +++ b/apis/r/man/SOMASparseNDArray.Rd @@ -31,8 +31,7 @@ the object are overwritten and new index values are added. (lifecycle: maturing) \item \href{#method-SOMASparseNDArray-read}{\code{SOMASparseNDArray$read()}} \item \href{#method-SOMASparseNDArray-write}{\code{SOMASparseNDArray$write()}} \item \href{#method-SOMASparseNDArray-nnz}{\code{SOMASparseNDArray$nnz()}} -\item \href{#method-SOMASparseNDArray-resize}{\code{SOMASparseNDArray$resize()}} -\item \href{#method-SOMASparseNDArray-tiledbsoma_upgrade_shape}{\code{SOMASparseNDArray$tiledbsoma_upgrade_shape()}} +\item \href{#method-SOMASparseNDArray-.write_coordinates}{\code{SOMASparseNDArray$.write_coordinates()}} \item \href{#method-SOMASparseNDArray-clone}{\code{SOMASparseNDArray$clone()}} } } @@ -66,8 +65,10 @@ the object are overwritten and new index values are added. (lifecycle: maturing)
  • tiledbsoma::TileDBArray$tiledb_schema()
  • tiledbsoma::TileDBArray$used_shape()
  • tiledbsoma::SOMANDArrayBase$create()
+  • tiledbsoma::SOMANDArrayBase$resize()
  • tiledbsoma::SOMANDArrayBase$set_data_type()
  • tiledbsoma::SOMANDArrayBase$tiledbsoma_has_upgraded_shape()
+  • tiledbsoma::SOMANDArrayBase$tiledbsoma_upgrade_shape()
  • }} @@ -126,6 +127,9 @@ dimension of \code{values}. Generally should be \code{NULL}.} } \if{html}{\out{}} } +\subsection{Returns}{ +Invisibly returns \code{self} +} } \if{html}{\out{
    }} \if{html}{\out{}} @@ -141,49 +145,25 @@ A scalar with the number of non-zero elements } } \if{html}{\out{
    }} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-SOMASparseNDArray-resize}{}}} -\subsection{Method \code{resize()}}{ -Increases the shape of the array as specfied. Raises an error -if the new shape is less than the current shape in any dimension. Raises -an error if the new shape exceeds maxshape in any dimension. Raises an -error if the array doesn't already have a shape: in that case please call -tiledbsoma_upgrade_shape. -\subsection{Usage}{ -\if{html}{\out{
    }}\preformatted{SOMASparseNDArray$resize(new_shape)}\if{html}{\out{
    }} -} - -\subsection{Arguments}{ -\if{html}{\out{
    }} -\describe{ -\item{\code{new_shape}}{A vector of integerish, of the same length as the array's \code{ndim}.} -} -\if{html}{\out{
    }} -} -\subsection{Returns}{ -No return value -} -} -\if{html}{\out{
    }} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-SOMASparseNDArray-tiledbsoma_upgrade_shape}{}}} -\subsection{Method \code{tiledbsoma_upgrade_shape()}}{ -Allows the array to have a resizeable shape as described in the -TileDB-SOMA 1.15 release notes. Raises an error if the shape exceeds maxshape in any -dimension. Raises an error if the array already has a shape. +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-SOMASparseNDArray-.write_coordinates}{}}} +\subsection{Method \code{.write_coordinates()}}{ +Write a COO table to the array \subsection{Usage}{ -\if{html}{\out{
    }}\preformatted{SOMASparseNDArray$tiledbsoma_upgrade_shape(shape)}\if{html}{\out{
    }} +\if{html}{\out{
    }}\preformatted{SOMASparseNDArray$.write_coordinates(values)}\if{html}{\out{
    }} } \subsection{Arguments}{ \if{html}{\out{
    }} \describe{ -\item{\code{shape}}{A vector of integerish, of the same length as the array's \code{ndim}.} +\item{\code{values}}{A \code{data.frame} or \code{\link[arrow:Table]{Arrow::Table}} +with data in COO format; must be named with the dimension and attribute +labels of the array} } \if{html}{\out{
    }} } \subsection{Returns}{ -No return value +Invisibly returns \code{self} } } \if{html}{\out{
    }} diff --git a/apis/r/man/r_type_from_arrow_type.Rd b/apis/r/man/r_type_from_arrow_type.Rd new file mode 100644 index 0000000000..4bb62a59c7 --- /dev/null +++ b/apis/r/man/r_type_from_arrow_type.Rd @@ -0,0 +1,47 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils-arrow.R +\name{r_type_from_arrow_type} +\alias{r_type_from_arrow_type} +\alias{r_type_from_arrow_type.Schema} +\alias{r_type_from_arrow_type.Field} +\alias{r_type_from_arrow_type.DataType} +\title{Get the \R Type from an Arrow Type} +\usage{ +r_type_from_arrow_type(x) + +\method{r_type_from_arrow_type}{Schema}(x) + +\method{r_type_from_arrow_type}{Field}(x) + +\method{r_type_from_arrow_type}{DataType}(x) +} +\arguments{ +\item{x}{An \CRANpkg{Arrow} \link[arrow:Schema]{schema}, +\link[arrow:Field]{field}, or \link[arrow:infer_type]{data type}} +} +\value{ +If \code{x} is a \link[arrow:infer_type]{data type}, a single +character value giving the \R \link[base:typeof]{type} of \code{x}; if no +corresponding \R type, returns the \CRANpkg{Arrow} type name + +If \code{x} is a \link[arrow:Field]{field}, a single named character +vector with the name being the field name and the value being the \R +\link[base:typeof]{type} + +If \code{x} is a \link[arrow:Schema]{schema}, a named vector where +the names are field names and the values are the \R \link[base:typeof]{types} +of each field +} +\description{ +Get an \R \link[base:typeof]{type} from an Arrow type. This function is +equivalent to \code{\link[base]{typeof}()} rather than +\code{\link[base]{mode}()} or \code{\link[base]{class}()}, and returns the +equivalent \strong{type}. For example, the equivalent \R type to an Arrow +\link[arrow]{dictionary} is \dQuote{\code{integer}}, not +\dQuote{\code{factor}}; likewise, the equivalent \R type to an Arrow 64-bit +integer is \dQuote{\code{double}} +} +\seealso{ +\code{\link[base]{typeof}()} +} +\keyword{internal} diff --git a/apis/r/tests/testthat/test-SOMASparseNDArray.R b/apis/r/tests/testthat/test-SOMASparseNDArray.R index 9777bcaefd..808ff6e977 100644 --- a/apis/r/tests/testthat/test-SOMASparseNDArray.R +++ b/apis/r/tests/testthat/test-SOMASparseNDArray.R @@ -1,3 +1,4 @@ + test_that("SOMASparseNDArray creation", { skip_if(!extended_tests()) uri <- tempfile(pattern="sparse-ndarray") @@ -96,6 +97,105 @@ test_that("SOMASparseNDArray creation", { }) +test_that("SOMASparseNDArray write COO assertions", { + skip_if(!extended_tests()) + uri <- tempfile(pattern = "sparse-ndarray-coo") + shape <- c(10L, 10L) + ndarray <- SOMASparseNDArrayCreate(uri, arrow::int32(), shape = shape) + + expect_s3_class(ndarray, 'SOMASparseNDArray') + expect_equal(ndarray$ndim(), 2L) + mat <- create_sparse_matrix_with_int_dims(10L, 10L) + df <- data.frame( + soma_dim_0 = mat@i, + soma_dim_1 = mat@j, + soma_data = as.integer(mat@x) + ) + + ndarray$reopen("WRITE") + expect_invisible(ndarray$.write_coordinates(df)) + ndarray$close() + + # Test write with Table + tbl <- arrow::as_arrow_table(df) + ndarray <- SOMASparseNDArrayCreate( + tempfile(pattern = "sparse-ndarray-coo-table"), + type = tbl$soma_data$type, + shape = shape + ) + expect_invisible(ndarray$.write_coordinates(tbl)) + ndarray$close() + + # Test write unnamed data frame + udf <- df + names(udf) <- NULL + ndarray <- SOMASparseNDArrayCreate( + uri = tempfile(pattern = "sparse-ndarray-coo-unnamed"), + type = arrow::int32(), + shape = shape + ) + expect_invisible(ndarray$.write_coordinates(udf)) + ndarray$close() + + # Test argument assertions + arr <- 
SOMASparseNDArrayCreate(tempfile(), arrow::int32(), shape = shape) + on.exit(arr$close(), add = TRUE, after = FALSE) + expect_error( + arr$.write_coordinates(mat), + regexp = "'values' must be a data frame or Arrow Table" + ) + expect_error( + arr$.write_coordinates(mtcars), + regexp = "'values' must have one column for each dimension and the data" + ) + + sdf <- df + while (identical(names(sdf), c(ndarray$dimnames(), ndarray$attrnames()))) { + sdf <- sdf[, sample(names(sdf)), drop = FALSE] + } + expect_error( + arr$.write_coordinates(sdf), + regexp = "'values' must be named with the dimension and attribute labels" + ) + + # Test dimension assertions + ddf <- df + ddf$soma_dim_0 <- ddf$soma_dim_0 + 0.1 + expect_error( + arr$.write_coordinates(ddf), + regexp = "All dimension columns must be integerish" + ) + + ndf <- df + ndf$soma_dim_0 <- -ndf$soma_dim_0 + expect_error( + arr$.write_coordinates(ndf), + regexp = "Dimension columns cannot contain negative values" + ) + + bdf <- df + bdf$soma_dim_0 <- bdf$soma_dim_0 * 1000 + expect_error( + arr$.write_coordinates(bdf), + regexp = "Dimension columns cannot exceed the shape of the array" + ) + + # Test attribute assertions + ldf <- df + ldf$soma_data <- TRUE + expect_error( + arr$.write_coordinates(ldf), + regexp = "The data column must be of type 'integer'" + ) + + fdf <- df + fdf$soma_data <- fdf$soma_data + 0.1 + expect_error( + arr$.write_coordinates(fdf), + regexp = "The data column must be of type 'integer'" + ) +}) + test_that("SOMASparseNDArray read_sparse_matrix", { skip_if(!extended_tests()) uri <- tempfile(pattern="sparse-ndarray-3") @@ -611,10 +711,10 @@ test_that("SOMASparseNDArray with failed bounding box", { coo <- data.frame( i = bit64::as.integer64(slot(mat, "i")), j = bit64::as.integer64(slot(mat, "j")), - x = slot(mat, "x") + x = as.integer(slot(mat, "x")) ) names(coo) <- c(ndarray$dimnames(), ndarray$attrnames()) - ndarray$.__enclos_env__$private$.write_coo_dataframe(coo) + expect_invisible(ndarray$.write_coordinates(coo)) ndarray$close() diff --git a/apis/r/tests/testthat/test-r-arrow-types.R b/apis/r/tests/testthat/test-r-arrow-types.R new file mode 100644 index 0000000000..42ef712358 --- /dev/null +++ b/apis/r/tests/testthat/test-r-arrow-types.R @@ -0,0 +1,93 @@ +test_that("Arrow to R types: data type", { + skip_if(!extended_tests()) + skip_if_not_installed('arrow') + + ints <- apply( + expand.grid(c('', 'u'), 'int', c('8', '16', '32')), + MARGIN = 1L, + FUN = paste, + collapse = '' + ) + for (i in c(ints, 'dictionary')) { + f <- get(i, envir = asNamespace('arrow')) + expect_type(rt <- r_type_from_arrow_type(f()), 'character') + expect_length(rt, 1L) + expect_null(names(rt)) + expect_identical( + rt, + 'integer', + label = sprintf('r_type_from_arrow_type(arrow::%s())', i) + ) + } + + dbls <- c('int64', 'uint64', 'date32', 'timestamp' ,'float', 'float32') + for (i in dbls) { + f <- get(i, envir = asNamespace('arrow')) + expect_type(rt <- r_type_from_arrow_type(f()), 'character') + expect_length(rt, 1L) + expect_null(names(rt)) + expect_identical( + rt, + 'double', + label = sprintf('r_type_from_arrow_type(arrow::%s())', i) + ) + } + + for (i in c('bool', 'boolean')) { + f <- get(i, envir = asNamespace('arrow')) + expect_type(rt <- r_type_from_arrow_type(f()), 'character') + expect_length(rt, 1L) + expect_null(names(rt)) + expect_identical( + rt, + 'logical', + label = sprintf('r_type_from_arrow_type(arrow::%s())', i) + ) + } + for (i in c('utf8', 'string', 'large_utf8')) { + f <- get(i, envir = asNamespace('arrow')) + 
expect_type(rt <- r_type_from_arrow_type(f()), 'character') + expect_length(rt, 1L) + expect_null(names(rt)) + expect_identical( + rt, + 'character', + label = sprintf('r_type_from_arrow_type(arrow::%s())', i) + ) + } +}) + +test_that("Arrow to R types: field", { + skip_if(!extended_tests()) + skip_if_not_installed('arrow') + + field <- arrow::field(name = random_name(), type = arrow::int8()) + expect_type(rt <- r_type_from_arrow_type(field), 'character') + expect_length(rt, 1L) + expect_named(rt, field$name) + expect_equivalent(rt, 'integer') +}) + +test_that("Arrow to R types: schema", { + skip_if(!extended_tests()) + + asch <- create_arrow_schema() + expect_type(rt <- r_type_from_arrow_type(asch), 'character') + expect_length(rt, length(asch)) + expect_named(rt, asch$names) + for (fn in names(rt)) { + et <- switch( + EXPR = fn, + foo = 'integer', + soma_joinid = 'double', + bar = 'double', + baz = 'character' + ) + expect_equivalent( + rt[fn], + et, + label = sprintf('r_type_from_arrow_type(schema[[%s]])', fn), + expected.label = dQuote(et, FALSE) + ) + } +}) diff --git a/apis/r/tests/testthat/test-utils.R b/apis/r/tests/testthat/test-utils.R index 261ee26df0..b967a427fe 100644 --- a/apis/r/tests/testthat/test-utils.R +++ b/apis/r/tests/testthat/test-utils.R @@ -29,7 +29,6 @@ test_that("validate read coords", { ) }) - test_that("validate read coords with dimension names", { # assume vector or unnamed list of length 1 corresponds to first dimension @@ -50,7 +49,6 @@ test_that("validate read coords with dimension names", { ) }) - test_that("validate read coords with dimension names and schema", { asch <- create_arrow_schema() @@ -82,3 +80,96 @@ test_that("validate read coords with dimension names and schema", { expect_equal(test_coords$foo, 1:10) expect_equal(test_coords$soma_joinid, as.integer64(1:10)) }) + +test_that("is_integerish: default", { + expect_true(.is_integerish(vector("integer"))) + expect_true(.is_integerish(vector("numeric"))) + types <- c("logical", "complex", "character", "expression", "list", "raw") + for (tt in types) { + expect_false( + .is_integerish(vector(tt)), + label = sprintf(".is_integerish(vector('%s'))", tt) + ) + } +}) + +test_that("is_integerish: integer64", { + # Basic tests + for (n in 0:3) { + expect_true( + .is_integerish(bit64::integer64(length = n)), + label = sprintf(".is_integerish(integer64(length = %i))", n) + ) + expect_true( + .is_integerish(bit64::integer64(length = n), n = n), + label = sprintf(".is_integerish(integer64(length = %i), n = %i)", n, n) + ) + expect_false( + .is_integerish(bit64::integer64(length = n), n = n + 1L), + label = sprintf(".is_integerish(integer64(length = %i), n = %i)", n, n + 1L) + ) + } + + # Test finiteness + expect_true(.is_integerish(bit64::NA_integer64_)) + expect_true(.is_integerish(bit64::NA_integer64_, finite = FALSE)) + expect_false(.is_integerish(bit64::integer64(), finite = FALSE)) + expect_false(.is_integerish(bit64::NA_integer64_, finite = TRUE)) + + # Test large number + expect_true(.is_integerish(bit64::as.integer64((2 ^ 31) + 1L))) +}) + +test_that("is_integerish: arrow::DataType", { + + ints <- paste0("int", c(8, 16, 32, 64)) + for (it in c(ints, paste0("u", ints))) { + f <- get(it, envir = asNamespace("arrow")) + expect_true( + .is_integerish(f()), + label = sprintf(".is_integerish(arrow::%s())", it) + ) + } + + types <- c( + paste0("float", c("", 16, 32, 64)), + paste0("bool", c("", "ean")), + "string", + paste0(c("", "large_"), "utf8"), + paste0("date", c(32, 64)), + paste0("time", c(32, 64, 
"stamp")) + ) + for (at in types) { + f <- get(at, envir = asNamespace("arrow")) + expect_false( + .is_integerish(f()), + label = sprintf(".is_integerish(arrow::%s())", at) + ) + } +}) + +test_that("is_integerish: arrow::Field", { + sch <- create_arrow_schema() + for (i in names(sch)) { + label <- sprintf(".is_integerish(sch[['%s']])", i) + switch( + EXPR = i, + foo = , + soma_joinid = expect_true(.is_integerish(sch[[i]]), label = label), + expect_false(.is_integerish(sch[[i]]), label = label) + ) + } +}) + +test_that("is_integerish: arrow::Arrays", { + tbl <- create_arrow_table() + for (i in names(tbl)) { + label <- sprintf(".is_integerish(tbl[['%s']])", i) + switch( + EXPR = i, + foo = , + soma_joinid = expect_true(.is_integerish(tbl[[i]]), label = label), + expect_false(.is_integerish(tbl[[i]]), label = label) + ) + } +})