From cc30a6b3b675a0397bdc1e0e50d5874dcd331348 Mon Sep 17 00:00:00 2001 From: John Kerl Date: Fri, 30 Aug 2024 13:04:22 -0400 Subject: [PATCH] Expose get_current_domain for tests; widen string-dim NDRectangle range --- libtiledbsoma/src/soma/soma_array.h | 7 +++ libtiledbsoma/src/utils/arrow_adapter.cc | 5 +- libtiledbsoma/test/unit_soma_dataframe.cc | 63 ++++++++++++++++++++--- 3 files changed, 66 insertions(+), 9 deletions(-) diff --git a/libtiledbsoma/src/soma/soma_array.h b/libtiledbsoma/src/soma/soma_array.h index 8e87f3b04a..790c8594bf 100644 --- a/libtiledbsoma/src/soma/soma_array.h +++ b/libtiledbsoma/src/soma/soma_array.h @@ -796,6 +796,13 @@ class SOMAArray : public SOMAObject { */ std::optional timestamp(); + /** + * Public wrapper returning the CurrentDomain from _get_current_domain(); exposed for testing purposes. + */ + CurrentDomain get_current_domain() { + return _get_current_domain(); + } + private: //=================================================================== //= private non-static diff --git a/libtiledbsoma/src/utils/arrow_adapter.cc b/libtiledbsoma/src/utils/arrow_adapter.cc index 6dd7a2908e..1b804fb7db 100644 --- a/libtiledbsoma/src/utils/arrow_adapter.cc +++ b/libtiledbsoma/src/utils/arrow_adapter.cc @@ -868,9 +868,8 @@ ArraySchema ArrowAdapter::tiledb_schema_from_arrow_schema( // nullptr) // // Fortunately, these are ASCII dims and we can range - // these accordingly. These are minimum and maximum - // values, avoiding the extremes 0x00 and 0xff. - ndrect.set_range(col_name, "\x01", "\xfe"); + // these accordingly. 
+ ndrect.set_range(col_name, "", "\xff"); } else { const void* buff = index_column_array->children[i] ->buffers[1]; diff --git a/libtiledbsoma/test/unit_soma_dataframe.cc b/libtiledbsoma/test/unit_soma_dataframe.cc index c9a6cdbc2a..d3bda538d0 100644 --- a/libtiledbsoma/test/unit_soma_dataframe.cc +++ b/libtiledbsoma/test/unit_soma_dataframe.cc @@ -429,6 +429,7 @@ TEST_CASE("SOMADataFrame: variant-indexed dataframe 1") { helper::create_arrow_schema_and_index_columns( dim_infos, attr_infos); + // Create SOMADataFrame::create( uri, std::move(schema), @@ -437,7 +438,27 @@ TEST_CASE("SOMADataFrame: variant-indexed dataframe 1") { std::move(index_columns.second)), ctx); - auto soma_dataframe = SOMADataFrame::open(uri, OpenMode::write, ctx); + // Check current domain + auto soma_dataframe = SOMADataFrame::open(uri, OpenMode::read, ctx); + + CurrentDomain current_domain = soma_dataframe->get_current_domain(); + if (!use_current_domain) { + REQUIRE(current_domain.is_empty()); + } else { + REQUIRE(!current_domain.is_empty()); + REQUIRE(current_domain.type() == TILEDB_NDRECTANGLE); + NDRectangle ndrect = current_domain.ndrectangle(); + + std::array dim_1_range = ndrect.range( + dim_1_name); + REQUIRE(dim_1_range[0] == (int64_t)0); + REQUIRE(dim_1_range[1] == (int64_t)soma_joinid_dim_max); + } + + soma_dataframe->close(); + + // Write + soma_dataframe = SOMADataFrame::open(uri, OpenMode::write, ctx); std::vector dim_1_data({1, 2}); std::vector dim_2_data({1234, 5678}); @@ -455,15 +476,15 @@ TEST_CASE("SOMADataFrame: variant-indexed dataframe 1") { } TEST_CASE("SOMADataFrame: variant-indexed dataframe 2") { + LOG_SET_LEVEL("debug"); int64_t soma_joinid_dim_max = 100; auto use_current_domain = GENERATE(false, true); std::ostringstream section; section << "- use_current_domain=" << use_current_domain; SECTION(section.str()) { - // LOG_SET_LEVEL("debug"); auto ctx = std::make_shared(); std::string uri = "mem://unit-test-variant-indexed-dataframe-1"; - // std::string uri = 
"/tmp/fooze"; + // std::string uri = use_current_domain? "/tmp/fooze2" : "/tmp/fooze1"; std::string dim_1_name = "soma_joinid"; std::string dim_2_name = "mystring"; std::string attr_1_name = "myuint32"; @@ -492,6 +513,7 @@ TEST_CASE("SOMADataFrame: variant-indexed dataframe 2") { helper::create_arrow_schema_and_index_columns( dim_infos, attr_infos); + // Create SOMADataFrame::create( uri, std::move(schema), @@ -500,9 +522,39 @@ TEST_CASE("SOMADataFrame: variant-indexed dataframe 2") { std::move(index_columns.second)), ctx); - auto soma_dataframe = SOMADataFrame::open(uri, OpenMode::write, ctx); + // Check current domain + auto soma_dataframe = SOMADataFrame::open(uri, OpenMode::read, ctx); + + CurrentDomain current_domain = soma_dataframe->get_current_domain(); + if (!use_current_domain) { + REQUIRE(current_domain.is_empty()); + } else { + REQUIRE(!current_domain.is_empty()); + REQUIRE(current_domain.type() == TILEDB_NDRECTANGLE); + NDRectangle ndrect = current_domain.ndrectangle(); + + std::array dim_1_range = ndrect.range( + dim_1_name); + REQUIRE(dim_1_range[0] == (int64_t)0); + REQUIRE(dim_1_range[1] == (int64_t)soma_joinid_dim_max); + + std::array dim_2_range = ndrect.range( + dim_2_name); + // Can we write ASCII values in this range? + REQUIRE(dim_2_range[0] < " "); + REQUIRE(dim_2_range[1] > "~"); + // Can we write empty strings in this range? 
+ REQUIRE(dim_2_range[0] <= ""); + REQUIRE(dim_2_range[1] >= ""); + } + + soma_dataframe->close(); + + // Write + soma_dataframe = SOMADataFrame::open(uri, OpenMode::write, ctx); std::vector dim_1_data({1, 2}); + // std::vector dim_2_data({"", ""}); std::vector dim_2_data({"apple", "bat"}); std::vector attr_1_data({1234, 5678}); soma_dataframe->set_column_data( @@ -518,15 +570,14 @@ TEST_CASE("SOMADataFrame: variant-indexed dataframe 2") { } TEST_CASE("SOMADataFrame: variant-indexed dataframe 3") { + LOG_SET_LEVEL("debug"); int64_t other_dim_max = 10000; auto use_current_domain = GENERATE(false, true); std::ostringstream section; section << "- use_current_domain=" << use_current_domain; SECTION(section.str()) { - // LOG_SET_LEVEL("debug"); auto ctx = std::make_shared(); std::string uri = "mem://unit-test-variant-indexed-dataframe-1"; - // std::string uri = "/tmp/fooze"; std::string dim_1_name = "mystring"; std::string dim_2_name = "myuint32"; std::string attr_1_name = "soma_joinid";