Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[c++] Remove ArrayBuffers from SOMAArray #2979

Merged
merged 1 commit into from
Sep 11, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 24 additions & 0 deletions libtiledbsoma/src/soma/managed_query.cc
Original file line number Diff line number Diff line change
Expand Up @@ -215,6 +215,30 @@ void ManagedQuery::set_column_data(
}
}

std::shared_ptr<ColumnBuffer> ManagedQuery::setup_column_data(
std::string_view name) {
if (this->query_type() != TILEDB_WRITE) {
throw TileDBSOMAError("[SOMAArray] array must be opened in write mode");
}

// Create the array_buffer_ as necessary
if (buffers_ == nullptr) {
buffers_ = std::make_shared<ArrayBuffers>();
}

// Create a ColumnBuffer object instead of passing it in as an argument to
// `set_column_data` because ColumnBuffer::create requires a TileDB Array
// argument which should remain a private member of SOMAArray
auto column = ColumnBuffer::create(array_, name);

// Keep the ColumnBuffer alive by attaching it to the ArrayBuffers class
// member. Otherwise, the data held by the ColumnBuffer will be garbage
// collected before it is submitted to the write query
buffers_->emplace(std::string(name), column);

return column;
};

void ManagedQuery::setup_read() {
// If the query is complete, return so we do not submit it again
auto status = query_->query_status();
Expand Down
3 changes: 3 additions & 0 deletions libtiledbsoma/src/soma/managed_query.h
Original file line number Diff line number Diff line change
Expand Up @@ -234,6 +234,9 @@ class ManagedQuery {
*/
void set_column_data(std::shared_ptr<ColumnBuffer> buffer);

/**
 * @brief Helper function for set_column_data: create a ColumnBuffer for
 * the named column and attach it to this query's buffers so that it is
 * kept alive until it is submitted to the write query.
 *
 * @param name Name of the column to create a buffer for
 * @return std::shared_ptr<ColumnBuffer> The newly created column buffer
 * @throws TileDBSOMAError if the query type is not TILEDB_WRITE
 */
std::shared_ptr<ColumnBuffer> setup_column_data(std::string_view name);

/**
* @brief Configure query and allocate result buffers for reads.
*
Expand Down
41 changes: 3 additions & 38 deletions libtiledbsoma/src/soma/soma_array.cc
Original file line number Diff line number Diff line change
Expand Up @@ -482,7 +482,7 @@ void SOMAArray::set_column_data(
const void* data,
uint64_t* offsets,
uint8_t* validity) {
auto column = SOMAArray::_setup_column_data(name);
auto column = mq_->setup_column_data(name);
column->set_data(num_elems, data, offsets, validity);
mq_->set_column_data(column);
};
Expand All @@ -493,47 +493,18 @@ void SOMAArray::set_column_data(
const void* data,
uint32_t* offsets,
uint8_t* validity) {
auto column = SOMAArray::_setup_column_data(name);
auto column = mq_->setup_column_data(name);
column->set_data(num_elems, data, offsets, validity);
mq_->set_column_data(column);
};

/**
 * @brief Build a ColumnBuffer for the named column and hold on to it until
 * the write is performed.
 *
 * @param name Name of the column to create a buffer for
 * @return std::shared_ptr<ColumnBuffer> The newly created column buffer
 * @throws TileDBSOMAError if the managed query is not a TILEDB_WRITE query
 */
std::shared_ptr<ColumnBuffer> SOMAArray::_setup_column_data(
    std::string_view name) {
    // Reject anything other than a write query up front
    const bool opened_for_write = (mq_->query_type() == TILEDB_WRITE);
    if (!opened_for_write) {
        throw TileDBSOMAError("[SOMAArray] array must be opened in write mode");
    }

    // Allocate array_buffer_ on first use
    if (!array_buffer_) {
        array_buffer_ = std::make_shared<ArrayBuffers>();
    }

    // The ColumnBuffer is constructed here instead of being handed to
    // `set_column_data` by the caller: ColumnBuffer::create needs the TileDB
    // Array, which should stay a private member of SOMAArray
    auto new_column = ColumnBuffer::create(arr_, name);

    // Attach the ColumnBuffer to the ArrayBuffers member so it stays alive;
    // otherwise the data it holds would be released before it is submitted
    // to the write query
    array_buffer_->emplace(std::string(name), new_column);

    return new_column;
}

void SOMAArray::set_array_data(
std::unique_ptr<ArrowSchema> arrow_schema,
std::unique_ptr<ArrowArray> arrow_array) {
if (mq_->query_type() != TILEDB_WRITE) {
throw TileDBSOMAError("[SOMAArray] array must be opened in write mode");
}

// Create the array_buffer_ as necessary
if (array_buffer_ == nullptr) {
array_buffer_ = std::make_shared<ArrayBuffers>();
}

auto [casted_array, casted_schema] = SOMAArray::_cast_table(
std::move(arrow_schema), std::move(arrow_array));

Expand All @@ -544,7 +515,7 @@ void SOMAArray::set_array_data(
// Create a ColumnBuffer object instead of passing it in as an argument
// to `set_column_data` because ColumnBuffer::create requires a TileDB
// Array argument which should remain a private member of SOMAArray
auto column = ColumnBuffer::create(arr_, arrow_sch_->name);
auto column = mq_->setup_column_data(arrow_sch_->name);

const void* data;
uint8_t* validities = nullptr;
Expand Down Expand Up @@ -573,11 +544,6 @@ void SOMAArray::set_array_data(
static_cast<uint64_t*>(nullptr),
validities);
}
// Keep the ColumnBuffer alive by attaching it to the ArrayBuffers class
// member. Otherwise, the data held by the ColumnBuffer will be garbage
// collected before it is submitted to the write query
array_buffer_->emplace(std::string(arrow_sch_->name), column);

mq_->set_column_data(column);
}
};
Expand Down Expand Up @@ -1050,7 +1016,6 @@ void SOMAArray::write(bool sort_coords) {
mq_->submit_write(sort_coords);

mq_->reset();
array_buffer_ = nullptr;
}

void SOMAArray::consolidate_and_vacuum(std::vector<std::string> modes) {
Expand Down
10 changes: 1 addition & 9 deletions libtiledbsoma/src/soma/soma_array.h
Original file line number Diff line number Diff line change
Expand Up @@ -180,8 +180,7 @@ class SOMAArray : public SOMAObject {
, arr_(other.arr_)
, meta_cache_arr_(other.meta_cache_arr_)
, first_read_next_(other.first_read_next_)
, submitted_(other.submitted_)
, array_buffer_(other.array_buffer_) {
, submitted_(other.submitted_) {
fill_metadata_cache();
}

Expand Down Expand Up @@ -1415,9 +1414,6 @@ class SOMAArray : public SOMAObject {
// Helper function to cast Boolean of bits (Arrow) to uint8 (TileDB)
void _cast_bit_to_uint8(ArrowSchema* arrow_schema, ArrowArray* arrow_array);

// Helper function for set_column_data
std::shared_ptr<ColumnBuffer> _setup_column_data(std::string_view name);

// Fills the metadata cache upon opening the array.
void fill_metadata_cache();

Expand Down Expand Up @@ -1467,10 +1463,6 @@ class SOMAArray : public SOMAObject {

// Unoptimized method for computing nnz() (issue `count_cells` query)
uint64_t _nnz_slow();

// ArrayBuffers to hold ColumnBuffers alive when submitting to write
// query
std::shared_ptr<ArrayBuffers> array_buffer_ = nullptr;
};

} // namespace tiledbsoma
Expand Down
Loading