Skip to content

Release query result after materialization & transformation #1027

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 9 commits into
base: main
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions src/include/reltoaltrep.hpp
Original file line number Diff line number Diff line change
@@ -17,13 +17,22 @@ struct AltrepRelationWrapper {

duckdb::unique_ptr<QueryResult> Materialize();

void MarkColumnAsTransformed();

const bool allow_materialization;
const size_t n_rows;
const size_t n_cells;

rel_extptr_t rel_eptr;
duckdb::shared_ptr<Relation> rel;
duckdb::unique_ptr<QueryResult> res;

R_xlen_t rowcount;
bool rowcount_retrieved;
Comment on lines +30 to +31
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is it worth to initialize with -1 to avoid rowcount_retrieved ? I honestly don't know.


size_t ncols;
size_t cols_transformed;
Comment on lines +33 to +34
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

By the same token, use a single n_cols_to_retrieve that counts down to zero?


};

}
37 changes: 35 additions & 2 deletions src/reltoaltrep.cpp
Original file line number Diff line number Diff line change
@@ -101,13 +101,24 @@ AltrepRelationWrapper *AltrepRelationWrapper::Get(SEXP x) {
}

AltrepRelationWrapper::AltrepRelationWrapper(rel_extptr_t rel_, bool allow_materialization_, size_t n_rows_, size_t n_cells_)
: allow_materialization(allow_materialization_), n_rows(n_rows_), n_cells(n_cells_), rel_eptr(rel_), rel(rel_->rel) {
: allow_materialization(allow_materialization_), n_rows(n_rows_), n_cells(n_cells_), rel_eptr(rel_), rel(rel_->rel), rowcount(0), rowcount_retrieved(false), ncols(0), cols_transformed(0) {
}

bool AltrepRelationWrapper::HasQueryResult() const {
return (bool)res;
}

void AltrepRelationWrapper::MarkColumnAsTransformed() {
// AltrepRelationWrapper keeps tabs on how many of the columns have been transformed
// to their R-representation
cols_transformed++;
// If all of the columns have been transformed, we can reset
// the query-result pointer and free the memory
if (cols_transformed == ncols) {
res.reset();
}
}

MaterializedQueryResult *AltrepRelationWrapper::GetQueryResult() {
if (!res) {
if (!allow_materialization || n_cells == 0) {
@@ -149,6 +160,10 @@ MaterializedQueryResult *AltrepRelationWrapper::GetQueryResult() {
cpp11::stop("Query execution was interrupted");
}


rowcount = ((MaterializedQueryResult *)res.get())->RowCount();
rowcount_retrieved = true;

signal_handler.Disable();
}
D_ASSERT(res);
@@ -196,7 +211,7 @@ duckdb::unique_ptr<QueryResult> AltrepRelationWrapper::Materialize() {

struct AltrepRownamesWrapper {

AltrepRownamesWrapper(duckdb::shared_ptr<AltrepRelationWrapper> rel_p) : rel(rel_p) {
AltrepRownamesWrapper(duckdb::shared_ptr<AltrepRelationWrapper> rel_p) : rel(rel_p), rowlen_data_retrieved(false) {
rowlen_data[0] = NA_INTEGER;
}

@@ -206,6 +221,7 @@ struct AltrepRownamesWrapper {

int32_t rowlen_data[2];
duckdb::shared_ptr<AltrepRelationWrapper> rel;
bool rowlen_data_retrieved;
};

struct AltrepVectorWrapper {
@@ -228,6 +244,8 @@ struct AltrepVectorWrapper {
duckdb_r_transform(chunk.data[column_index], dest, dest_offset, chunk.size(), false);
dest_offset += chunk.size();
}

rel->MarkColumnAsTransformed();
}
return DATAPTR(transformed_vector);
}
@@ -306,16 +324,27 @@ const void *RelToAltrep::RownamesDataptrOrNull(SEXP x) {

void *RelToAltrep::DoRownamesDataptrGet(SEXP x) {
auto rownames_wrapper = AltrepRownamesWrapper::Get(x);

// the query has been materialized, return the rowcount
// (and void recomputing the query if it's been reset)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
// (and void recomputing the query if it's been reset)
// (and avoid recomputing the query if it's been reset)

if (rownames_wrapper->rowlen_data_retrieved) {
return rownames_wrapper->rowlen_data;
}

auto row_count = rownames_wrapper->rel->GetQueryResult()->RowCount();
if (row_count > (idx_t)NumericLimits<int32_t>::Maximum()) {
cpp11::stop("Integer overflow for row.names attribute");
}
rownames_wrapper->rowlen_data[1] = -row_count;
rownames_wrapper->rowlen_data_retrieved = true;
return rownames_wrapper->rowlen_data;
}

R_xlen_t RelToAltrep::VectorLength(SEXP x) {
BEGIN_CPP11
if (AltrepVectorWrapper::Get(x)->rel->rowcount_retrieved) {
return AltrepVectorWrapper::Get(x)->rel->rowcount;
}
return AltrepVectorWrapper::Get(x)->rel->GetQueryResult()->RowCount();
END_CPP11_EX(0)
}
@@ -404,6 +433,8 @@ size_t DoubleToSize(double d) {
auto relation_wrapper = make_shared_ptr<AltrepRelationWrapper>(rel, allow_materialization, DoubleToSize(n_rows),
DoubleToSize(n_cells));

relation_wrapper->ncols = drel->Columns().size();
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we really need to update twice (here and below)?


cpp11::writable::list data_frame;
data_frame.reserve(ncols);

@@ -427,6 +458,8 @@ size_t DoubleToSize(double d) {
}
SET_NAMES(data_frame, StringsToSexp(names));

relation_wrapper->ncols = drel->Columns().size();

// Row names
cpp11::external_pointer<AltrepRownamesWrapper> ptr(new AltrepRownamesWrapper(relation_wrapper));
R_SetExternalPtrTag(ptr, RStrings::get().duckdb_row_names_sym);