Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 26 additions & 4 deletions cpp/src/arrow/compute/kernels/scalar_if_else.cc
Original file line number Diff line number Diff line change
Expand Up @@ -759,11 +759,22 @@ struct IfElseFunctor<Type, enable_if_base_binary<Type>> {
auto* out_data = out->array_data().get();
auto offset_length = (cond.length + 1) * sizeof(OffsetType);
ARROW_ASSIGN_OR_RAISE(out_data->buffers[1], ctx->Allocate(offset_length));
std::memcpy(out_data->buffers[1]->mutable_data(), right_offsets, offset_length);

if (right_offsets[0] == 0) {
std::memcpy(out_data->buffers[1]->mutable_data(), right_offsets, offset_length);
} else {
OffsetType base = right_offsets[0];
auto* out_offsets =
reinterpret_cast<OffsetType*>(out_data->buffers[1]->mutable_data());
for (int64_t i = 0; i <= cond.length; ++i) {
out_offsets[i] = right_offsets[i] - base;
}
}

auto right_data_length = right_offsets[right.length] - right_offsets[0];
ARROW_ASSIGN_OR_RAISE(out_data->buffers[2], ctx->Allocate(right_data_length));
std::memcpy(out_data->buffers[2]->mutable_data(), right_data, right_data_length);
std::memcpy(out_data->buffers[2]->mutable_data(), right_data + right_offsets[0],
right_data_length);
return Status::OK();
}

Expand Down Expand Up @@ -801,11 +812,22 @@ struct IfElseFunctor<Type, enable_if_base_binary<Type>> {
auto* out_data = out->array_data().get();
auto offset_length = (cond.length + 1) * sizeof(OffsetType);
ARROW_ASSIGN_OR_RAISE(out_data->buffers[1], ctx->Allocate(offset_length));
std::memcpy(out_data->buffers[1]->mutable_data(), left_offsets, offset_length);

if (left_offsets[0] == 0) {
std::memcpy(out_data->buffers[1]->mutable_data(), left_offsets, offset_length);
} else {
OffsetType base = left_offsets[0];
auto* out_offsets =
reinterpret_cast<OffsetType*>(out_data->buffers[1]->mutable_data());
for (int64_t i = 0; i <= cond.length; ++i) {
out_offsets[i] = left_offsets[i] - base;
}
}

auto left_data_length = left_offsets[left.length] - left_offsets[0];
ARROW_ASSIGN_OR_RAISE(out_data->buffers[2], ctx->Allocate(left_data_length));
std::memcpy(out_data->buffers[2]->mutable_data(), left_data, left_data_length);
std::memcpy(out_data->buffers[2]->mutable_data(), left_data + left_offsets[0],
left_data_length);
return Status::OK();
}

Expand Down
53 changes: 53 additions & 0 deletions cpp/src/arrow/compute/kernels/scalar_if_else_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
#include "arrow/compute/kernels/test_util_internal.h"
#include "arrow/compute/registry.h"
#include "arrow/testing/gtest_util.h"
#include "arrow/util/bitmap_builders.h"
#include "arrow/util/checked_cast.h"

namespace arrow {
Expand Down Expand Up @@ -609,6 +610,58 @@ TYPED_TEST(TestIfElseBaseBinary, IfElseBaseBinaryRand) {
CheckIfElseOutput(cond, left, right, expected_data);
}

// if_else over a sliced (non-zero array offset) binary input combined with a
// null scalar must yield a valid array equal to the sliced values, regardless
// of which side of the branch the sliced array sits on.
TYPED_TEST(TestIfElseBaseBinary, IfElseBaseBinarySliced) {
  const auto ty = TypeTraits<TypeParam>::type_singleton();

  // Slicing off the first element leaves a view that starts at array offset 1.
  const auto unsliced = ArrayFromJSON(ty, R"(["not used", null, "x", "x"])");
  const auto tail = unsliced->Slice(1);
  const auto want = ArrayFromJSON(ty, R"([null, "x", "x"])");

  {
    // Sliced array on the "else" side: (array cond, scalar, array).
    const auto mask = ArrayFromJSON(boolean(), "[true, false, false]");
    ASSERT_OK_AND_ASSIGN(auto datum,
                         CallFunction("if_else", {mask, MakeNullScalar(ty), tail}));
    const auto got = datum.make_array();
    ASSERT_OK(got->ValidateFull());
    AssertArraysEqual(*want, *got, /*verbose=*/true);
  }

  {
    // Sliced array on the "then" side: (array cond, array, scalar).
    const auto mask = ArrayFromJSON(boolean(), "[false, true, true]");
    ASSERT_OK_AND_ASSIGN(auto datum,
                         CallFunction("if_else", {mask, tail, MakeNullScalar(ty)}));
    const auto got = datum.make_array();
    ASSERT_OK(got->ValidateFull());
    AssertArraysEqual(*want, *got, /*verbose=*/true);
  }
}

// The array's logical offset is 0, but its first value offset is not:
// offsets[0] == 8, so the data buffer carries an 8-byte prefix that no element
// references. The if_else result must still validate and compare equal to
// [null, "a", "b"].
TYPED_TEST(TestIfElseBaseBinary, IfElseBaseBinaryNonZeroFirst) {
  using OffsetType = typename TypeTraits<TypeParam>::OffsetType::c_type;
  const auto ty = TypeTraits<TypeParam>::type_singleton();

  // Eight padding bytes followed by the two one-byte payloads "a" and "b".
  std::string bytes(8, 'p');
  bytes += "ab";
  std::vector<OffsetType> value_offsets = {8, 8, 9, 10};

  // Element 0 is null, elements 1 and 2 are valid.
  std::vector<uint8_t> valid_flags = {0, 1, 1};
  ASSERT_OK_AND_ASSIGN(auto validity_buf,
                       internal::BytesToBits(valid_flags, default_memory_pool()));

  // The wrapped buffers borrow the local vector/string storage, which stays
  // alive until the end of this test body.
  auto offsets_buf = Buffer::Wrap(value_offsets.data(), value_offsets.size());
  auto data_buf = Buffer::Wrap(bytes.data(), bytes.size());
  auto input = MakeArray(ArrayData::Make(ty, /*length=*/3,
                                         {validity_buf, offsets_buf, data_buf},
                                         /*null_count=*/1, /*offset=*/0));
  ASSERT_OK(input->ValidateFull());

  const auto want = ArrayFromJSON(ty, R"([null, "a", "b"])");

  {
    // Input on the "else" side: (array cond, scalar, array).
    const auto mask = ArrayFromJSON(boolean(), "[true, false, false]");
    ASSERT_OK_AND_ASSIGN(auto datum,
                         CallFunction("if_else", {mask, MakeNullScalar(ty), input}));
    const auto got = datum.make_array();
    ASSERT_OK(got->ValidateFull());
    AssertArraysEqual(*want, *got, /*verbose=*/true);
  }

  {
    // Input on the "then" side: (array cond, array, scalar).
    const auto mask = ArrayFromJSON(boolean(), "[false, true, true]");
    ASSERT_OK_AND_ASSIGN(auto datum,
                         CallFunction("if_else", {mask, input, MakeNullScalar(ty)}));
    const auto got = datum.make_array();
    ASSERT_OK(got->ValidateFull());
    AssertArraysEqual(*want, *got, /*verbose=*/true);
  }
}

Result<std::shared_ptr<Array>> MakeBinaryArrayWithData(
const std::shared_ptr<DataType>& type, const std::shared_ptr<Buffer>& data_buffer) {
// Make a (large-)binary array with a single item backed by the given data
Expand Down
Loading