diff --git a/cpp/src/arrow/compute/kernels/scalar_if_else.cc b/cpp/src/arrow/compute/kernels/scalar_if_else.cc index ed32ec203c1..0c5cfe1c903 100644 --- a/cpp/src/arrow/compute/kernels/scalar_if_else.cc +++ b/cpp/src/arrow/compute/kernels/scalar_if_else.cc @@ -759,11 +759,22 @@ struct IfElseFunctor> { auto* out_data = out->array_data().get(); auto offset_length = (cond.length + 1) * sizeof(OffsetType); ARROW_ASSIGN_OR_RAISE(out_data->buffers[1], ctx->Allocate(offset_length)); - std::memcpy(out_data->buffers[1]->mutable_data(), right_offsets, offset_length); + + if (right_offsets[0] == 0) { + std::memcpy(out_data->buffers[1]->mutable_data(), right_offsets, offset_length); + } else { + OffsetType base = right_offsets[0]; + auto* out_offsets = + reinterpret_cast(out_data->buffers[1]->mutable_data()); + for (int64_t i = 0; i <= cond.length; ++i) { + out_offsets[i] = right_offsets[i] - base; + } + } auto right_data_length = right_offsets[right.length] - right_offsets[0]; ARROW_ASSIGN_OR_RAISE(out_data->buffers[2], ctx->Allocate(right_data_length)); - std::memcpy(out_data->buffers[2]->mutable_data(), right_data, right_data_length); + std::memcpy(out_data->buffers[2]->mutable_data(), right_data + right_offsets[0], + right_data_length); return Status::OK(); } @@ -801,11 +812,22 @@ struct IfElseFunctor> { auto* out_data = out->array_data().get(); auto offset_length = (cond.length + 1) * sizeof(OffsetType); ARROW_ASSIGN_OR_RAISE(out_data->buffers[1], ctx->Allocate(offset_length)); - std::memcpy(out_data->buffers[1]->mutable_data(), left_offsets, offset_length); + + if (left_offsets[0] == 0) { + std::memcpy(out_data->buffers[1]->mutable_data(), left_offsets, offset_length); + } else { + OffsetType base = left_offsets[0]; + auto* out_offsets = + reinterpret_cast(out_data->buffers[1]->mutable_data()); + for (int64_t i = 0; i <= cond.length; ++i) { + out_offsets[i] = left_offsets[i] - base; + } + } auto left_data_length = left_offsets[left.length] - left_offsets[0]; ARROW_ASSIGN_OR_RAISE(out_data->buffers[2], ctx->Allocate(left_data_length)); - std::memcpy(out_data->buffers[2]->mutable_data(), left_data, left_data_length); + std::memcpy(out_data->buffers[2]->mutable_data(), left_data + left_offsets[0], + left_data_length); return Status::OK(); } diff --git a/cpp/src/arrow/compute/kernels/scalar_if_else_test.cc b/cpp/src/arrow/compute/kernels/scalar_if_else_test.cc index e5cf73742b6..6fdcff8d970 100644 --- a/cpp/src/arrow/compute/kernels/scalar_if_else_test.cc +++ b/cpp/src/arrow/compute/kernels/scalar_if_else_test.cc @@ -27,6 +27,7 @@ #include "arrow/compute/kernels/test_util_internal.h" #include "arrow/compute/registry.h" #include "arrow/testing/gtest_util.h" +#include "arrow/util/bitmap_builders.h" #include "arrow/util/checked_cast.h" namespace arrow { @@ -609,6 +610,58 @@ TYPED_TEST(TestIfElseBaseBinary, IfElseBaseBinaryRand) { CheckIfElseOutput(cond, left, right, expected_data); } +TYPED_TEST(TestIfElseBaseBinary, IfElseBaseBinarySliced) { + auto type = TypeTraits::type_singleton(); + + auto full_arr = ArrayFromJSON(type, R"(["not used", null, "x", "x"])"); + auto sliced = full_arr->Slice(1); + auto expected = ArrayFromJSON(type, R"([null, "x", "x"])"); + + auto cond_asa = ArrayFromJSON(boolean(), "[true, false, false]"); + ASSERT_OK_AND_ASSIGN(auto result_asa, + CallFunction("if_else", {cond_asa, MakeNullScalar(type), sliced})); + ASSERT_OK(result_asa.make_array()->ValidateFull()); + AssertArraysEqual(*expected, *result_asa.make_array(), true); + + auto cond_aas = ArrayFromJSON(boolean(), "[false, true, true]"); + ASSERT_OK_AND_ASSIGN(auto result_aas, + CallFunction("if_else", {cond_aas, sliced, MakeNullScalar(type)})); + ASSERT_OK(result_aas.make_array()->ValidateFull()); + AssertArraysEqual(*expected, *result_aas.make_array(), true); +} + +// array offset=0 but offsets[0] != 0 +TYPED_TEST(TestIfElseBaseBinary, IfElseBaseBinaryNonZeroFirst) { + auto type = TypeTraits::type_singleton(); + using OffsetType = typename TypeTraits::OffsetType::c_type; + + std::vector raw_offsets = {8, 8, 9, 10}; + std::string raw_data(8, 'p'); + raw_data += "ab"; + auto offsets_buf = Buffer::Wrap(raw_offsets.data(), raw_offsets.size()); + auto data_buf = Buffer::Wrap(raw_data.data(), raw_data.size()); + auto array_data = ArrayData::Make(type, /*length=*/3, {nullptr, offsets_buf, data_buf}, + /*null_count=*/1, /*offset=*/0); + std::vector validity_bytes = {0, 1, 1}; + ASSERT_OK_AND_ASSIGN(array_data->buffers[0], + internal::BytesToBits(validity_bytes, default_memory_pool())); + auto arr = MakeArray(array_data); + ASSERT_OK(arr->ValidateFull()); + auto expected = ArrayFromJSON(type, R"([null, "a", "b"])"); + + auto cond_asa = ArrayFromJSON(boolean(), "[true, false, false]"); + ASSERT_OK_AND_ASSIGN(auto result_asa, + CallFunction("if_else", {cond_asa, MakeNullScalar(type), arr})); + ASSERT_OK(result_asa.make_array()->ValidateFull()); + AssertArraysEqual(*expected, *result_asa.make_array(), true); + + auto cond_aas = ArrayFromJSON(boolean(), "[false, true, true]"); + ASSERT_OK_AND_ASSIGN(auto result_aas, + CallFunction("if_else", {cond_aas, arr, MakeNullScalar(type)})); + ASSERT_OK(result_aas.make_array()->ValidateFull()); + AssertArraysEqual(*expected, *result_aas.make_array(), true); +} + Result> MakeBinaryArrayWithData( const std::shared_ptr& type, const std::shared_ptr& data_buffer) { // Make a (large-)binary array with a single item backed by the given data