-
Notifications
You must be signed in to change notification settings - Fork 3.9k
Description
Describe the bug, including details regarding any error messages, version, and platform.
The following method causes illegal memory access when creating from a combination of negative zero and non-zero values, and produces incorrect tensor values (potentially leading to illegal memory access) when creating from a column-major tensor.
arrow/cpp/src/arrow/sparse_tensor.h
Lines 583 to 594 in fddd356
static inline Result<std::shared_ptr<SparseTensorImpl<SparseIndexType>>> Make( | |
const Tensor& tensor, const std::shared_ptr<DataType>& index_value_type, | |
MemoryPool* pool = default_memory_pool()) { | |
std::shared_ptr<SparseIndex> sparse_index; | |
std::shared_ptr<Buffer> data; | |
ARROW_RETURN_NOT_OK(internal::MakeSparseTensorFromTensor( | |
tensor, SparseIndexType::format_id, index_value_type, pool, &sparse_index, | |
&data)); | |
return std::make_shared<SparseTensorImpl<SparseIndexType>>( | |
internal::checked_pointer_cast<SparseIndexType>(sparse_index), tensor.type(), | |
data, tensor.shape(), tensor.dim_names_); | |
} |
1- The following code leads to illegal memory access.
TEST(MyTest, SegFault) {
// clang-format off
std::vector<float> data{
-0.0, -0.0, -0.0,
-0.0, -0.0, -0.0,
-0.0, -0.0, -0.0,
-1.0, -0.0, -0.0,
};
// clang-format on
std::vector<int64_t> shape = {4, 3};
auto buffer = Buffer::FromVector(data);
ASSERT_OK_AND_ASSIGN(auto dense_tensor, Tensor::Make(float32(), buffer, shape));
ASSERT_OK_AND_ASSIGN(auto sparse_coo_tensor,
SparseCOOTensor::Make(*dense_tensor, int64()));
ARROW_LOGGER_INFO("", sparse_coo_tensor->sparse_index()->non_zero_length());
}
and the error is:
: 1
mimalloc: error: buffer overflow in heap block 0x0200000100C0 of size 152: write after 152 bytes
Process finished with exit code 134 (interrupted by signal 6:SIGABRT)
The reason for this is rooted in the following code.
arrow/cpp/src/arrow/tensor/coo_converter.cc
Line 179 in fddd356
ARROW_ASSIGN_OR_RAISE(int64_t nonzero_count, tensor_.CountNonZero()); |
In the above code, both positive and negative zero are considered zero; however, in the following code, negative zero is treated as a non-zero value.
arrow/cpp/src/arrow/tensor/coo_converter.cc
Line 193 in fddd356
if (std::any_of(tensor_data, tensor_data + value_elsize, IsNonZero)) { |
if (ARROW_PREDICT_FALSE(x != zero)) { |
Note that in the above code, the type of
c_value_type
is always an unsigned integer due to the following code.arrow/cpp/src/arrow/tensor/converter_internal.h
Lines 22 to 88 in fddd356
#define DISPATCH(ACTION, index_elsize, value_elsize, ...) \ | |
switch (index_elsize) { \ | |
case 1: \ | |
switch (value_elsize) { \ | |
case 1: \ | |
ACTION(uint8_t, uint8_t, __VA_ARGS__); \ | |
break; \ | |
case 2: \ | |
ACTION(uint8_t, uint16_t, __VA_ARGS__); \ | |
break; \ | |
case 4: \ | |
ACTION(uint8_t, uint32_t, __VA_ARGS__); \ | |
break; \ | |
case 8: \ | |
ACTION(uint8_t, uint64_t, __VA_ARGS__); \ | |
break; \ | |
} \ | |
break; \ | |
case 2: \ | |
switch (value_elsize) { \ | |
case 1: \ | |
ACTION(uint16_t, uint8_t, __VA_ARGS__); \ | |
break; \ | |
case 2: \ | |
ACTION(uint16_t, uint16_t, __VA_ARGS__); \ | |
break; \ | |
case 4: \ | |
ACTION(uint16_t, uint32_t, __VA_ARGS__); \ | |
break; \ | |
case 8: \ | |
ACTION(uint16_t, uint64_t, __VA_ARGS__); \ | |
break; \ | |
} \ | |
break; \ | |
case 4: \ | |
switch (value_elsize) { \ | |
case 1: \ | |
ACTION(uint32_t, uint8_t, __VA_ARGS__); \ | |
break; \ | |
case 2: \ | |
ACTION(uint32_t, uint16_t, __VA_ARGS__); \ | |
break; \ | |
case 4: \ | |
ACTION(uint32_t, uint32_t, __VA_ARGS__); \ | |
break; \ | |
case 8: \ | |
ACTION(uint32_t, uint64_t, __VA_ARGS__); \ | |
break; \ | |
} \ | |
break; \ | |
case 8: \ | |
switch (value_elsize) { \ | |
case 1: \ | |
ACTION(int64_t, uint8_t, __VA_ARGS__); \ | |
break; \ | |
case 2: \ | |
ACTION(int64_t, uint16_t, __VA_ARGS__); \ | |
break; \ | |
case 4: \ | |
ACTION(int64_t, uint32_t, __VA_ARGS__); \ | |
break; \ | |
case 8: \ | |
ACTION(int64_t, uint64_t, __VA_ARGS__); \ | |
break; \ | |
} \ | |
break; \ | |
} |
2- Incorrect tensor values when creating a SparseCOOTensor from a ColumnMajorTensor via SparseCOOTensor::Make
TEST(My, ColumnMajor) {
// clang-format off
std::vector<int> data{
1, 4, 7, 10,
2, 5, 8, 11,
3, 6, 9, 12
};
// clang-format on
std::vector<int> data_2{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12};
auto buffer = Buffer::FromVector(data);
auto buffer_2 = Buffer::FromVector(data_2);
std::vector<int64_t> shape = {4, 3};
std::vector<int64_t> strides = {sizeof(int), 4 * sizeof(int)};
ASSERT_OK_AND_ASSIGN(auto tensor, Tensor::Make(int32(), buffer, shape, strides));
ASSERT_OK_AND_ASSIGN(auto tensor_2, Tensor::Make(int32(), buffer_2, shape));
ASSERT_TRUE(tensor->Equals(*tensor_2));
ASSERT_TRUE(tensor->is_contiguous());
ASSERT_TRUE(tensor->is_column_major());
ASSERT_OK_AND_ASSIGN(auto sparse_tensor, SparseCOOTensor::Make(*tensor))
ASSERT_OK_AND_ASSIGN(auto new_tensor, sparse_tensor->ToTensor());
ASSERT_EQ(12, sparse_tensor->non_zero_length());
ASSERT_TRUE(new_tensor->is_contiguous());
ASSERT_TRUE(new_tensor->is_row_major());
// new_tensor is not equal to tensor!!
ASSERT_FALSE(new_tensor->Equals(*tensor));
}
ASSERT_FALSE(new_tensor->Equals(*tensor));
passes because of the following code.
arrow/cpp/src/arrow/tensor/coo_converter.cc
Lines 86 to 91 in fddd356
// transpose indices | |
for (int64_t i = 0; i < size; ++i) { | |
for (int j = 0; j < ndim / 2; ++j) { | |
std::swap(indices[i * ndim + j], indices[i * ndim + ndim - j - 1]); | |
} | |
} |
Given that logic, the index
{2,3}
is produced for a column-major contiguous tensor with shape {4,3}
, which is not a correct index.
Component(s)
C++