Skip to content

Commit 69807c0

Browse files
GH-48206 Fix Statistics logic to enable Parquet DB support on s390x
1 parent 2fb2f79 commit 69807c0

File tree

2 files changed

+74
-1
lines changed

2 files changed

+74
-1
lines changed

cpp/src/parquet/statistics.cc

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
#include "arrow/type_traits.h"
3131
#include "arrow/util/bit_run_reader.h"
3232
#include "arrow/util/checked_cast.h"
33+
#include "arrow/util/endian.h"
3334
#include "arrow/util/float16.h"
3435
#include "arrow/util/logging_internal.h"
3536
#include "arrow/util/ubsan.h"
@@ -925,22 +926,94 @@ void TypedStatisticsImpl<DType>::UpdateSpaced(const T* values, const uint8_t* va
925926

926927
template <typename DType>
927928
void TypedStatisticsImpl<DType>::PlainEncode(const T& src, std::string* dst) const {
929+
#if ARROW_LITTLE_ENDIAN
928930
auto encoder = MakeTypedEncoder<DType>(Encoding::PLAIN, false, descr_, pool_);
929931
encoder->Put(&src, 1);
930932
auto buffer = encoder->FlushValues();
931933
auto ptr = reinterpret_cast<const char*>(buffer->data());
932934
dst->assign(ptr, static_cast<size_t>(buffer->size()));
935+
#else
936+
// For fixed-width numeric types, write explicit little-endian bytes per spec
937+
if constexpr (std::is_same_v<DType, Int32Type>) {
938+
uint32_t u;
939+
std::memcpy(&u, &src, sizeof(u));
940+
u = ::arrow::bit_util::ToLittleEndian(u);
941+
dst->assign(reinterpret_cast<const char*>(&u), sizeof(u));
942+
return;
943+
} else if constexpr (std::is_same_v<DType, Int64Type>) {
944+
uint64_t u;
945+
std::memcpy(&u, &src, sizeof(u));
946+
u = ::arrow::bit_util::ToLittleEndian(u);
947+
dst->assign(reinterpret_cast<const char*>(&u), sizeof(u));
948+
return;
949+
} else if constexpr (std::is_same_v<DType, FloatType>) {
950+
uint32_t u;
951+
static_assert(sizeof(u) == sizeof(float), "size");
952+
std::memcpy(&u, &src, sizeof(u));
953+
u = ::arrow::bit_util::ToLittleEndian(u);
954+
dst->assign(reinterpret_cast<const char*>(&u), sizeof(u));
955+
return;
956+
} else if constexpr (std::is_same_v<DType, DoubleType>) {
957+
uint64_t u;
958+
static_assert(sizeof(u) == sizeof(double), "size");
959+
std::memcpy(&u, &src, sizeof(u));
960+
u = ::arrow::bit_util::ToLittleEndian(u);
961+
dst->assign(reinterpret_cast<const char*>(&u), sizeof(u));
962+
return;
963+
}
964+
// Fallback: use encoder for other types
965+
auto encoder = MakeTypedEncoder<DType>(Encoding::PLAIN, false, descr_, pool_);
966+
encoder->Put(&src, 1);
967+
auto buffer = encoder->FlushValues();
968+
dst->assign(reinterpret_cast<const char*>(buffer->data()),
969+
static_cast<size_t>(buffer->size()));
970+
#endif
933971
}
934972

935973
template <typename DType>
936974
void TypedStatisticsImpl<DType>::PlainDecode(const std::string& src, T* dst) const {
975+
#if ARROW_LITTLE_ENDIAN
976+
auto decoder = MakeTypedDecoder<DType>(Encoding::PLAIN, descr_);
977+
decoder->SetData(1, reinterpret_cast<const uint8_t*>(src.c_str()),
978+
static_cast<int>(src.size()));
979+
int decoded_values = decoder->Decode(dst, 1);
980+
if (decoded_values != 1) {
981+
throw ParquetException("Failed to decode statistic value from plain encoded string");
982+
}
983+
#else
984+
if constexpr (std::is_same_v<DType, Int32Type>) {
985+
uint32_t u = 0;
986+
std::memcpy(&u, src.data(), std::min(src.size(), sizeof(u)));
987+
u = ::arrow::bit_util::FromLittleEndian(u);
988+
std::memcpy(dst, &u, sizeof(u));
989+
return;
990+
} else if constexpr (std::is_same_v<DType, Int64Type>) {
991+
uint64_t u = 0;
992+
std::memcpy(&u, src.data(), std::min(src.size(), sizeof(u)));
993+
u = ::arrow::bit_util::FromLittleEndian(u);
994+
std::memcpy(dst, &u, sizeof(u));
995+
return;
996+
} else if constexpr (std::is_same_v<DType, FloatType>) {
997+
uint32_t u = 0;
998+
std::memcpy(&u, src.data(), std::min(src.size(), sizeof(u)));
999+
u = ::arrow::bit_util::FromLittleEndian(u);
1000+
std::memcpy(dst, &u, sizeof(u));
1001+
return;
1002+
} else if constexpr (std::is_same_v<DType, DoubleType>) {
1003+
uint64_t u = 0;
1004+
std::memcpy(&u, src.data(), std::min(src.size(), sizeof(u)));
1005+
u = ::arrow::bit_util::FromLittleEndian(u);
1006+
std::memcpy(dst, &u, sizeof(u));
1007+
return;
1008+
}
9371009
auto decoder = MakeTypedDecoder<DType>(Encoding::PLAIN, descr_);
9381010
decoder->SetData(1, reinterpret_cast<const uint8_t*>(src.c_str()),
9391011
static_cast<int>(src.size()));
9401012
int decoded_values = decoder->Decode(dst, 1);
9411013
if (decoded_values != 1) {
9421014
throw ParquetException("Failed to decode statistic value from plain encoded string");
9431015
}
1016+
#endif
9441017
}
9451018

9461019
template <>

0 commit comments

Comments
 (0)