|
30 | 30 | #include "arrow/type_traits.h" |
31 | 31 | #include "arrow/util/bit_run_reader.h" |
32 | 32 | #include "arrow/util/checked_cast.h" |
| 33 | +#include "arrow/util/endian.h" |
33 | 34 | #include "arrow/util/float16.h" |
34 | 35 | #include "arrow/util/logging_internal.h" |
35 | 36 | #include "arrow/util/ubsan.h" |
@@ -925,22 +926,94 @@ void TypedStatisticsImpl<DType>::UpdateSpaced(const T* values, const uint8_t* va |
925 | 926 |
|
926 | 927 | template <typename DType> |
927 | 928 | void TypedStatisticsImpl<DType>::PlainEncode(const T& src, std::string* dst) const { |
| 929 | +#if ARROW_LITTLE_ENDIAN |
928 | 930 | auto encoder = MakeTypedEncoder<DType>(Encoding::PLAIN, false, descr_, pool_); |
929 | 931 | encoder->Put(&src, 1); |
930 | 932 | auto buffer = encoder->FlushValues(); |
931 | 933 | auto ptr = reinterpret_cast<const char*>(buffer->data()); |
932 | 934 | dst->assign(ptr, static_cast<size_t>(buffer->size())); |
| 935 | +#else |
| 936 | + // For fixed-width numeric types, write explicit little-endian bytes per spec |
| 937 | + if constexpr (std::is_same_v<DType, Int32Type>) { |
| 938 | + uint32_t u; |
| 939 | + std::memcpy(&u, &src, sizeof(u)); |
| 940 | + u = ::arrow::bit_util::ToLittleEndian(u); |
| 941 | + dst->assign(reinterpret_cast<const char*>(&u), sizeof(u)); |
| 942 | + return; |
| 943 | + } else if constexpr (std::is_same_v<DType, Int64Type>) { |
| 944 | + uint64_t u; |
| 945 | + std::memcpy(&u, &src, sizeof(u)); |
| 946 | + u = ::arrow::bit_util::ToLittleEndian(u); |
| 947 | + dst->assign(reinterpret_cast<const char*>(&u), sizeof(u)); |
| 948 | + return; |
| 949 | + } else if constexpr (std::is_same_v<DType, FloatType>) { |
| 950 | + uint32_t u; |
| 951 | + static_assert(sizeof(u) == sizeof(float), "size"); |
| 952 | + std::memcpy(&u, &src, sizeof(u)); |
| 953 | + u = ::arrow::bit_util::ToLittleEndian(u); |
| 954 | + dst->assign(reinterpret_cast<const char*>(&u), sizeof(u)); |
| 955 | + return; |
| 956 | + } else if constexpr (std::is_same_v<DType, DoubleType>) { |
| 957 | + uint64_t u; |
| 958 | + static_assert(sizeof(u) == sizeof(double), "size"); |
| 959 | + std::memcpy(&u, &src, sizeof(u)); |
| 960 | + u = ::arrow::bit_util::ToLittleEndian(u); |
| 961 | + dst->assign(reinterpret_cast<const char*>(&u), sizeof(u)); |
| 962 | + return; |
| 963 | + } |
| 964 | + // Fallback: use encoder for other types |
| 965 | + auto encoder = MakeTypedEncoder<DType>(Encoding::PLAIN, false, descr_, pool_); |
| 966 | + encoder->Put(&src, 1); |
| 967 | + auto buffer = encoder->FlushValues(); |
| 968 | + dst->assign(reinterpret_cast<const char*>(buffer->data()), |
| 969 | + static_cast<size_t>(buffer->size())); |
| 970 | +#endif |
933 | 971 | } |
934 | 972 |
|
935 | 973 | template <typename DType> |
936 | 974 | void TypedStatisticsImpl<DType>::PlainDecode(const std::string& src, T* dst) const { |
| 975 | +#if ARROW_LITTLE_ENDIAN |
| 976 | + auto decoder = MakeTypedDecoder<DType>(Encoding::PLAIN, descr_); |
| 977 | + decoder->SetData(1, reinterpret_cast<const uint8_t*>(src.c_str()), |
| 978 | + static_cast<int>(src.size())); |
| 979 | + int decoded_values = decoder->Decode(dst, 1); |
| 980 | + if (decoded_values != 1) { |
| 981 | + throw ParquetException("Failed to decode statistic value from plain encoded string"); |
| 982 | + } |
| 983 | +#else |
| 984 | + if constexpr (std::is_same_v<DType, Int32Type>) { |
| 985 | + uint32_t u = 0; |
| 986 | + std::memcpy(&u, src.data(), std::min(src.size(), sizeof(u))); |
| 987 | + u = ::arrow::bit_util::FromLittleEndian(u); |
| 988 | + std::memcpy(dst, &u, sizeof(u)); |
| 989 | + return; |
| 990 | + } else if constexpr (std::is_same_v<DType, Int64Type>) { |
| 991 | + uint64_t u = 0; |
| 992 | + std::memcpy(&u, src.data(), std::min(src.size(), sizeof(u))); |
| 993 | + u = ::arrow::bit_util::FromLittleEndian(u); |
| 994 | + std::memcpy(dst, &u, sizeof(u)); |
| 995 | + return; |
| 996 | + } else if constexpr (std::is_same_v<DType, FloatType>) { |
| 997 | + uint32_t u = 0; |
| 998 | + std::memcpy(&u, src.data(), std::min(src.size(), sizeof(u))); |
| 999 | + u = ::arrow::bit_util::FromLittleEndian(u); |
| 1000 | + std::memcpy(dst, &u, sizeof(u)); |
| 1001 | + return; |
| 1002 | + } else if constexpr (std::is_same_v<DType, DoubleType>) { |
| 1003 | + uint64_t u = 0; |
| 1004 | + std::memcpy(&u, src.data(), std::min(src.size(), sizeof(u))); |
| 1005 | + u = ::arrow::bit_util::FromLittleEndian(u); |
| 1006 | + std::memcpy(dst, &u, sizeof(u)); |
| 1007 | + return; |
| 1008 | + } |
937 | 1009 | auto decoder = MakeTypedDecoder<DType>(Encoding::PLAIN, descr_); |
938 | 1010 | decoder->SetData(1, reinterpret_cast<const uint8_t*>(src.c_str()), |
939 | 1011 | static_cast<int>(src.size())); |
940 | 1012 | int decoded_values = decoder->Decode(dst, 1); |
941 | 1013 | if (decoded_values != 1) { |
942 | 1014 | throw ParquetException("Failed to decode statistic value from plain encoded string"); |
943 | 1015 | } |
| 1016 | +#endif |
944 | 1017 | } |
945 | 1018 |
|
946 | 1019 | template <> |
|
0 commit comments