From 00597ddd68bf58206e0e00359ab7776d3f517134 Mon Sep 17 00:00:00 2001 From: zclllyybb Date: Wed, 15 Apr 2026 20:07:22 +0800 Subject: [PATCH] [refactor](be) Remove io_helper helper indirection from BE cast paths (#62179) Problem Summary: Remove the legacy io_helper wrapper layer and move its parsing and formatting entry points onto the existing cast implementations so serde and conversion code depends on the real owners of that behavior. --- be/src/core/data_type/data_type_bitmap.cpp | 1 - be/src/core/data_type/data_type_date.cpp | 1 - .../data_type_date_or_datetime_v2.cpp | 1 - be/src/core/data_type/data_type_date_time.cpp | 1 - be/src/core/data_type/data_type_decimal.cpp | 1 - be/src/core/data_type/data_type_hll.cpp | 1 - be/src/core/data_type/data_type_ipv4.cpp | 1 - be/src/core/data_type/data_type_ipv6.cpp | 1 - .../core/data_type/data_type_number_base.cpp | 7 +- .../data_type/data_type_quantilestate.cpp | 3 +- .../data_type_date_or_datetime_serde.cpp | 15 +- .../data_type_datetimev2_serde.cpp | 5 +- .../data_type_datev2_serde.cpp | 5 +- .../data_type_decimal_serde.cpp | 18 +- .../data_type_serde/data_type_ipv4_serde.cpp | 4 +- .../data_type_serde/data_type_ipv6_serde.cpp | 4 +- .../data_type_number_serde.cpp | 23 +- be/src/core/data_type_serde/data_type_serde.h | 2 +- be/src/core/field.cpp | 1 - be/src/exec/common/hash_table/hash_table.h | 1 - be/src/exec/common/histogram_helpers.hpp | 1 - be/src/exec/common/space_saving.h | 1 - .../aggregate/aggregate_function_array_agg.h | 1 - .../aggregate_function_avg_weighted.h | 1 - .../exprs/aggregate/aggregate_function_bit.h | 1 - .../aggregate/aggregate_function_collect.h | 1 - .../exprs/aggregate/aggregate_function_corr.h | 6 +- .../aggregate/aggregate_function_histogram.h | 1 - .../aggregate/aggregate_function_java_udaf.h | 1 - .../aggregate/aggregate_function_min_max.h | 2 +- .../aggregate/aggregate_function_product.h | 14 +- .../exprs/aggregate/aggregate_function_rpc.h | 1 - .../aggregate_function_sequence_match.h | 39 +- .../exprs/aggregate/aggregate_function_topn.h | 3 +- .../exprs/aggregate/aggregate_function_uniq.h | 3 +- be/src/exprs/aggregate/moments.h | 1 - be/src/exprs/function/cast/cast_base.h | 1 - .../cast/cast_to_basic_number_common.h | 63 ++- be/src/exprs/function/cast/cast_to_boolean.h | 8 +- be/src/exprs/function/cast/cast_to_decimal.h | 414 ++++++++++-------- be/src/exprs/function/cast/cast_to_float.h | 7 +- be/src/exprs/function/cast/cast_to_int.h | 16 +- be/src/exprs/function/cast/cast_to_jsonb.h | 3 +- be/src/exprs/function/cast/cast_to_string.h | 24 +- .../exprs/function/cast/cast_to_timestamptz.h | 3 +- be/src/exprs/function/function_json.cpp | 1 - be/src/format/column_type_convert.h | 133 +++--- be/src/runtime/runtime_state.cpp | 8 +- be/src/storage/olap_scan_common.h | 1 - be/src/util/counts.h | 1 - be/src/util/io_helper.cpp | 89 ---- be/src/util/io_helper.h | 237 ---------- be/src/util/jsonb_document_cast.h | 16 +- be/test/common/check_overflow.cpp | 5 +- .../data_type_serde_decimal_test.cpp | 28 ++ .../data_type_serde_number_test.cpp | 36 ++ be/test/core/field_test.cpp | 2 +- be/test/exec/column_type_convert_test.cpp | 124 +++++- .../aggregate/vec_sequence_match_test.cpp | 104 ++++- .../function/cast/cast_to_float_double.cpp | 21 +- .../function/cast/cast_to_string_api_test.cpp | 14 +- 61 files changed, 812 insertions(+), 720 deletions(-) delete mode 100644 be/src/util/io_helper.cpp delete mode 100644 be/src/util/io_helper.h diff --git a/be/src/core/data_type/data_type_bitmap.cpp b/be/src/core/data_type/data_type_bitmap.cpp index 3831e22dda214c..06806b7bde6d41 100644 --- a/be/src/core/data_type/data_type_bitmap.cpp +++ b/be/src/core/data_type/data_type_bitmap.cpp @@ -27,7 +27,6 @@ #include "core/string_buffer.hpp" #include "core/string_ref.h" #include "core/value/bitmap_value.h" -#include "util/io_helper.h" namespace doris { #include "common/compile_check_begin.h" diff --git a/be/src/core/data_type/data_type_date.cpp b/be/src/core/data_type/data_type_date.cpp index 614f60d8097186..6229fcbbf2ec5d 100644 --- a/be/src/core/data_type/data_type_date.cpp +++ b/be/src/core/data_type/data_type_date.cpp @@ -33,7 +33,6 @@ #include "core/types.h" #include "core/value/vdatetime_value.h" #include "exprs/function/cast/cast_to_date_or_datetime_impl.hpp" -#include "util/io_helper.h" namespace doris { bool DataTypeDate::equals(const IDataType& rhs) const { diff --git a/be/src/core/data_type/data_type_date_or_datetime_v2.cpp b/be/src/core/data_type/data_type_date_or_datetime_v2.cpp index 23106da687e512..00395459f782f1 100644 --- a/be/src/core/data_type/data_type_date_or_datetime_v2.cpp +++ b/be/src/core/data_type/data_type_date_or_datetime_v2.cpp @@ -35,7 +35,6 @@ #include "exprs/function/cast/cast_to_datetimev2_impl.hpp" #include "exprs/function/cast/cast_to_datev2_impl.hpp" #include "exprs/function/cast/cast_to_string.h" -#include "util/io_helper.h" namespace doris { #include "common/compile_check_begin.h" diff --git a/be/src/core/data_type/data_type_date_time.cpp b/be/src/core/data_type/data_type_date_time.cpp index 21baad0181dd47..3813871f764bc8 100644 --- a/be/src/core/data_type/data_type_date_time.cpp +++ b/be/src/core/data_type/data_type_date_time.cpp @@ -34,7 +34,6 @@ #include "core/value/vdatetime_value.h" #include "exprs/function/cast/cast_to_date_or_datetime_impl.hpp" #include "exprs/function/cast/cast_to_string.h" -#include "util/io_helper.h" namespace doris { diff --git a/be/src/core/data_type/data_type_decimal.cpp b/be/src/core/data_type/data_type_decimal.cpp index c30fd0a7b10bab..cfc06c83ceecc8 100644 --- a/be/src/core/data_type/data_type_decimal.cpp +++ b/be/src/core/data_type/data_type_decimal.cpp @@ -46,7 +46,6 @@ #include "core/value/decimalv2_value.h" #include "exec/common/int_exp.h" #include "exprs/function/cast/cast_to_string.h" -#include "util/io_helper.h" #include "util/string_parser.hpp" namespace doris { diff --git a/be/src/core/data_type/data_type_hll.cpp b/be/src/core/data_type/data_type_hll.cpp index 8e18b765402477..9e877d7407ee03 100644 --- a/be/src/core/data_type/data_type_hll.cpp +++ b/be/src/core/data_type/data_type_hll.cpp @@ -27,7 +27,6 @@ #include "core/column/column_complex.h" #include "core/column/column_const.h" #include "core/string_buffer.hpp" -#include "util/io_helper.h" #include "util/slice.h" namespace doris { diff --git a/be/src/core/data_type/data_type_ipv4.cpp b/be/src/core/data_type/data_type_ipv4.cpp index e3ed7a91e56603..26696cc679382d 100644 --- a/be/src/core/data_type/data_type_ipv4.cpp +++ b/be/src/core/data_type/data_type_ipv4.cpp @@ -25,7 +25,6 @@ #include "core/data_type/data_type.h" #include "core/string_buffer.hpp" #include "exprs/function/cast/cast_to_string.h" -#include "util/io_helper.h" namespace doris { diff --git a/be/src/core/data_type/data_type_ipv6.cpp b/be/src/core/data_type/data_type_ipv6.cpp index a29d787879816a..cc08a9903a6bb3 100644 --- a/be/src/core/data_type/data_type_ipv6.cpp +++ b/be/src/core/data_type/data_type_ipv6.cpp @@ -26,7 +26,6 @@ #include "core/string_buffer.hpp" #include "core/value/ipv6_value.h" #include "exprs/function/cast/cast_to_string.h" -#include "util/io_helper.h" namespace doris { bool DataTypeIPv6::equals(const IDataType& rhs) const { diff --git a/be/src/core/data_type/data_type_number_base.cpp b/be/src/core/data_type/data_type_number_base.cpp index cff73964a165c0..f2149bf29eb254 100644 --- a/be/src/core/data_type/data_type_number_base.cpp +++ b/be/src/core/data_type/data_type_number_base.cpp @@ -41,7 +41,6 @@ #include "core/types.h" #include "core/value/large_int_value.h" #include "exprs/function/cast/cast_to_string.h" -#include "util/io_helper.h" #include "util/mysql_global.h" #include "util/string_parser.hpp" #include "util/to_string.h" @@ -55,7 +54,11 @@ std::string DataTypeNumberBase::to_string( if constexpr (std::is_same::CppType, int128_t>::value || std::is_same::CppType, uint128_t>::value || std::is_same::CppType, UInt128>::value) { - return int128_to_string(value); + if constexpr (std::is_same::CppType, int128_t>::value) { + return CastToString::from_int128(value); + } else { + return CastToString::from_uint128(value); + } } else if constexpr (std::is_integral::CppType>::value) { return std::to_string(value); } else if constexpr (T == TYPE_DATETIME || T == TYPE_DATE) { diff --git a/be/src/core/data_type/data_type_quantilestate.cpp b/be/src/core/data_type/data_type_quantilestate.cpp index 5926fb5f942ddb..a3f98435cb9349 100644 --- a/be/src/core/data_type/data_type_quantilestate.cpp +++ b/be/src/core/data_type/data_type_quantilestate.cpp @@ -23,7 +23,6 @@ #include "core/column/column_complex.h" #include "core/string_buffer.hpp" #include "core/string_ref.h" -#include "util/io_helper.h" #include "util/slice.h" namespace doris { @@ -176,4 +175,4 @@ void DataTypeQuantileState::deserialize_as_stream(QuantileState& value, BufferRe value.deserialize(ref.to_slice()); } -} // namespace doris \ No newline at end of file +} // namespace doris diff --git a/be/src/core/data_type_serde/data_type_date_or_datetime_serde.cpp b/be/src/core/data_type_serde/data_type_date_or_datetime_serde.cpp index 8ad8ac581d7199..3ed1b7ecb43fa2 100644 --- a/be/src/core/data_type_serde/data_type_date_or_datetime_serde.cpp +++ b/be/src/core/data_type_serde/data_type_date_or_datetime_serde.cpp @@ -27,7 +27,6 @@ #include "core/value/vdatetime_value.h" #include "exprs/function/cast/cast_base.h" #include "exprs/function/cast/cast_to_date_or_datetime_impl.hpp" -#include "util/io_helper.h" namespace doris { #include "common/compile_check_begin.h" @@ -77,9 +76,13 @@ Status DataTypeDateSerDe::deserialize_one_cell_from_json( slice.trim_quote(); } VecDateTimeValue val; - if (StringRef str(slice.data, slice.size); !read_date_text_impl(val, str)) { + StringRef str(slice.data, slice.size); + CastParameters params; + if (!CastToDateOrDatetime::from_string_non_strict_mode( + str, val, nullptr, params)) { return Status::InvalidArgument("parse date fail, string: '{}'", str.to_string()); } + val.cast_to_date(); column_data.insert_value(val); return Status::OK(); } @@ -124,9 +127,13 @@ Status DataTypeDateTimeSerDe::deserialize_one_cell_from_json(IColumn& column, Sl slice.trim_quote(); } VecDateTimeValue val; - if (StringRef str(slice.data, slice.size); !read_datetime_text_impl(val, str)) { + StringRef str(slice.data, slice.size); + CastParameters params; + if (!CastToDateOrDatetime::from_string_non_strict_mode( + str, val, nullptr, params)) { return Status::InvalidArgument("parse datetime fail, string: '{}'", str.to_string()); } + val.to_datetime(); column_data.insert_value(val); return Status::OK(); } @@ -225,10 +232,10 @@ Status DataTypeDateSerDe::_read_column_from_arrow(IColumn& column, } else if (arrow_array->type()->id() == arrow::Type::STRING) { // to be compatible with old version, we use string type for date. const auto* concrete_array = dynamic_cast(arrow_array); + CastParameters params; for (auto value_i = start; value_i < end; ++value_i) { auto val_str = concrete_array->GetString(value_i); VecDateTimeValue v; - CastParameters params; CastToDateOrDatetime::from_string_non_strict_mode( {val_str.c_str(), val_str.length()}, v, &ctz, params); if constexpr (is_date) { diff --git a/be/src/core/data_type_serde/data_type_datetimev2_serde.cpp b/be/src/core/data_type_serde/data_type_datetimev2_serde.cpp index cf239ebcc16aad..ff8c33045e4dd6 100644 --- a/be/src/core/data_type_serde/data_type_datetimev2_serde.cpp +++ b/be/src/core/data_type_serde/data_type_datetimev2_serde.cpp @@ -32,7 +32,6 @@ #include "core/value/vdatetime_value.h" #include "exprs/function/cast/cast_to_datetimev2_impl.hpp" #include "exprs/function/cast/cast_to_string.h" -#include "util/io_helper.h" enum { DIVISOR_FOR_SECOND = 1, @@ -346,7 +345,9 @@ Status DataTypeDateTimeV2SerDe::deserialize_one_cell_from_json(IColumn& column, slice.trim_quote(); } DateV2Value val; - if (StringRef str(slice.data, slice.size); !read_datetime_v2_text_impl(val, str, _scale)) { + StringRef str(slice.data, slice.size); + CastParameters params; + if (!CastToDatetimeV2::from_string_non_strict_mode(str, val, nullptr, _scale, params)) { return Status::InvalidArgument("parse date fail, string: '{}'", str.to_string()); } column_data.insert_value(val); diff --git a/be/src/core/data_type_serde/data_type_datev2_serde.cpp b/be/src/core/data_type_serde/data_type_datev2_serde.cpp index 5413b9753fcc62..bd90bf91ab1949 100644 --- a/be/src/core/data_type_serde/data_type_datev2_serde.cpp +++ b/be/src/core/data_type_serde/data_type_datev2_serde.cpp @@ -31,7 +31,6 @@ #include "core/value/vdatetime_value.h" #include "exprs/function/cast/cast_to_datev2_impl.hpp" #include "exprs/function/cast/cast_to_string.h" -#include "util/io_helper.h" namespace doris { @@ -81,7 +80,9 @@ Status DataTypeDateV2SerDe::deserialize_one_cell_from_json(IColumn& column, Slic } auto& column_data = assert_cast(column); DateV2Value val; - if (StringRef str(slice.data, slice.size); !read_date_v2_text_impl(val, str)) { + StringRef str(slice.data, slice.size); + CastParameters params; + if (!CastToDateV2::from_string_non_strict_mode(str, val, nullptr, params)) { return Status::InvalidArgument("parse date fail, string: '{}'", str.to_string()); } column_data.insert_value(val); diff --git a/be/src/core/data_type_serde/data_type_decimal_serde.cpp b/be/src/core/data_type_serde/data_type_decimal_serde.cpp index 1e277bda86be88..aace06c0152f5e 100644 --- a/be/src/core/data_type_serde/data_type_decimal_serde.cpp +++ b/be/src/core/data_type_serde/data_type_decimal_serde.cpp @@ -25,6 +25,7 @@ #include #include "arrow/type.h" +#include "common/cast_set.h" #include "common/consts.h" #include "core/column/column.h" #include "core/column/column_decimal.h" @@ -36,10 +37,10 @@ #include "exprs/function/cast/cast_to_string.h" #include "orc/Int128.hh" #include "storage/tablet/tablet_schema.h" -#include "util/io_helper.h" #include "util/jsonb_document.h" #include "util/jsonb_document_cast.h" #include "util/jsonb_writer.h" +#include "util/string_parser.hpp" namespace doris { // #include "common/compile_check_begin.h" @@ -140,7 +141,7 @@ Status DataTypeDecimalSerDe::from_olap_string(const std::string& str, Field& // DecimalV2: zonemap stores "integer.fraction" with 9 zero-padded fractional digits. // E.g., DecimalV2 value 123.456 → to_olap_string() → "123.456000000". // Caller sets ignore_scale=false → parse with scale=9 → correctly restores the value. - // Note: read_decimal_text_impl() currently hardcodes DecimalV2Value::SCALE=9 for + // Note: CastToDecimal::from_string() currently hardcodes DecimalV2Value::SCALE=9 for // DecimalV2, so the passed-in scale is effectively ignored. But callers should still // set ignore_scale=false for semantic correctness. if (!CastToDecimal::from_string(StringRef(str), to, static_cast(precision), @@ -207,9 +208,16 @@ Status DataTypeDecimalSerDe::deserialize_one_cell_from_json(IColumn& column, auto& column_data = assert_cast&>(column).get_data(); FieldType val = {}; StringRef str_ref(slice.data, slice.size); - StringParser::ParseResult res = - read_decimal_text_impl(val, str_ref, precision, scale); - if (res == StringParser::PARSE_SUCCESS || res == StringParser::PARSE_UNDERFLOW) { + StringParser::ParseResult result = StringParser::PARSE_SUCCESS; + if constexpr (T == TYPE_DECIMALV2) { + val = DecimalV2Value(StringParser::string_to_decimal( + str_ref.data, cast_set(str_ref.size), DecimalV2Value::PRECISION, + DecimalV2Value::SCALE, &result)); + } else { + val.value = StringParser::string_to_decimal(str_ref.data, cast_set(str_ref.size), + precision, scale, &result); + } + if (result == StringParser::PARSE_SUCCESS || result == StringParser::PARSE_UNDERFLOW) { column_data.emplace_back(val); return Status::OK(); } diff --git a/be/src/core/data_type_serde/data_type_ipv4_serde.cpp b/be/src/core/data_type_serde/data_type_ipv4_serde.cpp index 4eb969c138f3bf..1b3978f929a8cb 100644 --- a/be/src/core/data_type_serde/data_type_ipv4_serde.cpp +++ b/be/src/core/data_type_serde/data_type_ipv4_serde.cpp @@ -23,7 +23,6 @@ #include "core/types.h" #include "exprs/function/cast/cast_to_ip.h" #include "exprs/function/cast/cast_to_string.h" -#include "util/io_helper.h" namespace doris { #include "common/compile_check_begin.h" @@ -68,7 +67,8 @@ Status DataTypeIPv4SerDe::deserialize_one_cell_from_json(IColumn& column, Slice& auto& column_data = reinterpret_cast(column); StringRef str(slice.data, slice.size); IPv4 val = 0; - if (!read_ipv4_text_impl(val, str)) { + CastParameters params; + if (!CastToIPv4::from_string(str, val, params)) { return Status::InvalidArgument("parse ipv4 fail, string: '{}'", str.to_string()); } column_data.insert_value(val); diff --git a/be/src/core/data_type_serde/data_type_ipv6_serde.cpp b/be/src/core/data_type_serde/data_type_ipv6_serde.cpp index ccd5a236807945..e860662e404003 100644 --- a/be/src/core/data_type_serde/data_type_ipv6_serde.cpp +++ b/be/src/core/data_type_serde/data_type_ipv6_serde.cpp @@ -26,7 +26,6 @@ #include "core/types.h" #include "exprs/function/cast/cast_to_ip.h" #include "exprs/function/cast/cast_to_string.h" -#include "util/io_helper.h" #include "util/jsonb_writer.h" namespace doris { @@ -91,7 +90,8 @@ Status DataTypeIPv6SerDe::deserialize_one_cell_from_json(IColumn& column, Slice& auto& column_data = reinterpret_cast(column); StringRef str(slice.data, slice.size); IPv6 val = 0; - if (!read_ipv6_text_impl(val, str)) { + CastParameters params; + if (!CastToIPv6::from_string(str, val, params)) { return Status::InvalidArgument("parse ipv6 fail, string: '{}'", str.to_string()); } column_data.insert_value(val); diff --git a/be/src/core/data_type_serde/data_type_number_serde.cpp b/be/src/core/data_type_serde/data_type_number_serde.cpp index 30b7ad479110e0..2835e9002a6891 100644 --- a/be/src/core/data_type_serde/data_type_number_serde.cpp +++ b/be/src/core/data_type_serde/data_type_number_serde.cpp @@ -34,7 +34,6 @@ #include "exprs/function/cast/cast_to_string.h" #include "storage/olap_common.h" #include "storage/types.h" -#include "util/io_helper.h" #include "util/jsonb_document.h" #include "util/jsonb_document_cast.h" #include "util/jsonb_writer.h" @@ -167,20 +166,23 @@ Status DataTypeNumberSerDe::deserialize_one_cell_from_json(IColumn& column, S return Status::InvalidArgument("uint128 is not support"); } else if constexpr (is_float_or_double(T) || T == TYPE_TIMEV2 || T == TYPE_TIME) { typename PrimitiveTypeTraits::CppType val = 0; - if (!try_read_float_text(val, str_ref)) { + CastParameters params; + if (!CastToFloat::from_string(str_ref, val, params)) { return Status::InvalidArgument("parse number fail, string: '{}'", slice.to_string()); } column_data.insert_value(val); } else if constexpr (T == TYPE_BOOLEAN) { // Note: here we should handle the bool type typename PrimitiveTypeTraits::CppType val = 0; - if (!try_read_bool_text(val, str_ref)) { + CastParameters params; + if (!CastToBool::from_string(str_ref, val, params)) { return Status::InvalidArgument("parse boolean fail, string: '{}'", slice.to_string()); } column_data.insert_value(val); } else if constexpr (is_int_or_bool(T)) { typename PrimitiveTypeTraits::CppType val = 0; - if (!try_read_int_text(val, str_ref)) { + CastParameters params; + if (!CastToInt::from_string(str_ref, val, params)) { return Status::InvalidArgument("parse number fail, string: '{}'", slice.to_string()); } column_data.insert_value(val); @@ -206,7 +208,7 @@ Status DataTypeNumberSerDe::serialize_one_cell_to_json(const IColumn& column, row_num = result.second; auto data = assert_cast(*ptr).get_element(row_num); if constexpr (T == TYPE_IPV6) { - std::string hex = int128_to_string(data); + std::string hex = CastToString::from_uint128(data); bw.write(hex.data(), hex.size()); } else if constexpr (T == TYPE_FLOAT || T == TYPE_DOUBLE) { auto str = CastToString::from_number(data); @@ -247,6 +249,7 @@ Status DataTypeNumberSerDe::read_column_from_arrow(IColumn& column, if (arrow_array->type_id() == arrow::Type::STRING) { const auto* concrete_array = dynamic_cast(arrow_array); std::shared_ptr buffer = concrete_array->value_data(); + CastParameters params; const auto* offsets_data = concrete_array->value_offsets()->data(); const size_t offset_size = sizeof(int32_t); @@ -267,7 +270,7 @@ Status DataTypeNumberSerDe::read_column_from_arrow(IColumn& column, if constexpr (T == TYPE_DATETIMEV2 || T == TYPE_TIMESTAMPTZ) { StringRef str_ref(raw_data, raw_data_len); UInt64 val = 0; - if (!try_read_int_text(val, str_ref)) { + if (!CastToInt::from_string(str_ref, val, params)) { return Status::Error(ErrorCode::INVALID_ARGUMENT, "parse number fail, string: '{}'", std::string(str_ref.data, str_ref.size).c_str()); @@ -277,7 +280,7 @@ Status DataTypeNumberSerDe::read_column_from_arrow(IColumn& column, } else if constexpr (T == TYPE_DATE || T == TYPE_DATETIME) { StringRef str_ref(raw_data, raw_data_len); Int64 val = 0; - if (!try_read_int_text(val, str_ref)) { + if (!CastToInt::from_string(str_ref, val, params)) { return Status::Error(ErrorCode::INVALID_ARGUMENT, "parse number fail, string: '{}'", std::string(str_ref.data, str_ref.size).c_str()); @@ -287,7 +290,7 @@ Status DataTypeNumberSerDe::read_column_from_arrow(IColumn& column, } else if constexpr (T == TYPE_DATEV2) { StringRef str_ref(raw_data, raw_data_len); UInt32 val = 0; - if (!try_read_int_text(val, str_ref)) { + if (!CastToInt::from_string(str_ref, val, params)) { return Status::Error(ErrorCode::INVALID_ARGUMENT, "parse number fail, string: '{}'", std::string(str_ref.data, str_ref.size).c_str()); @@ -297,7 +300,7 @@ Status DataTypeNumberSerDe::read_column_from_arrow(IColumn& column, } else { Int128 val = 0; StringRef str_ref(raw_data, raw_data_len); - if (!try_read_int_text(val, str_ref)) { + if (!CastToInt::from_string(str_ref, val, params)) { return Status::Error(ErrorCode::INVALID_ARGUMENT, "parse number fail, string: '{}'", std::string(str_ref.data, str_ref.size).c_str()); @@ -717,7 +720,7 @@ template bool try_parse_impl(typename PrimitiveTypeTraits::CppType& x, const StringRef& str_ref, CastParameters& params) { if constexpr (is_float_or_double(PT)) { - return try_read_float_text(x, str_ref); + return CastToFloat::from_string(str_ref, x, params); } else if constexpr (PT == TYPE_BOOLEAN) { return CastToBool::from_string(str_ref, x, params); } else if constexpr (is_int(PT)) { diff --git a/be/src/core/data_type_serde/data_type_serde.h b/be/src/core/data_type_serde/data_type_serde.h index 1643b0cf1512d1..4ffac54b89922f 100644 --- a/be/src/core/data_type_serde/data_type_serde.h +++ b/be/src/core/data_type_serde/data_type_serde.h @@ -222,7 +222,7 @@ class DataTypeSerDe { * E.g., DecimalV2 value 123.456 is stored as "123.456000000"; * parsing with scale=9 correctly restores the original value. * - * Note: for DecimalV2, read_decimal_text_impl() currently hardcodes + * Note: for DecimalV2, CastToDecimal::from_string() currently hardcodes * DecimalV2Value::SCALE=9 regardless of the passed-in scale, so the flag * does not actually affect DecimalV2 parsing today. However, callers should * still set it correctly for semantic clarity and future-proofing. diff --git a/be/src/core/field.cpp b/be/src/core/field.cpp index a2fae427350538..ad659303cce027 100644 --- a/be/src/core/field.cpp +++ b/be/src/core/field.cpp @@ -31,7 +31,6 @@ #include "core/value/timestamptz_value.h" #include "core/value/vdatetime_value.h" #include "exprs/function/cast/cast_to_string.h" -#include "util/io_helper.h" #include "util/var_int.h" namespace doris { diff --git a/be/src/exec/common/hash_table/hash_table.h b/be/src/exec/common/hash_table/hash_table.h index 0c0c98238db006..e996b9eca9f4e0 100644 --- a/be/src/exec/common/hash_table/hash_table.h +++ b/be/src/exec/common/hash_table/hash_table.h @@ -30,7 +30,6 @@ #include "common/status.h" #include "core/types.h" #include "runtime/runtime_profile.h" -#include "util/io_helper.h" /** NOTE HashTable could only be used for memmoveable (position independent) types. * Example: std::string is not position independent in libstdc++ with C++11 ABI or in libc++. diff --git a/be/src/exec/common/histogram_helpers.hpp b/be/src/exec/common/histogram_helpers.hpp index b94f59827ad00e..98b042ac63c9a1 100644 --- a/be/src/exec/common/histogram_helpers.hpp +++ b/be/src/exec/common/histogram_helpers.hpp @@ -25,7 +25,6 @@ #include "common/cast_set.h" #include "core/data_type/data_type_decimal.h" -#include "util/io_helper.h" namespace doris { #include "common/compile_check_begin.h" diff --git a/be/src/exec/common/space_saving.h b/be/src/exec/common/space_saving.h index 1ff5976fc41daa..57b9a66a17a558 100644 --- a/be/src/exec/common/space_saving.h +++ b/be/src/exec/common/space_saving.h @@ -25,7 +25,6 @@ #include "core/arena_with_free_lists.h" #include "core/string_buffer.hpp" #include "exec/common/hash_table/hash_map.h" -#include "util/io_helper.h" namespace doris { diff --git a/be/src/exprs/aggregate/aggregate_function_array_agg.h b/be/src/exprs/aggregate/aggregate_function_array_agg.h index eda644add88e94..b82af1f2d9b892 100644 --- a/be/src/exprs/aggregate/aggregate_function_array_agg.h +++ b/be/src/exprs/aggregate/aggregate_function_array_agg.h @@ -30,7 +30,6 @@ #include "core/string_ref.h" #include "core/types.h" #include "exprs/aggregate/aggregate_function.h" -#include "util/io_helper.h" namespace doris { #include "common/compile_check_begin.h" diff --git a/be/src/exprs/aggregate/aggregate_function_avg_weighted.h b/be/src/exprs/aggregate/aggregate_function_avg_weighted.h index 23a36541a9824a..75643457d96f2e 100644 --- a/be/src/exprs/aggregate/aggregate_function_avg_weighted.h +++ b/be/src/exprs/aggregate/aggregate_function_avg_weighted.h @@ -33,7 +33,6 @@ #include "core/types.h" #include "core/value/decimalv2_value.h" #include "exprs/aggregate/aggregate_function.h" -#include "util/io_helper.h" namespace doris { #include "common/compile_check_begin.h" diff --git a/be/src/exprs/aggregate/aggregate_function_bit.h b/be/src/exprs/aggregate/aggregate_function_bit.h index 754cb392778eaf..42291a24037ef8 100644 --- a/be/src/exprs/aggregate/aggregate_function_bit.h +++ b/be/src/exprs/aggregate/aggregate_function_bit.h @@ -28,7 +28,6 @@ #include "core/data_type/data_type_number.h" // IWYU pragma: keep #include "core/types.h" #include "exprs/aggregate/aggregate_function.h" -#include "util/io_helper.h" namespace doris { #include "common/compile_check_begin.h" diff --git a/be/src/exprs/aggregate/aggregate_function_collect.h b/be/src/exprs/aggregate/aggregate_function_collect.h index b93815e3880aa1..a6d53939ee7f12 100644 --- a/be/src/exprs/aggregate/aggregate_function_collect.h +++ b/be/src/exprs/aggregate/aggregate_function_collect.h @@ -41,7 +41,6 @@ #include "core/string_ref.h" #include "core/types.h" #include "exprs/aggregate/aggregate_function.h" -#include "util/io_helper.h" #include "util/var_int.h" namespace doris { diff --git a/be/src/exprs/aggregate/aggregate_function_corr.h b/be/src/exprs/aggregate/aggregate_function_corr.h index d63f9c64aaf162..dfd40f240ceddc 100644 --- a/be/src/exprs/aggregate/aggregate_function_corr.h +++ b/be/src/exprs/aggregate/aggregate_function_corr.h @@ -15,8 +15,12 @@ // specific language governing permissions and limitations // under the License. +#include +#include + +#include "core/data_type/primitive_type.h" +#include "core/string_buffer.hpp" #include "core/types.h" -#include "util/io_helper.h" namespace doris { diff --git a/be/src/exprs/aggregate/aggregate_function_histogram.h b/be/src/exprs/aggregate/aggregate_function_histogram.h index b7651be3684b5b..5a5dd07551d85e 100644 --- a/be/src/exprs/aggregate/aggregate_function_histogram.h +++ b/be/src/exprs/aggregate/aggregate_function_histogram.h @@ -35,7 +35,6 @@ #include "exec/common/histogram_helpers.hpp" #include "exprs/aggregate/aggregate_function.h" #include "exprs/aggregate/aggregate_function_simple_factory.h" -#include "util/io_helper.h" namespace doris { #include "common/compile_check_begin.h" diff --git a/be/src/exprs/aggregate/aggregate_function_java_udaf.h b/be/src/exprs/aggregate/aggregate_function_java_udaf.h index 2bb5df64206966..12845637f27ca5 100644 --- a/be/src/exprs/aggregate/aggregate_function_java_udaf.h +++ b/be/src/exprs/aggregate/aggregate_function_java_udaf.h @@ -38,7 +38,6 @@ #include "exec/connector/jni_connector.h" #include "exprs/aggregate/aggregate_function.h" #include "runtime/user_function_cache.h" -#include "util/io_helper.h" #include "util/jni-util.h" namespace doris { diff --git a/be/src/exprs/aggregate/aggregate_function_min_max.h b/be/src/exprs/aggregate/aggregate_function_min_max.h index 6d7f6c5986e1db..04cee942d8aff7 100644 --- a/be/src/exprs/aggregate/aggregate_function_min_max.h +++ b/be/src/exprs/aggregate/aggregate_function_min_max.h @@ -35,6 +35,7 @@ #include "core/assert_cast.h" #include "core/column/column.h" #include "core/column/column_array.h" +#include "core/column/column_decimal.h" #include "core/column/column_fixed_length_object.h" #include "core/column/column_string.h" #include "core/custom_allocator.h" @@ -47,7 +48,6 @@ #include "core/type_limit.h" #include "core/types.h" #include "exprs/aggregate/aggregate_function.h" -#include "util/io_helper.h" namespace doris { #include "common/compile_check_begin.h" diff --git a/be/src/exprs/aggregate/aggregate_function_product.h b/be/src/exprs/aggregate/aggregate_function_product.h index 7511886d3b8eca..93607083b68a19 100644 --- a/be/src/exprs/aggregate/aggregate_function_product.h +++ b/be/src/exprs/aggregate/aggregate_function_product.h @@ -28,7 +28,7 @@ #include "core/string_buffer.hpp" #include "core/types.h" #include "exprs/aggregate/aggregate_function.h" -#include "util/io_helper.h" +#include "exprs/function/cast/cast_to_string.h" namespace doris { #include "common/compile_check_begin.h" @@ -58,13 +58,21 @@ struct AggregateFunctionProductData { void add(typename PrimitiveTypeTraits::CppType value, typename PrimitiveTypeTraits::CppType) { add_impl(value, product); - VLOG_DEBUG << "product: " << product; + if constexpr (std::is_same_v::CppType, int128_t>) { + VLOG_DEBUG << "product: " << CastToString::from_int128(product); + } else { + VLOG_DEBUG << "product: " << product; + } } void merge(const AggregateFunctionProductData& other, typename PrimitiveTypeTraits::CppType) { add_impl(other.product, product); - VLOG_DEBUG << "product: " << product; + if constexpr (std::is_same_v::CppType, int128_t>) { + VLOG_DEBUG << "product: " << CastToString::from_int128(product); + } else { + VLOG_DEBUG << "product: " << product; + } } void write(BufferWritable& buffer) const { buffer.write_binary(product); } diff --git a/be/src/exprs/aggregate/aggregate_function_rpc.h b/be/src/exprs/aggregate/aggregate_function_rpc.h index a7978f74598a26..b083d81bf430a9 100644 --- a/be/src/exprs/aggregate/aggregate_function_rpc.h +++ b/be/src/exprs/aggregate/aggregate_function_rpc.h @@ -39,7 +39,6 @@ #include "runtime/exec_env.h" #include "runtime/user_function_cache.h" #include "util/brpc_client_cache.h" -#include "util/io_helper.h" #include "util/jni-util.h" namespace doris { #include "common/compile_check_avoid_begin.h" diff --git a/be/src/exprs/aggregate/aggregate_function_sequence_match.h b/be/src/exprs/aggregate/aggregate_function_sequence_match.h index 23c1302307b310..d7084f57695496 100644 --- a/be/src/exprs/aggregate/aggregate_function_sequence_match.h +++ b/be/src/exprs/aggregate/aggregate_function_sequence_match.h @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include @@ -45,7 +46,7 @@ #include "core/string_ref.h" #include "core/types.h" #include "exprs/aggregate/aggregate_function.h" -#include "util/io_helper.h" +#include "util/string_parser.hpp" namespace doris { #include "common/compile_check_begin.h" @@ -214,15 +215,24 @@ struct AggregateFunctionSequenceMatchData final { dfa_states.emplace_back(true); pattern_has_time = false; + conditions_in_pattern.reset(); const char* pos = pattern.data(); const char* begin = pos; const char* end = pos + pattern.size(); // Pattern is checked in fe, so pattern should be valid here, we check it and if pattern is invalid, we return. + auto fail_parse = [&]() { + actions.clear(); + dfa_states.clear(); + conditions_in_pattern.reset(); + pattern_has_time = false; + }; + auto throw_exception = [&](const std::string& msg) { LOG(WARNING) << msg + " '" + std::string(pos, end) + "' at position " + std::to_string(pos - begin); + fail_parse(); }; auto match = [&pos, end](const char* str) mutable { @@ -234,6 +244,22 @@ struct AggregateFunctionSequenceMatchData final { return false; }; + auto parse_uint = [&pos, end](auto& value) { + const auto* start = pos; + while (pos < end && std::isdigit(static_cast(*pos))) { + ++pos; + } + + if (pos == start) { + return false; + } + + StringParser::ParseResult result; + value = StringParser::string_to_int, false>( + start, pos - start, &result); + return result == StringParser::PARSE_SUCCESS; + }; + while (pos < end) { if (match("(?")) { if (match("t")) { @@ -255,9 +281,7 @@ struct AggregateFunctionSequenceMatchData final { } NativeType duration = 0; - const auto* prev_pos = pos; - pos = try_read_first_int_text(duration, pos, end); - if (pos == prev_pos) { + if (!parse_uint(duration)) { throw_exception("Could not parse number"); return; } @@ -274,9 +298,10 @@ struct AggregateFunctionSequenceMatchData final { actions.emplace_back(type, duration); } else { UInt64 event_number = 0; - const auto* prev_pos = pos; - pos = try_read_first_int_text(event_number, pos, end); - if (pos == prev_pos) throw_exception("Could not parse number"); + if (!parse_uint(event_number)) { + throw_exception("Could not parse number"); + return; + } if (event_number > arg_count - 1) { throw_exception("Event number " + std::to_string(event_number) + diff --git a/be/src/exprs/aggregate/aggregate_function_topn.h b/be/src/exprs/aggregate/aggregate_function_topn.h index 3095034919489b..77c9e260d4a0e9 100644 --- a/be/src/exprs/aggregate/aggregate_function_topn.h +++ b/be/src/exprs/aggregate/aggregate_function_topn.h @@ -33,18 +33,19 @@ #include "core/assert_cast.h" #include "core/column/column.h" #include "core/column/column_array.h" +#include "core/column/column_decimal.h" #include "core/column/column_nullable.h" #include "core/column/column_string.h" #include "core/column/column_vector.h" #include "core/data_type/data_type_array.h" #include "core/data_type/data_type_nullable.h" #include "core/data_type/data_type_string.h" +#include "core/string_buffer.hpp" #include "core/string_ref.h" #include "core/types.h" #include "exec/common/hash_table/phmap_fwd_decl.h" #include "exprs/aggregate/aggregate_function.h" #include "exprs/aggregate/aggregate_function_simple_factory.h" -#include "util/io_helper.h" namespace doris { #include "common/compile_check_begin.h" diff --git a/be/src/exprs/aggregate/aggregate_function_uniq.h b/be/src/exprs/aggregate/aggregate_function_uniq.h index 4f10bc9f2ceece..fd9ca602d4bdf7 100644 --- a/be/src/exprs/aggregate/aggregate_function_uniq.h +++ b/be/src/exprs/aggregate/aggregate_function_uniq.h @@ -30,16 +30,17 @@ #include "common/compiler_util.h" // IWYU pragma: keep #include "core/assert_cast.h" #include "core/column/column.h" +#include "core/column/column_decimal.h" #include "core/column/column_vector.h" #include "core/data_type/data_type_number.h" #include "core/data_type/primitive_type.h" +#include "core/string_buffer.hpp" #include "core/string_ref.h" #include "core/types.h" #include "core/uint128.h" #include "exec/common/hash_table/hash.h" #include "exec/common/hash_table/phmap_fwd_decl.h" #include "exprs/aggregate/aggregate_function.h" -#include "util/io_helper.h" #include "util/var_int.h" template diff --git a/be/src/exprs/aggregate/moments.h b/be/src/exprs/aggregate/moments.h index 6f7a3ce66fe56a..287776c11fc2e4 100644 --- a/be/src/exprs/aggregate/moments.h +++ b/be/src/exprs/aggregate/moments.h @@ -21,7 +21,6 @@ #include "common/exception.h" #include "common/status.h" -#include "util/io_helper.h" namespace doris { #include "common/compile_check_begin.h" diff --git a/be/src/exprs/function/cast/cast_base.h b/be/src/exprs/function/cast/cast_base.h index 22d35fea4932f7..43a97ad68b1e29 100644 --- a/be/src/exprs/function/cast/cast_base.h +++ b/be/src/exprs/function/cast/cast_base.h @@ -41,7 +41,6 @@ #include "exprs/function/cast/cast_parameters.h" #include "exprs/function/function.h" #include "exprs/function/function_helpers.h" -#include "util/io_helper.h" namespace doris { diff --git a/be/src/exprs/function/cast/cast_to_basic_number_common.h b/be/src/exprs/function/cast/cast_to_basic_number_common.h index 0ccd929d2589af..7270107f6735ba 100644 --- a/be/src/exprs/function/cast/cast_to_basic_number_common.h +++ b/be/src/exprs/function/cast/cast_to_basic_number_common.h @@ -29,6 +29,7 @@ #include "core/data_type/primitive_type.h" #include "core/types.h" #include "exprs/function/cast/cast_base.h" +#include "util/string_parser.hpp" namespace doris { #include "common/compile_check_begin.h" @@ -131,11 +132,14 @@ template constexpr bool IsCppTypeDateTime = std::is_same_v::CppType> || std::is_same_v::CppType>; + struct CastToInt { template - requires(IsCppTypeInt) + requires(IsCppTypeInt || std::is_unsigned_v) static inline bool from_string(const StringRef& from, ToCppT& to, CastParameters& params) { - return try_read_int_text(to, from); + StringParser::ParseResult result; + to = StringParser::string_to_int(from.data, from.size, &result); + return result == StringParser::PARSE_SUCCESS; } template @@ -222,16 +226,14 @@ struct CastToInt { constexpr auto min_result = std::numeric_limits::lowest(); constexpr auto max_result = std::numeric_limits::max(); auto tmp = from.value() / scale_multiplier; - if (narrow_integral) { - if (tmp < min_result || tmp > max_result) { - params.status = Status::Error( - ErrorCode::ARITHMETIC_OVERFLOW_ERRROR, - fmt::format("Arithmetic overflow when converting " - "value {} from type {} to type {}", - decimal_to_string(from.value(), from_scale), - type_to_string(FromCppT::PType), int_type_name)); - return false; - } + if (narrow_integral && (tmp < min_result || tmp > max_result)) { + params.status = Status::Error( + ErrorCode::ARITHMETIC_OVERFLOW_ERRROR, + fmt::format("Arithmetic overflow when converting " + "value {} from type {} to type {}", + decimal_to_string(from.value(), from_scale), + type_to_string(FromCppT::PType), int_type_name)); + return false; } to = static_cast(tmp); return true; @@ -246,22 +248,19 @@ struct CastToInt { constexpr auto min_result = std::numeric_limits::lowest(); constexpr auto max_result = std::numeric_limits::max(); auto tmp = from.value / scale_multiplier; - if (narrow_integral) { - if (tmp < min_result || tmp > max_result) { - params.status = Status::Error( - ErrorCode::ARITHMETIC_OVERFLOW_ERRROR, - fmt::format("Arithmetic overflow when converting " - "value {} from type {} to type {}", - decimal_to_string(from.value, from_scale), - type_to_string(FromCppT::PType), int_type_name)); - return false; - } + if (narrow_integral && (tmp < min_result || tmp > max_result)) { + params.status = Status::Error( + ErrorCode::ARITHMETIC_OVERFLOW_ERRROR, + fmt::format("Arithmetic overflow when converting " + "value {} from type {} to type {}", + decimal_to_string(from.value, from_scale), + type_to_string(FromCppT::PType), int_type_name)); + return false; } to = static_cast(tmp); return true; } - // cast from date and datetime to int template requires((IsCppTypeDate && IntAllowCastFromDate) || (IsCppTypeDateTime && IntAllowCastFromDatetime)) @@ -304,7 +303,9 @@ struct CastToFloat { template requires(IsCppTypeFloat) static inline bool from_string(const StringRef& from, ToCppT& to, CastParameters& params) { - return try_read_float_text(to, from); + StringParser::ParseResult result; + to = StringParser::string_to_float(from.data, from.size, &result); + return result == StringParser::PARSE_SUCCESS; } template requires(IsCppTypeFloat && @@ -330,14 +331,9 @@ struct CastToFloat { requires(IsCppTypeFloat && IsDecimalNumber) static inline bool from_decimal(const FromCppT& from, UInt32 from_scale, ToCppT& to, CastParameters& params) { - if constexpr (IsDecimalV2) { - to = binary_cast(from); - return true; - } else { - typename FromCppT::NativeType scale_multiplier = - DataTypeDecimal::get_scale_multiplier(from_scale); - return _from_decimalv3(from, from_scale, to, scale_multiplier, params); - } + typename FromCppT::NativeType scale_multiplier = + DataTypeDecimal::get_scale_multiplier(from_scale); + return _from_decimalv3(from, from_scale, to, scale_multiplier, params); } template requires(IsCppTypeFloat && IsDecimalNumber && !IsDecimalV2) @@ -357,7 +353,7 @@ struct CastToFloat { static_cast(scale_multiplier)); return true; } - // cast from date and datetime to float/double, will not overflow + template requires(IsCppTypeFloat && (IsCppTypeDate || IsCppTypeDateTime)) static inline bool from_datetime(FromCppT from, ToCppT& to, CastParameters& params) { @@ -365,7 +361,6 @@ struct CastToFloat { return true; } - // from time to float/double, will not overflow template requires(IsCppTypeFloat) static inline bool from_time(FromCppT from, ToCppT& to, CastParameters& params) { diff --git a/be/src/exprs/function/cast/cast_to_boolean.h b/be/src/exprs/function/cast/cast_to_boolean.h index 39d3eaf31ddbed..b6b85700b74fbf 100644 --- a/be/src/exprs/function/cast/cast_to_boolean.h +++ b/be/src/exprs/function/cast/cast_to_boolean.h @@ -19,7 +19,7 @@ #include "core/types.h" #include "exprs/function/cast/cast_base.h" -#include "util/io_helper.h" +#include "util/string_parser.hpp" namespace doris { @@ -113,7 +113,9 @@ inline bool CastToBool::from_decimal(const Decimal256& from, UInt8& to, UInt32, } inline bool CastToBool::from_string(const StringRef& from, UInt8& to, CastParameters&) { - return try_read_bool_text(to, from); + StringParser::ParseResult result; + to = StringParser::string_to_bool(from.data, from.size, &result); + return result == StringParser::PARSE_SUCCESS; } template @@ -198,4 +200,4 @@ class CastToImpl : public CastToBase { } }; -} // namespace doris \ No newline at end of file +} // namespace doris diff --git a/be/src/exprs/function/cast/cast_to_decimal.h b/be/src/exprs/function/cast/cast_to_decimal.h index cf99e409c282a1..7d5cc3993d4eff 100644 --- a/be/src/exprs/function/cast/cast_to_decimal.h +++ b/be/src/exprs/function/cast/cast_to_decimal.h @@ -25,7 +25,6 @@ #include "core/data_type/data_type_number.h" #include "core/types.h" #include "exprs/function/cast/cast_to_basic_number_common.h" -#include "util/io_helper.h" namespace doris { #include "common/compile_check_begin.h" @@ -35,34 +34,46 @@ namespace doris { fmt::format( \ "Arithmetic overflow when converting value {} from type {} to decimal({}, {})", \ value, from_type_name, precision, scale)) + struct CastToDecimal { template requires(IsDecimalNumber) static inline bool from_string(const StringRef& from, ToCppT& to, UInt32 precision, UInt32 scale, CastParameters& params) { if constexpr (IsDecimalV2) { - return StringParser::PARSE_SUCCESS == - try_read_decimal_text(to, from, precision, scale); + StringParser::ParseResult result = StringParser::PARSE_SUCCESS; + to = DecimalV2Value(StringParser::string_to_decimal( + from.data, (int)from.size, DecimalV2Value::PRECISION, DecimalV2Value::SCALE, + &result)); + return result == StringParser::PARSE_SUCCESS; } if constexpr (IsDecimal32) { - return StringParser::PARSE_SUCCESS == - try_read_decimal_text(to, from, precision, scale); + StringParser::ParseResult result = StringParser::PARSE_SUCCESS; + to.value = StringParser::string_to_decimal(from.data, (int)from.size, + precision, scale, &result); + return result == StringParser::PARSE_SUCCESS; } if constexpr (IsDecimal64) { - return StringParser::PARSE_SUCCESS == - try_read_decimal_text(to, from, precision, scale); + StringParser::ParseResult result = StringParser::PARSE_SUCCESS; + to.value = StringParser::string_to_decimal(from.data, (int)from.size, + precision, scale, &result); + return result == StringParser::PARSE_SUCCESS; } if constexpr (IsDecimal128V3) { - return StringParser::PARSE_SUCCESS == - try_read_decimal_text(to, from, precision, scale); + StringParser::ParseResult result = StringParser::PARSE_SUCCESS; + to.value = StringParser::string_to_decimal(from.data, (int)from.size, + precision, scale, &result); + return result == StringParser::PARSE_SUCCESS; } if constexpr (IsDecimal256) { - return StringParser::PARSE_SUCCESS == - try_read_decimal_text(to, from, precision, scale); + StringParser::ParseResult result = StringParser::PARSE_SUCCESS; + to.value = StringParser::string_to_decimal(from.data, (int)from.size, + precision, scale, &result); + return result == StringParser::PARSE_SUCCESS; } } @@ -122,8 +133,104 @@ struct CastToDecimal { DataTypeDecimal::get_max_digits_number(to_precision); typename ToCppT::NativeType min_result = -max_result; - return _from_float(from, to, to_precision, to_scale, scale_multiplier, - min_result, max_result, params); + return _from_float(from, to, to_precision, to_scale, scale_multiplier, min_result, + max_result, params); + } + + template || + std::is_same_v), + Decimal128V3, + std::conditional_t<(sizeof(FromCppT) > sizeof(ToCppT)), FromCppT, ToCppT>>> + requires(IsDecimalNumber && IsDecimalNumber) + static inline bool from_decimal(const FromCppT& from, const UInt32 from_precision, + const UInt32 from_scale, ToCppT& to, UInt32 to_precision, + UInt32 to_scale, CastParameters& params) { + return _from_decimalv3(from, from_precision, from_scale, to, + to_precision, to_scale, params); + } + + template || + std::is_same_v), + Decimal128V3, + std::conditional_t<(sizeof(FromCppT) > sizeof(ToCppT)), FromCppT, ToCppT>>> + requires(IsDecimalNumber && IsDecimalNumber) + static inline bool _from_decimalv3(const FromCppT& from, const UInt32 from_precision, + const UInt32 from_scale, ToCppT& to, UInt32 to_precision, + UInt32 to_scale, CastParameters& params) { + using MaxNativeType = typename MaxFieldType::NativeType; + + auto from_max_int_digit_count = from_precision - from_scale; + auto to_max_int_digit_count = to_precision - to_scale; + bool narrow_integral = + (to_max_int_digit_count < from_max_int_digit_count) || + (to_max_int_digit_count == from_max_int_digit_count && to_scale < from_scale); + + UInt32 to_max_digits = NumberTraits::max_ascii_len(); + bool multiply_may_overflow = false; + if (to_scale > from_scale) { + multiply_may_overflow = (from_precision + to_scale - from_scale) >= to_max_digits; + } + + typename ToCppT::NativeType max_result = + DataTypeDecimal::get_max_digits_number(to_precision); + typename ToCppT::NativeType min_result = -max_result; + + MaxNativeType multiplier {}; + if (from_scale < to_scale) { + multiplier = DataTypeDecimal::get_scale_multiplier(to_scale - + from_scale); + } else if (from_scale > to_scale) { + multiplier = DataTypeDecimal::get_scale_multiplier(from_scale - + to_scale); + } + + return std::visit( + [&](auto multiply_may_overflow, auto narrow_integral) { + return _from_decimal( + from, from_precision, from_scale, to, to_precision, to_scale, + min_result, max_result, multiplier, params); + }, + make_bool_variant(multiply_may_overflow), make_bool_variant(narrow_integral)); + } + + template || + std::is_same_v), + Decimal128V3, + std::conditional_t<(sizeof(FromCppT) > sizeof(ToCppT)), FromCppT, ToCppT>>> + requires(IsDecimalNumber && IsDecimalNumber) + static inline bool _from_decimal(const FromCppT& from, const UInt32 from_precision, + const UInt32 from_scale, ToCppT& to, UInt32 to_precision, + UInt32 to_scale, const ToCppT::NativeType& min_result, + const ToCppT::NativeType& max_result, + const typename MaxFieldType::NativeType& scale_multiplier, + CastParameters& params) { + using MaxNativeType = typename MaxFieldType::NativeType; + + if (from_scale < to_scale) { + return _from_decimal_smaller_scale( + from, from_precision, from_scale, to, to_precision, to_scale, scale_multiplier, + min_result, max_result, params); + } else if (from_scale == to_scale) { + return _from_decimal_same_scale( + from, from_precision, from_scale, to, to_precision, to_scale, min_result, + max_result, params); + } else { + return _from_decimal_bigger_scale( + from, from_precision, from_scale, to, to_precision, to_scale, scale_multiplier, + min_result, max_result, params); + } + return true; } template @@ -140,9 +247,6 @@ struct CastToDecimal { "to decimal"); return false; } - // For decimal256, we need to use long double to avoid overflow when - // static casting the multiplier to floating type, and also to be as precise as possible; - // For other decimal types, we use double to be as precise as possible. using DoubleType = std::conditional_t, long double, double>; DoubleType tmp = from * static_cast(scale_multiplier); if (tmp <= DoubleType(min_result) || tmp >= DoubleType(max_result)) { @@ -156,6 +260,7 @@ struct CastToDecimal { from * static_cast(scale_multiplier) + ((from >= 0) ? 0.5 : -0.5))); return true; } + template requires(IsDecimal128V2 && IsCppTypeFloat) static inline bool _from_float(const FromCppT& from, ToCppT& to, UInt32 to_precision, @@ -170,9 +275,6 @@ struct CastToDecimal { "to decimal"); return false; } - // For decimal256, we need to use long double to avoid overflow when - // static casting the multiplier to floating type, and also to be as precise as possible; - // For other decimal types, we use double to be as precise as possible. using DoubleType = std::conditional_t, long double, double>; DoubleType tmp = from * static_cast(scale_multiplier); if (tmp <= DoubleType(min_result) || tmp >= DoubleType(max_result)) { @@ -195,10 +297,27 @@ struct CastToDecimal { Decimal128V3, std::conditional_t<(sizeof(FromCppT) > sizeof(ToCppT)), FromCppT, ToCppT>>> requires(IsDecimalNumber && IsDecimalNumber) - static inline bool from_decimalv2(const FromCppT& from, const UInt32 from_precision, - const UInt32 from_scale, UInt32 from_original_precision, - UInt32 from_original_scale, ToCppT& to, UInt32 to_precision, - UInt32 to_scale, CastParameters& params) { + static inline bool from_decimal(const FromCppT& from, const UInt32 from_precision, + const UInt32 from_scale, UInt32 from_original_precision, + UInt32 from_original_scale, ToCppT& to, UInt32 to_precision, + UInt32 to_scale, CastParameters& params) { + return _from_decimalv2( + from, from_precision, from_scale, from_original_precision, from_original_scale, to, + to_precision, to_scale, params); + } + + template || + std::is_same_v), + Decimal128V3, + std::conditional_t<(sizeof(FromCppT) > sizeof(ToCppT)), FromCppT, ToCppT>>> + requires(IsDecimalNumber && IsDecimalNumber) + static inline bool _from_decimalv2(const FromCppT& from, const UInt32 from_precision, + const UInt32 from_scale, UInt32 from_original_precision, + UInt32 from_original_scale, ToCppT& to, UInt32 to_precision, + UInt32 to_scale, CastParameters& params) { using MaxNativeType = typename MaxFieldType::NativeType; auto from_max_int_digit_count = from_original_precision - from_original_scale; @@ -246,84 +365,96 @@ struct CastToDecimal { make_bool_variant(multiply_may_overflow), make_bool_variant(narrow_integral)); } - template || - std::is_same_v), - Decimal128V3, - std::conditional_t<(sizeof(FromCppT) > sizeof(ToCppT)), FromCppT, ToCppT>>> - requires(IsDecimalNumber && IsDecimalNumber) - static inline bool from_decimalv3(const FromCppT& from, const UInt32 from_precision, - const UInt32 from_scale, ToCppT& to, UInt32 to_precision, - UInt32 to_scale, CastParameters& params) { - using MaxNativeType = typename MaxFieldType::NativeType; - - auto from_max_int_digit_count = from_precision - from_scale; - auto to_max_int_digit_count = to_precision - to_scale; - bool narrow_integral = - (to_max_int_digit_count < from_max_int_digit_count) || - (to_max_int_digit_count == from_max_int_digit_count && to_scale < from_scale); - - UInt32 to_max_digits = NumberTraits::max_ascii_len(); - bool multiply_may_overflow = false; - if (to_scale > from_scale) { - multiply_may_overflow = (from_precision + to_scale - from_scale) >= to_max_digits; - } - - typename ToCppT::NativeType max_result = - DataTypeDecimal::get_max_digits_number(to_precision); - typename ToCppT::NativeType min_result = -max_result; - - MaxNativeType multiplier {}; - if (from_scale < to_scale) { - multiplier = DataTypeDecimal::get_scale_multiplier(to_scale - - from_scale); - } else if (from_scale > to_scale) { - multiplier = DataTypeDecimal::get_scale_multiplier(from_scale - - to_scale); + template sizeof(typename ToCppT::NativeType)), + FromCppT, typename ToCppT::NativeType>> + requires(IsDecimalNumber && !IsDecimal128V2 && + (IsCppTypeInt || std::is_same_v)) + static inline bool _from_int(const FromCppT& from, ToCppT& to, UInt32 precision, UInt32 scale, + const MaxNativeType& scale_multiplier, + const typename ToCppT::NativeType& min_result, + const typename ToCppT::NativeType& max_result, + CastParameters& params) { + MaxNativeType tmp; + if constexpr (multiply_may_overflow) { + if (common::mul_overflow(static_cast(from), scale_multiplier, tmp)) { + if (params.is_strict) { + params.status = DECIMAL_CONVERT_OVERFLOW_ERROR(from, int_type_name, + precision, scale); + } + return false; + } + if constexpr (narrow_integral) { + if (tmp < min_result || tmp > max_result) { + if (params.is_strict) { + params.status = DECIMAL_CONVERT_OVERFLOW_ERROR( + from, int_type_name, precision, scale); + } + return false; + } + } + to.value = static_cast(tmp); + } else { + tmp = scale_multiplier * from; + if constexpr (narrow_integral) { + if (tmp < min_result || tmp > max_result) { + if (params.is_strict) { + params.status = DECIMAL_CONVERT_OVERFLOW_ERROR( + from, int_type_name, precision, scale); + } + return false; + } + } + to.value = static_cast(tmp); } - return std::visit( - [&](auto multiply_may_overflow, auto narrow_integral) { - return _from_decimal( - from, from_precision, from_scale, to, to_precision, to_scale, - min_result, max_result, multiplier, params); - }, - make_bool_variant(multiply_may_overflow), make_bool_variant(narrow_integral)); + return true; } template || - std::is_same_v), - Decimal128V3, - std::conditional_t<(sizeof(FromCppT) > sizeof(ToCppT)), FromCppT, ToCppT>>> - requires(IsDecimalNumber && IsDecimalNumber) - static inline bool _from_decimal(const FromCppT& from, const UInt32 from_precision, - const UInt32 from_scale, ToCppT& to, UInt32 to_precision, - UInt32 to_scale, const ToCppT::NativeType& min_result, - const ToCppT::NativeType& max_result, - const typename MaxFieldType::NativeType& scale_multiplier, - CastParameters& params) { - using MaxNativeType = typename MaxFieldType::NativeType; - - if (from_scale < to_scale) { - return _from_decimal_smaller_scale( - from, from_precision, from_scale, to, to_precision, to_scale, scale_multiplier, - min_result, max_result, params); - } else if (from_scale == to_scale) { - return _from_decimal_same_scale( - from, from_precision, from_scale, to, to_precision, to_scale, min_result, - max_result, params); + typename MaxNativeType = + std::conditional_t<(sizeof(FromCppT) > sizeof(typename ToCppT::NativeType)), + FromCppT, typename ToCppT::NativeType>> + requires(IsDecimalV2 && (IsCppTypeInt || std::is_same_v)) + static inline bool _from_int(const FromCppT& from, ToCppT& to, UInt32 precision, UInt32 scale, + const MaxNativeType& scale_multiplier, + const typename ToCppT::NativeType& min_result, + const typename ToCppT::NativeType& max_result, + CastParameters& params) { + MaxNativeType tmp; + if constexpr (multiply_may_overflow) { + if (common::mul_overflow(static_cast(from), scale_multiplier, tmp)) { + if (params.is_strict) { + params.status = DECIMAL_CONVERT_OVERFLOW_ERROR(from, int_type_name, + precision, scale); + } + return false; + } + if constexpr (narrow_integral) { + if (tmp < min_result || tmp > max_result) { + if (params.is_strict) { + params.status = DECIMAL_CONVERT_OVERFLOW_ERROR( + from, int_type_name, precision, scale); + } + return false; + } + } + to = DecimalV2Value(static_cast(tmp)); } else { - return _from_decimal_bigger_scale( - from, from_precision, from_scale, to, to_precision, to_scale, scale_multiplier, - min_result, max_result, params); + tmp = scale_multiplier * from; + if constexpr (narrow_integral) { + if (tmp < min_result || tmp > max_result) { + if (params.is_strict) { + params.status = DECIMAL_CONVERT_OVERFLOW_ERROR( + from, int_type_name, precision, scale); + } + return false; + } + } + to = DecimalV2Value(static_cast(tmp)); } + return true; } @@ -547,99 +678,6 @@ struct CastToDecimal { } return true; } - - template sizeof(typename ToCppT::NativeType)), - FromCppT, typename ToCppT::NativeType>> - requires(IsDecimalNumber && !IsDecimal128V2 && - (IsCppTypeInt || std::is_same_v)) - static inline bool _from_int(const FromCppT& from, ToCppT& to, UInt32 precision, UInt32 scale, - const MaxNativeType& scale_multiplier, - const typename ToCppT::NativeType& min_result, - const typename ToCppT::NativeType& max_result, - CastParameters& params) { - MaxNativeType tmp; - if constexpr (multiply_may_overflow) { - if (common::mul_overflow(static_cast(from), scale_multiplier, tmp)) { - if (params.is_strict) { - params.status = DECIMAL_CONVERT_OVERFLOW_ERROR(from, int_type_name, - precision, scale); - } - return false; - } - if constexpr (narrow_integral) { - if (tmp < min_result || tmp > max_result) { - if (params.is_strict) { - params.status = DECIMAL_CONVERT_OVERFLOW_ERROR( - from, int_type_name, precision, scale); - } - return false; - } - } - to.value = static_cast(tmp); - } else { - tmp = scale_multiplier * from; - if constexpr (narrow_integral) { - if (tmp < min_result || tmp > max_result) { - if (params.is_strict) { - params.status = DECIMAL_CONVERT_OVERFLOW_ERROR( - from, int_type_name, precision, scale); - } - return false; - } - } - to.value = static_cast(tmp); - } - - return true; - } - - template sizeof(typename ToCppT::NativeType)), - FromCppT, typename ToCppT::NativeType>> - requires(IsDecimalV2 && (IsCppTypeInt || std::is_same_v)) - static inline bool _from_int(const FromCppT& from, ToCppT& to, UInt32 precision, UInt32 scale, - const MaxNativeType& scale_multiplier, - const typename ToCppT::NativeType& min_result, - const typename ToCppT::NativeType& max_result, - CastParameters& params) { - MaxNativeType tmp; - if constexpr (multiply_may_overflow) { - if (common::mul_overflow(static_cast(from), scale_multiplier, tmp)) { - if (params.is_strict) { - params.status = DECIMAL_CONVERT_OVERFLOW_ERROR(from, int_type_name, - precision, scale); - } - return false; - } - if constexpr (narrow_integral) { - if (tmp < min_result || tmp > max_result) { - if (params.is_strict) { - params.status = DECIMAL_CONVERT_OVERFLOW_ERROR( - from, int_type_name, precision, scale); - } - return false; - } - } - to = DecimalV2Value(static_cast(tmp)); - } else { - tmp = scale_multiplier * from; - if constexpr (narrow_integral) { - if (tmp < min_result || tmp > max_result) { - if (params.is_strict) { - params.status = DECIMAL_CONVERT_OVERFLOW_ERROR( - from, int_type_name, precision, scale); - } - return false; - } - } - to = DecimalV2Value(static_cast(tmp)); - } - - return true; - } }; // Casting from string to decimal types. @@ -1079,4 +1117,4 @@ class CastToImpl : public CastToBase { }; #include "common/compile_check_end.h" -} // namespace doris \ No newline at end of file +} // namespace doris diff --git a/be/src/exprs/function/cast/cast_to_float.h b/be/src/exprs/function/cast/cast_to_float.h index 0c7d41a7d3b6b1..83967346387e58 100644 --- a/be/src/exprs/function/cast/cast_to_float.h +++ b/be/src/exprs/function/cast/cast_to_float.h @@ -47,7 +47,6 @@ class CastToImpl : public CastToBase { Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, uint32_t result, size_t input_rows_count, const NullMap::value_type* null_map = nullptr) const override { - using FromFieldType = typename FromDataType::FieldType; const ColumnWithTypeAndName& named_from = block.get_by_position(arguments[0]); const auto* col_from = check_and_get_column(named_from.column.get()); @@ -68,12 +67,8 @@ class CastToImpl : public CastToBase { CastParameters params; params.is_strict = (CastMode == CastModeType::StrictMode); size_t size = vec_from.size(); - - typename FromFieldType::NativeType scale_multiplier = - DataTypeDecimal::get_scale_multiplier(from_scale); for (size_t i = 0; i < size; ++i) { - CastToFloat::_from_decimalv3(vec_from_data[i], from_scale, vec_to_data[i], - scale_multiplier, params); + CastToFloat::from_decimal(vec_from_data[i], from_scale, vec_to_data[i], params); } block.get_by_position(result).column = std::move(col_to); diff --git a/be/src/exprs/function/cast/cast_to_int.h b/be/src/exprs/function/cast/cast_to_int.h index ecee8b51f048a4..d1c1ce47851fd8 100644 --- a/be/src/exprs/function/cast/cast_to_int.h +++ b/be/src/exprs/function/cast/cast_to_int.h @@ -142,7 +142,6 @@ class CastToImpl : public CastToBase { uint32_t result, size_t input_rows_count, const NullMap::value_type* null_map = nullptr) const override { using ToFieldType = typename ToDataType::FieldType; - using FromFieldType = typename FromDataType::FieldType; const ColumnWithTypeAndName& named_from = block.get_by_position(arguments[0]); const auto* col_from = @@ -156,11 +155,9 @@ class CastToImpl : public CastToBase { UInt32 from_precision = from_decimal_type.get_precision(); UInt32 from_scale = from_decimal_type.get_scale(); - constexpr UInt32 to_max_digits = NumberTraits::max_ascii_len(); - bool narrow_integral = (from_precision - from_scale) >= to_max_digits; - // may overflow if integer part of decimal is larger than to_max_digits // in strict mode we also decide nullable on this. + constexpr UInt32 to_max_digits = NumberTraits::max_ascii_len(); bool overflow_and_nullable = (from_precision - from_scale) >= to_max_digits; // only in non-strict mode and may overflow, we set nullable bool set_nullable = (CastMode == CastModeType::NonStrictMode) && overflow_and_nullable; @@ -181,13 +178,10 @@ class CastToImpl : public CastToBase { CastParameters params; params.is_strict = (CastMode == CastModeType::StrictMode); size_t size = vec_from.size(); - typename FromFieldType::NativeType scale_multiplier = - DataTypeDecimal::get_scale_multiplier(from_scale); for (size_t i = 0; i < size; i++) { - if (!CastToInt::_from_decimal( - vec_from_data[i], from_precision, from_scale, vec_to_data[i], - scale_multiplier, narrow_integral, params)) { + if (!CastToInt::from_decimal( + vec_from_data[i], from_precision, from_scale, vec_to_data[i], params)) { if (set_nullable) { null_map_data[i] = 1; } else { @@ -231,4 +225,4 @@ class CastToImpl : public CastToBase { } }; #include "common/compile_check_end.h" -} // namespace doris \ No newline at end of file +} // namespace doris diff --git a/be/src/exprs/function/cast/cast_to_jsonb.h b/be/src/exprs/function/cast/cast_to_jsonb.h index 16287774ce38b6..ca90ff7d877567 100644 --- a/be/src/exprs/function/cast/cast_to_jsonb.h +++ b/be/src/exprs/function/cast/cast_to_jsonb.h @@ -24,7 +24,6 @@ #include "core/value/jsonb_value.h" #include "exprs/function/cast/cast_base.h" #include "exprs/function/cast/cast_to_string.h" -#include "util/io_helper.h" #include "util/jsonb_utils.h" #include "util/jsonb_writer.h" @@ -244,4 +243,4 @@ WrapperType create_cast_to_jsonb_wrapper(const DataTypePtr& from_type, const Dat }; } #include "common/compile_check_end.h" -} // namespace doris::CastWrapper \ No newline at end of file +} // namespace doris::CastWrapper diff --git a/be/src/exprs/function/cast/cast_to_string.h b/be/src/exprs/function/cast/cast_to_string.h index c0c2a6b1ab4ebf..31b2efc88ff945 100644 --- a/be/src/exprs/function/cast/cast_to_string.h +++ b/be/src/exprs/function/cast/cast_to_string.h @@ -26,6 +26,10 @@ namespace doris { #include "common/compile_check_begin.h" struct CastToString { + static inline std::string from_int128(int128_t value); + static inline std::string from_uint128(uint128_t value); + static inline std::string from_uint128(UInt128 value); + template static inline std::string from_number(const SRC& from); @@ -263,13 +267,27 @@ inline void CastToString::push_number(const Int64& num, BufferWritable& bw) { } // LARGEINT -template <> -inline std::string CastToString::from_number(const Int128& num) { +inline std::string CastToString::from_int128(int128_t value) { fmt::memory_buffer buffer; - fmt::format_to(buffer, "{}", num); + fmt::format_to(buffer, "{}", value); + return std::string(buffer.data(), buffer.size()); +} + +inline std::string CastToString::from_uint128(uint128_t value) { + fmt::memory_buffer buffer; + fmt::format_to(buffer, "{}", value); return std::string(buffer.data(), buffer.size()); } +inline std::string CastToString::from_uint128(UInt128 value) { + return value.to_hex_string(); +} + +template <> +inline std::string CastToString::from_number(const Int128& num) { + return from_int128(num); +} + template <> inline void CastToString::push_number(const Int128& num, ColumnString::Chars& chars) { fmt::memory_buffer buffer; diff --git a/be/src/exprs/function/cast/cast_to_timestamptz.h b/be/src/exprs/function/cast/cast_to_timestamptz.h index b8c7a8399c0025..5e7dcdb551e81f 100644 --- a/be/src/exprs/function/cast/cast_to_timestamptz.h +++ b/be/src/exprs/function/cast/cast_to_timestamptz.h @@ -27,7 +27,6 @@ #include "exprs/function/cast/cast_base.h" #include "exprs/function/cast/cast_to_datetimev2_impl.hpp" #include "exprs/function/cast/cast_to_timestamptz_impl.hpp" -#include "util/io_helper.h" namespace doris { @@ -212,4 +211,4 @@ class CastToImpl struct SafeCastString {}; -template <> -struct SafeCastString { +template +struct SafeCastString { // Ref: https://github.com/apache/hive/blob/4df4d75bf1e16fe0af75aad0b4179c34c07fc975/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/PrimitiveObjectInspectorUtils.java#L559 static inline const std::set FALSE_VALUES = {"false", "off", "no", "0", ""}; static bool safe_cast_string(const StringRef& str_ref, - PrimitiveTypeTraits::ColumnType::value_type* value) { + PrimitiveTypeTraits::ColumnType::value_type* value, + CastParameters& params) { std::string str_value = str_ref.to_string(); std::transform(str_value.begin(), str_value.end(), str_value.begin(), ::tolower); bool is_false = (FALSE_VALUES.contains(str_value)); @@ -373,7 +378,8 @@ struct SafeCastString { template <> struct SafeCastString { static bool safe_cast_string(const StringRef& str_ref, - PrimitiveTypeTraits::ColumnType::value_type* value) { + PrimitiveTypeTraits::ColumnType::value_type* value, + CastParameters& params) { int64_t cast_to_long = 0; bool can_cast = absl::SimpleAtoi({str_ref.data, str_ref.size}, &cast_to_long); *value = cast_to_long == 0 ? 0 : 1; @@ -381,10 +387,11 @@ struct SafeCastString { } }; -template <> -struct SafeCastString { +template +struct SafeCastString { static bool safe_cast_string(const StringRef& str_ref, - PrimitiveTypeTraits::ColumnType::value_type* value) { + PrimitiveTypeTraits::ColumnType::value_type* value, + CastParameters& params) { int32_t cast_to_int = 0; bool can_cast = absl::SimpleAtoi({str_ref.data, str_ref.size}, &cast_to_int); if (can_cast && cast_to_int <= std::numeric_limits::max() && @@ -397,11 +404,11 @@ struct SafeCastString { } }; -template <> -struct SafeCastString { - static bool safe_cast_string( - const StringRef& str_ref, - PrimitiveTypeTraits::ColumnType::value_type* value) { +template +struct SafeCastString { + static bool safe_cast_string(const StringRef& str_ref, + PrimitiveTypeTraits::ColumnType::value_type* value, + CastParameters& params) { int32_t cast_to_int = 0; bool can_cast = absl::SimpleAtoi({str_ref.data, str_ref.size}, &cast_to_int); if (can_cast && cast_to_int <= std::numeric_limits::max() && @@ -414,10 +421,11 @@ struct SafeCastString { } }; -template <> -struct SafeCastString { +template +struct SafeCastString { static bool safe_cast_string(const StringRef& str_ref, - PrimitiveTypeTraits::ColumnType::value_type* value) { + PrimitiveTypeTraits::ColumnType::value_type* value, + CastParameters& params) { int32_t cast_to_int = 0; bool can_cast = absl::SimpleAtoi({str_ref.data, str_ref.size}, &cast_to_int); *value = cast_to_int; @@ -425,10 +433,11 @@ struct SafeCastString { } }; -template <> -struct SafeCastString { +template +struct SafeCastString { static bool safe_cast_string(const StringRef& str_ref, - PrimitiveTypeTraits::ColumnType::value_type* value) { + PrimitiveTypeTraits::ColumnType::value_type* value, + CastParameters& params) { int64_t cast_to_int = 0; bool can_cast = absl::SimpleAtoi({str_ref.data, str_ref.size}, &cast_to_int); *value = cast_to_int; @@ -436,19 +445,20 @@ struct SafeCastString { } }; -template <> -struct SafeCastString { - static bool safe_cast_string( - const StringRef& str_ref, - PrimitiveTypeTraits::ColumnType::value_type* value) { - return try_read_int_text(*value, str_ref); +template +struct SafeCastString { + static bool safe_cast_string(const StringRef& str_ref, + PrimitiveTypeTraits::ColumnType::value_type* value, + CastParameters& params) { + return CastToInt::from_string(str_ref, *value, params); } }; template struct SafeCastString { static bool safe_cast_string(const StringRef& str_ref, - PrimitiveTypeTraits::ColumnType::value_type* value) { + PrimitiveTypeTraits::ColumnType::value_type* value, + CastParameters& params) { float cast_to_float = 0; bool can_cast = absl::SimpleAtof({str_ref.data, str_ref.size}, &cast_to_float); if (can_cast && fileFormat == ORC) { @@ -465,7 +475,8 @@ struct SafeCastString { template struct SafeCastString { static bool safe_cast_string(const StringRef& str_ref, - PrimitiveTypeTraits::ColumnType::value_type* value) { + PrimitiveTypeTraits::ColumnType::value_type* value, + CastParameters& params) { double cast_to_double = 0; bool can_cast = absl::SimpleAtod({str_ref.data, str_ref.size}, &cast_to_double); if (can_cast && fileFormat == ORC) { @@ -478,37 +489,51 @@ struct SafeCastString { } }; -template <> -struct SafeCastString { - static bool safe_cast_string( - const StringRef& str_ref, - PrimitiveTypeTraits::ColumnType::value_type* value) { - return read_datetime_text_impl(*value, str_ref); +template +struct SafeCastString { + static bool safe_cast_string(const StringRef& str_ref, + PrimitiveTypeTraits::ColumnType::value_type* value, + CastParameters& params) { + if (!CastToDateOrDatetime::from_string_non_strict_mode( + str_ref, *value, nullptr, params)) { + return false; + } + value->to_datetime(); + return true; } }; -template <> -struct SafeCastString { +template +struct SafeCastString { static bool safe_cast_string( const StringRef& str_ref, - PrimitiveTypeTraits::ColumnType::value_type* value, int scale) { - return read_datetime_v2_text_impl(*value, str_ref, scale); + PrimitiveTypeTraits::ColumnType::value_type* value, int scale, + CastParameters& params) { + return CastToDatetimeV2::from_string_non_strict_mode(str_ref, *value, nullptr, scale, + params); } }; -template <> -struct SafeCastString { +template +struct SafeCastString { static bool safe_cast_string(const StringRef& str_ref, - PrimitiveTypeTraits::ColumnType::value_type* value) { - return read_date_text_impl(*value, str_ref); + PrimitiveTypeTraits::ColumnType::value_type* value, + CastParameters& params) { + if (!CastToDateOrDatetime::from_string_non_strict_mode( + str_ref, *value, nullptr, params)) { + return false; + } + value->cast_to_date(); + return true; } }; -template <> -struct SafeCastString { +template +struct SafeCastString { static bool safe_cast_string(const StringRef& str_ref, - PrimitiveTypeTraits::ColumnType::value_type* value) { - return read_date_v2_text_impl(*value, str_ref); + PrimitiveTypeTraits::ColumnType::value_type* value, + CastParameters& params) { + return CastToDateV2::from_string_non_strict_mode(str_ref, *value, nullptr, params); } }; @@ -516,10 +541,9 @@ template struct SafeCastDecimalString { using CppType = typename PrimitiveTypeTraits::ColumnType::value_type; - static bool safe_cast_string(const StringRef& str_ref, CppType* value, int precision, - int scale) { - return read_decimal_text_impl( - *value, str_ref, precision, scale) == StringParser::PARSE_SUCCESS; + static bool safe_cast_string(const StringRef& str_ref, CppType* value, int precision, int scale, + CastParameters& params) { + return CastToDecimal::from_string(str_ref, *value, precision, scale, params); } }; @@ -561,22 +585,23 @@ class CastStringConverter : public ColumnTypeConverter { size_t start_idx = to_col->size(); to_col->resize(start_idx + rows); auto& data = assert_cast(to_col.get())->get_data(); + CastParameters params; for (int i = 0; i < rows; ++i) { bool can_cast = false; if constexpr (is_decimal_type()) { can_cast = SafeCastDecimalString::safe_cast_string( string_col->get_data_at(i), &data[start_idx + i], - _dst_type_desc->get_precision(), _dst_type_desc->get_scale()); + _dst_type_desc->get_precision(), _dst_type_desc->get_scale(), params); } else if constexpr (DstPrimitiveType == TYPE_DATETIMEV2) { can_cast = SafeCastString::safe_cast_string( string_col->get_data_at(i), &data[start_idx + i], - _dst_type_desc->get_scale()); + _dst_type_desc->get_scale(), params); } else if constexpr (DstPrimitiveType == TYPE_BOOLEAN && fileFormat == ORC) { can_cast = SafeCastString::safe_cast_string( - string_col->get_data_at(i), &data[start_idx + i]); + string_col->get_data_at(i), &data[start_idx + i], params); } else { can_cast = SafeCastString::safe_cast_string( - string_col->get_data_at(i), &data[start_idx + i]); + string_col->get_data_at(i), &data[start_idx + i], params); } if (!can_cast) { diff --git a/be/src/runtime/runtime_state.cpp b/be/src/runtime/runtime_state.cpp index 1694deba5825b8..eb12f83fedb1ac 100644 --- a/be/src/runtime/runtime_state.cpp +++ b/be/src/runtime/runtime_state.cpp @@ -53,7 +53,6 @@ #include "runtime/thread_context.h" #include "storage/id_manager.h" #include "storage/storage_engine.h" -#include "util/io_helper.h" #include "util/timezone_utils.h" #include "util/uid_util.h" @@ -174,8 +173,11 @@ RuntimeState::RuntimeState(const TQueryGlobals& query_globals) } else if (!query_globals.now_string.empty()) { _timezone = TimezoneUtils::default_time_zone; VecDateTimeValue dt; - read_datetime_text_impl( - dt, StringRef(query_globals.now_string.data(), query_globals.now_string.size())); + CastParameters params; + DORIS_CHECK((CastToDateOrDatetime::from_string_strict_mode( + {query_globals.now_string.c_str(), query_globals.now_string.size()}, dt, nullptr, + params))); int64_t timestamp; dt.unix_timestamp(×tamp, _timezone); _timestamp_ms = timestamp * 1000; diff --git a/be/src/storage/olap_scan_common.h b/be/src/storage/olap_scan_common.h index 54052c965d786a..9ef64b2780033c 100644 --- a/be/src/storage/olap_scan_common.h +++ b/be/src/storage/olap_scan_common.h @@ -51,7 +51,6 @@ #include "storage/olap_tuple.h" #include "storage/olap_utils.h" #include "storage/predicate/filter_olap_param.h" -#include "util/io_helper.h" namespace doris { #include "common/compile_check_begin.h" diff --git a/be/src/util/counts.h b/be/src/util/counts.h index 0dcafdf1f06453..a0299f6318b0eb 100644 --- a/be/src/util/counts.h +++ b/be/src/util/counts.h @@ -25,7 +25,6 @@ #include "core/pod_array.h" #include "core/string_buffer.hpp" -#include "util/io_helper.h" namespace doris { diff --git a/be/src/util/io_helper.cpp b/be/src/util/io_helper.cpp deleted file mode 100644 index 9cc541833e973d..00000000000000 --- a/be/src/util/io_helper.cpp +++ /dev/null @@ -1,89 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. -// This file is copied from -// https://github.com/ClickHouse/ClickHouse/blob/master/src/Functions/registerFunctionsComparison.cpp -// and modified by Doris - -#include "util/io_helper.h" - -#include "core/binary_cast.hpp" -#include "exprs/function/cast/cast_to_date_or_datetime_impl.hpp" -#include "exprs/function/cast/cast_to_datetimev2_impl.hpp" -#include "exprs/function/cast/cast_to_datev2_impl.hpp" - -namespace doris { -bool read_date_text_impl(VecDateTimeValue& x, const StringRef& buf) { - CastParameters params; - auto ans = CastToDateOrDatetime::from_string_non_strict_mode( - buf, x, nullptr, params); - x.cast_to_date(); - return ans; -} - -bool read_datetime_text_impl(VecDateTimeValue& x, const StringRef& buf) { - CastParameters params; - auto ans = CastToDateOrDatetime::from_string_non_strict_mode( - buf, x, nullptr, params); - x.to_datetime(); - return ans; -} - -bool read_date_text_impl(Int64& x, const StringRef& buf, const cctz::time_zone& local_time_zone) { - auto dv = binary_cast(x); - CastParameters params; - auto ans = CastToDateOrDatetime::from_string_non_strict_mode( - buf, dv, &local_time_zone, params); - dv.cast_to_date(); - x = binary_cast(dv); - return ans; -} - -bool read_datetime_text_impl(Int64& x, const StringRef& buf, - const cctz::time_zone& local_time_zone) { - auto dv = binary_cast(x); - CastParameters params; - auto ans = CastToDateOrDatetime::from_string_non_strict_mode( - buf, dv, &local_time_zone, params); - dv.to_datetime(); - x = binary_cast(dv); - return ans; -} - -bool read_date_v2_text_impl(DateV2Value& x, const StringRef& buf) { - CastParameters params; - return CastToDateV2::from_string_non_strict_mode(buf, x, nullptr, params); -} - -bool read_date_v2_text_impl(DateV2Value& x, const StringRef& buf, - const cctz::time_zone& local_time_zone) { - CastParameters params; - return CastToDateV2::from_string_non_strict_mode(buf, x, &local_time_zone, params); -} - -bool read_datetime_v2_text_impl(DateV2Value& x, const StringRef& buf, - UInt32 scale) { - CastParameters params; - return CastToDatetimeV2::from_string_non_strict_mode(buf, x, nullptr, scale, params); -} - -bool read_datetime_v2_text_impl(DateV2Value& x, const StringRef& buf, - const cctz::time_zone& local_time_zone, UInt32 scale) { - CastParameters params; - return CastToDatetimeV2::from_string_non_strict_mode(buf, x, &local_time_zone, scale, params); -} - -} // namespace doris diff --git a/be/src/util/io_helper.h b/be/src/util/io_helper.h deleted file mode 100644 index 5d9bd32e3dc06e..00000000000000 --- a/be/src/util/io_helper.h +++ /dev/null @@ -1,237 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#pragma once - -#include -#include -#include - -#include -#include - -#include "common/exception.h" -#include "core/arena.h" -#include "core/binary_cast.hpp" -#include "core/field.h" -#include "core/string_buffer.hpp" -#include "core/string_ref.h" -#include "core/types.h" -#include "core/uint128.h" -#include "core/value/ipv4_value.h" -#include "core/value/ipv6_value.h" -#include "core/value/vdatetime_value.h" -#include "util/string_parser.hpp" -#include "util/var_int.h" - -namespace doris { -#include "common/compile_check_begin.h" -inline std::string int128_to_string(int128_t value) { - return fmt::format(FMT_COMPILE("{}"), value); -} - -inline std::string int128_to_string(uint128_t value) { - return fmt::format(FMT_COMPILE("{}"), value); -} - -inline std::string int128_to_string(UInt128 value) { - return value.to_hex_string(); -} - -template -void write_text(Decimal value, UInt32 scale, std::ostream& ostr) { - if (value < Decimal(0)) { - value *= Decimal(-1); - if (value > Decimal(0)) { - ostr << '-'; - } - } - - T whole_part = value; - - if (scale) { - whole_part = value / decimal_scale_multiplier(scale); - } - if constexpr (std::is_same_v) { - ostr << int128_to_string(whole_part); - } else { - ostr << whole_part; - } - if (scale) { - ostr << '.'; - String str_fractional(scale, '0'); - Int32 pos = scale - 1; - if (value < Decimal(0) && pos >= 0) { - // Reach here iff this value is a min value of a signed numeric type. It means min() - // which is -2147483648 multiply -1 is still -2147483648. - str_fractional[pos] += (value / 10 * 10) - value; - pos--; - value /= 10; - value *= Decimal(-1); - } - for (; pos >= 0; --pos, value /= 10) { - str_fractional[pos] += value % 10; - } - ostr.write(str_fractional.data(), scale); - } -} - -template -bool try_read_float_text(T& x, const StringRef& in) { - static_assert(std::is_same_v || std::is_same_v, - "Argument for readFloatTextImpl must be float or double"); - static_assert('a' > '.' && 'A' > '.' && '\n' < '.' && '\t' < '.' && '\'' < '.' && '"' < '.', - "Layout of char is not like ASCII"); //-V590 - - StringParser::ParseResult result; - x = StringParser::string_to_float(in.data, in.size, &result); - - return result == StringParser::PARSE_SUCCESS; -} - -template -bool try_read_int_text(T& x, const StringRef& buf) { - StringParser::ParseResult result; - x = StringParser::string_to_int(buf.data, buf.size, &result); - - return result == StringParser::PARSE_SUCCESS; -} - -bool read_date_text_impl(VecDateTimeValue& x, const StringRef& buf); - -bool read_date_text_impl(Int64& x, const StringRef& buf, const cctz::time_zone& local_time_zone); - -template -bool read_ipv4_text_impl(T& x, const StringRef& buf) { - static_assert(std::is_same_v); - bool res = IPv4Value::from_string(x, buf.data, buf.size); - return res; -} - -template -bool read_ipv6_text_impl(T& x, const StringRef& buf) { - static_assert(std::is_same_v); - bool res = IPv6Value::from_string(x, buf.data, buf.size); - return res; -} - -bool read_datetime_text_impl(VecDateTimeValue& x, const StringRef& buf); - -bool read_datetime_text_impl(Int64& x, const StringRef& buf, - const cctz::time_zone& local_time_zone); - -bool read_date_v2_text_impl(DateV2Value& x, const StringRef& buf); - -bool read_date_v2_text_impl(DateV2Value& x, const StringRef& buf, - const cctz::time_zone& local_time_zone); - -bool read_datetime_v2_text_impl(DateV2Value& x, const StringRef& buf, - UInt32 scale = -1); - -bool read_datetime_v2_text_impl(DateV2Value& x, const StringRef& buf, - const cctz::time_zone& local_time_zone, UInt32 scale = -1); - -template -StringParser::ParseResult read_decimal_text_impl(T& x, const StringRef& buf, UInt32 precision, - UInt32 scale) { - static_assert(IsDecimalNumber); - if constexpr (!std::is_same_v) { - // DecimalV3: uses the caller-supplied precision and scale. - // When called from from_olap_string with ignore_scale=true, scale=0 means the - // string is treated as an unscaled integer (e.g. "12345" → internal int 12345). - StringParser::ParseResult result = StringParser::PARSE_SUCCESS; - x.value = StringParser::string_to_decimal

(buf.data, (int)buf.size, precision, scale, - &result); - return result; - } else { - // DecimalV2: IGNORES the caller-supplied precision/scale and hardcodes - // DecimalV2Value::PRECISION (27) and DecimalV2Value::SCALE (9). - // This means from_olap_string's ignore_scale flag has no actual effect on DecimalV2 - // parsing today — the string "123.456000000" is always parsed with scale=9. - // Callers should still set ignore_scale=false for DecimalV2 for semantic correctness. - StringParser::ParseResult result = StringParser::PARSE_SUCCESS; - x = DecimalV2Value(StringParser::string_to_decimal( - buf.data, (int)buf.size, DecimalV2Value::PRECISION, DecimalV2Value::SCALE, - &result)); - return result; - } -} - -template -const char* try_read_first_int_text(T& x, const char* pos, const char* end) { - const int64_t len = end - pos; - int64_t i = 0; - while (i < len) { - if (pos[i] >= '0' && pos[i] <= '9') { - i++; - } else { - break; - } - } - const char* int_end = pos + i; - StringRef in((char*)pos, int_end - pos); - const size_t count = in.size; - try_read_int_text(x, in); - return pos + count; -} - -template -StringParser::ParseResult try_read_decimal_text(T& x, const StringRef& in, UInt32 precision, - UInt32 scale) { - return read_decimal_text_impl(x, in, precision, scale); -} - -template -bool try_read_ipv4_text(T& x, const StringRef& in) { - return read_ipv4_text_impl(x, in); -} - -template -bool try_read_ipv6_text(T& x, const StringRef& in) { - return read_ipv6_text_impl(x, in); -} - -template -bool try_read_datetime_text(T& x, const StringRef& in, const cctz::time_zone& local_time_zone) { - return read_datetime_text_impl(x, in, local_time_zone); -} - -template -bool try_read_date_text(T& x, const StringRef& in, const cctz::time_zone& local_time_zone) { - return read_date_text_impl(x, in, local_time_zone); -} - -template -bool try_read_date_v2_text(T& x, const StringRef& in, const cctz::time_zone& local_time_zone) { - return read_date_v2_text_impl(x, in, local_time_zone); -} - -template -bool try_read_datetime_v2_text(T& x, const StringRef& in, const cctz::time_zone& local_time_zone, - UInt32 scale) { - return read_datetime_v2_text_impl(x, in, local_time_zone, scale); -} - -bool inline try_read_bool_text(UInt8& x, const StringRef& buf) { - StringParser::ParseResult result; - x = StringParser::string_to_bool(buf.data, buf.size, &result); - return result == StringParser::PARSE_SUCCESS; -} - -#include "common/compile_check_end.h" - -} // namespace doris diff --git a/be/src/util/jsonb_document_cast.h b/be/src/util/jsonb_document_cast.h index 2fbeb10d15a2c5..870e43acfbb96d 100644 --- a/be/src/util/jsonb_document_cast.h +++ b/be/src/util/jsonb_document_cast.h @@ -287,29 +287,29 @@ struct JsonbCast { auto val = Decimal32 {jsonb_value->unpack()->val()}; UInt32 from_precision = jsonb_value->unpack()->precision; UInt32 from_scale = jsonb_value->unpack()->scale; - return CastToDecimal::from_decimalv3(val, from_precision, from_scale, to, to_precision, - to_scale, params); + return CastToDecimal::from_decimal(val, from_precision, from_scale, to, to_precision, + to_scale, params); } case JsonbType::T_Decimal64: { auto val = Decimal64 {jsonb_value->unpack()->val()}; UInt32 from_precision = jsonb_value->unpack()->precision; UInt32 from_scale = jsonb_value->unpack()->scale; - return CastToDecimal::from_decimalv3(val, from_precision, from_scale, to, to_precision, - to_scale, params); + return CastToDecimal::from_decimal(val, from_precision, from_scale, to, to_precision, + to_scale, params); } case JsonbType::T_Decimal128: { auto val = Decimal128V3 {jsonb_value->unpack()->val()}; UInt32 from_precision = jsonb_value->unpack()->precision; UInt32 from_scale = jsonb_value->unpack()->scale; - return CastToDecimal::from_decimalv3(val, from_precision, from_scale, to, to_precision, - to_scale, params); + return CastToDecimal::from_decimal(val, from_precision, from_scale, to, to_precision, + to_scale, params); } case JsonbType::T_Decimal256: { auto val = Decimal256 {jsonb_value->unpack()->val()}; UInt32 from_precision = jsonb_value->unpack()->precision; UInt32 from_scale = jsonb_value->unpack()->scale; - return CastToDecimal::from_decimalv3(val, from_precision, from_scale, to, to_precision, - to_scale, params); + return CastToDecimal::from_decimal(val, from_precision, from_scale, to, to_precision, + to_scale, params); } case JsonbType::T_String: { const auto* blob = jsonb_value->unpack(); diff --git a/be/test/common/check_overflow.cpp b/be/test/common/check_overflow.cpp index 7416446aed8910..20b82ca80fdee1 100644 --- a/be/test/common/check_overflow.cpp +++ b/be/test/common/check_overflow.cpp @@ -21,7 +21,7 @@ #include "core/types.h" #include "exec/common/arithmetic_overflow.h" -#include "util/io_helper.h" +#include "exprs/function/cast/cast_to_basic_number_common.h" namespace doris { @@ -37,7 +37,8 @@ struct CheckOverFlowTest : public testing::Test { Int128 to_i128(std::string str) { StringRef str_ref(str.data(), str.size()); Int128 val; - EXPECT_TRUE(try_read_int_text(val, str_ref)); + CastParameters params; + EXPECT_TRUE(CastToInt::from_string(str_ref, val, params)); return val; }; diff --git a/be/test/core/data_type_serde/data_type_serde_decimal_test.cpp b/be/test/core/data_type_serde/data_type_serde_decimal_test.cpp index 26d3d9235c9b9c..4394f012b7ceea 100644 --- a/be/test/core/data_type_serde/data_type_serde_decimal_test.cpp +++ b/be/test/core/data_type_serde/data_type_serde_decimal_test.cpp @@ -32,6 +32,7 @@ #include "core/data_type/common_data_type_serder_test.h" #include "core/data_type/common_data_type_test.h" #include "core/data_type/data_type.h" +#include "core/data_type/data_type_decimal.h" #include "core/data_type_serde/data_type_decimal_serde.h" #include "core/types.h" #include "testutil/test_util.h" @@ -331,4 +332,31 @@ TEST_F(DataTypeDecimalSerDeTest, ArrowMemNotAligned) { EXPECT_TRUE(st.ok()); } +TEST_F(DataTypeDecimalSerDeTest, JsonDeserializeKeepsUnderflowCompatibility) { + DataTypeDecimal decimal_type(1, 0); + Decimal32 expected {}; + ASSERT_TRUE(decimal_type.parse_from_string("-10", &expected)); + + auto column = ColumnDecimal32::create(0, 0); + DataTypeSerDe::FormatOptions options; + + std::string json_value = "-10"; + Slice slice(json_value.data(), json_value.size()); + auto st = serde_decimal32_1->deserialize_one_cell_from_json(*column, slice, options); + ASSERT_TRUE(st.ok()) << st; + ASSERT_EQ(column->size(), 1); + EXPECT_EQ(column->get_element(0), expected); + + std::vector slices; + slices.emplace_back(json_value.data(), json_value.size()); + auto vector_column = ColumnDecimal32::create(0, 0); + uint64_t num_deserialized = 0; + st = serde_decimal32_1->deserialize_column_from_json_vector(*vector_column, slices, + &num_deserialized, options); + ASSERT_TRUE(st.ok()) << st; + ASSERT_EQ(num_deserialized, 1); + ASSERT_EQ(vector_column->size(), 1); + EXPECT_EQ(vector_column->get_element(0), expected); +} + } // namespace doris diff --git a/be/test/core/data_type_serde/data_type_serde_number_test.cpp b/be/test/core/data_type_serde/data_type_serde_number_test.cpp index 49e7bcaf40daa4..b37b14a147dd59 100644 --- a/be/test/core/data_type_serde/data_type_serde_number_test.cpp +++ b/be/test/core/data_type_serde/data_type_serde_number_test.cpp @@ -50,6 +50,8 @@ static auto serde_int32 = std::make_shared>(); static auto serde_int64 = std::make_shared>(); static auto serde_int128 = std::make_shared>(); static auto serde_uint8 = std::make_shared>(); +static auto serde_datev2_num = std::make_shared>(); +static auto serde_datetimev2_num = std::make_shared>(); static ColumnFloat32::MutablePtr column_float32; static ColumnFloat64::MutablePtr column_float64; @@ -336,4 +338,38 @@ TEST_F(DataTypeNumberSerDeTest, ArrowMemNotAligned) { EXPECT_TRUE(st.ok()); } +TEST_F(DataTypeNumberSerDeTest, ArrowStringToUnsignedDateLikeTypes) { + std::vector strings = {"20240102", "20240102112233"}; + std::vector offsets = {0}; + int32_t total_length = 0; + for (const auto& str : strings) { + total_length += static_cast(str.length()); + offsets.push_back(total_length); + } + + std::string value_bytes; + value_bytes.reserve(total_length); + for (const auto& str : strings) { + value_bytes.append(str); + } + + auto value_buffer = arrow::Buffer::Wrap(value_bytes.data(), value_bytes.size()); + auto offset_buffer = arrow::Buffer::Wrap(offsets); + auto arr = std::make_shared(strings.size(), offset_buffer, value_buffer); + + auto datev2_column = ColumnVector::create(); + auto datetimev2_column = ColumnVector::create(); + cctz::time_zone tz; + + auto st = serde_datev2_num->read_column_from_arrow(*datev2_column, arr.get(), 0, 1, tz); + ASSERT_TRUE(st.ok()); + ASSERT_EQ(1, datev2_column->size()); + EXPECT_EQ(20240102U, datev2_column->get_data()[0].to_date_int_val()); + + st = serde_datetimev2_num->read_column_from_arrow(*datetimev2_column, arr.get(), 1, 2, tz); + ASSERT_TRUE(st.ok()); + ASSERT_EQ(1, datetimev2_column->size()); + EXPECT_EQ(20240102112233ULL, datetimev2_column->get_data()[0].to_date_int_val()); +} + } // namespace doris diff --git a/be/test/core/field_test.cpp b/be/test/core/field_test.cpp index 9f1a8cb0de5042..694b292049c383 100644 --- a/be/test/core/field_test.cpp +++ b/be/test/core/field_test.cpp @@ -28,8 +28,8 @@ #include "core/string_ref.h" #include "core/types.h" #include "core/value/vdatetime_value.h" +#include "exprs/function/cast/cast_to_string.h" #include "gtest/gtest_pred_impl.h" // IWYU pragma: keep -#include "util/io_helper.h" namespace doris { TEST(VFieldTest, field_string) { diff --git a/be/test/exec/column_type_convert_test.cpp b/be/test/exec/column_type_convert_test.cpp index 520f46669dff63..5178cddbd59d2f 100644 --- a/be/test/exec/column_type_convert_test.cpp +++ b/be/test/exec/column_type_convert_test.cpp @@ -25,6 +25,7 @@ #include "core/column/column_nullable.h" #include "core/column/column_string.h" #include "core/data_type/data_type_date.h" +#include "core/data_type/data_type_date_or_datetime_v2.h" #include "core/data_type/data_type_date_time.h" #include "core/data_type/data_type_decimal.h" #include "core/data_type/data_type_factory.hpp" @@ -1282,9 +1283,11 @@ TEST_F(ColumnTypeConverterTest, TestStringToIntegerTypes) { ASSERT_TRUE(st.ok()); ASSERT_EQ(4, nested_col.size()); - EXPECT_EQ("123456789012345678901234567890", int128_to_string(nested_col.get_data()[0])); + EXPECT_EQ("123456789012345678901234567890", + CastToString::from_uint128(nested_col.get_data()[0])); - EXPECT_EQ("-123456789012345678901234567890", int128_to_string(nested_col.get_data()[1])); + EXPECT_EQ("-123456789012345678901234567890", + CastToString::from_int128(nested_col.get_data()[1])); // Check zero EXPECT_EQ(0, nested_col.get_data()[2]); @@ -1440,7 +1443,8 @@ TEST_F(ColumnTypeConverterTest, TestDateTimeV2ToNumericConversions) { auto parse_datetimev2_str = [](const std::string& datetime_str) { DateV2Value x; StringRef buf((char*)datetime_str.data(), datetime_str.size()); - bool ok = read_datetime_v2_text_impl(x, buf, 6); + CastParameters params; + bool ok = CastToDatetimeV2::from_string_non_strict_mode(buf, x, nullptr, 6, params); CHECK(ok) << "parse_datetimev2_str failed for: " << datetime_str; return x; }; @@ -1587,6 +1591,118 @@ TEST_F(ColumnTypeConverterTest, TestDateTimeV2ToNumericConversions) { } } +TEST_F(ColumnTypeConverterTest, TestStringToDateLikeConversions) { + auto src_type = DataTypeFactory::instance().create_data_type(TYPE_STRING, false); + + { + auto dst_type = std::make_shared(); + auto nullable_dst_type = std::make_shared(dst_type); + auto converter = converter::ColumnTypeConverter::get_converter(src_type, nullable_dst_type, + converter::COMMON); + ASSERT_TRUE(converter->support()); + + auto src_col = ColumnString::create(); + src_col->insert_data("2024-01-02 03:04:05", 19); + src_col->insert_data("bad-date", 8); + + auto dst_col = nullable_dst_type->create_column(); + auto mutable_dst = dst_col->assume_mutable(); + auto& nullable_col = static_cast(*mutable_dst); + auto& nested_col = static_cast(nullable_col.get_nested_column()); + auto& null_map = nullable_col.get_null_map_data(); + null_map.resize_fill(src_col->size(), 0); + + Status st = converter->convert(reinterpret_cast(src_col), mutable_dst); + ASSERT_TRUE(st.ok()); + ASSERT_EQ(2, nested_col.size()); + char date_buf[64]; + nested_col.get_element(0).to_string(date_buf); + EXPECT_EQ("2024-01-02", std::string(date_buf)); + EXPECT_EQ(0, null_map[0]); + EXPECT_EQ(1, null_map[1]); + } + + { + auto dst_type = std::make_shared(); + auto nullable_dst_type = std::make_shared(dst_type); + auto converter = converter::ColumnTypeConverter::get_converter(src_type, nullable_dst_type, + converter::COMMON); + ASSERT_TRUE(converter->support()); + + auto src_col = ColumnString::create(); + src_col->insert_data("2024-05-06", 10); + src_col->insert_data("bad-datev2", 10); + + auto dst_col = nullable_dst_type->create_column(); + auto mutable_dst = dst_col->assume_mutable(); + auto& nullable_col = static_cast(*mutable_dst); + auto& nested_col = static_cast(nullable_col.get_nested_column()); + auto& null_map = nullable_col.get_null_map_data(); + null_map.resize_fill(src_col->size(), 0); + + Status st = converter->convert(reinterpret_cast(src_col), mutable_dst); + ASSERT_TRUE(st.ok()); + ASSERT_EQ(2, nested_col.size()); + EXPECT_EQ("2024-05-06", nested_col.get_element(0).to_string()); + EXPECT_EQ(0, null_map[0]); + EXPECT_EQ(1, null_map[1]); + } + + { + auto dst_type = std::make_shared(); + auto nullable_dst_type = std::make_shared(dst_type); + auto converter = converter::ColumnTypeConverter::get_converter(src_type, nullable_dst_type, + converter::COMMON); + ASSERT_TRUE(converter->support()); + + auto src_col = ColumnString::create(); + src_col->insert_data("2024-07-08 09:10:11", 19); + src_col->insert_data("bad-datetime", 12); + + auto dst_col = nullable_dst_type->create_column(); + auto mutable_dst = dst_col->assume_mutable(); + auto& nullable_col = static_cast(*mutable_dst); + auto& nested_col = static_cast(nullable_col.get_nested_column()); + auto& null_map = nullable_col.get_null_map_data(); + null_map.resize_fill(src_col->size(), 0); + + Status st = converter->convert(reinterpret_cast(src_col), mutable_dst); + ASSERT_TRUE(st.ok()); + ASSERT_EQ(2, nested_col.size()); + char datetime_buf[64]; + nested_col.get_element(0).to_string(datetime_buf); + EXPECT_EQ("2024-07-08 09:10:11", std::string(datetime_buf)); + EXPECT_EQ(0, null_map[0]); + EXPECT_EQ(1, null_map[1]); + } + + { + auto dst_type = std::make_shared(6); + auto nullable_dst_type = std::make_shared(dst_type); + auto converter = converter::ColumnTypeConverter::get_converter(src_type, nullable_dst_type, + converter::COMMON); + ASSERT_TRUE(converter->support()); + + auto src_col = ColumnString::create(); + src_col->insert_data("2024-09-10 11:12:13.123456", 26); + src_col->insert_data("bad-datetimev2", 14); + + auto dst_col = nullable_dst_type->create_column(); + auto mutable_dst = dst_col->assume_mutable(); + auto& nullable_col = static_cast(*mutable_dst); + auto& nested_col = static_cast(nullable_col.get_nested_column()); + auto& null_map = nullable_col.get_null_map_data(); + null_map.resize_fill(src_col->size(), 0); + + Status st = converter->convert(reinterpret_cast(src_col), mutable_dst); + ASSERT_TRUE(st.ok()); + ASSERT_EQ(2, nested_col.size()); + EXPECT_EQ("2024-09-10 11:12:13.123456", nested_col.get_element(0).to_string(6)); + EXPECT_EQ(0, null_map[0]); + EXPECT_EQ(1, null_map[1]); + } +} + TEST_F(ColumnTypeConverterTest, TestEmptyColumnConversions) { // Test empty column { @@ -1612,4 +1728,4 @@ TEST_F(ColumnTypeConverterTest, TestEmptyColumnConversions) { } } -} // namespace doris \ No newline at end of file +} // namespace doris diff --git a/be/test/exprs/aggregate/vec_sequence_match_test.cpp b/be/test/exprs/aggregate/vec_sequence_match_test.cpp index 424983e18111aa..7ce30e7eb1cff1 100644 --- a/be/test/exprs/aggregate/vec_sequence_match_test.cpp +++ b/be/test/exprs/aggregate/vec_sequence_match_test.cpp @@ -340,6 +340,108 @@ TEST_F(VSequenceMatchTest, testMatchReverseSortedSerializeMerge) { agg_function_sequence_match->destroy(place3); } +TEST_F(VSequenceMatchTest, testMatchTimeConditionSerialize) { + const int NUM_CONDS = 3; + auto column_pattern = ColumnString::create(); + for (int i = 0; i < NUM_CONDS; i++) { + column_pattern->insert(Field::create_field("(?1)(?t<=2)(?2)")); + } + + auto column_timestamp = ColumnDateTimeV2::create(); + for (int i = 0; i < NUM_CONDS; i++) { + VecDateTimeValue time_value; + time_value.unchecked_set_time(2022, 11, 2, 0, 0, i); + column_timestamp->insert_data((char*)&time_value, 0); + } + + auto column_event1 = ColumnUInt8::create(); + column_event1->insert(Field::create_field(1)); + column_event1->insert(Field::create_field(0)); + column_event1->insert(Field::create_field(0)); + + auto column_event2 = ColumnUInt8::create(); + column_event2->insert(Field::create_field(0)); + column_event2->insert(Field::create_field(0)); + column_event2->insert(Field::create_field(1)); + + auto column_event3 = ColumnUInt8::create(); + column_event3->insert(Field::create_field(0)); + column_event3->insert(Field::create_field(1)); + column_event3->insert(Field::create_field(0)); + + std::unique_ptr memory(new char[agg_function_sequence_match->size_of_data()]); + AggregateDataPtr place = memory.get(); + agg_function_sequence_match->create(place); + const IColumn* column[5] = {column_pattern.get(), column_timestamp.get(), column_event1.get(), + column_event2.get(), column_event3.get()}; + for (int i = 0; i < NUM_CONDS; i++) { + agg_function_sequence_match->add(place, column, i, arena); + } + + ColumnString buf; + VectorBufferWriter buf_writer(buf); + agg_function_sequence_match->serialize(place, buf_writer); + buf_writer.commit(); + + std::unique_ptr memory2(new char[agg_function_sequence_match->size_of_data()]); + AggregateDataPtr place2 = memory2.get(); + agg_function_sequence_match->create(place2); + + VectorBufferReader buf_reader(buf.get_data_at(0)); + agg_function_sequence_match->deserialize(place2, buf_reader, arena); + + ColumnUInt8 column_result; + agg_function_sequence_match->insert_result_into(place, column_result); + EXPECT_EQ(column_result.get_data()[0], 1); + agg_function_sequence_match->destroy(place); + + ColumnUInt8 column_result2; + agg_function_sequence_match->insert_result_into(place2, column_result2); + EXPECT_EQ(column_result2.get_data()[0], 1); + agg_function_sequence_match->destroy(place2); +} + +TEST_F(VSequenceMatchTest, testMatchMalformedTimeConditionNumber) { + const int NUM_CONDS = 2; + auto column_pattern = ColumnString::create(); + for (int i = 0; i < NUM_CONDS; i++) { + column_pattern->insert(Field::create_field("(?1)(?t<=x)(?2)")); + } + + auto column_timestamp = ColumnDateTimeV2::create(); + for (int i = 0; i < NUM_CONDS; i++) { + VecDateTimeValue time_value; + time_value.unchecked_set_time(2022, 11, 2, 0, 0, i); + column_timestamp->insert_data((char*)&time_value, 0); + } + + auto column_event1 = ColumnUInt8::create(); + column_event1->insert(Field::create_field(1)); + column_event1->insert(Field::create_field(0)); + + auto column_event2 = ColumnUInt8::create(); + column_event2->insert(Field::create_field(0)); + column_event2->insert(Field::create_field(1)); + + auto column_event3 = ColumnUInt8::create(); + column_event3->insert(Field::create_field(0)); + column_event3->insert(Field::create_field(0)); + + std::unique_ptr memory(new char[agg_function_sequence_match->size_of_data()]); + AggregateDataPtr place = memory.get(); + agg_function_sequence_match->create(place); + const IColumn* column[5] = {column_pattern.get(), column_timestamp.get(), column_event1.get(), + column_event2.get(), column_event3.get()}; + for (int i = 0; i < NUM_CONDS; i++) { + agg_function_sequence_match->add(place, column, i, arena); + } + + ColumnUInt8 column_result; + agg_function_sequence_match->insert_result_into(place, column_result); + EXPECT_EQ(column_result.get_data()[0], 0); + agg_function_sequence_match->destroy(place); +} + TEST_F(VSequenceMatchTest, testCountReverseSortedSerializeMerge) { AggregateFunctionSimpleFactory factory = AggregateFunctionSimpleFactory::instance(); DataTypes data_types = {std::make_shared(), @@ -429,4 +531,4 @@ TEST_F(VSequenceMatchTest, testCountReverseSortedSerializeMerge) { agg_function_sequence_count->destroy(place3); } -} // namespace doris \ No newline at end of file +} // namespace doris diff --git a/be/test/exprs/function/cast/cast_to_float_double.cpp b/be/test/exprs/function/cast/cast_to_float_double.cpp index c0c9c28052993d..54656d944c4e28 100644 --- a/be/test/exprs/function/cast/cast_to_float_double.cpp +++ b/be/test/exprs/function/cast/cast_to_float_double.cpp @@ -1701,6 +1701,25 @@ TEST_F(FunctionCastToFloatTest, test_from_decimal) { from_decimal_test_func(); } + +TEST_F(FunctionCastToFloatTest, test_decimalv2_keeps_scale_when_cast_to_float_and_double) { + InputTypeSet input_types = {{TYPE_DECIMALV2, 2, 27}}; + + DataSet float_data_set = { + {{DECIMALV2VALUEFROMDOUBLE(1.23)}, Float32(1.23)}, + {{DECIMALV2VALUEFROMDOUBLE(-1.23)}, Float32(-1.23)}, + }; + check_function_for_cast(input_types, float_data_set, -1, -1); + check_function_for_cast(input_types, float_data_set, -1, -1); + + DataSet double_data_set = { + {{DECIMALV2VALUEFROMDOUBLE(1.23)}, Float64(1.23)}, + {{DECIMALV2VALUEFROMDOUBLE(-1.23)}, Float64(-1.23)}, + }; + check_function_for_cast(input_types, double_data_set, -1, -1); + check_function_for_cast(input_types, double_data_set, -1, -1); +} + TEST_F(FunctionCastToFloatTest, test_from_decimal_overflow) { from_decimal_overflow_test_func(); } @@ -1724,4 +1743,4 @@ TEST_F(FunctionCastToFloatTest, test_from_time) { from_time_test_func(); from_time_test_func(); } -} // namespace doris \ No newline at end of file +} // namespace doris diff --git a/be/test/exprs/function/cast/cast_to_string_api_test.cpp b/be/test/exprs/function/cast/cast_to_string_api_test.cpp index db010e88d0be5a..b8bd0c1255bee8 100644 --- a/be/test/exprs/function/cast/cast_to_string_api_test.cpp +++ b/be/test/exprs/function/cast/cast_to_string_api_test.cpp @@ -159,4 +159,16 @@ TEST(CastToStringTest, test) { } } -} // namespace doris \ No newline at end of file +TEST(CastToStringTest, from_int128_overloads) { + EXPECT_EQ(CastToString::from_int128(static_cast(-1234567890123456789LL)), + "-1234567890123456789"); + EXPECT_EQ(CastToString::from_uint128(static_cast(12345678901234567890ULL)), + "12345678901234567890"); + + UInt128 value; + value.items[0] = 0x0123456789ABCDEFULL; + value.items[1] = 0x0FEDCBA987654321ULL; + EXPECT_EQ(CastToString::from_uint128(value), "0123456789abcdeffedcba987654321"); +} + +} // namespace doris