Skip to content

Commit

Permalink
Expose min/max values for Decimal128/256 and improve docs (#6992)
Browse files Browse the repository at this point in the history
* Expose min/max values for Decimal128/256 and improve docs

* fix test
  • Loading branch information
alamb authored Jan 25, 2025
1 parent 6340d46 commit d6fa078
Showing 1 changed file with 108 additions and 34 deletions.
142 changes: 108 additions & 34 deletions arrow-data/src/decimal.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,13 @@
// specific language governing permissions and limitations
// under the License.

//! Defines maximum and minimum values for `decimal256` and `decimal128` types for varying precisions.
//! Maximum and minimum values for [`Decimal256`] and [`Decimal128`].
//!
//! Also provides functions to validate if a given decimal value is within the valid range of the decimal type.
//! Also provides functions to validate if a given decimal value is within
//! the valid range of the decimal type.
//!
//! [`Decimal128`]: arrow_schema::DataType::Decimal128
//! [`Decimal256`]: arrow_schema::DataType::Decimal256
use arrow_buffer::i256;
use arrow_schema::ArrowError;

Expand All @@ -27,12 +30,27 @@ pub use arrow_schema::{
DECIMAL_DEFAULT_SCALE,
};

/// MAX decimal256 value of little-endian format for each precision.
/// Each element is the max value of signed 256-bit integer for the specified precision which
/// is encoded to the 32-byte width format of little-endian.
/// `MAX_DECIMAL256_FOR_EACH_PRECISION[p]` holds the maximum [`i256`] value that can
/// be stored in a [`Decimal256`] value of precision `p`.
///
/// # Notes
///
/// Each element is the max value of signed 256-bit integer for the specified
/// precision which is encoded to the 32-byte width format of little-endian.
///
/// The first element is unused and is inserted so that we can look up using
/// precision as the index without the need to subtract 1 first.
pub(crate) const MAX_DECIMAL_BYTES_FOR_LARGER_EACH_PRECISION: [i256; 77] = [
///
/// # Example
/// ```
/// # use arrow_buffer::i256;
/// # use arrow_data::decimal::MAX_DECIMAL256_FOR_EACH_PRECISION;
/// assert_eq!(MAX_DECIMAL256_FOR_EACH_PRECISION[3], i256::from(999));
/// ```
///
/// [`Decimal256`]: arrow_schema::DataType::Decimal256
/// [`i256`]: arrow_buffer::i256
pub const MAX_DECIMAL256_FOR_EACH_PRECISION: [i256; 77] = [
i256::from_i128(0_i128), // unused first element
i256::from_le_bytes([
9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
Expand Down Expand Up @@ -340,12 +358,26 @@ pub(crate) const MAX_DECIMAL_BYTES_FOR_LARGER_EACH_PRECISION: [i256; 77] = [
]),
];

/// MIN decimal256 value of little-endian format for each precision.
/// `MIN_DECIMAL256_FOR_EACH_PRECISION[p]` holds the minimum [`i256`] value that can
/// be stored in a [`Decimal256`] value of precision `p`.
///
/// # Notes
///
/// Each element is the min value of signed 256-bit integer for the specified precision which
/// is encoded to the 32-byte width format of little-endian.
///
/// The first element is unused and is inserted so that we can look up using
/// precision as the index without the need to subtract 1 first.
pub(crate) const MIN_DECIMAL_BYTES_FOR_LARGER_EACH_PRECISION: [i256; 77] = [
///
/// # Example
/// ```
/// # use arrow_buffer::i256;
/// # use arrow_data::decimal::MIN_DECIMAL256_FOR_EACH_PRECISION;
/// assert_eq!(MIN_DECIMAL256_FOR_EACH_PRECISION[3], i256::from(-999));
/// ```
///
/// [`i256`]: arrow_buffer::i256
/// [`Decimal256`]: arrow_schema::DataType::Decimal256
pub const MIN_DECIMAL256_FOR_EACH_PRECISION: [i256; 77] = [
i256::from_i128(0_i128), // unused first element
i256::from_le_bytes([
247, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
Expand Down Expand Up @@ -654,7 +686,13 @@ pub(crate) const MIN_DECIMAL_BYTES_FOR_LARGER_EACH_PRECISION: [i256; 77] = [
];

/// `MAX_DECIMAL_FOR_EACH_PRECISION[p-1]` holds the maximum `i128` value that can
/// be stored in [arrow_schema::DataType::Decimal128] value of precision `p`
/// be stored in a [`Decimal128`] value of precision `p`
///
/// [`Decimal128`]: arrow_schema::DataType::Decimal128
#[deprecated(
since = "54.1.0",
note = "Use MAX_DECIMAL128_FOR_EACH_PRECISION (note indexes are different)"
)]
#[allow(dead_code)] // no longer used but is part of our public API
pub const MAX_DECIMAL_FOR_EACH_PRECISION: [i128; 38] = [
9,
Expand Down Expand Up @@ -698,8 +736,14 @@ pub const MAX_DECIMAL_FOR_EACH_PRECISION: [i128; 38] = [
];

/// `MIN_DECIMAL_FOR_EACH_PRECISION[p-1]` holds the minimum `i128` value that can
/// be stored in a [arrow_schema::DataType::Decimal128] value of precision `p`
/// be stored in a [`Decimal128`] value of precision `p`
///
/// [`Decimal128`]: arrow_schema::DataType::Decimal128
#[allow(dead_code)] // no longer used but is part of our public API
#[deprecated(
since = "54.1.0",
note = "Use MIN_DECIMAL128_FOR_EACH_PRECISION (note indexes are different)"
)]
pub const MIN_DECIMAL_FOR_EACH_PRECISION: [i128; 38] = [
-9,
-99,
Expand Down Expand Up @@ -741,11 +785,22 @@ pub const MIN_DECIMAL_FOR_EACH_PRECISION: [i128; 38] = [
-99999999999999999999999999999999999999,
];

/// `MAX_DECIMAL_FOR_EACH_PRECISION_ONE_BASED[p]` holds the maximum `i128` value that can
/// be stored in [arrow_schema::DataType::Decimal128] value of precision `p`.
/// `MAX_DECIMAL128_FOR_EACH_PRECISION[p]` holds the maximum `i128` value that can
/// be stored in a [`Decimal128`] value of precision `p`.
///
/// # Notes
///
/// The first element is unused and is inserted so that we can look up using
/// precision as the index without the need to subtract 1 first.
pub(crate) const MAX_DECIMAL_FOR_EACH_PRECISION_ONE_BASED: [i128; 39] = [
///
/// # Example
/// ```
/// # use arrow_data::decimal::MAX_DECIMAL128_FOR_EACH_PRECISION;
/// assert_eq!(MAX_DECIMAL128_FOR_EACH_PRECISION[3], 999);
/// ```
///
/// [`Decimal128`]: arrow_schema::DataType::Decimal128
pub const MAX_DECIMAL128_FOR_EACH_PRECISION: [i128; 39] = [
0, // unused first element
9,
99,
Expand Down Expand Up @@ -788,10 +843,21 @@ pub(crate) const MAX_DECIMAL_FOR_EACH_PRECISION_ONE_BASED: [i128; 39] = [
];

/// `MIN_DECIMAL128_FOR_EACH_PRECISION[p]` holds the minimum `i128` value that can
/// be stored in a [arrow_schema::DataType::Decimal128] value of precision `p`.
/// be stored in a [`Decimal128`] value of precision `p`.
///
/// # Notes
///
/// The first element is unused and is inserted so that we can look up using
/// precision as the index without the need to subtract 1 first.
pub(crate) const MIN_DECIMAL_FOR_EACH_PRECISION_ONE_BASED: [i128; 39] = [
///
/// # Example
/// ```
/// # use arrow_data::decimal::MIN_DECIMAL128_FOR_EACH_PRECISION;
/// assert_eq!(MIN_DECIMAL128_FOR_EACH_PRECISION[3], -999);
/// ```
///
/// [`Decimal128`]: arrow_schema::DataType::Decimal128
pub const MIN_DECIMAL128_FOR_EACH_PRECISION: [i128; 39] = [
0, // unused first element
-9,
-99,
Expand Down Expand Up @@ -834,67 +900,75 @@ pub(crate) const MIN_DECIMAL_FOR_EACH_PRECISION_ONE_BASED: [i128; 39] = [
];

/// Validates that the specified `i128` value can be properly
/// interpreted as a Decimal number with precision `precision`
/// interpreted as a [`Decimal128`] number with precision `precision`
///
/// [`Decimal128`]: arrow_schema::DataType::Decimal128
#[inline]
pub fn validate_decimal_precision(value: i128, precision: u8) -> Result<(), ArrowError> {
if precision > DECIMAL128_MAX_PRECISION {
return Err(ArrowError::InvalidArgumentError(format!(
"Max precision of a Decimal128 is {DECIMAL128_MAX_PRECISION}, but got {precision}",
)));
}
if value > MAX_DECIMAL_FOR_EACH_PRECISION_ONE_BASED[precision as usize] {
if value > MAX_DECIMAL128_FOR_EACH_PRECISION[precision as usize] {
Err(ArrowError::InvalidArgumentError(format!(
"{value} is too large to store in a Decimal128 of precision {precision}. Max is {}",
MAX_DECIMAL_FOR_EACH_PRECISION_ONE_BASED[precision as usize]
MAX_DECIMAL128_FOR_EACH_PRECISION[precision as usize]
)))
} else if value < MIN_DECIMAL_FOR_EACH_PRECISION_ONE_BASED[precision as usize] {
} else if value < MIN_DECIMAL128_FOR_EACH_PRECISION[precision as usize] {
Err(ArrowError::InvalidArgumentError(format!(
"{value} is too small to store in a Decimal128 of precision {precision}. Min is {}",
MIN_DECIMAL_FOR_EACH_PRECISION_ONE_BASED[precision as usize]
MIN_DECIMAL128_FOR_EACH_PRECISION[precision as usize]
)))
} else {
Ok(())
}
}

/// Determines whether the specified `i128` value can be properly
/// interpreted as a Decimal number with precision `precision`
/// Returns true if the specified `i128` value can be properly
/// interpreted as a [`Decimal128`] number with precision `precision`
///
/// [`Decimal128`]: arrow_schema::DataType::Decimal128
#[inline]
pub fn is_validate_decimal_precision(value: i128, precision: u8) -> bool {
precision <= DECIMAL128_MAX_PRECISION
&& value >= MIN_DECIMAL_FOR_EACH_PRECISION_ONE_BASED[precision as usize]
&& value <= MAX_DECIMAL_FOR_EACH_PRECISION_ONE_BASED[precision as usize]
&& value >= MIN_DECIMAL128_FOR_EACH_PRECISION[precision as usize]
&& value <= MAX_DECIMAL128_FOR_EACH_PRECISION[precision as usize]
}

/// Validates that the specified `i256` value can be properly
/// interpreted as a Decimal256 number with precision `precision`
/// interpreted as a [`Decimal256`] number with precision `precision`
///
/// [`Decimal256`]: arrow_schema::DataType::Decimal256
#[inline]
pub fn validate_decimal256_precision(value: i256, precision: u8) -> Result<(), ArrowError> {
if precision > DECIMAL256_MAX_PRECISION {
return Err(ArrowError::InvalidArgumentError(format!(
"Max precision of a Decimal256 is {DECIMAL256_MAX_PRECISION}, but got {precision}",
)));
}
if value > MAX_DECIMAL_BYTES_FOR_LARGER_EACH_PRECISION[precision as usize] {
if value > MAX_DECIMAL256_FOR_EACH_PRECISION[precision as usize] {
Err(ArrowError::InvalidArgumentError(format!(
"{value:?} is too large to store in a Decimal256 of precision {precision}. Max is {:?}",
MAX_DECIMAL_BYTES_FOR_LARGER_EACH_PRECISION[precision as usize]
MAX_DECIMAL256_FOR_EACH_PRECISION[precision as usize]
)))
} else if value < MIN_DECIMAL_BYTES_FOR_LARGER_EACH_PRECISION[precision as usize] {
} else if value < MIN_DECIMAL256_FOR_EACH_PRECISION[precision as usize] {
Err(ArrowError::InvalidArgumentError(format!(
"{value:?} is too small to store in a Decimal256 of precision {precision}. Min is {:?}",
MIN_DECIMAL_BYTES_FOR_LARGER_EACH_PRECISION[precision as usize]
MIN_DECIMAL256_FOR_EACH_PRECISION[precision as usize]
)))
} else {
Ok(())
}
}

/// Determines whether the specified `i256` value can be properly
/// interpreted as a Decimal256 number with precision `precision`
/// Returns true if the specified `i256` value can be properly
/// interpreted as a [`Decimal256`] number with precision `precision`
///
/// [`Decimal256`]: arrow_schema::DataType::Decimal256
#[inline]
pub fn is_validate_decimal256_precision(value: i256, precision: u8) -> bool {
precision <= DECIMAL256_MAX_PRECISION
&& value >= MIN_DECIMAL_BYTES_FOR_LARGER_EACH_PRECISION[precision as usize]
&& value <= MAX_DECIMAL_BYTES_FOR_LARGER_EACH_PRECISION[precision as usize]
&& value >= MIN_DECIMAL256_FOR_EACH_PRECISION[precision as usize]
&& value <= MAX_DECIMAL256_FOR_EACH_PRECISION[precision as usize]
}

0 comments on commit d6fa078

Please sign in to comment.