diff --git a/parquet/src/arrow/schema/extension.rs b/parquet/src/arrow/schema/extension.rs index a2e9c32dee8c..0244c1b6bb99 100644 --- a/parquet/src/arrow/schema/extension.rs +++ b/parquet/src/arrow/schema/extension.rs @@ -112,9 +112,7 @@ pub(crate) fn has_extension_type(parquet_type: &Type) -> bool { pub(crate) fn logical_type_for_struct(field: &Field) -> Option { use parquet_variant_compute::VariantType; if field.has_valid_extension_type::() { - Some(LogicalType::Variant { - specification_version: None, - }) + Some(LogicalType::variant(None)) } else { None } @@ -167,13 +165,13 @@ pub(crate) fn logical_type_for_binary(field: &Field) -> Option { match field.extension_type_name() { Some(n) if n == WkbType::NAME => match field.try_extension_type::() { Ok(wkb_type) => match wkb_type.metadata().type_hint() { - WkbTypeHint::Geometry => Some(LogicalType::Geometry { - crs: wkb_type.metadata().crs.as_ref().map(|c| c.to_string()), - }), - WkbTypeHint::Geography => Some(LogicalType::Geography { - crs: wkb_type.metadata().crs.as_ref().map(|c| c.to_string()), - algorithm: wkb_type.metadata().algorithm.map(|a| a.into()), - }), + WkbTypeHint::Geometry => Some(LogicalType::geometry( + wkb_type.metadata().crs.as_ref().map(|c| c.to_string()), + )), + WkbTypeHint::Geography => Some(LogicalType::geography( + wkb_type.metadata().crs.as_ref().map(|c| c.to_string()), + wkb_type.metadata().algorithm.map(|a| a.into()), + )), }, Err(_e) => None, }, diff --git a/parquet/src/arrow/schema/mod.rs b/parquet/src/arrow/schema/mod.rs index b2b93687ba89..7fe6fbc9d93d 100644 --- a/parquet/src/arrow/schema/mod.rs +++ b/parquet/src/arrow/schema/mod.rs @@ -556,18 +556,12 @@ fn arrow_to_parquet_type(field: &Field, coerce_types: bool) -> Result { .with_id(id) .build(), DataType::Int8 => Type::primitive_type_builder(name, PhysicalType::INT32) - .with_logical_type(Some(LogicalType::Integer { - bit_width: 8, - is_signed: true, - })) + .with_logical_type(Some(LogicalType::integer(8, true))) .with_repetition(repetition) .with_id(id) .build(), DataType::Int16 => Type::primitive_type_builder(name, PhysicalType::INT32) - .with_logical_type(Some(LogicalType::Integer { - bit_width: 16, - is_signed: true, - })) + .with_logical_type(Some(LogicalType::integer(16, true))) .with_repetition(repetition) .with_id(id) .build(), @@ -580,34 +574,22 @@ fn arrow_to_parquet_type(field: &Field, coerce_types: bool) -> Result { .with_id(id) .build(), DataType::UInt8 => Type::primitive_type_builder(name, PhysicalType::INT32) - .with_logical_type(Some(LogicalType::Integer { - bit_width: 8, - is_signed: false, - })) + .with_logical_type(Some(LogicalType::integer(8, false))) .with_repetition(repetition) .with_id(id) .build(), DataType::UInt16 => Type::primitive_type_builder(name, PhysicalType::INT32) - .with_logical_type(Some(LogicalType::Integer { - bit_width: 16, - is_signed: false, - })) + .with_logical_type(Some(LogicalType::integer(16, false))) .with_repetition(repetition) .with_id(id) .build(), DataType::UInt32 => Type::primitive_type_builder(name, PhysicalType::INT32) - .with_logical_type(Some(LogicalType::Integer { - bit_width: 32, - is_signed: false, - })) + .with_logical_type(Some(LogicalType::integer(32, false))) .with_repetition(repetition) .with_id(id) .build(), DataType::UInt64 => Type::primitive_type_builder(name, PhysicalType::INT64) - .with_logical_type(Some(LogicalType::Integer { - bit_width: 64, - is_signed: false, - })) + .with_logical_type(Some(LogicalType::integer(64, false))) .with_repetition(repetition) .with_id(id) .build(), @@ -634,16 +616,16 @@ fn arrow_to_parquet_type(field: &Field, coerce_types: bool) -> Result { } DataType::Timestamp(time_unit, tz) => { Type::primitive_type_builder(name, PhysicalType::INT64) - .with_logical_type(Some(LogicalType::Timestamp { + .with_logical_type(Some(LogicalType::timestamp( // If timezone set, values are normalized to UTC timezone - is_adjusted_to_u_t_c: matches!(tz, Some(z) if !z.as_ref().is_empty()), - unit: match time_unit { + matches!(tz, Some(z) if !z.as_ref().is_empty()), + match time_unit { TimeUnit::Second => unreachable!(), TimeUnit::Millisecond => ParquetTimeUnit::MILLIS, TimeUnit::Microsecond => ParquetTimeUnit::MICROS, TimeUnit::Nanosecond => ParquetTimeUnit::NANOS, }, - })) + ))) .with_repetition(repetition) .with_id(id) .build() @@ -675,25 +657,25 @@ fn arrow_to_parquet_type(field: &Field, coerce_types: bool) -> Result { .build() } DataType::Time32(unit) => Type::primitive_type_builder(name, PhysicalType::INT32) - .with_logical_type(Some(LogicalType::Time { - is_adjusted_to_u_t_c: field.metadata().contains_key("adjusted_to_utc"), - unit: match unit { + .with_logical_type(Some(LogicalType::time( + field.metadata().contains_key("adjusted_to_utc"), + match unit { TimeUnit::Millisecond => ParquetTimeUnit::MILLIS, u => unreachable!("Invalid unit for Time32: {:?}", u), }, - })) + ))) .with_repetition(repetition) .with_id(id) .build(), DataType::Time64(unit) => Type::primitive_type_builder(name, PhysicalType::INT64) - .with_logical_type(Some(LogicalType::Time { - is_adjusted_to_u_t_c: field.metadata().contains_key("adjusted_to_utc"), - unit: match unit { + .with_logical_type(Some(LogicalType::time( + field.metadata().contains_key("adjusted_to_utc"), + match unit { TimeUnit::Microsecond => ParquetTimeUnit::MICROS, TimeUnit::Nanosecond => ParquetTimeUnit::NANOS, u => unreachable!("Invalid unit for Time64: {:?}", u), }, - })) + ))) .with_repetition(repetition) .with_id(id) .build(), @@ -749,10 +731,7 @@ fn arrow_to_parquet_type(field: &Field, coerce_types: bool) -> Result { .with_repetition(repetition) .with_id(id) .with_length(length) - .with_logical_type(Some(LogicalType::Decimal { - scale: *scale as i32, - precision: *precision as i32, - })) + .with_logical_type(Some(LogicalType::decimal(*scale as i32, *precision as i32))) .with_precision(*precision as i32) .with_scale(*scale as i32) .build() diff --git a/parquet/src/basic.rs b/parquet/src/basic.rs index 17e5b9c321bb..796779358cd2 100644 --- a/parquet/src/basic.rs +++ b/parquet/src/basic.rs @@ -314,6 +314,54 @@ pub enum LogicalType { }, } +impl LogicalType { + /// Create a [`LogicalType::Integer`] variant with the given `bit_width` and `is_signed` + pub fn integer(bit_width: i8, is_signed: bool) -> Self { + Self::Integer { + bit_width, + is_signed, + } + } + + /// Create a [`LogicalType::Decimal`] variant with the given `scale` and `precision` + pub fn decimal(scale: i32, precision: i32) -> Self { + Self::Decimal { scale, precision } + } + + /// Create a [`LogicalType::Time`] variant with the given `is_adjusted_to_u_t_c` and `unit` + pub fn time(is_adjusted_to_u_t_c: bool, unit: TimeUnit) -> Self { + Self::Time { + is_adjusted_to_u_t_c, + unit, + } + } + + /// Create a [`LogicalType::Timestamp`] variant with the given `is_adjusted_to_u_t_c` and `unit` + pub fn timestamp(is_adjusted_to_u_t_c: bool, unit: TimeUnit) -> Self { + Self::Timestamp { + is_adjusted_to_u_t_c, + unit, + } + } + + /// Create a [`LogicalType::Variant`] variant with the given `specification_version` + pub fn variant(specification_version: Option) -> Self { + Self::Variant { + specification_version, + } + } + + /// Create a [`LogicalType::Geometry`] variant with the given `crs` + pub fn geometry(crs: Option) -> Self { + Self::Geometry { crs } + } + + /// Create a [`LogicalType::Geography`] variant with the given `crs` and `algorithm` + pub fn geography(crs: Option, algorithm: Option) -> Self { + Self::Geography { crs, algorithm } + } +} + impl<'a, R: ThriftCompactInputProtocol<'a>> ReadThrift<'a, R> for LogicalType { fn read_thrift(prot: &mut R) -> Result { let field_ident = prot.read_field_begin(0)?; @@ -339,10 +387,7 @@ impl<'a, R: ThriftCompactInputProtocol<'a>> ReadThrift<'a, R> for LogicalType { } 5 => { let val = DecimalType::read_thrift(&mut *prot)?; - Self::Decimal { - scale: val.scale, - precision: val.precision, - } + Self::decimal(val.scale, val.precision) } 6 => { prot.skip_empty_struct()?; @@ -350,24 +395,15 @@ impl<'a, R: ThriftCompactInputProtocol<'a>> ReadThrift<'a, R> for LogicalType { } 7 => { let val = TimeType::read_thrift(&mut *prot)?; - Self::Time { - is_adjusted_to_u_t_c: val.is_adjusted_to_u_t_c, - unit: val.unit, - } + Self::time(val.is_adjusted_to_u_t_c, val.unit) } 8 => { let val = TimestampType::read_thrift(&mut *prot)?; - Self::Timestamp { - is_adjusted_to_u_t_c: val.is_adjusted_to_u_t_c, - unit: val.unit, - } + Self::timestamp(val.is_adjusted_to_u_t_c, val.unit) } 10 => { let val = IntType::read_thrift(&mut *prot)?; - Self::Integer { - is_signed: val.is_signed, - bit_width: val.bit_width, - } + Self::integer(val.bit_width, val.is_signed) } 11 => { prot.skip_empty_struct()?; @@ -391,15 +427,11 @@ impl<'a, R: ThriftCompactInputProtocol<'a>> ReadThrift<'a, R> for LogicalType { } 16 => { let val = VariantType::read_thrift(&mut *prot)?; - Self::Variant { - specification_version: val.specification_version, - } + Self::variant(val.specification_version) } 17 => { let val = GeometryType::read_thrift(&mut *prot)?; - Self::Geometry { - crs: val.crs.map(|s| s.to_owned()), - } + Self::geometry(val.crs.map(|s| s.to_owned())) } 18 => { let val = GeographyType::read_thrift(&mut *prot)?; @@ -408,10 +440,7 @@ impl<'a, R: ThriftCompactInputProtocol<'a>> ReadThrift<'a, R> for LogicalType { let algorithm = val .algorithm .unwrap_or(EdgeInterpolationAlgorithm::SPHERICAL); - Self::Geography { - crs: val.crs.map(|s| s.to_owned()), - algorithm: Some(algorithm), - } + Self::geography(val.crs.map(|s| s.to_owned()), Some(algorithm)) } _ => { prot.skip(field_ident.field_type)?; @@ -1511,26 +1540,14 @@ impl str::FromStr for LogicalType { fn from_str(s: &str) -> Result { match s { // The type is a placeholder that gets updated elsewhere - "INTEGER" => Ok(LogicalType::Integer { - bit_width: 8, - is_signed: false, - }), + "INTEGER" => Ok(LogicalType::integer(8, false)), "MAP" => Ok(LogicalType::Map), "LIST" => Ok(LogicalType::List), "ENUM" => Ok(LogicalType::Enum), - "DECIMAL" => Ok(LogicalType::Decimal { - precision: -1, - scale: -1, - }), + "DECIMAL" => Ok(LogicalType::decimal(-1, -1)), "DATE" => Ok(LogicalType::Date), - "TIME" => Ok(LogicalType::Time { - is_adjusted_to_u_t_c: false, - unit: TimeUnit::MILLIS, - }), - "TIMESTAMP" => Ok(LogicalType::Timestamp { - is_adjusted_to_u_t_c: false, - unit: TimeUnit::MILLIS, - }), + "TIME" => Ok(LogicalType::time(false, TimeUnit::MILLIS)), + "TIMESTAMP" => Ok(LogicalType::timestamp(false, TimeUnit::MILLIS)), "STRING" => Ok(LogicalType::String), "JSON" => Ok(LogicalType::Json), "BSON" => Ok(LogicalType::Bson), @@ -1540,11 +1557,12 @@ impl str::FromStr for LogicalType { "Interval parquet logical type not yet supported" )), "FLOAT16" => Ok(LogicalType::Float16), - "GEOMETRY" => Ok(LogicalType::Geometry { crs: None }), - "GEOGRAPHY" => Ok(LogicalType::Geography { - crs: None, - algorithm: Some(EdgeInterpolationAlgorithm::SPHERICAL), - }), + "VARIANT" => Ok(LogicalType::variant(None)), + "GEOMETRY" => Ok(LogicalType::geometry(None)), + "GEOGRAPHY" => Ok(LogicalType::geography( + None, + Some(EdgeInterpolationAlgorithm::SPHERICAL), + )), other => Err(general_err!("Invalid parquet logical type {}", other)), } } @@ -1860,10 +1878,7 @@ mod tests { let logical_none: Option = None; assert_eq!(ConvertedType::from(logical_none), ConvertedType::NONE); assert_eq!( - ConvertedType::from(Some(LogicalType::Decimal { - precision: 20, - scale: 5 - })), + ConvertedType::from(Some(LogicalType::decimal(5, 20))), ConvertedType::DECIMAL ); assert_eq!( @@ -1883,101 +1898,59 @@ mod tests { ConvertedType::DATE ); assert_eq!( - ConvertedType::from(Some(LogicalType::Time { - unit: TimeUnit::MILLIS, - is_adjusted_to_u_t_c: true, - })), + ConvertedType::from(Some(LogicalType::time(true, TimeUnit::MILLIS))), ConvertedType::TIME_MILLIS ); assert_eq!( - ConvertedType::from(Some(LogicalType::Time { - unit: TimeUnit::MICROS, - is_adjusted_to_u_t_c: true, - })), + ConvertedType::from(Some(LogicalType::time(true, TimeUnit::MICROS))), ConvertedType::TIME_MICROS ); assert_eq!( - ConvertedType::from(Some(LogicalType::Time { - unit: TimeUnit::NANOS, - is_adjusted_to_u_t_c: false, - })), + ConvertedType::from(Some(LogicalType::time(false, TimeUnit::NANOS))), ConvertedType::NONE ); assert_eq!( - ConvertedType::from(Some(LogicalType::Timestamp { - unit: TimeUnit::MILLIS, - is_adjusted_to_u_t_c: true, - })), + ConvertedType::from(Some(LogicalType::timestamp(true, TimeUnit::MILLIS))), ConvertedType::TIMESTAMP_MILLIS ); assert_eq!( - ConvertedType::from(Some(LogicalType::Timestamp { - unit: TimeUnit::MICROS, - is_adjusted_to_u_t_c: false, - })), + ConvertedType::from(Some(LogicalType::timestamp(false, TimeUnit::MICROS))), ConvertedType::TIMESTAMP_MICROS ); assert_eq!( - ConvertedType::from(Some(LogicalType::Timestamp { - unit: TimeUnit::NANOS, - is_adjusted_to_u_t_c: false, - })), + ConvertedType::from(Some(LogicalType::timestamp(false, TimeUnit::NANOS))), ConvertedType::NONE ); assert_eq!( - ConvertedType::from(Some(LogicalType::Integer { - bit_width: 8, - is_signed: false - })), + ConvertedType::from(Some(LogicalType::integer(8, false))), ConvertedType::UINT_8 ); assert_eq!( - ConvertedType::from(Some(LogicalType::Integer { - bit_width: 8, - is_signed: true - })), + ConvertedType::from(Some(LogicalType::integer(8, true))), ConvertedType::INT_8 ); assert_eq!( - ConvertedType::from(Some(LogicalType::Integer { - bit_width: 16, - is_signed: false - })), + ConvertedType::from(Some(LogicalType::integer(16, false))), ConvertedType::UINT_16 ); assert_eq!( - ConvertedType::from(Some(LogicalType::Integer { - bit_width: 16, - is_signed: true - })), + ConvertedType::from(Some(LogicalType::integer(16, true))), ConvertedType::INT_16 ); assert_eq!( - ConvertedType::from(Some(LogicalType::Integer { - bit_width: 32, - is_signed: false - })), + ConvertedType::from(Some(LogicalType::integer(32, false))), ConvertedType::UINT_32 ); assert_eq!( - ConvertedType::from(Some(LogicalType::Integer { - bit_width: 32, - is_signed: true - })), + ConvertedType::from(Some(LogicalType::integer(32, true))), ConvertedType::INT_32 ); assert_eq!( - ConvertedType::from(Some(LogicalType::Integer { - bit_width: 64, - is_signed: false - })), + ConvertedType::from(Some(LogicalType::integer(64, false))), ConvertedType::UINT_64 ); assert_eq!( - ConvertedType::from(Some(LogicalType::Integer { - bit_width: 64, - is_signed: true - })), + ConvertedType::from(Some(LogicalType::integer(64, true))), ConvertedType::INT_64 ); assert_eq!( @@ -2001,14 +1974,15 @@ mod tests { ConvertedType::NONE ); assert_eq!( - ConvertedType::from(Some(LogicalType::Geometry { crs: None })), + ConvertedType::from(Some(LogicalType::variant(None))), ConvertedType::NONE ); assert_eq!( - ConvertedType::from(Some(LogicalType::Geography { - crs: None, - algorithm: Some(EdgeInterpolationAlgorithm::default()), - })), + ConvertedType::from(Some(LogicalType::geometry(None))), + ConvertedType::NONE + ); + assert_eq!( + ConvertedType::from(Some(LogicalType::geography(None, Some(Default::default())))), ConvertedType::NONE ); assert_eq!( @@ -2023,81 +1997,42 @@ mod tests { test_roundtrip(LogicalType::Map); test_roundtrip(LogicalType::List); test_roundtrip(LogicalType::Enum); - test_roundtrip(LogicalType::Decimal { - scale: 0, - precision: 20, - }); + test_roundtrip(LogicalType::decimal(0, 20)); test_roundtrip(LogicalType::Date); - test_roundtrip(LogicalType::Time { - is_adjusted_to_u_t_c: true, - unit: TimeUnit::MICROS, - }); - test_roundtrip(LogicalType::Time { - is_adjusted_to_u_t_c: false, - unit: TimeUnit::MILLIS, - }); - test_roundtrip(LogicalType::Time { - is_adjusted_to_u_t_c: false, - unit: TimeUnit::NANOS, - }); - test_roundtrip(LogicalType::Timestamp { - is_adjusted_to_u_t_c: false, - unit: TimeUnit::MICROS, - }); - test_roundtrip(LogicalType::Timestamp { - is_adjusted_to_u_t_c: true, - unit: TimeUnit::MILLIS, - }); - test_roundtrip(LogicalType::Timestamp { - is_adjusted_to_u_t_c: true, - unit: TimeUnit::NANOS, - }); - test_roundtrip(LogicalType::Integer { - bit_width: 8, - is_signed: true, - }); - test_roundtrip(LogicalType::Integer { - bit_width: 16, - is_signed: false, - }); - test_roundtrip(LogicalType::Integer { - bit_width: 32, - is_signed: true, - }); - test_roundtrip(LogicalType::Integer { - bit_width: 64, - is_signed: false, - }); + test_roundtrip(LogicalType::time(true, TimeUnit::MICROS)); + test_roundtrip(LogicalType::time(false, TimeUnit::MILLIS)); + test_roundtrip(LogicalType::time(false, TimeUnit::NANOS)); + test_roundtrip(LogicalType::timestamp(false, TimeUnit::MICROS)); + test_roundtrip(LogicalType::timestamp(true, TimeUnit::MILLIS)); + test_roundtrip(LogicalType::timestamp(true, TimeUnit::NANOS)); + test_roundtrip(LogicalType::integer(8, true)); + test_roundtrip(LogicalType::integer(16, false)); + test_roundtrip(LogicalType::integer(32, true)); + test_roundtrip(LogicalType::integer(64, false)); test_roundtrip(LogicalType::Json); test_roundtrip(LogicalType::Bson); test_roundtrip(LogicalType::Uuid); test_roundtrip(LogicalType::Float16); - test_roundtrip(LogicalType::Variant { - specification_version: Some(1), - }); - test_roundtrip(LogicalType::Variant { - specification_version: None, - }); - test_roundtrip(LogicalType::Geometry { - crs: Some("foo".to_owned()), - }); - test_roundtrip(LogicalType::Geometry { crs: None }); - test_roundtrip(LogicalType::Geography { - crs: Some("foo".to_owned()), - algorithm: Some(EdgeInterpolationAlgorithm::ANDOYER), - }); - test_roundtrip(LogicalType::Geography { - crs: None, - algorithm: Some(EdgeInterpolationAlgorithm::KARNEY), - }); - test_roundtrip(LogicalType::Geography { - crs: Some("foo".to_owned()), - algorithm: Some(EdgeInterpolationAlgorithm::SPHERICAL), - }); - test_roundtrip(LogicalType::Geography { - crs: None, - algorithm: Some(EdgeInterpolationAlgorithm::SPHERICAL), - }); + test_roundtrip(LogicalType::variant(Some(1))); + test_roundtrip(LogicalType::variant(None)); + test_roundtrip(LogicalType::geometry(Some("foo".to_owned()))); + test_roundtrip(LogicalType::geometry(None)); + test_roundtrip(LogicalType::geography( + Some("foo".to_owned()), + Some(EdgeInterpolationAlgorithm::ANDOYER), + )); + test_roundtrip(LogicalType::geography( + None, + Some(EdgeInterpolationAlgorithm::KARNEY), + )); + test_roundtrip(LogicalType::geography( + Some("foo".to_owned()), + Some(EdgeInterpolationAlgorithm::SPHERICAL), + )); + test_roundtrip(LogicalType::geography( + None, + Some(EdgeInterpolationAlgorithm::SPHERICAL), + )); } #[test] @@ -2291,72 +2226,27 @@ mod tests { LogicalType::Bson, LogicalType::Enum, LogicalType::Uuid, - LogicalType::Integer { - bit_width: 8, - is_signed: false, - }, - LogicalType::Integer { - bit_width: 16, - is_signed: false, - }, - LogicalType::Integer { - bit_width: 32, - is_signed: false, - }, - LogicalType::Integer { - bit_width: 64, - is_signed: false, - }, + LogicalType::integer(8, false), + LogicalType::integer(16, false), + LogicalType::integer(32, false), + LogicalType::integer(64, false), ]; check_sort_order(unsigned, SortOrder::UNSIGNED); // Signed comparison (physical type does not matter) let signed = vec![ - LogicalType::Integer { - bit_width: 8, - is_signed: true, - }, - LogicalType::Integer { - bit_width: 8, - is_signed: true, - }, - LogicalType::Integer { - bit_width: 8, - is_signed: true, - }, - LogicalType::Integer { - bit_width: 8, - is_signed: true, - }, - LogicalType::Decimal { - scale: 20, - precision: 4, - }, + LogicalType::integer(8, true), + LogicalType::integer(16, true), + LogicalType::integer(32, true), + LogicalType::integer(64, true), + LogicalType::decimal(20, 4), LogicalType::Date, - LogicalType::Time { - is_adjusted_to_u_t_c: false, - unit: TimeUnit::MILLIS, - }, - LogicalType::Time { - is_adjusted_to_u_t_c: false, - unit: TimeUnit::MICROS, - }, - LogicalType::Time { - is_adjusted_to_u_t_c: true, - unit: TimeUnit::NANOS, - }, - LogicalType::Timestamp { - is_adjusted_to_u_t_c: false, - unit: TimeUnit::MILLIS, - }, - LogicalType::Timestamp { - is_adjusted_to_u_t_c: false, - unit: TimeUnit::MICROS, - }, - LogicalType::Timestamp { - is_adjusted_to_u_t_c: true, - unit: TimeUnit::NANOS, - }, + LogicalType::time(false, TimeUnit::MILLIS), + LogicalType::time(false, TimeUnit::MICROS), + LogicalType::time(true, TimeUnit::NANOS), + LogicalType::timestamp(false, TimeUnit::MILLIS), + LogicalType::timestamp(false, TimeUnit::MICROS), + LogicalType::timestamp(true, TimeUnit::NANOS), LogicalType::Float16, ]; check_sort_order(signed, SortOrder::SIGNED); @@ -2365,11 +2255,9 @@ mod tests { let undefined = vec![ LogicalType::List, LogicalType::Map, - LogicalType::Geometry { crs: None }, - LogicalType::Geography { - crs: None, - algorithm: Some(EdgeInterpolationAlgorithm::default()), - }, + LogicalType::variant(None), + LogicalType::geometry(None), + LogicalType::geography(None, Some(Default::default())), ]; check_sort_order(undefined, SortOrder::UNDEFINED); } diff --git a/parquet/src/column/writer/mod.rs b/parquet/src/column/writer/mod.rs index 5d14ac6856f9..595eadbc90f2 100644 --- a/parquet/src/column/writer/mod.rs +++ b/parquet/src/column/writer/mod.rs @@ -4449,10 +4449,7 @@ mod tests { let path = ColumnPath::from("col"); let tpe = SchemaType::primitive_type_builder("col", T::get_physical_type()) .with_length(16) - .with_logical_type(Some(LogicalType::Decimal { - scale: 2, - precision: 3, - })) + .with_logical_type(Some(LogicalType::decimal(2, 3))) .with_scale(2) .with_precision(3) .build() diff --git a/parquet/src/file/writer.rs b/parquet/src/file/writer.rs index ec4d02a47d79..942013ea6238 100644 --- a/parquet/src/file/writer.rs +++ b/parquet/src/file/writer.rs @@ -1280,10 +1280,7 @@ mod tests { #[test] fn test_file_writer_v2_with_metadata() { let file = tempfile::tempfile().unwrap(); - let field_logical_type = Some(LogicalType::Integer { - bit_width: 8, - is_signed: false, - }); + let field_logical_type = Some(LogicalType::integer(8, false)); let field = Arc::new( types::Type::primitive_type_builder("col1", Type::INT32) .with_logical_type(field_logical_type.clone()) diff --git a/parquet/src/geospatial/accumulator.rs b/parquet/src/geospatial/accumulator.rs index d25a47930f6e..3aad1060e05c 100644 --- a/parquet/src/geospatial/accumulator.rs +++ b/parquet/src/geospatial/accumulator.rs @@ -253,7 +253,7 @@ mod test { // Check that we have a working accumulator for Geometry let parquet_type = Type::primitive_type_builder("geom", crate::basic::Type::BYTE_ARRAY) - .with_logical_type(Some(LogicalType::Geometry { crs: None })) + .with_logical_type(Some(LogicalType::geometry(None))) .build() .unwrap(); let column_descr = @@ -271,10 +271,7 @@ mod test { // Check that we have a void accumulator for Geography let parquet_type = Type::primitive_type_builder("geom", crate::basic::Type::BYTE_ARRAY) - .with_logical_type(Some(LogicalType::Geography { - crs: None, - algorithm: None, - })) + .with_logical_type(Some(LogicalType::geography(None, None))) .build() .unwrap(); let column_descr = diff --git a/parquet/src/schema/parser.rs b/parquet/src/schema/parser.rs index 36cf2dc5175e..071962aa4ed8 100644 --- a/parquet/src/schema/parser.rs +++ b/parquet/src/schema/parser.rs @@ -353,7 +353,7 @@ impl Parser<'_> { } else { scale = 0 } - logical = Some(LogicalType::Decimal { scale, precision }); + logical = Some(LogicalType::decimal(scale, precision)); converted = ConvertedType::from(logical.clone()); } } @@ -371,10 +371,7 @@ impl Parser<'_> { "Failed to parse timezone info for TIME type", )?; assert_token(self.tokenizer.next(), ")")?; - logical = Some(LogicalType::Time { - is_adjusted_to_u_t_c, - unit, - }); + logical = Some(LogicalType::time(is_adjusted_to_u_t_c, unit)); converted = ConvertedType::from(logical.clone()); } else { // Invalid token for unit @@ -396,10 +393,7 @@ impl Parser<'_> { "Failed to parse timezone info for TIMESTAMP type", )?; assert_token(self.tokenizer.next(), ")")?; - logical = Some(LogicalType::Timestamp { - is_adjusted_to_u_t_c, - unit, - }); + logical = Some(LogicalType::timestamp(is_adjusted_to_u_t_c, unit)); converted = ConvertedType::from(logical.clone()); } else { // Invalid token for unit @@ -446,10 +440,7 @@ impl Parser<'_> { "Failed to parse is_signed for INTEGER type", )?; assert_token(self.tokenizer.next(), ")")?; - logical = Some(LogicalType::Integer { - bit_width, - is_signed, - }); + logical = Some(LogicalType::integer(bit_width, is_signed)); converted = ConvertedType::from(logical.clone()); } else { // Invalid token for unit @@ -833,10 +824,7 @@ mod tests { .with_fields(vec![ Arc::new( Type::primitive_type_builder("f1", PhysicalType::FIXED_LEN_BYTE_ARRAY) - .with_logical_type(Some(LogicalType::Decimal { - precision: 9, - scale: 3, - })) + .with_logical_type(Some(LogicalType::decimal(3, 9))) .with_converted_type(ConvertedType::DECIMAL) .with_length(5) .with_precision(9) @@ -846,10 +834,7 @@ mod tests { ), Arc::new( Type::primitive_type_builder("f2", PhysicalType::FIXED_LEN_BYTE_ARRAY) - .with_logical_type(Some(LogicalType::Decimal { - precision: 38, - scale: 18, - })) + .with_logical_type(Some(LogicalType::decimal(18, 38))) .with_converted_type(ConvertedType::DECIMAL) .with_length(16) .with_precision(38) @@ -1038,20 +1023,14 @@ mod tests { Arc::new( Type::primitive_type_builder("_1", PhysicalType::INT32) .with_repetition(Repetition::REQUIRED) - .with_logical_type(Some(LogicalType::Integer { - bit_width: 8, - is_signed: true, - })) + .with_logical_type(Some(LogicalType::integer(8, true))) .build() .unwrap(), ), Arc::new( Type::primitive_type_builder("_2", PhysicalType::INT32) .with_repetition(Repetition::REQUIRED) - .with_logical_type(Some(LogicalType::Integer { - bit_width: 16, - is_signed: false, - })) + .with_logical_type(Some(LogicalType::integer(16, false))) .build() .unwrap(), ), @@ -1075,37 +1054,25 @@ mod tests { ), Arc::new( Type::primitive_type_builder("_6", PhysicalType::INT32) - .with_logical_type(Some(LogicalType::Time { - unit: TimeUnit::MILLIS, - is_adjusted_to_u_t_c: false, - })) + .with_logical_type(Some(LogicalType::time(false, TimeUnit::MILLIS))) .build() .unwrap(), ), Arc::new( Type::primitive_type_builder("_7", PhysicalType::INT64) - .with_logical_type(Some(LogicalType::Time { - unit: TimeUnit::MICROS, - is_adjusted_to_u_t_c: true, - })) + .with_logical_type(Some(LogicalType::time(true, TimeUnit::MICROS))) .build() .unwrap(), ), Arc::new( Type::primitive_type_builder("_8", PhysicalType::INT64) - .with_logical_type(Some(LogicalType::Timestamp { - unit: TimeUnit::MILLIS, - is_adjusted_to_u_t_c: true, - })) + .with_logical_type(Some(LogicalType::timestamp(true, TimeUnit::MILLIS))) .build() .unwrap(), ), Arc::new( Type::primitive_type_builder("_9", PhysicalType::INT64) - .with_logical_type(Some(LogicalType::Timestamp { - unit: TimeUnit::NANOS, - is_adjusted_to_u_t_c: false, - })) + .with_logical_type(Some(LogicalType::timestamp(false, TimeUnit::NANOS))) .build() .unwrap(), ), diff --git a/parquet/src/schema/printer.rs b/parquet/src/schema/printer.rs index 68398005b6a5..dbeddcfc128c 100644 --- a/parquet/src/schema/printer.rs +++ b/parquet/src/schema/printer.rs @@ -457,7 +457,7 @@ mod tests { use std::sync::Arc; - use crate::basic::{EdgeInterpolationAlgorithm, Repetition, Type as PhysicalType}; + use crate::basic::{Repetition, Type as PhysicalType}; use crate::errors::Result; use crate::schema::parser::parse_message_type; @@ -543,10 +543,7 @@ mod tests { "field", None, PhysicalType::INT32, - Some(LogicalType::Integer { - bit_width: 32, - is_signed: true, - }), + Some(LogicalType::integer(32, true)), ConvertedType::NONE, Repetition::REQUIRED, ) @@ -558,10 +555,7 @@ mod tests { "field", None, PhysicalType::INT32, - Some(LogicalType::Integer { - bit_width: 8, - is_signed: false, - }), + Some(LogicalType::integer(8, false)), ConvertedType::NONE, Repetition::OPTIONAL, ) @@ -573,10 +567,7 @@ mod tests { "field", None, PhysicalType::INT32, - Some(LogicalType::Integer { - bit_width: 16, - is_signed: true, - }), + Some(LogicalType::integer(16, true)), ConvertedType::INT_16, Repetition::REPEATED, ) @@ -588,10 +579,7 @@ mod tests { "field", Some(42), PhysicalType::INT32, - Some(LogicalType::Integer { - bit_width: 16, - is_signed: true, - }), + Some(LogicalType::integer(16, true)), ConvertedType::INT_16, Repetition::REPEATED, ) @@ -651,10 +639,7 @@ mod tests { "field", None, PhysicalType::INT64, - Some(LogicalType::Timestamp { - is_adjusted_to_u_t_c: true, - unit: TimeUnit::MILLIS, - }), + Some(LogicalType::timestamp(true, TimeUnit::MILLIS)), ConvertedType::NONE, Repetition::REQUIRED, ) @@ -678,10 +663,7 @@ mod tests { "field", None, PhysicalType::INT32, - Some(LogicalType::Time { - unit: TimeUnit::MILLIS, - is_adjusted_to_u_t_c: false, - }), + Some(LogicalType::time(false, TimeUnit::MILLIS)), ConvertedType::TIME_MILLIS, Repetition::REQUIRED, ) @@ -693,10 +675,7 @@ mod tests { "field", Some(42), PhysicalType::INT32, - Some(LogicalType::Time { - unit: TimeUnit::MILLIS, - is_adjusted_to_u_t_c: false, - }), + Some(LogicalType::time(false, TimeUnit::MILLIS)), ConvertedType::TIME_MILLIS, Repetition::REQUIRED, ) @@ -792,7 +771,7 @@ mod tests { "field", None, PhysicalType::BYTE_ARRAY, - Some(LogicalType::Geometry { crs: None }), + Some(LogicalType::geometry(None)), ConvertedType::NONE, Repetition::REQUIRED, ) @@ -804,9 +783,7 @@ mod tests { "field", None, PhysicalType::BYTE_ARRAY, - Some(LogicalType::Geometry { - crs: Some("non-missing CRS".to_string()), - }), + Some(LogicalType::geometry(Some("non-missing CRS".to_string()))), ConvertedType::NONE, Repetition::REQUIRED, ) @@ -818,10 +795,7 @@ mod tests { "field", None, PhysicalType::BYTE_ARRAY, - Some(LogicalType::Geography { - crs: None, - algorithm: Some(EdgeInterpolationAlgorithm::default()), - }), + Some(LogicalType::geography(None, Some(Default::default()))), ConvertedType::NONE, Repetition::REQUIRED, ) @@ -833,10 +807,10 @@ mod tests { "field", None, PhysicalType::BYTE_ARRAY, - Some(LogicalType::Geography { - crs: Some("non-missing CRS".to_string()), - algorithm: Some(EdgeInterpolationAlgorithm::default()), - }), + Some(LogicalType::geography( + Some("non-missing CRS".to_string()), + Some(Default::default()), + )), ConvertedType::NONE, Repetition::REQUIRED, ) @@ -887,10 +861,7 @@ mod tests { ), ( Type::primitive_type_builder("decimal", PhysicalType::FIXED_LEN_BYTE_ARRAY) - .with_logical_type(Some(LogicalType::Decimal { - precision: 32, - scale: 20, - })) + .with_logical_type(Some(LogicalType::decimal(20, 32))) .with_precision(32) .with_scale(20) .with_length(decimal_length_from_precision(32)) @@ -1178,10 +1149,7 @@ mod tests { fn test_print_and_parse_decimal() { let f1 = Type::primitive_type_builder("f1", PhysicalType::INT32) .with_repetition(Repetition::OPTIONAL) - .with_logical_type(Some(LogicalType::Decimal { - precision: 9, - scale: 2, - })) + .with_logical_type(Some(LogicalType::decimal(2, 9))) .with_converted_type(ConvertedType::DECIMAL) .with_precision(9) .with_scale(2) @@ -1190,10 +1158,7 @@ mod tests { let f2 = Type::primitive_type_builder("f2", PhysicalType::INT32) .with_repetition(Repetition::OPTIONAL) - .with_logical_type(Some(LogicalType::Decimal { - precision: 9, - scale: 0, - })) + .with_logical_type(Some(LogicalType::decimal(0, 9))) .with_converted_type(ConvertedType::DECIMAL) .with_precision(9) .with_scale(0) diff --git a/parquet/src/schema/types.rs b/parquet/src/schema/types.rs index 0d504e16fc28..d8b3456d4723 100644 --- a/parquet/src/schema/types.rs +++ b/parquet/src/schema/types.rs @@ -1449,10 +1449,7 @@ mod tests { #[test] fn test_primitive_type() { let mut result = Type::primitive_type_builder("foo", PhysicalType::INT32) - .with_logical_type(Some(LogicalType::Integer { - bit_width: 32, - is_signed: true, - })) + .with_logical_type(Some(LogicalType::integer(32, true))) .with_id(Some(0)) .build(); assert!(result.is_ok()); @@ -1464,10 +1461,7 @@ mod tests { assert_eq!(basic_info.repetition(), Repetition::OPTIONAL); assert_eq!( basic_info.logical_type_ref(), - Some(&LogicalType::Integer { - bit_width: 32, - is_signed: true - }) + Some(&LogicalType::integer(32, true)) ); assert_eq!(basic_info.converted_type(), ConvertedType::INT_32); assert_eq!(basic_info.id(), 0); @@ -1482,10 +1476,7 @@ mod tests { // Test illegal inputs with logical type result = Type::primitive_type_builder("foo", PhysicalType::INT64) .with_repetition(Repetition::REPEATED) - .with_logical_type(Some(LogicalType::Integer { - is_signed: true, - bit_width: 8, - })) + .with_logical_type(Some(LogicalType::integer(8, true))) .build(); assert!(result.is_err()); if let Err(e) = result { @@ -1524,10 +1515,7 @@ mod tests { result = Type::primitive_type_builder("foo", PhysicalType::BYTE_ARRAY) .with_repetition(Repetition::REQUIRED) - .with_logical_type(Some(LogicalType::Decimal { - scale: 32, - precision: 12, - })) + .with_logical_type(Some(LogicalType::decimal(32, 12))) .with_precision(-1) .with_scale(-1) .build(); diff --git a/parquet/src/variant.rs b/parquet/src/variant.rs index cdbdd849683d..55df08673611 100644 --- a/parquet/src/variant.rs +++ b/parquet/src/variant.rs @@ -199,9 +199,7 @@ mod tests { // data should have been written with the Variant logical type assert_eq!( field.get_basic_info().logical_type_ref(), - Some(&crate::basic::LogicalType::Variant { - specification_version: None - }) + Some(&crate::basic::LogicalType::variant(None)) ); } diff --git a/parquet_derive/src/parquet_field.rs b/parquet_derive/src/parquet_field.rs index 7473f2305517..17b8d8543725 100644 --- a/parquet_derive/src/parquet_field.rs +++ b/parquet_derive/src/parquet_field.rs @@ -693,42 +693,18 @@ impl Type { match last_part.trim() { "bool" => quote! { None }, - "u8" => quote! { Some(LogicalType::Integer { - bit_width: 8, - is_signed: false, - }) }, - "u16" => quote! { Some(LogicalType::Integer { - bit_width: 16, - is_signed: false, - }) }, - "u32" => quote! { Some(LogicalType::Integer { - bit_width: 32, - is_signed: false, - }) }, - "u64" => quote! { Some(LogicalType::Integer { - bit_width: 64, - is_signed: false, - }) }, - "i8" => quote! { Some(LogicalType::Integer { - bit_width: 8, - is_signed: true, - }) }, - "i16" => quote! { Some(LogicalType::Integer { - bit_width: 16, - is_signed: true, - }) }, + "u8" => quote! { Some(LogicalType::integer(8, false)) }, + "u16" => quote! { Some(LogicalType::integer(16, false)) }, + "u32" => quote! { Some(LogicalType::integer(32, false)) }, + "u64" => quote! { Some(LogicalType::integer(64, false)) }, + "i8" => quote! { Some(LogicalType::integer(8, true)) }, + "i16" => quote! { Some(LogicalType::integer(16, true)) }, "i32" | "i64" => quote! { None }, "usize" => { - quote! { Some(LogicalType::Integer { - bit_width: usize::BITS as i8, - is_signed: false - }) } + quote! { Some(LogicalType::integer(usize::BITS as i8, false)) } } "isize" => { - quote! { Some(LogicalType::Integer { - bit_width: usize::BITS as i8, - is_signed: true - }) } + quote! { Some(LogicalType::integer(usize::BITS as i8, true)) } } "NaiveDate" => quote! { Some(LogicalType::Date) }, "NaiveDateTime" => quote! { None },