Skip to content

Commit

Permalink
feat: add map feature with logical array support for HashMap (#223)
Browse files Browse the repository at this point in the history
  • Loading branch information
mbrobbel authored Aug 16, 2024
1 parent e86d298 commit 2f937bb
Show file tree
Hide file tree
Showing 7 changed files with 151 additions and 12 deletions.
3 changes: 2 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ arrow-rs = [
]
chrono = ["dep:chrono"]
derive = ["dep:narrow-derive"]
map = ["derive"]
uuid = ["dep:uuid"]

[dependencies]
Expand Down Expand Up @@ -72,7 +73,7 @@ harness = false

[[example]]
name = "parquet"
required-features = ["arrow-rs", "chrono", "derive", "uuid"]
required-features = ["arrow-rs", "chrono", "derive", "map", "uuid"]

[[example]]
name = "basic"
Expand Down
8 changes: 8 additions & 0 deletions examples/parquet.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
use std::collections::HashMap;

use chrono::{DateTime, NaiveTime, Utc};

#[rustversion::attr(nightly, allow(non_local_definitions))]
Expand Down Expand Up @@ -29,6 +31,7 @@ fn main() {
i: VariableSizeBinary,
j: DateTime<Utc>,
k: NaiveTime,
l: Option<HashMap<String, Vec<u8>>>,
}
let input = [
Foo {
Expand All @@ -43,6 +46,10 @@ fn main() {
i: vec![1, 3, 3, 7].into(),
j: DateTime::UNIX_EPOCH,
k: NaiveTime::MIN,
l: Some(HashMap::from_iter([(
"a".to_string(),
vec![1, 2, 3, 4, 42],
)])),
},
Foo {
a: 42,
Expand All @@ -56,6 +63,7 @@ fn main() {
i: vec![4, 2].into(),
j: Utc::now(),
k: Utc::now().time(),
l: None,
},
];

Expand Down
2 changes: 1 addition & 1 deletion src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -86,5 +86,5 @@ pub mod arrow;
pub use narrow_derive::ArrayType;

// This allows using the `ArrayType` derive macro inside this crate: in tests
// (with the `derive` feature) and in the `map` module (with the `map` feature).
#[cfg(all(test, feature = "derive"))]
#[cfg(any(all(test, feature = "derive"), feature = "map"))]
extern crate self as narrow;
6 changes: 3 additions & 3 deletions src/logical/chrono.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ use super::{LogicalArray, LogicalArrayType};

impl ArrayType<DateTime<Utc>> for DateTime<Utc> {
type Array<Buffer: BufferType, OffsetItem: OffsetElement, UnionLayout: UnionType> =
LogicalArray<DateTime<Utc>, false, Buffer, OffsetItem, UnionLayout>;
LogicalArray<Self, false, Buffer, OffsetItem, UnionLayout>;
}

impl ArrayType<DateTime<Utc>> for Option<DateTime<Utc>> {
Expand All @@ -36,7 +36,7 @@ pub type DateTimeArray<const NULLABLE: bool = false, Buffer = crate::buffer::Vec

impl ArrayType<NaiveDateTime> for NaiveDateTime {
type Array<Buffer: BufferType, OffsetItem: OffsetElement, UnionLayout: UnionType> =
LogicalArray<NaiveDateTime, false, Buffer, OffsetItem, UnionLayout>;
LogicalArray<Self, false, Buffer, OffsetItem, UnionLayout>;
}

impl ArrayType<NaiveDateTime> for Option<NaiveDateTime> {
Expand All @@ -62,7 +62,7 @@ pub type NaiveDateTimeArray<const NULLABLE: bool = false, Buffer = crate::buffer

impl ArrayType<NaiveTime> for NaiveTime {
type Array<Buffer: BufferType, OffsetItem: OffsetElement, UnionLayout: UnionType> =
LogicalArray<NaiveTime, false, Buffer, OffsetItem, UnionLayout>;
LogicalArray<Self, false, Buffer, OffsetItem, UnionLayout>;
}

impl ArrayType<NaiveTime> for Option<NaiveTime> {
Expand Down
126 changes: 126 additions & 0 deletions src/logical/map.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
#![allow(missing_docs)]

use std::{
collections::HashMap,
hash::{BuildHasher, Hash},
};

use crate::{
array::{self, UnionType},
buffer::BufferType,
offset::OffsetElement,
ArrayType,
};

use super::{LogicalArray, LogicalArrayType};

/// Non-nullable [`HashMap`] items are stored in a [`LogicalArray`].
impl<K, V, S> array::ArrayType<HashMap<K, V, S>> for HashMap<K, V, S>
where
    K: array::ArrayType<K> + Eq + Hash,
    V: array::ArrayType<V>,
    S: BuildHasher + Default,
{
    type Array<Buffer: BufferType, OffsetItem: OffsetElement, UnionLayout: UnionType> =
        LogicalArray<Self, false, Buffer, OffsetItem, UnionLayout>;
}

/// Nullable [`HashMap`] items (`Option<HashMap<..>>`) are stored in a
/// [`LogicalArray`] with its nullability flag set to `true`.
impl<K, V, S> array::ArrayType<HashMap<K, V, S>> for Option<HashMap<K, V, S>>
where
    K: array::ArrayType<K> + Eq + Hash,
    V: array::ArrayType<V>,
    S: BuildHasher + Default,
{
    type Array<Buffer: BufferType, OffsetItem: OffsetElement, UnionLayout: UnionType> =
        LogicalArray<HashMap<K, V, S>, true, Buffer, OffsetItem, UnionLayout>;
}

// TODO(mbrobbel): support HashMap<K, Option<V>>

/// A single key-value entry of a map.
///
/// [`HashMap`] items are converted to and from a `Vec` of these entries by the
/// [`LogicalArrayType`] implementation in this module.
#[derive(ArrayType)]
pub struct KeyValue<K, V> {
/// The key of the entry.
key: K,
/// The value stored for the key.
value: V,
}

/// Maps [`HashMap`] items to and from a list of [`KeyValue`] entries.
impl<K, V, S> LogicalArrayType<HashMap<K, V, S>> for HashMap<K, V, S>
where
    K: array::ArrayType<K> + Eq + Hash,
    V: array::ArrayType<V>,
    S: BuildHasher + Default,
{
    /// Every map is represented by the list of its entries.
    type ArrayType = Vec<KeyValue<K, V>>;

    /// Rebuilds a map by collecting the `(key, value)` pair of every entry.
    fn from_array_type(item: Self::ArrayType) -> Self {
        let pairs = item.into_iter().map(|entry| (entry.key, entry.value));
        pairs.collect()
    }

    /// Turns the map into a list with one [`KeyValue`] per entry, in the
    /// iteration order of the map.
    fn into_array_type(self) -> Self::ArrayType {
        let entries = self
            .into_iter()
            .map(|(key, value)| KeyValue { key, value });
        entries.collect()
    }
}

/// An array for [`HashMap`] items.
///
/// This is a [`LogicalArray`] of `HashMap<K, V>` items (maps using the default
/// hasher of [`HashMap`]) with the union layout parameter fixed to
/// `crate::array::union::NA`.
///
/// - `K`, `V`: the key and value types of the stored maps.
/// - `NULLABLE`: nullability flag passed on to [`LogicalArray`]; defaults to
///   `false`.
/// - `Buffer`: buffer type passed on to [`LogicalArray`]; defaults to
///   `crate::buffer::VecBuffer`.
/// - `OffsetItem`: offset type passed on to [`LogicalArray`]; defaults to `i32`.
#[allow(unused)]
pub type HashMapArray<
K,
V,
const NULLABLE: bool = false,
Buffer = crate::buffer::VecBuffer,
OffsetItem = i32,
> = LogicalArray<HashMap<K, V>, NULLABLE, Buffer, OffsetItem, crate::array::union::NA>;

#[cfg(test)]
mod tests {
use super::*;
use crate::Length;

// Collecting maps into a `HashMapArray` yields one array item per input map,
// for both the non-nullable and the nullable variant.
#[test]
fn from_iter() {
// Non-nullable: an empty map and a map with two entries.
let array = [
HashMap::default(),
HashMap::from_iter([("a".to_owned(), 1), ("b".to_owned(), 2)]),
]
.into_iter()
.collect::<HashMapArray<String, u8>>();
assert_eq!(array.len(), 2);
// The inner value (tuple field `0`) reports the same number of items.
assert_eq!(array.0.len(), 2);

// Nullable: a map with two entries followed by a missing (`None`) item.
let array_nullable = [
Some(HashMap::from_iter([
("a".to_owned(), 1),
("b".to_owned(), 2),
])),
None,
]
.into_iter()
.collect::<HashMapArray<String, i8, true>>();
// The `None` item still counts towards the length.
assert_eq!(array_nullable.len(), 2);
assert_eq!(array_nullable.0.len(), 2);
}

// Round trip: iterating over an array built from maps gives back the
// original maps, in the same order.
#[test]
fn into_iter() {
// Non-nullable round trip.
let input = [
HashMap::default(),
HashMap::from_iter([("a".to_owned(), 1), ("b".to_owned(), 2)]),
];
let array = input
.clone()
.into_iter()
.collect::<HashMapArray<String, i32>>();
let output = array.into_iter().collect::<Vec<_>>();
assert_eq!(input, output.as_slice());

// Nullable round trip: the `None` item must come back as `None`.
let input_nullable = [
Some(HashMap::from_iter([
("a".to_owned(), 1),
("b".to_owned(), 2),
])),
None,
];
let array_nullable = input_nullable
.clone()
.into_iter()
.collect::<HashMapArray<String, u64, true>>();
let output_nullable = array_nullable.into_iter().collect::<Vec<_>>();
assert_eq!(input_nullable, output_nullable.as_slice());
}
}
12 changes: 8 additions & 4 deletions src/logical/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,14 +10,18 @@ use crate::{
Length,
};

#[cfg(feature = "uuid")]
/// Uuid support via logical arrays.
pub mod uuid;

#[cfg(feature = "chrono")]
/// Chrono support via logical arrays.
pub mod chrono;

#[cfg(feature = "map")]
/// Map arrays via logical arrays.
pub mod map;

#[cfg(feature = "uuid")]
/// Uuid support via logical arrays.
pub mod uuid;

/// Types that can be stored in Arrow arrays, but require mapping via
/// [`LogicalArray`].
///
Expand Down
6 changes: 3 additions & 3 deletions src/logical/uuid.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ use super::{LogicalArray, LogicalArrayType};

impl ArrayType<uuid::Uuid> for uuid::Uuid {
type Array<Buffer: BufferType, OffsetItem: OffsetElement, UnionLayout: UnionType> =
LogicalArray<uuid::Uuid, false, Buffer, OffsetItem, UnionLayout>;
LogicalArray<Self, false, Buffer, OffsetItem, UnionLayout>;
}

impl ArrayType<uuid::Uuid> for Option<uuid::Uuid> {
Expand All @@ -22,11 +22,11 @@ impl LogicalArrayType<uuid::Uuid> for uuid::Uuid {
type ArrayType = FixedSizeBinary<16>;

fn from_array_type(item: Self::ArrayType) -> Self {
uuid::Uuid::from_bytes(item.into())
Self::from_bytes(item.into())
}

fn into_array_type(self) -> Self::ArrayType {
uuid::Uuid::into_bytes(self).into()
Self::into_bytes(self).into()
}
}

Expand Down

0 comments on commit 2f937bb

Please sign in to comment.