diff --git a/.github/workflows/dev_pr/labeler.yml b/.github/workflows/dev_pr/labeler.yml index 751bbd7a6f87..64299bd507d3 100644 --- a/.github/workflows/dev_pr/labeler.yml +++ b/.github/workflows/dev_pr/labeler.yml @@ -44,7 +44,9 @@ arrow-flight: parquet: - changed-files: - - any-glob-to-any-file: [ 'parquet/**/*' ] + - any-glob-to-any-file: + - 'parquet/**/*' + - 'parquet-variant/**/*' parquet-derive: - changed-files: diff --git a/.github/workflows/parquet-variant.yml b/.github/workflows/parquet-variant.yml new file mode 100644 index 000000000000..6fc5c3a8cd00 --- /dev/null +++ b/.github/workflows/parquet-variant.yml @@ -0,0 +1,79 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +--- +# tests for parquet-variant crate +name: "parquet-variant" + +concurrency: + group: ${{ github.repository }}-${{ github.head_ref || github.sha }}-${{ github.workflow }} + cancel-in-progress: true + +# trigger for all PRs that touch certain files and changes to main +on: + push: + branches: + - main + pull_request: + paths: + - parquet-variant/** + - .github/** + +jobs: + # test the crate + linux-test: + name: Test + runs-on: ubuntu-latest + container: + image: amd64/rust + steps: + - uses: actions/checkout@v4 + with: + submodules: true + - name: Setup Rust toolchain + uses: ./.github/actions/setup-builder + - name: Test + run: cargo test -p parquet-variant + + # test compilation + linux-features: + name: Check Compilation + runs-on: ubuntu-latest + container: + image: amd64/rust + steps: + - uses: actions/checkout@v4 + with: + submodules: true + - name: Setup Rust toolchain + uses: ./.github/actions/setup-builder + - name: Check compilation + run: cargo check -p parquet-variant + + clippy: + name: Clippy + runs-on: ubuntu-latest + container: + image: amd64/rust + steps: + - uses: actions/checkout@v4 + - name: Setup Rust toolchain + uses: ./.github/actions/setup-builder + - name: Setup Clippy + run: rustup component add clippy + - name: Run clippy + run: cargo clippy -p parquet-variant --all-targets --all-features -- -D warnings diff --git a/parquet-variant/src/decoder.rs b/parquet-variant/src/decoder.rs index 80d6947c3da6..a3d2f87062ea 100644 --- a/parquet-variant/src/decoder.rs +++ b/parquet-variant/src/decoder.rs @@ -112,7 +112,7 @@ mod tests { #[test] fn test_i8() -> Result<(), ArrowError> { let value = [ - 0 | 3 << 2, // Primitive type for i8 + 3 << 2, // Primitive type for i8 42, ]; let result = decode_int8(&value)?; @@ -124,12 +124,12 @@ mod tests { fn test_short_string() -> Result<(), ArrowError> { let value = [ 1 | 5 << 2, // Basic type for short string | length of short string - 'H' as u8, - 'e' as u8, - 'l' as u8, - 'l' as u8, - 'o' as u8, - 'o' as u8, + b'H', + b'e', + b'l', + b'l', + b'o', + b'o', ]; let result = decode_short_string(&value)?; assert_eq!(result, "Hello"); @@ -139,17 +139,17 @@ mod tests { #[test] fn test_string() -> Result<(), ArrowError> { let value = [ - 0 | 16 << 2, // Basic type for short string | length of short string + 16 << 2, // Basic type for short string | length of short string 5, 0, 0, 0, // Length of string - 'H' as u8, - 'e' as u8, - 'l' as u8, - 'l' as u8, - 'o' as u8, - 'o' as u8, + b'H', + b'e', + b'l', + b'l', + b'o', + b'o', ]; let result = decode_long_string(&value)?; assert_eq!(result, "Hello"); diff --git a/parquet-variant/src/utils.rs b/parquet-variant/src/utils.rs index 85feb0bcb1c9..128221b4dd24 100644 --- a/parquet-variant/src/utils.rs +++ b/parquet-variant/src/utils.rs @@ -22,7 +22,6 @@ use std::fmt::Debug; use std::slice::SliceIndex; #[inline] - pub(crate) fn slice_from_slice + Clone + Debug>( bytes: &[u8], index: I, @@ -49,7 +48,7 @@ pub(crate) fn map_try_from_slice_error(e: TryFromSliceError) -> ArrowError { pub(crate) fn first_byte_from_slice(slice: &[u8]) -> Result<&u8, ArrowError> { slice - .get(0) + .first() .ok_or_else(|| ArrowError::InvalidArgumentError("Received empty bytes".to_string())) } diff --git a/parquet-variant/src/variant.rs b/parquet-variant/src/variant.rs index cf9f51acc72d..999826f5c274 100644 --- a/parquet-variant/src/variant.rs +++ b/parquet-variant/src/variant.rs @@ -19,10 +19,7 @@ use crate::decoder::{ }; use crate::utils::{array_from_slice, first_byte_from_slice, slice_from_slice, string_from_slice}; use arrow_schema::ArrowError; -use std::{ - num::TryFromIntError, - ops::{Index, Range}, -}; +use std::{num::TryFromIntError, ops::Range}; #[derive(Clone, Debug, Copy, PartialEq)] enum OffsetSizeBytes { @@ -251,7 +248,7 @@ impl<'m> VariantMetadata<'m> { // Skipping the header byte (setting byte_offset = 1) and the dictionary_size (setting offset_index +1) let unpack = |i| self.header.offset_size.unpack_usize(self.bytes, 1, i + 1); - Ok(unpack(index)?) + unpack(index) } /// Get the key-name by index @@ -278,7 +275,7 @@ impl<'m> VariantMetadata<'m> { let offset_size = self.header.offset_size; // `Copy` let bytes = self.bytes; - let iterator = (0..self.dict_size).map(move |i| { + (0..self.dict_size).map(move |i| { // This wont be out of bounds as long as dict_size and offsets have been validated // during construction via `try_new`, as it calls unpack_usize for the // indices `1..dict_size+1` already. @@ -289,9 +286,7 @@ impl<'m> VariantMetadata<'m> { (Ok(s), Ok(e)) => Ok(s..e), (Err(e), _) | (_, Err(e)) => Err(e), } - }); - - iterator + }) } /// Get all key-names as an Iterator of strings @@ -547,13 +542,13 @@ mod tests { OffsetSizeBytes::Three .unpack_usize(&buf_three, 0, 0) .unwrap(), - 0x0302_01 + 0x030201 ); assert_eq!( OffsetSizeBytes::Three .unpack_usize(&buf_three, 0, 1) .unwrap(), - 0x0000_FF + 0x0000FF ); // Four-byte offsets (0x12345678, 0x90ABCDEF)