# Patch summary (leading notes; the patch text itself starts at the first "diff --git" header below).
#
# Purpose: let the CLI's Avro output accept any codec name Avro's CodecFactory can resolve,
# instead of only the four codecs the removed switch statement handled.
#
# Files touched:
#   parquet-cli/pom.xml
#     Adds an entry "org.tukaani" / "xz" / "${tukaani.version}" / "runtime".
#     NOTE(review): the XML element tags are not present in this copy of the patch; these are
#     presumably groupId / artifactId / version / scope of a new dependency -- confirm.
#   parquet-cli/src/main/java/org/apache/parquet/cli/util/Codecs.java
#     avroCodec(String codec) no longer calls parquetCodec(codec) and switches on the enum.
#     It now compares the input case-insensitively against the names of
#     CompressionCodecName.GZIP / SNAPPY / UNCOMPRESSED / ZSTD and maps them to the strings
#     "deflate" / "snappy" / "null" / "zstandard"; any other input is forwarded unchanged.
#     The resulting name is handed to CodecFactory.fromString. An AvroRuntimeException from
#     that call is rethrown as IllegalArgumentException with the message
#     "Codec incompatible with Avro: " + codec and the original exception as its cause.
#   parquet-cli/src/test/java/org/apache/parquet/cli/commands/ToAvroCommandTest.java
#     Adds tests for "bzip2" and "xz" that assert the produced Avro file exists. The existing
#     invalid-codec test ("FOO") still expects IllegalArgumentException.
#   pom.xml
#     Adds the value "1.9" to a list of version values.
#     NOTE(review): presumably the tukaani.version property referenced above; the tag name is
#     not visible in this copy -- confirm.
#
# Review notes:
#   NOTE(review): the removed code built GZIP output with CodecFactory.deflateCodec(9) and ZSTD
#   with zstandardCodec(CodecFactory.DEFAULT_ZSTANDARD_LEVEL). The new code passes only a codec
#   name, so the compression level is whatever CodecFactory.fromString selects -- confirm that
#   dropping the explicit deflate level 9 is intended.
#   NOTE(review): only the four Parquet names are matched case-insensitively. Other names reach
#   CodecFactory.fromString exactly as typed, so whether e.g. "BZIP2" is accepted depends on
#   that method's case handling -- confirm, or normalise the case before the lookup.
#   NOTE(review): codec.equalsIgnoreCase(...) dereferences the argument, so a null codec would
#   raise NullPointerException here. The removed code delegated to parquetCodec(codec), whose
#   null handling is not shown -- confirm callers never pass null.
#   NOTE(review): this copy of the patch has its line breaks collapsed; hunk headers and
#   +/- markers appear inline rather than at the start of a line.
#
diff --git a/parquet-cli/pom.xml b/parquet-cli/pom.xml index 2a345eda8a..0e06c9ceee 100644 --- a/parquet-cli/pom.xml +++ b/parquet-cli/pom.xml @@ -110,6 +110,12 @@ avro ${avro.version} + + org.tukaani + xz + ${tukaani.version} + runtime + com.github.luben zstd-jni diff --git a/parquet-cli/src/main/java/org/apache/parquet/cli/util/Codecs.java b/parquet-cli/src/main/java/org/apache/parquet/cli/util/Codecs.java index ee79ee6063..c549f996cb 100644 --- a/parquet-cli/src/main/java/org/apache/parquet/cli/util/Codecs.java +++ b/parquet-cli/src/main/java/org/apache/parquet/cli/util/Codecs.java @@ -19,6 +19,7 @@ package org.apache.parquet.cli.util; +import org.apache.avro.AvroRuntimeException; import org.apache.avro.file.CodecFactory; import org.apache.parquet.hadoop.metadata.CompressionCodecName; @@ -34,19 +35,24 @@ public static CompressionCodecName parquetCodec(String codec) { } public static CodecFactory avroCodec(String codec) { - CompressionCodecName parquetCodec = parquetCodec(codec); - switch (parquetCodec) { - case UNCOMPRESSED: - return CodecFactory.nullCodec(); - case SNAPPY: - return CodecFactory.snappyCodec(); - case GZIP: - return CodecFactory.deflateCodec(9); - case ZSTD: - return CodecFactory.zstandardCodec(CodecFactory.DEFAULT_ZSTANDARD_LEVEL); - default: - throw new IllegalArgumentException( - "Codec incompatible with Avro: " + codec); + String avroCodec; + if (codec.equalsIgnoreCase(CompressionCodecName.GZIP.name())) { + avroCodec = "deflate"; + } else if (codec.equalsIgnoreCase(CompressionCodecName.SNAPPY.name())) { + avroCodec = "snappy"; + } else if (codec.equalsIgnoreCase(CompressionCodecName.UNCOMPRESSED.name())) { + avroCodec = "null"; + } else if (codec.equalsIgnoreCase(CompressionCodecName.ZSTD.name())) { + avroCodec = "zstandard"; + } else { + avroCodec = codec; } + CodecFactory factory; + try { + factory = CodecFactory.fromString(avroCodec); + } catch (AvroRuntimeException e) { + throw new IllegalArgumentException("Codec incompatible with Avro: " 
+ codec, e); + } + return factory; } } diff --git a/parquet-cli/src/test/java/org/apache/parquet/cli/commands/ToAvroCommandTest.java b/parquet-cli/src/test/java/org/apache/parquet/cli/commands/ToAvroCommandTest.java index d3b596a727..32000eb738 100644 --- a/parquet-cli/src/test/java/org/apache/parquet/cli/commands/ToAvroCommandTest.java +++ b/parquet-cli/src/test/java/org/apache/parquet/cli/commands/ToAvroCommandTest.java @@ -91,6 +91,18 @@ public void testToAvroCommandWithZstdCompression() throws IOException { Assert.assertTrue(avroFile.exists()); } + @Test + public void testToAvroCommandWithBzip2Compression() throws IOException { + File avroFile = toAvro(parquetFile(), "bzip2"); + Assert.assertTrue(avroFile.exists()); + } + + @Test + public void testToAvroCommandWithXzCompression() throws IOException { + File avroFile = toAvro(parquetFile(), "xz"); + Assert.assertTrue(avroFile.exists()); + } + @Test(expected = IllegalArgumentException.class) public void testToAvroCommandWithInvalidCompression() throws IOException { toAvro(parquetFile(), "FOO"); diff --git a/pom.xml b/pom.xml index ea8cd4c873..1fd4b06462 100644 --- a/pom.xml +++ b/pom.xml @@ -99,6 +99,7 @@ 2.3 1.82 + 1.9 1.5.0-1 1.8 3.0.2