Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/master' into apacheGH-3035-parqu…
Browse files Browse the repository at this point in the history
…et-rewriter-add-column-renaming-feature
  • Loading branch information
maxim_konstantinov committed Nov 6, 2024
2 parents 169af05 + bdc9346 commit 4f1f81e
Show file tree
Hide file tree
Showing 6 changed files with 70 additions and 13 deletions.
2 changes: 1 addition & 1 deletion parquet-arrow/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@
<url>https://parquet.apache.org</url>

<properties>
<arrow.version>16.1.0</arrow.version>
<arrow.version>17.0.0</arrow.version>
</properties>

<dependencies>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,11 @@ public TypeMapping visit(org.apache.arrow.vector.types.pojo.ArrowType.FixedSizeL
return createListTypeMapping();
}

@Override
public TypeMapping visit(ArrowType.ListView type) {
return createListTypeMapping();
}

private ListTypeMapping createListTypeMapping() {
if (children.size() != 1) {
throw new IllegalArgumentException("list fields must have exactly one child: " + field);
Expand Down Expand Up @@ -249,11 +254,21 @@ public TypeMapping visit(ArrowType.LargeUtf8 largeUtf8) {
return primitive(BINARY, stringType());
}

@Override
public TypeMapping visit(ArrowType.Utf8View type) {
return primitive(BINARY, stringType());
}

@Override
public TypeMapping visit(Binary type) {
return primitive(BINARY);
}

@Override
public TypeMapping visit(ArrowType.BinaryView type) {
return primitive(BINARY);
}

@Override
public TypeMapping visit(ArrowType.LargeBinary largeBinary) {
return primitive(BINARY);
Expand Down Expand Up @@ -749,6 +764,11 @@ public TypeMapping visit(org.apache.arrow.vector.types.pojo.ArrowType.FixedSizeL
return createListTypeMapping(type);
}

@Override
public TypeMapping visit(ArrowType.ListView type) {
return createListTypeMapping(type);
}

private TypeMapping createListTypeMapping(ArrowType.ComplexType type) {
if (arrowField.getChildren().size() != 1) {
throw new IllegalArgumentException("Invalid list type: " + type);
Expand Down Expand Up @@ -818,6 +838,11 @@ public TypeMapping visit(ArrowType.LargeUtf8 largeUtf8) {
return primitive();
}

@Override
public TypeMapping visit(ArrowType.Utf8View type) {
return primitive();
}

@Override
public TypeMapping visit(Binary type) {
return primitive();
Expand All @@ -828,6 +853,11 @@ public TypeMapping visit(ArrowType.LargeBinary largeBinary) {
return primitive();
}

@Override
public TypeMapping visit(ArrowType.BinaryView type) {
return primitive();
}

@Override
public TypeMapping visit(Bool type) {
return primitive();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,27 +18,25 @@
*/
package org.apache.parquet.hadoop;

import java.net.URI;
import java.nio.file.Path;
import org.apache.hadoop.conf.Configuration;
import org.apache.parquet.conf.ParquetConfiguration;
import org.apache.parquet.crypto.EncryptionPropertiesFactory;
import org.apache.parquet.crypto.FileEncryptionProperties;
import org.apache.parquet.hadoop.api.WriteSupport;
import org.apache.parquet.hadoop.util.ConfigurationUtil;
import org.apache.parquet.io.OutputFile;

final class EncryptionPropertiesHelper {
static FileEncryptionProperties createEncryptionProperties(
ParquetConfiguration fileParquetConfig, Path tempFilePath, WriteSupport.WriteContext fileWriteContext) {
ParquetConfiguration fileParquetConfig, OutputFile file, WriteSupport.WriteContext fileWriteContext) {
EncryptionPropertiesFactory cryptoFactory = EncryptionPropertiesFactory.loadFactory(fileParquetConfig);
if (null == cryptoFactory) {
return null;
}

Configuration hadoopConf = ConfigurationUtil.createHadoopConfiguration(fileParquetConfig);
URI path = tempFilePath == null ? null : tempFilePath.toUri();
return cryptoFactory.getFileEncryptionProperties(
hadoopConf, path == null ? null : new org.apache.hadoop.fs.Path(path), fileWriteContext);
hadoopConf, file == null ? null : new org.apache.hadoop.fs.Path(file.getPath()), fileWriteContext);
}

static FileEncryptionProperties createEncryptionProperties(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -710,6 +710,20 @@ public static ParquetFileReader open(InputFile file, ParquetReadOptions options)
return new ParquetFileReader(file, options);
}

/**
* Open a {@link InputFile file} with {@link ParquetReadOptions options}.
*
* @param file an input file
* @param options parquet read options
* @param f the input stream for the file
* @return an open ParquetFileReader
* @throws IOException if there is an error while opening the file
*/
public static ParquetFileReader open(InputFile file, ParquetReadOptions options, SeekableInputStream f)
throws IOException {
return new ParquetFileReader(file, options, f);
}

protected final SeekableInputStream f;
private final InputFile file;
private final ParquetReadOptions options;
Expand Down Expand Up @@ -863,9 +877,23 @@ public ParquetFileReader(Configuration conf, Path file, ParquetMetadata footer)
*/
public ParquetFileReader(Configuration conf, Path file, ParquetMetadata footer, ParquetReadOptions options)
throws IOException {
this(conf, file, footer, options, HadoopInputFile.fromPath(file, conf).newStream());
}

/**
* @param conf the Hadoop Configuration
* @param file Path to a parquet file
* @param footer a {@link ParquetMetadata} footer already read from the file
* @param options {@link ParquetReadOptions}
* @param f a {@link SeekableInputStream} for the parquet file
* @throws IOException if the file can not be opened
*/
public ParquetFileReader(
Configuration conf, Path file, ParquetMetadata footer, ParquetReadOptions options, SeekableInputStream f)
throws IOException {
this.converter = new ParquetMetadataConverter(conf);
this.file = HadoopInputFile.fromPath(file, conf);
this.f = this.file.newStream();
this.f = f;
this.fileMetaData = footer.getFileMetaData();
this.fileDecryptor = fileMetaData.getFileDecryptor();
this.options = options;
Expand Down Expand Up @@ -894,9 +922,13 @@ public ParquetFileReader(Configuration conf, Path file, ParquetMetadata footer,
}

public ParquetFileReader(InputFile file, ParquetReadOptions options) throws IOException {
this(file, options, file.newStream());
}

public ParquetFileReader(InputFile file, ParquetReadOptions options, SeekableInputStream f) throws IOException {
this.converter = new ParquetMetadataConverter(options);
this.file = file;
this.f = file.newStream();
this.f = f;
this.options = options;
try {
this.footer = readFooter(file, options, f, converter);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@

import java.io.Closeable;
import java.io.IOException;
import java.nio.file.Paths;
import java.util.HashMap;
import java.util.Map;
import org.apache.hadoop.conf.Configuration;
Expand Down Expand Up @@ -387,9 +386,7 @@ public ParquetWriter(Path file, Configuration conf, WriteSupport<T> writeSupport
// encryptionProperties could be built from the implementation of EncryptionPropertiesFactory when it is
// attached.
if (encryptionProperties == null) {
String path = file == null ? null : file.getPath();
encryptionProperties = EncryptionPropertiesHelper.createEncryptionProperties(
conf, path == null ? null : Paths.get(path), writeContext);
encryptionProperties = EncryptionPropertiesHelper.createEncryptionProperties(conf, file, writeContext);
}

ParquetFileWriter fileWriter = new ParquetFileWriter(
Expand Down
2 changes: 1 addition & 1 deletion pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@
<brotli-codec.version>0.1.1</brotli-codec.version>
<mockito.version>1.10.19</mockito.version>
<powermock.version>2.0.9</powermock.version>
<net.openhft.version>0.16</net.openhft.version>
<net.openhft.version>0.26ea0</net.openhft.version>
<exec-maven-plugin.version>1.6.0</exec-maven-plugin.version>

<!-- parquet-cli dependencies -->
Expand Down

0 comments on commit 4f1f81e

Please sign in to comment.