Skip to content

Commit

Permalink
HIVE-28306: Iceberg: Return new scan after applying column project pa…
Browse files Browse the repository at this point in the history
…rameter (Butao Zhang, reviewed by Denys Kuzmenko)

Closes #5282
  • Loading branch information
zhangbutao authored Jun 11, 2024
1 parent 1c9f52e commit 33cadc5
Show file tree
Hide file tree
Showing 2 changed files with 14 additions and 10 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,6 @@
import org.apache.iceberg.Partitioning;
import org.apache.iceberg.Scan;
import org.apache.iceberg.Schema;
import org.apache.iceberg.SchemaParser;
import org.apache.iceberg.SnapshotRef;
import org.apache.iceberg.StructLike;
import org.apache.iceberg.SystemConfigs;
Expand Down Expand Up @@ -178,14 +177,19 @@ CombinedScanTask> applyConfig(
Long openFileCost = splitSize > 0 ? splitSize : TableProperties.SPLIT_SIZE_DEFAULT;
scan = scan.option(TableProperties.SPLIT_OPEN_FILE_COST, String.valueOf(openFileCost));
}
String schemaStr = conf.get(InputFormatConfig.READ_SCHEMA);
if (schemaStr != null) {
scan.project(SchemaParser.fromJson(schemaStr));
}

String[] selectedColumns = conf.getStrings(InputFormatConfig.SELECTED_COLUMNS);
if (selectedColumns != null) {
scan.select(selectedColumns);
// TODO: Currently, this projection optimization stored on scan is not being used effectively on Hive side, as
// Hive actually uses conf to propagate the projected columns to let the final reader to read the only
// projected columns data. See IcebergInputFormat::readSchema(Configuration conf, Table table, boolean
// caseSensitive). But we can consider using this projection optimization stored on scan in the future when
// needed.
Schema readSchema = InputFormatConfig.readSchema(conf);
if (readSchema != null) {
scan = scan.project(readSchema);
} else {
String[] selectedColumns = InputFormatConfig.selectedColumns(conf);
if (selectedColumns != null) {
scan = scan.select(selectedColumns);
}
}

// TODO add a filter parser to get rid of Serialization
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ public IcebergInternalRecordWrapper(StructType tableSchema, StructType readSchem
public IcebergInternalRecordWrapper wrap(StructLike record) {
int idx = 0;
for (Types.NestedField field : readSchema.fields()) {
int position = fieldToPositionInTableSchema.get(field.name());
int position = fieldToPositionInReadSchema.get(field.name());
values[idx] = record.get(position, Object.class);
idx++;
}
Expand Down

0 comments on commit 33cadc5

Please sign in to comment.