diff --git a/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/MetastoreUtil.java b/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/MetastoreUtil.java index 95e1e5b36623..d0398d0acc01 100644 --- a/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/MetastoreUtil.java +++ b/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/MetastoreUtil.java @@ -22,7 +22,6 @@ import java.util.Collections; import java.util.List; import java.util.Map; -import java.util.stream.Collectors; import org.apache.commons.lang3.StringUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.common.StatsSetupConst; @@ -46,6 +45,7 @@ import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; import org.apache.iceberg.relocated.com.google.common.collect.Lists; import org.apache.iceberg.relocated.com.google.common.collect.Maps; +import org.apache.iceberg.types.Types; import org.apache.thrift.TException; public class MetastoreUtil { @@ -112,17 +112,18 @@ public static void alterTable( } } - public static List getPartitionKeys(org.apache.iceberg.Table table, int specId) { - Schema schema = table.specs().get(specId).schema(); - List hiveSchema = HiveSchemaUtil.convert(schema); - Map colNameToColType = hiveSchema.stream() - .collect(Collectors.toMap(FieldSchema::getName, FieldSchema::getType)); - return table.specs().get(specId).fields().stream() - .map(partField -> new FieldSchema( - schema.findColumnName(partField.sourceId()), - colNameToColType.get(schema.findColumnName(partField.sourceId())), - String.format("Transform: %s", partField.transform().toString())) - ) + public static List getPartitionKeys(org.apache.iceberg.Table table) { + Schema schema = table.spec().schema(); + + return table.spec().fields().stream() + .map(partField -> { + Types.NestedField col = schema.findField(partField.sourceId()); + return new FieldSchema( + col.name(), + HiveSchemaUtil.convertToTypeString(col.type()), + "Transform: %s".formatted(partField.transform()) + ); + }) .toList(); } @@ -134,7 +135,7 @@ public static Table toHiveTable(org.apache.iceberg.Table table, Configuration co result.setDbName(tableName.getDb()); result.setTableName(tableName.getTable()); result.setTableType(TableType.EXTERNAL_TABLE.toString()); - result.setPartitionKeys(getPartitionKeys(table, table.spec().specId())); + result.setPartitionKeys(getPartitionKeys(table)); TableMetadata metadata = ((BaseTable) table).operations().current(); long maxHiveTablePropertySize = conf.getLong(HiveOperationsBase.HIVE_TABLE_PROPERTY_MAX_SIZE, HiveOperationsBase.HIVE_TABLE_PROPERTY_MAX_SIZE_DEFAULT); diff --git a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java index 1c48407bf682..c14a1749f848 100644 --- a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java +++ b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java @@ -2127,7 +2127,7 @@ public boolean canUseTruncate(org.apache.hadoop.hive.ql.metadata.Table hmsTable, public List getPartitions(org.apache.hadoop.hive.ql.metadata.Table hmsTable, Map partitionSpec, boolean latestSpecOnly) throws SemanticException { List partNames = IcebergTableUtil.getPartitionNames(conf, hmsTable, partitionSpec, latestSpecOnly); - return IcebergTableUtil.convertNameToMetastorePartition(hmsTable, partNames); + return IcebergTableUtil.convertNameToHivePartition(hmsTable, partNames); } public boolean isPartitioned(org.apache.hadoop.hive.ql.metadata.Table hmsTable) { @@ -2280,7 +2280,7 @@ public List getPartitionKeys(org.apache.hadoop.hive.ql.metadata.Tab return Collections.emptyList(); } Table icebergTable = IcebergTableUtil.getTable(conf, hmsTable.getTTable()); - return MetastoreUtil.getPartitionKeys(icebergTable, icebergTable.spec().specId()); + return MetastoreUtil.getPartitionKeys(icebergTable); } @Override diff --git a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/IcebergTableUtil.java b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/IcebergTableUtil.java index 386473f46fee..3fc200446241 100644 --- a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/IcebergTableUtil.java +++ b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/IcebergTableUtil.java @@ -30,7 +30,6 @@ import java.util.Objects; import java.util.Optional; import java.util.Properties; -import java.util.Set; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import java.util.concurrent.atomic.AtomicInteger; @@ -41,7 +40,6 @@ import org.apache.commons.lang3.SerializationUtils; import org.apache.commons.lang3.StringUtils; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.common.type.TimestampTZ; import org.apache.hadoop.hive.common.type.TimestampTZUtil; import org.apache.hadoop.hive.conf.HiveConf; @@ -55,7 +53,6 @@ import org.apache.hadoop.hive.ql.QueryState; import org.apache.hadoop.hive.ql.io.sarg.SearchArgument; import org.apache.hadoop.hive.ql.metadata.DummyPartition; -import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.metadata.HiveUtils; import org.apache.hadoop.hive.ql.metadata.Partition; import org.apache.hadoop.hive.ql.parse.AlterTableExecuteSpec; @@ -65,8 +62,6 @@ import org.apache.hadoop.hive.ql.plan.PlanUtils; import org.apache.hadoop.hive.ql.session.SessionState; import org.apache.hadoop.hive.ql.session.SessionStateUtil; -import org.apache.hadoop.util.Sets; -import org.apache.iceberg.ContentFile; import org.apache.iceberg.DeleteFiles; import org.apache.iceberg.FileFormat; import org.apache.iceberg.FileScanTask; @@ -774,26 +769,6 @@ private static PartitionStats recordToPartitionStats(StructLike record) { return stats; } - public static PartitionSpec getPartitionSpec(Table icebergTable, String partitionPath) - throws MetaException, HiveException { - if (icebergTable == null || partitionPath == null || partitionPath.isEmpty()) { - throw new HiveException("Table and partitionPath must not be null or empty."); - } - - // Extract field names from the path: "field1=val1/field2=val2" → [field1, field2] - List fieldNames = Lists.newArrayList(Warehouse.makeSpecFromName(partitionPath).keySet()); - - return icebergTable.specs().values().stream() - .filter(spec -> { - List specFieldNames = spec.fields().stream() - .map(PartitionField::name) - .toList(); - return specFieldNames.equals(fieldNames); - }) - .findFirst() // Supposed to be only one matching spec - .orElseThrow(() -> new HiveException("No matching partition spec found for partition path: " + partitionPath)); - } - public static TransformSpec getTransformSpec(Table table, String transformName, int sourceId) { TransformSpec spec = TransformSpec.fromString(transformName.toUpperCase(), table.schema().findColumnName(sourceId)); @@ -849,26 +824,15 @@ public static boolean hasUndergonePartitionEvolution(Snapshot snapshot, FileIO i .anyMatch(m -> m.partitionSpecId() != 0); } - public static > Set getPartitionNames(Table icebergTable, Iterable files, - Boolean latestSpecOnly) { - Set partitions = Sets.newHashSet(); - int tableSpecId = icebergTable.spec().specId(); - for (T file : files) { - if (latestSpecOnly == null || latestSpecOnly.equals(file.specId() == tableSpecId)) { - String partName = icebergTable.specs().get(file.specId()).partitionToPath(file.partition()); - partitions.add(partName); - } - } - return partitions; - } - - public static List convertNameToMetastorePartition(org.apache.hadoop.hive.ql.metadata.Table hmsTable, + public static List convertNameToHivePartition(org.apache.hadoop.hive.ql.metadata.Table hmsTable, Collection partNames) { List partitions = Lists.newArrayList(); for (String partName : partNames) { - Map partSpecMap = Maps.newLinkedHashMap(); - Warehouse.makeSpecFromName(partSpecMap, new Path(partName), null); - partitions.add(new DummyPartition(hmsTable, partName, partSpecMap)); + try { + partitions.add(new DummyPartition(hmsTable, partName, Warehouse.makeSpecFromName(partName))); + } catch (MetaException e) { + LOG.error(e.getMessage(), e); + } } return partitions; } diff --git a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/compaction/IcebergCompactionUtil.java b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/compaction/IcebergCompactionUtil.java index fca301f870d1..23eac4c5e08a 100644 --- a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/compaction/IcebergCompactionUtil.java +++ b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/compaction/IcebergCompactionUtil.java @@ -19,11 +19,18 @@ package org.apache.iceberg.mr.hive.compaction; import java.util.List; +import java.util.Set; +import org.apache.hadoop.hive.metastore.Warehouse; +import org.apache.hadoop.hive.metastore.api.MetaException; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.util.Sets; import org.apache.iceberg.ContentFile; import org.apache.iceberg.DataFile; import org.apache.iceberg.DeleteFile; import org.apache.iceberg.MetadataTableType; import org.apache.iceberg.MetadataTableUtils; +import org.apache.iceberg.PartitionField; +import org.apache.iceberg.PartitionSpec; import org.apache.iceberg.PositionDeletesScanTask; import org.apache.iceberg.ScanTask; import org.apache.iceberg.Table; @@ -100,4 +107,37 @@ public static List getDeleteFiles(Table table, Long snapshotId, Stri return Lists.newArrayList(CloseableIterable.transform(filteredDeletesScanTasks, t -> ((PositionDeletesScanTask) t).file())); } + + static PartitionSpec getPartitionSpec(Table icebergTable, String partitionPath) + throws MetaException, HiveException { + if (icebergTable == null || partitionPath == null || partitionPath.isEmpty()) { + throw new HiveException("Table and partitionPath must not be null or empty."); + } + + // Extract field names from the path: "field1=val1/field2=val2" → [field1, field2] + List fieldNames = Lists.newArrayList(Warehouse.makeSpecFromName(partitionPath).keySet()); + + return icebergTable.specs().values().stream() + .filter(spec -> { + List specFieldNames = spec.fields().stream() + .map(PartitionField::name) + .toList(); + return specFieldNames.equals(fieldNames); + }) + .findFirst() // Supposed to be only one matching spec + .orElseThrow(() -> new HiveException("No matching partition spec found for partition path: " + partitionPath)); + } + + static > Set getPartitionNames(Table icebergTable, Iterable files, + boolean latestSpecOnly) { + Set partitions = Sets.newHashSet(); + int tableSpecId = icebergTable.spec().specId(); + for (T file : files) { + if (latestSpecOnly == (file.specId() == tableSpecId)) { + String partName = icebergTable.specs().get(file.specId()).partitionToPath(file.partition()); + partitions.add(partName); + } + } + return partitions; + } } diff --git a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/compaction/IcebergQueryCompactor.java b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/compaction/IcebergQueryCompactor.java index 9c4a73cc7d0b..9ca788b89e40 100644 --- a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/compaction/IcebergQueryCompactor.java +++ b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/compaction/IcebergQueryCompactor.java @@ -135,7 +135,7 @@ private String buildCompactionQuery(CompactorContext context, String compactTabl PartitionSpec spec; String partitionPredicate; try { - spec = IcebergTableUtil.getPartitionSpec(icebergTable, ci.partName); + spec = IcebergCompactionUtil.getPartitionSpec(icebergTable, ci.partName); partitionPredicate = buildPartitionPredicate(ci, spec); } catch (MetaException e) { throw new HiveException(e); diff --git a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/compaction/IcebergTableOptimizer.java b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/compaction/IcebergTableOptimizer.java index 5c612bd0479a..b34b157287db 100644 --- a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/compaction/IcebergTableOptimizer.java +++ b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/compaction/IcebergTableOptimizer.java @@ -238,7 +238,7 @@ private List findModifiedPartitions( } ); - return IcebergTableUtil.convertNameToMetastorePartition( + return IcebergTableUtil.convertNameToHivePartition( hiveTable, modifiedPartitions); } @@ -249,7 +249,7 @@ private List>> createPartitionNameTasks( return relevantSnapshots.stream() .map(snapshot -> (Callable>) () -> - IcebergTableUtil.getPartitionNames( + IcebergCompactionUtil.getPartitionNames( icebergTable, getAffectedFiles(snapshot, icebergTable.io()), latestSpecOnly)) diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapInputFormat.java b/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapInputFormat.java index e74885e57a3d..3779143a71d0 100644 --- a/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapInputFormat.java +++ b/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapInputFormat.java @@ -31,7 +31,6 @@ import java.util.Collections; import java.util.HashMap; import java.util.Iterator; -import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.concurrent.ExecutorService; @@ -76,10 +75,10 @@ public class LlapInputFormat implements InputFormat ALLOWED_VIRTUAL_COLUMNS = Collections.unmodifiableMap( - new HashMap() {{ - put(VirtualColumn.ROWID.getName(), VirtualColumn.ROWID); - put(VirtualColumn.ROWISDELETED.getName(), VirtualColumn.ROWISDELETED); - }}); + new HashMap<>() {{ + put(VirtualColumn.ROWID.getName(), VirtualColumn.ROWID); + put(VirtualColumn.ROWISDELETED.getName(), VirtualColumn.ROWISDELETED); + }}); private final InputFormat sourceInputFormat; private final AvoidSplitCombination sourceASC; @@ -203,7 +202,7 @@ public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException { @Override public boolean shouldSkipCombine(Path path, Configuration conf) throws IOException { - return sourceASC == null ? false : sourceASC.shouldSkipCombine(path, conf); + return sourceASC != null && sourceASC.shouldSkipCombine(path, conf); } static VectorizedRowBatchCtx createFakeVrbCtx(MapWork mapWork) throws HiveException { @@ -211,8 +210,8 @@ static VectorizedRowBatchCtx createFakeVrbCtx(MapWork mapWork) throws HiveExcept // Add all non-virtual columns from the TableScan operator. RowSchema rowSchema = findTsOp(mapWork).getSchema(); - final List colNames = new ArrayList(rowSchema.getSignature().size()); - final List colTypes = new ArrayList(rowSchema.getSignature().size()); + final List colNames = new ArrayList<>(rowSchema.getSignature().size()); + final List colTypes = new ArrayList<>(rowSchema.getSignature().size()); ArrayList virtualColumnList = new ArrayList<>(2); for (ColumnInfo c : rowSchema.getSignature()) { String columnName = c.getInternalName(); @@ -232,15 +231,15 @@ static VectorizedRowBatchCtx createFakeVrbCtx(MapWork mapWork) throws HiveExcept if (paths.hasNext()) { PartitionDesc partDesc = mapWork.getPathToPartitionInfo().get(paths.next()); if (partDesc != null) { - LinkedHashMap partSpec = partDesc.getPartSpec(); + Map partSpec = partDesc.getPartSpec(); if (partSpec != null && !partSpec.isEmpty()) { partitionColumnCount = partSpec.size(); } } } final VirtualColumn[] virtualColumns = virtualColumnList.toArray(new VirtualColumn[0]); - return new VectorizedRowBatchCtx(colNames.toArray(new String[colNames.size()]), - colTypes.toArray(new TypeInfo[colTypes.size()]), null, null, partitionColumnCount, + return new VectorizedRowBatchCtx(colNames.toArray(new String[0]), + colTypes.toArray(new TypeInfo[0]), null, null, partitionColumnCount, virtualColumns.length, virtualColumns, new String[0], null); } diff --git a/ql/src/java/org/apache/hadoop/hive/llap/ProactiveEviction.java b/ql/src/java/org/apache/hadoop/hive/llap/ProactiveEviction.java index 120949fc9949..2d4558e7dee4 100644 --- a/ql/src/java/org/apache/hadoop/hive/llap/ProactiveEviction.java +++ b/ql/src/java/org/apache/hadoop/hive/llap/ProactiveEviction.java @@ -89,7 +89,7 @@ public static void evict(Configuration conf, Request request) { try { LlapRegistryService llapRegistryService = LlapRegistryService.getClient(conf); Collection instances = llapRegistryService.getInstances().getAll(); - if (instances.size() == 0) { + if (instances.isEmpty()) { // Not in LLAP mode. return; } @@ -158,13 +158,13 @@ public static final class Request { // Holds a hierarchical structure of DBs, tables and partitions such as: // { testdb : { testtab0 : [], testtab1 : [ {pk0 : p0v0, pk1 : p0v1}, {pk0 : p1v0, pk1 : p1v1} ] }, testdb2 : {} } - private final Map>>> entities; + private final Map>>> entities; - private Request(Map>>> entities) { + private Request(Map>>> entities) { this.entities = entities; } - public Map>>> getEntities() { + public Map>>> getEntities() { return entities; } @@ -191,21 +191,21 @@ public List toProtoRequests() List protoRequests = new LinkedList<>(); - for (Map.Entry>>> dbEntry : entities.entrySet()) { + for (Map.Entry>>> dbEntry : entities.entrySet()) { String dbName = dbEntry.getKey(); - Map>> tables = dbEntry.getValue(); + Map>> tables = dbEntry.getValue(); LlapDaemonProtocolProtos.EvictEntityRequestProto.Builder requestBuilder = LlapDaemonProtocolProtos.EvictEntityRequestProto.newBuilder(); LlapDaemonProtocolProtos.TableProto.Builder tableBuilder = null; requestBuilder.setDbName(dbName.toLowerCase()); - for (Map.Entry>> tableEntry : tables.entrySet()) { + for (Map.Entry>> tableEntry : tables.entrySet()) { String tableName = tableEntry.getKey(); tableBuilder = LlapDaemonProtocolProtos.TableProto.newBuilder(); tableBuilder.setTableName(tableName.toLowerCase()); - Set> partitions = tableEntry.getValue(); + Set> partitions = tableEntry.getValue(); Set partitionKeys = null; for (Map partitionSpec : partitions) { @@ -245,7 +245,7 @@ public boolean isTagMatch(CacheTag cacheTag) { return false; } - Map>> tables = entities.get(db); + Map>> tables = entities.get(db); // If true, must be a drop DB event and this cacheTag matches. if (tables.isEmpty()) { @@ -261,7 +261,7 @@ public boolean isTagMatch(CacheTag cacheTag) { for (String tableAndDbName : tables.keySet()) { if (tableAndDbName.equals(tagTableName.getNotEmptyDbTable())) { - Set> partDescs = tables.get(tableAndDbName); + Set> partDescs = tables.get(tableAndDbName); // If true, must be a drop table event, and this cacheTag matches. if (partDescs == null) { @@ -292,7 +292,7 @@ public String toString() { */ public static final class Builder { - private final Map>>> entities; + private final Map>>> entities; private Builder() { this.entities = new HashMap<>(); @@ -302,7 +302,7 @@ public static Builder create() { return new Builder(); } - public Builder addPartitionOfATable(String db, String tableName, LinkedHashMap partSpec) { + public Builder addPartitionOfATable(String db, String tableName, Map partSpec) { ensureDb(db); ensureTable(db, tableName); entities.get(db).get(tableName).add(partSpec); @@ -325,22 +325,13 @@ public Request build() { } private void ensureDb(String dbName) { - Map>> tables = entities.get(dbName); - if (tables == null) { - tables = new HashMap<>(); - entities.put(dbName, tables); - } + entities.computeIfAbsent(dbName, k -> new HashMap<>()); } private void ensureTable(String dbName, String tableName) { ensureDb(dbName); - Map>> tables = entities.get(dbName); - - Set> partitions = tables.get(tableName); - if (partitions == null) { - partitions = new HashSet<>(); - tables.put(tableName, partitions); - } + Map>> tables = entities.get(dbName); + tables.computeIfAbsent(tableName, k -> new HashSet<>()); } /** @@ -352,20 +343,19 @@ public Builder fromProtoRequest(LlapDaemonProtocolProtos.EvictEntityRequestProto entities.clear(); String dbName = protoRequest.getDbName().toLowerCase(); - Map>> entitiesInDb = new HashMap<>(); + Map>> entitiesInDb = new HashMap<>(); List tables = protoRequest.getTableList(); - if (tables != null && !tables.isEmpty()) { + if (!tables.isEmpty()) { for (LlapDaemonProtocolProtos.TableProto table : tables) { - String dbAndTableName = - (new StringBuilder().append(dbName).append('.').append(table.getTableName())).toString().toLowerCase(); + String dbAndTableName = (dbName + '.' + table.getTableName()).toLowerCase(); if (table.getPartValCount() == 0) { entitiesInDb.put(dbAndTableName, null); continue; } - Set> partitions = new HashSet<>(); - LinkedHashMap partDesc = new LinkedHashMap<>(); + Set> partitions = new HashSet<>(); + Map partDesc = new LinkedHashMap<>(); for (int valIx = 0; valIx < table.getPartValCount(); ++valIx) { int keyIx = valIx % table.getPartKeyCount(); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/column/show/ShowColumnsOperation.java b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/column/show/ShowColumnsOperation.java index 289479b7ee79..c24e58b8fb4c 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/column/show/ShowColumnsOperation.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/column/show/ShowColumnsOperation.java @@ -66,10 +66,7 @@ private List getColumnsByPattern() throws HiveException { private List getCols() throws HiveException { Table table = context.getDb().getTable(desc.getTableName()); - List allColumns = new ArrayList<>(); - allColumns.addAll(table.getCols()); - allColumns.addAll(table.getPartCols()); - return allColumns; + return table.getAllCols(); } private Matcher getMatcher() { @@ -94,13 +91,7 @@ private List filterColumns(List columns, Matcher match } if (desc.isSorted()) { - result.sort( - new Comparator() { - @Override - public int compare(FieldSchema f1, FieldSchema f2) { - return f1.getName().compareTo(f2.getName()); - } - }); + result.sort(Comparator.comparing(FieldSchema::getName)); } return result; } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/info/desc/formatter/TextDescTableFormatter.java b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/info/desc/formatter/TextDescTableFormatter.java index b1dd9738572a..203525613389 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/info/desc/formatter/TextDescTableFormatter.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/info/desc/formatter/TextDescTableFormatter.java @@ -59,7 +59,6 @@ import java.io.DataOutputStream; import java.io.IOException; -import java.io.UnsupportedEncodingException; import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.Collections; @@ -70,7 +69,6 @@ import java.util.Set; import java.util.TreeMap; import java.util.Map.Entry; -import java.util.stream.Collectors; import static org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.TABLE_IS_CTAS; import static org.apache.hadoop.hive.ql.ddl.ShowUtils.ALIGNMENT; @@ -171,13 +169,8 @@ private void addPartitionData(DataOutputStream out, HiveConf conf, String column boolean isFormatted, boolean isOutputPadded) throws IOException { String partitionData = ""; if (columnPath == null) { - List partitionColumns = null; - // TODO (HIVE-29413): Refactor to a generic getPartCols() implementation - if (table.isPartitioned()) { - partitionColumns = table.hasNonNativePartitionSupport() ? - table.getStorageHandler().getPartitionKeys(table) : - table.getPartCols(); - } + List partitionColumns = table.isPartitioned() ? table.getEffectivePartCols() : null; + if (CollectionUtils.isNotEmpty(partitionColumns) && conf.getBoolVar(ConfVars.HIVE_DISPLAY_PARTITION_COLUMNS_SEPARATELY)) { TextMetaDataTable metaDataTable = new TextMetaDataTable(); @@ -204,13 +197,9 @@ private void addPartitionData(DataOutputStream out, HiveConf conf, String column } private void addFormattedTableData(DataOutputStream out, Table table, Partition partition, boolean isOutputPadded) - throws IOException, UnsupportedEncodingException { - String formattedTableInfo = null; - if (partition != null) { - formattedTableInfo = getPartitionInformation(table, partition); - } else { - formattedTableInfo = getTableInformation(table, isOutputPadded); - } + throws IOException { + String formattedTableInfo = (partition != null) ? getPartitionInformation(table, partition) : + getTableInformation(table, isOutputPadded); if (table.getTableConstraintsInfo().isTableConstraintsInfoNotEmpty()) { formattedTableInfo += getConstraintsInformation(table); @@ -335,24 +324,24 @@ private void getStorageDescriptorInfo(StringBuilder tableInfo, Table table, Stor List skewedCoumnNames = storageDesc.getSkewedInfo().getSkewedColNames().stream() .sorted() - .collect(Collectors.toList()); + .toList(); formatOutput("Skewed Columns:", skewedCoumnNames.toString(), tableInfo); } if (CollectionUtils.isNotEmpty(storageDesc.getSkewedInfo().getSkewedColValues())) { List> skewedColumnValues = storageDesc.getSkewedInfo().getSkewedColValues().stream() - .sorted(new VectorComparator()) - .collect(Collectors.toList()); + .sorted(new VectorComparator<>()) + .toList(); formatOutput("Skewed Values:", skewedColumnValues.toString(), tableInfo); } - Map, String> skewedColMap = new TreeMap<>(new VectorComparator()); + Map, String> skewedColMap = new TreeMap<>(new VectorComparator<>()); skewedColMap.putAll(storageDesc.getSkewedInfo().getSkewedColValueLocationMaps()); if (MapUtils.isNotEmpty(skewedColMap)) { formatOutput("Skewed Value to Path:", skewedColMap.toString(), tableInfo); Map, String> truncatedSkewedColMap = - new TreeMap, String>(new VectorComparator()); + new TreeMap<>(new VectorComparator<>()); // walk through existing map to truncate path so that test won't mask it then we can verify location is right Set, String>> entries = skewedColMap.entrySet(); for (Entry, String> entry : entries) { @@ -401,7 +390,7 @@ private void getPartitionMetaDataInformation(StringBuilder tableInfo, Partition } } - private class VectorComparator> implements Comparator>{ + private static class VectorComparator> implements Comparator>{ @Override public int compare(List listA, List listB) { for (int i = 0; i < listA.size() && i < listB.size(); i++) { @@ -436,7 +425,7 @@ private void displayAllParameters(Map params, StringBuilder tabl private void displayAllParameters(Map params, StringBuilder tableInfo, boolean escapeUnicode, boolean isOutputPadded) { - List keys = new ArrayList(params.keySet()); + List keys = new ArrayList<>(params.keySet()); Collections.sort(keys); for (String key : keys) { String value = params.get(key); @@ -624,7 +613,7 @@ private void addExtendedTableData(DataOutputStream out, Table table, Partition p } private void addExtendedConstraintData(DataOutputStream out, Table table) - throws IOException, UnsupportedEncodingException { + throws IOException { if (table.getTableConstraintsInfo().isTableConstraintsInfoNotEmpty()) { out.write(("Constraints").getBytes(StandardCharsets.UTF_8)); out.write(Utilities.tabCode); @@ -656,7 +645,7 @@ private void addExtendedConstraintData(DataOutputStream out, Table table) } private void addExtendedStorageData(DataOutputStream out, Table table) - throws IOException, UnsupportedEncodingException { + throws IOException { if (table.getStorageHandlerInfo() != null) { out.write(("StorageHandlerInfo").getBytes(StandardCharsets.UTF_8)); out.write(Utilities.newLineCode); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/partition/PartitionUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/partition/PartitionUtils.java index db7a5dfcd3d0..b27a9d9df784 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/partition/PartitionUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/partition/PartitionUtils.java @@ -23,7 +23,6 @@ import java.util.List; import java.util.Map; import java.util.Set; -import java.util.Map.Entry; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.conf.HiveConf.ConfVars; @@ -64,14 +63,14 @@ private PartitionUtils() { public static void validatePartitions(HiveConf conf, Map partitionSpec) { // Partition can't have this name Set reservedPartitionValues = - new HashSet() {{ + new HashSet<>() {{ add(HiveConf.getVar(conf, ConfVars.DEFAULT_PARTITION_NAME)); add(HiveConf.getVar(conf, ConfVars.DEFAULT_ZOOKEEPER_PARTITION_NAME)); }}; // Partition value can't end in this suffix Set reservedPartitionSuffixes = - new HashSet() {{ + new HashSet<>() {{ add(HiveConf.getVar(conf, ConfVars.METASTORE_INT_ORIGINAL)); add(HiveConf.getVar(conf, ConfVars.METASTORE_INT_ARCHIVED)); add(HiveConf.getVar(conf, ConfVars.METASTORE_INT_EXTRACTED)); @@ -174,7 +173,7 @@ public static void addTablePartsOutputs(Hive db, Set outputs, Table throw new SemanticException(e.getMessage(), e); } } else { - parts = new ArrayList(); + parts = new ArrayList<>(); try { Partition p = db.getPartition(table, partitionSpec, false); if (p != null) { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/storage/archive/AlterTableArchiveOperation.java b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/storage/archive/AlterTableArchiveOperation.java index e218e590a24e..e11c637e7e67 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/storage/archive/AlterTableArchiveOperation.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/storage/archive/AlterTableArchiveOperation.java @@ -72,8 +72,8 @@ public int execute() throws HiveException { Path intermediateOriginalDir = AlterTableArchiveUtils.getInterMediateDir(originalDir, context.getConf(), ConfVars.METASTORE_INT_ORIGINAL); - context.getConsole().printInfo("intermediate.archived is " + intermediateArchivedDir.toString()); - context.getConsole().printInfo("intermediate.original is " + intermediateOriginalDir.toString()); + context.getConsole().printInfo("intermediate.archived is " + intermediateArchivedDir); + context.getConsole().printInfo("intermediate.original is " + intermediateOriginalDir); checkIfAlreadyArchived(partitionSpecInfo, partitions); boolean recovery = isRecovery(intermediateArchivedDir, intermediateOriginalDir); @@ -139,7 +139,7 @@ private Path getOriginalDir(Table table, PartSpecInfo partitionSpecInfo, List partSpec) // scheme like table/ds=2011-01-02/hr=13/ // ARCHIVE PARTITION (ds='2011-01-02') will work and // ARCHIVE PARTITION(hr='13') won't - List prefixFields = new ArrayList(); - List prefixValues = new ArrayList(); + List prefixFields = new ArrayList<>(); + List prefixValues = new ArrayList<>(); List partCols = tbl.getPartCols(); Iterator itrPsKeys = partSpec.keySet().iterator(); for (FieldSchema fs : partCols) { if (!itrPsKeys.hasNext()) { break; } - if (!itrPsKeys.next().toLowerCase().equals( - fs.getName().toLowerCase())) { - throw new HiveException("Invalid partition specification: " - + partSpec); + if (!itrPsKeys.next().equalsIgnoreCase(fs.getName())) { + throw new HiveException("Invalid partition specification: " + partSpec); } prefixFields.add(fs); prefixValues.add(partSpec.get(fs.getName())); @@ -133,7 +130,7 @@ public static class HarPathHelper { * @param archive absolute location of archive in underlying filesystem * @param originalBase directory for which Hadoop archive was created */ - public HarPathHelper(HiveConf hconf, URI archive, URI originalBase) throws HiveException { + public HarPathHelper(URI archive, URI originalBase) throws HiveException { this.originalBase = addSlash(originalBase); String parentHost = archive.getHost(); String harHost = archive.getScheme(); @@ -253,8 +250,7 @@ public static String getPartialName(Partition p, int level) throws HiveException * name when it can't * @throws HiveException */ - public static String conflictingArchiveNameOrNull(Hive db, Table tbl, - LinkedHashMap partSpec) + public static String conflictingArchiveNameOrNull(Hive db, Table tbl, Map partSpec) throws HiveException { List partKeys = tbl.getPartitionKeys(); @@ -271,8 +267,8 @@ public static String conflictingArchiveNameOrNull(Hive db, Table tbl, "partspec " + partSpec + " is wrong for table " + tbl.getTableName()); } - Map spec = new HashMap(partSpec); - List reversedKeys = new ArrayList(); + Map spec = new HashMap<>(partSpec); + List reversedKeys = new ArrayList<>(); for (FieldSchema fs : tbl.getPartCols()) { if (spec.containsKey(fs.getName())) { reversedKeys.add(fs.getName()); @@ -283,8 +279,8 @@ public static String conflictingArchiveNameOrNull(Hive db, Table tbl, for (String rk : reversedKeys) { List parts = db.getPartitions(tbl, spec, (short) 1); - if (parts.size() != 0) { - Partition p = parts.get(0); + if (!parts.isEmpty()) { + Partition p = parts.getFirst(); if (!isArchived(p)) { // if archiving was done at this or at upper level, every matched // partition would be archived, so it not being archived means diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java index a0906cfb0339..4d840e5e4e76 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java @@ -19,7 +19,6 @@ import java.io.IOException; import java.util.Arrays; -import java.util.LinkedHashMap; import java.util.Map; import java.util.stream.IntStream; @@ -50,8 +49,6 @@ import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; import org.apache.hadoop.mapred.FileSplit; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; import com.google.common.base.Preconditions; @@ -63,10 +60,6 @@ */ public class VectorizedRowBatchCtx { - private static final long serialVersionUID = 1L; - - private static final Logger LOG = LoggerFactory.getLogger(VectorizedRowBatchCtx.class.getName()); - // The following information is for creating VectorizedRowBatch and for helping with // knowing how the table is partitioned. // @@ -114,11 +107,7 @@ public VectorizedRowBatchCtx( /* * Needed virtual columns are those used in the query. */ - if (neededVirtualColumns == null) { - neededVirtualColumns = new VirtualColumn[0]; - } else { - this.neededVirtualColumns = neededVirtualColumns; - } + this.neededVirtualColumns = (neededVirtualColumns == null) ? new VirtualColumn[0] : neededVirtualColumns; /* * The virtual columns available under vectorization. They may not actually @@ -287,7 +276,7 @@ public static void getPartitionValues(VectorizedRowBatchCtx vrbCtx, public static void getPartitionValues(VectorizedRowBatchCtx vrbCtx, PartitionDesc partDesc, Object[] partitionValues) { - LinkedHashMap partSpec = partDesc.getPartSpec(); + Map partSpec = partDesc.getPartSpec(); for (int i = 0; i < vrbCtx.partitionColumnCount; i++) { Object objectValue; @@ -347,8 +336,7 @@ public VectorizedRowBatch createVectorizedRowBatch() } } else { // Create only needed/included columns data columns. - for (int i = 0; i < dataColumnNums.length; i++) { - int columnNum = dataColumnNums[i]; + for (int columnNum : dataColumnNums) { Preconditions.checkState(columnNum < nonScratchColumnCount); result.cols[columnNum] = createColumnVectorFromRowColumnTypeInfos(columnNum); @@ -402,7 +390,6 @@ public void setBucketAndWriteIdOf(VectorizedRowBatch vectorizedRowBatch, BucketI LongColumnVector bucketIdColVector = (LongColumnVector) rowIdStructColVector.fields[1]; bucketIdColVector.isRepeating = true; bucketIdColVector.vector[0] = bucketIdentifier.getBucketProperty(); - LongColumnVector rowIdColVector = (LongColumnVector) rowIdStructColVector.fields[2]; } /** @@ -440,7 +427,7 @@ public void addPartitionColsToBatch(ColumnVector col, Object value, int colIndex lcv.isNull[0] = true; lcv.isRepeating = true; } else { - lcv.fill((Boolean) value == true ? 1 : 0); + lcv.fill((Boolean) value ? 1 : 0); } } break; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/DummyPartition.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/DummyPartition.java index c188eb09fdcf..782b4f6e5258 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/DummyPartition.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/DummyPartition.java @@ -90,10 +90,7 @@ public List getValues() { Table table = this.getTable(); values = new ArrayList<>(); - // TODO (HIVE-29413): Refactor to a generic getPartCols() implementation - for (FieldSchema fs : table.hasNonNativePartitionSupport() - ? table.getStorageHandler().getPartitionKeys(table) - : table.getPartCols()) { + for (FieldSchema fs : table.getEffectivePartCols()) { String val = partSpec.get(fs.getName()); values.add(val); } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Partition.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Partition.java index 736e6e8c9f1a..78a5581b9d78 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Partition.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Partition.java @@ -23,7 +23,6 @@ import java.util.Arrays; import java.util.Collections; import java.util.HashMap; -import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.Objects; @@ -124,7 +123,7 @@ public Partition(Table tbl, Map partSpec, Path location) throws public static org.apache.hadoop.hive.metastore.api.Partition createMetaPartitionObject( Table tbl, Map partSpec, Path location) throws HiveException { - List pvals = new ArrayList(); + List pvals = new ArrayList<>(); for (FieldSchema field : tbl.getPartCols()) { String val = partSpec.get(field.getName()); if (val == null || val.isEmpty()) { @@ -416,7 +415,7 @@ public Path[] getPath(Sample s) throws HiveException { } int scount = s.getSampleFraction(); - ArrayList ret = new ArrayList(); + List ret = new ArrayList<>(); if (bcount == scount) { ret.add(getBucketPath(s.getSampleNum() - 1)); @@ -428,7 +427,7 @@ public Path[] getPath(Sample s) throws HiveException { } // undersampling a bucket ret.add(getBucketPath((s.getSampleNum() - 1) % bcount)); - } else if (bcount > scount) { + } else { if ((bcount / scount) * scount != bcount) { throw new HiveException("Sample Count" + scount + " is not a divisor of bucket count " + bcount + " for table " @@ -439,11 +438,11 @@ public Path[] getPath(Sample s) throws HiveException { ret.add(getBucketPath(i * scount + (s.getSampleNum() - 1))); } } - return (ret.toArray(new Path[ret.size()])); + return (ret.toArray(new Path[0])); } } - public LinkedHashMap getSpec() { + public Map getSpec() { return table.createSpec(tPartition); } @@ -542,7 +541,7 @@ public void setLocation(String location) { */ public void setValues(Map partSpec) throws HiveException { - List pvals = new ArrayList(); + List pvals = new ArrayList<>(); for (FieldSchema field : table.getPartCols()) { String val = partSpec.get(field.getName()); if (val == null) { @@ -582,12 +581,11 @@ public List getSkewedColNames() { return tPartition.getSd().getSkewedInfo().getSkewedColNames(); } - public void setSkewedValueLocationMap(List valList, String dirName) - throws HiveException { + public void setSkewedValueLocationMap(List valList, String dirName) { Map, String> mappings = tPartition.getSd().getSkewedInfo() .getSkewedColValueLocationMaps(); if (null == mappings) { - mappings = new HashMap, String>(); + mappings = new HashMap<>(); tPartition.getSd().getSkewedInfo().setSkewedColValueLocationMaps(mappings); } @@ -612,8 +610,7 @@ public int hashCode() { @Override public boolean equals(Object obj) { - if (obj instanceof Partition) { - Partition o = (Partition) obj; + if (obj instanceof Partition o) { return Objects.equals(tPartition, o.tPartition); } return false; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java index f857e7d505f1..0a01427e2f9b 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java @@ -19,6 +19,7 @@ package org.apache.hadoop.hive.ql.metadata; import java.io.IOException; +import java.io.Serial; import java.io.Serializable; import java.util.ArrayList; import java.util.Arrays; @@ -26,12 +27,12 @@ import java.util.HashMap; import java.util.HashSet; import java.util.LinkedHashMap; +import java.util.LinkedHashSet; import java.util.List; import java.util.Map; import java.util.Objects; import java.util.Properties; import java.util.Set; -import java.util.stream.Collectors; import org.apache.commons.collections4.CollectionUtils; import org.apache.commons.lang3.StringUtils; @@ -100,6 +101,7 @@ */ public class Table implements Serializable { + @Serial private static final long serialVersionUID = 1L; static final private Logger LOG = LoggerFactory.getLogger("hive.ql.metadata.Table"); @@ -109,6 +111,7 @@ public class Table implements Serializable { /** * These fields are all cached fields. The information comes from tTable. */ + private transient List cachedPartCols; private transient Deserializer deserializer; private Class outputFormatClass; private Class inputFormatClass; @@ -225,11 +228,11 @@ public void setTTable(org.apache.hadoop.hive.metastore.api.Table tTable) { { sd.setSerdeInfo(new SerDeInfo()); sd.setNumBuckets(-1); - sd.setBucketCols(new ArrayList()); - sd.setCols(new ArrayList()); - sd.setParameters(new HashMap()); - sd.setSortCols(new ArrayList()); - sd.getSerdeInfo().setParameters(new HashMap()); + sd.setBucketCols(new ArrayList<>()); + sd.setCols(new ArrayList<>()); + sd.setParameters(new HashMap<>()); + sd.setSortCols(new ArrayList<>()); + sd.getSerdeInfo().setParameters(new HashMap<>()); // We have to use MetadataTypedColumnsetSerDe because LazySimpleSerDe does // not support a table with no columns. sd.getSerdeInfo().setSerializationLib(MetadataTypedColumnsetSerDe.class.getName()); @@ -239,17 +242,17 @@ public void setTTable(org.apache.hadoop.hive.metastore.api.Table tTable) { sd.setInputFormat(SequenceFileInputFormat.class.getName()); sd.setOutputFormat(HiveSequenceFileOutputFormat.class.getName()); SkewedInfo skewInfo = new SkewedInfo(); - skewInfo.setSkewedColNames(new ArrayList()); - skewInfo.setSkewedColValues(new ArrayList>()); - skewInfo.setSkewedColValueLocationMaps(new HashMap, String>()); + skewInfo.setSkewedColNames(new ArrayList<>()); + skewInfo.setSkewedColValues(new ArrayList<>()); + skewInfo.setSkewedColValueLocationMaps(new HashMap<>()); sd.setSkewedInfo(skewInfo); } org.apache.hadoop.hive.metastore.api.Table t = new org.apache.hadoop.hive.metastore.api.Table(); { t.setSd(sd); - t.setPartitionKeys(new ArrayList()); - t.setParameters(new HashMap()); + t.setPartitionKeys(new ArrayList<>()); + t.setParameters(new HashMap<>()); t.setTableType(TableType.MANAGED_TABLE.toString()); t.setDbName(databaseName); t.setTableName(tableName); @@ -402,7 +405,7 @@ public void setStorageHandlerInfo(StorageHandlerInfo storageHandlerInfo) { this.storageHandlerInfo = storageHandlerInfo; } - final public Class getInputFormatClass() { + public final Class getInputFormatClass() { if (inputFormatClass == null) { try { String className = tTable.getSd().getInputFormat(); @@ -422,7 +425,7 @@ final public Class getInputFormatClass() { return inputFormatClass; } - final public Class getOutputFormatClass() { + public final Class getOutputFormatClass() { if (outputFormatClass == null) { try { String className = tTable.getSd().getOutputFormat(); @@ -456,7 +459,7 @@ public void setMaterializedTable(boolean materializedTable) { * Marker SemanticException, so that processing that allows for table validation failures * and appropriately handles them can recover from these types of SemanticExceptions */ - public class ValidationFailureSemanticException extends SemanticException{ + public static class ValidationFailureSemanticException extends SemanticException{ public ValidationFailureSemanticException(String s) { super(s); } @@ -526,9 +529,9 @@ public TableType getTableType() { return Enum.valueOf(TableType.class, tTable.getTableType()); } - public ArrayList getFields() { + public List getFields() { - ArrayList fields = new ArrayList(); + List fields = new ArrayList<>(); try { Deserializer decoder = getDeserializer(); @@ -603,6 +606,20 @@ public List getPartCols() { return partKeys; } + /** + * Returns partition columns, consulting the storage handler for non-native tables (e.g. Iceberg) + * where partition columns are not stored in the metastore. + */ + public List getEffectivePartCols() { + if (!hasNonNativePartitionSupport()) { + return getPartCols(); + } + if (cachedPartCols == null) { + cachedPartCols = getStorageHandler().getPartitionKeys(this); + } + return cachedPartCols; + } + public FieldSchema getPartColByName(String colName) { return getPartCols().stream() .filter(key -> key.getName().toLowerCase().equals(colName)) @@ -610,10 +627,7 @@ public FieldSchema getPartColByName(String colName) { } public List getPartColNames() { - List partCols = hasNonNativePartitionSupport() ? - getStorageHandler().getPartitionKeys(this) : getPartCols(); - return partCols.stream().map(FieldSchema::getName) - .collect(Collectors.toList()); + return getEffectivePartCols().stream().map(FieldSchema::getName).toList(); } public boolean hasNonNativePartitionSupport() { @@ -636,7 +650,7 @@ public String getBucketingDimensionId() { + " table has more than one dimensions which aren't supported yet"); } - return bcols.get(0); + return bcols.getFirst(); } public void setDataLocation(Path path) { @@ -671,7 +685,7 @@ public void setSkewedValueLocationMap(List valList, String dirName) { Map, String> mappings = tTable.getSd().getSkewedInfo() .getSkewedColValueLocationMaps(); if (null == mappings) { - mappings = new HashMap, String>(); + mappings = new HashMap<>(); tTable.getSd().getSkewedInfo().setSkewedColValueLocationMaps(mappings); } @@ -681,7 +695,7 @@ public void setSkewedValueLocationMap(List valList, String dirName) { public Map, String> getSkewedColValueLocationMaps() { return (tTable.getSd().getSkewedInfo() != null) ? tTable.getSd().getSkewedInfo() - .getSkewedColValueLocationMaps() : new HashMap, String>(); + .getSkewedColValueLocationMaps() : new HashMap<>(); } public void setSkewedColValues(List> skewedValues) { @@ -690,7 +704,7 @@ public void setSkewedColValues(List> skewedValues) { public List> getSkewedColValues(){ return (tTable.getSd().getSkewedInfo() != null) ? tTable.getSd().getSkewedInfo() - .getSkewedColValues() : new ArrayList>(); + .getSkewedColValues() : new ArrayList<>(); } public void setSkewedColNames(List skewedColNames) { @@ -699,7 +713,7 @@ public void setSkewedColNames(List skewedColNames) { public List getSkewedColNames() { return (tTable.getSd().getSkewedInfo() != null) ? tTable.getSd().getSkewedInfo() - .getSkewedColNames() : new ArrayList(); + .getSkewedColNames() : new ArrayList<>(); } public SkewedInfo getSkewedInfo() { @@ -761,8 +775,7 @@ private List getColsInternal(boolean forMs) { * @return List<FieldSchema> */ public List getAllCols() { - ArrayList f_list = new ArrayList(); - f_list.addAll(getCols()); + List f_list = new ArrayList<>(getCols()); f_list.addAll(getPartCols()); return f_list; } @@ -812,7 +825,7 @@ public void setOutputFormatClass(String name) throws HiveException { } public boolean isPartitioned() { - return hasNonNativePartitionSupport() ? getStorageHandler().isPartitioned(this) : + return hasNonNativePartitionSupport() ? getStorageHandler().isPartitioned(this) : CollectionUtils.isNotEmpty(getPartCols()); } @@ -1008,12 +1021,12 @@ public boolean isMaterializedView() { * Use the information from this partition. * @return Partition name to value mapping. */ - public LinkedHashMap createSpec( + public Map createSpec( org.apache.hadoop.hive.metastore.api.Partition tp) { List fsl = getPartCols(); List tpl = tp.getValues(); - LinkedHashMap spec = new LinkedHashMap(fsl.size()); + Map spec = new LinkedHashMap<>(fsl.size()); for (int i = 0; i < fsl.size(); i++) { FieldSchema fs = fsl.get(i); String value = tpl.get(i); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java index 3d3e4ce7663f..13672b153357 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java @@ -807,8 +807,7 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx ctx, for (FieldNode col : cols) { int index = originalOutputColumnNames.indexOf(col.getFieldName()); Table tab = cppCtx.getParseContext().getViewProjectToTableSchema().get(op); - List fullFieldList = new ArrayList(tab.getCols()); - fullFieldList.addAll(tab.getPartCols()); + List fullFieldList = tab.getAllCols(); cppCtx.getParseContext().getColumnAccessInfo() .add(tab.getCompleteName(), fullFieldList.get(index).getName()); } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java index f4b4c2ff3bad..7b6b424f2fee 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java @@ -22,7 +22,6 @@ import static org.apache.hadoop.hive.ql.plan.ReduceSinkDesc.ReducerTraits.UNIFORM; import java.io.IOException; -import java.lang.annotation.Annotation; import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; @@ -259,25 +258,25 @@ public class Vectorizer implements PhysicalPlanResolver { private static final TypeInfo[] EMPTY_TYPEINFO_ARRAY = new TypeInfo[0]; static { - StringBuilder patternBuilder = new StringBuilder(); - patternBuilder.append(serdeConstants.INT_TYPE_NAME); - patternBuilder.append("|").append(serdeConstants.SMALLINT_TYPE_NAME); - patternBuilder.append("|").append(serdeConstants.TINYINT_TYPE_NAME); - patternBuilder.append("|").append(serdeConstants.BIGINT_TYPE_NAME); - patternBuilder.append("|").append("integer"); - patternBuilder.append("|").append("long"); - patternBuilder.append("|").append("short"); - patternBuilder.append("|").append(serdeConstants.TIMESTAMP_TYPE_NAME); - patternBuilder.append("|").append(serdeConstants.INTERVAL_YEAR_MONTH_TYPE_NAME); - patternBuilder.append("|").append(serdeConstants.INTERVAL_DAY_TIME_TYPE_NAME); - patternBuilder.append("|").append(serdeConstants.BOOLEAN_TYPE_NAME); - patternBuilder.append("|").append(serdeConstants.BINARY_TYPE_NAME); - patternBuilder.append("|").append(serdeConstants.STRING_TYPE_NAME); - patternBuilder.append("|").append("byte"); - patternBuilder.append("|").append(serdeConstants.FLOAT_TYPE_NAME); - patternBuilder.append("|").append(serdeConstants.DOUBLE_TYPE_NAME); - patternBuilder.append("|").append(serdeConstants.DATE_TYPE_NAME); - patternBuilder.append("|").append(serdeConstants.VOID_TYPE_NAME); + StringBuilder patternBuilder = new StringBuilder() + .append(serdeConstants.INT_TYPE_NAME) + .append("|").append(serdeConstants.SMALLINT_TYPE_NAME) + .append("|").append(serdeConstants.TINYINT_TYPE_NAME) + .append("|").append(serdeConstants.BIGINT_TYPE_NAME) + .append("|").append("integer") + .append("|").append("long") + .append("|").append("short") + .append("|").append(serdeConstants.TIMESTAMP_TYPE_NAME) + .append("|").append(serdeConstants.INTERVAL_YEAR_MONTH_TYPE_NAME) + .append("|").append(serdeConstants.INTERVAL_DAY_TIME_TYPE_NAME) + .append("|").append(serdeConstants.BOOLEAN_TYPE_NAME) + .append("|").append(serdeConstants.BINARY_TYPE_NAME) + .append("|").append(serdeConstants.STRING_TYPE_NAME) + .append("|").append("byte") + .append("|").append(serdeConstants.FLOAT_TYPE_NAME) + .append("|").append(serdeConstants.DOUBLE_TYPE_NAME) + .append("|").append(serdeConstants.DATE_TYPE_NAME) + .append("|").append(serdeConstants.VOID_TYPE_NAME); /** Decimal types can be specified with different precision and scales e.g. decimal(10,5), * as opposed to other data types which can be represented by constant strings. @@ -433,7 +432,7 @@ private void clearNotVectorizedReason() { private PlanMapper planMapper; - public class VectorizerCannotVectorizeException extends Exception { + public static class VectorizerCannotVectorizeException extends Exception { } public Vectorizer() { @@ -652,7 +651,7 @@ public void transferToBaseWork(BaseWork baseWork) { final int virtualColumnCount = (availableVirtualColumnList == null ? 0 : availableVirtualColumnList.size()); VirtualColumn[] neededVirtualColumns; - if (neededVirtualColumnList != null && neededVirtualColumnList.size() > 0) { + if (neededVirtualColumnList != null && !neededVirtualColumnList.isEmpty()) { neededVirtualColumns = neededVirtualColumnList.toArray(new VirtualColumn[0]); } else { neededVirtualColumns = new VirtualColumn[0]; @@ -689,16 +688,14 @@ public void transferToBaseWork(BaseWork baseWork) { scratchdataTypePhysicalVariations); baseWork.setVectorizedRowBatchCtx(vectorizedRowBatchCtx); - if (baseWork instanceof MapWork) { - MapWork mapWork = (MapWork) baseWork; + if (baseWork instanceof MapWork mapWork) { mapWork.setUseVectorizedInputFileFormat(useVectorizedInputFileFormat); mapWork.setInputFormatSupportSet(inputFormatSupportSet); mapWork.setSupportSetInUse(supportSetInUse); mapWork.setSupportRemovedReasons(supportRemovedReasons); } - if (baseWork instanceof ReduceWork) { - ReduceWork reduceWork = (ReduceWork) baseWork; + if (baseWork instanceof ReduceWork reduceWork) { reduceWork.setVectorReduceColumnSortOrder(reduceColumnSortOrder); reduceWork.setVectorReduceColumnNullOrder(reduceColumnNullOrder); } @@ -730,7 +727,7 @@ public DummyOperator() { } @Override - public void process(Object row, int tag) throws HiveException { + public void process(Object row, int tag) { throw new RuntimeException("Not used"); } @@ -768,67 +765,13 @@ public VectorDesc getVectorDesc() { } private static List> newOperatorList() { - return new ArrayList>(); - } - - public static void debugDisplayJoinOperatorTree(Operator joinOperator, - String prefix) { - List> currentParentList = newOperatorList(); - currentParentList.add(joinOperator); - - int depth = 0; - do { - List> nextParentList = newOperatorList(); - - final int count = currentParentList.size(); - for (int i = 0; i < count; i++) { - Operator parent = currentParentList.get(i); - System.out.println(prefix + " parent depth " + depth + " " + - parent.getClass().getSimpleName() + " " + parent.toString()); - - List> parentList = parent.getParentOperators(); - if (parentList == null || parentList.size() == 0) { - continue; - } - - nextParentList.addAll(parentList); - } - - currentParentList = nextParentList; - depth--; - } while (currentParentList.size() > 0); - - List> currentChildList = newOperatorList(); - currentChildList.addAll(joinOperator.getChildOperators()); - - depth = 1; - do { - List> nextChildList = newOperatorList(); - - final int count = currentChildList.size(); - for (int i = 0; i < count; i++) { - Operator child = currentChildList.get(i); - System.out.println(prefix + " child depth " + depth + " " + - child.getClass().getSimpleName() + " " + child.toString()); - - List> childList = child.getChildOperators(); - if (childList == null || childList.size() == 0) { - continue; - } - - nextChildList.addAll(childList); - } - - currentChildList = nextChildList; - depth--; - } while (currentChildList.size() > 0); + return new ArrayList<>(); } private Operator validateAndVectorizeOperatorTree( Operator nonVecRootOperator, - boolean isReduce, boolean isTez, - VectorTaskColumnInfo vectorTaskColumnInfo) - throws VectorizerCannotVectorizeException { + boolean isTez, VectorTaskColumnInfo vectorTaskColumnInfo) + throws VectorizerCannotVectorizeException { VectorizationContext taskVContext = new VectorizationContext( @@ -858,7 +801,7 @@ private Operator validateAndVectorizeOperatorTree( Operator parent = currentParentList.get(i); List> childrenList = parent.getChildOperators(); - if (childrenList == null || childrenList.size() == 0) { + if (childrenList == null || childrenList.isEmpty()) { continue; } @@ -871,12 +814,12 @@ private Operator validateAndVectorizeOperatorTree( */ doProcessChildren( parent, vectorParent, nextParentList, nextVectorParentList, - isReduce, isTez, vectorTaskColumnInfo); + isTez, vectorTaskColumnInfo); } currentParentList = nextParentList; currentVectorParentList = nextVectorParentList; - } while (currentParentList.size() > 0); + } while (!currentParentList.isEmpty()); runDelayedFixups(); @@ -888,19 +831,15 @@ private void doProcessChildren( Operator vectorParent, List> nextParentList, List> nextVectorParentList, - boolean isReduce, boolean isTez, - VectorTaskColumnInfo vectorTaskColumnInfo) - throws VectorizerCannotVectorizeException { + boolean isTez, VectorTaskColumnInfo vectorTaskColumnInfo) + throws VectorizerCannotVectorizeException { List> children = parent.getChildOperators(); - final int childrenCount = children.size(); - for (int i = 0; i < childrenCount; i++) { - - Operator child = children.get(i); + for (Operator child : children) { Operator vectorChild = doProcessChild( - child, vectorParent, isReduce, isTez, vectorTaskColumnInfo); + child, vectorParent, isTez, vectorTaskColumnInfo); fixupNewVectorChild( parent, @@ -957,12 +896,11 @@ private void queueDelayedFixup(Operator parent, Operator child, Operator vectorChild) { if (delayedFixups.get(parent) == null) { HashSet, Operator>> value = - new HashSet, Operator>>(1); + new HashSet<>(1); delayedFixups.put(parent, value); } delayedFixups.get(parent).add( - new ImmutablePair, Operator>( - child, vectorChild)); + new ImmutablePair<>(child, vectorChild)); } private void runDelayedFixups() { @@ -996,9 +934,8 @@ private void fixupOtherParent( private Operator doProcessChild( Operator child, Operator vectorParent, - boolean isReduce, boolean isTez, - VectorTaskColumnInfo vectorTaskColumnInfo) - throws VectorizerCannotVectorizeException { + boolean isTez, VectorTaskColumnInfo vectorTaskColumnInfo) + throws VectorizerCannotVectorizeException { // Use vector parent to get VectorizationContext. final VectorizationContext vContext; @@ -1012,7 +949,7 @@ private Operator doProcessChild( try { vectorChild = - validateAndVectorizeOperator(child, vContext, isReduce, isTez, vectorTaskColumnInfo); + validateAndVectorizeOperator(child, vContext, isTez, vectorTaskColumnInfo); } catch (HiveException e) { String issue = "exception: " + VectorizationContext.getStackTraceAsSingleLine(e); setNodeIssue(issue); @@ -1028,8 +965,9 @@ class VectorizationDispatcher implements SemanticDispatcher { public Object dispatch(Node nd, Stack stack, Object... nodeOutputs) throws SemanticException { Task currTask = (Task) nd; - if (currTask instanceof MapRedTask) { - MapredWork mapredWork = ((MapRedTask) currTask).getWork(); + + if (currTask instanceof MapRedTask mapRedTask) { + MapredWork mapredWork = mapRedTask.getWork(); MapWork mapWork = mapredWork.getMapWork(); setMapWorkExplainConditions(mapWork); @@ -1046,16 +984,14 @@ public Object dispatch(Node nd, Stack stack, Object... nodeOutputs) logReduceWorkExplainVectorization(reduceWork); } - } else if (currTask instanceof TezTask) { - TezWork work = ((TezTask) currTask).getWork(); - for (BaseWork baseWork: work.getAllWork()) { - if (baseWork instanceof MapWork) { - MapWork mapWork = (MapWork) baseWork; + } else if (currTask instanceof TezTask tezTask) { + TezWork work = tezTask.getWork(); + for (BaseWork baseWork : work.getAllWork()) { + if (baseWork instanceof MapWork mapWork) { setMapWorkExplainConditions(mapWork); convertMapWork(mapWork, /* isTez */ true); logMapWorkExplainVectorization(mapWork); - } else if (baseWork instanceof ReduceWork) { - ReduceWork reduceWork = (ReduceWork) baseWork; + } else if (baseWork instanceof ReduceWork reduceWork) { // Always set the EXPLAIN conditions. setReduceWorkExplainConditions(reduceWork); @@ -1066,8 +1002,7 @@ public Object dispatch(Node nd, Stack stack, Object... nodeOutputs) } logReduceWorkExplainVectorization(reduceWork); - } else if (baseWork instanceof MergeJoinWork){ - MergeJoinWork mergeJoinWork = (MergeJoinWork) baseWork; + } else if (baseWork instanceof MergeJoinWork mergeJoinWork) { // Always set the EXPLAIN conditions. setMergeJoinWorkExplainConditions(mergeJoinWork); @@ -1125,7 +1060,7 @@ private boolean logExplainVectorization(BaseWork baseWork, String name) { if (!isVectorized) { VectorizerReason notVectorizedReason = baseWork.getNotVectorizedReason(); if (notVectorizedReason != null) { - LOG.info(name + " notVectorizedReason: " + notVectorizedReason.toString()); + LOG.info(name + " notVectorizedReason: " + notVectorizedReason); } } LOG.info(name + " vectorizedVertexNum: " + baseWork.getVectorizedVertexNum()); @@ -1160,16 +1095,16 @@ private void logMapWorkExplainVectorization(MapWork mapWork) { // Conditions. List enabledConditionsMet = mapWork.getVectorizationEnabledConditionsMet(); if (enabledConditionsMet != null && !enabledConditionsMet.isEmpty()) { - LOG.info("Map enabledConditionsMet: " + enabledConditionsMet.toString()); + LOG.info("Map enabledConditionsMet: " + enabledConditionsMet); } List enabledConditionsNotMet = mapWork.getVectorizationEnabledConditionsNotMet(); if (enabledConditionsNotMet != null && !enabledConditionsNotMet.isEmpty()) { - LOG.info("Map enabledConditionsNotMet: " + enabledConditionsNotMet.toString()); + LOG.info("Map enabledConditionsNotMet: " + enabledConditionsNotMet); } Set inputFileFormatClassNameSet = mapWork.getVectorizationInputFileFormatClassNameSet(); if (inputFileFormatClassNameSet != null && !inputFileFormatClassNameSet.isEmpty()) { - LOG.info("Map inputFileFormatClassNameSet: " + inputFileFormatClassNameSet.toString()); + LOG.info("Map inputFileFormatClassNameSet: " + inputFileFormatClassNameSet); } } @@ -1212,7 +1147,7 @@ private ImmutablePair verifyOnlyOneTableScanOperator( // Eliminate MR plans with more than one TableScanOperator. Map> aliasToWork = mapWork.getAliasToWork(); - if ((aliasToWork == null) || (aliasToWork.size() == 0)) { + if ((aliasToWork == null) || (aliasToWork.isEmpty())) { setNodeIssue("Vectorized map work requires work"); return null; } @@ -1285,7 +1220,7 @@ private void determineDataColumnNums(TableScanOperator tableScanOperator, /* * The TableScanOperator's needed columns are just the data columns. */ - Set neededColumns = new HashSet(tableScanOperator.getNeededColumns()); + Set neededColumns = new HashSet<>(tableScanOperator.getNeededColumns()); for (int dataColumnNum = 0; dataColumnNum < dataColumnCount; dataColumnNum++) { String columnName = allColumnNameList.get(dataColumnNum); @@ -1322,12 +1257,8 @@ private void addVectorizedInputFileFormatSupport(Set newSupportSet, boo } else { supports = null; } - if (supports == null) { - // No support. - } else { - for (Support support : supports) { - newSupportSet.add(support); - } + if (supports != null) { + Collections.addAll(newSupportSet, supports); } } @@ -1388,7 +1319,7 @@ private boolean verifyAndSetVectorPartDesc( if (dataTypeInfoList == null) { dataTypeInfos = EMPTY_TYPEINFO_ARRAY; } else { - dataTypeInfos = dataTypeInfoList.toArray(new TypeInfo[dataTypeInfoList.size()]); + dataTypeInfos = dataTypeInfoList.toArray(new TypeInfo[0]); } // Always collect input file formats. @@ -1679,8 +1610,7 @@ private void setValidateInputFormatAndSchemaEvolutionExplain(MapWork mapWork, Map vectorPartitionDescMap, Collection enabledConditionsMetSet, Collection enabledConditionsNotMetList) { mapWork.setVectorizationInputFileFormatClassNameSet(inputFileFormatClassNameSet); - ArrayList vectorPartitionDescList = new ArrayList(); - vectorPartitionDescList.addAll(vectorPartitionDescMap.keySet()); + List vectorPartitionDescList = new ArrayList<>(vectorPartitionDescMap.keySet()); mapWork.setVectorPartitionDescList(vectorPartitionDescList); mapWork.setVectorizationEnabledConditionsMet(enabledConditionsMetSet); mapWork.setVectorizationEnabledConditionsNotMet(enabledConditionsNotMetList); @@ -1693,10 +1623,10 @@ private ImmutablePair validateInputFormatAndSchemaEvolution(Ma boolean isFullAcidTable = tableScanOperator.getConf().isFullAcidTable(); // These names/types are the data columns plus partition columns. - final List allColumnNameList = new ArrayList(); - final List allTypeInfoList = new ArrayList(); + final List allColumnNameList = new ArrayList<>(); + final List allTypeInfoList = new ArrayList<>(); - final List availableVirtualColumnList = new ArrayList(); + final List availableVirtualColumnList = new ArrayList<>(); getTableScanOperatorSchemaInfo( tableScanOperator, @@ -1704,7 +1634,7 @@ private ImmutablePair validateInputFormatAndSchemaEvolution(Ma availableVirtualColumnList); final int virtualColumnCount = availableVirtualColumnList.size(); - final List dataColumnNums = new ArrayList(); + final List dataColumnNums = new ArrayList<>(); final int dataAndPartColumnCount = allColumnNameList.size() - virtualColumnCount; @@ -1722,12 +1652,11 @@ private ImmutablePair validateInputFormatAndSchemaEvolution(Ma Map pathToPartitionInfo = mapWork.getPathToPartitionInfo(); // Remember the input file formats we validated and why. - Set inputFileFormatClassNameSet = new HashSet(); - Map vectorPartitionDescMap = - new LinkedHashMap(); - Set enabledConditionsMetSet = new HashSet(); - List enabledConditionsNotMetList = new ArrayList(); - Set inputFormatSupportSet = new TreeSet(); + Set inputFileFormatClassNameSet = new HashSet<>(); + Map vectorPartitionDescMap = new LinkedHashMap<>(); + Set enabledConditionsMetSet = new HashSet<>(); + List enabledConditionsNotMetList = new ArrayList<>(); + Set inputFormatSupportSet = new TreeSet<>(); boolean outsideLoopIsFirstPartition = true; for (Entry> entry: pathToAliases.entrySet()) { @@ -1736,10 +1665,10 @@ private ImmutablePair validateInputFormatAndSchemaEvolution(Ma Path path = entry.getKey(); List aliases = entry.getValue(); - boolean isPresent = (aliases != null && aliases.indexOf(alias) != -1); + boolean isPresent = (aliases != null && aliases.contains(alias)); if (!isPresent) { setOperatorIssue("Alias " + alias + " not present in aliases " + aliases); - return new ImmutablePair(false, false); + return new ImmutablePair<>(false, false); } // TODO: should this use getPartitionDescFromPathRecursively? That's what other code uses. PartitionDesc partDesc = pathToPartitionInfo.get(path); @@ -1747,7 +1676,7 @@ private ImmutablePair validateInputFormatAndSchemaEvolution(Ma // We've seen this already. continue; } - Set newSupportSet = new TreeSet(); + Set newSupportSet = new TreeSet<>(); final List nextDataTypeInfoList; final Deserializer deserializer; @@ -1767,8 +1696,8 @@ private ImmutablePair validateInputFormatAndSchemaEvolution(Ma * allColumnNameList and allTypeInfoList variables -- into the data and partition columns. */ - LinkedHashMap partSpec = partDesc.getPartSpec(); - if (partSpec != null && partSpec.size() > 0) { + Map partSpec = partDesc.getPartSpec(); + if (partSpec != null && !partSpec.isEmpty()) { partitionColumnCount = partSpec.size(); dataColumnCount = dataAndPartColumnCount - partitionColumnCount; } else { @@ -1826,7 +1755,7 @@ private ImmutablePair validateInputFormatAndSchemaEvolution(Ma enabledConditionsMetSet, enabledConditionsNotMetList); // We consider this an enable issue, not a not vectorized issue. - return new ImmutablePair(false, true); + return new ImmutablePair<>(false, true); } handleSupport(isFirstPartition, inputFormatSupportSet, newSupportSet); @@ -1854,7 +1783,7 @@ private ImmutablePair validateInputFormatAndSchemaEvolution(Ma setValidateInputFormatAndSchemaEvolutionExplain( mapWork, inputFileFormatClassNameSet, vectorPartitionDescMap, enabledConditionsMetSet, enabledConditionsNotMetList); - return new ImmutablePair(false, true); + return new ImmutablePair<>(false, true); } if (!(deserializer instanceof NullStructSerDe)) { @@ -1873,7 +1802,7 @@ private ImmutablePair validateInputFormatAndSchemaEvolution(Ma setValidateInputFormatAndSchemaEvolutionExplain( mapWork, inputFileFormatClassNameSet, vectorPartitionDescMap, enabledConditionsMetSet, enabledConditionsNotMetList); - return new ImmutablePair(false, true); + return new ImmutablePair<>(false, true); } } } @@ -1893,7 +1822,7 @@ private ImmutablePair validateInputFormatAndSchemaEvolution(Ma setValidateInputFormatAndSchemaEvolutionExplain( mapWork, inputFileFormatClassNameSet, vectorPartitionDescMap, enabledConditionsMetSet, enabledConditionsNotMetList); - return new ImmutablePair(false, true); + return new ImmutablePair<>(false, true); } for (int i = 0; i < nextDataTypeInfoSize; i++) { TypeInfo tableDataTypeInfo = tableDataTypeInfoList.get(i); @@ -1917,7 +1846,7 @@ private ImmutablePair validateInputFormatAndSchemaEvolution(Ma setValidateInputFormatAndSchemaEvolutionExplain( mapWork, inputFileFormatClassNameSet, vectorPartitionDescMap, enabledConditionsMetSet, enabledConditionsNotMetList); - return new ImmutablePair(false, true); + return new ImmutablePair<>(false, true); } } @@ -1936,13 +1865,12 @@ private ImmutablePair validateInputFormatAndSchemaEvolution(Ma // Always set these so EXPLAIN can see. mapWork.setVectorizationInputFileFormatClassNameSet(inputFileFormatClassNameSet); - ArrayList vectorPartitionDescList = new ArrayList(); - vectorPartitionDescList.addAll(vectorPartitionDescMap.keySet()); + List vectorPartitionDescList = new ArrayList<>(vectorPartitionDescMap.keySet()); mapWork.setVectorPartitionDescList(vectorPartitionDescList); mapWork.setVectorizationEnabledConditionsMet(enabledConditionsMetSet); mapWork.setVectorizationEnabledConditionsNotMet(enabledConditionsNotMetList); - return new ImmutablePair(true, false); + return new ImmutablePair<>(true, false); } private void validateAndVectorizeMapWork(MapWork mapWork, VectorTaskColumnInfo vectorTaskColumnInfo, @@ -1985,8 +1913,8 @@ private void validateAndVectorizeMapWork(MapWork mapWork, VectorTaskColumnInfo v * Take what all input formats support and eliminate any of them not enabled by * the Hive variable. */ - List supportRemovedReasons = new ArrayList(); - Set supportSet = new TreeSet(); + List supportRemovedReasons = new ArrayList<>(); + Set supportSet = new TreeSet<>(); if (vectorTaskColumnInfo.inputFormatSupportSet != null) { supportSet.addAll(vectorTaskColumnInfo.inputFormatSupportSet); } @@ -1994,13 +1922,12 @@ private void validateAndVectorizeMapWork(MapWork mapWork, VectorTaskColumnInfo v supportSet.retainAll(vectorizedInputFormatSupportEnabledSet); if (!supportSet.equals(vectorTaskColumnInfo.inputFormatSupportSet)) { - Set removedSet = new TreeSet(); - removedSet.addAll(vectorizedInputFormatSupportEnabledSet); + Set removedSet = new TreeSet<>(vectorizedInputFormatSupportEnabledSet); removedSet.removeAll(supportSet); String removeString = - removedSet.toString() + " is disabled because it is not in " + + removedSet + " is disabled because it is not in " + HiveConf.ConfVars.HIVE_VECTORIZED_INPUT_FORMAT_SUPPORTS_ENABLED.varname + - " " + vectorizedInputFormatSupportEnabledSet.toString(); + " " + vectorizedInputFormatSupportEnabledSet; supportRemovedReasons.add(removeString); } @@ -2010,13 +1937,12 @@ private void validateAndVectorizeMapWork(MapWork mapWork, VectorTaskColumnInfo v vectorTaskColumnInfo.setSupportRemovedReasons(supportRemovedReasons); final boolean isSupportDecimal64 = supportSet.contains(Support.DECIMAL_64); - List dataTypePhysicalVariations = new ArrayList(); + List dataTypePhysicalVariations = new ArrayList<>(); for (int i = 0; i < dataColumnCount; i++) { DataTypePhysicalVariation dataTypePhysicalVariation = DataTypePhysicalVariation.NONE; if (isSupportDecimal64) { TypeInfo typeInfo = vectorTaskColumnInfo.allTypeInfos.get(i); - if (typeInfo instanceof DecimalTypeInfo) { - DecimalTypeInfo decimalTypeInfo = (DecimalTypeInfo) typeInfo; + if (typeInfo instanceof DecimalTypeInfo decimalTypeInfo) { if (HiveDecimalWritable.isPrecisionDecimal64(decimalTypeInfo.precision())) { dataTypePhysicalVariation = DataTypePhysicalVariation.DECIMAL_64; } @@ -2032,11 +1958,11 @@ private void validateAndVectorizeMapWork(MapWork mapWork, VectorTaskColumnInfo v // Set global member indicating which virtual columns are possible to be used by // the Map vertex. - availableVectorizedVirtualColumnSet = new HashSet(); + availableVectorizedVirtualColumnSet = new HashSet<>(); availableVectorizedVirtualColumnSet.addAll(vectorTaskColumnInfo.availableVirtualColumnList); // And, use set to remember which virtual columns were actually referenced. - neededVirtualColumnSet = new HashSet(); + neededVirtualColumnSet = new HashSet<>(); mapWork.setVectorizationEnabled(true); LOG.info("Vectorization is enabled for input format(s) " + mapWork.getVectorizationInputFileFormatClassNameSet().toString()); @@ -2058,14 +1984,14 @@ private void validateAndVectorizeMapWork(MapWork mapWork, VectorTaskColumnInfo v } private boolean validateAndVectorizeMapOperators(MapWork mapWork, TableScanOperator tableScanOperator, - boolean isTez, VectorTaskColumnInfo vectorTaskColumnInfo) throws SemanticException { + boolean isTez, VectorTaskColumnInfo vectorTaskColumnInfo) { LOG.info("Validating and vectorizing MapWork... (vectorizedVertexNum " + vectorizedVertexNum + ")"); // Set "global" member indicating where to store "not vectorized" information if necessary. currentBaseWork = mapWork; - if (!validateTableScanOperator(tableScanOperator, mapWork)) { + if (!validateTableScanOperator(tableScanOperator)) { // The "not vectorized" information has been stored in the MapWork vertex. return false; @@ -2083,12 +2009,6 @@ private boolean validateAndVectorizeMapOperators(MapWork mapWork, TableScanOpera } setNodeIssue("exception: " + VectorizationContext.getStackTraceAsSingleLine(e)); return false; - } catch (ClassCastException e) { - if (!isTestVectorizerSuppressFatalExceptions) { - throw e; - } - setNodeIssue("exception: " + VectorizationContext.getStackTraceAsSingleLine(e)); - return false; } catch (RuntimeException e) { if (!isTestVectorizerSuppressFatalExceptions) { throw e; @@ -2098,13 +2018,13 @@ private boolean validateAndVectorizeMapOperators(MapWork mapWork, TableScanOpera } vectorTaskColumnInfo.setNeededVirtualColumnList( - new ArrayList(neededVirtualColumnSet)); + new ArrayList<>(neededVirtualColumnSet)); /* * The scratch column information was collected by the task VectorizationContext. Go get it. */ VectorizationContext vContext = - ((VectorizationContextRegion) tableScanOperator).getOutputVectorizationContext(); + tableScanOperator.getOutputVectorizationContext(); vectorTaskColumnInfo.setScratchTypeNameArray( vContext.getScratchColumnTypeNames()); @@ -2119,17 +2039,13 @@ private void validateAndVectorizeMapOperators(TableScanOperator tableScanOperato throws VectorizerCannotVectorizeException { Operator dummyVectorOperator = - validateAndVectorizeOperatorTree(tableScanOperator, false, isTez, vectorTaskColumnInfo); + validateAndVectorizeOperatorTree(tableScanOperator, isTez, vectorTaskColumnInfo); // Fixup parent and child relations. List> vectorChildren = dummyVectorOperator.getChildOperators(); tableScanOperator.setChildOperators(vectorChildren); - final int vectorChildCount = vectorChildren.size(); - for (int i = 0; i < vectorChildCount; i++) { - - Operator vectorChild = vectorChildren.get(i); - + for (Operator vectorChild : vectorChildren) { // Replace any occurrence of dummyVectorOperator with our TableScanOperator. List> vectorChildParents = vectorChild.getParentOperators(); final int vectorChildParentCount = vectorChildParents.size(); @@ -2160,7 +2076,7 @@ private void vectorizeTableScanOperatorInPlace(TableScanOperator tableScanOperat tableScanDesc.setVectorDesc(vectorTableScanDesc); VectorizationContext vContext = - ((VectorizationContextRegion) tableScanOperator).getOutputVectorizationContext(); + tableScanOperator.getOutputVectorizationContext(); List projectedColumns = vContext.getProjectedColumns(); vectorTableScanDesc.setProjectedColumns( ArrayUtils.toPrimitive(projectedColumns.toArray(new Integer[0]))); @@ -2185,7 +2101,7 @@ private void vectorizeTableScanOperatorInPlace(TableScanOperator tableScanOperat tableScanOperator.getConf().setVectorized(true); List> children = tableScanOperator.getChildOperators(); - while (children.size() > 0) { + while (!children.isEmpty()) { children = dosetVectorDesc(children); } } @@ -2193,8 +2109,7 @@ private void vectorizeTableScanOperatorInPlace(TableScanOperator tableScanOperat private List> dosetVectorDesc( List> children) { - List> newChildren = - new ArrayList>(); + List> newChildren = new ArrayList<>(); for (Operator child : children) { @@ -2248,13 +2163,10 @@ private void validateAndVectorizeReduceWork(ReduceWork reduceWork, vectorTaskColumnInfo.transferToBaseWork(reduceWork); reduceWork.setVectorMode(true); - - return; } private boolean validateAndVectorizeReduceOperators(ReduceWork reduceWork, - VectorTaskColumnInfo vectorTaskColumnInfo) - throws SemanticException { + VectorTaskColumnInfo vectorTaskColumnInfo) { LOG.info("Validating and vectorizing ReduceWork... (vectorizedVertexNum " + vectorizedVertexNum + ")"); @@ -2273,12 +2185,6 @@ private boolean validateAndVectorizeReduceOperators(ReduceWork reduceWork, } setNodeIssue("exception: " + VectorizationContext.getStackTraceAsSingleLine(e)); return false; - } catch (ClassCastException e) { - if (!isTestVectorizerSuppressFatalExceptions) { - throw e; - } - setNodeIssue("exception: " + VectorizationContext.getStackTraceAsSingleLine(e)); - return false; } catch (RuntimeException e) { if (!isTestVectorizerSuppressFatalExceptions) { throw e; @@ -2313,15 +2219,14 @@ private Operator validateAndVectorizeReduceOperators( dummyOperator.getChildOperators().add(reducerOperator); Operator dummyVectorOperator = - validateAndVectorizeOperatorTree(dummyOperator, true, true, vectorTaskColumnInfo); + validateAndVectorizeOperatorTree(dummyOperator, true, vectorTaskColumnInfo); Operator newVectorReducer = - dummyVectorOperator.getChildOperators().get(0); + dummyVectorOperator.getChildOperators().getFirst(); - List> children = - new ArrayList>(); + List> children = new ArrayList<>(); children.add(newVectorReducer); - while (children.size() > 0) { + while (!children.isEmpty()) { children = dosetVectorDesc(children); } @@ -2331,9 +2236,9 @@ private Operator validateAndVectorizeReduceOperators( private boolean getOnlyStructObjectInspectors(ReduceWork reduceWork, VectorTaskColumnInfo vectorTaskColumnInfo) throws SemanticException { - ArrayList reduceColumnNames = new ArrayList(); - ArrayList reduceTypeInfos = new ArrayList(); - ArrayList reduceDataTypePhysicalVariations = new ArrayList(); + List reduceColumnNames = new ArrayList<>(); + List reduceTypeInfos = new ArrayList<>(); + List reduceDataTypePhysicalVariations = new ArrayList<>(); if (reduceWork.getNeedsTagging()) { setNodeIssue("Tagging not supported"); @@ -2359,15 +2264,14 @@ private boolean getOnlyStructObjectInspectors(ReduceWork reduceWork, setNodeIssue("Key object inspector null"); return false; } - if (!(keyObjectInspector instanceof StructObjectInspector)) { + if (!(keyObjectInspector instanceof StructObjectInspector keyStructObjectInspector)) { setNodeIssue("Key object inspector not StructObjectInspector"); return false; } - StructObjectInspector keyStructObjectInspector = (StructObjectInspector) keyObjectInspector; List keyFields = keyStructObjectInspector.getAllStructFieldRefs(); for (StructField field: keyFields) { - reduceColumnNames.add(Utilities.ReduceField.KEY.toString() + "." + field.getFieldName()); + reduceColumnNames.add(Utilities.ReduceField.KEY + "." + field.getFieldName()); reduceTypeInfos.add(TypeInfoUtils.getTypeInfoFromTypeString(field.getFieldObjectInspector().getTypeName())); reduceDataTypePhysicalVariations.add(DataTypePhysicalVariation.NONE); } @@ -2381,15 +2285,14 @@ private boolean getOnlyStructObjectInspectors(ReduceWork reduceWork, valueDeserializer.initialize(null, valueTableDesc.getProperties(), null); ObjectInspector valueObjectInspector = valueDeserializer.getObjectInspector(); if (valueObjectInspector != null) { - if (!(valueObjectInspector instanceof StructObjectInspector)) { + if (!(valueObjectInspector instanceof StructObjectInspector valueStructObjectInspector)) { setNodeIssue("Value object inspector not StructObjectInspector"); return false; } - StructObjectInspector valueStructObjectInspector = (StructObjectInspector) valueObjectInspector; List valueFields = valueStructObjectInspector.getAllStructFieldRefs(); for (StructField field: valueFields) { - reduceColumnNames.add(Utilities.ReduceField.VALUE.toString() + "." + field.getFieldName()); + reduceColumnNames.add(Utilities.ReduceField.VALUE + "." + field.getFieldName()); TypeInfo reduceTypeInfo = TypeInfoUtils.getTypeInfoFromTypeString( field.getFieldObjectInspector().getTypeName()); reduceTypeInfos.add(reduceTypeInfo); @@ -2546,7 +2449,7 @@ public PhysicalContext resolve(PhysicalContext physicalContext) throws SemanticE HiveConf.getVar(hiveConf, HiveConf.ConfVars.HIVE_VECTORIZED_INPUT_FORMAT_SUPPORTS_ENABLED); String[] supportEnabledStrings = vectorizedInputFormatSupportEnabled.toLowerCase().split(","); - vectorizedInputFormatSupportEnabledSet = new TreeSet(); + vectorizedInputFormatSupportEnabledSet = new TreeSet<>(); for (String supportEnabledString : supportEnabledStrings) { Support support = Support.nameToSupportMap.get(supportEnabledString); @@ -2580,8 +2483,7 @@ public PhysicalContext resolve(PhysicalContext physicalContext) throws SemanticE TaskGraphWalker ogw = new TaskGraphWalker(disp); // get all the tasks nodes from root task - ArrayList topNodes = new ArrayList(); - topNodes.addAll(physicalContext.getRootTasks()); + List topNodes = new ArrayList<>(physicalContext.getRootTasks()); // begin to walk through the task tree. ogw.startWalking(topNodes, null); @@ -2600,10 +2502,9 @@ private void initRowDeserializeExcludeClasses() { private void setOperatorNotSupported(Operator op) { OperatorDesc desc = op.getConf(); - Annotation note = AnnotationUtils.getAnnotation(desc.getClass(), Explain.class); + Explain note = AnnotationUtils.getAnnotation(desc.getClass(), Explain.class); if (note != null) { - Explain explainNote = (Explain) note; - setNodeIssue(explainNote.displayName() + " (" + op.getType() + ") not supported"); + setNodeIssue(note.displayName() + " (" + op.getType() + ") not supported"); } else { setNodeIssue("Operator " + op.getType() + " not supported"); } @@ -2616,7 +2517,7 @@ private boolean validateSMBMapJoinOperator(SMBMapJoinOperator op) { return validateMapJoinDesc(desc); } - private boolean validateTableScanOperator(TableScanOperator op, MapWork mWork) { + private boolean validateTableScanOperator(TableScanOperator op) { TableScanDesc desc = op.getConf(); if (desc.isGatherStats()) { setOperatorIssue("gather stats not supported"); @@ -2695,8 +2596,7 @@ private boolean validateTopNKeyOperator(TopNKeyOperator op) { return validateExprNodeDesc(keyColumns, "Key columns"); } - private boolean validateGroupByOperator(GroupByOperator op, boolean isReduce, - boolean isTez, VectorGroupByDesc vectorGroupByDesc) { + private boolean validateGroupByOperator(GroupByOperator op, VectorGroupByDesc vectorGroupByDesc) { GroupByDesc desc = op.getConf(); @@ -2805,7 +2705,7 @@ private boolean validateGroupByOperator(GroupByOperator op, boolean isReduce, * Mode.FINAL --> ProcessingMode.STREAMING * */ - boolean hasKeys = (desc.getKeys().size() > 0); + boolean hasKeys = !desc.getKeys().isEmpty(); ProcessingMode processingMode = VectorGroupByDesc.groupByDescModeToVectorProcessingMode(desc.getMode(), hasKeys); @@ -2818,8 +2718,7 @@ private boolean validateGroupByOperator(GroupByOperator op, boolean isReduce, //TODO: isGroupingSetsPresent() is returning false, even though // ListGroupingSets is present. Need to check if there is hidden bug. boolean isGroupingSetsPresent = (desc.getListGroupingSets() != null && !desc.getListGroupingSets().isEmpty()); - if (!validateAggregationDescs(desc.getAggregators(), desc.getMode(), - isGroupingSetsPresent, hasKeys)) { + if (!validateAggregationDescs(desc.getAggregators(), desc.getMode(), isGroupingSetsPresent)) { return false; } @@ -2833,16 +2732,11 @@ private boolean validateGroupByOperator(GroupByOperator op, boolean isReduce, return true; } - private boolean validateFileSinkOperator(FileSinkOperator op) { - return true; - } - /* * Determine recursively if the PTF LEAD or LAG function is being used in an expression. */ private boolean containsLeadLag(ExprNodeDesc exprNodeDesc) { - if (exprNodeDesc instanceof ExprNodeGenericFuncDesc) { - ExprNodeGenericFuncDesc genericFuncDesc = (ExprNodeGenericFuncDesc) exprNodeDesc; + if (exprNodeDesc instanceof ExprNodeGenericFuncDesc genericFuncDesc) { GenericUDF genFuncClass = genericFuncDesc.getGenericUDF(); if (genFuncClass instanceof GenericUDFLag || genFuncClass instanceof GenericUDFLead) { @@ -2881,16 +2775,16 @@ private boolean validatePTFOperator(PTFOperator op, VectorizationContext vContex return false; } List> ptfParents = op.getParentOperators(); - if (ptfParents != null && ptfParents.size() > 0) { - Operator ptfParent = op.getParentOperators().get(0); + if (ptfParents != null && !ptfParents.isEmpty()) { + Operator ptfParent = op.getParentOperators().getFirst(); if (!(ptfParent instanceof ReduceSinkOperator)) { boolean isReduceShufflePtf = false; if (ptfParent instanceof SelectOperator) { ptfParents = ptfParent.getParentOperators(); - if (ptfParents == null || ptfParents.size() == 0) { + if (ptfParents == null || ptfParents.isEmpty()) { isReduceShufflePtf = true; } else { - ptfParent = ptfParent.getParentOperators().get(0); + ptfParent = ptfParent.getParentOperators().getFirst(); isReduceShufflePtf = (ptfParent instanceof ReduceSinkOperator); } } @@ -2968,7 +2862,7 @@ private boolean validatePTFOperator(PTFOperator op, VectorizationContext vContex (exprNodeDescList != null && exprNodeDescList.size() == 1); final ExprNodeDesc singleExprNodeDesc = - (isSingleParameter ? exprNodeDescList.get(0) : null); + (isSingleParameter ? exprNodeDescList.getFirst() : null); final TypeInfo singleTypeInfo = (isSingleParameter ? singleExprNodeDesc.getTypeInfo() : null); final PrimitiveCategory singlePrimitiveCategory = @@ -3016,7 +2910,7 @@ private boolean validatePTFOperator(PTFOperator op, VectorizationContext vContex return false; } - ExprNodeDesc exprNodeDesc = exprNodeDescList.get(0); + ExprNodeDesc exprNodeDesc = exprNodeDescList.getFirst(); if (containsLeadLag(exprNodeDesc)) { setOperatorIssue("lead and lag function not supported in argument expression of aggregation function " + functionName); @@ -3034,16 +2928,10 @@ private boolean validatePTFOperator(PTFOperator op, VectorizationContext vContex } else { ColumnVector.Type colVecType = VectorizationContext.getColumnVectorTypeFromTypeInfo(typeInfo); - switch (colVecType) { - case LONG: - case DOUBLE: - case DECIMAL: - isSupportedType = true; - break; - default: - isSupportedType = false; - break; - } + isSupportedType = switch (colVecType) { + case LONG, DOUBLE, DECIMAL -> true; + default -> false; + }; } if (!isSupportedType) { setOperatorIssue(typeInfo.getTypeName() + " data type not supported in argument expression of aggregation function " + functionName); @@ -3094,12 +2982,10 @@ private boolean validateExprNodeDesc(List descs, } private boolean validateAggregationDescs(List descs, - GroupByDesc.Mode groupByMode, boolean isGroupingSetsPresent, - boolean hasKeys) { + GroupByDesc.Mode groupByMode, boolean isGroupingSetsPresent) { for (AggregationDesc d : descs) { - if (!validateAggregationDesc(d, groupByMode, isGroupingSetsPresent, - hasKeys)) { + if (!validateAggregationDesc(d, groupByMode, isGroupingSetsPresent)) { return false; } } @@ -3114,8 +3000,7 @@ private boolean validateExprNodeDescRecursive(ExprNodeDesc desc, String expressi private boolean validateExprNodeDescRecursive(ExprNodeDesc desc, String expressionTitle, VectorExpressionDescriptor.Mode mode, boolean allowComplex, boolean allowVoidProjection) { - if (desc instanceof ExprNodeColumnDesc) { - ExprNodeColumnDesc c = (ExprNodeColumnDesc) desc; + if (desc instanceof ExprNodeColumnDesc c) { String columnName = c.getColumn(); if (availableVectorizedVirtualColumnSet != null) { @@ -3146,8 +3031,7 @@ private boolean validateExprNodeDescRecursive(ExprNodeDesc desc, String expressi return false; } boolean isInExpression = false; - if (desc instanceof ExprNodeGenericFuncDesc) { - ExprNodeGenericFuncDesc d = (ExprNodeGenericFuncDesc) desc; + if (desc instanceof ExprNodeGenericFuncDesc d) { boolean r = validateGenericUdf(d); if (!r) { setExpressionIssue(expressionTitle, "UDF " + d + " not supported"); @@ -3158,11 +3042,10 @@ private boolean validateExprNodeDescRecursive(ExprNodeDesc desc, String expressi } if (desc.getChildren() != null) { if (isInExpression - && desc.getChildren().get(0).getTypeInfo().getCategory() == Category.STRUCT) { + && desc.getChildren().getFirst().getTypeInfo().getCategory() == Category.STRUCT) { // Don't restrict child expressions for projection. // Always use loose FILTER mode. - if (!validateStructInExpression( - desc, expressionTitle, VectorExpressionDescriptor.Mode.FILTER)) { + if (!validateStructInExpression(desc, expressionTitle)) { return false; } } else { @@ -3180,8 +3063,7 @@ private boolean validateExprNodeDescRecursive(ExprNodeDesc desc, String expressi return true; } - private boolean validateStructInExpression(ExprNodeDesc desc, - String expressionTitle, VectorExpressionDescriptor.Mode mode) { + private boolean validateStructInExpression(ExprNodeDesc desc, String expressionTitle) { for (ExprNodeDesc d : desc.getChildren()) { TypeInfo typeInfo = d.getTypeInfo(); if (typeInfo.getCategory() != Category.STRUCT) { @@ -3255,7 +3137,7 @@ private boolean validateGenericUdf(ExprNodeGenericFuncDesc genericUDFExpr) { } private boolean validateAggregationDesc(AggregationDesc aggDesc, GroupByDesc.Mode groupByMode, - boolean isGroupingSetsPresent, boolean hasKeys) { + boolean isGroupingSetsPresent) { String udfName = aggDesc.getGenericUDAFName().toLowerCase(); if (!supportedAggregationUdfs.contains(udfName)) { @@ -3274,14 +3156,10 @@ private boolean validateAggregationDesc(AggregationDesc aggDesc, GroupByDesc.Mod return false; } - List parameters = aggDesc.getParameters(); - if (parameters != null && !validateExprNodeDesc(parameters, "Aggregation Function UDF " + udfName + " parameter")) { - return false; - } - - return true; + return parameters == null || + validateExprNodeDesc(parameters, "Aggregation Function UDF " + udfName + " parameter"); } public static boolean validateDataType(String type, VectorExpressionDescriptor.Mode mode, @@ -3345,22 +3223,6 @@ public static String getValidateDataTypeErrorMsg(String type, VectorExpressionDe return (result ? null : "Vectorizing data type " + type + " not supported"); } - private void fixupParentChildOperators(Operator op, - Operator vectorOp) { - if (op.getParentOperators() != null) { - vectorOp.setParentOperators(op.getParentOperators()); - for (Operator p : op.getParentOperators()) { - p.replaceChild(op, vectorOp); - } - } - if (op.getChildOperators() != null) { - vectorOp.setChildOperators(op.getChildOperators()); - for (Operator c : op.getChildOperators()) { - c.replaceParent(op, vectorOp); - } - } - } - private boolean isBigTableOnlyResults(MapJoinDesc desc) { Byte[] order = desc.getTagOrder(); byte posBigTable = (byte) desc.getPosBigTable(); @@ -3422,10 +3284,7 @@ Operator specializeMapJoinOperator(Operator specializeMapJoinOperator(Operator> keyExprs = desc.getKeys(); List bigTableKeyExprs = keyExprs.get(posBigTable); if (bigTableKeyExprs.size() == 1) { - TypeInfo typeInfo = bigTableKeyExprs.get(0).getTypeInfo(); + TypeInfo typeInfo = bigTableKeyExprs.getFirst().getTypeInfo(); LOG.info("Vectorizer vectorizeOperator map join typeName " + typeInfo.getTypeName()); switch (((PrimitiveTypeInfo) typeInfo).getPrimitiveCategory()) { case BOOLEAN: @@ -3503,87 +3362,36 @@ Operator specializeMapJoinOperator(Operator switch (vectorMapJoinVariation) { + case INNER -> VectorMapJoinInnerLongOperator.class; + case INNER_BIG_ONLY -> VectorMapJoinInnerBigOnlyLongOperator.class; + case LEFT_SEMI -> VectorMapJoinLeftSemiLongOperator.class; + case LEFT_ANTI -> VectorMapJoinAntiJoinLongOperator.class; + case OUTER -> VectorMapJoinOuterLongOperator.class; + case FULL_OUTER -> VectorMapJoinFullOuterLongOperator.class; + default -> throw new HiveException("Unknown operator variation " + vectorMapJoinVariation); + }; + case STRING -> switch (vectorMapJoinVariation) { + case INNER -> VectorMapJoinInnerStringOperator.class; + case INNER_BIG_ONLY -> VectorMapJoinInnerBigOnlyStringOperator.class; + case LEFT_SEMI -> VectorMapJoinLeftSemiStringOperator.class; + case LEFT_ANTI -> VectorMapJoinAntiJoinStringOperator.class; + case OUTER -> VectorMapJoinOuterStringOperator.class; + case FULL_OUTER -> VectorMapJoinFullOuterStringOperator.class; + default -> throw new HiveException("Unknown operator variation " + vectorMapJoinVariation); + }; + case MULTI_KEY -> switch (vectorMapJoinVariation) { + case INNER -> VectorMapJoinInnerMultiKeyOperator.class; + case INNER_BIG_ONLY -> VectorMapJoinInnerBigOnlyMultiKeyOperator.class; + case LEFT_SEMI -> VectorMapJoinLeftSemiMultiKeyOperator.class; + case LEFT_ANTI -> VectorMapJoinAntiJoinMultiKeyOperator.class; + case OUTER -> VectorMapJoinOuterMultiKeyOperator.class; + case FULL_OUTER -> VectorMapJoinFullOuterMultiKeyOperator.class; + default -> throw new HiveException("Unknown operator variation " + vectorMapJoinVariation); + }; + default -> throw new RuntimeException("Unexpected hash table key type " + hashTableKeyType.name()); + }; boolean minMaxEnabled = HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVE_VECTORIZATION_MAPJOIN_NATIVE_MINMAX_ENABLED); @@ -3643,21 +3451,21 @@ private boolean canSpecializeMapJoin(Operator op, MapJoi List keyDesc = desc.getKeys().get(posBigTable); - boolean outerJoinHasNoKeys = (!desc.isNoOuterJoin() && keyDesc.size() == 0); + boolean outerJoinHasNoKeys = (!desc.isNoOuterJoin() && keyDesc.isEmpty()); // For now, we don't support joins on or using DECIMAL_64. VectorExpression[] allBigTableKeyExpressions = vContext.getVectorExpressionsUpConvertDecimal64(keyDesc); final int allBigTableKeyExpressionsLength = allBigTableKeyExpressions.length; boolean supportsKeyTypes = true; // Assume. - HashSet notSupportedKeyTypes = new HashSet(); + Set notSupportedKeyTypes = new HashSet<>(); // Since a key expression can be a calculation and the key will go into a scratch column, // we need the mapping and type information. int[] bigTableKeyColumnMap = new int[allBigTableKeyExpressionsLength]; String[] bigTableKeyColumnNames = new String[allBigTableKeyExpressionsLength]; TypeInfo[] bigTableKeyTypeInfos = new TypeInfo[allBigTableKeyExpressionsLength]; - ArrayList bigTableKeyExpressionsList = new ArrayList(); + List bigTableKeyExpressionsList = new ArrayList<>(); VectorExpression[] slimmedBigTableKeyExpressions; for (int i = 0; i < allBigTableKeyExpressionsLength; i++) { VectorExpression ve = allBigTableKeyExpressions[i]; @@ -3681,7 +3489,7 @@ private boolean canSpecializeMapJoin(Operator op, MapJoi } bigTableKeyTypeInfos[i] = typeInfo; } - if (bigTableKeyExpressionsList.size() == 0) { + if (bigTableKeyExpressionsList.isEmpty()) { slimmedBigTableKeyExpressions = null; } else { slimmedBigTableKeyExpressions = bigTableKeyExpressionsList.toArray(new VectorExpression[0]); @@ -3712,12 +3520,12 @@ private boolean canSpecializeMapJoin(Operator op, MapJoi * Value expressions include keys? YES. */ boolean supportsValueTypes = true; // Assume. - HashSet notSupportedValueTypes = new HashSet(); + Set notSupportedValueTypes = new HashSet<>(); int[] bigTableValueColumnMap = new int[allBigTableValueExpressions.length]; String[] bigTableValueColumnNames = new String[allBigTableValueExpressions.length]; TypeInfo[] bigTableValueTypeInfos = new TypeInfo[allBigTableValueExpressions.length]; - ArrayList bigTableValueExpressionsList = new ArrayList(); + List bigTableValueExpressionsList = new ArrayList<>(); VectorExpression[] slimmedBigTableValueExpressions; for (int i = 0; i < bigTableValueColumnMap.length; i++) { VectorExpression ve = allBigTableValueExpressions[i]; @@ -3736,7 +3544,7 @@ private boolean canSpecializeMapJoin(Operator op, MapJoi } bigTableValueTypeInfos[i] = typeInfo; } - if (bigTableValueExpressionsList.size() == 0) { + if (bigTableValueExpressionsList.isEmpty()) { slimmedBigTableValueExpressions = null; } else { slimmedBigTableValueExpressions = @@ -3978,17 +3786,14 @@ private boolean canSpecializeMapJoin(Operator op, MapJoi } // Check common conditions for both Optimized and Fast Hash Tables. - boolean result = true; // Assume. - if (!useOptimizedTable || - !isVectorizationMapJoinNativeEnabled || - !isTez || - !oneMapJoinCondition || - hasNullSafes || - !smallTableExprVectorizes || - outerJoinHasNoKeys || - !supportsValueTypes) { - result = false; - } + boolean result = useOptimizedTable && + isVectorizationMapJoinNativeEnabled && + isTez && + oneMapJoinCondition && + !hasNullSafes && + smallTableExprVectorizes && + !outerJoinHasNoKeys && + supportsValueTypes; // supportsKeyTypes @@ -4040,8 +3845,8 @@ private boolean canSpecializeMapJoin(Operator op, MapJoi } private Operator specializeReduceSinkOperator( - Operator op, VectorizationContext vContext, ReduceSinkDesc desc, - VectorReduceSinkDesc vectorDesc) throws HiveException { + Operator op, VectorizationContext vContext, VectorReduceSinkDesc vectorDesc) + throws HiveException { VectorReduceSinkInfo vectorReduceSinkInfo = vectorDesc.getVectorReduceSinkInfo(); @@ -4088,19 +3893,12 @@ private Operator specializeReduceSinkOperator( if (vectorDesc.getIsEmptyKey()) { opClass = VectorReduceSinkEmptyKeyOperator.class; } else { - switch (reduceSinkKeyType) { - case LONG: - opClass = VectorReduceSinkLongOperator.class; - break; - case STRING: - opClass = VectorReduceSinkStringOperator.class; - break; - case MULTI_KEY: - opClass = VectorReduceSinkMultiKeyOperator.class; - break; - default: - throw new HiveException("Unknown reduce sink key type " + reduceSinkKeyType); - } + opClass = switch (reduceSinkKeyType) { + case LONG -> VectorReduceSinkLongOperator.class; + case STRING -> VectorReduceSinkStringOperator.class; + case MULTI_KEY -> VectorReduceSinkMultiKeyOperator.class; + default -> throw new HiveException("Unknown reduce sink key type " + reduceSinkKeyType); + }; } } else { if (vectorDesc.getIsEmptyKey() && vectorDesc.getIsEmptyBuckets() && vectorDesc.getIsEmptyPartitions()) { @@ -4153,7 +3951,7 @@ private boolean canSpecializeReduceSink(ReduceSinkDesc desc, boolean hasPTFTopN = (limit >= 0 && memUsage > 0 && desc.isPTFReduceSink()); - boolean hasDistinctColumns = (desc.getDistinctColumnIndices().size() > 0); + boolean hasDistinctColumns = !desc.getDistinctColumnIndices().isEmpty(); TableDesc keyTableDesc = desc.getKeySerializeInfo(); Class keySerializerClass = keyTableDesc.getSerDeClass(); @@ -4169,7 +3967,7 @@ private boolean canSpecializeReduceSink(ReduceSinkDesc desc, // So if we later decide not to specialize, we'll just waste any scratch columns allocated... List keysDescs = desc.getKeyCols(); - final boolean isEmptyKey = (keysDescs.size() == 0); + final boolean isEmptyKey = keysDescs.isEmpty(); if (!isEmptyKey) { VectorExpression[] allKeyExpressions = vContext.getVectorExpressions(keysDescs); @@ -4181,7 +3979,7 @@ private boolean canSpecializeReduceSink(ReduceSinkDesc desc, // Since a key expression can be a calculation and the key will go into a scratch column, // we need the mapping and type information. - ArrayList groupByKeyExpressionsList = new ArrayList(); + List groupByKeyExpressionsList = new ArrayList<>(); for (int i = 0; i < reduceSinkKeyColumnMap.length; i++) { VectorExpression ve = allKeyExpressions[i]; reduceSinkKeyColumnMap[i] = ve.getOutputColumnNum(); @@ -4192,7 +3990,7 @@ private boolean canSpecializeReduceSink(ReduceSinkDesc desc, groupByKeyExpressionsList.add(ve); } } - if (groupByKeyExpressionsList.size() == 0) { + if (groupByKeyExpressionsList.isEmpty()) { reduceSinkKeyExpressions = null; } else { reduceSinkKeyExpressions = groupByKeyExpressionsList.toArray(new VectorExpression[0]); @@ -4205,7 +4003,7 @@ private boolean canSpecializeReduceSink(ReduceSinkDesc desc, } List valueDescs = desc.getValueCols(); - final boolean isEmptyValue = (valueDescs.size() == 0); + final boolean isEmptyValue = valueDescs.isEmpty(); if (!isEmptyValue) { VectorExpression[] allValueExpressions = vContext.getVectorExpressions(valueDescs); @@ -4214,7 +4012,7 @@ private boolean canSpecializeReduceSink(ReduceSinkDesc desc, final Type[] reduceSinkValueColumnVectorTypes = new Type[allValueExpressions.length]; VectorExpression[] reduceSinkValueExpressions; - ArrayList reduceSinkValueExpressionsList = new ArrayList(); + List reduceSinkValueExpressionsList = new ArrayList<>(); for (int i = 0; i < valueDescs.size(); ++i) { VectorExpression ve = allValueExpressions[i]; reduceSinkValueColumnMap[i] = ve.getOutputColumnNum(); @@ -4225,7 +4023,7 @@ private boolean canSpecializeReduceSink(ReduceSinkDesc desc, reduceSinkValueExpressionsList.add(ve); } } - if (reduceSinkValueExpressionsList.size() == 0) { + if (reduceSinkValueExpressionsList.isEmpty()) { reduceSinkValueExpressions = null; } else { reduceSinkValueExpressions = reduceSinkValueExpressionsList.toArray(new VectorExpression[0]); @@ -4242,9 +4040,9 @@ private boolean canSpecializeReduceSink(ReduceSinkDesc desc, vectorReduceSinkInfo.setUseUniformHash(useUniformHash); List bucketDescs = desc.getBucketCols(); - final boolean isEmptyBuckets = (bucketDescs == null || bucketDescs.size() == 0); + final boolean isEmptyBuckets = (bucketDescs == null || bucketDescs.isEmpty()); List partitionDescs = desc.getPartitionCols(); - final boolean isEmptyPartitions = (partitionDescs == null || partitionDescs.size() == 0); + final boolean isEmptyPartitions = (partitionDescs == null || partitionDescs.isEmpty()); if (useUniformHash || (isEmptyKey && isEmptyBuckets && isEmptyPartitions)) { @@ -4265,7 +4063,7 @@ private boolean canSpecializeReduceSink(ReduceSinkDesc desc, reduceSinkBucketColumnMap = new int[bucketDescs.size()]; reduceSinkBucketTypeInfos = new TypeInfo[bucketDescs.size()]; reduceSinkBucketColumnVectorTypes = new Type[bucketDescs.size()]; - ArrayList reduceSinkBucketExpressionsList = new ArrayList(); + List reduceSinkBucketExpressionsList = new ArrayList<>(); for (int i = 0; i < bucketDescs.size(); ++i) { VectorExpression ve = allBucketExpressions[i]; reduceSinkBucketColumnMap[i] = ve.getOutputColumnNum(); @@ -4276,9 +4074,7 @@ private boolean canSpecializeReduceSink(ReduceSinkDesc desc, reduceSinkBucketExpressionsList.add(ve); } } - if (reduceSinkBucketExpressionsList.size() == 0) { - reduceSinkBucketExpressions = null; - } else { + if (!reduceSinkBucketExpressionsList.isEmpty()) { reduceSinkBucketExpressions = reduceSinkBucketExpressionsList.toArray(new VectorExpression[0]); } } @@ -4294,7 +4090,7 @@ private boolean canSpecializeReduceSink(ReduceSinkDesc desc, reduceSinkPartitionColumnMap = new int[partitionDescs.size()]; reduceSinkPartitionTypeInfos = new TypeInfo[partitionDescs.size()]; reduceSinkPartitionColumnVectorTypes = new Type[partitionDescs.size()]; - ArrayList reduceSinkPartitionExpressionsList = new ArrayList(); + List reduceSinkPartitionExpressionsList = new ArrayList<>(); for (int i = 0; i < partitionDescs.size(); ++i) { VectorExpression ve = allPartitionExpressions[i]; reduceSinkPartitionColumnMap[i] = ve.getOutputColumnNum(); @@ -4305,9 +4101,7 @@ private boolean canSpecializeReduceSink(ReduceSinkDesc desc, reduceSinkPartitionExpressionsList.add(ve); } } - if (reduceSinkPartitionExpressionsList.size() == 0) { - reduceSinkPartitionExpressions = null; - } else { + if (!reduceSinkPartitionExpressionsList.isEmpty()) { reduceSinkPartitionExpressions = reduceSinkPartitionExpressionsList.toArray(new VectorExpression[0]); } } @@ -4345,17 +4139,13 @@ private boolean canSpecializeReduceSink(ReduceSinkDesc desc, vectorDesc.setIsUnexpectedCondition(isUnexpectedCondition); // Many restrictions. - if (!isVectorizationReduceSinkNativeEnabled || - !isTez || - hasPTFTopN || - hasDistinctColumns || - !isKeyBinarySortable || - !isValueLazyBinary || - isUnexpectedCondition) { - return false; - } - - return true; + return isVectorizationReduceSinkNativeEnabled && + isTez && + !hasPTFTopN && + !hasDistinctColumns && + isKeyBinarySortable && + isValueLazyBinary && + !isUnexpectedCondition; } private boolean usesVectorUDFAdaptor(VectorExpression vecExpr) { @@ -4536,7 +4326,7 @@ public static ImmutablePair getVectorAggregationDe inputColVectorType = null; inputExpression = null; } else { - ExprNodeDesc exprNodeDesc = parameterList.get(0); + ExprNodeDesc exprNodeDesc = parameterList.getFirst(); inputTypeInfo = exprNodeDesc.getTypeInfo(); if (inputTypeInfo == null) { String issue ="Aggregations with null parameter type not supported " + @@ -4763,7 +4553,7 @@ private static ImmutablePair,String> doVectoriz ImmutablePair pair = getVectorAggregationDesc(aggDesc, vContext); if (pair.left == null) { - return new ImmutablePair, String>(null, pair.right); + return new ImmutablePair<>(null, pair.right); } vecAggrDescs[i] = pair.left; @@ -4778,7 +4568,7 @@ private static ImmutablePair,String> doVectoriz OperatorFactory.getVectorOperator( groupByOp.getCompilationOpContext(), groupByDesc, vContext, vectorGroupByDesc); - return new ImmutablePair, String>(vectorOp, null); + return new ImmutablePair<>(vectorOp, null); } public static Operator vectorizeSelectOperator( @@ -4868,8 +4658,7 @@ private static VectorExpression fixDecimalDataTypePhysicalVariations(final Vecto } // fix up the input column numbers and output column numbers if (inputArgsChanged) { - if (parent instanceof VectorUDFAdaptor) { - VectorUDFAdaptor parentAdaptor = (VectorUDFAdaptor) parent; + if (parent instanceof VectorUDFAdaptor parentAdaptor) { VectorUDFArgDesc[] argDescs = parentAdaptor.getArgDescs(); for (int i = 0; i < argDescs.length; ++i) { if (argDescs[i].getColumnNum() != children[i].getOutputColumnNum()) { @@ -4939,7 +4728,7 @@ private static void fillInPTFEvaluators( List args = winFunc.getArgs(); if (args != null) { - List exprNodeDescList = new ArrayList(); + List exprNodeDescList = new ArrayList<>(); for (PTFExpressionDef arg : args) { exprNodeDescList.add(arg.getExprNode()); } @@ -5078,8 +4867,8 @@ private static void createVectorPTFDesc(Operator ptfOp, private static void determineKeyAndNonKeyInputColumnMap(int[] outputColumnProjectionMap, boolean isPartitionOrderBy, int[] orderColumnMap, int[] partitionColumnMap, - int evaluatorCount, ArrayList keyInputColumns, - ArrayList nonKeyInputColumns) { + int evaluatorCount, List keyInputColumns, + List nonKeyInputColumns) { final int outputSize = outputColumnProjectionMap.length; final int orderKeyCount = orderColumnMap.length; @@ -5114,8 +4903,8 @@ private static void determineKeyAndNonKeyInputColumnMap(int[] outputColumnProjec * execution. */ private static VectorPTFInfo createVectorPTFInfo(Operator ptfOp, - PTFDesc ptfDesc, VectorizationContext vContext, VectorPTFDesc vectorPTFDesc) - throws HiveException { + VectorizationContext vContext, VectorPTFDesc vectorPTFDesc) + throws HiveException { List outputSignature = ptfOp.getSchema().getSignature(); final int outputSize = outputSignature.size(); @@ -5183,8 +4972,8 @@ private static VectorPTFInfo createVectorPTFInfo(Operator keyInputColumns = new ArrayList(); - ArrayList nonKeyInputColumns = new ArrayList(); + List keyInputColumns = new ArrayList<>(); + List nonKeyInputColumns = new ArrayList<>(); determineKeyAndNonKeyInputColumnMap(outputColumnProjectionMap, isPartitionOrderBy, orderColumnMap, partitionColumnMap, evaluatorCount, keyInputColumns, nonKeyInputColumns); int[] keyInputColumnMap = ArrayUtils.toPrimitive(keyInputColumns.toArray(new Integer[0])); @@ -5249,9 +5038,7 @@ public static Operator vectorizePTFOperator( VectorPTFDesc vectorPTFDesc) throws HiveException { - PTFDesc ptfDesc = (PTFDesc) ptfOp.getConf(); - - VectorPTFInfo vectorPTFInfo = createVectorPTFInfo(ptfOp, ptfDesc, vContext, vectorPTFDesc); + VectorPTFInfo vectorPTFInfo = createVectorPTFInfo(ptfOp, vContext, vectorPTFDesc); vectorPTFDesc.setVectorPTFInfo(vectorPTFInfo); @@ -5261,22 +5048,9 @@ public static Operator vectorizePTFOperator( vContext, vectorPTFDesc); } - // UNDONE: Used by tests... - public Operator vectorizeOperator(Operator op, - VectorizationContext vContext, boolean isReduce, boolean isTez, VectorTaskColumnInfo vectorTaskColumnInfo) - throws HiveException, VectorizerCannotVectorizeException { - Operator vectorOp = - validateAndVectorizeOperator(op, vContext, isReduce, isTez, vectorTaskColumnInfo); - if (vectorOp != op) { - fixupParentChildOperators(op, vectorOp); - } - return vectorOp; - } - public Operator validateAndVectorizeOperator(Operator op, - VectorizationContext vContext, boolean isReduce, boolean isTez, - VectorTaskColumnInfo vectorTaskColumnInfo) - throws HiveException, VectorizerCannotVectorizeException { + VectorizationContext vContext, boolean isTez, VectorTaskColumnInfo vectorTaskColumnInfo) + throws HiveException, VectorizerCannotVectorizeException { Operator vectorOp = null; // This "global" allows various validation methods to set the "not vectorized" reason. @@ -5316,7 +5090,7 @@ public Operator validateAndVectorizeOperator(Operator bigTableFilters = desc.getFilters().get((byte) desc.getPosBigTable()); - boolean isOuterAndFiltered = (!desc.isNoOuterJoin() && bigTableFilters.size() > 0); + boolean isOuterAndFiltered = (!desc.isNoOuterJoin() && !bigTableFilters.isEmpty()); if (!isOuterAndFiltered) { opClass = VectorMapJoinOperator.class; } else { @@ -5386,7 +5160,7 @@ public Operator validateAndVectorizeOperator(Operator validateAndVectorizeOperator(Operator validateAndVectorizeOperator(Operator partSpec = p.getSpec(); + Map partSpec = p.getSpec(); Properties partProps = p.getSchema(); String[] partKeyTypes; @@ -59,8 +58,8 @@ static public Object evalExprWithPart(ExprNodeDesc expr, Partition p) throws Hiv if (!partSpec.keySet().containsAll(expr.getCols())) { return null; } - partKeyTypes = p.getTable().getStorageHandler().getPartitionKeys(p.getTable()).stream() - .map(FieldSchema::getType).toArray(String[]::new); + partKeyTypes = p.getTable().getEffectivePartCols().stream().map(FieldSchema::getType) + .toArray(String[]::new); } else { String pcolTypes = partProps.getProperty(hive_metastoreConstants.META_TABLE_PARTITION_COLUMN_TYPES); partKeyTypes = pcolTypes.trim().split(":"); @@ -104,7 +103,7 @@ public static Pair prepareExpr( ExprNodeDesc expr, List partColumnNames, List partColumnTypeInfos) throws HiveException { // Create the row object - List partObjectInspectors = new ArrayList(); + List partObjectInspectors = new ArrayList<>(); for (int i = 0; i < partColumnNames.size(); i++) { partObjectInspectors.add(PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector( partColumnTypeInfos.get(i))); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsAutoGatherContext.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsAutoGatherContext.java index 1b6f73ea264f..47df10e07412 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsAutoGatherContext.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsAutoGatherContext.java @@ -24,7 +24,6 @@ import java.util.HashMap; import java.util.List; import java.util.Map; -import java.util.stream.Collectors; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.metastore.api.FieldSchema; @@ -211,10 +210,10 @@ private Operator genSelOp(String command, boolean rewritten, Context origCtx) loadFileWork.addAll(sem.getLoadFileWork()); // 4. because there is only one TS for analyze statement, we can get it. - if (sem.topOps.values().size() != 1) { + if (sem.topOps.size() != 1) { throw new SemanticException( "ColumnStatsAutoGatherContext is expecting exactly one TS, but finds " - + sem.topOps.values().size()); + + sem.topOps.size()); } Operator operator = sem.topOps.values().iterator().next(); @@ -245,10 +244,9 @@ private void replaceSelectOperatorProcess(SelectOperator operator, Operator columns = inputRR.getColumnInfos(); - List colList = new ArrayList(); - List columnNames = new ArrayList(); - Map columnExprMap = - new HashMap(); + List colList = new ArrayList<>(); + List columnNames = new ArrayList<>(); + Map columnExprMap = new HashMap<>(); // the column positions in the operator should be like this // <----non-partition columns---->|<--static partition columns-->|<--dynamic partition columns--> // ExprNodeColumnDesc | ExprNodeConstantDesc | ExprNodeColumnDesc diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java index 023934d9eb24..73eb33eeec34 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java @@ -193,8 +193,7 @@ private static CharSequence genPartitionClause(Table tbl, List pa private static String getColTypeOf(Table tbl, String partKey) { - for (FieldSchema fs : tbl.hasNonNativePartitionSupport() ? - tbl.getStorageHandler().getPartitionKeys(tbl) : tbl.getPartitionKeys()) { + for (FieldSchema fs : tbl.getEffectivePartCols()) { if (partKey.equalsIgnoreCase(fs.getName())) { return fs.getType().toLowerCase(); } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/MergeSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/MergeSemanticAnalyzer.java index 882840ffef5a..b0412dd6a9e0 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/MergeSemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/MergeSemanticAnalyzer.java @@ -29,7 +29,6 @@ import org.apache.hadoop.hive.ql.metadata.Table; import org.apache.hadoop.hive.ql.parse.rewrite.MergeStatement; import org.apache.hadoop.hive.ql.parse.rewrite.RewriterFactory; -import org.apache.hadoop.hive.ql.plan.HiveOperation; import java.util.ArrayList; import java.util.HashMap; @@ -230,7 +229,7 @@ private MergeStatement.UpdateClause handleUpdate(ASTNode whenMatchedUpdateClause String deleteExtraPredicate) throws SemanticException { assert whenMatchedUpdateClause.getType() == HiveParser.TOK_MATCHED; assert getWhenClauseOperation(whenMatchedUpdateClause).getType() == HiveParser.TOK_UPDATE; - Map newValuesMap = new HashMap<>(targetTable.getCols().size() + targetTable.getPartCols().size()); + Map newValuesMap = new HashMap<>(targetTable.getAllCols().size()); ASTNode setClause = (ASTNode)getWhenClauseOperation(whenMatchedUpdateClause).getChild(0); //columns being updated -> update expressions; "setRCols" (last param) is null because we use actual expressions //before re-parsing, i.e. they are known to SemanticAnalyzer logic @@ -303,7 +302,7 @@ private List findWhenClauses(ASTNode tree, int start) throws SemanticEx "Unexpected node type found: " + whenClause.getType() + addParseInfo(whenClause); whenClauses.add(whenClause); } - if (whenClauses.size() <= 0) { + if (whenClauses.isEmpty()) { //Futureproofing: the parser will actually not allow this throw new SemanticException("Must have at least 1 WHEN clause in MERGE statement"); } @@ -499,11 +498,7 @@ private void handleUnresolvedColumns() { private void addColumn2Table(String tableName, String columnName) { tableName = tableName.toLowerCase(); //normalize name for mapping tableNamesFound.add(tableName); - List cols = table2column.get(tableName); - if (cols == null) { - cols = new ArrayList<>(); - table2column.put(tableName, cols); - } + List cols = table2column.computeIfAbsent(tableName, k -> new ArrayList<>()); //we want to preserve 'columnName' as it was in original input query so that rewrite //looks as much as possible like original query cols.add(columnName); @@ -526,7 +521,7 @@ private String getPredicate() { } StringBuilder sb = new StringBuilder(); for (String col : targetCols) { - if (sb.length() > 0) { + if (!sb.isEmpty()) { sb.append(" AND "); } //but preserve table name in SQL @@ -605,17 +600,15 @@ protected String getMatchedText(ASTNode n) { } protected boolean isAliased(ASTNode n) { - switch (n.getType()) { - case HiveParser.TOK_TABREF: - return findTabRefIdxs(n)[0] != 0; - case HiveParser.TOK_TABNAME: - return false; - case HiveParser.TOK_SUBQUERY: + return switch (n.getType()) { + case HiveParser.TOK_TABREF -> findTabRefIdxs(n)[0] != 0; + case HiveParser.TOK_TABNAME -> false; + case HiveParser.TOK_SUBQUERY -> { assert n.getChildCount() > 1 : "Expected Derived Table to be aliased"; - return true; - default: - throw raiseWrongType("TOK_TABREF|TOK_TABNAME", n); - } + yield true; + } + default -> throw raiseWrongType("TOK_TABREF|TOK_TABNAME", n); + }; } /** diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseUtils.java index 9964b9369065..823dcbf76826 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseUtils.java @@ -123,15 +123,11 @@ public static ASTNode parse( * @return boolean */ public static boolean isJoinToken(ASTNode node) { - switch (node.getToken().getType()) { - case HiveParser.TOK_JOIN: - case HiveParser.TOK_LEFTOUTERJOIN: - case HiveParser.TOK_RIGHTOUTERJOIN: - case HiveParser.TOK_FULLOUTERJOIN: - return true; - default: - return false; - } + return switch (node.getToken().getType()) { + case HiveParser.TOK_JOIN, HiveParser.TOK_LEFTOUTERJOIN, HiveParser.TOK_RIGHTOUTERJOIN, + HiveParser.TOK_FULLOUTERJOIN -> true; + default -> false; + }; } /** @@ -163,12 +159,10 @@ public static List validateColumnNameUniqueness( // but it should not be a major bottleneck as the number of columns are // anyway not so big Iterator iterCols = fieldSchemas.iterator(); - List colNames = new ArrayList(); + List colNames = new ArrayList<>(); while (iterCols.hasNext()) { String colName = iterCols.next().getName(); - Iterator iter = colNames.iterator(); - while (iter.hasNext()) { - String oldColName = iter.next(); + for (String oldColName : colNames) { if (colName.equalsIgnoreCase(oldColName)) { throw new SemanticException(ErrorMsg.DUPLICATE_COLUMN_NAMES .getMsg(oldColName)); @@ -286,7 +280,7 @@ public static Pair containsTokenOfType(ASTNode root, Integer .. final Set tokensToMatch = new HashSet<>(Arrays.asList(tokens)); final String[] matched = {null}; - boolean check = ParseUtils.containsTokenOfType(root, new PTFUtils.Predicate() { + boolean check = ParseUtils.containsTokenOfType(root, new PTFUtils.Predicate<>() { @Override public boolean apply(ASTNode node) { if (tokensToMatch.contains(node.getType())) { @@ -302,7 +296,7 @@ public boolean apply(ASTNode node) { } public static boolean containsTokenOfType(ASTNode root, PTFUtils.Predicate predicate) { - Queue queue = new ArrayDeque(); + Queue queue = new ArrayDeque<>(); // BFS queue.add(root); @@ -535,7 +529,7 @@ public static String getKeywords(Set excludes) { if (excludes != null && excludes.contains(name)) { continue; } - if (sb.length() > 0) { + if (!sb.isEmpty()) { sb.append(","); } sb.append(name); @@ -581,8 +575,7 @@ public static Map> getFullPartitionSpecs( CommonTree ast, Table table, Configuration conf, boolean canGroupExprs) throws SemanticException { String defaultPartitionName = HiveConf.getVar(conf, HiveConf.ConfVars.DEFAULT_PARTITION_NAME); Map colTypes = new HashMap<>(); - List partitionKeys = table.hasNonNativePartitionSupport() ? - table.getStorageHandler().getPartitionKeys(table) : table.getPartitionKeys(); + List partitionKeys = table.getEffectivePartCols(); for (FieldSchema fs : partitionKeys) { colTypes.put(fs.getName().toLowerCase(), fs.getType()); } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/rewrite/CopyOnWriteMergeRewriter.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/rewrite/CopyOnWriteMergeRewriter.java index b72f2496d938..b7335473da85 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/rewrite/CopyOnWriteMergeRewriter.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/rewrite/CopyOnWriteMergeRewriter.java @@ -202,7 +202,7 @@ public void appendWhenMatchedUpdateClause(MergeStatement.UpdateClause updateClau sqlGenerator.append(hintStr); hintStr = null; } - List values = new ArrayList<>(targetTable.getCols().size() + targetTable.getPartCols().size()); + List values = new ArrayList<>(targetTable.getAllCols().size()); values.addAll(sqlGenerator.getDeleteValues(Context.Operation.MERGE)); addValues(targetTable, targetAlias, updateClause.getNewValuesMap(), values); addValuesForRowLineageForCopyOnMerge(isRowLineageSupported, values, diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/rewrite/MergeRewriter.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/rewrite/MergeRewriter.java index 3ec2e580f046..0773e4039809 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/rewrite/MergeRewriter.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/rewrite/MergeRewriter.java @@ -223,8 +223,7 @@ public void appendWhenMatchedUpdateClause(MergeStatement.UpdateClause updateClau String onClauseAsString = mergeStatement.getOnClauseAsText(); sqlGenerator.append(" -- update clause").append("\n"); - List valuesAndAcidSortKeys = new ArrayList<>( - targetTable.getCols().size() + targetTable.getPartCols().size() + 1); + List valuesAndAcidSortKeys = new ArrayList<>(targetTable.getAllCols().size() + 1); valuesAndAcidSortKeys.addAll(sqlGenerator.getSortKeys(Operation.MERGE)); addValues(targetTable, targetAlias, updateClause.getNewValuesMap(), valuesAndAcidSortKeys); sqlGenerator.appendInsertBranch(hintStr, valuesAndAcidSortKeys); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/rewrite/SplitMergeRewriter.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/rewrite/SplitMergeRewriter.java index 84fcf186f6b7..06edaca90f0f 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/rewrite/SplitMergeRewriter.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/rewrite/SplitMergeRewriter.java @@ -58,7 +58,7 @@ public void appendWhenMatchedUpdateClause(MergeStatement.UpdateClause updateClau String onClauseAsString = mergeStatement.getOnClauseAsText(); sqlGenerator.append(" -- update clause (insert part)\n"); - List values = new ArrayList<>(targetTable.getCols().size() + targetTable.getPartCols().size()); + List values = new ArrayList<>(targetTable.getAllCols().size()); addValues(targetTable, targetAlias, updateClause.getNewValuesMap(), values); addRowLineageColumnsForWhenMatchedUpdateClause(isRowLineageSupported, values, targetAlias, conf); sqlGenerator.appendInsertBranch(hintStr, values); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/PartitionDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/PartitionDesc.java index 0dcfe72d7f5b..ca9fa6298446 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/PartitionDesc.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/PartitionDesc.java @@ -57,7 +57,7 @@ public class PartitionDesc implements Serializable, Cloneable { private static final Interner> CLASS_INTERNER = Interners.newWeakInterner(); private TableDesc tableDesc; - private LinkedHashMap partSpec; + private Map partSpec; private Class inputFileFormatClass; private Class outputFileFormatClass; private Properties properties; @@ -138,11 +138,11 @@ public void setTableDesc(TableDesc tableDesc) { } @Explain(displayName = "partition values", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) - public LinkedHashMap getPartSpec() { + public Map getPartSpec() { return partSpec; } - public void setPartSpec(final LinkedHashMap partSpec) { + public void setPartSpec(final Map partSpec) { StringInternUtils.internValuesInMap(partSpec); this.partSpec = partSpec; } diff --git a/storage-api/src/java/org/apache/hadoop/hive/common/io/CacheTag.java b/storage-api/src/java/org/apache/hadoop/hive/common/io/CacheTag.java index 0f5d7b915168..f81f8e9ec816 100644 --- a/storage-api/src/java/org/apache/hadoop/hive/common/io/CacheTag.java +++ b/storage-api/src/java/org/apache/hadoop/hive/common/io/CacheTag.java @@ -82,7 +82,7 @@ public static final CacheTag build(String tableName) { return new TableCacheTag(tableName); } - public static final CacheTag build(String tableName, LinkedHashMap partDescMap) { + public static final CacheTag build(String tableName, Map partDescMap) { if (StringUtils.isEmpty(tableName) || partDescMap == null || partDescMap.isEmpty()) { throw new IllegalArgumentException(); }