HIVE-27190: Implement col stats cache for hive iceberg table #6380
base: master
@@ -24,7 +24,6 @@
 import java.util.Map;
 import java.util.Set;
 import java.util.concurrent.atomic.AtomicInteger;
-import java.util.stream.Collectors;

 import org.apache.calcite.linq4j.tree.Expression;
 import org.apache.calcite.plan.RelOptSchema;

@@ -82,7 +81,6 @@
 import com.google.common.collect.ImmutableList;
 import com.google.common.collect.ImmutableMap;
 import com.google.common.collect.Lists;
-import com.google.common.collect.Sets;

 public class RelOptHiveTable implements RelOptTable {

@@ -125,7 +123,7 @@ public RelOptHiveTable(RelOptSchema calciteSchema, RelDataTypeFactory typeFactor
     this.schema = calciteSchema;
     this.typeFactory = typeFactory;
     this.qualifiedTblName = ImmutableList.copyOf(qualifiedTblName);
-    this.name = this.qualifiedTblName.stream().collect(Collectors.joining("."));
+    this.name = String.join(".", this.qualifiedTblName);
     this.rowType = rowType;
     this.hiveTblMetadata = hiveTblMetadata;
     this.hiveColStatsMap = new HashMap<>();

@@ -192,15 +190,15 @@ public List<ColumnStrategy> getColumnStrategies() {
   public RelOptHiveTable copy(RelDataType newRowType) {
     // 1. Build map of column name to col index of original schema
     // Assumption: Hive Table can not contain duplicate column names
-    Map<String, Integer> nameToColIndxMap = new HashMap<String, Integer>();
+    Map<String, Integer> nameToColIndxMap = new HashMap<>();
     for (RelDataTypeField f : this.rowType.getFieldList()) {
       nameToColIndxMap.put(f.getName(), f.getIndex());
     }

     // 2. Build nonPart/Part/Virtual column info for new RowSchema
-    List<ColumnInfo> newHiveNonPartitionCols = new ArrayList<ColumnInfo>();
-    List<ColumnInfo> newHivePartitionCols = new ArrayList<ColumnInfo>();
-    List<VirtualColumn> newHiveVirtualCols = new ArrayList<VirtualColumn>();
+    List<ColumnInfo> newHiveNonPartitionCols = new ArrayList<>();
+    List<ColumnInfo> newHivePartitionCols = new ArrayList<>();
+    List<VirtualColumn> newHiveVirtualCols = new ArrayList<>();
     Map<Integer, VirtualColumn> virtualColInfoMap = HiveCalciteUtil.getVColsMap(this.hiveVirtualCols,
         this.noOfNonVirtualCols);
     Integer originalColIndx;

@@ -329,8 +327,8 @@ private List<RelReferentialConstraint> generateReferentialConstraints() {
     ImmutableList.Builder<RelReferentialConstraint> builder = ImmutableList.builder();
     if (foreignKeyInfo != null && !foreignKeyInfo.getForeignKeys().isEmpty()) {
       for (List<ForeignKeyCol> fkCols : foreignKeyInfo.getForeignKeys().values()) {
-        String parentDatabaseName = fkCols.get(0).parentDatabaseName;
-        String parentTableName = fkCols.get(0).parentTableName;
+        String parentDatabaseName = fkCols.getFirst().parentDatabaseName;
+        String parentTableName = fkCols.getFirst().parentTableName;
         String qualifiedName;
         List<String> parentTableQualifiedName = new ArrayList<>();
         if (parentDatabaseName != null && !parentDatabaseName.isEmpty()) {

@@ -390,7 +388,7 @@ public <T> T unwrap(Class<T> arg0) {

   @Override
   public List<RelCollation> getCollationList() {
-    ImmutableList.Builder<RelFieldCollation> collationList = new ImmutableList.Builder<RelFieldCollation>();
+    ImmutableList.Builder<RelFieldCollation> collationList = new ImmutableList.Builder<>();
     for (Order sortColumn : this.hiveTblMetadata.getSortCols()) {
       for (int i=0; i<this.hiveTblMetadata.getSd().getCols().size(); i++) {
         FieldSchema field = this.hiveTblMetadata.getSd().getCols().get(i);

@@ -411,7 +409,7 @@ public List<RelCollation> getCollationList() {

   @Override
   public RelDistribution getDistribution() {
-    ImmutableList.Builder<Integer> columnPositions = new ImmutableList.Builder<Integer>();
+    ImmutableList.Builder<Integer> columnPositions = new ImmutableList.Builder<>();
     for (String bucketColumn : this.hiveTblMetadata.getBucketCols()) {
       for (int i=0; i<this.hiveTblMetadata.getSd().getCols().size(); i++) {
         FieldSchema field = this.hiveTblMetadata.getSd().getCols().get(i);

@@ -435,7 +433,7 @@ public double getRowCount() {
     if (null == partitionList) {
       // we are here either unpartitioned table or partitioned table with no
       // predicates
-      computePartitionList(hiveConf, null, new HashSet<Integer>());
+      computePartitionList(hiveConf, null, new HashSet<>());
     }
     rowCount = StatsUtils.getNumRows(hiveConf, getNonPartColumns(), hiveTblMetadata,
         partitionList, noColsMissingStats);

@@ -465,7 +463,7 @@ private String getColNamesForLogging(Set<String> colLst) {
   public void computePartitionList(HiveConf conf, RexNode pruneNode, Set<Integer> partOrVirtualCols) {
     try {
       if (!hiveTblMetadata.isPartitioned() || pruneNode == null
-          || InputFinder.bits(pruneNode).length() == 0) {
+          || InputFinder.bits(pruneNode).isEmpty()) {
         // there is no predicate on partitioning column, we need all partitions
         // in this case.
         partitionList = PartitionPruner.prune(hiveTblMetadata, null, conf, getName(),

@@ -485,11 +483,11 @@ public void computePartitionList(HiveConf conf, RexNode pruneNode, Set<Integer>
   }

   private void updateColStats(Set<Integer> projIndxLst, boolean allowMissingStats) {
-    List<String> nonPartColNamesThatRqrStats = new ArrayList<String>();
-    List<Integer> nonPartColIndxsThatRqrStats = new ArrayList<Integer>();
-    List<String> partColNamesThatRqrStats = new ArrayList<String>();
-    List<Integer> partColIndxsThatRqrStats = new ArrayList<Integer>();
-    Set<String> colNamesFailedStats = new HashSet<String>();
+    List<String> nonPartColNamesThatRqrStats = new ArrayList<>();
+    List<Integer> nonPartColIndxsThatRqrStats = new ArrayList<>();
+    List<String> partColNamesThatRqrStats = new ArrayList<>();
+    List<Integer> partColIndxsThatRqrStats = new ArrayList<>();
+    Set<String> colNamesFailedStats = new HashSet<>();

     // 1. Separate required columns to Non Partition and Partition Cols
     ColumnInfo tmp;

@@ -514,19 +512,19 @@ private void updateColStats(Set<Integer> projIndxLst, boolean allowMissingStats)
     if (null == partitionList) {
       // We could be here either because its an unpartitioned table or because
       // there are no pruning predicates on a partitioned table.
-      computePartitionList(hiveConf, null, new HashSet<Integer>());
+      computePartitionList(hiveConf, null, new HashSet<>());
     }

-    String partitionListKey = partitionList.getKey().orElse(null);
Contributor: Why was the null key check removed?

Member (Author): I removed the redundant Optional.
-    ColumnStatsList colStatsCached = colStatsCache.get(partitionListKey);
-    if (colStatsCached == null) {
-      colStatsCached = new ColumnStatsList();
-      colStatsCache.put(partitionListKey, colStatsCached);
-    }
+    String partitionListKey = partitionList.getKey();
+
+    ColumnStatsList colStatsCached = colStatsCache.computeIfAbsent(
+        partitionListKey,
+        k -> new ColumnStatsList()
+    );

     // 2. Obtain Col Stats for Non Partition Cols
-    if (nonPartColNamesThatRqrStats.size() > 0) {
-      List<ColStatistics> hiveColStats = new ArrayList<ColStatistics>();
+    if (!nonPartColNamesThatRqrStats.isEmpty()) {
+      List<ColStatistics> hiveColStats = new ArrayList<>();

       if (!hiveTblMetadata.isPartitioned()) {
         // 2.1 Handle the case for unpartitioned table.

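The hunk above replaces a manual get-then-put cache population with Map.computeIfAbsent. Below is a minimal, self-contained sketch of the two equivalent patterns; the class name and the plain List<String> standing in for ColumnStatsList are illustrative, not the actual Hive types:

```java
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class CacheLookupSketch {
  public static void main(String[] args) {
    Map<String, List<String>> colStatsCache = new HashMap<>();
    String partitionListKey = "db.tbl;"; // illustrative key value

    // Old pattern: explicit lookup, null check, and insert.
    List<String> cached = colStatsCache.get(partitionListKey);
    if (cached == null) {
      cached = new ArrayList<>();
      colStatsCache.put(partitionListKey, cached);
    }

    // New pattern: create and register the entry on demand in a single call.
    List<String> cached2 = colStatsCache.computeIfAbsent(partitionListKey, k -> new ArrayList<>());

    System.out.println(cached == cached2); // true: both refer to the same cached instance
  }
}
```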
@@ -547,9 +545,9 @@ private void updateColStats(Set<Integer> projIndxLst, boolean allowMissingStats)
       if (hiveColStats.isEmpty()) {
         colNamesFailedStats.addAll(nonPartColNamesThatRqrStats);
       } else if (hiveColStats.size() != nonPartColNamesThatRqrStats.size()) {
-        Set<String> setOfFiledCols = new HashSet<String>(nonPartColNamesThatRqrStats);
+        Set<String> setOfFiledCols = new HashSet<>(nonPartColNamesThatRqrStats);

-        Set<String> setOfObtainedColStats = new HashSet<String>();
+        Set<String> setOfObtainedColStats = new HashSet<>();
         for (ColStatistics cs : hiveColStats) {
           setOfObtainedColStats.add(cs.getColumnName());
         }

@@ -561,7 +559,7 @@ private void updateColStats(Set<Integer> projIndxLst, boolean allowMissingStats)
         // nonPartColNamesThatRqrStats. reorder hiveColStats so we can build hiveColStatsMap
         // using nonPartColIndxsThatRqrStats as below
         Map<String, ColStatistics> columnStatsMap =
-            new HashMap<String, ColStatistics>(hiveColStats.size());
+            new HashMap<>(hiveColStats.size());
         for (ColStatistics cs : hiveColStats) {
           columnStatsMap.put(cs.getColumnName(), cs);
           // even though the stats were estimated we need to warn user that

@@ -586,22 +584,21 @@ private void updateColStats(Set<Integer> projIndxLst, boolean allowMissingStats)
         if (partitionList.getNotDeniedPartns().isEmpty()) {
           // no need to make a metastore call
           rowCount = 0;
-          hiveColStats = new ArrayList<ColStatistics>();
+          hiveColStats = new ArrayList<>();
           for (int i = 0; i < nonPartColNamesThatRqrStats.size(); i++) {
             // add empty stats object for each column
             hiveColStats.add(
                 new ColStatistics(
                     nonPartColNamesThatRqrStats.get(i),
                     hiveNonPartitionColsMap.get(nonPartColIndxsThatRqrStats.get(i)).getTypeName()));
           }
-          colNamesFailedStats.clear();
Contributor: Why was it removed?

Member (Author): It is redundant; there is nothing to clear yet at this point.
           colStatsCached.updateState(State.COMPLETE);
         } else {
           Statistics stats = StatsUtils.collectStatistics(hiveConf, partitionList,
               hiveTblMetadata, hiveNonPartitionCols, nonPartColNamesThatRqrStats, colStatsCached,
               nonPartColNamesThatRqrStats, true);
           rowCount = stats.getNumRows();
-          hiveColStats = new ArrayList<ColStatistics>();
+          hiveColStats = new ArrayList<>();
           for (String c : nonPartColNamesThatRqrStats) {
             ColStatistics cs = stats.getColumnStatisticsFromColName(c);
             if (cs != null) {

@@ -622,7 +619,7 @@ private void updateColStats(Set<Integer> projIndxLst, boolean allowMissingStats)
         }
       }

-      if (hiveColStats != null && hiveColStats.size() == nonPartColNamesThatRqrStats.size()) {
Contributor: Why was the null check removed?

Member (Author): It can't be null here; the IDE reported the same.
+      if (hiveColStats.size() == nonPartColNamesThatRqrStats.size()) {
         for (int i = 0; i < hiveColStats.size(); i++) {
           // the columns in nonPartColIndxsThatRqrStats/nonPartColNamesThatRqrStats/hiveColStats
           // are in same order

@@ -754,7 +751,7 @@ public int hashCode() {
   }

   public String getPartitionListKey() {
-    return partitionList != null ? partitionList.getKey().orElse(null) : null;
+    return partitionList != null ? partitionList.getKey() : null;
   }

 }

@@ -184,8 +184,11 @@ public static PrunedPartitionList prune(Table tab, ExprNodeDesc prunerExpr,
     String key = tab.getFullyQualifiedName() + ";";
     if (tab.getMetaTable() != null) {
       key = tab.getFullyQualifiedName() + "." + tab.getMetaTable() + ";";
-    } else if (tab.getSnapshotRef() != null) {
-      key = tab.getFullyQualifiedName() + "." + tab.getSnapshotRef() + ";";
+    } else if (tab.isNonNative()) {
Contributor: Should we currently limit ourselves to only Iceberg tables, rather than all non-native tables?

Member (Author): Other non-native tables would have a fallback, a -1 snapshot id (see the sketch after this hunk).
+      long snapshotId = tab.getStorageHandler().getSnapshotId(tab);
+      if (snapshotId > 0) {
+        key = tab.getFullyQualifiedName() + "." + snapshotId + ";";
+      }
     }

     if (!tab.isPartitioned()) {

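A rough sketch of the cache-key fallback discussed in the thread above; the helper name, its parameters, and the "non-positive (e.g. -1) means no snapshot" convention are illustrative assumptions based on the review answer, not the exact Hive API:

```java
public class PruningKeySketch {

  // Stand-ins for Table.getFullyQualifiedName(), getMetaTable(), isNonNative(),
  // and getStorageHandler().getSnapshotId(tab); only the branches relevant to
  // the thread above are sketched.
  static String pruningCacheKey(String fullyQualifiedName, String metaTable,
      boolean nonNative, long snapshotId) {
    String key = fullyQualifiedName + ";";
    if (metaTable != null) {
      key = fullyQualifiedName + "." + metaTable + ";";
    } else if (nonNative && snapshotId > 0) {
      // Iceberg-like handlers report a positive snapshot id, so the cached
      // partition list (and the column stats keyed off it) changes whenever
      // a new snapshot is committed.
      key = fullyQualifiedName + "." + snapshotId + ";";
    }
    // Handlers without snapshot support keep the plain fully-qualified-name key.
    return key;
  }

  public static void main(String[] args) {
    System.out.println(pruningCacheKey("db.ice_tbl", null, true, 42L));   // db.ice_tbl.42;
    System.out.println(pruningCacheKey("db.other_tbl", null, true, -1L)); // db.other_tbl;
  }
}
```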
@@ -441,7 +444,7 @@ static private boolean hasUserFunctions(ExprNodeDesc expr) {
     return false;
   }

-  private static PrunedPartitionList getPartitionsFromServer(Table tab, String key, ExprNodeDesc compactExpr,
+  private static PrunedPartitionList getPartitionsFromServer(Table tab, String key, ExprNodeDesc compactExpr,
       HiveConf conf, Set<String> partColsUsedInFilter, boolean isPruningByExactFilter)
       throws SemanticException {
     try {

Contributor: Should we handle the exception case where the table is null?

Member (Author): The table shouldn't be null here; if it is null, an exception is expected.