diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index bf604e48cc96..1ef1f4c3ec5d 100644 --- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -2691,8 +2691,8 @@ public static enum ConfVars { "\n" + "If the skew information is correctly stored in the metadata, hive.optimize.skewjoin.compiletime\n" + "would change the query plan to take care of it, and hive.optimize.skewjoin will be a no-op."), - - HIVE_OPTIMIZE_REPLACE_DELETE_WITH_TRUNCATE("hive.optimize.delete.all", false, + @Deprecated + HIVE_OPTIMIZE_REPLACE_DELETE_WITH_TRUNCATE("hive.optimize.delete.all", true, "Optimize delete the entire data from table, use truncate instead"), HIVE_OPTIMIZE_METADATA_DELETE("hive.optimize.delete.metadata.only", true, "Optimize delete the entire data from table, use truncate instead"), diff --git a/iceberg/iceberg-handler/src/test/queries/positive/delete_all_iceberg.q b/iceberg/iceberg-handler/src/test/queries/positive/delete_all_iceberg.q index 05d5a95daa7c..79f5234754d4 100644 --- a/iceberg/iceberg-handler/src/test/queries/positive/delete_all_iceberg.q +++ b/iceberg/iceberg-handler/src/test/queries/positive/delete_all_iceberg.q @@ -15,7 +15,6 @@ --! qt:replace:/(\S\"removed-files-size\\\":\\\")(\d+)(\\\")/$1#Masked#$3/ set hive.vectorized.execution.enabled=true; -set hive.optimize.delete.all=true; create table ice01 (id int, key int) Stored by Iceberg stored as ORC TBLPROPERTIES('format-version'='2', 'iceberg.delete.skiprowdata'='false'); @@ -35,4 +34,41 @@ select count(*) from ice01; select * from ice01; describe formatted ice01; -drop table ice01; \ No newline at end of file +drop table ice01; + +-- Create a V2 table with Copy-on-write as the deletion mode. +create table ice01 (id int, key int) stored by iceberg stored as orc tblproperties ('format-version'='2', 'write.delete.mode'='copy-on-write'); + +insert into ice01 values (1,1),(2,1),(3,1),(4,1); +insert into ice01 values (1,2),(2,2),(3,2),(4,2); +insert into ice01 values (1,3),(2,3),(3,3),(4,3); +insert into ice01 values (1,4),(2,4),(3,4),(4,4); +insert into ice01 values (1,5),(2,5),(3,5),(4,5); + +explain analyze delete from ice01; + +delete from ice01; + +select count(*) from ice01; +select * from ice01; +describe formatted ice01; +drop table ice01; + +-- Create a V1 table. +create table ice01 (id int, key int) stored by iceberg stored as orc; + +insert into ice01 values (1,1),(2,1),(3,1),(4,1); +insert into ice01 values (1,2),(2,2),(3,2),(4,2); +insert into ice01 values (1,3),(2,3),(3,3),(4,3); +insert into ice01 values (1,4),(2,4),(3,4),(4,4); +insert into ice01 values (1,5),(2,5),(3,5),(4,5); + +explain analyze delete from ice01; + +-- Perform delete on the V1 table +delete from ice01; + +select count(*) from ice01; +select * from ice01; +describe formatted ice01; +drop table ice01; diff --git a/iceberg/iceberg-handler/src/test/results/positive/delete_all_iceberg.q.out b/iceberg/iceberg-handler/src/test/results/positive/delete_all_iceberg.q.out index c9ee05ffd2a8..1a04472118f3 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/delete_all_iceberg.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/delete_all_iceberg.q.out @@ -152,3 +152,302 @@ POSTHOOK: type: DROPTABLE POSTHOOK: Input: default@ice01 POSTHOOK: Output: database:default POSTHOOK: Output: default@ice01 +PREHOOK: query: create table ice01 (id int, key int) stored by iceberg stored as orc tblproperties ('format-version'='2', 'write.delete.mode'='copy-on-write') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@ice01 +POSTHOOK: query: create table ice01 (id int, key int) stored by iceberg stored as orc tblproperties ('format-version'='2', 'write.delete.mode'='copy-on-write') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@ice01 +PREHOOK: query: insert into ice01 values (1,1),(2,1),(3,1),(4,1) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@ice01 +POSTHOOK: query: insert into ice01 values (1,1),(2,1),(3,1),(4,1) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@ice01 +PREHOOK: query: insert into ice01 values (1,2),(2,2),(3,2),(4,2) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@ice01 +POSTHOOK: query: insert into ice01 values (1,2),(2,2),(3,2),(4,2) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@ice01 +PREHOOK: query: insert into ice01 values (1,3),(2,3),(3,3),(4,3) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@ice01 +POSTHOOK: query: insert into ice01 values (1,3),(2,3),(3,3),(4,3) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@ice01 +PREHOOK: query: insert into ice01 values (1,4),(2,4),(3,4),(4,4) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@ice01 +POSTHOOK: query: insert into ice01 values (1,4),(2,4),(3,4),(4,4) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@ice01 +PREHOOK: query: insert into ice01 values (1,5),(2,5),(3,5),(4,5) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@ice01 +POSTHOOK: query: insert into ice01 values (1,5),(2,5),(3,5),(4,5) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@ice01 +PREHOOK: query: delete from ice01 +PREHOOK: type: TRUNCATETABLE +PREHOOK: Output: default@ice01 +POSTHOOK: query: delete from ice01 +POSTHOOK: type: TRUNCATETABLE +POSTHOOK: Output: default@ice01 +PREHOOK: query: explain analyze delete from ice01 +PREHOOK: type: TRUNCATETABLE +PREHOOK: Output: default@ice01 +POSTHOOK: query: explain analyze delete from ice01 +POSTHOOK: type: TRUNCATETABLE +POSTHOOK: Output: default@ice01 +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Truncate Table or Partition + table name: default.ice01 + +PREHOOK: query: delete from ice01 +PREHOOK: type: TRUNCATETABLE +PREHOOK: Output: default@ice01 +POSTHOOK: query: delete from ice01 +POSTHOOK: type: TRUNCATETABLE +POSTHOOK: Output: default@ice01 +PREHOOK: query: select count(*) from ice01 +PREHOOK: type: QUERY +PREHOOK: Input: default@ice01 +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: select count(*) from ice01 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@ice01 +POSTHOOK: Output: hdfs://### HDFS PATH ### +0 +PREHOOK: query: select * from ice01 +PREHOOK: type: QUERY +PREHOOK: Input: default@ice01 +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: select * from ice01 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@ice01 +POSTHOOK: Output: hdfs://### HDFS PATH ### +PREHOOK: query: describe formatted ice01 +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@ice01 +POSTHOOK: query: describe formatted ice01 +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@ice01 +# col_name data_type comment +id int +key int + +# Detailed Table Information +Database: default +#### A masked pattern was here #### +Retention: 0 +#### A masked pattern was here #### +Table Type: EXTERNAL_TABLE +Table Parameters: + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + EXTERNAL TRUE + bucketing_version 2 + current-schema {\"type\":\"struct\",\"schema-id\":0,\"fields\":[{\"id\":1,\"name\":\"id\",\"required\":false,\"type\":\"int\"},{\"id\":2,\"name\":\"key\",\"required\":false,\"type\":\"int\"}]} + current-snapshot-id #Masked# + current-snapshot-summary {\"deleted-data-files\":\"5\",\"deleted-records\":\"20\",\"removed-files-size\":\"#Masked#\",\"changed-partition-count\":\"1\",\"total-records\":\"0\",\"total-files-size\":\"#Masked#\",\"total-data-files\":\"0\",\"total-delete-files\":\"0\",\"total-position-deletes\":\"0\",\"total-equality-deletes\":\"0\"} + current-snapshot-timestamp-ms #Masked# + format-version 2 + iceberg.orc.files.only true + metadata_location hdfs://### HDFS PATH ### + numFiles 0 + numRows 0 + parquet.compression zstd + previous_metadata_location hdfs://### HDFS PATH ### + rawDataSize 0 + serialization.format 1 + snapshot-count 6 + storage_handler org.apache.iceberg.mr.hive.HiveIcebergStorageHandler + table_type ICEBERG + totalSize #Masked# +#### A masked pattern was here #### + uuid #Masked# + write.delete.mode copy-on-write + write.format.default orc + write.merge.mode merge-on-read + write.update.mode merge-on-read + +# Storage Information +SerDe Library: org.apache.iceberg.mr.hive.HiveIcebergSerDe +InputFormat: org.apache.iceberg.mr.hive.HiveIcebergInputFormat +OutputFormat: org.apache.iceberg.mr.hive.HiveIcebergOutputFormat +Compressed: No +Sort Columns: [] +PREHOOK: query: drop table ice01 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@ice01 +PREHOOK: Output: database:default +PREHOOK: Output: default@ice01 +POSTHOOK: query: drop table ice01 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@ice01 +POSTHOOK: Output: database:default +POSTHOOK: Output: default@ice01 +PREHOOK: query: create table ice01 (id int, key int) stored by iceberg stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@ice01 +POSTHOOK: query: create table ice01 (id int, key int) stored by iceberg stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@ice01 +PREHOOK: query: insert into ice01 values (1,1),(2,1),(3,1),(4,1) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@ice01 +POSTHOOK: query: insert into ice01 values (1,1),(2,1),(3,1),(4,1) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@ice01 +PREHOOK: query: insert into ice01 values (1,2),(2,2),(3,2),(4,2) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@ice01 +POSTHOOK: query: insert into ice01 values (1,2),(2,2),(3,2),(4,2) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@ice01 +PREHOOK: query: insert into ice01 values (1,3),(2,3),(3,3),(4,3) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@ice01 +POSTHOOK: query: insert into ice01 values (1,3),(2,3),(3,3),(4,3) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@ice01 +PREHOOK: query: insert into ice01 values (1,4),(2,4),(3,4),(4,4) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@ice01 +POSTHOOK: query: insert into ice01 values (1,4),(2,4),(3,4),(4,4) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@ice01 +PREHOOK: query: insert into ice01 values (1,5),(2,5),(3,5),(4,5) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@ice01 +POSTHOOK: query: insert into ice01 values (1,5),(2,5),(3,5),(4,5) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@ice01 +PREHOOK: query: delete from ice01 +PREHOOK: type: TRUNCATETABLE +PREHOOK: Output: default@ice01 +POSTHOOK: query: delete from ice01 +POSTHOOK: type: TRUNCATETABLE +POSTHOOK: Output: default@ice01 +PREHOOK: query: explain analyze delete from ice01 +PREHOOK: type: TRUNCATETABLE +PREHOOK: Output: default@ice01 +POSTHOOK: query: explain analyze delete from ice01 +POSTHOOK: type: TRUNCATETABLE +POSTHOOK: Output: default@ice01 +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Truncate Table or Partition + table name: default.ice01 + +PREHOOK: query: delete from ice01 +PREHOOK: type: TRUNCATETABLE +PREHOOK: Output: default@ice01 +POSTHOOK: query: delete from ice01 +POSTHOOK: type: TRUNCATETABLE +POSTHOOK: Output: default@ice01 +PREHOOK: query: select count(*) from ice01 +PREHOOK: type: QUERY +PREHOOK: Input: default@ice01 +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: select count(*) from ice01 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@ice01 +POSTHOOK: Output: hdfs://### HDFS PATH ### +0 +PREHOOK: query: select * from ice01 +PREHOOK: type: QUERY +PREHOOK: Input: default@ice01 +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: select * from ice01 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@ice01 +POSTHOOK: Output: hdfs://### HDFS PATH ### +PREHOOK: query: describe formatted ice01 +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@ice01 +POSTHOOK: query: describe formatted ice01 +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@ice01 +# col_name data_type comment +id int +key int + +# Detailed Table Information +Database: default +#### A masked pattern was here #### +Retention: 0 +#### A masked pattern was here #### +Table Type: EXTERNAL_TABLE +Table Parameters: + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + EXTERNAL TRUE + bucketing_version 2 + current-schema {\"type\":\"struct\",\"schema-id\":0,\"fields\":[{\"id\":1,\"name\":\"id\",\"required\":false,\"type\":\"int\"},{\"id\":2,\"name\":\"key\",\"required\":false,\"type\":\"int\"}]} + current-snapshot-id #Masked# + current-snapshot-summary {\"deleted-data-files\":\"5\",\"deleted-records\":\"20\",\"removed-files-size\":\"#Masked#\",\"changed-partition-count\":\"1\",\"total-records\":\"0\",\"total-files-size\":\"#Masked#\",\"total-data-files\":\"0\",\"total-delete-files\":\"0\",\"total-position-deletes\":\"0\",\"total-equality-deletes\":\"0\"} + current-snapshot-timestamp-ms #Masked# + format-version 2 + iceberg.orc.files.only true + metadata_location hdfs://### HDFS PATH ### + numFiles 0 + numRows 0 + parquet.compression zstd + previous_metadata_location hdfs://### HDFS PATH ### + rawDataSize 0 + serialization.format 1 + snapshot-count 6 + storage_handler org.apache.iceberg.mr.hive.HiveIcebergStorageHandler + table_type ICEBERG + totalSize #Masked# +#### A masked pattern was here #### + uuid #Masked# + write.format.default orc + +# Storage Information +SerDe Library: org.apache.iceberg.mr.hive.HiveIcebergSerDe +InputFormat: org.apache.iceberg.mr.hive.HiveIcebergInputFormat +OutputFormat: org.apache.iceberg.mr.hive.HiveIcebergOutputFormat +Compressed: No +Sort Columns: [] +PREHOOK: query: drop table ice01 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@ice01 +PREHOOK: Output: database:default +PREHOOK: Output: default@ice01 +POSTHOOK: query: drop table ice01 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@ice01 +POSTHOOK: Output: database:default +POSTHOOK: Output: default@ice01 diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/DeleteSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/DeleteSemanticAnalyzer.java index ec2568734ec7..980b0bfd94bf 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/DeleteSemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/DeleteSemanticAnalyzer.java @@ -21,6 +21,7 @@ import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.Context; import org.apache.hadoop.hive.ql.QueryState; +import org.apache.hadoop.hive.ql.ddl.DDLDesc.DDLDescWithWriteId; import org.apache.hadoop.hive.ql.ddl.DDLWork; import org.apache.hadoop.hive.ql.ddl.table.execute.AlterTableExecuteDesc; import org.apache.hadoop.hive.ql.exec.TableScanOperator; @@ -38,6 +39,7 @@ import java.util.Map; public class DeleteSemanticAnalyzer extends RewriteSemanticAnalyzer { + private DDLDescWithWriteId acidDdlDesc; public DeleteSemanticAnalyzer(QueryState queryState, RewriterFactory rewriterFactory) throws SemanticException { @@ -88,7 +90,7 @@ private void genTruncatePlan(Table table, ASTNode tabNameNode) throws SemanticEx // Note: this will overwrite this.ctx with rewrittenCtx rewrittenCtx.setEnableUnparse(false); truncate.analyze(rewrittenTree, rewrittenCtx); - + acidDdlDesc = truncate.getAcidDdlDesc(); rootTasks = truncate.getRootTasks(); outputs = truncate.getOutputs(); updateOutputs(table); @@ -163,4 +165,9 @@ private DDLWork createDDLWorkOfMetadataUpdate(TableName tableName, SearchArgumen protected boolean enableColumnStatsCollecting() { return false; } + + @Override + public DDLDescWithWriteId getAcidDdlDesc() { + return acidDdlDesc; + } } diff --git a/ql/src/test/results/clientpositive/llap/delete_all_non_partitioned.q.out b/ql/src/test/results/clientpositive/llap/delete_all_non_partitioned.q.out index 38ce075ff84e..c64b3b38d00e 100644 --- a/ql/src/test/results/clientpositive/llap/delete_all_non_partitioned.q.out +++ b/ql/src/test/results/clientpositive/llap/delete_all_non_partitioned.q.out @@ -35,12 +35,10 @@ POSTHOOK: Input: default@acid_danp -1070551679 iUR3Q -1069736047 k17Am8uPHWk02cEf1jet PREHOOK: query: delete from acid_danp -PREHOOK: type: QUERY -PREHOOK: Input: default@acid_danp +PREHOOK: type: TRUNCATETABLE PREHOOK: Output: default@acid_danp POSTHOOK: query: delete from acid_danp -POSTHOOK: type: QUERY -POSTHOOK: Input: default@acid_danp +POSTHOOK: type: TRUNCATETABLE POSTHOOK: Output: default@acid_danp PREHOOK: query: select a,b from acid_danp PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/llap/delete_all_partitioned.q.out b/ql/src/test/results/clientpositive/llap/delete_all_partitioned.q.out index 4c1a02468390..a38770312520 100644 --- a/ql/src/test/results/clientpositive/llap/delete_all_partitioned.q.out +++ b/ql/src/test/results/clientpositive/llap/delete_all_partitioned.q.out @@ -59,17 +59,11 @@ POSTHOOK: Input: default@acid_dap@ds=tomorrow 6981 sF2CRfgt2K tomorrow 6981 NULL tomorrow PREHOOK: query: delete from acid_dap -PREHOOK: type: QUERY -PREHOOK: Input: default@acid_dap -PREHOOK: Input: default@acid_dap@ds=today -PREHOOK: Input: default@acid_dap@ds=tomorrow +PREHOOK: type: TRUNCATETABLE PREHOOK: Output: default@acid_dap@ds=today PREHOOK: Output: default@acid_dap@ds=tomorrow POSTHOOK: query: delete from acid_dap -POSTHOOK: type: QUERY -POSTHOOK: Input: default@acid_dap -POSTHOOK: Input: default@acid_dap@ds=today -POSTHOOK: Input: default@acid_dap@ds=tomorrow +POSTHOOK: type: TRUNCATETABLE POSTHOOK: Output: default@acid_dap@ds=today POSTHOOK: Output: default@acid_dap@ds=tomorrow PREHOOK: query: select * from acid_dap diff --git a/ql/src/test/results/clientpositive/llap/insert_update_delete.q.out b/ql/src/test/results/clientpositive/llap/insert_update_delete.q.out index 54021a99c13b..364ea55baeee 100644 --- a/ql/src/test/results/clientpositive/llap/insert_update_delete.q.out +++ b/ql/src/test/results/clientpositive/llap/insert_update_delete.q.out @@ -65,12 +65,10 @@ POSTHOOK: Input: default@acid_iud -1070551679 fred -1069736047 fred PREHOOK: query: delete from acid_iud -PREHOOK: type: QUERY -PREHOOK: Input: default@acid_iud +PREHOOK: type: TRUNCATETABLE PREHOOK: Output: default@acid_iud POSTHOOK: query: delete from acid_iud -POSTHOOK: type: QUERY -POSTHOOK: Input: default@acid_iud +POSTHOOK: type: TRUNCATETABLE POSTHOOK: Output: default@acid_iud PREHOOK: query: select a,b from acid_iud order by a PREHOOK: type: QUERY