HIVE-28166: Iceberg: Truncate on branch operates on the main table. (#5173). (Ayush Saxena, reviewed by Denys Kuzmenko)
ayushtkn authored Apr 4, 2024
1 parent 77ca035 commit b0d3503
Showing 11 changed files with 290 additions and 10 deletions.
@@ -60,6 +60,7 @@
 import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
 import org.apache.hadoop.hive.ql.metadata.Hive;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.metadata.HiveUtils;
 import org.apache.hadoop.hive.ql.parse.PartitionTransform;
 import org.apache.hadoop.hive.ql.parse.TransformSpec;
 import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
@@ -679,6 +680,10 @@ public void preTruncateTable(org.apache.hadoop.hive.metastore.api.Table table, E
     }
 
     DeleteFiles delete = icebergTable.newDelete();
+    String branchName = context.getProperties().get(Catalogs.SNAPSHOT_REF);
+    if (branchName != null) {
+      delete.toBranch(HiveUtils.getTableSnapshotRef(branchName));
+    }
     delete.deleteFromRowFilter(finalExp);
     delete.commit();
     context.putToProperties("truncateSkipDataDeletion", "true");
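
The change above is the heart of the fix: when the metastore client has recorded a snapshot ref in the EnvironmentContext, the hook commits the delete to that branch instead of main. A minimal sketch of the underlying Iceberg API usage, assuming a loaded org.apache.iceberg.Table and an existing branch (the branch name here is illustrative):

import org.apache.iceberg.DeleteFiles;
import org.apache.iceberg.Table;
import org.apache.iceberg.expressions.Expressions;

public class BranchTruncateSketch {
  // Delete every row reachable from the given branch; main is untouched
  // because the new snapshot is committed to the branch ref only.
  static void truncateBranch(Table icebergTable, String branch) {
    DeleteFiles delete = icebergTable.newDelete();
    delete.deleteFromRowFilter(Expressions.alwaysTrue()); // match all rows
    delete.toBranch(branch);                              // e.g. "branch1"
    delete.commit();
  }
}
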
@@ -1772,6 +1772,10 @@ private List<String> getPartitions(DDLOperationContext context, Configuration jo
   public void validatePartSpec(org.apache.hadoop.hive.ql.metadata.Table hmsTable, Map<String, String> partitionSpec)
       throws SemanticException {
     Table table = IcebergTableUtil.getTable(conf, hmsTable.getTTable());
+    if (hmsTable.getSnapshotRef() != null && hasUndergonePartitionEvolution(table)) {
+      // For this case the query is rewritten as a delete, so validation is done as part of the delete.
+      return;
+    }
 
     if (table.spec().isUnpartitioned() && MapUtils.isNotEmpty(partitionSpec)) {
       throw new SemanticException("Writing data into a partition fails when the Iceberg table is unpartitioned.");
@@ -1817,6 +1821,8 @@ public boolean canUseTruncate(org.apache.hadoop.hive.ql.metadata.Table hmsTable,
     Table table = IcebergTableUtil.getTable(conf, hmsTable.getTTable());
     if (MapUtils.isEmpty(partitionSpec) || !hasUndergonePartitionEvolution(table)) {
       return true;
+    } else if (hmsTable.getSnapshotRef() != null) {
+      return false;
     }
 
     Expression finalExp = generateExpressionFromPartitionSpec(table, partitionSpec);
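
canUseTruncate now declines the metadata-only truncate for a branch-scoped statement once the table's partitioning has evolved; the statement is rewritten as a delete instead, which is why validatePartSpec above can skip its own checks. As a rough illustration of what a partition-evolution test can look like (a simplification, not Hive's actual hasUndergonePartitionEvolution):

import org.apache.iceberg.Table;

public class PartitionEvolutionSketch {
  // A table that has registered more than one partition spec has evolved its
  // partitioning at some point. (Hive's real check may additionally inspect
  // which specs the current data files were written with.)
  static boolean hasEvolvedSpec(Table table) {
    return table.specs().size() > 1;
  }
}
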
@@ -0,0 +1,48 @@
-- SORT_QUERY_RESULTS
set hive.explain.user=false;
set hive.fetch.task.conversion=more;

create external table ice01(id int) stored by iceberg stored as orc tblproperties ('format-version'='2');

insert into ice01 values (1), (2), (3), (4);

select * from ice01;

-- create a branch named branch1
alter table ice01 create branch branch1;
select * from default.ice01.branch_branch1;

-- insert some data to branch
insert into ice01 values (5), (6);
select * from default.ice01.branch_branch1;

-- truncate the branch
truncate table default.ice01.branch_branch1;
select * from default.ice01.branch_branch1;

-- create a partitioned iceberg table
create external table ice02(id int) partitioned by (name string) stored by iceberg stored as orc tblproperties ('format-version'='2');
insert into ice02 values (1, 'A'), (2, 'B'), (3, 'A'), (4, 'B');

select * from ice02;

-- create a branch named branch1
alter table ice02 create branch branch1;

-- insert some data to branch
insert into default.ice02.branch_branch1 values (5, 'A'), (6, 'C');
select * from default.ice02.branch_branch1;

-- truncate partition A
truncate table default.ice02.branch_branch1 partition (name='A');

select * from default.ice02.branch_branch1;

-- check the original table is intact.
select * from ice02;

-- partition evolution
alter table ice02 set partition spec (id);

truncate table default.ice02.branch_branch1 partition (name='C');
select * from default.ice02.branch_branch1;
@@ -0,0 +1,188 @@
PREHOOK: query: create external table ice01(id int) stored by iceberg stored as orc tblproperties ('format-version'='2')
PREHOOK: type: CREATETABLE
PREHOOK: Output: database:default
PREHOOK: Output: default@ice01
POSTHOOK: query: create external table ice01(id int) stored by iceberg stored as orc tblproperties ('format-version'='2')
POSTHOOK: type: CREATETABLE
POSTHOOK: Output: database:default
POSTHOOK: Output: default@ice01
PREHOOK: query: insert into ice01 values (1), (2), (3), (4)
PREHOOK: type: QUERY
PREHOOK: Input: _dummy_database@_dummy_table
PREHOOK: Output: default@ice01
POSTHOOK: query: insert into ice01 values (1), (2), (3), (4)
POSTHOOK: type: QUERY
POSTHOOK: Input: _dummy_database@_dummy_table
POSTHOOK: Output: default@ice01
PREHOOK: query: select * from ice01
PREHOOK: type: QUERY
PREHOOK: Input: default@ice01
PREHOOK: Output: hdfs://### HDFS PATH ###
POSTHOOK: query: select * from ice01
POSTHOOK: type: QUERY
POSTHOOK: Input: default@ice01
POSTHOOK: Output: hdfs://### HDFS PATH ###
1
2
3
4
PREHOOK: query: alter table ice01 create branch branch1
PREHOOK: type: ALTERTABLE_CREATEBRANCH
PREHOOK: Input: default@ice01
POSTHOOK: query: alter table ice01 create branch branch1
POSTHOOK: type: ALTERTABLE_CREATEBRANCH
POSTHOOK: Input: default@ice01
PREHOOK: query: select * from default.ice01.branch_branch1
PREHOOK: type: QUERY
PREHOOK: Input: default@ice01
PREHOOK: Output: hdfs://### HDFS PATH ###
POSTHOOK: query: select * from default.ice01.branch_branch1
POSTHOOK: type: QUERY
POSTHOOK: Input: default@ice01
POSTHOOK: Output: hdfs://### HDFS PATH ###
1
2
3
4
PREHOOK: query: insert into ice01 values (5), (6)
PREHOOK: type: QUERY
PREHOOK: Input: _dummy_database@_dummy_table
PREHOOK: Output: default@ice01
POSTHOOK: query: insert into ice01 values (5), (6)
POSTHOOK: type: QUERY
POSTHOOK: Input: _dummy_database@_dummy_table
POSTHOOK: Output: default@ice01
PREHOOK: query: select * from default.ice01.branch_branch1
PREHOOK: type: QUERY
PREHOOK: Input: default@ice01
PREHOOK: Output: hdfs://### HDFS PATH ###
POSTHOOK: query: select * from default.ice01.branch_branch1
POSTHOOK: type: QUERY
POSTHOOK: Input: default@ice01
POSTHOOK: Output: hdfs://### HDFS PATH ###
1
2
3
4
PREHOOK: query: truncate table default.ice01.branch_branch1
PREHOOK: type: TRUNCATETABLE
PREHOOK: Output: default@ice01
POSTHOOK: query: truncate table default.ice01.branch_branch1
POSTHOOK: type: TRUNCATETABLE
POSTHOOK: Output: default@ice01
PREHOOK: query: select * from default.ice01.branch_branch1
PREHOOK: type: QUERY
PREHOOK: Input: default@ice01
PREHOOK: Output: hdfs://### HDFS PATH ###
POSTHOOK: query: select * from default.ice01.branch_branch1
POSTHOOK: type: QUERY
POSTHOOK: Input: default@ice01
POSTHOOK: Output: hdfs://### HDFS PATH ###
PREHOOK: query: create external table ice02(id int) partitioned by (name string) stored by iceberg stored as orc tblproperties ('format-version'='2')
PREHOOK: type: CREATETABLE
PREHOOK: Output: database:default
PREHOOK: Output: default@ice02
POSTHOOK: query: create external table ice02(id int) partitioned by (name string) stored by iceberg stored as orc tblproperties ('format-version'='2')
POSTHOOK: type: CREATETABLE
POSTHOOK: Output: database:default
POSTHOOK: Output: default@ice02
PREHOOK: query: insert into ice02 values (1, 'A'), (2, 'B'), (3, 'A'), (4, 'B')
PREHOOK: type: QUERY
PREHOOK: Input: _dummy_database@_dummy_table
PREHOOK: Output: default@ice02
POSTHOOK: query: insert into ice02 values (1, 'A'), (2, 'B'), (3, 'A'), (4, 'B')
POSTHOOK: type: QUERY
POSTHOOK: Input: _dummy_database@_dummy_table
POSTHOOK: Output: default@ice02
PREHOOK: query: select * from ice02
PREHOOK: type: QUERY
PREHOOK: Input: default@ice02
PREHOOK: Output: hdfs://### HDFS PATH ###
POSTHOOK: query: select * from ice02
POSTHOOK: type: QUERY
POSTHOOK: Input: default@ice02
POSTHOOK: Output: hdfs://### HDFS PATH ###
1 A
2 B
3 A
4 B
PREHOOK: query: alter table ice02 create branch branch1
PREHOOK: type: ALTERTABLE_CREATEBRANCH
PREHOOK: Input: default@ice02
POSTHOOK: query: alter table ice02 create branch branch1
POSTHOOK: type: ALTERTABLE_CREATEBRANCH
POSTHOOK: Input: default@ice02
PREHOOK: query: insert into default.ice02.branch_branch1 values (5, 'A'), (6, 'C')
PREHOOK: type: QUERY
PREHOOK: Input: _dummy_database@_dummy_table
PREHOOK: Output: default@ice02
POSTHOOK: query: insert into default.ice02.branch_branch1 values (5, 'A'), (6, 'C')
POSTHOOK: type: QUERY
POSTHOOK: Input: _dummy_database@_dummy_table
POSTHOOK: Output: default@ice02
PREHOOK: query: select * from default.ice02.branch_branch1
PREHOOK: type: QUERY
PREHOOK: Input: default@ice02
PREHOOK: Output: hdfs://### HDFS PATH ###
POSTHOOK: query: select * from default.ice02.branch_branch1
POSTHOOK: type: QUERY
POSTHOOK: Input: default@ice02
POSTHOOK: Output: hdfs://### HDFS PATH ###
1 A
2 B
3 A
4 B
5 A
6 C
PREHOOK: query: truncate table default.ice02.branch_branch1 partition (name='A')
PREHOOK: type: TRUNCATETABLE
PREHOOK: Output: default@ice02@name=A
POSTHOOK: query: truncate table default.ice02.branch_branch1 partition (name='A')
POSTHOOK: type: TRUNCATETABLE
POSTHOOK: Output: default@ice02@name=A
PREHOOK: query: select * from default.ice02.branch_branch1
PREHOOK: type: QUERY
PREHOOK: Input: default@ice02
PREHOOK: Output: hdfs://### HDFS PATH ###
POSTHOOK: query: select * from default.ice02.branch_branch1
POSTHOOK: type: QUERY
POSTHOOK: Input: default@ice02
POSTHOOK: Output: hdfs://### HDFS PATH ###
2 B
4 B
6 C
PREHOOK: query: select * from ice02
PREHOOK: type: QUERY
PREHOOK: Input: default@ice02
PREHOOK: Output: hdfs://### HDFS PATH ###
POSTHOOK: query: select * from ice02
POSTHOOK: type: QUERY
POSTHOOK: Input: default@ice02
POSTHOOK: Output: hdfs://### HDFS PATH ###
1 A
2 B
3 A
4 B
PREHOOK: query: alter table ice02 set partition spec (id)
PREHOOK: type: ALTERTABLE_SETPARTSPEC
PREHOOK: Input: default@ice02
POSTHOOK: query: alter table ice02 set partition spec (id)
POSTHOOK: type: ALTERTABLE_SETPARTSPEC
POSTHOOK: Input: default@ice02
POSTHOOK: Output: default@ice02
PREHOOK: query: truncate table default.ice02.branch_branch1 partition (name='C')
PREHOOK: type: QUERY
PREHOOK: Output: default@ice02@name=C
POSTHOOK: query: truncate table default.ice02.branch_branch1 partition (name='C')
POSTHOOK: type: QUERY
POSTHOOK: Output: default@ice02@name=C
PREHOOK: query: select * from default.ice02.branch_branch1
PREHOOK: type: QUERY
PREHOOK: Input: default@ice02
PREHOOK: Output: hdfs://### HDFS PATH ###
POSTHOOK: query: select * from default.ice02.branch_branch1
POSTHOOK: type: QUERY
POSTHOOK: Input: default@ice02
POSTHOOK: Output: hdfs://### HDFS PATH ###
2 B
4 B
2 changes: 1 addition & 1 deletion ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
@@ -1530,7 +1530,7 @@ public void truncateTable(String dbDotTableName, Map<String, String> partSpec, L
     List<String> partNames = ((null == partSpec)
         ? null : getPartitionNames(table.getDbName(), table.getTableName(), partSpec, (short) -1));
     if (snapshot == null) {
-      getMSC().truncateTable(table.getDbName(), table.getTableName(), partNames);
+      getMSC().truncateTable(table.getFullTableName(), partNames);
     } else {
       boolean truncateUseBase = HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_ACID_TRUNCATE_USE_BASE)
           || HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_ACID_LOCKLESS_READS_ENABLED);
@@ -210,6 +210,18 @@ public void truncateTable(String dbName, String tableName, List<String> partName
     super.truncateTable(dbName, tableName, partNames);
   }
 
+  @Override
+  public void truncateTable(TableName tableName, List<String> partNames) throws TException {
+    // First try temp table
+    org.apache.hadoop.hive.metastore.api.Table table = getTempTable(tableName.getDb(), tableName.getTable());
+    if (table != null) {
+      truncateTempTable(table);
+      return;
+    }
+    // Try underlying client
+    super.truncateTable(tableName, partNames);
+  }
+
   @Override
   public void truncateTable(String dbName, String tableName,
       List<String> partNames, String validWriteIds, long writeId)
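
The override keeps the session client's usual pattern: serve temp tables locally, otherwise delegate to the remote client. Because callers now pass a TableName, branch-qualified and plain truncates share one entry point. A hypothetical call site, assuming an IMetaStoreClient that carries the new overload (all identifiers illustrative):

import java.util.List;
import org.apache.hadoop.hive.common.TableName;
import org.apache.hadoop.hive.metastore.IMetaStoreClient;
import org.apache.thrift.TException;

public class TruncateCallSketch {
  // Whole-table truncate through the TableName-based overload added here;
  // "ice01"/"hive"/"default" are placeholder values.
  static void truncateWholeTable(IMetaStoreClient client) throws TException {
    TableName name = TableName.fromString("ice01", "hive", "default");
    List<String> partNames = null; // null means truncate the whole table
    client.truncateTable(name, partNames);
  }
}
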
2 changes: 1 addition & 1 deletion ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java
@@ -323,7 +323,7 @@ final public String getTableName() {
   }
 
   public TableName getFullTableName() {
-    return new TableName(getCatName(), getDbName(), getTableName());
+    return new TableName(getCatName(), getDbName(), getTableName(), getSnapshotRef());
   }
 
   final public Path getDataLocation() {
@@ -38,7 +38,7 @@ public HiveTableName(String catName, String dbName, String tableName) {
    * @throws SemanticException
    */
   public static TableName of(Table table) throws SemanticException {
-    return ofNullable(table.getTableName(), table.getDbName());
+    return ofNullable(table.getTableName(), table.getDbName(), table.getSnapshotRef());
   }
 
   /**
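
Both Table.getFullTableName() and HiveTableName.of() now thread the snapshot ref (the branch name, when one is set) into the TableName, which is how the ref survives the trip from the DDL layer down to HiveMetaStoreClient. A small sketch of that round trip, assuming the four-argument constructor and the getTableMetaRef() accessor used elsewhere in this patch (values illustrative):

import org.apache.hadoop.hive.common.TableName;

public class SnapshotRefRoundTrip {
  public static void main(String[] args) {
    // Carry a branch ref alongside catalog/db/table...
    TableName name = new TableName("hive", "default", "ice01", "branch1");
    // ...and read it back where the truncate request is assembled.
    String ref = name.getTableMetaRef(); // "branch1" here; null for a plain table
    System.out.println(ref == null ? "truncate main" : "truncate branch " + ref);
  }
}
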
@@ -121,6 +121,8 @@ public class HiveMetaStoreClient implements IMetaStoreClient, AutoCloseable {
   public static final String MANUALLY_INITIATED_COMPACTION = "manual";
   public static final String TRUNCATE_SKIP_DATA_DELETION = "truncateSkipDataDeletion";
   public static final String SKIP_DROP_PARTITION = "dropPartitionSkip";
+
+  public static final String SNAPSHOT_REF = "snapshot_ref";
   public static final String RENAME_PARTITION_MAKE_COPY = "renamePartitionMakeCopy";
 
   /**
@@ -2079,34 +2081,42 @@ public void dropTable(String catName, String dbname, String name, boolean delete
   @Override
   public void truncateTable(String dbName, String tableName, List<String> partNames,
       String validWriteIds, long writeId, boolean deleteData) throws TException {
-    truncateTableInternal(getDefaultCatalog(conf),
-        dbName, tableName, partNames, validWriteIds, writeId, deleteData);
+    truncateTableInternal(getDefaultCatalog(conf), dbName, tableName, null, partNames, validWriteIds, writeId,
+        deleteData);
   }
 
   @Override
   public void truncateTable(String dbName, String tableName, List<String> partNames,
       String validWriteIds, long writeId) throws TException {
-    truncateTableInternal(getDefaultCatalog(conf),
-        dbName, tableName, partNames, validWriteIds, writeId, true);
+    truncateTable(dbName, tableName, partNames, validWriteIds, writeId, true);
   }
 
   @Override
   public void truncateTable(String dbName, String tableName, List<String> partNames) throws TException {
-    truncateTableInternal(getDefaultCatalog(conf), dbName, tableName, partNames, null, -1, true);
+    truncateTable(getDefaultCatalog(conf), dbName, tableName, partNames);
   }
 
+  @Override
+  public void truncateTable(TableName table, List<String> partNames) throws TException {
+    truncateTableInternal(table.getCat(), table.getDb(), table.getTable(), table.getTableMetaRef(), partNames,
+        null, -1, true);
+  }
+
   @Override
   public void truncateTable(String catName, String dbName, String tableName, List<String> partNames)
       throws TException {
-    truncateTableInternal(catName, dbName, tableName, partNames, null, -1, true);
+    truncateTable(TableName.fromString(tableName, catName, dbName), partNames);
   }
 
-  private void truncateTableInternal(String catName, String dbName, String tableName,
+  private void truncateTableInternal(String catName, String dbName, String tableName, String ref,
       List<String> partNames, String validWriteIds, long writeId, boolean deleteData)
       throws TException {
     Table table = getTable(catName, dbName, tableName);
     HiveMetaHook hook = getHook(table);
     EnvironmentContext context = new EnvironmentContext();
+    if (ref != null) {
+      context.putToProperties(SNAPSHOT_REF, ref);
+    }
     context.putToProperties(TRUNCATE_SKIP_DATA_DELETION, Boolean.toString(!deleteData));
     if (hook != null) {
       hook.preTruncateTable(table, context, partNames);
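
The EnvironmentContext is the carrier between client and hook: truncateTableInternal stashes the ref under the SNAPSHOT_REF key ("snapshot_ref"), and the Iceberg metahook's preTruncateTable in the first file reads it back via Catalogs.SNAPSHOT_REF, so the two constants must agree. A condensed sketch of the handoff (the branch name is a placeholder):

import org.apache.hadoop.hive.metastore.api.EnvironmentContext;

public class SnapshotRefHandoff {
  static final String SNAPSHOT_REF = "snapshot_ref"; // mirrors HiveMetaStoreClient.SNAPSHOT_REF

  public static void main(String[] args) {
    // Client side: record the branch ref before the hook runs.
    EnvironmentContext context = new EnvironmentContext();
    context.putToProperties(SNAPSHOT_REF, "branch1");

    // Hook side: read the ref back and scope the truncate accordingly.
    String branchName = context.getProperties().get(SNAPSHOT_REF);
    System.out.println(branchName == null ? "truncate main" : "truncate branch " + branchName);
  }
}
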