Skip to content

HIVE-28903: Skip deleting archived path when drop partition/table #5769

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
May 6, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 28 additions & 0 deletions ql/src/test/queries/clientpositive/archive_drop.q
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
-- Regression test for HIVE-28903: dropping a partition (and later the table)
-- whose data has been archived into a Hadoop archive (data.har).
-- The statements below are echoed verbatim into the golden .q.out file, so any
-- change to their text requires regenerating that file.
set hive.mapred.mode=nonstrict;
-- ALTER TABLE ... ARCHIVE PARTITION is only used with archiving switched on.
set hive.archive.enabled = true;

-- NOTE(review): test_db is never dropped in this script; presumably the qtest
-- harness removes non-default databases between tests - confirm.
create database test_db;

-- Two-level partitioning (dt, hr) so that a single dt value spans several hr partitions.
create table test_db.test_tbl (id int, name string) partitioned by (dt date, hr string);

-- Populate three hr partitions under the same dt value.
insert overwrite table test_db.test_tbl partition (dt='2025-04-01', hr='11') select 1, 'tom';
insert overwrite table test_db.test_tbl partition (dt='2025-04-01', hr='12') select 2, 'jerry';
insert overwrite table test_db.test_tbl partition (dt='2025-04-01', hr='13') select 3, 'spike';

-- Baseline: all three partitions are listed.
show partitions test_db.test_tbl;

-- Archive using only the dt prefix of the partition spec; all three hr partitions are covered.
alter table test_db.test_tbl archive partition (dt='2025-04-01');
-- List the dt directory and the data.har inside it right after archiving.
dfs -ls ${hiveconf:hive.metastore.warehouse.dir}/test_db.db/test_tbl/dt=2025-04-01/;
dfs -ls ${hiveconf:hive.metastore.warehouse.dir}/test_db.db/test_tbl/dt=2025-04-01/data.har/;

-- Archiving must not change the set of partitions.
show partitions test_db.test_tbl;

-- Drop a single hr partition out of the archived dt group.
alter table test_db.test_tbl drop partition (dt='2025-04-01',hr='12');
-- Same listings as before the drop, so the golden output can compare the item counts.
dfs -ls ${hiveconf:hive.metastore.warehouse.dir}/test_db.db/test_tbl/dt=2025-04-01/;
dfs -ls ${hiveconf:hive.metastore.warehouse.dir}/test_db.db/test_tbl/dt=2025-04-01/data.har/;

-- Only hr=11 and hr=13 should remain.
show partitions test_db.test_tbl;

-- The remaining archived partitions must still be readable.
-- NOTE(review): no ORDER BY / SORT_QUERY_RESULTS here, so the golden file relies
-- on the rows coming back in partition order - confirm this is stable.
select * from test_db.test_tbl;

-- Dropping the table while archived partitions remain should also succeed.
drop table test_db.test_tbl;
122 changes: 122 additions & 0 deletions ql/src/test/results/clientpositive/llap/archive_drop.q.out
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
PREHOOK: query: create database test_db
PREHOOK: type: CREATEDATABASE
PREHOOK: Output: database:test_db
POSTHOOK: query: create database test_db
POSTHOOK: type: CREATEDATABASE
POSTHOOK: Output: database:test_db
PREHOOK: query: create table test_db.test_tbl (id int, name string) partitioned by (dt date, hr string)
PREHOOK: type: CREATETABLE
PREHOOK: Output: database:test_db
PREHOOK: Output: test_db@test_tbl
POSTHOOK: query: create table test_db.test_tbl (id int, name string) partitioned by (dt date, hr string)
POSTHOOK: type: CREATETABLE
POSTHOOK: Output: database:test_db
POSTHOOK: Output: test_db@test_tbl
PREHOOK: query: insert overwrite table test_db.test_tbl partition (dt='2025-04-01', hr='11') select 1, 'tom'
PREHOOK: type: QUERY
PREHOOK: Input: _dummy_database@_dummy_table
PREHOOK: Output: test_db@test_tbl@dt=2025-04-01/hr=11
POSTHOOK: query: insert overwrite table test_db.test_tbl partition (dt='2025-04-01', hr='11') select 1, 'tom'
POSTHOOK: type: QUERY
POSTHOOK: Input: _dummy_database@_dummy_table
POSTHOOK: Output: test_db@test_tbl@dt=2025-04-01/hr=11
POSTHOOK: Lineage: test_tbl PARTITION(dt=2025-04-01,hr=11).id SIMPLE []
POSTHOOK: Lineage: test_tbl PARTITION(dt=2025-04-01,hr=11).name SIMPLE []
PREHOOK: query: insert overwrite table test_db.test_tbl partition (dt='2025-04-01', hr='12') select 2, 'jerry'
PREHOOK: type: QUERY
PREHOOK: Input: _dummy_database@_dummy_table
PREHOOK: Output: test_db@test_tbl@dt=2025-04-01/hr=12
POSTHOOK: query: insert overwrite table test_db.test_tbl partition (dt='2025-04-01', hr='12') select 2, 'jerry'
POSTHOOK: type: QUERY
POSTHOOK: Input: _dummy_database@_dummy_table
POSTHOOK: Output: test_db@test_tbl@dt=2025-04-01/hr=12
POSTHOOK: Lineage: test_tbl PARTITION(dt=2025-04-01,hr=12).id SIMPLE []
POSTHOOK: Lineage: test_tbl PARTITION(dt=2025-04-01,hr=12).name SIMPLE []
PREHOOK: query: insert overwrite table test_db.test_tbl partition (dt='2025-04-01', hr='13') select 3, 'spike'
PREHOOK: type: QUERY
PREHOOK: Input: _dummy_database@_dummy_table
PREHOOK: Output: test_db@test_tbl@dt=2025-04-01/hr=13
POSTHOOK: query: insert overwrite table test_db.test_tbl partition (dt='2025-04-01', hr='13') select 3, 'spike'
POSTHOOK: type: QUERY
POSTHOOK: Input: _dummy_database@_dummy_table
POSTHOOK: Output: test_db@test_tbl@dt=2025-04-01/hr=13
POSTHOOK: Lineage: test_tbl PARTITION(dt=2025-04-01,hr=13).id SIMPLE []
POSTHOOK: Lineage: test_tbl PARTITION(dt=2025-04-01,hr=13).name SIMPLE []
PREHOOK: query: show partitions test_db.test_tbl
PREHOOK: type: SHOWPARTITIONS
PREHOOK: Input: test_db@test_tbl
POSTHOOK: query: show partitions test_db.test_tbl
POSTHOOK: type: SHOWPARTITIONS
POSTHOOK: Input: test_db@test_tbl
dt=2025-04-01/hr=11
dt=2025-04-01/hr=12
dt=2025-04-01/hr=13
PREHOOK: query: alter table test_db.test_tbl archive partition (dt='2025-04-01')
PREHOOK: type: ALTERTABLE_ARCHIVE
PREHOOK: Input: test_db@test_tbl
PREHOOK: Output: test_db@test_tbl@dt=2025-04-01/hr=11
PREHOOK: Output: test_db@test_tbl@dt=2025-04-01/hr=12
PREHOOK: Output: test_db@test_tbl@dt=2025-04-01/hr=13
POSTHOOK: query: alter table test_db.test_tbl archive partition (dt='2025-04-01')
POSTHOOK: type: ALTERTABLE_ARCHIVE
POSTHOOK: Input: test_db@test_tbl
POSTHOOK: Output: test_db@test_tbl@dt=2025-04-01/hr=11
POSTHOOK: Output: test_db@test_tbl@dt=2025-04-01/hr=12
POSTHOOK: Output: test_db@test_tbl@dt=2025-04-01/hr=13
Found 1 items
#### A masked pattern was here ####
Found 4 items
#### A masked pattern was here ####
PREHOOK: query: show partitions test_db.test_tbl
PREHOOK: type: SHOWPARTITIONS
PREHOOK: Input: test_db@test_tbl
POSTHOOK: query: show partitions test_db.test_tbl
POSTHOOK: type: SHOWPARTITIONS
POSTHOOK: Input: test_db@test_tbl
dt=2025-04-01/hr=11
dt=2025-04-01/hr=12
dt=2025-04-01/hr=13
PREHOOK: query: alter table test_db.test_tbl drop partition (dt='2025-04-01',hr='12')
PREHOOK: type: ALTERTABLE_DROPPARTS
PREHOOK: Input: test_db@test_tbl
PREHOOK: Output: test_db@test_tbl@dt=2025-04-01/hr=12
POSTHOOK: query: alter table test_db.test_tbl drop partition (dt='2025-04-01',hr='12')
POSTHOOK: type: ALTERTABLE_DROPPARTS
POSTHOOK: Input: test_db@test_tbl
POSTHOOK: Output: test_db@test_tbl@dt=2025-04-01/hr=12
Found 1 items
#### A masked pattern was here ####
Found 4 items
#### A masked pattern was here ####
PREHOOK: query: show partitions test_db.test_tbl
PREHOOK: type: SHOWPARTITIONS
PREHOOK: Input: test_db@test_tbl
POSTHOOK: query: show partitions test_db.test_tbl
POSTHOOK: type: SHOWPARTITIONS
POSTHOOK: Input: test_db@test_tbl
dt=2025-04-01/hr=11
dt=2025-04-01/hr=13
PREHOOK: query: select * from test_db.test_tbl
PREHOOK: type: QUERY
PREHOOK: Input: test_db@test_tbl
PREHOOK: Input: test_db@test_tbl@dt=2025-04-01/hr=11
PREHOOK: Input: test_db@test_tbl@dt=2025-04-01/hr=13
#### A masked pattern was here ####
POSTHOOK: query: select * from test_db.test_tbl
POSTHOOK: type: QUERY
POSTHOOK: Input: test_db@test_tbl
POSTHOOK: Input: test_db@test_tbl@dt=2025-04-01/hr=11
POSTHOOK: Input: test_db@test_tbl@dt=2025-04-01/hr=13
#### A masked pattern was here ####
1 tom 2025-04-01 11
3 spike 2025-04-01 13
PREHOOK: query: drop table test_db.test_tbl
PREHOOK: type: DROPTABLE
PREHOOK: Input: test_db@test_tbl
PREHOOK: Output: database:test_db
PREHOOK: Output: test_db@test_tbl
POSTHOOK: query: drop table test_db.test_tbl
POSTHOOK: type: DROPTABLE
POSTHOOK: Input: test_db@test_tbl
POSTHOOK: Output: database:test_db
POSTHOOK: Output: test_db@test_tbl
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.HarFileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.FsAction;
import org.apache.hadoop.hive.metastore.ReplChangeManager.RecycleType;
Expand Down Expand Up @@ -467,6 +468,10 @@ public boolean deleteDir(Path f, boolean recursive, boolean ifPurge, boolean nee
}
}
FileSystem fs = getFs(f);
if (fs instanceof HarFileSystem) {
LOG.warn("Har path {} is not supported to delete, skipping it.", f);
return true;
}
return fsHandler.deleteDir(fs, f, recursive, ifPurge, conf);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3179,8 +3179,7 @@ private void deleteDataExcludeCmroot(Path path, boolean ifPurge, boolean shouldE
wh.deleteDir(path, true, ifPurge, shouldEnableCm);
}
} catch (Exception e) {
LOG.error("Failed to delete directory: " + path +
" " + e.getMessage());
LOG.error("Failed to delete directory: {}", path, e);
}
}

Expand Down Expand Up @@ -5134,14 +5133,15 @@ private boolean drop_partition_common(RawStore ms, String catName, String db_nam
throw new NoSuchObjectException("Partition doesn't exist. " + part_vals);
}
isArchived = MetaStoreUtils.isArchived(part);
if (tableDataShouldBeDeleted && isArchived) {
archiveParentDir = MetaStoreUtils.getOriginalLocation(part);
verifyIsWritablePath(archiveParentDir);
}

if (tableDataShouldBeDeleted && (part.getSd() != null) && (part.getSd().getLocation() != null)) {
partPath = new Path(part.getSd().getLocation());
verifyIsWritablePath(partPath);
if (tableDataShouldBeDeleted) {
if (isArchived) {
// Archived partition is only able to delete original location.
archiveParentDir = MetaStoreUtils.getOriginalLocation(part);
verifyIsWritablePath(archiveParentDir);
} else if ((part.getSd() != null) && (part.getSd().getLocation() != null)) {
partPath = new Path(part.getSd().getLocation());
verifyIsWritablePath(partPath);
}
}

String partName = Warehouse.makePartName(tbl.getPartitionKeys(), part_vals);
Expand Down Expand Up @@ -5381,15 +5381,17 @@ public DropPartitionsResult drop_partitions_req(
if (colNames != null) {
partNames.add(FileUtils.makePartName(colNames, part.getValues()));
}
if (tableDataShouldBeDeleted && MetaStoreUtils.isArchived(part)) {
Path archiveParentDir = MetaStoreUtils.getOriginalLocation(part);
verifyIsWritablePath(archiveParentDir);
archToDelete.add(archiveParentDir);
}
if (tableDataShouldBeDeleted && (part.getSd() != null) && (part.getSd().getLocation() != null)) {
Path partPath = new Path(part.getSd().getLocation());
verifyIsWritablePath(partPath);
dirsToDelete.add(new PathAndDepth(partPath, part.getValues().size()));
if (tableDataShouldBeDeleted) {
if (MetaStoreUtils.isArchived(part)) {
// Archived partition is only able to delete original location.
Path archiveParentDir = MetaStoreUtils.getOriginalLocation(part);
verifyIsWritablePath(archiveParentDir);
archToDelete.add(archiveParentDir);
} else if ((part.getSd() != null) && (part.getSd().getLocation() != null)) {
Path partPath = new Path(part.getSd().getLocation());
verifyIsWritablePath(partPath);
dirsToDelete.add(new PathAndDepth(partPath, part.getValues().size()));
}
}
}

Expand Down
Loading