-
Notifications
You must be signed in to change notification settings - Fork 4.7k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
HIVE-28077: Iceberg: Major QB Compaction on partition level
- Loading branch information
Dmitriy Fingerman
committed
Apr 9, 2024
1 parent
5e78ce0
commit 15b94f2
Showing
15 changed files
with
1,177 additions
and
44 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
67 changes: 67 additions & 0 deletions
67
...erg/iceberg-handler/src/test/queries/positive/iceberg_major_compaction_single_partition.q
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,67 @@ | ||
-- Test: major compaction of individual partitions of a partitioned Iceberg table. | ||
-- The qt:replace directives below mask output values that vary between runs. | ||
-- SORT_QUERY_RESULTS | ||
-- Mask neededVirtualColumns due to non-strict order | ||
--! qt:replace:/(\s+neededVirtualColumns:\s)(.*)/$1#Masked#/ | ||
-- Mask the totalSize value as it can have slight variability, causing test flakiness | ||
--! qt:replace:/(\s+totalSize\s+)\S+(\s+)/$1#Masked#$2/ | ||
-- Mask random uuid | ||
--! qt:replace:/(\s+uuid\s+)\S+(\s*)/$1#Masked#$2/ | ||
-- Mask a random snapshot id | ||
--! qt:replace:/(\s+current-snapshot-id\s+)\S+(\s*)/$1#Masked#/ | ||
-- Mask added file size | ||
--! qt:replace:/(\S\"added-files-size\\\":\\\")(\d+)(\\\")/$1#Masked#$3/ | ||
-- Mask total file size | ||
--! qt:replace:/(\S\"total-files-size\\\":\\\")(\d+)(\\\")/$1#Masked#$3/ | ||
-- Mask current-snapshot-timestamp-ms | ||
--! qt:replace:/(\s+current-snapshot-timestamp-ms\s+)\S+(\s*)/$1#Masked#$2/ | ||
-- Mask the text between "MAJOR succeeded" and "manual" (presumably run-dependent SHOW COMPACTIONS columns - confirm) | ||
--! qt:replace:/(MAJOR\s+succeeded\s+)[a-zA-Z0-9\-\.\s+]+(\s+manual)/$1#Masked#$2/ | ||
-- Mask compaction id as they will be allocated in parallel threads. | ||
-- The + quantifier masks the whole leading number, not only its first digit (ids >= 10). | ||
--! qt:replace:/^[0-9]+/#Masked#/ | ||
|
||
-- Session settings for this test: LLAP IO, vectorized execution and the | ||
-- hive.optimize.shared.work.merge.ts.schema option are all switched on. | ||
set hive.llap.io.enabled=true; | ||
set hive.vectorized.execution.enabled=true; | ||
set hive.optimize.shared.work.merge.ts.schema=true; | ||
|
||
-- Iceberg table stored as ORC with format-version 2, partitioned by | ||
-- dept_id, city and registration_date. | ||
create table ice_orc ( | ||
first_name string, | ||
last_name string | ||
) | ||
partitioned by (dept_id bigint, | ||
city string, | ||
registration_date date) | ||
stored by iceberg stored as orc | ||
tblproperties ('format-version'='2'); | ||
|
||
-- Seven single-row inserts: four rows for partition (1, London, 2024-03-11) | ||
-- and three rows for partition (2, Paris, 2024-02-16). | ||
insert into ice_orc VALUES ('fn1','ln1',1,'London','2024-03-11'); | ||
insert into ice_orc VALUES ('fn2','ln2',1,'London','2024-03-11'); | ||
insert into ice_orc VALUES ('fn3','ln3',1,'London','2024-03-11'); | ||
insert into ice_orc VALUES ('fn4','ln4',1,'London','2024-03-11'); | ||
insert into ice_orc VALUES ('fn5','ln5',2,'Paris','2024-02-16'); | ||
insert into ice_orc VALUES ('fn6','ln6',2,'Paris','2024-02-16'); | ||
insert into ice_orc VALUES ('fn7','ln7',2,'Paris','2024-02-16'); | ||
|
||
-- Rewrite last_name of every row, one UPDATE statement per row. | ||
update ice_orc set last_name = 'ln1a' where first_name='fn1'; | ||
update ice_orc set last_name = 'ln2a' where first_name='fn2'; | ||
update ice_orc set last_name = 'ln3a' where first_name='fn3'; | ||
update ice_orc set last_name = 'ln4a' where first_name='fn4'; | ||
update ice_orc set last_name = 'ln5a' where first_name='fn5'; | ||
update ice_orc set last_name = 'ln6a' where first_name='fn6'; | ||
update ice_orc set last_name = 'ln7a' where first_name='fn7'; | ||
|
||
-- Remove fn1 and fn2 (London) and fn7 (Paris); fn3 through fn6 remain. | ||
delete from ice_orc where last_name in ('ln1a', 'ln2a', 'ln7a'); | ||
|
||
-- Table contents and metadata before any compaction. | ||
select * from ice_orc; | ||
describe formatted ice_orc; | ||
|
||
-- Show the plan for, then run, a major compaction restricted to the | ||
-- (dept_id=1, city='London', registration_date='2024-03-11') partition. | ||
-- AND WAIT blocks until the compaction completes (per Hive's ALTER TABLE ... COMPACT docs). | ||
explain alter table ice_orc PARTITION (dept_id=1, city='London', registration_date='2024-03-11') COMPACT 'major' and wait; | ||
alter table ice_orc PARTITION (dept_id=1, city='London', registration_date='2024-03-11') COMPACT 'major' and wait; | ||
|
||
-- Same checks after the first compaction, plus the compaction listing. | ||
select * from ice_orc; | ||
describe formatted ice_orc; | ||
show compactions; | ||
|
||
-- Show the plan for, then run, a major compaction restricted to the | ||
-- (dept_id=2, city='Paris', registration_date='2024-02-16') partition. | ||
explain alter table ice_orc PARTITION (dept_id=2, city='Paris', registration_date='2024-02-16') COMPACT 'major' and wait; | ||
alter table ice_orc PARTITION (dept_id=2, city='Paris', registration_date='2024-02-16') COMPACT 'major' and wait; | ||
|
||
-- Final contents, metadata and compaction listing after both partitions were compacted. | ||
select * from ice_orc; | ||
describe formatted ice_orc; | ||
show compactions; |
70 changes: 70 additions & 0 deletions
70
...dler/src/test/queries/positive/iceberg_major_compaction_single_partition_with_evolution.q
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,70 @@ | ||
-- Test: major compaction of individual partitions of an Iceberg table whose | ||
-- partition spec is changed after the data has been written. | ||
-- The qt:replace directives below mask output values that vary between runs. | ||
-- SORT_QUERY_RESULTS | ||
-- Mask neededVirtualColumns due to non-strict order | ||
--! qt:replace:/(\s+neededVirtualColumns:\s)(.*)/$1#Masked#/ | ||
-- Mask the totalSize value as it can have slight variability, causing test flakiness | ||
--! qt:replace:/(\s+totalSize\s+)\S+(\s+)/$1#Masked#$2/ | ||
-- Mask random uuid | ||
--! qt:replace:/(\s+uuid\s+)\S+(\s*)/$1#Masked#$2/ | ||
-- Mask a random snapshot id | ||
--! qt:replace:/(\s+current-snapshot-id\s+)\S+(\s*)/$1#Masked#/ | ||
-- Mask added file size | ||
--! qt:replace:/(\S\"added-files-size\\\":\\\")(\d+)(\\\")/$1#Masked#$3/ | ||
-- Mask total file size | ||
--! qt:replace:/(\S\"total-files-size\\\":\\\")(\d+)(\\\")/$1#Masked#$3/ | ||
-- Mask current-snapshot-timestamp-ms | ||
--! qt:replace:/(\s+current-snapshot-timestamp-ms\s+)\S+(\s*)/$1#Masked#$2/ | ||
-- Mask the text between "MAJOR succeeded" and "manual" (presumably run-dependent SHOW COMPACTIONS columns - confirm) | ||
--! qt:replace:/(MAJOR\s+succeeded\s+)[a-zA-Z0-9\-\.\s+]+(\s+manual)/$1#Masked#$2/ | ||
-- Mask compaction id as they will be allocated in parallel threads. | ||
-- The + quantifier masks the whole leading number, not only its first digit (ids >= 10). | ||
--! qt:replace:/^[0-9]+/#Masked#/ | ||
|
||
-- Session settings for this test: LLAP IO, vectorized execution and the | ||
-- hive.optimize.shared.work.merge.ts.schema option are all switched on. | ||
set hive.llap.io.enabled=true; | ||
set hive.vectorized.execution.enabled=true; | ||
set hive.optimize.shared.work.merge.ts.schema=true; | ||
|
||
-- Iceberg table stored as ORC with format-version 2. Initially it is | ||
-- partitioned by registration_date only; dept_id and city are plain columns. | ||
create table ice_orc ( | ||
first_name string, | ||
last_name string, | ||
dept_id bigint, | ||
city string | ||
) | ||
partitioned by (registration_date date) | ||
stored by iceberg stored as orc | ||
tblproperties ('format-version'='2'); | ||
|
||
-- One multi-row insert of seven rows. Every row has registration_date | ||
-- 2024-03-11, the only partition column at this point. | ||
insert into ice_orc values | ||
('fn1','ln1',1,'London','2024-03-11'), | ||
('fn2','ln2',1,'London','2024-03-11'), | ||
('fn3','ln3',1,'London','2024-03-11'), | ||
('fn4','ln4',1,'London','2024-03-11'), | ||
('fn5','ln5',2,'Paris','2024-03-11'), | ||
('fn6','ln6',2,'Paris','2024-03-11'), | ||
('fn7','ln7',2,'Paris','2024-03-11'); | ||
|
||
-- Rewrite last_name of every row, one UPDATE statement per row. | ||
update ice_orc set last_name = 'ln1a' where first_name='fn1'; | ||
update ice_orc set last_name = 'ln2a' where first_name='fn2'; | ||
update ice_orc set last_name = 'ln3a' where first_name='fn3'; | ||
update ice_orc set last_name = 'ln4a' where first_name='fn4'; | ||
update ice_orc set last_name = 'ln5a' where first_name='fn5'; | ||
update ice_orc set last_name = 'ln6a' where first_name='fn6'; | ||
update ice_orc set last_name = 'ln7a' where first_name='fn7'; | ||
|
||
-- Remove fn1 and fn2 (London) and fn7 (Paris); fn3 through fn6 remain. | ||
delete from ice_orc where last_name in ('ln1a', 'ln2a', 'ln7a'); | ||
|
||
-- Evolve the partition spec to (dept_id, city, registration_date). | ||
-- All inserts, updates and the delete above ran under the previous spec. | ||
alter table ice_orc set partition spec (dept_id, city, registration_date); | ||
|
||
-- Table contents and metadata after the spec change, before any compaction. | ||
select * from ice_orc; | ||
describe formatted ice_orc; | ||
|
||
-- Show the plan for, then run, a major compaction restricted to the | ||
-- (dept_id=1, city='London', registration_date='2024-03-11') partition of the new spec. | ||
-- NOTE(review): the executed statement lists the partition keys in a different order | ||
-- than the EXPLAIN - presumably deliberate, to cover order-independent partition specs; confirm. | ||
explain alter table ice_orc partition (dept_id=1, city='London', registration_date='2024-03-11') compact 'major' and wait; | ||
alter table ice_orc partition (registration_date='2024-03-11', dept_id=1, city='London' ) compact 'major' and wait; | ||
|
||
-- Same checks after the first compaction, plus the compaction listing. | ||
select * from ice_orc; | ||
describe formatted ice_orc; | ||
show compactions; | ||
|
||
-- Show the plan for, then run, a major compaction restricted to the | ||
-- (dept_id=2, city='Paris', registration_date='2024-03-11') partition of the new spec. | ||
-- As in the London case, the executed statement orders the partition keys differently from the EXPLAIN. | ||
explain alter table ice_orc partition (dept_id=2, city='Paris', registration_date='2024-03-11') compact 'major' and wait; | ||
alter table ice_orc partition (registration_date='2024-03-11', dept_id=2, city='Paris') compact 'major' and wait; | ||
|
||
-- Final contents, metadata and compaction listing after both partitions were compacted. | ||
select * from ice_orc; | ||
describe formatted ice_orc; | ||
show compactions; |
Oops, something went wrong.