Skip to content

Commit

Permalink
HIVE-28654: MSCK repair fails for default partition when partition co…
Browse files Browse the repository at this point in the history
…lumn is of numeric type (apache#5568). (Raghav Aggarwal, reviewed by Ayush Saxena, Shohei Okumiya, Indhumathi Muthumurugesh)
  • Loading branch information
Aggarwal-Raghav authored Dec 10, 2024
1 parent fd4b157 commit 3483bc3
Show file tree
Hide file tree
Showing 6 changed files with 205 additions and 0 deletions.
11 changes: 11 additions & 0 deletions ql/src/test/queries/clientpositive/msck_repair_8.q
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
-- Regression test for HIVE-28654: MSCK REPAIR on a table whose partition
-- column is numeric (BIGINT here) when the only partition directory on disk
-- is the default partition, __HIVE_DEFAULT_PARTITION__.
-- The statement text below is echoed into the golden .q.out file, so keep the
-- statements themselves unchanged when editing this script.

-- Start from a clean slate in case an earlier run left the table behind.
DROP TABLE IF EXISTS tbl1;

-- External table partitioned by a BIGINT column, rooted in the test tmp dir.
CREATE EXTERNAL TABLE tbl1 (id INT, name STRING) PARTITIONED BY (my_date BIGINT) stored as ORC location '${system:test.tmp.dir}/apps/hive/warehouse/test.db/tbl1/';

-- Create the default-partition directory directly on the filesystem, so the
-- metastore has no record of it yet.
dfs ${system:test.dfs.mkdir} -p ${system:test.tmp.dir}/apps/hive/warehouse/test.db/tbl1/my_date=__HIVE_DEFAULT_PARTITION__;

-- The repair is expected to report and register the directory created above,
-- even though its value is not a parsable number.
MSCK REPAIR TABLE tbl1;

-- Expected listing: my_date=__HIVE_DEFAULT_PARTITION__
show partitions tbl1;

-- Clean up the table definition.
DROP TABLE tbl1;
22 changes: 22 additions & 0 deletions ql/src/test/queries/clientpositive/msck_repair_9.q
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
-- Regression test for HIVE-28654: MSCK REPAIR over numeric (INT) partition
-- columns where some partition directories carry a default-partition name,
-- both the stock __HIVE_DEFAULT_PARTITION__ and a custom one set through
-- hive.exec.default.partition.name.
-- The statement text below is echoed into the golden .q.out file, so keep the
-- statements themselves unchanged when editing this script.

-- Start from a clean slate in case an earlier run left the tables behind.
DROP TABLE IF EXISTS tbl_x;
DROP TABLE IF EXISTS tbl_y;

-- Source table: two INT partition columns, data stored under the test tmp dir.
CREATE EXTERNAL TABLE tbl_x (id INT, name STRING) PARTITIONED BY (month INT, day INT) stored as ORC location '${system:test.tmp.dir}/apps/hive/warehouse/test.db/tbl_x/';

-- Two fully specified partitions: month=12/day=2 and month=12/day=3.
INSERT INTO tbl_x values(1, 'aaa', 12, 2);
INSERT INTO tbl_x values(2, 'bbb', 12, 3);
-- "day" is omitted, so the row goes to month=12/day=__HIVE_DEFAULT_PARTITION__.
INSERT INTO tbl_x (id, name, month) values(3, 'ccc', 12);

-- Change the default partition name; this must stay before the next INSERT so
-- that only the fourth row is written under the custom name.
SET hive.exec.default.partition.name=ANOTHER_PARTITION;
-- "month" is omitted, so the row goes to month=ANOTHER_PARTITION/day=3.
INSERT INTO tbl_x (id, name, day) values(4, 'ddd', 3);

-- Baseline: the four partitions written through tbl_x.
SHOW PARTITIONS tbl_x;

-- Second external table over the SAME location as tbl_x (the tbl_x path is
-- intentional); it starts with no partitions registered in the metastore.
CREATE EXTERNAL TABLE tbl_y (id INT, name STRING) PARTITIONED BY (month INT, day INT) stored as ORC location '${system:test.tmp.dir}/apps/hive/warehouse/test.db/tbl_x/';

-- The repair is expected to discover all four directories, including the two
-- whose partition values are not parsable numbers.
MSCK REPAIR TABLE tbl_y;

-- Expected to match the tbl_x listing above.
SHOW PARTITIONS tbl_y;

-- Clean up both table definitions.
DROP TABLE tbl_x;
DROP TABLE tbl_y;
41 changes: 41 additions & 0 deletions ql/src/test/results/clientpositive/llap/msck_repair_8.q.out
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
PREHOOK: query: DROP TABLE IF EXISTS tbl1
PREHOOK: type: DROPTABLE
PREHOOK: Output: database:default
POSTHOOK: query: DROP TABLE IF EXISTS tbl1
POSTHOOK: type: DROPTABLE
POSTHOOK: Output: database:default
#### A masked pattern was here ####
PREHOOK: type: CREATETABLE
#### A masked pattern was here ####
PREHOOK: Output: database:default
PREHOOK: Output: default@tbl1
#### A masked pattern was here ####
POSTHOOK: type: CREATETABLE
#### A masked pattern was here ####
POSTHOOK: Output: database:default
POSTHOOK: Output: default@tbl1
PREHOOK: query: MSCK REPAIR TABLE tbl1
PREHOOK: type: MSCK
PREHOOK: Output: default@tbl1
POSTHOOK: query: MSCK REPAIR TABLE tbl1
POSTHOOK: type: MSCK
POSTHOOK: Output: default@tbl1
Partitions not in metastore: tbl1:my_date=__HIVE_DEFAULT_PARTITION__
#### A masked pattern was here ####
PREHOOK: query: show partitions tbl1
PREHOOK: type: SHOWPARTITIONS
PREHOOK: Input: default@tbl1
POSTHOOK: query: show partitions tbl1
POSTHOOK: type: SHOWPARTITIONS
POSTHOOK: Input: default@tbl1
my_date=__HIVE_DEFAULT_PARTITION__
PREHOOK: query: DROP TABLE tbl1
PREHOOK: type: DROPTABLE
PREHOOK: Input: default@tbl1
PREHOOK: Output: database:default
PREHOOK: Output: default@tbl1
POSTHOOK: query: DROP TABLE tbl1
POSTHOOK: type: DROPTABLE
POSTHOOK: Input: default@tbl1
POSTHOOK: Output: database:default
POSTHOOK: Output: default@tbl1
124 changes: 124 additions & 0 deletions ql/src/test/results/clientpositive/llap/msck_repair_9.q.out
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
PREHOOK: query: DROP TABLE IF EXISTS tbl_x
PREHOOK: type: DROPTABLE
PREHOOK: Output: database:default
POSTHOOK: query: DROP TABLE IF EXISTS tbl_x
POSTHOOK: type: DROPTABLE
POSTHOOK: Output: database:default
PREHOOK: query: DROP TABLE IF EXISTS tbl_y
PREHOOK: type: DROPTABLE
PREHOOK: Output: database:default
POSTHOOK: query: DROP TABLE IF EXISTS tbl_y
POSTHOOK: type: DROPTABLE
POSTHOOK: Output: database:default
#### A masked pattern was here ####
PREHOOK: type: CREATETABLE
#### A masked pattern was here ####
PREHOOK: Output: database:default
PREHOOK: Output: default@tbl_x
#### A masked pattern was here ####
POSTHOOK: type: CREATETABLE
#### A masked pattern was here ####
POSTHOOK: Output: database:default
POSTHOOK: Output: default@tbl_x
PREHOOK: query: INSERT INTO tbl_x values(1, 'aaa', 12, 2)
PREHOOK: type: QUERY
PREHOOK: Input: _dummy_database@_dummy_table
PREHOOK: Output: default@tbl_x
POSTHOOK: query: INSERT INTO tbl_x values(1, 'aaa', 12, 2)
POSTHOOK: type: QUERY
POSTHOOK: Input: _dummy_database@_dummy_table
POSTHOOK: Output: default@tbl_x
POSTHOOK: Output: default@tbl_x@month=12/day=2
POSTHOOK: Lineage: tbl_x PARTITION(month=12,day=2).id SCRIPT []
POSTHOOK: Lineage: tbl_x PARTITION(month=12,day=2).name SCRIPT []
PREHOOK: query: INSERT INTO tbl_x values(2, 'bbb', 12, 3)
PREHOOK: type: QUERY
PREHOOK: Input: _dummy_database@_dummy_table
PREHOOK: Output: default@tbl_x
POSTHOOK: query: INSERT INTO tbl_x values(2, 'bbb', 12, 3)
POSTHOOK: type: QUERY
POSTHOOK: Input: _dummy_database@_dummy_table
POSTHOOK: Output: default@tbl_x
POSTHOOK: Output: default@tbl_x@month=12/day=3
POSTHOOK: Lineage: tbl_x PARTITION(month=12,day=3).id SCRIPT []
POSTHOOK: Lineage: tbl_x PARTITION(month=12,day=3).name SCRIPT []
PREHOOK: query: INSERT INTO tbl_x (id, name, month) values(3, 'ccc', 12)
PREHOOK: type: QUERY
PREHOOK: Input: _dummy_database@_dummy_table
PREHOOK: Output: default@tbl_x
POSTHOOK: query: INSERT INTO tbl_x (id, name, month) values(3, 'ccc', 12)
POSTHOOK: type: QUERY
POSTHOOK: Input: _dummy_database@_dummy_table
POSTHOOK: Output: default@tbl_x
POSTHOOK: Output: default@tbl_x@month=12/day=__HIVE_DEFAULT_PARTITION__
POSTHOOK: Lineage: tbl_x PARTITION(month=12,day=__HIVE_DEFAULT_PARTITION__).id SCRIPT []
POSTHOOK: Lineage: tbl_x PARTITION(month=12,day=__HIVE_DEFAULT_PARTITION__).name SCRIPT []
PREHOOK: query: INSERT INTO tbl_x (id, name, day) values(4, 'ddd', 3)
PREHOOK: type: QUERY
PREHOOK: Input: _dummy_database@_dummy_table
PREHOOK: Output: default@tbl_x
POSTHOOK: query: INSERT INTO tbl_x (id, name, day) values(4, 'ddd', 3)
POSTHOOK: type: QUERY
POSTHOOK: Input: _dummy_database@_dummy_table
POSTHOOK: Output: default@tbl_x
POSTHOOK: Output: default@tbl_x@month=ANOTHER_PARTITION/day=3
POSTHOOK: Lineage: tbl_x PARTITION(month=ANOTHER_PARTITION,day=3).id SCRIPT []
POSTHOOK: Lineage: tbl_x PARTITION(month=ANOTHER_PARTITION,day=3).name SCRIPT []
PREHOOK: query: SHOW PARTITIONS tbl_x
PREHOOK: type: SHOWPARTITIONS
PREHOOK: Input: default@tbl_x
POSTHOOK: query: SHOW PARTITIONS tbl_x
POSTHOOK: type: SHOWPARTITIONS
POSTHOOK: Input: default@tbl_x
month=12/day=2
month=12/day=3
month=12/day=__HIVE_DEFAULT_PARTITION__
month=ANOTHER_PARTITION/day=3
#### A masked pattern was here ####
PREHOOK: type: CREATETABLE
#### A masked pattern was here ####
PREHOOK: Output: database:default
PREHOOK: Output: default@tbl_y
#### A masked pattern was here ####
POSTHOOK: type: CREATETABLE
#### A masked pattern was here ####
POSTHOOK: Output: database:default
POSTHOOK: Output: default@tbl_y
PREHOOK: query: MSCK REPAIR TABLE tbl_y
PREHOOK: type: MSCK
PREHOOK: Output: default@tbl_y
POSTHOOK: query: MSCK REPAIR TABLE tbl_y
POSTHOOK: type: MSCK
POSTHOOK: Output: default@tbl_y
Partitions not in metastore: tbl_y:month=12/day=2 tbl_y:month=12/day=3 tbl_y:month=12/day=__HIVE_DEFAULT_PARTITION__ tbl_y:month=ANOTHER_PARTITION/day=3
#### A masked pattern was here ####
PREHOOK: query: SHOW PARTITIONS tbl_y
PREHOOK: type: SHOWPARTITIONS
PREHOOK: Input: default@tbl_y
POSTHOOK: query: SHOW PARTITIONS tbl_y
POSTHOOK: type: SHOWPARTITIONS
POSTHOOK: Input: default@tbl_y
month=12/day=2
month=12/day=3
month=12/day=__HIVE_DEFAULT_PARTITION__
month=ANOTHER_PARTITION/day=3
PREHOOK: query: DROP TABLE tbl_x
PREHOOK: type: DROPTABLE
PREHOOK: Input: default@tbl_x
PREHOOK: Output: database:default
PREHOOK: Output: default@tbl_x
POSTHOOK: query: DROP TABLE tbl_x
POSTHOOK: type: DROPTABLE
POSTHOOK: Input: default@tbl_x
POSTHOOK: Output: database:default
POSTHOOK: Output: default@tbl_x
PREHOOK: query: DROP TABLE tbl_y
PREHOOK: type: DROPTABLE
PREHOOK: Input: default@tbl_y
PREHOOK: Output: database:default
PREHOOK: Output: default@tbl_y
POSTHOOK: query: DROP TABLE tbl_y
POSTHOOK: type: DROPTABLE
POSTHOOK: Input: default@tbl_y
POSTHOOK: Output: database:default
POSTHOOK: Output: default@tbl_y
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@
import org.apache.commons.collections.CollectionUtils;
import org.apache.commons.collections.ListUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.math.NumberUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.Path;
Expand Down Expand Up @@ -1636,6 +1637,10 @@ public static String getPartitionName(Path tablePath, Path partitionPath, Set<St

public static String getNormalisedPartitionValue(String partitionValue, String type) {

if (!NumberUtils.isParsable(partitionValue)) {
return partitionValue;
}

LOG.debug("Converting '" + partitionValue + "' to type: '" + type + "'.");

if (type.equalsIgnoreCase("tinyint")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -964,6 +964,8 @@ public void testConversionToSignificantNumericTypes() {
assertEquals("-1.01", MetaStoreServerUtils.getNormalisedPartitionValue("-0001.010000", "double"));
assertEquals("1.01", MetaStoreServerUtils.getNormalisedPartitionValue("0001.0100", "decimal"));
assertEquals("-1.01", MetaStoreServerUtils.getNormalisedPartitionValue("-0001.0100", "decimal"));
assertEquals("__HIVE_DEFAULT_PARTITION__", MetaStoreServerUtils.getNormalisedPartitionValue(
"__HIVE_DEFAULT_PARTITION__", "decimal"));
}

@Test
Expand Down

0 comments on commit 3483bc3

Please sign in to comment.