Skip to content

Commit

Permalink
[MINOR] Fix NPE while parsing partition values in file index (#12177)
Browse files Browse the repository at this point in the history
  • Loading branch information
codope authored Oct 30, 2024
1 parent 899b1f6 commit 8f9bde7
Show file tree
Hide file tree
Showing 2 changed files with 36 additions and 1 deletion.
Original file line number Diff line number Diff line change
Expand Up @@ -373,10 +373,14 @@ class SparkHoodieTableFileIndex(spark: SparkSession,
staticPartitionColumnValues.map(_._1): _*)
}

protected def doParsePartitionColumnValues(partitionColumns: Array[String], partitionPath: String): Array[Object] = {
/**
* @VisibleForTesting
*/
def doParsePartitionColumnValues(partitionColumns: Array[String], partitionPath: String): Array[Object] = {
val tableConfig = metaClient.getTableConfig
if (null != tableConfig.getKeyGeneratorClassName
&& tableConfig.getKeyGeneratorClassName.equals(KeyGeneratorType.TIMESTAMP.getClassName)
&& null != tableConfig.propsMap.get(TimestampKeyGeneratorConfig.TIMESTAMP_TYPE_FIELD.key())
&& tableConfig.propsMap.get(TimestampKeyGeneratorConfig.TIMESTAMP_TYPE_FIELD.key())
.matches("SCALAR|UNIX_TIMESTAMP|EPOCHMILLISECONDS|EPOCHMICROSECONDS")) {
// For TIMESTAMP key generator when TYPE is SCALAR, UNIX_TIMESTAMP,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ import org.apache.spark.sql.execution.datasources.{NoopCache, PartitionDirectory
import org.apache.spark.sql.functions.{lit, struct}
import org.apache.spark.sql.hudi.HoodieSparkSessionExtension
import org.apache.spark.sql.types._
import org.apache.spark.unsafe.types.UTF8String
import org.junit.jupiter.api.Assertions.{assertEquals, assertTrue}
import org.junit.jupiter.api.{BeforeEach, Test}
import org.junit.jupiter.params.ParameterizedTest
Expand Down Expand Up @@ -110,6 +111,36 @@ class TestHoodieFileIndex extends HoodieSparkClientTestBase with ScalaAssertionS
assertEquals("partition", fileIndex.partitionSchema.fields.map(_.name).mkString(","))
}

/**
 * Regression test for `doParsePartitionColumnValues` in `HoodieFileIndex`.
 *
 * Exercises the TIMESTAMP key-generator path when the table's `propsMap`
 * lacks the timestamp-type configuration key (the `null` lookup scenario
 * that previously caused an NPE). In that case the method must fall back
 * to returning the raw partition path wrapped as a `UTF8String`.
 */
@Test
def testParsePartitionValues(): Unit = {
  // Configure the table to use the TIMESTAMP key generator with a single
  // partition field; the timestamp-type property is deliberately left unset.
  val conf = metaClient.getTableConfig
  conf.setValue(HoodieTableConfig.KEY_GENERATOR_TYPE, KeyGeneratorType.TIMESTAMP.name())
  conf.setValue(HoodieTableConfig.PARTITION_FIELDS, "col1")

  // Table schema containing the partition column `col1`.
  val schema = StructType(Seq(
    StructField("f1", DataTypes.DoubleType, nullable = true),
    StructField("col1", DataTypes.LongType, nullable = true)))

  val partitionColumns = Array("col1")
  val partitionPath = "2023/10/28"

  // Build the file index, then parse the partition values and verify the
  // fallback: a single UTF8String holding the untouched partition path.
  val index = HoodieFileIndex(spark, metaClient, Some(schema), queryOpts)
  val parsed = index.doParsePartitionColumnValues(partitionColumns, partitionPath)
  assertEquals(1, parsed.length)
  assertEquals(UTF8String.fromString(partitionPath), parsed(0))
}

@ParameterizedTest
@MethodSource(Array("keyGeneratorParameters"))
def testPartitionSchemaForBuiltInKeyGenerator(keyGenerator: String): Unit = {
Expand Down

0 comments on commit 8f9bde7

Please sign in to comment.