From fb1da64fb629876d1693c4ef479a54800c4f789a Mon Sep 17 00:00:00 2001 From: Amanda Liu Date: Tue, 30 Sep 2025 06:58:08 -0700 Subject: [PATCH 01/13] validate --- .../kernel/spark/catalog/SparkTable.java | 78 ++++++++++++++++++- 1 file changed, 76 insertions(+), 2 deletions(-) diff --git a/kernel-spark/src/main/java/io/delta/kernel/spark/catalog/SparkTable.java b/kernel-spark/src/main/java/io/delta/kernel/spark/catalog/SparkTable.java index 0dc13b484f5..34195a1f98f 100644 --- a/kernel-spark/src/main/java/io/delta/kernel/spark/catalog/SparkTable.java +++ b/kernel-spark/src/main/java/io/delta/kernel/spark/catalog/SparkTable.java @@ -28,8 +28,7 @@ import org.apache.spark.sql.connector.expressions.Expressions; import org.apache.spark.sql.connector.expressions.Transform; import org.apache.spark.sql.connector.read.ScanBuilder; -import org.apache.spark.sql.types.StructField; -import org.apache.spark.sql.types.StructType; +import org.apache.spark.sql.types.*; import org.apache.spark.sql.util.CaseInsensitiveStringMap; /** DataSource V2 Table implementation for Delta Lake using the Delta Kernel API. */ @@ -53,6 +52,80 @@ public class SparkTable implements Table, SupportsRead { private final Column[] columns; private final Transform[] partitionTransforms; + /** + * Validates that all fields in the schema use allowed data types. + * + * @param schema the schema to validate + * @throws IllegalArgumentException if any field uses a disallowed data type + */ + private static void validateSchemaTypes(StructType schema) { + for (StructField field : schema.fields()) { + validateDataType(field.dataType(), field.name()); + } + } + + /** + * Recursively validates a data type and its nested types. + * + * @param dataType the data type to validate + * @param fieldPath the path to the field (for error messages) + * @throws IllegalArgumentException if the data type is not allowed + */ + private static void validateDataType(DataType dataType, String fieldPath) { + if (isAllowedType(dataType)) { + // For struct types, validate nested fields + if (dataType instanceof StructType) { + StructType structType = (StructType) dataType; + for (StructField field : structType.fields()) { + validateDataType(field.dataType(), fieldPath + "." + field.name()); + } + } + // For array types, validate element type + else if (dataType instanceof ArrayType) { + ArrayType arrayType = (ArrayType) dataType; + validateDataType(arrayType.elementType(), fieldPath + ".element"); + } + // For map types, validate key and value types + else if (dataType instanceof MapType) { + MapType mapType = (MapType) dataType; + validateDataType(mapType.keyType(), fieldPath + ".key"); + validateDataType(mapType.valueType(), fieldPath + ".value"); + } + } else { + throw new IllegalArgumentException( + String.format("Unsupported data type '%s' for field '%s'. " + + "Only numeric types (ByteType, ShortType, IntegerType, LongType, FloatType, " + + "DoubleType, DecimalType), string types (StringType, VarcharType, CharType), " + + "BinaryType, and BooleanType are supported.", + dataType.typeName(), fieldPath)); + } + } + + /** + * Checks if a data type is allowed. 
+ * + * @param dataType the data type to check + * @return true if the data type is allowed, false otherwise + */ + private static boolean isAllowedType(DataType dataType) { + return dataType instanceof ByteType || + dataType instanceof ShortType || + dataType instanceof IntegerType || + dataType instanceof LongType || + dataType instanceof FloatType || + dataType instanceof DoubleType || + dataType instanceof DecimalType || + dataType instanceof StringType || + dataType instanceof VarcharType || + dataType instanceof CharType || + dataType instanceof BinaryType || + dataType instanceof BooleanType || + // Allow complex types but validate their nested types + dataType instanceof StructType || + dataType instanceof ArrayType || + dataType instanceof MapType; + } + /** * Creates a SparkTable backed by a Delta Kernel snapshot and initializes Spark-facing metadata * (schemas, partitioning, capabilities). @@ -82,6 +155,7 @@ public SparkTable(Identifier identifier, String tablePath, Map o .build(io.delta.kernel.defaults.engine.DefaultEngine.create(hadoopConf)); this.schema = SchemaUtils.convertKernelSchemaToSparkSchema(snapshot.getSchema()); + validateSchemaTypes(this.schema); this.partColNames = Collections.unmodifiableList(new ArrayList<>(snapshot.getPartitionColumnNames())); From b26fef6ad13804bab5f5286e5e0083d411eedaa4 Mon Sep 17 00:00:00 2001 From: Amanda Liu Date: Tue, 30 Sep 2025 07:24:49 -0700 Subject: [PATCH 02/13] fmt --- .../kernel/spark/catalog/SparkTable.java | 48 ++++++++++--------- 1 file changed, 25 insertions(+), 23 deletions(-) diff --git a/kernel-spark/src/main/java/io/delta/kernel/spark/catalog/SparkTable.java b/kernel-spark/src/main/java/io/delta/kernel/spark/catalog/SparkTable.java index 34195a1f98f..910ba36a90b 100644 --- a/kernel-spark/src/main/java/io/delta/kernel/spark/catalog/SparkTable.java +++ b/kernel-spark/src/main/java/io/delta/kernel/spark/catalog/SparkTable.java @@ -54,7 +54,7 @@ public class SparkTable implements Table, SupportsRead { /** * Validates that all fields in the schema use allowed data types. - * + * * @param schema the schema to validate * @throws IllegalArgumentException if any field uses a disallowed data type */ @@ -66,7 +66,7 @@ private static void validateSchemaTypes(StructType schema) { /** * Recursively validates a data type and its nested types. - * + * * @param dataType the data type to validate * @param fieldPath the path to the field (for error messages) * @throws IllegalArgumentException if the data type is not allowed @@ -93,37 +93,39 @@ else if (dataType instanceof MapType) { } } else { throw new IllegalArgumentException( - String.format("Unsupported data type '%s' for field '%s'. " + - "Only numeric types (ByteType, ShortType, IntegerType, LongType, FloatType, " + - "DoubleType, DecimalType), string types (StringType, VarcharType, CharType), " + - "BinaryType, and BooleanType are supported.", + String.format( + "Unsupported data type '%s' for field '%s'. " + + "Only numeric types (ByteType, ShortType, IntegerType, LongType, FloatType, " + + "DoubleType, DecimalType), string types (StringType, VarcharType, CharType), " + + "BinaryType, and BooleanType are supported.", dataType.typeName(), fieldPath)); } } /** * Checks if a data type is allowed. 
- * + * * @param dataType the data type to check * @return true if the data type is allowed, false otherwise */ private static boolean isAllowedType(DataType dataType) { - return dataType instanceof ByteType || - dataType instanceof ShortType || - dataType instanceof IntegerType || - dataType instanceof LongType || - dataType instanceof FloatType || - dataType instanceof DoubleType || - dataType instanceof DecimalType || - dataType instanceof StringType || - dataType instanceof VarcharType || - dataType instanceof CharType || - dataType instanceof BinaryType || - dataType instanceof BooleanType || - // Allow complex types but validate their nested types - dataType instanceof StructType || - dataType instanceof ArrayType || - dataType instanceof MapType; + return dataType instanceof ByteType + || dataType instanceof ShortType + || dataType instanceof IntegerType + || dataType instanceof LongType + || dataType instanceof FloatType + || dataType instanceof DoubleType + || dataType instanceof DecimalType + || dataType instanceof StringType + || dataType instanceof VarcharType + || dataType instanceof CharType + || dataType instanceof BinaryType + || dataType instanceof BooleanType + || + // Allow complex types but validate their nested types + dataType instanceof StructType + || dataType instanceof ArrayType + || dataType instanceof MapType; } /** From 6f2a286126eded9b92f46e5ac9f46a58da0c8bbd Mon Sep 17 00:00:00 2001 From: Amanda Liu Date: Sun, 5 Oct 2025 11:56:35 -0700 Subject: [PATCH 03/13] test --- ...-be25-530a05922422-c000.snappy.parquet.crc | Bin 0 -> 68 bytes ...-96fb-48b4912507ce-c000.snappy.parquet.crc | Bin 0 -> 48 bytes ...-ad2f-8c837a77d398-c000.snappy.parquet.crc | Bin 0 -> 48 bytes ...-8110-77001b877182-c000.snappy.parquet.crc | Bin 0 -> 68 bytes .../_delta_log/.00000000000000000000.json.crc | Bin 0 -> 32 bytes .../_delta_log/.00000000000000000001.json.crc | Bin 0 -> 16 bytes ...0000000000000000002.checkpoint.parquet.crc | Bin 0 -> 180 bytes .../_delta_log/.00000000000000000002.json.crc | Bin 0 -> 16 bytes .../_delta_log/00000000000000000000.json | 5 + .../_delta_log/00000000000000000001.json | 2 + .../00000000000000000002.checkpoint.parquet | Bin 0 -> 21929 bytes .../_delta_log/00000000000000000002.json | 2 + .../_delta_log/_last_checkpoint | 1 + .../golden/spark-variant-checkpoint/info.txt | 71 ++++++ ...4080-be25-530a05922422-c000.snappy.parquet | Bin 0 -> 7443 bytes ...46d1-96fb-48b4912507ce-c000.snappy.parquet | Bin 0 -> 5072 bytes ...4322-ad2f-8c837a77d398-c000.snappy.parquet | Bin 0 -> 5072 bytes ...4dbf-8110-77001b877182-c000.snappy.parquet | Bin 0 -> 7324 bytes .../kernel/spark/read/DataTypeReaderTest.java | 214 ++++++++++++++++++ 19 files changed, 295 insertions(+) create mode 100644 connectors/golden-tables/src/main/resources/golden/spark-variant-checkpoint/.part-00000-16c852df-ba66-4080-be25-530a05922422-c000.snappy.parquet.crc create mode 100644 connectors/golden-tables/src/main/resources/golden/spark-variant-checkpoint/.part-00000-1e14ba22-3114-46d1-96fb-48b4912507ce-c000.snappy.parquet.crc create mode 100644 connectors/golden-tables/src/main/resources/golden/spark-variant-checkpoint/.part-00000-9a9c570c-ee32-4322-ad2f-8c837a77d398-c000.snappy.parquet.crc create mode 100644 connectors/golden-tables/src/main/resources/golden/spark-variant-checkpoint/.part-00001-664313d3-14b4-4dbf-8110-77001b877182-c000.snappy.parquet.crc create mode 100644 connectors/golden-tables/src/main/resources/golden/spark-variant-checkpoint/_delta_log/.00000000000000000000.json.crc create mode 
100644 connectors/golden-tables/src/main/resources/golden/spark-variant-checkpoint/_delta_log/.00000000000000000001.json.crc create mode 100644 connectors/golden-tables/src/main/resources/golden/spark-variant-checkpoint/_delta_log/.00000000000000000002.checkpoint.parquet.crc create mode 100644 connectors/golden-tables/src/main/resources/golden/spark-variant-checkpoint/_delta_log/.00000000000000000002.json.crc create mode 100644 connectors/golden-tables/src/main/resources/golden/spark-variant-checkpoint/_delta_log/00000000000000000000.json create mode 100644 connectors/golden-tables/src/main/resources/golden/spark-variant-checkpoint/_delta_log/00000000000000000001.json create mode 100644 connectors/golden-tables/src/main/resources/golden/spark-variant-checkpoint/_delta_log/00000000000000000002.checkpoint.parquet create mode 100644 connectors/golden-tables/src/main/resources/golden/spark-variant-checkpoint/_delta_log/00000000000000000002.json create mode 100644 connectors/golden-tables/src/main/resources/golden/spark-variant-checkpoint/_delta_log/_last_checkpoint create mode 100644 connectors/golden-tables/src/main/resources/golden/spark-variant-checkpoint/info.txt create mode 100644 connectors/golden-tables/src/main/resources/golden/spark-variant-checkpoint/part-00000-16c852df-ba66-4080-be25-530a05922422-c000.snappy.parquet create mode 100644 connectors/golden-tables/src/main/resources/golden/spark-variant-checkpoint/part-00000-1e14ba22-3114-46d1-96fb-48b4912507ce-c000.snappy.parquet create mode 100644 connectors/golden-tables/src/main/resources/golden/spark-variant-checkpoint/part-00000-9a9c570c-ee32-4322-ad2f-8c837a77d398-c000.snappy.parquet create mode 100644 connectors/golden-tables/src/main/resources/golden/spark-variant-checkpoint/part-00001-664313d3-14b4-4dbf-8110-77001b877182-c000.snappy.parquet create mode 100644 kernel-spark/src/test/java/io/delta/kernel/spark/read/DataTypeReaderTest.java diff --git a/connectors/golden-tables/src/main/resources/golden/spark-variant-checkpoint/.part-00000-16c852df-ba66-4080-be25-530a05922422-c000.snappy.parquet.crc b/connectors/golden-tables/src/main/resources/golden/spark-variant-checkpoint/.part-00000-16c852df-ba66-4080-be25-530a05922422-c000.snappy.parquet.crc new file mode 100644 index 0000000000000000000000000000000000000000..260edea352351fa4b617485ca60b4d0757e82a8f GIT binary patch literal 68 zcmV-K0K5NVa$^7h00IEx%hYd}eh%hjdo>*6e|mUms}<)8&?6$rmf4vxsr*t+>jQxp a3&|dKkn{Q#reWBADB^>jc*3Zyv?~ literal 0 HcmV?d00001 diff --git a/connectors/golden-tables/src/main/resources/golden/spark-variant-checkpoint/.part-00000-1e14ba22-3114-46d1-96fb-48b4912507ce-c000.snappy.parquet.crc b/connectors/golden-tables/src/main/resources/golden/spark-variant-checkpoint/.part-00000-1e14ba22-3114-46d1-96fb-48b4912507ce-c000.snappy.parquet.crc new file mode 100644 index 0000000000000000000000000000000000000000..a2a2910169e4a52bd85c5cafb34b349ecfb32de1 GIT binary patch literal 48 zcmV-00MGwpa$^7h00ICJ$)IeOU%}{VjtDmDxTS$DcM-h4o9&_`B+*jAL#d|cLl%1! 
GR{$~FR}?D% literal 0 HcmV?d00001 diff --git a/connectors/golden-tables/src/main/resources/golden/spark-variant-checkpoint/.part-00000-9a9c570c-ee32-4322-ad2f-8c837a77d398-c000.snappy.parquet.crc b/connectors/golden-tables/src/main/resources/golden/spark-variant-checkpoint/.part-00000-9a9c570c-ee32-4322-ad2f-8c837a77d398-c000.snappy.parquet.crc new file mode 100644 index 0000000000000000000000000000000000000000..a2879e5caf82a838667decc088cde68123d92d5d GIT binary patch literal 48 zcmV-00MGwpa$^7h00ID7C7!j5l(S<)m${N$`H}UM^n;AYF;yn(faC_^%;@9iLl%1! GR{${rdKPg2 literal 0 HcmV?d00001 diff --git a/connectors/golden-tables/src/main/resources/golden/spark-variant-checkpoint/.part-00001-664313d3-14b4-4dbf-8110-77001b877182-c000.snappy.parquet.crc b/connectors/golden-tables/src/main/resources/golden/spark-variant-checkpoint/.part-00001-664313d3-14b4-4dbf-8110-77001b877182-c000.snappy.parquet.crc new file mode 100644 index 0000000000000000000000000000000000000000..ec5683e60f079d416f83f21ce60cfe20a1925158 GIT binary patch literal 68 zcmV-K0K5NVa$^7h00IDW(46q9xwve^MW;|Hr$?$efzUZY{iG)dmQ01xB!h5$Wnf72 aNwX6*SlpQ{+SC!UN^67ayhq!e+bpI4LLck^ literal 0 HcmV?d00001 diff --git a/connectors/golden-tables/src/main/resources/golden/spark-variant-checkpoint/_delta_log/.00000000000000000000.json.crc b/connectors/golden-tables/src/main/resources/golden/spark-variant-checkpoint/_delta_log/.00000000000000000000.json.crc new file mode 100644 index 0000000000000000000000000000000000000000..604cea8de4c9194e0fb9fbc988b355e85fc4648a GIT binary patch literal 32 ocmYc;N@ieSU}D&Brs4Vfbjybp4f9W*I<joV?JvY-PfBp%1*VF@fkV#hY zY&nQ8>#{~jydTUBW(U`B1{EE literal 0 HcmV?d00001 diff --git a/connectors/golden-tables/src/main/resources/golden/spark-variant-checkpoint/_delta_log/00000000000000000000.json b/connectors/golden-tables/src/main/resources/golden/spark-variant-checkpoint/_delta_log/00000000000000000000.json new file mode 100644 index 00000000000..76e56fc213e --- /dev/null +++ b/connectors/golden-tables/src/main/resources/golden/spark-variant-checkpoint/_delta_log/00000000000000000000.json @@ -0,0 +1,5 @@ +{"commitInfo":{"timestamp":1723768497710,"operation":"CREATE OR REPLACE TABLE AS SELECT","operationParameters":{"partitionBy":"[]","clusterBy":"[]","description":null,"isManaged":"false","properties":"{\"delta.checkpointInterval\":\"2\"}"},"isolationLevel":"Serializable","isBlindAppend":false,"operationMetrics":{"numFiles":"2","numOutputRows":"100","numOutputBytes":"14767"},"engineInfo":"Apache-Spark/4.0.0-SNAPSHOT Delta-Lake/3.3.0-SNAPSHOT","txnId":"2cc10429-f586-4c74-805c-8d19fd180c87"}} 
+{"metaData":{"id":"d7eb0848-b002-4e0b-9d8d-dd335c90946f","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"id\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}},{\"name\":\"v\",\"type\":\"variant\",\"nullable\":true,\"metadata\":{}},{\"name\":\"array_of_variants\",\"type\":{\"type\":\"array\",\"elementType\":\"variant\",\"containsNull\":true},\"nullable\":true,\"metadata\":{}},{\"name\":\"struct_of_variants\",\"type\":{\"type\":\"struct\",\"fields\":[{\"name\":\"v\",\"type\":\"variant\",\"nullable\":true,\"metadata\":{}}]},\"nullable\":true,\"metadata\":{}},{\"name\":\"map_of_variants\",\"type\":{\"type\":\"map\",\"keyType\":\"string\",\"valueType\":\"variant\",\"valueContainsNull\":true},\"nullable\":true,\"metadata\":{}},{\"name\":\"array_of_struct_of_variants\",\"type\":{\"type\":\"array\",\"elementType\":{\"type\":\"struct\",\"fields\":[{\"name\":\"v\",\"type\":\"variant\",\"nullable\":true,\"metadata\":{}}]},\"containsNull\":true},\"nullable\":true,\"metadata\":{}},{\"name\":\"struct_of_array_of_variants\",\"type\":{\"type\":\"struct\",\"fields\":[{\"name\":\"v\",\"type\":{\"type\":\"array\",\"elementType\":\"variant\",\"containsNull\":true},\"nullable\":true,\"metadata\":{}}]},\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"configuration":{"delta.checkpointInterval":"2"},"createdTime":1723768495302}} +{"protocol":{"minReaderVersion":3,"minWriterVersion":7,"readerFeatures":["variantType-preview"],"writerFeatures":["variantType-preview","appendOnly","invariants"]}} +{"add":{"path":"part-00000-16c852df-ba66-4080-be25-530a05922422-c000.snappy.parquet","partitionValues":{},"size":7443,"modificationTime":1723768496908,"dataChange":true,"stats":"{\"numRecords\":50,\"minValues\":{\"id\":0},\"maxValues\":{\"id\":49},\"nullCount\":{\"id\":0,\"v\":0,\"array_of_variants\":0,\"struct_of_variants\":{\"v\":0},\"map_of_variants\":0,\"array_of_struct_of_variants\":0,\"struct_of_array_of_variants\":{\"v\":0}}}"}} +{"add":{"path":"part-00001-664313d3-14b4-4dbf-8110-77001b877182-c000.snappy.parquet","partitionValues":{},"size":7324,"modificationTime":1723768496908,"dataChange":true,"stats":"{\"numRecords\":50,\"minValues\":{\"id\":50},\"maxValues\":{\"id\":99},\"nullCount\":{\"id\":0,\"v\":0,\"array_of_variants\":0,\"struct_of_variants\":{\"v\":0},\"map_of_variants\":0,\"array_of_struct_of_variants\":0,\"struct_of_array_of_variants\":{\"v\":0}}}"}} diff --git a/connectors/golden-tables/src/main/resources/golden/spark-variant-checkpoint/_delta_log/00000000000000000001.json b/connectors/golden-tables/src/main/resources/golden/spark-variant-checkpoint/_delta_log/00000000000000000001.json new file mode 100644 index 00000000000..2bcd50d730c --- /dev/null +++ b/connectors/golden-tables/src/main/resources/golden/spark-variant-checkpoint/_delta_log/00000000000000000001.json @@ -0,0 +1,2 @@ +{"commitInfo":{"timestamp":1723768498557,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":0,"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{"numFiles":"1","numOutputRows":"1","numOutputBytes":"5072"},"engineInfo":"Apache-Spark/4.0.0-SNAPSHOT Delta-Lake/3.3.0-SNAPSHOT","txnId":"78417efa-a13f-45df-add0-f96aa113fd68"}} 
+{"add":{"path":"part-00000-9a9c570c-ee32-4322-ad2f-8c837a77d398-c000.snappy.parquet","partitionValues":{},"size":5072,"modificationTime":1723768498551,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":0},\"maxValues\":{\"id\":0},\"nullCount\":{\"id\":0,\"v\":0,\"array_of_variants\":0,\"struct_of_variants\":{\"v\":0},\"map_of_variants\":0,\"array_of_struct_of_variants\":0,\"struct_of_array_of_variants\":{\"v\":0}}}"}} diff --git a/connectors/golden-tables/src/main/resources/golden/spark-variant-checkpoint/_delta_log/00000000000000000002.checkpoint.parquet b/connectors/golden-tables/src/main/resources/golden/spark-variant-checkpoint/_delta_log/00000000000000000002.checkpoint.parquet new file mode 100644 index 0000000000000000000000000000000000000000..8e4a50ff0b21f5688671924830a33670bb70a13d GIT binary patch literal 21929 zcmeHP4Qy0bcD`>s{{O)N?qf_0YZ;gr@HjK$@z_J1E`Mu`*CE7l-KZ@aJ$s(XDB~Gt zW^5plD~eDyp;6?eE+Q1MUY1P}+PGCq&@8a1ilPKXY(!HxiXxXHcNYQ6x+slWZ$z!0 zbKiUSy_tD4o&l2G?m98}z5DZ>bI(2Zp7YKwK-U+CHh^AXXi%xY9bSd3GdTOUO3ecpYYL%uY zjN>1?eaNacNEXJlJY&_I{m7moO&wHsXZwF7}rj}#n|;qB$xfFH_6pitV| zvzd~b&`>bAB|H?0J+iLSSzkL`D->*$A`xldbK(ByhNVbA3dN%N;?ig=GSnCIE(830 zHyIIn$)I;Ru{!Ikw(Zz}zgIln@ggbz6YI#xksGA=;D#GSpmR3}{2aSM$S*cu5v)zl zj-LsnN%)t|9|>hX-$?!3EdoC$ZV~eHBezJ`O+q|#HwpZlxv6QxSGkMLw@czW|JU#T z`==I7*2${P_EKEucfS9lpBq~xs5Z=a+x*cZ&=-s_C<8^N^{2 zHLauncJ)7x6&m_4-pcqWBZa_+YMj(f?cU^}ZCXoEkA#W0as8OT)#r=o6)U}U>Zh-j zXmB8aUl*QMB`^cOU#^Yq8BOBag*{;m~fV z#>Fl52B8bAUKtj>9FzP~OoEMIWTc+;LAc>6-%@3%3(~PxQp4AO`34fDo%C$}syaZE zgYtkJigg!%Snh0r_r7o_CIv##9YCe{3>i6jfvl|@-cO2GcX1M!qCu*IUO) z(SS5K{3fY^U(oRdd7ooR84Mz@CSEy=TT?mp)#Zz1%Wa3S<~8B3)-D!G%D;2soX|8` zym_u$c%pkQ-?BZQPWOLG4xcCVX8zN27cK6KmIIqC)?Fj&RJ?w8^ew(ECeKABn~N^m zO~3m;Q}6M~g=TAQs^u06Vr2y8jQHhXOls(RUhaEgFdPWQwufSJ1o#9xcDjz9f8*5? zTuT99NxQ+Q*AN1>mbjtqN?J?*`nCA!BEy6(YEkF&{iy^ieDTene8bR8wdxjh*HCHE z^8f0>$49s?0lLB4pi)Ac4w$LGIQpyOTz`PIHR{-`LV+KAizU!f-P5~4lN^H)c{m{N zfzAg6dQhO#`{Og<3sgcu0fp|Hl) z)gwZcjld7JREcHy<1GUQaxJ!0*)}TvDtMt;d4lc~r_%_LrQez4^G0wGRyGJLDM}uM zy^K_~VYyjF537J07zsL%>_2$q-eVFnHq#tIc+`BvWCoy_0MZqhc<12^7&GS#E0Swq zqS~w`J${4bUgXC_aWWR8;>_=GIaP-%!3)De=6$6=G=U?Q;K8PJ;SGMCWIiZ)#RWmb*m z8>*QVPtpPCy~M;`o0pn1S)yC!%-yV2Y-|O|%8-7!W?UZCViDaa!pQJXwb;&H@aa{7 zKVBB-y*$?Ds1>W;%cq@sm(c>cLw`G4NZa8Jn|)HmA5Ro>uxrJ}&Sy%Doo7poyu?e5 zZ?no90>{fZ+N}NYpq&5Y^OWn z4IiiNv`u|H{TSDSyN}VASaUl=uA}D^tR46|zRt+-^@O+a^~#`O@!Z?lUKWKt^o8f5 z;Q9|pw2U-aYBP&J;j6Cjp6wyOyjLkH?vow0mbHJRI=L5se=v-)KyVMb#1*X+cCD2oKA2aq3($ zHF!jm1d1E_4h*KsoK3<~d2bB9lVpEeASgenevRXFd^P&;|8|mq@gfXXYz3XAm5S6Y z0lxHbj;s*yNYJlRzZ~t01O`zf4O7=2jtodKWhDXXVB&NGp~5ih@*s3x~9Z9D^E zjnHnLfl9v8w}gX310keS(Alaib!#vXjZup-z(O^^qW3L_5KDhx_fUkbmy{HwuoT_RFV}OWAs|fy#8e$;gXa`*`USCfeh`qgQ13 zi7;z*kTV^0srW`cg1idBD6hLRD)>K~;Mm z3@LfX)U763OE%Vau@ibF$_r+Yx%f)FMEs(IR*Od;r*2{&Eyxl-U$M=oA|2um=x0hn zjmUtSd>cr7_&D_t`&o!P!VDK#QQ2@>(9|(jj|t)l0qX773aa&}jULJot#v$>qnDDZ zdN}g4;syd%4;7SY78{%R<8OmH%sozb5c?;f@3?N(?G#@JASRTx}54u^|elzzr&4;>#~ffMqiLa$IB8~HbQ@a(C;$nS%6lzQ!{~-8k|ZFJ(~gg z(31fDxxzIB$umJ4yi*PxFHg_)I6|NFpg?AIXtqV@el16uH9=y*OCp{_ppQLhP2zA? 
zpz_Fo(mBK&HeQj+pbC2hVfQy7>?|HZ7?!b#C(RLOlMq$w$ySCq{UoT?1jCHuS)Bnh zpSj-*=F19kD$gEYgy!lOLSm#f=P;>u)UBO^v-9o^@Hap3(vB@^u~a!_GoFKE$6=^r z;oQN*8Y7HZ&jdgdf-3UJh;2G)oMuP(romtyQO5K9-|j&gHg@q?1wZm2;@p#9@u!-Y z#UERu>xKFWraLldFUFQ6s%(OLc47x8*r66w+DQP?)`AL@`pL%Cb}|W-SsoUG7SHd1 zg%D?$V@q{=red0zw6Ud$EE>SBBG?Uu!BjBQPeq-_^sqM&cFc$49)lz#)sU$yX6ZGJ z(L}B_uZc;4i1$#m@A}XHU(bO=lI6T^PDDL5AmY)dfXWv>w44)S$^`1Z6KcYQn8>g} z%x9khd7R#W@|Xb_%|@yM$;JlHB!Mex)}{*MLuB)l4Y+wvESnEMof)23mdJl1i5p1b zl|MidGeAN!!{$Rsw!||@(9B$_ydT&JvN^|qCv5Wpn;PQ@TO#8|^rMJ=Ltz`xHH&G^ zwy8ay$$&nA=to+a+~Est9&|I4JW-XN_xBL}eTF`xqt~gtn^@%;6EGGbC*o&__1~?i zffLI)ryAD0hIwLnDw`_uT|_?h1k-Im){H2Roj}ZlZ5gi%;;3MsS>Br6kyUoB#FDzK z4#2WQGg^dpehx;&SDvOT#gCt$mjq&;d(>o%ED*#u#<@omg{7JVJS8LIsi$EbjeZlw zb*b6}${0f8h?lA}ApR2~USf!II${f-fC44_#KTSuwM*==&sDeb6FdF@64g%ec$rZy z%up()Q$(CX(s#Z|JHfuKz`@|B@w3U4wdhuwkD|#HDT7nb(%_T}BsPmg;u~=f9$%RQ z5yMuGU*Y2`P0477j6@vk!oh5$UShw)=qM2xKAME!8WZBFN^#B#jazLqh4Upw&38OG zPkNMouBrNmP>^M6RjQ+>osn{$Srb6i(yNpdFP)Mx%}Pn+QWFQV$m}w4L{sQ1ZGeHq zWHOc*mh;GWWzQSrf+cv+DU9)CF+537vfd!F3M{esi(075iIr|K)EIwD+NX>NC z_?$j;nLZpP;3MSUIVnQVigP|rQz^jwu(V;AsodF~SAlkoEG+o+shruUZdOxln8b`* z-MsUqr0kwm#brxT`NLc_l+FvvuRO4}BY5 zSn}G{gasR)>AJ?kf|s-Z%HS3-8>yMPvkMEJeSPV^OYZyByiVe-5o4HRYLOxS^Y>kF zAaHUMI}o_NN((`>^T$dqElwu9DW5n(PG4&jW z4=gDGD^=_FiZ9vk0$H;Ra??U!D(kiemPLD_Cfr0#IJ%AQ2j#n6XTo@5{#P>NmEu`J zJSRZcR&N@H3_0yaK6U^fJd2%(2fJYr9Bro|2wkp2Hk6QJMPd?}nmooP3J;RT*QM+e zda~BZZW!7t+cAE8Jw0UNUcr&CXF|S&$Y&Yy_l4v zB8nev63gsG;-~AELJD=g%@&UAZjc5g$Z~FA3C|5`7PBqi)8K4y65HC87SR`j+P})s z5DD*T&~B32F09VRK0;z04<%;Po+VoI9(WS3ME){Uea3&#aj6^JaDKL^WOB)FxK6qvylx z2)*HOP?k_vv(z-qqUYWDJbum6)UwZeSRQ^VJBsTI<*5;VL#{sWcOTkiOzgjZ9OV{( z-XE@B%e&?c#QPVycF#d}N)k9Hz!z?YmGWR4u(m|`1rbiWHaE?BD7%)~Efk%hoHxPWCF`P1 z)WTk>-@?*WBj+7?VJ}r;%2}=Q95yiRaulDMY_>4t4eeZaWV~&~iRJ7e7_MgBKZue_ z!~175V(qpU@02s=>g6yFz^Q&MK3)zQVZ43Dxy^aFd@ENkt8pYgZqpd4PAB)^pze@B zU;VaQD&?<5;s$Q?+j5+jhrNuo8+e=m7jp(rB>9#e?_8Vn_MCjP?*X6^?*nomKA^8q zZxRDaUjykJmStkJDV4Vj^Q?5`UGz1Ur4wzwoN|UyFHGuuWa%HyD~YnJ`8&kO(BXS2uY^t=4cKDp85^LiV-y;7rJZuGg~ ZtyfwLmpS2H{%`pY32Y=}C;TI={|7VxT9^O; literal 0 HcmV?d00001 diff --git a/connectors/golden-tables/src/main/resources/golden/spark-variant-checkpoint/_delta_log/00000000000000000002.json b/connectors/golden-tables/src/main/resources/golden/spark-variant-checkpoint/_delta_log/00000000000000000002.json new file mode 100644 index 00000000000..c9084727700 --- /dev/null +++ b/connectors/golden-tables/src/main/resources/golden/spark-variant-checkpoint/_delta_log/00000000000000000002.json @@ -0,0 +1,2 @@ +{"commitInfo":{"timestamp":1723768498990,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":1,"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{"numFiles":"1","numOutputRows":"1","numOutputBytes":"5072"},"engineInfo":"Apache-Spark/4.0.0-SNAPSHOT Delta-Lake/3.3.0-SNAPSHOT","txnId":"d90393d5-9cdd-40f1-8861-121f2169808b"}} +{"add":{"path":"part-00000-1e14ba22-3114-46d1-96fb-48b4912507ce-c000.snappy.parquet","partitionValues":{},"size":5072,"modificationTime":1723768498986,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":1},\"maxValues\":{\"id\":1},\"nullCount\":{\"id\":0,\"v\":0,\"array_of_variants\":0,\"struct_of_variants\":{\"v\":0},\"map_of_variants\":0,\"array_of_struct_of_variants\":0,\"struct_of_array_of_variants\":{\"v\":0}}}"}} diff --git a/connectors/golden-tables/src/main/resources/golden/spark-variant-checkpoint/_delta_log/_last_checkpoint 
b/connectors/golden-tables/src/main/resources/golden/spark-variant-checkpoint/_delta_log/_last_checkpoint new file mode 100644 index 00000000000..5902bb8eea8 --- /dev/null +++ b/connectors/golden-tables/src/main/resources/golden/spark-variant-checkpoint/_delta_log/_last_checkpoint @@ -0,0 +1 @@ +{"version":2,"size":6,"sizeInBytes":21929,"numOfAddFiles":4,"checkpointSchema":{"type":"struct","fields":[{"name":"txn","type":{"type":"struct","fields":[{"name":"appId","type":"string","nullable":true,"metadata":{}},{"name":"version","type":"long","nullable":true,"metadata":{}},{"name":"lastUpdated","type":"long","nullable":true,"metadata":{}}]},"nullable":true,"metadata":{}},{"name":"add","type":{"type":"struct","fields":[{"name":"path","type":"string","nullable":true,"metadata":{}},{"name":"partitionValues","type":{"type":"map","keyType":"string","valueType":"string","valueContainsNull":true},"nullable":true,"metadata":{}},{"name":"size","type":"long","nullable":true,"metadata":{}},{"name":"modificationTime","type":"long","nullable":true,"metadata":{}},{"name":"dataChange","type":"boolean","nullable":true,"metadata":{}},{"name":"tags","type":{"type":"map","keyType":"string","valueType":"string","valueContainsNull":true},"nullable":true,"metadata":{}},{"name":"deletionVector","type":{"type":"struct","fields":[{"name":"storageType","type":"string","nullable":true,"metadata":{}},{"name":"pathOrInlineDv","type":"string","nullable":true,"metadata":{}},{"name":"offset","type":"integer","nullable":true,"metadata":{}},{"name":"sizeInBytes","type":"integer","nullable":true,"metadata":{}},{"name":"cardinality","type":"long","nullable":true,"metadata":{}},{"name":"maxRowIndex","type":"long","nullable":true,"metadata":{}}]},"nullable":true,"metadata":{}},{"name":"baseRowId","type":"long","nullable":true,"metadata":{}},{"name":"defaultRowCommitVersion","type":"long","nullable":true,"metadata":{}},{"name":"clusteringProvider","type":"string","nullable":true,"metadata":{}},{"name":"stats","type":"string","nullable":true,"metadata":{}}]},"nullable":true,"metadata":{}},{"name":"remove","type":{"type":"struct","fields":[{"name":"path","type":"string","nullable":true,"metadata":{}},{"name":"deletionTimestamp","type":"long","nullable":true,"metadata":{}},{"name":"dataChange","type":"boolean","nullable":true,"metadata":{}},{"name":"extendedFileMetadata","type":"boolean","nullable":true,"metadata":{}},{"name":"partitionValues","type":{"type":"map","keyType":"string","valueType":"string","valueContainsNull":true},"nullable":true,"metadata":{}},{"name":"size","type":"long","nullable":true,"metadata":{}},{"name":"deletionVector","type":{"type":"struct","fields":[{"name":"storageType","type":"string","nullable":true,"metadata":{}},{"name":"pathOrInlineDv","type":"string","nullable":true,"metadata":{}},{"name":"offset","type":"integer","nullable":true,"metadata":{}},{"name":"sizeInBytes","type":"integer","nullable":true,"metadata":{}},{"name":"cardinality","type":"long","nullable":true,"metadata":{}},{"name":"maxRowIndex","type":"long","nullable":true,"metadata":{}}]},"nullable":true,"metadata":{}},{"name":"baseRowId","type":"long","nullable":true,"metadata":{}},{"name":"defaultRowCommitVersion","type":"long","nullable":true,"metadata":{}}]},"nullable":true,"metadata":{}},{"name":"metaData","type":{"type":"struct","fields":[{"name":"id","type":"string","nullable":true,"metadata":{}},{"name":"name","type":"string","nullable":true,"metadata":{}},{"name":"description","type":"string","nullable":true,"metadata":{}},{"name":"fo
rmat","type":{"type":"struct","fields":[{"name":"provider","type":"string","nullable":true,"metadata":{}},{"name":"options","type":{"type":"map","keyType":"string","valueType":"string","valueContainsNull":true},"nullable":true,"metadata":{}}]},"nullable":true,"metadata":{}},{"name":"schemaString","type":"string","nullable":true,"metadata":{}},{"name":"partitionColumns","type":{"type":"array","elementType":"string","containsNull":true},"nullable":true,"metadata":{}},{"name":"configuration","type":{"type":"map","keyType":"string","valueType":"string","valueContainsNull":true},"nullable":true,"metadata":{}},{"name":"createdTime","type":"long","nullable":true,"metadata":{}}]},"nullable":true,"metadata":{}},{"name":"protocol","type":{"type":"struct","fields":[{"name":"minReaderVersion","type":"integer","nullable":true,"metadata":{}},{"name":"minWriterVersion","type":"integer","nullable":true,"metadata":{}},{"name":"readerFeatures","type":{"type":"array","elementType":"string","containsNull":true},"nullable":true,"metadata":{}},{"name":"writerFeatures","type":{"type":"array","elementType":"string","containsNull":true},"nullable":true,"metadata":{}}]},"nullable":true,"metadata":{}},{"name":"domainMetadata","type":{"type":"struct","fields":[{"name":"domain","type":"string","nullable":true,"metadata":{}},{"name":"configuration","type":"string","nullable":true,"metadata":{}},{"name":"removed","type":"boolean","nullable":true,"metadata":{}}]},"nullable":true,"metadata":{}}]},"checksum":"a8d400a03ead8a86dbb412f2a693e26e"} diff --git a/connectors/golden-tables/src/main/resources/golden/spark-variant-checkpoint/info.txt b/connectors/golden-tables/src/main/resources/golden/spark-variant-checkpoint/info.txt new file mode 100644 index 00000000000..e4c1508e318 --- /dev/null +++ b/connectors/golden-tables/src/main/resources/golden/spark-variant-checkpoint/info.txt @@ -0,0 +1,71 @@ +This file contains the code used to generate this golden table "spark-variant-checkpoint" + +Using delta-spark 4.0, run the following scala script: + +val tableName = "./connectors/golden-tables/src/main/resources/golden/spark-variant-checkpoint" +val query = """ + with jsonStrings as ( + select + id, + format_string('{"key": %s}', id) as jsonString + from + range(0, 100) + ) + select + id, + parse_json(jsonString) as v, + array( + parse_json(jsonString), + null, + parse_json(jsonString), + null, + parse_json(jsonString) + ) as array_of_variants, + named_struct('v', parse_json(jsonString)) as struct_of_variants, + map( + cast(id as string), + parse_json(jsonString), + 'nullKey', + null + ) as map_of_variants, + array( + named_struct('v', parse_json(jsonString)), + named_struct('v', null), + null, + named_struct( + 'v', + parse_json(jsonString) + ), + null, + named_struct( + 'v', + parse_json(jsonString) + ) + ) as array_of_struct_of_variants, + named_struct( + 'v', + array( + null, + parse_json(jsonString) + ) + ) as struct_of_array_of_variants + from + jsonStrings +""" + +val writeToTableSql = s""" + create or replace table $tableName + USING DELTA TBLPROPERTIES (delta.checkpointInterval = 2) +""" + +spark.sql(s"${writeToTableSql}\n${query}") +// Write two additional rows to create a checkpoint. 
+(0 until 2).foreach { v => + spark + .sql(query) + .where(s"id = $v") + .write + .format("delta") + .mode("append") + .insertInto(tableName) +} diff --git a/connectors/golden-tables/src/main/resources/golden/spark-variant-checkpoint/part-00000-16c852df-ba66-4080-be25-530a05922422-c000.snappy.parquet b/connectors/golden-tables/src/main/resources/golden/spark-variant-checkpoint/part-00000-16c852df-ba66-4080-be25-530a05922422-c000.snappy.parquet new file mode 100644 index 0000000000000000000000000000000000000000..b92350a7869b1a45d038ed09ed8e0c5b830d066b GIT binary patch literal 7443 zcmd^EeQZJUQ}Lamfls6r^xGM1*SVils4rV${BSUXh+F^0A*qZ30}6sU&EAE0gLzU%k= zf=w`^e{3VlFXw&Sd(Sz)d(OGX&g!MF1_&a;);i8a>A})%#DB+@%i>J6KpWG2FM2#0PKJ{fI>hKpcqgBCiYtH)cM=esuit!MmBugkh8=Gk{GvcGyoegnT*u`TOfL^F=@`U#^PegoPZscw>8%Y-Q>4p%8_tg0!@3?P_j* zDba(AQ29sy-{bA|#QE9`3v=PZMm%OD(PHd95^~uS@^7c`=WkPO4c{^P!`at_p z8@Xa3@4owoAxYED&Qj3+d(luY=2zfS^y!Z<@}P9cRBz4O&FR%-O0P6cc9vmEiJ&;(g8#s!(WJ7TQ=xH1 zMRBB!%G0|nN9rm-GRLHvImMBuw{@9gvaU0R6*YdqqP)P$^RSRonRU7BL$j``c@PFa zXIt;SLD&!0r%p%ip&E}Xu*evmjR;^0hJpF2{=$ObFWj$HnaRcnNvabbGmU z=Iq_&GiDD|kb77geoJPv4^@)4Mbqk&Rb-Rtc2FggW@TU1lDGJDuKdyh@{W1>;9~NI zJ{|VbebibdhMC!M2WZAhE5ANo?YO40x=#&LsN5jaYS5&>U zvP9EYg*4OaFrw2D;-XjmWI0j?^0kGnqO8EeMpSBRze)Qr3{bDxoJJ)!F2E1HIvdZB z;)P~I1TR1>6VVxJ_cxMxbRfXgj=<4kS871(^334Z0$vYw!`IsudfiS>$uL*FmhTLY z;cO0P81QlN%p(EO;YRR&GCxvK&aAxen~l5{Wlr|FvIIK0maGhGm7)0iH?u#|e;myNDR3NypV>(4@*?Tvtu^z3>X zSFLo9zYGp6ufx&SzRefg5rbaJr?pkS?nHY>vaQu2g?xUWA316>Ms)0K_CW`>vn$c* zYj69pPj9Xq(|Nnxojp4e?j>&SzuW8HnoKmuJKRfN-{o$NwXj#oI6O`bYi?`Z26wEC z#5Om>TWF*bUbnuYd&#ceUav8;J3TSgTjV}Bqbo7iGHlj{z~fZg_e1VUTGN)c*3MY6 zwc};5mRs7Jv+3!0(7U2c{j+fP=$>!L-M0HW&`iT%06c`&Gerp)n$-49>rFFw(w;v5 z7-SiHl?!WGcg@(-@0`=`CI}1VTmfGo z3&s<{#yYs$9II8P;8RVG OzsLq(53h!QhyEKJIJ-Ll literal 0 HcmV?d00001 diff --git a/connectors/golden-tables/src/main/resources/golden/spark-variant-checkpoint/part-00000-1e14ba22-3114-46d1-96fb-48b4912507ce-c000.snappy.parquet b/connectors/golden-tables/src/main/resources/golden/spark-variant-checkpoint/part-00000-1e14ba22-3114-46d1-96fb-48b4912507ce-c000.snappy.parquet new file mode 100644 index 0000000000000000000000000000000000000000..ad1c30c5de0ac3d00658bbb4e6f0d0a2b8f53e58 GIT binary patch literal 5072 zcmb_gUuYCZ7@wWn>|Ju#OJXy_u3qK1HcOQB$eDQg!%-}i5^M+|$RQM~8*if!x5FD)f>Yla{TvYqB|FL2AOHPDFa^jWLcy+TPrCv1 z`a8egYl)B+FqepgQ0st!x!VVrzUw~ZGyUAS|I6j3DnAwuBHsKU@&f51pMP@wC(*%t z$3X)LqiSl*^x9t^eGk;QXjP^*`VdKxE00&7gd#wd=?hUjCmT^n8y+*!_ z@N6R?0g+j$X0TQ|Fg-V8Nd!dX`;; z2FNIdO-h8O$vBJ@8fKHL0y%8YmM|GJW_#JURiAUVAd<9gxmGn>X(FeMZ*6SfqC}3{ zmUDjRY%yS>8wktI&e?nmnXvb*+cU_@N73O>aat>!IaWMzN}tq77!k1(%Q%}s`>`yP zp{$ZnCz6}WT5d+Kj02N;Ny}&@jhVD-g@ShO*wo2mWv!6a@}(jaG-)ISqU;xCF_$ft z2xf+UJ*Q9V`O>BycN9y7nG>aImmb-)PHNND8wnoiu5f9FM*~Pg7#0QgjC7)r2>XuB z5Up<;2E2<0?8dWk=!Os;MBQb(=4iIOrWV_g8Ol{exv#QXsq9Aq3F=-pLomhk5FJ$m zDZA3yBZ|6s{j*mgGvT>GaK@=UW)Vd6?GDmGe-4rnq^98ks6ccH<%*~V&L|yM(#9h7X@_*tB-^T4~q||=oWJ~yqhBE3gWUKDZ1BD z`4L@Zl&(EPMv=Pe^GeC7jKylzr+(lAR<%+vR<&zL*@V@fjMarOW3>Qnka;EpE_b>r z8^&oG?6;Z@N%nprdVv7;oiI6s)V0m9okC#B;MW*@ZM>dR#Du@h;1^mM{7RE?AV^XM zYAcdmg^VT^Q<0vcr(s7b3F&CQszWeoi*q(iVA<^Y4p$13?FvJJ8LS$R>-_0;&8 zK6Oo>P7?44x0ww*PFEFT+sWNc6X@tHk`I}6;Kj*8)GQp)L_E|H**BsMeb>Q%zES)!!J#l*`8N3m2&uptDILnu}^-l~U}OT4>W z5eXD4wzNtBBTk-B_bizI%r^S3IJx{Y5)Z+KQ|u!biUk=jf03kKZra_y2*zRPJ9G-T#EHO z3?z)osWHz}gBaO(l7SvW|=) z^-jPqCATs*t2+Vp-2kwvl!CFUTtmtVtbSvxPKFt)b0R;JL619KmJPdT3hY;#4hi;t zA$pPk_T4bqiPWW)u-!sn%HWq6d}X}8QpAEk&*0CtGWgpK#(^M78LX^GP8BkGSWHEF zkRFB|$poaM^{UE3+?|MR#?uKf&^ggB8O(!LvK^@l(7He>0?bHl7&8lbE{gP3x*v9W 
zX^p+okY8`Odd>@u=sm{cLK{2jxu97`?$v9FM8&|{6t(r}7 zu~9wQ2ox)2ViYUaYFt|v#ZMT;x%G_VJp4*xX4Rg0?~qU`6Va7m`{@KYS#^S?;5mjK z5z$+W(|v=}@~SwwWfLi*wajQ$szZGx8>jUvqqXo9NJY;yQ>=#Q-R2JH{CZ@0Dt{=Z zO>59f#f+{hR?_o@%v4Tp=#Rx?aU^f9vM3zQ#-IT^I-?h3`Ki}pc5fv=3LcLZk4@{* z;i%aSM0=tKGkP{%hz`GUJet!c*{4hzmMydFRPGSWSPju8vhWr9D7`0YdyEbrpPlXD zJ4?<+tF`dIx2h@OzBp&Y1eUF?Z?_qVgLGgjSJX1O!WghMna=vA?sP|XQ&f##N3;JQ z`Ck9rDjLvgVqgG^(0N*t0MU$bZboZbrIWGx;u^%_XXQsV&r{=P`ouMTGD*NATw^xy zI6YN}t|fO@OrX89NZx7Hf#)XkomS!K?2YyI$9g-vCuTC)bm!23p4NvFgM9-7{qcCZ qH$5<<_w^1X5`BpYtuL+j4Q++lgtl3E2*1?${DVSagvR0j^8N!30RyD~ literal 0 HcmV?d00001 diff --git a/connectors/golden-tables/src/main/resources/golden/spark-variant-checkpoint/part-00001-664313d3-14b4-4dbf-8110-77001b877182-c000.snappy.parquet b/connectors/golden-tables/src/main/resources/golden/spark-variant-checkpoint/part-00001-664313d3-14b4-4dbf-8110-77001b877182-c000.snappy.parquet new file mode 100644 index 0000000000000000000000000000000000000000..2573a38fed73d3c5e883b906dee774bc6b69a841 GIT binary patch literal 7324 zcmd^^du&tJ9mmhTzP`Qz0|Dh+*^4*4DBKt=xX#ND>b6NCKp^4$N@+-LVnZ$RfV|cS zMam|I7^=|zKnx+oGQWK|N>y>NF|X2v68oQ|uRklfx3d zV4`1t@X;@)r+7Uel*hcbU?k50Yq5PQDFLOR40wSL_(1^72d{%5cmuo%-U8*I0=x|t zfQ6tEECP$c5>N$}f@Po@)PUt+1y~7Ifz@CQSPRyH^Q9y9>e zKDDqh=}Hq7s7${@KOOyRzd1;GDiAW&#oluqF>MR?ATHT7hmK$VbuSBw5K9Fqo(Y6K z`Ljd^O?33cqZ^hW8$!r&K@;(R7%f82o9F;f4;}o?pw&U|o3x0CgxuotzmKR@+}e5X z4-dDPiD0VW=45|z@51A;+h#J}HJykBga{Q*>7vIfROD2uw#z#l(vS12j=HW67FsWr z)_mdr?CW5*P`N6<>RM?{PUX6F;o9q6b++B%+T)QMXIdOry6XPcbL(NRQ`)V%_TKjI z%da{RjvV^3=ZI_X@yN-OU!6Nu_@^^1e?K?&_=3lJWrvkyu(~%mx`_T`@YEGC)t#2f zyvd$U2x5@fF*kp7alg@y;gf0h3htPTpB}DF-!Z(lW5xtlU;jC9L z$~-J)tzFH{%fdb8nWV@pl44K-N^FZ~#3fZ+FX-oy*RAK+$@07e2KP;8zRyG%SP4ft5i4EVL50c6TGjz&59mxm`m zHaXZ8t0UMoksPEF$vyd9ZhXTO6E341FXomzg$3n8WrMJ|PpIk_mJJCtBf<(|Ugb2e zDL1ccFmLEH*Y=w?4Vmjk%=JW6onlkD7;X@^_KA^xaodpCG9tDS%MK@tCbXZ*qBl%* z-;v+{Vv44n9ca-0xuB~TrsTq-Pd0O;ki4>KyM=n_y}q%bX{k$8Vh|%V1Fv4;9}wsn zo__f8&krI#!$Km35AcoxWLeFHdyjXUh%lpr`+@VYtNm#7r+Ec?zsVwpvQYh{|JCY$ zm8YxerGNkKE|==luej&>l#LwF&VXnZ9Wl{6N1xoDu($4{?kyKLOZvjU5ApFxukQBh zAX!19#+m5h^>Z87^y-fokMkzMQymwL0x+ht;xLw1FPh&IZS=(TL| zt0tO0Cf?H7ZI(oHmp;Z&PL#7Np`V%Ti&%jk;NvO{#TgGZl&X2)g`bnjdukA0=7IU~ z12X2K5(?QNCCW_}34{v6V1H;*ZRx`dEg#?^?!L*so~aE@9*}2@HIy4u52+cMnHcAg zB|2)7S&BGhbfn|#+Ag0rv-@KbEl;@}veFS=yFZ=HUXRS6@2A`jXHT5HhfbJ1V5d(w zZTKz=n|&ge?h{g^KbTD$Qf>$H=~t=BZWhp8vFUN;F1kN;`ssP}y~K2^?Gxt`YsYrA zeOrA;(|h3-l}<-LJBPRNkq{~4ZCp1RCu*HTZ02sYxhovImRiD{YDn!=*^rrPd%N0G z-?pW`TWyc1t(_hCgdYOMtLu+QV zsM{w_qb z&W+bgBH#5=Sw8Hh)kL|EcGMcv;^Mkvan)4q&63AWcpdZ-SE8gQYHM+dRBW?EAtv%S zUh0(Zy6F<49E14~ED53=g7vJ_WWO)>aTuW7u{iWntY5&7M0wVq5qlxp4FO)j74|DQ zLG2MA{hs{cYfSABK9pMRbD(uedh%-tpGbAx*ZU^OW|lLaT;wwo+OpLv*V!`b?h z`wh0UK^6>WXP5GKyAq`iwg=*8t_>%!K|1cTXa_8`;v6RO zSwA9hwwNv?%86V}EkQ;;TWu^E^^}DA+gb8H0h5E?kWo77yP9o;Fnp#h?t9&*5e39+eqZ=%!+6k zT}hPF^OE73WJFdjJN2d z!i9MWiFxyP_VyO)GrN-$G~&$*^7H#m$_|EwV>&SVE`VY_l$=GhDNk?)0)$?IB8E8yhO0{waO%F zhO4Hp>9-!!Zzlm|0O@j5WCrQI1X9L^$lQK`Gg>1ynX zG>4o4UpN#Fc>N_lU#Z6vDhl}m;gX_&*IVLkR7*nPl0Y%;8r8Y>QGBe)@S9A*pNLoD H|C0X!s|gKc literal 0 HcmV?d00001 diff --git a/kernel-spark/src/test/java/io/delta/kernel/spark/read/DataTypeReaderTest.java b/kernel-spark/src/test/java/io/delta/kernel/spark/read/DataTypeReaderTest.java new file mode 100644 index 00000000000..f3b88044278 --- /dev/null +++ b/kernel-spark/src/test/java/io/delta/kernel/spark/read/DataTypeReaderTest.java @@ -0,0 +1,214 @@ +/* + * Copyright (2025) The Delta Lake Project Authors. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.delta.kernel.spark.read; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertThrows; + +import io.delta.golden.GoldenTableUtils$; +import java.io.File; +import java.util.List; +import org.apache.spark.SparkConf; +import org.apache.spark.sql.Dataset; +import org.apache.spark.sql.Row; +import org.apache.spark.sql.SparkSession; +import org.apache.spark.sql.connector.read.streaming.Offset; +import org.apache.spark.sql.sources.*; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.TestInstance; + +@TestInstance(TestInstance.Lifecycle.PER_CLASS) +public class DataTypeReaderTest { + + private SparkMicroBatchStream microBatchStream; + private SparkSession spark; + + @BeforeAll + public void setUpClass() { + // Create a temporary directory manually instead of using @TempDir + File tempDir = + new File(System.getProperty("java.io.tmpdir"), "datatype-reader-test-" + System.nanoTime()); + tempDir.mkdirs(); + tempDir.deleteOnExit(); + + SparkConf conf = + new SparkConf() + .set("spark.sql.catalog.dsv2", "io.delta.kernel.spark.catalog.TestCatalog") + .set("spark.sql.catalog.dsv2.base_path", tempDir.getAbsolutePath()) + .set("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension") + .set( + "spark.sql.catalog.spark_catalog", + "org.apache.spark.sql.delta.catalog.DeltaCatalog") + .setMaster("local[*]") + .setAppName("DataTypeReaderTest"); + spark = SparkSession.builder().config(conf).getOrCreate(); + } + + @AfterAll + public void tearDownClass() { + if (spark != null) { + spark.stop(); + } + } + + @BeforeEach + void setUp() { + microBatchStream = new SparkMicroBatchStream(); + } + + private String goldenTablePath(String name) { + return GoldenTableUtils$.MODULE$.goldenTablePath(name); + } + + @Test + public void testnew() { + UnsupportedOperationException exception = + assertThrows(UnsupportedOperationException.class, () -> microBatchStream.latestOffset()); + assertEquals("latestOffset is not supported", exception.getMessage()); + } + + @Test + public void testLatestOffset_throwsUnsupportedOperationException() { + UnsupportedOperationException exception = + assertThrows(UnsupportedOperationException.class, () -> microBatchStream.latestOffset()); + assertEquals("latestOffset is not supported", exception.getMessage()); + } + + @Test + public void testPlanInputPartitions_throwsUnsupportedOperationException() { + Offset start = null; + Offset end = null; + UnsupportedOperationException exception = + assertThrows( + UnsupportedOperationException.class, + () -> microBatchStream.planInputPartitions(start, end)); + assertEquals("planInputPartitions is not supported", exception.getMessage()); + } + + @Test + public void testCreateReaderFactory_throwsUnsupportedOperationException() { + UnsupportedOperationException exception = + 
assertThrows( + UnsupportedOperationException.class, () -> microBatchStream.createReaderFactory()); + assertEquals("createReaderFactory is not supported", exception.getMessage()); + } + + @Test + public void testInitialOffset_throwsUnsupportedOperationException() { + UnsupportedOperationException exception = + assertThrows(UnsupportedOperationException.class, () -> microBatchStream.initialOffset()); + assertEquals("initialOffset is not supported", exception.getMessage()); + } + + @Test + public void testDeserializeOffset_throwsUnsupportedOperationException() { + UnsupportedOperationException exception = + assertThrows( + UnsupportedOperationException.class, () -> microBatchStream.deserializeOffset("{}")); + assertEquals("deserializeOffset is not supported", exception.getMessage()); + } + + @Test + public void testCommit_throwsUnsupportedOperationException() { + Offset end = null; + UnsupportedOperationException exception = + assertThrows(UnsupportedOperationException.class, () -> microBatchStream.commit(end)); + assertEquals("commit is not supported", exception.getMessage()); + } + + @Test + public void testStop_throwsUnsupportedOperationException() { + UnsupportedOperationException exception = + assertThrows(UnsupportedOperationException.class, () -> microBatchStream.stop()); + assertEquals("stop is not supported", exception.getMessage()); + } + + @Test + public void testUnsupportedDataTypeValidation() { + // Test the real-world scenario: table created with unsupported mock data type + // then accessed via kernel-spark connector (should detect unsupported type) + + String tablePath = goldenTablePath("kernel-spark-unsupported-datatype-validation"); + + try { + // First, try to describe the table to see its schema + Dataset describeResult = spark.sql("DESCRIBE TABLE `dsv2`.`delta`.`" + tablePath + "`"); + List schemaRows = describeResult.collectAsList(); + + System.out.println("Golden table schema:"); + for (Row row : schemaRows) { + System.out.println(" " + row.getString(0) + " : " + row.getString(1)); + } + + // Check for unsupported mock columns in the described schema + boolean hasUnsupportedColumn = + schemaRows.stream() + .anyMatch( + row -> + "unsupported_column".equals(row.getString(0)) + && (row.getString(1).toLowerCase().contains("unsupported") + || row.getString(1).toLowerCase().contains("mock"))); + + if (hasUnsupportedColumn) { + System.out.println("✓ Golden table contains unsupported mock data type column"); + System.out.println( + " Testing file path access (should trigger UnsupportedDataTypeException)..."); + + try { + // This should go through our kernel-spark connector and trigger validation + Dataset result = spark.sql("SELECT * FROM `dsv2`.`delta`.`" + tablePath + "`"); + List data = result.collectAsList(); + + System.out.println(" WARNING: Query succeeded - validation may not have been triggered"); + System.out.println(" Returned " + data.size() + " rows"); + + } catch (Exception queryException) { + System.out.println(" ✓ Query failed as expected: " + queryException.getMessage()); + if (queryException.getMessage() != null + && (queryException.getMessage().toLowerCase().contains("unsupported") + || queryException.getMessage().toLowerCase().contains("mock"))) { + System.out.println( + " ✓ Failure message indicates unsupported data type validation working"); + } + } + + } else { + assert (false); + System.out.println(" Golden table created without unsupported column"); + System.out.println( + " This means the unsupported mock data type was rejected during golden table 
creation"); + + // Try to query the table anyway to make sure it works for supported types + Dataset result = spark.sql("SELECT * FROM `dsv2`.`delta`.`" + tablePath + "`"); + List data = result.collectAsList(); + System.out.println(" ✓ Successfully queried fallback table with " + data.size() + " rows"); + } + + } catch (Exception e) { + System.out.println( + "Golden table test failed: " + e.getClass().getSimpleName() + " - " + e.getMessage()); + + // This might happen if golden table doesn't exist + if (!new File(tablePath).exists()) { + System.out.println(" Golden table directory does not exist: " + tablePath); + System.out.println(" You may need to run: ./build/sbt \"goldenTables/test\""); + } + } + } +} From 8637281e6c8998176a66baeb43fd843d4b799f2e Mon Sep 17 00:00:00 2001 From: Amanda Liu Date: Sun, 5 Oct 2025 12:11:49 -0700 Subject: [PATCH 04/13] test --- .../kernel/spark/catalog/SparkTable.java | 101 ++++++++++-------- .../kernel/spark/read/DataTypeReaderTest.java | 93 ++++------------ 2 files changed, 76 insertions(+), 118 deletions(-) diff --git a/kernel-spark/src/main/java/io/delta/kernel/spark/catalog/SparkTable.java b/kernel-spark/src/main/java/io/delta/kernel/spark/catalog/SparkTable.java index 910ba36a90b..d16bbeb0642 100644 --- a/kernel-spark/src/main/java/io/delta/kernel/spark/catalog/SparkTable.java +++ b/kernel-spark/src/main/java/io/delta/kernel/spark/catalog/SparkTable.java @@ -21,6 +21,19 @@ import io.delta.kernel.internal.SnapshotImpl; import io.delta.kernel.spark.read.SparkScanBuilder; import io.delta.kernel.spark.utils.SchemaUtils; +import io.delta.kernel.types.ArrayType; +import io.delta.kernel.types.BinaryType; +import io.delta.kernel.types.BooleanType; +import io.delta.kernel.types.ByteType; +import io.delta.kernel.types.DataType; +import io.delta.kernel.types.DecimalType; +import io.delta.kernel.types.DoubleType; +import io.delta.kernel.types.FloatType; +import io.delta.kernel.types.IntegerType; +import io.delta.kernel.types.LongType; +import io.delta.kernel.types.MapType; +import io.delta.kernel.types.ShortType; +import io.delta.kernel.types.StringType; import java.util.*; import org.apache.hadoop.conf.Configuration; import org.apache.spark.sql.SparkSession; @@ -45,70 +58,62 @@ public class SparkTable implements Table, SupportsRead { private final SnapshotImpl snapshot; private final Configuration hadoopConf; - private final StructType schema; + private final org.apache.spark.sql.types.StructType schema; private final List partColNames; - private final StructType dataSchema; - private final StructType partitionSchema; + private final org.apache.spark.sql.types.StructType dataSchema; + private final org.apache.spark.sql.types.StructType partitionSchema; private final Column[] columns; private final Transform[] partitionTransforms; /** - * Validates that all fields in the schema use allowed data types. + * Validates all fields in kernel schema use allowed data types. 
* - * @param schema the schema to validate + * @param schema the kernel schema to validate * @throws IllegalArgumentException if any field uses a disallowed data type */ - private static void validateSchemaTypes(StructType schema) { - for (StructField field : schema.fields()) { - validateDataType(field.dataType(), field.name()); + private static void validateSchemaTypes(io.delta.kernel.types.StructType schema) { + for (io.delta.kernel.types.StructField field : schema.fields()) { + validateKernelDataType(field.getDataType(), field.getName()); } } /** - * Recursively validates a data type and its nested types. + * Recursively validate kernel data type and nested types. * - * @param dataType the data type to validate + * @param dataType the kernel data type to validate * @param fieldPath the path to the field (for error messages) * @throws IllegalArgumentException if the data type is not allowed */ - private static void validateDataType(DataType dataType, String fieldPath) { - if (isAllowedType(dataType)) { - // For struct types, validate nested fields - if (dataType instanceof StructType) { - StructType structType = (StructType) dataType; - for (StructField field : structType.fields()) { - validateDataType(field.dataType(), fieldPath + "." + field.name()); + private static void validateKernelDataType(DataType dataType, String fieldPath) { + if (isAllowedKernelType(dataType)) { + // Validate nested fields for structs, arrays, maps + if (dataType instanceof io.delta.kernel.types.StructType) { + io.delta.kernel.types.StructType structType = (io.delta.kernel.types.StructType) dataType; + for (io.delta.kernel.types.StructField field : structType.fields()) { + validateKernelDataType(field.getDataType(), fieldPath + "." + field.getName()); } - } - // For array types, validate element type - else if (dataType instanceof ArrayType) { + } else if (dataType instanceof ArrayType) { ArrayType arrayType = (ArrayType) dataType; - validateDataType(arrayType.elementType(), fieldPath + ".element"); - } - // For map types, validate key and value types - else if (dataType instanceof MapType) { + validateKernelDataType(arrayType.getElementType(), fieldPath + ".element"); + } else if (dataType instanceof MapType) { MapType mapType = (MapType) dataType; - validateDataType(mapType.keyType(), fieldPath + ".key"); - validateDataType(mapType.valueType(), fieldPath + ".value"); + validateKernelDataType(mapType.getKeyType(), fieldPath + ".key"); + validateKernelDataType(mapType.getValueType(), fieldPath + ".value"); } } else { throw new IllegalArgumentException( String.format( - "Unsupported data type '%s' for field '%s'. " - + "Only numeric types (ByteType, ShortType, IntegerType, LongType, FloatType, " - + "DoubleType, DecimalType), string types (StringType, VarcharType, CharType), " - + "BinaryType, and BooleanType are supported.", - dataType.typeName(), fieldPath)); + "Unsupported data type '%s' for field '%s'. ", dataType.toString(), fieldPath)); } } /** - * Checks if a data type is allowed. + * Checks if a kernel data type is allowed. 
* - * @param dataType the data type to check + * @param dataType the kernel data type to check * @return true if the data type is allowed, false otherwise */ - private static boolean isAllowedType(DataType dataType) { + private static boolean isAllowedKernelType(DataType dataType) { return dataType instanceof ByteType || dataType instanceof ShortType || dataType instanceof IntegerType @@ -117,13 +122,11 @@ private static boolean isAllowedType(DataType dataType) { || dataType instanceof DoubleType || dataType instanceof DecimalType || dataType instanceof StringType - || dataType instanceof VarcharType - || dataType instanceof CharType || dataType instanceof BinaryType || dataType instanceof BooleanType || // Allow complex types but validate their nested types - dataType instanceof StructType + dataType instanceof io.delta.kernel.types.StructType || dataType instanceof ArrayType || dataType instanceof MapType; } @@ -156,17 +159,17 @@ public SparkTable(Identifier identifier, String tablePath, Map o io.delta.kernel.TableManager.loadSnapshot(tablePath) .build(io.delta.kernel.defaults.engine.DefaultEngine.create(hadoopConf)); + validateSchemaTypes(snapshot.getSchema()); this.schema = SchemaUtils.convertKernelSchemaToSparkSchema(snapshot.getSchema()); - validateSchemaTypes(this.schema); this.partColNames = Collections.unmodifiableList(new ArrayList<>(snapshot.getPartitionColumnNames())); - final List dataFields = new ArrayList<>(); - final List partitionFields = new ArrayList<>(); + final List dataFields = new ArrayList<>(); + final List partitionFields = new ArrayList<>(); // Build a map for O(1) field lookups to improve performance - Map fieldMap = new HashMap<>(); - for (StructField field : schema.fields()) { + Map fieldMap = new HashMap<>(); + for (org.apache.spark.sql.types.StructField field : schema.fields()) { fieldMap.put(field.name(), field); } @@ -175,7 +178,7 @@ public SparkTable(Identifier identifier, String tablePath, Map o // in snapshotSchema, and we need to preserve the partColNames order for // proper partitioning behavior for (String partColName : partColNames) { - StructField field = fieldMap.get(partColName); + org.apache.spark.sql.types.StructField field = fieldMap.get(partColName); if (field != null) { partitionFields.add(field); } @@ -183,13 +186,17 @@ public SparkTable(Identifier identifier, String tablePath, Map o // Add remaining fields as data fields (non-partition columns) // These are fields that exist in the schema but are not partition columns - for (StructField field : schema.fields()) { + for (org.apache.spark.sql.types.StructField field : schema.fields()) { if (!partColNames.contains(field.name())) { dataFields.add(field); } } - this.dataSchema = new StructType(dataFields.toArray(new StructField[0])); - this.partitionSchema = new StructType(partitionFields.toArray(new StructField[0])); + this.dataSchema = + new org.apache.spark.sql.types.StructType( + dataFields.toArray(new org.apache.spark.sql.types.StructField[0])); + this.partitionSchema = + new org.apache.spark.sql.types.StructType( + partitionFields.toArray(new org.apache.spark.sql.types.StructField[0])); this.columns = CatalogV2Util.structTypeToV2Columns(schema); this.partitionTransforms = @@ -214,7 +221,7 @@ public String name() { } @Override - public StructType schema() { + public org.apache.spark.sql.types.StructType schema() { return schema; } diff --git a/kernel-spark/src/test/java/io/delta/kernel/spark/read/DataTypeReaderTest.java 
b/kernel-spark/src/test/java/io/delta/kernel/spark/read/DataTypeReaderTest.java index f3b88044278..6f2fcb769e1 100644 --- a/kernel-spark/src/test/java/io/delta/kernel/spark/read/DataTypeReaderTest.java +++ b/kernel-spark/src/test/java/io/delta/kernel/spark/read/DataTypeReaderTest.java @@ -17,13 +17,11 @@ import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; import io.delta.golden.GoldenTableUtils$; import java.io.File; -import java.util.List; import org.apache.spark.SparkConf; -import org.apache.spark.sql.Dataset; -import org.apache.spark.sql.Row; import org.apache.spark.sql.SparkSession; import org.apache.spark.sql.connector.read.streaming.Offset; import org.apache.spark.sql.sources.*; @@ -141,74 +139,27 @@ public void testStop_throwsUnsupportedOperationException() { @Test public void testUnsupportedDataTypeValidation() { - // Test the real-world scenario: table created with unsupported mock data type - // then accessed via kernel-spark connector (should detect unsupported type) - - String tablePath = goldenTablePath("kernel-spark-unsupported-datatype-validation"); - - try { - // First, try to describe the table to see its schema - Dataset describeResult = spark.sql("DESCRIBE TABLE `dsv2`.`delta`.`" + tablePath + "`"); - List schemaRows = describeResult.collectAsList(); - - System.out.println("Golden table schema:"); - for (Row row : schemaRows) { - System.out.println(" " + row.getString(0) + " : " + row.getString(1)); - } - - // Check for unsupported mock columns in the described schema - boolean hasUnsupportedColumn = - schemaRows.stream() - .anyMatch( - row -> - "unsupported_column".equals(row.getString(0)) - && (row.getString(1).toLowerCase().contains("unsupported") - || row.getString(1).toLowerCase().contains("mock"))); - - if (hasUnsupportedColumn) { - System.out.println("✓ Golden table contains unsupported mock data type column"); - System.out.println( - " Testing file path access (should trigger UnsupportedDataTypeException)..."); - - try { - // This should go through our kernel-spark connector and trigger validation - Dataset result = spark.sql("SELECT * FROM `dsv2`.`delta`.`" + tablePath + "`"); - List data = result.collectAsList(); - - System.out.println(" WARNING: Query succeeded - validation may not have been triggered"); - System.out.println(" Returned " + data.size() + " rows"); - - } catch (Exception queryException) { - System.out.println(" ✓ Query failed as expected: " + queryException.getMessage()); - if (queryException.getMessage() != null - && (queryException.getMessage().toLowerCase().contains("unsupported") - || queryException.getMessage().toLowerCase().contains("mock"))) { - System.out.println( - " ✓ Failure message indicates unsupported data type validation working"); - } - } - - } else { - assert (false); - System.out.println(" Golden table created without unsupported column"); - System.out.println( - " This means the unsupported mock data type was rejected during golden table creation"); - - // Try to query the table anyway to make sure it works for supported types - Dataset result = spark.sql("SELECT * FROM `dsv2`.`delta`.`" + tablePath + "`"); - List data = result.collectAsList(); - System.out.println(" ✓ Successfully queried fallback table with " + data.size() + " rows"); - } - - } catch (Exception e) { - System.out.println( - "Golden table test failed: " + e.getClass().getSimpleName() + " - " + e.getMessage()); - - // This might 
happen if golden table doesn't exist - if (!new File(tablePath).exists()) { - System.out.println(" Golden table directory does not exist: " + tablePath); - System.out.println(" You may need to run: ./build/sbt \"goldenTables/test\""); - } + String tablePath = goldenTablePath("spark-variant-checkpoint"); + + RuntimeException exception = + assertThrows( + RuntimeException.class, + () -> { + spark.sql("SELECT * FROM `dsv2`.`delta`.`" + tablePath + "`").collect(); + }); + + Throwable rootCause = getRootCause(exception); + String errorMessage = rootCause.getMessage(); + + assertTrue(errorMessage.contains("Unsupported data type")); + assertTrue(errorMessage.contains("variant")); + } + + private Throwable getRootCause(Throwable throwable) { + Throwable cause = throwable; + while (cause.getCause() != null) { + cause = cause.getCause(); } + return cause; } } From 03a4ce659cd58d9c25da4d0058979b189e505ee1 Mon Sep 17 00:00:00 2001 From: Amanda Liu Date: Sun, 5 Oct 2025 12:53:46 -0700 Subject: [PATCH 05/13] test --- .../kernel/spark/read/DataTypeReaderTest.java | 63 ----------------- .../spark/read/SparkScanBuilderTest.java | 47 +++++++++++++ .../spark-variant-checkpoint/info.txt | 2 +- .../defaults/DeltaTableReadsSuite.scala | 67 +++++++++++++++++++ 4 files changed, 115 insertions(+), 64 deletions(-) diff --git a/kernel-spark/src/test/java/io/delta/kernel/spark/read/DataTypeReaderTest.java b/kernel-spark/src/test/java/io/delta/kernel/spark/read/DataTypeReaderTest.java index 6f2fcb769e1..dbba70660c7 100644 --- a/kernel-spark/src/test/java/io/delta/kernel/spark/read/DataTypeReaderTest.java +++ b/kernel-spark/src/test/java/io/delta/kernel/spark/read/DataTypeReaderTest.java @@ -74,69 +74,6 @@ private String goldenTablePath(String name) { return GoldenTableUtils$.MODULE$.goldenTablePath(name); } - @Test - public void testnew() { - UnsupportedOperationException exception = - assertThrows(UnsupportedOperationException.class, () -> microBatchStream.latestOffset()); - assertEquals("latestOffset is not supported", exception.getMessage()); - } - - @Test - public void testLatestOffset_throwsUnsupportedOperationException() { - UnsupportedOperationException exception = - assertThrows(UnsupportedOperationException.class, () -> microBatchStream.latestOffset()); - assertEquals("latestOffset is not supported", exception.getMessage()); - } - - @Test - public void testPlanInputPartitions_throwsUnsupportedOperationException() { - Offset start = null; - Offset end = null; - UnsupportedOperationException exception = - assertThrows( - UnsupportedOperationException.class, - () -> microBatchStream.planInputPartitions(start, end)); - assertEquals("planInputPartitions is not supported", exception.getMessage()); - } - - @Test - public void testCreateReaderFactory_throwsUnsupportedOperationException() { - UnsupportedOperationException exception = - assertThrows( - UnsupportedOperationException.class, () -> microBatchStream.createReaderFactory()); - assertEquals("createReaderFactory is not supported", exception.getMessage()); - } - - @Test - public void testInitialOffset_throwsUnsupportedOperationException() { - UnsupportedOperationException exception = - assertThrows(UnsupportedOperationException.class, () -> microBatchStream.initialOffset()); - assertEquals("initialOffset is not supported", exception.getMessage()); - } - - @Test - public void testDeserializeOffset_throwsUnsupportedOperationException() { - UnsupportedOperationException exception = - assertThrows( - UnsupportedOperationException.class, () -> 
microBatchStream.deserializeOffset("{}")); - assertEquals("deserializeOffset is not supported", exception.getMessage()); - } - - @Test - public void testCommit_throwsUnsupportedOperationException() { - Offset end = null; - UnsupportedOperationException exception = - assertThrows(UnsupportedOperationException.class, () -> microBatchStream.commit(end)); - assertEquals("commit is not supported", exception.getMessage()); - } - - @Test - public void testStop_throwsUnsupportedOperationException() { - UnsupportedOperationException exception = - assertThrows(UnsupportedOperationException.class, () -> microBatchStream.stop()); - assertEquals("stop is not supported", exception.getMessage()); - } - @Test public void testUnsupportedDataTypeValidation() { String tablePath = goldenTablePath("spark-variant-checkpoint"); diff --git a/kernel-spark/src/test/java/io/delta/kernel/spark/read/SparkScanBuilderTest.java b/kernel-spark/src/test/java/io/delta/kernel/spark/read/SparkScanBuilderTest.java index cd32ea01b87..402a8eed6c9 100644 --- a/kernel-spark/src/test/java/io/delta/kernel/spark/read/SparkScanBuilderTest.java +++ b/kernel-spark/src/test/java/io/delta/kernel/spark/read/SparkScanBuilderTest.java @@ -43,6 +43,53 @@ public class SparkScanBuilderTest extends SparkDsv2TestBase { + // @Test + // public void testUnsupportedType(@TempDir File tempDir) { + // String path = tempDir.getAbsolutePath(); + // String tableName = "scan_builder_test"; + // spark.sql( + // String.format( + // "CREATE TABLE %s (id INT, name STRING, dep_id INT) USING delta PARTITIONED + // BY (dep_id) LOCATION '%s'", + // tableName, path)); + // Snapshot snapshot = TableManager.loadSnapshot(path).build(defaultEngine); + // StructType dataSchema = + // DataTypes.createStructType( + // new StructField[] { + // DataTypes.createStructField("id", DataTypes.IntegerType, true), + // DataTypes.createStructField("name", DataTypes.StringType, true), + // DataTypes.createStructField("dep_id", DataTypes.IntegerType, + // true) + // }); + // StructType partitionSchema = + // DataTypes.createStructType( + // new StructField[] {DataTypes.createStructField("dep_id", + // DataTypes.IntegerType, true)}); + // SparkScanBuilder builder = + // new SparkScanBuilder( + // tableName, + // path, + // dataSchema, + // partitionSchema, + // (SnapshotImpl) snapshot, + // CaseInsensitiveStringMap.empty()); + // + // StructType expectedSparkSchema = + // DataTypes.createStructType( + // new StructField[] { + // DataTypes.createStructField("id", DataTypes.IntegerType, true + // /*nullable*/), + // DataTypes.createStructField("dep_id", DataTypes.IntegerType, + // true) + // }); + // + // builder.pruneColumns(expectedSparkSchema); + // Scan scan = builder.build(); + // + // assertTrue(scan instanceof SparkScan); + // assertEquals(expectedSparkSchema, scan.readSchema()); + // } + @Test public void testBuild_returnsScanWithExpectedSchema(@TempDir File tempDir) { String path = tempDir.getAbsolutePath(); diff --git a/kernel/kernel-defaults/src/test/resources/spark-variant-checkpoint/info.txt b/kernel/kernel-defaults/src/test/resources/spark-variant-checkpoint/info.txt index f3c0947bf5f..e4c1508e318 100644 --- a/kernel/kernel-defaults/src/test/resources/spark-variant-checkpoint/info.txt +++ b/kernel/kernel-defaults/src/test/resources/spark-variant-checkpoint/info.txt @@ -2,7 +2,7 @@ This file contains the code used to generate this golden table "spark-variant-ch Using delta-spark 4.0, run the following scala script: -val tableName = "" +val tableName = 
"./connectors/golden-tables/src/main/resources/golden/spark-variant-checkpoint" val query = """ with jsonStrings as ( select diff --git a/kernel/kernel-defaults/src/test/scala/io/delta/kernel/defaults/DeltaTableReadsSuite.scala b/kernel/kernel-defaults/src/test/scala/io/delta/kernel/defaults/DeltaTableReadsSuite.scala index ee63ac72142..8bdd5746d28 100644 --- a/kernel/kernel-defaults/src/test/scala/io/delta/kernel/defaults/DeltaTableReadsSuite.scala +++ b/kernel/kernel-defaults/src/test/scala/io/delta/kernel/defaults/DeltaTableReadsSuite.scala @@ -1080,6 +1080,73 @@ trait AbstractDeltaTableReadsSuite extends AnyFunSuite { self: AbstractTestUtils } } +// test("create table") { +// val tableName = +// "`./connectors/golden-tables/src/main/resources/golden/spark-variant-checkpoint`" +// val query = +// """ +// with jsonStrings as ( +// select +// id, +// format_string('{"key": %s}', id) as jsonString +// from +// range(0, 100) +// ) +// select +// id, +// parse_json(jsonString) as v, +// array( +// parse_json(jsonString), +// null, +// parse_json(jsonString), +// null, +// parse_json(jsonString) +// ) as array_of_variants, +// named_struct('v', parse_json(jsonString)) as struct_of_variants, +// map( +// cast(id as string), +// parse_json(jsonString), +// 'nullKey', +// null +// ) as map_of_variants, +// array( +// named_struct('v', parse_json(jsonString)), +// named_struct('v', null), +// null, +// named_struct( +// 'v', +// parse_json(jsonString) +// ), +// null, +// named_struct( +// 'v', +// parse_json(jsonString) +// ) +// ) as array_of_struct_of_variants, +// named_struct( +// 'v', +// array( +// null, +// parse_json(jsonString) +// ) +// ) as struct_of_array_of_variants +// from +// jsonStrings +// """ +// +// val writeToTableSql = +// s""" +// create or replace table $tableName +// USING DELTA TBLPROPERTIES (delta.checkpointInterval = 2) +// """ +// +// spark.sql(s"${writeToTableSql}\n${query}") +// // Write two additional rows to create a checkpoint. 
+// (0 until 2).foreach { v => +// spark.sql(query).where(s"id = $v").write.format("delta").mode("append").insertInto(tableName) +// } +// } + /////////////////////////////////////////////////////////////////////////////////////////////// // getVersionBeforeOrAtTimestamp + getVersionAtOrAfterTimestamp tests // (more in TableImplSuite and DeltaHistoryManagerSuite) From b6296f9b5cea2c6771757f87509ff77194107747 Mon Sep 17 00:00:00 2001 From: Amanda Liu Date: Sun, 5 Oct 2025 12:56:38 -0700 Subject: [PATCH 06/13] test --- .../kernel/spark/read/DataTypeReaderTest.java | 22 ++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/kernel-spark/src/test/java/io/delta/kernel/spark/read/DataTypeReaderTest.java b/kernel-spark/src/test/java/io/delta/kernel/spark/read/DataTypeReaderTest.java index dbba70660c7..1265d6a3ebf 100644 --- a/kernel-spark/src/test/java/io/delta/kernel/spark/read/DataTypeReaderTest.java +++ b/kernel-spark/src/test/java/io/delta/kernel/spark/read/DataTypeReaderTest.java @@ -15,7 +15,6 @@ */ package io.delta.kernel.spark.read; -import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertThrows; import static org.junit.jupiter.api.Assertions.assertTrue; @@ -23,7 +22,6 @@ import java.io.File; import org.apache.spark.SparkConf; import org.apache.spark.sql.SparkSession; -import org.apache.spark.sql.connector.read.streaming.Offset; import org.apache.spark.sql.sources.*; import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.BeforeAll; @@ -75,7 +73,7 @@ private String goldenTablePath(String name) { } @Test - public void testUnsupportedDataTypeValidation() { + public void testVariantUnsupportedDataType() { String tablePath = goldenTablePath("spark-variant-checkpoint"); RuntimeException exception = @@ -92,6 +90,24 @@ public void testUnsupportedDataTypeValidation() { assertTrue(errorMessage.contains("variant")); } + @Test + public void testTimestampUnsupportedDataType() { + String tablePath = goldenTablePath("kernel-timestamp-PST"); + + RuntimeException exception = + assertThrows( + RuntimeException.class, + () -> { + spark.sql("SELECT * FROM `dsv2`.`delta`.`" + tablePath + "`").collect(); + }); + + Throwable rootCause = getRootCause(exception); + String errorMessage = rootCause.getMessage(); + + assertTrue(errorMessage.contains("Unsupported data type")); + assertTrue(errorMessage.contains("timestamp")); + } + private Throwable getRootCause(Throwable throwable) { Throwable cause = throwable; while (cause.getCause() != null) { From 0831eb29ab9b3952e7b217aeb461d1e7ae8e733e Mon Sep 17 00:00:00 2001 From: Amanda Liu Date: Sun, 5 Oct 2025 12:57:46 -0700 Subject: [PATCH 07/13] restore --- .../spark/read/SparkScanBuilderTest.java | 47 ------------- .../spark-variant-checkpoint/info.txt | 2 +- .../defaults/DeltaTableReadsSuite.scala | 67 ------------------- 3 files changed, 1 insertion(+), 115 deletions(-) diff --git a/kernel-spark/src/test/java/io/delta/kernel/spark/read/SparkScanBuilderTest.java b/kernel-spark/src/test/java/io/delta/kernel/spark/read/SparkScanBuilderTest.java index 402a8eed6c9..cd32ea01b87 100644 --- a/kernel-spark/src/test/java/io/delta/kernel/spark/read/SparkScanBuilderTest.java +++ b/kernel-spark/src/test/java/io/delta/kernel/spark/read/SparkScanBuilderTest.java @@ -43,53 +43,6 @@ public class SparkScanBuilderTest extends SparkDsv2TestBase { - // @Test - // public void testUnsupportedType(@TempDir File tempDir) { - // String path = tempDir.getAbsolutePath(); - // String tableName = 
"scan_builder_test"; - // spark.sql( - // String.format( - // "CREATE TABLE %s (id INT, name STRING, dep_id INT) USING delta PARTITIONED - // BY (dep_id) LOCATION '%s'", - // tableName, path)); - // Snapshot snapshot = TableManager.loadSnapshot(path).build(defaultEngine); - // StructType dataSchema = - // DataTypes.createStructType( - // new StructField[] { - // DataTypes.createStructField("id", DataTypes.IntegerType, true), - // DataTypes.createStructField("name", DataTypes.StringType, true), - // DataTypes.createStructField("dep_id", DataTypes.IntegerType, - // true) - // }); - // StructType partitionSchema = - // DataTypes.createStructType( - // new StructField[] {DataTypes.createStructField("dep_id", - // DataTypes.IntegerType, true)}); - // SparkScanBuilder builder = - // new SparkScanBuilder( - // tableName, - // path, - // dataSchema, - // partitionSchema, - // (SnapshotImpl) snapshot, - // CaseInsensitiveStringMap.empty()); - // - // StructType expectedSparkSchema = - // DataTypes.createStructType( - // new StructField[] { - // DataTypes.createStructField("id", DataTypes.IntegerType, true - // /*nullable*/), - // DataTypes.createStructField("dep_id", DataTypes.IntegerType, - // true) - // }); - // - // builder.pruneColumns(expectedSparkSchema); - // Scan scan = builder.build(); - // - // assertTrue(scan instanceof SparkScan); - // assertEquals(expectedSparkSchema, scan.readSchema()); - // } - @Test public void testBuild_returnsScanWithExpectedSchema(@TempDir File tempDir) { String path = tempDir.getAbsolutePath(); diff --git a/kernel/kernel-defaults/src/test/resources/spark-variant-checkpoint/info.txt b/kernel/kernel-defaults/src/test/resources/spark-variant-checkpoint/info.txt index e4c1508e318..f3c0947bf5f 100644 --- a/kernel/kernel-defaults/src/test/resources/spark-variant-checkpoint/info.txt +++ b/kernel/kernel-defaults/src/test/resources/spark-variant-checkpoint/info.txt @@ -2,7 +2,7 @@ This file contains the code used to generate this golden table "spark-variant-ch Using delta-spark 4.0, run the following scala script: -val tableName = "./connectors/golden-tables/src/main/resources/golden/spark-variant-checkpoint" +val tableName = "" val query = """ with jsonStrings as ( select diff --git a/kernel/kernel-defaults/src/test/scala/io/delta/kernel/defaults/DeltaTableReadsSuite.scala b/kernel/kernel-defaults/src/test/scala/io/delta/kernel/defaults/DeltaTableReadsSuite.scala index 8bdd5746d28..ee63ac72142 100644 --- a/kernel/kernel-defaults/src/test/scala/io/delta/kernel/defaults/DeltaTableReadsSuite.scala +++ b/kernel/kernel-defaults/src/test/scala/io/delta/kernel/defaults/DeltaTableReadsSuite.scala @@ -1080,73 +1080,6 @@ trait AbstractDeltaTableReadsSuite extends AnyFunSuite { self: AbstractTestUtils } } -// test("create table") { -// val tableName = -// "`./connectors/golden-tables/src/main/resources/golden/spark-variant-checkpoint`" -// val query = -// """ -// with jsonStrings as ( -// select -// id, -// format_string('{"key": %s}', id) as jsonString -// from -// range(0, 100) -// ) -// select -// id, -// parse_json(jsonString) as v, -// array( -// parse_json(jsonString), -// null, -// parse_json(jsonString), -// null, -// parse_json(jsonString) -// ) as array_of_variants, -// named_struct('v', parse_json(jsonString)) as struct_of_variants, -// map( -// cast(id as string), -// parse_json(jsonString), -// 'nullKey', -// null -// ) as map_of_variants, -// array( -// named_struct('v', parse_json(jsonString)), -// named_struct('v', null), -// null, -// named_struct( -// 'v', -// 
parse_json(jsonString)
-//        ),
-//        null,
-//        named_struct(
-//          'v',
-//          parse_json(jsonString)
-//        )
-//      ) as array_of_struct_of_variants,
-//      named_struct(
-//        'v',
-//        array(
-//          null,
-//          parse_json(jsonString)
-//        )
-//      ) as struct_of_array_of_variants
-//    from
-//      jsonStrings
-//  """
-//
-//    val writeToTableSql =
-//      s"""
-//         create or replace table $tableName
-//         USING DELTA TBLPROPERTIES (delta.checkpointInterval = 2)
-//    """
-//
-//    spark.sql(s"${writeToTableSql}\n${query}")
-//    // Write two additional rows to create a checkpoint.
-//    (0 until 2).foreach { v =>
-//      spark.sql(query).where(s"id = $v").write.format("delta").mode("append").insertInto(tableName)
-//    }
-//  }
-
 ///////////////////////////////////////////////////////////////////////////////////////////////
 // getVersionBeforeOrAtTimestamp + getVersionAtOrAfterTimestamp tests
 // (more in TableImplSuite and DeltaHistoryManagerSuite)

From 95e7b4386d8b8ffc6722cd048a5dddc560a614ef Mon Sep 17 00:00:00 2001
From: Amanda Liu
Date: Sun, 5 Oct 2025 12:59:24 -0700
Subject: [PATCH 08/13] goldentabletest

---
 .../spark/read/SparkGoldenTableTest.java      | 44 +++++++++++++++++++
 1 file changed, 44 insertions(+)

diff --git a/kernel-spark/src/test/java/io/delta/kernel/spark/read/SparkGoldenTableTest.java b/kernel-spark/src/test/java/io/delta/kernel/spark/read/SparkGoldenTableTest.java
index 88c62109109..21701edcb14 100644
--- a/kernel-spark/src/test/java/io/delta/kernel/spark/read/SparkGoldenTableTest.java
+++ b/kernel-spark/src/test/java/io/delta/kernel/spark/read/SparkGoldenTableTest.java
@@ -516,6 +516,50 @@ public Dataset<Row> apply() {
     checkAnswer(dfFunc, expectedSeq);
   }
 
+  @Test
+  public void testVariantUnsupportedDataType() {
+    String tablePath = goldenTablePath("spark-variant-checkpoint");
+
+    RuntimeException exception =
+        assertThrows(
+            RuntimeException.class,
+            () -> {
+              spark.sql("SELECT * FROM `dsv2`.`delta`.`" + tablePath + "`").collect();
+            });
+
+    Throwable rootCause = getRootCause(exception);
+    String errorMessage = rootCause.getMessage();
+
+    assertTrue(errorMessage.contains("Unsupported data type"));
+    assertTrue(errorMessage.contains("variant"));
+  }
+
+  @Test
+  public void testTimestampUnsupportedDataType() {
+    String tablePath = goldenTablePath("kernel-timestamp-PST");
+
+    RuntimeException exception =
+        assertThrows(
+            RuntimeException.class,
+            () -> {
+              spark.sql("SELECT * FROM `dsv2`.`delta`.`" + tablePath + "`").collect();
+            });
+
+    Throwable rootCause = getRootCause(exception);
+    String errorMessage = rootCause.getMessage();
+
+    assertTrue(errorMessage.contains("Unsupported data type"));
+    assertTrue(errorMessage.contains("timestamp"));
+  }
+
+  private Throwable getRootCause(Throwable throwable) {
+    Throwable cause = throwable;
+    while (cause.getCause() != null) {
+      cause = cause.getCause();
+    }
+    return cause;
+  }
+
   @Test
   public void testAllGoldenTables() {
     List<String> tableNames = getAllGoldenTableNames();

From 981fd4211bf2920f66c87a50792bfa6ab017d713 Mon Sep 17 00:00:00 2001
From: Amanda Liu
Date: Sun, 5 Oct 2025 22:47:27 -0700
Subject: [PATCH 09/13] types

---
 .../kernel/spark/catalog/SparkTable.java      | 29 +++++++++++--------
 .../spark/read/SparkGoldenTableTest.java      | 18 ------------
 2 files changed, 17 insertions(+), 30 deletions(-)

diff --git a/kernel-spark/src/main/java/io/delta/kernel/spark/catalog/SparkTable.java b/kernel-spark/src/main/java/io/delta/kernel/spark/catalog/SparkTable.java
index d16bbeb0642..c5ec580900f 100644
--- a/kernel-spark/src/main/java/io/delta/kernel/spark/catalog/SparkTable.java
+++
b/kernel-spark/src/main/java/io/delta/kernel/spark/catalog/SparkTable.java @@ -114,21 +114,26 @@ private static void validateKernelDataType(DataType dataType, String fieldPath) * @return true if the data type is allowed, false otherwise */ private static boolean isAllowedKernelType(DataType dataType) { - return dataType instanceof ByteType - || dataType instanceof ShortType + // allowed data types in scala-2.12 + return dataType instanceof ArrayType + || dataType instanceof BinaryType + || dataType instanceof BooleanType + || dataType instanceof ByteType + || dataType instanceof DataType + || dataType instanceof DateType + || dataType instanceof DecimalType + || dataType instanceof DoubleType + || dataType instanceof FieldMetadata + || dataType instanceof FloatType || dataType instanceof IntegerType || dataType instanceof LongType - || dataType instanceof FloatType - || dataType instanceof DoubleType - || dataType instanceof DecimalType + || dataType instanceof MapType + || dataType instanceof NullType + || dataType instanceof ShortType || dataType instanceof StringType - || dataType instanceof BinaryType - || dataType instanceof BooleanType - || - // Allow complex types but validate their nested types - dataType instanceof io.delta.kernel.types.StructType - || dataType instanceof ArrayType - || dataType instanceof MapType; + || dataType instanceof StructField + || dataType instanceof StructType + || dataType instanceof TimestampType; } /** diff --git a/kernel-spark/src/test/java/io/delta/kernel/spark/read/SparkGoldenTableTest.java b/kernel-spark/src/test/java/io/delta/kernel/spark/read/SparkGoldenTableTest.java index 21701edcb14..70045e12fcc 100644 --- a/kernel-spark/src/test/java/io/delta/kernel/spark/read/SparkGoldenTableTest.java +++ b/kernel-spark/src/test/java/io/delta/kernel/spark/read/SparkGoldenTableTest.java @@ -534,24 +534,6 @@ public void testVariantUnsupportedDataType() { assertTrue(errorMessage.contains("variant")); } - @Test - public void testTimestampUnsupportedDataType() { - String tablePath = goldenTablePath("kernel-timestamp-PST"); - - RuntimeException exception = - assertThrows( - RuntimeException.class, - () -> { - spark.sql("SELECT * FROM `dsv2`.`delta`.`" + tablePath + "`").collect(); - }); - - Throwable rootCause = getRootCause(exception); - String errorMessage = rootCause.getMessage(); - - assertTrue(errorMessage.contains("Unsupported data type")); - assertTrue(errorMessage.contains("timestamp")); - } - private Throwable getRootCause(Throwable throwable) { Throwable cause = throwable; while (cause.getCause() != null) { From 19945ca2a96ef241f05f977af378b1922116e550 Mon Sep 17 00:00:00 2001 From: Amanda Liu Date: Mon, 6 Oct 2025 07:09:44 -0700 Subject: [PATCH 10/13] types --- .../delta/kernel/spark/catalog/SparkTable.java | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/kernel-spark/src/main/java/io/delta/kernel/spark/catalog/SparkTable.java b/kernel-spark/src/main/java/io/delta/kernel/spark/catalog/SparkTable.java index c5ec580900f..552a3be9753 100644 --- a/kernel-spark/src/main/java/io/delta/kernel/spark/catalog/SparkTable.java +++ b/kernel-spark/src/main/java/io/delta/kernel/spark/catalog/SparkTable.java @@ -26,6 +26,7 @@ import io.delta.kernel.types.BooleanType; import io.delta.kernel.types.ByteType; import io.delta.kernel.types.DataType; +import io.delta.kernel.types.DateType; import io.delta.kernel.types.DecimalType; import io.delta.kernel.types.DoubleType; import io.delta.kernel.types.FloatType; @@ -34,6 +35,9 @@ import 
io.delta.kernel.types.MapType; import io.delta.kernel.types.ShortType; import io.delta.kernel.types.StringType; +import io.delta.kernel.types.TimestampNTZType; +import io.delta.kernel.types.TimestampType; +import io.delta.kernel.types.VariantType; import java.util.*; import org.apache.hadoop.conf.Configuration; import org.apache.spark.sql.SparkSession; @@ -114,26 +118,24 @@ private static void validateKernelDataType(DataType dataType, String fieldPath) * @return true if the data type is allowed, false otherwise */ private static boolean isAllowedKernelType(DataType dataType) { - // allowed data types in scala-2.12 + // allowed kernel data types return dataType instanceof ArrayType || dataType instanceof BinaryType || dataType instanceof BooleanType || dataType instanceof ByteType - || dataType instanceof DataType || dataType instanceof DateType || dataType instanceof DecimalType || dataType instanceof DoubleType - || dataType instanceof FieldMetadata || dataType instanceof FloatType || dataType instanceof IntegerType || dataType instanceof LongType || dataType instanceof MapType - || dataType instanceof NullType || dataType instanceof ShortType || dataType instanceof StringType - || dataType instanceof StructField - || dataType instanceof StructType - || dataType instanceof TimestampType; + || dataType instanceof io.delta.kernel.types.StructType + || dataType instanceof TimestampType + || dataType instanceof TimestampNTZType + || dataType instanceof VariantType; } /** From 1f40d478ba77569a9246fa4f53d0be864a807aee Mon Sep 17 00:00:00 2001 From: Amanda Liu Date: Mon, 6 Oct 2025 08:50:41 -0700 Subject: [PATCH 11/13] import --- .../golden/spark-variant-checkpoint/info.txt | 71 ------------------- .../spark/read/SparkGoldenTableTest.java | 1 + 2 files changed, 1 insertion(+), 71 deletions(-) delete mode 100644 connectors/golden-tables/src/main/resources/golden/spark-variant-checkpoint/info.txt diff --git a/connectors/golden-tables/src/main/resources/golden/spark-variant-checkpoint/info.txt b/connectors/golden-tables/src/main/resources/golden/spark-variant-checkpoint/info.txt deleted file mode 100644 index e4c1508e318..00000000000 --- a/connectors/golden-tables/src/main/resources/golden/spark-variant-checkpoint/info.txt +++ /dev/null @@ -1,71 +0,0 @@ -This file contains the code used to generate this golden table "spark-variant-checkpoint" - -Using delta-spark 4.0, run the following scala script: - -val tableName = "./connectors/golden-tables/src/main/resources/golden/spark-variant-checkpoint" -val query = """ - with jsonStrings as ( - select - id, - format_string('{"key": %s}', id) as jsonString - from - range(0, 100) - ) - select - id, - parse_json(jsonString) as v, - array( - parse_json(jsonString), - null, - parse_json(jsonString), - null, - parse_json(jsonString) - ) as array_of_variants, - named_struct('v', parse_json(jsonString)) as struct_of_variants, - map( - cast(id as string), - parse_json(jsonString), - 'nullKey', - null - ) as map_of_variants, - array( - named_struct('v', parse_json(jsonString)), - named_struct('v', null), - null, - named_struct( - 'v', - parse_json(jsonString) - ), - null, - named_struct( - 'v', - parse_json(jsonString) - ) - ) as array_of_struct_of_variants, - named_struct( - 'v', - array( - null, - parse_json(jsonString) - ) - ) as struct_of_array_of_variants - from - jsonStrings -""" - -val writeToTableSql = s""" - create or replace table $tableName - USING DELTA TBLPROPERTIES (delta.checkpointInterval = 2) -""" - -spark.sql(s"${writeToTableSql}\n${query}") -// 
Write two additional rows to create a checkpoint. -(0 until 2).foreach { v => - spark - .sql(query) - .where(s"id = $v") - .write - .format("delta") - .mode("append") - .insertInto(tableName) -} diff --git a/kernel-spark/src/test/java/io/delta/kernel/spark/read/SparkGoldenTableTest.java b/kernel-spark/src/test/java/io/delta/kernel/spark/read/SparkGoldenTableTest.java index 70045e12fcc..3af1d4084d4 100644 --- a/kernel-spark/src/test/java/io/delta/kernel/spark/read/SparkGoldenTableTest.java +++ b/kernel-spark/src/test/java/io/delta/kernel/spark/read/SparkGoldenTableTest.java @@ -16,6 +16,7 @@ package io.delta.kernel.spark.read; import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertThrows; import static org.junit.jupiter.api.Assertions.assertTrue; import io.delta.golden.GoldenTableUtils$; From 3bff5fc595ca20d83bcc0c31c5171b951f99a773 Mon Sep 17 00:00:00 2001 From: Amanda Liu Date: Tue, 7 Oct 2025 09:20:43 -0700 Subject: [PATCH 12/13] rm --- .../kernel/spark/read/DataTypeReaderTest.java | 118 ------------------ 1 file changed, 118 deletions(-) delete mode 100644 kernel-spark/src/test/java/io/delta/kernel/spark/read/DataTypeReaderTest.java diff --git a/kernel-spark/src/test/java/io/delta/kernel/spark/read/DataTypeReaderTest.java b/kernel-spark/src/test/java/io/delta/kernel/spark/read/DataTypeReaderTest.java deleted file mode 100644 index 1265d6a3ebf..00000000000 --- a/kernel-spark/src/test/java/io/delta/kernel/spark/read/DataTypeReaderTest.java +++ /dev/null @@ -1,118 +0,0 @@ -/* - * Copyright (2025) The Delta Lake Project Authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package io.delta.kernel.spark.read; - -import static org.junit.jupiter.api.Assertions.assertThrows; -import static org.junit.jupiter.api.Assertions.assertTrue; - -import io.delta.golden.GoldenTableUtils$; -import java.io.File; -import org.apache.spark.SparkConf; -import org.apache.spark.sql.SparkSession; -import org.apache.spark.sql.sources.*; -import org.junit.jupiter.api.AfterAll; -import org.junit.jupiter.api.BeforeAll; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; -import org.junit.jupiter.api.TestInstance; - -@TestInstance(TestInstance.Lifecycle.PER_CLASS) -public class DataTypeReaderTest { - - private SparkMicroBatchStream microBatchStream; - private SparkSession spark; - - @BeforeAll - public void setUpClass() { - // Create a temporary directory manually instead of using @TempDir - File tempDir = - new File(System.getProperty("java.io.tmpdir"), "datatype-reader-test-" + System.nanoTime()); - tempDir.mkdirs(); - tempDir.deleteOnExit(); - - SparkConf conf = - new SparkConf() - .set("spark.sql.catalog.dsv2", "io.delta.kernel.spark.catalog.TestCatalog") - .set("spark.sql.catalog.dsv2.base_path", tempDir.getAbsolutePath()) - .set("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension") - .set( - "spark.sql.catalog.spark_catalog", - "org.apache.spark.sql.delta.catalog.DeltaCatalog") - .setMaster("local[*]") - .setAppName("DataTypeReaderTest"); - spark = SparkSession.builder().config(conf).getOrCreate(); - } - - @AfterAll - public void tearDownClass() { - if (spark != null) { - spark.stop(); - } - } - - @BeforeEach - void setUp() { - microBatchStream = new SparkMicroBatchStream(); - } - - private String goldenTablePath(String name) { - return GoldenTableUtils$.MODULE$.goldenTablePath(name); - } - - @Test - public void testVariantUnsupportedDataType() { - String tablePath = goldenTablePath("spark-variant-checkpoint"); - - RuntimeException exception = - assertThrows( - RuntimeException.class, - () -> { - spark.sql("SELECT * FROM `dsv2`.`delta`.`" + tablePath + "`").collect(); - }); - - Throwable rootCause = getRootCause(exception); - String errorMessage = rootCause.getMessage(); - - assertTrue(errorMessage.contains("Unsupported data type")); - assertTrue(errorMessage.contains("variant")); - } - - @Test - public void testTimestampUnsupportedDataType() { - String tablePath = goldenTablePath("kernel-timestamp-PST"); - - RuntimeException exception = - assertThrows( - RuntimeException.class, - () -> { - spark.sql("SELECT * FROM `dsv2`.`delta`.`" + tablePath + "`").collect(); - }); - - Throwable rootCause = getRootCause(exception); - String errorMessage = rootCause.getMessage(); - - assertTrue(errorMessage.contains("Unsupported data type")); - assertTrue(errorMessage.contains("timestamp")); - } - - private Throwable getRootCause(Throwable throwable) { - Throwable cause = throwable; - while (cause.getCause() != null) { - cause = cause.getCause(); - } - return cause; - } -} From 0b868133bd3ad839a60fdb39cc333978b18648bd Mon Sep 17 00:00:00 2001 From: Amanda Liu Date: Mon, 13 Oct 2025 07:36:13 -0700 Subject: [PATCH 13/13] rename variant --- ...6-4080-be25-530a05922422-c000.snappy.parquet.crc | Bin ...4-46d1-96fb-48b4912507ce-c000.snappy.parquet.crc | Bin ...2-4322-ad2f-8c837a77d398-c000.snappy.parquet.crc | Bin ...4-4dbf-8110-77001b877182-c000.snappy.parquet.crc | Bin .../_delta_log/.00000000000000000000.json.crc | Bin .../_delta_log/.00000000000000000001.json.crc | Bin .../.00000000000000000002.checkpoint.parquet.crc | Bin 
.../_delta_log/.00000000000000000002.json.crc | Bin .../_delta_log/00000000000000000000.json | 0 .../_delta_log/00000000000000000001.json | 0 .../00000000000000000002.checkpoint.parquet | Bin .../_delta_log/00000000000000000002.json | 0 .../_delta_log/_last_checkpoint | 0 ...-ba66-4080-be25-530a05922422-c000.snappy.parquet | Bin ...-3114-46d1-96fb-48b4912507ce-c000.snappy.parquet | Bin ...-ee32-4322-ad2f-8c837a77d398-c000.snappy.parquet | Bin ...-14b4-4dbf-8110-77001b877182-c000.snappy.parquet | Bin 17 files changed, 0 insertions(+), 0 deletions(-) rename connectors/golden-tables/src/main/resources/golden/{spark-variant-checkpoint => variant}/.part-00000-16c852df-ba66-4080-be25-530a05922422-c000.snappy.parquet.crc (100%) rename connectors/golden-tables/src/main/resources/golden/{spark-variant-checkpoint => variant}/.part-00000-1e14ba22-3114-46d1-96fb-48b4912507ce-c000.snappy.parquet.crc (100%) rename connectors/golden-tables/src/main/resources/golden/{spark-variant-checkpoint => variant}/.part-00000-9a9c570c-ee32-4322-ad2f-8c837a77d398-c000.snappy.parquet.crc (100%) rename connectors/golden-tables/src/main/resources/golden/{spark-variant-checkpoint => variant}/.part-00001-664313d3-14b4-4dbf-8110-77001b877182-c000.snappy.parquet.crc (100%) rename connectors/golden-tables/src/main/resources/golden/{spark-variant-checkpoint => variant}/_delta_log/.00000000000000000000.json.crc (100%) rename connectors/golden-tables/src/main/resources/golden/{spark-variant-checkpoint => variant}/_delta_log/.00000000000000000001.json.crc (100%) rename connectors/golden-tables/src/main/resources/golden/{spark-variant-checkpoint => variant}/_delta_log/.00000000000000000002.checkpoint.parquet.crc (100%) rename connectors/golden-tables/src/main/resources/golden/{spark-variant-checkpoint => variant}/_delta_log/.00000000000000000002.json.crc (100%) rename connectors/golden-tables/src/main/resources/golden/{spark-variant-checkpoint => variant}/_delta_log/00000000000000000000.json (100%) rename connectors/golden-tables/src/main/resources/golden/{spark-variant-checkpoint => variant}/_delta_log/00000000000000000001.json (100%) rename connectors/golden-tables/src/main/resources/golden/{spark-variant-checkpoint => variant}/_delta_log/00000000000000000002.checkpoint.parquet (100%) rename connectors/golden-tables/src/main/resources/golden/{spark-variant-checkpoint => variant}/_delta_log/00000000000000000002.json (100%) rename connectors/golden-tables/src/main/resources/golden/{spark-variant-checkpoint => variant}/_delta_log/_last_checkpoint (100%) rename connectors/golden-tables/src/main/resources/golden/{spark-variant-checkpoint => variant}/part-00000-16c852df-ba66-4080-be25-530a05922422-c000.snappy.parquet (100%) rename connectors/golden-tables/src/main/resources/golden/{spark-variant-checkpoint => variant}/part-00000-1e14ba22-3114-46d1-96fb-48b4912507ce-c000.snappy.parquet (100%) rename connectors/golden-tables/src/main/resources/golden/{spark-variant-checkpoint => variant}/part-00000-9a9c570c-ee32-4322-ad2f-8c837a77d398-c000.snappy.parquet (100%) rename connectors/golden-tables/src/main/resources/golden/{spark-variant-checkpoint => variant}/part-00001-664313d3-14b4-4dbf-8110-77001b877182-c000.snappy.parquet (100%) diff --git a/connectors/golden-tables/src/main/resources/golden/spark-variant-checkpoint/.part-00000-16c852df-ba66-4080-be25-530a05922422-c000.snappy.parquet.crc b/connectors/golden-tables/src/main/resources/golden/variant/.part-00000-16c852df-ba66-4080-be25-530a05922422-c000.snappy.parquet.crc similarity index 
100% rename from connectors/golden-tables/src/main/resources/golden/spark-variant-checkpoint/.part-00000-16c852df-ba66-4080-be25-530a05922422-c000.snappy.parquet.crc rename to connectors/golden-tables/src/main/resources/golden/variant/.part-00000-16c852df-ba66-4080-be25-530a05922422-c000.snappy.parquet.crc diff --git a/connectors/golden-tables/src/main/resources/golden/spark-variant-checkpoint/.part-00000-1e14ba22-3114-46d1-96fb-48b4912507ce-c000.snappy.parquet.crc b/connectors/golden-tables/src/main/resources/golden/variant/.part-00000-1e14ba22-3114-46d1-96fb-48b4912507ce-c000.snappy.parquet.crc similarity index 100% rename from connectors/golden-tables/src/main/resources/golden/spark-variant-checkpoint/.part-00000-1e14ba22-3114-46d1-96fb-48b4912507ce-c000.snappy.parquet.crc rename to connectors/golden-tables/src/main/resources/golden/variant/.part-00000-1e14ba22-3114-46d1-96fb-48b4912507ce-c000.snappy.parquet.crc diff --git a/connectors/golden-tables/src/main/resources/golden/spark-variant-checkpoint/.part-00000-9a9c570c-ee32-4322-ad2f-8c837a77d398-c000.snappy.parquet.crc b/connectors/golden-tables/src/main/resources/golden/variant/.part-00000-9a9c570c-ee32-4322-ad2f-8c837a77d398-c000.snappy.parquet.crc similarity index 100% rename from connectors/golden-tables/src/main/resources/golden/spark-variant-checkpoint/.part-00000-9a9c570c-ee32-4322-ad2f-8c837a77d398-c000.snappy.parquet.crc rename to connectors/golden-tables/src/main/resources/golden/variant/.part-00000-9a9c570c-ee32-4322-ad2f-8c837a77d398-c000.snappy.parquet.crc diff --git a/connectors/golden-tables/src/main/resources/golden/spark-variant-checkpoint/.part-00001-664313d3-14b4-4dbf-8110-77001b877182-c000.snappy.parquet.crc b/connectors/golden-tables/src/main/resources/golden/variant/.part-00001-664313d3-14b4-4dbf-8110-77001b877182-c000.snappy.parquet.crc similarity index 100% rename from connectors/golden-tables/src/main/resources/golden/spark-variant-checkpoint/.part-00001-664313d3-14b4-4dbf-8110-77001b877182-c000.snappy.parquet.crc rename to connectors/golden-tables/src/main/resources/golden/variant/.part-00001-664313d3-14b4-4dbf-8110-77001b877182-c000.snappy.parquet.crc diff --git a/connectors/golden-tables/src/main/resources/golden/spark-variant-checkpoint/_delta_log/.00000000000000000000.json.crc b/connectors/golden-tables/src/main/resources/golden/variant/_delta_log/.00000000000000000000.json.crc similarity index 100% rename from connectors/golden-tables/src/main/resources/golden/spark-variant-checkpoint/_delta_log/.00000000000000000000.json.crc rename to connectors/golden-tables/src/main/resources/golden/variant/_delta_log/.00000000000000000000.json.crc diff --git a/connectors/golden-tables/src/main/resources/golden/spark-variant-checkpoint/_delta_log/.00000000000000000001.json.crc b/connectors/golden-tables/src/main/resources/golden/variant/_delta_log/.00000000000000000001.json.crc similarity index 100% rename from connectors/golden-tables/src/main/resources/golden/spark-variant-checkpoint/_delta_log/.00000000000000000001.json.crc rename to connectors/golden-tables/src/main/resources/golden/variant/_delta_log/.00000000000000000001.json.crc diff --git a/connectors/golden-tables/src/main/resources/golden/spark-variant-checkpoint/_delta_log/.00000000000000000002.checkpoint.parquet.crc b/connectors/golden-tables/src/main/resources/golden/variant/_delta_log/.00000000000000000002.checkpoint.parquet.crc similarity index 100% rename from 
connectors/golden-tables/src/main/resources/golden/spark-variant-checkpoint/_delta_log/.00000000000000000002.checkpoint.parquet.crc rename to connectors/golden-tables/src/main/resources/golden/variant/_delta_log/.00000000000000000002.checkpoint.parquet.crc diff --git a/connectors/golden-tables/src/main/resources/golden/spark-variant-checkpoint/_delta_log/.00000000000000000002.json.crc b/connectors/golden-tables/src/main/resources/golden/variant/_delta_log/.00000000000000000002.json.crc similarity index 100% rename from connectors/golden-tables/src/main/resources/golden/spark-variant-checkpoint/_delta_log/.00000000000000000002.json.crc rename to connectors/golden-tables/src/main/resources/golden/variant/_delta_log/.00000000000000000002.json.crc diff --git a/connectors/golden-tables/src/main/resources/golden/spark-variant-checkpoint/_delta_log/00000000000000000000.json b/connectors/golden-tables/src/main/resources/golden/variant/_delta_log/00000000000000000000.json similarity index 100% rename from connectors/golden-tables/src/main/resources/golden/spark-variant-checkpoint/_delta_log/00000000000000000000.json rename to connectors/golden-tables/src/main/resources/golden/variant/_delta_log/00000000000000000000.json diff --git a/connectors/golden-tables/src/main/resources/golden/spark-variant-checkpoint/_delta_log/00000000000000000001.json b/connectors/golden-tables/src/main/resources/golden/variant/_delta_log/00000000000000000001.json similarity index 100% rename from connectors/golden-tables/src/main/resources/golden/spark-variant-checkpoint/_delta_log/00000000000000000001.json rename to connectors/golden-tables/src/main/resources/golden/variant/_delta_log/00000000000000000001.json diff --git a/connectors/golden-tables/src/main/resources/golden/spark-variant-checkpoint/_delta_log/00000000000000000002.checkpoint.parquet b/connectors/golden-tables/src/main/resources/golden/variant/_delta_log/00000000000000000002.checkpoint.parquet similarity index 100% rename from connectors/golden-tables/src/main/resources/golden/spark-variant-checkpoint/_delta_log/00000000000000000002.checkpoint.parquet rename to connectors/golden-tables/src/main/resources/golden/variant/_delta_log/00000000000000000002.checkpoint.parquet diff --git a/connectors/golden-tables/src/main/resources/golden/spark-variant-checkpoint/_delta_log/00000000000000000002.json b/connectors/golden-tables/src/main/resources/golden/variant/_delta_log/00000000000000000002.json similarity index 100% rename from connectors/golden-tables/src/main/resources/golden/spark-variant-checkpoint/_delta_log/00000000000000000002.json rename to connectors/golden-tables/src/main/resources/golden/variant/_delta_log/00000000000000000002.json diff --git a/connectors/golden-tables/src/main/resources/golden/spark-variant-checkpoint/_delta_log/_last_checkpoint b/connectors/golden-tables/src/main/resources/golden/variant/_delta_log/_last_checkpoint similarity index 100% rename from connectors/golden-tables/src/main/resources/golden/spark-variant-checkpoint/_delta_log/_last_checkpoint rename to connectors/golden-tables/src/main/resources/golden/variant/_delta_log/_last_checkpoint diff --git a/connectors/golden-tables/src/main/resources/golden/spark-variant-checkpoint/part-00000-16c852df-ba66-4080-be25-530a05922422-c000.snappy.parquet b/connectors/golden-tables/src/main/resources/golden/variant/part-00000-16c852df-ba66-4080-be25-530a05922422-c000.snappy.parquet similarity index 100% rename from 
connectors/golden-tables/src/main/resources/golden/spark-variant-checkpoint/part-00000-16c852df-ba66-4080-be25-530a05922422-c000.snappy.parquet rename to connectors/golden-tables/src/main/resources/golden/variant/part-00000-16c852df-ba66-4080-be25-530a05922422-c000.snappy.parquet diff --git a/connectors/golden-tables/src/main/resources/golden/spark-variant-checkpoint/part-00000-1e14ba22-3114-46d1-96fb-48b4912507ce-c000.snappy.parquet b/connectors/golden-tables/src/main/resources/golden/variant/part-00000-1e14ba22-3114-46d1-96fb-48b4912507ce-c000.snappy.parquet similarity index 100% rename from connectors/golden-tables/src/main/resources/golden/spark-variant-checkpoint/part-00000-1e14ba22-3114-46d1-96fb-48b4912507ce-c000.snappy.parquet rename to connectors/golden-tables/src/main/resources/golden/variant/part-00000-1e14ba22-3114-46d1-96fb-48b4912507ce-c000.snappy.parquet diff --git a/connectors/golden-tables/src/main/resources/golden/spark-variant-checkpoint/part-00000-9a9c570c-ee32-4322-ad2f-8c837a77d398-c000.snappy.parquet b/connectors/golden-tables/src/main/resources/golden/variant/part-00000-9a9c570c-ee32-4322-ad2f-8c837a77d398-c000.snappy.parquet similarity index 100% rename from connectors/golden-tables/src/main/resources/golden/spark-variant-checkpoint/part-00000-9a9c570c-ee32-4322-ad2f-8c837a77d398-c000.snappy.parquet rename to connectors/golden-tables/src/main/resources/golden/variant/part-00000-9a9c570c-ee32-4322-ad2f-8c837a77d398-c000.snappy.parquet diff --git a/connectors/golden-tables/src/main/resources/golden/spark-variant-checkpoint/part-00001-664313d3-14b4-4dbf-8110-77001b877182-c000.snappy.parquet b/connectors/golden-tables/src/main/resources/golden/variant/part-00001-664313d3-14b4-4dbf-8110-77001b877182-c000.snappy.parquet similarity index 100% rename from connectors/golden-tables/src/main/resources/golden/spark-variant-checkpoint/part-00001-664313d3-14b4-4dbf-8110-77001b877182-c000.snappy.parquet rename to connectors/golden-tables/src/main/resources/golden/variant/part-00001-664313d3-14b4-4dbf-8110-77001b877182-c000.snappy.parquet
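
A note on the validation pattern the series settles on: patches 01 through 10 converge on a recursive allow-list walk over the table schema. Complex types (struct, array, map) must pass the allow-list themselves, and validation then descends into their children, extending the field path with ".element", ".key", or ".value" so the eventual error names the exact offending leaf rather than just the top-level column. The sketch below is a minimal, self-contained illustration of that walk. It is written against Spark's org.apache.spark.sql.types hierarchy rather than the committed io.delta.kernel.types version, and the class name, the exact allow-list contents, and the use of CalendarIntervalType as a stand-in for an unsupported leaf are illustrative assumptions, not code from the series.

import org.apache.spark.sql.types.*;

public class TypeAllowListSketch {

  // Leaf types on the allow-list; struct/array/map pass here, but their
  // children are still validated recursively in validate() below.
  static boolean isAllowed(DataType t) {
    return t instanceof ByteType
        || t instanceof ShortType
        || t instanceof IntegerType
        || t instanceof LongType
        || t instanceof FloatType
        || t instanceof DoubleType
        || t instanceof DecimalType
        || t instanceof StringType
        || t instanceof BinaryType
        || t instanceof BooleanType
        || t instanceof DateType
        || t instanceof TimestampType
        || t instanceof StructType
        || t instanceof ArrayType
        || t instanceof MapType;
  }

  // Depth-first walk; fieldPath grows as we descend so the error message
  // points at the exact leaf, e.g. "props.value".
  static void validate(DataType t, String fieldPath) {
    if (!isAllowed(t)) {
      throw new IllegalArgumentException(
          String.format("Unsupported data type '%s' for field '%s'", t.typeName(), fieldPath));
    }
    if (t instanceof StructType) {
      for (StructField f : ((StructType) t).fields()) {
        validate(f.dataType(), fieldPath + "." + f.name());
      }
    } else if (t instanceof ArrayType) {
      validate(((ArrayType) t).elementType(), fieldPath + ".element");
    } else if (t instanceof MapType) {
      MapType m = (MapType) t;
      validate(m.keyType(), fieldPath + ".key");
      validate(m.valueType(), fieldPath + ".value");
    }
  }

  public static void main(String[] args) {
    StructType schema =
        new StructType()
            .add("id", DataTypes.IntegerType)
            .add("tags", DataTypes.createArrayType(DataTypes.StringType))
            .add(
                "props",
                DataTypes.createMapType(DataTypes.StringType, DataTypes.CalendarIntervalType));

    // Throws: Unsupported data type 'calendarinterval' for field 'props.value'
    for (StructField f : schema.fields()) {
      validate(f.dataType(), f.name());
    }
  }
}

The same shape explains why ordering matters in the constructor change early in the series: validating the kernel schema before converting it to a Spark schema rejects an unsupported type with a kernel-level type name, before the conversion itself can fail on a type it does not know how to map.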