From f6ce2bef091827ee483a978fa079859b4436e36b Mon Sep 17 00:00:00 2001 From: Kevin Ge Date: Mon, 10 Feb 2025 13:19:20 -0500 Subject: [PATCH] [Coral-schema] Generalize operand schema inference on ordinal return type UDF calls (#548) * fix nested nullability inference for nested udf calls * spotless * generalize operand schema inference for ordinal return type UDF calls * add unit test + documentation --- .../schema/avro/RelToAvroSchemaConverter.java | 23 ++++++--------- .../linkedin/coral/schema/avro/TestUtils.java | 4 +-- .../avro/ViewToAvroSchemaConverterTests.java | 28 +++++++++++++++++-- 3 files changed, 36 insertions(+), 19 deletions(-) diff --git a/coral-schema/src/main/java/com/linkedin/coral/schema/avro/RelToAvroSchemaConverter.java b/coral-schema/src/main/java/com/linkedin/coral/schema/avro/RelToAvroSchemaConverter.java index a7165a779..1452d86f7 100644 --- a/coral-schema/src/main/java/com/linkedin/coral/schema/avro/RelToAvroSchemaConverter.java +++ b/coral-schema/src/main/java/com/linkedin/coral/schema/avro/RelToAvroSchemaConverter.java @@ -1,5 +1,5 @@ /** - * Copyright 2019-2024 LinkedIn Corporation. All rights reserved. + * Copyright 2019-2025 LinkedIn Corporation. All rights reserved. * Licensed under the BSD-2 Clause license. * See LICENSE in the project root for license information. */ @@ -433,25 +433,20 @@ public RexNode visitLiteral(RexLiteral rexLiteral) { @Override public RexNode visitCall(RexCall rexCall) { /** - * For SqlUserDefinedFunction and SqlOperator RexCall, no need to handle it recursively - * and only return type of udf or sql operator is relevant - */ - - /** - * If the return type of RexCall is based on the ordinal of its input argument - * and the corresponding input argument refers to a field from the input schema, - * use the field's schema as is. + * If the return type of RexCall is based on an ordinal of its input arguments, then leverage SchemaRexShuttle + * to visit the input argument and use the argument's schema as is to infer the return type of the call */ if (rexCall.getOperator().getReturnTypeInference() instanceof OrdinalReturnTypeInferenceV2) { int index = ((OrdinalReturnTypeInferenceV2) rexCall.getOperator().getReturnTypeInference()).getOrdinal(); RexNode operand = rexCall.operands.get(index); - - if (operand instanceof RexInputRef) { - appendRexInputRefField((RexInputRef) operand); - return rexCall; - } + operand.accept(this); + return rexCall; } + /** + * For SqlUserDefinedFunction and SqlOperator RexCall, no need to handle it recursively + * and just directly use the return type of udf or sql operator as the field's schema + */ RelDataType fieldType = rexCall.getType(); boolean isNullable = SchemaUtilities.isFieldNullable(rexCall, inputSchema); diff --git a/coral-schema/src/test/java/com/linkedin/coral/schema/avro/TestUtils.java b/coral-schema/src/test/java/com/linkedin/coral/schema/avro/TestUtils.java index 2d503ddcd..88fcb0d34 100644 --- a/coral-schema/src/test/java/com/linkedin/coral/schema/avro/TestUtils.java +++ b/coral-schema/src/test/java/com/linkedin/coral/schema/avro/TestUtils.java @@ -1,5 +1,5 @@ /** - * Copyright 2019-2024 LinkedIn Corporation. All rights reserved. + * Copyright 2019-2025 LinkedIn Corporation. All rights reserved. * Licensed under the BSD-2 Clause license. * See LICENSE in the project root for license information. */ @@ -177,7 +177,7 @@ private static void initializeUdfs() { executeCreateFunctionQuery("default", Collections.singletonList("foo_udf_return_struct"), "FuncIsEven", "com.linkedin.coral.hive.hive2rel.CoralTestUDFReturnStruct"); - executeCreateFunctionQuery("default", Collections.singletonList("innerfield_with_udf"), "ReturnInnerStuct", + executeCreateFunctionQuery("default", Collections.singletonList("innerfield_with_udf"), "ReturnInnerStruct", "com.linkedin.coral.hive.hive2rel.CoralTestUDFReturnSecondArg"); } diff --git a/coral-schema/src/test/java/com/linkedin/coral/schema/avro/ViewToAvroSchemaConverterTests.java b/coral-schema/src/test/java/com/linkedin/coral/schema/avro/ViewToAvroSchemaConverterTests.java index d014be0ee..9d44369f6 100644 --- a/coral-schema/src/test/java/com/linkedin/coral/schema/avro/ViewToAvroSchemaConverterTests.java +++ b/coral-schema/src/test/java/com/linkedin/coral/schema/avro/ViewToAvroSchemaConverterTests.java @@ -1,5 +1,5 @@ /** - * Copyright 2019-2024 LinkedIn Corporation. All rights reserved. + * Copyright 2019-2025 LinkedIn Corporation. All rights reserved. * Licensed under the BSD-2 Clause license. * See LICENSE in the project root for license information. */ @@ -236,9 +236,9 @@ public void testUdfLessThanHundred() { @Test public void testPreserveNullabilitiesAfterApplyingOrdinalReturnTypeUDF() { String viewSql = "CREATE VIEW innerfield_with_udf " - + "tblproperties('functions' = 'ReturnInnerStuct:com.linkedin.coral.hive.hive2rel.CoralTestUDFReturnSecondArg', " + + "tblproperties('functions' = 'ReturnInnerStruct:com.linkedin.coral.hive.hive2rel.CoralTestUDFReturnSecondArg', " + " 'dependencies' = 'ivy://com.linkedin:udf:1.0') " + "AS " - + "SELECT default_innerfield_with_udf_ReturnInnerStuct('foo', innerRecord) AS innerRecord " + + "SELECT default_innerfield_with_udf_ReturnInnerStruct('foo', innerRecord) AS innerRecord " + "FROM basecomplexmixednullabilities"; TestUtils.executeCreateViewQuery("default", "innerfield_with_udf", viewSql); @@ -252,6 +252,28 @@ public void testPreserveNullabilitiesAfterApplyingOrdinalReturnTypeUDF() { TestUtils.loadSchema("testPreserveNullabilitiesAfterApplyingOrdinalReturnTypeUDF-expected.avsc")); } + @Test + public void testPreserveNullabilitiesAfterApplyingOrdinalReturnTypeUDFForNestedCalls() { + String viewSql = "CREATE VIEW innerfield_with_udf " + + "tblproperties('functions' = 'ReturnInnerStruct:com.linkedin.coral.hive.hive2rel.CoralTestUDFReturnSecondArg', " + + " 'dependencies' = 'ivy://com.linkedin:udf:1.0') " + "AS " + + "SELECT default_innerfield_with_udf_ReturnInnerStruct('foo', default_innerfield_with_udf_ReturnInnerStruct('foo', innerRecord)) AS innerRecord " + + "FROM basecomplexmixednullabilities"; + + TestUtils.executeCreateViewQuery("default", "innerfield_with_udf", viewSql); + + ViewToAvroSchemaConverter viewToAvroSchemaConverter = ViewToAvroSchemaConverter.create(hiveMetastoreClient); + Schema actualSchema = viewToAvroSchemaConverter.toAvroSchema("default", "innerfield_with_udf"); + + // Inner ReturnInnerStruct call return type == Return type of it's second argument, innerRecord + // Outer ReturnInnerStruct call return type == Return type of it's second argument, Inner ReturnInnerStruct call return type + // Therefore, Outer ReturnInnerStruct call return type == Return type of innerRecord + // + // We also expect all fields to retain their nullability after applying the UDF calls + Assert.assertEquals(actualSchema.toString(true), + TestUtils.loadSchema("testPreserveNullabilitiesAfterApplyingOrdinalReturnTypeUDF-expected.avsc")); + } + @Test public void testUdfGreaterThanHundred() { String viewSql = "CREATE VIEW foo_dali_udf2 "