From 5b76949da6fe65364a4e3766680871167131157f Mon Sep 17 00:00:00 2001 From: okumin Date: Tue, 20 Feb 2024 14:17:43 +0900 Subject: [PATCH] HIVE-27950: STACK UDTF returns wrong results when number of arguments is not a multiple of N (#4938) (okumin reviewed by Attila Turoczy, Zsolt Miskolczi and Sourabh Badhya) --- .../hive/ql/udf/generic/GenericUDTFStack.java | 20 +++++++--- .../clientnegative/udtf_stack_not_constant.q | 2 + .../queries/clientnegative/udtf_stack_null.q | 1 + .../clientnegative/udtf_stack_wrong_type1.q | 1 + .../test/queries/clientpositive/udtf_stack.q | 3 ++ .../udtf_stack_not_constant.q.out | 1 + .../clientnegative/udtf_stack_null.q.out | 1 + .../udtf_stack_wrong_type1.q.out | 1 + .../llap/allcolref_in_udf.q.out | 10 ++--- .../clientpositive/llap/udtf_stack.q.out | 39 +++++++++++++++++++ 10 files changed, 68 insertions(+), 11 deletions(-) create mode 100644 ql/src/test/queries/clientnegative/udtf_stack_not_constant.q create mode 100644 ql/src/test/queries/clientnegative/udtf_stack_null.q create mode 100644 ql/src/test/queries/clientnegative/udtf_stack_wrong_type1.q create mode 100644 ql/src/test/results/clientnegative/udtf_stack_not_constant.q.out create mode 100644 ql/src/test/results/clientnegative/udtf_stack_null.q.out create mode 100644 ql/src/test/results/clientnegative/udtf_stack_wrong_type1.q.out diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFStack.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFStack.java index f3cc2b4f6389..d75d3b627144 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFStack.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFStack.java @@ -23,13 +23,13 @@ import org.apache.hadoop.hive.ql.exec.Description; import org.apache.hadoop.hive.ql.exec.UDFArgumentException; +import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFUtils.ReturnObjectInspectorResolver; import org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableConstantIntObjectInspector; import org.apache.hadoop.io.IntWritable; /** @@ -63,13 +63,21 @@ public StructObjectInspector initialize(ObjectInspector[] args) } if (!(args[0] instanceof ConstantObjectInspector)) { throw new UDFArgumentException( + "The first argument to STACK() must be a constant."); + } + final Object value = ((ConstantObjectInspector) args[0]).getWritableConstantValue(); + if (value == null) { + throw new UDFArgumentException("The first argument of STACK() must not be null."); + } + if (!(value instanceof IntWritable)) { + throw new UDFArgumentTypeException( + 0, "The first argument to STACK() must be a constant integer (got " + args[0].getTypeName() + " instead)."); } - numRows = (IntWritable) - ((ConstantObjectInspector)args[0]).getWritableConstantValue(); + numRows = (IntWritable) value; - if (numRows == null || numRows.get() < 1) { + if (numRows.get() < 1) { throw new UDFArgumentException( "STACK() expects its first argument to be >= 1."); } @@ -109,7 +117,7 @@ public StructObjectInspector initialize(ObjectInspector[] args) @Override public void process(Object[] args) - throws HiveException, UDFArgumentException { + throws HiveException { for (int ii = 0; ii < numRows.get(); ++ii) { for (int jj = 0; jj < numCols; ++jj) { int index = ii * numCols + jj + 1; @@ -117,7 +125,7 @@ public void process(Object[] args) forwardObj[jj] = returnOIResolvers.get(jj).convertIfNecessary(args[index], argOIs.get(index)); } else { - forwardObj[ii] = null; + forwardObj[jj] = null; } } forward(forwardObj); diff --git a/ql/src/test/queries/clientnegative/udtf_stack_not_constant.q b/ql/src/test/queries/clientnegative/udtf_stack_not_constant.q new file mode 100644 index 000000000000..f559ce304621 --- /dev/null +++ b/ql/src/test/queries/clientnegative/udtf_stack_not_constant.q @@ -0,0 +1,2 @@ +--! qt:dataset:alltypesparquet +SELECT STACK(cint, 'a', 'b') FROM alltypesparquet; diff --git a/ql/src/test/queries/clientnegative/udtf_stack_null.q b/ql/src/test/queries/clientnegative/udtf_stack_null.q new file mode 100644 index 000000000000..f3832ffd4adb --- /dev/null +++ b/ql/src/test/queries/clientnegative/udtf_stack_null.q @@ -0,0 +1 @@ +SELECT stack(cast(null as int), 'a', 'b', 'c', 'd'); diff --git a/ql/src/test/queries/clientnegative/udtf_stack_wrong_type1.q b/ql/src/test/queries/clientnegative/udtf_stack_wrong_type1.q new file mode 100644 index 000000000000..90762becb234 --- /dev/null +++ b/ql/src/test/queries/clientnegative/udtf_stack_wrong_type1.q @@ -0,0 +1 @@ +SELECT stack('2', 'a', 'b', 'c', 'd'); diff --git a/ql/src/test/queries/clientpositive/udtf_stack.q b/ql/src/test/queries/clientpositive/udtf_stack.q index 8aa7a8cfe80e..def0c38f5d56 100644 --- a/ql/src/test/queries/clientpositive/udtf_stack.q +++ b/ql/src/test/queries/clientpositive/udtf_stack.q @@ -11,3 +11,6 @@ SELECT x, y FROM src LATERAL VIEW STACK(2, 'x', array(1), 'z', array(4)) a AS x, EXPLAIN SELECT stack(1, "en", "dbpedia", NULL ); SELECT stack(1, "en", "dbpedia", NULL ); + +EXPLAIN SELECT STACK(2, 'a', 'b', 'c', 'd', 'e'); +SELECT STACK(2, 'a', 'b', 'c', 'd', 'e'); diff --git a/ql/src/test/results/clientnegative/udtf_stack_not_constant.q.out b/ql/src/test/results/clientnegative/udtf_stack_not_constant.q.out new file mode 100644 index 000000000000..86b264a179de --- /dev/null +++ b/ql/src/test/results/clientnegative/udtf_stack_not_constant.q.out @@ -0,0 +1 @@ +FAILED: UDFArgumentException The first argument to STACK() must be a constant. diff --git a/ql/src/test/results/clientnegative/udtf_stack_null.q.out b/ql/src/test/results/clientnegative/udtf_stack_null.q.out new file mode 100644 index 000000000000..854a2361c3a9 --- /dev/null +++ b/ql/src/test/results/clientnegative/udtf_stack_null.q.out @@ -0,0 +1 @@ +FAILED: UDFArgumentException The first argument of STACK() must not be null. diff --git a/ql/src/test/results/clientnegative/udtf_stack_wrong_type1.q.out b/ql/src/test/results/clientnegative/udtf_stack_wrong_type1.q.out new file mode 100644 index 000000000000..d18a726b9c17 --- /dev/null +++ b/ql/src/test/results/clientnegative/udtf_stack_wrong_type1.q.out @@ -0,0 +1 @@ +FAILED: UDFArgumentTypeException The first argument to STACK() must be a constant integer (got string instead). diff --git a/ql/src/test/results/clientpositive/llap/allcolref_in_udf.q.out b/ql/src/test/results/clientpositive/llap/allcolref_in_udf.q.out index 8e20aaa35d86..236d52a2ded5 100644 --- a/ql/src/test/results/clientpositive/llap/allcolref_in_udf.q.out +++ b/ql/src/test/results/clientpositive/llap/allcolref_in_udf.q.out @@ -168,15 +168,15 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### 4val_45val_5 4val_4 5val_5 -4val_45 NULL 5val_5 +4val_45 45val_5 NULL 4val_45val_5 4val_4 5val_5 -4val_45 NULL 5val_5 +4val_45 45val_5 NULL 4val_45val_5 4val_4 5val_5 -4val_45 NULL 5val_5 +4val_45 45val_5 NULL 8val_89val_9 8val_8 9val_9 -8val_89 NULL 9val_9 +8val_89 89val_9 NULL 9val_910val_10 9val_9 10val_10 -9val_910 NULL 10val_10 +9val_910 910val_10 NULL PREHOOK: query: create table allcolref as select array(key, value) from src PREHOOK: type: CREATETABLE_AS_SELECT PREHOOK: Input: default@src diff --git a/ql/src/test/results/clientpositive/llap/udtf_stack.q.out b/ql/src/test/results/clientpositive/llap/udtf_stack.q.out index 2e5bba2e388c..ad176cea611c 100644 --- a/ql/src/test/results/clientpositive/llap/udtf_stack.q.out +++ b/ql/src/test/results/clientpositive/llap/udtf_stack.q.out @@ -147,3 +147,42 @@ POSTHOOK: type: QUERY POSTHOOK: Input: _dummy_database@_dummy_table #### A masked pattern was here #### en dbpedia NULL +PREHOOK: query: EXPLAIN SELECT STACK(2, 'a', 'b', 'c', 'd', 'e') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN SELECT STACK(2, 'a', 'b', 'c', 'd', 'e') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: _dummy_table + Row Limit Per Split: 1 + Select Operator + expressions: 2 (type: int), 'a' (type: string), 'b' (type: string), 'c' (type: string), 'd' (type: string), 'e' (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + UDTF Operator + function name: stack + Select Operator + expressions: col0 (type: string), col1 (type: string), col2 (type: string) + outputColumnNames: _col0, _col1, _col2 + ListSink + +PREHOOK: query: SELECT STACK(2, 'a', 'b', 'c', 'd', 'e') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +POSTHOOK: query: SELECT STACK(2, 'a', 'b', 'c', 'd', 'e') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +a b c +d e NULL