Skip to content

Commit

Permalink
HIVE-27950: STACK UDTF returns wrong results when number of arguments…
Browse files (browse the repository at this point in the history)
… is not a multiple of N (apache#4938) (okumin reviewed by Attila Turoczy, Zsolt Miskolczi and Sourabh Badhya)
  • Loading branch information
okumin authored Feb 20, 2024
1 parent a3926cc commit 5b76949
Show file tree
Hide file tree
Showing 10 changed files with 68 additions and 11 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -23,13 +23,13 @@

import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFUtils.ReturnObjectInspectorResolver;
import org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableConstantIntObjectInspector;
import org.apache.hadoop.io.IntWritable;

/**
Expand Down Expand Up @@ -63,13 +63,21 @@ public StructObjectInspector initialize(ObjectInspector[] args)
}
if (!(args[0] instanceof ConstantObjectInspector)) {
throw new UDFArgumentException(
"The first argument to STACK() must be a constant.");
}
final Object value = ((ConstantObjectInspector) args[0]).getWritableConstantValue();
if (value == null) {
throw new UDFArgumentException("The first argument of STACK() must not be null.");
}
if (!(value instanceof IntWritable)) {
throw new UDFArgumentTypeException(
0,
"The first argument to STACK() must be a constant integer (got " +
args[0].getTypeName() + " instead).");
}
numRows = (IntWritable)
((ConstantObjectInspector)args[0]).getWritableConstantValue();
numRows = (IntWritable) value;

if (numRows == null || numRows.get() < 1) {
if (numRows.get() < 1) {
throw new UDFArgumentException(
"STACK() expects its first argument to be >= 1.");
}
Expand Down Expand Up @@ -109,15 +117,15 @@ public StructObjectInspector initialize(ObjectInspector[] args)

@Override
public void process(Object[] args)
throws HiveException, UDFArgumentException {
throws HiveException {
for (int ii = 0; ii < numRows.get(); ++ii) {
for (int jj = 0; jj < numCols; ++jj) {
int index = ii * numCols + jj + 1;
if (index < args.length) {
forwardObj[jj] =
returnOIResolvers.get(jj).convertIfNecessary(args[index], argOIs.get(index));
} else {
forwardObj[ii] = null;
forwardObj[jj] = null;
}
}
forward(forwardObj);
Expand Down
2 changes: 2 additions & 0 deletions ql/src/test/queries/clientnegative/udtf_stack_not_constant.q
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
--! qt:dataset:alltypesparquet
SELECT STACK(cint, 'a', 'b') FROM alltypesparquet;
1 change: 1 addition & 0 deletions ql/src/test/queries/clientnegative/udtf_stack_null.q
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
SELECT stack(cast(null as int), 'a', 'b', 'c', 'd');
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
SELECT stack('2', 'a', 'b', 'c', 'd');
3 changes: 3 additions & 0 deletions ql/src/test/queries/clientpositive/udtf_stack.q
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,6 @@ SELECT x, y FROM src LATERAL VIEW STACK(2, 'x', array(1), 'z', array(4)) a AS x,
EXPLAIN
SELECT stack(1, "en", "dbpedia", NULL );
SELECT stack(1, "en", "dbpedia", NULL );

EXPLAIN SELECT STACK(2, 'a', 'b', 'c', 'd', 'e');
SELECT STACK(2, 'a', 'b', 'c', 'd', 'e');
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
FAILED: UDFArgumentException The first argument to STACK() must be a constant.
1 change: 1 addition & 0 deletions ql/src/test/results/clientnegative/udtf_stack_null.q.out
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
FAILED: UDFArgumentException The first argument of STACK() must not be null.
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
FAILED: UDFArgumentTypeException The first argument to STACK() must be a constant integer (got string instead).
10 changes: 5 additions & 5 deletions ql/src/test/results/clientpositive/llap/allcolref_in_udf.q.out
Original file line number Diff line number Diff line change
Expand Up @@ -168,15 +168,15 @@ POSTHOOK: type: QUERY
POSTHOOK: Input: default@src
#### A masked pattern was here ####
4val_45val_5 4val_4 5val_5
4val_45 NULL 5val_5
4val_45 45val_5 NULL
4val_45val_5 4val_4 5val_5
4val_45 NULL 5val_5
4val_45 45val_5 NULL
4val_45val_5 4val_4 5val_5
4val_45 NULL 5val_5
4val_45 45val_5 NULL
8val_89val_9 8val_8 9val_9
8val_89 NULL 9val_9
8val_89 89val_9 NULL
9val_910val_10 9val_9 10val_10
9val_910 NULL 10val_10
9val_910 910val_10 NULL
PREHOOK: query: create table allcolref as select array(key, value) from src
PREHOOK: type: CREATETABLE_AS_SELECT
PREHOOK: Input: default@src
Expand Down
39 changes: 39 additions & 0 deletions ql/src/test/results/clientpositive/llap/udtf_stack.q.out
Original file line number Diff line number Diff line change
Expand Up @@ -147,3 +147,42 @@ POSTHOOK: type: QUERY
POSTHOOK: Input: _dummy_database@_dummy_table
#### A masked pattern was here ####
en dbpedia NULL
PREHOOK: query: EXPLAIN SELECT STACK(2, 'a', 'b', 'c', 'd', 'e')
PREHOOK: type: QUERY
PREHOOK: Input: _dummy_database@_dummy_table
#### A masked pattern was here ####
POSTHOOK: query: EXPLAIN SELECT STACK(2, 'a', 'b', 'c', 'd', 'e')
POSTHOOK: type: QUERY
POSTHOOK: Input: _dummy_database@_dummy_table
#### A masked pattern was here ####
STAGE DEPENDENCIES:
Stage-0 is a root stage

STAGE PLANS:
Stage: Stage-0
Fetch Operator
limit: -1
Processor Tree:
TableScan
alias: _dummy_table
Row Limit Per Split: 1
Select Operator
expressions: 2 (type: int), 'a' (type: string), 'b' (type: string), 'c' (type: string), 'd' (type: string), 'e' (type: string)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
UDTF Operator
function name: stack
Select Operator
expressions: col0 (type: string), col1 (type: string), col2 (type: string)
outputColumnNames: _col0, _col1, _col2
ListSink

PREHOOK: query: SELECT STACK(2, 'a', 'b', 'c', 'd', 'e')
PREHOOK: type: QUERY
PREHOOK: Input: _dummy_database@_dummy_table
#### A masked pattern was here ####
POSTHOOK: query: SELECT STACK(2, 'a', 'b', 'c', 'd', 'e')
POSTHOOK: type: QUERY
POSTHOOK: Input: _dummy_database@_dummy_table
#### A masked pattern was here ####
a b c
d e NULL

0 comments on commit 5b76949

Please sign in to comment.