Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

HIVE-28362: Fail to materialize a CTE with VOID #5335

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -7834,7 +7834,8 @@ protected Operator genFileSinkPlan(String dest, QB qb, Operator input)
if (!CollectionUtils.isEmpty(partitionColumnNames)) {
ColsAndTypes ct = deriveFileSinkColTypes(
inputRR, partitionColumnNames, sortColumnNames, distributeColumnNames, fieldSchemas, partitionColumns,
sortColumns, distributeColumns, fileSinkColInfos, sortColInfos, distributeColInfos);
sortColumns, distributeColumns, fileSinkColInfos, sortColInfos, distributeColInfos,
destTableIsMaterialization);
cols = ct.cols;
colTypes = ct.colTypes;
dpCtx = new DynamicPartitionCtx(partitionColumnNames,
Expand All @@ -7845,7 +7846,7 @@ protected Operator genFileSinkPlan(String dest, QB qb, Operator input)
} else {
ColsAndTypes ct = deriveFileSinkColTypes(
inputRR, sortColumnNames, distributeColumnNames, fieldSchemas, sortColumns, distributeColumns,
sortColInfos, distributeColInfos);
sortColInfos, distributeColInfos, destTableIsMaterialization);
cols = ct.cols;
colTypes = ct.colTypes;
isPartitioned = false;
Expand Down Expand Up @@ -8299,16 +8300,18 @@ private boolean hasSetBatchSerializer(String serdeClassName) {

private ColsAndTypes deriveFileSinkColTypes(RowResolver inputRR, List<String> sortColumnNames, List<String> distributeColumnNames,
List<FieldSchema> fieldSchemas, List<FieldSchema> sortColumns, List<FieldSchema> distributeColumns,
List<ColumnInfo> sortColInfos, List<ColumnInfo> distributeColInfos) throws SemanticException {
List<ColumnInfo> sortColInfos, List<ColumnInfo> distributeColInfos, boolean isMaterialized)
throws SemanticException {
return deriveFileSinkColTypes(inputRR, new ArrayList<>(), sortColumnNames, distributeColumnNames,
fieldSchemas, new ArrayList<>(), sortColumns, distributeColumns, new ArrayList<>(),
sortColInfos, distributeColInfos);
sortColInfos, distributeColInfos, isMaterialized);
}

private ColsAndTypes deriveFileSinkColTypes(
RowResolver inputRR, List<String> partitionColumnNames, List<String> sortColumnNames, List<String> distributeColumnNames,
List<FieldSchema> columns, List<FieldSchema> partitionColumns, List<FieldSchema> sortColumns, List<FieldSchema> distributeColumns,
List<ColumnInfo> fileSinkColInfos, List<ColumnInfo> sortColInfos, List<ColumnInfo> distributeColInfos) throws SemanticException {
List<ColumnInfo> fileSinkColInfos, List<ColumnInfo> sortColInfos, List<ColumnInfo> distributeColInfos,
boolean isMaterialized) throws SemanticException {
ColsAndTypes result = new ColsAndTypes("", "");
List<String> allColumns = new ArrayList<>();
List<ColumnInfo> colInfos = inputRR.getColumnInfos();
Expand Down Expand Up @@ -8340,7 +8343,7 @@ private ColsAndTypes deriveFileSinkColTypes(
allColumns.add(colName);
String typeName = colInfo.getType().getTypeName();
// CTAS should NOT create a VOID type; CTE materialization is exempt, since it only persists intermediate results
if (typeName.equals(serdeConstants.VOID_TYPE_NAME)) {
if (!isMaterialized && typeName.equals(serdeConstants.VOID_TYPE_NAME)) {
throw new SemanticException(ErrorMsg.CTAS_CREATES_VOID_TYPE.getMsg(colName));
}
col.setType(typeName);
Expand Down
9 changes: 9 additions & 0 deletions ql/src/test/queries/clientpositive/cte_mat_12.q
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
set hive.optimize.cte.materialize.full.aggregate.only=false;
set hive.optimize.cte.materialize.threshold=2;

EXPLAIN
WITH x AS (SELECT null AS null_value, 1 AS non_null_value)
SELECT *, TYPEOF(null_value) FROM x UNION ALL SELECT *, TYPEOF(null_value) FROM x;

WITH x AS (SELECT null AS null_value, 1 AS non_null_value)
SELECT *, TYPEOF(null_value) FROM x UNION ALL SELECT *, TYPEOF(null_value) FROM x;
133 changes: 133 additions & 0 deletions ql/src/test/results/clientpositive/llap/cte_mat_12.q.out
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
PREHOOK: query: EXPLAIN
WITH x AS (SELECT null AS null_value, 1 AS non_null_value)
SELECT *, TYPEOF(null_value) FROM x UNION ALL SELECT *, TYPEOF(null_value) FROM x
PREHOOK: type: QUERY
PREHOOK: Input: default@x
#### A masked pattern was here ####
POSTHOOK: query: EXPLAIN
WITH x AS (SELECT null AS null_value, 1 AS non_null_value)
SELECT *, TYPEOF(null_value) FROM x UNION ALL SELECT *, TYPEOF(null_value) FROM x
POSTHOOK: type: QUERY
POSTHOOK: Input: default@x
#### A masked pattern was here ####
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-2 depends on stages: Stage-1
Stage-4 depends on stages: Stage-2, Stage-0
Stage-0 depends on stages: Stage-1
Stage-3 depends on stages: Stage-4

STAGE PLANS:
Stage: Stage-1
Tez
#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
TableScan
alias: _dummy_table
Row Limit Per Split: 1
Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: null (type: void), 1 (type: int)
outputColumnNames: _col0, _col1
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.x
Execution mode: llap
LLAP IO: no inputs

Stage: Stage-2
Dependency Collection

Stage: Stage-4
Tez
#### A masked pattern was here ####
Edges:
Map 2 <- Union 3 (CONTAINS)
Map 4 <- Union 3 (CONTAINS)
#### A masked pattern was here ####
Vertices:
Map 2
Map Operator Tree:
TableScan
alias: x
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: non_null_value (type: int)
outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: null (type: void), _col0 (type: int), 'void' (type: string)
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
Statistics: Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Execution mode: vectorized, llap
LLAP IO: all inputs
Map 4
Map Operator Tree:
TableScan
alias: x
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: non_null_value (type: int)
outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: null (type: void), _col0 (type: int), 'void' (type: string)
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
Statistics: Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Execution mode: vectorized, llap
LLAP IO: all inputs
Union 3
Vertex: Union 3

Stage: Stage-0
Move Operator
files:
hdfs directory: true
#### A masked pattern was here ####

Stage: Stage-3
Fetch Operator
limit: -1
Processor Tree:
ListSink

PREHOOK: query: WITH x AS (SELECT null AS null_value, 1 AS non_null_value)
SELECT *, TYPEOF(null_value) FROM x UNION ALL SELECT *, TYPEOF(null_value) FROM x
PREHOOK: type: QUERY
PREHOOK: Input: _dummy_database@_dummy_table
PREHOOK: Input: default@x
PREHOOK: Output: database:default
PREHOOK: Output: default@x
#### A masked pattern was here ####
POSTHOOK: query: WITH x AS (SELECT null AS null_value, 1 AS non_null_value)
SELECT *, TYPEOF(null_value) FROM x UNION ALL SELECT *, TYPEOF(null_value) FROM x
POSTHOOK: type: QUERY
POSTHOOK: Input: _dummy_database@_dummy_table
POSTHOOK: Input: default@x
POSTHOOK: Output: database:default
POSTHOOK: Output: default@x
#### A masked pattern was here ####
NULL 1 void
NULL 1 void
Loading