diff --git a/iceberg/iceberg-handler/src/test/queries/positive/merge_iceberg_copy_on_write_partitioned.q b/iceberg/iceberg-handler/src/test/queries/positive/merge_iceberg_copy_on_write_partitioned.q index 826b730a3b07..2b88325f55c1 100644 --- a/iceberg/iceberg-handler/src/test/queries/positive/merge_iceberg_copy_on_write_partitioned.q +++ b/iceberg/iceberg-handler/src/test/queries/positive/merge_iceberg_copy_on_write_partitioned.q @@ -27,6 +27,15 @@ merge into target_ice as t using source src ON t.a = src.a when not matched then insert values (src.a, src.b, src.c); merge into target_ice as t using source src ON t.a = src.a -when not matched then insert values (src.a, src.b, src.c); +when not matched and src.a <= 5 then insert values (src.a, src.b, src.c); + +select * from target_ice; + +-- insert clause with a column list +merge into target_ice as t using source src ON t.a = src.a +when not matched then insert (a, c) values (src.a, src.c); select * from target_ice; + + + diff --git a/iceberg/iceberg-handler/src/test/queries/positive/merge_iceberg_copy_on_write_unpartitioned.q b/iceberg/iceberg-handler/src/test/queries/positive/merge_iceberg_copy_on_write_unpartitioned.q index 371e4b5e3129..3e77025ed691 100644 --- a/iceberg/iceberg-handler/src/test/queries/positive/merge_iceberg_copy_on_write_unpartitioned.q +++ b/iceberg/iceberg-handler/src/test/queries/positive/merge_iceberg_copy_on_write_unpartitioned.q @@ -14,13 +14,14 @@ insert into source values (1, 'one', 50), (2, 'two', 51), (3, 'three', 52), (4, explain merge into target_ice as t using source src ON t.a = src.a when matched and t.a > 100 THEN DELETE +when matched then update set b = 'Merged', c = t.c + 10 when not matched then insert values (src.a, src.b, src.c); +-- insert clause with a column list explain merge into target_ice as t using source src ON t.a = src.a when matched and t.a > 100 THEN DELETE -when matched then update set b = 'Merged', c = t.c + 10 -when not matched then insert values (src.a, 
src.b, src.c); +when not matched then insert (a, b) values (src.a, src.b); merge into target_ice as t using source src ON t.a = src.a when matched and t.a > 100 THEN DELETE diff --git a/iceberg/iceberg-handler/src/test/results/positive/merge_iceberg_copy_on_write_partitioned.q.out b/iceberg/iceberg-handler/src/test/results/positive/merge_iceberg_copy_on_write_partitioned.q.out index f22d0bb16200..9e96bcd2f32e 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/merge_iceberg_copy_on_write_partitioned.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/merge_iceberg_copy_on_write_partitioned.q.out @@ -551,13 +551,39 @@ STAGE PLANS: Basic Stats Work: PREHOOK: query: merge into target_ice as t using source src ON t.a = src.a -when not matched then insert values (src.a, src.b, src.c) +when not matched and src.a <= 5 then insert values (src.a, src.b, src.c) PREHOOK: type: QUERY PREHOOK: Input: default@source PREHOOK: Input: default@target_ice PREHOOK: Output: default@target_ice POSTHOOK: query: merge into target_ice as t using source src ON t.a = src.a -when not matched then insert values (src.a, src.b, src.c) +when not matched and src.a <= 5 then insert values (src.a, src.b, src.c) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@source +POSTHOOK: Input: default@target_ice +POSTHOOK: Output: default@target_ice +PREHOOK: query: select * from target_ice +PREHOOK: type: QUERY +PREHOOK: Input: default@target_ice +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: select * from target_ice +POSTHOOK: type: QUERY +POSTHOOK: Input: default@target_ice +POSTHOOK: Output: hdfs://### HDFS PATH ### +1 Merged 60 +2 Merged 61 +3 three 52 +333 two 56 +4 four 53 +5 five 54 +PREHOOK: query: merge into target_ice as t using source src ON t.a = src.a +when not matched then insert (a, c) values (src.a, src.c) +PREHOOK: type: QUERY +PREHOOK: Input: default@source +PREHOOK: Input: default@target_ice +PREHOOK: Output: default@target_ice +POSTHOOK: query: merge 
into target_ice as t using source src ON t.a = src.a +when not matched then insert (a, c) values (src.a, src.c) POSTHOOK: type: QUERY POSTHOOK: Input: default@source POSTHOOK: Input: default@target_ice @@ -571,7 +597,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@target_ice POSTHOOK: Output: hdfs://### HDFS PATH ### 1 Merged 60 -111 one 55 +111 NULL 55 2 Merged 61 3 three 52 333 two 56 diff --git a/iceberg/iceberg-handler/src/test/results/positive/merge_iceberg_copy_on_write_unpartitioned.q.out b/iceberg/iceberg-handler/src/test/results/positive/merge_iceberg_copy_on_write_unpartitioned.q.out index 14a9fd4c52b6..9fd120e1330f 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/merge_iceberg_copy_on_write_unpartitioned.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/merge_iceberg_copy_on_write_unpartitioned.q.out @@ -48,6 +48,7 @@ POSTHOOK: Lineage: source.c SCRIPT [] PREHOOK: query: explain merge into target_ice as t using source src ON t.a = src.a when matched and t.a > 100 THEN DELETE +when matched then update set b = 'Merged', c = t.c + 10 when not matched then insert values (src.a, src.b, src.c) PREHOOK: type: QUERY PREHOOK: Input: default@source @@ -56,6 +57,7 @@ PREHOOK: Output: default@target_ice POSTHOOK: query: explain merge into target_ice as t using source src ON t.a = src.a when matched and t.a > 100 THEN DELETE +when matched then update set b = 'Merged', c = t.c + 10 when not matched then insert values (src.a, src.b, src.c) POSTHOOK: type: QUERY POSTHOOK: Input: default@source @@ -72,16 +74,62 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 10 (SIMPLE_EDGE), Union 3 (CONTAINS) - Reducer 4 <- Map 1 (SIMPLE_EDGE), Map 10 (SIMPLE_EDGE) - Reducer 5 <- Reducer 4 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE), Union 3 (CONTAINS) - Reducer 6 <- Map 1 (SIMPLE_EDGE), Map 10 (SIMPLE_EDGE) - Reducer 7 <- Reducer 6 (SIMPLE_EDGE), Union 3 (CONTAINS) - Reducer 8 <- Map 1 (SIMPLE_EDGE), Map 
10 (SIMPLE_EDGE) - Reducer 9 <- Reducer 8 (SIMPLE_EDGE) + Reducer 11 <- Map 10 (SIMPLE_EDGE), Map 12 (SIMPLE_EDGE), Union 3 (CONTAINS) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 13 (SIMPLE_EDGE), Union 3 (CONTAINS) + Reducer 4 <- Map 1 (SIMPLE_EDGE), Map 13 (SIMPLE_EDGE) + Reducer 5 <- Reducer 4 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE), Union 3 (CONTAINS) + Reducer 6 <- Map 1 (SIMPLE_EDGE), Map 13 (SIMPLE_EDGE) + Reducer 7 <- Reducer 6 (SIMPLE_EDGE) + Reducer 8 <- Map 1 (SIMPLE_EDGE), Map 13 (SIMPLE_EDGE) + Reducer 9 <- Reducer 8 (SIMPLE_EDGE), Union 3 (CONTAINS) #### A masked pattern was here #### Vertices: Map 1 + Map Operator Tree: + TableScan + alias: src + filterExpr: ((a <= 100) or a is not null) (type: boolean) + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (a <= 100) (type: boolean) + Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: a (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: a is not null (type: boolean) + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: a (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce 
partition columns: _col0 (type: int) + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized + Map 10 Map Operator Tree: TableScan alias: target_ice @@ -97,6 +145,29 @@ STAGE PLANS: Map-reduce partition columns: _col4 (type: int) Statistics: Num rows: 4 Data size: 832 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: bigint), _col2 (type: string), _col3 (type: bigint) + Execution mode: vectorized + Map 12 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 6 Data size: 576 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: a (type: int), b (type: string), c (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6 Data size: 576 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 6 Data size: 576 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string), _col2 (type: int) + Execution mode: vectorized + Map 13 + Map Operator Tree: + TableScan + alias: target_ice + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: FILE__PATH is not null (type: boolean) Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE @@ -112,84 +183,49 @@ STAGE PLANS: Statistics: Num rows: 4 Data size: 1196 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: bigint), _col2 (type: string), _col3 (type: bigint), _col5 (type: string), _col6 (type: int) Filter Operator - 
predicate: (a > 100) (type: boolean) - Statistics: Num rows: 3 Data size: 285 Basic stats: COMPLETE Column stats: COMPLETE + predicate: (a <= 100) (type: boolean) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: PARTITION__SPEC__ID (type: int), PARTITION__HASH (type: bigint), FILE__PATH (type: string), a (type: int), b (type: string), c (type: int) + expressions: PARTITION__SPEC__ID (type: int), PARTITION__HASH (type: bigint), FILE__PATH (type: string), ROW__POSITION (type: bigint), a (type: int), c (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 3 Data size: 873 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 212 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col3 (type: int) + key expressions: _col4 (type: int) null sort order: z sort order: + - Map-reduce partition columns: _col3 (type: int) - Statistics: Num rows: 3 Data size: 873 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: int), _col1 (type: bigint), _col2 (type: string), _col4 (type: string), _col5 (type: int) + Map-reduce partition columns: _col4 (type: int) + Statistics: Num rows: 1 Data size: 212 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: bigint), _col2 (type: string), _col3 (type: bigint), _col5 (type: int) Filter Operator - predicate: ((a > 100) and FILE__PATH is not null) (type: boolean) - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + predicate: (a is not null and FILE__PATH is not null) (type: boolean) + Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: FILE__PATH (type: string), a (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 564 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num 
rows: 4 Data size: 752 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: int) null sort order: z sort order: + Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 3 Data size: 564 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4 Data size: 752 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: string) - Execution mode: vectorized - Map 10 - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 6 Data size: 576 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: a (type: int), b (type: string), c (type: int) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 576 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 6 Data size: 576 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string), _col2 (type: int) - Filter Operator - predicate: (a > 100) (type: boolean) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: a (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: a is not null (type: boolean) - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE 
+ Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: a (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + expressions: PARTITION__SPEC__ID (type: int), PARTITION__HASH (type: bigint), FILE__PATH (type: string), a (type: int), b (type: string), c (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 4 Data size: 1164 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col0 (type: int) + key expressions: _col3 (type: int) null sort order: z sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + Map-reduce partition columns: _col3 (type: int) + Statistics: Num rows: 4 Data size: 1164 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: bigint), _col2 (type: string), _col4 (type: string), _col5 (type: int) Execution mode: vectorized - Reducer 2 + Reducer 11 Reduce Operator Tree: Merge Join Operator condition map: @@ -208,12 +244,34 @@ STAGE PLANS: Statistics: Num rows: 10 Data size: 1200 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 13 Data size: 2097 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 17 Data size: 3296 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.iceberg.mr.hive.HiveIcebergInputFormat output format: org.apache.iceberg.mr.hive.HiveIcebergOutputFormat serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.target_ice + Reducer 2 + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col4 (type: int) + outputColumnNames: _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 1 Data size: 212 Basic stats: 
COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: int), _col2 (type: bigint), _col3 (type: string), _col4 (type: bigint), _col5 (type: int), 'Merged' (type: string), (_col6 + 10) (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 1 Data size: 302 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 17 Data size: 3296 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.iceberg.mr.hive.HiveIcebergInputFormat + output format: org.apache.iceberg.mr.hive.HiveIcebergOutputFormat + serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe + name: default.target_ice Reducer 4 Reduce Operator Tree: Merge Join Operator @@ -225,7 +283,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 Statistics: Num rows: 8 Data size: 2412 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((((_col4 <> _col7) or (_col4 <= 100)) and _col4 is not null) or (((_col4 = _col7) and (_col4 > 100)) or _col4 is null) is null) (type: boolean) + predicate: ((_col4 <> _col7) or ((_col4 = _col7) or _col4 is null) is null) (type: boolean) Statistics: Num rows: 8 Data size: 2412 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col1 (type: bigint), _col2 (type: string), _col3 (type: bigint), _col4 (type: int), _col5 (type: string), _col6 (type: int) @@ -247,10 +305,10 @@ STAGE PLANS: 0 _col2 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 2 Data size: 598 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4 Data size: 1196 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 13 Data size: 2097 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 17 Data size: 3296 Basic 
stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.iceberg.mr.hive.HiveIcebergInputFormat output format: org.apache.iceberg.mr.hive.HiveIcebergOutputFormat @@ -263,33 +321,33 @@ STAGE PLANS: Inner Join 0 to 1 keys: 0 _col0 (type: int) - 1 _col3 (type: int) - outputColumnNames: _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 1 Data size: 291 Basic stats: COMPLETE Column stats: COMPLETE + 1 _col1 (type: int) + outputColumnNames: _col1 + Statistics: Num rows: 4 Data size: 736 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col3 (type: string) + key expressions: _col1 (type: string) null sort order: a sort order: + - Map-reduce partition columns: _col3 (type: string) - Statistics: Num rows: 1 Data size: 291 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: int), _col2 (type: bigint), _col4 (type: int), _col5 (type: string), _col6 (type: int) + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 4 Data size: 736 Basic stats: COMPLETE Column stats: COMPLETE Reducer 7 Execution mode: vectorized Reduce Operator Tree: Select Operator - expressions: VALUE._col1 (type: int), VALUE._col2 (type: bigint), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col4 (type: string), VALUE._col5 (type: int) - outputColumnNames: _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 1 Data size: 291 Basic stats: COMPLETE Column stats: COMPLETE + expressions: KEY.reducesinkkey0 (type: string) + outputColumnNames: _col1 + Statistics: Num rows: 4 Data size: 736 Basic stats: COMPLETE Column stats: COMPLETE PTF Operator Function definitions: Input definition input alias: ptf_0 + output shape: _col1: string type: WINDOWING Windowing table definition input alias: ptf_1 name: windowingtablefunction - order by: _col3 ASC NULLS FIRST - partition by: _col3 + order by: _col1 ASC NULLS FIRST + partition by: _col1 raw input shape: window functions: 
window function definition @@ -298,22 +356,26 @@ STAGE PLANS: window function: GenericUDAFRowNumberEvaluator window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true - Statistics: Num rows: 1 Data size: 291 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4 Data size: 736 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (row_number_window_0 = 1) (type: boolean) - Statistics: Num rows: 1 Data size: 291 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: int), _col2 (type: bigint), _col3 (type: string), -1L (type: bigint), _col4 (type: int), _col5 (type: string), _col6 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 1 Data size: 299 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 13 Data size: 2097 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.iceberg.mr.hive.HiveIcebergInputFormat - output format: org.apache.iceberg.mr.hive.HiveIcebergOutputFormat - serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe - name: default.target_ice + expressions: _col1 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: string) + minReductionHashAggr: 0.4 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: COMPLETE Reducer 8 Reduce Operator Tree: Merge Join Operator @@ -321,33 +383,33 @@ STAGE PLANS: Inner Join 0 to 1 keys: 0 _col0 (type: int) - 
1 _col1 (type: int) - outputColumnNames: _col1 - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE + 1 _col3 (type: int) + outputColumnNames: _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 4 Data size: 1164 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col1 (type: string) + key expressions: _col3 (type: string) null sort order: a sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE + Map-reduce partition columns: _col3 (type: string) + Statistics: Num rows: 4 Data size: 1164 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: bigint), _col4 (type: int), _col5 (type: string), _col6 (type: int) Reducer 9 Execution mode: vectorized Reduce Operator Tree: Select Operator - expressions: KEY.reducesinkkey0 (type: string) - outputColumnNames: _col1 - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE + expressions: VALUE._col1 (type: int), VALUE._col2 (type: bigint), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col4 (type: string), VALUE._col5 (type: int) + outputColumnNames: _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 4 Data size: 1164 Basic stats: COMPLETE Column stats: COMPLETE PTF Operator Function definitions: Input definition input alias: ptf_0 - output shape: _col1: string type: WINDOWING Windowing table definition input alias: ptf_1 name: windowingtablefunction - order by: _col1 ASC NULLS FIRST - partition by: _col1 + order by: _col3 ASC NULLS FIRST + partition by: _col3 raw input shape: window functions: window function definition @@ -356,26 +418,22 @@ STAGE PLANS: window function: GenericUDAFRowNumberEvaluator window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: 
COMPLETE + Statistics: Num rows: 4 Data size: 1164 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (row_number_window_0 = 1) (type: boolean) - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 582 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col0 (type: string) - minReductionHashAggr: 0.4 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE + expressions: _col1 (type: int), _col2 (type: bigint), _col3 (type: string), -1L (type: bigint), _col4 (type: int), _col5 (type: string), _col6 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 2 Data size: 598 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 17 Data size: 3296 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.iceberg.mr.hive.HiveIcebergInputFormat + output format: org.apache.iceberg.mr.hive.HiveIcebergOutputFormat + serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe + name: default.target_ice Union 3 Vertex: Union 3 @@ -399,8 +457,7 @@ STAGE PLANS: PREHOOK: query: explain merge into target_ice as t using source src ON t.a = src.a when matched and t.a > 100 THEN DELETE -when matched then update set b = 'Merged', c = t.c + 10 -when not matched then insert values (src.a, src.b, src.c) +when not matched then insert (a, b) values (src.a, src.b) PREHOOK: type: QUERY PREHOOK: 
Input: default@source PREHOOK: Input: default@target_ice @@ -408,8 +465,7 @@ PREHOOK: Output: default@target_ice POSTHOOK: query: explain merge into target_ice as t using source src ON t.a = src.a when matched and t.a > 100 THEN DELETE -when matched then update set b = 'Merged', c = t.c + 10 -when not matched then insert values (src.a, src.b, src.c) +when not matched then insert (a, b) values (src.a, src.b) POSTHOOK: type: QUERY POSTHOOK: Input: default@source POSTHOOK: Input: default@target_ice @@ -425,62 +481,16 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 11 <- Map 10 (SIMPLE_EDGE), Map 12 (SIMPLE_EDGE), Union 3 (CONTAINS) - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 13 (SIMPLE_EDGE), Union 3 (CONTAINS) - Reducer 4 <- Map 1 (SIMPLE_EDGE), Map 13 (SIMPLE_EDGE) - Reducer 5 <- Reducer 4 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE), Union 3 (CONTAINS) - Reducer 6 <- Map 1 (SIMPLE_EDGE), Map 13 (SIMPLE_EDGE) - Reducer 7 <- Reducer 6 (SIMPLE_EDGE) - Reducer 8 <- Map 1 (SIMPLE_EDGE), Map 13 (SIMPLE_EDGE) - Reducer 9 <- Reducer 8 (SIMPLE_EDGE), Union 3 (CONTAINS) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 10 (SIMPLE_EDGE), Union 3 (CONTAINS) + Reducer 4 <- Map 1 (SIMPLE_EDGE), Map 10 (SIMPLE_EDGE) + Reducer 5 <- Reducer 4 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE), Union 3 (CONTAINS) + Reducer 6 <- Map 1 (SIMPLE_EDGE), Map 10 (SIMPLE_EDGE) + Reducer 7 <- Reducer 6 (SIMPLE_EDGE), Union 3 (CONTAINS) + Reducer 8 <- Map 1 (SIMPLE_EDGE), Map 10 (SIMPLE_EDGE) + Reducer 9 <- Reducer 8 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 - Map Operator Tree: - TableScan - alias: src - filterExpr: ((a <= 100) or a is not null) (type: boolean) - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (a <= 100) (type: boolean) - Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: a (type: int) - outputColumnNames: _col0 - Statistics: Num 
rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: a is not null (type: boolean) - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: a (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized - Map 10 Map Operator Tree: TableScan alias: target_ice @@ -496,29 +506,6 @@ STAGE PLANS: Map-reduce partition columns: _col4 (type: int) Statistics: Num rows: 4 Data size: 832 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: bigint), _col2 (type: string), _col3 (type: bigint) - Execution mode: vectorized - Map 12 - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 6 Data size: 576 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: a (type: int), b (type: string), c (type: int) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 576 Basic 
stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 6 Data size: 576 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string), _col2 (type: int) - Execution mode: vectorized - Map 13 - Map Operator Tree: - TableScan - alias: target_ice - Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: FILE__PATH is not null (type: boolean) Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE @@ -534,49 +521,84 @@ STAGE PLANS: Statistics: Num rows: 4 Data size: 1196 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: bigint), _col2 (type: string), _col3 (type: bigint), _col5 (type: string), _col6 (type: int) Filter Operator - predicate: (a <= 100) (type: boolean) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + predicate: (a > 100) (type: boolean) + Statistics: Num rows: 3 Data size: 285 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: PARTITION__SPEC__ID (type: int), PARTITION__HASH (type: bigint), FILE__PATH (type: string), ROW__POSITION (type: bigint), a (type: int), c (type: int) + expressions: PARTITION__SPEC__ID (type: int), PARTITION__HASH (type: bigint), FILE__PATH (type: string), a (type: int), b (type: string), c (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1 Data size: 212 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 873 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col4 (type: int) + key expressions: _col3 (type: int) null sort order: z sort order: + - Map-reduce partition columns: _col4 (type: int) - Statistics: Num rows: 1 Data size: 212 Basic stats: 
COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: int), _col1 (type: bigint), _col2 (type: string), _col3 (type: bigint), _col5 (type: int) + Map-reduce partition columns: _col3 (type: int) + Statistics: Num rows: 3 Data size: 873 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: bigint), _col2 (type: string), _col4 (type: string), _col5 (type: int) Filter Operator - predicate: (a is not null and FILE__PATH is not null) (type: boolean) - Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + predicate: ((a > 100) and FILE__PATH is not null) (type: boolean) + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: FILE__PATH (type: string), a (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 752 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 564 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: int) null sort order: z sort order: + Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 4 Data size: 752 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 564 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: string) + Execution mode: vectorized + Map 10 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 6 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: a (type: int), b (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 6 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) 
+ Filter Operator + predicate: (a > 100) (type: boolean) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: a (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: a is not null (type: boolean) - Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: PARTITION__SPEC__ID (type: int), PARTITION__HASH (type: bigint), FILE__PATH (type: string), a (type: int), b (type: string), c (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 4 Data size: 1164 Basic stats: COMPLETE Column stats: COMPLETE + expressions: a (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col3 (type: int) + key expressions: _col0 (type: int) null sort order: z sort order: + - Map-reduce partition columns: _col3 (type: int) - Statistics: Num rows: 4 Data size: 1164 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: int), _col1 (type: bigint), _col2 (type: string), _col4 (type: string), _col5 (type: int) + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: 
vectorized - Reducer 11 + Reducer 2 Reduce Operator Tree: Merge Join Operator condition map: @@ -584,45 +606,23 @@ STAGE PLANS: keys: 0 _col4 (type: int) 1 _col0 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 14 Data size: 1520 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 14 Data size: 1500 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: _col4 is null (type: boolean) - Statistics: Num rows: 10 Data size: 1216 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 10 Data size: 1200 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col0 (type: int), _col1 (type: bigint), _col2 (type: string), _col3 (type: bigint), _col5 (type: int), _col6 (type: string), _col7 (type: int) + expressions: _col0 (type: int), _col1 (type: bigint), _col2 (type: string), _col3 (type: bigint), _col5 (type: int), _col6 (type: string), null (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 10 Data size: 1200 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 10 Data size: 1188 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 17 Data size: 3296 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 2085 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.iceberg.mr.hive.HiveIcebergInputFormat output format: org.apache.iceberg.mr.hive.HiveIcebergOutputFormat serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.target_ice - Reducer 2 - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col4 (type: int) - outputColumnNames: _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 1 Data size: 212 Basic stats: 
COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col1 (type: int), _col2 (type: bigint), _col3 (type: string), _col4 (type: bigint), _col5 (type: int), 'Merged' (type: string), (_col6 + 10) (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 1 Data size: 302 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 17 Data size: 3296 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.iceberg.mr.hive.HiveIcebergInputFormat - output format: org.apache.iceberg.mr.hive.HiveIcebergOutputFormat - serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe - name: default.target_ice Reducer 4 Reduce Operator Tree: Merge Join Operator @@ -634,7 +634,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 Statistics: Num rows: 8 Data size: 2412 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((_col4 <> _col7) or ((_col4 = _col7) or _col4 is null) is null) (type: boolean) + predicate: ((((_col4 <> _col7) or (_col4 <= 100)) and _col4 is not null) or (((_col4 = _col7) and (_col4 > 100)) or _col4 is null) is null) (type: boolean) Statistics: Num rows: 8 Data size: 2412 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col1 (type: bigint), _col2 (type: string), _col3 (type: bigint), _col4 (type: int), _col5 (type: string), _col6 (type: int) @@ -656,10 +656,10 @@ STAGE PLANS: 0 _col2 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 4 Data size: 1196 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 598 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 17 Data size: 3296 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 2085 Basic 
stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.iceberg.mr.hive.HiveIcebergInputFormat output format: org.apache.iceberg.mr.hive.HiveIcebergOutputFormat @@ -672,33 +672,33 @@ STAGE PLANS: Inner Join 0 to 1 keys: 0 _col0 (type: int) - 1 _col1 (type: int) - outputColumnNames: _col1 - Statistics: Num rows: 4 Data size: 736 Basic stats: COMPLETE Column stats: COMPLETE + 1 _col3 (type: int) + outputColumnNames: _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 1 Data size: 291 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col1 (type: string) + key expressions: _col3 (type: string) null sort order: a sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 4 Data size: 736 Basic stats: COMPLETE Column stats: COMPLETE + Map-reduce partition columns: _col3 (type: string) + Statistics: Num rows: 1 Data size: 291 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: bigint), _col4 (type: int), _col5 (type: string), _col6 (type: int) Reducer 7 Execution mode: vectorized Reduce Operator Tree: Select Operator - expressions: KEY.reducesinkkey0 (type: string) - outputColumnNames: _col1 - Statistics: Num rows: 4 Data size: 736 Basic stats: COMPLETE Column stats: COMPLETE + expressions: VALUE._col1 (type: int), VALUE._col2 (type: bigint), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col4 (type: string), VALUE._col5 (type: int) + outputColumnNames: _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 1 Data size: 291 Basic stats: COMPLETE Column stats: COMPLETE PTF Operator Function definitions: Input definition input alias: ptf_0 - output shape: _col1: string type: WINDOWING Windowing table definition input alias: ptf_1 name: windowingtablefunction - order by: _col1 ASC NULLS FIRST - partition by: _col1 + order by: _col3 ASC NULLS FIRST + partition by: _col3 raw input shape: window functions: 
window function definition @@ -707,26 +707,22 @@ STAGE PLANS: window function: GenericUDAFRowNumberEvaluator window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true - Statistics: Num rows: 4 Data size: 736 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 291 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (row_number_window_0 = 1) (type: boolean) - Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 291 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col0 (type: string) - minReductionHashAggr: 0.4 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: COMPLETE + expressions: _col1 (type: int), _col2 (type: bigint), _col3 (type: string), -1L (type: bigint), _col4 (type: int), _col5 (type: string), _col6 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 1 Data size: 299 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 13 Data size: 2085 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.iceberg.mr.hive.HiveIcebergInputFormat + output format: org.apache.iceberg.mr.hive.HiveIcebergOutputFormat + serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe + name: default.target_ice Reducer 8 Reduce Operator Tree: Merge Join Operator @@ -734,33 +730,33 @@ STAGE PLANS: Inner Join 0 to 1 keys: 0 _col0 (type: int) - 
1 _col3 (type: int) - outputColumnNames: _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 4 Data size: 1164 Basic stats: COMPLETE Column stats: COMPLETE + 1 _col1 (type: int) + outputColumnNames: _col1 + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col3 (type: string) + key expressions: _col1 (type: string) null sort order: a sort order: + - Map-reduce partition columns: _col3 (type: string) - Statistics: Num rows: 4 Data size: 1164 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: int), _col2 (type: bigint), _col4 (type: int), _col5 (type: string), _col6 (type: int) + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE Reducer 9 Execution mode: vectorized Reduce Operator Tree: Select Operator - expressions: VALUE._col1 (type: int), VALUE._col2 (type: bigint), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col4 (type: string), VALUE._col5 (type: int) - outputColumnNames: _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 4 Data size: 1164 Basic stats: COMPLETE Column stats: COMPLETE + expressions: KEY.reducesinkkey0 (type: string) + outputColumnNames: _col1 + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE PTF Operator Function definitions: Input definition input alias: ptf_0 + output shape: _col1: string type: WINDOWING Windowing table definition input alias: ptf_1 name: windowingtablefunction - order by: _col3 ASC NULLS FIRST - partition by: _col3 + order by: _col1 ASC NULLS FIRST + partition by: _col1 raw input shape: window functions: window function definition @@ -769,22 +765,26 @@ STAGE PLANS: window function: GenericUDAFRowNumberEvaluator window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true - Statistics: Num rows: 4 Data size: 1164 Basic stats: COMPLETE Column 
stats: COMPLETE + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (row_number_window_0 = 1) (type: boolean) - Statistics: Num rows: 2 Data size: 582 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: int), _col2 (type: bigint), _col3 (type: string), -1L (type: bigint), _col4 (type: int), _col5 (type: string), _col6 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 2 Data size: 598 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 17 Data size: 3296 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.iceberg.mr.hive.HiveIcebergInputFormat - output format: org.apache.iceberg.mr.hive.HiveIcebergOutputFormat - serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe - name: default.target_ice + expressions: _col1 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: string) + minReductionHashAggr: 0.4 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE Union 3 Vertex: Union 3 diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/rewrite/CopyOnWriteMergeRewriter.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/rewrite/CopyOnWriteMergeRewriter.java index 968f34078af9..a8c47811c9f4 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/rewrite/CopyOnWriteMergeRewriter.java +++ 
b/ql/src/java/org/apache/hadoop/hive/ql/parse/rewrite/CopyOnWriteMergeRewriter.java @@ -22,6 +22,7 @@ import com.google.common.collect.Lists; import org.apache.commons.lang3.StringUtils; import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.metastore.api.FieldSchema; import org.apache.hadoop.hive.ql.Context; import org.apache.hadoop.hive.ql.metadata.Hive; import org.apache.hadoop.hive.ql.metadata.HiveUtils; @@ -35,8 +36,11 @@ import java.util.ArrayList; import java.util.List; +import java.util.Map; import java.util.Optional; import java.util.function.UnaryOperator; +import java.util.stream.Collectors; +import java.util.stream.IntStream; import static org.apache.commons.lang3.StringUtils.isNotBlank; import static org.apache.hadoop.hive.ql.parse.rewrite.sql.SqlGeneratorFactory.TARGET_PREFIX; @@ -149,8 +153,20 @@ public void appendWhenNotMatchedInsertClause(MergeStatement.InsertClause insertC hintStr = null; } List<String> values = sqlGenerator.getDeleteValues(Context.Operation.MERGE); - values.add(insertClause.getValuesClause()); + if (insertClause.getColumnListText() != null) { + String[] columnNames = insertClause.getColumnListText() + .substring(1, insertClause.getColumnListText().length() - 1).split(","); + String[] columnValues = insertClause.getValuesClause().split(","); + + Map<String, String> columnMap = IntStream.range(0, columnNames.length).boxed().collect( + Collectors.toMap(i -> ParseUtils.stripIdentifierQuotes(columnNames[i].trim()), i -> columnValues[i])); + for (FieldSchema col : mergeStatement.getTargetTable().getAllCols()) { + values.add(columnMap.getOrDefault(col.getName(), "null")); + } + } else { + values.add(insertClause.getValuesClause()); + } sqlGenerator.append(StringUtils.join(values, ",")); sqlGenerator.append("\nFROM " + mergeStatement.getSourceName()); sqlGenerator.append("\n WHERE ");