From 5b1435c52241e8e2a8f268afc692c88f7e3d8b1d Mon Sep 17 00:00:00 2001 From: Jeongdae Kim Date: Tue, 25 Jun 2024 15:11:22 +0900 Subject: [PATCH 1/6] HIVE-28347 --- .../GenericUDAFMkCollectionEvaluator.java | 21 +- .../clientpositive/udaf_collect_set_2.q | 191 ++++++++++++++++++ 2 files changed, 201 insertions(+), 11 deletions(-) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFMkCollectionEvaluator.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFMkCollectionEvaluator.java index cffc7f765100..d5a732a48daa 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFMkCollectionEvaluator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFMkCollectionEvaluator.java @@ -66,18 +66,17 @@ public ObjectInspector init(Mode m, ObjectInspector[] parameters) inputOI = parameters[0]; return ObjectInspectorFactory.getStandardListObjectInspector( ObjectInspectorUtils.getStandardObjectInspector(inputOI)); + } else if (m == Mode.PARTIAL2 || m == Mode.FINAL) { + internalMergeOI = (ListObjectInspector) parameters[0]; + inputOI = internalMergeOI.getListElementObjectInspector(); + loi = (StandardListObjectInspector) + ObjectInspectorUtils.getStandardObjectInspector(internalMergeOI); + return loi; } else { - if (!(parameters[0] instanceof ListObjectInspector)) { - //no map aggregation. - inputOI = ObjectInspectorUtils.getStandardObjectInspector(parameters[0]); - return ObjectInspectorFactory.getStandardListObjectInspector(inputOI); - } else { - internalMergeOI = (ListObjectInspector) parameters[0]; - inputOI = internalMergeOI.getListElementObjectInspector(); - loi = (StandardListObjectInspector) - ObjectInspectorUtils.getStandardObjectInspector(internalMergeOI); - return loi; - } + //no map aggregation. + inputOI = parameters[0]; + return ObjectInspectorFactory.getStandardListObjectInspector( + ObjectInspectorUtils.getStandardObjectInspector(inputOI)); } } diff --git a/ql/src/test/queries/clientpositive/udaf_collect_set_2.q b/ql/src/test/queries/clientpositive/udaf_collect_set_2.q index 769655bae1f7..7b535ec538b0 100644 --- a/ql/src/test/queries/clientpositive/udaf_collect_set_2.q +++ b/ql/src/test/queries/clientpositive/udaf_collect_set_2.q @@ -31,6 +31,43 @@ LOAD DATA LOCAL INPATH "../../data/files/nested_orders.txt" INTO TABLE nested_or -- 1.1 when field is primitive +set hive.map.aggr = true; + +SELECT c.id, sort_array(collect_set(named_struct("name", c.name, "date", o.d, "amount", o.amount))) +FROM customers c +INNER JOIN orders o +ON (c.id = o.cid) GROUP BY c.id; + +SELECT c.id, sort_array(collect_list(named_struct("name", c.name, "date", o.d, "amount", o.amount))) +FROM customers c +INNER JOIN orders o +ON (c.id = o.cid) GROUP BY c.id; + +-- cast decimal + +SELECT c.id, sort_array(collect_set(named_struct("name", c.name, "date", o.d, "amount", cast(o.amount as decimal(10,1))))) +FROM customers c +INNER JOIN orders o +ON (c.id = o.cid) GROUP BY c.id; + +SELECT c.id, sort_array(collect_list(named_struct("name", c.name, "date", o.d, "amount", cast(o.amount as decimal(10,1))))) +FROM customers c +INNER JOIN orders o +ON (c.id = o.cid) GROUP BY c.id; + + +SELECT c.id, sort_array(collect_set(struct(c.name, o.d, o.amount))) +FROM customers c +INNER JOIN orders o +ON (c.id = o.cid) GROUP BY c.id; + +SELECT c.id, sort_array(collect_list(struct(c.name, o.d, o.amount))) +FROM customers c +INNER JOIN orders o +ON (c.id = o.cid) GROUP BY c.id; + +set hive.map.aggr = false; + SELECT c.id, sort_array(collect_set(named_struct("name", c.name, "date", o.d, "amount", o.amount))) FROM customers c INNER JOIN orders o @@ -67,6 +104,8 @@ ON (c.id = o.cid) GROUP BY c.id; -- 1.2 when field is map +set hive.map.aggr = true; + SELECT c.id, sort_array(collect_set(named_struct("name", c.name, "date", o.d, "sub", o.sub))) FROM customers c INNER JOIN nested_orders o @@ -87,9 +126,54 @@ FROM customers c INNER JOIN nested_orders o ON (c.id = o.cid) GROUP BY c.id; +set hive.map.aggr = false; + +SELECT c.id, sort_array(collect_set(named_struct("name", c.name, "date", o.d, "sub", o.sub))) +FROM customers c +INNER JOIN nested_orders o +ON (c.id = o.cid) GROUP BY c.id; + +SELECT c.id, sort_array(collect_list(named_struct("name", c.name, "date", o.d, "sub", o.sub))) +FROM customers c +INNER JOIN nested_orders o +ON (c.id = o.cid) GROUP BY c.id; + +SELECT c.id, sort_array(collect_set(struct(c.name, o.d, o.sub))) +FROM customers c +INNER JOIN nested_orders o +ON (c.id = o.cid) GROUP BY c.id; + +SELECT c.id, sort_array(collect_list(struct(c.name, o.d, o.sub))) +FROM customers c +INNER JOIN nested_orders o +ON (c.id = o.cid) GROUP BY c.id; -- 1.3 when field is list +set hive.map.aggr = true; + +SELECT c.id, sort_array(collect_set(named_struct("name", c.name, "date", o.d, "sub", map_values(o.sub)))) +FROM customers c +INNER JOIN nested_orders o +ON (c.id = o.cid) GROUP BY c.id; + +SELECT c.id, sort_array(collect_list(named_struct("name", c.name, "date", o.d, "sub", map_values(o.sub)))) +FROM customers c +INNER JOIN nested_orders o +ON (c.id = o.cid) GROUP BY c.id; + +SELECT c.id, sort_array(collect_set(struct(c.name, o.d, map_values(o.sub)))) +FROM customers c +INNER JOIN nested_orders o +ON (c.id = o.cid) GROUP BY c.id; + +SELECT c.id, sort_array(collect_list(struct(c.name, o.d, map_values(o.sub)))) +FROM customers c +INNER JOIN nested_orders o +ON (c.id = o.cid) GROUP BY c.id; + +set hive.map.aggr = false; + SELECT c.id, sort_array(collect_set(named_struct("name", c.name, "date", o.d, "sub", map_values(o.sub)))) FROM customers c INNER JOIN nested_orders o @@ -115,6 +199,32 @@ ON (c.id = o.cid) GROUP BY c.id; -- 2.1 when field is primitive +set hive.map.aggr = true; + +SELECT c.id, sort_array(collect_set(array(o.amount))) +FROM customers c +INNER JOIN orders o +ON (c.id = o.cid) GROUP BY c.id; + +SELECT c.id, sort_array(collect_list(array(o.amount))) +FROM customers c +INNER JOIN orders o +ON (c.id = o.cid) GROUP BY c.id; + +-- cast decimal + +SELECT c.id, sort_array(collect_set(array(cast(o.amount as decimal(10,1))))) +FROM customers c +INNER JOIN orders o +ON (c.id = o.cid) GROUP BY c.id; + +SELECT c.id, sort_array(collect_list(array(cast(o.amount as decimal(10,1))))) +FROM customers c +INNER JOIN orders o +ON (c.id = o.cid) GROUP BY c.id; + +set hive.map.aggr = false; + SELECT c.id, sort_array(collect_set(array(o.amount))) FROM customers c INNER JOIN orders o @@ -139,6 +249,20 @@ ON (c.id = o.cid) GROUP BY c.id; -- 2.2 when field is struct +set hive.map.aggr = true; + +SELECT c.id, sort_array(collect_set(array(o.sub))) +FROM customers c +INNER JOIN nested_orders o +ON (c.id = o.cid) GROUP BY c.id; + +SELECT c.id, sort_array(collect_list(array(o.sub))) +FROM customers c +INNER JOIN nested_orders o +ON (c.id = o.cid) GROUP BY c.id; + +set hive.map.aggr = false; + SELECT c.id, sort_array(collect_set(array(o.sub))) FROM customers c INNER JOIN nested_orders o @@ -151,6 +275,20 @@ ON (c.id = o.cid) GROUP BY c.id; -- 2.3 when field is list +set hive.map.aggr = true; + +SELECT c.id, sort_array(collect_set(array(map_values(o.sub)))) +FROM customers c +INNER JOIN nested_orders o +ON (c.id = o.cid) GROUP BY c.id; + +SELECT c.id, sort_array(collect_list(array(map_values(o.sub)))) +FROM customers c +INNER JOIN nested_orders o +ON (c.id = o.cid) GROUP BY c.id; + +set hive.map.aggr = false; + SELECT c.id, sort_array(collect_set(array(map_values(o.sub)))) FROM customers c INNER JOIN nested_orders o @@ -166,6 +304,32 @@ ON (c.id = o.cid) GROUP BY c.id; -- 3.1 when field is primitive +set hive.map.aggr = true; + +SELECT c.id, sort_array(collect_set(map("amount", o.amount))) +FROM customers c +INNER JOIN orders o +ON (c.id = o.cid) GROUP BY c.id; + +SELECT c.id, sort_array(collect_list(map("amount", o.amount))) +FROM customers c +INNER JOIN orders o +ON (c.id = o.cid) GROUP BY c.id; + +-- cast decimal + +SELECT c.id, sort_array(collect_set(map("amount", cast(o.amount as decimal(10,1))))) +FROM customers c +INNER JOIN orders o +ON (c.id = o.cid) GROUP BY c.id; + +SELECT c.id, sort_array(collect_list(map("amount", cast(o.amount as decimal(10,1))))) +FROM customers c +INNER JOIN orders o +ON (c.id = o.cid) GROUP BY c.id; + +set hive.map.aggr = false; + SELECT c.id, sort_array(collect_set(map("amount", o.amount))) FROM customers c INNER JOIN orders o @@ -190,6 +354,20 @@ ON (c.id = o.cid) GROUP BY c.id; -- 3.2 when field is struct +set hive.map.aggr = true; + +SELECT c.id, sort_array(collect_set(map("sub", o.sub))) +FROM customers c +INNER JOIN nested_orders o +ON (c.id = o.cid) GROUP BY c.id; + +SELECT c.id, sort_array(collect_list(map("sub", o.sub))) +FROM customers c +INNER JOIN nested_orders o +ON (c.id = o.cid) GROUP BY c.id; + +set hive.map.aggr = false; + SELECT c.id, sort_array(collect_set(map("sub", o.sub))) FROM customers c INNER JOIN nested_orders o @@ -202,6 +380,8 @@ ON (c.id = o.cid) GROUP BY c.id; -- 3.3 when field is list +set hive.map.aggr = true; + SELECT c.id, sort_array(collect_set(map("sub", map_values(o.sub)))) FROM customers c INNER JOIN nested_orders o @@ -212,6 +392,17 @@ FROM customers c INNER JOIN nested_orders o ON (c.id = o.cid) GROUP BY c.id; +set hive.map.aggr = false; + +SELECT c.id, sort_array(collect_set(map("sub", map_values(o.sub)))) +FROM customers c +INNER JOIN nested_orders o +ON (c.id = o.cid) GROUP BY c.id; + +SELECT c.id, sort_array(collect_list(map("sub", map_values(o.sub)))) +FROM customers c +INNER JOIN nested_orders o +ON (c.id = o.cid) GROUP BY c.id; -- clean up From ea79e1a6538feebe9f6439dab18b4883fcb03e8e Mon Sep 17 00:00:00 2001 From: Jeongdae Kim Date: Thu, 27 Jun 2024 07:53:13 +0900 Subject: [PATCH 2/6] Remove duplicates --- .../ql/udf/generic/GenericUDAFMkCollectionEvaluator.java | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFMkCollectionEvaluator.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFMkCollectionEvaluator.java index d5a732a48daa..2daca6d68b62 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFMkCollectionEvaluator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFMkCollectionEvaluator.java @@ -62,21 +62,16 @@ public ObjectInspector init(Mode m, ObjectInspector[] parameters) super.init(m, parameters); // init output object inspectors // The output of a partial aggregation is a list - if (m == Mode.PARTIAL1) { + if (mode == Mode.PARTIAL1 || mode == Mode.COMPLETE) { inputOI = parameters[0]; return ObjectInspectorFactory.getStandardListObjectInspector( ObjectInspectorUtils.getStandardObjectInspector(inputOI)); - } else if (m == Mode.PARTIAL2 || m == Mode.FINAL) { + } else { internalMergeOI = (ListObjectInspector) parameters[0]; inputOI = internalMergeOI.getListElementObjectInspector(); loi = (StandardListObjectInspector) ObjectInspectorUtils.getStandardObjectInspector(internalMergeOI); return loi; - } else { - //no map aggregation. - inputOI = parameters[0]; - return ObjectInspectorFactory.getStandardListObjectInspector( - ObjectInspectorUtils.getStandardObjectInspector(inputOI)); } } From a145a2ff5c43175b418d64f9761c402a37cef483 Mon Sep 17 00:00:00 2001 From: Jeongdae Kim Date: Thu, 27 Jun 2024 09:55:30 +0900 Subject: [PATCH 3/6] Update q.out file --- .../llap/udaf_collect_set_2.q.out | 598 +++++++++++++++++- 1 file changed, 584 insertions(+), 14 deletions(-) diff --git a/ql/src/test/results/clientpositive/llap/udaf_collect_set_2.q.out b/ql/src/test/results/clientpositive/llap/udaf_collect_set_2.q.out index 0a0c7b8f5935..71e2524a29ac 100644 --- a/ql/src/test/results/clientpositive/llap/udaf_collect_set_2.q.out +++ b/ql/src/test/results/clientpositive/llap/udaf_collect_set_2.q.out @@ -200,6 +200,196 @@ POSTHOOK: Input: default@orders 3 [{"col1":"Martin","col2":"2014-05-11","col3":30.5},{"col1":"Martin","col2":"2014-12-12","col3":210.03}] 1 [{"col1":"Chris","col2":"2013-06-21","col3":21.45},{"col1":"Chris","col2":"2013-06-21","col3":21.45},{"col1":"Chris","col2":"2014-10-11","col3":29.36}] 2 [{"col1":"John","col2":"2013-08-10","col3":126.57},{"col1":"John","col2":"2014-06-25","col3":3.65},{"col1":"John","col2":"2015-01-15","col3":27.45}] +PREHOOK: query: SELECT c.id, sort_array(collect_set(named_struct("name", c.name, "date", o.d, "amount", o.amount))) +FROM customers c +INNER JOIN orders o +ON (c.id = o.cid) GROUP BY c.id +PREHOOK: type: QUERY +PREHOOK: Input: default@customers +PREHOOK: Input: default@orders +#### A masked pattern was here #### +POSTHOOK: query: SELECT c.id, sort_array(collect_set(named_struct("name", c.name, "date", o.d, "amount", o.amount))) +FROM customers c +INNER JOIN orders o +ON (c.id = o.cid) GROUP BY c.id +POSTHOOK: type: QUERY +POSTHOOK: Input: default@customers +POSTHOOK: Input: default@orders +#### A masked pattern was here #### +3 [{"name":"Martin","date":"2014-05-11","amount":30.5},{"name":"Martin","date":"2014-12-12","amount":210.03}] +1 [{"name":"Chris","date":"2013-06-21","amount":21.45},{"name":"Chris","date":"2014-10-11","amount":29.36}] +2 [{"name":"John","date":"2013-08-10","amount":126.57},{"name":"John","date":"2014-06-25","amount":3.65},{"name":"John","date":"2015-01-15","amount":27.45}] +PREHOOK: query: SELECT c.id, sort_array(collect_list(named_struct("name", c.name, "date", o.d, "amount", o.amount))) +FROM customers c +INNER JOIN orders o +ON (c.id = o.cid) GROUP BY c.id +PREHOOK: type: QUERY +PREHOOK: Input: default@customers +PREHOOK: Input: default@orders +#### A masked pattern was here #### +POSTHOOK: query: SELECT c.id, sort_array(collect_list(named_struct("name", c.name, "date", o.d, "amount", o.amount))) +FROM customers c +INNER JOIN orders o +ON (c.id = o.cid) GROUP BY c.id +POSTHOOK: type: QUERY +POSTHOOK: Input: default@customers +POSTHOOK: Input: default@orders +#### A masked pattern was here #### +3 [{"name":"Martin","date":"2014-05-11","amount":30.5},{"name":"Martin","date":"2014-12-12","amount":210.03}] +1 [{"name":"Chris","date":"2013-06-21","amount":21.45},{"name":"Chris","date":"2013-06-21","amount":21.45},{"name":"Chris","date":"2014-10-11","amount":29.36}] +2 [{"name":"John","date":"2013-08-10","amount":126.57},{"name":"John","date":"2014-06-25","amount":3.65},{"name":"John","date":"2015-01-15","amount":27.45}] +PREHOOK: query: SELECT c.id, sort_array(collect_set(named_struct("name", c.name, "date", o.d, "amount", cast(o.amount as decimal(10,1))))) +FROM customers c +INNER JOIN orders o +ON (c.id = o.cid) GROUP BY c.id +PREHOOK: type: QUERY +PREHOOK: Input: default@customers +PREHOOK: Input: default@orders +#### A masked pattern was here #### +POSTHOOK: query: SELECT c.id, sort_array(collect_set(named_struct("name", c.name, "date", o.d, "amount", cast(o.amount as decimal(10,1))))) +FROM customers c +INNER JOIN orders o +ON (c.id = o.cid) GROUP BY c.id +POSTHOOK: type: QUERY +POSTHOOK: Input: default@customers +POSTHOOK: Input: default@orders +#### A masked pattern was here #### +3 [{"name":"Martin","date":"2014-05-11","amount":30.5},{"name":"Martin","date":"2014-12-12","amount":210}] +1 [{"name":"Chris","date":"2013-06-21","amount":21.5},{"name":"Chris","date":"2014-10-11","amount":29.4}] +2 [{"name":"John","date":"2013-08-10","amount":126.6},{"name":"John","date":"2014-06-25","amount":3.7},{"name":"John","date":"2015-01-15","amount":27.5}] +PREHOOK: query: SELECT c.id, sort_array(collect_list(named_struct("name", c.name, "date", o.d, "amount", cast(o.amount as decimal(10,1))))) +FROM customers c +INNER JOIN orders o +ON (c.id = o.cid) GROUP BY c.id +PREHOOK: type: QUERY +PREHOOK: Input: default@customers +PREHOOK: Input: default@orders +#### A masked pattern was here #### +POSTHOOK: query: SELECT c.id, sort_array(collect_list(named_struct("name", c.name, "date", o.d, "amount", cast(o.amount as decimal(10,1))))) +FROM customers c +INNER JOIN orders o +ON (c.id = o.cid) GROUP BY c.id +POSTHOOK: type: QUERY +POSTHOOK: Input: default@customers +POSTHOOK: Input: default@orders +#### A masked pattern was here #### +3 [{"name":"Martin","date":"2014-05-11","amount":30.5},{"name":"Martin","date":"2014-12-12","amount":210}] +1 [{"name":"Chris","date":"2013-06-21","amount":21.5},{"name":"Chris","date":"2013-06-21","amount":21.5},{"name":"Chris","date":"2014-10-11","amount":29.4}] +2 [{"name":"John","date":"2013-08-10","amount":126.6},{"name":"John","date":"2014-06-25","amount":3.7},{"name":"John","date":"2015-01-15","amount":27.5}] +PREHOOK: query: SELECT c.id, sort_array(collect_set(struct(c.name, o.d, o.amount))) +FROM customers c +INNER JOIN orders o +ON (c.id = o.cid) GROUP BY c.id +PREHOOK: type: QUERY +PREHOOK: Input: default@customers +PREHOOK: Input: default@orders +#### A masked pattern was here #### +POSTHOOK: query: SELECT c.id, sort_array(collect_set(struct(c.name, o.d, o.amount))) +FROM customers c +INNER JOIN orders o +ON (c.id = o.cid) GROUP BY c.id +POSTHOOK: type: QUERY +POSTHOOK: Input: default@customers +POSTHOOK: Input: default@orders +#### A masked pattern was here #### +3 [{"col1":"Martin","col2":"2014-05-11","col3":30.5},{"col1":"Martin","col2":"2014-12-12","col3":210.03}] +1 [{"col1":"Chris","col2":"2013-06-21","col3":21.45},{"col1":"Chris","col2":"2014-10-11","col3":29.36}] +2 [{"col1":"John","col2":"2013-08-10","col3":126.57},{"col1":"John","col2":"2014-06-25","col3":3.65},{"col1":"John","col2":"2015-01-15","col3":27.45}] +PREHOOK: query: SELECT c.id, sort_array(collect_list(struct(c.name, o.d, o.amount))) +FROM customers c +INNER JOIN orders o +ON (c.id = o.cid) GROUP BY c.id +PREHOOK: type: QUERY +PREHOOK: Input: default@customers +PREHOOK: Input: default@orders +#### A masked pattern was here #### +POSTHOOK: query: SELECT c.id, sort_array(collect_list(struct(c.name, o.d, o.amount))) +FROM customers c +INNER JOIN orders o +ON (c.id = o.cid) GROUP BY c.id +POSTHOOK: type: QUERY +POSTHOOK: Input: default@customers +POSTHOOK: Input: default@orders +#### A masked pattern was here #### +3 [{"col1":"Martin","col2":"2014-05-11","col3":30.5},{"col1":"Martin","col2":"2014-12-12","col3":210.03}] +1 [{"col1":"Chris","col2":"2013-06-21","col3":21.45},{"col1":"Chris","col2":"2013-06-21","col3":21.45},{"col1":"Chris","col2":"2014-10-11","col3":29.36}] +2 [{"col1":"John","col2":"2013-08-10","col3":126.57},{"col1":"John","col2":"2014-06-25","col3":3.65},{"col1":"John","col2":"2015-01-15","col3":27.45}] +PREHOOK: query: SELECT c.id, sort_array(collect_set(named_struct("name", c.name, "date", o.d, "sub", o.sub))) +FROM customers c +INNER JOIN nested_orders o +ON (c.id = o.cid) GROUP BY c.id +PREHOOK: type: QUERY +PREHOOK: Input: default@customers +PREHOOK: Input: default@nested_orders +#### A masked pattern was here #### +POSTHOOK: query: SELECT c.id, sort_array(collect_set(named_struct("name", c.name, "date", o.d, "sub", o.sub))) +FROM customers c +INNER JOIN nested_orders o +ON (c.id = o.cid) GROUP BY c.id +POSTHOOK: type: QUERY +POSTHOOK: Input: default@customers +POSTHOOK: Input: default@nested_orders +#### A masked pattern was here #### +3 [{"name":"Martin","date":"2014-05-11","sub":{"\"apple\"":30.5,"\"orange\"":41.35}},{"name":"Martin","date":"2014-12-12","sub":{"\"icecream\"":210.03,"\"banana\"":100.56,"\"coffee":500.0}}] +1 [{"name":"Chris","date":"2013-06-21","sub":{"\"juice\"":21.45,"\"bread\"":15.2}},{"name":"Chris","date":"2014-10-11","sub":{"\"rice\"":29.36,"\"grape\"":1200.5}}] +2 [{"name":"John","date":"2013-08-10","sub":{"\"yogurt\"":126.57,"\"beef\"":210.57}},{"name":"John","date":"2014-06-25","sub":{"\"chocolate\"":3.65,"\"water\"":420.36}},{"name":"John","date":"2015-01-15","sub":{"\"milk\"":27.45}}] +PREHOOK: query: SELECT c.id, sort_array(collect_list(named_struct("name", c.name, "date", o.d, "sub", o.sub))) +FROM customers c +INNER JOIN nested_orders o +ON (c.id = o.cid) GROUP BY c.id +PREHOOK: type: QUERY +PREHOOK: Input: default@customers +PREHOOK: Input: default@nested_orders +#### A masked pattern was here #### +POSTHOOK: query: SELECT c.id, sort_array(collect_list(named_struct("name", c.name, "date", o.d, "sub", o.sub))) +FROM customers c +INNER JOIN nested_orders o +ON (c.id = o.cid) GROUP BY c.id +POSTHOOK: type: QUERY +POSTHOOK: Input: default@customers +POSTHOOK: Input: default@nested_orders +#### A masked pattern was here #### +3 [{"name":"Martin","date":"2014-05-11","sub":{"\"apple\"":30.5,"\"orange\"":41.35}},{"name":"Martin","date":"2014-12-12","sub":{"\"icecream\"":210.03,"\"banana\"":100.56,"\"coffee":500.0}}] +1 [{"name":"Chris","date":"2013-06-21","sub":{"\"juice\"":21.45,"\"bread\"":15.2}},{"name":"Chris","date":"2013-06-21","sub":{"\"juice\"":21.45,"\"bread\"":15.2}},{"name":"Chris","date":"2014-10-11","sub":{"\"rice\"":29.36,"\"grape\"":1200.5}}] +2 [{"name":"John","date":"2013-08-10","sub":{"\"yogurt\"":126.57,"\"beef\"":210.57}},{"name":"John","date":"2014-06-25","sub":{"\"chocolate\"":3.65,"\"water\"":420.36}},{"name":"John","date":"2015-01-15","sub":{"\"milk\"":27.45}}] +PREHOOK: query: SELECT c.id, sort_array(collect_set(struct(c.name, o.d, o.sub))) +FROM customers c +INNER JOIN nested_orders o +ON (c.id = o.cid) GROUP BY c.id +PREHOOK: type: QUERY +PREHOOK: Input: default@customers +PREHOOK: Input: default@nested_orders +#### A masked pattern was here #### +POSTHOOK: query: SELECT c.id, sort_array(collect_set(struct(c.name, o.d, o.sub))) +FROM customers c +INNER JOIN nested_orders o +ON (c.id = o.cid) GROUP BY c.id +POSTHOOK: type: QUERY +POSTHOOK: Input: default@customers +POSTHOOK: Input: default@nested_orders +#### A masked pattern was here #### +3 [{"col1":"Martin","col2":"2014-05-11","col3":{"\"apple\"":30.5,"\"orange\"":41.35}},{"col1":"Martin","col2":"2014-12-12","col3":{"\"icecream\"":210.03,"\"banana\"":100.56,"\"coffee":500.0}}] +1 [{"col1":"Chris","col2":"2013-06-21","col3":{"\"juice\"":21.45,"\"bread\"":15.2}},{"col1":"Chris","col2":"2014-10-11","col3":{"\"rice\"":29.36,"\"grape\"":1200.5}}] +2 [{"col1":"John","col2":"2013-08-10","col3":{"\"yogurt\"":126.57,"\"beef\"":210.57}},{"col1":"John","col2":"2014-06-25","col3":{"\"chocolate\"":3.65,"\"water\"":420.36}},{"col1":"John","col2":"2015-01-15","col3":{"\"milk\"":27.45}}] +PREHOOK: query: SELECT c.id, sort_array(collect_list(struct(c.name, o.d, o.sub))) +FROM customers c +INNER JOIN nested_orders o +ON (c.id = o.cid) GROUP BY c.id +PREHOOK: type: QUERY +PREHOOK: Input: default@customers +PREHOOK: Input: default@nested_orders +#### A masked pattern was here #### +POSTHOOK: query: SELECT c.id, sort_array(collect_list(struct(c.name, o.d, o.sub))) +FROM customers c +INNER JOIN nested_orders o +ON (c.id = o.cid) GROUP BY c.id +POSTHOOK: type: QUERY +POSTHOOK: Input: default@customers +POSTHOOK: Input: default@nested_orders +#### A masked pattern was here #### +3 [{"col1":"Martin","col2":"2014-05-11","col3":{"\"apple\"":30.5,"\"orange\"":41.35}},{"col1":"Martin","col2":"2014-12-12","col3":{"\"icecream\"":210.03,"\"banana\"":100.56,"\"coffee":500.0}}] +1 [{"col1":"Chris","col2":"2013-06-21","col3":{"\"juice\"":21.45,"\"bread\"":15.2}},{"col1":"Chris","col2":"2013-06-21","col3":{"\"juice\"":21.45,"\"bread\"":15.2}},{"col1":"Chris","col2":"2014-10-11","col3":{"\"rice\"":29.36,"\"grape\"":1200.5}}] +2 [{"col1":"John","col2":"2013-08-10","col3":{"\"yogurt\"":126.57,"\"beef\"":210.57}},{"col1":"John","col2":"2014-06-25","col3":{"\"chocolate\"":3.65,"\"water\"":420.36}},{"col1":"John","col2":"2015-01-15","col3":{"\"milk\"":27.45}}] PREHOOK: query: SELECT c.id, sort_array(collect_set(named_struct("name", c.name, "date", o.d, "sub", o.sub))) FROM customers c INNER JOIN nested_orders o @@ -324,34 +514,186 @@ PREHOOK: Input: default@nested_orders #### A masked pattern was here #### POSTHOOK: query: SELECT c.id, sort_array(collect_set(struct(c.name, o.d, map_values(o.sub)))) FROM customers c -INNER JOIN nested_orders o +INNER JOIN nested_orders o +ON (c.id = o.cid) GROUP BY c.id +POSTHOOK: type: QUERY +POSTHOOK: Input: default@customers +POSTHOOK: Input: default@nested_orders +#### A masked pattern was here #### +3 [{"col1":"Martin","col2":"2014-05-11","col3":[30.5,41.35]},{"col1":"Martin","col2":"2014-12-12","col3":[210.03,100.56,500.0]}] +1 [{"col1":"Chris","col2":"2013-06-21","col3":[21.45,15.2]},{"col1":"Chris","col2":"2014-10-11","col3":[29.36,1200.5]}] +2 [{"col1":"John","col2":"2013-08-10","col3":[126.57,210.57]},{"col1":"John","col2":"2014-06-25","col3":[3.65,420.36]},{"col1":"John","col2":"2015-01-15","col3":[27.45]}] +PREHOOK: query: SELECT c.id, sort_array(collect_list(struct(c.name, o.d, map_values(o.sub)))) +FROM customers c +INNER JOIN nested_orders o +ON (c.id = o.cid) GROUP BY c.id +PREHOOK: type: QUERY +PREHOOK: Input: default@customers +PREHOOK: Input: default@nested_orders +#### A masked pattern was here #### +POSTHOOK: query: SELECT c.id, sort_array(collect_list(struct(c.name, o.d, map_values(o.sub)))) +FROM customers c +INNER JOIN nested_orders o +ON (c.id = o.cid) GROUP BY c.id +POSTHOOK: type: QUERY +POSTHOOK: Input: default@customers +POSTHOOK: Input: default@nested_orders +#### A masked pattern was here #### +3 [{"col1":"Martin","col2":"2014-05-11","col3":[30.5,41.35]},{"col1":"Martin","col2":"2014-12-12","col3":[210.03,100.56,500.0]}] +1 [{"col1":"Chris","col2":"2013-06-21","col3":[21.45,15.2]},{"col1":"Chris","col2":"2013-06-21","col3":[21.45,15.2]},{"col1":"Chris","col2":"2014-10-11","col3":[29.36,1200.5]}] +2 [{"col1":"John","col2":"2013-08-10","col3":[126.57,210.57]},{"col1":"John","col2":"2014-06-25","col3":[3.65,420.36]},{"col1":"John","col2":"2015-01-15","col3":[27.45]}] +PREHOOK: query: SELECT c.id, sort_array(collect_set(named_struct("name", c.name, "date", o.d, "sub", map_values(o.sub)))) +FROM customers c +INNER JOIN nested_orders o +ON (c.id = o.cid) GROUP BY c.id +PREHOOK: type: QUERY +PREHOOK: Input: default@customers +PREHOOK: Input: default@nested_orders +#### A masked pattern was here #### +POSTHOOK: query: SELECT c.id, sort_array(collect_set(named_struct("name", c.name, "date", o.d, "sub", map_values(o.sub)))) +FROM customers c +INNER JOIN nested_orders o +ON (c.id = o.cid) GROUP BY c.id +POSTHOOK: type: QUERY +POSTHOOK: Input: default@customers +POSTHOOK: Input: default@nested_orders +#### A masked pattern was here #### +3 [{"name":"Martin","date":"2014-05-11","sub":[30.5,41.35]},{"name":"Martin","date":"2014-12-12","sub":[210.03,100.56,500.0]}] +1 [{"name":"Chris","date":"2013-06-21","sub":[21.45,15.2]},{"name":"Chris","date":"2014-10-11","sub":[29.36,1200.5]}] +2 [{"name":"John","date":"2013-08-10","sub":[126.57,210.57]},{"name":"John","date":"2014-06-25","sub":[3.65,420.36]},{"name":"John","date":"2015-01-15","sub":[27.45]}] +PREHOOK: query: SELECT c.id, sort_array(collect_list(named_struct("name", c.name, "date", o.d, "sub", map_values(o.sub)))) +FROM customers c +INNER JOIN nested_orders o +ON (c.id = o.cid) GROUP BY c.id +PREHOOK: type: QUERY +PREHOOK: Input: default@customers +PREHOOK: Input: default@nested_orders +#### A masked pattern was here #### +POSTHOOK: query: SELECT c.id, sort_array(collect_list(named_struct("name", c.name, "date", o.d, "sub", map_values(o.sub)))) +FROM customers c +INNER JOIN nested_orders o +ON (c.id = o.cid) GROUP BY c.id +POSTHOOK: type: QUERY +POSTHOOK: Input: default@customers +POSTHOOK: Input: default@nested_orders +#### A masked pattern was here #### +3 [{"name":"Martin","date":"2014-05-11","sub":[30.5,41.35]},{"name":"Martin","date":"2014-12-12","sub":[210.03,100.56,500.0]}] +1 [{"name":"Chris","date":"2013-06-21","sub":[21.45,15.2]},{"name":"Chris","date":"2013-06-21","sub":[21.45,15.2]},{"name":"Chris","date":"2014-10-11","sub":[29.36,1200.5]}] +2 [{"name":"John","date":"2013-08-10","sub":[126.57,210.57]},{"name":"John","date":"2014-06-25","sub":[3.65,420.36]},{"name":"John","date":"2015-01-15","sub":[27.45]}] +PREHOOK: query: SELECT c.id, sort_array(collect_set(struct(c.name, o.d, map_values(o.sub)))) +FROM customers c +INNER JOIN nested_orders o +ON (c.id = o.cid) GROUP BY c.id +PREHOOK: type: QUERY +PREHOOK: Input: default@customers +PREHOOK: Input: default@nested_orders +#### A masked pattern was here #### +POSTHOOK: query: SELECT c.id, sort_array(collect_set(struct(c.name, o.d, map_values(o.sub)))) +FROM customers c +INNER JOIN nested_orders o +ON (c.id = o.cid) GROUP BY c.id +POSTHOOK: type: QUERY +POSTHOOK: Input: default@customers +POSTHOOK: Input: default@nested_orders +#### A masked pattern was here #### +3 [{"col1":"Martin","col2":"2014-05-11","col3":[30.5,41.35]},{"col1":"Martin","col2":"2014-12-12","col3":[210.03,100.56,500.0]}] +1 [{"col1":"Chris","col2":"2013-06-21","col3":[21.45,15.2]},{"col1":"Chris","col2":"2014-10-11","col3":[29.36,1200.5]}] +2 [{"col1":"John","col2":"2013-08-10","col3":[126.57,210.57]},{"col1":"John","col2":"2014-06-25","col3":[3.65,420.36]},{"col1":"John","col2":"2015-01-15","col3":[27.45]}] +PREHOOK: query: SELECT c.id, sort_array(collect_list(struct(c.name, o.d, map_values(o.sub)))) +FROM customers c +INNER JOIN nested_orders o +ON (c.id = o.cid) GROUP BY c.id +PREHOOK: type: QUERY +PREHOOK: Input: default@customers +PREHOOK: Input: default@nested_orders +#### A masked pattern was here #### +POSTHOOK: query: SELECT c.id, sort_array(collect_list(struct(c.name, o.d, map_values(o.sub)))) +FROM customers c +INNER JOIN nested_orders o +ON (c.id = o.cid) GROUP BY c.id +POSTHOOK: type: QUERY +POSTHOOK: Input: default@customers +POSTHOOK: Input: default@nested_orders +#### A masked pattern was here #### +3 [{"col1":"Martin","col2":"2014-05-11","col3":[30.5,41.35]},{"col1":"Martin","col2":"2014-12-12","col3":[210.03,100.56,500.0]}] +1 [{"col1":"Chris","col2":"2013-06-21","col3":[21.45,15.2]},{"col1":"Chris","col2":"2013-06-21","col3":[21.45,15.2]},{"col1":"Chris","col2":"2014-10-11","col3":[29.36,1200.5]}] +2 [{"col1":"John","col2":"2013-08-10","col3":[126.57,210.57]},{"col1":"John","col2":"2014-06-25","col3":[3.65,420.36]},{"col1":"John","col2":"2015-01-15","col3":[27.45]}] +PREHOOK: query: SELECT c.id, sort_array(collect_set(array(o.amount))) +FROM customers c +INNER JOIN orders o +ON (c.id = o.cid) GROUP BY c.id +PREHOOK: type: QUERY +PREHOOK: Input: default@customers +PREHOOK: Input: default@orders +#### A masked pattern was here #### +POSTHOOK: query: SELECT c.id, sort_array(collect_set(array(o.amount))) +FROM customers c +INNER JOIN orders o +ON (c.id = o.cid) GROUP BY c.id +POSTHOOK: type: QUERY +POSTHOOK: Input: default@customers +POSTHOOK: Input: default@orders +#### A masked pattern was here #### +3 [[30.5],[210.03]] +1 [[21.45],[29.36]] +2 [[3.65],[27.45],[126.57]] +PREHOOK: query: SELECT c.id, sort_array(collect_list(array(o.amount))) +FROM customers c +INNER JOIN orders o +ON (c.id = o.cid) GROUP BY c.id +PREHOOK: type: QUERY +PREHOOK: Input: default@customers +PREHOOK: Input: default@orders +#### A masked pattern was here #### +POSTHOOK: query: SELECT c.id, sort_array(collect_list(array(o.amount))) +FROM customers c +INNER JOIN orders o +ON (c.id = o.cid) GROUP BY c.id +POSTHOOK: type: QUERY +POSTHOOK: Input: default@customers +POSTHOOK: Input: default@orders +#### A masked pattern was here #### +3 [[30.5],[210.03]] +1 [[21.45],[21.45],[29.36]] +2 [[3.65],[27.45],[126.57]] +PREHOOK: query: SELECT c.id, sort_array(collect_set(array(cast(o.amount as decimal(10,1))))) +FROM customers c +INNER JOIN orders o +ON (c.id = o.cid) GROUP BY c.id +PREHOOK: type: QUERY +PREHOOK: Input: default@customers +PREHOOK: Input: default@orders +#### A masked pattern was here #### +POSTHOOK: query: SELECT c.id, sort_array(collect_set(array(cast(o.amount as decimal(10,1))))) +FROM customers c +INNER JOIN orders o ON (c.id = o.cid) GROUP BY c.id POSTHOOK: type: QUERY POSTHOOK: Input: default@customers -POSTHOOK: Input: default@nested_orders +POSTHOOK: Input: default@orders #### A masked pattern was here #### -3 [{"col1":"Martin","col2":"2014-05-11","col3":[30.5,41.35]},{"col1":"Martin","col2":"2014-12-12","col3":[210.03,100.56,500.0]}] -1 [{"col1":"Chris","col2":"2013-06-21","col3":[21.45,15.2]},{"col1":"Chris","col2":"2014-10-11","col3":[29.36,1200.5]}] -2 [{"col1":"John","col2":"2013-08-10","col3":[126.57,210.57]},{"col1":"John","col2":"2014-06-25","col3":[3.65,420.36]},{"col1":"John","col2":"2015-01-15","col3":[27.45]}] -PREHOOK: query: SELECT c.id, sort_array(collect_list(struct(c.name, o.d, map_values(o.sub)))) +3 [[30.5],[210]] +1 [[21.5],[29.4]] +2 [[3.7],[27.5],[126.6]] +PREHOOK: query: SELECT c.id, sort_array(collect_list(array(cast(o.amount as decimal(10,1))))) FROM customers c -INNER JOIN nested_orders o +INNER JOIN orders o ON (c.id = o.cid) GROUP BY c.id PREHOOK: type: QUERY PREHOOK: Input: default@customers -PREHOOK: Input: default@nested_orders +PREHOOK: Input: default@orders #### A masked pattern was here #### -POSTHOOK: query: SELECT c.id, sort_array(collect_list(struct(c.name, o.d, map_values(o.sub)))) +POSTHOOK: query: SELECT c.id, sort_array(collect_list(array(cast(o.amount as decimal(10,1))))) FROM customers c -INNER JOIN nested_orders o +INNER JOIN orders o ON (c.id = o.cid) GROUP BY c.id POSTHOOK: type: QUERY POSTHOOK: Input: default@customers -POSTHOOK: Input: default@nested_orders +POSTHOOK: Input: default@orders #### A masked pattern was here #### -3 [{"col1":"Martin","col2":"2014-05-11","col3":[30.5,41.35]},{"col1":"Martin","col2":"2014-12-12","col3":[210.03,100.56,500.0]}] -1 [{"col1":"Chris","col2":"2013-06-21","col3":[21.45,15.2]},{"col1":"Chris","col2":"2013-06-21","col3":[21.45,15.2]},{"col1":"Chris","col2":"2014-10-11","col3":[29.36,1200.5]}] -2 [{"col1":"John","col2":"2013-08-10","col3":[126.57,210.57]},{"col1":"John","col2":"2014-06-25","col3":[3.65,420.36]},{"col1":"John","col2":"2015-01-15","col3":[27.45]}] +3 [[30.5],[210]] +1 [[21.5],[21.5],[29.4]] +2 [[3.7],[27.5],[126.6]] PREHOOK: query: SELECT c.id, sort_array(collect_set(array(o.amount))) FROM customers c INNER JOIN orders o @@ -466,6 +808,82 @@ POSTHOOK: Input: default@nested_orders 3 [[{"\"apple\"":30.5,"\"orange\"":41.35}],[{"\"icecream\"":210.03,"\"banana\"":100.56,"\"coffee":500.0}]] 1 [[{"\"juice\"":21.45,"\"bread\"":15.2}],[{"\"juice\"":21.45,"\"bread\"":15.2}],[{"\"rice\"":29.36,"\"grape\"":1200.5}]] 2 [[{"\"milk\"":27.45}],[{"\"yogurt\"":126.57,"\"beef\"":210.57}],[{"\"chocolate\"":3.65,"\"water\"":420.36}]] +PREHOOK: query: SELECT c.id, sort_array(collect_set(array(o.sub))) +FROM customers c +INNER JOIN nested_orders o +ON (c.id = o.cid) GROUP BY c.id +PREHOOK: type: QUERY +PREHOOK: Input: default@customers +PREHOOK: Input: default@nested_orders +#### A masked pattern was here #### +POSTHOOK: query: SELECT c.id, sort_array(collect_set(array(o.sub))) +FROM customers c +INNER JOIN nested_orders o +ON (c.id = o.cid) GROUP BY c.id +POSTHOOK: type: QUERY +POSTHOOK: Input: default@customers +POSTHOOK: Input: default@nested_orders +#### A masked pattern was here #### +3 [[{"\"apple\"":30.5,"\"orange\"":41.35}],[{"\"icecream\"":210.03,"\"banana\"":100.56,"\"coffee":500.0}]] +1 [[{"\"juice\"":21.45,"\"bread\"":15.2}],[{"\"rice\"":29.36,"\"grape\"":1200.5}]] +2 [[{"\"milk\"":27.45}],[{"\"yogurt\"":126.57,"\"beef\"":210.57}],[{"\"chocolate\"":3.65,"\"water\"":420.36}]] +PREHOOK: query: SELECT c.id, sort_array(collect_list(array(o.sub))) +FROM customers c +INNER JOIN nested_orders o +ON (c.id = o.cid) GROUP BY c.id +PREHOOK: type: QUERY +PREHOOK: Input: default@customers +PREHOOK: Input: default@nested_orders +#### A masked pattern was here #### +POSTHOOK: query: SELECT c.id, sort_array(collect_list(array(o.sub))) +FROM customers c +INNER JOIN nested_orders o +ON (c.id = o.cid) GROUP BY c.id +POSTHOOK: type: QUERY +POSTHOOK: Input: default@customers +POSTHOOK: Input: default@nested_orders +#### A masked pattern was here #### +3 [[{"\"apple\"":30.5,"\"orange\"":41.35}],[{"\"icecream\"":210.03,"\"banana\"":100.56,"\"coffee":500.0}]] +1 [[{"\"juice\"":21.45,"\"bread\"":15.2}],[{"\"juice\"":21.45,"\"bread\"":15.2}],[{"\"rice\"":29.36,"\"grape\"":1200.5}]] +2 [[{"\"milk\"":27.45}],[{"\"yogurt\"":126.57,"\"beef\"":210.57}],[{"\"chocolate\"":3.65,"\"water\"":420.36}]] +PREHOOK: query: SELECT c.id, sort_array(collect_set(array(map_values(o.sub)))) +FROM customers c +INNER JOIN nested_orders o +ON (c.id = o.cid) GROUP BY c.id +PREHOOK: type: QUERY +PREHOOK: Input: default@customers +PREHOOK: Input: default@nested_orders +#### A masked pattern was here #### +POSTHOOK: query: SELECT c.id, sort_array(collect_set(array(map_values(o.sub)))) +FROM customers c +INNER JOIN nested_orders o +ON (c.id = o.cid) GROUP BY c.id +POSTHOOK: type: QUERY +POSTHOOK: Input: default@customers +POSTHOOK: Input: default@nested_orders +#### A masked pattern was here #### +3 [[[30.5,41.35]],[[210.03,100.56,500.0]]] +1 [[[21.45,15.2]],[[29.36,1200.5]]] +2 [[[3.65,420.36]],[[27.45]],[[126.57,210.57]]] +PREHOOK: query: SELECT c.id, sort_array(collect_list(array(map_values(o.sub)))) +FROM customers c +INNER JOIN nested_orders o +ON (c.id = o.cid) GROUP BY c.id +PREHOOK: type: QUERY +PREHOOK: Input: default@customers +PREHOOK: Input: default@nested_orders +#### A masked pattern was here #### +POSTHOOK: query: SELECT c.id, sort_array(collect_list(array(map_values(o.sub)))) +FROM customers c +INNER JOIN nested_orders o +ON (c.id = o.cid) GROUP BY c.id +POSTHOOK: type: QUERY +POSTHOOK: Input: default@customers +POSTHOOK: Input: default@nested_orders +#### A masked pattern was here #### +3 [[[30.5,41.35]],[[210.03,100.56,500.0]]] +1 [[[21.45,15.2]],[[21.45,15.2]],[[29.36,1200.5]]] +2 [[[3.65,420.36]],[[27.45]],[[126.57,210.57]]] PREHOOK: query: SELECT c.id, sort_array(collect_set(array(map_values(o.sub)))) FROM customers c INNER JOIN nested_orders o @@ -580,6 +998,120 @@ POSTHOOK: Input: default@orders 3 [{"amount":30.5},{"amount":210}] 1 [{"amount":21.5},{"amount":21.5},{"amount":29.4}] 2 [{"amount":3.7},{"amount":27.5},{"amount":126.6}] +PREHOOK: query: SELECT c.id, sort_array(collect_set(map("amount", o.amount))) +FROM customers c +INNER JOIN orders o +ON (c.id = o.cid) GROUP BY c.id +PREHOOK: type: QUERY +PREHOOK: Input: default@customers +PREHOOK: Input: default@orders +#### A masked pattern was here #### +POSTHOOK: query: SELECT c.id, sort_array(collect_set(map("amount", o.amount))) +FROM customers c +INNER JOIN orders o +ON (c.id = o.cid) GROUP BY c.id +POSTHOOK: type: QUERY +POSTHOOK: Input: default@customers +POSTHOOK: Input: default@orders +#### A masked pattern was here #### +3 [{"amount":30.5},{"amount":210.03}] +1 [{"amount":21.45},{"amount":29.36}] +2 [{"amount":3.65},{"amount":27.45},{"amount":126.57}] +PREHOOK: query: SELECT c.id, sort_array(collect_list(map("amount", o.amount))) +FROM customers c +INNER JOIN orders o +ON (c.id = o.cid) GROUP BY c.id +PREHOOK: type: QUERY +PREHOOK: Input: default@customers +PREHOOK: Input: default@orders +#### A masked pattern was here #### +POSTHOOK: query: SELECT c.id, sort_array(collect_list(map("amount", o.amount))) +FROM customers c +INNER JOIN orders o +ON (c.id = o.cid) GROUP BY c.id +POSTHOOK: type: QUERY +POSTHOOK: Input: default@customers +POSTHOOK: Input: default@orders +#### A masked pattern was here #### +3 [{"amount":30.5},{"amount":210.03}] +1 [{"amount":21.45},{"amount":21.45},{"amount":29.36}] +2 [{"amount":3.65},{"amount":27.45},{"amount":126.57}] +PREHOOK: query: SELECT c.id, sort_array(collect_set(map("amount", cast(o.amount as decimal(10,1))))) +FROM customers c +INNER JOIN orders o +ON (c.id = o.cid) GROUP BY c.id +PREHOOK: type: QUERY +PREHOOK: Input: default@customers +PREHOOK: Input: default@orders +#### A masked pattern was here #### +POSTHOOK: query: SELECT c.id, sort_array(collect_set(map("amount", cast(o.amount as decimal(10,1))))) +FROM customers c +INNER JOIN orders o +ON (c.id = o.cid) GROUP BY c.id +POSTHOOK: type: QUERY +POSTHOOK: Input: default@customers +POSTHOOK: Input: default@orders +#### A masked pattern was here #### +3 [{"amount":30.5},{"amount":210}] +1 [{"amount":21.5},{"amount":29.4}] +2 [{"amount":3.7},{"amount":27.5},{"amount":126.6}] +PREHOOK: query: SELECT c.id, sort_array(collect_list(map("amount", cast(o.amount as decimal(10,1))))) +FROM customers c +INNER JOIN orders o +ON (c.id = o.cid) GROUP BY c.id +PREHOOK: type: QUERY +PREHOOK: Input: default@customers +PREHOOK: Input: default@orders +#### A masked pattern was here #### +POSTHOOK: query: SELECT c.id, sort_array(collect_list(map("amount", cast(o.amount as decimal(10,1))))) +FROM customers c +INNER JOIN orders o +ON (c.id = o.cid) GROUP BY c.id +POSTHOOK: type: QUERY +POSTHOOK: Input: default@customers +POSTHOOK: Input: default@orders +#### A masked pattern was here #### +3 [{"amount":30.5},{"amount":210}] +1 [{"amount":21.5},{"amount":21.5},{"amount":29.4}] +2 [{"amount":3.7},{"amount":27.5},{"amount":126.6}] +PREHOOK: query: SELECT c.id, sort_array(collect_set(map("sub", o.sub))) +FROM customers c +INNER JOIN nested_orders o +ON (c.id = o.cid) GROUP BY c.id +PREHOOK: type: QUERY +PREHOOK: Input: default@customers +PREHOOK: Input: default@nested_orders +#### A masked pattern was here #### +POSTHOOK: query: SELECT c.id, sort_array(collect_set(map("sub", o.sub))) +FROM customers c +INNER JOIN nested_orders o +ON (c.id = o.cid) GROUP BY c.id +POSTHOOK: type: QUERY +POSTHOOK: Input: default@customers +POSTHOOK: Input: default@nested_orders +#### A masked pattern was here #### +3 [{"sub":{"\"apple\"":30.5,"\"orange\"":41.35}},{"sub":{"\"icecream\"":210.03,"\"banana\"":100.56,"\"coffee":500.0}}] +1 [{"sub":{"\"juice\"":21.45,"\"bread\"":15.2}},{"sub":{"\"rice\"":29.36,"\"grape\"":1200.5}}] +2 [{"sub":{"\"milk\"":27.45}},{"sub":{"\"yogurt\"":126.57,"\"beef\"":210.57}},{"sub":{"\"chocolate\"":3.65,"\"water\"":420.36}}] +PREHOOK: query: SELECT c.id, sort_array(collect_list(map("sub", o.sub))) +FROM customers c +INNER JOIN nested_orders o +ON (c.id = o.cid) GROUP BY c.id +PREHOOK: type: QUERY +PREHOOK: Input: default@customers +PREHOOK: Input: default@nested_orders +#### A masked pattern was here #### +POSTHOOK: query: SELECT c.id, sort_array(collect_list(map("sub", o.sub))) +FROM customers c +INNER JOIN nested_orders o +ON (c.id = o.cid) GROUP BY c.id +POSTHOOK: type: QUERY +POSTHOOK: Input: default@customers +POSTHOOK: Input: default@nested_orders +#### A masked pattern was here #### +3 [{"sub":{"\"apple\"":30.5,"\"orange\"":41.35}},{"sub":{"\"icecream\"":210.03,"\"banana\"":100.56,"\"coffee":500.0}}] +1 [{"sub":{"\"juice\"":21.45,"\"bread\"":15.2}},{"sub":{"\"juice\"":21.45,"\"bread\"":15.2}},{"sub":{"\"rice\"":29.36,"\"grape\"":1200.5}}] +2 [{"sub":{"\"milk\"":27.45}},{"sub":{"\"yogurt\"":126.57,"\"beef\"":210.57}},{"sub":{"\"chocolate\"":3.65,"\"water\"":420.36}}] PREHOOK: query: SELECT c.id, sort_array(collect_set(map("sub", o.sub))) FROM customers c INNER JOIN nested_orders o @@ -656,6 +1188,44 @@ POSTHOOK: Input: default@nested_orders 3 [{"sub":[30.5,41.35]},{"sub":[210.03,100.56,500.0]}] 1 [{"sub":[21.45,15.2]},{"sub":[21.45,15.2]},{"sub":[29.36,1200.5]}] 2 [{"sub":[3.65,420.36]},{"sub":[27.45]},{"sub":[126.57,210.57]}] +PREHOOK: query: SELECT c.id, sort_array(collect_set(map("sub", map_values(o.sub)))) +FROM customers c +INNER JOIN nested_orders o +ON (c.id = o.cid) GROUP BY c.id +PREHOOK: type: QUERY +PREHOOK: Input: default@customers +PREHOOK: Input: default@nested_orders +#### A masked pattern was here #### +POSTHOOK: query: SELECT c.id, sort_array(collect_set(map("sub", map_values(o.sub)))) +FROM customers c +INNER JOIN nested_orders o +ON (c.id = o.cid) GROUP BY c.id +POSTHOOK: type: QUERY +POSTHOOK: Input: default@customers +POSTHOOK: Input: default@nested_orders +#### A masked pattern was here #### +3 [{"sub":[30.5,41.35]},{"sub":[210.03,100.56,500.0]}] +1 [{"sub":[21.45,15.2]},{"sub":[29.36,1200.5]}] +2 [{"sub":[3.65,420.36]},{"sub":[27.45]},{"sub":[126.57,210.57]}] +PREHOOK: query: SELECT c.id, sort_array(collect_list(map("sub", map_values(o.sub)))) +FROM customers c +INNER JOIN nested_orders o +ON (c.id = o.cid) GROUP BY c.id +PREHOOK: type: QUERY +PREHOOK: Input: default@customers +PREHOOK: Input: default@nested_orders +#### A masked pattern was here #### +POSTHOOK: query: SELECT c.id, sort_array(collect_list(map("sub", map_values(o.sub)))) +FROM customers c +INNER JOIN nested_orders o +ON (c.id = o.cid) GROUP BY c.id +POSTHOOK: type: QUERY +POSTHOOK: Input: default@customers +POSTHOOK: Input: default@nested_orders +#### A masked pattern was here #### +3 [{"sub":[30.5,41.35]},{"sub":[210.03,100.56,500.0]}] +1 [{"sub":[21.45,15.2]},{"sub":[21.45,15.2]},{"sub":[29.36,1200.5]}] +2 [{"sub":[3.65,420.36]},{"sub":[27.45]},{"sub":[126.57,210.57]}] PREHOOK: query: DROP TABLE customer PREHOOK: type: DROPTABLE PREHOOK: Output: database:default From 74e7ce7087682c2a9c23a25b9c1423840baf3b0a Mon Sep 17 00:00:00 2001 From: JD Date: Mon, 22 Jul 2024 08:12:03 +0900 Subject: [PATCH 4/6] Update comments --- .../hive/ql/udf/generic/GenericUDAFMkCollectionEvaluator.java | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFMkCollectionEvaluator.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFMkCollectionEvaluator.java index 2daca6d68b62..c95346024f60 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFMkCollectionEvaluator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFMkCollectionEvaluator.java @@ -61,7 +61,9 @@ public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws HiveException { super.init(m, parameters); // init output object inspectors - // The output of a partial aggregation is a list + // Mode.PARTIAL1 or Mode.COMPLETE: T => List[T] + // Mode.PARTIAL2 or Mode.FINAL: List[T] => List[T] + // The output of a partial aggregation is a list, output of COMPLETE is List as well if (mode == Mode.PARTIAL1 || mode == Mode.COMPLETE) { inputOI = parameters[0]; return ObjectInspectorFactory.getStandardListObjectInspector( From a5b435cc258afc1ecec45e964ad5c38e8b4f414e Mon Sep 17 00:00:00 2001 From: JD Date: Mon, 22 Jul 2024 15:27:55 +0900 Subject: [PATCH 5/6] Remove a line --- .../hive/ql/udf/generic/GenericUDAFMkCollectionEvaluator.java | 1 - 1 file changed, 1 deletion(-) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFMkCollectionEvaluator.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFMkCollectionEvaluator.java index c95346024f60..692de31b2d46 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFMkCollectionEvaluator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFMkCollectionEvaluator.java @@ -63,7 +63,6 @@ public ObjectInspector init(Mode m, ObjectInspector[] parameters) // init output object inspectors // Mode.PARTIAL1 or Mode.COMPLETE: T => List[T] // Mode.PARTIAL2 or Mode.FINAL: List[T] => List[T] - // The output of a partial aggregation is a list, output of COMPLETE is List as well if (mode == Mode.PARTIAL1 || mode == Mode.COMPLETE) { inputOI = parameters[0]; return ObjectInspectorFactory.getStandardListObjectInspector( From 78d0de498b529aa6a2bf6e05ae4d81cd801be9b0 Mon Sep 17 00:00:00 2001 From: JD Date: Mon, 22 Jul 2024 15:35:49 +0900 Subject: [PATCH 6/6] comments --- .../hive/ql/udf/generic/GenericUDAFMkCollectionEvaluator.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFMkCollectionEvaluator.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFMkCollectionEvaluator.java index 692de31b2d46..c5abae60fb23 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFMkCollectionEvaluator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFMkCollectionEvaluator.java @@ -61,13 +61,13 @@ public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws HiveException { super.init(m, parameters); // init output object inspectors - // Mode.PARTIAL1 or Mode.COMPLETE: T => List[T] - // Mode.PARTIAL2 or Mode.FINAL: List[T] => List[T] if (mode == Mode.PARTIAL1 || mode == Mode.COMPLETE) { + // T => List[T] inputOI = parameters[0]; return ObjectInspectorFactory.getStandardListObjectInspector( ObjectInspectorUtils.getStandardObjectInspector(inputOI)); } else { + // List[T] => List[T] internalMergeOI = (ListObjectInspector) parameters[0]; inputOI = internalMergeOI.getListElementObjectInspector(); loi = (StandardListObjectInspector)