diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q1/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q1/explain.txt new file mode 100644 index 000000000..97885fe7f --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q1/explain.txt @@ -0,0 +1,43 @@ +== Physical Plan == +TakeOrderedAndProject(limit=100, orderBy=[c_customer_id#1 ASC NULLS FIRST], output=[c_customer_id#1]) ++- *(9) Project [c_customer_id#1] + +- *(9) BroadcastHashJoin [ctr_customer_sk#2], [cast(c_customer_sk#3 as bigint)], Inner, BuildRight + :- *(9) Project [ctr_customer_sk#2] + : +- *(9) BroadcastHashJoin [ctr_store_sk#4], [cast(s_store_sk#5 as bigint)], Inner, BuildRight + : :- *(9) Project [ctr_customer_sk#2, ctr_store_sk#4] + : : +- *(9) BroadcastHashJoin [ctr_store_sk#4], [ctr_store_sk#4#6], Inner, BuildRight, (cast(ctr_total_return#7 as decimal(24,7)) > (CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6)))#8) + : : :- *(9) Filter isnotnull(ctr_total_return#7) + : : : +- *(9) HashAggregate(keys=[sr_customer_sk#9, sr_store_sk#10], functions=[sum(UnscaledValue(sr_return_amt#11))]) + : : : +- Exchange hashpartitioning(sr_customer_sk#9, sr_store_sk#10, 200) + : : : +- *(2) HashAggregate(keys=[sr_customer_sk#9, sr_store_sk#10], functions=[partial_sum(UnscaledValue(sr_return_amt#11))]) + : : : +- *(2) Project [sr_customer_sk#9, sr_store_sk#10, sr_return_amt#11] + : : : +- *(2) BroadcastHashJoin [sr_returned_date_sk#12], [cast(d_date_sk#13 as bigint)], Inner, BuildRight + : : : :- *(2) Project [sr_returned_date_sk#12, sr_customer_sk#9, sr_store_sk#10, sr_return_amt#11] + : : : : +- *(2) Filter ((isnotnull(sr_returned_date_sk#12) && isnotnull(sr_store_sk#10)) && isnotnull(sr_customer_sk#9)) + : : : : +- *(2) FileScan parquet default.store_returns[sr_returned_date_sk#12,sr_customer_sk#9,sr_store_sk#10,sr_return_amt#11] Batched: true, Format: Parquet, Location: 
InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_returns], PartitionFilters: [], PushedFilters: [IsNotNull(sr_returned_date_sk), IsNotNull(sr_store_sk), IsNotNull(sr_customer_sk)], ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(input[1, bigint, true])) + : : +- *(6) Filter isnotnull((CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6)))#8) + : : +- *(6) HashAggregate(keys=[ctr_store_sk#4], functions=[avg(ctr_total_return#7)]) + : : +- Exchange hashpartitioning(ctr_store_sk#4, 200) + : : +- *(5) HashAggregate(keys=[ctr_store_sk#4], functions=[partial_avg(ctr_total_return#7)]) + : : +- *(5) HashAggregate(keys=[sr_customer_sk#9, sr_store_sk#10], functions=[sum(UnscaledValue(sr_return_amt#11))]) + : : +- Exchange hashpartitioning(sr_customer_sk#9, sr_store_sk#10, 200) + : : +- *(4) HashAggregate(keys=[sr_customer_sk#9, sr_store_sk#10], functions=[partial_sum(UnscaledValue(sr_return_amt#11))]) + : : +- *(4) Project [sr_customer_sk#9, sr_store_sk#10, sr_return_amt#11] + : : +- *(4) BroadcastHashJoin [sr_returned_date_sk#12], [cast(d_date_sk#13 as bigint)], Inner, BuildRight + : : :- *(4) Project [sr_returned_date_sk#12, sr_customer_sk#9, sr_store_sk#10, sr_return_amt#11] + : : : +- *(4) Filter (isnotnull(sr_returned_date_sk#12) && isnotnull(sr_store_sk#10)) + : : : +- *(4) FileScan parquet default.store_returns[sr_returned_date_sk#12,sr_customer_sk#9,sr_store_sk#10,sr_return_amt#11] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_returns], PartitionFilters: [], PushedFilters: [IsNotNull(sr_returned_date_sk), IsNotNull(sr_store_sk)], ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(8) Project [c_customer_sk#3, c_customer_id#1] + +- *(8) Filter isnotnull(c_customer_sk#3) + +- *(8) FileScan parquet 
default.customer[c_customer_sk#3,c_customer_id#1] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q1/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q1/simplified.txt new file mode 100644 index 000000000..6104e2ac9 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q1/simplified.txt @@ -0,0 +1,58 @@ +TakeOrderedAndProject [c_customer_id] + WholeStageCodegen + Project [c_customer_id] + BroadcastHashJoin [ctr_customer_sk,c_customer_sk] + Project [ctr_customer_sk] + BroadcastHashJoin [ctr_store_sk,s_store_sk] + Project [ctr_customer_sk,ctr_store_sk] + BroadcastHashJoin [ctr_store_sk,ctr_store_skL,ctr_total_return,(CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6)))] + Filter [ctr_total_return] + HashAggregate [sr_customer_sk,sr_store_sk,sum,sum(UnscaledValue(sr_return_amt))] [ctr_customer_sk,ctr_total_return,ctr_store_sk,sum,sum(UnscaledValue(sr_return_amt))] + InputAdapter + Exchange [sr_customer_sk,sr_store_sk] #1 + WholeStageCodegen + HashAggregate [sr_customer_sk,sum,sr_return_amt,sum,sr_store_sk] [sum,sum] + Project [sr_customer_sk,sr_store_sk,sr_return_amt] + BroadcastHashJoin [sr_returned_date_sk,d_date_sk] + Project [sr_returned_date_sk,sr_customer_sk,sr_store_sk,sr_return_amt] + Filter [sr_returned_date_sk,sr_store_sk,sr_customer_sk] + Scan parquet default.store_returns [sr_returned_date_sk,sr_customer_sk,sr_store_sk,sr_return_amt] [sr_returned_date_sk,sr_customer_sk,sr_store_sk,sr_return_amt] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen + Project [d_date_sk] + Filter [d_year,d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_year] [d_date_sk,d_year] + InputAdapter + BroadcastExchange #3 + 
WholeStageCodegen + Filter [(CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6)))] + HashAggregate [ctr_store_sk,sum,count,avg(ctr_total_return)] [avg(ctr_total_return),ctr_store_skL,sum,count,(CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6)))] + InputAdapter + Exchange [ctr_store_sk] #4 + WholeStageCodegen + HashAggregate [ctr_store_sk,ctr_total_return,sum,count,sum,count] [sum,count,sum,count] + HashAggregate [sr_customer_sk,sr_store_sk,sum,sum(UnscaledValue(sr_return_amt))] [sum(UnscaledValue(sr_return_amt)),ctr_store_sk,ctr_total_return,sum] + InputAdapter + Exchange [sr_customer_sk,sr_store_sk] #5 + WholeStageCodegen + HashAggregate [sum,sr_customer_sk,sum,sr_return_amt,sr_store_sk] [sum,sum] + Project [sr_customer_sk,sr_store_sk,sr_return_amt] + BroadcastHashJoin [sr_returned_date_sk,d_date_sk] + Project [sr_returned_date_sk,sr_customer_sk,sr_store_sk,sr_return_amt] + Filter [sr_returned_date_sk,sr_store_sk] + Scan parquet default.store_returns [sr_returned_date_sk,sr_customer_sk,sr_store_sk,sr_return_amt] [sr_returned_date_sk,sr_customer_sk,sr_store_sk,sr_return_amt] + InputAdapter + ReusedExchange [d_date_sk] [d_date_sk] #2 + InputAdapter + BroadcastExchange #6 + WholeStageCodegen + Project [s_store_sk] + Filter [s_state,s_store_sk] + Scan parquet default.store [s_store_sk,s_state] [s_store_sk,s_state] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen + Project [c_customer_sk,c_customer_id] + Filter [c_customer_sk] + Scan parquet default.customer [c_customer_sk,c_customer_id] [c_customer_sk,c_customer_id] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q10/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q10/explain.txt new file mode 100644 index 000000000..a108b1a61 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q10/explain.txt @@ -0,0 +1,49 @@ +== Physical Plan == +TakeOrderedAndProject(limit=100, orderBy=[cd_gender#1 
ASC NULLS FIRST,cd_marital_status#2 ASC NULLS FIRST,cd_education_status#3 ASC NULLS FIRST,cd_purchase_estimate#4 ASC NULLS FIRST,cd_credit_rating#5 ASC NULLS FIRST,cd_dep_count#6 ASC NULLS FIRST,cd_dep_employed_count#7 ASC NULLS FIRST,cd_dep_college_count#8 ASC NULLS FIRST], output=[cd_gender#1,cd_marital_status#2,cd_education_status#3,cnt1#9,cd_purchase_estimate#4,cnt2#10,cd_credit_rating#5,cnt3#11,cd_dep_count#6,cnt4#12,cd_dep_employed_count#7,cnt5#13,cd_dep_college_count#8,cnt6#14]) ++- *(10) HashAggregate(keys=[cd_gender#1, cd_marital_status#2, cd_education_status#3, cd_purchase_estimate#4, cd_credit_rating#5, cd_dep_count#6, cd_dep_employed_count#7, cd_dep_college_count#8], functions=[count(1)]) + +- Exchange hashpartitioning(cd_gender#1, cd_marital_status#2, cd_education_status#3, cd_purchase_estimate#4, cd_credit_rating#5, cd_dep_count#6, cd_dep_employed_count#7, cd_dep_college_count#8, 200) + +- *(9) HashAggregate(keys=[cd_gender#1, cd_marital_status#2, cd_education_status#3, cd_purchase_estimate#4, cd_credit_rating#5, cd_dep_count#6, cd_dep_employed_count#7, cd_dep_college_count#8], functions=[partial_count(1)]) + +- *(9) Project [cd_gender#1, cd_marital_status#2, cd_education_status#3, cd_purchase_estimate#4, cd_credit_rating#5, cd_dep_count#6, cd_dep_employed_count#7, cd_dep_college_count#8] + +- *(9) BroadcastHashJoin [c_current_cdemo_sk#15], [cd_demo_sk#16], Inner, BuildRight + :- *(9) Project [c_current_cdemo_sk#15] + : +- *(9) BroadcastHashJoin [c_current_addr_sk#17], [ca_address_sk#18], Inner, BuildRight + : :- *(9) Project [c_current_cdemo_sk#15, c_current_addr_sk#17] + : : +- *(9) Filter (exists#19 || exists#20) + : : +- *(9) BroadcastHashJoin [c_customer_sk#21], [cs_ship_customer_sk#22], ExistenceJoin(exists#20), BuildRight + : : :- *(9) BroadcastHashJoin [c_customer_sk#21], [ws_bill_customer_sk#23], ExistenceJoin(exists#19), BuildRight + : : : :- *(9) BroadcastHashJoin [c_customer_sk#21], [ss_customer_sk#24], LeftSemi, BuildRight + : : : : :- 
*(9) Project [c_customer_sk#21, c_current_cdemo_sk#15, c_current_addr_sk#17] + : : : : : +- *(9) Filter (isnotnull(c_current_addr_sk#17) && isnotnull(c_current_cdemo_sk#15)) + : : : : : +- *(9) FileScan parquet default.customer[c_customer_sk#21,c_current_cdemo_sk#15,c_current_addr_sk#17] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_current_addr_sk), IsNotNull(c_current_cdemo_sk)], ReadSchema: struct + : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : +- *(2) Project [ss_customer_sk#24] + : : : : +- *(2) BroadcastHashJoin [ss_sold_date_sk#25], [d_date_sk#26], Inner, BuildRight + : : : : :- *(2) Project [ss_sold_date_sk#25, ss_customer_sk#24] + : : : : : +- *(2) Filter isnotnull(ss_sold_date_sk#25) + : : : : : +- *(2) FileScan parquet default.store_sales[ss_sold_date_sk#25,ss_customer_sk#24] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk)], ReadSchema: struct + : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : +- *(1) Project [d_date_sk#26] + : : : : +- *(1) Filter (((((isnotnull(d_year#27) && isnotnull(d_moy#28)) && (d_year#27 = 2002)) && (d_moy#28 >= 1)) && (d_moy#28 <= 4)) && isnotnull(d_date_sk#26)) + : : : : +- *(1) FileScan parquet default.date_dim[d_date_sk#26,d_year#27,d_moy#28] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2002), GreaterThanOrEqual(d_moy,1), LessThan..., ReadSchema: struct + : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : 
: +- *(4) Project [ws_bill_customer_sk#23] + : : : +- *(4) BroadcastHashJoin [ws_sold_date_sk#29], [d_date_sk#26], Inner, BuildRight + : : : :- *(4) Project [ws_sold_date_sk#29, ws_bill_customer_sk#23] + : : : : +- *(4) Filter isnotnull(ws_sold_date_sk#29) + : : : : +- *(4) FileScan parquet default.web_sales[ws_sold_date_sk#29,ws_bill_customer_sk#23] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_sold_date_sk)], ReadSchema: struct + : : : +- ReusedExchange [d_date_sk#26], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(6) Project [cs_ship_customer_sk#22] + : : +- *(6) BroadcastHashJoin [cs_sold_date_sk#30], [d_date_sk#26], Inner, BuildRight + : : :- *(6) Project [cs_sold_date_sk#30, cs_ship_customer_sk#22] + : : : +- *(6) Filter isnotnull(cs_sold_date_sk#30) + : : : +- *(6) FileScan parquet default.catalog_sales[cs_sold_date_sk#30,cs_ship_customer_sk#22] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_sold_date_sk)], ReadSchema: struct + : : +- ReusedExchange [d_date_sk#26], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(7) Project [ca_address_sk#18] + : +- *(7) Filter (ca_county#31 IN (Rush County,Toole County,Jefferson County,Dona Ana County,La Porte County) && isnotnull(ca_address_sk#18)) + : +- *(7) FileScan parquet default.customer_address[ca_address_sk#18,ca_county#31] Batched: true, Format: Parquet, Location: 
InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_address], PartitionFilters: [], PushedFilters: [In(ca_county, [Rush County,Toole County,Jefferson County,Dona Ana County,La Porte County]), IsNo..., ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(8) Project [cd_demo_sk#16, cd_gender#1, cd_marital_status#2, cd_education_status#3, cd_purchase_estimate#4, cd_credit_rating#5, cd_dep_count#6, cd_dep_employed_count#7, cd_dep_college_count#8] + +- *(8) Filter isnotnull(cd_demo_sk#16) + +- *(8) FileScan parquet default.customer_demographics[cd_demo_sk#16,cd_gender#1,cd_marital_status#2,cd_education_status#3,cd_purchase_estimate#4,cd_credit_rating#5,cd_dep_count#6,cd_dep_employed_count#7,cd_dep_college_count#8] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_demographics], PartitionFilters: [], PushedFilters: [IsNotNull(cd_demo_sk)], ReadSchema: struct 0.00) THEN CheckOverflow((promote_precision(year_total#5) / promote_precision(year_total#4)), DecimalType(38,20)) ELSE null END > CASE WHEN (year_total#6 > 0.00) THEN CheckOverflow((promote_precision(year_total#7) / promote_precision(year_total#6)), DecimalType(38,20)) ELSE null END) + :- *(17) Project [customer_id#2, year_total#6, customer_preferred_cust_flag#1, year_total#7, year_total#4] + : +- *(17) BroadcastHashJoin [customer_id#2], [customer_id#8], Inner, BuildRight + : :- *(17) Project [customer_id#2, year_total#6, customer_preferred_cust_flag#1, year_total#7] + : : +- *(17) BroadcastHashJoin [customer_id#2], [customer_id#9], Inner, BuildRight + : : :- Union + : : : :- *(4) Filter (isnotnull(year_total#6) && (year_total#6 > 0.00)) + : : : : +- *(4) HashAggregate(keys=[c_customer_id#10, c_first_name#11, c_last_name#12, d_year#13, c_preferred_cust_flag#14, c_birth_country#15, c_login#16, c_email_address#17], 
functions=[sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#18 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#19 as decimal(8,2)))), DecimalType(8,2))))]) + : : : : +- Exchange hashpartitioning(c_customer_id#10, c_first_name#11, c_last_name#12, d_year#13, c_preferred_cust_flag#14, c_birth_country#15, c_login#16, c_email_address#17, 200) + : : : : +- *(3) HashAggregate(keys=[c_customer_id#10, c_first_name#11, c_last_name#12, d_year#13, c_preferred_cust_flag#14, c_birth_country#15, c_login#16, c_email_address#17], functions=[partial_sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#18 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#19 as decimal(8,2)))), DecimalType(8,2))))]) + : : : : +- *(3) Project [c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#14, c_birth_country#15, c_login#16, c_email_address#17, ss_ext_discount_amt#19, ss_ext_list_price#18, d_year#13] + : : : : +- *(3) BroadcastHashJoin [ss_sold_date_sk#20], [d_date_sk#21], Inner, BuildRight + : : : : :- *(3) Project [c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#14, c_birth_country#15, c_login#16, c_email_address#17, ss_sold_date_sk#20, ss_ext_discount_amt#19, ss_ext_list_price#18] + : : : : : +- *(3) BroadcastHashJoin [c_customer_sk#22], [ss_customer_sk#23], Inner, BuildRight + : : : : : :- *(3) Project [c_customer_sk#22, c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#14, c_birth_country#15, c_login#16, c_email_address#17] + : : : : : : +- *(3) Filter (isnotnull(c_customer_sk#22) && isnotnull(c_customer_id#10)) + : : : : : : +- *(3) FileScan parquet default.customer[c_customer_sk#22,c_customer_id#10,c_first_name#11,c_last_name#12,c_preferred_cust_flag#14,c_birth_country#15,c_login#16,c_email_address#17] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], 
PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)], ReadSchema: struct + : : : +- LocalTableScan , [customer_id#24, year_total#25] + : : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true])) + : : +- Union + : : :- *(8) HashAggregate(keys=[c_customer_id#10, c_first_name#11, c_last_name#12, d_year#13, c_preferred_cust_flag#14, c_birth_country#15, c_login#16, c_email_address#17], functions=[sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#18 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#19 as decimal(8,2)))), DecimalType(8,2))))]) + : : : +- Exchange hashpartitioning(c_customer_id#10, c_first_name#11, c_last_name#12, d_year#13, c_preferred_cust_flag#14, c_birth_country#15, c_login#16, c_email_address#17, 200) + : : : +- *(7) HashAggregate(keys=[c_customer_id#10, c_first_name#11, c_last_name#12, d_year#13, c_preferred_cust_flag#14, c_birth_country#15, c_login#16, c_email_address#17], functions=[partial_sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#18 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#19 as decimal(8,2)))), DecimalType(8,2))))]) + : : : +- *(7) Project [c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#14, c_birth_country#15, c_login#16, c_email_address#17, ss_ext_discount_amt#19, ss_ext_list_price#18, d_year#13] + : : : +- *(7) BroadcastHashJoin [ss_sold_date_sk#20], [d_date_sk#21], Inner, BuildRight + : : : :- *(7) Project [c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#14, c_birth_country#15, c_login#16, c_email_address#17, ss_sold_date_sk#20, ss_ext_discount_amt#19, ss_ext_list_price#18] + : : : : +- *(7) BroadcastHashJoin [c_customer_sk#22], [ss_customer_sk#23], Inner, BuildRight + : : : : :- *(7) Project [c_customer_sk#22, c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#14, c_birth_country#15, c_login#16, c_email_address#17] + : : : : : +- *(7) Filter 
(isnotnull(c_customer_sk#22) && isnotnull(c_customer_id#10)) + : : : : : +- *(7) FileScan parquet default.customer[c_customer_sk#22,c_customer_id#10,c_first_name#11,c_last_name#12,c_preferred_cust_flag#14,c_birth_country#15,c_login#16,c_email_address#17] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)], ReadSchema: struct + : : +- LocalTableScan , [customer_id#24, customer_preferred_cust_flag#26, year_total#25] + : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true])) + : +- Union + : :- LocalTableScan , [customer_id#8, year_total#4] + : +- *(12) Filter (isnotnull(year_total#25) && (year_total#25 > 0.00)) + : +- *(12) HashAggregate(keys=[c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#14, c_birth_country#15, c_login#16, c_email_address#17, d_year#13], functions=[sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#27 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#28 as decimal(8,2)))), DecimalType(8,2))))]) + : +- Exchange hashpartitioning(c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#14, c_birth_country#15, c_login#16, c_email_address#17, d_year#13, 200) + : +- *(11) HashAggregate(keys=[c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#14, c_birth_country#15, c_login#16, c_email_address#17, d_year#13], functions=[partial_sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#27 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#28 as decimal(8,2)))), DecimalType(8,2))))]) + : +- *(11) Project [c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#14, c_birth_country#15, c_login#16, c_email_address#17, ws_ext_discount_amt#28, ws_ext_list_price#27, d_year#13] + : +- *(11) BroadcastHashJoin [ws_sold_date_sk#29], 
[d_date_sk#21], Inner, BuildRight + : :- *(11) Project [c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#14, c_birth_country#15, c_login#16, c_email_address#17, ws_sold_date_sk#29, ws_ext_discount_amt#28, ws_ext_list_price#27] + : : +- *(11) BroadcastHashJoin [c_customer_sk#22], [ws_bill_customer_sk#30], Inner, BuildRight + : : :- *(11) Project [c_customer_sk#22, c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#14, c_birth_country#15, c_login#16, c_email_address#17] + : : : +- *(11) Filter (isnotnull(c_customer_sk#22) && isnotnull(c_customer_id#10)) + : : : +- *(11) FileScan parquet default.customer[c_customer_sk#22,c_customer_id#10,c_first_name#11,c_last_name#12,c_preferred_cust_flag#14,c_birth_country#15,c_login#16,c_email_address#17] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)], ReadSchema: struct, [customer_id#3, year_total#5] + +- *(16) HashAggregate(keys=[c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#14, c_birth_country#15, c_login#16, c_email_address#17, d_year#13], functions=[sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#27 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#28 as decimal(8,2)))), DecimalType(8,2))))]) + +- Exchange hashpartitioning(c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#14, c_birth_country#15, c_login#16, c_email_address#17, d_year#13, 200) + +- *(15) HashAggregate(keys=[c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#14, c_birth_country#15, c_login#16, c_email_address#17, d_year#13], functions=[partial_sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#27 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#28 as decimal(8,2)))), DecimalType(8,2))))]) + +- *(15) 
Project [c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#14, c_birth_country#15, c_login#16, c_email_address#17, ws_ext_discount_amt#28, ws_ext_list_price#27, d_year#13] + +- *(15) BroadcastHashJoin [ws_sold_date_sk#29], [d_date_sk#21], Inner, BuildRight + :- *(15) Project [c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#14, c_birth_country#15, c_login#16, c_email_address#17, ws_sold_date_sk#29, ws_ext_discount_amt#28, ws_ext_list_price#27] + : +- *(15) BroadcastHashJoin [c_customer_sk#22], [ws_bill_customer_sk#30], Inner, BuildRight + : :- *(15) Project [c_customer_sk#22, c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#14, c_birth_country#15, c_login#16, c_email_address#17] + : : +- *(15) Filter (isnotnull(c_customer_sk#22) && isnotnull(c_customer_id#10)) + : : +- *(15) FileScan parquet default.customer[c_customer_sk#22,c_customer_id#10,c_first_name#11,c_last_name#12,c_preferred_cust_flag#14,c_birth_country#15,c_login#16,c_email_address#17] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(1) Project [i_item_sk#15, i_item_id#3, i_item_desc#4, i_current_price#6, i_class#2, i_category#1] + : +- *(1) Filter (i_category#1 IN (Sports,Books,Home) && isnotnull(i_item_sk#15)) + : +- *(1) FileScan parquet default.item[i_item_sk#15,i_item_id#3,i_item_desc#4,i_current_price#6,i_class#2,i_category#1] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [In(i_category, [Sports,Books,Home]), IsNotNull(i_item_sk)], ReadSchema: struct= 10644)) && (d_date#16 <= 10674)) && isnotnull(d_date_sk#13)) + +- *(2) 
FileScan parquet default.date_dim[d_date_sk#13,d_date#16] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,1999-02-22), LessThanOrEqual(d_date,1999-03-24), Is..., ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q12/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q12/simplified.txt new file mode 100644 index 000000000..497db4cde --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q12/simplified.txt @@ -0,0 +1,34 @@ +TakeOrderedAndProject [i_item_id,i_item_desc,itemrevenue,i_category,revenueratio,i_current_price,i_class] + WholeStageCodegen + Project [i_item_id,i_item_desc,itemrevenue,_we0,i_category,i_current_price,i_class,_w0] + InputAdapter + Window [_w1,i_class] + WholeStageCodegen + Sort [i_class] + InputAdapter + Exchange [i_class] #1 + WholeStageCodegen + HashAggregate [i_item_id,i_item_desc,i_category,sum,i_current_price,sum(UnscaledValue(ws_ext_sales_price)),i_class] [itemrevenue,_w1,sum,sum(UnscaledValue(ws_ext_sales_price)),_w0] + InputAdapter + Exchange [i_item_id,i_item_desc,i_category,i_current_price,i_class] #2 + WholeStageCodegen + HashAggregate [i_item_id,i_item_desc,ws_ext_sales_price,i_category,sum,i_current_price,i_class,sum] [sum,sum] + Project [i_class,i_current_price,ws_ext_sales_price,i_category,i_item_desc,i_item_id] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Project [i_class,i_current_price,ws_ext_sales_price,ws_sold_date_sk,i_category,i_item_desc,i_item_id] + BroadcastHashJoin [ws_item_sk,i_item_sk] + Project [ws_sold_date_sk,ws_item_sk,ws_ext_sales_price] + Filter [ws_item_sk,ws_sold_date_sk] + Scan parquet default.web_sales [ws_sold_date_sk,ws_item_sk,ws_ext_sales_price] [ws_sold_date_sk,ws_item_sk,ws_ext_sales_price] + InputAdapter + 
BroadcastExchange #3 + WholeStageCodegen + Project [i_class,i_current_price,i_item_sk,i_category,i_item_desc,i_item_id] + Filter [i_category,i_item_sk] + Scan parquet default.item [i_class,i_current_price,i_item_sk,i_category,i_item_desc,i_item_id] [i_class,i_current_price,i_item_sk,i_category,i_item_desc,i_item_id] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen + Project [d_date_sk] + Filter [d_date,d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_date] [d_date_sk,d_date] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q13/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q13/explain.txt new file mode 100644 index 000000000..1e61c9ee5 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q13/explain.txt @@ -0,0 +1,37 @@ +== Physical Plan == +*(7) HashAggregate(keys=[], functions=[avg(cast(ss_quantity#1 as bigint)), avg(UnscaledValue(ss_ext_sales_price#2)), avg(UnscaledValue(ss_ext_wholesale_cost#3)), sum(UnscaledValue(ss_ext_wholesale_cost#3))]) ++- Exchange SinglePartition + +- *(6) HashAggregate(keys=[], functions=[partial_avg(cast(ss_quantity#1 as bigint)), partial_avg(UnscaledValue(ss_ext_sales_price#2)), partial_avg(UnscaledValue(ss_ext_wholesale_cost#3)), partial_sum(UnscaledValue(ss_ext_wholesale_cost#3))]) + +- *(6) Project [ss_quantity#1, ss_ext_sales_price#2, ss_ext_wholesale_cost#3] + +- *(6) BroadcastHashJoin [ss_hdemo_sk#4], [hd_demo_sk#5], Inner, BuildRight, (((((((cd_marital_status#6 = M) && (cd_education_status#7 = Advanced Degree)) && (ss_sales_price#8 >= 100.00)) && (ss_sales_price#8 <= 150.00)) && (hd_dep_count#9 = 3)) || (((((cd_marital_status#6 = S) && (cd_education_status#7 = College)) && (ss_sales_price#8 >= 50.00)) && (ss_sales_price#8 <= 100.00)) && (hd_dep_count#9 = 1))) || (((((cd_marital_status#6 = W) && (cd_education_status#7 = 2 yr Degree)) && (ss_sales_price#8 >= 150.00)) && (ss_sales_price#8 <= 200.00)) && (hd_dep_count#9 = 1))) + :- *(6) 
Project [ss_hdemo_sk#4, ss_quantity#1, ss_sales_price#8, ss_ext_sales_price#2, ss_ext_wholesale_cost#3, cd_marital_status#6, cd_education_status#7] + : +- *(6) BroadcastHashJoin [ss_cdemo_sk#10], [cd_demo_sk#11], Inner, BuildRight + : :- *(6) Project [ss_cdemo_sk#10, ss_hdemo_sk#4, ss_quantity#1, ss_sales_price#8, ss_ext_sales_price#2, ss_ext_wholesale_cost#3] + : : +- *(6) BroadcastHashJoin [ss_sold_date_sk#12], [d_date_sk#13], Inner, BuildRight + : : :- *(6) Project [ss_sold_date_sk#12, ss_cdemo_sk#10, ss_hdemo_sk#4, ss_quantity#1, ss_sales_price#8, ss_ext_sales_price#2, ss_ext_wholesale_cost#3] + : : : +- *(6) BroadcastHashJoin [ss_addr_sk#14], [ca_address_sk#15], Inner, BuildRight, ((((ca_state#16 IN (TX,OH) && (ss_net_profit#17 >= 100.00)) && (ss_net_profit#17 <= 200.00)) || ((ca_state#16 IN (OR,NM,KY) && (ss_net_profit#17 >= 150.00)) && (ss_net_profit#17 <= 300.00))) || ((ca_state#16 IN (VA,TX,MS) && (ss_net_profit#17 >= 50.00)) && (ss_net_profit#17 <= 250.00))) + : : : :- *(6) Project [ss_sold_date_sk#12, ss_cdemo_sk#10, ss_hdemo_sk#4, ss_addr_sk#14, ss_quantity#1, ss_sales_price#8, ss_ext_sales_price#2, ss_ext_wholesale_cost#3, ss_net_profit#17] + : : : : +- *(6) BroadcastHashJoin [ss_store_sk#18], [s_store_sk#19], Inner, BuildRight + : : : : :- *(6) Project [ss_sold_date_sk#12, ss_cdemo_sk#10, ss_hdemo_sk#4, ss_addr_sk#14, ss_store_sk#18, ss_quantity#1, ss_sales_price#8, ss_ext_sales_price#2, ss_ext_wholesale_cost#3, ss_net_profit#17] + : : : : : +- *(6) Filter ((((isnotnull(ss_store_sk#18) && isnotnull(ss_addr_sk#14)) && isnotnull(ss_sold_date_sk#12)) && isnotnull(ss_cdemo_sk#10)) && isnotnull(ss_hdemo_sk#4)) + : : : : : +- *(6) FileScan parquet default.store_sales[ss_sold_date_sk#12,ss_cdemo_sk#10,ss_hdemo_sk#4,ss_addr_sk#14,ss_store_sk#18,ss_quantity#1,ss_sales_price#8,ss_ext_sales_price#2,ss_ext_wholesale_cost#3,ss_net_profit#17] Batched: true, Format: Parquet, Location: 
InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_store_sk), IsNotNull(ss_addr_sk), IsNotNull(ss_sold_date_sk), IsNotNull(ss_cdemo_sk..., ReadSchema: struct + : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : +- *(2) Project [ca_address_sk#15, ca_state#16] + : : : +- *(2) Filter ((isnotnull(ca_country#20) && (ca_country#20 = United States)) && isnotnull(ca_address_sk#15)) + : : : +- *(2) FileScan parquet default.customer_address[ca_address_sk#15,ca_state#16,ca_country#20] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_country), EqualTo(ca_country,United States), IsNotNull(ca_address_sk)], ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(3) Project [d_date_sk#13] + : : +- *(3) Filter ((isnotnull(d_year#21) && (d_year#21 = 2001)) && isnotnull(d_date_sk#13)) + : : +- *(3) FileScan parquet default.date_dim[d_date_sk#13,d_year#21] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2001), IsNotNull(d_date_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(4) Project [cd_demo_sk#11, cd_marital_status#6, cd_education_status#7] + : +- *(4) Filter isnotnull(cd_demo_sk#11) + : +- *(4) FileScan parquet default.customer_demographics[cd_demo_sk#11,cd_marital_status#6,cd_education_status#7] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_demographics], PartitionFilters: [], PushedFilters: [IsNotNull(cd_demo_sk)], 
ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(5) Project [hd_demo_sk#5, hd_dep_count#9] + +- *(5) Filter isnotnull(hd_demo_sk#5) + +- *(5) FileScan parquet default.household_demographics[hd_demo_sk#5,hd_dep_count#9] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/household_demographics], PartitionFilters: [], PushedFilters: [IsNotNull(hd_demo_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q13/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q13/simplified.txt new file mode 100644 index 000000000..0b02236c0 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q13/simplified.txt @@ -0,0 +1,49 @@ +WholeStageCodegen + HashAggregate [sum,sum,sum(UnscaledValue(ss_ext_wholesale_cost)),sum,avg(cast(ss_quantity as bigint)),count,sum,avg(UnscaledValue(ss_ext_sales_price)),count,avg(UnscaledValue(ss_ext_wholesale_cost)),count] [sum,avg(ss_ext_sales_price),sum,sum(UnscaledValue(ss_ext_wholesale_cost)),sum,sum(ss_ext_wholesale_cost),avg(ss_ext_wholesale_cost),avg(cast(ss_quantity as bigint)),avg(ss_quantity),count,sum,avg(UnscaledValue(ss_ext_sales_price)),count,avg(UnscaledValue(ss_ext_wholesale_cost)),count] + InputAdapter + Exchange #1 + WholeStageCodegen + HashAggregate [ss_ext_wholesale_cost,count,sum,sum,count,sum,count,sum,sum,count,sum,sum,sum,ss_ext_sales_price,count,ss_quantity,count] [count,sum,sum,count,sum,count,sum,sum,count,sum,sum,sum,count,count] + Project [ss_quantity,ss_ext_sales_price,ss_ext_wholesale_cost] + BroadcastHashJoin [hd_dep_count,ss_hdemo_sk,ss_sales_price,cd_education_status,hd_demo_sk,cd_marital_status] + Project [ss_quantity,cd_marital_status,ss_ext_sales_price,ss_sales_price,ss_hdemo_sk,cd_education_status,ss_ext_wholesale_cost] + BroadcastHashJoin 
[ss_cdemo_sk,cd_demo_sk] + Project [ss_quantity,ss_ext_sales_price,ss_sales_price,ss_cdemo_sk,ss_hdemo_sk,ss_ext_wholesale_cost] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_quantity,ss_ext_sales_price,ss_sales_price,ss_cdemo_sk,ss_sold_date_sk,ss_hdemo_sk,ss_ext_wholesale_cost] + BroadcastHashJoin [ss_addr_sk,ca_address_sk,ca_state,ss_net_profit] + Project [ss_addr_sk,ss_quantity,ss_ext_sales_price,ss_sales_price,ss_net_profit,ss_cdemo_sk,ss_sold_date_sk,ss_hdemo_sk,ss_ext_wholesale_cost] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_addr_sk,ss_quantity,ss_store_sk,ss_ext_sales_price,ss_sales_price,ss_net_profit,ss_cdemo_sk,ss_sold_date_sk,ss_hdemo_sk,ss_ext_wholesale_cost] + Filter [ss_cdemo_sk,ss_hdemo_sk,ss_sold_date_sk,ss_store_sk,ss_addr_sk] + Scan parquet default.store_sales [ss_addr_sk,ss_quantity,ss_store_sk,ss_ext_sales_price,ss_sales_price,ss_net_profit,ss_cdemo_sk,ss_sold_date_sk,ss_hdemo_sk,ss_ext_wholesale_cost] [ss_addr_sk,ss_quantity,ss_store_sk,ss_ext_sales_price,ss_sales_price,ss_net_profit,ss_cdemo_sk,ss_sold_date_sk,ss_hdemo_sk,ss_ext_wholesale_cost] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen + Project [s_store_sk] + Filter [s_store_sk] + Scan parquet default.store [s_store_sk] [s_store_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen + Project [ca_address_sk,ca_state] + Filter [ca_country,ca_address_sk] + Scan parquet default.customer_address [ca_address_sk,ca_state,ca_country] [ca_address_sk,ca_state,ca_country] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen + Project [d_date_sk] + Filter [d_year,d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_year] [d_date_sk,d_year] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen + Project [cd_demo_sk,cd_marital_status,cd_education_status] + Filter [cd_demo_sk] + Scan parquet default.customer_demographics [cd_demo_sk,cd_marital_status,cd_education_status] [cd_demo_sk,cd_marital_status,cd_education_status] + InputAdapter + 
BroadcastExchange #6 + WholeStageCodegen + Project [hd_demo_sk,hd_dep_count] + Filter [hd_demo_sk] + Scan parquet default.household_demographics [hd_demo_sk,hd_dep_count] [hd_demo_sk,hd_dep_count] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14a/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14a/explain.txt new file mode 100644 index 000000000..27a206912 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14a/explain.txt @@ -0,0 +1,192 @@ +== Physical Plan == +TakeOrderedAndProject(limit=100, orderBy=[channel#1 ASC NULLS FIRST,i_brand_id#2 ASC NULLS FIRST,i_class_id#3 ASC NULLS FIRST,i_category_id#4 ASC NULLS FIRST], output=[channel#1,i_brand_id#2,i_class_id#3,i_category_id#4,sum(sales)#5,sum(number_sales)#6]) ++- *(80) HashAggregate(keys=[channel#1, i_brand_id#2, i_class_id#3, i_category_id#4, spark_grouping_id#7], functions=[sum(sales#8), sum(number_sales#9)]) + +- Exchange hashpartitioning(channel#1, i_brand_id#2, i_class_id#3, i_category_id#4, spark_grouping_id#7, 200) + +- *(79) HashAggregate(keys=[channel#1, i_brand_id#2, i_class_id#3, i_category_id#4, spark_grouping_id#7], functions=[partial_sum(sales#8), partial_sum(number_sales#9)]) + +- *(79) Expand [List(sales#8, number_sales#9, channel#10, i_brand_id#11, i_class_id#12, i_category_id#13, 0), List(sales#8, number_sales#9, channel#10, i_brand_id#11, i_class_id#12, null, 1), List(sales#8, number_sales#9, channel#10, i_brand_id#11, null, null, 3), List(sales#8, number_sales#9, channel#10, null, null, null, 7), List(sales#8, number_sales#9, null, null, null, null, 15)], [sales#8, number_sales#9, channel#1, i_brand_id#2, i_class_id#3, i_category_id#4, spark_grouping_id#7] + +- Union + :- *(26) Project [sales#8, number_sales#9, channel#10, i_brand_id#11, i_class_id#12, i_category_id#13] + : +- *(26) Filter (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#14 as decimal(10,0)) as decimal(12,2))) * 
promote_precision(cast(ss_list_price#15 as decimal(12,2)))), DecimalType(18,2)))#16) && (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#14 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#15 as decimal(12,2)))), DecimalType(18,2)))#16 as decimal(32,6)) > cast(Subquery subquery1662 as decimal(32,6)))) + : : +- Subquery subquery1662 + : : +- *(8) HashAggregate(keys=[], functions=[avg(CheckOverflow((promote_precision(cast(cast(quantity#17 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#18 as decimal(12,2)))), DecimalType(18,2)))]) + : : +- Exchange SinglePartition + : : +- *(7) HashAggregate(keys=[], functions=[partial_avg(CheckOverflow((promote_precision(cast(cast(quantity#17 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#18 as decimal(12,2)))), DecimalType(18,2)))]) + : : +- Union + : : :- *(2) Project [ss_quantity#14 AS quantity#17, ss_list_price#15 AS list_price#18] + : : : +- *(2) BroadcastHashJoin [ss_sold_date_sk#19], [d_date_sk#20], Inner, BuildRight + : : : :- *(2) Project [ss_sold_date_sk#19, ss_quantity#14, ss_list_price#15] + : : : : +- *(2) Filter isnotnull(ss_sold_date_sk#19) + : : : : +- *(2) FileScan parquet default.store_sales[ss_sold_date_sk#19,ss_quantity#14,ss_list_price#15] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk)], ReadSchema: struct + : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : +- *(1) Project [d_date_sk#20] + : : : +- *(1) Filter (((isnotnull(d_year#21) && (d_year#21 >= 1999)) && (d_year#21 <= 2001)) && isnotnull(d_date_sk#20)) + : : : +- *(1) FileScan parquet default.date_dim[d_date_sk#20,d_year#21] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], 
PartitionFilters: [], PushedFilters: [IsNotNull(d_year), GreaterThanOrEqual(d_year,1999), LessThanOrEqual(d_year,2001), IsNotNull(d_da..., ReadSchema: struct + : : :- *(4) Project [cs_quantity#22 AS quantity#23, cs_list_price#24 AS list_price#25] + : : : +- *(4) BroadcastHashJoin [cs_sold_date_sk#26], [d_date_sk#20], Inner, BuildRight + : : : :- *(4) Project [cs_sold_date_sk#26, cs_quantity#22, cs_list_price#24] + : : : : +- *(4) Filter isnotnull(cs_sold_date_sk#26) + : : : : +- *(4) FileScan parquet default.catalog_sales[cs_sold_date_sk#26,cs_quantity#22,cs_list_price#24] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_sold_date_sk)], ReadSchema: struct + : : : +- ReusedExchange [d_date_sk#20], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(6) Project [ws_quantity#27 AS quantity#28, ws_list_price#29 AS list_price#30] + : : +- *(6) BroadcastHashJoin [ws_sold_date_sk#31], [d_date_sk#20], Inner, BuildRight + : : :- *(6) Project [ws_sold_date_sk#31, ws_quantity#27, ws_list_price#29] + : : : +- *(6) Filter isnotnull(ws_sold_date_sk#31) + : : : +- *(6) FileScan parquet default.web_sales[ws_sold_date_sk#31,ws_quantity#27,ws_list_price#29] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_sold_date_sk)], ReadSchema: struct + : : +- ReusedExchange [d_date_sk#20], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(26) HashAggregate(keys=[i_brand_id#11, i_class_id#12, i_category_id#13], functions=[sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#14 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#15 as decimal(12,2)))), DecimalType(18,2))), count(1)]) + : +- Exchange 
hashpartitioning(i_brand_id#11, i_class_id#12, i_category_id#13, 200) + : +- *(25) HashAggregate(keys=[i_brand_id#11, i_class_id#12, i_category_id#13], functions=[partial_sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#14 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#15 as decimal(12,2)))), DecimalType(18,2))), partial_count(1)]) + : +- *(25) Project [ss_quantity#14, ss_list_price#15, i_brand_id#11, i_class_id#12, i_category_id#13] + : +- *(25) BroadcastHashJoin [ss_sold_date_sk#19], [d_date_sk#20], Inner, BuildRight + : :- *(25) Project [ss_sold_date_sk#19, ss_quantity#14, ss_list_price#15, i_brand_id#11, i_class_id#12, i_category_id#13] + : : +- *(25) BroadcastHashJoin [ss_item_sk#32], [i_item_sk#33], Inner, BuildRight + : : :- *(25) BroadcastHashJoin [ss_item_sk#32], [ss_item_sk#34], LeftSemi, BuildRight + : : : :- *(25) Project [ss_sold_date_sk#19, ss_item_sk#32, ss_quantity#14, ss_list_price#15] + : : : : +- *(25) Filter (isnotnull(ss_item_sk#32) && isnotnull(ss_sold_date_sk#19)) + : : : : +- *(25) FileScan parquet default.store_sales[ss_sold_date_sk#19,ss_item_sk#32,ss_quantity#14,ss_list_price#15] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk)], ReadSchema: struct + : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : +- *(11) Project [i_item_sk#33 AS ss_item_sk#34] + : : : +- *(11) BroadcastHashJoin [i_brand_id#11, i_class_id#12, i_category_id#13], [brand_id#35, class_id#36, category_id#37], Inner, BuildRight + : : : :- *(11) Project [i_item_sk#33, i_brand_id#11, i_class_id#12, i_category_id#13] + : : : : +- *(11) Filter ((isnotnull(i_class_id#12) && isnotnull(i_brand_id#11)) && isnotnull(i_category_id#13)) + : : : : +- *(11) FileScan parquet 
default.item[i_item_sk#33,i_brand_id#11,i_class_id#12,i_category_id#13] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_class_id), IsNotNull(i_brand_id), IsNotNull(i_category_id)], ReadSchema: struct + : : : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, int, true], input[1, int, true], input[2, int, true])) + : : : +- *(10) HashAggregate(keys=[brand_id#35, class_id#36, category_id#37], functions=[]) + : : : +- *(10) HashAggregate(keys=[brand_id#35, class_id#36, category_id#37], functions=[]) + : : : +- *(10) BroadcastHashJoin [coalesce(brand_id#35, 0), coalesce(class_id#36, 0), coalesce(category_id#37, 0)], [coalesce(i_brand_id#11, 0), coalesce(i_class_id#12, 0), coalesce(i_category_id#13, 0)], LeftSemi, BuildRight, (((brand_id#35 <=> i_brand_id#11) && (class_id#36 <=> i_class_id#12)) && (category_id#37 <=> i_category_id#13)) + : : : :- *(10) HashAggregate(keys=[brand_id#35, class_id#36, category_id#37], functions=[]) + : : : : +- Exchange hashpartitioning(brand_id#35, class_id#36, category_id#37, 200) + : : : : +- *(6) HashAggregate(keys=[brand_id#35, class_id#36, category_id#37], functions=[]) + : : : : +- *(6) BroadcastHashJoin [coalesce(brand_id#35, 0), coalesce(class_id#36, 0), coalesce(category_id#37, 0)], [coalesce(i_brand_id#11, 0), coalesce(i_class_id#12, 0), coalesce(i_category_id#13, 0)], LeftSemi, BuildRight, (((brand_id#35 <=> i_brand_id#11) && (class_id#36 <=> i_class_id#12)) && (category_id#37 <=> i_category_id#13)) + : : : : :- *(6) Project [i_brand_id#11 AS brand_id#35, i_class_id#12 AS class_id#36, i_category_id#13 AS category_id#37] + : : : : : +- *(6) BroadcastHashJoin [ss_sold_date_sk#19], [d_date_sk#20], Inner, BuildRight + : : : : : :- *(6) Project [ss_sold_date_sk#19, i_brand_id#11, i_class_id#12, i_category_id#13] + : : : : : : +- *(6) BroadcastHashJoin [ss_item_sk#32], [i_item_sk#33], Inner, 
BuildRight + : : : : : : :- *(6) Project [ss_sold_date_sk#19, ss_item_sk#32] + : : : : : : : +- *(6) Filter (isnotnull(ss_item_sk#32) && isnotnull(ss_sold_date_sk#19)) + : : : : : : : +- *(6) FileScan parquet default.store_sales[ss_sold_date_sk#19,ss_item_sk#32] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk)], ReadSchema: struct + : : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : : : +- *(1) Project [i_item_sk#33, i_brand_id#11, i_class_id#12, i_category_id#13] + : : : : : : +- *(1) Filter (((isnotnull(i_item_sk#33) && isnotnull(i_class_id#12)) && isnotnull(i_brand_id#11)) && isnotnull(i_category_id#13)) + : : : : : : +- *(1) FileScan parquet default.item[i_item_sk#33,i_brand_id#11,i_class_id#12,i_category_id#13] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk), IsNotNull(i_class_id), IsNotNull(i_brand_id), IsNotNull(i_category_id)], ReadSchema: struct + : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : : +- *(2) Project [d_date_sk#20] + : : : : : +- *(2) Filter (((isnotnull(d_year#21) && (d_year#21 >= 1999)) && (d_year#21 <= 2001)) && isnotnull(d_date_sk#20)) + : : : : : +- *(2) FileScan parquet default.date_dim[d_date_sk#20,d_year#21] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), GreaterThanOrEqual(d_year,1999), LessThanOrEqual(d_year,2001), IsNotNull(d_da..., ReadSchema: struct + : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(coalesce(input[0, int, true], 0), coalesce(input[1, int, 
true], 0), coalesce(input[2, int, true], 0))) + : : : : +- *(5) Project [i_brand_id#11, i_class_id#12, i_category_id#13] + : : : : +- *(5) BroadcastHashJoin [cs_sold_date_sk#26], [d_date_sk#20], Inner, BuildRight + : : : : :- *(5) Project [cs_sold_date_sk#26, i_brand_id#11, i_class_id#12, i_category_id#13] + : : : : : +- *(5) BroadcastHashJoin [cs_item_sk#38], [i_item_sk#33], Inner, BuildRight + : : : : : :- *(5) Project [cs_sold_date_sk#26, cs_item_sk#38] + : : : : : : +- *(5) Filter (isnotnull(cs_item_sk#38) && isnotnull(cs_sold_date_sk#26)) + : : : : : : +- *(5) FileScan parquet default.catalog_sales[cs_sold_date_sk#26,cs_item_sk#38] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_item_sk), IsNotNull(cs_sold_date_sk)], ReadSchema: struct + : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : : +- *(3) Project [i_item_sk#33, i_brand_id#11, i_class_id#12, i_category_id#13] + : : : : : +- *(3) Filter isnotnull(i_item_sk#33) + : : : : : +- *(3) FileScan parquet default.item[i_item_sk#33,i_brand_id#11,i_class_id#12,i_category_id#13] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct + : : : : +- ReusedExchange [d_date_sk#20], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : +- BroadcastExchange HashedRelationBroadcastMode(List(coalesce(input[0, int, true], 0), coalesce(input[1, int, true], 0), coalesce(input[2, int, true], 0))) + : : : +- *(9) Project [i_brand_id#11, i_class_id#12, i_category_id#13] + : : : +- *(9) BroadcastHashJoin [ws_sold_date_sk#31], [d_date_sk#20], Inner, BuildRight + : : : :- *(9) Project [ws_sold_date_sk#31, i_brand_id#11, i_class_id#12, 
i_category_id#13] + : : : : +- *(9) BroadcastHashJoin [ws_item_sk#39], [i_item_sk#33], Inner, BuildRight + : : : : :- *(9) Project [ws_sold_date_sk#31, ws_item_sk#39] + : : : : : +- *(9) Filter (isnotnull(ws_item_sk#39) && isnotnull(ws_sold_date_sk#31)) + : : : : : +- *(9) FileScan parquet default.web_sales[ws_sold_date_sk#31,ws_item_sk#39] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_item_sk), IsNotNull(ws_sold_date_sk)], ReadSchema: struct + : : : : +- ReusedExchange [i_item_sk#33, i_brand_id#11, i_class_id#12, i_category_id#13], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : +- ReusedExchange [d_date_sk#20], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(23) BroadcastHashJoin [i_item_sk#33], [ss_item_sk#34], LeftSemi, BuildRight + : : :- *(23) Project [i_item_sk#33, i_brand_id#11, i_class_id#12, i_category_id#13] + : : : +- *(23) Filter isnotnull(i_item_sk#33) + : : : +- *(23) FileScan parquet default.item[i_item_sk#33,i_brand_id#11,i_class_id#12,i_category_id#13] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct + : : +- ReusedExchange [ss_item_sk#34], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(24) Project [d_date_sk#20] + : +- *(24) Filter ((((isnotnull(d_year#21) && isnotnull(d_moy#40)) && (d_year#21 = 2001)) && (d_moy#40 = 11)) && isnotnull(d_date_sk#20)) + : +- *(24) FileScan parquet 
default.date_dim[d_date_sk#20,d_year#21,d_moy#40] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2001), EqualTo(d_moy,11), IsNotNull(d_date_sk)], ReadSchema: struct + :- *(52) Project [sales#41, number_sales#42, channel#43, i_brand_id#11, i_class_id#12, i_category_id#13] + : +- *(52) Filter (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#22 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#24 as decimal(12,2)))), DecimalType(18,2)))#44) && (cast(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#22 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#24 as decimal(12,2)))), DecimalType(18,2)))#44 as decimal(32,6)) > cast(Subquery subquery1667 as decimal(32,6)))) + : : +- Subquery subquery1667 + : : +- *(8) HashAggregate(keys=[], functions=[avg(CheckOverflow((promote_precision(cast(cast(quantity#17 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#18 as decimal(12,2)))), DecimalType(18,2)))]) + : : +- Exchange SinglePartition + : : +- *(7) HashAggregate(keys=[], functions=[partial_avg(CheckOverflow((promote_precision(cast(cast(quantity#17 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#18 as decimal(12,2)))), DecimalType(18,2)))]) + : : +- Union + : : :- *(2) Project [ss_quantity#14 AS quantity#17, ss_list_price#15 AS list_price#18] + : : : +- *(2) BroadcastHashJoin [ss_sold_date_sk#19], [d_date_sk#20], Inner, BuildRight + : : : :- *(2) Project [ss_sold_date_sk#19, ss_quantity#14, ss_list_price#15] + : : : : +- *(2) Filter isnotnull(ss_sold_date_sk#19) + : : : : +- *(2) FileScan parquet default.store_sales[ss_sold_date_sk#19,ss_quantity#14,ss_list_price#15] Batched: true, Format: Parquet, Location: 
InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk)], ReadSchema: struct + : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : +- *(1) Project [d_date_sk#20] + : : : +- *(1) Filter (((isnotnull(d_year#21) && (d_year#21 >= 1999)) && (d_year#21 <= 2001)) && isnotnull(d_date_sk#20)) + : : : +- *(1) FileScan parquet default.date_dim[d_date_sk#20,d_year#21] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), GreaterThanOrEqual(d_year,1999), LessThanOrEqual(d_year,2001), IsNotNull(d_da..., ReadSchema: struct + : : :- *(4) Project [cs_quantity#22 AS quantity#23, cs_list_price#24 AS list_price#25] + : : : +- *(4) BroadcastHashJoin [cs_sold_date_sk#26], [d_date_sk#20], Inner, BuildRight + : : : :- *(4) Project [cs_sold_date_sk#26, cs_quantity#22, cs_list_price#24] + : : : : +- *(4) Filter isnotnull(cs_sold_date_sk#26) + : : : : +- *(4) FileScan parquet default.catalog_sales[cs_sold_date_sk#26,cs_quantity#22,cs_list_price#24] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_sold_date_sk)], ReadSchema: struct + : : : +- ReusedExchange [d_date_sk#20], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(6) Project [ws_quantity#27 AS quantity#28, ws_list_price#29 AS list_price#30] + : : +- *(6) BroadcastHashJoin [ws_sold_date_sk#31], [d_date_sk#20], Inner, BuildRight + : : :- *(6) Project [ws_sold_date_sk#31, ws_quantity#27, ws_list_price#29] + : : : +- *(6) Filter isnotnull(ws_sold_date_sk#31) + : : : +- *(6) FileScan parquet default.web_sales[ws_sold_date_sk#31,ws_quantity#27,ws_list_price#29] Batched: true, 
Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_sold_date_sk)], ReadSchema: struct + : : +- ReusedExchange [d_date_sk#20], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(52) HashAggregate(keys=[i_brand_id#11, i_class_id#12, i_category_id#13], functions=[sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#22 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#24 as decimal(12,2)))), DecimalType(18,2))), count(1)]) + : +- Exchange hashpartitioning(i_brand_id#11, i_class_id#12, i_category_id#13, 200) + : +- *(51) HashAggregate(keys=[i_brand_id#11, i_class_id#12, i_category_id#13], functions=[partial_sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#22 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#24 as decimal(12,2)))), DecimalType(18,2))), partial_count(1)]) + : +- *(51) Project [cs_quantity#22, cs_list_price#24, i_brand_id#11, i_class_id#12, i_category_id#13] + : +- *(51) BroadcastHashJoin [cs_sold_date_sk#26], [d_date_sk#20], Inner, BuildRight + : :- *(51) Project [cs_sold_date_sk#26, cs_quantity#22, cs_list_price#24, i_brand_id#11, i_class_id#12, i_category_id#13] + : : +- *(51) BroadcastHashJoin [cs_item_sk#38], [i_item_sk#33], Inner, BuildRight + : : :- *(51) BroadcastHashJoin [cs_item_sk#38], [ss_item_sk#34], LeftSemi, BuildRight + : : : :- *(51) Project [cs_sold_date_sk#26, cs_item_sk#38, cs_quantity#22, cs_list_price#24] + : : : : +- *(51) Filter (isnotnull(cs_item_sk#38) && isnotnull(cs_sold_date_sk#26)) + : : : : +- *(51) FileScan parquet default.catalog_sales[cs_sold_date_sk#26,cs_item_sk#38,cs_quantity#22,cs_list_price#24] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_item_sk), 
IsNotNull(cs_sold_date_sk)], ReadSchema: struct + : : : +- ReusedExchange [ss_item_sk#34], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- ReusedExchange [i_item_sk#33, i_brand_id#11, i_class_id#12, i_category_id#13], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- ReusedExchange [d_date_sk#20], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(78) Project [sales#45, number_sales#46, channel#47, i_brand_id#11, i_class_id#12, i_category_id#13] + +- *(78) Filter (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#27 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#29 as decimal(12,2)))), DecimalType(18,2)))#48) && (cast(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#27 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#29 as decimal(12,2)))), DecimalType(18,2)))#48 as decimal(32,6)) > cast(Subquery subquery1672 as decimal(32,6)))) + : +- Subquery subquery1672 + : +- *(8) HashAggregate(keys=[], functions=[avg(CheckOverflow((promote_precision(cast(cast(quantity#17 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#18 as decimal(12,2)))), DecimalType(18,2)))]) + : +- Exchange SinglePartition + : +- *(7) HashAggregate(keys=[], functions=[partial_avg(CheckOverflow((promote_precision(cast(cast(quantity#17 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#18 as decimal(12,2)))), DecimalType(18,2)))]) + : +- Union + : :- *(2) Project [ss_quantity#14 AS quantity#17, ss_list_price#15 AS list_price#18] + : : +- *(2) BroadcastHashJoin [ss_sold_date_sk#19], [d_date_sk#20], Inner, BuildRight + : : :- *(2) Project [ss_sold_date_sk#19, ss_quantity#14, ss_list_price#15] + : : : +- *(2) Filter isnotnull(ss_sold_date_sk#19) + : : : +- *(2) FileScan parquet 
default.store_sales[ss_sold_date_sk#19,ss_quantity#14,ss_list_price#15] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk)], ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(1) Project [d_date_sk#20] + : : +- *(1) Filter (((isnotnull(d_year#21) && (d_year#21 >= 1999)) && (d_year#21 <= 2001)) && isnotnull(d_date_sk#20)) + : : +- *(1) FileScan parquet default.date_dim[d_date_sk#20,d_year#21] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), GreaterThanOrEqual(d_year,1999), LessThanOrEqual(d_year,2001), IsNotNull(d_da..., ReadSchema: struct + : :- *(4) Project [cs_quantity#22 AS quantity#23, cs_list_price#24 AS list_price#25] + : : +- *(4) BroadcastHashJoin [cs_sold_date_sk#26], [d_date_sk#20], Inner, BuildRight + : : :- *(4) Project [cs_sold_date_sk#26, cs_quantity#22, cs_list_price#24] + : : : +- *(4) Filter isnotnull(cs_sold_date_sk#26) + : : : +- *(4) FileScan parquet default.catalog_sales[cs_sold_date_sk#26,cs_quantity#22,cs_list_price#24] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_sold_date_sk)], ReadSchema: struct + : : +- ReusedExchange [d_date_sk#20], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(6) Project [ws_quantity#27 AS quantity#28, ws_list_price#29 AS list_price#30] + : +- *(6) BroadcastHashJoin [ws_sold_date_sk#31], [d_date_sk#20], Inner, BuildRight + : :- *(6) Project [ws_sold_date_sk#31, ws_quantity#27, ws_list_price#29] + : : +- *(6) Filter isnotnull(ws_sold_date_sk#31) + : : +- *(6) FileScan parquet 
default.web_sales[ws_sold_date_sk#31,ws_quantity#27,ws_list_price#29] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_sold_date_sk)], ReadSchema: struct + : +- ReusedExchange [d_date_sk#20], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(78) HashAggregate(keys=[i_brand_id#11, i_class_id#12, i_category_id#13], functions=[sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#27 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#29 as decimal(12,2)))), DecimalType(18,2))), count(1)]) + +- Exchange hashpartitioning(i_brand_id#11, i_class_id#12, i_category_id#13, 200) + +- *(77) HashAggregate(keys=[i_brand_id#11, i_class_id#12, i_category_id#13], functions=[partial_sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#27 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#29 as decimal(12,2)))), DecimalType(18,2))), partial_count(1)]) + +- *(77) Project [ws_quantity#27, ws_list_price#29, i_brand_id#11, i_class_id#12, i_category_id#13] + +- *(77) BroadcastHashJoin [ws_sold_date_sk#31], [d_date_sk#20], Inner, BuildRight + :- *(77) Project [ws_sold_date_sk#31, ws_quantity#27, ws_list_price#29, i_brand_id#11, i_class_id#12, i_category_id#13] + : +- *(77) BroadcastHashJoin [ws_item_sk#39], [i_item_sk#33], Inner, BuildRight + : :- *(77) BroadcastHashJoin [ws_item_sk#39], [ss_item_sk#34], LeftSemi, BuildRight + : : :- *(77) Project [ws_sold_date_sk#31, ws_item_sk#39, ws_quantity#27, ws_list_price#29] + : : : +- *(77) Filter (isnotnull(ws_item_sk#39) && isnotnull(ws_sold_date_sk#31)) + : : : +- *(77) FileScan parquet default.web_sales[ws_sold_date_sk#31,ws_item_sk#39,ws_quantity#27,ws_list_price#29] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], 
PartitionFilters: [], PushedFilters: [IsNotNull(ws_item_sk), IsNotNull(ws_sold_date_sk)], ReadSchema: struct + : : +- ReusedExchange [ss_item_sk#34], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- ReusedExchange [i_item_sk#33, i_brand_id#11, i_class_id#12, i_category_id#13], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- ReusedExchange [d_date_sk#20], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14a/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14a/simplified.txt new file mode 100644 index 000000000..93bae0792 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14a/simplified.txt @@ -0,0 +1,267 @@ +TakeOrderedAndProject [sum(sales),i_brand_id,sum(number_sales),channel,i_category_id,i_class_id] + WholeStageCodegen + HashAggregate [sum(sales),spark_grouping_id,i_brand_id,sum,sum,channel,i_category_id,i_class_id,sum(number_salesL)] [sum(sales),sum(sales),sum(number_sales),sum,sum,sum(number_salesL)] + InputAdapter + Exchange [spark_grouping_id,i_brand_id,channel,i_category_id,i_class_id] #1 + WholeStageCodegen + HashAggregate [sales,spark_grouping_id,sum,i_brand_id,sum,number_sales,sum,sum,channel,i_category_id,i_class_id] [sum,sum,sum,sum] + Expand [channel,sales,i_category_id,number_sales,i_class_id,i_brand_id] + InputAdapter + Union + WholeStageCodegen + Project [channel,sales,i_category_id,number_sales,i_class_id,i_brand_id] + Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2)))] + Subquery #1 + WholeStageCodegen + HashAggregate [sum,count,avg(CheckOverflow((promote_precision(cast(cast(quantity as decimal(10,0)) as decimal(12,2))) * 
promote_precision(cast(list_price as decimal(12,2)))), DecimalType(18,2)))] [avg(CheckOverflow((promote_precision(cast(cast(quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price as decimal(12,2)))), DecimalType(18,2))),average_sales,sum,count] + InputAdapter + Exchange #13 + WholeStageCodegen + HashAggregate [count,list_price,sum,quantity,sum,count] [sum,count,sum,count] + InputAdapter + Union + WholeStageCodegen + Project [ss_quantity,ss_list_price] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_sold_date_sk,ss_quantity,ss_list_price] + Filter [ss_sold_date_sk] + Scan parquet default.store_sales [ss_sold_date_sk,ss_quantity,ss_list_price] [ss_sold_date_sk,ss_quantity,ss_list_price] + InputAdapter + BroadcastExchange #14 + WholeStageCodegen + Project [d_date_sk] + Filter [d_year,d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_year] [d_date_sk,d_year] + WholeStageCodegen + Project [cs_quantity,cs_list_price] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_sold_date_sk,cs_quantity,cs_list_price] + Filter [cs_sold_date_sk] + Scan parquet default.catalog_sales [cs_sold_date_sk,cs_quantity,cs_list_price] [cs_sold_date_sk,cs_quantity,cs_list_price] + InputAdapter + ReusedExchange [d_date_sk] [d_date_sk] #14 + WholeStageCodegen + Project [ws_quantity,ws_list_price] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Project [ws_sold_date_sk,ws_quantity,ws_list_price] + Filter [ws_sold_date_sk] + Scan parquet default.web_sales [ws_sold_date_sk,ws_quantity,ws_list_price] [ws_sold_date_sk,ws_quantity,ws_list_price] + InputAdapter + ReusedExchange [d_date_sk] [d_date_sk] #14 + HashAggregate [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2))),count(1),count,i_category_id,i_brand_id,sum,i_class_id] [sales,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as 
decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2))),sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2))),count(1),count,number_sales,channel,sum] + InputAdapter + Exchange [i_brand_id,i_class_id,i_category_id] #2 + WholeStageCodegen + HashAggregate [count,count,sum,ss_list_price,i_category_id,i_brand_id,ss_quantity,sum,i_class_id] [sum,count,sum,count] + Project [ss_quantity,ss_list_price,i_category_id,i_brand_id,i_class_id] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_quantity,ss_list_price,i_category_id,i_brand_id,ss_sold_date_sk,i_class_id] + BroadcastHashJoin [ss_item_sk,i_item_sk] + BroadcastHashJoin [ss_item_sk,ss_item_sk] + Project [ss_sold_date_sk,ss_item_sk,ss_quantity,ss_list_price] + Filter [ss_item_sk,ss_sold_date_sk] + Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_quantity,ss_list_price] [ss_sold_date_sk,ss_item_sk,ss_quantity,ss_list_price] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen + Project [i_item_sk] + BroadcastHashJoin [category_id,class_id,i_category_id,i_brand_id,i_class_id,brand_id] + Project [i_item_sk,i_brand_id,i_class_id,i_category_id] + Filter [i_class_id,i_brand_id,i_category_id] + Scan parquet default.item [i_item_sk,i_brand_id,i_class_id,i_category_id] [i_item_sk,i_brand_id,i_class_id,i_category_id] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen + HashAggregate [brand_id,class_id,category_id] + HashAggregate [brand_id,class_id,category_id] + BroadcastHashJoin [class_id,i_class_id,i_category_id,brand_id,i_brand_id,category_id] + HashAggregate [brand_id,class_id,category_id] + InputAdapter + Exchange [brand_id,class_id,category_id] #5 + WholeStageCodegen + HashAggregate [brand_id,class_id,category_id] + BroadcastHashJoin [i_brand_id,i_class_id,i_category_id,class_id,brand_id,category_id] + Project 
[i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_sold_date_sk,i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [ss_sold_date_sk,ss_item_sk] + Filter [ss_item_sk,ss_sold_date_sk] + Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk] [ss_sold_date_sk,ss_item_sk] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen + Project [i_item_sk,i_brand_id,i_class_id,i_category_id] + Filter [i_item_sk,i_class_id,i_brand_id,i_category_id] + Scan parquet default.item [i_item_sk,i_brand_id,i_class_id,i_category_id] [i_item_sk,i_brand_id,i_class_id,i_category_id] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen + Project [d_date_sk] + Filter [d_year,d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_year] [d_date_sk,d_year] + InputAdapter + BroadcastExchange #8 + WholeStageCodegen + Project [i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_sold_date_sk,i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [cs_item_sk,i_item_sk] + Project [cs_sold_date_sk,cs_item_sk] + Filter [cs_item_sk,cs_sold_date_sk] + Scan parquet default.catalog_sales [cs_sold_date_sk,cs_item_sk] [cs_sold_date_sk,cs_item_sk] + InputAdapter + BroadcastExchange #9 + WholeStageCodegen + Project [i_item_sk,i_brand_id,i_class_id,i_category_id] + Filter [i_item_sk] + Scan parquet default.item [i_item_sk,i_brand_id,i_class_id,i_category_id] [i_item_sk,i_brand_id,i_class_id,i_category_id] + InputAdapter + ReusedExchange [d_date_sk] [d_date_sk] #7 + InputAdapter + BroadcastExchange #10 + WholeStageCodegen + Project [i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Project [ws_sold_date_sk,i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [ws_item_sk,i_item_sk] + Project [ws_sold_date_sk,ws_item_sk] + Filter [ws_item_sk,ws_sold_date_sk] + Scan parquet default.web_sales [ws_sold_date_sk,ws_item_sk] 
[ws_sold_date_sk,ws_item_sk] + InputAdapter + ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id] [i_item_sk,i_brand_id,i_class_id,i_category_id] #9 + InputAdapter + ReusedExchange [d_date_sk] [d_date_sk] #7 + InputAdapter + BroadcastExchange #11 + WholeStageCodegen + BroadcastHashJoin [i_item_sk,ss_item_sk] + Project [i_item_sk,i_brand_id,i_class_id,i_category_id] + Filter [i_item_sk] + Scan parquet default.item [i_item_sk,i_brand_id,i_class_id,i_category_id] [i_item_sk,i_brand_id,i_class_id,i_category_id] + InputAdapter + ReusedExchange [ss_item_sk] [ss_item_sk] #3 + InputAdapter + BroadcastExchange #12 + WholeStageCodegen + Project [d_date_sk] + Filter [d_year,d_moy,d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_year,d_moy] [d_date_sk,d_year,d_moy] + WholeStageCodegen + Project [sales,channel,i_category_id,i_brand_id,number_sales,i_class_id] + Filter [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2)))] + Subquery #2 + WholeStageCodegen + HashAggregate [sum,count,avg(CheckOverflow((promote_precision(cast(cast(quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price as decimal(12,2)))), DecimalType(18,2)))] [avg(CheckOverflow((promote_precision(cast(cast(quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price as decimal(12,2)))), DecimalType(18,2))),average_sales,sum,count] + InputAdapter + Exchange #13 + WholeStageCodegen + HashAggregate [count,list_price,sum,quantity,sum,count] [sum,count,sum,count] + InputAdapter + Union + WholeStageCodegen + Project [ss_quantity,ss_list_price] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_sold_date_sk,ss_quantity,ss_list_price] + Filter [ss_sold_date_sk] + Scan parquet default.store_sales [ss_sold_date_sk,ss_quantity,ss_list_price] [ss_sold_date_sk,ss_quantity,ss_list_price] + InputAdapter + BroadcastExchange #14 + 
WholeStageCodegen + Project [d_date_sk] + Filter [d_year,d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_year] [d_date_sk,d_year] + WholeStageCodegen + Project [cs_quantity,cs_list_price] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_sold_date_sk,cs_quantity,cs_list_price] + Filter [cs_sold_date_sk] + Scan parquet default.catalog_sales [cs_sold_date_sk,cs_quantity,cs_list_price] [cs_sold_date_sk,cs_quantity,cs_list_price] + InputAdapter + ReusedExchange [d_date_sk] [d_date_sk] #14 + WholeStageCodegen + Project [ws_quantity,ws_list_price] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Project [ws_sold_date_sk,ws_quantity,ws_list_price] + Filter [ws_sold_date_sk] + Scan parquet default.web_sales [ws_sold_date_sk,ws_quantity,ws_list_price] [ws_sold_date_sk,ws_quantity,ws_list_price] + InputAdapter + ReusedExchange [d_date_sk] [d_date_sk] #14 + HashAggregate [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2))),sum,i_category_id,i_brand_id,count,count(1),i_class_id] [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2))),sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2))),sales,channel,sum,number_sales,count,count(1)] + InputAdapter + Exchange [i_brand_id,i_class_id,i_category_id] #15 + WholeStageCodegen + HashAggregate [sum,cs_list_price,sum,i_category_id,count,i_brand_id,cs_quantity,count,i_class_id] [sum,count,sum,count] + Project [cs_quantity,i_category_id,i_brand_id,i_class_id,cs_list_price] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_quantity,i_category_id,cs_sold_date_sk,i_brand_id,i_class_id,cs_list_price] + BroadcastHashJoin [cs_item_sk,i_item_sk] + 
BroadcastHashJoin [cs_item_sk,ss_item_sk] + Project [cs_sold_date_sk,cs_item_sk,cs_quantity,cs_list_price] + Filter [cs_item_sk,cs_sold_date_sk] + Scan parquet default.catalog_sales [cs_sold_date_sk,cs_item_sk,cs_quantity,cs_list_price] [cs_sold_date_sk,cs_item_sk,cs_quantity,cs_list_price] + InputAdapter + ReusedExchange [ss_item_sk] [ss_item_sk] #3 + InputAdapter + ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id] [i_item_sk,i_brand_id,i_class_id,i_category_id] #11 + InputAdapter + ReusedExchange [d_date_sk] [d_date_sk] #12 + WholeStageCodegen + Project [number_sales,i_category_id,i_brand_id,i_class_id,sales,channel] + Filter [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2)))] + Subquery #3 + WholeStageCodegen + HashAggregate [sum,count,avg(CheckOverflow((promote_precision(cast(cast(quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price as decimal(12,2)))), DecimalType(18,2)))] [avg(CheckOverflow((promote_precision(cast(cast(quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price as decimal(12,2)))), DecimalType(18,2))),average_sales,sum,count] + InputAdapter + Exchange #13 + WholeStageCodegen + HashAggregate [count,list_price,sum,quantity,sum,count] [sum,count,sum,count] + InputAdapter + Union + WholeStageCodegen + Project [ss_quantity,ss_list_price] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_sold_date_sk,ss_quantity,ss_list_price] + Filter [ss_sold_date_sk] + Scan parquet default.store_sales [ss_sold_date_sk,ss_quantity,ss_list_price] [ss_sold_date_sk,ss_quantity,ss_list_price] + InputAdapter + BroadcastExchange #14 + WholeStageCodegen + Project [d_date_sk] + Filter [d_year,d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_year] [d_date_sk,d_year] + WholeStageCodegen + Project [cs_quantity,cs_list_price] + BroadcastHashJoin 
[cs_sold_date_sk,d_date_sk] + Project [cs_sold_date_sk,cs_quantity,cs_list_price] + Filter [cs_sold_date_sk] + Scan parquet default.catalog_sales [cs_sold_date_sk,cs_quantity,cs_list_price] [cs_sold_date_sk,cs_quantity,cs_list_price] + InputAdapter + ReusedExchange [d_date_sk] [d_date_sk] #14 + WholeStageCodegen + Project [ws_quantity,ws_list_price] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Project [ws_sold_date_sk,ws_quantity,ws_list_price] + Filter [ws_sold_date_sk] + Scan parquet default.web_sales [ws_sold_date_sk,ws_quantity,ws_list_price] [ws_sold_date_sk,ws_quantity,ws_list_price] + InputAdapter + ReusedExchange [d_date_sk] [d_date_sk] #14 + HashAggregate [count(1),sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2))),sum,count,i_category_id,i_brand_id,i_class_id] [count(1),number_sales,sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2))),sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2))),sum,count,sales,channel] + InputAdapter + Exchange [i_brand_id,i_class_id,i_category_id] #16 + WholeStageCodegen + HashAggregate [count,sum,sum,count,i_category_id,i_brand_id,i_class_id,ws_list_price,ws_quantity] [sum,count,sum,count] + Project [ws_quantity,i_category_id,ws_list_price,i_brand_id,i_class_id] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Project [ws_quantity,i_category_id,ws_list_price,ws_sold_date_sk,i_brand_id,i_class_id] + BroadcastHashJoin [ws_item_sk,i_item_sk] + BroadcastHashJoin [ws_item_sk,ss_item_sk] + Project [ws_sold_date_sk,ws_item_sk,ws_quantity,ws_list_price] + Filter [ws_item_sk,ws_sold_date_sk] + Scan parquet default.web_sales 
[ws_sold_date_sk,ws_item_sk,ws_quantity,ws_list_price] [ws_sold_date_sk,ws_item_sk,ws_quantity,ws_list_price] + InputAdapter + ReusedExchange [ss_item_sk] [ss_item_sk] #3 + InputAdapter + ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id] [i_item_sk,i_brand_id,i_class_id,i_category_id] #11 + InputAdapter + ReusedExchange [d_date_sk] [d_date_sk] #12 diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14b/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14b/explain.txt new file mode 100644 index 000000000..b3130331d --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14b/explain.txt @@ -0,0 +1,166 @@ +== Physical Plan == +TakeOrderedAndProject(limit=100, orderBy=[channel#1 ASC NULLS FIRST,i_brand_id#2 ASC NULLS FIRST,i_class_id#3 ASC NULLS FIRST,i_category_id#4 ASC NULLS FIRST], output=[channel#1,i_brand_id#2,i_class_id#3,i_category_id#4,sales#5,number_sales#6,channel#7,i_brand_id#8,i_class_id#9,i_category_id#10,sales#11,number_sales#12]) ++- *(52) BroadcastHashJoin [i_brand_id#2, i_class_id#3, i_category_id#4], [i_brand_id#8, i_class_id#9, i_category_id#10], Inner, BuildRight + :- *(52) Project [channel#1, i_brand_id#2, i_class_id#3, i_category_id#4, sales#5, number_sales#6] + : +- *(52) Filter (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#13 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#14 as decimal(12,2)))), DecimalType(18,2)))#15) && (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#13 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#14 as decimal(12,2)))), DecimalType(18,2)))#15 as decimal(32,6)) > cast(Subquery subquery1884 as decimal(32,6)))) + : : +- Subquery subquery1884 + : : +- *(8) HashAggregate(keys=[], functions=[avg(CheckOverflow((promote_precision(cast(cast(quantity#16 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#17 as decimal(12,2)))), 
DecimalType(18,2)))]) + : : +- Exchange SinglePartition + : : +- *(7) HashAggregate(keys=[], functions=[partial_avg(CheckOverflow((promote_precision(cast(cast(quantity#16 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#17 as decimal(12,2)))), DecimalType(18,2)))]) + : : +- Union + : : :- *(2) Project [ss_quantity#13 AS quantity#16, ss_list_price#14 AS list_price#17] + : : : +- *(2) BroadcastHashJoin [ss_sold_date_sk#18], [d_date_sk#19], Inner, BuildRight + : : : :- *(2) Project [ss_sold_date_sk#18, ss_quantity#13, ss_list_price#14] + : : : : +- *(2) Filter isnotnull(ss_sold_date_sk#18) + : : : : +- *(2) FileScan parquet default.store_sales[ss_sold_date_sk#18,ss_quantity#13,ss_list_price#14] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk)], ReadSchema: struct + : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : +- *(1) Project [d_date_sk#19] + : : : +- *(1) Filter (((isnotnull(d_year#20) && (d_year#20 >= 1999)) && (d_year#20 <= 2001)) && isnotnull(d_date_sk#19)) + : : : +- *(1) FileScan parquet default.date_dim[d_date_sk#19,d_year#20] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), GreaterThanOrEqual(d_year,1999), LessThanOrEqual(d_year,2001), IsNotNull(d_da..., ReadSchema: struct + : : :- *(4) Project [cs_quantity#21 AS quantity#22, cs_list_price#23 AS list_price#24] + : : : +- *(4) BroadcastHashJoin [cs_sold_date_sk#25], [d_date_sk#19], Inner, BuildRight + : : : :- *(4) Project [cs_sold_date_sk#25, cs_quantity#21, cs_list_price#23] + : : : : +- *(4) Filter isnotnull(cs_sold_date_sk#25) + : : : : +- *(4) FileScan parquet default.catalog_sales[cs_sold_date_sk#25,cs_quantity#21,cs_list_price#23] Batched: 
true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_sold_date_sk)], ReadSchema: struct + : : : +- ReusedExchange [d_date_sk#19], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(6) Project [ws_quantity#26 AS quantity#27, ws_list_price#28 AS list_price#29] + : : +- *(6) BroadcastHashJoin [ws_sold_date_sk#30], [d_date_sk#19], Inner, BuildRight + : : :- *(6) Project [ws_sold_date_sk#30, ws_quantity#26, ws_list_price#28] + : : : +- *(6) Filter isnotnull(ws_sold_date_sk#30) + : : : +- *(6) FileScan parquet default.web_sales[ws_sold_date_sk#30,ws_quantity#26,ws_list_price#28] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_sold_date_sk)], ReadSchema: struct + : : +- ReusedExchange [d_date_sk#19], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(52) HashAggregate(keys=[i_brand_id#2, i_class_id#3, i_category_id#4], functions=[sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#13 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#14 as decimal(12,2)))), DecimalType(18,2))), count(1)]) + : +- Exchange hashpartitioning(i_brand_id#2, i_class_id#3, i_category_id#4, 200) + : +- *(25) HashAggregate(keys=[i_brand_id#2, i_class_id#3, i_category_id#4], functions=[partial_sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#13 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#14 as decimal(12,2)))), DecimalType(18,2))), partial_count(1)]) + : +- *(25) Project [ss_quantity#13, ss_list_price#14, i_brand_id#2, i_class_id#3, i_category_id#4] + : +- *(25) BroadcastHashJoin [ss_sold_date_sk#18], [d_date_sk#19], Inner, BuildRight + : :- *(25) Project [ss_sold_date_sk#18, 
ss_quantity#13, ss_list_price#14, i_brand_id#2, i_class_id#3, i_category_id#4] + : : +- *(25) BroadcastHashJoin [ss_item_sk#31], [i_item_sk#32], Inner, BuildRight + : : :- *(25) BroadcastHashJoin [ss_item_sk#31], [ss_item_sk#33], LeftSemi, BuildRight + : : : :- *(25) Project [ss_sold_date_sk#18, ss_item_sk#31, ss_quantity#13, ss_list_price#14] + : : : : +- *(25) Filter (isnotnull(ss_item_sk#31) && isnotnull(ss_sold_date_sk#18)) + : : : : +- *(25) FileScan parquet default.store_sales[ss_sold_date_sk#18,ss_item_sk#31,ss_quantity#13,ss_list_price#14] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk)], ReadSchema: struct + : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : +- *(11) Project [i_item_sk#32 AS ss_item_sk#33] + : : : +- *(11) BroadcastHashJoin [i_brand_id#2, i_class_id#3, i_category_id#4], [brand_id#34, class_id#35, category_id#36], Inner, BuildRight + : : : :- *(11) Project [i_item_sk#32, i_brand_id#2, i_class_id#3, i_category_id#4] + : : : : +- *(11) Filter ((isnotnull(i_class_id#3) && isnotnull(i_brand_id#2)) && isnotnull(i_category_id#4)) + : : : : +- *(11) FileScan parquet default.item[i_item_sk#32,i_brand_id#2,i_class_id#3,i_category_id#4] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_class_id), IsNotNull(i_brand_id), IsNotNull(i_category_id)], ReadSchema: struct + : : : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, int, true], input[1, int, true], input[2, int, true])) + : : : +- *(10) HashAggregate(keys=[brand_id#34, class_id#35, category_id#36], functions=[]) + : : : +- *(10) HashAggregate(keys=[brand_id#34, class_id#35, category_id#36], functions=[]) + : : : +- *(10) 
BroadcastHashJoin [coalesce(brand_id#34, 0), coalesce(class_id#35, 0), coalesce(category_id#36, 0)], [coalesce(i_brand_id#2, 0), coalesce(i_class_id#3, 0), coalesce(i_category_id#4, 0)], LeftSemi, BuildRight, (((brand_id#34 <=> i_brand_id#2) && (class_id#35 <=> i_class_id#3)) && (category_id#36 <=> i_category_id#4)) + : : : :- *(10) HashAggregate(keys=[brand_id#34, class_id#35, category_id#36], functions=[]) + : : : : +- Exchange hashpartitioning(brand_id#34, class_id#35, category_id#36, 200) + : : : : +- *(6) HashAggregate(keys=[brand_id#34, class_id#35, category_id#36], functions=[]) + : : : : +- *(6) BroadcastHashJoin [coalesce(brand_id#34, 0), coalesce(class_id#35, 0), coalesce(category_id#36, 0)], [coalesce(i_brand_id#2, 0), coalesce(i_class_id#3, 0), coalesce(i_category_id#4, 0)], LeftSemi, BuildRight, (((brand_id#34 <=> i_brand_id#2) && (class_id#35 <=> i_class_id#3)) && (category_id#36 <=> i_category_id#4)) + : : : : :- *(6) Project [i_brand_id#2 AS brand_id#34, i_class_id#3 AS class_id#35, i_category_id#4 AS category_id#36] + : : : : : +- *(6) BroadcastHashJoin [ss_sold_date_sk#18], [d_date_sk#19], Inner, BuildRight + : : : : : :- *(6) Project [ss_sold_date_sk#18, i_brand_id#2, i_class_id#3, i_category_id#4] + : : : : : : +- *(6) BroadcastHashJoin [ss_item_sk#31], [i_item_sk#32], Inner, BuildRight + : : : : : : :- *(6) Project [ss_sold_date_sk#18, ss_item_sk#31] + : : : : : : : +- *(6) Filter (isnotnull(ss_item_sk#31) && isnotnull(ss_sold_date_sk#18)) + : : : : : : : +- *(6) FileScan parquet default.store_sales[ss_sold_date_sk#18,ss_item_sk#31] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk)], ReadSchema: struct + : : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : : : +- *(1) Project [i_item_sk#32, i_brand_id#2, i_class_id#3, 
i_category_id#4] + : : : : : : +- *(1) Filter (((isnotnull(i_item_sk#32) && isnotnull(i_class_id#3)) && isnotnull(i_brand_id#2)) && isnotnull(i_category_id#4)) + : : : : : : +- *(1) FileScan parquet default.item[i_item_sk#32,i_brand_id#2,i_class_id#3,i_category_id#4] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk), IsNotNull(i_class_id), IsNotNull(i_brand_id), IsNotNull(i_category_id)], ReadSchema: struct + : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : : +- *(2) Project [d_date_sk#19] + : : : : : +- *(2) Filter (((isnotnull(d_year#20) && (d_year#20 >= 1999)) && (d_year#20 <= 2001)) && isnotnull(d_date_sk#19)) + : : : : : +- *(2) FileScan parquet default.date_dim[d_date_sk#19,d_year#20] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), GreaterThanOrEqual(d_year,1999), LessThanOrEqual(d_year,2001), IsNotNull(d_da..., ReadSchema: struct + : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(coalesce(input[0, int, true], 0), coalesce(input[1, int, true], 0), coalesce(input[2, int, true], 0))) + : : : : +- *(5) Project [i_brand_id#2, i_class_id#3, i_category_id#4] + : : : : +- *(5) BroadcastHashJoin [cs_sold_date_sk#25], [d_date_sk#19], Inner, BuildRight + : : : : :- *(5) Project [cs_sold_date_sk#25, i_brand_id#2, i_class_id#3, i_category_id#4] + : : : : : +- *(5) BroadcastHashJoin [cs_item_sk#37], [i_item_sk#32], Inner, BuildRight + : : : : : :- *(5) Project [cs_sold_date_sk#25, cs_item_sk#37] + : : : : : : +- *(5) Filter (isnotnull(cs_item_sk#37) && isnotnull(cs_sold_date_sk#25)) + : : : : : : +- *(5) FileScan parquet default.catalog_sales[cs_sold_date_sk#25,cs_item_sk#37] Batched: true, Format: Parquet, Location: 
InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_item_sk), IsNotNull(cs_sold_date_sk)], ReadSchema: struct + : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : : +- *(3) Project [i_item_sk#32, i_brand_id#2, i_class_id#3, i_category_id#4] + : : : : : +- *(3) Filter isnotnull(i_item_sk#32) + : : : : : +- *(3) FileScan parquet default.item[i_item_sk#32,i_brand_id#2,i_class_id#3,i_category_id#4] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct + : : : : +- ReusedExchange [d_date_sk#19], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : +- BroadcastExchange HashedRelationBroadcastMode(List(coalesce(input[0, int, true], 0), coalesce(input[1, int, true], 0), coalesce(input[2, int, true], 0))) + : : : +- *(9) Project [i_brand_id#2, i_class_id#3, i_category_id#4] + : : : +- *(9) BroadcastHashJoin [ws_sold_date_sk#30], [d_date_sk#19], Inner, BuildRight + : : : :- *(9) Project [ws_sold_date_sk#30, i_brand_id#2, i_class_id#3, i_category_id#4] + : : : : +- *(9) BroadcastHashJoin [ws_item_sk#38], [i_item_sk#32], Inner, BuildRight + : : : : :- *(9) Project [ws_sold_date_sk#30, ws_item_sk#38] + : : : : : +- *(9) Filter (isnotnull(ws_item_sk#38) && isnotnull(ws_sold_date_sk#30)) + : : : : : +- *(9) FileScan parquet default.web_sales[ws_sold_date_sk#30,ws_item_sk#38] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_item_sk), IsNotNull(ws_sold_date_sk)], ReadSchema: struct + : : : : +- ReusedExchange [i_item_sk#32, i_brand_id#2, i_class_id#3, i_category_id#4], BroadcastExchange 
HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : +- ReusedExchange [d_date_sk#19], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(23) BroadcastHashJoin [i_item_sk#32], [ss_item_sk#33], LeftSemi, BuildRight + : : :- *(23) Project [i_item_sk#32, i_brand_id#2, i_class_id#3, i_category_id#4] + : : : +- *(23) Filter (((isnotnull(i_item_sk#32) && isnotnull(i_class_id#3)) && isnotnull(i_category_id#4)) && isnotnull(i_brand_id#2)) + : : : +- *(23) FileScan parquet default.item[i_item_sk#32,i_brand_id#2,i_class_id#3,i_category_id#4] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk), IsNotNull(i_class_id), IsNotNull(i_category_id), IsNotNull(i_brand_id)], ReadSchema: struct + : : +- ReusedExchange [ss_item_sk#33], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(24) Project [d_date_sk#19] + : +- *(24) Filter ((isnotnull(d_week_seq#39) && (d_week_seq#39 = Subquery subquery1883)) && isnotnull(d_date_sk#19)) + : : +- Subquery subquery1883 + : : +- *(1) Project [d_week_seq#39] + : : +- *(1) Filter (((((isnotnull(d_year#20) && isnotnull(d_dom#40)) && isnotnull(d_moy#41)) && (d_year#20 = 2000)) && (d_moy#41 = 12)) && (d_dom#40 = 11)) + : : +- *(1) FileScan parquet default.date_dim[d_week_seq#39,d_year#20,d_moy#41,d_dom#40] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_dom), IsNotNull(d_moy), EqualTo(d_year,2000), EqualTo(d_moy,12), ..., ReadSchema: struct + : +- *(24) FileScan 
parquet default.date_dim[d_date_sk#19,d_week_seq#39] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_week_seq), IsNotNull(d_date_sk)], ReadSchema: struct + : +- Subquery subquery1883 + : +- *(1) Project [d_week_seq#39] + : +- *(1) Filter (((((isnotnull(d_year#20) && isnotnull(d_dom#40)) && isnotnull(d_moy#41)) && (d_year#20 = 2000)) && (d_moy#41 = 12)) && (d_dom#40 = 11)) + : +- *(1) FileScan parquet default.date_dim[d_week_seq#39,d_year#20,d_moy#41,d_dom#40] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_dom), IsNotNull(d_moy), EqualTo(d_year,2000), EqualTo(d_moy,12), ..., ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(input[1, int, true], input[2, int, true], input[3, int, true])) + +- *(51) Project [channel#7, i_brand_id#8, i_class_id#9, i_category_id#10, sales#11, number_sales#12] + +- *(51) Filter (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#13 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#14 as decimal(12,2)))), DecimalType(18,2)))#42) && (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#13 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#14 as decimal(12,2)))), DecimalType(18,2)))#42 as decimal(32,6)) > cast(Subquery subquery1890 as decimal(32,6)))) + : +- Subquery subquery1890 + : +- *(8) HashAggregate(keys=[], functions=[avg(CheckOverflow((promote_precision(cast(cast(quantity#16 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#17 as decimal(12,2)))), DecimalType(18,2)))]) + : +- Exchange SinglePartition + : +- *(7) HashAggregate(keys=[], functions=[partial_avg(CheckOverflow((promote_precision(cast(cast(quantity#16 as decimal(10,0)) 
as decimal(12,2))) * promote_precision(cast(list_price#17 as decimal(12,2)))), DecimalType(18,2)))]) + : +- Union + : :- *(2) Project [ss_quantity#13 AS quantity#16, ss_list_price#14 AS list_price#17] + : : +- *(2) BroadcastHashJoin [ss_sold_date_sk#18], [d_date_sk#19], Inner, BuildRight + : : :- *(2) Project [ss_sold_date_sk#18, ss_quantity#13, ss_list_price#14] + : : : +- *(2) Filter isnotnull(ss_sold_date_sk#18) + : : : +- *(2) FileScan parquet default.store_sales[ss_sold_date_sk#18,ss_quantity#13,ss_list_price#14] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk)], ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(1) Project [d_date_sk#19] + : : +- *(1) Filter (((isnotnull(d_year#20) && (d_year#20 >= 1999)) && (d_year#20 <= 2001)) && isnotnull(d_date_sk#19)) + : : +- *(1) FileScan parquet default.date_dim[d_date_sk#19,d_year#20] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), GreaterThanOrEqual(d_year,1999), LessThanOrEqual(d_year,2001), IsNotNull(d_da..., ReadSchema: struct + : :- *(4) Project [cs_quantity#21 AS quantity#22, cs_list_price#23 AS list_price#24] + : : +- *(4) BroadcastHashJoin [cs_sold_date_sk#25], [d_date_sk#19], Inner, BuildRight + : : :- *(4) Project [cs_sold_date_sk#25, cs_quantity#21, cs_list_price#23] + : : : +- *(4) Filter isnotnull(cs_sold_date_sk#25) + : : : +- *(4) FileScan parquet default.catalog_sales[cs_sold_date_sk#25,cs_quantity#21,cs_list_price#23] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_sold_date_sk)], ReadSchema: struct + : : +- 
ReusedExchange [d_date_sk#19], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(6) Project [ws_quantity#26 AS quantity#27, ws_list_price#28 AS list_price#29] + : +- *(6) BroadcastHashJoin [ws_sold_date_sk#30], [d_date_sk#19], Inner, BuildRight + : :- *(6) Project [ws_sold_date_sk#30, ws_quantity#26, ws_list_price#28] + : : +- *(6) Filter isnotnull(ws_sold_date_sk#30) + : : +- *(6) FileScan parquet default.web_sales[ws_sold_date_sk#30,ws_quantity#26,ws_list_price#28] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_sold_date_sk)], ReadSchema: struct + : +- ReusedExchange [d_date_sk#19], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(51) HashAggregate(keys=[i_brand_id#8, i_class_id#9, i_category_id#10], functions=[sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#13 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#14 as decimal(12,2)))), DecimalType(18,2))), count(1)]) + +- Exchange hashpartitioning(i_brand_id#8, i_class_id#9, i_category_id#10, 200) + +- *(50) HashAggregate(keys=[i_brand_id#8, i_class_id#9, i_category_id#10], functions=[partial_sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#13 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#14 as decimal(12,2)))), DecimalType(18,2))), partial_count(1)]) + +- *(50) Project [ss_quantity#13, ss_list_price#14, i_brand_id#8, i_class_id#9, i_category_id#10] + +- *(50) BroadcastHashJoin [ss_sold_date_sk#18], [d_date_sk#19], Inner, BuildRight + :- *(50) Project [ss_sold_date_sk#18, ss_quantity#13, ss_list_price#14, i_brand_id#8, i_class_id#9, i_category_id#10] + : +- *(50) BroadcastHashJoin [ss_item_sk#31], [i_item_sk#43], Inner, BuildRight + : :- *(50) BroadcastHashJoin [ss_item_sk#31], [ss_item_sk#33], LeftSemi, BuildRight 
+ : : :- *(50) Project [ss_sold_date_sk#18, ss_item_sk#31, ss_quantity#13, ss_list_price#14] + : : : +- *(50) Filter (isnotnull(ss_item_sk#31) && isnotnull(ss_sold_date_sk#18)) + : : : +- *(50) FileScan parquet default.store_sales[ss_sold_date_sk#18,ss_item_sk#31,ss_quantity#13,ss_list_price#14] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk)], ReadSchema: struct + : : +- ReusedExchange [ss_item_sk#33], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- ReusedExchange [i_item_sk#43, i_brand_id#8, i_class_id#9, i_category_id#10], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(49) Project [d_date_sk#19] + +- *(49) Filter ((isnotnull(d_week_seq#39) && (d_week_seq#39 = Subquery subquery1889)) && isnotnull(d_date_sk#19)) + : +- Subquery subquery1889 + : +- *(1) Project [d_week_seq#39] + : +- *(1) Filter (((((isnotnull(d_year#20) && isnotnull(d_dom#40)) && isnotnull(d_moy#41)) && (d_year#20 = 1999)) && (d_moy#41 = 12)) && (d_dom#40 = 11)) + : +- *(1) FileScan parquet default.date_dim[d_week_seq#39,d_year#20,d_moy#41,d_dom#40] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_dom), IsNotNull(d_moy), EqualTo(d_year,1999), EqualTo(d_moy,12), ..., ReadSchema: struct + +- *(49) FileScan parquet default.date_dim[d_date_sk#19,d_week_seq#39] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_week_seq), IsNotNull(d_date_sk)], ReadSchema: struct + +- 
Subquery subquery1889 + +- *(1) Project [d_week_seq#39] + +- *(1) Filter (((((isnotnull(d_year#20) && isnotnull(d_dom#40)) && isnotnull(d_moy#41)) && (d_year#20 = 1999)) && (d_moy#41 = 12)) && (d_dom#40 = 11)) + +- *(1) FileScan parquet default.date_dim[d_week_seq#39,d_year#20,d_moy#41,d_dom#40] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_dom), IsNotNull(d_moy), EqualTo(d_year,1999), EqualTo(d_moy,12), ..., ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14b/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14b/simplified.txt new file mode 100644 index 000000000..d0ac00bfe --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14b/simplified.txt @@ -0,0 +1,226 @@ +TakeOrderedAndProject [i_brand_id,i_category_id,number_sales,i_class_id,i_brand_id,sales,sales,number_sales,i_category_id,channel,channel,i_class_id] + WholeStageCodegen + BroadcastHashJoin [i_category_id,i_category_id,i_brand_id,i_class_id,i_brand_id,i_class_id] + Project [number_sales,i_category_id,channel,sales,i_brand_id,i_class_id] + Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2)))] + Subquery #2 + WholeStageCodegen + HashAggregate [sum,count,avg(CheckOverflow((promote_precision(cast(cast(quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price as decimal(12,2)))), DecimalType(18,2)))] [avg(CheckOverflow((promote_precision(cast(cast(quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price as decimal(12,2)))), DecimalType(18,2))),average_sales,sum,count] + InputAdapter + Exchange #12 + WholeStageCodegen + HashAggregate 
[quantity,sum,count,list_price,count,sum] [sum,count,sum,count] + InputAdapter + Union + WholeStageCodegen + Project [ss_quantity,ss_list_price] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_sold_date_sk,ss_quantity,ss_list_price] + Filter [ss_sold_date_sk] + Scan parquet default.store_sales [ss_sold_date_sk,ss_quantity,ss_list_price] [ss_sold_date_sk,ss_quantity,ss_list_price] + InputAdapter + BroadcastExchange #13 + WholeStageCodegen + Project [d_date_sk] + Filter [d_year,d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_year] [d_date_sk,d_year] + WholeStageCodegen + Project [cs_quantity,cs_list_price] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_sold_date_sk,cs_quantity,cs_list_price] + Filter [cs_sold_date_sk] + Scan parquet default.catalog_sales [cs_sold_date_sk,cs_quantity,cs_list_price] [cs_sold_date_sk,cs_quantity,cs_list_price] + InputAdapter + ReusedExchange [d_date_sk] [d_date_sk] #13 + WholeStageCodegen + Project [ws_quantity,ws_list_price] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Project [ws_sold_date_sk,ws_quantity,ws_list_price] + Filter [ws_sold_date_sk] + Scan parquet default.web_sales [ws_sold_date_sk,ws_quantity,ws_list_price] [ws_sold_date_sk,ws_quantity,ws_list_price] + InputAdapter + ReusedExchange [d_date_sk] [d_date_sk] #13 + HashAggregate [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2))),count(1),sum,i_category_id,i_brand_id,count,i_class_id] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2))),number_sales,count(1),sum,channel,sales,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2))),count] + InputAdapter + Exchange 
[i_brand_id,i_class_id,i_category_id] #1 + WholeStageCodegen + HashAggregate [sum,sum,ss_list_price,i_category_id,i_brand_id,ss_quantity,count,i_class_id,count] [sum,count,sum,count] + Project [ss_quantity,ss_list_price,i_category_id,i_brand_id,i_class_id] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_quantity,ss_list_price,i_category_id,i_brand_id,ss_sold_date_sk,i_class_id] + BroadcastHashJoin [ss_item_sk,i_item_sk] + BroadcastHashJoin [ss_item_sk,ss_item_sk] + Project [ss_sold_date_sk,ss_item_sk,ss_quantity,ss_list_price] + Filter [ss_item_sk,ss_sold_date_sk] + Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_quantity,ss_list_price] [ss_sold_date_sk,ss_item_sk,ss_quantity,ss_list_price] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen + Project [i_item_sk] + BroadcastHashJoin [brand_id,category_id,i_category_id,i_brand_id,class_id,i_class_id] + Project [i_item_sk,i_brand_id,i_class_id,i_category_id] + Filter [i_class_id,i_brand_id,i_category_id] + Scan parquet default.item [i_item_sk,i_brand_id,i_class_id,i_category_id] [i_item_sk,i_brand_id,i_class_id,i_category_id] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen + HashAggregate [brand_id,class_id,category_id] + HashAggregate [brand_id,class_id,category_id] + BroadcastHashJoin [i_class_id,i_category_id,class_id,category_id,i_brand_id,brand_id] + HashAggregate [brand_id,class_id,category_id] + InputAdapter + Exchange [brand_id,class_id,category_id] #4 + WholeStageCodegen + HashAggregate [brand_id,class_id,category_id] + BroadcastHashJoin [i_brand_id,i_class_id,i_category_id,class_id,category_id,brand_id] + Project [i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_sold_date_sk,i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [ss_sold_date_sk,ss_item_sk] + Filter [ss_item_sk,ss_sold_date_sk] + Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk] 
[ss_sold_date_sk,ss_item_sk] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen + Project [i_item_sk,i_brand_id,i_class_id,i_category_id] + Filter [i_item_sk,i_class_id,i_brand_id,i_category_id] + Scan parquet default.item [i_item_sk,i_brand_id,i_class_id,i_category_id] [i_item_sk,i_brand_id,i_class_id,i_category_id] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen + Project [d_date_sk] + Filter [d_year,d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_year] [d_date_sk,d_year] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen + Project [i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_sold_date_sk,i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [cs_item_sk,i_item_sk] + Project [cs_sold_date_sk,cs_item_sk] + Filter [cs_item_sk,cs_sold_date_sk] + Scan parquet default.catalog_sales [cs_sold_date_sk,cs_item_sk] [cs_sold_date_sk,cs_item_sk] + InputAdapter + BroadcastExchange #8 + WholeStageCodegen + Project [i_item_sk,i_brand_id,i_class_id,i_category_id] + Filter [i_item_sk] + Scan parquet default.item [i_item_sk,i_brand_id,i_class_id,i_category_id] [i_item_sk,i_brand_id,i_class_id,i_category_id] + InputAdapter + ReusedExchange [d_date_sk] [d_date_sk] #6 + InputAdapter + BroadcastExchange #9 + WholeStageCodegen + Project [i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Project [ws_sold_date_sk,i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [ws_item_sk,i_item_sk] + Project [ws_sold_date_sk,ws_item_sk] + Filter [ws_item_sk,ws_sold_date_sk] + Scan parquet default.web_sales [ws_sold_date_sk,ws_item_sk] [ws_sold_date_sk,ws_item_sk] + InputAdapter + ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id] [i_item_sk,i_brand_id,i_class_id,i_category_id] #8 + InputAdapter + ReusedExchange [d_date_sk] [d_date_sk] #6 + InputAdapter + BroadcastExchange #10 + WholeStageCodegen + BroadcastHashJoin [i_item_sk,ss_item_sk] + Project 
[i_item_sk,i_brand_id,i_class_id,i_category_id] + Filter [i_item_sk,i_class_id,i_category_id,i_brand_id] + Scan parquet default.item [i_item_sk,i_brand_id,i_class_id,i_category_id] [i_item_sk,i_brand_id,i_class_id,i_category_id] + InputAdapter + ReusedExchange [ss_item_sk] [ss_item_sk] #2 + InputAdapter + BroadcastExchange #11 + WholeStageCodegen + Project [d_date_sk] + Filter [d_week_seq,d_date_sk] + Subquery #1 + WholeStageCodegen + Project [d_week_seq] + Filter [d_year,d_dom,d_moy] + Scan parquet default.date_dim [d_week_seq,d_year,d_moy,d_dom] [d_week_seq,d_year,d_moy,d_dom] + Scan parquet default.date_dim [d_date_sk,d_week_seq] [d_date_sk,d_week_seq] + Subquery #1 + WholeStageCodegen + Project [d_week_seq] + Filter [d_year,d_dom,d_moy] + Scan parquet default.date_dim [d_week_seq,d_year,d_moy,d_dom] [d_week_seq,d_year,d_moy,d_dom] + InputAdapter + BroadcastExchange #14 + WholeStageCodegen + Project [i_brand_id,i_class_id,sales,number_sales,i_category_id,channel] + Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2)))] + Subquery #4 + WholeStageCodegen + HashAggregate [sum,count,avg(CheckOverflow((promote_precision(cast(cast(quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price as decimal(12,2)))), DecimalType(18,2)))] [avg(CheckOverflow((promote_precision(cast(cast(quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price as decimal(12,2)))), DecimalType(18,2))),average_sales,sum,count] + InputAdapter + Exchange #12 + WholeStageCodegen + HashAggregate [quantity,sum,count,list_price,count,sum] [sum,count,sum,count] + InputAdapter + Union + WholeStageCodegen + Project [ss_quantity,ss_list_price] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_sold_date_sk,ss_quantity,ss_list_price] + Filter [ss_sold_date_sk] + Scan parquet default.store_sales 
[ss_sold_date_sk,ss_quantity,ss_list_price] [ss_sold_date_sk,ss_quantity,ss_list_price] + InputAdapter + BroadcastExchange #13 + WholeStageCodegen + Project [d_date_sk] + Filter [d_year,d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_year] [d_date_sk,d_year] + WholeStageCodegen + Project [cs_quantity,cs_list_price] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_sold_date_sk,cs_quantity,cs_list_price] + Filter [cs_sold_date_sk] + Scan parquet default.catalog_sales [cs_sold_date_sk,cs_quantity,cs_list_price] [cs_sold_date_sk,cs_quantity,cs_list_price] + InputAdapter + ReusedExchange [d_date_sk] [d_date_sk] #13 + WholeStageCodegen + Project [ws_quantity,ws_list_price] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Project [ws_sold_date_sk,ws_quantity,ws_list_price] + Filter [ws_sold_date_sk] + Scan parquet default.web_sales [ws_sold_date_sk,ws_quantity,ws_list_price] [ws_sold_date_sk,ws_quantity,ws_list_price] + InputAdapter + ReusedExchange [d_date_sk] [d_date_sk] #13 + HashAggregate [i_brand_id,sum,i_class_id,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2))),count(1),count,i_category_id] [sum,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2))),count(1),count,sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2))),channel] + InputAdapter + Exchange [i_brand_id,i_class_id,i_category_id] #15 + WholeStageCodegen + HashAggregate [i_brand_id,sum,sum,i_class_id,count,count,ss_list_price,i_category_id,ss_quantity] [sum,count,sum,count] + Project [ss_quantity,i_category_id,ss_list_price,i_brand_id,i_class_id] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project 
[ss_quantity,i_category_id,ss_list_price,i_brand_id,ss_sold_date_sk,i_class_id] + BroadcastHashJoin [ss_item_sk,i_item_sk] + BroadcastHashJoin [ss_item_sk,ss_item_sk] + Project [ss_sold_date_sk,ss_item_sk,ss_quantity,ss_list_price] + Filter [ss_item_sk,ss_sold_date_sk] + Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_quantity,ss_list_price] [ss_sold_date_sk,ss_item_sk,ss_quantity,ss_list_price] + InputAdapter + ReusedExchange [ss_item_sk] [ss_item_sk] #2 + InputAdapter + ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id] [i_item_sk,i_brand_id,i_class_id,i_category_id] #10 + InputAdapter + BroadcastExchange #16 + WholeStageCodegen + Project [d_date_sk] + Filter [d_week_seq,d_date_sk] + Subquery #3 + WholeStageCodegen + Project [d_week_seq] + Filter [d_year,d_dom,d_moy] + Scan parquet default.date_dim [d_week_seq,d_year,d_moy,d_dom] [d_week_seq,d_year,d_moy,d_dom] + Scan parquet default.date_dim [d_date_sk,d_week_seq] [d_date_sk,d_week_seq] + Subquery #3 + WholeStageCodegen + Project [d_week_seq] + Filter [d_year,d_dom,d_moy] + Scan parquet default.date_dim [d_week_seq,d_year,d_moy,d_dom] [d_week_seq,d_year,d_moy,d_dom] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q15/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q15/explain.txt new file mode 100644 index 000000000..e74a4a6e2 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q15/explain.txt @@ -0,0 +1,26 @@ +== Physical Plan == +TakeOrderedAndProject(limit=100, orderBy=[ca_zip#1 ASC NULLS FIRST], output=[ca_zip#1,sum(cs_sales_price)#2]) ++- *(5) HashAggregate(keys=[ca_zip#1], functions=[sum(UnscaledValue(cs_sales_price#3))]) + +- Exchange hashpartitioning(ca_zip#1, 200) + +- *(4) HashAggregate(keys=[ca_zip#1], functions=[partial_sum(UnscaledValue(cs_sales_price#3))]) + +- *(4) Project [cs_sales_price#3, ca_zip#1] + +- *(4) BroadcastHashJoin [cs_sold_date_sk#4], [d_date_sk#5], Inner, BuildRight + :- *(4) 
Project [cs_sold_date_sk#4, cs_sales_price#3, ca_zip#1] + : +- *(4) BroadcastHashJoin [c_current_addr_sk#6], [ca_address_sk#7], Inner, BuildRight, ((substring(ca_zip#1, 1, 5) IN (85669,86197,88274,83405,86475,85392,85460,80348,81792) || ca_state#8 IN (CA,WA,GA)) || (cs_sales_price#3 > 500.00)) + : :- *(4) Project [cs_sold_date_sk#4, cs_sales_price#3, c_current_addr_sk#6] + : : +- *(4) BroadcastHashJoin [cs_bill_customer_sk#9], [c_customer_sk#10], Inner, BuildRight + : : :- *(4) Project [cs_sold_date_sk#4, cs_bill_customer_sk#9, cs_sales_price#3] + : : : +- *(4) Filter (isnotnull(cs_bill_customer_sk#9) && isnotnull(cs_sold_date_sk#4)) + : : : +- *(4) FileScan parquet default.catalog_sales[cs_sold_date_sk#4,cs_bill_customer_sk#9,cs_sales_price#3] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_bill_customer_sk), IsNotNull(cs_sold_date_sk)], ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(1) Project [c_customer_sk#10, c_current_addr_sk#6] + : : +- *(1) Filter (isnotnull(c_customer_sk#10) && isnotnull(c_current_addr_sk#6)) + : : +- *(1) FileScan parquet default.customer[c_customer_sk#10,c_current_addr_sk#6] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_current_addr_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(2) Project [ca_address_sk#7, ca_state#8, ca_zip#1] + : +- *(2) Filter isnotnull(ca_address_sk#7) + : +- *(2) FileScan parquet default.customer_address[ca_address_sk#7,ca_state#8,ca_zip#1] Batched: true, Format: Parquet, Location: 
InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_address_sk)], ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(3) Project [d_date_sk#5] + +- *(3) Filter ((((isnotnull(d_qoy#11) && isnotnull(d_year#12)) && (d_qoy#11 = 2)) && (d_year#12 = 2001)) && isnotnull(d_date_sk#5)) + +- *(3) FileScan parquet default.date_dim[d_date_sk#5,d_year#12,d_qoy#11] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_qoy), IsNotNull(d_year), EqualTo(d_qoy,2), EqualTo(d_year,2001), IsNotNull(d_date_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q15/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q15/simplified.txt new file mode 100644 index 000000000..5d72165cb --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q15/simplified.txt @@ -0,0 +1,34 @@ +TakeOrderedAndProject [ca_zip,sum(cs_sales_price)] + WholeStageCodegen + HashAggregate [ca_zip,sum,sum(UnscaledValue(cs_sales_price))] [sum(UnscaledValue(cs_sales_price)),sum(cs_sales_price),sum] + InputAdapter + Exchange [ca_zip] #1 + WholeStageCodegen + HashAggregate [ca_zip,cs_sales_price,sum,sum] [sum,sum] + Project [cs_sales_price,ca_zip] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_sold_date_sk,cs_sales_price,ca_zip] + BroadcastHashJoin [ca_zip,cs_sales_price,ca_address_sk,c_current_addr_sk,ca_state] + Project [cs_sold_date_sk,cs_sales_price,c_current_addr_sk] + BroadcastHashJoin [cs_bill_customer_sk,c_customer_sk] + Project [cs_sold_date_sk,cs_bill_customer_sk,cs_sales_price] + Filter [cs_bill_customer_sk,cs_sold_date_sk] + Scan parquet default.catalog_sales 
[cs_sold_date_sk,cs_bill_customer_sk,cs_sales_price] [cs_sold_date_sk,cs_bill_customer_sk,cs_sales_price] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen + Project [c_customer_sk,c_current_addr_sk] + Filter [c_customer_sk,c_current_addr_sk] + Scan parquet default.customer [c_customer_sk,c_current_addr_sk] [c_customer_sk,c_current_addr_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen + Project [ca_address_sk,ca_state,ca_zip] + Filter [ca_address_sk] + Scan parquet default.customer_address [ca_address_sk,ca_state,ca_zip] [ca_address_sk,ca_state,ca_zip] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen + Project [d_date_sk] + Filter [d_qoy,d_year,d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_year,d_qoy] [d_date_sk,d_year,d_qoy] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16/explain.txt new file mode 100644 index 000000000..f1ebfcb6e --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16/explain.txt @@ -0,0 +1,37 @@ +== Physical Plan == +TakeOrderedAndProject(limit=100, orderBy=[order count #1 ASC NULLS FIRST], output=[order count #1,total shipping cost #2,total net profit #3]) ++- *(8) HashAggregate(keys=[], functions=[sum(UnscaledValue(cs_ext_ship_cost#4)), sum(UnscaledValue(cs_net_profit#5)), count(distinct cs_order_number#6)]) + +- Exchange SinglePartition + +- *(7) HashAggregate(keys=[], functions=[merge_sum(UnscaledValue(cs_ext_ship_cost#4)), merge_sum(UnscaledValue(cs_net_profit#5)), partial_count(distinct cs_order_number#6)]) + +- *(7) HashAggregate(keys=[cs_order_number#6], functions=[merge_sum(UnscaledValue(cs_ext_ship_cost#4)), merge_sum(UnscaledValue(cs_net_profit#5))]) + +- Exchange hashpartitioning(cs_order_number#6, 200) + +- *(6) HashAggregate(keys=[cs_order_number#6], functions=[partial_sum(UnscaledValue(cs_ext_ship_cost#4)), partial_sum(UnscaledValue(cs_net_profit#5))]) + +- 
*(6) Project [cs_order_number#6, cs_ext_ship_cost#4, cs_net_profit#5] + +- *(6) BroadcastHashJoin [cs_call_center_sk#7], [cc_call_center_sk#8], Inner, BuildRight + :- *(6) Project [cs_call_center_sk#7, cs_order_number#6, cs_ext_ship_cost#4, cs_net_profit#5] + : +- *(6) BroadcastHashJoin [cs_ship_addr_sk#9], [ca_address_sk#10], Inner, BuildRight + : :- *(6) Project [cs_ship_addr_sk#9, cs_call_center_sk#7, cs_order_number#6, cs_ext_ship_cost#4, cs_net_profit#5] + : : +- *(6) BroadcastHashJoin [cs_ship_date_sk#11], [d_date_sk#12], Inner, BuildRight + : : :- *(6) BroadcastHashJoin [cs_order_number#6], [cr_order_number#13], LeftAnti, BuildRight + : : : :- *(6) Project [cs_ship_date_sk#11, cs_ship_addr_sk#9, cs_call_center_sk#7, cs_order_number#6, cs_ext_ship_cost#4, cs_net_profit#5] + : : : : +- *(6) BroadcastHashJoin [cs_order_number#6], [cs_order_number#6#14], LeftSemi, BuildRight, NOT (cs_warehouse_sk#15 = cs_warehouse_sk#15#16) + : : : : :- *(6) Project [cs_ship_date_sk#11, cs_ship_addr_sk#9, cs_call_center_sk#7, cs_warehouse_sk#15, cs_order_number#6, cs_ext_ship_cost#4, cs_net_profit#5] + : : : : : +- *(6) Filter ((isnotnull(cs_ship_date_sk#11) && isnotnull(cs_ship_addr_sk#9)) && isnotnull(cs_call_center_sk#7)) + : : : : : +- *(6) FileScan parquet default.catalog_sales[cs_ship_date_sk#11,cs_ship_addr_sk#9,cs_call_center_sk#7,cs_warehouse_sk#15,cs_order_number#6,cs_ext_ship_cost#4,cs_net_profit#5] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_ship_date_sk), IsNotNull(cs_ship_addr_sk), IsNotNull(cs_call_center_sk)], ReadSchema: struct + : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : +- *(2) FileScan parquet default.catalog_returns[cr_order_number#13] Batched: true, Format: Parquet, Location: 
InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_returns], PartitionFilters: [], PushedFilters: [], ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(3) Project [d_date_sk#12] + : : +- *(3) Filter (((isnotnull(d_date#17) && (cast(d_date#17 as string) >= 2002-02-01)) && (d_date#17 <= 11779)) && isnotnull(d_date_sk#12)) + : : +- *(3) FileScan parquet default.date_dim[d_date_sk#12,d_date#17] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date), LessThanOrEqual(d_date,2002-04-02), IsNotNull(d_date_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(4) Project [ca_address_sk#10] + : +- *(4) Filter ((isnotnull(ca_state#18) && (ca_state#18 = GA)) && isnotnull(ca_address_sk#10)) + : +- *(4) FileScan parquet default.customer_address[ca_address_sk#10,ca_state#18] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_state), EqualTo(ca_state,GA), IsNotNull(ca_address_sk)], ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(5) Project [cc_call_center_sk#8] + +- *(5) Filter ((isnotnull(cc_county#19) && (cc_county#19 = Williamson County)) && isnotnull(cc_call_center_sk#8)) + +- *(5) FileScan parquet default.call_center[cc_call_center_sk#8,cc_county#19] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/call_center], PartitionFilters: [], PushedFilters: [IsNotNull(cc_county), EqualTo(cc_county,Williamson County), IsNotNull(cc_call_center_sk)], ReadSchema: struct \ No newline at end of file diff 
--git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16/simplified.txt new file mode 100644 index 000000000..57b72893e --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16/simplified.txt @@ -0,0 +1,51 @@ +TakeOrderedAndProject [order count ,total shipping cost ,total net profit ] + WholeStageCodegen + HashAggregate [sum(UnscaledValue(cs_net_profit)),count(cs_order_number),count,sum,sum,sum(UnscaledValue(cs_ext_ship_cost))] [sum(UnscaledValue(cs_net_profit)),count(cs_order_number),total net profit ,order count ,count,sum,sum,sum(UnscaledValue(cs_ext_ship_cost)),total shipping cost ] + InputAdapter + Exchange #1 + WholeStageCodegen + HashAggregate [sum(UnscaledValue(cs_net_profit)),count(cs_order_number),sum,sum,count,sum,sum,sum(UnscaledValue(cs_ext_ship_cost)),cs_order_number] [sum(UnscaledValue(cs_net_profit)),count(cs_order_number),sum,sum,count,count,sum,sum,sum(UnscaledValue(cs_ext_ship_cost))] + HashAggregate [sum(UnscaledValue(cs_net_profit)),sum,sum,sum,sum,sum(UnscaledValue(cs_ext_ship_cost)),cs_order_number] [sum(UnscaledValue(cs_net_profit)),sum,sum,sum,sum,sum(UnscaledValue(cs_ext_ship_cost))] + InputAdapter + Exchange [cs_order_number] #2 + WholeStageCodegen + HashAggregate [cs_ext_ship_cost,sum(UnscaledValue(cs_net_profit)),sum,sum,sum(UnscaledValue(cs_ext_ship_cost)),cs_order_number,cs_net_profit] [sum(UnscaledValue(cs_net_profit)),sum,sum,sum,sum,sum(UnscaledValue(cs_ext_ship_cost))] + Project [cs_order_number,cs_ext_ship_cost,cs_net_profit] + BroadcastHashJoin [cs_call_center_sk,cc_call_center_sk] + Project [cs_call_center_sk,cs_order_number,cs_ext_ship_cost,cs_net_profit] + BroadcastHashJoin [cs_ship_addr_sk,ca_address_sk] + Project [cs_ship_addr_sk,cs_ext_ship_cost,cs_order_number,cs_net_profit,cs_call_center_sk] + BroadcastHashJoin [cs_ship_date_sk,d_date_sk] + BroadcastHashJoin [cs_order_number,cr_order_number] + 
Project [cs_ship_addr_sk,cs_ship_date_sk,cs_ext_ship_cost,cs_order_number,cs_net_profit,cs_call_center_sk] + BroadcastHashJoin [cs_order_number,cs_order_number,cs_warehouse_sk,cs_warehouse_sk] + Project [cs_ship_addr_sk,cs_ship_date_sk,cs_warehouse_sk,cs_ext_ship_cost,cs_order_number,cs_net_profit,cs_call_center_sk] + Filter [cs_ship_date_sk,cs_ship_addr_sk,cs_call_center_sk] + Scan parquet default.catalog_sales [cs_ship_addr_sk,cs_ship_date_sk,cs_warehouse_sk,cs_ext_ship_cost,cs_order_number,cs_net_profit,cs_call_center_sk] [cs_ship_addr_sk,cs_ship_date_sk,cs_warehouse_sk,cs_ext_ship_cost,cs_order_number,cs_net_profit,cs_call_center_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen + Project [cs_warehouse_sk,cs_order_number] + Scan parquet default.catalog_sales [cs_warehouse_sk,cs_order_number] [cs_warehouse_sk,cs_order_number] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen + Scan parquet default.catalog_returns [cr_order_number] [cr_order_number] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen + Project [d_date_sk] + Filter [d_date,d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_date] [d_date_sk,d_date] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen + Project [ca_address_sk] + Filter [ca_state,ca_address_sk] + Scan parquet default.customer_address [ca_address_sk,ca_state] [ca_address_sk,ca_state] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen + Project [cc_call_center_sk] + Filter [cc_county,cc_call_center_sk] + Scan parquet default.call_center [cc_call_center_sk,cc_county] [cc_call_center_sk,cc_county] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q17/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q17/explain.txt new file mode 100644 index 000000000..62f104f27 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q17/explain.txt @@ -0,0 +1,50 @@ +== Physical Plan == +TakeOrderedAndProject(limit=100, orderBy=[i_item_id#1 
ASC NULLS FIRST,i_item_desc#2 ASC NULLS FIRST,s_state#3 ASC NULLS FIRST], output=[i_item_id#1,i_item_desc#2,s_state#3,store_sales_quantitycount#4,store_sales_quantityave#5,store_sales_quantitystdev#6,store_sales_quantitycov#7,as_store_returns_quantitycount#8,as_store_returns_quantityave#9,as_store_returns_quantitystdev#10,store_returns_quantitycov#11,catalog_sales_quantitycount#12,catalog_sales_quantityave#13,catalog_sales_quantitystdev#14,catalog_sales_quantitycov#15]) ++- *(9) HashAggregate(keys=[i_item_id#1, i_item_desc#2, s_state#3], functions=[count(ss_quantity#16), avg(cast(ss_quantity#16 as bigint)), stddev_samp(cast(ss_quantity#16 as double)), count(sr_return_quantity#17), avg(cast(sr_return_quantity#17 as bigint)), stddev_samp(cast(sr_return_quantity#17 as double)), count(cs_quantity#18), avg(cast(cs_quantity#18 as bigint)), stddev_samp(cast(cs_quantity#18 as double))]) + +- Exchange hashpartitioning(i_item_id#1, i_item_desc#2, s_state#3, 200) + +- *(8) HashAggregate(keys=[i_item_id#1, i_item_desc#2, s_state#3], functions=[partial_count(ss_quantity#16), partial_avg(cast(ss_quantity#16 as bigint)), partial_stddev_samp(cast(ss_quantity#16 as double)), partial_count(sr_return_quantity#17), partial_avg(cast(sr_return_quantity#17 as bigint)), partial_stddev_samp(cast(sr_return_quantity#17 as double)), partial_count(cs_quantity#18), partial_avg(cast(cs_quantity#18 as bigint)), partial_stddev_samp(cast(cs_quantity#18 as double))]) + +- *(8) Project [ss_quantity#16, sr_return_quantity#17, cs_quantity#18, s_state#3, i_item_id#1, i_item_desc#2] + +- *(8) BroadcastHashJoin [ss_item_sk#19], [i_item_sk#20], Inner, BuildRight + :- *(8) Project [ss_item_sk#19, ss_quantity#16, sr_return_quantity#17, cs_quantity#18, s_state#3] + : +- *(8) BroadcastHashJoin [ss_store_sk#21], [s_store_sk#22], Inner, BuildRight + : :- *(8) Project [ss_item_sk#19, ss_store_sk#21, ss_quantity#16, sr_return_quantity#17, cs_quantity#18] + : : +- *(8) BroadcastHashJoin [cs_sold_date_sk#23], 
[d_date_sk#24], Inner, BuildRight + : : :- *(8) Project [ss_item_sk#19, ss_store_sk#21, ss_quantity#16, sr_return_quantity#17, cs_sold_date_sk#23, cs_quantity#18] + : : : +- *(8) BroadcastHashJoin [sr_returned_date_sk#25], [cast(d_date_sk#26 as bigint)], Inner, BuildRight + : : : :- *(8) Project [ss_item_sk#19, ss_store_sk#21, ss_quantity#16, sr_returned_date_sk#25, sr_return_quantity#17, cs_sold_date_sk#23, cs_quantity#18] + : : : : +- *(8) BroadcastHashJoin [ss_sold_date_sk#27], [d_date_sk#28], Inner, BuildRight + : : : : :- *(8) Project [ss_sold_date_sk#27, ss_item_sk#19, ss_store_sk#21, ss_quantity#16, sr_returned_date_sk#25, sr_return_quantity#17, cs_sold_date_sk#23, cs_quantity#18] + : : : : : +- *(8) BroadcastHashJoin [sr_customer_sk#29, sr_item_sk#30], [cast(cs_bill_customer_sk#31 as bigint), cast(cs_item_sk#32 as bigint)], Inner, BuildRight + : : : : : :- *(8) Project [ss_sold_date_sk#27, ss_item_sk#19, ss_store_sk#21, ss_quantity#16, sr_returned_date_sk#25, sr_item_sk#30, sr_customer_sk#29, sr_return_quantity#17] + : : : : : : +- *(8) BroadcastHashJoin [cast(ss_customer_sk#33 as bigint), cast(ss_item_sk#19 as bigint), cast(ss_ticket_number#34 as bigint)], [sr_customer_sk#29, sr_item_sk#30, sr_ticket_number#35], Inner, BuildRight + : : : : : : :- *(8) Project [ss_sold_date_sk#27, ss_item_sk#19, ss_customer_sk#33, ss_store_sk#21, ss_ticket_number#34, ss_quantity#16] + : : : : : : : +- *(8) Filter ((((isnotnull(ss_ticket_number#34) && isnotnull(ss_item_sk#19)) && isnotnull(ss_customer_sk#33)) && isnotnull(ss_sold_date_sk#27)) && isnotnull(ss_store_sk#21)) + : : : : : : : +- *(8) FileScan parquet default.store_sales[ss_sold_date_sk#27,ss_item_sk#19,ss_customer_sk#33,ss_store_sk#21,ss_ticket_number#34,ss_quantity#16] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_ticket_number), IsNotNull(ss_item_sk), 
IsNotNull(ss_customer_sk), IsNotNull(ss_sold..., ReadSchema: struct + : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : +- *(3) Project [d_date_sk#28] + : : : : +- *(3) Filter ((isnotnull(d_quarter_name#36) && (d_quarter_name#36 = 2001Q1)) && isnotnull(d_date_sk#28)) + : : : : +- *(3) FileScan parquet default.date_dim[d_date_sk#28,d_quarter_name#36] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_quarter_name), EqualTo(d_quarter_name,2001Q1), IsNotNull(d_date_sk)], ReadSchema: struct + : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : +- *(4) Project [d_date_sk#26] + : : : +- *(4) Filter (d_quarter_name#37 IN (2001Q1,2001Q2,2001Q3) && isnotnull(d_date_sk#26)) + : : : +- *(4) FileScan parquet default.date_dim[d_date_sk#26,d_quarter_name#37] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [In(d_quarter_name, [2001Q1,2001Q2,2001Q3]), IsNotNull(d_date_sk)], ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(5) Project [d_date_sk#24] + : : +- *(5) Filter (d_quarter_name#38 IN (2001Q1,2001Q2,2001Q3) && isnotnull(d_date_sk#24)) + : : +- *(5) FileScan parquet default.date_dim[d_date_sk#24,d_quarter_name#38] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [In(d_quarter_name, [2001Q1,2001Q2,2001Q3]), IsNotNull(d_date_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(6) Project [s_store_sk#22, s_state#3] + : +- *(6) Filter isnotnull(s_store_sk#22) 
+ : +- *(6) FileScan parquet default.store[s_store_sk#22,s_state#3] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_store_sk)], ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(7) Project [i_item_sk#20, i_item_id#1, i_item_desc#2] + +- *(7) Filter isnotnull(i_item_sk#20) + +- *(7) FileScan parquet default.item[i_item_sk#20,i_item_id#1,i_item_desc#2] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q17/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q17/simplified.txt new file mode 100644 index 000000000..454790ddb --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q17/simplified.txt @@ -0,0 +1,66 @@ +TakeOrderedAndProject [as_store_returns_quantityave,catalog_sales_quantitycount,i_item_id,i_item_desc,store_sales_quantityave,store_sales_quantitycount,store_sales_quantitystdev,catalog_sales_quantitycov,s_state,store_returns_quantitycov,catalog_sales_quantityave,store_sales_quantitycov,as_store_returns_quantitystdev,as_store_returns_quantitycount,catalog_sales_quantitystdev] + WholeStageCodegen + HashAggregate [count,avg(cast(ss_quantity as bigint)),i_item_id,stddev_samp(cast(ss_quantity as double)),i_item_desc,stddev_samp(cast(sr_return_quantity as double)),m2,avg(cast(sr_return_quantity as bigint)),n,count,m2,avg,avg,s_state,count(sr_return_quantity),sum,avg,n,count,m2,count(cs_quantity),sum,stddev_samp(cast(cs_quantity as double)),avg(cast(cs_quantity as bigint)),count,n,sum,count,count(ss_quantity),count] [count,avg(cast(ss_quantity as 
bigint)),as_store_returns_quantityave,catalog_sales_quantitycount,stddev_samp(cast(ss_quantity as double)),stddev_samp(cast(sr_return_quantity as double)),m2,avg(cast(sr_return_quantity as bigint)),n,count,store_sales_quantityave,store_sales_quantitycount,store_sales_quantitystdev,m2,avg,avg,catalog_sales_quantitycov,count(sr_return_quantity),store_returns_quantitycov,sum,catalog_sales_quantityave,store_sales_quantitycov,avg,n,count,as_store_returns_quantitystdev,m2,count(cs_quantity),sum,stddev_samp(cast(cs_quantity as double)),avg(cast(cs_quantity as bigint)),count,as_store_returns_quantitycount,n,sum,count,count(ss_quantity),count,catalog_sales_quantitystdev] + InputAdapter + Exchange [i_item_id,i_item_desc,s_state] #1 + WholeStageCodegen + HashAggregate [count,m2,i_item_id,m2,i_item_desc,m2,count,avg,avg,m2,count,n,count,sum,n,sum,n,count,m2,avg,avg,s_state,avg,sum,sr_return_quantity,avg,count,count,count,n,count,m2,sum,count,ss_quantity,cs_quantity,n,sum,count,n,sum,count] [count,m2,m2,m2,count,avg,avg,m2,count,n,count,sum,n,sum,n,count,m2,avg,avg,avg,sum,avg,count,count,count,n,count,m2,sum,count,n,sum,count,n,sum,count] + Project [ss_quantity,cs_quantity,sr_return_quantity,s_state,i_item_desc,i_item_id] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [ss_quantity,ss_item_sk,cs_quantity,sr_return_quantity,s_state] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_quantity,ss_item_sk,cs_quantity,ss_store_sk,sr_return_quantity] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [ss_quantity,ss_item_sk,cs_quantity,ss_store_sk,sr_return_quantity,cs_sold_date_sk] + BroadcastHashJoin [sr_returned_date_sk,d_date_sk] + Project [ss_quantity,ss_item_sk,cs_quantity,ss_store_sk,sr_return_quantity,cs_sold_date_sk,sr_returned_date_sk] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_quantity,ss_item_sk,cs_quantity,ss_store_sk,sr_return_quantity,cs_sold_date_sk,ss_sold_date_sk,sr_returned_date_sk] + BroadcastHashJoin 
[sr_customer_sk,sr_item_sk,cs_bill_customer_sk,cs_item_sk] + Project [ss_quantity,ss_item_sk,ss_store_sk,sr_return_quantity,sr_customer_sk,sr_item_sk,ss_sold_date_sk,sr_returned_date_sk] + BroadcastHashJoin [sr_customer_sk,sr_ticket_number,ss_customer_sk,sr_item_sk,ss_ticket_number,ss_item_sk] + Project [ss_quantity,ss_item_sk,ss_store_sk,ss_customer_sk,ss_sold_date_sk,ss_ticket_number] + Filter [ss_sold_date_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_item_sk] + Scan parquet default.store_sales [ss_quantity,ss_item_sk,ss_store_sk,ss_customer_sk,ss_sold_date_sk,ss_ticket_number] [ss_quantity,ss_item_sk,ss_store_sk,ss_customer_sk,ss_sold_date_sk,ss_ticket_number] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen + Project [sr_return_quantity,sr_customer_sk,sr_ticket_number,sr_item_sk,sr_returned_date_sk] + Filter [sr_ticket_number,sr_customer_sk,sr_item_sk,sr_returned_date_sk] + Scan parquet default.store_returns [sr_return_quantity,sr_customer_sk,sr_ticket_number,sr_item_sk,sr_returned_date_sk] [sr_return_quantity,sr_customer_sk,sr_ticket_number,sr_item_sk,sr_returned_date_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen + Project [cs_sold_date_sk,cs_bill_customer_sk,cs_item_sk,cs_quantity] + Filter [cs_bill_customer_sk,cs_item_sk,cs_sold_date_sk] + Scan parquet default.catalog_sales [cs_sold_date_sk,cs_bill_customer_sk,cs_item_sk,cs_quantity] [cs_sold_date_sk,cs_bill_customer_sk,cs_item_sk,cs_quantity] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen + Project [d_date_sk] + Filter [d_quarter_name,d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_quarter_name] [d_date_sk,d_quarter_name] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen + Project [d_date_sk] + Filter [d_quarter_name,d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_quarter_name] [d_date_sk,d_quarter_name] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen + Project [d_date_sk] + Filter [d_quarter_name,d_date_sk] + Scan parquet 
default.date_dim [d_date_sk,d_quarter_name] [d_date_sk,d_quarter_name] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen + Project [s_store_sk,s_state] + Filter [s_store_sk] + Scan parquet default.store [s_store_sk,s_state] [s_store_sk,s_state] + InputAdapter + BroadcastExchange #8 + WholeStageCodegen + Project [i_item_sk,i_item_id,i_item_desc] + Filter [i_item_sk] + Scan parquet default.item [i_item_sk,i_item_id,i_item_desc] [i_item_sk,i_item_id,i_item_desc] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q18/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q18/explain.txt new file mode 100644 index 000000000..9c1bcddcf --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q18/explain.txt @@ -0,0 +1,45 @@ +== Physical Plan == +TakeOrderedAndProject(limit=100, orderBy=[ca_country#1 ASC NULLS FIRST,ca_state#2 ASC NULLS FIRST,ca_county#3 ASC NULLS FIRST,i_item_id#4 ASC NULLS FIRST], output=[i_item_id#4,ca_country#1,ca_state#2,ca_county#3,agg1#5,agg2#6,agg3#7,agg4#8,agg5#9,agg6#10,agg7#11]) ++- *(8) HashAggregate(keys=[i_item_id#4, ca_country#1, ca_state#2, ca_county#3, spark_grouping_id#12], functions=[avg(cast(cs_quantity#13 as decimal(12,2))), avg(cast(cs_list_price#14 as decimal(12,2))), avg(cast(cs_coupon_amt#15 as decimal(12,2))), avg(cast(cs_sales_price#16 as decimal(12,2))), avg(cast(cs_net_profit#17 as decimal(12,2))), avg(cast(c_birth_year#18 as decimal(12,2))), avg(cast(cd_dep_count#19 as decimal(12,2)))]) + +- Exchange hashpartitioning(i_item_id#4, ca_country#1, ca_state#2, ca_county#3, spark_grouping_id#12, 200) + +- *(7) HashAggregate(keys=[i_item_id#4, ca_country#1, ca_state#2, ca_county#3, spark_grouping_id#12], functions=[partial_avg(cast(cs_quantity#13 as decimal(12,2))), partial_avg(cast(cs_list_price#14 as decimal(12,2))), partial_avg(cast(cs_coupon_amt#15 as decimal(12,2))), partial_avg(cast(cs_sales_price#16 as decimal(12,2))), partial_avg(cast(cs_net_profit#17 as 
decimal(12,2))), partial_avg(cast(c_birth_year#18 as decimal(12,2))), partial_avg(cast(cd_dep_count#19 as decimal(12,2)))]) + +- *(7) Expand [List(cs_quantity#13, cs_list_price#14, cs_sales_price#16, cs_coupon_amt#15, cs_net_profit#17, cd_dep_count#19, c_birth_year#18, i_item_id#20, ca_country#21, ca_state#22, ca_county#23, 0), List(cs_quantity#13, cs_list_price#14, cs_sales_price#16, cs_coupon_amt#15, cs_net_profit#17, cd_dep_count#19, c_birth_year#18, i_item_id#20, ca_country#21, ca_state#22, null, 1), List(cs_quantity#13, cs_list_price#14, cs_sales_price#16, cs_coupon_amt#15, cs_net_profit#17, cd_dep_count#19, c_birth_year#18, i_item_id#20, ca_country#21, null, null, 3), List(cs_quantity#13, cs_list_price#14, cs_sales_price#16, cs_coupon_amt#15, cs_net_profit#17, cd_dep_count#19, c_birth_year#18, i_item_id#20, null, null, null, 7), List(cs_quantity#13, cs_list_price#14, cs_sales_price#16, cs_coupon_amt#15, cs_net_profit#17, cd_dep_count#19, c_birth_year#18, null, null, null, null, 15)], [cs_quantity#13, cs_list_price#14, cs_sales_price#16, cs_coupon_amt#15, cs_net_profit#17, cd_dep_count#19, c_birth_year#18, i_item_id#4, ca_country#1, ca_state#2, ca_county#3, spark_grouping_id#12] + +- *(7) Project [cs_quantity#13, cs_list_price#14, cs_sales_price#16, cs_coupon_amt#15, cs_net_profit#17, cd_dep_count#19, c_birth_year#18, i_item_id#24 AS i_item_id#20, ca_country#25 AS ca_country#21, ca_state#26 AS ca_state#22, ca_county#27 AS ca_county#23] + +- *(7) BroadcastHashJoin [cs_item_sk#28], [i_item_sk#29], Inner, BuildRight + :- *(7) Project [cs_item_sk#28, cs_quantity#13, cs_list_price#14, cs_sales_price#16, cs_coupon_amt#15, cs_net_profit#17, cd_dep_count#19, c_birth_year#18, ca_county#27, ca_state#26, ca_country#25] + : +- *(7) BroadcastHashJoin [cs_sold_date_sk#30], [d_date_sk#31], Inner, BuildRight + : :- *(7) Project [cs_sold_date_sk#30, cs_item_sk#28, cs_quantity#13, cs_list_price#14, cs_sales_price#16, cs_coupon_amt#15, cs_net_profit#17, cd_dep_count#19, 
c_birth_year#18, ca_county#27, ca_state#26, ca_country#25] + : : +- *(7) BroadcastHashJoin [c_current_addr_sk#32], [ca_address_sk#33], Inner, BuildRight + : : :- *(7) Project [cs_sold_date_sk#30, cs_item_sk#28, cs_quantity#13, cs_list_price#14, cs_sales_price#16, cs_coupon_amt#15, cs_net_profit#17, cd_dep_count#19, c_current_addr_sk#32, c_birth_year#18] + : : : +- *(7) BroadcastHashJoin [c_current_cdemo_sk#34], [cd_demo_sk#35], Inner, BuildRight + : : : :- *(7) Project [cs_sold_date_sk#30, cs_item_sk#28, cs_quantity#13, cs_list_price#14, cs_sales_price#16, cs_coupon_amt#15, cs_net_profit#17, cd_dep_count#19, c_current_cdemo_sk#34, c_current_addr_sk#32, c_birth_year#18] + : : : : +- *(7) BroadcastHashJoin [cs_bill_customer_sk#36], [c_customer_sk#37], Inner, BuildRight + : : : : :- *(7) Project [cs_sold_date_sk#30, cs_bill_customer_sk#36, cs_item_sk#28, cs_quantity#13, cs_list_price#14, cs_sales_price#16, cs_coupon_amt#15, cs_net_profit#17, cd_dep_count#19] + : : : : : +- *(7) BroadcastHashJoin [cs_bill_cdemo_sk#38], [cd_demo_sk#39], Inner, BuildRight + : : : : : :- *(7) Project [cs_sold_date_sk#30, cs_bill_customer_sk#36, cs_bill_cdemo_sk#38, cs_item_sk#28, cs_quantity#13, cs_list_price#14, cs_sales_price#16, cs_coupon_amt#15, cs_net_profit#17] + : : : : : : +- *(7) Filter (((isnotnull(cs_bill_cdemo_sk#38) && isnotnull(cs_bill_customer_sk#36)) && isnotnull(cs_sold_date_sk#30)) && isnotnull(cs_item_sk#28)) + : : : : : : +- *(7) FileScan parquet default.catalog_sales[cs_sold_date_sk#30,cs_bill_customer_sk#36,cs_bill_cdemo_sk#38,cs_item_sk#28,cs_quantity#13,cs_list_price#14,cs_sales_price#16,cs_coupon_amt#15,cs_net_profit#17] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_bill_cdemo_sk), IsNotNull(cs_bill_customer_sk), IsNotNull(cs_sold_date_sk), IsNotNu..., ReadSchema: struct + : : : : +- BroadcastExchange 
HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : +- *(2) Project [c_customer_sk#37, c_current_cdemo_sk#34, c_current_addr_sk#32, c_birth_year#18] + : : : : +- *(2) Filter (((c_birth_month#42 IN (1,6,8,9,12,2) && isnotnull(c_customer_sk#37)) && isnotnull(c_current_cdemo_sk#34)) && isnotnull(c_current_addr_sk#32)) + : : : : +- *(2) FileScan parquet default.customer[c_customer_sk#37,c_current_cdemo_sk#34,c_current_addr_sk#32,c_birth_month#42,c_birth_year#18] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [In(c_birth_month, [1,6,8,9,12,2]), IsNotNull(c_customer_sk), IsNotNull(c_current_cdemo_sk), IsNo..., ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(4) Project [ca_address_sk#33, ca_county#27, ca_state#26, ca_country#25] + : : +- *(4) Filter (ca_state#26 IN (MS,IN,ND,OK,NM,VA) && isnotnull(ca_address_sk#33)) + : : +- *(4) FileScan parquet default.customer_address[ca_address_sk#33,ca_county#27,ca_state#26,ca_country#25] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_address], PartitionFilters: [], PushedFilters: [In(ca_state, [MS,IN,ND,OK,NM,VA]), IsNotNull(ca_address_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(5) Project [d_date_sk#31] + : +- *(5) Filter ((isnotnull(d_year#43) && (d_year#43 = 1998)) && isnotnull(d_date_sk#31)) + : +- *(5) FileScan parquet default.date_dim[d_date_sk#31,d_year#43] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), EqualTo(d_year,1998), IsNotNull(d_date_sk)], ReadSchema: struct + +- BroadcastExchange 
HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(6) Project [i_item_sk#29, i_item_id#24] + +- *(6) Filter isnotnull(i_item_sk#29) + +- *(6) FileScan parquet default.item[i_item_sk#29,i_item_id#24] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q18/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q18/simplified.txt new file mode 100644 index 000000000..d045bd95e --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q18/simplified.txt @@ -0,0 +1,59 @@ +TakeOrderedAndProject [ca_country,agg7,agg6,ca_county,agg2,agg1,agg4,agg3,ca_state,agg5,i_item_id] + WholeStageCodegen + HashAggregate [ca_country,avg(cast(cs_quantity as decimal(12,2))),count,sum,avg(cast(cs_coupon_amt as decimal(12,2))),sum,sum,ca_county,avg(cast(cs_list_price as decimal(12,2))),sum,sum,count,sum,count,avg(cast(cs_net_profit as decimal(12,2))),spark_grouping_id,sum,count,ca_state,avg(cast(cd_dep_count as decimal(12,2))),count,avg(cast(cs_sales_price as decimal(12,2))),count,count,avg(cast(c_birth_year as decimal(12,2))),i_item_id] [avg(cast(cs_quantity as decimal(12,2))),count,agg7,sum,avg(cast(cs_coupon_amt as decimal(12,2))),sum,sum,agg6,avg(cast(cs_list_price as decimal(12,2))),agg2,agg1,sum,sum,count,sum,count,agg4,agg3,avg(cast(cs_net_profit as decimal(12,2))),sum,count,agg5,avg(cast(cd_dep_count as decimal(12,2))),count,avg(cast(cs_sales_price as decimal(12,2))),count,count,avg(cast(c_birth_year as decimal(12,2)))] + InputAdapter + Exchange [ca_country,ca_county,spark_grouping_id,ca_state,i_item_id] #1 + WholeStageCodegen + HashAggregate 
[ca_country,cs_list_price,cs_coupon_amt,sum,count,sum,count,count,sum,sum,count,sum,sum,count,sum,count,ca_county,sum,sum,count,sum,count,count,sum,sum,sum,spark_grouping_id,cs_net_profit,sum,count,ca_state,cs_sales_price,cs_quantity,cd_dep_count,count,count,c_birth_year,count,count,i_item_id] [sum,count,sum,count,count,sum,sum,count,sum,sum,count,sum,count,sum,sum,count,sum,count,count,sum,sum,sum,sum,count,count,count,count,count] + Expand [cs_list_price,cs_coupon_amt,ca_country,i_item_id,ca_county,cs_net_profit,cs_sales_price,cs_quantity,cd_dep_count,ca_state,c_birth_year] + Project [cs_list_price,cs_coupon_amt,i_item_id,ca_county,ca_country,cs_net_profit,cs_sales_price,cs_quantity,cd_dep_count,c_birth_year,ca_state] + BroadcastHashJoin [cs_item_sk,i_item_sk] + Project [ca_state,cs_coupon_amt,cs_quantity,c_birth_year,cs_sales_price,cd_dep_count,ca_county,cs_item_sk,cs_net_profit,ca_country,cs_list_price] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [ca_state,cs_coupon_amt,cs_quantity,c_birth_year,cs_sales_price,cs_sold_date_sk,cd_dep_count,ca_county,cs_item_sk,cs_net_profit,ca_country,cs_list_price] + BroadcastHashJoin [c_current_addr_sk,ca_address_sk] + Project [c_current_addr_sk,cs_coupon_amt,cs_quantity,c_birth_year,cs_sales_price,cs_sold_date_sk,cd_dep_count,cs_item_sk,cs_net_profit,cs_list_price] + BroadcastHashJoin [c_current_cdemo_sk,cd_demo_sk] + Project [c_current_cdemo_sk,c_current_addr_sk,cs_coupon_amt,cs_quantity,c_birth_year,cs_sales_price,cs_sold_date_sk,cd_dep_count,cs_item_sk,cs_net_profit,cs_list_price] + BroadcastHashJoin [cs_bill_customer_sk,c_customer_sk] + Project [cs_coupon_amt,cs_quantity,cs_bill_customer_sk,cs_sales_price,cs_sold_date_sk,cd_dep_count,cs_item_sk,cs_net_profit,cs_list_price] + BroadcastHashJoin [cs_bill_cdemo_sk,cd_demo_sk] + Project [cs_coupon_amt,cs_bill_cdemo_sk,cs_quantity,cs_bill_customer_sk,cs_sales_price,cs_sold_date_sk,cs_item_sk,cs_net_profit,cs_list_price] + Filter 
[cs_bill_cdemo_sk,cs_bill_customer_sk,cs_sold_date_sk,cs_item_sk] + Scan parquet default.catalog_sales [cs_coupon_amt,cs_bill_cdemo_sk,cs_quantity,cs_bill_customer_sk,cs_sales_price,cs_sold_date_sk,cs_item_sk,cs_net_profit,cs_list_price] [cs_coupon_amt,cs_bill_cdemo_sk,cs_quantity,cs_bill_customer_sk,cs_sales_price,cs_sold_date_sk,cs_item_sk,cs_net_profit,cs_list_price] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen + Project [cd_demo_sk,cd_dep_count] + Filter [cd_gender,cd_education_status,cd_demo_sk] + Scan parquet default.customer_demographics [cd_demo_sk,cd_gender,cd_education_status,cd_dep_count] [cd_demo_sk,cd_gender,cd_education_status,cd_dep_count] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen + Project [c_customer_sk,c_current_cdemo_sk,c_current_addr_sk,c_birth_year] + Filter [c_birth_month,c_customer_sk,c_current_cdemo_sk,c_current_addr_sk] + Scan parquet default.customer [c_current_cdemo_sk,c_current_addr_sk,c_birth_year,c_customer_sk,c_birth_month] [c_current_cdemo_sk,c_current_addr_sk,c_birth_year,c_customer_sk,c_birth_month] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen + Project [cd_demo_sk] + Filter [cd_demo_sk] + Scan parquet default.customer_demographics [cd_demo_sk] [cd_demo_sk] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen + Project [ca_address_sk,ca_county,ca_state,ca_country] + Filter [ca_state,ca_address_sk] + Scan parquet default.customer_address [ca_address_sk,ca_county,ca_state,ca_country] [ca_address_sk,ca_county,ca_state,ca_country] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen + Project [d_date_sk] + Filter [d_year,d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_year] [d_date_sk,d_year] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen + Project [i_item_sk,i_item_id] + Filter [i_item_sk] + Scan parquet default.item [i_item_sk,i_item_id] [i_item_sk,i_item_id] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q19/explain.txt 
b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q19/explain.txt new file mode 100644 index 000000000..97442ee59 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q19/explain.txt @@ -0,0 +1,38 @@ +== Physical Plan == +TakeOrderedAndProject(limit=100, orderBy=[ext_price#1 DESC NULLS LAST,brand#2 ASC NULLS FIRST,brand_id#3 ASC NULLS FIRST,i_manufact_id#4 ASC NULLS FIRST,i_manufact#5 ASC NULLS FIRST], output=[brand_id#3,brand#2,i_manufact_id#4,i_manufact#5,ext_price#1]) ++- *(7) HashAggregate(keys=[i_brand#6, i_brand_id#7, i_manufact_id#4, i_manufact#5], functions=[sum(UnscaledValue(ss_ext_sales_price#8))]) + +- Exchange hashpartitioning(i_brand#6, i_brand_id#7, i_manufact_id#4, i_manufact#5, 200) + +- *(6) HashAggregate(keys=[i_brand#6, i_brand_id#7, i_manufact_id#4, i_manufact#5], functions=[partial_sum(UnscaledValue(ss_ext_sales_price#8))]) + +- *(6) Project [ss_ext_sales_price#8, i_brand_id#7, i_brand#6, i_manufact_id#4, i_manufact#5] + +- *(6) BroadcastHashJoin [ss_store_sk#9], [s_store_sk#10], Inner, BuildRight, NOT (substring(ca_zip#11, 1, 5) = substring(s_zip#12, 1, 5)) + :- *(6) Project [ss_store_sk#9, ss_ext_sales_price#8, i_brand_id#7, i_brand#6, i_manufact_id#4, i_manufact#5, ca_zip#11] + : +- *(6) BroadcastHashJoin [c_current_addr_sk#13], [ca_address_sk#14], Inner, BuildRight + : :- *(6) Project [ss_store_sk#9, ss_ext_sales_price#8, i_brand_id#7, i_brand#6, i_manufact_id#4, i_manufact#5, c_current_addr_sk#13] + : : +- *(6) BroadcastHashJoin [ss_customer_sk#15], [c_customer_sk#16], Inner, BuildRight + : : :- *(6) Project [ss_customer_sk#15, ss_store_sk#9, ss_ext_sales_price#8, i_brand_id#7, i_brand#6, i_manufact_id#4, i_manufact#5] + : : : +- *(6) BroadcastHashJoin [ss_item_sk#17], [i_item_sk#18], Inner, BuildRight + : : : :- *(6) Project [ss_item_sk#17, ss_customer_sk#15, ss_store_sk#9, ss_ext_sales_price#8] + : : : : +- *(6) BroadcastHashJoin [d_date_sk#19], [ss_sold_date_sk#20], Inner, BuildRight + : : : : :- 
*(6) Project [d_date_sk#19] + : : : : : +- *(6) Filter ((((isnotnull(d_moy#21) && isnotnull(d_year#22)) && (d_moy#21 = 11)) && (d_year#22 = 1998)) && isnotnull(d_date_sk#19)) + : : : : : +- *(6) FileScan parquet default.date_dim[d_date_sk#19,d_year#22,d_moy#21] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), EqualTo(d_moy,11), EqualTo(d_year,1998), IsNotNull(d_date_sk)], ReadSchema: struct + : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : +- *(1) Project [ss_sold_date_sk#20, ss_item_sk#17, ss_customer_sk#15, ss_store_sk#9, ss_ext_sales_price#8] + : : : : +- *(1) Filter (((isnotnull(ss_sold_date_sk#20) && isnotnull(ss_item_sk#17)) && isnotnull(ss_customer_sk#15)) && isnotnull(ss_store_sk#9)) + : : : : +- *(1) FileScan parquet default.store_sales[ss_sold_date_sk#20,ss_item_sk#17,ss_customer_sk#15,ss_store_sk#9,ss_ext_sales_price#8] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_item_sk), IsNotNull(ss_customer_sk), IsNotNull(ss_store..., ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(4) Project [ca_address_sk#14, ca_zip#11] + : +- *(4) Filter (isnotnull(ca_address_sk#14) && isnotnull(ca_zip#11)) + : +- *(4) FileScan parquet default.customer_address[ca_address_sk#14,ca_zip#11] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_address_sk), IsNotNull(ca_zip)], ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(5) 
Project [s_store_sk#10, s_zip#12] + +- *(5) Filter (isnotnull(s_zip#12) && isnotnull(s_store_sk#10)) + +- *(5) FileScan parquet default.store[s_store_sk#10,s_zip#12] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_zip), IsNotNull(s_store_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q19/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q19/simplified.txt new file mode 100644 index 000000000..c008da8ae --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q19/simplified.txt @@ -0,0 +1,50 @@ +TakeOrderedAndProject [ext_price,brand_id,i_manufact,i_manufact_id,brand] + WholeStageCodegen + HashAggregate [i_manufact,sum(UnscaledValue(ss_ext_sales_price)),i_brand,i_brand_id,i_manufact_id,sum] [ext_price,brand_id,sum(UnscaledValue(ss_ext_sales_price)),brand,sum] + InputAdapter + Exchange [i_brand,i_brand_id,i_manufact_id,i_manufact] #1 + WholeStageCodegen + HashAggregate [i_manufact,i_brand,sum,ss_ext_sales_price,i_brand_id,i_manufact_id,sum] [sum,sum] + Project [i_manufact,i_manufact_id,ss_ext_sales_price,i_brand_id,i_brand] + BroadcastHashJoin [ss_store_sk,s_store_sk,ca_zip,s_zip] + Project [i_manufact,i_manufact_id,ca_zip,ss_store_sk,ss_ext_sales_price,i_brand_id,i_brand] + BroadcastHashJoin [c_current_addr_sk,ca_address_sk] + Project [c_current_addr_sk,i_manufact,i_manufact_id,ss_store_sk,ss_ext_sales_price,i_brand_id,i_brand] + BroadcastHashJoin [ss_customer_sk,c_customer_sk] + Project [i_manufact,i_manufact_id,ss_store_sk,ss_customer_sk,ss_ext_sales_price,i_brand_id,i_brand] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ext_sales_price] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [d_date_sk] + Filter [d_moy,d_year,d_date_sk] + Scan parquet 
default.date_dim [d_date_sk,d_year,d_moy] [d_date_sk,d_year,d_moy] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen + Project [ss_item_sk,ss_store_sk,ss_customer_sk,ss_ext_sales_price,ss_sold_date_sk] + Filter [ss_sold_date_sk,ss_item_sk,ss_customer_sk,ss_store_sk] + Scan parquet default.store_sales [ss_item_sk,ss_store_sk,ss_customer_sk,ss_ext_sales_price,ss_sold_date_sk] [ss_item_sk,ss_store_sk,ss_customer_sk,ss_ext_sales_price,ss_sold_date_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen + Project [i_manufact,i_manufact_id,i_item_sk,i_brand_id,i_brand] + Filter [i_manager_id,i_item_sk] + Scan parquet default.item [i_manufact,i_manufact_id,i_item_sk,i_manager_id,i_brand_id,i_brand] [i_manufact,i_manufact_id,i_item_sk,i_manager_id,i_brand_id,i_brand] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen + Project [c_customer_sk,c_current_addr_sk] + Filter [c_customer_sk,c_current_addr_sk] + Scan parquet default.customer [c_customer_sk,c_current_addr_sk] [c_customer_sk,c_current_addr_sk] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen + Project [ca_address_sk,ca_zip] + Filter [ca_address_sk,ca_zip] + Scan parquet default.customer_address [ca_address_sk,ca_zip] [ca_address_sk,ca_zip] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen + Project [s_store_sk,s_zip] + Filter [s_zip,s_store_sk] + Scan parquet default.store [s_store_sk,s_zip] [s_store_sk,s_zip] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q2/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q2/explain.txt new file mode 100644 index 000000000..aff2e2bdb --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q2/explain.txt @@ -0,0 +1,36 @@ +== Physical Plan == +*(13) Sort [d_week_seq1#1 ASC NULLS FIRST], true, 0 ++- Exchange rangepartitioning(d_week_seq1#1 ASC NULLS FIRST, 200) + +- *(12) Project [d_week_seq1#1, round(CheckOverflow((promote_precision(sun_sales1#2) / 
promote_precision(sun_sales2#3)), DecimalType(37,20)), 2) AS round((sun_sales1 / sun_sales2), 2)#4, round(CheckOverflow((promote_precision(mon_sales1#5) / promote_precision(mon_sales2#6)), DecimalType(37,20)), 2) AS round((mon_sales1 / mon_sales2), 2)#7, round(CheckOverflow((promote_precision(tue_sales1#8) / promote_precision(tue_sales2#9)), DecimalType(37,20)), 2) AS round((tue_sales1 / tue_sales2), 2)#10, round(CheckOverflow((promote_precision(wed_sales1#11) / promote_precision(wed_sales2#12)), DecimalType(37,20)), 2) AS round((wed_sales1 / wed_sales2), 2)#13, round(CheckOverflow((promote_precision(thu_sales1#14) / promote_precision(thu_sales2#15)), DecimalType(37,20)), 2) AS round((thu_sales1 / thu_sales2), 2)#16, round(CheckOverflow((promote_precision(fri_sales1#17) / promote_precision(fri_sales2#18)), DecimalType(37,20)), 2) AS round((fri_sales1 / fri_sales2), 2)#19, round(CheckOverflow((promote_precision(sat_sales1#20) / promote_precision(sat_sales2#21)), DecimalType(37,20)), 2) AS round((sat_sales1 / sat_sales2), 2)#22] + +- *(12) BroadcastHashJoin [d_week_seq1#1], [(d_week_seq2#23 - 53)], Inner, BuildRight + :- *(12) Project [d_week_seq#24 AS d_week_seq1#1, sun_sales#25 AS sun_sales1#2, mon_sales#26 AS mon_sales1#5, tue_sales#27 AS tue_sales1#8, wed_sales#28 AS wed_sales1#11, thu_sales#29 AS thu_sales1#14, fri_sales#30 AS fri_sales1#17, sat_sales#31 AS sat_sales1#20] + : +- *(12) BroadcastHashJoin [d_week_seq#24], [d_week_seq#32], Inner, BuildRight + : :- *(12) HashAggregate(keys=[d_week_seq#24], functions=[sum(UnscaledValue(CASE WHEN (d_day_name#33 = Sunday) THEN sales_price#34 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#33 = Monday) THEN sales_price#34 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#33 = Tuesday) THEN sales_price#34 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#33 = Wednesday) THEN sales_price#34 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#33 = Thursday) THEN sales_price#34 ELSE null END)), 
sum(UnscaledValue(CASE WHEN (d_day_name#33 = Friday) THEN sales_price#34 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#33 = Saturday) THEN sales_price#34 ELSE null END))]) + : : +- Exchange hashpartitioning(d_week_seq#24, 200) + : : +- *(4) HashAggregate(keys=[d_week_seq#24], functions=[partial_sum(UnscaledValue(CASE WHEN (d_day_name#33 = Sunday) THEN sales_price#34 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#33 = Monday) THEN sales_price#34 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#33 = Tuesday) THEN sales_price#34 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#33 = Wednesday) THEN sales_price#34 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#33 = Thursday) THEN sales_price#34 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#33 = Friday) THEN sales_price#34 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#33 = Saturday) THEN sales_price#34 ELSE null END))]) + : : +- *(4) Project [sales_price#34, d_week_seq#24, d_day_name#33] + : : +- *(4) BroadcastHashJoin [sold_date_sk#35], [d_date_sk#36], Inner, BuildRight + : : :- Union + : : : :- *(1) Project [ws_sold_date_sk#37 AS sold_date_sk#35, ws_ext_sales_price#38 AS sales_price#34] + : : : : +- *(1) Filter isnotnull(ws_sold_date_sk#37) + : : : : +- *(1) FileScan parquet default.web_sales[ws_sold_date_sk#37,ws_ext_sales_price#38] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_sold_date_sk)], ReadSchema: struct + : : : +- *(2) Project [cs_sold_date_sk#39 AS sold_date_sk#40, cs_ext_sales_price#41 AS sales_price#42] + : : : +- *(2) Filter isnotnull(cs_sold_date_sk#39) + : : : +- *(2) FileScan parquet default.catalog_sales[cs_sold_date_sk#39,cs_ext_sales_price#41] Batched: true, Format: Parquet, Location: 
InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_sold_date_sk)], ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(3) Project [d_date_sk#36, d_week_seq#24, d_day_name#33] + : : +- *(3) Filter (isnotnull(d_date_sk#36) && isnotnull(d_week_seq#24)) + : : +- *(3) FileScan parquet default.date_dim[d_date_sk#36,d_week_seq#24,d_day_name#33] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date_sk), IsNotNull(d_week_seq)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(5) Project [d_week_seq#32] + : +- *(5) Filter ((isnotnull(d_year#43) && (d_year#43 = 2001)) && isnotnull(d_week_seq#32)) + : +- *(5) FileScan parquet default.date_dim[d_week_seq#32,d_year#43] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2001), IsNotNull(d_week_seq)], ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(cast((input[0, int, true] - 53) as bigint))) + +- *(11) Project [d_week_seq#24 AS d_week_seq2#23, sun_sales#25 AS sun_sales2#3, mon_sales#26 AS mon_sales2#6, tue_sales#27 AS tue_sales2#9, wed_sales#28 AS wed_sales2#12, thu_sales#29 AS thu_sales2#15, fri_sales#30 AS fri_sales2#18, sat_sales#31 AS sat_sales2#21] + +- *(11) BroadcastHashJoin [d_week_seq#24], [d_week_seq#44], Inner, BuildRight + :- *(11) HashAggregate(keys=[d_week_seq#24], functions=[sum(UnscaledValue(CASE WHEN (d_day_name#33 = Sunday) THEN sales_price#34 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#33 = Monday) THEN sales_price#34 ELSE null END)), sum(UnscaledValue(CASE 
WHEN (d_day_name#33 = Tuesday) THEN sales_price#34 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#33 = Wednesday) THEN sales_price#34 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#33 = Thursday) THEN sales_price#34 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#33 = Friday) THEN sales_price#34 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#33 = Saturday) THEN sales_price#34 ELSE null END))]) + : +- ReusedExchange [d_week_seq#24, sum#45, sum#46, sum#47, sum#48, sum#49, sum#50, sum#51], Exchange hashpartitioning(d_week_seq#24, 200) + +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(10) Project [d_week_seq#44] + +- *(10) Filter ((isnotnull(d_year#52) && (d_year#52 = 2002)) && isnotnull(d_week_seq#44)) + +- *(10) FileScan parquet default.date_dim[d_week_seq#44,d_year#52] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2002), IsNotNull(d_week_seq)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q2/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q2/simplified.txt new file mode 100644 index 000000000..a3121688c --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q2/simplified.txt @@ -0,0 +1,52 @@ +WholeStageCodegen + Sort [d_week_seq1] + InputAdapter + Exchange [d_week_seq1] #1 + WholeStageCodegen + Project [wed_sales2,mon_sales1,sat_sales2,d_week_seq1,fri_sales2,thu_sales2,sat_sales1,tue_sales1,sun_sales2,sun_sales1,tue_sales2,wed_sales1,mon_sales2,thu_sales1,fri_sales1] + BroadcastHashJoin [d_week_seq1,d_week_seq2] + Project [sun_sales,thu_sales,sat_sales,wed_sales,fri_sales,tue_sales,d_week_seq,mon_sales] + BroadcastHashJoin [d_week_seq,d_week_seq] + HashAggregate [sum(UnscaledValue(CASE WHEN 
(d_day_name = Sunday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Thursday) THEN sales_price ELSE null END)),sum,sum,sum(UnscaledValue(CASE WHEN (d_day_name = Wednesday) THEN sales_price ELSE null END)),sum,sum,sum(UnscaledValue(CASE WHEN (d_day_name = Saturday) THEN sales_price ELSE null END)),d_week_seq,sum(UnscaledValue(CASE WHEN (d_day_name = Friday) THEN sales_price ELSE null END)),sum,sum,sum(UnscaledValue(CASE WHEN (d_day_name = Monday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Tuesday) THEN sales_price ELSE null END)),sum] [sum(UnscaledValue(CASE WHEN (d_day_name = Sunday) THEN sales_price ELSE null END)),mon_sales,sum(UnscaledValue(CASE WHEN (d_day_name = Thursday) THEN sales_price ELSE null END)),sum,thu_sales,sum,fri_sales,tue_sales,sum(UnscaledValue(CASE WHEN (d_day_name = Wednesday) THEN sales_price ELSE null END)),sun_sales,sum,sum,sum(UnscaledValue(CASE WHEN (d_day_name = Saturday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Friday) THEN sales_price ELSE null END)),sum,sum,sat_sales,sum(UnscaledValue(CASE WHEN (d_day_name = Monday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Tuesday) THEN sales_price ELSE null END)),sum,wed_sales] + InputAdapter + Exchange [d_week_seq] #2 + WholeStageCodegen + HashAggregate [d_day_name,sum,sum,sum,sum,sum,sum,sum,d_week_seq,sum,sum,sum,sum,sales_price,sum,sum,sum] [sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum] + Project [sales_price,d_week_seq,d_day_name] + BroadcastHashJoin [sold_date_sk,d_date_sk] + InputAdapter + Union + WholeStageCodegen + Project [ws_sold_date_sk,ws_ext_sales_price] + Filter [ws_sold_date_sk] + Scan parquet default.web_sales [ws_sold_date_sk,ws_ext_sales_price] [ws_sold_date_sk,ws_ext_sales_price] + WholeStageCodegen + Project [cs_sold_date_sk,cs_ext_sales_price] + Filter [cs_sold_date_sk] + Scan parquet default.catalog_sales 
[cs_sold_date_sk,cs_ext_sales_price] [cs_sold_date_sk,cs_ext_sales_price] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen + Project [d_date_sk,d_week_seq,d_day_name] + Filter [d_date_sk,d_week_seq] + Scan parquet default.date_dim [d_date_sk,d_week_seq,d_day_name] [d_date_sk,d_week_seq,d_day_name] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen + Project [d_week_seq] + Filter [d_year,d_week_seq] + Scan parquet default.date_dim [d_week_seq,d_year] [d_week_seq,d_year] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen + Project [sun_sales,thu_sales,sat_sales,wed_sales,fri_sales,tue_sales,d_week_seq,mon_sales] + BroadcastHashJoin [d_week_seq,d_week_seq] + HashAggregate [sum,sum(UnscaledValue(CASE WHEN (d_day_name = Sunday) THEN sales_price ELSE null END)),sum,sum,sum(UnscaledValue(CASE WHEN (d_day_name = Thursday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Wednesday) THEN sales_price ELSE null END)),sum,sum,sum(UnscaledValue(CASE WHEN (d_day_name = Saturday) THEN sales_price ELSE null END)),d_week_seq,sum(UnscaledValue(CASE WHEN (d_day_name = Friday) THEN sales_price ELSE null END)),sum,sum(UnscaledValue(CASE WHEN (d_day_name = Monday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Tuesday) THEN sales_price ELSE null END)),sum] [sum,sum(UnscaledValue(CASE WHEN (d_day_name = Sunday) THEN sales_price ELSE null END)),mon_sales,sum,sum,sum(UnscaledValue(CASE WHEN (d_day_name = Thursday) THEN sales_price ELSE null END)),thu_sales,fri_sales,tue_sales,sum(UnscaledValue(CASE WHEN (d_day_name = Wednesday) THEN sales_price ELSE null END)),sum,sun_sales,sum,sum(UnscaledValue(CASE WHEN (d_day_name = Saturday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Friday) THEN sales_price ELSE null END)),sat_sales,sum,sum(UnscaledValue(CASE WHEN (d_day_name = Monday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Tuesday) THEN sales_price ELSE null 
END)),sum,wed_sales] + InputAdapter + ReusedExchange [sum,sum,sum,sum,d_week_seq,sum,sum,sum] [sum,sum,sum,sum,d_week_seq,sum,sum,sum] #2 + InputAdapter + BroadcastExchange #6 + WholeStageCodegen + Project [d_week_seq] + Filter [d_year,d_week_seq] + Scan parquet default.date_dim [d_week_seq,d_year] [d_week_seq,d_year] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q20/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q20/explain.txt new file mode 100644 index 000000000..186e8daef --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q20/explain.txt @@ -0,0 +1,24 @@ +== Physical Plan == +TakeOrderedAndProject(limit=100, orderBy=[i_category#1 ASC NULLS FIRST,i_class#2 ASC NULLS FIRST,i_item_id#3 ASC NULLS FIRST,i_item_desc#4 ASC NULLS FIRST,revenueratio#5 ASC NULLS FIRST], output=[i_item_desc#4,i_category#1,i_class#2,i_current_price#6,itemrevenue#7,revenueratio#5]) ++- *(6) Project [i_item_desc#4, i_category#1, i_class#2, i_current_price#6, itemrevenue#7, CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(_w0#8) * 100.00), DecimalType(21,2)) as decimal(27,2))) / promote_precision(_we0#9)), DecimalType(38,17)) AS revenueratio#5, i_item_id#3] + +- Window [sum(_w1#10) windowspecdefinition(i_class#2, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS _we0#9], [i_class#2] + +- *(5) Sort [i_class#2 ASC NULLS FIRST], false, 0 + +- Exchange hashpartitioning(i_class#2, 200) + +- *(4) HashAggregate(keys=[i_item_id#3, i_item_desc#4, i_category#1, i_class#2, i_current_price#6], functions=[sum(UnscaledValue(cs_ext_sales_price#11))]) + +- Exchange hashpartitioning(i_item_id#3, i_item_desc#4, i_category#1, i_class#2, i_current_price#6, 200) + +- *(3) HashAggregate(keys=[i_item_id#3, i_item_desc#4, i_category#1, i_class#2, i_current_price#6], functions=[partial_sum(UnscaledValue(cs_ext_sales_price#11))]) + +- *(3) Project [cs_ext_sales_price#11, i_item_id#3, 
i_item_desc#4, i_current_price#6, i_class#2, i_category#1] + +- *(3) BroadcastHashJoin [cs_sold_date_sk#12], [d_date_sk#13], Inner, BuildRight + :- *(3) Project [cs_sold_date_sk#12, cs_ext_sales_price#11, i_item_id#3, i_item_desc#4, i_current_price#6, i_class#2, i_category#1] + : +- *(3) BroadcastHashJoin [cs_item_sk#14], [i_item_sk#15], Inner, BuildRight + : :- *(3) Project [cs_sold_date_sk#12, cs_item_sk#14, cs_ext_sales_price#11] + : : +- *(3) Filter (isnotnull(cs_item_sk#14) && isnotnull(cs_sold_date_sk#12)) + : : +- *(3) FileScan parquet default.catalog_sales[cs_sold_date_sk#12,cs_item_sk#14,cs_ext_sales_price#11] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_item_sk), IsNotNull(cs_sold_date_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(1) Project [i_item_sk#15, i_item_id#3, i_item_desc#4, i_current_price#6, i_class#2, i_category#1] + : +- *(1) Filter (i_category#1 IN (Sports,Books,Home) && isnotnull(i_item_sk#15)) + : +- *(1) FileScan parquet default.item[i_item_sk#15,i_item_id#3,i_item_desc#4,i_current_price#6,i_class#2,i_category#1] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [In(i_category, [Sports,Books,Home]), IsNotNull(i_item_sk)], ReadSchema: struct= 10644)) && (d_date#16 <= 10674)) && isnotnull(d_date_sk#13)) + +- *(2) FileScan parquet default.date_dim[d_date_sk#13,d_date#16] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,1999-02-22), LessThanOrEqual(d_date,1999-03-24), Is..., ReadSchema: struct \ No newline at end of file diff --git 
a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q20/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q20/simplified.txt new file mode 100644 index 000000000..ebfef4216 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q20/simplified.txt @@ -0,0 +1,34 @@ +TakeOrderedAndProject [i_item_id,i_item_desc,itemrevenue,revenueratio,i_category,i_current_price,i_class] + WholeStageCodegen + Project [i_item_id,i_item_desc,itemrevenue,_we0,i_category,_w0,i_current_price,i_class] + InputAdapter + Window [_w1,i_class] + WholeStageCodegen + Sort [i_class] + InputAdapter + Exchange [i_class] #1 + WholeStageCodegen + HashAggregate [i_item_id,i_item_desc,sum(UnscaledValue(cs_ext_sales_price)),i_category,sum,i_current_price,i_class] [itemrevenue,sum(UnscaledValue(cs_ext_sales_price)),_w1,sum,_w0] + InputAdapter + Exchange [i_item_id,i_item_desc,i_category,i_current_price,i_class] #2 + WholeStageCodegen + HashAggregate [i_item_id,i_item_desc,sum,i_category,sum,i_current_price,cs_ext_sales_price,i_class] [sum,sum] + Project [i_class,i_current_price,i_category,i_item_desc,cs_ext_sales_price,i_item_id] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [i_class,i_current_price,cs_sold_date_sk,i_category,i_item_desc,cs_ext_sales_price,i_item_id] + BroadcastHashJoin [cs_item_sk,i_item_sk] + Project [cs_sold_date_sk,cs_item_sk,cs_ext_sales_price] + Filter [cs_item_sk,cs_sold_date_sk] + Scan parquet default.catalog_sales [cs_sold_date_sk,cs_item_sk,cs_ext_sales_price] [cs_sold_date_sk,cs_item_sk,cs_ext_sales_price] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen + Project [i_class,i_current_price,i_item_sk,i_category,i_item_desc,i_item_id] + Filter [i_category,i_item_sk] + Scan parquet default.item [i_class,i_current_price,i_item_sk,i_category,i_item_desc,i_item_id] [i_class,i_current_price,i_item_sk,i_category,i_item_desc,i_item_id] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen + Project 
[d_date_sk] + Filter [d_date,d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_date] [d_date_sk,d_date] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q21/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q21/explain.txt new file mode 100644 index 000000000..2950af5ff --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q21/explain.txt @@ -0,0 +1,27 @@ +== Physical Plan == +TakeOrderedAndProject(limit=100, orderBy=[w_warehouse_name#1 ASC NULLS FIRST,i_item_id#2 ASC NULLS FIRST], output=[w_warehouse_name#1,i_item_id#2,inv_before#3,inv_after#4]) ++- *(5) Filter ((CASE WHEN (inv_before#3 > 0) THEN (cast(inv_after#4 as double) / cast(inv_before#3 as double)) ELSE null END >= 0.666667) && (CASE WHEN (inv_before#3 > 0) THEN (cast(inv_after#4 as double) / cast(inv_before#3 as double)) ELSE null END <= 1.5)) + +- *(5) HashAggregate(keys=[w_warehouse_name#1, i_item_id#2], functions=[sum(cast(CASE WHEN (d_date#5 < 11027) THEN inv_quantity_on_hand#6 ELSE 0 END as bigint)), sum(cast(CASE WHEN (d_date#5 >= 11027) THEN inv_quantity_on_hand#6 ELSE 0 END as bigint))]) + +- Exchange hashpartitioning(w_warehouse_name#1, i_item_id#2, 200) + +- *(4) HashAggregate(keys=[w_warehouse_name#1, i_item_id#2], functions=[partial_sum(cast(CASE WHEN (d_date#5 < 11027) THEN inv_quantity_on_hand#6 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (d_date#5 >= 11027) THEN inv_quantity_on_hand#6 ELSE 0 END as bigint))]) + +- *(4) Project [inv_quantity_on_hand#6, w_warehouse_name#1, i_item_id#2, d_date#5] + +- *(4) BroadcastHashJoin [inv_date_sk#7], [d_date_sk#8], Inner, BuildRight + :- *(4) Project [inv_date_sk#7, inv_quantity_on_hand#6, w_warehouse_name#1, i_item_id#2] + : +- *(4) BroadcastHashJoin [inv_item_sk#9], [i_item_sk#10], Inner, BuildRight + : :- *(4) Project [inv_date_sk#7, inv_item_sk#9, inv_quantity_on_hand#6, w_warehouse_name#1] + : : +- *(4) BroadcastHashJoin [inv_warehouse_sk#11], 
[w_warehouse_sk#12], Inner, BuildRight + : : :- *(4) Project [inv_date_sk#7, inv_item_sk#9, inv_warehouse_sk#11, inv_quantity_on_hand#6] + : : : +- *(4) Filter ((isnotnull(inv_warehouse_sk#11) && isnotnull(inv_item_sk#9)) && isnotnull(inv_date_sk#7)) + : : : +- *(4) FileScan parquet default.inventory[inv_date_sk#7,inv_item_sk#9,inv_warehouse_sk#11,inv_quantity_on_hand#6] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/inventory], PartitionFilters: [], PushedFilters: [IsNotNull(inv_warehouse_sk), IsNotNull(inv_item_sk), IsNotNull(inv_date_sk)], ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(1) Project [w_warehouse_sk#12, w_warehouse_name#1] + : : +- *(1) Filter isnotnull(w_warehouse_sk#12) + : : +- *(1) FileScan parquet default.warehouse[w_warehouse_sk#12,w_warehouse_name#1] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/warehouse], PartitionFilters: [], PushedFilters: [IsNotNull(w_warehouse_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(2) Project [i_item_sk#10, i_item_id#2] + : +- *(2) Filter (((isnotnull(i_current_price#13) && (i_current_price#13 >= 0.99)) && (i_current_price#13 <= 1.49)) && isnotnull(i_item_sk#10)) + : +- *(2) FileScan parquet default.item[i_item_sk#10,i_item_id#2,i_current_price#13] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_current_price), GreaterThanOrEqual(i_current_price,0.99), LessThanOrEqual(i_current_..., ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(3) Project [d_date_sk#8, d_date#5] + +- *(3) Filter (((isnotnull(d_date#5) && (d_date#5 
>= 10997)) && (d_date#5 <= 11057)) && isnotnull(d_date_sk#8)) + +- *(3) FileScan parquet default.date_dim[d_date_sk#8,d_date#5] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,2000-02-10), LessThanOrEqual(d_date,2000-04-10), Is..., ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q21/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q21/simplified.txt new file mode 100644 index 000000000..b74976c2f --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q21/simplified.txt @@ -0,0 +1,35 @@ +TakeOrderedAndProject [w_warehouse_name,i_item_id,inv_before,inv_after] + WholeStageCodegen + Filter [inv_before,inv_after] + HashAggregate [i_item_id,sum(cast(CASE WHEN (d_date >= 11027) THEN inv_quantity_on_hand ELSE 0 END as bigint)),sum,w_warehouse_name,sum(cast(CASE WHEN (d_date < 11027) THEN inv_quantity_on_hand ELSE 0 END as bigint)),sum] [sum(cast(CASE WHEN (d_date >= 11027) THEN inv_quantity_on_hand ELSE 0 END as bigint)),sum,inv_before,sum(cast(CASE WHEN (d_date < 11027) THEN inv_quantity_on_hand ELSE 0 END as bigint)),sum,inv_after] + InputAdapter + Exchange [w_warehouse_name,i_item_id] #1 + WholeStageCodegen + HashAggregate [i_item_id,sum,d_date,w_warehouse_name,sum,inv_quantity_on_hand,sum,sum] [sum,sum,sum,sum] + Project [inv_quantity_on_hand,w_warehouse_name,i_item_id,d_date] + BroadcastHashJoin [inv_date_sk,d_date_sk] + Project [inv_date_sk,inv_quantity_on_hand,w_warehouse_name,i_item_id] + BroadcastHashJoin [inv_item_sk,i_item_sk] + Project [inv_date_sk,inv_item_sk,inv_quantity_on_hand,w_warehouse_name] + BroadcastHashJoin [inv_warehouse_sk,w_warehouse_sk] + Project [inv_date_sk,inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand] + Filter [inv_warehouse_sk,inv_item_sk,inv_date_sk] + 
Scan parquet default.inventory [inv_date_sk,inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand] [inv_date_sk,inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen + Project [w_warehouse_sk,w_warehouse_name] + Filter [w_warehouse_sk] + Scan parquet default.warehouse [w_warehouse_sk,w_warehouse_name] [w_warehouse_sk,w_warehouse_name] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen + Project [i_item_sk,i_item_id] + Filter [i_current_price,i_item_sk] + Scan parquet default.item [i_item_sk,i_item_id,i_current_price] [i_item_sk,i_item_id,i_current_price] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen + Project [d_date_sk,d_date] + Filter [d_date,d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_date] [d_date_sk,d_date] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q22/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q22/explain.txt new file mode 100644 index 000000000..2a265ae4f --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q22/explain.txt @@ -0,0 +1,27 @@ +== Physical Plan == +TakeOrderedAndProject(limit=100, orderBy=[qoh#1 ASC NULLS FIRST,i_product_name#2 ASC NULLS FIRST,i_brand#3 ASC NULLS FIRST,i_class#4 ASC NULLS FIRST,i_category#5 ASC NULLS FIRST], output=[i_product_name#2,i_brand#3,i_class#4,i_category#5,qoh#1]) ++- *(5) HashAggregate(keys=[i_product_name#2, i_brand#3, i_class#4, i_category#5, spark_grouping_id#6], functions=[avg(cast(inv_quantity_on_hand#7 as bigint))]) + +- Exchange hashpartitioning(i_product_name#2, i_brand#3, i_class#4, i_category#5, spark_grouping_id#6, 200) + +- *(4) HashAggregate(keys=[i_product_name#2, i_brand#3, i_class#4, i_category#5, spark_grouping_id#6], functions=[partial_avg(cast(inv_quantity_on_hand#7 as bigint))]) + +- *(4) Expand [List(inv_quantity_on_hand#7, i_product_name#8, i_brand#9, i_class#10, i_category#11, 0), List(inv_quantity_on_hand#7, i_product_name#8, 
i_brand#9, i_class#10, null, 1), List(inv_quantity_on_hand#7, i_product_name#8, i_brand#9, null, null, 3), List(inv_quantity_on_hand#7, i_product_name#8, null, null, null, 7), List(inv_quantity_on_hand#7, null, null, null, null, 15)], [inv_quantity_on_hand#7, i_product_name#2, i_brand#3, i_class#4, i_category#5, spark_grouping_id#6] + +- *(4) Project [inv_quantity_on_hand#7, i_product_name#12 AS i_product_name#8, i_brand#13 AS i_brand#9, i_class#14 AS i_class#10, i_category#15 AS i_category#11] + +- *(4) BroadcastHashJoin [inv_warehouse_sk#16], [w_warehouse_sk#17], Inner, BuildRight + :- *(4) Project [inv_warehouse_sk#16, inv_quantity_on_hand#7, i_brand#13, i_class#14, i_category#15, i_product_name#12] + : +- *(4) BroadcastHashJoin [inv_item_sk#18], [i_item_sk#19], Inner, BuildRight + : :- *(4) Project [inv_item_sk#18, inv_warehouse_sk#16, inv_quantity_on_hand#7] + : : +- *(4) BroadcastHashJoin [inv_date_sk#20], [d_date_sk#21], Inner, BuildRight + : : :- *(4) Project [inv_date_sk#20, inv_item_sk#18, inv_warehouse_sk#16, inv_quantity_on_hand#7] + : : : +- *(4) Filter ((isnotnull(inv_date_sk#20) && isnotnull(inv_item_sk#18)) && isnotnull(inv_warehouse_sk#16)) + : : : +- *(4) FileScan parquet default.inventory[inv_date_sk#20,inv_item_sk#18,inv_warehouse_sk#16,inv_quantity_on_hand#7] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/inventory], PartitionFilters: [], PushedFilters: [IsNotNull(inv_date_sk), IsNotNull(inv_item_sk), IsNotNull(inv_warehouse_sk)], ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(1) Project [d_date_sk#21] + : : +- *(1) Filter (((isnotnull(d_month_seq#22) && (d_month_seq#22 >= 1200)) && (d_month_seq#22 <= 1211)) && isnotnull(d_date_sk#21)) + : : +- *(1) FileScan parquet default.date_dim[d_date_sk#21,d_month_seq#22] Batched: true, Format: Parquet, Location: 
InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1200), LessThanOrEqual(d_month_seq,1211),..., ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(2) Project [i_item_sk#19, i_brand#13, i_class#14, i_category#15, i_product_name#12] + : +- *(2) Filter isnotnull(i_item_sk#19) + : +- *(2) FileScan parquet default.item[i_item_sk#19,i_brand#13,i_class#14,i_category#15,i_product_name#12] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(3) Project [w_warehouse_sk#17] + +- *(3) Filter isnotnull(w_warehouse_sk#17) + +- *(3) FileScan parquet default.warehouse[w_warehouse_sk#17] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/warehouse], PartitionFilters: [], PushedFilters: [IsNotNull(w_warehouse_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q22/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q22/simplified.txt new file mode 100644 index 000000000..49b575687 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q22/simplified.txt @@ -0,0 +1,35 @@ +TakeOrderedAndProject [i_category,i_class,i_product_name,i_brand,qoh] + WholeStageCodegen + HashAggregate [avg(cast(inv_quantity_on_hand as bigint)),i_category,count,spark_grouping_id,sum,i_class,i_product_name,i_brand] [avg(cast(inv_quantity_on_hand as bigint)),qoh,sum,count] + InputAdapter + Exchange [i_category,spark_grouping_id,i_class,i_product_name,i_brand] #1 + 
WholeStageCodegen + HashAggregate [sum,i_category,count,count,spark_grouping_id,sum,i_class,i_product_name,inv_quantity_on_hand,i_brand] [sum,count,sum,count] + Expand [i_product_name,i_class,i_brand,i_category,inv_quantity_on_hand] + Project [i_brand,i_category,inv_quantity_on_hand,i_class,i_product_name] + BroadcastHashJoin [inv_warehouse_sk,w_warehouse_sk] + Project [i_class,inv_quantity_on_hand,inv_warehouse_sk,i_product_name,i_category,i_brand] + BroadcastHashJoin [inv_item_sk,i_item_sk] + Project [inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand] + BroadcastHashJoin [inv_date_sk,d_date_sk] + Project [inv_date_sk,inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand] + Filter [inv_date_sk,inv_item_sk,inv_warehouse_sk] + Scan parquet default.inventory [inv_date_sk,inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand] [inv_date_sk,inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen + Project [d_date_sk] + Filter [d_month_seq,d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_month_seq] [d_date_sk,d_month_seq] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen + Project [i_class,i_item_sk,i_product_name,i_category,i_brand] + Filter [i_item_sk] + Scan parquet default.item [i_class,i_item_sk,i_product_name,i_category,i_brand] [i_class,i_item_sk,i_product_name,i_category,i_brand] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen + Project [w_warehouse_sk] + Filter [w_warehouse_sk] + Scan parquet default.warehouse [w_warehouse_sk] [w_warehouse_sk] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a/explain.txt new file mode 100644 index 000000000..5dbd09984 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a/explain.txt @@ -0,0 +1,89 @@ +== Physical Plan == +CollectLimit 100 ++- *(20) HashAggregate(keys=[], functions=[sum(sales#1)]) + +- Exchange SinglePartition 
+ +- *(19) HashAggregate(keys=[], functions=[partial_sum(sales#1)]) + +- Union + :- *(9) Project [CheckOverflow((promote_precision(cast(cast(cs_quantity#2 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#3 as decimal(12,2)))), DecimalType(18,2)) AS sales#1] + : +- *(9) BroadcastHashJoin [cs_sold_date_sk#4], [d_date_sk#5], Inner, BuildRight + : :- *(9) Project [cs_sold_date_sk#4, cs_quantity#2, cs_list_price#3] + : : +- *(9) BroadcastHashJoin [cs_bill_customer_sk#6], [c_customer_sk#7], LeftSemi, BuildRight + : : :- *(9) Project [cs_sold_date_sk#4, cs_bill_customer_sk#6, cs_quantity#2, cs_list_price#3] + : : : +- *(9) BroadcastHashJoin [cs_item_sk#8], [item_sk#9], LeftSemi, BuildRight + : : : :- *(9) Project [cs_sold_date_sk#4, cs_bill_customer_sk#6, cs_item_sk#8, cs_quantity#2, cs_list_price#3] + : : : : +- *(9) Filter isnotnull(cs_sold_date_sk#4) + : : : : +- *(9) FileScan parquet default.catalog_sales[cs_sold_date_sk#4,cs_bill_customer_sk#6,cs_item_sk#8,cs_quantity#2,cs_list_price#3] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_sold_date_sk)], ReadSchema: struct 4) + : : : +- *(4) HashAggregate(keys=[substring(i_item_desc#11, 1, 30)#12, i_item_sk#13, d_date#14], functions=[count(1)]) + : : : +- Exchange hashpartitioning(substring(i_item_desc#11, 1, 30)#12, i_item_sk#13, d_date#14, 200) + : : : +- *(3) HashAggregate(keys=[substring(i_item_desc#11, 1, 30) AS substring(i_item_desc#11, 1, 30)#12, i_item_sk#13, d_date#14], functions=[partial_count(1)]) + : : : +- *(3) Project [d_date#14, i_item_sk#13, i_item_desc#11] + : : : +- *(3) BroadcastHashJoin [ss_item_sk#15], [i_item_sk#13], Inner, BuildRight + : : : :- *(3) Project [ss_item_sk#15, d_date#14] + : : : : +- *(3) BroadcastHashJoin [ss_sold_date_sk#16], [d_date_sk#5], Inner, BuildRight + : : : : :- *(3) Project [ss_sold_date_sk#16, ss_item_sk#15] + : 
: : : : +- *(3) Filter (isnotnull(ss_sold_date_sk#16) && isnotnull(ss_item_sk#15)) + : : : : : +- *(3) FileScan parquet default.store_sales[ss_sold_date_sk#16,ss_item_sk#15] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_item_sk)], ReadSchema: struct + : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : +- *(1) Project [d_date_sk#5, d_date#14] + : : : : +- *(1) Filter (d_year#17 IN (2000,2001,2002,2003) && isnotnull(d_date_sk#5)) + : : : : +- *(1) FileScan parquet default.date_dim[d_date_sk#5,d_date#14,d_year#17] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [In(d_year, [2000,2001,2002,2003]), IsNotNull(d_date_sk)], ReadSchema: struct + : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : +- *(2) Project [i_item_sk#13, i_item_desc#11] + : : : +- *(2) Filter isnotnull(i_item_sk#13) + : : : +- *(2) FileScan parquet default.item[i_item_sk#13,i_item_desc#11] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(7) Project [c_customer_sk#7] + : : +- *(7) Filter (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#18 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#19 as decimal(12,2)))), DecimalType(18,2)))#20) && (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#18 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#19 as decimal(12,2)))), 
DecimalType(18,2)))#20 as decimal(38,8)) > CheckOverflow((0.500000 * promote_precision(cast(Subquery subquery3052 as decimal(32,6)))), DecimalType(38,8)))) + : : : +- Subquery subquery3052 + : : : +- *(5) HashAggregate(keys=[], functions=[max(csales#21)]) + : : : +- Exchange SinglePartition + : : : +- *(4) HashAggregate(keys=[], functions=[partial_max(csales#21)]) + : : : +- *(4) HashAggregate(keys=[c_customer_sk#7], functions=[sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#18 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#19 as decimal(12,2)))), DecimalType(18,2)))]) + : : : +- Exchange hashpartitioning(c_customer_sk#7, 200) + : : : +- *(3) HashAggregate(keys=[c_customer_sk#7], functions=[partial_sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#18 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#19 as decimal(12,2)))), DecimalType(18,2)))]) + : : : +- *(3) Project [ss_quantity#18, ss_sales_price#19, c_customer_sk#7] + : : : +- *(3) BroadcastHashJoin [ss_sold_date_sk#16], [d_date_sk#5], Inner, BuildRight + : : : :- *(3) Project [ss_sold_date_sk#16, ss_quantity#18, ss_sales_price#19, c_customer_sk#7] + : : : : +- *(3) BroadcastHashJoin [ss_customer_sk#22], [c_customer_sk#7], Inner, BuildRight + : : : : :- *(3) Project [ss_sold_date_sk#16, ss_customer_sk#22, ss_quantity#18, ss_sales_price#19] + : : : : : +- *(3) Filter (isnotnull(ss_customer_sk#22) && isnotnull(ss_sold_date_sk#16)) + : : : : : +- *(3) FileScan parquet default.store_sales[ss_sold_date_sk#16,ss_customer_sk#22,ss_quantity#18,ss_sales_price#19] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_customer_sk), IsNotNull(ss_sold_date_sk)], ReadSchema: struct + : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : +- *(1) Project 
[c_customer_sk#7] + : : : : +- *(1) Filter isnotnull(c_customer_sk#7) + : : : : +- *(1) FileScan parquet default.customer[c_customer_sk#7] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk)], ReadSchema: struct + : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : +- *(2) Project [d_date_sk#5] + : : : +- *(2) Filter (d_year#17 IN (2000,2001,2002,2003) && isnotnull(d_date_sk#5)) + : : : +- *(2) FileScan parquet default.date_dim[d_date_sk#5,d_year#17] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [In(d_year, [2000,2001,2002,2003]), IsNotNull(d_date_sk)], ReadSchema: struct + : : +- *(7) HashAggregate(keys=[c_customer_sk#7], functions=[sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#18 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#19 as decimal(12,2)))), DecimalType(18,2)))]) + : : +- Exchange hashpartitioning(c_customer_sk#7, 200) + : : +- *(6) HashAggregate(keys=[c_customer_sk#7], functions=[partial_sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#18 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#19 as decimal(12,2)))), DecimalType(18,2)))]) + : : +- *(6) Project [ss_quantity#18, ss_sales_price#19, c_customer_sk#7] + : : +- *(6) BroadcastHashJoin [ss_customer_sk#22], [c_customer_sk#7], Inner, BuildRight + : : :- *(6) Project [ss_customer_sk#22, ss_quantity#18, ss_sales_price#19] + : : : +- *(6) Filter isnotnull(ss_customer_sk#22) + : : : +- *(6) FileScan parquet default.store_sales[ss_customer_sk#22,ss_quantity#18,ss_sales_price#19] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], 
PartitionFilters: [], PushedFilters: [IsNotNull(ss_customer_sk)], ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(5) Project [c_customer_sk#7] + : : +- *(5) Filter isnotnull(c_customer_sk#7) + : : +- *(5) FileScan parquet default.customer[c_customer_sk#7] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(8) Project [d_date_sk#5] + : +- *(8) Filter ((((isnotnull(d_year#17) && isnotnull(d_moy#23)) && (d_year#17 = 2000)) && (d_moy#23 = 2)) && isnotnull(d_date_sk#5)) + : +- *(8) FileScan parquet default.date_dim[d_date_sk#5,d_year#17,d_moy#23] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2000), EqualTo(d_moy,2), IsNotNull(d_date_sk)], ReadSchema: struct + +- *(18) Project [CheckOverflow((promote_precision(cast(cast(ws_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#25 as decimal(12,2)))), DecimalType(18,2)) AS sales#26] + +- *(18) BroadcastHashJoin [ws_sold_date_sk#27], [d_date_sk#5], Inner, BuildRight + :- *(18) Project [ws_sold_date_sk#27, ws_quantity#24, ws_list_price#25] + : +- *(18) BroadcastHashJoin [ws_bill_customer_sk#28], [c_customer_sk#7], LeftSemi, BuildRight + : :- *(18) Project [ws_sold_date_sk#27, ws_bill_customer_sk#28, ws_quantity#24, ws_list_price#25] + : : +- *(18) BroadcastHashJoin [ws_item_sk#29], [item_sk#9], LeftSemi, BuildRight + : : :- *(18) Project [ws_sold_date_sk#27, ws_item_sk#29, ws_bill_customer_sk#28, ws_quantity#24, ws_list_price#25] + : : : +- *(18) Filter isnotnull(ws_sold_date_sk#27) 
+ : : : +- *(18) FileScan parquet default.web_sales[ws_sold_date_sk#27,ws_item_sk#29,ws_bill_customer_sk#28,ws_quantity#24,ws_list_price#25] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_sold_date_sk)], ReadSchema: struct 4) + : : : : +- *(4) HashAggregate(keys=[substring(i_item_desc#14, 1, 30)#15, i_item_sk#16, d_date#17], functions=[count(1)]) + : : : : +- Exchange hashpartitioning(substring(i_item_desc#14, 1, 30)#15, i_item_sk#16, d_date#17, 200) + : : : : +- *(3) HashAggregate(keys=[substring(i_item_desc#14, 1, 30) AS substring(i_item_desc#14, 1, 30)#15, i_item_sk#16, d_date#17], functions=[partial_count(1)]) + : : : : +- *(3) Project [d_date#17, i_item_sk#16, i_item_desc#14] + : : : : +- *(3) BroadcastHashJoin [ss_item_sk#18], [i_item_sk#16], Inner, BuildRight + : : : : :- *(3) Project [ss_item_sk#18, d_date#17] + : : : : : +- *(3) BroadcastHashJoin [ss_sold_date_sk#19], [d_date_sk#7], Inner, BuildRight + : : : : : :- *(3) Project [ss_sold_date_sk#19, ss_item_sk#18] + : : : : : : +- *(3) Filter (isnotnull(ss_sold_date_sk#19) && isnotnull(ss_item_sk#18)) + : : : : : : +- *(3) FileScan parquet default.store_sales[ss_sold_date_sk#19,ss_item_sk#18] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_item_sk)], ReadSchema: struct + : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : : +- *(1) Project [d_date_sk#7, d_date#17] + : : : : : +- *(1) Filter (d_year#20 IN (2000,2001,2002,2003) && isnotnull(d_date_sk#7)) + : : : : : +- *(1) FileScan parquet default.date_dim[d_date_sk#7,d_date#17,d_year#20] Batched: true, Format: Parquet, Location: 
InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [In(d_year, [2000,2001,2002,2003]), IsNotNull(d_date_sk)], ReadSchema: struct + : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : +- *(2) Project [i_item_sk#16, i_item_desc#14] + : : : : +- *(2) Filter isnotnull(i_item_sk#16) + : : : : +- *(2) FileScan parquet default.item[i_item_sk#16,i_item_desc#14] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct + : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : +- *(7) Project [c_customer_sk#9 AS c_customer_sk#9#10] + : : : +- *(7) Filter (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#21 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#22 as decimal(12,2)))), DecimalType(18,2)))#23) && (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#21 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#22 as decimal(12,2)))), DecimalType(18,2)))#23 as decimal(38,8)) > CheckOverflow((0.500000 * promote_precision(cast(Subquery subquery3144 as decimal(32,6)))), DecimalType(38,8)))) + : : : : +- Subquery subquery3144 + : : : : +- *(5) HashAggregate(keys=[], functions=[max(csales#24)]) + : : : : +- Exchange SinglePartition + : : : : +- *(4) HashAggregate(keys=[], functions=[partial_max(csales#24)]) + : : : : +- *(4) HashAggregate(keys=[c_customer_sk#9], functions=[sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#21 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#22 as decimal(12,2)))), DecimalType(18,2)))]) + : : : : +- Exchange hashpartitioning(c_customer_sk#9, 200) + : : : : +- *(3) HashAggregate(keys=[c_customer_sk#9], 
functions=[partial_sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#21 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#22 as decimal(12,2)))), DecimalType(18,2)))]) + : : : : +- *(3) Project [ss_quantity#21, ss_sales_price#22, c_customer_sk#9] + : : : : +- *(3) BroadcastHashJoin [ss_sold_date_sk#19], [d_date_sk#7], Inner, BuildRight + : : : : :- *(3) Project [ss_sold_date_sk#19, ss_quantity#21, ss_sales_price#22, c_customer_sk#9] + : : : : : +- *(3) BroadcastHashJoin [ss_customer_sk#25], [c_customer_sk#9], Inner, BuildRight + : : : : : :- *(3) Project [ss_sold_date_sk#19, ss_customer_sk#25, ss_quantity#21, ss_sales_price#22] + : : : : : : +- *(3) Filter (isnotnull(ss_customer_sk#25) && isnotnull(ss_sold_date_sk#19)) + : : : : : : +- *(3) FileScan parquet default.store_sales[ss_sold_date_sk#19,ss_customer_sk#25,ss_quantity#21,ss_sales_price#22] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_customer_sk), IsNotNull(ss_sold_date_sk)], ReadSchema: struct + : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : : +- *(1) Project [c_customer_sk#9] + : : : : : +- *(1) Filter isnotnull(c_customer_sk#9) + : : : : : +- *(1) FileScan parquet default.customer[c_customer_sk#9] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk)], ReadSchema: struct + : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : +- *(2) Project [d_date_sk#7] + : : : : +- *(2) Filter (d_year#20 IN (2000,2001,2002,2003) && isnotnull(d_date_sk#7)) + : : : : +- *(2) FileScan parquet default.date_dim[d_date_sk#7,d_year#20] Batched: true, Format: Parquet, Location: 
InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [In(d_year, [2000,2001,2002,2003]), IsNotNull(d_date_sk)], ReadSchema: struct + : : : +- *(7) HashAggregate(keys=[c_customer_sk#9], functions=[sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#21 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#22 as decimal(12,2)))), DecimalType(18,2)))]) + : : : +- Exchange hashpartitioning(c_customer_sk#9, 200) + : : : +- *(6) HashAggregate(keys=[c_customer_sk#9], functions=[partial_sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#21 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#22 as decimal(12,2)))), DecimalType(18,2)))]) + : : : +- *(6) Project [ss_quantity#21, ss_sales_price#22, c_customer_sk#9] + : : : +- *(6) BroadcastHashJoin [ss_customer_sk#25], [c_customer_sk#9], Inner, BuildRight + : : : :- *(6) Project [ss_customer_sk#25, ss_quantity#21, ss_sales_price#22] + : : : : +- *(6) Filter isnotnull(ss_customer_sk#25) + : : : : +- *(6) FileScan parquet default.store_sales[ss_customer_sk#25,ss_quantity#21,ss_sales_price#22] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_customer_sk)], ReadSchema: struct + : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : +- *(5) Project [c_customer_sk#9] + : : : +- *(5) Filter isnotnull(c_customer_sk#9) + : : : +- *(5) FileScan parquet default.customer[c_customer_sk#9] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk)], ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(11) BroadcastHashJoin 
[c_customer_sk#9], [c_customer_sk#9#10], LeftSemi, BuildRight + : : :- *(11) Project [c_customer_sk#9, c_first_name#2, c_last_name#1] + : : : +- *(11) Filter isnotnull(c_customer_sk#9) + : : : +- *(11) FileScan parquet default.customer[c_customer_sk#9,c_first_name#2,c_last_name#1] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk)], ReadSchema: struct + : : +- ReusedExchange [c_customer_sk#9#10], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(12) Project [d_date_sk#7] + : +- *(12) Filter ((((isnotnull(d_year#20) && isnotnull(d_moy#26)) && (d_year#20 = 2000)) && (d_moy#26 = 2)) && isnotnull(d_date_sk#7)) + : +- *(12) FileScan parquet default.date_dim[d_date_sk#7,d_year#20,d_moy#26] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2000), EqualTo(d_moy,2), IsNotNull(d_date_sk)], ReadSchema: struct + +- *(28) HashAggregate(keys=[c_last_name#1, c_first_name#2], functions=[sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#27 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#28 as decimal(12,2)))), DecimalType(18,2)))]) + +- Exchange hashpartitioning(c_last_name#1, c_first_name#2, 200) + +- *(27) HashAggregate(keys=[c_last_name#1, c_first_name#2], functions=[partial_sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#27 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#28 as decimal(12,2)))), DecimalType(18,2)))]) + +- *(27) Project [ws_quantity#27, ws_list_price#28, c_first_name#2, c_last_name#1] + +- *(27) BroadcastHashJoin [ws_sold_date_sk#29], 
[d_date_sk#7], Inner, BuildRight + :- *(27) Project [ws_sold_date_sk#29, ws_quantity#27, ws_list_price#28, c_first_name#2, c_last_name#1] + : +- *(27) BroadcastHashJoin [ws_bill_customer_sk#30], [c_customer_sk#9], Inner, BuildRight + : :- *(27) BroadcastHashJoin [ws_bill_customer_sk#30], [c_customer_sk#9#31], LeftSemi, BuildRight + : : :- *(27) Project [ws_sold_date_sk#29, ws_bill_customer_sk#30, ws_quantity#27, ws_list_price#28] + : : : +- *(27) BroadcastHashJoin [ws_item_sk#32], [item_sk#12], LeftSemi, BuildRight + : : : :- *(27) Project [ws_sold_date_sk#29, ws_item_sk#32, ws_bill_customer_sk#30, ws_quantity#27, ws_list_price#28] + : : : : +- *(27) Filter (isnotnull(ws_bill_customer_sk#30) && isnotnull(ws_sold_date_sk#29)) + : : : : +- *(27) FileScan parquet default.web_sales[ws_sold_date_sk#29,ws_item_sk#32,ws_bill_customer_sk#30,ws_quantity#27,ws_list_price#28] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_bill_customer_sk), IsNotNull(ws_sold_date_sk)], ReadSchema: struct cast(Subquery subquery3246 as decimal(33,8)))) + : +- Subquery subquery3246 + : +- *(8) HashAggregate(keys=[], functions=[avg(netpaid#5)]) + : +- Exchange SinglePartition + : +- *(7) HashAggregate(keys=[], functions=[partial_avg(netpaid#5)]) + : +- *(7) HashAggregate(keys=[c_last_name#1, c_first_name#2, s_store_name#3, ca_state#7, s_state#8, i_color#9, i_current_price#10, i_manager_id#11, i_units#12, i_size#13], functions=[sum(UnscaledValue(ss_net_paid#14))]) + : +- Exchange hashpartitioning(c_last_name#1, c_first_name#2, s_store_name#3, ca_state#7, s_state#8, i_color#9, i_current_price#10, i_manager_id#11, i_units#12, i_size#13, 200) + : +- *(6) HashAggregate(keys=[c_last_name#1, c_first_name#2, s_store_name#3, ca_state#7, s_state#8, i_color#9, i_current_price#10, i_manager_id#11, i_units#12, i_size#13], functions=[partial_sum(UnscaledValue(ss_net_paid#14))]) 
+ : +- *(6) Project [ss_net_paid#14, s_store_name#3, s_state#8, i_current_price#10, i_size#13, i_color#9, i_units#12, i_manager_id#11, c_first_name#2, c_last_name#1, ca_state#7] + : +- *(6) BroadcastHashJoin [c_birth_country#15, s_zip#16], [upper(ca_country#17), ca_zip#18], Inner, BuildRight + : :- *(6) Project [ss_net_paid#14, s_store_name#3, s_state#8, s_zip#16, i_current_price#10, i_size#13, i_color#9, i_units#12, i_manager_id#11, c_first_name#2, c_last_name#1, c_birth_country#15] + : : +- *(6) BroadcastHashJoin [ss_customer_sk#19], [c_customer_sk#20], Inner, BuildRight + : : :- *(6) Project [ss_customer_sk#19, ss_net_paid#14, s_store_name#3, s_state#8, s_zip#16, i_current_price#10, i_size#13, i_color#9, i_units#12, i_manager_id#11] + : : : +- *(6) BroadcastHashJoin [ss_item_sk#21], [i_item_sk#22], Inner, BuildRight + : : : :- *(6) Project [ss_item_sk#21, ss_customer_sk#19, ss_net_paid#14, s_store_name#3, s_state#8, s_zip#16] + : : : : +- *(6) BroadcastHashJoin [ss_store_sk#23], [s_store_sk#24], Inner, BuildRight + : : : : :- *(6) Project [ss_item_sk#21, ss_customer_sk#19, ss_store_sk#23, ss_net_paid#14] + : : : : : +- *(6) BroadcastHashJoin [cast(ss_ticket_number#25 as bigint), cast(ss_item_sk#21 as bigint)], [sr_ticket_number#26, sr_item_sk#27], Inner, BuildRight + : : : : : :- *(6) Project [ss_item_sk#21, ss_customer_sk#19, ss_store_sk#23, ss_ticket_number#25, ss_net_paid#14] + : : : : : : +- *(6) Filter (((isnotnull(ss_ticket_number#25) && isnotnull(ss_item_sk#21)) && isnotnull(ss_store_sk#23)) && isnotnull(ss_customer_sk#19)) + : : : : : : +- *(6) FileScan parquet default.store_sales[ss_item_sk#21,ss_customer_sk#19,ss_store_sk#23,ss_ticket_number#25,ss_net_paid#14] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_ticket_number), IsNotNull(ss_item_sk), IsNotNull(ss_store_sk), IsNotNull(ss_custome..., ReadSchema: struct + : : : 
: +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : +- *(2) Project [s_store_sk#24, s_store_name#3, s_state#8, s_zip#16] + : : : : +- *(2) Filter (((isnotnull(s_market_id#28) && (s_market_id#28 = 8)) && isnotnull(s_store_sk#24)) && isnotnull(s_zip#16)) + : : : : +- *(2) FileScan parquet default.store[s_store_sk#24,s_store_name#3,s_market_id#28,s_state#8,s_zip#16] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_market_id), EqualTo(s_market_id,8), IsNotNull(s_store_sk), IsNotNull(s_zip)], ReadSchema: struct + : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : +- *(3) Project [i_item_sk#22, i_current_price#10, i_size#13, i_color#9, i_units#12, i_manager_id#11] + : : : +- *(3) Filter isnotnull(i_item_sk#22) + : : : +- *(3) FileScan parquet default.item[i_item_sk#22,i_current_price#10,i_size#13,i_color#9,i_units#12,i_manager_id#11] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(upper(input[2, string, true]), input[1, string, true])) + : +- *(5) Project [ca_state#7, ca_zip#18, ca_country#17] + : +- *(5) Filter isnotnull(ca_zip#18) + : +- *(5) FileScan parquet default.customer_address[ca_state#7,ca_zip#18,ca_country#17] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_zip)], ReadSchema: struct + +- *(8) HashAggregate(keys=[c_last_name#1, c_first_name#2, s_store_name#3], functions=[sum(netpaid#5)]) + +- Exchange hashpartitioning(c_last_name#1, c_first_name#2, s_store_name#3, 200) + +- *(7) 
HashAggregate(keys=[c_last_name#1, c_first_name#2, s_store_name#3], functions=[partial_sum(netpaid#5)]) + +- *(7) HashAggregate(keys=[c_last_name#1, c_first_name#2, s_store_name#3, ca_state#7, s_state#8, i_color#9, i_current_price#10, i_manager_id#11, i_units#12, i_size#13], functions=[sum(UnscaledValue(ss_net_paid#14))]) + +- Exchange hashpartitioning(c_last_name#1, c_first_name#2, s_store_name#3, ca_state#7, s_state#8, i_color#9, i_current_price#10, i_manager_id#11, i_units#12, i_size#13, 200) + +- *(6) HashAggregate(keys=[c_last_name#1, c_first_name#2, s_store_name#3, ca_state#7, s_state#8, i_color#9, i_current_price#10, i_manager_id#11, i_units#12, i_size#13], functions=[partial_sum(UnscaledValue(ss_net_paid#14))]) + +- *(6) Project [ss_net_paid#14, s_store_name#3, s_state#8, i_current_price#10, i_size#13, i_color#9, i_units#12, i_manager_id#11, c_first_name#2, c_last_name#1, ca_state#7] + +- *(6) BroadcastHashJoin [c_birth_country#15, s_zip#16], [upper(ca_country#17), ca_zip#18], Inner, BuildRight + :- *(6) Project [ss_net_paid#14, s_store_name#3, s_state#8, s_zip#16, i_current_price#10, i_size#13, i_color#9, i_units#12, i_manager_id#11, c_first_name#2, c_last_name#1, c_birth_country#15] + : +- *(6) BroadcastHashJoin [ss_customer_sk#19], [c_customer_sk#20], Inner, BuildRight + : :- *(6) Project [ss_customer_sk#19, ss_net_paid#14, s_store_name#3, s_state#8, s_zip#16, i_current_price#10, i_size#13, i_color#9, i_units#12, i_manager_id#11] + : : +- *(6) BroadcastHashJoin [ss_item_sk#21], [i_item_sk#22], Inner, BuildRight + : : :- *(6) Project [ss_item_sk#21, ss_customer_sk#19, ss_net_paid#14, s_store_name#3, s_state#8, s_zip#16] + : : : +- *(6) BroadcastHashJoin [ss_store_sk#23], [s_store_sk#24], Inner, BuildRight + : : : :- *(6) Project [ss_item_sk#21, ss_customer_sk#19, ss_store_sk#23, ss_net_paid#14] + : : : : +- *(6) BroadcastHashJoin [cast(ss_ticket_number#25 as bigint), cast(ss_item_sk#21 as bigint)], [sr_ticket_number#26, sr_item_sk#27], Inner, BuildRight + 
: : : : :- *(6) Project [ss_item_sk#21, ss_customer_sk#19, ss_store_sk#23, ss_ticket_number#25, ss_net_paid#14] + : : : : : +- *(6) Filter (((isnotnull(ss_ticket_number#25) && isnotnull(ss_item_sk#21)) && isnotnull(ss_store_sk#23)) && isnotnull(ss_customer_sk#19)) + : : : : : +- *(6) FileScan parquet default.store_sales[ss_item_sk#21,ss_customer_sk#19,ss_store_sk#23,ss_ticket_number#25,ss_net_paid#14] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_ticket_number), IsNotNull(ss_item_sk), IsNotNull(ss_store_sk), IsNotNull(ss_custome..., ReadSchema: struct + : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : +- *(2) Project [s_store_sk#24, s_store_name#3, s_state#8, s_zip#16] + : : : +- *(2) Filter (((isnotnull(s_market_id#28) && (s_market_id#28 = 8)) && isnotnull(s_store_sk#24)) && isnotnull(s_zip#16)) + : : : +- *(2) FileScan parquet default.store[s_store_sk#24,s_store_name#3,s_market_id#28,s_state#8,s_zip#16] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_market_id), EqualTo(s_market_id,8), IsNotNull(s_store_sk), IsNotNull(s_zip)], ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(3) Project [i_item_sk#22, i_current_price#10, i_size#13, i_color#9, i_units#12, i_manager_id#11] + : : +- *(3) Filter ((isnotnull(i_color#9) && (i_color#9 = pale)) && isnotnull(i_item_sk#22)) + : : +- *(3) FileScan parquet default.item[i_item_sk#22,i_current_price#10,i_size#13,i_color#9,i_units#12,i_manager_id#11] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_color), 
EqualTo(i_color,pale), IsNotNull(i_item_sk)], ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(upper(input[2, string, true]), input[1, string, true])) + +- *(5) Project [ca_state#7, ca_zip#18, ca_country#17] + +- *(5) Filter isnotnull(ca_zip#18) + +- *(5) FileScan parquet default.customer_address[ca_state#7,ca_zip#18,ca_country#17] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_zip)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24a/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24a/simplified.txt new file mode 100644 index 000000000..08aae40a6 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24a/simplified.txt @@ -0,0 +1,111 @@ +WholeStageCodegen + Project [c_last_name,c_first_name,s_store_name,paid] + Filter [sum(netpaid)] + Subquery #1 + WholeStageCodegen + HashAggregate [sum,count,avg(netpaid)] [avg(netpaid),(CAST(0.05 AS DECIMAL(21,6)) * CAST(avg(netpaid) AS DECIMAL(21,6))),sum,count] + InputAdapter + Exchange #8 + WholeStageCodegen + HashAggregate [sum,count,count,netpaid,sum] [sum,count,sum,count] + HashAggregate [i_units,i_color,i_size,s_state,sum(UnscaledValue(ss_net_paid)),sum,i_manager_id,c_last_name,i_current_price,c_first_name,s_store_name,ca_state] [sum(UnscaledValue(ss_net_paid)),netpaid,sum] + InputAdapter + Exchange [i_units,i_color,i_size,s_state,i_manager_id,c_last_name,i_current_price,c_first_name,s_store_name,ca_state] #9 + WholeStageCodegen + HashAggregate [i_units,i_color,i_size,ss_net_paid,s_state,sum,i_manager_id,c_last_name,sum,i_current_price,c_first_name,s_store_name,ca_state] [sum,sum] + Project [s_store_name,ca_state,i_units,i_current_price,i_size,ss_net_paid,c_last_name,s_state,i_manager_id,i_color,c_first_name] + BroadcastHashJoin 
[c_birth_country,s_zip,ca_country,ca_zip] + Project [s_store_name,i_units,i_current_price,i_size,ss_net_paid,c_last_name,s_zip,s_state,i_manager_id,i_color,c_first_name,c_birth_country] + BroadcastHashJoin [ss_customer_sk,c_customer_sk] + Project [s_store_name,i_units,i_current_price,i_size,ss_net_paid,ss_customer_sk,s_zip,s_state,i_manager_id,i_color] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [s_store_name,ss_item_sk,ss_net_paid,ss_customer_sk,s_zip,s_state] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_item_sk,ss_customer_sk,ss_store_sk,ss_net_paid] + BroadcastHashJoin [ss_ticket_number,ss_item_sk,sr_ticket_number,sr_item_sk] + Project [ss_item_sk,ss_store_sk,ss_net_paid,ss_customer_sk,ss_ticket_number] + Filter [ss_ticket_number,ss_item_sk,ss_store_sk,ss_customer_sk] + Scan parquet default.store_sales [ss_item_sk,ss_store_sk,ss_net_paid,ss_customer_sk,ss_ticket_number] [ss_item_sk,ss_store_sk,ss_net_paid,ss_customer_sk,ss_ticket_number] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen + Project [sr_item_sk,sr_ticket_number] + Filter [sr_ticket_number,sr_item_sk] + Scan parquet default.store_returns [sr_item_sk,sr_ticket_number] [sr_item_sk,sr_ticket_number] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen + Project [s_store_sk,s_store_name,s_state,s_zip] + Filter [s_market_id,s_store_sk,s_zip] + Scan parquet default.store [s_store_name,s_store_sk,s_zip,s_state,s_market_id] [s_store_name,s_store_sk,s_zip,s_state,s_market_id] + InputAdapter + BroadcastExchange #10 + WholeStageCodegen + Project [i_units,i_current_price,i_size,i_item_sk,i_manager_id,i_color] + Filter [i_item_sk] + Scan parquet default.item [i_units,i_current_price,i_size,i_item_sk,i_manager_id,i_color] [i_units,i_current_price,i_size,i_item_sk,i_manager_id,i_color] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen + Project [c_customer_sk,c_first_name,c_last_name,c_birth_country] + Filter [c_customer_sk,c_birth_country] + Scan parquet 
default.customer [c_customer_sk,c_first_name,c_last_name,c_birth_country] [c_customer_sk,c_first_name,c_last_name,c_birth_country] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen + Project [ca_state,ca_zip,ca_country] + Filter [ca_zip] + Scan parquet default.customer_address [ca_state,ca_zip,ca_country] [ca_state,ca_zip,ca_country] + HashAggregate [c_first_name,s_store_name,c_last_name,sum(netpaid),sum] [sum(netpaid),paid,sum(netpaid),sum] + InputAdapter + Exchange [c_last_name,c_first_name,s_store_name] #1 + WholeStageCodegen + HashAggregate [c_first_name,s_store_name,c_last_name,sum,netpaid,sum] [sum,sum] + HashAggregate [i_units,i_color,i_size,sum,s_state,sum(UnscaledValue(ss_net_paid)),i_manager_id,c_last_name,i_current_price,c_first_name,s_store_name,ca_state] [sum(UnscaledValue(ss_net_paid)),netpaid,sum] + InputAdapter + Exchange [i_units,i_color,i_size,s_state,i_manager_id,c_last_name,i_current_price,c_first_name,s_store_name,ca_state] #2 + WholeStageCodegen + HashAggregate [i_units,sum,i_color,i_size,ss_net_paid,sum,s_state,i_manager_id,c_last_name,i_current_price,c_first_name,s_store_name,ca_state] [sum,sum] + Project [s_store_name,ca_state,i_units,i_current_price,i_size,ss_net_paid,c_last_name,s_state,i_manager_id,i_color,c_first_name] + BroadcastHashJoin [c_birth_country,s_zip,ca_country,ca_zip] + Project [s_store_name,i_units,i_current_price,i_size,ss_net_paid,c_last_name,s_zip,s_state,i_manager_id,i_color,c_first_name,c_birth_country] + BroadcastHashJoin [ss_customer_sk,c_customer_sk] + Project [s_store_name,i_units,i_current_price,i_size,ss_net_paid,ss_customer_sk,s_zip,s_state,i_manager_id,i_color] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [s_store_name,ss_item_sk,ss_net_paid,ss_customer_sk,s_zip,s_state] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_item_sk,ss_customer_sk,ss_store_sk,ss_net_paid] + BroadcastHashJoin [ss_ticket_number,ss_item_sk,sr_ticket_number,sr_item_sk] + Project 
[ss_item_sk,ss_store_sk,ss_net_paid,ss_customer_sk,ss_ticket_number] + Filter [ss_ticket_number,ss_item_sk,ss_store_sk,ss_customer_sk] + Scan parquet default.store_sales [ss_item_sk,ss_store_sk,ss_net_paid,ss_customer_sk,ss_ticket_number] [ss_item_sk,ss_store_sk,ss_net_paid,ss_customer_sk,ss_ticket_number] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen + Project [sr_item_sk,sr_ticket_number] + Filter [sr_ticket_number,sr_item_sk] + Scan parquet default.store_returns [sr_item_sk,sr_ticket_number] [sr_item_sk,sr_ticket_number] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen + Project [s_store_sk,s_store_name,s_state,s_zip] + Filter [s_market_id,s_store_sk,s_zip] + Scan parquet default.store [s_store_name,s_store_sk,s_zip,s_state,s_market_id] [s_store_name,s_store_sk,s_zip,s_state,s_market_id] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen + Project [i_units,i_current_price,i_size,i_item_sk,i_manager_id,i_color] + Filter [i_color,i_item_sk] + Scan parquet default.item [i_units,i_current_price,i_size,i_item_sk,i_manager_id,i_color] [i_units,i_current_price,i_size,i_item_sk,i_manager_id,i_color] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen + Project [c_customer_sk,c_first_name,c_last_name,c_birth_country] + Filter [c_customer_sk,c_birth_country] + Scan parquet default.customer [c_customer_sk,c_first_name,c_last_name,c_birth_country] [c_customer_sk,c_first_name,c_last_name,c_birth_country] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen + Project [ca_state,ca_zip,ca_country] + Filter [ca_zip] + Scan parquet default.customer_address [ca_state,ca_zip,ca_country] [ca_state,ca_zip,ca_country] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24b/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24b/explain.txt new file mode 100644 index 000000000..fb2c9c939 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24b/explain.txt @@ -0,0 +1,82 @@ +== 
Physical Plan == +*(8) Project [c_last_name#1, c_first_name#2, s_store_name#3, paid#4] ++- *(8) Filter (isnotnull(sum(netpaid#5)#6) && (cast(sum(netpaid#5)#6 as decimal(33,8)) > cast(Subquery subquery3290 as decimal(33,8)))) + : +- Subquery subquery3290 + : +- *(8) HashAggregate(keys=[], functions=[avg(netpaid#5)]) + : +- Exchange SinglePartition + : +- *(7) HashAggregate(keys=[], functions=[partial_avg(netpaid#5)]) + : +- *(7) HashAggregate(keys=[c_last_name#1, c_first_name#2, s_store_name#3, ca_state#7, s_state#8, i_color#9, i_current_price#10, i_manager_id#11, i_units#12, i_size#13], functions=[sum(UnscaledValue(ss_net_paid#14))]) + : +- Exchange hashpartitioning(c_last_name#1, c_first_name#2, s_store_name#3, ca_state#7, s_state#8, i_color#9, i_current_price#10, i_manager_id#11, i_units#12, i_size#13, 200) + : +- *(6) HashAggregate(keys=[c_last_name#1, c_first_name#2, s_store_name#3, ca_state#7, s_state#8, i_color#9, i_current_price#10, i_manager_id#11, i_units#12, i_size#13], functions=[partial_sum(UnscaledValue(ss_net_paid#14))]) + : +- *(6) Project [ss_net_paid#14, s_store_name#3, s_state#8, i_current_price#10, i_size#13, i_color#9, i_units#12, i_manager_id#11, c_first_name#2, c_last_name#1, ca_state#7] + : +- *(6) BroadcastHashJoin [c_birth_country#15, s_zip#16], [upper(ca_country#17), ca_zip#18], Inner, BuildRight + : :- *(6) Project [ss_net_paid#14, s_store_name#3, s_state#8, s_zip#16, i_current_price#10, i_size#13, i_color#9, i_units#12, i_manager_id#11, c_first_name#2, c_last_name#1, c_birth_country#15] + : : +- *(6) BroadcastHashJoin [ss_customer_sk#19], [c_customer_sk#20], Inner, BuildRight + : : :- *(6) Project [ss_customer_sk#19, ss_net_paid#14, s_store_name#3, s_state#8, s_zip#16, i_current_price#10, i_size#13, i_color#9, i_units#12, i_manager_id#11] + : : : +- *(6) BroadcastHashJoin [ss_item_sk#21], [i_item_sk#22], Inner, BuildRight + : : : :- *(6) Project [ss_item_sk#21, ss_customer_sk#19, ss_net_paid#14, s_store_name#3, s_state#8, s_zip#16] + : : 
: : +- *(6) BroadcastHashJoin [ss_store_sk#23], [s_store_sk#24], Inner, BuildRight + : : : : :- *(6) Project [ss_item_sk#21, ss_customer_sk#19, ss_store_sk#23, ss_net_paid#14] + : : : : : +- *(6) BroadcastHashJoin [cast(ss_ticket_number#25 as bigint), cast(ss_item_sk#21 as bigint)], [sr_ticket_number#26, sr_item_sk#27], Inner, BuildRight + : : : : : :- *(6) Project [ss_item_sk#21, ss_customer_sk#19, ss_store_sk#23, ss_ticket_number#25, ss_net_paid#14] + : : : : : : +- *(6) Filter (((isnotnull(ss_ticket_number#25) && isnotnull(ss_item_sk#21)) && isnotnull(ss_store_sk#23)) && isnotnull(ss_customer_sk#19)) + : : : : : : +- *(6) FileScan parquet default.store_sales[ss_item_sk#21,ss_customer_sk#19,ss_store_sk#23,ss_ticket_number#25,ss_net_paid#14] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_ticket_number), IsNotNull(ss_item_sk), IsNotNull(ss_store_sk), IsNotNull(ss_custome..., ReadSchema: struct + : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : +- *(2) Project [s_store_sk#24, s_store_name#3, s_state#8, s_zip#16] + : : : : +- *(2) Filter (((isnotnull(s_market_id#28) && (s_market_id#28 = 8)) && isnotnull(s_store_sk#24)) && isnotnull(s_zip#16)) + : : : : +- *(2) FileScan parquet default.store[s_store_sk#24,s_store_name#3,s_market_id#28,s_state#8,s_zip#16] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_market_id), EqualTo(s_market_id,8), IsNotNull(s_store_sk), IsNotNull(s_zip)], ReadSchema: struct + : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : +- *(3) Project [i_item_sk#22, i_current_price#10, i_size#13, i_color#9, i_units#12, i_manager_id#11] + : : : +- *(3) Filter isnotnull(i_item_sk#22) 
+ : : : +- *(3) FileScan parquet default.item[i_item_sk#22,i_current_price#10,i_size#13,i_color#9,i_units#12,i_manager_id#11] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(upper(input[2, string, true]), input[1, string, true])) + : +- *(5) Project [ca_state#7, ca_zip#18, ca_country#17] + : +- *(5) Filter isnotnull(ca_zip#18) + : +- *(5) FileScan parquet default.customer_address[ca_state#7,ca_zip#18,ca_country#17] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_zip)], ReadSchema: struct + +- *(8) HashAggregate(keys=[c_last_name#1, c_first_name#2, s_store_name#3], functions=[sum(netpaid#5)]) + +- Exchange hashpartitioning(c_last_name#1, c_first_name#2, s_store_name#3, 200) + +- *(7) HashAggregate(keys=[c_last_name#1, c_first_name#2, s_store_name#3], functions=[partial_sum(netpaid#5)]) + +- *(7) HashAggregate(keys=[c_last_name#1, c_first_name#2, s_store_name#3, ca_state#7, s_state#8, i_color#9, i_current_price#10, i_manager_id#11, i_units#12, i_size#13], functions=[sum(UnscaledValue(ss_net_paid#14))]) + +- Exchange hashpartitioning(c_last_name#1, c_first_name#2, s_store_name#3, ca_state#7, s_state#8, i_color#9, i_current_price#10, i_manager_id#11, i_units#12, i_size#13, 200) + +- *(6) HashAggregate(keys=[c_last_name#1, c_first_name#2, s_store_name#3, ca_state#7, s_state#8, i_color#9, i_current_price#10, i_manager_id#11, i_units#12, i_size#13], functions=[partial_sum(UnscaledValue(ss_net_paid#14))]) + +- *(6) Project [ss_net_paid#14, s_store_name#3, s_state#8, i_current_price#10, i_size#13, i_color#9, i_units#12, i_manager_id#11, c_first_name#2, c_last_name#1, ca_state#7] + +- *(6) BroadcastHashJoin 
[c_birth_country#15, s_zip#16], [upper(ca_country#17), ca_zip#18], Inner, BuildRight + :- *(6) Project [ss_net_paid#14, s_store_name#3, s_state#8, s_zip#16, i_current_price#10, i_size#13, i_color#9, i_units#12, i_manager_id#11, c_first_name#2, c_last_name#1, c_birth_country#15] + : +- *(6) BroadcastHashJoin [ss_customer_sk#19], [c_customer_sk#20], Inner, BuildRight + : :- *(6) Project [ss_customer_sk#19, ss_net_paid#14, s_store_name#3, s_state#8, s_zip#16, i_current_price#10, i_size#13, i_color#9, i_units#12, i_manager_id#11] + : : +- *(6) BroadcastHashJoin [ss_item_sk#21], [i_item_sk#22], Inner, BuildRight + : : :- *(6) Project [ss_item_sk#21, ss_customer_sk#19, ss_net_paid#14, s_store_name#3, s_state#8, s_zip#16] + : : : +- *(6) BroadcastHashJoin [ss_store_sk#23], [s_store_sk#24], Inner, BuildRight + : : : :- *(6) Project [ss_item_sk#21, ss_customer_sk#19, ss_store_sk#23, ss_net_paid#14] + : : : : +- *(6) BroadcastHashJoin [cast(ss_ticket_number#25 as bigint), cast(ss_item_sk#21 as bigint)], [sr_ticket_number#26, sr_item_sk#27], Inner, BuildRight + : : : : :- *(6) Project [ss_item_sk#21, ss_customer_sk#19, ss_store_sk#23, ss_ticket_number#25, ss_net_paid#14] + : : : : : +- *(6) Filter (((isnotnull(ss_ticket_number#25) && isnotnull(ss_item_sk#21)) && isnotnull(ss_store_sk#23)) && isnotnull(ss_customer_sk#19)) + : : : : : +- *(6) FileScan parquet default.store_sales[ss_item_sk#21,ss_customer_sk#19,ss_store_sk#23,ss_ticket_number#25,ss_net_paid#14] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_ticket_number), IsNotNull(ss_item_sk), IsNotNull(ss_store_sk), IsNotNull(ss_custome..., ReadSchema: struct + : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : +- *(2) Project [s_store_sk#24, s_store_name#3, s_state#8, s_zip#16] + : : : +- *(2) Filter (((isnotnull(s_market_id#28) && 
(s_market_id#28 = 8)) && isnotnull(s_store_sk#24)) && isnotnull(s_zip#16)) + : : : +- *(2) FileScan parquet default.store[s_store_sk#24,s_store_name#3,s_market_id#28,s_state#8,s_zip#16] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_market_id), EqualTo(s_market_id,8), IsNotNull(s_store_sk), IsNotNull(s_zip)], ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(3) Project [i_item_sk#22, i_current_price#10, i_size#13, i_color#9, i_units#12, i_manager_id#11] + : : +- *(3) Filter ((isnotnull(i_color#9) && (i_color#9 = chiffon)) && isnotnull(i_item_sk#22)) + : : +- *(3) FileScan parquet default.item[i_item_sk#22,i_current_price#10,i_size#13,i_color#9,i_units#12,i_manager_id#11] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_color), EqualTo(i_color,chiffon), IsNotNull(i_item_sk)], ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(upper(input[2, string, true]), input[1, string, true])) + +- *(5) Project [ca_state#7, ca_zip#18, ca_country#17] + +- *(5) Filter isnotnull(ca_zip#18) + +- *(5) FileScan parquet default.customer_address[ca_state#7,ca_zip#18,ca_country#17] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_zip)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24b/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24b/simplified.txt new file mode 100644 index 000000000..87db312c3 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24b/simplified.txt 
@@ -0,0 +1,111 @@ +WholeStageCodegen + Project [c_last_name,c_first_name,s_store_name,paid] + Filter [sum(netpaid)] + Subquery #1 + WholeStageCodegen + HashAggregate [sum,count,avg(netpaid)] [avg(netpaid),(CAST(0.05 AS DECIMAL(21,6)) * CAST(avg(netpaid) AS DECIMAL(21,6))),sum,count] + InputAdapter + Exchange #8 + WholeStageCodegen + HashAggregate [count,sum,count,netpaid,sum] [sum,count,sum,count] + HashAggregate [i_units,sum,i_color,i_size,s_state,i_manager_id,c_last_name,sum(UnscaledValue(ss_net_paid)),i_current_price,c_first_name,s_store_name,ca_state] [sum(UnscaledValue(ss_net_paid)),netpaid,sum] + InputAdapter + Exchange [i_units,i_color,i_size,s_state,i_manager_id,c_last_name,i_current_price,c_first_name,s_store_name,ca_state] #9 + WholeStageCodegen + HashAggregate [i_units,sum,i_color,i_size,ss_net_paid,s_state,i_manager_id,c_last_name,i_current_price,c_first_name,sum,s_store_name,ca_state] [sum,sum] + Project [s_store_name,ca_state,i_units,i_current_price,i_size,ss_net_paid,c_last_name,s_state,i_manager_id,i_color,c_first_name] + BroadcastHashJoin [c_birth_country,s_zip,ca_country,ca_zip] + Project [s_store_name,i_units,i_current_price,i_size,ss_net_paid,c_last_name,s_zip,s_state,i_manager_id,i_color,c_first_name,c_birth_country] + BroadcastHashJoin [ss_customer_sk,c_customer_sk] + Project [s_store_name,i_units,i_current_price,i_size,ss_net_paid,ss_customer_sk,s_zip,s_state,i_manager_id,i_color] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [s_store_name,ss_item_sk,ss_net_paid,ss_customer_sk,s_zip,s_state] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_item_sk,ss_customer_sk,ss_store_sk,ss_net_paid] + BroadcastHashJoin [ss_ticket_number,ss_item_sk,sr_ticket_number,sr_item_sk] + Project [ss_item_sk,ss_store_sk,ss_net_paid,ss_customer_sk,ss_ticket_number] + Filter [ss_ticket_number,ss_item_sk,ss_store_sk,ss_customer_sk] + Scan parquet default.store_sales [ss_item_sk,ss_store_sk,ss_net_paid,ss_customer_sk,ss_ticket_number] 
[ss_item_sk,ss_store_sk,ss_net_paid,ss_customer_sk,ss_ticket_number] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen + Project [sr_item_sk,sr_ticket_number] + Filter [sr_ticket_number,sr_item_sk] + Scan parquet default.store_returns [sr_item_sk,sr_ticket_number] [sr_item_sk,sr_ticket_number] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen + Project [s_store_sk,s_store_name,s_state,s_zip] + Filter [s_market_id,s_store_sk,s_zip] + Scan parquet default.store [s_store_name,s_store_sk,s_zip,s_state,s_market_id] [s_store_name,s_store_sk,s_zip,s_state,s_market_id] + InputAdapter + BroadcastExchange #10 + WholeStageCodegen + Project [i_units,i_current_price,i_size,i_item_sk,i_manager_id,i_color] + Filter [i_item_sk] + Scan parquet default.item [i_units,i_current_price,i_size,i_item_sk,i_manager_id,i_color] [i_units,i_current_price,i_size,i_item_sk,i_manager_id,i_color] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen + Project [c_customer_sk,c_first_name,c_last_name,c_birth_country] + Filter [c_customer_sk,c_birth_country] + Scan parquet default.customer [c_customer_sk,c_first_name,c_last_name,c_birth_country] [c_customer_sk,c_first_name,c_last_name,c_birth_country] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen + Project [ca_state,ca_zip,ca_country] + Filter [ca_zip] + Scan parquet default.customer_address [ca_state,ca_zip,ca_country] [ca_state,ca_zip,ca_country] + HashAggregate [c_first_name,s_store_name,c_last_name,sum(netpaid),sum] [sum(netpaid),paid,sum(netpaid),sum] + InputAdapter + Exchange [c_last_name,c_first_name,s_store_name] #1 + WholeStageCodegen + HashAggregate [c_first_name,s_store_name,c_last_name,netpaid,sum,sum] [sum,sum] + HashAggregate [i_units,sum,i_color,i_size,s_state,i_manager_id,c_last_name,sum(UnscaledValue(ss_net_paid)),i_current_price,c_first_name,s_store_name,ca_state] [sum(UnscaledValue(ss_net_paid)),netpaid,sum] + InputAdapter + Exchange 
[i_units,i_color,i_size,s_state,i_manager_id,c_last_name,i_current_price,c_first_name,s_store_name,ca_state] #2 + WholeStageCodegen + HashAggregate [i_units,sum,i_color,i_size,ss_net_paid,s_state,i_manager_id,c_last_name,i_current_price,c_first_name,sum,s_store_name,ca_state] [sum,sum] + Project [s_store_name,ca_state,i_units,i_current_price,i_size,ss_net_paid,c_last_name,s_state,i_manager_id,i_color,c_first_name] + BroadcastHashJoin [c_birth_country,s_zip,ca_country,ca_zip] + Project [s_store_name,i_units,i_current_price,i_size,ss_net_paid,c_last_name,s_zip,s_state,i_manager_id,i_color,c_first_name,c_birth_country] + BroadcastHashJoin [ss_customer_sk,c_customer_sk] + Project [s_store_name,i_units,i_current_price,i_size,ss_net_paid,ss_customer_sk,s_zip,s_state,i_manager_id,i_color] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [s_store_name,ss_item_sk,ss_net_paid,ss_customer_sk,s_zip,s_state] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_item_sk,ss_customer_sk,ss_store_sk,ss_net_paid] + BroadcastHashJoin [ss_ticket_number,ss_item_sk,sr_ticket_number,sr_item_sk] + Project [ss_item_sk,ss_store_sk,ss_net_paid,ss_customer_sk,ss_ticket_number] + Filter [ss_ticket_number,ss_item_sk,ss_store_sk,ss_customer_sk] + Scan parquet default.store_sales [ss_item_sk,ss_store_sk,ss_net_paid,ss_customer_sk,ss_ticket_number] [ss_item_sk,ss_store_sk,ss_net_paid,ss_customer_sk,ss_ticket_number] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen + Project [sr_item_sk,sr_ticket_number] + Filter [sr_ticket_number,sr_item_sk] + Scan parquet default.store_returns [sr_item_sk,sr_ticket_number] [sr_item_sk,sr_ticket_number] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen + Project [s_store_sk,s_store_name,s_state,s_zip] + Filter [s_market_id,s_store_sk,s_zip] + Scan parquet default.store [s_store_name,s_store_sk,s_zip,s_state,s_market_id] [s_store_name,s_store_sk,s_zip,s_state,s_market_id] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen + Project 
[i_units,i_current_price,i_size,i_item_sk,i_manager_id,i_color] + Filter [i_color,i_item_sk] + Scan parquet default.item [i_units,i_current_price,i_size,i_item_sk,i_manager_id,i_color] [i_units,i_current_price,i_size,i_item_sk,i_manager_id,i_color] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen + Project [c_customer_sk,c_first_name,c_last_name,c_birth_country] + Filter [c_customer_sk,c_birth_country] + Scan parquet default.customer [c_customer_sk,c_first_name,c_last_name,c_birth_country] [c_customer_sk,c_first_name,c_last_name,c_birth_country] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen + Project [ca_state,ca_zip,ca_country] + Filter [ca_zip] + Scan parquet default.customer_address [ca_state,ca_zip,ca_country] [ca_state,ca_zip,ca_country] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q25/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q25/explain.txt new file mode 100644 index 000000000..0548dd397 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q25/explain.txt @@ -0,0 +1,50 @@ +== Physical Plan == +TakeOrderedAndProject(limit=100, orderBy=[i_item_id#1 ASC NULLS FIRST,i_item_desc#2 ASC NULLS FIRST,s_store_id#3 ASC NULLS FIRST,s_store_name#4 ASC NULLS FIRST], output=[i_item_id#1,i_item_desc#2,s_store_id#3,s_store_name#4,store_sales_profit#5,store_returns_loss#6,catalog_sales_profit#7]) ++- *(9) HashAggregate(keys=[i_item_id#1, i_item_desc#2, s_store_id#3, s_store_name#4], functions=[sum(UnscaledValue(ss_net_profit#8)), sum(UnscaledValue(sr_net_loss#9)), sum(UnscaledValue(cs_net_profit#10))]) + +- Exchange hashpartitioning(i_item_id#1, i_item_desc#2, s_store_id#3, s_store_name#4, 200) + +- *(8) HashAggregate(keys=[i_item_id#1, i_item_desc#2, s_store_id#3, s_store_name#4], functions=[partial_sum(UnscaledValue(ss_net_profit#8)), partial_sum(UnscaledValue(sr_net_loss#9)), partial_sum(UnscaledValue(cs_net_profit#10))]) + +- *(8) Project [ss_net_profit#8, 
sr_net_loss#9, cs_net_profit#10, s_store_id#3, s_store_name#4, i_item_id#1, i_item_desc#2] + +- *(8) BroadcastHashJoin [ss_item_sk#11], [i_item_sk#12], Inner, BuildRight + :- *(8) Project [ss_item_sk#11, ss_net_profit#8, sr_net_loss#9, cs_net_profit#10, s_store_id#3, s_store_name#4] + : +- *(8) BroadcastHashJoin [ss_store_sk#13], [s_store_sk#14], Inner, BuildRight + : :- *(8) Project [ss_item_sk#11, ss_store_sk#13, ss_net_profit#8, sr_net_loss#9, cs_net_profit#10] + : : +- *(8) BroadcastHashJoin [cs_sold_date_sk#15], [d_date_sk#16], Inner, BuildRight + : : :- *(8) Project [ss_item_sk#11, ss_store_sk#13, ss_net_profit#8, sr_net_loss#9, cs_sold_date_sk#15, cs_net_profit#10] + : : : +- *(8) BroadcastHashJoin [sr_returned_date_sk#17], [cast(d_date_sk#18 as bigint)], Inner, BuildRight + : : : :- *(8) Project [ss_item_sk#11, ss_store_sk#13, ss_net_profit#8, sr_returned_date_sk#17, sr_net_loss#9, cs_sold_date_sk#15, cs_net_profit#10] + : : : : +- *(8) BroadcastHashJoin [ss_sold_date_sk#19], [d_date_sk#20], Inner, BuildRight + : : : : :- *(8) Project [ss_sold_date_sk#19, ss_item_sk#11, ss_store_sk#13, ss_net_profit#8, sr_returned_date_sk#17, sr_net_loss#9, cs_sold_date_sk#15, cs_net_profit#10] + : : : : : +- *(8) BroadcastHashJoin [sr_customer_sk#21, sr_item_sk#22], [cast(cs_bill_customer_sk#23 as bigint), cast(cs_item_sk#24 as bigint)], Inner, BuildRight + : : : : : :- *(8) Project [ss_sold_date_sk#19, ss_item_sk#11, ss_store_sk#13, ss_net_profit#8, sr_returned_date_sk#17, sr_item_sk#22, sr_customer_sk#21, sr_net_loss#9] + : : : : : : +- *(8) BroadcastHashJoin [cast(ss_customer_sk#25 as bigint), cast(ss_item_sk#11 as bigint), cast(ss_ticket_number#26 as bigint)], [sr_customer_sk#21, sr_item_sk#22, sr_ticket_number#27], Inner, BuildRight + : : : : : : :- *(8) Project [ss_sold_date_sk#19, ss_item_sk#11, ss_customer_sk#25, ss_store_sk#13, ss_ticket_number#26, ss_net_profit#8] + : : : : : : : +- *(8) Filter ((((isnotnull(ss_ticket_number#26) && isnotnull(ss_item_sk#11)) && 
isnotnull(ss_customer_sk#25)) && isnotnull(ss_sold_date_sk#19)) && isnotnull(ss_store_sk#13)) + : : : : : : : +- *(8) FileScan parquet default.store_sales[ss_sold_date_sk#19,ss_item_sk#11,ss_customer_sk#25,ss_store_sk#13,ss_ticket_number#26,ss_net_profit#8] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_ticket_number), IsNotNull(ss_item_sk), IsNotNull(ss_customer_sk), IsNotNull(ss_sold..., ReadSchema: struct + : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : +- *(3) Project [d_date_sk#20] + : : : : +- *(3) Filter ((((isnotnull(d_moy#28) && isnotnull(d_year#29)) && (d_moy#28 = 4)) && (d_year#29 = 2001)) && isnotnull(d_date_sk#20)) + : : : : +- *(3) FileScan parquet default.date_dim[d_date_sk#20,d_year#29,d_moy#28] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), EqualTo(d_moy,4), EqualTo(d_year,2001), IsNotNull(d_date_sk)], ReadSchema: struct + : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : +- *(4) Project [d_date_sk#18] + : : : +- *(4) Filter (((((isnotnull(d_year#30) && isnotnull(d_moy#31)) && (d_moy#31 >= 4)) && (d_moy#31 <= 10)) && (d_year#30 = 2001)) && isnotnull(d_date_sk#18)) + : : : +- *(4) FileScan parquet default.date_dim[d_date_sk#18,d_year#30,d_moy#31] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), GreaterThanOrEqual(d_moy,4), LessThanOrEqual(d_moy,10), Equ..., ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(5) Project 
[d_date_sk#16] + : : +- *(5) Filter (((((isnotnull(d_year#32) && isnotnull(d_moy#33)) && (d_moy#33 >= 4)) && (d_moy#33 <= 10)) && (d_year#32 = 2001)) && isnotnull(d_date_sk#16)) + : : +- *(5) FileScan parquet default.date_dim[d_date_sk#16,d_year#32,d_moy#33] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), GreaterThanOrEqual(d_moy,4), LessThanOrEqual(d_moy,10), Equ..., ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(6) Project [s_store_sk#14, s_store_id#3, s_store_name#4] + : +- *(6) Filter isnotnull(s_store_sk#14) + : +- *(6) FileScan parquet default.store[s_store_sk#14,s_store_id#3,s_store_name#4] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_store_sk)], ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(7) Project [i_item_sk#12, i_item_id#1, i_item_desc#2] + +- *(7) Filter isnotnull(i_item_sk#12) + +- *(7) FileScan parquet default.item[i_item_sk#12,i_item_id#1,i_item_desc#2] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q25/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q25/simplified.txt new file mode 100644 index 000000000..20dec06b1 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q25/simplified.txt @@ -0,0 +1,66 @@ +TakeOrderedAndProject 
[i_item_id,i_item_desc,store_sales_profit,catalog_sales_profit,s_store_id,store_returns_loss,s_store_name] + WholeStageCodegen + HashAggregate [i_item_id,i_item_desc,sum,sum,s_store_id,sum(UnscaledValue(cs_net_profit)),sum(UnscaledValue(sr_net_loss)),sum(UnscaledValue(ss_net_profit)),s_store_name,sum] [sum,store_sales_profit,sum,catalog_sales_profit,sum(UnscaledValue(cs_net_profit)),sum(UnscaledValue(sr_net_loss)),store_returns_loss,sum(UnscaledValue(ss_net_profit)),sum] + InputAdapter + Exchange [i_item_id,i_item_desc,s_store_id,s_store_name] #1 + WholeStageCodegen + HashAggregate [i_item_id,i_item_desc,sum,sum,s_store_id,cs_net_profit,sum,sum,sum,s_store_name,sum,sr_net_loss,ss_net_profit] [sum,sum,sum,sum,sum,sum] + Project [s_store_id,s_store_name,ss_net_profit,cs_net_profit,i_item_desc,sr_net_loss,i_item_id] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [s_store_id,s_store_name,ss_item_sk,ss_net_profit,cs_net_profit,sr_net_loss] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_item_sk,ss_store_sk,ss_net_profit,cs_net_profit,sr_net_loss] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [ss_item_sk,ss_store_sk,cs_sold_date_sk,ss_net_profit,cs_net_profit,sr_net_loss] + BroadcastHashJoin [sr_returned_date_sk,d_date_sk] + Project [ss_item_sk,ss_store_sk,cs_sold_date_sk,ss_net_profit,cs_net_profit,sr_returned_date_sk,sr_net_loss] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_item_sk,ss_store_sk,cs_sold_date_sk,ss_net_profit,cs_net_profit,ss_sold_date_sk,sr_returned_date_sk,sr_net_loss] + BroadcastHashJoin [sr_customer_sk,sr_item_sk,cs_bill_customer_sk,cs_item_sk] + Project [ss_item_sk,ss_store_sk,sr_customer_sk,sr_item_sk,ss_net_profit,ss_sold_date_sk,sr_returned_date_sk,sr_net_loss] + BroadcastHashJoin [sr_customer_sk,sr_ticket_number,ss_customer_sk,sr_item_sk,ss_ticket_number,ss_item_sk] + Project [ss_item_sk,ss_store_sk,ss_customer_sk,ss_net_profit,ss_sold_date_sk,ss_ticket_number] + Filter 
[ss_sold_date_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_item_sk] + Scan parquet default.store_sales [ss_item_sk,ss_store_sk,ss_customer_sk,ss_net_profit,ss_sold_date_sk,ss_ticket_number] [ss_item_sk,ss_store_sk,ss_customer_sk,ss_net_profit,ss_sold_date_sk,ss_ticket_number] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen + Project [sr_customer_sk,sr_ticket_number,sr_item_sk,sr_returned_date_sk,sr_net_loss] + Filter [sr_ticket_number,sr_customer_sk,sr_item_sk,sr_returned_date_sk] + Scan parquet default.store_returns [sr_customer_sk,sr_ticket_number,sr_item_sk,sr_returned_date_sk,sr_net_loss] [sr_customer_sk,sr_ticket_number,sr_item_sk,sr_returned_date_sk,sr_net_loss] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen + Project [cs_sold_date_sk,cs_bill_customer_sk,cs_item_sk,cs_net_profit] + Filter [cs_bill_customer_sk,cs_item_sk,cs_sold_date_sk] + Scan parquet default.catalog_sales [cs_sold_date_sk,cs_bill_customer_sk,cs_item_sk,cs_net_profit] [cs_sold_date_sk,cs_bill_customer_sk,cs_item_sk,cs_net_profit] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen + Project [d_date_sk] + Filter [d_moy,d_year,d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_year,d_moy] [d_date_sk,d_year,d_moy] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen + Project [d_date_sk] + Filter [d_moy,d_year,d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_year,d_moy] [d_date_sk,d_year,d_moy] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen + Project [d_date_sk] + Filter [d_moy,d_year,d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_year,d_moy] [d_date_sk,d_year,d_moy] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen + Project [s_store_sk,s_store_id,s_store_name] + Filter [s_store_sk] + Scan parquet default.store [s_store_sk,s_store_id,s_store_name] [s_store_sk,s_store_id,s_store_name] + InputAdapter + BroadcastExchange #8 + WholeStageCodegen + Project [i_item_sk,i_item_id,i_item_desc] + Filter [i_item_sk] + Scan parquet 
default.item [i_item_sk,i_item_id,i_item_desc] [i_item_sk,i_item_id,i_item_desc] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q26/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q26/explain.txt new file mode 100644 index 000000000..ac72aff5b --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q26/explain.txt @@ -0,0 +1,32 @@ +== Physical Plan == +TakeOrderedAndProject(limit=100, orderBy=[i_item_id#1 ASC NULLS FIRST], output=[i_item_id#1,agg1#2,agg2#3,agg3#4,agg4#5]) ++- *(6) HashAggregate(keys=[i_item_id#1], functions=[avg(cast(cs_quantity#6 as bigint)), avg(UnscaledValue(cs_list_price#7)), avg(UnscaledValue(cs_coupon_amt#8)), avg(UnscaledValue(cs_sales_price#9))]) + +- Exchange hashpartitioning(i_item_id#1, 200) + +- *(5) HashAggregate(keys=[i_item_id#1], functions=[partial_avg(cast(cs_quantity#6 as bigint)), partial_avg(UnscaledValue(cs_list_price#7)), partial_avg(UnscaledValue(cs_coupon_amt#8)), partial_avg(UnscaledValue(cs_sales_price#9))]) + +- *(5) Project [cs_quantity#6, cs_list_price#7, cs_sales_price#9, cs_coupon_amt#8, i_item_id#1] + +- *(5) BroadcastHashJoin [cs_promo_sk#10], [p_promo_sk#11], Inner, BuildRight + :- *(5) Project [cs_promo_sk#10, cs_quantity#6, cs_list_price#7, cs_sales_price#9, cs_coupon_amt#8, i_item_id#1] + : +- *(5) BroadcastHashJoin [cs_item_sk#12], [i_item_sk#13], Inner, BuildRight + : :- *(5) Project [cs_item_sk#12, cs_promo_sk#10, cs_quantity#6, cs_list_price#7, cs_sales_price#9, cs_coupon_amt#8] + : : +- *(5) BroadcastHashJoin [cs_sold_date_sk#14], [d_date_sk#15], Inner, BuildRight + : : :- *(5) Project [cs_sold_date_sk#14, cs_item_sk#12, cs_promo_sk#10, cs_quantity#6, cs_list_price#7, cs_sales_price#9, cs_coupon_amt#8] + : : : +- *(5) BroadcastHashJoin [cs_bill_cdemo_sk#16], [cd_demo_sk#17], Inner, BuildRight + : : : :- *(5) Project [cs_sold_date_sk#14, cs_bill_cdemo_sk#16, cs_item_sk#12, cs_promo_sk#10, cs_quantity#6, cs_list_price#7, 
cs_sales_price#9, cs_coupon_amt#8] + : : : : +- *(5) Filter (((isnotnull(cs_bill_cdemo_sk#16) && isnotnull(cs_sold_date_sk#14)) && isnotnull(cs_item_sk#12)) && isnotnull(cs_promo_sk#10)) + : : : : +- *(5) FileScan parquet default.catalog_sales[cs_sold_date_sk#14,cs_bill_cdemo_sk#16,cs_item_sk#12,cs_promo_sk#10,cs_quantity#6,cs_list_price#7,cs_sales_price#9,cs_coupon_amt#8] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_bill_cdemo_sk), IsNotNull(cs_sold_date_sk), IsNotNull(cs_item_sk), IsNotNull(cs_pro..., ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(2) Project [d_date_sk#15] + : : +- *(2) Filter ((isnotnull(d_year#21) && (d_year#21 = 2000)) && isnotnull(d_date_sk#15)) + : : +- *(2) FileScan parquet default.date_dim[d_date_sk#15,d_year#21] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2000), IsNotNull(d_date_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(3) Project [i_item_sk#13, i_item_id#1] + : +- *(3) Filter isnotnull(i_item_sk#13) + : +- *(3) FileScan parquet default.item[i_item_sk#13,i_item_id#1] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(4) Project [p_promo_sk#11] + +- *(4) Filter (((p_channel_email#22 = N) || (p_channel_event#23 = N)) && isnotnull(p_promo_sk#11)) + +- *(4) FileScan parquet 
default.promotion[p_promo_sk#11,p_channel_email#22,p_channel_event#23] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/promotion], PartitionFilters: [], PushedFilters: [Or(EqualTo(p_channel_email,N),EqualTo(p_channel_event,N)), IsNotNull(p_promo_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q26/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q26/simplified.txt new file mode 100644 index 000000000..da9dc6cdf --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q26/simplified.txt @@ -0,0 +1,42 @@ +TakeOrderedAndProject [i_item_id,agg3,agg1,agg2,agg4] + WholeStageCodegen + HashAggregate [i_item_id,count,avg(UnscaledValue(cs_list_price)),sum,sum,count,sum,avg(UnscaledValue(cs_sales_price)),avg(UnscaledValue(cs_coupon_amt)),count,avg(cast(cs_quantity as bigint)),sum,count] [count,avg(UnscaledValue(cs_list_price)),sum,sum,count,sum,avg(UnscaledValue(cs_sales_price)),avg(UnscaledValue(cs_coupon_amt)),agg3,count,agg1,agg2,agg4,avg(cast(cs_quantity as bigint)),sum,count] + InputAdapter + Exchange [i_item_id] #1 + WholeStageCodegen + HashAggregate [cs_list_price,cs_coupon_amt,i_item_id,count,count,sum,sum,count,sum,sum,count,count,count,sum,sum,sum,count,cs_sales_price,cs_quantity,sum,count] [count,count,sum,sum,count,sum,sum,count,count,count,sum,sum,sum,count,sum,count] + Project [cs_coupon_amt,cs_quantity,cs_sales_price,cs_list_price,i_item_id] + BroadcastHashJoin [cs_promo_sk,p_promo_sk] + Project [cs_promo_sk,cs_coupon_amt,cs_quantity,cs_sales_price,cs_list_price,i_item_id] + BroadcastHashJoin [cs_item_sk,i_item_sk] + Project [cs_promo_sk,cs_coupon_amt,cs_quantity,cs_sales_price,cs_item_sk,cs_list_price] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_promo_sk,cs_coupon_amt,cs_quantity,cs_sales_price,cs_sold_date_sk,cs_item_sk,cs_list_price] + 
BroadcastHashJoin [cs_bill_cdemo_sk,cd_demo_sk] + Project [cs_promo_sk,cs_coupon_amt,cs_bill_cdemo_sk,cs_quantity,cs_sales_price,cs_sold_date_sk,cs_item_sk,cs_list_price] + Filter [cs_bill_cdemo_sk,cs_sold_date_sk,cs_item_sk,cs_promo_sk] + Scan parquet default.catalog_sales [cs_promo_sk,cs_coupon_amt,cs_bill_cdemo_sk,cs_quantity,cs_sales_price,cs_sold_date_sk,cs_item_sk,cs_list_price] [cs_promo_sk,cs_coupon_amt,cs_bill_cdemo_sk,cs_quantity,cs_sales_price,cs_sold_date_sk,cs_item_sk,cs_list_price] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen + Project [cd_demo_sk] + Filter [cd_gender,cd_marital_status,cd_education_status,cd_demo_sk] + Scan parquet default.customer_demographics [cd_demo_sk,cd_gender,cd_marital_status,cd_education_status] [cd_demo_sk,cd_gender,cd_marital_status,cd_education_status] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen + Project [d_date_sk] + Filter [d_year,d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_year] [d_date_sk,d_year] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen + Project [i_item_sk,i_item_id] + Filter [i_item_sk] + Scan parquet default.item [i_item_sk,i_item_id] [i_item_sk,i_item_id] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen + Project [p_promo_sk] + Filter [p_channel_email,p_channel_event,p_promo_sk] + Scan parquet default.promotion [p_promo_sk,p_channel_email,p_channel_event] [p_promo_sk,p_channel_email,p_channel_event] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q27/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q27/explain.txt new file mode 100644 index 000000000..8d5a61224 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q27/explain.txt @@ -0,0 +1,33 @@ +== Physical Plan == +TakeOrderedAndProject(limit=100, orderBy=[i_item_id#1 ASC NULLS FIRST,s_state#2 ASC NULLS FIRST], output=[i_item_id#1,s_state#2,g_state#3,agg1#4,agg2#5,agg3#6,agg4#7]) ++- *(6) HashAggregate(keys=[i_item_id#1, 
s_state#2, spark_grouping_id#8], functions=[avg(cast(ss_quantity#9 as bigint)), avg(UnscaledValue(ss_list_price#10)), avg(UnscaledValue(ss_coupon_amt#11)), avg(UnscaledValue(ss_sales_price#12))]) + +- Exchange hashpartitioning(i_item_id#1, s_state#2, spark_grouping_id#8, 200) + +- *(5) HashAggregate(keys=[i_item_id#1, s_state#2, spark_grouping_id#8], functions=[partial_avg(cast(ss_quantity#9 as bigint)), partial_avg(UnscaledValue(ss_list_price#10)), partial_avg(UnscaledValue(ss_coupon_amt#11)), partial_avg(UnscaledValue(ss_sales_price#12))]) + +- *(5) Expand [List(ss_quantity#9, ss_list_price#10, ss_sales_price#12, ss_coupon_amt#11, i_item_id#13, s_state#14, 0), List(ss_quantity#9, ss_list_price#10, ss_sales_price#12, ss_coupon_amt#11, i_item_id#13, null, 1), List(ss_quantity#9, ss_list_price#10, ss_sales_price#12, ss_coupon_amt#11, null, null, 3)], [ss_quantity#9, ss_list_price#10, ss_sales_price#12, ss_coupon_amt#11, i_item_id#1, s_state#2, spark_grouping_id#8] + +- *(5) Project [ss_quantity#9, ss_list_price#10, ss_sales_price#12, ss_coupon_amt#11, i_item_id#15 AS i_item_id#13, s_state#16 AS s_state#14] + +- *(5) BroadcastHashJoin [ss_item_sk#17], [i_item_sk#18], Inner, BuildRight + :- *(5) Project [ss_item_sk#17, ss_quantity#9, ss_list_price#10, ss_sales_price#12, ss_coupon_amt#11, s_state#16] + : +- *(5) BroadcastHashJoin [ss_store_sk#19], [s_store_sk#20], Inner, BuildRight + : :- *(5) Project [ss_item_sk#17, ss_store_sk#19, ss_quantity#9, ss_list_price#10, ss_sales_price#12, ss_coupon_amt#11] + : : +- *(5) BroadcastHashJoin [ss_sold_date_sk#21], [d_date_sk#22], Inner, BuildRight + : : :- *(5) Project [ss_sold_date_sk#21, ss_item_sk#17, ss_store_sk#19, ss_quantity#9, ss_list_price#10, ss_sales_price#12, ss_coupon_amt#11] + : : : +- *(5) BroadcastHashJoin [ss_cdemo_sk#23], [cd_demo_sk#24], Inner, BuildRight + : : : :- *(5) Project [ss_sold_date_sk#21, ss_item_sk#17, ss_cdemo_sk#23, ss_store_sk#19, ss_quantity#9, ss_list_price#10, ss_sales_price#12, 
ss_coupon_amt#11] + : : : : +- *(5) Filter (((isnotnull(ss_cdemo_sk#23) && isnotnull(ss_sold_date_sk#21)) && isnotnull(ss_store_sk#19)) && isnotnull(ss_item_sk#17)) + : : : : +- *(5) FileScan parquet default.store_sales[ss_sold_date_sk#21,ss_item_sk#17,ss_cdemo_sk#23,ss_store_sk#19,ss_quantity#9,ss_list_price#10,ss_sales_price#12,ss_coupon_amt#11] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_cdemo_sk), IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk), IsNotNull(ss_item_sk)], ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(2) Project [d_date_sk#22] + : : +- *(2) Filter ((isnotnull(d_year#28) && (d_year#28 = 2002)) && isnotnull(d_date_sk#22)) + : : +- *(2) FileScan parquet default.date_dim[d_date_sk#22,d_year#28] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2002), IsNotNull(d_date_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(3) Project [s_store_sk#20, s_state#16] + : +- *(3) Filter ((isnotnull(s_state#16) && (s_state#16 = TN)) && isnotnull(s_store_sk#20)) + : +- *(3) FileScan parquet default.store[s_store_sk#20,s_state#16] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_state), EqualTo(s_state,TN), IsNotNull(s_store_sk)], ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(4) Project [i_item_sk#18, i_item_id#15] + +- *(4) Filter isnotnull(i_item_sk#18) + +- *(4) FileScan parquet default.item[i_item_sk#18,i_item_id#15] 
Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q27/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q27/simplified.txt new file mode 100644 index 000000000..5eead43da --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q27/simplified.txt @@ -0,0 +1,43 @@ +TakeOrderedAndProject [agg4,i_item_id,s_state,agg3,g_state,agg1,agg2] + WholeStageCodegen + HashAggregate [i_item_id,sum,count,count,avg(UnscaledValue(ss_list_price)),sum,count,avg(cast(ss_quantity as bigint)),sum,s_state,count,avg(UnscaledValue(ss_coupon_amt)),avg(UnscaledValue(ss_sales_price)),sum,spark_grouping_id] [agg4,sum,count,count,avg(UnscaledValue(ss_list_price)),sum,count,avg(cast(ss_quantity as bigint)),sum,agg3,count,avg(UnscaledValue(ss_coupon_amt)),avg(UnscaledValue(ss_sales_price)),sum,g_state,agg1,agg2] + InputAdapter + Exchange [i_item_id,s_state,spark_grouping_id] #1 + WholeStageCodegen + HashAggregate [i_item_id,sum,sum,count,sum,count,count,sum,sum,count,sum,ss_coupon_amt,count,ss_sales_price,sum,ss_list_price,count,s_state,count,count,ss_quantity,sum,spark_grouping_id] [sum,sum,count,sum,count,count,sum,sum,count,sum,count,sum,count,count,count,sum] + Expand [i_item_id,ss_coupon_amt,ss_sales_price,ss_list_price,ss_quantity,s_state] + Project [i_item_id,ss_coupon_amt,s_state,ss_sales_price,ss_list_price,ss_quantity] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [ss_coupon_amt,ss_quantity,ss_list_price,ss_item_sk,s_state,ss_sales_price] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_coupon_amt,ss_quantity,ss_list_price,ss_item_sk,ss_store_sk,ss_sales_price] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project 
[ss_coupon_amt,ss_quantity,ss_list_price,ss_item_sk,ss_store_sk,ss_sales_price,ss_sold_date_sk] + BroadcastHashJoin [ss_cdemo_sk,cd_demo_sk] + Project [ss_coupon_amt,ss_quantity,ss_list_price,ss_item_sk,ss_store_sk,ss_sales_price,ss_cdemo_sk,ss_sold_date_sk] + Filter [ss_cdemo_sk,ss_sold_date_sk,ss_store_sk,ss_item_sk] + Scan parquet default.store_sales [ss_coupon_amt,ss_quantity,ss_list_price,ss_item_sk,ss_store_sk,ss_sales_price,ss_cdemo_sk,ss_sold_date_sk] [ss_coupon_amt,ss_quantity,ss_list_price,ss_item_sk,ss_store_sk,ss_sales_price,ss_cdemo_sk,ss_sold_date_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen + Project [cd_demo_sk] + Filter [cd_gender,cd_marital_status,cd_education_status,cd_demo_sk] + Scan parquet default.customer_demographics [cd_demo_sk,cd_gender,cd_marital_status,cd_education_status] [cd_demo_sk,cd_gender,cd_marital_status,cd_education_status] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen + Project [d_date_sk] + Filter [d_year,d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_year] [d_date_sk,d_year] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen + Project [s_store_sk,s_state] + Filter [s_state,s_store_sk] + Scan parquet default.store [s_store_sk,s_state] [s_store_sk,s_state] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen + Project [i_item_sk,i_item_id] + Filter [i_item_sk] + Scan parquet default.item [i_item_sk,i_item_id] [i_item_sk,i_item_id] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q28/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q28/explain.txt new file mode 100644 index 000000000..2679a8bf5 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q28/explain.txt @@ -0,0 +1,66 @@ +== Physical Plan == +CollectLimit 100 ++- BroadcastNestedLoopJoin BuildRight, Inner + :- BroadcastNestedLoopJoin BuildRight, Inner + : :- BroadcastNestedLoopJoin BuildRight, Inner + : : :- BroadcastNestedLoopJoin BuildRight, 
Inner + : : : :- BroadcastNestedLoopJoin BuildRight, Inner + : : : : :- *(3) HashAggregate(keys=[], functions=[avg(UnscaledValue(ss_list_price#1)), count(ss_list_price#1), count(distinct ss_list_price#1)]) + : : : : : +- Exchange SinglePartition + : : : : : +- *(2) HashAggregate(keys=[], functions=[merge_avg(UnscaledValue(ss_list_price#1)), merge_count(ss_list_price#1), partial_count(distinct ss_list_price#1)]) + : : : : : +- *(2) HashAggregate(keys=[ss_list_price#1], functions=[merge_avg(UnscaledValue(ss_list_price#1)), merge_count(ss_list_price#1)]) + : : : : : +- Exchange hashpartitioning(ss_list_price#1, 200) + : : : : : +- *(1) HashAggregate(keys=[ss_list_price#1], functions=[partial_avg(UnscaledValue(ss_list_price#1)), partial_count(ss_list_price#1)]) + : : : : : +- *(1) Project [ss_list_price#1] + : : : : : +- *(1) Filter (((isnotnull(ss_quantity#2) && (ss_quantity#2 >= 0)) && (ss_quantity#2 <= 5)) && ((((ss_list_price#1 >= 8.00) && (cast(ss_list_price#1 as decimal(12,2)) <= 18.00)) || ((ss_coupon_amt#3 >= 459.00) && (cast(ss_coupon_amt#3 as decimal(12,2)) <= 1459.00))) || ((ss_wholesale_cost#4 >= 57.00) && (cast(ss_wholesale_cost#4 as decimal(12,2)) <= 77.00)))) + : : : : : +- *(1) FileScan parquet default.store_sales[ss_quantity#2,ss_wholesale_cost#4,ss_list_price#1,ss_coupon_amt#3] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,0), LessThanOrEqual(ss_quantity,5)], ReadSchema: struct= 6)) && (ss_quantity#2 <= 10)) && ((((ss_list_price#1 >= 90.00) && (cast(ss_list_price#1 as decimal(12,2)) <= 100.00)) || ((ss_coupon_amt#3 >= 2323.00) && (cast(ss_coupon_amt#3 as decimal(12,2)) <= 3323.00))) || ((ss_wholesale_cost#4 >= 31.00) && (cast(ss_wholesale_cost#4 as decimal(12,2)) <= 51.00)))) + : : : : +- *(4) FileScan parquet 
default.store_sales[ss_quantity#2,ss_wholesale_cost#4,ss_list_price#1,ss_coupon_amt#3] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,6), LessThanOrEqual(ss_quantity,10)], ReadSchema: struct= 11)) && (ss_quantity#2 <= 15)) && ((((ss_list_price#1 >= 142.00) && (cast(ss_list_price#1 as decimal(12,2)) <= 152.00)) || ((ss_coupon_amt#3 >= 12214.00) && (cast(ss_coupon_amt#3 as decimal(12,2)) <= 13214.00))) || ((ss_wholesale_cost#4 >= 79.00) && (cast(ss_wholesale_cost#4 as decimal(12,2)) <= 99.00)))) + : : : +- *(7) FileScan parquet default.store_sales[ss_quantity#2,ss_wholesale_cost#4,ss_list_price#1,ss_coupon_amt#3] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,11), LessThanOrEqual(ss_quantity,15)], ReadSchema: struct= 16)) && (ss_quantity#2 <= 20)) && ((((ss_list_price#1 >= 135.00) && (cast(ss_list_price#1 as decimal(12,2)) <= 145.00)) || ((ss_coupon_amt#3 >= 6071.00) && (cast(ss_coupon_amt#3 as decimal(12,2)) <= 7071.00))) || ((ss_wholesale_cost#4 >= 38.00) && (cast(ss_wholesale_cost#4 as decimal(12,2)) <= 58.00)))) + : : +- *(10) FileScan parquet default.store_sales[ss_quantity#2,ss_wholesale_cost#4,ss_list_price#1,ss_coupon_amt#3] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,16), LessThanOrEqual(ss_quantity,20)], ReadSchema: struct= 21)) && (ss_quantity#2 <= 25)) && ((((ss_list_price#1 >= 122.00) && (cast(ss_list_price#1 as decimal(12,2)) <= 132.00)) || ((ss_coupon_amt#3 >= 836.00) && (cast(ss_coupon_amt#3 as decimal(12,2)) <= 1836.00))) 
|| ((ss_wholesale_cost#4 >= 17.00) && (cast(ss_wholesale_cost#4 as decimal(12,2)) <= 37.00)))) + : +- *(13) FileScan parquet default.store_sales[ss_quantity#2,ss_wholesale_cost#4,ss_list_price#1,ss_coupon_amt#3] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,21), LessThanOrEqual(ss_quantity,25)], ReadSchema: struct= 26)) && (ss_quantity#2 <= 30)) && ((((ss_list_price#1 >= 154.00) && (cast(ss_list_price#1 as decimal(12,2)) <= 164.00)) || ((ss_coupon_amt#3 >= 7326.00) && (cast(ss_coupon_amt#3 as decimal(12,2)) <= 8326.00))) || ((ss_wholesale_cost#4 >= 7.00) && (cast(ss_wholesale_cost#4 as decimal(12,2)) <= 27.00)))) + +- *(16) FileScan parquet default.store_sales[ss_quantity#2,ss_wholesale_cost#4,ss_list_price#1,ss_coupon_amt#3] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,26), LessThanOrEqual(ss_quantity,30)], ReadSchema: struct + : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : +- *(3) Project [d_date_sk#20] + : : : : +- *(3) Filter ((((isnotnull(d_moy#28) && isnotnull(d_year#29)) && (d_moy#28 = 9)) && (d_year#29 = 1999)) && isnotnull(d_date_sk#20)) + : : : : +- *(3) FileScan parquet default.date_dim[d_date_sk#20,d_year#29,d_moy#28] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), EqualTo(d_moy,9), EqualTo(d_year,1999), IsNotNull(d_date_sk)], ReadSchema: struct + : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : +- *(4) Project [d_date_sk#18] + : 
: : +- *(4) Filter (((((isnotnull(d_year#30) && isnotnull(d_moy#31)) && (d_moy#31 >= 9)) && (d_moy#31 <= 12)) && (d_year#30 = 1999)) && isnotnull(d_date_sk#18)) + : : : +- *(4) FileScan parquet default.date_dim[d_date_sk#18,d_year#30,d_moy#31] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), GreaterThanOrEqual(d_moy,9), LessThanOrEqual(d_moy,12), Equ..., ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(5) Project [d_date_sk#16] + : : +- *(5) Filter (d_year#32 IN (1999,2000,2001) && isnotnull(d_date_sk#16)) + : : +- *(5) FileScan parquet default.date_dim[d_date_sk#16,d_year#32] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [In(d_year, [1999,2000,2001]), IsNotNull(d_date_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(6) Project [s_store_sk#14, s_store_id#3, s_store_name#4] + : +- *(6) Filter isnotnull(s_store_sk#14) + : +- *(6) FileScan parquet default.store[s_store_sk#14,s_store_id#3,s_store_name#4] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_store_sk)], ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(7) Project [i_item_sk#12, i_item_id#1, i_item_desc#2] + +- *(7) Filter isnotnull(i_item_sk#12) + +- *(7) FileScan parquet default.item[i_item_sk#12,i_item_id#1,i_item_desc#2] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: 
[IsNotNull(i_item_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q29/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q29/simplified.txt new file mode 100644 index 000000000..64fa8afa9 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q29/simplified.txt @@ -0,0 +1,66 @@ +TakeOrderedAndProject [store_returns_quantity,i_item_id,i_item_desc,store_sales_quantity,s_store_id,catalog_sales_quantity,s_store_name] + WholeStageCodegen + HashAggregate [sum(cast(cs_quantity as bigint)),i_item_id,i_item_desc,sum,sum(cast(ss_quantity as bigint)),sum,sum(cast(sr_return_quantity as bigint)),s_store_id,sum,s_store_name] [sum(cast(cs_quantity as bigint)),store_returns_quantity,sum,sum(cast(ss_quantity as bigint)),store_sales_quantity,sum,sum(cast(sr_return_quantity as bigint)),sum,catalog_sales_quantity] + InputAdapter + Exchange [i_item_id,i_item_desc,s_store_id,s_store_name] #1 + WholeStageCodegen + HashAggregate [i_item_id,sum,i_item_desc,sum,sum,sum,s_store_id,sr_return_quantity,sum,sum,ss_quantity,cs_quantity,s_store_name] [sum,sum,sum,sum,sum,sum] + Project [s_store_id,s_store_name,ss_quantity,cs_quantity,sr_return_quantity,i_item_desc,i_item_id] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [s_store_id,s_store_name,ss_quantity,ss_item_sk,cs_quantity,sr_return_quantity] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_quantity,ss_item_sk,cs_quantity,ss_store_sk,sr_return_quantity] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [ss_quantity,ss_item_sk,cs_quantity,ss_store_sk,sr_return_quantity,cs_sold_date_sk] + BroadcastHashJoin [sr_returned_date_sk,d_date_sk] + Project [ss_quantity,ss_item_sk,cs_quantity,ss_store_sk,sr_return_quantity,cs_sold_date_sk,sr_returned_date_sk] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project 
[ss_quantity,ss_item_sk,cs_quantity,ss_store_sk,sr_return_quantity,cs_sold_date_sk,ss_sold_date_sk,sr_returned_date_sk] + BroadcastHashJoin [sr_customer_sk,sr_item_sk,cs_bill_customer_sk,cs_item_sk] + Project [ss_quantity,ss_item_sk,ss_store_sk,sr_return_quantity,sr_customer_sk,sr_item_sk,ss_sold_date_sk,sr_returned_date_sk] + BroadcastHashJoin [sr_customer_sk,sr_ticket_number,ss_customer_sk,sr_item_sk,ss_ticket_number,ss_item_sk] + Project [ss_quantity,ss_item_sk,ss_store_sk,ss_customer_sk,ss_sold_date_sk,ss_ticket_number] + Filter [ss_sold_date_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_item_sk] + Scan parquet default.store_sales [ss_quantity,ss_item_sk,ss_store_sk,ss_customer_sk,ss_sold_date_sk,ss_ticket_number] [ss_quantity,ss_item_sk,ss_store_sk,ss_customer_sk,ss_sold_date_sk,ss_ticket_number] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen + Project [sr_return_quantity,sr_customer_sk,sr_ticket_number,sr_item_sk,sr_returned_date_sk] + Filter [sr_ticket_number,sr_customer_sk,sr_item_sk,sr_returned_date_sk] + Scan parquet default.store_returns [sr_return_quantity,sr_customer_sk,sr_ticket_number,sr_item_sk,sr_returned_date_sk] [sr_return_quantity,sr_customer_sk,sr_ticket_number,sr_item_sk,sr_returned_date_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen + Project [cs_sold_date_sk,cs_bill_customer_sk,cs_item_sk,cs_quantity] + Filter [cs_bill_customer_sk,cs_item_sk,cs_sold_date_sk] + Scan parquet default.catalog_sales [cs_sold_date_sk,cs_bill_customer_sk,cs_item_sk,cs_quantity] [cs_sold_date_sk,cs_bill_customer_sk,cs_item_sk,cs_quantity] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen + Project [d_date_sk] + Filter [d_moy,d_year,d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_year,d_moy] [d_date_sk,d_year,d_moy] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen + Project [d_date_sk] + Filter [d_moy,d_year,d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_year,d_moy] [d_date_sk,d_year,d_moy] + 
InputAdapter + BroadcastExchange #6 + WholeStageCodegen + Project [d_date_sk] + Filter [d_year,d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_year] [d_date_sk,d_year] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen + Project [s_store_sk,s_store_id,s_store_name] + Filter [s_store_sk] + Scan parquet default.store [s_store_sk,s_store_id,s_store_name] [s_store_sk,s_store_id,s_store_name] + InputAdapter + BroadcastExchange #8 + WholeStageCodegen + Project [i_item_sk,i_item_id,i_item_desc] + Filter [i_item_sk] + Scan parquet default.item [i_item_sk,i_item_id,i_item_desc] [i_item_sk,i_item_id,i_item_desc] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q3/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q3/explain.txt new file mode 100644 index 000000000..8d08a8b03 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q3/explain.txt @@ -0,0 +1,20 @@ +== Physical Plan == +TakeOrderedAndProject(limit=100, orderBy=[d_year#1 ASC NULLS FIRST,sum_agg#2 DESC NULLS LAST,brand_id#3 ASC NULLS FIRST], output=[d_year#1,brand_id#3,brand#4,sum_agg#2]) ++- *(4) HashAggregate(keys=[d_year#1, i_brand#5, i_brand_id#6], functions=[sum(UnscaledValue(ss_ext_sales_price#7))]) + +- Exchange hashpartitioning(d_year#1, i_brand#5, i_brand_id#6, 200) + +- *(3) HashAggregate(keys=[d_year#1, i_brand#5, i_brand_id#6], functions=[partial_sum(UnscaledValue(ss_ext_sales_price#7))]) + +- *(3) Project [d_year#1, ss_ext_sales_price#7, i_brand_id#6, i_brand#5] + +- *(3) BroadcastHashJoin [ss_item_sk#8], [i_item_sk#9], Inner, BuildRight + :- *(3) Project [d_year#1, ss_item_sk#8, ss_ext_sales_price#7] + : +- *(3) BroadcastHashJoin [d_date_sk#10], [ss_sold_date_sk#11], Inner, BuildRight + : :- *(3) Project [d_date_sk#10, d_year#1] + : : +- *(3) Filter ((isnotnull(d_moy#12) && (d_moy#12 = 11)) && isnotnull(d_date_sk#10)) + : : +- *(3) FileScan parquet default.date_dim[d_date_sk#10,d_year#1,d_moy#12] Batched: true, 
Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_moy), EqualTo(d_moy,11), IsNotNull(d_date_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(1) Project [ss_sold_date_sk#11, ss_item_sk#8, ss_ext_sales_price#7] + : +- *(1) Filter (isnotnull(ss_sold_date_sk#11) && isnotnull(ss_item_sk#8)) + : +- *(1) FileScan parquet default.store_sales[ss_sold_date_sk#11,ss_item_sk#8,ss_ext_sales_price#7] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_item_sk)], ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(2) Project [i_item_sk#9, i_brand_id#6, i_brand#5] + +- *(2) Filter ((isnotnull(i_manufact_id#13) && (i_manufact_id#13 = 128)) && isnotnull(i_item_sk#9)) + +- *(2) FileScan parquet default.item[i_item_sk#9,i_brand_id#6,i_brand#5,i_manufact_id#13] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_manufact_id), EqualTo(i_manufact_id,128), IsNotNull(i_item_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q3/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q3/simplified.txt new file mode 100644 index 000000000..a8432c52f --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q3/simplified.txt @@ -0,0 +1,26 @@ +TakeOrderedAndProject [d_year,sum_agg,brand_id,brand] + WholeStageCodegen + HashAggregate [d_year,i_brand,sum(UnscaledValue(ss_ext_sales_price)),i_brand_id,sum] 
[brand_id,sum(UnscaledValue(ss_ext_sales_price)),sum_agg,sum,brand] + InputAdapter + Exchange [d_year,i_brand,i_brand_id] #1 + WholeStageCodegen + HashAggregate [sum,d_year,i_brand,ss_ext_sales_price,i_brand_id,sum] [sum,sum] + Project [d_year,ss_ext_sales_price,i_brand_id,i_brand] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [d_year,ss_item_sk,ss_ext_sales_price] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [d_date_sk,d_year] + Filter [d_moy,d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_year,d_moy] [d_date_sk,d_year,d_moy] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen + Project [ss_sold_date_sk,ss_item_sk,ss_ext_sales_price] + Filter [ss_sold_date_sk,ss_item_sk] + Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_ext_sales_price] [ss_sold_date_sk,ss_item_sk,ss_ext_sales_price] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen + Project [i_item_sk,i_brand_id,i_brand] + Filter [i_manufact_id,i_item_sk] + Scan parquet default.item [i_item_sk,i_brand_id,i_brand,i_manufact_id] [i_item_sk,i_brand_id,i_brand,i_manufact_id] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q30/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q30/explain.txt new file mode 100644 index 000000000..758cb7159 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q30/explain.txt @@ -0,0 +1,52 @@ +== Physical Plan == +TakeOrderedAndProject(limit=100, orderBy=[c_customer_id#1 ASC NULLS FIRST,c_salutation#2 ASC NULLS FIRST,c_first_name#3 ASC NULLS FIRST,c_last_name#4 ASC NULLS FIRST,c_preferred_cust_flag#5 ASC NULLS FIRST,c_birth_day#6 ASC NULLS FIRST,c_birth_month#7 ASC NULLS FIRST,c_birth_year#8 ASC NULLS FIRST,c_birth_country#9 ASC NULLS FIRST,c_login#10 ASC NULLS FIRST,c_email_address#11 ASC NULLS FIRST,c_last_review_date#12 ASC NULLS FIRST,ctr_total_return#13 ASC NULLS FIRST], 
output=[c_customer_id#1,c_salutation#2,c_first_name#3,c_last_name#4,c_preferred_cust_flag#5,c_birth_day#6,c_birth_month#7,c_birth_year#8,c_birth_country#9,c_login#10,c_email_address#11,c_last_review_date#12,ctr_total_return#13]) ++- *(11) Project [c_customer_id#1, c_salutation#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_day#6, c_birth_month#7, c_birth_year#8, c_birth_country#9, c_login#10, c_email_address#11, c_last_review_date#12, ctr_total_return#13] + +- *(11) BroadcastHashJoin [c_current_addr_sk#14], [ca_address_sk#15], Inner, BuildRight + :- *(11) Project [ctr_total_return#13, c_customer_id#1, c_current_addr_sk#14, c_salutation#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_day#6, c_birth_month#7, c_birth_year#8, c_birth_country#9, c_login#10, c_email_address#11, c_last_review_date#12] + : +- *(11) BroadcastHashJoin [ctr_customer_sk#16], [cast(c_customer_sk#17 as bigint)], Inner, BuildRight + : :- *(11) Project [ctr_customer_sk#16, ctr_total_return#13] + : : +- *(11) BroadcastHashJoin [ctr_state#18], [ctr_state#18#19], Inner, BuildRight, (cast(ctr_total_return#13 as decimal(24,7)) > (CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6)))#20) + : : :- *(11) Filter isnotnull(ctr_total_return#13) + : : : +- *(11) HashAggregate(keys=[wr_returning_customer_sk#21, ca_state#22], functions=[sum(UnscaledValue(wr_return_amt#23))]) + : : : +- Exchange hashpartitioning(wr_returning_customer_sk#21, ca_state#22, 200) + : : : +- *(3) HashAggregate(keys=[wr_returning_customer_sk#21, ca_state#22], functions=[partial_sum(UnscaledValue(wr_return_amt#23))]) + : : : +- *(3) Project [wr_returning_customer_sk#21, wr_return_amt#23, ca_state#22] + : : : +- *(3) BroadcastHashJoin [wr_returning_addr_sk#24], [cast(ca_address_sk#15 as bigint)], Inner, BuildRight + : : : :- *(3) Project [wr_returning_customer_sk#21, wr_returning_addr_sk#24, wr_return_amt#23] + : : : : +- *(3) BroadcastHashJoin [wr_returned_date_sk#25], 
[cast(d_date_sk#26 as bigint)], Inner, BuildRight + : : : : :- *(3) Project [wr_returned_date_sk#25, wr_returning_customer_sk#21, wr_returning_addr_sk#24, wr_return_amt#23] + : : : : : +- *(3) Filter ((isnotnull(wr_returned_date_sk#25) && isnotnull(wr_returning_addr_sk#24)) && isnotnull(wr_returning_customer_sk#21)) + : : : : : +- *(3) FileScan parquet default.web_returns[wr_returned_date_sk#25,wr_returning_customer_sk#21,wr_returning_addr_sk#24,wr_return_amt#23] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_returns], PartitionFilters: [], PushedFilters: [IsNotNull(wr_returned_date_sk), IsNotNull(wr_returning_addr_sk), IsNotNull(wr_returning_customer..., ReadSchema: struct + : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : +- *(2) Project [ca_address_sk#15, ca_state#22] + : : : +- *(2) Filter (isnotnull(ca_address_sk#15) && isnotnull(ca_state#22)) + : : : +- *(2) FileScan parquet default.customer_address[ca_address_sk#15,ca_state#22] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_address_sk), IsNotNull(ca_state)], ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(input[1, string, true])) + : : +- *(8) Filter isnotnull((CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6)))#20) + : : +- *(8) HashAggregate(keys=[ctr_state#18], functions=[avg(ctr_total_return#13)]) + : : +- Exchange hashpartitioning(ctr_state#18, 200) + : : +- *(7) HashAggregate(keys=[ctr_state#18], functions=[partial_avg(ctr_total_return#13)]) + : : +- *(7) HashAggregate(keys=[wr_returning_customer_sk#21, ca_state#22], functions=[sum(UnscaledValue(wr_return_amt#23))]) + : : +- Exchange hashpartitioning(wr_returning_customer_sk#21, ca_state#22, 200) + : : +- *(6) 
HashAggregate(keys=[wr_returning_customer_sk#21, ca_state#22], functions=[partial_sum(UnscaledValue(wr_return_amt#23))]) + : : +- *(6) Project [wr_returning_customer_sk#21, wr_return_amt#23, ca_state#22] + : : +- *(6) BroadcastHashJoin [wr_returning_addr_sk#24], [cast(ca_address_sk#15 as bigint)], Inner, BuildRight + : : :- *(6) Project [wr_returning_customer_sk#21, wr_returning_addr_sk#24, wr_return_amt#23] + : : : +- *(6) BroadcastHashJoin [wr_returned_date_sk#25], [cast(d_date_sk#26 as bigint)], Inner, BuildRight + : : : :- *(6) Project [wr_returned_date_sk#25, wr_returning_customer_sk#21, wr_returning_addr_sk#24, wr_return_amt#23] + : : : : +- *(6) Filter (isnotnull(wr_returned_date_sk#25) && isnotnull(wr_returning_addr_sk#24)) + : : : : +- *(6) FileScan parquet default.web_returns[wr_returned_date_sk#25,wr_returning_customer_sk#21,wr_returning_addr_sk#24,wr_return_amt#23] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_returns], PartitionFilters: [], PushedFilters: [IsNotNull(wr_returned_date_sk), IsNotNull(wr_returning_addr_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q30/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q30/simplified.txt new file mode 100644 index 000000000..1be056d32 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q30/simplified.txt @@ -0,0 +1,70 @@ +TakeOrderedAndProject [ctr_total_return,c_birth_day,c_login,c_salutation,c_birth_month,c_birth_country,c_customer_id,c_last_name,c_preferred_cust_flag,c_first_name,c_email_address,c_birth_year,c_last_review_date] + WholeStageCodegen + Project [ctr_total_return,c_birth_day,c_login,c_salutation,c_birth_month,c_birth_country,c_customer_id,c_last_name,c_preferred_cust_flag,c_first_name,c_email_address,c_birth_year,c_last_review_date] + BroadcastHashJoin [c_current_addr_sk,ca_address_sk] 
+ Project [c_preferred_cust_flag,c_current_addr_sk,ctr_total_return,c_email_address,c_customer_id,c_birth_year,c_login,c_birth_day,c_last_review_date,c_last_name,c_birth_month,c_first_name,c_birth_country,c_salutation] + BroadcastHashJoin [ctr_customer_sk,c_customer_sk] + Project [ctr_customer_sk,ctr_total_return] + BroadcastHashJoin [ctr_state,ctr_state,ctr_total_return,(CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6)))] + Filter [ctr_total_return] + HashAggregate [wr_returning_customer_sk,ca_state,sum,sum(UnscaledValue(wr_return_amt))] [ctr_total_return,ctr_state,sum(UnscaledValue(wr_return_amt)),sum,ctr_customer_sk] + InputAdapter + Exchange [wr_returning_customer_sk,ca_state] #1 + WholeStageCodegen + HashAggregate [wr_return_amt,sum,wr_returning_customer_sk,sum,ca_state] [sum,sum] + Project [wr_returning_customer_sk,wr_return_amt,ca_state] + BroadcastHashJoin [wr_returning_addr_sk,ca_address_sk] + Project [wr_returning_customer_sk,wr_returning_addr_sk,wr_return_amt] + BroadcastHashJoin [wr_returned_date_sk,d_date_sk] + Project [wr_returned_date_sk,wr_returning_customer_sk,wr_returning_addr_sk,wr_return_amt] + Filter [wr_returned_date_sk,wr_returning_addr_sk,wr_returning_customer_sk] + Scan parquet default.web_returns [wr_returned_date_sk,wr_returning_customer_sk,wr_returning_addr_sk,wr_return_amt] [wr_returned_date_sk,wr_returning_customer_sk,wr_returning_addr_sk,wr_return_amt] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen + Project [d_date_sk] + Filter [d_year,d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_year] [d_date_sk,d_year] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen + Project [ca_address_sk,ca_state] + Filter [ca_address_sk,ca_state] + Scan parquet default.customer_address [ca_address_sk,ca_state] [ca_address_sk,ca_state] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen + Filter [(CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6)))] + HashAggregate 
[ctr_state,sum,count,avg(ctr_total_return)] [(CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6))),sum,ctr_state,avg(ctr_total_return),count] + InputAdapter + Exchange [ctr_state] #5 + WholeStageCodegen + HashAggregate [ctr_state,count,sum,ctr_total_return,count,sum] [sum,count,sum,count] + HashAggregate [wr_returning_customer_sk,ca_state,sum,sum(UnscaledValue(wr_return_amt))] [sum(UnscaledValue(wr_return_amt)),ctr_state,ctr_total_return,sum] + InputAdapter + Exchange [wr_returning_customer_sk,ca_state] #6 + WholeStageCodegen + HashAggregate [sum,wr_return_amt,sum,wr_returning_customer_sk,ca_state] [sum,sum] + Project [wr_returning_customer_sk,wr_return_amt,ca_state] + BroadcastHashJoin [wr_returning_addr_sk,ca_address_sk] + Project [wr_returning_customer_sk,wr_returning_addr_sk,wr_return_amt] + BroadcastHashJoin [wr_returned_date_sk,d_date_sk] + Project [wr_returned_date_sk,wr_returning_customer_sk,wr_returning_addr_sk,wr_return_amt] + Filter [wr_returned_date_sk,wr_returning_addr_sk] + Scan parquet default.web_returns [wr_returned_date_sk,wr_returning_customer_sk,wr_returning_addr_sk,wr_return_amt] [wr_returned_date_sk,wr_returning_customer_sk,wr_returning_addr_sk,wr_return_amt] + InputAdapter + ReusedExchange [d_date_sk] [d_date_sk] #2 + InputAdapter + ReusedExchange [ca_address_sk,ca_state] [ca_address_sk,ca_state] #3 + InputAdapter + BroadcastExchange #7 + WholeStageCodegen + Project [c_preferred_cust_flag,c_current_addr_sk,c_email_address,c_customer_id,c_birth_year,c_customer_sk,c_login,c_birth_day,c_last_review_date,c_last_name,c_birth_month,c_first_name,c_birth_country,c_salutation] + Filter [c_customer_sk,c_current_addr_sk] + Scan parquet default.customer [c_preferred_cust_flag,c_current_addr_sk,c_email_address,c_customer_id,c_birth_year,c_customer_sk,c_login,c_birth_day,c_last_review_date,c_last_name,c_birth_month,c_first_name,c_birth_country,c_salutation] 
[c_preferred_cust_flag,c_current_addr_sk,c_email_address,c_customer_id,c_birth_year,c_customer_sk,c_login,c_birth_day,c_last_review_date,c_last_name,c_birth_month,c_first_name,c_birth_country,c_salutation] + InputAdapter + BroadcastExchange #8 + WholeStageCodegen + Project [ca_address_sk] + Filter [ca_state,ca_address_sk] + Scan parquet default.customer_address [ca_address_sk,ca_state] [ca_address_sk,ca_state] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q31/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q31/explain.txt new file mode 100644 index 000000000..22739d4c7 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q31/explain.txt @@ -0,0 +1,100 @@ +== Physical Plan == +*(25) Sort [ca_county#1 ASC NULLS FIRST], true, 0 ++- Exchange rangepartitioning(ca_county#1 ASC NULLS FIRST, 200) + +- *(24) Project [ca_county#1, d_year#2, CheckOverflow((promote_precision(web_sales#3) / promote_precision(web_sales#4)), DecimalType(37,20)) AS web_q1_q2_increase#5, CheckOverflow((promote_precision(store_sales#6) / promote_precision(store_sales#7)), DecimalType(37,20)) AS store_q1_q2_increase#8, CheckOverflow((promote_precision(web_sales#9) / promote_precision(web_sales#3)), DecimalType(37,20)) AS web_q2_q3_increase#10, CheckOverflow((promote_precision(store_sales#11) / promote_precision(store_sales#6)), DecimalType(37,20)) AS store_q2_q3_increase#12] + +- *(24) BroadcastHashJoin [ca_county#13], [ca_county#14], Inner, BuildRight, (CASE WHEN (web_sales#3 > 0.00) THEN CheckOverflow((promote_precision(web_sales#9) / promote_precision(web_sales#3)), DecimalType(37,20)) ELSE null END > CASE WHEN (store_sales#6 > 0.00) THEN CheckOverflow((promote_precision(store_sales#11) / promote_precision(store_sales#6)), DecimalType(37,20)) ELSE null END) + :- *(24) Project [ca_county#1, d_year#2, store_sales#7, store_sales#6, store_sales#11, ca_county#13, web_sales#4, web_sales#3] + : +- *(24) BroadcastHashJoin 
[ca_county#13], [ca_county#15], Inner, BuildRight, (CASE WHEN (web_sales#4 > 0.00) THEN CheckOverflow((promote_precision(web_sales#3) / promote_precision(web_sales#4)), DecimalType(37,20)) ELSE null END > CASE WHEN (store_sales#7 > 0.00) THEN CheckOverflow((promote_precision(store_sales#6) / promote_precision(store_sales#7)), DecimalType(37,20)) ELSE null END) + : :- *(24) BroadcastHashJoin [ca_county#1], [ca_county#13], Inner, BuildRight + : : :- *(24) Project [ca_county#1, d_year#2, store_sales#7, store_sales#6, store_sales#11] + : : : +- *(24) BroadcastHashJoin [ca_county#16], [ca_county#17], Inner, BuildRight + : : : :- *(24) BroadcastHashJoin [ca_county#1], [ca_county#16], Inner, BuildRight + : : : : :- *(24) HashAggregate(keys=[ca_county#1, d_qoy#18, d_year#2], functions=[sum(UnscaledValue(ss_ext_sales_price#19))]) + : : : : : +- Exchange hashpartitioning(ca_county#1, d_qoy#18, d_year#2, 200) + : : : : : +- *(3) HashAggregate(keys=[ca_county#1, d_qoy#18, d_year#2], functions=[partial_sum(UnscaledValue(ss_ext_sales_price#19))]) + : : : : : +- *(3) Project [ss_ext_sales_price#19, d_year#2, d_qoy#18, ca_county#1] + : : : : : +- *(3) BroadcastHashJoin [ss_addr_sk#20], [ca_address_sk#21], Inner, BuildRight + : : : : : :- *(3) Project [ss_addr_sk#20, ss_ext_sales_price#19, d_year#2, d_qoy#18] + : : : : : : +- *(3) BroadcastHashJoin [ss_sold_date_sk#22], [d_date_sk#23], Inner, BuildRight + : : : : : : :- *(3) Project [ss_sold_date_sk#22, ss_addr_sk#20, ss_ext_sales_price#19] + : : : : : : : +- *(3) Filter (isnotnull(ss_sold_date_sk#22) && isnotnull(ss_addr_sk#20)) + : : : : : : : +- *(3) FileScan parquet default.store_sales[ss_sold_date_sk#22,ss_addr_sk#20,ss_ext_sales_price#19] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_addr_sk)], ReadSchema: struct + : : : : : : +- BroadcastExchange 
HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : : : +- *(1) Project [d_date_sk#23, d_year#2, d_qoy#18] + : : : : : : +- *(1) Filter ((((isnotnull(d_qoy#18) && isnotnull(d_year#2)) && (d_qoy#18 = 1)) && (d_year#2 = 2000)) && isnotnull(d_date_sk#23)) + : : : : : : +- *(1) FileScan parquet default.date_dim[d_date_sk#23,d_year#2,d_qoy#18] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_qoy), IsNotNull(d_year), EqualTo(d_qoy,1), EqualTo(d_year,2000), IsNotNull(d_date_sk)], ReadSchema: struct + : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : : +- *(2) Project [ca_address_sk#21, ca_county#1] + : : : : : +- *(2) Filter (isnotnull(ca_address_sk#21) && isnotnull(ca_county#1)) + : : : : : +- *(2) FileScan parquet default.customer_address[ca_address_sk#21,ca_county#1] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_address_sk), IsNotNull(ca_county)], ReadSchema: struct + : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true])) + : : : : +- *(7) HashAggregate(keys=[ca_county#16, d_qoy#24, d_year#25], functions=[sum(UnscaledValue(ss_ext_sales_price#19))]) + : : : : +- Exchange hashpartitioning(ca_county#16, d_qoy#24, d_year#25, 200) + : : : : +- *(6) HashAggregate(keys=[ca_county#16, d_qoy#24, d_year#25], functions=[partial_sum(UnscaledValue(ss_ext_sales_price#19))]) + : : : : +- *(6) Project [ss_ext_sales_price#19, d_year#25, d_qoy#24, ca_county#16] + : : : : +- *(6) BroadcastHashJoin [ss_addr_sk#20], [ca_address_sk#26], Inner, BuildRight + : : : : :- *(6) Project [ss_addr_sk#20, ss_ext_sales_price#19, d_year#25, d_qoy#24] + : : : : : +- *(6) BroadcastHashJoin [ss_sold_date_sk#22], 
[d_date_sk#27], Inner, BuildRight + : : : : : :- *(6) Project [ss_sold_date_sk#22, ss_addr_sk#20, ss_ext_sales_price#19] + : : : : : : +- *(6) Filter (isnotnull(ss_sold_date_sk#22) && isnotnull(ss_addr_sk#20)) + : : : : : : +- *(6) FileScan parquet default.store_sales[ss_sold_date_sk#22,ss_addr_sk#20,ss_ext_sales_price#19] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_addr_sk)], ReadSchema: struct + : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : : +- *(4) Project [d_date_sk#27, d_year#25, d_qoy#24] + : : : : : +- *(4) Filter ((((isnotnull(d_qoy#24) && isnotnull(d_year#25)) && (d_qoy#24 = 2)) && (d_year#25 = 2000)) && isnotnull(d_date_sk#27)) + : : : : : +- *(4) FileScan parquet default.date_dim[d_date_sk#27,d_year#25,d_qoy#24] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_qoy), IsNotNull(d_year), EqualTo(d_qoy,2), EqualTo(d_year,2000), IsNotNull(d_date_sk)], ReadSchema: struct + : : : : +- ReusedExchange [ca_address_sk#26, ca_county#16], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true])) + : : : +- *(11) HashAggregate(keys=[ca_county#17, d_qoy#28, d_year#29], functions=[sum(UnscaledValue(ss_ext_sales_price#19))]) + : : : +- Exchange hashpartitioning(ca_county#17, d_qoy#28, d_year#29, 200) + : : : +- *(10) HashAggregate(keys=[ca_county#17, d_qoy#28, d_year#29], functions=[partial_sum(UnscaledValue(ss_ext_sales_price#19))]) + : : : +- *(10) Project [ss_ext_sales_price#19, d_year#29, d_qoy#28, ca_county#17] + : : : +- *(10) BroadcastHashJoin [ss_addr_sk#20], [ca_address_sk#30], Inner, 
BuildRight + : : : :- *(10) Project [ss_addr_sk#20, ss_ext_sales_price#19, d_year#29, d_qoy#28] + : : : : +- *(10) BroadcastHashJoin [ss_sold_date_sk#22], [d_date_sk#31], Inner, BuildRight + : : : : :- *(10) Project [ss_sold_date_sk#22, ss_addr_sk#20, ss_ext_sales_price#19] + : : : : : +- *(10) Filter (isnotnull(ss_sold_date_sk#22) && isnotnull(ss_addr_sk#20)) + : : : : : +- *(10) FileScan parquet default.store_sales[ss_sold_date_sk#22,ss_addr_sk#20,ss_ext_sales_price#19] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_addr_sk)], ReadSchema: struct + : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : +- *(8) Project [d_date_sk#31, d_year#29, d_qoy#28] + : : : : +- *(8) Filter ((((isnotnull(d_qoy#28) && isnotnull(d_year#29)) && (d_qoy#28 = 3)) && (d_year#29 = 2000)) && isnotnull(d_date_sk#31)) + : : : : +- *(8) FileScan parquet default.date_dim[d_date_sk#31,d_year#29,d_qoy#28] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_qoy), IsNotNull(d_year), EqualTo(d_qoy,3), EqualTo(d_year,2000), IsNotNull(d_date_sk)], ReadSchema: struct + : : : +- ReusedExchange [ca_address_sk#30, ca_county#17], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true])) + : : +- *(15) HashAggregate(keys=[ca_county#13, d_qoy#32, d_year#33], functions=[sum(UnscaledValue(ws_ext_sales_price#34))]) + : : +- Exchange hashpartitioning(ca_county#13, d_qoy#32, d_year#33, 200) + : : +- *(14) HashAggregate(keys=[ca_county#13, d_qoy#32, d_year#33], functions=[partial_sum(UnscaledValue(ws_ext_sales_price#34))]) + : : +- *(14) Project 
[ws_ext_sales_price#34, d_year#33, d_qoy#32, ca_county#13] + : : +- *(14) BroadcastHashJoin [ws_bill_addr_sk#35], [ca_address_sk#36], Inner, BuildRight + : : :- *(14) Project [ws_bill_addr_sk#35, ws_ext_sales_price#34, d_year#33, d_qoy#32] + : : : +- *(14) BroadcastHashJoin [ws_sold_date_sk#37], [d_date_sk#38], Inner, BuildRight + : : : :- *(14) Project [ws_sold_date_sk#37, ws_bill_addr_sk#35, ws_ext_sales_price#34] + : : : : +- *(14) Filter (isnotnull(ws_sold_date_sk#37) && isnotnull(ws_bill_addr_sk#35)) + : : : : +- *(14) FileScan parquet default.web_sales[ws_sold_date_sk#37,ws_bill_addr_sk#35,ws_ext_sales_price#34] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_sold_date_sk), IsNotNull(ws_bill_addr_sk)], ReadSchema: struct + : : : +- ReusedExchange [d_date_sk#38, d_year#33, d_qoy#32], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- ReusedExchange [ca_address_sk#36, ca_county#13], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true])) + : +- *(19) HashAggregate(keys=[ca_county#15, d_qoy#39, d_year#40], functions=[sum(UnscaledValue(ws_ext_sales_price#34))]) + : +- Exchange hashpartitioning(ca_county#15, d_qoy#39, d_year#40, 200) + : +- *(18) HashAggregate(keys=[ca_county#15, d_qoy#39, d_year#40], functions=[partial_sum(UnscaledValue(ws_ext_sales_price#34))]) + : +- *(18) Project [ws_ext_sales_price#34, d_year#40, d_qoy#39, ca_county#15] + : +- *(18) BroadcastHashJoin [ws_bill_addr_sk#35], [ca_address_sk#41], Inner, BuildRight + : :- *(18) Project [ws_bill_addr_sk#35, ws_ext_sales_price#34, d_year#40, d_qoy#39] + : : +- *(18) BroadcastHashJoin [ws_sold_date_sk#37], [d_date_sk#42], Inner, BuildRight + : : :- *(18) Project [ws_sold_date_sk#37, ws_bill_addr_sk#35, 
ws_ext_sales_price#34] + : : : +- *(18) Filter (isnotnull(ws_sold_date_sk#37) && isnotnull(ws_bill_addr_sk#35)) + : : : +- *(18) FileScan parquet default.web_sales[ws_sold_date_sk#37,ws_bill_addr_sk#35,ws_ext_sales_price#34] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_sold_date_sk), IsNotNull(ws_bill_addr_sk)], ReadSchema: struct + : : +- ReusedExchange [d_date_sk#42, d_year#40, d_qoy#39], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- ReusedExchange [ca_address_sk#41, ca_county#15], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true])) + +- *(23) HashAggregate(keys=[ca_county#14, d_qoy#43, d_year#44], functions=[sum(UnscaledValue(ws_ext_sales_price#34))]) + +- Exchange hashpartitioning(ca_county#14, d_qoy#43, d_year#44, 200) + +- *(22) HashAggregate(keys=[ca_county#14, d_qoy#43, d_year#44], functions=[partial_sum(UnscaledValue(ws_ext_sales_price#34))]) + +- *(22) Project [ws_ext_sales_price#34, d_year#44, d_qoy#43, ca_county#14] + +- *(22) BroadcastHashJoin [ws_bill_addr_sk#35], [ca_address_sk#45], Inner, BuildRight + :- *(22) Project [ws_bill_addr_sk#35, ws_ext_sales_price#34, d_year#44, d_qoy#43] + : +- *(22) BroadcastHashJoin [ws_sold_date_sk#37], [d_date_sk#46], Inner, BuildRight + : :- *(22) Project [ws_sold_date_sk#37, ws_bill_addr_sk#35, ws_ext_sales_price#34] + : : +- *(22) Filter (isnotnull(ws_sold_date_sk#37) && isnotnull(ws_bill_addr_sk#35)) + : : +- *(22) FileScan parquet default.web_sales[ws_sold_date_sk#37,ws_bill_addr_sk#35,ws_ext_sales_price#34] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_sold_date_sk), 
IsNotNull(ws_bill_addr_sk)], ReadSchema: struct + : +- ReusedExchange [d_date_sk#46, d_year#44, d_qoy#43], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- ReusedExchange [ca_address_sk#45, ca_county#14], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q31/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q31/simplified.txt new file mode 100644 index 000000000..78c6383a4 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q31/simplified.txt @@ -0,0 +1,140 @@ +WholeStageCodegen + Sort [ca_county] + InputAdapter + Exchange [ca_county] #1 + WholeStageCodegen + Project [web_sales,store_sales,store_sales,web_sales,d_year,store_sales,web_sales,ca_county] + BroadcastHashJoin [web_sales,ca_county,store_sales,ca_county,store_sales,web_sales] + Project [d_year,store_sales,store_sales,ca_county,web_sales,ca_county,store_sales,web_sales] + BroadcastHashJoin [ca_county,ca_county,store_sales,web_sales,store_sales,web_sales] + BroadcastHashJoin [ca_county,ca_county] + Project [d_year,store_sales,store_sales,ca_county,store_sales] + BroadcastHashJoin [ca_county,ca_county] + BroadcastHashJoin [ca_county,ca_county] + HashAggregate [d_year,d_qoy,ca_county,sum,sum(UnscaledValue(ss_ext_sales_price))] [sum(UnscaledValue(ss_ext_sales_price)),store_sales,sum] + InputAdapter + Exchange [ca_county,d_qoy,d_year] #2 + WholeStageCodegen + HashAggregate [d_year,d_qoy,sum,ca_county,sum,ss_ext_sales_price] [sum,sum] + Project [ss_ext_sales_price,d_year,d_qoy,ca_county] + BroadcastHashJoin [ss_addr_sk,ca_address_sk] + Project [ss_addr_sk,ss_ext_sales_price,d_year,d_qoy] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_sold_date_sk,ss_addr_sk,ss_ext_sales_price] + Filter [ss_sold_date_sk,ss_addr_sk] + Scan parquet default.store_sales 
[ss_sold_date_sk,ss_addr_sk,ss_ext_sales_price] [ss_sold_date_sk,ss_addr_sk,ss_ext_sales_price] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen + Project [d_date_sk,d_year,d_qoy] + Filter [d_qoy,d_year,d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_year,d_qoy] [d_date_sk,d_year,d_qoy] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen + Project [ca_address_sk,ca_county] + Filter [ca_address_sk,ca_county] + Scan parquet default.customer_address [ca_address_sk,ca_county] [ca_address_sk,ca_county] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen + HashAggregate [d_qoy,d_year,sum(UnscaledValue(ss_ext_sales_price)),sum,ca_county] [sum(UnscaledValue(ss_ext_sales_price)),store_sales,sum] + InputAdapter + Exchange [ca_county,d_qoy,d_year] #6 + WholeStageCodegen + HashAggregate [sum,d_qoy,d_year,sum,ss_ext_sales_price,ca_county] [sum,sum] + Project [ss_ext_sales_price,d_year,d_qoy,ca_county] + BroadcastHashJoin [ss_addr_sk,ca_address_sk] + Project [ss_addr_sk,ss_ext_sales_price,d_year,d_qoy] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_sold_date_sk,ss_addr_sk,ss_ext_sales_price] + Filter [ss_sold_date_sk,ss_addr_sk] + Scan parquet default.store_sales [ss_sold_date_sk,ss_addr_sk,ss_ext_sales_price] [ss_sold_date_sk,ss_addr_sk,ss_ext_sales_price] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen + Project [d_date_sk,d_year,d_qoy] + Filter [d_qoy,d_year,d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_year,d_qoy] [d_date_sk,d_year,d_qoy] + InputAdapter + ReusedExchange [ca_address_sk,ca_county] [ca_address_sk,ca_county] #4 + InputAdapter + BroadcastExchange #8 + WholeStageCodegen + HashAggregate [sum,sum(UnscaledValue(ss_ext_sales_price)),d_year,d_qoy,ca_county] [sum(UnscaledValue(ss_ext_sales_price)),store_sales,sum] + InputAdapter + Exchange [ca_county,d_qoy,d_year] #9 + WholeStageCodegen + HashAggregate [sum,d_year,d_qoy,sum,ss_ext_sales_price,ca_county] [sum,sum] + Project 
[ss_ext_sales_price,d_year,d_qoy,ca_county] + BroadcastHashJoin [ss_addr_sk,ca_address_sk] + Project [ss_addr_sk,ss_ext_sales_price,d_year,d_qoy] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_sold_date_sk,ss_addr_sk,ss_ext_sales_price] + Filter [ss_sold_date_sk,ss_addr_sk] + Scan parquet default.store_sales [ss_sold_date_sk,ss_addr_sk,ss_ext_sales_price] [ss_sold_date_sk,ss_addr_sk,ss_ext_sales_price] + InputAdapter + BroadcastExchange #10 + WholeStageCodegen + Project [d_date_sk,d_year,d_qoy] + Filter [d_qoy,d_year,d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_year,d_qoy] [d_date_sk,d_year,d_qoy] + InputAdapter + ReusedExchange [ca_address_sk,ca_county] [ca_address_sk,ca_county] #4 + InputAdapter + BroadcastExchange #11 + WholeStageCodegen + HashAggregate [sum,ca_county,d_qoy,d_year,sum(UnscaledValue(ws_ext_sales_price))] [sum(UnscaledValue(ws_ext_sales_price)),web_sales,sum] + InputAdapter + Exchange [ca_county,d_qoy,d_year] #12 + WholeStageCodegen + HashAggregate [sum,ws_ext_sales_price,ca_county,d_qoy,d_year,sum] [sum,sum] + Project [ws_ext_sales_price,d_year,d_qoy,ca_county] + BroadcastHashJoin [ws_bill_addr_sk,ca_address_sk] + Project [ws_bill_addr_sk,ws_ext_sales_price,d_year,d_qoy] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Project [ws_sold_date_sk,ws_bill_addr_sk,ws_ext_sales_price] + Filter [ws_sold_date_sk,ws_bill_addr_sk] + Scan parquet default.web_sales [ws_sold_date_sk,ws_bill_addr_sk,ws_ext_sales_price] [ws_sold_date_sk,ws_bill_addr_sk,ws_ext_sales_price] + InputAdapter + ReusedExchange [d_date_sk,d_year,d_qoy] [d_date_sk,d_year,d_qoy] #3 + InputAdapter + ReusedExchange [ca_address_sk,ca_county] [ca_address_sk,ca_county] #4 + InputAdapter + BroadcastExchange #13 + WholeStageCodegen + HashAggregate [d_qoy,ca_county,d_year,sum,sum(UnscaledValue(ws_ext_sales_price))] [sum(UnscaledValue(ws_ext_sales_price)),web_sales,sum] + InputAdapter + Exchange [ca_county,d_qoy,d_year] #14 + WholeStageCodegen + HashAggregate 
[d_qoy,ca_county,d_year,ws_ext_sales_price,sum,sum] [sum,sum] + Project [ws_ext_sales_price,d_year,d_qoy,ca_county] + BroadcastHashJoin [ws_bill_addr_sk,ca_address_sk] + Project [ws_bill_addr_sk,ws_ext_sales_price,d_year,d_qoy] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Project [ws_sold_date_sk,ws_bill_addr_sk,ws_ext_sales_price] + Filter [ws_sold_date_sk,ws_bill_addr_sk] + Scan parquet default.web_sales [ws_sold_date_sk,ws_bill_addr_sk,ws_ext_sales_price] [ws_sold_date_sk,ws_bill_addr_sk,ws_ext_sales_price] + InputAdapter + ReusedExchange [d_date_sk,d_year,d_qoy] [d_date_sk,d_year,d_qoy] #7 + InputAdapter + ReusedExchange [ca_address_sk,ca_county] [ca_address_sk,ca_county] #4 + InputAdapter + BroadcastExchange #15 + WholeStageCodegen + HashAggregate [sum,d_qoy,ca_county,d_year,sum(UnscaledValue(ws_ext_sales_price))] [sum(UnscaledValue(ws_ext_sales_price)),web_sales,sum] + InputAdapter + Exchange [ca_county,d_qoy,d_year] #16 + WholeStageCodegen + HashAggregate [sum,sum,d_qoy,ca_county,ws_ext_sales_price,d_year] [sum,sum] + Project [ws_ext_sales_price,d_year,d_qoy,ca_county] + BroadcastHashJoin [ws_bill_addr_sk,ca_address_sk] + Project [ws_bill_addr_sk,ws_ext_sales_price,d_year,d_qoy] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Project [ws_sold_date_sk,ws_bill_addr_sk,ws_ext_sales_price] + Filter [ws_sold_date_sk,ws_bill_addr_sk] + Scan parquet default.web_sales [ws_sold_date_sk,ws_bill_addr_sk,ws_ext_sales_price] [ws_sold_date_sk,ws_bill_addr_sk,ws_ext_sales_price] + InputAdapter + ReusedExchange [d_date_sk,d_year,d_qoy] [d_date_sk,d_year,d_qoy] #10 + InputAdapter + ReusedExchange [ca_address_sk,ca_county] [ca_address_sk,ca_county] #4 diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q32/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q32/explain.txt new file mode 100644 index 000000000..26c877156 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q32/explain.txt @@ -0,0 
+1,30 @@ +== Physical Plan == +CollectLimit 100 ++- *(6) Project [1 AS excess discount amount #1] + +- *(6) BroadcastHashJoin [cs_sold_date_sk#2], [d_date_sk#3], Inner, BuildRight + :- *(6) Project [cs_sold_date_sk#2] + : +- *(6) BroadcastHashJoin [i_item_sk#4], [cs_item_sk#5#6], Inner, BuildRight, (cast(cs_ext_discount_amt#7 as decimal(14,7)) > (CAST(1.3 AS DECIMAL(11,6)) * CAST(avg(cs_ext_discount_amt) AS DECIMAL(11,6)))#8) + : :- *(6) Project [cs_sold_date_sk#2, cs_ext_discount_amt#7, i_item_sk#4] + : : +- *(6) BroadcastHashJoin [cs_item_sk#5], [i_item_sk#4], Inner, BuildRight + : : :- *(6) Project [cs_sold_date_sk#2, cs_item_sk#5, cs_ext_discount_amt#7] + : : : +- *(6) Filter ((isnotnull(cs_item_sk#5) && isnotnull(cs_ext_discount_amt#7)) && isnotnull(cs_sold_date_sk#2)) + : : : +- *(6) FileScan parquet default.catalog_sales[cs_sold_date_sk#2,cs_item_sk#5,cs_ext_discount_amt#7] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_item_sk), IsNotNull(cs_ext_discount_amt), IsNotNull(cs_sold_date_sk)], ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(1) Project [i_item_sk#4] + : : +- *(1) Filter ((isnotnull(i_manufact_id#9) && (i_manufact_id#9 = 977)) && isnotnull(i_item_sk#4)) + : : +- *(1) FileScan parquet default.item[i_item_sk#4,i_manufact_id#9] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_manufact_id), EqualTo(i_manufact_id,977), IsNotNull(i_item_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[1, int, true] as bigint))) + : +- *(4) Filter isnotnull((CAST(1.3 AS DECIMAL(11,6)) * CAST(avg(cs_ext_discount_amt) AS DECIMAL(11,6)))#8) + : +- *(4) HashAggregate(keys=[cs_item_sk#5], 
functions=[avg(UnscaledValue(cs_ext_discount_amt#7))]) + : +- Exchange hashpartitioning(cs_item_sk#5, 200) + : +- *(3) HashAggregate(keys=[cs_item_sk#5], functions=[partial_avg(UnscaledValue(cs_ext_discount_amt#7))]) + : +- *(3) Project [cs_item_sk#5, cs_ext_discount_amt#7] + : +- *(3) BroadcastHashJoin [cs_sold_date_sk#2], [d_date_sk#3], Inner, BuildRight + : :- *(3) Project [cs_sold_date_sk#2, cs_item_sk#5, cs_ext_discount_amt#7] + : : +- *(3) Filter (isnotnull(cs_sold_date_sk#2) && isnotnull(cs_item_sk#5)) + : : +- *(3) FileScan parquet default.catalog_sales[cs_sold_date_sk#2,cs_item_sk#5,cs_ext_discount_amt#7] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_sold_date_sk), IsNotNull(cs_item_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(2) Project [d_date_sk#3] + : +- *(2) Filter (((isnotnull(d_date#10) && (cast(d_date#10 as string) >= 2000-01-27)) && (d_date#10 <= 11073)) && isnotnull(d_date_sk#3)) + : +- *(2) FileScan parquet default.date_dim[d_date_sk#3,d_date#10] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date), LessThanOrEqual(d_date,2000-04-26), IsNotNull(d_date_sk)], ReadSchema: struct + +- ReusedExchange [d_date_sk#3], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q32/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q32/simplified.txt new file mode 100644 index 000000000..1da0b46e1 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q32/simplified.txt @@ -0,0 +1,39 @@ +CollectLimit + WholeStageCodegen + 
Project + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_sold_date_sk] + BroadcastHashJoin [i_item_sk,cs_item_sk,cs_ext_discount_amt,(CAST(1.3 AS DECIMAL(11,6)) * CAST(avg(cs_ext_discount_amt) AS DECIMAL(11,6)))] + Project [cs_sold_date_sk,cs_ext_discount_amt,i_item_sk] + BroadcastHashJoin [cs_item_sk,i_item_sk] + Project [cs_sold_date_sk,cs_item_sk,cs_ext_discount_amt] + Filter [cs_item_sk,cs_ext_discount_amt,cs_sold_date_sk] + Scan parquet default.catalog_sales [cs_sold_date_sk,cs_item_sk,cs_ext_discount_amt] [cs_sold_date_sk,cs_item_sk,cs_ext_discount_amt] + InputAdapter + BroadcastExchange #1 + WholeStageCodegen + Project [i_item_sk] + Filter [i_manufact_id,i_item_sk] + Scan parquet default.item [i_item_sk,i_manufact_id] [i_item_sk,i_manufact_id] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen + Filter [(CAST(1.3 AS DECIMAL(11,6)) * CAST(avg(cs_ext_discount_amt) AS DECIMAL(11,6)))] + HashAggregate [cs_item_sk,sum,count,avg(UnscaledValue(cs_ext_discount_amt))] [avg(UnscaledValue(cs_ext_discount_amt)),(CAST(1.3 AS DECIMAL(11,6)) * CAST(avg(cs_ext_discount_amt) AS DECIMAL(11,6))),sum,count,cs_item_sk] + InputAdapter + Exchange [cs_item_sk] #3 + WholeStageCodegen + HashAggregate [cs_ext_discount_amt,count,sum,cs_item_sk,sum,count] [sum,count,sum,count] + Project [cs_item_sk,cs_ext_discount_amt] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_sold_date_sk,cs_item_sk,cs_ext_discount_amt] + Filter [cs_sold_date_sk,cs_item_sk] + Scan parquet default.catalog_sales [cs_sold_date_sk,cs_item_sk,cs_ext_discount_amt] [cs_sold_date_sk,cs_item_sk,cs_ext_discount_amt] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen + Project [d_date_sk] + Filter [d_date,d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_date] [d_date_sk,d_date] + InputAdapter + ReusedExchange [d_date_sk] [d_date_sk] #4 diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q33/explain.txt 
b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q33/explain.txt new file mode 100644 index 000000000..234413bf8 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q33/explain.txt @@ -0,0 +1,65 @@ +== Physical Plan == +TakeOrderedAndProject(limit=100, orderBy=[total_sales#1 ASC NULLS FIRST], output=[i_manufact_id#2,total_sales#1]) ++- *(20) HashAggregate(keys=[i_manufact_id#2], functions=[sum(total_sales#3)]) + +- Exchange hashpartitioning(i_manufact_id#2, 200) + +- *(19) HashAggregate(keys=[i_manufact_id#2], functions=[partial_sum(total_sales#3)]) + +- Union + :- *(6) HashAggregate(keys=[i_manufact_id#2], functions=[sum(UnscaledValue(ss_ext_sales_price#4))]) + : +- Exchange hashpartitioning(i_manufact_id#2, 200) + : +- *(5) HashAggregate(keys=[i_manufact_id#2], functions=[partial_sum(UnscaledValue(ss_ext_sales_price#4))]) + : +- *(5) Project [ss_ext_sales_price#4, i_manufact_id#2] + : +- *(5) BroadcastHashJoin [ss_item_sk#5], [i_item_sk#6], Inner, BuildRight + : :- *(5) Project [ss_item_sk#5, ss_ext_sales_price#4] + : : +- *(5) BroadcastHashJoin [ss_addr_sk#7], [ca_address_sk#8], Inner, BuildRight + : : :- *(5) Project [ss_item_sk#5, ss_addr_sk#7, ss_ext_sales_price#4] + : : : +- *(5) BroadcastHashJoin [ss_sold_date_sk#9], [d_date_sk#10], Inner, BuildRight + : : : :- *(5) Project [ss_sold_date_sk#9, ss_item_sk#5, ss_addr_sk#7, ss_ext_sales_price#4] + : : : : +- *(5) Filter ((isnotnull(ss_sold_date_sk#9) && isnotnull(ss_addr_sk#7)) && isnotnull(ss_item_sk#5)) + : : : : +- *(5) FileScan parquet default.store_sales[ss_sold_date_sk#9,ss_item_sk#5,ss_addr_sk#7,ss_ext_sales_price#4] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_addr_sk), IsNotNull(ss_item_sk)], ReadSchema: struct + : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] 
as bigint))) + : : : +- *(1) Project [d_date_sk#10] + : : : +- *(1) Filter ((((isnotnull(d_year#11) && isnotnull(d_moy#12)) && (d_year#11 = 1998)) && (d_moy#12 = 5)) && isnotnull(d_date_sk#10)) + : : : +- *(1) FileScan parquet default.date_dim[d_date_sk#10,d_year#11,d_moy#12] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,1998), EqualTo(d_moy,5), IsNotNull(d_date_sk)], ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(2) Project [ca_address_sk#8] + : : +- *(2) Filter ((isnotnull(ca_gmt_offset#13) && (ca_gmt_offset#13 = -5.00)) && isnotnull(ca_address_sk#8)) + : : +- *(2) FileScan parquet default.customer_address[ca_address_sk#8,ca_gmt_offset#13] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_gmt_offset), EqualTo(ca_gmt_offset,-5.00), IsNotNull(ca_address_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(4) BroadcastHashJoin [i_manufact_id#2], [i_manufact_id#2#14], LeftSemi, BuildRight + : :- *(4) Project [i_item_sk#6, i_manufact_id#2] + : : +- *(4) Filter isnotnull(i_item_sk#6) + : : +- *(4) FileScan parquet default.item[i_item_sk#6,i_manufact_id#2] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(3) Project [i_manufact_id#2 AS i_manufact_id#2#14] + : +- *(3) Filter (isnotnull(i_category#15) && (i_category#15 = Electronics)) + : +- *(3) FileScan 
parquet default.item[i_category#15,i_manufact_id#2] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_category), EqualTo(i_category,Electronics)], ReadSchema: struct + :- *(12) HashAggregate(keys=[i_manufact_id#2], functions=[sum(UnscaledValue(cs_ext_sales_price#16))]) + : +- Exchange hashpartitioning(i_manufact_id#2, 200) + : +- *(11) HashAggregate(keys=[i_manufact_id#2], functions=[partial_sum(UnscaledValue(cs_ext_sales_price#16))]) + : +- *(11) Project [cs_ext_sales_price#16, i_manufact_id#2] + : +- *(11) BroadcastHashJoin [cs_item_sk#17], [i_item_sk#6], Inner, BuildRight + : :- *(11) Project [cs_item_sk#17, cs_ext_sales_price#16] + : : +- *(11) BroadcastHashJoin [cs_bill_addr_sk#18], [ca_address_sk#8], Inner, BuildRight + : : :- *(11) Project [cs_bill_addr_sk#18, cs_item_sk#17, cs_ext_sales_price#16] + : : : +- *(11) BroadcastHashJoin [cs_sold_date_sk#19], [d_date_sk#10], Inner, BuildRight + : : : :- *(11) Project [cs_sold_date_sk#19, cs_bill_addr_sk#18, cs_item_sk#17, cs_ext_sales_price#16] + : : : : +- *(11) Filter ((isnotnull(cs_sold_date_sk#19) && isnotnull(cs_bill_addr_sk#18)) && isnotnull(cs_item_sk#17)) + : : : : +- *(11) FileScan parquet default.catalog_sales[cs_sold_date_sk#19,cs_bill_addr_sk#18,cs_item_sk#17,cs_ext_sales_price#16] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_sold_date_sk), IsNotNull(cs_bill_addr_sk), IsNotNull(cs_item_sk)], ReadSchema: struct + : : : +- ReusedExchange [d_date_sk#10], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- ReusedExchange [ca_address_sk#8], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- ReusedExchange [i_item_sk#6, i_manufact_id#2], BroadcastExchange 
HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(18) HashAggregate(keys=[i_manufact_id#2], functions=[sum(UnscaledValue(ws_ext_sales_price#20))]) + +- Exchange hashpartitioning(i_manufact_id#2, 200) + +- *(17) HashAggregate(keys=[i_manufact_id#2], functions=[partial_sum(UnscaledValue(ws_ext_sales_price#20))]) + +- *(17) Project [ws_ext_sales_price#20, i_manufact_id#2] + +- *(17) BroadcastHashJoin [ws_item_sk#21], [i_item_sk#6], Inner, BuildRight + :- *(17) Project [ws_item_sk#21, ws_ext_sales_price#20] + : +- *(17) BroadcastHashJoin [ws_bill_addr_sk#22], [ca_address_sk#8], Inner, BuildRight + : :- *(17) Project [ws_item_sk#21, ws_bill_addr_sk#22, ws_ext_sales_price#20] + : : +- *(17) BroadcastHashJoin [ws_sold_date_sk#23], [d_date_sk#10], Inner, BuildRight + : : :- *(17) Project [ws_sold_date_sk#23, ws_item_sk#21, ws_bill_addr_sk#22, ws_ext_sales_price#20] + : : : +- *(17) Filter ((isnotnull(ws_sold_date_sk#23) && isnotnull(ws_bill_addr_sk#22)) && isnotnull(ws_item_sk#21)) + : : : +- *(17) FileScan parquet default.web_sales[ws_sold_date_sk#23,ws_item_sk#21,ws_bill_addr_sk#22,ws_ext_sales_price#20] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_sold_date_sk), IsNotNull(ws_bill_addr_sk), IsNotNull(ws_item_sk)], ReadSchema: struct + : : +- ReusedExchange [d_date_sk#10], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- ReusedExchange [ca_address_sk#8], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- ReusedExchange [i_item_sk#6, i_manufact_id#2], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q33/simplified.txt 
b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q33/simplified.txt new file mode 100644 index 000000000..f3e63e8a8 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q33/simplified.txt @@ -0,0 +1,91 @@ +TakeOrderedAndProject [total_sales,i_manufact_id] + WholeStageCodegen + HashAggregate [i_manufact_id,sum,sum(total_sales)] [sum(total_sales),total_sales,sum] + InputAdapter + Exchange [i_manufact_id] #1 + WholeStageCodegen + HashAggregate [i_manufact_id,total_sales,sum,sum] [sum,sum] + InputAdapter + Union + WholeStageCodegen + HashAggregate [i_manufact_id,sum,sum(UnscaledValue(ss_ext_sales_price))] [sum(UnscaledValue(ss_ext_sales_price)),total_sales,sum] + InputAdapter + Exchange [i_manufact_id] #2 + WholeStageCodegen + HashAggregate [i_manufact_id,ss_ext_sales_price,sum,sum] [sum,sum] + Project [ss_ext_sales_price,i_manufact_id] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [ss_item_sk,ss_ext_sales_price] + BroadcastHashJoin [ss_addr_sk,ca_address_sk] + Project [ss_item_sk,ss_addr_sk,ss_ext_sales_price] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_sold_date_sk,ss_item_sk,ss_addr_sk,ss_ext_sales_price] + Filter [ss_sold_date_sk,ss_addr_sk,ss_item_sk] + Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_addr_sk,ss_ext_sales_price] [ss_sold_date_sk,ss_item_sk,ss_addr_sk,ss_ext_sales_price] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen + Project [d_date_sk] + Filter [d_year,d_moy,d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_year,d_moy] [d_date_sk,d_year,d_moy] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen + Project [ca_address_sk] + Filter [ca_gmt_offset,ca_address_sk] + Scan parquet default.customer_address [ca_address_sk,ca_gmt_offset] [ca_address_sk,ca_gmt_offset] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen + BroadcastHashJoin [i_manufact_id,i_manufact_id] + Project [i_item_sk,i_manufact_id] + Filter [i_item_sk] + Scan parquet 
default.item [i_item_sk,i_manufact_id] [i_item_sk,i_manufact_id] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen + Project [i_manufact_id] + Filter [i_category] + Scan parquet default.item [i_category,i_manufact_id] [i_category,i_manufact_id] + WholeStageCodegen + HashAggregate [i_manufact_id,sum,sum(UnscaledValue(cs_ext_sales_price))] [sum(UnscaledValue(cs_ext_sales_price)),total_sales,sum] + InputAdapter + Exchange [i_manufact_id] #7 + WholeStageCodegen + HashAggregate [i_manufact_id,cs_ext_sales_price,sum,sum] [sum,sum] + Project [cs_ext_sales_price,i_manufact_id] + BroadcastHashJoin [cs_item_sk,i_item_sk] + Project [cs_item_sk,cs_ext_sales_price] + BroadcastHashJoin [cs_bill_addr_sk,ca_address_sk] + Project [cs_bill_addr_sk,cs_item_sk,cs_ext_sales_price] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_sold_date_sk,cs_bill_addr_sk,cs_item_sk,cs_ext_sales_price] + Filter [cs_sold_date_sk,cs_bill_addr_sk,cs_item_sk] + Scan parquet default.catalog_sales [cs_sold_date_sk,cs_bill_addr_sk,cs_item_sk,cs_ext_sales_price] [cs_sold_date_sk,cs_bill_addr_sk,cs_item_sk,cs_ext_sales_price] + InputAdapter + ReusedExchange [d_date_sk] [d_date_sk] #3 + InputAdapter + ReusedExchange [ca_address_sk] [ca_address_sk] #4 + InputAdapter + ReusedExchange [i_item_sk,i_manufact_id] [i_item_sk,i_manufact_id] #5 + WholeStageCodegen + HashAggregate [i_manufact_id,sum,sum(UnscaledValue(ws_ext_sales_price))] [sum(UnscaledValue(ws_ext_sales_price)),total_sales,sum] + InputAdapter + Exchange [i_manufact_id] #8 + WholeStageCodegen + HashAggregate [i_manufact_id,ws_ext_sales_price,sum,sum] [sum,sum] + Project [ws_ext_sales_price,i_manufact_id] + BroadcastHashJoin [ws_item_sk,i_item_sk] + Project [ws_item_sk,ws_ext_sales_price] + BroadcastHashJoin [ws_bill_addr_sk,ca_address_sk] + Project [ws_item_sk,ws_bill_addr_sk,ws_ext_sales_price] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Project [ws_sold_date_sk,ws_item_sk,ws_bill_addr_sk,ws_ext_sales_price] + Filter 
[ws_sold_date_sk,ws_bill_addr_sk,ws_item_sk] + Scan parquet default.web_sales [ws_sold_date_sk,ws_item_sk,ws_bill_addr_sk,ws_ext_sales_price] [ws_sold_date_sk,ws_item_sk,ws_bill_addr_sk,ws_ext_sales_price] + InputAdapter + ReusedExchange [d_date_sk] [d_date_sk] #3 + InputAdapter + ReusedExchange [ca_address_sk] [ca_address_sk] #4 + InputAdapter + ReusedExchange [i_item_sk,i_manufact_id] [i_item_sk,i_manufact_id] #5 diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q34/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q34/explain.txt new file mode 100644 index 000000000..e71e76c27 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q34/explain.txt @@ -0,0 +1,34 @@ +== Physical Plan == +*(7) Sort [c_last_name#1 ASC NULLS FIRST, c_first_name#2 ASC NULLS FIRST, c_salutation#3 ASC NULLS FIRST, c_preferred_cust_flag#4 DESC NULLS LAST], true, 0 ++- Exchange rangepartitioning(c_last_name#1 ASC NULLS FIRST, c_first_name#2 ASC NULLS FIRST, c_salutation#3 ASC NULLS FIRST, c_preferred_cust_flag#4 DESC NULLS LAST, 200) + +- *(6) Project [c_last_name#1, c_first_name#2, c_salutation#3, c_preferred_cust_flag#4, ss_ticket_number#5, cnt#6] + +- *(6) BroadcastHashJoin [ss_customer_sk#7], [c_customer_sk#8], Inner, BuildRight + :- *(6) Filter ((cnt#6 >= 15) && (cnt#6 <= 20)) + : +- *(6) HashAggregate(keys=[ss_ticket_number#5, ss_customer_sk#7], functions=[count(1)]) + : +- Exchange hashpartitioning(ss_ticket_number#5, ss_customer_sk#7, 200) + : +- *(4) HashAggregate(keys=[ss_ticket_number#5, ss_customer_sk#7], functions=[partial_count(1)]) + : +- *(4) Project [ss_customer_sk#7, ss_ticket_number#5] + : +- *(4) BroadcastHashJoin [ss_hdemo_sk#9], [hd_demo_sk#10], Inner, BuildRight + : :- *(4) Project [ss_customer_sk#7, ss_hdemo_sk#9, ss_ticket_number#5] + : : +- *(4) BroadcastHashJoin [ss_store_sk#11], [s_store_sk#12], Inner, BuildRight + : : :- *(4) Project [ss_customer_sk#7, ss_hdemo_sk#9, ss_store_sk#11, 
ss_ticket_number#5] + : : : +- *(4) BroadcastHashJoin [ss_sold_date_sk#13], [d_date_sk#14], Inner, BuildRight + : : : :- *(4) Project [ss_sold_date_sk#13, ss_customer_sk#7, ss_hdemo_sk#9, ss_store_sk#11, ss_ticket_number#5] + : : : : +- *(4) Filter (((isnotnull(ss_sold_date_sk#13) && isnotnull(ss_store_sk#11)) && isnotnull(ss_hdemo_sk#9)) && isnotnull(ss_customer_sk#7)) + : : : : +- *(4) FileScan parquet default.store_sales[ss_sold_date_sk#13,ss_customer_sk#7,ss_hdemo_sk#9,ss_store_sk#11,ss_ticket_number#5] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk), IsNotNull(ss_hdemo_sk), IsNotNull(ss_custome..., ReadSchema: struct + : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : +- *(1) Project [d_date_sk#14] + : : : +- *(1) Filter (((((d_dom#15 >= 1) && (d_dom#15 <= 3)) || ((d_dom#15 >= 25) && (d_dom#15 <= 28))) && d_year#16 IN (1999,2000,2001)) && isnotnull(d_date_sk#14)) + : : : +- *(1) FileScan parquet default.date_dim[d_date_sk#14,d_year#16,d_dom#15] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [Or(And(GreaterThanOrEqual(d_dom,1),LessThanOrEqual(d_dom,3)),And(GreaterThanOrEqual(d_dom,25),Le..., ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(2) Project [s_store_sk#12] + : : +- *(2) Filter ((isnotnull(s_county#17) && (s_county#17 = Williamson County)) && isnotnull(s_store_sk#12)) + : : +- *(2) FileScan parquet default.store[s_store_sk#12,s_county#17] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_county), 
EqualTo(s_county,Williamson County), IsNotNull(s_store_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(3) Project [hd_demo_sk#10] + : +- *(3) Filter ((((isnotnull(hd_vehicle_count#18) && ((hd_buy_potential#19 = >10000) || (hd_buy_potential#19 = unknown))) && (hd_vehicle_count#18 > 0)) && (CASE WHEN (hd_vehicle_count#18 > 0) THEN (cast(hd_dep_count#20 as double) / cast(hd_vehicle_count#18 as double)) ELSE null END > 1.2)) && isnotnull(hd_demo_sk#10)) + : +- *(3) FileScan parquet default.household_demographics[hd_demo_sk#10,hd_buy_potential#19,hd_dep_count#20,hd_vehicle_count#18] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/household_demographics], PartitionFilters: [], PushedFilters: [IsNotNull(hd_vehicle_count), Or(EqualTo(hd_buy_potential,>10000),EqualTo(hd_buy_potential,unknow..., ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(5) Project [c_customer_sk#8, c_salutation#3, c_first_name#2, c_last_name#1, c_preferred_cust_flag#4] + +- *(5) Filter isnotnull(c_customer_sk#8) + +- *(5) FileScan parquet default.customer[c_customer_sk#8,c_salutation#3,c_first_name#2,c_last_name#1,c_preferred_cust_flag#4] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk)], ReadSchema: struct + : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : +- *(2) Project [ss_customer_sk#29] + : : : : +- *(2) BroadcastHashJoin [ss_sold_date_sk#30], [d_date_sk#31], Inner, BuildRight + : : : : :- *(2) Project [ss_sold_date_sk#30, ss_customer_sk#29] + : : : : : +- *(2) Filter isnotnull(ss_sold_date_sk#30) + : : : : : +- *(2) FileScan parquet 
default.store_sales[ss_sold_date_sk#30,ss_customer_sk#29] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk)], ReadSchema: struct + : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : +- *(1) Project [d_date_sk#31] + : : : : +- *(1) Filter ((((isnotnull(d_year#32) && isnotnull(d_qoy#33)) && (d_year#32 = 2002)) && (d_qoy#33 < 4)) && isnotnull(d_date_sk#31)) + : : : : +- *(1) FileScan parquet default.date_dim[d_date_sk#31,d_year#32,d_qoy#33] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_qoy), EqualTo(d_year,2002), LessThan(d_qoy,4), IsNotNull(d_date_sk)], ReadSchema: struct + : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : +- *(4) Project [ws_bill_customer_sk#28] + : : : +- *(4) BroadcastHashJoin [ws_sold_date_sk#34], [d_date_sk#31], Inner, BuildRight + : : : :- *(4) Project [ws_sold_date_sk#34, ws_bill_customer_sk#28] + : : : : +- *(4) Filter isnotnull(ws_sold_date_sk#34) + : : : : +- *(4) FileScan parquet default.web_sales[ws_sold_date_sk#34,ws_bill_customer_sk#28] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_sold_date_sk)], ReadSchema: struct + : : : +- ReusedExchange [d_date_sk#31], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(6) Project [cs_ship_customer_sk#27] + : : +- *(6) BroadcastHashJoin [cs_sold_date_sk#35], [d_date_sk#31], Inner, BuildRight + : : :- *(6) Project 
[cs_sold_date_sk#35, cs_ship_customer_sk#27] + : : : +- *(6) Filter isnotnull(cs_sold_date_sk#35) + : : : +- *(6) FileScan parquet default.catalog_sales[cs_sold_date_sk#35,cs_ship_customer_sk#27] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_sold_date_sk)], ReadSchema: struct + : : +- ReusedExchange [d_date_sk#31], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(7) Project [ca_address_sk#23, ca_state#1] + : +- *(7) Filter isnotnull(ca_address_sk#23) + : +- *(7) FileScan parquet default.customer_address[ca_address_sk#23,ca_state#1] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_address_sk)], ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(8) Project [cd_demo_sk#21, cd_gender#2, cd_marital_status#3, cd_dep_count#19, cd_dep_employed_count#5, cd_dep_college_count#6] + +- *(8) Filter isnotnull(cd_demo_sk#21) + +- *(8) FileScan parquet default.customer_demographics[cd_demo_sk#21,cd_gender#2,cd_marital_status#3,cd_dep_count#19,cd_dep_employed_count#5,cd_dep_college_count#6] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_demographics], PartitionFilters: [], PushedFilters: [IsNotNull(cd_demo_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(2) Project [i_item_sk#19, i_class#15, i_category#14] + : +- *(2) Filter isnotnull(i_item_sk#19) + : +- *(2) FileScan parquet default.item[i_item_sk#19,i_class#15,i_category#14] Batched: true, 
Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(3) Project [s_store_sk#17] + +- *(3) Filter ((isnotnull(s_state#23) && (s_state#23 = TN)) && isnotnull(s_store_sk#17)) + +- *(3) FileScan parquet default.store[s_store_sk#17,s_state#23] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_state), EqualTo(s_state,TN), IsNotNull(s_store_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q36/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q36/simplified.txt new file mode 100644 index 000000000..ac9f32c0a --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q36/simplified.txt @@ -0,0 +1,43 @@ +TakeOrderedAndProject [i_class,i_category,lochierarchy,gross_margin,rank_within_parent] + WholeStageCodegen + Project [i_class,i_category,lochierarchy,gross_margin,rank_within_parent] + InputAdapter + Window [_w3,_w1,_w2] + WholeStageCodegen + Sort [_w1,_w2,_w3] + InputAdapter + Exchange [_w1,_w2] #1 + WholeStageCodegen + HashAggregate [sum,i_class,i_category,sum,sum(UnscaledValue(ss_net_profit)),sum(UnscaledValue(ss_ext_sales_price)),spark_grouping_id] [_w2,sum,_w1,sum,lochierarchy,sum(UnscaledValue(ss_net_profit)),_w3,sum(UnscaledValue(ss_ext_sales_price)),gross_margin] + InputAdapter + Exchange [i_category,i_class,spark_grouping_id] #2 + WholeStageCodegen + HashAggregate [sum,sum,sum,i_class,i_category,sum,ss_ext_sales_price,spark_grouping_id,ss_net_profit] [sum,sum,sum,sum] + Expand [ss_ext_sales_price,ss_net_profit,i_category,i_class] + Project 
[ss_ext_sales_price,ss_net_profit,i_category,i_class] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [i_class,ss_store_sk,ss_ext_sales_price,i_category,ss_net_profit] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [ss_item_sk,ss_store_sk,ss_ext_sales_price,ss_net_profit] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_item_sk,ss_store_sk,ss_ext_sales_price,ss_net_profit,ss_sold_date_sk] + Filter [ss_sold_date_sk,ss_item_sk,ss_store_sk] + Scan parquet default.store_sales [ss_item_sk,ss_store_sk,ss_ext_sales_price,ss_net_profit,ss_sold_date_sk] [ss_item_sk,ss_store_sk,ss_ext_sales_price,ss_net_profit,ss_sold_date_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen + Project [d_date_sk] + Filter [d_year,d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_year] [d_date_sk,d_year] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen + Project [i_item_sk,i_class,i_category] + Filter [i_item_sk] + Scan parquet default.item [i_item_sk,i_class,i_category] [i_item_sk,i_class,i_category] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen + Project [s_store_sk] + Filter [s_state,s_store_sk] + Scan parquet default.store [s_store_sk,s_state] [s_store_sk,s_state] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q37/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q37/explain.txt new file mode 100644 index 000000000..d9db47342 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q37/explain.txt @@ -0,0 +1,26 @@ +== Physical Plan == +TakeOrderedAndProject(limit=100, orderBy=[i_item_id#1 ASC NULLS FIRST], output=[i_item_id#1,i_item_desc#2,i_current_price#3]) ++- *(5) HashAggregate(keys=[i_item_id#1, i_item_desc#2, i_current_price#3], functions=[]) + +- Exchange hashpartitioning(i_item_id#1, i_item_desc#2, i_current_price#3, 200) + +- *(4) HashAggregate(keys=[i_item_id#1, i_item_desc#2, i_current_price#3], functions=[]) + +- *(4) Project [i_item_id#1, 
i_item_desc#2, i_current_price#3] + +- *(4) BroadcastHashJoin [i_item_sk#4], [cs_item_sk#5], Inner, BuildRight + :- *(4) Project [i_item_sk#4, i_item_id#1, i_item_desc#2, i_current_price#3] + : +- *(4) BroadcastHashJoin [inv_date_sk#6], [d_date_sk#7], Inner, BuildRight + : :- *(4) Project [i_item_sk#4, i_item_id#1, i_item_desc#2, i_current_price#3, inv_date_sk#6] + : : +- *(4) BroadcastHashJoin [i_item_sk#4], [inv_item_sk#8], Inner, BuildRight + : : :- *(4) Project [i_item_sk#4, i_item_id#1, i_item_desc#2, i_current_price#3] + : : : +- *(4) Filter ((((isnotnull(i_current_price#3) && (i_current_price#3 >= 68.00)) && (cast(i_current_price#3 as decimal(12,2)) <= 98.00)) && i_manufact_id#9 IN (677,940,694,808)) && isnotnull(i_item_sk#4)) + : : : +- *(4) FileScan parquet default.item[i_item_sk#4,i_item_id#1,i_item_desc#2,i_current_price#3,i_manufact_id#9] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_current_price), GreaterThanOrEqual(i_current_price,68.00), In(i_manufact_id, [677,94..., ReadSchema: struct= 100)) && (inv_quantity_on_hand#10 <= 500)) && isnotnull(inv_item_sk#8)) && isnotnull(inv_date_sk#6)) + : : +- *(1) FileScan parquet default.inventory[inv_date_sk#6,inv_item_sk#8,inv_quantity_on_hand#10] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/inventory], PartitionFilters: [], PushedFilters: [IsNotNull(inv_quantity_on_hand), GreaterThanOrEqual(inv_quantity_on_hand,100), LessThanOrEqual(i..., ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(2) Project [d_date_sk#7] + : +- *(2) Filter (((isnotnull(d_date#11) && (d_date#11 >= 10988)) && (d_date#11 <= 11048)) && isnotnull(d_date_sk#7)) + : +- *(2) FileScan parquet default.date_dim[d_date_sk#7,d_date#11] Batched: true, Format: Parquet, Location: 
InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,2000-02-01), LessThanOrEqual(d_date,2000-04-01), Is..., ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(3) Project [cs_item_sk#5] + +- *(3) Filter isnotnull(cs_item_sk#5) + +- *(3) FileScan parquet default.catalog_sales[cs_item_sk#5] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_item_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q37/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q37/simplified.txt new file mode 100644 index 000000000..afe830f9d --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q37/simplified.txt @@ -0,0 +1,34 @@ +TakeOrderedAndProject [i_item_id,i_item_desc,i_current_price] + WholeStageCodegen + HashAggregate [i_item_id,i_item_desc,i_current_price] + InputAdapter + Exchange [i_item_id,i_item_desc,i_current_price] #1 + WholeStageCodegen + HashAggregate [i_item_id,i_item_desc,i_current_price] + Project [i_item_id,i_item_desc,i_current_price] + BroadcastHashJoin [i_item_sk,cs_item_sk] + Project [i_item_sk,i_item_id,i_item_desc,i_current_price] + BroadcastHashJoin [inv_date_sk,d_date_sk] + Project [i_current_price,i_item_sk,inv_date_sk,i_item_desc,i_item_id] + BroadcastHashJoin [i_item_sk,inv_item_sk] + Project [i_item_sk,i_item_id,i_item_desc,i_current_price] + Filter [i_current_price,i_manufact_id,i_item_sk] + Scan parquet default.item [i_current_price,i_manufact_id,i_item_sk,i_item_desc,i_item_id] [i_current_price,i_manufact_id,i_item_sk,i_item_desc,i_item_id] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen + Project 
[inv_date_sk,inv_item_sk] + Filter [inv_quantity_on_hand,inv_item_sk,inv_date_sk] + Scan parquet default.inventory [inv_date_sk,inv_item_sk,inv_quantity_on_hand] [inv_date_sk,inv_item_sk,inv_quantity_on_hand] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen + Project [d_date_sk] + Filter [d_date,d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_date] [d_date_sk,d_date] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen + Project [cs_item_sk] + Filter [cs_item_sk] + Scan parquet default.catalog_sales [cs_item_sk] [cs_item_sk] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q38/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q38/explain.txt new file mode 100644 index 000000000..ac423d3c3 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q38/explain.txt @@ -0,0 +1,55 @@ +== Physical Plan == +CollectLimit 100 ++- *(13) HashAggregate(keys=[], functions=[count(1)]) + +- Exchange SinglePartition + +- *(12) HashAggregate(keys=[], functions=[partial_count(1)]) + +- *(12) HashAggregate(keys=[c_last_name#1, c_first_name#2, d_date#3], functions=[]) + +- *(12) HashAggregate(keys=[c_last_name#1, c_first_name#2, d_date#3], functions=[]) + +- *(12) BroadcastHashJoin [coalesce(c_last_name#1, ), coalesce(c_first_name#2, ), coalesce(d_date#3, 0)], [coalesce(c_last_name#4, ), coalesce(c_first_name#5, ), coalesce(d_date#6, 0)], LeftSemi, BuildRight, (((c_last_name#1 <=> c_last_name#4) && (c_first_name#2 <=> c_first_name#5)) && (d_date#3 <=> d_date#6)) + :- *(12) HashAggregate(keys=[c_last_name#1, c_first_name#2, d_date#3], functions=[]) + : +- *(12) HashAggregate(keys=[c_last_name#1, c_first_name#2, d_date#3], functions=[]) + : +- *(12) BroadcastHashJoin [coalesce(c_last_name#1, ), coalesce(c_first_name#2, ), coalesce(d_date#3, 0)], [coalesce(c_last_name#7, ), coalesce(c_first_name#8, ), coalesce(d_date#9, 0)], LeftSemi, BuildRight, (((c_last_name#1 <=> c_last_name#7) && 
(c_first_name#2 <=> c_first_name#8)) && (d_date#3 <=> d_date#9)) + : :- *(12) HashAggregate(keys=[c_last_name#1, c_first_name#2, d_date#3], functions=[]) + : : +- Exchange hashpartitioning(c_last_name#1, c_first_name#2, d_date#3, 200) + : : +- *(3) HashAggregate(keys=[c_last_name#1, c_first_name#2, d_date#3], functions=[]) + : : +- *(3) Project [c_last_name#1, c_first_name#2, d_date#3] + : : +- *(3) BroadcastHashJoin [ss_customer_sk#10], [c_customer_sk#11], Inner, BuildRight + : : :- *(3) Project [ss_customer_sk#10, d_date#3] + : : : +- *(3) BroadcastHashJoin [ss_sold_date_sk#12], [d_date_sk#13], Inner, BuildRight + : : : :- *(3) Project [ss_sold_date_sk#12, ss_customer_sk#10] + : : : : +- *(3) Filter (isnotnull(ss_sold_date_sk#12) && isnotnull(ss_customer_sk#10)) + : : : : +- *(3) FileScan parquet default.store_sales[ss_sold_date_sk#12,ss_customer_sk#10] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_customer_sk)], ReadSchema: struct + : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : +- *(1) Project [d_date_sk#13, d_date#3] + : : : +- *(1) Filter (((isnotnull(d_month_seq#14) && (d_month_seq#14 >= 1200)) && (d_month_seq#14 <= 1211)) && isnotnull(d_date_sk#13)) + : : : +- *(1) FileScan parquet default.date_dim[d_date_sk#13,d_date#3,d_month_seq#14] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1200), LessThanOrEqual(d_month_seq,1211),..., ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(2) Project [c_customer_sk#11, c_first_name#2, c_last_name#1] + : : +- *(2) Filter isnotnull(c_customer_sk#11) + : : 
+- *(2) FileScan parquet default.customer[c_customer_sk#11,c_first_name#2,c_last_name#1] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(coalesce(input[0, string, true], ), coalesce(input[1, string, true], ), coalesce(input[2, date, true], 0))) + : +- *(7) HashAggregate(keys=[c_last_name#7, c_first_name#8, d_date#9], functions=[]) + : +- Exchange hashpartitioning(c_last_name#7, c_first_name#8, d_date#9, 200) + : +- *(6) HashAggregate(keys=[c_last_name#7, c_first_name#8, d_date#9], functions=[]) + : +- *(6) Project [c_last_name#7, c_first_name#8, d_date#9] + : +- *(6) BroadcastHashJoin [cs_bill_customer_sk#15], [c_customer_sk#16], Inner, BuildRight + : :- *(6) Project [cs_bill_customer_sk#15, d_date#9] + : : +- *(6) BroadcastHashJoin [cs_sold_date_sk#17], [d_date_sk#18], Inner, BuildRight + : : :- *(6) Project [cs_sold_date_sk#17, cs_bill_customer_sk#15] + : : : +- *(6) Filter (isnotnull(cs_sold_date_sk#17) && isnotnull(cs_bill_customer_sk#15)) + : : : +- *(6) FileScan parquet default.catalog_sales[cs_sold_date_sk#17,cs_bill_customer_sk#15] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_sold_date_sk), IsNotNull(cs_bill_customer_sk)], ReadSchema: struct + : : +- ReusedExchange [d_date_sk#18, d_date#9], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- ReusedExchange [c_customer_sk#16, c_first_name#8, c_last_name#7], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- BroadcastExchange HashedRelationBroadcastMode(List(coalesce(input[0, string, true], ), coalesce(input[1, string, true], ), coalesce(input[2, date, true], 0))) + 
+- *(11) HashAggregate(keys=[c_last_name#4, c_first_name#5, d_date#6], functions=[]) + +- Exchange hashpartitioning(c_last_name#4, c_first_name#5, d_date#6, 200) + +- *(10) HashAggregate(keys=[c_last_name#4, c_first_name#5, d_date#6], functions=[]) + +- *(10) Project [c_last_name#4, c_first_name#5, d_date#6] + +- *(10) BroadcastHashJoin [ws_bill_customer_sk#19], [c_customer_sk#20], Inner, BuildRight + :- *(10) Project [ws_bill_customer_sk#19, d_date#6] + : +- *(10) BroadcastHashJoin [ws_sold_date_sk#21], [d_date_sk#22], Inner, BuildRight + : :- *(10) Project [ws_sold_date_sk#21, ws_bill_customer_sk#19] + : : +- *(10) Filter (isnotnull(ws_sold_date_sk#21) && isnotnull(ws_bill_customer_sk#19)) + : : +- *(10) FileScan parquet default.web_sales[ws_sold_date_sk#21,ws_bill_customer_sk#19] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_sold_date_sk), IsNotNull(ws_bill_customer_sk)], ReadSchema: struct + : +- ReusedExchange [d_date_sk#22, d_date#6], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- ReusedExchange [c_customer_sk#20, c_first_name#5, c_last_name#4], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q38/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q38/simplified.txt new file mode 100644 index 000000000..caec9c677 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q38/simplified.txt @@ -0,0 +1,75 @@ +CollectLimit + WholeStageCodegen + HashAggregate [count,count(1)] [count(1),count(1),count] + InputAdapter + Exchange #1 + WholeStageCodegen + HashAggregate [count,count] [count,count] + HashAggregate [c_last_name,c_first_name,d_date] + HashAggregate [c_last_name,c_first_name,d_date] + 
BroadcastHashJoin [c_last_name,c_first_name,d_date,c_last_name,d_date,c_first_name] + HashAggregate [c_last_name,c_first_name,d_date] + HashAggregate [c_last_name,c_first_name,d_date] + BroadcastHashJoin [c_first_name,d_date,d_date,c_last_name,c_last_name,c_first_name] + HashAggregate [c_last_name,c_first_name,d_date] + InputAdapter + Exchange [c_last_name,c_first_name,d_date] #2 + WholeStageCodegen + HashAggregate [c_last_name,c_first_name,d_date] + Project [c_last_name,c_first_name,d_date] + BroadcastHashJoin [ss_customer_sk,c_customer_sk] + Project [ss_customer_sk,d_date] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_sold_date_sk,ss_customer_sk] + Filter [ss_sold_date_sk,ss_customer_sk] + Scan parquet default.store_sales [ss_sold_date_sk,ss_customer_sk] [ss_sold_date_sk,ss_customer_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen + Project [d_date_sk,d_date] + Filter [d_month_seq,d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_date,d_month_seq] [d_date_sk,d_date,d_month_seq] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen + Project [c_customer_sk,c_first_name,c_last_name] + Filter [c_customer_sk] + Scan parquet default.customer [c_customer_sk,c_first_name,c_last_name] [c_customer_sk,c_first_name,c_last_name] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen + HashAggregate [c_last_name,c_first_name,d_date] + InputAdapter + Exchange [c_last_name,c_first_name,d_date] #6 + WholeStageCodegen + HashAggregate [c_last_name,c_first_name,d_date] + Project [c_last_name,c_first_name,d_date] + BroadcastHashJoin [cs_bill_customer_sk,c_customer_sk] + Project [cs_bill_customer_sk,d_date] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_sold_date_sk,cs_bill_customer_sk] + Filter [cs_sold_date_sk,cs_bill_customer_sk] + Scan parquet default.catalog_sales [cs_sold_date_sk,cs_bill_customer_sk] [cs_sold_date_sk,cs_bill_customer_sk] + InputAdapter + ReusedExchange [d_date_sk,d_date] [d_date_sk,d_date] #3 + 
InputAdapter + ReusedExchange [c_customer_sk,c_first_name,c_last_name] [c_customer_sk,c_first_name,c_last_name] #4 + InputAdapter + BroadcastExchange #7 + WholeStageCodegen + HashAggregate [c_last_name,c_first_name,d_date] + InputAdapter + Exchange [c_last_name,c_first_name,d_date] #8 + WholeStageCodegen + HashAggregate [c_last_name,c_first_name,d_date] + Project [c_last_name,c_first_name,d_date] + BroadcastHashJoin [ws_bill_customer_sk,c_customer_sk] + Project [ws_bill_customer_sk,d_date] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Project [ws_sold_date_sk,ws_bill_customer_sk] + Filter [ws_sold_date_sk,ws_bill_customer_sk] + Scan parquet default.web_sales [ws_sold_date_sk,ws_bill_customer_sk] [ws_sold_date_sk,ws_bill_customer_sk] + InputAdapter + ReusedExchange [d_date_sk,d_date] [d_date_sk,d_date] #3 + InputAdapter + ReusedExchange [c_customer_sk,c_first_name,c_last_name] [c_customer_sk,c_first_name,c_last_name] #4 diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q39a/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q39a/explain.txt new file mode 100644 index 000000000..5d89d65da --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q39a/explain.txt @@ -0,0 +1,51 @@ +== Physical Plan == +*(11) Sort [w_warehouse_sk#1 ASC NULLS FIRST, i_item_sk#2 ASC NULLS FIRST, d_moy#3 ASC NULLS FIRST, mean#4 ASC NULLS FIRST, cov#5 ASC NULLS FIRST, d_moy#6 ASC NULLS FIRST, mean#7 ASC NULLS FIRST, cov#8 ASC NULLS FIRST], true, 0 ++- Exchange rangepartitioning(w_warehouse_sk#1 ASC NULLS FIRST, i_item_sk#2 ASC NULLS FIRST, d_moy#3 ASC NULLS FIRST, mean#4 ASC NULLS FIRST, cov#5 ASC NULLS FIRST, d_moy#6 ASC NULLS FIRST, mean#7 ASC NULLS FIRST, cov#8 ASC NULLS FIRST, 200) + +- *(10) BroadcastHashJoin [i_item_sk#2, w_warehouse_sk#1], [i_item_sk#9, w_warehouse_sk#10], Inner, BuildRight + :- *(10) Project [w_warehouse_sk#1, i_item_sk#2, d_moy#3, mean#4, CASE WHEN (mean#4 = 0.0) THEN null ELSE (stdev#11 / 
mean#4) END AS cov#5] + : +- *(10) Filter (CASE WHEN (mean#4 = 0.0) THEN 0.0 ELSE (stdev#11 / mean#4) END > 1.0) + : +- *(10) HashAggregate(keys=[w_warehouse_name#12, w_warehouse_sk#1, i_item_sk#2, d_moy#3], functions=[stddev_samp(cast(inv_quantity_on_hand#13 as double)), avg(cast(inv_quantity_on_hand#13 as bigint))]) + : +- Exchange hashpartitioning(w_warehouse_name#12, w_warehouse_sk#1, i_item_sk#2, d_moy#3, 200) + : +- *(4) HashAggregate(keys=[w_warehouse_name#12, w_warehouse_sk#1, i_item_sk#2, d_moy#3], functions=[partial_stddev_samp(cast(inv_quantity_on_hand#13 as double)), partial_avg(cast(inv_quantity_on_hand#13 as bigint))]) + : +- *(4) Project [inv_quantity_on_hand#13, i_item_sk#2, w_warehouse_sk#1, w_warehouse_name#12, d_moy#3] + : +- *(4) BroadcastHashJoin [inv_date_sk#14], [d_date_sk#15], Inner, BuildRight + : :- *(4) Project [inv_date_sk#14, inv_quantity_on_hand#13, i_item_sk#2, w_warehouse_sk#1, w_warehouse_name#12] + : : +- *(4) BroadcastHashJoin [inv_warehouse_sk#16], [w_warehouse_sk#1], Inner, BuildRight + : : :- *(4) Project [inv_date_sk#14, inv_warehouse_sk#16, inv_quantity_on_hand#13, i_item_sk#2] + : : : +- *(4) BroadcastHashJoin [inv_item_sk#17], [i_item_sk#2], Inner, BuildRight + : : : :- *(4) Project [inv_date_sk#14, inv_item_sk#17, inv_warehouse_sk#16, inv_quantity_on_hand#13] + : : : : +- *(4) Filter ((isnotnull(inv_item_sk#17) && isnotnull(inv_warehouse_sk#16)) && isnotnull(inv_date_sk#14)) + : : : : +- *(4) FileScan parquet default.inventory[inv_date_sk#14,inv_item_sk#17,inv_warehouse_sk#16,inv_quantity_on_hand#13] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/inventory], PartitionFilters: [], PushedFilters: [IsNotNull(inv_item_sk), IsNotNull(inv_warehouse_sk), IsNotNull(inv_date_sk)], ReadSchema: struct + : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : +- *(1) Project [i_item_sk#2] + : : : +- *(1) Filter 
isnotnull(i_item_sk#2) + : : : +- *(1) FileScan parquet default.item[i_item_sk#2] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(2) Project [w_warehouse_sk#1, w_warehouse_name#12] + : : +- *(2) Filter isnotnull(w_warehouse_sk#1) + : : +- *(2) FileScan parquet default.warehouse[w_warehouse_sk#1,w_warehouse_name#12] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/warehouse], PartitionFilters: [], PushedFilters: [IsNotNull(w_warehouse_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(3) Project [d_date_sk#15, d_moy#3] + : +- *(3) Filter ((((isnotnull(d_year#18) && isnotnull(d_moy#3)) && (d_year#18 = 2001)) && (d_moy#3 = 1)) && isnotnull(d_date_sk#15)) + : +- *(3) FileScan parquet default.date_dim[d_date_sk#15,d_year#18,d_moy#3] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2001), EqualTo(d_moy,1), IsNotNull(d_date_sk)], ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List((shiftleft(cast(input[1, int, true] as bigint), 32) | (cast(input[0, int, true] as bigint) & 4294967295)))) + +- *(9) Project [w_warehouse_sk#10, i_item_sk#9, d_moy#6, mean#7, CASE WHEN (mean#7 = 0.0) THEN null ELSE (stdev#19 / mean#7) END AS cov#8] + +- *(9) Filter (CASE WHEN (mean#7 = 0.0) THEN 0.0 ELSE (stdev#19 / mean#7) END > 1.0) + +- *(9) HashAggregate(keys=[w_warehouse_name#20, w_warehouse_sk#10, i_item_sk#9, d_moy#6], functions=[stddev_samp(cast(inv_quantity_on_hand#13 as double)), 
avg(cast(inv_quantity_on_hand#13 as bigint))]) + +- Exchange hashpartitioning(w_warehouse_name#20, w_warehouse_sk#10, i_item_sk#9, d_moy#6, 200) + +- *(8) HashAggregate(keys=[w_warehouse_name#20, w_warehouse_sk#10, i_item_sk#9, d_moy#6], functions=[partial_stddev_samp(cast(inv_quantity_on_hand#13 as double)), partial_avg(cast(inv_quantity_on_hand#13 as bigint))]) + +- *(8) Project [inv_quantity_on_hand#13, i_item_sk#9, w_warehouse_sk#10, w_warehouse_name#20, d_moy#6] + +- *(8) BroadcastHashJoin [inv_date_sk#14], [d_date_sk#21], Inner, BuildRight + :- *(8) Project [inv_date_sk#14, inv_quantity_on_hand#13, i_item_sk#9, w_warehouse_sk#10, w_warehouse_name#20] + : +- *(8) BroadcastHashJoin [inv_warehouse_sk#16], [w_warehouse_sk#10], Inner, BuildRight + : :- *(8) Project [inv_date_sk#14, inv_warehouse_sk#16, inv_quantity_on_hand#13, i_item_sk#9] + : : +- *(8) BroadcastHashJoin [inv_item_sk#17], [i_item_sk#9], Inner, BuildRight + : : :- *(8) Project [inv_date_sk#14, inv_item_sk#17, inv_warehouse_sk#16, inv_quantity_on_hand#13] + : : : +- *(8) Filter ((isnotnull(inv_item_sk#17) && isnotnull(inv_warehouse_sk#16)) && isnotnull(inv_date_sk#14)) + : : : +- *(8) FileScan parquet default.inventory[inv_date_sk#14,inv_item_sk#17,inv_warehouse_sk#16,inv_quantity_on_hand#13] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/inventory], PartitionFilters: [], PushedFilters: [IsNotNull(inv_item_sk), IsNotNull(inv_warehouse_sk), IsNotNull(inv_date_sk)], ReadSchema: struct + : : +- ReusedExchange [i_item_sk#9], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- ReusedExchange [w_warehouse_sk#10, w_warehouse_name#20], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(7) Project [d_date_sk#21, d_moy#6] + +- *(7) Filter 
((((isnotnull(d_year#22) && isnotnull(d_moy#6)) && (d_year#22 = 2001)) && (d_moy#6 = 2)) && isnotnull(d_date_sk#21)) + +- *(7) FileScan parquet default.date_dim[d_date_sk#21,d_year#22,d_moy#6] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2001), EqualTo(d_moy,2), IsNotNull(d_date_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q39a/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q39a/simplified.txt new file mode 100644 index 000000000..0fadf4ea4 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q39a/simplified.txt @@ -0,0 +1,69 @@ +WholeStageCodegen + Sort [d_moy,mean,i_item_sk,w_warehouse_sk,d_moy,mean,cov,cov] + InputAdapter + Exchange [d_moy,mean,i_item_sk,w_warehouse_sk,d_moy,mean,cov,cov] #1 + WholeStageCodegen + BroadcastHashJoin [i_item_sk,w_warehouse_sk,i_item_sk,w_warehouse_sk] + Project [d_moy,mean,w_warehouse_sk,i_item_sk,stdev] + Filter [mean,stdev] + HashAggregate [m2,w_warehouse_sk,d_moy,sum,count,n,w_warehouse_name,avg,stddev_samp(cast(inv_quantity_on_hand as double)),i_item_sk,avg(cast(inv_quantity_on_hand as bigint))] [stdev,mean,m2,sum,count,n,avg,stddev_samp(cast(inv_quantity_on_hand as double)),avg(cast(inv_quantity_on_hand as bigint))] + InputAdapter + Exchange [w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy] #2 + WholeStageCodegen + HashAggregate [m2,w_warehouse_sk,d_moy,m2,sum,count,n,w_warehouse_name,sum,avg,i_item_sk,inv_quantity_on_hand,avg,count,n] [m2,m2,sum,count,n,sum,avg,avg,count,n] + Project [d_moy,inv_quantity_on_hand,w_warehouse_sk,i_item_sk,w_warehouse_name] + BroadcastHashJoin [inv_date_sk,d_date_sk] + Project [inv_quantity_on_hand,w_warehouse_sk,i_item_sk,w_warehouse_name,inv_date_sk] + BroadcastHashJoin 
[inv_warehouse_sk,w_warehouse_sk] + Project [inv_date_sk,inv_warehouse_sk,inv_quantity_on_hand,i_item_sk] + BroadcastHashJoin [inv_item_sk,i_item_sk] + Project [inv_date_sk,inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand] + Filter [inv_item_sk,inv_warehouse_sk,inv_date_sk] + Scan parquet default.inventory [inv_date_sk,inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand] [inv_date_sk,inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen + Project [i_item_sk] + Filter [i_item_sk] + Scan parquet default.item [i_item_sk] [i_item_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen + Project [w_warehouse_sk,w_warehouse_name] + Filter [w_warehouse_sk] + Scan parquet default.warehouse [w_warehouse_sk,w_warehouse_name] [w_warehouse_sk,w_warehouse_name] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen + Project [d_date_sk,d_moy] + Filter [d_year,d_moy,d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_year,d_moy] [d_date_sk,d_year,d_moy] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen + Project [i_item_sk,d_moy,mean,stdev,w_warehouse_sk] + Filter [mean,stdev] + HashAggregate [m2,i_item_sk,sum,count,w_warehouse_name,n,d_moy,avg,stddev_samp(cast(inv_quantity_on_hand as double)),w_warehouse_sk,avg(cast(inv_quantity_on_hand as bigint))] [m2,sum,count,n,mean,avg,stddev_samp(cast(inv_quantity_on_hand as double)),stdev,avg(cast(inv_quantity_on_hand as bigint))] + InputAdapter + Exchange [w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy] #7 + WholeStageCodegen + HashAggregate [m2,m2,i_item_sk,sum,count,w_warehouse_name,n,d_moy,sum,avg,w_warehouse_sk,inv_quantity_on_hand,avg,count,n] [m2,m2,sum,count,n,sum,avg,avg,count,n] + Project [w_warehouse_sk,inv_quantity_on_hand,w_warehouse_name,i_item_sk,d_moy] + BroadcastHashJoin [inv_date_sk,d_date_sk] + Project [w_warehouse_sk,inv_quantity_on_hand,w_warehouse_name,i_item_sk,inv_date_sk] + BroadcastHashJoin [inv_warehouse_sk,w_warehouse_sk] + Project 
[inv_date_sk,inv_warehouse_sk,inv_quantity_on_hand,i_item_sk] + BroadcastHashJoin [inv_item_sk,i_item_sk] + Project [inv_date_sk,inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand] + Filter [inv_item_sk,inv_warehouse_sk,inv_date_sk] + Scan parquet default.inventory [inv_date_sk,inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand] [inv_date_sk,inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand] + InputAdapter + ReusedExchange [i_item_sk] [i_item_sk] #3 + InputAdapter + ReusedExchange [w_warehouse_sk,w_warehouse_name] [w_warehouse_sk,w_warehouse_name] #4 + InputAdapter + BroadcastExchange #8 + WholeStageCodegen + Project [d_date_sk,d_moy] + Filter [d_year,d_moy,d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_year,d_moy] [d_date_sk,d_year,d_moy] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q39b/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q39b/explain.txt new file mode 100644 index 000000000..bd660d199 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q39b/explain.txt @@ -0,0 +1,51 @@ +== Physical Plan == +*(11) Sort [w_warehouse_sk#1 ASC NULLS FIRST, i_item_sk#2 ASC NULLS FIRST, d_moy#3 ASC NULLS FIRST, mean#4 ASC NULLS FIRST, cov#5 ASC NULLS FIRST, d_moy#6 ASC NULLS FIRST, mean#7 ASC NULLS FIRST, cov#8 ASC NULLS FIRST], true, 0 ++- Exchange rangepartitioning(w_warehouse_sk#1 ASC NULLS FIRST, i_item_sk#2 ASC NULLS FIRST, d_moy#3 ASC NULLS FIRST, mean#4 ASC NULLS FIRST, cov#5 ASC NULLS FIRST, d_moy#6 ASC NULLS FIRST, mean#7 ASC NULLS FIRST, cov#8 ASC NULLS FIRST, 200) + +- *(10) BroadcastHashJoin [i_item_sk#2, w_warehouse_sk#1], [i_item_sk#9, w_warehouse_sk#10], Inner, BuildRight + :- *(10) Project [w_warehouse_sk#1, i_item_sk#2, d_moy#3, mean#4, CASE WHEN (mean#4 = 0.0) THEN null ELSE (stdev#11 / mean#4) END AS cov#5] + : +- *(10) Filter ((CASE WHEN (mean#4 = 0.0) THEN 0.0 ELSE (stdev#11 / mean#4) END > 1.0) && (CASE WHEN (mean#4 = 0.0) THEN null ELSE (stdev#11 / mean#4) END > 
1.5)) + : +- *(10) HashAggregate(keys=[w_warehouse_name#12, w_warehouse_sk#1, i_item_sk#2, d_moy#3], functions=[stddev_samp(cast(inv_quantity_on_hand#13 as double)), avg(cast(inv_quantity_on_hand#13 as bigint))]) + : +- Exchange hashpartitioning(w_warehouse_name#12, w_warehouse_sk#1, i_item_sk#2, d_moy#3, 200) + : +- *(4) HashAggregate(keys=[w_warehouse_name#12, w_warehouse_sk#1, i_item_sk#2, d_moy#3], functions=[partial_stddev_samp(cast(inv_quantity_on_hand#13 as double)), partial_avg(cast(inv_quantity_on_hand#13 as bigint))]) + : +- *(4) Project [inv_quantity_on_hand#13, i_item_sk#2, w_warehouse_sk#1, w_warehouse_name#12, d_moy#3] + : +- *(4) BroadcastHashJoin [inv_date_sk#14], [d_date_sk#15], Inner, BuildRight + : :- *(4) Project [inv_date_sk#14, inv_quantity_on_hand#13, i_item_sk#2, w_warehouse_sk#1, w_warehouse_name#12] + : : +- *(4) BroadcastHashJoin [inv_warehouse_sk#16], [w_warehouse_sk#1], Inner, BuildRight + : : :- *(4) Project [inv_date_sk#14, inv_warehouse_sk#16, inv_quantity_on_hand#13, i_item_sk#2] + : : : +- *(4) BroadcastHashJoin [inv_item_sk#17], [i_item_sk#2], Inner, BuildRight + : : : :- *(4) Project [inv_date_sk#14, inv_item_sk#17, inv_warehouse_sk#16, inv_quantity_on_hand#13] + : : : : +- *(4) Filter ((isnotnull(inv_item_sk#17) && isnotnull(inv_warehouse_sk#16)) && isnotnull(inv_date_sk#14)) + : : : : +- *(4) FileScan parquet default.inventory[inv_date_sk#14,inv_item_sk#17,inv_warehouse_sk#16,inv_quantity_on_hand#13] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/inventory], PartitionFilters: [], PushedFilters: [IsNotNull(inv_item_sk), IsNotNull(inv_warehouse_sk), IsNotNull(inv_date_sk)], ReadSchema: struct + : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : +- *(1) Project [i_item_sk#2] + : : : +- *(1) Filter isnotnull(i_item_sk#2) + : : : +- *(1) FileScan parquet default.item[i_item_sk#2] Batched: true, Format: 
Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(2) Project [w_warehouse_sk#1, w_warehouse_name#12] + : : +- *(2) Filter isnotnull(w_warehouse_sk#1) + : : +- *(2) FileScan parquet default.warehouse[w_warehouse_sk#1,w_warehouse_name#12] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/warehouse], PartitionFilters: [], PushedFilters: [IsNotNull(w_warehouse_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(3) Project [d_date_sk#15, d_moy#3] + : +- *(3) Filter ((((isnotnull(d_year#18) && isnotnull(d_moy#3)) && (d_year#18 = 2001)) && (d_moy#3 = 1)) && isnotnull(d_date_sk#15)) + : +- *(3) FileScan parquet default.date_dim[d_date_sk#15,d_year#18,d_moy#3] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2001), EqualTo(d_moy,1), IsNotNull(d_date_sk)], ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List((shiftleft(cast(input[1, int, true] as bigint), 32) | (cast(input[0, int, true] as bigint) & 4294967295)))) + +- *(9) Project [w_warehouse_sk#10, i_item_sk#9, d_moy#6, mean#7, CASE WHEN (mean#7 = 0.0) THEN null ELSE (stdev#19 / mean#7) END AS cov#8] + +- *(9) Filter (CASE WHEN (mean#7 = 0.0) THEN 0.0 ELSE (stdev#19 / mean#7) END > 1.0) + +- *(9) HashAggregate(keys=[w_warehouse_name#20, w_warehouse_sk#10, i_item_sk#9, d_moy#6], functions=[stddev_samp(cast(inv_quantity_on_hand#13 as double)), avg(cast(inv_quantity_on_hand#13 as bigint))]) + +- Exchange hashpartitioning(w_warehouse_name#20, 
w_warehouse_sk#10, i_item_sk#9, d_moy#6, 200) + +- *(8) HashAggregate(keys=[w_warehouse_name#20, w_warehouse_sk#10, i_item_sk#9, d_moy#6], functions=[partial_stddev_samp(cast(inv_quantity_on_hand#13 as double)), partial_avg(cast(inv_quantity_on_hand#13 as bigint))]) + +- *(8) Project [inv_quantity_on_hand#13, i_item_sk#9, w_warehouse_sk#10, w_warehouse_name#20, d_moy#6] + +- *(8) BroadcastHashJoin [inv_date_sk#14], [d_date_sk#21], Inner, BuildRight + :- *(8) Project [inv_date_sk#14, inv_quantity_on_hand#13, i_item_sk#9, w_warehouse_sk#10, w_warehouse_name#20] + : +- *(8) BroadcastHashJoin [inv_warehouse_sk#16], [w_warehouse_sk#10], Inner, BuildRight + : :- *(8) Project [inv_date_sk#14, inv_warehouse_sk#16, inv_quantity_on_hand#13, i_item_sk#9] + : : +- *(8) BroadcastHashJoin [inv_item_sk#17], [i_item_sk#9], Inner, BuildRight + : : :- *(8) Project [inv_date_sk#14, inv_item_sk#17, inv_warehouse_sk#16, inv_quantity_on_hand#13] + : : : +- *(8) Filter ((isnotnull(inv_item_sk#17) && isnotnull(inv_warehouse_sk#16)) && isnotnull(inv_date_sk#14)) + : : : +- *(8) FileScan parquet default.inventory[inv_date_sk#14,inv_item_sk#17,inv_warehouse_sk#16,inv_quantity_on_hand#13] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/inventory], PartitionFilters: [], PushedFilters: [IsNotNull(inv_item_sk), IsNotNull(inv_warehouse_sk), IsNotNull(inv_date_sk)], ReadSchema: struct + : : +- ReusedExchange [i_item_sk#9], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- ReusedExchange [w_warehouse_sk#10, w_warehouse_name#20], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(7) Project [d_date_sk#21, d_moy#6] + +- *(7) Filter ((((isnotnull(d_year#22) && isnotnull(d_moy#6)) && (d_year#22 = 2001)) && (d_moy#6 = 2)) && isnotnull(d_date_sk#21)) + 
+- *(7) FileScan parquet default.date_dim[d_date_sk#21,d_year#22,d_moy#6] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2001), EqualTo(d_moy,2), IsNotNull(d_date_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q39b/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q39b/simplified.txt new file mode 100644 index 000000000..846b7cca1 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q39b/simplified.txt @@ -0,0 +1,69 @@ +WholeStageCodegen + Sort [i_item_sk,mean,w_warehouse_sk,d_moy,mean,cov,d_moy,cov] + InputAdapter + Exchange [i_item_sk,mean,w_warehouse_sk,d_moy,mean,cov,d_moy,cov] #1 + WholeStageCodegen + BroadcastHashJoin [i_item_sk,w_warehouse_sk,i_item_sk,w_warehouse_sk] + Project [d_moy,w_warehouse_sk,i_item_sk,stdev,mean] + Filter [mean,stdev] + HashAggregate [avg,w_warehouse_sk,d_moy,w_warehouse_name,m2,avg(cast(inv_quantity_on_hand as bigint)),i_item_sk,n,count,sum,stddev_samp(cast(inv_quantity_on_hand as double))] [avg,m2,stdev,avg(cast(inv_quantity_on_hand as bigint)),n,count,mean,sum,stddev_samp(cast(inv_quantity_on_hand as double))] + InputAdapter + Exchange [w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy] #2 + WholeStageCodegen + HashAggregate [avg,n,w_warehouse_sk,d_moy,count,w_warehouse_name,m2,sum,i_item_sk,inv_quantity_on_hand,m2,n,avg,count,sum] [avg,n,count,m2,sum,m2,n,avg,count,sum] + Project [d_moy,inv_quantity_on_hand,w_warehouse_sk,i_item_sk,w_warehouse_name] + BroadcastHashJoin [inv_date_sk,d_date_sk] + Project [inv_quantity_on_hand,w_warehouse_sk,i_item_sk,w_warehouse_name,inv_date_sk] + BroadcastHashJoin [inv_warehouse_sk,w_warehouse_sk] + Project [inv_date_sk,inv_warehouse_sk,inv_quantity_on_hand,i_item_sk] + BroadcastHashJoin 
[inv_item_sk,i_item_sk] + Project [inv_date_sk,inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand] + Filter [inv_item_sk,inv_warehouse_sk,inv_date_sk] + Scan parquet default.inventory [inv_date_sk,inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand] [inv_date_sk,inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen + Project [i_item_sk] + Filter [i_item_sk] + Scan parquet default.item [i_item_sk] [i_item_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen + Project [w_warehouse_sk,w_warehouse_name] + Filter [w_warehouse_sk] + Scan parquet default.warehouse [w_warehouse_sk,w_warehouse_name] [w_warehouse_sk,w_warehouse_name] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen + Project [d_date_sk,d_moy] + Filter [d_year,d_moy,d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_year,d_moy] [d_date_sk,d_year,d_moy] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen + Project [mean,stdev,d_moy,w_warehouse_sk,i_item_sk] + Filter [mean,stdev] + HashAggregate [avg,m2,avg(cast(inv_quantity_on_hand as bigint)),d_moy,w_warehouse_sk,n,i_item_sk,count,w_warehouse_name,sum,stddev_samp(cast(inv_quantity_on_hand as double))] [avg,stdev,m2,avg(cast(inv_quantity_on_hand as bigint)),n,count,sum,mean,stddev_samp(cast(inv_quantity_on_hand as double))] + InputAdapter + Exchange [w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy] #7 + WholeStageCodegen + HashAggregate [avg,n,count,m2,sum,d_moy,w_warehouse_sk,inv_quantity_on_hand,m2,n,i_item_sk,avg,count,w_warehouse_name,sum] [avg,n,count,m2,sum,m2,n,avg,count,sum] + Project [inv_quantity_on_hand,i_item_sk,w_warehouse_sk,d_moy,w_warehouse_name] + BroadcastHashJoin [inv_date_sk,d_date_sk] + Project [inv_quantity_on_hand,i_item_sk,w_warehouse_sk,inv_date_sk,w_warehouse_name] + BroadcastHashJoin [inv_warehouse_sk,w_warehouse_sk] + Project [inv_date_sk,inv_warehouse_sk,inv_quantity_on_hand,i_item_sk] + BroadcastHashJoin [inv_item_sk,i_item_sk] + Project 
[inv_date_sk,inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand] + Filter [inv_item_sk,inv_warehouse_sk,inv_date_sk] + Scan parquet default.inventory [inv_date_sk,inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand] [inv_date_sk,inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand] + InputAdapter + ReusedExchange [i_item_sk] [i_item_sk] #3 + InputAdapter + ReusedExchange [w_warehouse_sk,w_warehouse_name] [w_warehouse_sk,w_warehouse_name] #4 + InputAdapter + BroadcastExchange #8 + WholeStageCodegen + Project [d_date_sk,d_moy] + Filter [d_year,d_moy,d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_year,d_moy] [d_date_sk,d_year,d_moy] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q4/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q4/explain.txt new file mode 100644 index 000000000..7284720b7 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q4/explain.txt @@ -0,0 +1,124 @@ +== Physical Plan == +TakeOrderedAndProject(limit=100, orderBy=[customer_id#1 ASC NULLS FIRST,customer_first_name#2 ASC NULLS FIRST,customer_last_name#3 ASC NULLS FIRST,customer_preferred_cust_flag#4 ASC NULLS FIRST,customer_birth_country#5 ASC NULLS FIRST,customer_login#6 ASC NULLS FIRST,customer_email_address#7 ASC NULLS FIRST], output=[customer_id#1,customer_first_name#2,customer_last_name#3,customer_preferred_cust_flag#4,customer_birth_country#5,customer_login#6,customer_email_address#7]) ++- *(25) Project [customer_id#1, customer_first_name#2, customer_last_name#3, customer_preferred_cust_flag#4, customer_birth_country#5, customer_login#6, customer_email_address#7] + +- *(25) BroadcastHashJoin [customer_id#8], [customer_id#9], Inner, BuildRight, (CASE WHEN (year_total#10 > 0.000000) THEN CheckOverflow((promote_precision(year_total#11) / promote_precision(year_total#10)), DecimalType(38,14)) ELSE null END > CASE WHEN (year_total#12 > 0.000000) THEN CheckOverflow((promote_precision(year_total#13) / 
promote_precision(year_total#12)), DecimalType(38,14)) ELSE null END) + :- *(25) Project [customer_id#8, customer_id#1, customer_first_name#2, customer_last_name#3, customer_preferred_cust_flag#4, customer_birth_country#5, customer_login#6, customer_email_address#7, year_total#10, year_total#11, year_total#12] + : +- *(25) BroadcastHashJoin [customer_id#8], [customer_id#14], Inner, BuildRight + : :- *(25) Project [customer_id#8, customer_id#1, customer_first_name#2, customer_last_name#3, customer_preferred_cust_flag#4, customer_birth_country#5, customer_login#6, customer_email_address#7, year_total#10, year_total#11] + : : +- *(25) BroadcastHashJoin [customer_id#8], [customer_id#15], Inner, BuildRight, (CASE WHEN (year_total#10 > 0.000000) THEN CheckOverflow((promote_precision(year_total#11) / promote_precision(year_total#10)), DecimalType(38,14)) ELSE null END > CASE WHEN (year_total#16 > 0.000000) THEN CheckOverflow((promote_precision(year_total#17) / promote_precision(year_total#16)), DecimalType(38,14)) ELSE null END) + : : :- *(25) Project [customer_id#8, year_total#16, customer_id#1, customer_first_name#2, customer_last_name#3, customer_preferred_cust_flag#4, customer_birth_country#5, customer_login#6, customer_email_address#7, year_total#17, year_total#10] + : : : +- *(25) BroadcastHashJoin [customer_id#8], [customer_id#18], Inner, BuildRight + : : : :- *(25) BroadcastHashJoin [customer_id#8], [customer_id#1], Inner, BuildRight + : : : : :- Union + : : : : : :- *(4) Filter (isnotnull(year_total#16) && (year_total#16 > 0.000000)) + : : : : : : +- *(4) HashAggregate(keys=[c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, d_year#26], functions=[sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_ext_list_price#27 as decimal(8,2))) - promote_precision(cast(ss_ext_wholesale_cost#28 as 
decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ss_ext_discount_amt#29 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ss_ext_sales_price#30 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))]) + : : : : : : +- Exchange hashpartitioning(c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, d_year#26, 200) + : : : : : : +- *(3) HashAggregate(keys=[c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, d_year#26], functions=[partial_sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_ext_list_price#27 as decimal(8,2))) - promote_precision(cast(ss_ext_wholesale_cost#28 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ss_ext_discount_amt#29 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ss_ext_sales_price#30 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))]) + : : : : : : +- *(3) Project [c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, ss_ext_discount_amt#29, ss_ext_sales_price#30, ss_ext_wholesale_cost#28, ss_ext_list_price#27, d_year#26] + : : : : : : +- *(3) BroadcastHashJoin [ss_sold_date_sk#31], [d_date_sk#32], Inner, BuildRight + : : : : : : :- *(3) Project [c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, ss_sold_date_sk#31, ss_ext_discount_amt#29, ss_ext_sales_price#30, ss_ext_wholesale_cost#28, ss_ext_list_price#27] + : : : : : : : +- *(3) BroadcastHashJoin [c_customer_sk#33], [ss_customer_sk#34], Inner, BuildRight + : : : : : : : :- *(3) Project [c_customer_sk#33, c_customer_id#19, 
c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25] + : : : : : : : : +- *(3) Filter (isnotnull(c_customer_sk#33) && isnotnull(c_customer_id#19)) + : : : : : : : : +- *(3) FileScan parquet default.customer[c_customer_sk#33,c_customer_id#19,c_first_name#20,c_last_name#21,c_preferred_cust_flag#22,c_birth_country#23,c_login#24,c_email_address#25] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)], ReadSchema: struct + : : : : : :- LocalTableScan , [customer_id#35, year_total#36] + : : : : : +- LocalTableScan , [customer_id#37, year_total#38] + : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true])) + : : : : +- Union + : : : : :- *(8) HashAggregate(keys=[c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, d_year#26], functions=[sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_ext_list_price#27 as decimal(8,2))) - promote_precision(cast(ss_ext_wholesale_cost#28 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ss_ext_discount_amt#29 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ss_ext_sales_price#30 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))]) + : : : : : +- Exchange hashpartitioning(c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, d_year#26, 200) + : : : : : +- *(7) HashAggregate(keys=[c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, d_year#26], 
functions=[partial_sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_ext_list_price#27 as decimal(8,2))) - promote_precision(cast(ss_ext_wholesale_cost#28 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ss_ext_discount_amt#29 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ss_ext_sales_price#30 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))]) + : : : : : +- *(7) Project [c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, ss_ext_discount_amt#29, ss_ext_sales_price#30, ss_ext_wholesale_cost#28, ss_ext_list_price#27, d_year#26] + : : : : : +- *(7) BroadcastHashJoin [ss_sold_date_sk#31], [d_date_sk#32], Inner, BuildRight + : : : : : :- *(7) Project [c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, ss_sold_date_sk#31, ss_ext_discount_amt#29, ss_ext_sales_price#30, ss_ext_wholesale_cost#28, ss_ext_list_price#27] + : : : : : : +- *(7) BroadcastHashJoin [c_customer_sk#33], [ss_customer_sk#34], Inner, BuildRight + : : : : : : :- *(7) Project [c_customer_sk#33, c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25] + : : : : : : : +- *(7) Filter (isnotnull(c_customer_sk#33) && isnotnull(c_customer_id#19)) + : : : : : : : +- *(7) FileScan parquet default.customer[c_customer_sk#33,c_customer_id#19,c_first_name#20,c_last_name#21,c_preferred_cust_flag#22,c_birth_country#23,c_login#24,c_email_address#25] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)], ReadSchema: struct + : : : : :- 
LocalTableScan , [customer_id#35, customer_first_name#39, customer_last_name#40, customer_preferred_cust_flag#41, customer_birth_country#42, customer_login#43, customer_email_address#44, year_total#36] + : : : : +- LocalTableScan , [customer_id#37, customer_first_name#45, customer_last_name#46, customer_preferred_cust_flag#47, customer_birth_country#48, customer_login#49, customer_email_address#50, year_total#38] + : : : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true])) + : : : +- Union + : : : :- LocalTableScan , [customer_id#18, year_total#10] + : : : :- *(12) Filter (isnotnull(year_total#36) && (year_total#36 > 0.000000)) + : : : : +- *(12) HashAggregate(keys=[c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, d_year#26], functions=[sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cs_ext_list_price#51 as decimal(8,2))) - promote_precision(cast(cs_ext_wholesale_cost#52 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(cs_ext_discount_amt#53 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(cs_ext_sales_price#54 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))]) + : : : : +- Exchange hashpartitioning(c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, d_year#26, 200) + : : : : +- *(11) HashAggregate(keys=[c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, d_year#26], functions=[partial_sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cs_ext_list_price#51 as decimal(8,2))) - promote_precision(cast(cs_ext_wholesale_cost#52 as 
decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(cs_ext_discount_amt#53 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(cs_ext_sales_price#54 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))]) + : : : : +- *(11) Project [c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, cs_ext_discount_amt#53, cs_ext_sales_price#54, cs_ext_wholesale_cost#52, cs_ext_list_price#51, d_year#26] + : : : : +- *(11) BroadcastHashJoin [cs_sold_date_sk#55], [d_date_sk#32], Inner, BuildRight + : : : : :- *(11) Project [c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, cs_sold_date_sk#55, cs_ext_discount_amt#53, cs_ext_sales_price#54, cs_ext_wholesale_cost#52, cs_ext_list_price#51] + : : : : : +- *(11) BroadcastHashJoin [c_customer_sk#33], [cs_bill_customer_sk#56], Inner, BuildRight + : : : : : :- *(11) Project [c_customer_sk#33, c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25] + : : : : : : +- *(11) Filter (isnotnull(c_customer_sk#33) && isnotnull(c_customer_id#19)) + : : : : : : +- *(11) FileScan parquet default.customer[c_customer_sk#33,c_customer_id#19,c_first_name#20,c_last_name#21,c_preferred_cust_flag#22,c_birth_country#23,c_login#24,c_email_address#25] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)], ReadSchema: struct, [customer_id#37, year_total#38] + : : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true])) + : : +- Union + : : :- LocalTableScan , [customer_id#15, year_total#11] + : : :- *(16) HashAggregate(keys=[c_customer_id#19, c_first_name#20, c_last_name#21, 
c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, d_year#26], functions=[sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cs_ext_list_price#51 as decimal(8,2))) - promote_precision(cast(cs_ext_wholesale_cost#52 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(cs_ext_discount_amt#53 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(cs_ext_sales_price#54 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))]) + : : : +- Exchange hashpartitioning(c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, d_year#26, 200) + : : : +- *(15) HashAggregate(keys=[c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, d_year#26], functions=[partial_sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cs_ext_list_price#51 as decimal(8,2))) - promote_precision(cast(cs_ext_wholesale_cost#52 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(cs_ext_discount_amt#53 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(cs_ext_sales_price#54 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))]) + : : : +- *(15) Project [c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, cs_ext_discount_amt#53, cs_ext_sales_price#54, cs_ext_wholesale_cost#52, cs_ext_list_price#51, d_year#26] + : : : +- *(15) BroadcastHashJoin [cs_sold_date_sk#55], [d_date_sk#32], Inner, BuildRight + : : : :- *(15) Project [c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, 
c_login#24, c_email_address#25, cs_sold_date_sk#55, cs_ext_discount_amt#53, cs_ext_sales_price#54, cs_ext_wholesale_cost#52, cs_ext_list_price#51] + : : : : +- *(15) BroadcastHashJoin [c_customer_sk#33], [cs_bill_customer_sk#56], Inner, BuildRight + : : : : :- *(15) Project [c_customer_sk#33, c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25] + : : : : : +- *(15) Filter (isnotnull(c_customer_sk#33) && isnotnull(c_customer_id#19)) + : : : : : +- *(15) FileScan parquet default.customer[c_customer_sk#33,c_customer_id#19,c_first_name#20,c_last_name#21,c_preferred_cust_flag#22,c_birth_country#23,c_login#24,c_email_address#25] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)], ReadSchema: struct, [customer_id#37, year_total#38] + : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true])) + : +- Union + : :- LocalTableScan , [customer_id#14, year_total#12] + : :- LocalTableScan , [customer_id#35, year_total#36] + : +- *(20) Filter (isnotnull(year_total#38) && (year_total#38 > 0.000000)) + : +- *(20) HashAggregate(keys=[c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, d_year#26], functions=[sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ws_ext_list_price#57 as decimal(8,2))) - promote_precision(cast(ws_ext_wholesale_cost#58 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ws_ext_discount_amt#59 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ws_ext_sales_price#60 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))]) + : +- Exchange 
hashpartitioning(c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, d_year#26, 200) + : +- *(19) HashAggregate(keys=[c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, d_year#26], functions=[partial_sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ws_ext_list_price#57 as decimal(8,2))) - promote_precision(cast(ws_ext_wholesale_cost#58 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ws_ext_discount_amt#59 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ws_ext_sales_price#60 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))]) + : +- *(19) Project [c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, ws_ext_discount_amt#59, ws_ext_sales_price#60, ws_ext_wholesale_cost#58, ws_ext_list_price#57, d_year#26] + : +- *(19) BroadcastHashJoin [ws_sold_date_sk#61], [d_date_sk#32], Inner, BuildRight + : :- *(19) Project [c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, ws_sold_date_sk#61, ws_ext_discount_amt#59, ws_ext_sales_price#60, ws_ext_wholesale_cost#58, ws_ext_list_price#57] + : : +- *(19) BroadcastHashJoin [c_customer_sk#33], [ws_bill_customer_sk#62], Inner, BuildRight + : : :- *(19) Project [c_customer_sk#33, c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25] + : : : +- *(19) Filter (isnotnull(c_customer_sk#33) && isnotnull(c_customer_id#19)) + : : : +- *(19) FileScan parquet 
default.customer[c_customer_sk#33,c_customer_id#19,c_first_name#20,c_last_name#21,c_preferred_cust_flag#22,c_birth_country#23,c_login#24,c_email_address#25] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)], ReadSchema: struct, [customer_id#9, year_total#13] + :- LocalTableScan , [customer_id#35, year_total#36] + +- *(24) HashAggregate(keys=[c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, d_year#26], functions=[sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ws_ext_list_price#57 as decimal(8,2))) - promote_precision(cast(ws_ext_wholesale_cost#58 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ws_ext_discount_amt#59 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ws_ext_sales_price#60 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))]) + +- Exchange hashpartitioning(c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, d_year#26, 200) + +- *(23) HashAggregate(keys=[c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, d_year#26], functions=[partial_sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ws_ext_list_price#57 as decimal(8,2))) - promote_precision(cast(ws_ext_wholesale_cost#58 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ws_ext_discount_amt#59 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ws_ext_sales_price#60 
as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))]) + +- *(23) Project [c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, ws_ext_discount_amt#59, ws_ext_sales_price#60, ws_ext_wholesale_cost#58, ws_ext_list_price#57, d_year#26] + +- *(23) BroadcastHashJoin [ws_sold_date_sk#61], [d_date_sk#32], Inner, BuildRight + :- *(23) Project [c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, ws_sold_date_sk#61, ws_ext_discount_amt#59, ws_ext_sales_price#60, ws_ext_wholesale_cost#58, ws_ext_list_price#57] + : +- *(23) BroadcastHashJoin [c_customer_sk#33], [ws_bill_customer_sk#62], Inner, BuildRight + : :- *(23) Project [c_customer_sk#33, c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25] + : : +- *(23) Filter (isnotnull(c_customer_sk#33) && isnotnull(c_customer_id#19)) + : : +- *(23) FileScan parquet default.customer[c_customer_sk#33,c_customer_id#19,c_first_name#20,c_last_name#21,c_preferred_cust_flag#22,c_birth_country#23,c_login#24,c_email_address#25] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)], ReadSchema: struct= 11027) THEN CheckOverflow((promote_precision(cast(cs_sales_price#6 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_refunded_cash#7 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)) ELSE 0.00 END)]) + +- Exchange hashpartitioning(w_state#1, i_item_id#2, 200) + +- *(5) HashAggregate(keys=[w_state#1, i_item_id#2], functions=[partial_sum(CASE WHEN (d_date#5 < 11027) THEN CheckOverflow((promote_precision(cast(cs_sales_price#6 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_refunded_cash#7 as 
decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_date#5 >= 11027) THEN CheckOverflow((promote_precision(cast(cs_sales_price#6 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_refunded_cash#7 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)) ELSE 0.00 END)]) + +- *(5) Project [cs_sales_price#6, cr_refunded_cash#7, w_state#1, i_item_id#2, d_date#5] + +- *(5) BroadcastHashJoin [cs_sold_date_sk#8], [d_date_sk#9], Inner, BuildRight + :- *(5) Project [cs_sold_date_sk#8, cs_sales_price#6, cr_refunded_cash#7, w_state#1, i_item_id#2] + : +- *(5) BroadcastHashJoin [cs_item_sk#10], [i_item_sk#11], Inner, BuildRight + : :- *(5) Project [cs_sold_date_sk#8, cs_item_sk#10, cs_sales_price#6, cr_refunded_cash#7, w_state#1] + : : +- *(5) BroadcastHashJoin [cs_warehouse_sk#12], [w_warehouse_sk#13], Inner, BuildRight + : : :- *(5) Project [cs_sold_date_sk#8, cs_warehouse_sk#12, cs_item_sk#10, cs_sales_price#6, cr_refunded_cash#7] + : : : +- *(5) BroadcastHashJoin [cs_order_number#14, cs_item_sk#10], [cr_order_number#15, cr_item_sk#16], LeftOuter, BuildRight + : : : :- *(5) Project [cs_sold_date_sk#8, cs_warehouse_sk#12, cs_item_sk#10, cs_order_number#14, cs_sales_price#6] + : : : : +- *(5) Filter ((isnotnull(cs_warehouse_sk#12) && isnotnull(cs_item_sk#10)) && isnotnull(cs_sold_date_sk#8)) + : : : : +- *(5) FileScan parquet default.catalog_sales[cs_sold_date_sk#8,cs_warehouse_sk#12,cs_item_sk#10,cs_order_number#14,cs_sales_price#6] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_warehouse_sk), IsNotNull(cs_item_sk), IsNotNull(cs_sold_date_sk)], ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(2) Project [w_warehouse_sk#13, w_state#1] + : : +- *(2) Filter isnotnull(w_warehouse_sk#13) + : : +- 
*(2) FileScan parquet default.warehouse[w_warehouse_sk#13,w_state#1] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/warehouse], PartitionFilters: [], PushedFilters: [IsNotNull(w_warehouse_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(3) Project [i_item_sk#11, i_item_id#2] + : +- *(3) Filter (((isnotnull(i_current_price#17) && (i_current_price#17 >= 0.99)) && (i_current_price#17 <= 1.49)) && isnotnull(i_item_sk#11)) + : +- *(3) FileScan parquet default.item[i_item_sk#11,i_item_id#2,i_current_price#17] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_current_price), GreaterThanOrEqual(i_current_price,0.99), LessThanOrEqual(i_current_..., ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(4) Project [d_date_sk#9, d_date#5] + +- *(4) Filter (((isnotnull(d_date#5) && (d_date#5 >= 10997)) && (d_date#5 <= 11057)) && isnotnull(d_date_sk#9)) + +- *(4) FileScan parquet default.date_dim[d_date_sk#9,d_date#5] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,2000-02-10), LessThanOrEqual(d_date,2000-04-10), Is..., ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q40/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q40/simplified.txt new file mode 100644 index 000000000..2e480b9ea --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q40/simplified.txt @@ -0,0 +1,42 @@ +TakeOrderedAndProject [w_state,i_item_id,sales_before,sales_after] + 
WholeStageCodegen + HashAggregate [i_item_id,sum(CASE WHEN (d_date >= 11027) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_refunded_cash as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)) ELSE 0.00 END),sum,sum(CASE WHEN (d_date < 11027) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_refunded_cash as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)) ELSE 0.00 END),w_state,sum] [sum(CASE WHEN (d_date >= 11027) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_refunded_cash as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)) ELSE 0.00 END),sum,sum(CASE WHEN (d_date < 11027) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_refunded_cash as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)) ELSE 0.00 END),sales_before,sales_after,sum] + InputAdapter + Exchange [w_state,i_item_id] #1 + WholeStageCodegen + HashAggregate [i_item_id,sum,sum,d_date,w_state,sum,cs_sales_price,cr_refunded_cash,sum] [sum,sum,sum,sum] + Project [d_date,cs_sales_price,w_state,cr_refunded_cash,i_item_id] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_sales_price,w_state,cs_sold_date_sk,cr_refunded_cash,i_item_id] + BroadcastHashJoin [cs_item_sk,i_item_sk] + Project [cs_sales_price,w_state,cs_sold_date_sk,cs_item_sk,cr_refunded_cash] + BroadcastHashJoin [cs_warehouse_sk,w_warehouse_sk] + Project [cs_warehouse_sk,cs_sales_price,cs_sold_date_sk,cs_item_sk,cr_refunded_cash] + BroadcastHashJoin [cs_order_number,cs_item_sk,cr_order_number,cr_item_sk] + Project [cs_warehouse_sk,cs_sales_price,cs_sold_date_sk,cs_order_number,cs_item_sk] + Filter [cs_warehouse_sk,cs_item_sk,cs_sold_date_sk] + Scan parquet default.catalog_sales 
[cs_warehouse_sk,cs_sales_price,cs_sold_date_sk,cs_order_number,cs_item_sk] [cs_warehouse_sk,cs_sales_price,cs_sold_date_sk,cs_order_number,cs_item_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen + Project [cr_item_sk,cr_order_number,cr_refunded_cash] + Filter [cr_order_number,cr_item_sk] + Scan parquet default.catalog_returns [cr_item_sk,cr_order_number,cr_refunded_cash] [cr_item_sk,cr_order_number,cr_refunded_cash] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen + Project [w_warehouse_sk,w_state] + Filter [w_warehouse_sk] + Scan parquet default.warehouse [w_warehouse_sk,w_state] [w_warehouse_sk,w_state] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen + Project [i_item_sk,i_item_id] + Filter [i_current_price,i_item_sk] + Scan parquet default.item [i_item_sk,i_item_id,i_current_price] [i_item_sk,i_item_id,i_current_price] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen + Project [d_date_sk,d_date] + Filter [d_date,d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_date] [d_date_sk,d_date] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q41/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q41/explain.txt new file mode 100644 index 000000000..886b4e209 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q41/explain.txt @@ -0,0 +1,19 @@ +== Physical Plan == +TakeOrderedAndProject(limit=100, orderBy=[i_product_name#1 ASC NULLS FIRST], output=[i_product_name#1]) ++- *(4) HashAggregate(keys=[i_product_name#1], functions=[]) + +- Exchange hashpartitioning(i_product_name#1, 200) + +- *(3) HashAggregate(keys=[i_product_name#1], functions=[]) + +- *(3) Project [i_product_name#1] + +- *(3) BroadcastHashJoin [i_manufact#2], [i_manufact#2#3], Inner, BuildRight + :- *(3) Project [i_manufact#2, i_product_name#1] + : +- *(3) Filter (((isnotnull(i_manufact_id#4) && (i_manufact_id#4 >= 738)) && (i_manufact_id#4 <= 778)) && isnotnull(i_manufact#2)) + : +- 
*(3) FileScan parquet default.item[i_manufact_id#4,i_manufact#2,i_product_name#1] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_manufact_id), GreaterThanOrEqual(i_manufact_id,738), LessThanOrEqual(i_manufact_id,7..., ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true])) + +- *(2) Project [i_manufact#2#3] + +- *(2) Filter (if (isnull(alwaysTrue#5)) 0 else item_cnt#6 > 0) + +- *(2) HashAggregate(keys=[i_manufact#2], functions=[count(1)]) + +- Exchange hashpartitioning(i_manufact#2, 200) + +- *(1) HashAggregate(keys=[i_manufact#2], functions=[partial_count(1)]) + +- *(1) Project [i_manufact#2] + +- *(1) Filter (((((i_category#7 = Women) && (((((i_color#8 = powder) || (i_color#8 = khaki)) && ((i_units#9 = Ounce) || (i_units#9 = Oz))) && ((i_size#10 = medium) || (i_size#10 = extra large))) || ((((i_color#8 = brown) || (i_color#8 = honeydew)) && ((i_units#9 = Bunch) || (i_units#9 = Ton))) && ((i_size#10 = N/A) || (i_size#10 = small))))) || ((i_category#7 = Men) && (((((i_color#8 = floral) || (i_color#8 = deep)) && ((i_units#9 = N/A) || (i_units#9 = Dozen))) && ((i_size#10 = petite) || (i_size#10 = large))) || ((((i_color#8 = light) || (i_color#8 = cornflower)) && ((i_units#9 = Box) || (i_units#9 = Pound))) && ((i_size#10 = medium) || (i_size#10 = extra large)))))) || (((i_category#7 = Women) && (((((i_color#8 = midnight) || (i_color#8 = snow)) && ((i_units#9 = Pallet) || (i_units#9 = Gross))) && ((i_size#10 = medium) || (i_size#10 = extra large))) || ((((i_color#8 = cyan) || (i_color#8 = papaya)) && ((i_units#9 = Cup) || (i_units#9 = Dram))) && ((i_size#10 = N/A) || (i_size#10 = small))))) || ((i_category#7 = Men) && (((((i_color#8 = orange) || (i_color#8 = frosted)) && ((i_units#9 = Each) || (i_units#9 = Tbl))) && ((i_size#10 = petite) || (i_size#10 = large))) || ((((i_color#8 = forest) || 
(i_color#8 = ghost)) && ((i_units#9 = Lb) || (i_units#9 = Bundle))) && ((i_size#10 = medium) || (i_size#10 = extra large))))))) && isnotnull(i_manufact#2)) + +- *(1) FileScan parquet default.item[i_category#7,i_manufact#2,i_size#10,i_color#8,i_units#9] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [Or(Or(And(EqualTo(i_category,Women),Or(And(And(Or(EqualTo(i_color,powder),EqualTo(i_color,khaki)..., ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q41/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q41/simplified.txt new file mode 100644 index 000000000..caac67bf3 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q41/simplified.txt @@ -0,0 +1,25 @@ +TakeOrderedAndProject [i_product_name] + WholeStageCodegen + HashAggregate [i_product_name] + InputAdapter + Exchange [i_product_name] #1 + WholeStageCodegen + HashAggregate [i_product_name] + Project [i_product_name] + BroadcastHashJoin [i_manufact,i_manufact] + Project [i_manufact,i_product_name] + Filter [i_manufact_id,i_manufact] + Scan parquet default.item [i_manufact_id,i_manufact,i_product_name] [i_manufact_id,i_manufact,i_product_name] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen + Project [i_manufact] + Filter [alwaysTrue,item_cnt] + HashAggregate [i_manufact,count,count(1)] [i_manufact,alwaysTrue,item_cnt,count,count(1)] + InputAdapter + Exchange [i_manufact] #3 + WholeStageCodegen + HashAggregate [i_manufact,count,count] [count,count] + Project [i_manufact] + Filter [i_units,i_manufact,i_color,i_size,i_category] + Scan parquet default.item [i_manufact,i_units,i_size,i_category,i_color] [i_manufact,i_units,i_size,i_category,i_color] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q42/explain.txt 
b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q42/explain.txt new file mode 100644 index 000000000..6fb079e4a --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q42/explain.txt @@ -0,0 +1,20 @@ +== Physical Plan == +TakeOrderedAndProject(limit=100, orderBy=[sum(ss_ext_sales_price)#1 DESC NULLS LAST,d_year#2 ASC NULLS FIRST,i_category_id#3 ASC NULLS FIRST,i_category#4 ASC NULLS FIRST], output=[d_year#2,i_category_id#3,i_category#4,sum(ss_ext_sales_price)#1]) ++- *(4) HashAggregate(keys=[d_year#2, i_category_id#3, i_category#4], functions=[sum(UnscaledValue(ss_ext_sales_price#5))]) + +- Exchange hashpartitioning(d_year#2, i_category_id#3, i_category#4, 200) + +- *(3) HashAggregate(keys=[d_year#2, i_category_id#3, i_category#4], functions=[partial_sum(UnscaledValue(ss_ext_sales_price#5))]) + +- *(3) Project [d_year#2, ss_ext_sales_price#5, i_category_id#3, i_category#4] + +- *(3) BroadcastHashJoin [ss_item_sk#6], [i_item_sk#7], Inner, BuildRight + :- *(3) Project [d_year#2, ss_item_sk#6, ss_ext_sales_price#5] + : +- *(3) BroadcastHashJoin [d_date_sk#8], [ss_sold_date_sk#9], Inner, BuildRight + : :- *(3) Project [d_date_sk#8, d_year#2] + : : +- *(3) Filter ((((isnotnull(d_moy#10) && isnotnull(d_year#2)) && (d_moy#10 = 11)) && (d_year#2 = 2000)) && isnotnull(d_date_sk#8)) + : : +- *(3) FileScan parquet default.date_dim[d_date_sk#8,d_year#2,d_moy#10] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), EqualTo(d_moy,11), EqualTo(d_year,2000), IsNotNull(d_date_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(1) Project [ss_sold_date_sk#9, ss_item_sk#6, ss_ext_sales_price#5] + : +- *(1) Filter (isnotnull(ss_sold_date_sk#9) && isnotnull(ss_item_sk#6)) + : +- *(1) FileScan parquet 
default.store_sales[ss_sold_date_sk#9,ss_item_sk#6,ss_ext_sales_price#5] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_item_sk)], ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(2) Project [i_item_sk#7, i_category_id#3, i_category#4] + +- *(2) Filter ((isnotnull(i_manager_id#11) && (i_manager_id#11 = 1)) && isnotnull(i_item_sk#7)) + +- *(2) FileScan parquet default.item[i_item_sk#7,i_category_id#3,i_category#4,i_manager_id#11] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_manager_id), EqualTo(i_manager_id,1), IsNotNull(i_item_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q42/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q42/simplified.txt new file mode 100644 index 000000000..abcbca371 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q42/simplified.txt @@ -0,0 +1,26 @@ +TakeOrderedAndProject [sum(ss_ext_sales_price),d_year,i_category_id,i_category] + WholeStageCodegen + HashAggregate [sum(UnscaledValue(ss_ext_sales_price)),sum,d_year,i_category,i_category_id] [sum(UnscaledValue(ss_ext_sales_price)),sum(ss_ext_sales_price),sum] + InputAdapter + Exchange [d_year,i_category_id,i_category] #1 + WholeStageCodegen + HashAggregate [sum,d_year,sum,i_category,i_category_id,ss_ext_sales_price] [sum,sum] + Project [d_year,ss_ext_sales_price,i_category_id,i_category] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [d_year,ss_item_sk,ss_ext_sales_price] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [d_date_sk,d_year] + Filter [d_moy,d_year,d_date_sk] + Scan 
parquet default.date_dim [d_date_sk,d_year,d_moy] [d_date_sk,d_year,d_moy] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen + Project [ss_sold_date_sk,ss_item_sk,ss_ext_sales_price] + Filter [ss_sold_date_sk,ss_item_sk] + Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_ext_sales_price] [ss_sold_date_sk,ss_item_sk,ss_ext_sales_price] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen + Project [i_item_sk,i_category_id,i_category] + Filter [i_manager_id,i_item_sk] + Scan parquet default.item [i_item_sk,i_category_id,i_category,i_manager_id] [i_item_sk,i_category_id,i_category,i_manager_id] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q43/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q43/explain.txt new file mode 100644 index 000000000..d2b16d01d --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q43/explain.txt @@ -0,0 +1,20 @@ +== Physical Plan == +TakeOrderedAndProject(limit=100, orderBy=[s_store_name#1 ASC NULLS FIRST,s_store_id#2 ASC NULLS FIRST,sun_sales#3 ASC NULLS FIRST,mon_sales#4 ASC NULLS FIRST,tue_sales#5 ASC NULLS FIRST,wed_sales#6 ASC NULLS FIRST,thu_sales#7 ASC NULLS FIRST,fri_sales#8 ASC NULLS FIRST,sat_sales#9 ASC NULLS FIRST], output=[s_store_name#1,s_store_id#2,sun_sales#3,mon_sales#4,tue_sales#5,wed_sales#6,thu_sales#7,fri_sales#8,sat_sales#9]) ++- *(4) HashAggregate(keys=[s_store_name#1, s_store_id#2], functions=[sum(UnscaledValue(CASE WHEN (d_day_name#10 = Sunday) THEN ss_sales_price#11 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#10 = Monday) THEN ss_sales_price#11 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#10 = Tuesday) THEN ss_sales_price#11 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#10 = Wednesday) THEN ss_sales_price#11 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#10 = Thursday) THEN ss_sales_price#11 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#10 = Friday) THEN 
ss_sales_price#11 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#10 = Saturday) THEN ss_sales_price#11 ELSE null END))]) + +- Exchange hashpartitioning(s_store_name#1, s_store_id#2, 200) + +- *(3) HashAggregate(keys=[s_store_name#1, s_store_id#2], functions=[partial_sum(UnscaledValue(CASE WHEN (d_day_name#10 = Sunday) THEN ss_sales_price#11 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#10 = Monday) THEN ss_sales_price#11 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#10 = Tuesday) THEN ss_sales_price#11 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#10 = Wednesday) THEN ss_sales_price#11 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#10 = Thursday) THEN ss_sales_price#11 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#10 = Friday) THEN ss_sales_price#11 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#10 = Saturday) THEN ss_sales_price#11 ELSE null END))]) + +- *(3) Project [d_day_name#10, ss_sales_price#11, s_store_id#2, s_store_name#1] + +- *(3) BroadcastHashJoin [ss_store_sk#12], [s_store_sk#13], Inner, BuildRight + :- *(3) Project [d_day_name#10, ss_store_sk#12, ss_sales_price#11] + : +- *(3) BroadcastHashJoin [d_date_sk#14], [ss_sold_date_sk#15], Inner, BuildRight + : :- *(3) Project [d_date_sk#14, d_day_name#10] + : : +- *(3) Filter ((isnotnull(d_year#16) && (d_year#16 = 2000)) && isnotnull(d_date_sk#14)) + : : +- *(3) FileScan parquet default.date_dim[d_date_sk#14,d_year#16,d_day_name#10] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2000), IsNotNull(d_date_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(1) Project [ss_sold_date_sk#15, ss_store_sk#12, ss_sales_price#11] + : +- *(1) Filter 
(isnotnull(ss_sold_date_sk#15) && isnotnull(ss_store_sk#12)) + : +- *(1) FileScan parquet default.store_sales[ss_sold_date_sk#15,ss_store_sk#12,ss_sales_price#11] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk)], ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(2) Project [s_store_sk#13, s_store_id#2, s_store_name#1] + +- *(2) Filter ((isnotnull(s_gmt_offset#17) && (s_gmt_offset#17 = -5.00)) && isnotnull(s_store_sk#13)) + +- *(2) FileScan parquet default.store[s_store_sk#13,s_store_id#2,s_store_name#1,s_gmt_offset#17] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_gmt_offset), EqualTo(s_gmt_offset,-5.00), IsNotNull(s_store_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q43/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q43/simplified.txt new file mode 100644 index 000000000..a5e60a60c --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q43/simplified.txt @@ -0,0 +1,26 @@ +TakeOrderedAndProject [sat_sales,fri_sales,sun_sales,s_store_id,tue_sales,mon_sales,s_store_name,thu_sales,wed_sales] + WholeStageCodegen + HashAggregate [sum,sum(UnscaledValue(CASE WHEN (d_day_name = Thursday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Sunday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Wednesday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Monday) THEN ss_sales_price ELSE null END)),sum,sum,sum,s_store_id,sum,sum,sum,sum(UnscaledValue(CASE WHEN (d_day_name = Tuesday) THEN 
ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Friday) THEN ss_sales_price ELSE null END)),s_store_name,sum(UnscaledValue(CASE WHEN (d_day_name = Saturday) THEN ss_sales_price ELSE null END))] [sum,sum(UnscaledValue(CASE WHEN (d_day_name = Thursday) THEN ss_sales_price ELSE null END)),sat_sales,fri_sales,sum(UnscaledValue(CASE WHEN (d_day_name = Sunday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Wednesday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Monday) THEN ss_sales_price ELSE null END)),sum,sun_sales,sum,sum,sum,sum,tue_sales,mon_sales,sum,sum(UnscaledValue(CASE WHEN (d_day_name = Tuesday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Friday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Saturday) THEN ss_sales_price ELSE null END)),thu_sales,wed_sales] + InputAdapter + Exchange [s_store_name,s_store_id] #1 + WholeStageCodegen + HashAggregate [sum,sum,sum,d_day_name,sum,sum,sum,sum,sum,sum,s_store_id,ss_sales_price,sum,sum,sum,sum,sum,s_store_name] [sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum] + Project [d_day_name,ss_sales_price,s_store_id,s_store_name] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [d_day_name,ss_store_sk,ss_sales_price] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [d_date_sk,d_day_name] + Filter [d_year,d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_year,d_day_name] [d_date_sk,d_year,d_day_name] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen + Project [ss_sold_date_sk,ss_store_sk,ss_sales_price] + Filter [ss_sold_date_sk,ss_store_sk] + Scan parquet default.store_sales [ss_sold_date_sk,ss_store_sk,ss_sales_price] [ss_sold_date_sk,ss_store_sk,ss_sales_price] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen + Project [s_store_sk,s_store_id,s_store_name] + Filter [s_gmt_offset,s_store_sk] + Scan parquet default.store 
[s_store_sk,s_store_id,s_store_name,s_gmt_offset] [s_store_sk,s_store_id,s_store_name,s_gmt_offset] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q44/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q44/explain.txt new file mode 100644 index 000000000..91c290e9f --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q44/explain.txt @@ -0,0 +1,50 @@ +== Physical Plan == +TakeOrderedAndProject(limit=100, orderBy=[rnk#1 ASC NULLS FIRST], output=[rnk#1,best_performing#2,worst_performing#3]) ++- *(10) Project [rnk#1, i_product_name#4 AS best_performing#2, i_product_name#5 AS worst_performing#3] + +- *(10) BroadcastHashJoin [item_sk#6], [i_item_sk#7], Inner, BuildRight + :- *(10) Project [rnk#1, item_sk#6, i_product_name#4] + : +- *(10) BroadcastHashJoin [item_sk#8], [i_item_sk#9], Inner, BuildRight + : :- *(10) Project [item_sk#8, rnk#1, item_sk#6] + : : +- *(10) BroadcastHashJoin [rnk#1], [rnk#10], Inner, BuildRight + : : :- *(10) Project [item_sk#8, rnk#1] + : : : +- *(10) Filter ((isnotnull(rnk#1) && (rnk#1 < 11)) && isnotnull(item_sk#8)) + : : : +- Window [rank(rank_col#11) windowspecdefinition(rank_col#11 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rnk#1], [rank_col#11 ASC NULLS FIRST] + : : : +- *(3) Sort [rank_col#11 ASC NULLS FIRST], false, 0 + : : : +- Exchange SinglePartition + : : : +- *(2) Project [item_sk#8, rank_col#11] + : : : +- *(2) Filter (isnotnull(avg(ss_net_profit#12)#13) && (cast(avg(ss_net_profit#12)#13 as decimal(13,7)) > CheckOverflow((0.900000 * promote_precision(Subquery subquery7089)), DecimalType(13,7)))) + : : : : +- Subquery subquery7089 + : : : : +- *(2) HashAggregate(keys=[ss_store_sk#14], functions=[avg(UnscaledValue(ss_net_profit#12))]) + : : : : +- Exchange hashpartitioning(ss_store_sk#14, 200) + : : : : +- *(1) HashAggregate(keys=[ss_store_sk#14], functions=[partial_avg(UnscaledValue(ss_net_profit#12))]) + : : : 
: +- *(1) Project [ss_store_sk#14, ss_net_profit#12] + : : : : +- *(1) Filter ((isnotnull(ss_store_sk#14) && (ss_store_sk#14 = 4)) && isnull(ss_addr_sk#15)) + : : : : +- *(1) FileScan parquet default.store_sales[ss_addr_sk#15,ss_store_sk#14,ss_net_profit#12] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_store_sk), EqualTo(ss_store_sk,4), IsNull(ss_addr_sk)], ReadSchema: struct + : : : +- *(2) HashAggregate(keys=[ss_item_sk#16], functions=[avg(UnscaledValue(ss_net_profit#12))]) + : : : +- Exchange hashpartitioning(ss_item_sk#16, 200) + : : : +- *(1) HashAggregate(keys=[ss_item_sk#16], functions=[partial_avg(UnscaledValue(ss_net_profit#12))]) + : : : +- *(1) Project [ss_item_sk#16, ss_net_profit#12] + : : : +- *(1) Filter (isnotnull(ss_store_sk#14) && (ss_store_sk#14 = 4)) + : : : +- *(1) FileScan parquet default.store_sales[ss_item_sk#16,ss_store_sk#14,ss_net_profit#12] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_store_sk), EqualTo(ss_store_sk,4)], ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[1, int, true] as bigint))) + : : +- *(7) Project [item_sk#6, rnk#10] + : : +- *(7) Filter ((isnotnull(rnk#10) && (rnk#10 < 11)) && isnotnull(item_sk#6)) + : : +- Window [rank(rank_col#17) windowspecdefinition(rank_col#17 DESC NULLS LAST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rnk#10], [rank_col#17 DESC NULLS LAST] + : : +- *(6) Sort [rank_col#17 DESC NULLS LAST], false, 0 + : : +- Exchange SinglePartition + : : +- *(5) Project [item_sk#6, rank_col#17] + : : +- *(5) Filter (isnotnull(avg(ss_net_profit#12)#18) && (cast(avg(ss_net_profit#12)#18 as decimal(13,7)) > CheckOverflow((0.900000 * promote_precision(Subquery subquery7094)), 
DecimalType(13,7)))) + : : : +- Subquery subquery7094 + : : : +- *(2) HashAggregate(keys=[ss_store_sk#14], functions=[avg(UnscaledValue(ss_net_profit#12))]) + : : : +- Exchange hashpartitioning(ss_store_sk#14, 200) + : : : +- *(1) HashAggregate(keys=[ss_store_sk#14], functions=[partial_avg(UnscaledValue(ss_net_profit#12))]) + : : : +- *(1) Project [ss_store_sk#14, ss_net_profit#12] + : : : +- *(1) Filter ((isnotnull(ss_store_sk#14) && (ss_store_sk#14 = 4)) && isnull(ss_addr_sk#15)) + : : : +- *(1) FileScan parquet default.store_sales[ss_addr_sk#15,ss_store_sk#14,ss_net_profit#12] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_store_sk), EqualTo(ss_store_sk,4), IsNull(ss_addr_sk)], ReadSchema: struct + : : +- *(5) HashAggregate(keys=[ss_item_sk#16], functions=[avg(UnscaledValue(ss_net_profit#12))]) + : : +- ReusedExchange [ss_item_sk#16, sum#19, count#20], Exchange hashpartitioning(ss_item_sk#16, 200) + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(8) Project [i_item_sk#9, i_product_name#4] + : +- *(8) Filter isnotnull(i_item_sk#9) + : +- *(8) FileScan parquet default.item[i_item_sk#9,i_product_name#4] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct + +- ReusedExchange [i_item_sk#7, i_product_name#5], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q44/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q44/simplified.txt new file mode 100644 index 000000000..55aab5cc2 --- /dev/null +++ 
b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q44/simplified.txt @@ -0,0 +1,72 @@ +TakeOrderedAndProject [rnk,best_performing,worst_performing] + WholeStageCodegen + Project [rnk,i_product_name,i_product_name] + BroadcastHashJoin [item_sk,i_item_sk] + Project [rnk,item_sk,i_product_name] + BroadcastHashJoin [item_sk,i_item_sk] + Project [item_sk,rnk,item_sk] + BroadcastHashJoin [rnk,rnk] + Project [item_sk,rnk] + Filter [rnk,item_sk] + InputAdapter + Window [rank_col] + WholeStageCodegen + Sort [rank_col] + InputAdapter + Exchange #1 + WholeStageCodegen + Project [item_sk,rank_col] + Filter [avg(ss_net_profit)] + Subquery #1 + WholeStageCodegen + HashAggregate [ss_store_sk,sum,count,avg(UnscaledValue(ss_net_profit))] [avg(UnscaledValue(ss_net_profit)),rank_col,sum,count] + InputAdapter + Exchange [ss_store_sk] #3 + WholeStageCodegen + HashAggregate [count,ss_store_sk,sum,sum,count,ss_net_profit] [sum,count,sum,count] + Project [ss_store_sk,ss_net_profit] + Filter [ss_store_sk,ss_addr_sk] + Scan parquet default.store_sales [ss_addr_sk,ss_store_sk,ss_net_profit] [ss_addr_sk,ss_store_sk,ss_net_profit] + HashAggregate [ss_item_sk,sum,count,avg(UnscaledValue(ss_net_profit))] [avg(UnscaledValue(ss_net_profit)),rank_col,sum,avg(ss_net_profit),item_sk,count] + InputAdapter + Exchange [ss_item_sk] #2 + WholeStageCodegen + HashAggregate [ss_item_sk,sum,sum,count,count,ss_net_profit] [sum,count,sum,count] + Project [ss_item_sk,ss_net_profit] + Filter [ss_store_sk] + Scan parquet default.store_sales [ss_item_sk,ss_store_sk,ss_net_profit] [ss_item_sk,ss_store_sk,ss_net_profit] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen + Project [item_sk,rnk] + Filter [rnk,item_sk] + InputAdapter + Window [rank_col] + WholeStageCodegen + Sort [rank_col] + InputAdapter + Exchange #5 + WholeStageCodegen + Project [item_sk,rank_col] + Filter [avg(ss_net_profit)] + Subquery #2 + WholeStageCodegen + HashAggregate 
[ss_store_sk,sum,count,avg(UnscaledValue(ss_net_profit))] [avg(UnscaledValue(ss_net_profit)),rank_col,sum,count] + InputAdapter + Exchange [ss_store_sk] #6 + WholeStageCodegen + HashAggregate [count,ss_store_sk,sum,count,sum,ss_net_profit] [sum,count,sum,count] + Project [ss_store_sk,ss_net_profit] + Filter [ss_store_sk,ss_addr_sk] + Scan parquet default.store_sales [ss_addr_sk,ss_store_sk,ss_net_profit] [ss_addr_sk,ss_store_sk,ss_net_profit] + HashAggregate [ss_item_sk,sum,count,avg(UnscaledValue(ss_net_profit))] [avg(ss_net_profit),item_sk,count,avg(UnscaledValue(ss_net_profit)),rank_col,sum] + InputAdapter + ReusedExchange [ss_item_sk,sum,count] [ss_item_sk,sum,count] #2 + InputAdapter + BroadcastExchange #7 + WholeStageCodegen + Project [i_item_sk,i_product_name] + Filter [i_item_sk] + Scan parquet default.item [i_item_sk,i_product_name] [i_item_sk,i_product_name] + InputAdapter + ReusedExchange [i_item_sk,i_product_name] [i_item_sk,i_product_name] #7 diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q45/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q45/explain.txt new file mode 100644 index 000000000..687085a12 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q45/explain.txt @@ -0,0 +1,39 @@ +== Physical Plan == +TakeOrderedAndProject(limit=100, orderBy=[ca_zip#1 ASC NULLS FIRST,ca_city#2 ASC NULLS FIRST], output=[ca_zip#1,ca_city#2,sum(ws_sales_price)#3]) ++- *(7) HashAggregate(keys=[ca_zip#1, ca_city#2], functions=[sum(UnscaledValue(ws_sales_price#4))]) + +- Exchange hashpartitioning(ca_zip#1, ca_city#2, 200) + +- *(6) HashAggregate(keys=[ca_zip#1, ca_city#2], functions=[partial_sum(UnscaledValue(ws_sales_price#4))]) + +- *(6) Project [ws_sales_price#4, ca_city#2, ca_zip#1] + +- *(6) Filter (substring(ca_zip#1, 1, 5) IN (85669,86197,88274,83405,86475,85392,85460,80348,81792) || exists#5) + +- *(6) BroadcastHashJoin [i_item_id#6], [i_item_id#6#7], ExistenceJoin(exists#5), 
BuildRight + :- *(6) Project [ws_sales_price#4, ca_city#2, ca_zip#1, i_item_id#6] + : +- *(6) BroadcastHashJoin [ws_item_sk#8], [i_item_sk#9], Inner, BuildRight + : :- *(6) Project [ws_item_sk#8, ws_sales_price#4, ca_city#2, ca_zip#1] + : : +- *(6) BroadcastHashJoin [ws_sold_date_sk#10], [d_date_sk#11], Inner, BuildRight + : : :- *(6) Project [ws_sold_date_sk#10, ws_item_sk#8, ws_sales_price#4, ca_city#2, ca_zip#1] + : : : +- *(6) BroadcastHashJoin [c_current_addr_sk#12], [ca_address_sk#13], Inner, BuildRight + : : : :- *(6) Project [ws_sold_date_sk#10, ws_item_sk#8, ws_sales_price#4, c_current_addr_sk#12] + : : : : +- *(6) BroadcastHashJoin [ws_bill_customer_sk#14], [c_customer_sk#15], Inner, BuildRight + : : : : :- *(6) Project [ws_sold_date_sk#10, ws_item_sk#8, ws_bill_customer_sk#14, ws_sales_price#4] + : : : : : +- *(6) Filter ((isnotnull(ws_bill_customer_sk#14) && isnotnull(ws_sold_date_sk#10)) && isnotnull(ws_item_sk#8)) + : : : : : +- *(6) FileScan parquet default.web_sales[ws_sold_date_sk#10,ws_item_sk#8,ws_bill_customer_sk#14,ws_sales_price#4] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_bill_customer_sk), IsNotNull(ws_sold_date_sk), IsNotNull(ws_item_sk)], ReadSchema: struct + : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : +- *(1) Project [c_customer_sk#15, c_current_addr_sk#12] + : : : : +- *(1) Filter (isnotnull(c_customer_sk#15) && isnotnull(c_current_addr_sk#12)) + : : : : +- *(1) FileScan parquet default.customer[c_customer_sk#15,c_current_addr_sk#12] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_current_addr_sk)], ReadSchema: struct + : : : +- BroadcastExchange 
HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : +- *(2) Project [ca_address_sk#13, ca_city#2, ca_zip#1] + : : : +- *(2) Filter isnotnull(ca_address_sk#13) + : : : +- *(2) FileScan parquet default.customer_address[ca_address_sk#13,ca_city#2,ca_zip#1] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_address_sk)], ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(3) Project [d_date_sk#11] + : : +- *(3) Filter ((((isnotnull(d_qoy#16) && isnotnull(d_year#17)) && (d_qoy#16 = 2)) && (d_year#17 = 2001)) && isnotnull(d_date_sk#11)) + : : +- *(3) FileScan parquet default.date_dim[d_date_sk#11,d_year#17,d_qoy#16] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_qoy), IsNotNull(d_year), EqualTo(d_qoy,2), EqualTo(d_year,2001), IsNotNull(d_date_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(4) Project [i_item_sk#9, i_item_id#6] + : +- *(4) Filter isnotnull(i_item_sk#9) + : +- *(4) FileScan parquet default.item[i_item_sk#9,i_item_id#6] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true])) + +- *(5) Project [i_item_id#6 AS i_item_id#6#7] + +- *(5) Filter i_item_sk#9 IN (2,3,5,7,11,13,17,19,23,29) + +- *(5) FileScan parquet default.item[i_item_sk#9,i_item_id#6] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: 
[], PushedFilters: [In(i_item_sk, [2,3,5,7,11,13,17,19,23,29])], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q45/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q45/simplified.txt new file mode 100644 index 000000000..f6d66999d --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q45/simplified.txt @@ -0,0 +1,51 @@ +TakeOrderedAndProject [ca_zip,ca_city,sum(ws_sales_price)] + WholeStageCodegen + HashAggregate [ca_zip,ca_city,sum,sum(UnscaledValue(ws_sales_price))] [sum(UnscaledValue(ws_sales_price)),sum(ws_sales_price),sum] + InputAdapter + Exchange [ca_zip,ca_city] #1 + WholeStageCodegen + HashAggregate [ca_city,ca_zip,sum,ws_sales_price,sum] [sum,sum] + Project [ws_sales_price,ca_city,ca_zip] + Filter [ca_zip,exists] + BroadcastHashJoin [i_item_id,i_item_id] + Project [ws_sales_price,ca_city,ca_zip,i_item_id] + BroadcastHashJoin [ws_item_sk,i_item_sk] + Project [ws_item_sk,ws_sales_price,ca_city,ca_zip] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Project [ca_zip,ca_city,ws_sold_date_sk,ws_sales_price,ws_item_sk] + BroadcastHashJoin [c_current_addr_sk,ca_address_sk] + Project [ws_sold_date_sk,ws_item_sk,ws_sales_price,c_current_addr_sk] + BroadcastHashJoin [ws_bill_customer_sk,c_customer_sk] + Project [ws_sold_date_sk,ws_item_sk,ws_bill_customer_sk,ws_sales_price] + Filter [ws_bill_customer_sk,ws_sold_date_sk,ws_item_sk] + Scan parquet default.web_sales [ws_sold_date_sk,ws_item_sk,ws_bill_customer_sk,ws_sales_price] [ws_sold_date_sk,ws_item_sk,ws_bill_customer_sk,ws_sales_price] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen + Project [c_customer_sk,c_current_addr_sk] + Filter [c_customer_sk,c_current_addr_sk] + Scan parquet default.customer [c_customer_sk,c_current_addr_sk] [c_customer_sk,c_current_addr_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen + Project [ca_address_sk,ca_city,ca_zip] + Filter 
[ca_address_sk] + Scan parquet default.customer_address [ca_address_sk,ca_city,ca_zip] [ca_address_sk,ca_city,ca_zip] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen + Project [d_date_sk] + Filter [d_qoy,d_year,d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_year,d_qoy] [d_date_sk,d_year,d_qoy] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen + Project [i_item_sk,i_item_id] + Filter [i_item_sk] + Scan parquet default.item [i_item_sk,i_item_id] [i_item_sk,i_item_id] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen + Project [i_item_id] + Filter [i_item_sk] + Scan parquet default.item [i_item_sk,i_item_id] [i_item_sk,i_item_id] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q46/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q46/explain.txt new file mode 100644 index 000000000..0ae77a813 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q46/explain.txt @@ -0,0 +1,41 @@ +== Physical Plan == +TakeOrderedAndProject(limit=100, orderBy=[c_last_name#1 ASC NULLS FIRST,c_first_name#2 ASC NULLS FIRST,ca_city#3 ASC NULLS FIRST,bought_city#4 ASC NULLS FIRST,ss_ticket_number#5 ASC NULLS FIRST], output=[c_last_name#1,c_first_name#2,ca_city#3,bought_city#4,ss_ticket_number#5,amt#6,profit#7]) ++- *(8) Project [c_last_name#1, c_first_name#2, ca_city#3, bought_city#4, ss_ticket_number#5, amt#6, profit#7] + +- *(8) BroadcastHashJoin [c_current_addr_sk#8], [ca_address_sk#9], Inner, BuildRight, NOT (ca_city#3 = bought_city#4) + :- *(8) Project [ss_ticket_number#5, bought_city#4, amt#6, profit#7, c_current_addr_sk#8, c_first_name#2, c_last_name#1] + : +- *(8) BroadcastHashJoin [ss_customer_sk#10], [c_customer_sk#11], Inner, BuildRight + : :- *(8) HashAggregate(keys=[ss_ticket_number#5, ss_customer_sk#10, ss_addr_sk#12, ca_city#3], functions=[sum(UnscaledValue(ss_coupon_amt#13)), sum(UnscaledValue(ss_net_profit#14))]) + : : +- Exchange hashpartitioning(ss_ticket_number#5, 
ss_customer_sk#10, ss_addr_sk#12, ca_city#3, 200) + : : +- *(5) HashAggregate(keys=[ss_ticket_number#5, ss_customer_sk#10, ss_addr_sk#12, ca_city#3], functions=[partial_sum(UnscaledValue(ss_coupon_amt#13)), partial_sum(UnscaledValue(ss_net_profit#14))]) + : : +- *(5) Project [ss_customer_sk#10, ss_addr_sk#12, ss_ticket_number#5, ss_coupon_amt#13, ss_net_profit#14, ca_city#3] + : : +- *(5) BroadcastHashJoin [ss_addr_sk#12], [ca_address_sk#9], Inner, BuildRight + : : :- *(5) Project [ss_customer_sk#10, ss_addr_sk#12, ss_ticket_number#5, ss_coupon_amt#13, ss_net_profit#14] + : : : +- *(5) BroadcastHashJoin [ss_hdemo_sk#15], [hd_demo_sk#16], Inner, BuildRight + : : : :- *(5) Project [ss_customer_sk#10, ss_hdemo_sk#15, ss_addr_sk#12, ss_ticket_number#5, ss_coupon_amt#13, ss_net_profit#14] + : : : : +- *(5) BroadcastHashJoin [ss_store_sk#17], [s_store_sk#18], Inner, BuildRight + : : : : :- *(5) Project [ss_customer_sk#10, ss_hdemo_sk#15, ss_addr_sk#12, ss_store_sk#17, ss_ticket_number#5, ss_coupon_amt#13, ss_net_profit#14] + : : : : : +- *(5) BroadcastHashJoin [ss_sold_date_sk#19], [d_date_sk#20], Inner, BuildRight + : : : : : :- *(5) Project [ss_sold_date_sk#19, ss_customer_sk#10, ss_hdemo_sk#15, ss_addr_sk#12, ss_store_sk#17, ss_ticket_number#5, ss_coupon_amt#13, ss_net_profit#14] + : : : : : : +- *(5) Filter ((((isnotnull(ss_sold_date_sk#19) && isnotnull(ss_store_sk#17)) && isnotnull(ss_hdemo_sk#15)) && isnotnull(ss_addr_sk#12)) && isnotnull(ss_customer_sk#10)) + : : : : : : +- *(5) FileScan parquet default.store_sales[ss_sold_date_sk#19,ss_customer_sk#10,ss_hdemo_sk#15,ss_addr_sk#12,ss_store_sk#17,ss_ticket_number#5,ss_coupon_amt#13,ss_net_profit#14] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk), IsNotNull(ss_hdemo_sk), IsNotNull(ss_addr_sk..., ReadSchema: struct + : : : : +- BroadcastExchange 
HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : +- *(2) Project [s_store_sk#18] + : : : : +- *(2) Filter (s_city#23 IN (Fairview,Midway) && isnotnull(s_store_sk#18)) + : : : : +- *(2) FileScan parquet default.store[s_store_sk#18,s_city#23] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [In(s_city, [Fairview,Midway]), IsNotNull(s_store_sk)], ReadSchema: struct + : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : +- *(3) Project [hd_demo_sk#16] + : : : +- *(3) Filter (((hd_dep_count#24 = 4) || (hd_vehicle_count#25 = 3)) && isnotnull(hd_demo_sk#16)) + : : : +- *(3) FileScan parquet default.household_demographics[hd_demo_sk#16,hd_dep_count#24,hd_vehicle_count#25] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/household_demographics], PartitionFilters: [], PushedFilters: [Or(EqualTo(hd_dep_count,4),EqualTo(hd_vehicle_count,3)), IsNotNull(hd_demo_sk)], ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(4) Project [ca_address_sk#9, ca_city#3] + : : +- *(4) Filter (isnotnull(ca_address_sk#9) && isnotnull(ca_city#3)) + : : +- *(4) FileScan parquet default.customer_address[ca_address_sk#9,ca_city#3] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_address_sk), IsNotNull(ca_city)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(6) Project [c_customer_sk#11, c_current_addr_sk#8, c_first_name#2, c_last_name#1] + : +- *(6) Filter (isnotnull(c_customer_sk#11) && isnotnull(c_current_addr_sk#8)) + : +- *(6) FileScan parquet 
default.customer[c_customer_sk#11,c_current_addr_sk#8,c_first_name#2,c_last_name#1] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_current_addr_sk)], ReadSchema: struct + +- ReusedExchange [ca_address_sk#9, ca_city#3], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q46/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q46/simplified.txt new file mode 100644 index 000000000..3010603c0 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q46/simplified.txt @@ -0,0 +1,54 @@ +TakeOrderedAndProject [bought_city,ss_ticket_number,profit,c_last_name,c_first_name,amt,ca_city] + WholeStageCodegen + Project [bought_city,ss_ticket_number,profit,c_last_name,c_first_name,amt,ca_city] + BroadcastHashJoin [c_current_addr_sk,ca_address_sk,ca_city,bought_city] + Project [c_current_addr_sk,profit,amt,c_last_name,c_first_name,ss_ticket_number,bought_city] + BroadcastHashJoin [ss_customer_sk,c_customer_sk] + HashAggregate [sum(UnscaledValue(ss_net_profit)),sum(UnscaledValue(ss_coupon_amt)),sum,ca_city,ss_customer_sk,sum,ss_ticket_number,ss_addr_sk] [sum(UnscaledValue(ss_net_profit)),sum(UnscaledValue(ss_coupon_amt)),profit,sum,amt,sum,bought_city] + InputAdapter + Exchange [ss_ticket_number,ss_customer_sk,ss_addr_sk,ca_city] #1 + WholeStageCodegen + HashAggregate [sum,ca_city,ss_customer_sk,ss_coupon_amt,sum,sum,sum,ss_ticket_number,ss_addr_sk,ss_net_profit] [sum,sum,sum,sum] + Project [ss_addr_sk,ss_coupon_amt,ss_customer_sk,ca_city,ss_net_profit,ss_ticket_number] + BroadcastHashJoin [ss_addr_sk,ca_address_sk] + Project [ss_addr_sk,ss_coupon_amt,ss_customer_sk,ss_net_profit,ss_ticket_number] + BroadcastHashJoin [ss_hdemo_sk,hd_demo_sk] 
+ Project [ss_addr_sk,ss_coupon_amt,ss_customer_sk,ss_net_profit,ss_hdemo_sk,ss_ticket_number] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_addr_sk,ss_coupon_amt,ss_store_sk,ss_customer_sk,ss_net_profit,ss_hdemo_sk,ss_ticket_number] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_addr_sk,ss_coupon_amt,ss_store_sk,ss_customer_sk,ss_net_profit,ss_sold_date_sk,ss_hdemo_sk,ss_ticket_number] + Filter [ss_hdemo_sk,ss_sold_date_sk,ss_customer_sk,ss_store_sk,ss_addr_sk] + Scan parquet default.store_sales [ss_addr_sk,ss_coupon_amt,ss_store_sk,ss_customer_sk,ss_net_profit,ss_sold_date_sk,ss_hdemo_sk,ss_ticket_number] [ss_addr_sk,ss_coupon_amt,ss_store_sk,ss_customer_sk,ss_net_profit,ss_sold_date_sk,ss_hdemo_sk,ss_ticket_number] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen + Project [d_date_sk] + Filter [d_dow,d_year,d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_year,d_dow] [d_date_sk,d_year,d_dow] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen + Project [s_store_sk] + Filter [s_city,s_store_sk] + Scan parquet default.store [s_store_sk,s_city] [s_store_sk,s_city] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen + Project [hd_demo_sk] + Filter [hd_dep_count,hd_vehicle_count,hd_demo_sk] + Scan parquet default.household_demographics [hd_demo_sk,hd_dep_count,hd_vehicle_count] [hd_demo_sk,hd_dep_count,hd_vehicle_count] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen + Project [ca_address_sk,ca_city] + Filter [ca_address_sk,ca_city] + Scan parquet default.customer_address [ca_address_sk,ca_city] [ca_address_sk,ca_city] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen + Project [c_customer_sk,c_current_addr_sk,c_first_name,c_last_name] + Filter [c_customer_sk,c_current_addr_sk] + Scan parquet default.customer [c_customer_sk,c_current_addr_sk,c_first_name,c_last_name] [c_customer_sk,c_current_addr_sk,c_first_name,c_last_name] + InputAdapter + ReusedExchange [ca_address_sk,ca_city] 
[ca_address_sk,ca_city] #5 diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q47/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q47/explain.txt new file mode 100644 index 000000000..914ebe9f5 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q47/explain.txt @@ -0,0 +1,51 @@ +== Physical Plan == +TakeOrderedAndProject(limit=100, orderBy=[CheckOverflow((promote_precision(cast(sum_sales#1 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#2 as decimal(22,6)))), DecimalType(22,6)) ASC NULLS FIRST,s_store_name#3 ASC NULLS FIRST], output=[i_category#4,i_brand#5,s_store_name#3,s_company_name#6,d_year#7,d_moy#8,avg_monthly_sales#2,sum_sales#1,psum#9,nsum#10]) ++- *(22) Project [i_category#4, i_brand#5, s_store_name#3, s_company_name#6, d_year#7, d_moy#8, avg_monthly_sales#2, sum_sales#1, sum_sales#11 AS psum#9, sum_sales#12 AS nsum#10] + +- *(22) BroadcastHashJoin [i_category#4, i_brand#5, s_store_name#3, s_company_name#6, rn#13], [i_category#14, i_brand#15, s_store_name#16, s_company_name#17, (rn#18 - 1)], Inner, BuildRight + :- *(22) Project [i_category#4, i_brand#5, s_store_name#3, s_company_name#6, d_year#7, d_moy#8, sum_sales#1, avg_monthly_sales#2, rn#13, sum_sales#11] + : +- *(22) BroadcastHashJoin [i_category#4, i_brand#5, s_store_name#3, s_company_name#6, rn#13], [i_category#19, i_brand#20, s_store_name#21, s_company_name#22, (rn#23 + 1)], Inner, BuildRight + : :- *(22) Project [i_category#4, i_brand#5, s_store_name#3, s_company_name#6, d_year#7, d_moy#8, sum_sales#1, avg_monthly_sales#2, rn#13] + : : +- *(22) Filter (((isnotnull(avg_monthly_sales#2) && (avg_monthly_sales#2 > 0.000000)) && (CASE WHEN (avg_monthly_sales#2 > 0.000000) THEN CheckOverflow((promote_precision(abs(CheckOverflow((promote_precision(cast(sum_sales#1 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#2 as decimal(22,6)))), DecimalType(22,6)))) / promote_precision(cast(avg_monthly_sales#2 as 
decimal(22,6)))), DecimalType(38,16)) ELSE null END > 0.1000000000000000)) && isnotnull(rn#13)) + : : +- Window [avg(_w0#24) windowspecdefinition(i_category#4, i_brand#5, s_store_name#3, s_company_name#6, d_year#7, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS avg_monthly_sales#2], [i_category#4, i_brand#5, s_store_name#3, s_company_name#6, d_year#7] + : : +- *(7) Filter (isnotnull(d_year#7) && (d_year#7 = 1999)) + : : +- Window [rank(d_year#7, d_moy#8) windowspecdefinition(i_category#4, i_brand#5, s_store_name#3, s_company_name#6, d_year#7 ASC NULLS FIRST, d_moy#8 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#13], [i_category#4, i_brand#5, s_store_name#3, s_company_name#6], [d_year#7 ASC NULLS FIRST, d_moy#8 ASC NULLS FIRST] + : : +- *(6) Sort [i_category#4 ASC NULLS FIRST, i_brand#5 ASC NULLS FIRST, s_store_name#3 ASC NULLS FIRST, s_company_name#6 ASC NULLS FIRST, d_year#7 ASC NULLS FIRST, d_moy#8 ASC NULLS FIRST], false, 0 + : : +- Exchange hashpartitioning(i_category#4, i_brand#5, s_store_name#3, s_company_name#6, 200) + : : +- *(5) HashAggregate(keys=[i_category#4, i_brand#5, s_store_name#3, s_company_name#6, d_year#7, d_moy#8], functions=[sum(UnscaledValue(ss_sales_price#25))]) + : : +- Exchange hashpartitioning(i_category#4, i_brand#5, s_store_name#3, s_company_name#6, d_year#7, d_moy#8, 200) + : : +- *(4) HashAggregate(keys=[i_category#4, i_brand#5, s_store_name#3, s_company_name#6, d_year#7, d_moy#8], functions=[partial_sum(UnscaledValue(ss_sales_price#25))]) + : : +- *(4) Project [i_brand#5, i_category#4, ss_sales_price#25, d_year#7, d_moy#8, s_store_name#3, s_company_name#6] + : : +- *(4) BroadcastHashJoin [ss_store_sk#26], [s_store_sk#27], Inner, BuildRight + : : :- *(4) Project [i_brand#5, i_category#4, ss_store_sk#26, ss_sales_price#25, d_year#7, d_moy#8] + : : : +- *(4) BroadcastHashJoin [ss_sold_date_sk#28], [d_date_sk#29], Inner, BuildRight + : : : :- *(4) Project 
[i_brand#5, i_category#4, ss_sold_date_sk#28, ss_store_sk#26, ss_sales_price#25] + : : : : +- *(4) BroadcastHashJoin [i_item_sk#30], [ss_item_sk#31], Inner, BuildRight + : : : : :- *(4) Project [i_item_sk#30, i_brand#5, i_category#4] + : : : : : +- *(4) Filter ((isnotnull(i_item_sk#30) && isnotnull(i_brand#5)) && isnotnull(i_category#4)) + : : : : : +- *(4) FileScan parquet default.item[i_item_sk#30,i_brand#5,i_category#4] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk), IsNotNull(i_brand), IsNotNull(i_category)], ReadSchema: struct + : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[1, int, true] as bigint))) + : : : : +- *(1) Project [ss_sold_date_sk#28, ss_item_sk#31, ss_store_sk#26, ss_sales_price#25] + : : : : +- *(1) Filter ((isnotnull(ss_item_sk#31) && isnotnull(ss_sold_date_sk#28)) && isnotnull(ss_store_sk#26)) + : : : : +- *(1) FileScan parquet default.store_sales[ss_sold_date_sk#28,ss_item_sk#31,ss_store_sk#26,ss_sales_price#25] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk)], ReadSchema: struct + : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : +- *(2) Project [d_date_sk#29, d_year#7, d_moy#8] + : : : +- *(2) Filter ((((d_year#7 = 1999) || ((d_year#7 = 1998) && (d_moy#8 = 12))) || ((d_year#7 = 2000) && (d_moy#8 = 1))) && isnotnull(d_date_sk#29)) + : : : +- *(2) FileScan parquet default.date_dim[d_date_sk#29,d_year#7,d_moy#8] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: 
[Or(Or(EqualTo(d_year,1999),And(EqualTo(d_year,1998),EqualTo(d_moy,12))),And(EqualTo(d_year,2000)..., ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(3) Project [s_store_sk#27, s_store_name#3, s_company_name#6] + : : +- *(3) Filter ((isnotnull(s_store_sk#27) && isnotnull(s_store_name#3)) && isnotnull(s_company_name#6)) + : : +- *(3) FileScan parquet default.store[s_store_sk#27,s_store_name#3,s_company_name#6] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_store_sk), IsNotNull(s_store_name), IsNotNull(s_company_name)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true], input[1, string, true], input[2, string, true], input[3, string, true], (input[5, int, true] + 1))) + : +- *(14) Project [i_category#19, i_brand#20, s_store_name#21, s_company_name#22, sum_sales#11, rn#23] + : +- *(14) Filter isnotnull(rn#23) + : +- Window [rank(d_year#32, d_moy#33) windowspecdefinition(i_category#19, i_brand#20, s_store_name#21, s_company_name#22, d_year#32 ASC NULLS FIRST, d_moy#33 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#23], [i_category#19, i_brand#20, s_store_name#21, s_company_name#22], [d_year#32 ASC NULLS FIRST, d_moy#33 ASC NULLS FIRST] + : +- *(13) Sort [i_category#19 ASC NULLS FIRST, i_brand#20 ASC NULLS FIRST, s_store_name#21 ASC NULLS FIRST, s_company_name#22 ASC NULLS FIRST, d_year#32 ASC NULLS FIRST, d_moy#33 ASC NULLS FIRST], false, 0 + : +- Exchange hashpartitioning(i_category#19, i_brand#20, s_store_name#21, s_company_name#22, 200) + : +- *(12) HashAggregate(keys=[i_category#19, i_brand#20, s_store_name#21, s_company_name#22, d_year#32, d_moy#33], functions=[sum(UnscaledValue(ss_sales_price#25))]) + : +- ReusedExchange [i_category#19, i_brand#20, 
s_store_name#21, s_company_name#22, d_year#32, d_moy#33, sum#34], Exchange hashpartitioning(i_category#4, i_brand#5, s_store_name#3, s_company_name#6, d_year#7, d_moy#8, 200) + +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true], input[1, string, true], input[2, string, true], input[3, string, true], (input[5, int, true] - 1))) + +- *(21) Project [i_category#14, i_brand#15, s_store_name#16, s_company_name#17, sum_sales#12, rn#18] + +- *(21) Filter isnotnull(rn#18) + +- Window [rank(d_year#35, d_moy#36) windowspecdefinition(i_category#14, i_brand#15, s_store_name#16, s_company_name#17, d_year#35 ASC NULLS FIRST, d_moy#36 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#18], [i_category#14, i_brand#15, s_store_name#16, s_company_name#17], [d_year#35 ASC NULLS FIRST, d_moy#36 ASC NULLS FIRST] + +- *(20) Sort [i_category#14 ASC NULLS FIRST, i_brand#15 ASC NULLS FIRST, s_store_name#16 ASC NULLS FIRST, s_company_name#17 ASC NULLS FIRST, d_year#35 ASC NULLS FIRST, d_moy#36 ASC NULLS FIRST], false, 0 + +- ReusedExchange [i_category#14, i_brand#15, s_store_name#16, s_company_name#17, d_year#35, d_moy#36, sum_sales#12], Exchange hashpartitioning(i_category#19, i_brand#20, s_store_name#21, s_company_name#22, 200) \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q47/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q47/simplified.txt new file mode 100644 index 000000000..656b50091 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q47/simplified.txt @@ -0,0 +1,77 @@ +TakeOrderedAndProject [d_year,d_moy,s_store_name,i_category,psum,i_brand,sum_sales,s_company_name,avg_monthly_sales,nsum] + WholeStageCodegen + Project [s_store_name,d_year,avg_monthly_sales,d_moy,sum_sales,sum_sales,i_category,i_brand,s_company_name,sum_sales] + BroadcastHashJoin 
[s_store_name,s_company_name,i_brand,i_category,i_category,rn,i_brand,s_store_name,rn,s_company_name] + Project [d_year,s_company_name,rn,d_moy,sum_sales,i_brand,sum_sales,i_category,s_store_name,avg_monthly_sales] + BroadcastHashJoin [s_store_name,i_category,i_category,s_company_name,rn,i_brand,rn,s_company_name,s_store_name,i_brand] + Project [d_year,s_company_name,rn,d_moy,i_brand,sum_sales,i_category,s_store_name,avg_monthly_sales] + Filter [avg_monthly_sales,sum_sales,rn] + InputAdapter + Window [d_year,s_company_name,_w0,i_brand,i_category,s_store_name] + WholeStageCodegen + Filter [d_year] + InputAdapter + Window [d_year,s_company_name,d_moy,i_brand,i_category,s_store_name] + WholeStageCodegen + Sort [d_year,s_company_name,d_moy,i_brand,i_category,s_store_name] + InputAdapter + Exchange [i_category,i_brand,s_store_name,s_company_name] #1 + WholeStageCodegen + HashAggregate [d_year,s_company_name,d_moy,sum,sum(UnscaledValue(ss_sales_price)),i_brand,i_category,s_store_name] [sum(UnscaledValue(ss_sales_price)),sum_sales,_w0,sum] + InputAdapter + Exchange [d_year,s_company_name,d_moy,i_brand,i_category,s_store_name] #2 + WholeStageCodegen + HashAggregate [d_year,s_company_name,d_moy,sum,i_brand,ss_sales_price,i_category,sum,s_store_name] [sum,sum] + Project [s_store_name,s_company_name,d_moy,d_year,i_category,ss_sales_price,i_brand] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [d_moy,d_year,ss_store_sk,i_category,ss_sales_price,i_brand] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_store_sk,i_category,ss_sales_price,ss_sold_date_sk,i_brand] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Project [i_item_sk,i_brand,i_category] + Filter [i_item_sk,i_brand,i_category] + Scan parquet default.item [i_item_sk,i_brand,i_category] [i_item_sk,i_brand,i_category] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen + Project [ss_sold_date_sk,ss_item_sk,ss_store_sk,ss_sales_price] + Filter [ss_item_sk,ss_sold_date_sk,ss_store_sk] + Scan parquet 
default.store_sales [ss_sold_date_sk,ss_item_sk,ss_store_sk,ss_sales_price] [ss_sold_date_sk,ss_item_sk,ss_store_sk,ss_sales_price] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen + Project [d_date_sk,d_year,d_moy] + Filter [d_year,d_moy,d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_year,d_moy] [d_date_sk,d_year,d_moy] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen + Project [s_store_sk,s_store_name,s_company_name] + Filter [s_store_sk,s_store_name,s_company_name] + Scan parquet default.store [s_store_sk,s_store_name,s_company_name] [s_store_sk,s_store_name,s_company_name] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen + Project [i_category,rn,s_company_name,s_store_name,sum_sales,i_brand] + Filter [rn] + InputAdapter + Window [i_category,s_company_name,s_store_name,d_moy,d_year,i_brand] + WholeStageCodegen + Sort [i_category,s_company_name,s_store_name,d_moy,d_year,i_brand] + InputAdapter + Exchange [i_category,i_brand,s_store_name,s_company_name] #7 + WholeStageCodegen + HashAggregate [i_category,s_company_name,s_store_name,sum(UnscaledValue(ss_sales_price)),d_moy,d_year,sum,i_brand] [sum(UnscaledValue(ss_sales_price)),sum_sales,sum] + InputAdapter + ReusedExchange [i_category,s_company_name,s_store_name,d_moy,d_year,sum,i_brand] [i_category,s_company_name,s_store_name,d_moy,d_year,sum,i_brand] #2 + InputAdapter + BroadcastExchange #8 + WholeStageCodegen + Project [rn,i_category,s_store_name,i_brand,s_company_name,sum_sales] + Filter [rn] + InputAdapter + Window [i_category,s_store_name,i_brand,s_company_name,d_moy,d_year] + WholeStageCodegen + Sort [i_category,s_store_name,i_brand,s_company_name,d_moy,d_year] + InputAdapter + ReusedExchange [i_category,s_store_name,i_brand,s_company_name,d_moy,d_year,sum_sales] [i_category,s_store_name,i_brand,s_company_name,d_moy,d_year,sum_sales] #7 diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q48/explain.txt 
b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q48/explain.txt new file mode 100644 index 000000000..735a739a8 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q48/explain.txt @@ -0,0 +1,31 @@ +== Physical Plan == +*(6) HashAggregate(keys=[], functions=[sum(cast(ss_quantity#1 as bigint))]) ++- Exchange SinglePartition + +- *(5) HashAggregate(keys=[], functions=[partial_sum(cast(ss_quantity#1 as bigint))]) + +- *(5) Project [ss_quantity#1] + +- *(5) BroadcastHashJoin [ss_sold_date_sk#2], [d_date_sk#3], Inner, BuildRight + :- *(5) Project [ss_sold_date_sk#2, ss_quantity#1] + : +- *(5) BroadcastHashJoin [ss_addr_sk#4], [ca_address_sk#5], Inner, BuildRight, ((((ca_state#6 IN (CO,OH,TX) && (ss_net_profit#7 >= 0.00)) && (ss_net_profit#7 <= 2000.00)) || ((ca_state#6 IN (OR,MN,KY) && (ss_net_profit#7 >= 150.00)) && (ss_net_profit#7 <= 3000.00))) || ((ca_state#6 IN (VA,CA,MS) && (ss_net_profit#7 >= 50.00)) && (ss_net_profit#7 <= 25000.00))) + : :- *(5) Project [ss_sold_date_sk#2, ss_addr_sk#4, ss_quantity#1, ss_net_profit#7] + : : +- *(5) BroadcastHashJoin [ss_cdemo_sk#8], [cd_demo_sk#9], Inner, BuildRight, ((((((cd_marital_status#10 = M) && (cd_education_status#11 = 4 yr Degree)) && (ss_sales_price#12 >= 100.00)) && (ss_sales_price#12 <= 150.00)) || ((((cd_marital_status#10 = D) && (cd_education_status#11 = 2 yr Degree)) && (ss_sales_price#12 >= 50.00)) && (ss_sales_price#12 <= 100.00))) || ((((cd_marital_status#10 = S) && (cd_education_status#11 = College)) && (ss_sales_price#12 >= 150.00)) && (ss_sales_price#12 <= 200.00))) + : : :- *(5) Project [ss_sold_date_sk#2, ss_cdemo_sk#8, ss_addr_sk#4, ss_quantity#1, ss_sales_price#12, ss_net_profit#7] + : : : +- *(5) BroadcastHashJoin [ss_store_sk#13], [s_store_sk#14], Inner, BuildRight + : : : :- *(5) Project [ss_sold_date_sk#2, ss_cdemo_sk#8, ss_addr_sk#4, ss_store_sk#13, ss_quantity#1, ss_sales_price#12, ss_net_profit#7] + : : : : +- *(5) Filter (((isnotnull(ss_store_sk#13) && 
isnotnull(ss_cdemo_sk#8)) && isnotnull(ss_addr_sk#4)) && isnotnull(ss_sold_date_sk#2)) + : : : : +- *(5) FileScan parquet default.store_sales[ss_sold_date_sk#2,ss_cdemo_sk#8,ss_addr_sk#4,ss_store_sk#13,ss_quantity#1,ss_sales_price#12,ss_net_profit#7] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_store_sk), IsNotNull(ss_cdemo_sk), IsNotNull(ss_addr_sk), IsNotNull(ss_sold_date_sk)], ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(2) Project [cd_demo_sk#9, cd_marital_status#10, cd_education_status#11] + : : +- *(2) Filter isnotnull(cd_demo_sk#9) + : : +- *(2) FileScan parquet default.customer_demographics[cd_demo_sk#9,cd_marital_status#10,cd_education_status#11] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_demographics], PartitionFilters: [], PushedFilters: [IsNotNull(cd_demo_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(3) Project [ca_address_sk#5, ca_state#6] + : +- *(3) Filter ((isnotnull(ca_country#15) && (ca_country#15 = United States)) && isnotnull(ca_address_sk#5)) + : +- *(3) FileScan parquet default.customer_address[ca_address_sk#5,ca_state#6,ca_country#15] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_country), EqualTo(ca_country,United States), IsNotNull(ca_address_sk)], ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(4) Project [d_date_sk#3] + +- *(4) Filter ((isnotnull(d_year#16) && (d_year#16 = 2001)) && isnotnull(d_date_sk#3)) + +- *(4) FileScan parquet 
default.date_dim[d_date_sk#3,d_year#16] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2001), IsNotNull(d_date_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q48/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q48/simplified.txt new file mode 100644 index 000000000..78fb16b88 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q48/simplified.txt @@ -0,0 +1,41 @@ +WholeStageCodegen + HashAggregate [sum,sum(cast(ss_quantity as bigint))] [sum(cast(ss_quantity as bigint)),sum(ss_quantity),sum] + InputAdapter + Exchange #1 + WholeStageCodegen + HashAggregate [ss_quantity,sum,sum] [sum,sum] + Project [ss_quantity] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_sold_date_sk,ss_quantity] + BroadcastHashJoin [ss_addr_sk,ca_address_sk,ca_state,ss_net_profit] + Project [ss_sold_date_sk,ss_addr_sk,ss_quantity,ss_net_profit] + BroadcastHashJoin [ss_cdemo_sk,cd_demo_sk,ss_sales_price,cd_education_status,cd_marital_status] + Project [ss_addr_sk,ss_quantity,ss_sales_price,ss_net_profit,ss_cdemo_sk,ss_sold_date_sk] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_addr_sk,ss_quantity,ss_store_sk,ss_sales_price,ss_net_profit,ss_cdemo_sk,ss_sold_date_sk] + Filter [ss_store_sk,ss_cdemo_sk,ss_addr_sk,ss_sold_date_sk] + Scan parquet default.store_sales [ss_addr_sk,ss_quantity,ss_store_sk,ss_sales_price,ss_net_profit,ss_cdemo_sk,ss_sold_date_sk] [ss_addr_sk,ss_quantity,ss_store_sk,ss_sales_price,ss_net_profit,ss_cdemo_sk,ss_sold_date_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen + Project [s_store_sk] + Filter [s_store_sk] + Scan parquet default.store [s_store_sk] [s_store_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen + Project 
[cd_demo_sk,cd_marital_status,cd_education_status] + Filter [cd_demo_sk] + Scan parquet default.customer_demographics [cd_demo_sk,cd_marital_status,cd_education_status] [cd_demo_sk,cd_marital_status,cd_education_status] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen + Project [ca_address_sk,ca_state] + Filter [ca_country,ca_address_sk] + Scan parquet default.customer_address [ca_address_sk,ca_state,ca_country] [ca_address_sk,ca_state,ca_country] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen + Project [d_date_sk] + Filter [d_year,d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_year] [d_date_sk,d_year] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q49/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q49/explain.txt new file mode 100644 index 000000000..bf898b8ba --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q49/explain.txt @@ -0,0 +1,75 @@ +== Physical Plan == +TakeOrderedAndProject(limit=100, orderBy=[channel#1 ASC NULLS FIRST,return_rank#2 ASC NULLS FIRST,currency_rank#3 ASC NULLS FIRST], output=[channel#1,item#4,return_ratio#5,return_rank#2,currency_rank#3]) ++- *(23) HashAggregate(keys=[channel#1, item#4, return_ratio#5, return_rank#2, currency_rank#3], functions=[]) + +- Exchange hashpartitioning(channel#1, item#4, return_ratio#5, return_rank#2, currency_rank#3, 200) + +- *(22) HashAggregate(keys=[channel#1, item#4, return_ratio#5, return_rank#2, currency_rank#3], functions=[]) + +- Union + :- *(7) Project [web AS channel#1, item#4, return_ratio#5, return_rank#2, currency_rank#3] + : +- *(7) Filter ((return_rank#2 <= 10) || (currency_rank#3 <= 10)) + : +- Window [rank(currency_ratio#6) windowspecdefinition(currency_ratio#6 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS currency_rank#3], [currency_ratio#6 ASC NULLS FIRST] + : +- *(6) Sort [currency_ratio#6 ASC NULLS FIRST], false, 0 + : +- Window 
[rank(return_ratio#5) windowspecdefinition(return_ratio#5 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS return_rank#2], [return_ratio#5 ASC NULLS FIRST] + : +- *(5) Sort [return_ratio#5 ASC NULLS FIRST], false, 0 + : +- Exchange SinglePartition + : +- *(4) HashAggregate(keys=[ws_item_sk#7], functions=[sum(cast(coalesce(wr_return_quantity#8, 0) as bigint)), sum(cast(coalesce(ws_quantity#9, 0) as bigint)), sum(coalesce(cast(wr_return_amt#10 as decimal(12,2)), 0.00)), sum(coalesce(cast(ws_net_paid#11 as decimal(12,2)), 0.00))]) + : +- Exchange hashpartitioning(ws_item_sk#7, 200) + : +- *(3) HashAggregate(keys=[ws_item_sk#7], functions=[partial_sum(cast(coalesce(wr_return_quantity#8, 0) as bigint)), partial_sum(cast(coalesce(ws_quantity#9, 0) as bigint)), partial_sum(coalesce(cast(wr_return_amt#10 as decimal(12,2)), 0.00)), partial_sum(coalesce(cast(ws_net_paid#11 as decimal(12,2)), 0.00))]) + : +- *(3) Project [ws_item_sk#7, ws_quantity#9, ws_net_paid#11, wr_return_quantity#8, wr_return_amt#10] + : +- *(3) BroadcastHashJoin [ws_sold_date_sk#12], [d_date_sk#13], Inner, BuildRight + : :- *(3) Project [ws_sold_date_sk#12, ws_item_sk#7, ws_quantity#9, ws_net_paid#11, wr_return_quantity#8, wr_return_amt#10] + : : +- *(3) BroadcastHashJoin [cast(ws_order_number#14 as bigint), cast(ws_item_sk#7 as bigint)], [wr_order_number#15, wr_item_sk#16], Inner, BuildRight + : : :- *(3) Project [ws_sold_date_sk#12, ws_item_sk#7, ws_order_number#14, ws_quantity#9, ws_net_paid#11] + : : : +- *(3) Filter ((((((((isnotnull(ws_net_profit#17) && isnotnull(ws_quantity#9)) && isnotnull(ws_net_paid#11)) && (ws_net_profit#17 > 1.00)) && (ws_net_paid#11 > 0.00)) && (ws_quantity#9 > 0)) && isnotnull(ws_order_number#14)) && isnotnull(ws_item_sk#7)) && isnotnull(ws_sold_date_sk#12)) + : : : +- *(3) FileScan parquet default.web_sales[ws_sold_date_sk#12,ws_item_sk#7,ws_order_number#14,ws_quantity#9,ws_net_paid#11,ws_net_profit#17] Batched: true, Format: 
Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_net_profit), IsNotNull(ws_quantity), IsNotNull(ws_net_paid), GreaterThan(ws_net_pro..., ReadSchema: struct 10000.00)) && isnotnull(wr_item_sk#16)) && isnotnull(wr_order_number#15)) + : : +- *(1) FileScan parquet default.web_returns[wr_item_sk#16,wr_order_number#15,wr_return_quantity#8,wr_return_amt#10] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_returns], PartitionFilters: [], PushedFilters: [IsNotNull(wr_return_amt), GreaterThan(wr_return_amt,10000.00), IsNotNull(wr_item_sk), IsNotNull(..., ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(2) Project [d_date_sk#13] + : +- *(2) Filter ((((isnotnull(d_year#18) && isnotnull(d_moy#19)) && (d_year#18 = 2001)) && (d_moy#19 = 12)) && isnotnull(d_date_sk#13)) + : +- *(2) FileScan parquet default.date_dim[d_date_sk#13,d_year#18,d_moy#19] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2001), EqualTo(d_moy,12), IsNotNull(d_date_sk)], ReadSchema: struct + :- *(14) Project [catalog AS channel#20, item#21, return_ratio#22, return_rank#23, currency_rank#24] + : +- *(14) Filter ((return_rank#23 <= 10) || (currency_rank#24 <= 10)) + : +- Window [rank(currency_ratio#25) windowspecdefinition(currency_ratio#25 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS currency_rank#24], [currency_ratio#25 ASC NULLS FIRST] + : +- *(13) Sort [currency_ratio#25 ASC NULLS FIRST], false, 0 + : +- Window [rank(return_ratio#22) windowspecdefinition(return_ratio#22 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), 
currentrow$())) AS return_rank#23], [return_ratio#22 ASC NULLS FIRST] + : +- *(12) Sort [return_ratio#22 ASC NULLS FIRST], false, 0 + : +- Exchange SinglePartition + : +- *(11) HashAggregate(keys=[cs_item_sk#26], functions=[sum(cast(coalesce(cr_return_quantity#27, 0) as bigint)), sum(cast(coalesce(cs_quantity#28, 0) as bigint)), sum(coalesce(cast(cr_return_amount#29 as decimal(12,2)), 0.00)), sum(coalesce(cast(cs_net_paid#30 as decimal(12,2)), 0.00))]) + : +- Exchange hashpartitioning(cs_item_sk#26, 200) + : +- *(10) HashAggregate(keys=[cs_item_sk#26], functions=[partial_sum(cast(coalesce(cr_return_quantity#27, 0) as bigint)), partial_sum(cast(coalesce(cs_quantity#28, 0) as bigint)), partial_sum(coalesce(cast(cr_return_amount#29 as decimal(12,2)), 0.00)), partial_sum(coalesce(cast(cs_net_paid#30 as decimal(12,2)), 0.00))]) + : +- *(10) Project [cs_item_sk#26, cs_quantity#28, cs_net_paid#30, cr_return_quantity#27, cr_return_amount#29] + : +- *(10) BroadcastHashJoin [cs_sold_date_sk#31], [d_date_sk#13], Inner, BuildRight + : :- *(10) Project [cs_sold_date_sk#31, cs_item_sk#26, cs_quantity#28, cs_net_paid#30, cr_return_quantity#27, cr_return_amount#29] + : : +- *(10) BroadcastHashJoin [cs_order_number#32, cs_item_sk#26], [cr_order_number#33, cr_item_sk#34], Inner, BuildRight + : : :- *(10) Project [cs_sold_date_sk#31, cs_item_sk#26, cs_order_number#32, cs_quantity#28, cs_net_paid#30] + : : : +- *(10) Filter ((((((((isnotnull(cs_net_profit#35) && isnotnull(cs_net_paid#30)) && isnotnull(cs_quantity#28)) && (cs_net_profit#35 > 1.00)) && (cs_net_paid#30 > 0.00)) && (cs_quantity#28 > 0)) && isnotnull(cs_item_sk#26)) && isnotnull(cs_order_number#32)) && isnotnull(cs_sold_date_sk#31)) + : : : +- *(10) FileScan parquet default.catalog_sales[cs_sold_date_sk#31,cs_item_sk#26,cs_order_number#32,cs_quantity#28,cs_net_paid#30,cs_net_profit#35] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], 
PartitionFilters: [], PushedFilters: [IsNotNull(cs_net_profit), IsNotNull(cs_net_paid), IsNotNull(cs_quantity), GreaterThan(cs_net_pro..., ReadSchema: struct 10000.00)) && isnotnull(cr_item_sk#34)) && isnotnull(cr_order_number#33)) + : : +- *(8) FileScan parquet default.catalog_returns[cr_item_sk#34,cr_order_number#33,cr_return_quantity#27,cr_return_amount#29] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_returns], PartitionFilters: [], PushedFilters: [IsNotNull(cr_return_amount), GreaterThan(cr_return_amount,10000.00), IsNotNull(cr_item_sk), IsNo..., ReadSchema: struct + : +- ReusedExchange [d_date_sk#13], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(21) Project [store AS channel#36, item#37, return_ratio#38, return_rank#39, currency_rank#40] + +- *(21) Filter ((return_rank#39 <= 10) || (currency_rank#40 <= 10)) + +- Window [rank(currency_ratio#41) windowspecdefinition(currency_ratio#41 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS currency_rank#40], [currency_ratio#41 ASC NULLS FIRST] + +- *(20) Sort [currency_ratio#41 ASC NULLS FIRST], false, 0 + +- Window [rank(return_ratio#38) windowspecdefinition(return_ratio#38 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS return_rank#39], [return_ratio#38 ASC NULLS FIRST] + +- *(19) Sort [return_ratio#38 ASC NULLS FIRST], false, 0 + +- Exchange SinglePartition + +- *(18) HashAggregate(keys=[ss_item_sk#42], functions=[sum(cast(coalesce(sr_return_quantity#43, 0) as bigint)), sum(cast(coalesce(ss_quantity#44, 0) as bigint)), sum(coalesce(cast(sr_return_amt#45 as decimal(12,2)), 0.00)), sum(coalesce(cast(ss_net_paid#46 as decimal(12,2)), 0.00))]) + +- Exchange hashpartitioning(ss_item_sk#42, 200) + +- *(17) HashAggregate(keys=[ss_item_sk#42], functions=[partial_sum(cast(coalesce(sr_return_quantity#43, 0) as 
bigint)), partial_sum(cast(coalesce(ss_quantity#44, 0) as bigint)), partial_sum(coalesce(cast(sr_return_amt#45 as decimal(12,2)), 0.00)), partial_sum(coalesce(cast(ss_net_paid#46 as decimal(12,2)), 0.00))]) + +- *(17) Project [ss_item_sk#42, ss_quantity#44, ss_net_paid#46, sr_return_quantity#43, sr_return_amt#45] + +- *(17) BroadcastHashJoin [ss_sold_date_sk#47], [d_date_sk#13], Inner, BuildRight + :- *(17) Project [ss_sold_date_sk#47, ss_item_sk#42, ss_quantity#44, ss_net_paid#46, sr_return_quantity#43, sr_return_amt#45] + : +- *(17) BroadcastHashJoin [cast(ss_ticket_number#48 as bigint), cast(ss_item_sk#42 as bigint)], [sr_ticket_number#49, sr_item_sk#50], Inner, BuildRight + : :- *(17) Project [ss_sold_date_sk#47, ss_item_sk#42, ss_ticket_number#48, ss_quantity#44, ss_net_paid#46] + : : +- *(17) Filter ((((((((isnotnull(ss_quantity#44) && isnotnull(ss_net_paid#46)) && isnotnull(ss_net_profit#51)) && (ss_net_profit#51 > 1.00)) && (ss_net_paid#46 > 0.00)) && (ss_quantity#44 > 0)) && isnotnull(ss_ticket_number#48)) && isnotnull(ss_item_sk#42)) && isnotnull(ss_sold_date_sk#47)) + : : +- *(17) FileScan parquet default.store_sales[ss_sold_date_sk#47,ss_item_sk#42,ss_ticket_number#48,ss_quantity#44,ss_net_paid#46,ss_net_profit#51] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_quantity), IsNotNull(ss_net_paid), IsNotNull(ss_net_profit), GreaterThan(ss_net_pro..., ReadSchema: struct 10000.00)) && isnotnull(sr_ticket_number#49)) && isnotnull(sr_item_sk#50)) + : +- *(15) FileScan parquet default.store_returns[sr_item_sk#50,sr_ticket_number#49,sr_return_quantity#43,sr_return_amt#45] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_returns], PartitionFilters: [], PushedFilters: [IsNotNull(sr_return_amt), GreaterThan(sr_return_amt,10000.00), 
IsNotNull(sr_ticket_number), IsNo..., ReadSchema: struct + +- ReusedExchange [d_date_sk#13], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q49/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q49/simplified.txt new file mode 100644 index 000000000..e0a243a63 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q49/simplified.txt @@ -0,0 +1,115 @@ +TakeOrderedAndProject [channel,currency_rank,return_rank,item,return_ratio] + WholeStageCodegen + HashAggregate [channel,currency_rank,return_rank,item,return_ratio] + InputAdapter + Exchange [channel,currency_rank,return_rank,item,return_ratio] #1 + WholeStageCodegen + HashAggregate [channel,currency_rank,return_rank,item,return_ratio] + InputAdapter + Union + WholeStageCodegen + Project [item,return_ratio,return_rank,currency_rank] + Filter [return_rank,currency_rank] + InputAdapter + Window [currency_ratio] + WholeStageCodegen + Sort [currency_ratio] + InputAdapter + Window [return_ratio] + WholeStageCodegen + Sort [return_ratio] + InputAdapter + Exchange #2 + WholeStageCodegen + HashAggregate [sum(coalesce(cast(ws_net_paid as decimal(12,2)), 0.00)),sum(coalesce(cast(wr_return_amt as decimal(12,2)), 0.00)),ws_item_sk,sum,sum,sum,sum(cast(coalesce(wr_return_quantity, 0) as bigint)),sum,sum(cast(coalesce(ws_quantity, 0) as bigint))] [currency_ratio,return_ratio,sum(coalesce(cast(ws_net_paid as decimal(12,2)), 0.00)),item,sum(coalesce(cast(wr_return_amt as decimal(12,2)), 0.00)),sum,sum,sum,sum(cast(coalesce(wr_return_quantity, 0) as bigint)),sum,sum(cast(coalesce(ws_quantity, 0) as bigint))] + InputAdapter + Exchange [ws_item_sk] #3 + WholeStageCodegen + HashAggregate [sum,wr_return_amt,sum,ws_item_sk,sum,sum,sum,ws_net_paid,ws_quantity,wr_return_quantity,sum,sum,sum] [sum,sum,sum,sum,sum,sum,sum,sum] + Project 
[wr_return_amt,ws_net_paid,ws_quantity,wr_return_quantity,ws_item_sk] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Project [wr_return_amt,ws_net_paid,ws_quantity,ws_sold_date_sk,wr_return_quantity,ws_item_sk] + BroadcastHashJoin [ws_order_number,ws_item_sk,wr_order_number,wr_item_sk] + Project [ws_net_paid,ws_quantity,ws_order_number,ws_sold_date_sk,ws_item_sk] + Filter [ws_net_profit,ws_order_number,ws_item_sk,ws_net_paid,ws_sold_date_sk,ws_quantity] + Scan parquet default.web_sales [ws_net_paid,ws_quantity,ws_net_profit,ws_order_number,ws_sold_date_sk,ws_item_sk] [ws_net_paid,ws_quantity,ws_net_profit,ws_order_number,ws_sold_date_sk,ws_item_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen + Project [wr_item_sk,wr_order_number,wr_return_quantity,wr_return_amt] + Filter [wr_return_amt,wr_item_sk,wr_order_number] + Scan parquet default.web_returns [wr_item_sk,wr_order_number,wr_return_quantity,wr_return_amt] [wr_item_sk,wr_order_number,wr_return_quantity,wr_return_amt] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen + Project [d_date_sk] + Filter [d_year,d_moy,d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_year,d_moy] [d_date_sk,d_year,d_moy] + WholeStageCodegen + Project [item,return_ratio,return_rank,currency_rank] + Filter [return_rank,currency_rank] + InputAdapter + Window [currency_ratio] + WholeStageCodegen + Sort [currency_ratio] + InputAdapter + Window [return_ratio] + WholeStageCodegen + Sort [return_ratio] + InputAdapter + Exchange #6 + WholeStageCodegen + HashAggregate [sum(cast(coalesce(cr_return_quantity, 0) as bigint)),sum,sum(coalesce(cast(cs_net_paid as decimal(12,2)), 0.00)),sum,sum,cs_item_sk,sum,sum(coalesce(cast(cr_return_amount as decimal(12,2)), 0.00)),sum(cast(coalesce(cs_quantity, 0) as bigint))] [sum(cast(coalesce(cr_return_quantity, 0) as bigint)),sum,item,return_ratio,sum(coalesce(cast(cs_net_paid as decimal(12,2)), 0.00)),sum,sum,sum,sum(coalesce(cast(cr_return_amount as decimal(12,2)), 
0.00)),sum(cast(coalesce(cs_quantity, 0) as bigint)),currency_ratio] + InputAdapter + Exchange [cs_item_sk] #7 + WholeStageCodegen + HashAggregate [sum,cr_return_amount,cr_return_quantity,sum,cs_net_paid,cs_quantity,sum,sum,sum,sum,sum,cs_item_sk,sum] [sum,sum,sum,sum,sum,sum,sum,sum] + Project [cs_net_paid,cs_quantity,cr_return_quantity,cr_return_amount,cs_item_sk] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_net_paid,cs_quantity,cr_return_quantity,cs_sold_date_sk,cr_return_amount,cs_item_sk] + BroadcastHashJoin [cs_order_number,cs_item_sk,cr_order_number,cr_item_sk] + Project [cs_net_paid,cs_quantity,cs_sold_date_sk,cs_order_number,cs_item_sk] + Filter [cs_net_profit,cs_net_paid,cs_quantity,cs_item_sk,cs_sold_date_sk,cs_order_number] + Scan parquet default.catalog_sales [cs_net_paid,cs_quantity,cs_sold_date_sk,cs_order_number,cs_item_sk,cs_net_profit] [cs_net_paid,cs_quantity,cs_sold_date_sk,cs_order_number,cs_item_sk,cs_net_profit] + InputAdapter + BroadcastExchange #8 + WholeStageCodegen + Project [cr_item_sk,cr_order_number,cr_return_quantity,cr_return_amount] + Filter [cr_return_amount,cr_item_sk,cr_order_number] + Scan parquet default.catalog_returns [cr_item_sk,cr_order_number,cr_return_quantity,cr_return_amount] [cr_item_sk,cr_order_number,cr_return_quantity,cr_return_amount] + InputAdapter + ReusedExchange [d_date_sk] [d_date_sk] #5 + WholeStageCodegen + Project [item,return_ratio,return_rank,currency_rank] + Filter [return_rank,currency_rank] + InputAdapter + Window [currency_ratio] + WholeStageCodegen + Sort [currency_ratio] + InputAdapter + Window [return_ratio] + WholeStageCodegen + Sort [return_ratio] + InputAdapter + Exchange #9 + WholeStageCodegen + HashAggregate [sum,ss_item_sk,sum(coalesce(cast(ss_net_paid as decimal(12,2)), 0.00)),sum(cast(coalesce(ss_quantity, 0) as bigint)),sum,sum(coalesce(cast(sr_return_amt as decimal(12,2)), 0.00)),sum(cast(coalesce(sr_return_quantity, 0) as bigint)),sum,sum] 
[sum,sum(coalesce(cast(ss_net_paid as decimal(12,2)), 0.00)),item,sum(cast(coalesce(ss_quantity, 0) as bigint)),sum,sum(coalesce(cast(sr_return_amt as decimal(12,2)), 0.00)),sum(cast(coalesce(sr_return_quantity, 0) as bigint)),return_ratio,sum,sum,currency_ratio] + InputAdapter + Exchange [ss_item_sk] #10 + WholeStageCodegen + HashAggregate [ss_net_paid,sum,ss_item_sk,sum,sum,sr_return_amt,sum,sum,ss_quantity,sr_return_quantity,sum,sum,sum] [sum,sum,sum,sum,sum,sum,sum,sum] + Project [ss_quantity,ss_item_sk,ss_net_paid,sr_return_quantity,sr_return_amt] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_quantity,ss_item_sk,ss_net_paid,sr_return_quantity,ss_sold_date_sk,sr_return_amt] + BroadcastHashJoin [ss_ticket_number,ss_item_sk,sr_ticket_number,sr_item_sk] + Project [ss_quantity,ss_item_sk,ss_net_paid,ss_sold_date_sk,ss_ticket_number] + Filter [ss_net_paid,ss_item_sk,ss_quantity,ss_sold_date_sk,ss_net_profit,ss_ticket_number] + Scan parquet default.store_sales [ss_quantity,ss_item_sk,ss_net_paid,ss_net_profit,ss_sold_date_sk,ss_ticket_number] [ss_quantity,ss_item_sk,ss_net_paid,ss_net_profit,ss_sold_date_sk,ss_ticket_number] + InputAdapter + BroadcastExchange #11 + WholeStageCodegen + Project [sr_item_sk,sr_ticket_number,sr_return_quantity,sr_return_amt] + Filter [sr_return_amt,sr_ticket_number,sr_item_sk] + Scan parquet default.store_returns [sr_item_sk,sr_ticket_number,sr_return_quantity,sr_return_amt] [sr_item_sk,sr_ticket_number,sr_return_quantity,sr_return_amt] + InputAdapter + ReusedExchange [d_date_sk] [d_date_sk] #5 diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q5/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q5/explain.txt new file mode 100644 index 000000000..983bb86ee --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q5/explain.txt @@ -0,0 +1,76 @@ +== Physical Plan == +TakeOrderedAndProject(limit=100, orderBy=[channel#1 ASC NULLS FIRST,id#2 ASC NULLS 
FIRST], output=[channel#1,id#2,sales#3,returns#4,profit#5]) ++- *(21) HashAggregate(keys=[channel#1, id#2, spark_grouping_id#6], functions=[sum(sales#7), sum(returns#8), sum(profit#9)]) + +- Exchange hashpartitioning(channel#1, id#2, spark_grouping_id#6, 200) + +- *(20) HashAggregate(keys=[channel#1, id#2, spark_grouping_id#6], functions=[partial_sum(sales#7), partial_sum(returns#8), partial_sum(profit#9)]) + +- *(20) Expand [List(sales#7, returns#8, profit#9, channel#10, id#11, 0), List(sales#7, returns#8, profit#9, channel#10, null, 1), List(sales#7, returns#8, profit#9, null, null, 3)], [sales#7, returns#8, profit#9, channel#1, id#2, spark_grouping_id#6] + +- Union + :- *(6) HashAggregate(keys=[s_store_id#12], functions=[sum(UnscaledValue(sales_price#13)), sum(UnscaledValue(return_amt#14)), sum(UnscaledValue(profit#15)), sum(UnscaledValue(net_loss#16))]) + : +- Exchange hashpartitioning(s_store_id#12, 200) + : +- *(5) HashAggregate(keys=[s_store_id#12], functions=[partial_sum(UnscaledValue(sales_price#13)), partial_sum(UnscaledValue(return_amt#14)), partial_sum(UnscaledValue(profit#15)), partial_sum(UnscaledValue(net_loss#16))]) + : +- *(5) Project [sales_price#13, profit#15, return_amt#14, net_loss#16, s_store_id#12] + : +- *(5) BroadcastHashJoin [store_sk#17], [cast(s_store_sk#18 as bigint)], Inner, BuildRight + : :- *(5) Project [store_sk#17, sales_price#13, profit#15, return_amt#14, net_loss#16] + : : +- *(5) BroadcastHashJoin [date_sk#19], [cast(d_date_sk#20 as bigint)], Inner, BuildRight + : : :- Union + : : : :- *(1) Project [cast(ss_store_sk#21 as bigint) AS store_sk#17, cast(ss_sold_date_sk#22 as bigint) AS date_sk#19, ss_ext_sales_price#23 AS sales_price#13, ss_net_profit#24 AS profit#15, 0.00 AS return_amt#14, 0.00 AS net_loss#16] + : : : : +- *(1) Filter (isnotnull(cast(ss_sold_date_sk#22 as bigint)) && isnotnull(cast(ss_store_sk#21 as bigint))) + : : : : +- *(1) FileScan parquet 
default.store_sales[ss_sold_date_sk#22,ss_store_sk#21,ss_ext_sales_price#23,ss_net_profit#24] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [], ReadSchema: struct= 11192)) && (d_date#35 <= 11206)) && isnotnull(d_date_sk#20)) + : : +- *(3) FileScan parquet default.date_dim[d_date_sk#20,d_date#35] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,2000-08-23), LessThanOrEqual(d_date,2000-09-06), Is..., ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(4) Project [s_store_sk#18, s_store_id#12] + : +- *(4) Filter isnotnull(s_store_sk#18) + : +- *(4) FileScan parquet default.store[s_store_sk#18,s_store_id#12] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_store_sk)], ReadSchema: struct + :- *(12) HashAggregate(keys=[cp_catalog_page_id#36], functions=[sum(UnscaledValue(sales_price#37)), sum(UnscaledValue(return_amt#38)), sum(UnscaledValue(profit#39)), sum(UnscaledValue(net_loss#40))]) + : +- Exchange hashpartitioning(cp_catalog_page_id#36, 200) + : +- *(11) HashAggregate(keys=[cp_catalog_page_id#36], functions=[partial_sum(UnscaledValue(sales_price#37)), partial_sum(UnscaledValue(return_amt#38)), partial_sum(UnscaledValue(profit#39)), partial_sum(UnscaledValue(net_loss#40))]) + : +- *(11) Project [sales_price#37, profit#39, return_amt#38, net_loss#40, cp_catalog_page_id#36] + : +- *(11) BroadcastHashJoin [page_sk#41], [cp_catalog_page_sk#42], Inner, BuildRight + : :- *(11) Project [page_sk#41, sales_price#37, profit#39, return_amt#38, net_loss#40] + : : +- *(11) BroadcastHashJoin [date_sk#43], 
[d_date_sk#20], Inner, BuildRight + : : :- Union + : : : :- *(7) Project [cs_catalog_page_sk#44 AS page_sk#41, cs_sold_date_sk#45 AS date_sk#43, cs_ext_sales_price#46 AS sales_price#37, cs_net_profit#47 AS profit#39, 0.00 AS return_amt#38, 0.00 AS net_loss#40] + : : : : +- *(7) Filter (isnotnull(cs_sold_date_sk#45) && isnotnull(cs_catalog_page_sk#44)) + : : : : +- *(7) FileScan parquet default.catalog_sales[cs_sold_date_sk#45,cs_catalog_page_sk#44,cs_ext_sales_price#46,cs_net_profit#47] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_sold_date_sk), IsNotNull(cs_catalog_page_sk)], ReadSchema: struct= 11192)) && (d_date#35 <= 11206)) && isnotnull(d_date_sk#20)) + : : +- *(9) FileScan parquet default.date_dim[d_date_sk#20,d_date#35] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,2000-08-23), LessThanOrEqual(d_date,2000-09-06), Is..., ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(10) Project [cp_catalog_page_sk#42, cp_catalog_page_id#36] + : +- *(10) Filter isnotnull(cp_catalog_page_sk#42) + : +- *(10) FileScan parquet default.catalog_page[cp_catalog_page_sk#42,cp_catalog_page_id#36] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_page], PartitionFilters: [], PushedFilters: [IsNotNull(cp_catalog_page_sk)], ReadSchema: struct + +- *(19) HashAggregate(keys=[web_site_id#58], functions=[sum(UnscaledValue(sales_price#59)), sum(UnscaledValue(return_amt#60)), sum(UnscaledValue(profit#61)), sum(UnscaledValue(net_loss#62))]) + +- Exchange hashpartitioning(web_site_id#58, 200) + +- *(18) HashAggregate(keys=[web_site_id#58], 
functions=[partial_sum(UnscaledValue(sales_price#59)), partial_sum(UnscaledValue(return_amt#60)), partial_sum(UnscaledValue(profit#61)), partial_sum(UnscaledValue(net_loss#62))]) + +- *(18) Project [sales_price#59, profit#61, return_amt#60, net_loss#62, web_site_id#58] + +- *(18) BroadcastHashJoin [wsr_web_site_sk#63], [web_site_sk#64], Inner, BuildRight + :- *(18) Project [wsr_web_site_sk#63, sales_price#59, profit#61, return_amt#60, net_loss#62] + : +- *(18) BroadcastHashJoin [date_sk#65], [cast(d_date_sk#20 as bigint)], Inner, BuildRight + : :- Union + : : :- *(13) Project [ws_web_site_sk#66 AS wsr_web_site_sk#63, cast(ws_sold_date_sk#67 as bigint) AS date_sk#65, ws_ext_sales_price#68 AS sales_price#59, ws_net_profit#69 AS profit#61, 0.00 AS return_amt#60, 0.00 AS net_loss#62] + : : : +- *(13) Filter (isnotnull(cast(ws_sold_date_sk#67 as bigint)) && isnotnull(ws_web_site_sk#66)) + : : : +- *(13) FileScan parquet default.web_sales[ws_sold_date_sk#67,ws_web_site_sk#66,ws_ext_sales_price#68,ws_net_profit#69] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_web_site_sk)], ReadSchema: struct + : +- ReusedExchange [d_date_sk#20], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(17) Project [web_site_sk#64, web_site_id#58] + +- *(17) Filter isnotnull(web_site_sk#64) + +- *(17) FileScan parquet default.web_site[web_site_sk#64,web_site_id#58] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_site], PartitionFilters: [], PushedFilters: [IsNotNull(web_site_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q5/simplified.txt 
b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q5/simplified.txt new file mode 100644 index 000000000..a3baee3e1 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q5/simplified.txt @@ -0,0 +1,110 @@ +TakeOrderedAndProject [profit,channel,sales,returns,id] + WholeStageCodegen + HashAggregate [sum(returns),channel,spark_grouping_id,sum,sum(profit),sum(sales),sum,sum,id] [sum(returns),profit,sum,sum(profit),sum(sales),sales,sum,sum,returns] + InputAdapter + Exchange [channel,id,spark_grouping_id] #1 + WholeStageCodegen + HashAggregate [sum,channel,spark_grouping_id,sum,sum,sum,profit,returns,sum,sum,id,sales] [sum,sum,sum,sum,sum,sum] + Expand [channel,id,profit,returns,sales] + InputAdapter + Union + WholeStageCodegen + HashAggregate [sum,sum(UnscaledValue(return_amt)),sum(UnscaledValue(net_loss)),sum,sum(UnscaledValue(profit)),s_store_id,sum(UnscaledValue(sales_price)),sum,sum] [sales,sum,sum(UnscaledValue(return_amt)),sum(UnscaledValue(net_loss)),RETURNS,sum,sum(UnscaledValue(profit)),channel,profit,sum(UnscaledValue(sales_price)),id,sum,sum] + InputAdapter + Exchange [s_store_id] #2 + WholeStageCodegen + HashAggregate [sum,sum,return_amt,net_loss,sum,sum,s_store_id,sum,sales_price,sum,sum,sum,profit] [sum,sum,sum,sum,sum,sum,sum,sum] + Project [s_store_id,sales_price,net_loss,return_amt,profit] + BroadcastHashJoin [store_sk,s_store_sk] + Project [store_sk,sales_price,net_loss,return_amt,profit] + BroadcastHashJoin [date_sk,d_date_sk] + InputAdapter + Union + WholeStageCodegen + Project [ss_store_sk,ss_sold_date_sk,ss_ext_sales_price,ss_net_profit] + Filter [ss_sold_date_sk,ss_store_sk] + Scan parquet default.store_sales [ss_sold_date_sk,ss_store_sk,ss_ext_sales_price,ss_net_profit] [ss_sold_date_sk,ss_store_sk,ss_ext_sales_price,ss_net_profit] + WholeStageCodegen + Project [sr_store_sk,sr_returned_date_sk,sr_return_amt,sr_net_loss] + Filter [sr_returned_date_sk,sr_store_sk] + Scan parquet default.store_returns 
[sr_returned_date_sk,sr_store_sk,sr_return_amt,sr_net_loss] [sr_returned_date_sk,sr_store_sk,sr_return_amt,sr_net_loss] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen + Project [d_date_sk] + Filter [d_date,d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_date] [d_date_sk,d_date] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen + Project [s_store_sk,s_store_id] + Filter [s_store_sk] + Scan parquet default.store [s_store_sk,s_store_id] [s_store_sk,s_store_id] + WholeStageCodegen + HashAggregate [cp_catalog_page_id,sum(UnscaledValue(profit)),sum,sum(UnscaledValue(sales_price)),sum(UnscaledValue(return_amt)),sum,sum,sum(UnscaledValue(net_loss)),sum] [sum(UnscaledValue(profit)),sum,sum(UnscaledValue(sales_price)),channel,sum(UnscaledValue(return_amt)),sales,sum,profit,sum,id,sum(UnscaledValue(net_loss)),sum,RETURNS] + InputAdapter + Exchange [cp_catalog_page_id] #5 + WholeStageCodegen + HashAggregate [cp_catalog_page_id,sum,sum,sum,sales_price,return_amt,net_loss,sum,profit,sum,sum,sum,sum] [sum,sum,sum,sum,sum,sum,sum,sum] + Project [sales_price,cp_catalog_page_id,profit,net_loss,return_amt] + BroadcastHashJoin [page_sk,cp_catalog_page_sk] + Project [sales_price,profit,page_sk,net_loss,return_amt] + BroadcastHashJoin [date_sk,d_date_sk] + InputAdapter + Union + WholeStageCodegen + Project [cs_catalog_page_sk,cs_sold_date_sk,cs_ext_sales_price,cs_net_profit] + Filter [cs_sold_date_sk,cs_catalog_page_sk] + Scan parquet default.catalog_sales [cs_sold_date_sk,cs_catalog_page_sk,cs_ext_sales_price,cs_net_profit] [cs_sold_date_sk,cs_catalog_page_sk,cs_ext_sales_price,cs_net_profit] + WholeStageCodegen + Project [cr_catalog_page_sk,cr_returned_date_sk,cr_return_amount,cr_net_loss] + Filter [cr_returned_date_sk,cr_catalog_page_sk] + Scan parquet default.catalog_returns [cr_returned_date_sk,cr_catalog_page_sk,cr_return_amount,cr_net_loss] [cr_returned_date_sk,cr_catalog_page_sk,cr_return_amount,cr_net_loss] + InputAdapter + BroadcastExchange #6 + 
WholeStageCodegen + Project [d_date_sk] + Filter [d_date,d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_date] [d_date_sk,d_date] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen + Project [cp_catalog_page_sk,cp_catalog_page_id] + Filter [cp_catalog_page_sk] + Scan parquet default.catalog_page [cp_catalog_page_sk,cp_catalog_page_id] [cp_catalog_page_sk,cp_catalog_page_id] + WholeStageCodegen + HashAggregate [web_site_id,sum,sum,sum,sum(UnscaledValue(return_amt)),sum(UnscaledValue(sales_price)),sum(UnscaledValue(profit)),sum,sum(UnscaledValue(net_loss))] [sales,profit,sum,sum,RETURNS,channel,id,sum,sum(UnscaledValue(return_amt)),sum(UnscaledValue(sales_price)),sum(UnscaledValue(profit)),sum,sum(UnscaledValue(net_loss))] + InputAdapter + Exchange [web_site_id] #8 + WholeStageCodegen + HashAggregate [web_site_id,sum,profit,sum,sum,net_loss,sales_price,sum,sum,sum,sum,sum,return_amt] [sum,sum,sum,sum,sum,sum,sum,sum] + Project [web_site_id,profit,sales_price,return_amt,net_loss] + BroadcastHashJoin [wsr_web_site_sk,web_site_sk] + Project [profit,wsr_web_site_sk,sales_price,return_amt,net_loss] + BroadcastHashJoin [date_sk,d_date_sk] + InputAdapter + Union + WholeStageCodegen + Project [ws_web_site_sk,ws_sold_date_sk,ws_ext_sales_price,ws_net_profit] + Filter [ws_sold_date_sk,ws_web_site_sk] + Scan parquet default.web_sales [ws_sold_date_sk,ws_web_site_sk,ws_ext_sales_price,ws_net_profit] [ws_sold_date_sk,ws_web_site_sk,ws_ext_sales_price,ws_net_profit] + WholeStageCodegen + Project [ws_web_site_sk,wr_returned_date_sk,wr_return_amt,wr_net_loss] + BroadcastHashJoin [wr_item_sk,wr_order_number,ws_item_sk,ws_order_number] + Project [wr_order_number,wr_return_amt,wr_returned_date_sk,wr_net_loss,wr_item_sk] + Filter [wr_returned_date_sk] + Scan parquet default.web_returns [wr_order_number,wr_return_amt,wr_returned_date_sk,wr_net_loss,wr_item_sk] [wr_order_number,wr_return_amt,wr_returned_date_sk,wr_net_loss,wr_item_sk] + InputAdapter + BroadcastExchange #9 + 
WholeStageCodegen + Project [ws_item_sk,ws_web_site_sk,ws_order_number] + Filter [ws_item_sk,ws_order_number,ws_web_site_sk] + Scan parquet default.web_sales [ws_item_sk,ws_web_site_sk,ws_order_number] [ws_item_sk,ws_web_site_sk,ws_order_number] + InputAdapter + ReusedExchange [d_date_sk] [d_date_sk] #3 + InputAdapter + BroadcastExchange #10 + WholeStageCodegen + Project [web_site_sk,web_site_id] + Filter [web_site_sk] + Scan parquet default.web_site [web_site_sk,web_site_id] [web_site_sk,web_site_id] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q50/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q50/explain.txt new file mode 100644 index 000000000..f1df63c4f --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q50/explain.txt @@ -0,0 +1,32 @@ +== Physical Plan == +TakeOrderedAndProject(limit=100, orderBy=[s_store_name#1 ASC NULLS FIRST,s_company_id#2 ASC NULLS FIRST,s_street_number#3 ASC NULLS FIRST,s_street_name#4 ASC NULLS FIRST,s_street_type#5 ASC NULLS FIRST,s_suite_number#6 ASC NULLS FIRST,s_city#7 ASC NULLS FIRST,s_county#8 ASC NULLS FIRST,s_state#9 ASC NULLS FIRST,s_zip#10 ASC NULLS FIRST], output=[s_store_name#1,s_company_id#2,s_street_number#3,s_street_name#4,s_street_type#5,s_suite_number#6,s_city#7,s_county#8,s_state#9,s_zip#10,30 days #11,31 - 60 days #12,61 - 90 days #13,91 - 120 days #14,>120 days #15]) ++- *(6) HashAggregate(keys=[s_store_name#1, s_company_id#2, s_street_number#3, s_street_name#4, s_street_type#5, s_suite_number#6, s_city#7, s_county#8, s_state#9, s_zip#10], functions=[sum(cast(CASE WHEN ((sr_returned_date_sk#16 - cast(ss_sold_date_sk#17 as bigint)) <= 30) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (((sr_returned_date_sk#16 - cast(ss_sold_date_sk#17 as bigint)) > 30) && ((sr_returned_date_sk#16 - cast(ss_sold_date_sk#17 as bigint)) <= 60)) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (((sr_returned_date_sk#16 - cast(ss_sold_date_sk#17 as 
bigint)) > 60) && ((sr_returned_date_sk#16 - cast(ss_sold_date_sk#17 as bigint)) <= 90)) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (((sr_returned_date_sk#16 - cast(ss_sold_date_sk#17 as bigint)) > 90) && ((sr_returned_date_sk#16 - cast(ss_sold_date_sk#17 as bigint)) <= 120)) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN ((sr_returned_date_sk#16 - cast(ss_sold_date_sk#17 as bigint)) > 120) THEN 1 ELSE 0 END as bigint))]) + +- Exchange hashpartitioning(s_store_name#1, s_company_id#2, s_street_number#3, s_street_name#4, s_street_type#5, s_suite_number#6, s_city#7, s_county#8, s_state#9, s_zip#10, 200) + +- *(5) HashAggregate(keys=[s_store_name#1, s_company_id#2, s_street_number#3, s_street_name#4, s_street_type#5, s_suite_number#6, s_city#7, s_county#8, s_state#9, s_zip#10], functions=[partial_sum(cast(CASE WHEN ((sr_returned_date_sk#16 - cast(ss_sold_date_sk#17 as bigint)) <= 30) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (((sr_returned_date_sk#16 - cast(ss_sold_date_sk#17 as bigint)) > 30) && ((sr_returned_date_sk#16 - cast(ss_sold_date_sk#17 as bigint)) <= 60)) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (((sr_returned_date_sk#16 - cast(ss_sold_date_sk#17 as bigint)) > 60) && ((sr_returned_date_sk#16 - cast(ss_sold_date_sk#17 as bigint)) <= 90)) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (((sr_returned_date_sk#16 - cast(ss_sold_date_sk#17 as bigint)) > 90) && ((sr_returned_date_sk#16 - cast(ss_sold_date_sk#17 as bigint)) <= 120)) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN ((sr_returned_date_sk#16 - cast(ss_sold_date_sk#17 as bigint)) > 120) THEN 1 ELSE 0 END as bigint))]) + +- *(5) Project [ss_sold_date_sk#17, sr_returned_date_sk#16, s_store_name#1, s_company_id#2, s_street_number#3, s_street_name#4, s_street_type#5, s_suite_number#6, s_city#7, s_county#8, s_state#9, s_zip#10] + +- *(5) BroadcastHashJoin [sr_returned_date_sk#16], [cast(d_date_sk#18 as bigint)], Inner, BuildRight + :- *(5) 
Project [ss_sold_date_sk#17, sr_returned_date_sk#16, s_store_name#1, s_company_id#2, s_street_number#3, s_street_name#4, s_street_type#5, s_suite_number#6, s_city#7, s_county#8, s_state#9, s_zip#10] + : +- *(5) BroadcastHashJoin [ss_sold_date_sk#17], [d_date_sk#19], Inner, BuildRight + : :- *(5) Project [ss_sold_date_sk#17, sr_returned_date_sk#16, s_store_name#1, s_company_id#2, s_street_number#3, s_street_name#4, s_street_type#5, s_suite_number#6, s_city#7, s_county#8, s_state#9, s_zip#10] + : : +- *(5) BroadcastHashJoin [ss_store_sk#20], [s_store_sk#21], Inner, BuildRight + : : :- *(5) Project [ss_sold_date_sk#17, ss_store_sk#20, sr_returned_date_sk#16] + : : : +- *(5) BroadcastHashJoin [cast(ss_ticket_number#22 as bigint), cast(ss_item_sk#23 as bigint), cast(ss_customer_sk#24 as bigint)], [sr_ticket_number#25, sr_item_sk#26, sr_customer_sk#27], Inner, BuildRight + : : : :- *(5) Project [ss_sold_date_sk#17, ss_item_sk#23, ss_customer_sk#24, ss_store_sk#20, ss_ticket_number#22] + : : : : +- *(5) Filter ((((isnotnull(ss_ticket_number#22) && isnotnull(ss_item_sk#23)) && isnotnull(ss_customer_sk#24)) && isnotnull(ss_store_sk#20)) && isnotnull(ss_sold_date_sk#17)) + : : : : +- *(5) FileScan parquet default.store_sales[ss_sold_date_sk#17,ss_item_sk#23,ss_customer_sk#24,ss_store_sk#20,ss_ticket_number#22] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_ticket_number), IsNotNull(ss_item_sk), IsNotNull(ss_customer_sk), IsNotNull(ss_stor..., ReadSchema: struct + : : : +- BroadcastExchange HashedRelationBroadcastMode(List(input[3, bigint, true], input[1, bigint, true], input[2, bigint, true])) + : : : +- *(1) Project [sr_returned_date_sk#16, sr_item_sk#26, sr_customer_sk#27, sr_ticket_number#25] + : : : +- *(1) Filter (((isnotnull(sr_ticket_number#25) && isnotnull(sr_customer_sk#27)) && isnotnull(sr_item_sk#26)) && 
isnotnull(sr_returned_date_sk#16)) + : : : +- *(1) FileScan parquet default.store_returns[sr_returned_date_sk#16,sr_item_sk#26,sr_customer_sk#27,sr_ticket_number#25] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_returns], PartitionFilters: [], PushedFilters: [IsNotNull(sr_ticket_number), IsNotNull(sr_customer_sk), IsNotNull(sr_item_sk), IsNotNull(sr_retu..., ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(2) Project [s_store_sk#21, s_store_name#1, s_company_id#2, s_street_number#3, s_street_name#4, s_street_type#5, s_suite_number#6, s_city#7, s_county#8, s_state#9, s_zip#10] + : : +- *(2) Filter isnotnull(s_store_sk#21) + : : +- *(2) FileScan parquet default.store[s_store_sk#21,s_store_name#1,s_company_id#2,s_street_number#3,s_street_name#4,s_street_type#5,s_suite_number#6,s_city#7,s_county#8,s_state#9,s_zip#10] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_store_sk)], ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(4) Project [d_date_sk#18] + +- *(4) Filter ((((isnotnull(d_year#28) && isnotnull(d_moy#29)) && (d_year#28 = 2001)) && (d_moy#29 = 8)) && isnotnull(d_date_sk#18)) + +- *(4) FileScan parquet default.date_dim[d_date_sk#18,d_year#28,d_moy#29] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2001), EqualTo(d_moy,8), IsNotNull(d_date_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q50/simplified.txt 
b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q50/simplified.txt new file mode 100644 index 000000000..fcbf25c1d --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q50/simplified.txt @@ -0,0 +1,42 @@ +TakeOrderedAndProject [s_street_number,s_street_type,s_company_id,31 - 60 days ,61 - 90 days ,s_county,>120 days ,30 days ,91 - 120 days ,s_state,s_street_name,s_city,s_suite_number,s_store_name,s_zip] + WholeStageCodegen + HashAggregate [s_street_number,sum(cast(CASE WHEN (((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) > 60) && ((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) <= 90)) THEN 1 ELSE 0 END as bigint)),s_street_type,sum,s_company_id,sum(cast(CASE WHEN (((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) > 90) && ((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) <= 120)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN ((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) <= 30) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) > 30) && ((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) <= 60)) THEN 1 ELSE 0 END as bigint)),s_county,sum,s_state,s_street_name,sum,sum(cast(CASE WHEN ((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) > 120) THEN 1 ELSE 0 END as bigint)),sum,s_city,s_suite_number,s_store_name,s_zip,sum] [sum(cast(CASE WHEN (((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) > 60) && ((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) <= 90)) THEN 1 ELSE 0 END as bigint)),sum,31 - 60 days ,sum(cast(CASE WHEN (((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) > 90) && ((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) <= 120)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN ((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) <= 30) THEN 1 ELSE 0 END as bigint)),61 - 90 days ,sum(cast(CASE WHEN (((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) > 30) && 
((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) <= 60)) THEN 1 ELSE 0 END as bigint)),sum,>120 days ,30 days ,91 - 120 days ,sum,sum(cast(CASE WHEN ((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) > 120) THEN 1 ELSE 0 END as bigint)),sum,sum] + InputAdapter + Exchange [s_street_number,s_street_type,s_company_id,s_county,s_state,s_street_name,s_city,s_suite_number,s_store_name,s_zip] #1 + WholeStageCodegen + HashAggregate [sum,sum,s_street_number,ss_sold_date_sk,s_street_type,sum,s_company_id,sum,s_county,sum,s_state,s_street_name,sum,sum,sr_returned_date_sk,sum,sum,s_city,s_suite_number,s_store_name,s_zip,sum] [sum,sum,sum,sum,sum,sum,sum,sum,sum,sum] + Project [s_street_name,s_store_name,s_street_number,s_company_id,s_county,s_street_type,s_zip,s_state,s_suite_number,ss_sold_date_sk,s_city,sr_returned_date_sk] + BroadcastHashJoin [sr_returned_date_sk,d_date_sk] + Project [s_street_name,s_store_name,s_street_number,s_company_id,s_county,s_street_type,s_zip,s_state,s_suite_number,ss_sold_date_sk,s_city,sr_returned_date_sk] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [s_street_name,s_store_name,s_street_number,s_company_id,s_county,s_street_type,s_zip,s_state,s_suite_number,ss_sold_date_sk,s_city,sr_returned_date_sk] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_sold_date_sk,ss_store_sk,sr_returned_date_sk] + BroadcastHashJoin [sr_customer_sk,sr_ticket_number,ss_customer_sk,sr_item_sk,ss_ticket_number,ss_item_sk] + Project [ss_item_sk,ss_store_sk,ss_customer_sk,ss_sold_date_sk,ss_ticket_number] + Filter [ss_sold_date_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_item_sk] + Scan parquet default.store_sales [ss_item_sk,ss_store_sk,ss_customer_sk,ss_sold_date_sk,ss_ticket_number] [ss_item_sk,ss_store_sk,ss_customer_sk,ss_sold_date_sk,ss_ticket_number] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen + Project [sr_returned_date_sk,sr_item_sk,sr_customer_sk,sr_ticket_number] + Filter 
[sr_ticket_number,sr_customer_sk,sr_item_sk,sr_returned_date_sk] + Scan parquet default.store_returns [sr_returned_date_sk,sr_item_sk,sr_customer_sk,sr_ticket_number] [sr_returned_date_sk,sr_item_sk,sr_customer_sk,sr_ticket_number] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen + Project [s_street_name,s_store_name,s_street_number,s_company_id,s_county,s_street_type,s_store_sk,s_zip,s_state,s_suite_number,s_city] + Filter [s_store_sk] + Scan parquet default.store [s_street_name,s_store_name,s_street_number,s_company_id,s_county,s_street_type,s_store_sk,s_zip,s_state,s_suite_number,s_city] [s_street_name,s_store_name,s_street_number,s_company_id,s_county,s_street_type,s_store_sk,s_zip,s_state,s_suite_number,s_city] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen + Project [d_date_sk] + Filter [d_date_sk] + Scan parquet default.date_dim [d_date_sk] [d_date_sk] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen + Project [d_date_sk] + Filter [d_year,d_moy,d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_year,d_moy] [d_date_sk,d_year,d_moy] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q51/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q51/explain.txt new file mode 100644 index 000000000..004057dbc --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q51/explain.txt @@ -0,0 +1,41 @@ +== Physical Plan == +TakeOrderedAndProject(limit=100, orderBy=[item_sk#1 ASC NULLS FIRST,d_date#2 ASC NULLS FIRST], output=[item_sk#1,d_date#2,web_sales#3,store_sales#4,web_cumulative#5,store_cumulative#6]) ++- *(15) Filter ((isnotnull(web_cumulative#5) && isnotnull(store_cumulative#6)) && (web_cumulative#5 > store_cumulative#6)) + +- Window [max(web_sales#3) windowspecdefinition(item_sk#1, d_date#2 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS web_cumulative#5, max(store_sales#4) windowspecdefinition(item_sk#1, d_date#2 ASC NULLS 
FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS store_cumulative#6], [item_sk#1], [d_date#2 ASC NULLS FIRST] + +- *(14) Sort [item_sk#1 ASC NULLS FIRST, d_date#2 ASC NULLS FIRST], false, 0 + +- Exchange hashpartitioning(item_sk#1, 200) + +- *(13) Project [CASE WHEN isnotnull(item_sk#7) THEN item_sk#7 ELSE item_sk#8 END AS item_sk#1, CASE WHEN isnotnull(d_date#9) THEN d_date#9 ELSE d_date#10 END AS d_date#2, cume_sales#11 AS web_sales#3, cume_sales#12 AS store_sales#4] + +- SortMergeJoin [item_sk#7, d_date#9], [item_sk#8, d_date#10], FullOuter + :- *(6) Sort [item_sk#7 ASC NULLS FIRST, d_date#9 ASC NULLS FIRST], false, 0 + : +- Exchange hashpartitioning(item_sk#7, d_date#9, 200) + : +- *(5) Project [item_sk#7, d_date#9, cume_sales#11] + : +- Window [sum(_w0#13) windowspecdefinition(ws_item_sk#14, d_date#9 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS cume_sales#11], [ws_item_sk#14], [d_date#9 ASC NULLS FIRST] + : +- *(4) Sort [ws_item_sk#14 ASC NULLS FIRST, d_date#9 ASC NULLS FIRST], false, 0 + : +- Exchange hashpartitioning(ws_item_sk#14, 200) + : +- *(3) HashAggregate(keys=[ws_item_sk#14, d_date#9], functions=[sum(UnscaledValue(ws_sales_price#15))]) + : +- Exchange hashpartitioning(ws_item_sk#14, d_date#9, 200) + : +- *(2) HashAggregate(keys=[ws_item_sk#14, d_date#9], functions=[partial_sum(UnscaledValue(ws_sales_price#15))]) + : +- *(2) Project [ws_item_sk#14, ws_sales_price#15, d_date#9] + : +- *(2) BroadcastHashJoin [ws_sold_date_sk#16], [d_date_sk#17], Inner, BuildRight + : :- *(2) Project [ws_sold_date_sk#16, ws_item_sk#14, ws_sales_price#15] + : : +- *(2) Filter (isnotnull(ws_item_sk#14) && isnotnull(ws_sold_date_sk#16)) + : : +- *(2) FileScan parquet default.web_sales[ws_sold_date_sk#16,ws_item_sk#14,ws_sales_price#15] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: 
[IsNotNull(ws_item_sk), IsNotNull(ws_sold_date_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(1) Project [d_date_sk#17, d_date#9] + : +- *(1) Filter (((isnotnull(d_month_seq#18) && (d_month_seq#18 >= 1200)) && (d_month_seq#18 <= 1211)) && isnotnull(d_date_sk#17)) + : +- *(1) FileScan parquet default.date_dim[d_date_sk#17,d_date#9,d_month_seq#18] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1200), LessThanOrEqual(d_month_seq,1211),..., ReadSchema: struct + +- *(12) Sort [item_sk#8 ASC NULLS FIRST, d_date#10 ASC NULLS FIRST], false, 0 + +- Exchange hashpartitioning(item_sk#8, d_date#10, 200) + +- *(11) Project [item_sk#8, d_date#10, cume_sales#12] + +- Window [sum(_w0#19) windowspecdefinition(ss_item_sk#20, d_date#10 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS cume_sales#12], [ss_item_sk#20], [d_date#10 ASC NULLS FIRST] + +- *(10) Sort [ss_item_sk#20 ASC NULLS FIRST, d_date#10 ASC NULLS FIRST], false, 0 + +- Exchange hashpartitioning(ss_item_sk#20, 200) + +- *(9) HashAggregate(keys=[ss_item_sk#20, d_date#10], functions=[sum(UnscaledValue(ss_sales_price#21))]) + +- Exchange hashpartitioning(ss_item_sk#20, d_date#10, 200) + +- *(8) HashAggregate(keys=[ss_item_sk#20, d_date#10], functions=[partial_sum(UnscaledValue(ss_sales_price#21))]) + +- *(8) Project [ss_item_sk#20, ss_sales_price#21, d_date#10] + +- *(8) BroadcastHashJoin [ss_sold_date_sk#22], [d_date_sk#23], Inner, BuildRight + :- *(8) Project [ss_sold_date_sk#22, ss_item_sk#20, ss_sales_price#21] + : +- *(8) Filter (isnotnull(ss_item_sk#20) && isnotnull(ss_sold_date_sk#22)) + : +- *(8) FileScan parquet default.store_sales[ss_sold_date_sk#22,ss_item_sk#20,ss_sales_price#21] Batched: true, Format: Parquet, 
Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk)], ReadSchema: struct + +- ReusedExchange [d_date_sk#23, d_date#10], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q51/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q51/simplified.txt new file mode 100644 index 000000000..af45acaf5 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q51/simplified.txt @@ -0,0 +1,67 @@ +TakeOrderedAndProject [item_sk,store_cumulative,store_sales,web_sales,d_date,web_cumulative] + WholeStageCodegen + Filter [web_cumulative,store_cumulative] + InputAdapter + Window [web_sales,item_sk,d_date,store_sales] + WholeStageCodegen + Sort [item_sk,d_date] + InputAdapter + Exchange [item_sk] #1 + WholeStageCodegen + Project [d_date,d_date,item_sk,item_sk,cume_sales,cume_sales] + InputAdapter + SortMergeJoin [item_sk,d_date,item_sk,d_date] + WholeStageCodegen + Sort [item_sk,d_date] + InputAdapter + Exchange [item_sk,d_date] #2 + WholeStageCodegen + Project [item_sk,d_date,cume_sales] + InputAdapter + Window [_w0,ws_item_sk,d_date] + WholeStageCodegen + Sort [ws_item_sk,d_date] + InputAdapter + Exchange [ws_item_sk] #3 + WholeStageCodegen + HashAggregate [ws_item_sk,d_date,sum,sum(UnscaledValue(ws_sales_price))] [sum(UnscaledValue(ws_sales_price)),item_sk,_w0,sum] + InputAdapter + Exchange [ws_item_sk,d_date] #4 + WholeStageCodegen + HashAggregate [sum,ws_item_sk,sum,d_date,ws_sales_price] [sum,sum] + Project [ws_item_sk,ws_sales_price,d_date] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Project [ws_sold_date_sk,ws_item_sk,ws_sales_price] + Filter [ws_item_sk,ws_sold_date_sk] + Scan parquet default.web_sales [ws_sold_date_sk,ws_item_sk,ws_sales_price] 
[ws_sold_date_sk,ws_item_sk,ws_sales_price] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen + Project [d_date_sk,d_date] + Filter [d_month_seq,d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_date,d_month_seq] [d_date_sk,d_date,d_month_seq] + WholeStageCodegen + Sort [item_sk,d_date] + InputAdapter + Exchange [item_sk,d_date] #6 + WholeStageCodegen + Project [item_sk,d_date,cume_sales] + InputAdapter + Window [_w0,ss_item_sk,d_date] + WholeStageCodegen + Sort [ss_item_sk,d_date] + InputAdapter + Exchange [ss_item_sk] #7 + WholeStageCodegen + HashAggregate [ss_item_sk,d_date,sum,sum(UnscaledValue(ss_sales_price))] [sum(UnscaledValue(ss_sales_price)),item_sk,_w0,sum] + InputAdapter + Exchange [ss_item_sk,d_date] #8 + WholeStageCodegen + HashAggregate [d_date,sum,ss_sales_price,sum,ss_item_sk] [sum,sum] + Project [ss_item_sk,ss_sales_price,d_date] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_sold_date_sk,ss_item_sk,ss_sales_price] + Filter [ss_item_sk,ss_sold_date_sk] + Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_sales_price] [ss_sold_date_sk,ss_item_sk,ss_sales_price] + InputAdapter + ReusedExchange [d_date_sk,d_date] [d_date_sk,d_date] #5 diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q52/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q52/explain.txt new file mode 100644 index 000000000..aefc40ed5 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q52/explain.txt @@ -0,0 +1,20 @@ +== Physical Plan == +TakeOrderedAndProject(limit=100, orderBy=[d_year#1 ASC NULLS FIRST,ext_price#2 DESC NULLS LAST,brand_id#3 ASC NULLS FIRST], output=[d_year#1,brand_id#3,brand#4,ext_price#2]) ++- *(4) HashAggregate(keys=[d_year#1, i_brand#5, i_brand_id#6], functions=[sum(UnscaledValue(ss_ext_sales_price#7))]) + +- Exchange hashpartitioning(d_year#1, i_brand#5, i_brand_id#6, 200) + +- *(3) HashAggregate(keys=[d_year#1, i_brand#5, i_brand_id#6], 
functions=[partial_sum(UnscaledValue(ss_ext_sales_price#7))]) + +- *(3) Project [d_year#1, ss_ext_sales_price#7, i_brand_id#6, i_brand#5] + +- *(3) BroadcastHashJoin [ss_item_sk#8], [i_item_sk#9], Inner, BuildRight + :- *(3) Project [d_year#1, ss_item_sk#8, ss_ext_sales_price#7] + : +- *(3) BroadcastHashJoin [d_date_sk#10], [ss_sold_date_sk#11], Inner, BuildRight + : :- *(3) Project [d_date_sk#10, d_year#1] + : : +- *(3) Filter ((((isnotnull(d_moy#12) && isnotnull(d_year#1)) && (d_moy#12 = 11)) && (d_year#1 = 2000)) && isnotnull(d_date_sk#10)) + : : +- *(3) FileScan parquet default.date_dim[d_date_sk#10,d_year#1,d_moy#12] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), EqualTo(d_moy,11), EqualTo(d_year,2000), IsNotNull(d_date_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(1) Project [ss_sold_date_sk#11, ss_item_sk#8, ss_ext_sales_price#7] + : +- *(1) Filter (isnotnull(ss_sold_date_sk#11) && isnotnull(ss_item_sk#8)) + : +- *(1) FileScan parquet default.store_sales[ss_sold_date_sk#11,ss_item_sk#8,ss_ext_sales_price#7] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_item_sk)], ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(2) Project [i_item_sk#9, i_brand_id#6, i_brand#5] + +- *(2) Filter ((isnotnull(i_manager_id#13) && (i_manager_id#13 = 1)) && isnotnull(i_item_sk#9)) + +- *(2) FileScan parquet default.item[i_item_sk#9,i_brand_id#6,i_brand#5,i_manager_id#13] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: 
[], PushedFilters: [IsNotNull(i_manager_id), EqualTo(i_manager_id,1), IsNotNull(i_item_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q52/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q52/simplified.txt new file mode 100644 index 000000000..00c2d4b14 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q52/simplified.txt @@ -0,0 +1,26 @@ +TakeOrderedAndProject [d_year,ext_price,brand_id,brand] + WholeStageCodegen + HashAggregate [sum(UnscaledValue(ss_ext_sales_price)),d_year,i_brand,i_brand_id,sum] [brand_id,sum(UnscaledValue(ss_ext_sales_price)),brand,sum,ext_price] + InputAdapter + Exchange [d_year,i_brand,i_brand_id] #1 + WholeStageCodegen + HashAggregate [sum,d_year,i_brand,ss_ext_sales_price,i_brand_id,sum] [sum,sum] + Project [d_year,ss_ext_sales_price,i_brand_id,i_brand] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [d_year,ss_item_sk,ss_ext_sales_price] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [d_date_sk,d_year] + Filter [d_moy,d_year,d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_year,d_moy] [d_date_sk,d_year,d_moy] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen + Project [ss_sold_date_sk,ss_item_sk,ss_ext_sales_price] + Filter [ss_sold_date_sk,ss_item_sk] + Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_ext_sales_price] [ss_sold_date_sk,ss_item_sk,ss_ext_sales_price] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen + Project [i_item_sk,i_brand_id,i_brand] + Filter [i_manager_id,i_item_sk] + Scan parquet default.item [i_item_sk,i_brand_id,i_brand,i_manager_id] [i_item_sk,i_brand_id,i_brand,i_manager_id] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q53/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q53/explain.txt new file mode 100644 index 000000000..80e2ae182 --- /dev/null +++ 
b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q53/explain.txt @@ -0,0 +1,31 @@ +== Physical Plan == +TakeOrderedAndProject(limit=100, orderBy=[avg_quarterly_sales#1 ASC NULLS FIRST,sum_sales#2 ASC NULLS FIRST,i_manufact_id#3 ASC NULLS FIRST], output=[i_manufact_id#3,sum_sales#2,avg_quarterly_sales#1]) ++- *(7) Project [i_manufact_id#3, sum_sales#2, avg_quarterly_sales#1] + +- *(7) Filter (CASE WHEN (avg_quarterly_sales#1 > 0.000000) THEN CheckOverflow((promote_precision(abs(CheckOverflow((promote_precision(cast(sum_sales#2 as decimal(22,6))) - promote_precision(cast(avg_quarterly_sales#1 as decimal(22,6)))), DecimalType(22,6)))) / promote_precision(cast(avg_quarterly_sales#1 as decimal(22,6)))), DecimalType(38,16)) ELSE null END > 0.1000000000000000) + +- Window [avg(_w0#4) windowspecdefinition(i_manufact_id#3, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS avg_quarterly_sales#1], [i_manufact_id#3] + +- *(6) Sort [i_manufact_id#3 ASC NULLS FIRST], false, 0 + +- Exchange hashpartitioning(i_manufact_id#3, 200) + +- *(5) HashAggregate(keys=[i_manufact_id#3, d_qoy#5], functions=[sum(UnscaledValue(ss_sales_price#6))]) + +- Exchange hashpartitioning(i_manufact_id#3, d_qoy#5, 200) + +- *(4) HashAggregate(keys=[i_manufact_id#3, d_qoy#5], functions=[partial_sum(UnscaledValue(ss_sales_price#6))]) + +- *(4) Project [i_manufact_id#3, ss_sales_price#6, d_qoy#5] + +- *(4) BroadcastHashJoin [ss_store_sk#7], [s_store_sk#8], Inner, BuildRight + :- *(4) Project [i_manufact_id#3, ss_store_sk#7, ss_sales_price#6, d_qoy#5] + : +- *(4) BroadcastHashJoin [ss_sold_date_sk#9], [d_date_sk#10], Inner, BuildRight + : :- *(4) Project [i_manufact_id#3, ss_sold_date_sk#9, ss_store_sk#7, ss_sales_price#6] + : : +- *(4) BroadcastHashJoin [i_item_sk#11], [ss_item_sk#12], Inner, BuildRight + : : :- *(4) Project [i_item_sk#11, i_manufact_id#3] + : : : +- *(4) Filter ((((i_category#13 IN (Books,Children,Electronics) && i_class#14 IN 
(personal,portable,reference,self-help)) && i_brand#15 IN (scholaramalgamalg #16,scholaramalgamalg #17,exportiunivamalg #18,scholaramalgamalg #18)) || ((i_category#13 IN (Women,Music,Men) && i_class#14 IN (accessories,classical,fragrances,pants)) && i_brand#15 IN (amalgimporto #19,edu packscholar #19,exportiimporto #19,importoamalg #19))) && isnotnull(i_item_sk#11)) + : : : +- *(4) FileScan parquet default.item[i_item_sk#11,i_brand#15,i_class#14,i_category#13,i_manufact_id#3] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [Or(And(And(In(i_category, [Books,Children,Electronics]),In(i_class, [personal,portable,reference..., ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[1, int, true] as bigint))) + : : +- *(1) Project [ss_sold_date_sk#9, ss_item_sk#12, ss_store_sk#7, ss_sales_price#6] + : : +- *(1) Filter ((isnotnull(ss_item_sk#12) && isnotnull(ss_sold_date_sk#9)) && isnotnull(ss_store_sk#7)) + : : +- *(1) FileScan parquet default.store_sales[ss_sold_date_sk#9,ss_item_sk#12,ss_store_sk#7,ss_sales_price#6] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(2) Project [d_date_sk#10, d_qoy#5] + : +- *(2) Filter (d_month_seq#20 INSET (1200,1211,1205,1201,1206,1210,1207,1202,1209,1203,1208,1204) && isnotnull(d_date_sk#10)) + : +- *(2) FileScan parquet default.date_dim[d_date_sk#10,d_month_seq#20,d_qoy#5] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [In(d_month_seq, 
[1200,1211,1205,1201,1206,1210,1207,1202,1209,1203,1208,1204]), IsNotNull(d_date..., ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(3) Project [s_store_sk#8] + +- *(3) Filter isnotnull(s_store_sk#8) + +- *(3) FileScan parquet default.store[s_store_sk#8] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_store_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q53/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q53/simplified.txt new file mode 100644 index 000000000..6a8804ae2 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q53/simplified.txt @@ -0,0 +1,43 @@ +TakeOrderedAndProject [avg_quarterly_sales,sum_sales,i_manufact_id] + WholeStageCodegen + Project [i_manufact_id,sum_sales,avg_quarterly_sales] + Filter [avg_quarterly_sales,sum_sales] + InputAdapter + Window [_w0,i_manufact_id] + WholeStageCodegen + Sort [i_manufact_id] + InputAdapter + Exchange [i_manufact_id] #1 + WholeStageCodegen + HashAggregate [i_manufact_id,d_qoy,sum,sum(UnscaledValue(ss_sales_price))] [sum(UnscaledValue(ss_sales_price)),sum_sales,_w0,sum] + InputAdapter + Exchange [i_manufact_id,d_qoy] #2 + WholeStageCodegen + HashAggregate [d_qoy,sum,ss_sales_price,sum,i_manufact_id] [sum,sum] + Project [i_manufact_id,ss_sales_price,d_qoy] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [i_manufact_id,ss_store_sk,ss_sales_price,d_qoy] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [i_manufact_id,ss_sold_date_sk,ss_store_sk,ss_sales_price] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Project [i_item_sk,i_manufact_id] + Filter [i_category,i_class,i_brand,i_item_sk] + Scan parquet default.item [i_class,i_manufact_id,i_item_sk,i_category,i_brand] 
[i_class,i_manufact_id,i_item_sk,i_category,i_brand] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen + Project [ss_sold_date_sk,ss_item_sk,ss_store_sk,ss_sales_price] + Filter [ss_item_sk,ss_sold_date_sk,ss_store_sk] + Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_store_sk,ss_sales_price] [ss_sold_date_sk,ss_item_sk,ss_store_sk,ss_sales_price] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen + Project [d_date_sk,d_qoy] + Filter [d_month_seq,d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_month_seq,d_qoy] [d_date_sk,d_month_seq,d_qoy] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen + Project [s_store_sk] + Filter [s_store_sk] + Scan parquet default.store [s_store_sk] [s_store_sk] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q54/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q54/explain.txt new file mode 100644 index 000000000..142360af3 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q54/explain.txt @@ -0,0 +1,88 @@ +== Physical Plan == +TakeOrderedAndProject(limit=100, orderBy=[segment#1 ASC NULLS FIRST,num_customers#2 ASC NULLS FIRST], output=[segment#1,num_customers#2,segment_base#3]) ++- *(13) HashAggregate(keys=[segment#1], functions=[count(1)]) + +- Exchange hashpartitioning(segment#1, 200) + +- *(12) HashAggregate(keys=[segment#1], functions=[partial_count(1)]) + +- *(12) HashAggregate(keys=[c_customer_sk#4], functions=[sum(UnscaledValue(ss_ext_sales_price#5))]) + +- Exchange hashpartitioning(c_customer_sk#4, 200) + +- *(11) HashAggregate(keys=[c_customer_sk#4], functions=[partial_sum(UnscaledValue(ss_ext_sales_price#5))]) + +- *(11) Project [c_customer_sk#4, ss_ext_sales_price#5] + +- *(11) BroadcastHashJoin [ss_sold_date_sk#6], [d_date_sk#7], Inner, BuildRight + :- *(11) Project [c_customer_sk#4, ss_sold_date_sk#6, ss_ext_sales_price#5] + : +- *(11) BroadcastHashJoin [ca_county#8, ca_state#9], [s_county#10, 
s_state#11], Inner, BuildRight + : :- *(11) Project [c_customer_sk#4, ss_sold_date_sk#6, ss_ext_sales_price#5, ca_county#8, ca_state#9] + : : +- *(11) BroadcastHashJoin [c_current_addr_sk#12], [ca_address_sk#13], Inner, BuildRight + : : :- *(11) Project [c_customer_sk#4, c_current_addr_sk#12, ss_sold_date_sk#6, ss_ext_sales_price#5] + : : : +- *(11) BroadcastHashJoin [c_customer_sk#4], [ss_customer_sk#14], Inner, BuildRight + : : : :- *(11) HashAggregate(keys=[c_customer_sk#4, c_current_addr_sk#12], functions=[]) + : : : : +- Exchange hashpartitioning(c_customer_sk#4, c_current_addr_sk#12, 200) + : : : : +- *(6) HashAggregate(keys=[c_customer_sk#4, c_current_addr_sk#12], functions=[]) + : : : : +- *(6) Project [c_customer_sk#4, c_current_addr_sk#12] + : : : : +- *(6) BroadcastHashJoin [customer_sk#15], [c_customer_sk#4], Inner, BuildRight + : : : : :- *(6) Project [customer_sk#15] + : : : : : +- *(6) BroadcastHashJoin [sold_date_sk#16], [d_date_sk#7], Inner, BuildRight + : : : : : :- *(6) Project [sold_date_sk#16, customer_sk#15] + : : : : : : +- *(6) BroadcastHashJoin [item_sk#17], [i_item_sk#18], Inner, BuildRight + : : : : : : :- Union + : : : : : : : :- *(1) Project [cs_sold_date_sk#19 AS sold_date_sk#16, cs_bill_customer_sk#20 AS customer_sk#15, cs_item_sk#21 AS item_sk#17] + : : : : : : : : +- *(1) Filter ((isnotnull(cs_item_sk#21) && isnotnull(cs_sold_date_sk#19)) && isnotnull(cs_bill_customer_sk#20)) + : : : : : : : : +- *(1) FileScan parquet default.catalog_sales[cs_sold_date_sk#19,cs_bill_customer_sk#20,cs_item_sk#21] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_item_sk), IsNotNull(cs_sold_date_sk), IsNotNull(cs_bill_customer_sk)], ReadSchema: struct + : : : : : : : +- *(2) Project [ws_sold_date_sk#22 AS sold_date_sk#23, ws_bill_customer_sk#24 AS customer_sk#25, ws_item_sk#26 AS item_sk#27] + : : : : : : : +- *(2) 
Filter ((isnotnull(ws_item_sk#26) && isnotnull(ws_sold_date_sk#22)) && isnotnull(ws_bill_customer_sk#24)) + : : : : : : : +- *(2) FileScan parquet default.web_sales[ws_sold_date_sk#22,ws_item_sk#26,ws_bill_customer_sk#24] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_item_sk), IsNotNull(ws_sold_date_sk), IsNotNull(ws_bill_customer_sk)], ReadSchema: struct + : : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : : : +- *(3) Project [i_item_sk#18] + : : : : : : +- *(3) Filter ((((isnotnull(i_category#28) && isnotnull(i_class#29)) && (i_category#28 = Women)) && (i_class#29 = maternity)) && isnotnull(i_item_sk#18)) + : : : : : : +- *(3) FileScan parquet default.item[i_item_sk#18,i_class#29,i_category#28] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_category), IsNotNull(i_class), EqualTo(i_category,Women), EqualTo(i_class,maternity)..., ReadSchema: struct + : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : : +- *(4) Project [d_date_sk#7] + : : : : : +- *(4) Filter ((((isnotnull(d_moy#30) && isnotnull(d_year#31)) && (d_moy#30 = 12)) && (d_year#31 = 1998)) && isnotnull(d_date_sk#7)) + : : : : : +- *(4) FileScan parquet default.date_dim[d_date_sk#7,d_year#31,d_moy#30] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), EqualTo(d_moy,12), EqualTo(d_year,1998), IsNotNull(d_date_sk)], ReadSchema: struct + : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : +- *(5) Project [c_customer_sk#4, 
c_current_addr_sk#12] + : : : : +- *(5) Filter (isnotnull(c_customer_sk#4) && isnotnull(c_current_addr_sk#12)) + : : : : +- *(5) FileScan parquet default.customer[c_customer_sk#4,c_current_addr_sk#12] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_current_addr_sk)], ReadSchema: struct + : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[1, int, true] as bigint))) + : : : +- *(7) Project [ss_sold_date_sk#6, ss_customer_sk#14, ss_ext_sales_price#5] + : : : +- *(7) Filter (isnotnull(ss_customer_sk#14) && isnotnull(ss_sold_date_sk#6)) + : : : +- *(7) FileScan parquet default.store_sales[ss_sold_date_sk#6,ss_customer_sk#14,ss_ext_sales_price#5] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_customer_sk), IsNotNull(ss_sold_date_sk)], ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(8) Project [ca_address_sk#13, ca_county#8, ca_state#9] + : : +- *(8) Filter ((isnotnull(ca_address_sk#13) && isnotnull(ca_state#9)) && isnotnull(ca_county#8)) + : : +- *(8) FileScan parquet default.customer_address[ca_address_sk#13,ca_county#8,ca_state#9] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_address_sk), IsNotNull(ca_state), IsNotNull(ca_county)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true], input[1, string, true])) + : +- *(9) Project [s_county#10, s_state#11] + : +- *(9) Filter (isnotnull(s_state#11) && isnotnull(s_county#10)) + : +- *(9) FileScan parquet default.store[s_county#10,s_state#11] Batched: 
true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_state), IsNotNull(s_county)], ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(10) Project [d_date_sk#7] + +- *(10) Filter (((isnotnull(d_month_seq#32) && (d_month_seq#32 >= Subquery subquery10089)) && (d_month_seq#32 <= Subquery subquery10090)) && isnotnull(d_date_sk#7)) + : :- Subquery subquery10089 + : : +- *(2) HashAggregate(keys=[(d_month_seq + 1)#33], functions=[]) + : : +- Exchange hashpartitioning((d_month_seq + 1)#33, 200) + : : +- *(1) HashAggregate(keys=[(d_month_seq + 1)#33], functions=[]) + : : +- *(1) Project [(d_month_seq#32 + 1) AS (d_month_seq + 1)#33] + : : +- *(1) Filter (((isnotnull(d_year#31) && isnotnull(d_moy#30)) && (d_year#31 = 1998)) && (d_moy#30 = 12)) + : : +- *(1) FileScan parquet default.date_dim[d_month_seq#32,d_year#31,d_moy#30] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,1998), EqualTo(d_moy,12)], ReadSchema: struct + : +- Subquery subquery10090 + : +- *(2) HashAggregate(keys=[(d_month_seq + 3)#34], functions=[]) + : +- Exchange hashpartitioning((d_month_seq + 3)#34, 200) + : +- *(1) HashAggregate(keys=[(d_month_seq + 3)#34], functions=[]) + : +- *(1) Project [(d_month_seq#32 + 3) AS (d_month_seq + 3)#34] + : +- *(1) Filter (((isnotnull(d_year#31) && isnotnull(d_moy#30)) && (d_year#31 = 1998)) && (d_moy#30 = 12)) + : +- *(1) FileScan parquet default.date_dim[d_month_seq#32,d_year#31,d_moy#30] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,1998), 
EqualTo(d_moy,12)], ReadSchema: struct + +- *(10) FileScan parquet default.date_dim[d_date_sk#7,d_month_seq#32] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_month_seq), IsNotNull(d_date_sk)], ReadSchema: struct + :- Subquery subquery10089 + : +- *(2) HashAggregate(keys=[(d_month_seq + 1)#33], functions=[]) + : +- Exchange hashpartitioning((d_month_seq + 1)#33, 200) + : +- *(1) HashAggregate(keys=[(d_month_seq + 1)#33], functions=[]) + : +- *(1) Project [(d_month_seq#32 + 1) AS (d_month_seq + 1)#33] + : +- *(1) Filter (((isnotnull(d_year#31) && isnotnull(d_moy#30)) && (d_year#31 = 1998)) && (d_moy#30 = 12)) + : +- *(1) FileScan parquet default.date_dim[d_month_seq#32,d_year#31,d_moy#30] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,1998), EqualTo(d_moy,12)], ReadSchema: struct + +- Subquery subquery10090 + +- *(2) HashAggregate(keys=[(d_month_seq + 3)#34], functions=[]) + +- Exchange hashpartitioning((d_month_seq + 3)#34, 200) + +- *(1) HashAggregate(keys=[(d_month_seq + 3)#34], functions=[]) + +- *(1) Project [(d_month_seq#32 + 3) AS (d_month_seq + 3)#34] + +- *(1) Filter (((isnotnull(d_year#31) && isnotnull(d_moy#30)) && (d_year#31 = 1998)) && (d_moy#30 = 12)) + +- *(1) FileScan parquet default.date_dim[d_month_seq#32,d_year#31,d_moy#30] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,1998), EqualTo(d_moy,12)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q54/simplified.txt 
b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q54/simplified.txt new file mode 100644 index 000000000..3707aa2f8 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q54/simplified.txt @@ -0,0 +1,123 @@ +TakeOrderedAndProject [segment,num_customers,segment_base] + WholeStageCodegen + HashAggregate [segment,count,count(1)] [count(1),num_customers,segment_base,count] + InputAdapter + Exchange [segment] #1 + WholeStageCodegen + HashAggregate [segment,count,count] [count,count] + HashAggregate [c_customer_sk,sum,sum(UnscaledValue(ss_ext_sales_price))] [sum(UnscaledValue(ss_ext_sales_price)),segment,sum] + InputAdapter + Exchange [c_customer_sk] #2 + WholeStageCodegen + HashAggregate [c_customer_sk,ss_ext_sales_price,sum,sum] [sum,sum] + Project [c_customer_sk,ss_ext_sales_price] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [c_customer_sk,ss_sold_date_sk,ss_ext_sales_price] + BroadcastHashJoin [ca_county,ca_state,s_county,s_state] + Project [ca_state,c_customer_sk,ss_ext_sales_price,ca_county,ss_sold_date_sk] + BroadcastHashJoin [c_current_addr_sk,ca_address_sk] + Project [c_customer_sk,c_current_addr_sk,ss_sold_date_sk,ss_ext_sales_price] + BroadcastHashJoin [c_customer_sk,ss_customer_sk] + HashAggregate [c_customer_sk,c_current_addr_sk] + InputAdapter + Exchange [c_customer_sk,c_current_addr_sk] #3 + WholeStageCodegen + HashAggregate [c_customer_sk,c_current_addr_sk] + Project [c_customer_sk,c_current_addr_sk] + BroadcastHashJoin [customer_sk,c_customer_sk] + Project [customer_sk] + BroadcastHashJoin [sold_date_sk,d_date_sk] + Project [sold_date_sk,customer_sk] + BroadcastHashJoin [item_sk,i_item_sk] + InputAdapter + Union + WholeStageCodegen + Project [cs_sold_date_sk,cs_bill_customer_sk,cs_item_sk] + Filter [cs_item_sk,cs_sold_date_sk,cs_bill_customer_sk] + Scan parquet default.catalog_sales [cs_sold_date_sk,cs_bill_customer_sk,cs_item_sk] [cs_sold_date_sk,cs_bill_customer_sk,cs_item_sk] + WholeStageCodegen + 
Project [ws_sold_date_sk,ws_bill_customer_sk,ws_item_sk] + Filter [ws_item_sk,ws_sold_date_sk,ws_bill_customer_sk] + Scan parquet default.web_sales [ws_sold_date_sk,ws_item_sk,ws_bill_customer_sk] [ws_sold_date_sk,ws_item_sk,ws_bill_customer_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen + Project [i_item_sk] + Filter [i_category,i_class,i_item_sk] + Scan parquet default.item [i_item_sk,i_class,i_category] [i_item_sk,i_class,i_category] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen + Project [d_date_sk] + Filter [d_moy,d_year,d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_year,d_moy] [d_date_sk,d_year,d_moy] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen + Project [c_customer_sk,c_current_addr_sk] + Filter [c_customer_sk,c_current_addr_sk] + Scan parquet default.customer [c_customer_sk,c_current_addr_sk] [c_customer_sk,c_current_addr_sk] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen + Project [ss_sold_date_sk,ss_customer_sk,ss_ext_sales_price] + Filter [ss_customer_sk,ss_sold_date_sk] + Scan parquet default.store_sales [ss_sold_date_sk,ss_customer_sk,ss_ext_sales_price] [ss_sold_date_sk,ss_customer_sk,ss_ext_sales_price] + InputAdapter + BroadcastExchange #8 + WholeStageCodegen + Project [ca_address_sk,ca_county,ca_state] + Filter [ca_address_sk,ca_state,ca_county] + Scan parquet default.customer_address [ca_address_sk,ca_county,ca_state] [ca_address_sk,ca_county,ca_state] + InputAdapter + BroadcastExchange #9 + WholeStageCodegen + Project [s_county,s_state] + Filter [s_state,s_county] + Scan parquet default.store [s_county,s_state] [s_county,s_state] + InputAdapter + BroadcastExchange #10 + WholeStageCodegen + Project [d_date_sk] + Filter [d_month_seq,d_date_sk] + Subquery #1 + WholeStageCodegen + HashAggregate [(d_month_seq + 1)] + InputAdapter + Exchange [(d_month_seq + 1)] #11 + WholeStageCodegen + HashAggregate [(d_month_seq + 1)] + Project [d_month_seq] + Filter [d_year,d_moy] + Scan parquet 
default.date_dim [d_month_seq,d_year,d_moy] [d_month_seq,d_year,d_moy] + Subquery #2 + WholeStageCodegen + HashAggregate [(d_month_seq + 3)] + InputAdapter + Exchange [(d_month_seq + 3)] #12 + WholeStageCodegen + HashAggregate [(d_month_seq + 3)] + Project [d_month_seq] + Filter [d_year,d_moy] + Scan parquet default.date_dim [d_month_seq,d_year,d_moy] [d_month_seq,d_year,d_moy] + Scan parquet default.date_dim [d_date_sk,d_month_seq] [d_date_sk,d_month_seq] + Subquery #1 + WholeStageCodegen + HashAggregate [(d_month_seq + 1)] + InputAdapter + Exchange [(d_month_seq + 1)] #11 + WholeStageCodegen + HashAggregate [(d_month_seq + 1)] + Project [d_month_seq] + Filter [d_year,d_moy] + Scan parquet default.date_dim [d_month_seq,d_year,d_moy] [d_month_seq,d_year,d_moy] + Subquery #2 + WholeStageCodegen + HashAggregate [(d_month_seq + 3)] + InputAdapter + Exchange [(d_month_seq + 3)] #12 + WholeStageCodegen + HashAggregate [(d_month_seq + 3)] + Project [d_month_seq] + Filter [d_year,d_moy] + Scan parquet default.date_dim [d_month_seq,d_year,d_moy] [d_month_seq,d_year,d_moy] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q55/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q55/explain.txt new file mode 100644 index 000000000..eccd937f1 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q55/explain.txt @@ -0,0 +1,20 @@ +== Physical Plan == +TakeOrderedAndProject(limit=100, orderBy=[ext_price#1 DESC NULLS LAST,brand_id#2 ASC NULLS FIRST], output=[brand_id#2,brand#3,ext_price#1]) ++- *(4) HashAggregate(keys=[i_brand#4, i_brand_id#5], functions=[sum(UnscaledValue(ss_ext_sales_price#6))]) + +- Exchange hashpartitioning(i_brand#4, i_brand_id#5, 200) + +- *(3) HashAggregate(keys=[i_brand#4, i_brand_id#5], functions=[partial_sum(UnscaledValue(ss_ext_sales_price#6))]) + +- *(3) Project [ss_ext_sales_price#6, i_brand_id#5, i_brand#4] + +- *(3) BroadcastHashJoin [ss_item_sk#7], [i_item_sk#8], Inner, 
BuildRight + :- *(3) Project [ss_item_sk#7, ss_ext_sales_price#6] + : +- *(3) BroadcastHashJoin [d_date_sk#9], [ss_sold_date_sk#10], Inner, BuildRight + : :- *(3) Project [d_date_sk#9] + : : +- *(3) Filter ((((isnotnull(d_moy#11) && isnotnull(d_year#12)) && (d_moy#11 = 11)) && (d_year#12 = 1999)) && isnotnull(d_date_sk#9)) + : : +- *(3) FileScan parquet default.date_dim[d_date_sk#9,d_year#12,d_moy#11] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), EqualTo(d_moy,11), EqualTo(d_year,1999), IsNotNull(d_date_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(1) Project [ss_sold_date_sk#10, ss_item_sk#7, ss_ext_sales_price#6] + : +- *(1) Filter (isnotnull(ss_sold_date_sk#10) && isnotnull(ss_item_sk#7)) + : +- *(1) FileScan parquet default.store_sales[ss_sold_date_sk#10,ss_item_sk#7,ss_ext_sales_price#6] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_item_sk)], ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(2) Project [i_item_sk#8, i_brand_id#5, i_brand#4] + +- *(2) Filter ((isnotnull(i_manager_id#13) && (i_manager_id#13 = 28)) && isnotnull(i_item_sk#8)) + +- *(2) FileScan parquet default.item[i_item_sk#8,i_brand_id#5,i_brand#4,i_manager_id#13] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_manager_id), EqualTo(i_manager_id,28), IsNotNull(i_item_sk)], ReadSchema: struct \ No newline at end of file diff --git 
a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q55/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q55/simplified.txt new file mode 100644 index 000000000..e5ff08959 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q55/simplified.txt @@ -0,0 +1,26 @@ +TakeOrderedAndProject [ext_price,brand_id,brand] + WholeStageCodegen + HashAggregate [i_brand,i_brand_id,sum,sum(UnscaledValue(ss_ext_sales_price))] [brand,ext_price,sum(UnscaledValue(ss_ext_sales_price)),sum,brand_id] + InputAdapter + Exchange [i_brand,i_brand_id] #1 + WholeStageCodegen + HashAggregate [i_brand,sum,sum,ss_ext_sales_price,i_brand_id] [sum,sum] + Project [ss_ext_sales_price,i_brand_id,i_brand] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [ss_item_sk,ss_ext_sales_price] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [d_date_sk] + Filter [d_moy,d_year,d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_year,d_moy] [d_date_sk,d_year,d_moy] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen + Project [ss_sold_date_sk,ss_item_sk,ss_ext_sales_price] + Filter [ss_sold_date_sk,ss_item_sk] + Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_ext_sales_price] [ss_sold_date_sk,ss_item_sk,ss_ext_sales_price] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen + Project [i_item_sk,i_brand_id,i_brand] + Filter [i_manager_id,i_item_sk] + Scan parquet default.item [i_item_sk,i_brand_id,i_brand,i_manager_id] [i_item_sk,i_brand_id,i_brand,i_manager_id] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q56/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q56/explain.txt new file mode 100644 index 000000000..5678609bc --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q56/explain.txt @@ -0,0 +1,65 @@ +== Physical Plan == +TakeOrderedAndProject(limit=100, orderBy=[total_sales#1 ASC NULLS FIRST], 
output=[i_item_id#2,total_sales#1]) ++- *(20) HashAggregate(keys=[i_item_id#2], functions=[sum(total_sales#3)]) + +- Exchange hashpartitioning(i_item_id#2, 200) + +- *(19) HashAggregate(keys=[i_item_id#2], functions=[partial_sum(total_sales#3)]) + +- Union + :- *(6) HashAggregate(keys=[i_item_id#2], functions=[sum(UnscaledValue(ss_ext_sales_price#4))]) + : +- Exchange hashpartitioning(i_item_id#2, 200) + : +- *(5) HashAggregate(keys=[i_item_id#2], functions=[partial_sum(UnscaledValue(ss_ext_sales_price#4))]) + : +- *(5) Project [ss_ext_sales_price#4, i_item_id#2] + : +- *(5) BroadcastHashJoin [ss_item_sk#5], [i_item_sk#6], Inner, BuildRight + : :- *(5) Project [ss_item_sk#5, ss_ext_sales_price#4] + : : +- *(5) BroadcastHashJoin [ss_addr_sk#7], [ca_address_sk#8], Inner, BuildRight + : : :- *(5) Project [ss_item_sk#5, ss_addr_sk#7, ss_ext_sales_price#4] + : : : +- *(5) BroadcastHashJoin [ss_sold_date_sk#9], [d_date_sk#10], Inner, BuildRight + : : : :- *(5) Project [ss_sold_date_sk#9, ss_item_sk#5, ss_addr_sk#7, ss_ext_sales_price#4] + : : : : +- *(5) Filter ((isnotnull(ss_sold_date_sk#9) && isnotnull(ss_addr_sk#7)) && isnotnull(ss_item_sk#5)) + : : : : +- *(5) FileScan parquet default.store_sales[ss_sold_date_sk#9,ss_item_sk#5,ss_addr_sk#7,ss_ext_sales_price#4] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_addr_sk), IsNotNull(ss_item_sk)], ReadSchema: struct + : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : +- *(1) Project [d_date_sk#10] + : : : +- *(1) Filter ((((isnotnull(d_year#11) && isnotnull(d_moy#12)) && (d_year#11 = 2001)) && (d_moy#12 = 2)) && isnotnull(d_date_sk#10)) + : : : +- *(1) FileScan parquet default.date_dim[d_date_sk#10,d_year#11,d_moy#12] Batched: true, Format: Parquet, Location: 
InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2001), EqualTo(d_moy,2), IsNotNull(d_date_sk)], ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(2) Project [ca_address_sk#8] + : : +- *(2) Filter ((isnotnull(ca_gmt_offset#13) && (ca_gmt_offset#13 = -5.00)) && isnotnull(ca_address_sk#8)) + : : +- *(2) FileScan parquet default.customer_address[ca_address_sk#8,ca_gmt_offset#13] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_gmt_offset), EqualTo(ca_gmt_offset,-5.00), IsNotNull(ca_address_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(4) BroadcastHashJoin [i_item_id#2], [i_item_id#2#14], LeftSemi, BuildRight + : :- *(4) Project [i_item_sk#6, i_item_id#2] + : : +- *(4) Filter isnotnull(i_item_sk#6) + : : +- *(4) FileScan parquet default.item[i_item_sk#6,i_item_id#2] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true])) + : +- *(3) Project [i_item_id#2 AS i_item_id#2#14] + : +- *(3) Filter i_color#15 IN (slate,blanched,burnished) + : +- *(3) FileScan parquet default.item[i_item_id#2,i_color#15] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [In(i_color, [slate,blanched,burnished])], ReadSchema: struct + :- *(12) HashAggregate(keys=[i_item_id#2], functions=[sum(UnscaledValue(cs_ext_sales_price#16))]) + : +- 
Exchange hashpartitioning(i_item_id#2, 200) + : +- *(11) HashAggregate(keys=[i_item_id#2], functions=[partial_sum(UnscaledValue(cs_ext_sales_price#16))]) + : +- *(11) Project [cs_ext_sales_price#16, i_item_id#2] + : +- *(11) BroadcastHashJoin [cs_item_sk#17], [i_item_sk#6], Inner, BuildRight + : :- *(11) Project [cs_item_sk#17, cs_ext_sales_price#16] + : : +- *(11) BroadcastHashJoin [cs_bill_addr_sk#18], [ca_address_sk#8], Inner, BuildRight + : : :- *(11) Project [cs_bill_addr_sk#18, cs_item_sk#17, cs_ext_sales_price#16] + : : : +- *(11) BroadcastHashJoin [cs_sold_date_sk#19], [d_date_sk#10], Inner, BuildRight + : : : :- *(11) Project [cs_sold_date_sk#19, cs_bill_addr_sk#18, cs_item_sk#17, cs_ext_sales_price#16] + : : : : +- *(11) Filter ((isnotnull(cs_sold_date_sk#19) && isnotnull(cs_bill_addr_sk#18)) && isnotnull(cs_item_sk#17)) + : : : : +- *(11) FileScan parquet default.catalog_sales[cs_sold_date_sk#19,cs_bill_addr_sk#18,cs_item_sk#17,cs_ext_sales_price#16] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_sold_date_sk), IsNotNull(cs_bill_addr_sk), IsNotNull(cs_item_sk)], ReadSchema: struct + : : : +- ReusedExchange [d_date_sk#10], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- ReusedExchange [ca_address_sk#8], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- ReusedExchange [i_item_sk#6, i_item_id#2], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(18) HashAggregate(keys=[i_item_id#2], functions=[sum(UnscaledValue(ws_ext_sales_price#20))]) + +- Exchange hashpartitioning(i_item_id#2, 200) + +- *(17) HashAggregate(keys=[i_item_id#2], functions=[partial_sum(UnscaledValue(ws_ext_sales_price#20))]) + +- *(17) Project [ws_ext_sales_price#20, i_item_id#2] + +- *(17) BroadcastHashJoin 
[ws_item_sk#21], [i_item_sk#6], Inner, BuildRight + :- *(17) Project [ws_item_sk#21, ws_ext_sales_price#20] + : +- *(17) BroadcastHashJoin [ws_bill_addr_sk#22], [ca_address_sk#8], Inner, BuildRight + : :- *(17) Project [ws_item_sk#21, ws_bill_addr_sk#22, ws_ext_sales_price#20] + : : +- *(17) BroadcastHashJoin [ws_sold_date_sk#23], [d_date_sk#10], Inner, BuildRight + : : :- *(17) Project [ws_sold_date_sk#23, ws_item_sk#21, ws_bill_addr_sk#22, ws_ext_sales_price#20] + : : : +- *(17) Filter ((isnotnull(ws_sold_date_sk#23) && isnotnull(ws_bill_addr_sk#22)) && isnotnull(ws_item_sk#21)) + : : : +- *(17) FileScan parquet default.web_sales[ws_sold_date_sk#23,ws_item_sk#21,ws_bill_addr_sk#22,ws_ext_sales_price#20] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_sold_date_sk), IsNotNull(ws_bill_addr_sk), IsNotNull(ws_item_sk)], ReadSchema: struct + : : +- ReusedExchange [d_date_sk#10], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- ReusedExchange [ca_address_sk#8], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- ReusedExchange [i_item_sk#6, i_item_id#2], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q56/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q56/simplified.txt new file mode 100644 index 000000000..693fb71e6 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q56/simplified.txt @@ -0,0 +1,91 @@ +TakeOrderedAndProject [total_sales,i_item_id] + WholeStageCodegen + HashAggregate [i_item_id,sum,sum(total_sales)] [sum(total_sales),total_sales,sum] + InputAdapter + Exchange [i_item_id] #1 + WholeStageCodegen + HashAggregate 
[i_item_id,total_sales,sum,sum] [sum,sum] + InputAdapter + Union + WholeStageCodegen + HashAggregate [i_item_id,sum,sum(UnscaledValue(ss_ext_sales_price))] [sum(UnscaledValue(ss_ext_sales_price)),total_sales,sum] + InputAdapter + Exchange [i_item_id] #2 + WholeStageCodegen + HashAggregate [i_item_id,ss_ext_sales_price,sum,sum] [sum,sum] + Project [ss_ext_sales_price,i_item_id] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [ss_item_sk,ss_ext_sales_price] + BroadcastHashJoin [ss_addr_sk,ca_address_sk] + Project [ss_item_sk,ss_addr_sk,ss_ext_sales_price] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_sold_date_sk,ss_item_sk,ss_addr_sk,ss_ext_sales_price] + Filter [ss_sold_date_sk,ss_addr_sk,ss_item_sk] + Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_addr_sk,ss_ext_sales_price] [ss_sold_date_sk,ss_item_sk,ss_addr_sk,ss_ext_sales_price] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen + Project [d_date_sk] + Filter [d_year,d_moy,d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_year,d_moy] [d_date_sk,d_year,d_moy] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen + Project [ca_address_sk] + Filter [ca_gmt_offset,ca_address_sk] + Scan parquet default.customer_address [ca_address_sk,ca_gmt_offset] [ca_address_sk,ca_gmt_offset] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen + BroadcastHashJoin [i_item_id,i_item_id] + Project [i_item_sk,i_item_id] + Filter [i_item_sk] + Scan parquet default.item [i_item_sk,i_item_id] [i_item_sk,i_item_id] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen + Project [i_item_id] + Filter [i_color] + Scan parquet default.item [i_item_id,i_color] [i_item_id,i_color] + WholeStageCodegen + HashAggregate [i_item_id,sum,sum(UnscaledValue(cs_ext_sales_price))] [sum(UnscaledValue(cs_ext_sales_price)),total_sales,sum] + InputAdapter + Exchange [i_item_id] #7 + WholeStageCodegen + HashAggregate [i_item_id,cs_ext_sales_price,sum,sum] [sum,sum] + Project 
[cs_ext_sales_price,i_item_id] + BroadcastHashJoin [cs_item_sk,i_item_sk] + Project [cs_item_sk,cs_ext_sales_price] + BroadcastHashJoin [cs_bill_addr_sk,ca_address_sk] + Project [cs_bill_addr_sk,cs_item_sk,cs_ext_sales_price] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_sold_date_sk,cs_bill_addr_sk,cs_item_sk,cs_ext_sales_price] + Filter [cs_sold_date_sk,cs_bill_addr_sk,cs_item_sk] + Scan parquet default.catalog_sales [cs_sold_date_sk,cs_bill_addr_sk,cs_item_sk,cs_ext_sales_price] [cs_sold_date_sk,cs_bill_addr_sk,cs_item_sk,cs_ext_sales_price] + InputAdapter + ReusedExchange [d_date_sk] [d_date_sk] #3 + InputAdapter + ReusedExchange [ca_address_sk] [ca_address_sk] #4 + InputAdapter + ReusedExchange [i_item_sk,i_item_id] [i_item_sk,i_item_id] #5 + WholeStageCodegen + HashAggregate [i_item_id,sum,sum(UnscaledValue(ws_ext_sales_price))] [sum(UnscaledValue(ws_ext_sales_price)),total_sales,sum] + InputAdapter + Exchange [i_item_id] #8 + WholeStageCodegen + HashAggregate [i_item_id,ws_ext_sales_price,sum,sum] [sum,sum] + Project [ws_ext_sales_price,i_item_id] + BroadcastHashJoin [ws_item_sk,i_item_sk] + Project [ws_item_sk,ws_ext_sales_price] + BroadcastHashJoin [ws_bill_addr_sk,ca_address_sk] + Project [ws_item_sk,ws_bill_addr_sk,ws_ext_sales_price] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Project [ws_sold_date_sk,ws_item_sk,ws_bill_addr_sk,ws_ext_sales_price] + Filter [ws_sold_date_sk,ws_bill_addr_sk,ws_item_sk] + Scan parquet default.web_sales [ws_sold_date_sk,ws_item_sk,ws_bill_addr_sk,ws_ext_sales_price] [ws_sold_date_sk,ws_item_sk,ws_bill_addr_sk,ws_ext_sales_price] + InputAdapter + ReusedExchange [d_date_sk] [d_date_sk] #3 + InputAdapter + ReusedExchange [ca_address_sk] [ca_address_sk] #4 + InputAdapter + ReusedExchange [i_item_sk,i_item_id] [i_item_sk,i_item_id] #5 diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q57/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q57/explain.txt new file 
mode 100644 index 000000000..458642360 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q57/explain.txt @@ -0,0 +1,51 @@ +== Physical Plan == +TakeOrderedAndProject(limit=100, orderBy=[CheckOverflow((promote_precision(cast(sum_sales#1 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#2 as decimal(22,6)))), DecimalType(22,6)) ASC NULLS FIRST,cc_name#3 ASC NULLS FIRST], output=[i_category#4,i_brand#5,cc_name#3,d_year#6,d_moy#7,avg_monthly_sales#2,sum_sales#1,psum#8,nsum#9]) ++- *(22) Project [i_category#4, i_brand#5, cc_name#3, d_year#6, d_moy#7, avg_monthly_sales#2, sum_sales#1, sum_sales#10 AS psum#8, sum_sales#11 AS nsum#9] + +- *(22) BroadcastHashJoin [i_category#4, i_brand#5, cc_name#3, rn#12], [i_category#13, i_brand#14, cc_name#15, (rn#16 - 1)], Inner, BuildRight + :- *(22) Project [i_category#4, i_brand#5, cc_name#3, d_year#6, d_moy#7, sum_sales#1, avg_monthly_sales#2, rn#12, sum_sales#10] + : +- *(22) BroadcastHashJoin [i_category#4, i_brand#5, cc_name#3, rn#12], [i_category#17, i_brand#18, cc_name#19, (rn#20 + 1)], Inner, BuildRight + : :- *(22) Project [i_category#4, i_brand#5, cc_name#3, d_year#6, d_moy#7, sum_sales#1, avg_monthly_sales#2, rn#12] + : : +- *(22) Filter (((isnotnull(avg_monthly_sales#2) && (avg_monthly_sales#2 > 0.000000)) && (CASE WHEN (avg_monthly_sales#2 > 0.000000) THEN CheckOverflow((promote_precision(abs(CheckOverflow((promote_precision(cast(sum_sales#1 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#2 as decimal(22,6)))), DecimalType(22,6)))) / promote_precision(cast(avg_monthly_sales#2 as decimal(22,6)))), DecimalType(38,16)) ELSE null END > 0.1000000000000000)) && isnotnull(rn#12)) + : : +- Window [avg(_w0#21) windowspecdefinition(i_category#4, i_brand#5, cc_name#3, d_year#6, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS avg_monthly_sales#2], [i_category#4, i_brand#5, cc_name#3, d_year#6] + : : +- *(7) Filter (isnotnull(d_year#6) && 
(d_year#6 = 1999)) + : : +- Window [rank(d_year#6, d_moy#7) windowspecdefinition(i_category#4, i_brand#5, cc_name#3, d_year#6 ASC NULLS FIRST, d_moy#7 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#12], [i_category#4, i_brand#5, cc_name#3], [d_year#6 ASC NULLS FIRST, d_moy#7 ASC NULLS FIRST] + : : +- *(6) Sort [i_category#4 ASC NULLS FIRST, i_brand#5 ASC NULLS FIRST, cc_name#3 ASC NULLS FIRST, d_year#6 ASC NULLS FIRST, d_moy#7 ASC NULLS FIRST], false, 0 + : : +- Exchange hashpartitioning(i_category#4, i_brand#5, cc_name#3, 200) + : : +- *(5) HashAggregate(keys=[i_category#4, i_brand#5, cc_name#3, d_year#6, d_moy#7], functions=[sum(UnscaledValue(cs_sales_price#22))]) + : : +- Exchange hashpartitioning(i_category#4, i_brand#5, cc_name#3, d_year#6, d_moy#7, 200) + : : +- *(4) HashAggregate(keys=[i_category#4, i_brand#5, cc_name#3, d_year#6, d_moy#7], functions=[partial_sum(UnscaledValue(cs_sales_price#22))]) + : : +- *(4) Project [i_brand#5, i_category#4, cs_sales_price#22, d_year#6, d_moy#7, cc_name#3] + : : +- *(4) BroadcastHashJoin [cs_call_center_sk#23], [cc_call_center_sk#24], Inner, BuildRight + : : :- *(4) Project [i_brand#5, i_category#4, cs_call_center_sk#23, cs_sales_price#22, d_year#6, d_moy#7] + : : : +- *(4) BroadcastHashJoin [cs_sold_date_sk#25], [d_date_sk#26], Inner, BuildRight + : : : :- *(4) Project [i_brand#5, i_category#4, cs_sold_date_sk#25, cs_call_center_sk#23, cs_sales_price#22] + : : : : +- *(4) BroadcastHashJoin [i_item_sk#27], [cs_item_sk#28], Inner, BuildRight + : : : : :- *(4) Project [i_item_sk#27, i_brand#5, i_category#4] + : : : : : +- *(4) Filter ((isnotnull(i_item_sk#27) && isnotnull(i_brand#5)) && isnotnull(i_category#4)) + : : : : : +- *(4) FileScan parquet default.item[i_item_sk#27,i_brand#5,i_category#4] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk), 
IsNotNull(i_brand), IsNotNull(i_category)], ReadSchema: struct + : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[2, int, true] as bigint))) + : : : : +- *(1) Project [cs_sold_date_sk#25, cs_call_center_sk#23, cs_item_sk#28, cs_sales_price#22] + : : : : +- *(1) Filter ((isnotnull(cs_item_sk#28) && isnotnull(cs_sold_date_sk#25)) && isnotnull(cs_call_center_sk#23)) + : : : : +- *(1) FileScan parquet default.catalog_sales[cs_sold_date_sk#25,cs_call_center_sk#23,cs_item_sk#28,cs_sales_price#22] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_item_sk), IsNotNull(cs_sold_date_sk), IsNotNull(cs_call_center_sk)], ReadSchema: struct + : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : +- *(2) Project [d_date_sk#26, d_year#6, d_moy#7] + : : : +- *(2) Filter ((((d_year#6 = 1999) || ((d_year#6 = 1998) && (d_moy#7 = 12))) || ((d_year#6 = 2000) && (d_moy#7 = 1))) && isnotnull(d_date_sk#26)) + : : : +- *(2) FileScan parquet default.date_dim[d_date_sk#26,d_year#6,d_moy#7] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [Or(Or(EqualTo(d_year,1999),And(EqualTo(d_year,1998),EqualTo(d_moy,12))),And(EqualTo(d_year,2000)..., ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(3) Project [cc_call_center_sk#24, cc_name#3] + : : +- *(3) Filter (isnotnull(cc_call_center_sk#24) && isnotnull(cc_name#3)) + : : +- *(3) FileScan parquet default.call_center[cc_call_center_sk#24,cc_name#3] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/call_center], PartitionFilters: [], PushedFilters: [IsNotNull(cc_call_center_sk), 
IsNotNull(cc_name)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true], input[1, string, true], input[2, string, true], (input[4, int, true] + 1))) + : +- *(14) Project [i_category#17, i_brand#18, cc_name#19, sum_sales#10, rn#20] + : +- *(14) Filter isnotnull(rn#20) + : +- Window [rank(d_year#29, d_moy#30) windowspecdefinition(i_category#17, i_brand#18, cc_name#19, d_year#29 ASC NULLS FIRST, d_moy#30 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#20], [i_category#17, i_brand#18, cc_name#19], [d_year#29 ASC NULLS FIRST, d_moy#30 ASC NULLS FIRST] + : +- *(13) Sort [i_category#17 ASC NULLS FIRST, i_brand#18 ASC NULLS FIRST, cc_name#19 ASC NULLS FIRST, d_year#29 ASC NULLS FIRST, d_moy#30 ASC NULLS FIRST], false, 0 + : +- Exchange hashpartitioning(i_category#17, i_brand#18, cc_name#19, 200) + : +- *(12) HashAggregate(keys=[i_category#17, i_brand#18, cc_name#19, d_year#29, d_moy#30], functions=[sum(UnscaledValue(cs_sales_price#22))]) + : +- ReusedExchange [i_category#17, i_brand#18, cc_name#19, d_year#29, d_moy#30, sum#31], Exchange hashpartitioning(i_category#4, i_brand#5, cc_name#3, d_year#6, d_moy#7, 200) + +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true], input[1, string, true], input[2, string, true], (input[4, int, true] - 1))) + +- *(21) Project [i_category#13, i_brand#14, cc_name#15, sum_sales#11, rn#16] + +- *(21) Filter isnotnull(rn#16) + +- Window [rank(d_year#32, d_moy#33) windowspecdefinition(i_category#13, i_brand#14, cc_name#15, d_year#32 ASC NULLS FIRST, d_moy#33 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#16], [i_category#13, i_brand#14, cc_name#15], [d_year#32 ASC NULLS FIRST, d_moy#33 ASC NULLS FIRST] + +- *(20) Sort [i_category#13 ASC NULLS FIRST, i_brand#14 ASC NULLS FIRST, cc_name#15 ASC NULLS FIRST, d_year#32 ASC NULLS FIRST, d_moy#33 ASC NULLS FIRST], false, 0 + +- 
ReusedExchange [i_category#13, i_brand#14, cc_name#15, d_year#32, d_moy#33, sum_sales#11], Exchange hashpartitioning(i_category#17, i_brand#18, cc_name#19, 200) \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q57/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q57/simplified.txt new file mode 100644 index 000000000..d41aaf1e2 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q57/simplified.txt @@ -0,0 +1,77 @@ +TakeOrderedAndProject [d_year,avg_monthly_sales,nsum,d_moy,sum_sales,i_category,i_brand,cc_name,psum] + WholeStageCodegen + Project [sum_sales,d_year,sum_sales,d_moy,sum_sales,i_category,i_brand,cc_name,avg_monthly_sales] + BroadcastHashJoin [cc_name,i_brand,rn,rn,i_category,i_category,i_brand,cc_name] + Project [d_year,d_moy,sum_sales,rn,i_brand,cc_name,sum_sales,i_category,avg_monthly_sales] + BroadcastHashJoin [i_category,cc_name,rn,i_category,rn,i_brand,cc_name,i_brand] + Project [d_year,d_moy,sum_sales,rn,i_brand,cc_name,i_category,avg_monthly_sales] + Filter [avg_monthly_sales,sum_sales,rn] + InputAdapter + Window [d_year,i_brand,cc_name,i_category,_w0] + WholeStageCodegen + Filter [d_year] + InputAdapter + Window [d_year,d_moy,i_brand,cc_name,i_category] + WholeStageCodegen + Sort [d_year,d_moy,i_brand,cc_name,i_category] + InputAdapter + Exchange [i_category,i_brand,cc_name] #1 + WholeStageCodegen + HashAggregate [d_year,d_moy,sum(UnscaledValue(cs_sales_price)),i_brand,sum,cc_name,i_category] [sum(UnscaledValue(cs_sales_price)),sum_sales,_w0,sum] + InputAdapter + Exchange [d_year,d_moy,i_brand,cc_name,i_category] #2 + WholeStageCodegen + HashAggregate [d_year,d_moy,i_brand,sum,sum,cc_name,i_category,cs_sales_price] [sum,sum] + Project [d_moy,d_year,cs_sales_price,i_category,cc_name,i_brand] + BroadcastHashJoin [cs_call_center_sk,cc_call_center_sk] + Project [d_moy,d_year,cs_sales_price,i_category,i_brand,cs_call_center_sk] + 
BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_sales_price,cs_sold_date_sk,i_category,i_brand,cs_call_center_sk] + BroadcastHashJoin [i_item_sk,cs_item_sk] + Project [i_item_sk,i_brand,i_category] + Filter [i_item_sk,i_brand,i_category] + Scan parquet default.item [i_item_sk,i_brand,i_category] [i_item_sk,i_brand,i_category] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen + Project [cs_sold_date_sk,cs_call_center_sk,cs_item_sk,cs_sales_price] + Filter [cs_item_sk,cs_sold_date_sk,cs_call_center_sk] + Scan parquet default.catalog_sales [cs_sold_date_sk,cs_call_center_sk,cs_item_sk,cs_sales_price] [cs_sold_date_sk,cs_call_center_sk,cs_item_sk,cs_sales_price] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen + Project [d_date_sk,d_year,d_moy] + Filter [d_year,d_moy,d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_year,d_moy] [d_date_sk,d_year,d_moy] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen + Project [cc_call_center_sk,cc_name] + Filter [cc_call_center_sk,cc_name] + Scan parquet default.call_center [cc_call_center_sk,cc_name] [cc_call_center_sk,cc_name] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen + Project [cc_name,sum_sales,i_brand,rn,i_category] + Filter [rn] + InputAdapter + Window [cc_name,i_brand,d_moy,d_year,i_category] + WholeStageCodegen + Sort [cc_name,i_brand,d_moy,d_year,i_category] + InputAdapter + Exchange [i_category,i_brand,cc_name] #7 + WholeStageCodegen + HashAggregate [sum(UnscaledValue(cs_sales_price)),sum,cc_name,i_brand,d_moy,d_year,i_category] [sum(UnscaledValue(cs_sales_price)),sum_sales,sum] + InputAdapter + ReusedExchange [cc_name,i_brand,d_moy,sum,d_year,i_category] [cc_name,i_brand,d_moy,sum,d_year,i_category] #2 + InputAdapter + BroadcastExchange #8 + WholeStageCodegen + Project [cc_name,rn,i_brand,i_category,sum_sales] + Filter [rn] + InputAdapter + Window [cc_name,d_year,i_brand,i_category,d_moy] + WholeStageCodegen + Sort [cc_name,d_year,i_brand,i_category,d_moy] + 
InputAdapter + ReusedExchange [cc_name,d_year,i_brand,i_category,d_moy,sum_sales] [cc_name,d_year,i_brand,i_category,d_moy,sum_sales] #7 diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q58/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q58/explain.txt new file mode 100644 index 000000000..5b1f75cde --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q58/explain.txt @@ -0,0 +1,101 @@ +== Physical Plan == +TakeOrderedAndProject(limit=100, orderBy=[item_id#1 ASC NULLS FIRST,ss_item_rev#2 ASC NULLS FIRST], output=[item_id#1,ss_item_rev#2,ss_dev#3,cs_item_rev#4,cs_dev#5,ws_item_rev#6,ws_dev#7,average#8]) ++- *(15) Project [item_id#1, ss_item_rev#2, CheckOverflow((promote_precision(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(ss_item_rev#2 as decimal(19,2))) / promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_item_rev#2 as decimal(18,2))) + promote_precision(cast(cs_item_rev#4 as decimal(18,2)))), DecimalType(18,2)) as decimal(19,2))) + promote_precision(cast(ws_item_rev#6 as decimal(19,2)))), DecimalType(19,2)))), DecimalType(38,21))) / 3.000000000000000000000), DecimalType(38,21))) * 100.000000000000000000000), DecimalType(38,17)) AS ss_dev#3, cs_item_rev#4, CheckOverflow((promote_precision(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(cs_item_rev#4 as decimal(19,2))) / promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_item_rev#2 as decimal(18,2))) + promote_precision(cast(cs_item_rev#4 as decimal(18,2)))), DecimalType(18,2)) as decimal(19,2))) + promote_precision(cast(ws_item_rev#6 as decimal(19,2)))), DecimalType(19,2)))), DecimalType(38,21))) / 3.000000000000000000000), DecimalType(38,21))) * 100.000000000000000000000), DecimalType(38,17)) AS cs_dev#5, ws_item_rev#6, 
CheckOverflow((promote_precision(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(ws_item_rev#6 as decimal(19,2))) / promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_item_rev#2 as decimal(18,2))) + promote_precision(cast(cs_item_rev#4 as decimal(18,2)))), DecimalType(18,2)) as decimal(19,2))) + promote_precision(cast(ws_item_rev#6 as decimal(19,2)))), DecimalType(19,2)))), DecimalType(38,21))) / 3.000000000000000000000), DecimalType(38,21))) * 100.000000000000000000000), DecimalType(38,17)) AS ws_dev#7, CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_item_rev#2 as decimal(18,2))) + promote_precision(cast(cs_item_rev#4 as decimal(18,2)))), DecimalType(18,2)) as decimal(19,2))) + promote_precision(cast(ws_item_rev#6 as decimal(19,2)))), DecimalType(19,2))) / 3.00), DecimalType(23,6)) AS average#8] + +- *(15) BroadcastHashJoin [item_id#1], [item_id#9], Inner, BuildRight, ((((((((cast(ss_item_rev#2 as decimal(19,3)) >= CheckOverflow((0.90 * promote_precision(ws_item_rev#6)), DecimalType(19,3))) && (cast(ss_item_rev#2 as decimal(20,3)) <= CheckOverflow((1.10 * promote_precision(ws_item_rev#6)), DecimalType(20,3)))) && (cast(cs_item_rev#4 as decimal(19,3)) >= CheckOverflow((0.90 * promote_precision(ws_item_rev#6)), DecimalType(19,3)))) && (cast(cs_item_rev#4 as decimal(20,3)) <= CheckOverflow((1.10 * promote_precision(ws_item_rev#6)), DecimalType(20,3)))) && (cast(ws_item_rev#6 as decimal(19,3)) >= CheckOverflow((0.90 * promote_precision(ss_item_rev#2)), DecimalType(19,3)))) && (cast(ws_item_rev#6 as decimal(20,3)) <= CheckOverflow((1.10 * promote_precision(ss_item_rev#2)), DecimalType(20,3)))) && (cast(ws_item_rev#6 as decimal(19,3)) >= CheckOverflow((0.90 * promote_precision(cs_item_rev#4)), DecimalType(19,3)))) && (cast(ws_item_rev#6 as decimal(20,3)) <= CheckOverflow((1.10 * promote_precision(cs_item_rev#4)), DecimalType(20,3)))) + :- 
*(15) Project [item_id#1, ss_item_rev#2, cs_item_rev#4] + : +- *(15) BroadcastHashJoin [item_id#1], [item_id#10], Inner, BuildRight, ((((cast(ss_item_rev#2 as decimal(19,3)) >= CheckOverflow((0.90 * promote_precision(cs_item_rev#4)), DecimalType(19,3))) && (cast(ss_item_rev#2 as decimal(20,3)) <= CheckOverflow((1.10 * promote_precision(cs_item_rev#4)), DecimalType(20,3)))) && (cast(cs_item_rev#4 as decimal(19,3)) >= CheckOverflow((0.90 * promote_precision(ss_item_rev#2)), DecimalType(19,3)))) && (cast(cs_item_rev#4 as decimal(20,3)) <= CheckOverflow((1.10 * promote_precision(ss_item_rev#2)), DecimalType(20,3)))) + : :- *(15) Filter isnotnull(ss_item_rev#2) + : : +- *(15) HashAggregate(keys=[i_item_id#11], functions=[sum(UnscaledValue(ss_ext_sales_price#12))]) + : : +- Exchange hashpartitioning(i_item_id#11, 200) + : : +- *(4) HashAggregate(keys=[i_item_id#11], functions=[partial_sum(UnscaledValue(ss_ext_sales_price#12))]) + : : +- *(4) Project [ss_ext_sales_price#12, i_item_id#11] + : : +- *(4) BroadcastHashJoin [ss_sold_date_sk#13], [d_date_sk#14], Inner, BuildRight + : : :- *(4) Project [ss_sold_date_sk#13, ss_ext_sales_price#12, i_item_id#11] + : : : +- *(4) BroadcastHashJoin [ss_item_sk#15], [i_item_sk#16], Inner, BuildRight + : : : :- *(4) Project [ss_sold_date_sk#13, ss_item_sk#15, ss_ext_sales_price#12] + : : : : +- *(4) Filter (isnotnull(ss_item_sk#15) && isnotnull(ss_sold_date_sk#13)) + : : : : +- *(4) FileScan parquet default.store_sales[ss_sold_date_sk#13,ss_item_sk#15,ss_ext_sales_price#12] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk)], ReadSchema: struct + : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : +- *(1) Project [i_item_sk#16, i_item_id#11] + : : : +- *(1) Filter (isnotnull(i_item_sk#16) && isnotnull(i_item_id#11)) + 
: : : +- *(1) FileScan parquet default.item[i_item_sk#16,i_item_id#11] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk), IsNotNull(i_item_id)], ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(3) Project [d_date_sk#14] + : : +- *(3) BroadcastHashJoin [d_date#17], [d_date#17#18], LeftSemi, BuildRight + : : :- *(3) Project [d_date_sk#14, d_date#17] + : : : +- *(3) Filter isnotnull(d_date_sk#14) + : : : +- *(3) FileScan parquet default.date_dim[d_date_sk#14,d_date#17] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date_sk)], ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, date, true])) + : : +- *(2) Project [d_date#17 AS d_date#17#18] + : : +- *(2) Filter (isnotnull(d_week_seq#19) && (d_week_seq#19 = Subquery subquery12416)) + : : : +- Subquery subquery12416 + : : : +- *(1) Project [d_week_seq#19] + : : : +- *(1) Filter (isnotnull(d_date#17) && (cast(d_date#17 as string) = 2000-01-03)) + : : : +- *(1) FileScan parquet default.date_dim[d_date#17,d_week_seq#19] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date)], ReadSchema: struct + : : +- *(2) FileScan parquet default.date_dim[d_date#17,d_week_seq#19] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_week_seq)], ReadSchema: struct + : : +- Subquery subquery12416 + : : +- *(1) Project [d_week_seq#19] + : : +- *(1) Filter (isnotnull(d_date#17) && (cast(d_date#17 as string) = 
2000-01-03)) + : : +- *(1) FileScan parquet default.date_dim[d_date#17,d_week_seq#19] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true])) + : +- *(9) Filter isnotnull(cs_item_rev#4) + : +- *(9) HashAggregate(keys=[i_item_id#11], functions=[sum(UnscaledValue(cs_ext_sales_price#20))]) + : +- Exchange hashpartitioning(i_item_id#11, 200) + : +- *(8) HashAggregate(keys=[i_item_id#11], functions=[partial_sum(UnscaledValue(cs_ext_sales_price#20))]) + : +- *(8) Project [cs_ext_sales_price#20, i_item_id#11] + : +- *(8) BroadcastHashJoin [cs_sold_date_sk#21], [d_date_sk#14], Inner, BuildRight + : :- *(8) Project [cs_sold_date_sk#21, cs_ext_sales_price#20, i_item_id#11] + : : +- *(8) BroadcastHashJoin [cs_item_sk#22], [i_item_sk#16], Inner, BuildRight + : : :- *(8) Project [cs_sold_date_sk#21, cs_item_sk#22, cs_ext_sales_price#20] + : : : +- *(8) Filter (isnotnull(cs_item_sk#22) && isnotnull(cs_sold_date_sk#21)) + : : : +- *(8) FileScan parquet default.catalog_sales[cs_sold_date_sk#21,cs_item_sk#22,cs_ext_sales_price#20] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_item_sk), IsNotNull(cs_sold_date_sk)], ReadSchema: struct + : : +- ReusedExchange [i_item_sk#16, i_item_id#11], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(7) Project [d_date_sk#14] + : +- *(7) BroadcastHashJoin [d_date#17], [d_date#17#23], LeftSemi, BuildRight + : :- *(7) Project [d_date_sk#14, d_date#17] + : : +- *(7) Filter isnotnull(d_date_sk#14) + : : +- *(7) FileScan parquet 
default.date_dim[d_date_sk#14,d_date#17] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, date, true])) + : +- *(6) Project [d_date#17 AS d_date#17#23] + : +- *(6) Filter (isnotnull(d_week_seq#19) && (d_week_seq#19 = Subquery subquery12420)) + : : +- Subquery subquery12420 + : : +- *(1) Project [d_week_seq#19] + : : +- *(1) Filter (isnotnull(d_date#17) && (cast(d_date#17 as string) = 2000-01-03)) + : : +- *(1) FileScan parquet default.date_dim[d_date#17,d_week_seq#19] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date)], ReadSchema: struct + : +- *(6) FileScan parquet default.date_dim[d_date#17,d_week_seq#19] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_week_seq)], ReadSchema: struct + : +- Subquery subquery12420 + : +- *(1) Project [d_week_seq#19] + : +- *(1) Filter (isnotnull(d_date#17) && (cast(d_date#17 as string) = 2000-01-03)) + : +- *(1) FileScan parquet default.date_dim[d_date#17,d_week_seq#19] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date)], ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true])) + +- *(14) Filter isnotnull(ws_item_rev#6) + +- *(14) HashAggregate(keys=[i_item_id#11], functions=[sum(UnscaledValue(ws_ext_sales_price#24))]) + +- Exchange hashpartitioning(i_item_id#11, 200) + +- *(13) HashAggregate(keys=[i_item_id#11], functions=[partial_sum(UnscaledValue(ws_ext_sales_price#24))]) 
+ +- *(13) Project [ws_ext_sales_price#24, i_item_id#11] + +- *(13) BroadcastHashJoin [ws_sold_date_sk#25], [d_date_sk#14], Inner, BuildRight + :- *(13) Project [ws_sold_date_sk#25, ws_ext_sales_price#24, i_item_id#11] + : +- *(13) BroadcastHashJoin [ws_item_sk#26], [i_item_sk#16], Inner, BuildRight + : :- *(13) Project [ws_sold_date_sk#25, ws_item_sk#26, ws_ext_sales_price#24] + : : +- *(13) Filter (isnotnull(ws_item_sk#26) && isnotnull(ws_sold_date_sk#25)) + : : +- *(13) FileScan parquet default.web_sales[ws_sold_date_sk#25,ws_item_sk#26,ws_ext_sales_price#24] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_item_sk), IsNotNull(ws_sold_date_sk)], ReadSchema: struct + : +- ReusedExchange [i_item_sk#16, i_item_id#11], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(12) Project [d_date_sk#14] + +- *(12) BroadcastHashJoin [d_date#17], [d_date#17#27], LeftSemi, BuildRight + :- *(12) Project [d_date_sk#14, d_date#17] + : +- *(12) Filter isnotnull(d_date_sk#14) + : +- *(12) FileScan parquet default.date_dim[d_date_sk#14,d_date#17] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date_sk)], ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, date, true])) + +- *(11) Project [d_date#17 AS d_date#17#27] + +- *(11) Filter (isnotnull(d_week_seq#19) && (d_week_seq#19 = Subquery subquery12424)) + : +- Subquery subquery12424 + : +- *(1) Project [d_week_seq#19] + : +- *(1) Filter (isnotnull(d_date#17) && (cast(d_date#17 as string) = 2000-01-03)) + : +- *(1) FileScan parquet default.date_dim[d_date#17,d_week_seq#19] Batched: true, Format: Parquet, 
Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date)], ReadSchema: struct + +- *(11) FileScan parquet default.date_dim[d_date#17,d_week_seq#19] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_week_seq)], ReadSchema: struct + +- Subquery subquery12424 + +- *(1) Project [d_week_seq#19] + +- *(1) Filter (isnotnull(d_date#17) && (cast(d_date#17 as string) = 2000-01-03)) + +- *(1) FileScan parquet default.date_dim[d_date#17,d_week_seq#19] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q58/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q58/simplified.txt new file mode 100644 index 000000000..fd4503647 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q58/simplified.txt @@ -0,0 +1,133 @@ +TakeOrderedAndProject [cs_item_rev,item_id,ws_dev,ws_item_rev,average,cs_dev,ss_item_rev,ss_dev] + WholeStageCodegen + Project [item_id,ss_item_rev,cs_item_rev,ws_item_rev] + BroadcastHashJoin [cs_item_rev,item_id,ws_item_rev,item_id,ss_item_rev] + Project [item_id,ss_item_rev,cs_item_rev] + BroadcastHashJoin [item_id,item_id,ss_item_rev,cs_item_rev] + Filter [ss_item_rev] + HashAggregate [i_item_id,sum,sum(UnscaledValue(ss_ext_sales_price))] [sum(UnscaledValue(ss_ext_sales_price)),item_id,ss_item_rev,sum] + InputAdapter + Exchange [i_item_id] #1 + WholeStageCodegen + HashAggregate [i_item_id,ss_ext_sales_price,sum,sum] [sum,sum] + Project [ss_ext_sales_price,i_item_id] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project 
[ss_sold_date_sk,ss_ext_sales_price,i_item_id] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [ss_sold_date_sk,ss_item_sk,ss_ext_sales_price] + Filter [ss_item_sk,ss_sold_date_sk] + Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_ext_sales_price] [ss_sold_date_sk,ss_item_sk,ss_ext_sales_price] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen + Project [i_item_sk,i_item_id] + Filter [i_item_sk,i_item_id] + Scan parquet default.item [i_item_sk,i_item_id] [i_item_sk,i_item_id] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen + Project [d_date_sk] + BroadcastHashJoin [d_date,d_date] + Project [d_date_sk,d_date] + Filter [d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_date] [d_date_sk,d_date] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen + Project [d_date] + Filter [d_week_seq] + Subquery #1 + WholeStageCodegen + Project [d_week_seq] + Filter [d_date] + Scan parquet default.date_dim [d_date,d_week_seq] [d_date,d_week_seq] + Scan parquet default.date_dim [d_date,d_week_seq] [d_date,d_week_seq] + Subquery #1 + WholeStageCodegen + Project [d_week_seq] + Filter [d_date] + Scan parquet default.date_dim [d_date,d_week_seq] [d_date,d_week_seq] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen + Filter [cs_item_rev] + HashAggregate [i_item_id,sum,sum(UnscaledValue(cs_ext_sales_price))] [sum(UnscaledValue(cs_ext_sales_price)),item_id,cs_item_rev,sum] + InputAdapter + Exchange [i_item_id] #6 + WholeStageCodegen + HashAggregate [i_item_id,cs_ext_sales_price,sum,sum] [sum,sum] + Project [cs_ext_sales_price,i_item_id] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_sold_date_sk,cs_ext_sales_price,i_item_id] + BroadcastHashJoin [cs_item_sk,i_item_sk] + Project [cs_sold_date_sk,cs_item_sk,cs_ext_sales_price] + Filter [cs_item_sk,cs_sold_date_sk] + Scan parquet default.catalog_sales [cs_sold_date_sk,cs_item_sk,cs_ext_sales_price] [cs_sold_date_sk,cs_item_sk,cs_ext_sales_price] + InputAdapter + 
ReusedExchange [i_item_sk,i_item_id] [i_item_sk,i_item_id] #2 + InputAdapter + BroadcastExchange #7 + WholeStageCodegen + Project [d_date_sk] + BroadcastHashJoin [d_date,d_date] + Project [d_date_sk,d_date] + Filter [d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_date] [d_date_sk,d_date] + InputAdapter + BroadcastExchange #8 + WholeStageCodegen + Project [d_date] + Filter [d_week_seq] + Subquery #2 + WholeStageCodegen + Project [d_week_seq] + Filter [d_date] + Scan parquet default.date_dim [d_date,d_week_seq] [d_date,d_week_seq] + Scan parquet default.date_dim [d_date,d_week_seq] [d_date,d_week_seq] + Subquery #2 + WholeStageCodegen + Project [d_week_seq] + Filter [d_date] + Scan parquet default.date_dim [d_date,d_week_seq] [d_date,d_week_seq] + InputAdapter + BroadcastExchange #9 + WholeStageCodegen + Filter [ws_item_rev] + HashAggregate [i_item_id,sum,sum(UnscaledValue(ws_ext_sales_price))] [sum(UnscaledValue(ws_ext_sales_price)),item_id,ws_item_rev,sum] + InputAdapter + Exchange [i_item_id] #10 + WholeStageCodegen + HashAggregate [i_item_id,ws_ext_sales_price,sum,sum] [sum,sum] + Project [ws_ext_sales_price,i_item_id] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Project [ws_sold_date_sk,ws_ext_sales_price,i_item_id] + BroadcastHashJoin [ws_item_sk,i_item_sk] + Project [ws_sold_date_sk,ws_item_sk,ws_ext_sales_price] + Filter [ws_item_sk,ws_sold_date_sk] + Scan parquet default.web_sales [ws_sold_date_sk,ws_item_sk,ws_ext_sales_price] [ws_sold_date_sk,ws_item_sk,ws_ext_sales_price] + InputAdapter + ReusedExchange [i_item_sk,i_item_id] [i_item_sk,i_item_id] #2 + InputAdapter + BroadcastExchange #11 + WholeStageCodegen + Project [d_date_sk] + BroadcastHashJoin [d_date,d_date] + Project [d_date_sk,d_date] + Filter [d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_date] [d_date_sk,d_date] + InputAdapter + BroadcastExchange #12 + WholeStageCodegen + Project [d_date] + Filter [d_week_seq] + Subquery #3 + WholeStageCodegen + Project [d_week_seq] + 
Filter [d_date] + Scan parquet default.date_dim [d_date,d_week_seq] [d_date,d_week_seq] + Scan parquet default.date_dim [d_date,d_week_seq] [d_date,d_week_seq] + Subquery #3 + WholeStageCodegen + Project [d_week_seq] + Filter [d_date] + Scan parquet default.date_dim [d_date,d_week_seq] [d_date,d_week_seq] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q59/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q59/explain.txt new file mode 100644 index 000000000..56e7c1173 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q59/explain.txt @@ -0,0 +1,43 @@ +== Physical Plan == +TakeOrderedAndProject(limit=100, orderBy=[s_store_name1#1 ASC NULLS FIRST,s_store_id1#2 ASC NULLS FIRST,d_week_seq1#3 ASC NULLS FIRST], output=[s_store_name1#1,s_store_id1#2,d_week_seq1#3,(sun_sales1 / sun_sales2)#4,(mon_sales1 / mon_sales2)#5,(tue_sales1 / tue_sales2)#6,(wed_sales1 / wed_sales2)#7,(thu_sales1 / thu_sales2)#8,(fri_sales1 / fri_sales2)#9,(sat_sales1 / sat_sales2)#10]) ++- *(10) Project [s_store_name1#1, s_store_id1#2, d_week_seq1#3, CheckOverflow((promote_precision(sun_sales1#11) / promote_precision(sun_sales2#12)), DecimalType(37,20)) AS (sun_sales1 / sun_sales2)#4, CheckOverflow((promote_precision(mon_sales1#13) / promote_precision(mon_sales2#14)), DecimalType(37,20)) AS (mon_sales1 / mon_sales2)#5, CheckOverflow((promote_precision(tue_sales1#15) / promote_precision(tue_sales2#16)), DecimalType(37,20)) AS (tue_sales1 / tue_sales2)#6, CheckOverflow((promote_precision(wed_sales1#17) / promote_precision(wed_sales2#18)), DecimalType(37,20)) AS (wed_sales1 / wed_sales2)#7, CheckOverflow((promote_precision(thu_sales1#19) / promote_precision(thu_sales2#20)), DecimalType(37,20)) AS (thu_sales1 / thu_sales2)#8, CheckOverflow((promote_precision(fri_sales1#21) / promote_precision(fri_sales2#22)), DecimalType(37,20)) AS (fri_sales1 / fri_sales2)#9, CheckOverflow((promote_precision(sat_sales1#23) / 
promote_precision(sat_sales2#24)), DecimalType(37,20)) AS (sat_sales1 / sat_sales2)#10] + +- *(10) BroadcastHashJoin [s_store_id1#2, d_week_seq1#3], [s_store_id2#25, (d_week_seq2#26 - 52)], Inner, BuildRight + :- *(10) Project [s_store_name#27 AS s_store_name1#1, d_week_seq#28 AS d_week_seq1#3, s_store_id#29 AS s_store_id1#2, sun_sales#30 AS sun_sales1#11, mon_sales#31 AS mon_sales1#13, tue_sales#32 AS tue_sales1#15, wed_sales#33 AS wed_sales1#17, thu_sales#34 AS thu_sales1#19, fri_sales#35 AS fri_sales1#21, sat_sales#36 AS sat_sales1#23] + : +- *(10) BroadcastHashJoin [d_week_seq#28], [d_week_seq#37], Inner, BuildRight + : :- *(10) Project [d_week_seq#28, sun_sales#30, mon_sales#31, tue_sales#32, wed_sales#33, thu_sales#34, fri_sales#35, sat_sales#36, s_store_id#29, s_store_name#27] + : : +- *(10) BroadcastHashJoin [ss_store_sk#38], [s_store_sk#39], Inner, BuildRight + : : :- *(10) HashAggregate(keys=[d_week_seq#28, ss_store_sk#38], functions=[sum(UnscaledValue(CASE WHEN (d_day_name#40 = Sunday) THEN ss_sales_price#41 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#40 = Monday) THEN ss_sales_price#41 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#40 = Tuesday) THEN ss_sales_price#41 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#40 = Wednesday) THEN ss_sales_price#41 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#40 = Thursday) THEN ss_sales_price#41 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#40 = Friday) THEN ss_sales_price#41 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#40 = Saturday) THEN ss_sales_price#41 ELSE null END))]) + : : : +- Exchange hashpartitioning(d_week_seq#28, ss_store_sk#38, 200) + : : : +- *(2) HashAggregate(keys=[d_week_seq#28, ss_store_sk#38], functions=[partial_sum(UnscaledValue(CASE WHEN (d_day_name#40 = Sunday) THEN ss_sales_price#41 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#40 = Monday) THEN ss_sales_price#41 ELSE null END)), 
partial_sum(UnscaledValue(CASE WHEN (d_day_name#40 = Tuesday) THEN ss_sales_price#41 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#40 = Wednesday) THEN ss_sales_price#41 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#40 = Thursday) THEN ss_sales_price#41 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#40 = Friday) THEN ss_sales_price#41 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#40 = Saturday) THEN ss_sales_price#41 ELSE null END))]) + : : : +- *(2) Project [ss_store_sk#38, ss_sales_price#41, d_week_seq#28, d_day_name#40] + : : : +- *(2) BroadcastHashJoin [ss_sold_date_sk#42], [d_date_sk#43], Inner, BuildRight + : : : :- *(2) Project [ss_sold_date_sk#42, ss_store_sk#38, ss_sales_price#41] + : : : : +- *(2) Filter (isnotnull(ss_sold_date_sk#42) && isnotnull(ss_store_sk#38)) + : : : : +- *(2) FileScan parquet default.store_sales[ss_sold_date_sk#42,ss_store_sk#38,ss_sales_price#41] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk)], ReadSchema: struct + : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : +- *(1) Project [d_date_sk#43, d_week_seq#28, d_day_name#40] + : : : +- *(1) Filter (isnotnull(d_date_sk#43) && isnotnull(d_week_seq#28)) + : : : +- *(1) FileScan parquet default.date_dim[d_date_sk#43,d_week_seq#28,d_day_name#40] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date_sk), IsNotNull(d_week_seq)], ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(3) Project [s_store_sk#39, s_store_id#29, s_store_name#27] + : : +- *(3) Filter (isnotnull(s_store_sk#39) && 
isnotnull(s_store_id#29)) + : : +- *(3) FileScan parquet default.store[s_store_sk#39,s_store_id#29,s_store_name#27] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_store_sk), IsNotNull(s_store_id)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(4) Project [d_week_seq#37] + : +- *(4) Filter (((isnotnull(d_month_seq#44) && (d_month_seq#44 >= 1212)) && (d_month_seq#44 <= 1223)) && isnotnull(d_week_seq#37)) + : +- *(4) FileScan parquet default.date_dim[d_month_seq#44,d_week_seq#37] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1212), LessThanOrEqual(d_month_seq,1223),..., ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(input[1, string, true], (input[0, int, true] - 52))) + +- *(9) Project [d_week_seq#28 AS d_week_seq2#26, s_store_id#29 AS s_store_id2#25, sun_sales#30 AS sun_sales2#12, mon_sales#31 AS mon_sales2#14, tue_sales#32 AS tue_sales2#16, wed_sales#33 AS wed_sales2#18, thu_sales#34 AS thu_sales2#20, fri_sales#35 AS fri_sales2#22, sat_sales#36 AS sat_sales2#24] + +- *(9) BroadcastHashJoin [d_week_seq#28], [d_week_seq#45], Inner, BuildRight + :- *(9) Project [d_week_seq#28, sun_sales#30, mon_sales#31, tue_sales#32, wed_sales#33, thu_sales#34, fri_sales#35, sat_sales#36, s_store_id#29] + : +- *(9) BroadcastHashJoin [ss_store_sk#38], [s_store_sk#39], Inner, BuildRight + : :- *(9) HashAggregate(keys=[d_week_seq#28, ss_store_sk#38], functions=[sum(UnscaledValue(CASE WHEN (d_day_name#40 = Sunday) THEN ss_sales_price#41 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#40 = Monday) THEN ss_sales_price#41 ELSE null END)), sum(UnscaledValue(CASE WHEN 
(d_day_name#40 = Tuesday) THEN ss_sales_price#41 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#40 = Wednesday) THEN ss_sales_price#41 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#40 = Thursday) THEN ss_sales_price#41 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#40 = Friday) THEN ss_sales_price#41 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#40 = Saturday) THEN ss_sales_price#41 ELSE null END))]) + : : +- ReusedExchange [d_week_seq#28, ss_store_sk#38, sum#46, sum#47, sum#48, sum#49, sum#50, sum#51, sum#52], Exchange hashpartitioning(d_week_seq#28, ss_store_sk#38, 200) + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(7) Project [s_store_sk#39, s_store_id#29] + : +- *(7) Filter (isnotnull(s_store_sk#39) && isnotnull(s_store_id#29)) + : +- *(7) FileScan parquet default.store[s_store_sk#39,s_store_id#29] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_store_sk), IsNotNull(s_store_id)], ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(8) Project [d_week_seq#45] + +- *(8) Filter (((isnotnull(d_month_seq#53) && (d_month_seq#53 >= 1224)) && (d_month_seq#53 <= 1235)) && isnotnull(d_week_seq#45)) + +- *(8) FileScan parquet default.date_dim[d_month_seq#53,d_week_seq#45] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1224), LessThanOrEqual(d_month_seq,1235),..., ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q59/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q59/simplified.txt new file mode 100644 index 
000000000..0ffe4eb4e --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q59/simplified.txt @@ -0,0 +1,58 @@ +TakeOrderedAndProject [(wed_sales1 / wed_sales2),d_week_seq1,(mon_sales1 / mon_sales2),(thu_sales1 / thu_sales2),(sat_sales1 / sat_sales2),s_store_name1,(sun_sales1 / sun_sales2),s_store_id1,(fri_sales1 / fri_sales2),(tue_sales1 / tue_sales2)] + WholeStageCodegen + Project [sat_sales1,sat_sales2,sun_sales2,tue_sales1,d_week_seq1,mon_sales1,fri_sales1,wed_sales1,thu_sales1,mon_sales2,s_store_name1,s_store_id1,wed_sales2,thu_sales2,fri_sales2,tue_sales2,sun_sales1] + BroadcastHashJoin [s_store_id1,d_week_seq1,s_store_id2,d_week_seq2] + Project [fri_sales,tue_sales,sat_sales,s_store_id,sun_sales,d_week_seq,wed_sales,mon_sales,s_store_name,thu_sales] + BroadcastHashJoin [d_week_seq,d_week_seq] + Project [s_store_id,s_store_name,thu_sales,sun_sales,fri_sales,tue_sales,d_week_seq,wed_sales,sat_sales,mon_sales] + BroadcastHashJoin [ss_store_sk,s_store_sk] + HashAggregate [sum(UnscaledValue(CASE WHEN (d_day_name = Wednesday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Saturday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Sunday) THEN ss_sales_price ELSE null END)),sum,sum(UnscaledValue(CASE WHEN (d_day_name = Tuesday) THEN ss_sales_price ELSE null END)),sum,sum,ss_store_sk,d_week_seq,sum(UnscaledValue(CASE WHEN (d_day_name = Friday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Thursday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Monday) THEN ss_sales_price ELSE null END)),sum,sum,sum,sum] [sum(UnscaledValue(CASE WHEN (d_day_name = Wednesday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Saturday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Sunday) THEN ss_sales_price ELSE null END)),sum,sum(UnscaledValue(CASE WHEN (d_day_name = Tuesday) 
THEN ss_sales_price ELSE null END)),thu_sales,sun_sales,sum,fri_sales,tue_sales,sum,sum(UnscaledValue(CASE WHEN (d_day_name = Friday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Thursday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Monday) THEN ss_sales_price ELSE null END)),sum,sum,wed_sales,sat_sales,sum,mon_sales,sum] + InputAdapter + Exchange [d_week_seq,ss_store_sk] #1 + WholeStageCodegen + HashAggregate [sum,sum,d_day_name,sum,sum,sum,sum,sum,sum,ss_store_sk,d_week_seq,ss_sales_price,sum,sum,sum,sum,sum,sum] [sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum] + Project [ss_store_sk,ss_sales_price,d_week_seq,d_day_name] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_sold_date_sk,ss_store_sk,ss_sales_price] + Filter [ss_sold_date_sk,ss_store_sk] + Scan parquet default.store_sales [ss_sold_date_sk,ss_store_sk,ss_sales_price] [ss_sold_date_sk,ss_store_sk,ss_sales_price] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen + Project [d_date_sk,d_week_seq,d_day_name] + Filter [d_date_sk,d_week_seq] + Scan parquet default.date_dim [d_date_sk,d_week_seq,d_day_name] [d_date_sk,d_week_seq,d_day_name] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen + Project [s_store_sk,s_store_id,s_store_name] + Filter [s_store_sk,s_store_id] + Scan parquet default.store [s_store_sk,s_store_id,s_store_name] [s_store_sk,s_store_id,s_store_name] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen + Project [d_week_seq] + Filter [d_month_seq,d_week_seq] + Scan parquet default.date_dim [d_month_seq,d_week_seq] [d_month_seq,d_week_seq] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen + Project [fri_sales,tue_sales,sat_sales,s_store_id,sun_sales,d_week_seq,wed_sales,mon_sales,thu_sales] + BroadcastHashJoin [d_week_seq,d_week_seq] + Project [s_store_id,thu_sales,sun_sales,fri_sales,tue_sales,d_week_seq,wed_sales,sat_sales,mon_sales] + BroadcastHashJoin 
[ss_store_sk,s_store_sk] + HashAggregate [sum(UnscaledValue(CASE WHEN (d_day_name = Wednesday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Saturday) THEN ss_sales_price ELSE null END)),sum,sum(UnscaledValue(CASE WHEN (d_day_name = Sunday) THEN ss_sales_price ELSE null END)),sum,sum,sum(UnscaledValue(CASE WHEN (d_day_name = Tuesday) THEN ss_sales_price ELSE null END)),sum,ss_store_sk,d_week_seq,sum(UnscaledValue(CASE WHEN (d_day_name = Friday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Thursday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Monday) THEN ss_sales_price ELSE null END)),sum,sum,sum] [sum(UnscaledValue(CASE WHEN (d_day_name = Wednesday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Saturday) THEN ss_sales_price ELSE null END)),sum,sum(UnscaledValue(CASE WHEN (d_day_name = Sunday) THEN ss_sales_price ELSE null END)),sum,sum,sum(UnscaledValue(CASE WHEN (d_day_name = Tuesday) THEN ss_sales_price ELSE null END)),thu_sales,sum,sun_sales,fri_sales,tue_sales,sum(UnscaledValue(CASE WHEN (d_day_name = Friday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Thursday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Monday) THEN ss_sales_price ELSE null END)),sum,sum,wed_sales,sat_sales,sum,mon_sales] + InputAdapter + ReusedExchange [sum,sum,sum,ss_store_sk,d_week_seq,sum,sum,sum,sum] [sum,sum,sum,ss_store_sk,d_week_seq,sum,sum,sum,sum] #1 + InputAdapter + BroadcastExchange #6 + WholeStageCodegen + Project [s_store_sk,s_store_id] + Filter [s_store_sk,s_store_id] + Scan parquet default.store [s_store_sk,s_store_id] [s_store_sk,s_store_id] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen + Project [d_week_seq] + Filter [d_month_seq,d_week_seq] + Scan parquet default.date_dim [d_month_seq,d_week_seq] [d_month_seq,d_week_seq] diff --git 
a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q6/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q6/explain.txt new file mode 100644 index 000000000..410992cf5 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q6/explain.txt @@ -0,0 +1,58 @@ +== Physical Plan == +TakeOrderedAndProject(limit=100, orderBy=[cnt#1 ASC NULLS FIRST], output=[state#2,cnt#1]) ++- *(8) Project [state#2, cnt#1] + +- *(8) Filter (count(1)#3 >= 10) + +- *(8) HashAggregate(keys=[ca_state#4], functions=[count(1)]) + +- Exchange hashpartitioning(ca_state#4, 200) + +- *(7) HashAggregate(keys=[ca_state#4], functions=[partial_count(1)]) + +- *(7) Project [ca_state#4] + +- *(7) BroadcastHashJoin [ss_item_sk#5], [i_item_sk#6], Inner, BuildRight + :- *(7) Project [ca_state#4, ss_item_sk#5] + : +- *(7) BroadcastHashJoin [ss_sold_date_sk#7], [d_date_sk#8], Inner, BuildRight + : :- *(7) Project [ca_state#4, ss_sold_date_sk#7, ss_item_sk#5] + : : +- *(7) BroadcastHashJoin [c_customer_sk#9], [ss_customer_sk#10], Inner, BuildRight + : : :- *(7) Project [ca_state#4, c_customer_sk#9] + : : : +- *(7) BroadcastHashJoin [ca_address_sk#11], [c_current_addr_sk#12], Inner, BuildRight + : : : :- *(7) Project [ca_address_sk#11, ca_state#4] + : : : : +- *(7) Filter isnotnull(ca_address_sk#11) + : : : : +- *(7) FileScan parquet default.customer_address[ca_address_sk#11,ca_state#4] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_address_sk)], ReadSchema: struct + : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[1, int, true] as bigint))) + : : : +- *(1) Project [c_customer_sk#9, c_current_addr_sk#12] + : : : +- *(1) Filter (isnotnull(c_current_addr_sk#12) && isnotnull(c_customer_sk#9)) + : : : +- *(1) FileScan parquet default.customer[c_customer_sk#9,c_current_addr_sk#12] Batched: true, 
Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_current_addr_sk), IsNotNull(c_customer_sk)], ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[2, int, true] as bigint))) + : : +- *(2) Project [ss_sold_date_sk#7, ss_item_sk#5, ss_customer_sk#10] + : : +- *(2) Filter ((isnotnull(ss_customer_sk#10) && isnotnull(ss_sold_date_sk#7)) && isnotnull(ss_item_sk#5)) + : : +- *(2) FileScan parquet default.store_sales[ss_sold_date_sk#7,ss_item_sk#5,ss_customer_sk#10] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_customer_sk), IsNotNull(ss_sold_date_sk), IsNotNull(ss_item_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(3) Project [d_date_sk#8] + : +- *(3) Filter ((isnotnull(d_month_seq#13) && (d_month_seq#13 = Subquery subquery982)) && isnotnull(d_date_sk#8)) + : : +- Subquery subquery982 + : : +- *(2) HashAggregate(keys=[d_month_seq#13], functions=[]) + : : +- Exchange hashpartitioning(d_month_seq#13, 200) + : : +- *(1) HashAggregate(keys=[d_month_seq#13], functions=[]) + : : +- *(1) Project [d_month_seq#13] + : : +- *(1) Filter (((isnotnull(d_year#14) && isnotnull(d_moy#15)) && (d_year#14 = 2000)) && (d_moy#15 = 1)) + : : +- *(1) FileScan parquet default.date_dim[d_month_seq#13,d_year#14,d_moy#15] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2000), EqualTo(d_moy,1)], ReadSchema: struct + : +- *(3) FileScan parquet default.date_dim[d_date_sk#8,d_month_seq#13] Batched: true, Format: Parquet, Location: 
InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_month_seq), IsNotNull(d_date_sk)], ReadSchema: struct + : +- Subquery subquery982 + : +- *(2) HashAggregate(keys=[d_month_seq#13], functions=[]) + : +- Exchange hashpartitioning(d_month_seq#13, 200) + : +- *(1) HashAggregate(keys=[d_month_seq#13], functions=[]) + : +- *(1) Project [d_month_seq#13] + : +- *(1) Filter (((isnotnull(d_year#14) && isnotnull(d_moy#15)) && (d_year#14 = 2000)) && (d_moy#15 = 1)) + : +- *(1) FileScan parquet default.date_dim[d_month_seq#13,d_year#14,d_moy#15] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2000), EqualTo(d_moy,1)], ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(6) Project [i_item_sk#6] + +- *(6) Filter (cast(i_current_price#16 as decimal(14,7)) > CheckOverflow((1.200000 * promote_precision(avg(i_current_price)#17)), DecimalType(14,7))) + +- *(6) BroadcastHashJoin [i_category#18], [i_category#18#19], LeftOuter, BuildRight + :- *(6) Project [i_item_sk#6, i_current_price#16, i_category#18] + : +- *(6) Filter (isnotnull(i_current_price#16) && isnotnull(i_item_sk#6)) + : +- *(6) FileScan parquet default.item[i_item_sk#6,i_current_price#16,i_category#18] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_current_price), IsNotNull(i_item_sk)], ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(input[1, string, true])) + +- *(5) HashAggregate(keys=[i_category#18], functions=[avg(UnscaledValue(i_current_price#16))]) + +- Exchange hashpartitioning(i_category#18, 200) + +- *(4) HashAggregate(keys=[i_category#18], 
functions=[partial_avg(UnscaledValue(i_current_price#16))]) + +- *(4) Project [i_current_price#16, i_category#18] + +- *(4) Filter isnotnull(i_category#18) + +- *(4) FileScan parquet default.item[i_current_price#16,i_category#18] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_category)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q6/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q6/simplified.txt new file mode 100644 index 000000000..59ea8c0d9 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q6/simplified.txt @@ -0,0 +1,78 @@ +TakeOrderedAndProject [cnt,state] + WholeStageCodegen + Project [state,cnt] + Filter [count(1)] + HashAggregate [ca_state,count,count(1)] [state,cnt,count(1),count,count(1)] + InputAdapter + Exchange [ca_state] #1 + WholeStageCodegen + HashAggregate [ca_state,count,count] [count,count] + Project [ca_state] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [ca_state,ss_item_sk] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ca_state,ss_sold_date_sk,ss_item_sk] + BroadcastHashJoin [c_customer_sk,ss_customer_sk] + Project [ca_state,c_customer_sk] + BroadcastHashJoin [ca_address_sk,c_current_addr_sk] + Project [ca_address_sk,ca_state] + Filter [ca_address_sk] + Scan parquet default.customer_address [ca_address_sk,ca_state] [ca_address_sk,ca_state] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen + Project [c_customer_sk,c_current_addr_sk] + Filter [c_current_addr_sk,c_customer_sk] + Scan parquet default.customer [c_customer_sk,c_current_addr_sk] [c_customer_sk,c_current_addr_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen + Project [ss_sold_date_sk,ss_item_sk,ss_customer_sk] + Filter [ss_customer_sk,ss_sold_date_sk,ss_item_sk] + Scan parquet 
default.store_sales [ss_sold_date_sk,ss_item_sk,ss_customer_sk] [ss_sold_date_sk,ss_item_sk,ss_customer_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen + Project [d_date_sk] + Filter [d_month_seq,d_date_sk] + Subquery #1 + WholeStageCodegen + HashAggregate [d_month_seq] + InputAdapter + Exchange [d_month_seq] #5 + WholeStageCodegen + HashAggregate [d_month_seq] + Project [d_month_seq] + Filter [d_year,d_moy] + Scan parquet default.date_dim [d_month_seq,d_year,d_moy] [d_month_seq,d_year,d_moy] + Scan parquet default.date_dim [d_date_sk,d_month_seq] [d_date_sk,d_month_seq] + Subquery #1 + WholeStageCodegen + HashAggregate [d_month_seq] + InputAdapter + Exchange [d_month_seq] #5 + WholeStageCodegen + HashAggregate [d_month_seq] + Project [d_month_seq] + Filter [d_year,d_moy] + Scan parquet default.date_dim [d_month_seq,d_year,d_moy] [d_month_seq,d_year,d_moy] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen + Project [i_item_sk] + Filter [i_current_price,avg(i_current_price)] + BroadcastHashJoin [i_category,i_category] + Project [i_item_sk,i_current_price,i_category] + Filter [i_current_price,i_item_sk] + Scan parquet default.item [i_item_sk,i_current_price,i_category] [i_item_sk,i_current_price,i_category] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen + HashAggregate [i_category,sum,count,avg(UnscaledValue(i_current_price))] [avg(i_current_price),sum,i_category,count,avg(UnscaledValue(i_current_price))] + InputAdapter + Exchange [i_category] #8 + WholeStageCodegen + HashAggregate [sum,sum,count,i_category,i_current_price,count] [sum,count,sum,count] + Project [i_current_price,i_category] + Filter [i_category] + Scan parquet default.item [i_current_price,i_category] [i_current_price,i_category] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q60/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q60/explain.txt new file mode 100644 index 000000000..c5587f4e4 --- /dev/null +++ 
b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q60/explain.txt @@ -0,0 +1,65 @@ +== Physical Plan == +TakeOrderedAndProject(limit=100, orderBy=[i_item_id#1 ASC NULLS FIRST,total_sales#2 ASC NULLS FIRST], output=[i_item_id#1,total_sales#2]) ++- *(20) HashAggregate(keys=[i_item_id#1], functions=[sum(total_sales#3)]) + +- Exchange hashpartitioning(i_item_id#1, 200) + +- *(19) HashAggregate(keys=[i_item_id#1], functions=[partial_sum(total_sales#3)]) + +- Union + :- *(6) HashAggregate(keys=[i_item_id#1], functions=[sum(UnscaledValue(ss_ext_sales_price#4))]) + : +- Exchange hashpartitioning(i_item_id#1, 200) + : +- *(5) HashAggregate(keys=[i_item_id#1], functions=[partial_sum(UnscaledValue(ss_ext_sales_price#4))]) + : +- *(5) Project [ss_ext_sales_price#4, i_item_id#1] + : +- *(5) BroadcastHashJoin [ss_item_sk#5], [i_item_sk#6], Inner, BuildRight + : :- *(5) Project [ss_item_sk#5, ss_ext_sales_price#4] + : : +- *(5) BroadcastHashJoin [ss_addr_sk#7], [ca_address_sk#8], Inner, BuildRight + : : :- *(5) Project [ss_item_sk#5, ss_addr_sk#7, ss_ext_sales_price#4] + : : : +- *(5) BroadcastHashJoin [ss_sold_date_sk#9], [d_date_sk#10], Inner, BuildRight + : : : :- *(5) Project [ss_sold_date_sk#9, ss_item_sk#5, ss_addr_sk#7, ss_ext_sales_price#4] + : : : : +- *(5) Filter ((isnotnull(ss_sold_date_sk#9) && isnotnull(ss_addr_sk#7)) && isnotnull(ss_item_sk#5)) + : : : : +- *(5) FileScan parquet default.store_sales[ss_sold_date_sk#9,ss_item_sk#5,ss_addr_sk#7,ss_ext_sales_price#4] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_addr_sk), IsNotNull(ss_item_sk)], ReadSchema: struct + : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : +- *(1) Project [d_date_sk#10] + : : : +- *(1) Filter ((((isnotnull(d_year#11) && isnotnull(d_moy#12)) && (d_year#11 = 1998)) && 
(d_moy#12 = 9)) && isnotnull(d_date_sk#10)) + : : : +- *(1) FileScan parquet default.date_dim[d_date_sk#10,d_year#11,d_moy#12] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,1998), EqualTo(d_moy,9), IsNotNull(d_date_sk)], ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(2) Project [ca_address_sk#8] + : : +- *(2) Filter ((isnotnull(ca_gmt_offset#13) && (ca_gmt_offset#13 = -5.00)) && isnotnull(ca_address_sk#8)) + : : +- *(2) FileScan parquet default.customer_address[ca_address_sk#8,ca_gmt_offset#13] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_gmt_offset), EqualTo(ca_gmt_offset,-5.00), IsNotNull(ca_address_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(4) BroadcastHashJoin [i_item_id#1], [i_item_id#1#14], LeftSemi, BuildRight + : :- *(4) Project [i_item_sk#6, i_item_id#1] + : : +- *(4) Filter isnotnull(i_item_sk#6) + : : +- *(4) FileScan parquet default.item[i_item_sk#6,i_item_id#1] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true])) + : +- *(3) Project [i_item_id#1 AS i_item_id#1#14] + : +- *(3) Filter (isnotnull(i_category#15) && (i_category#15 = Music)) + : +- *(3) FileScan parquet default.item[i_item_id#1,i_category#15] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], 
PushedFilters: [IsNotNull(i_category), EqualTo(i_category,Music)], ReadSchema: struct + :- *(12) HashAggregate(keys=[i_item_id#1], functions=[sum(UnscaledValue(cs_ext_sales_price#16))]) + : +- Exchange hashpartitioning(i_item_id#1, 200) + : +- *(11) HashAggregate(keys=[i_item_id#1], functions=[partial_sum(UnscaledValue(cs_ext_sales_price#16))]) + : +- *(11) Project [cs_ext_sales_price#16, i_item_id#1] + : +- *(11) BroadcastHashJoin [cs_item_sk#17], [i_item_sk#6], Inner, BuildRight + : :- *(11) Project [cs_item_sk#17, cs_ext_sales_price#16] + : : +- *(11) BroadcastHashJoin [cs_bill_addr_sk#18], [ca_address_sk#8], Inner, BuildRight + : : :- *(11) Project [cs_bill_addr_sk#18, cs_item_sk#17, cs_ext_sales_price#16] + : : : +- *(11) BroadcastHashJoin [cs_sold_date_sk#19], [d_date_sk#10], Inner, BuildRight + : : : :- *(11) Project [cs_sold_date_sk#19, cs_bill_addr_sk#18, cs_item_sk#17, cs_ext_sales_price#16] + : : : : +- *(11) Filter ((isnotnull(cs_sold_date_sk#19) && isnotnull(cs_bill_addr_sk#18)) && isnotnull(cs_item_sk#17)) + : : : : +- *(11) FileScan parquet default.catalog_sales[cs_sold_date_sk#19,cs_bill_addr_sk#18,cs_item_sk#17,cs_ext_sales_price#16] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_sold_date_sk), IsNotNull(cs_bill_addr_sk), IsNotNull(cs_item_sk)], ReadSchema: struct + : : : +- ReusedExchange [d_date_sk#10], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- ReusedExchange [ca_address_sk#8], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- ReusedExchange [i_item_sk#6, i_item_id#1], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(18) HashAggregate(keys=[i_item_id#1], functions=[sum(UnscaledValue(ws_ext_sales_price#20))]) + +- Exchange hashpartitioning(i_item_id#1, 200) + +- 
*(17) HashAggregate(keys=[i_item_id#1], functions=[partial_sum(UnscaledValue(ws_ext_sales_price#20))]) + +- *(17) Project [ws_ext_sales_price#20, i_item_id#1] + +- *(17) BroadcastHashJoin [ws_item_sk#21], [i_item_sk#6], Inner, BuildRight + :- *(17) Project [ws_item_sk#21, ws_ext_sales_price#20] + : +- *(17) BroadcastHashJoin [ws_bill_addr_sk#22], [ca_address_sk#8], Inner, BuildRight + : :- *(17) Project [ws_item_sk#21, ws_bill_addr_sk#22, ws_ext_sales_price#20] + : : +- *(17) BroadcastHashJoin [ws_sold_date_sk#23], [d_date_sk#10], Inner, BuildRight + : : :- *(17) Project [ws_sold_date_sk#23, ws_item_sk#21, ws_bill_addr_sk#22, ws_ext_sales_price#20] + : : : +- *(17) Filter ((isnotnull(ws_sold_date_sk#23) && isnotnull(ws_bill_addr_sk#22)) && isnotnull(ws_item_sk#21)) + : : : +- *(17) FileScan parquet default.web_sales[ws_sold_date_sk#23,ws_item_sk#21,ws_bill_addr_sk#22,ws_ext_sales_price#20] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_sold_date_sk), IsNotNull(ws_bill_addr_sk), IsNotNull(ws_item_sk)], ReadSchema: struct + : : +- ReusedExchange [d_date_sk#10], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- ReusedExchange [ca_address_sk#8], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- ReusedExchange [i_item_sk#6, i_item_id#1], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q60/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q60/simplified.txt new file mode 100644 index 000000000..e37083df9 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q60/simplified.txt @@ -0,0 +1,91 @@ +TakeOrderedAndProject [i_item_id,total_sales] + WholeStageCodegen + 
HashAggregate [i_item_id,sum,sum(total_sales)] [sum(total_sales),total_sales,sum] + InputAdapter + Exchange [i_item_id] #1 + WholeStageCodegen + HashAggregate [i_item_id,total_sales,sum,sum] [sum,sum] + InputAdapter + Union + WholeStageCodegen + HashAggregate [i_item_id,sum,sum(UnscaledValue(ss_ext_sales_price))] [sum(UnscaledValue(ss_ext_sales_price)),total_sales,sum] + InputAdapter + Exchange [i_item_id] #2 + WholeStageCodegen + HashAggregate [i_item_id,ss_ext_sales_price,sum,sum] [sum,sum] + Project [ss_ext_sales_price,i_item_id] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [ss_item_sk,ss_ext_sales_price] + BroadcastHashJoin [ss_addr_sk,ca_address_sk] + Project [ss_item_sk,ss_addr_sk,ss_ext_sales_price] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_sold_date_sk,ss_item_sk,ss_addr_sk,ss_ext_sales_price] + Filter [ss_sold_date_sk,ss_addr_sk,ss_item_sk] + Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_addr_sk,ss_ext_sales_price] [ss_sold_date_sk,ss_item_sk,ss_addr_sk,ss_ext_sales_price] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen + Project [d_date_sk] + Filter [d_year,d_moy,d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_year,d_moy] [d_date_sk,d_year,d_moy] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen + Project [ca_address_sk] + Filter [ca_gmt_offset,ca_address_sk] + Scan parquet default.customer_address [ca_address_sk,ca_gmt_offset] [ca_address_sk,ca_gmt_offset] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen + BroadcastHashJoin [i_item_id,i_item_id] + Project [i_item_sk,i_item_id] + Filter [i_item_sk] + Scan parquet default.item [i_item_sk,i_item_id] [i_item_sk,i_item_id] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen + Project [i_item_id] + Filter [i_category] + Scan parquet default.item [i_item_id,i_category] [i_item_id,i_category] + WholeStageCodegen + HashAggregate [i_item_id,sum,sum(UnscaledValue(cs_ext_sales_price))] 
[sum(UnscaledValue(cs_ext_sales_price)),total_sales,sum] + InputAdapter + Exchange [i_item_id] #7 + WholeStageCodegen + HashAggregate [i_item_id,cs_ext_sales_price,sum,sum] [sum,sum] + Project [cs_ext_sales_price,i_item_id] + BroadcastHashJoin [cs_item_sk,i_item_sk] + Project [cs_item_sk,cs_ext_sales_price] + BroadcastHashJoin [cs_bill_addr_sk,ca_address_sk] + Project [cs_bill_addr_sk,cs_item_sk,cs_ext_sales_price] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_sold_date_sk,cs_bill_addr_sk,cs_item_sk,cs_ext_sales_price] + Filter [cs_sold_date_sk,cs_bill_addr_sk,cs_item_sk] + Scan parquet default.catalog_sales [cs_sold_date_sk,cs_bill_addr_sk,cs_item_sk,cs_ext_sales_price] [cs_sold_date_sk,cs_bill_addr_sk,cs_item_sk,cs_ext_sales_price] + InputAdapter + ReusedExchange [d_date_sk] [d_date_sk] #3 + InputAdapter + ReusedExchange [ca_address_sk] [ca_address_sk] #4 + InputAdapter + ReusedExchange [i_item_sk,i_item_id] [i_item_sk,i_item_id] #5 + WholeStageCodegen + HashAggregate [i_item_id,sum,sum(UnscaledValue(ws_ext_sales_price))] [sum(UnscaledValue(ws_ext_sales_price)),total_sales,sum] + InputAdapter + Exchange [i_item_id] #8 + WholeStageCodegen + HashAggregate [i_item_id,ws_ext_sales_price,sum,sum] [sum,sum] + Project [ws_ext_sales_price,i_item_id] + BroadcastHashJoin [ws_item_sk,i_item_sk] + Project [ws_item_sk,ws_ext_sales_price] + BroadcastHashJoin [ws_bill_addr_sk,ca_address_sk] + Project [ws_item_sk,ws_bill_addr_sk,ws_ext_sales_price] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Project [ws_sold_date_sk,ws_item_sk,ws_bill_addr_sk,ws_ext_sales_price] + Filter [ws_sold_date_sk,ws_bill_addr_sk,ws_item_sk] + Scan parquet default.web_sales [ws_sold_date_sk,ws_item_sk,ws_bill_addr_sk,ws_ext_sales_price] [ws_sold_date_sk,ws_item_sk,ws_bill_addr_sk,ws_ext_sales_price] + InputAdapter + ReusedExchange [d_date_sk] [d_date_sk] #3 + InputAdapter + ReusedExchange [ca_address_sk] [ca_address_sk] #4 + InputAdapter + ReusedExchange [i_item_sk,i_item_id] 
[i_item_sk,i_item_id] #5 diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q61/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q61/explain.txt new file mode 100644 index 000000000..49bd37330 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q61/explain.txt @@ -0,0 +1,68 @@ +== Physical Plan == +TakeOrderedAndProject(limit=100, orderBy=[promotions#1 ASC NULLS FIRST,total#2 ASC NULLS FIRST], output=[promotions#1,total#2,(CAST((CAST(CAST(promotions AS DECIMAL(15,4)) AS DECIMAL(15,4)) / CAST(CAST(total AS DECIMAL(15,4)) AS DECIMAL(15,4))) AS DECIMAL(35,20)) * CAST(CAST(100 AS DECIMAL(3,0)) AS DECIMAL(35,20)))#3]) ++- *(16) Project [promotions#1, total#2, CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(promotions#1 as decimal(15,4))) / promote_precision(cast(total#2 as decimal(15,4)))), DecimalType(35,20))) * 100.00000000000000000000), DecimalType(38,19)) AS (CAST((CAST(CAST(promotions AS DECIMAL(15,4)) AS DECIMAL(15,4)) / CAST(CAST(total AS DECIMAL(15,4)) AS DECIMAL(15,4))) AS DECIMAL(35,20)) * CAST(CAST(100 AS DECIMAL(3,0)) AS DECIMAL(35,20)))#3] + +- BroadcastNestedLoopJoin BuildRight, Inner + :- *(8) HashAggregate(keys=[], functions=[sum(UnscaledValue(ss_ext_sales_price#4))]) + : +- Exchange SinglePartition + : +- *(7) HashAggregate(keys=[], functions=[partial_sum(UnscaledValue(ss_ext_sales_price#4))]) + : +- *(7) Project [ss_ext_sales_price#4] + : +- *(7) BroadcastHashJoin [ss_item_sk#5], [i_item_sk#6], Inner, BuildRight + : :- *(7) Project [ss_item_sk#5, ss_ext_sales_price#4] + : : +- *(7) BroadcastHashJoin [c_current_addr_sk#7], [ca_address_sk#8], Inner, BuildRight + : : :- *(7) Project [ss_item_sk#5, ss_ext_sales_price#4, c_current_addr_sk#7] + : : : +- *(7) BroadcastHashJoin [ss_customer_sk#9], [c_customer_sk#10], Inner, BuildRight + : : : :- *(7) Project [ss_item_sk#5, ss_customer_sk#9, ss_ext_sales_price#4] + : : : : +- *(7) BroadcastHashJoin 
[ss_sold_date_sk#11], [d_date_sk#12], Inner, BuildRight + : : : : :- *(7) Project [ss_sold_date_sk#11, ss_item_sk#5, ss_customer_sk#9, ss_ext_sales_price#4] + : : : : : +- *(7) BroadcastHashJoin [ss_promo_sk#13], [p_promo_sk#14], Inner, BuildRight + : : : : : :- *(7) Project [ss_sold_date_sk#11, ss_item_sk#5, ss_customer_sk#9, ss_promo_sk#13, ss_ext_sales_price#4] + : : : : : : +- *(7) BroadcastHashJoin [ss_store_sk#15], [s_store_sk#16], Inner, BuildRight + : : : : : : :- *(7) Project [ss_sold_date_sk#11, ss_item_sk#5, ss_customer_sk#9, ss_store_sk#15, ss_promo_sk#13, ss_ext_sales_price#4] + : : : : : : : +- *(7) Filter ((((isnotnull(ss_store_sk#15) && isnotnull(ss_promo_sk#13)) && isnotnull(ss_sold_date_sk#11)) && isnotnull(ss_customer_sk#9)) && isnotnull(ss_item_sk#5)) + : : : : : : : +- *(7) FileScan parquet default.store_sales[ss_sold_date_sk#11,ss_item_sk#5,ss_customer_sk#9,ss_store_sk#15,ss_promo_sk#13,ss_ext_sales_price#4] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_store_sk), IsNotNull(ss_promo_sk), IsNotNull(ss_sold_date_sk), IsNotNull(ss_custome..., ReadSchema: struct + : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : : +- *(2) Project [p_promo_sk#14] + : : : : : +- *(2) Filter ((((p_channel_dmail#18 = Y) || (p_channel_email#19 = Y)) || (p_channel_tv#20 = Y)) && isnotnull(p_promo_sk#14)) + : : : : : +- *(2) FileScan parquet default.promotion[p_promo_sk#14,p_channel_dmail#18,p_channel_email#19,p_channel_tv#20] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/promotion], PartitionFilters: [], PushedFilters: [Or(Or(EqualTo(p_channel_dmail,Y),EqualTo(p_channel_email,Y)),EqualTo(p_channel_tv,Y)), IsNotNull..., ReadSchema: struct + : : : : +- BroadcastExchange 
HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : +- *(3) Project [d_date_sk#12] + : : : : +- *(3) Filter ((((isnotnull(d_year#21) && isnotnull(d_moy#22)) && (d_year#21 = 1998)) && (d_moy#22 = 11)) && isnotnull(d_date_sk#12)) + : : : : +- *(3) FileScan parquet default.date_dim[d_date_sk#12,d_year#21,d_moy#22] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,1998), EqualTo(d_moy,11), IsNotNull(d_date_sk)], ReadSchema: struct + : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : +- *(4) Project [c_customer_sk#10, c_current_addr_sk#7] + : : : +- *(4) Filter (isnotnull(c_customer_sk#10) && isnotnull(c_current_addr_sk#7)) + : : : +- *(4) FileScan parquet default.customer[c_customer_sk#10,c_current_addr_sk#7] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_current_addr_sk)], ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(5) Project [ca_address_sk#8] + : : +- *(5) Filter ((isnotnull(ca_gmt_offset#23) && (ca_gmt_offset#23 = -5.00)) && isnotnull(ca_address_sk#8)) + : : +- *(5) FileScan parquet default.customer_address[ca_address_sk#8,ca_gmt_offset#23] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_gmt_offset), EqualTo(ca_gmt_offset,-5.00), IsNotNull(ca_address_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(6) Project [i_item_sk#6] + : +- *(6) Filter 
((isnotnull(i_category#24) && (i_category#24 = Jewelry)) && isnotnull(i_item_sk#6)) + : +- *(6) FileScan parquet default.item[i_item_sk#6,i_category#24] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_category), EqualTo(i_category,Jewelry), IsNotNull(i_item_sk)], ReadSchema: struct + +- BroadcastExchange IdentityBroadcastMode + +- *(15) HashAggregate(keys=[], functions=[sum(UnscaledValue(ss_ext_sales_price#4))]) + +- Exchange SinglePartition + +- *(14) HashAggregate(keys=[], functions=[partial_sum(UnscaledValue(ss_ext_sales_price#4))]) + +- *(14) Project [ss_ext_sales_price#4] + +- *(14) BroadcastHashJoin [ss_item_sk#5], [i_item_sk#6], Inner, BuildRight + :- *(14) Project [ss_item_sk#5, ss_ext_sales_price#4] + : +- *(14) BroadcastHashJoin [c_current_addr_sk#7], [ca_address_sk#8], Inner, BuildRight + : :- *(14) Project [ss_item_sk#5, ss_ext_sales_price#4, c_current_addr_sk#7] + : : +- *(14) BroadcastHashJoin [ss_customer_sk#9], [c_customer_sk#10], Inner, BuildRight + : : :- *(14) Project [ss_item_sk#5, ss_customer_sk#9, ss_ext_sales_price#4] + : : : +- *(14) BroadcastHashJoin [ss_sold_date_sk#11], [d_date_sk#12], Inner, BuildRight + : : : :- *(14) Project [ss_sold_date_sk#11, ss_item_sk#5, ss_customer_sk#9, ss_ext_sales_price#4] + : : : : +- *(14) BroadcastHashJoin [ss_store_sk#15], [s_store_sk#16], Inner, BuildRight + : : : : :- *(14) Project [ss_sold_date_sk#11, ss_item_sk#5, ss_customer_sk#9, ss_store_sk#15, ss_ext_sales_price#4] + : : : : : +- *(14) Filter (((isnotnull(ss_store_sk#15) && isnotnull(ss_sold_date_sk#11)) && isnotnull(ss_customer_sk#9)) && isnotnull(ss_item_sk#5)) + : : : : : +- *(14) FileScan parquet default.store_sales[ss_sold_date_sk#11,ss_item_sk#5,ss_customer_sk#9,ss_store_sk#15,ss_ext_sales_price#4] Batched: true, Format: Parquet, Location: 
InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_store_sk), IsNotNull(ss_sold_date_sk), IsNotNull(ss_customer_sk), IsNotNull(ss_item..., ReadSchema: struct120 days #8]) ++- *(6) HashAggregate(keys=[substring(w_warehouse_name#9, 1, 20)#10, sm_type#2, web_name#3], functions=[sum(cast(CASE WHEN ((ws_ship_date_sk#11 - ws_sold_date_sk#12) <= 30) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (((ws_ship_date_sk#11 - ws_sold_date_sk#12) > 30) && ((ws_ship_date_sk#11 - ws_sold_date_sk#12) <= 60)) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (((ws_ship_date_sk#11 - ws_sold_date_sk#12) > 60) && ((ws_ship_date_sk#11 - ws_sold_date_sk#12) <= 90)) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (((ws_ship_date_sk#11 - ws_sold_date_sk#12) > 90) && ((ws_ship_date_sk#11 - ws_sold_date_sk#12) <= 120)) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN ((ws_ship_date_sk#11 - ws_sold_date_sk#12) > 120) THEN 1 ELSE 0 END as bigint))]) + +- Exchange hashpartitioning(substring(w_warehouse_name#9, 1, 20)#10, sm_type#2, web_name#3, 200) + +- *(5) HashAggregate(keys=[substring(w_warehouse_name#9, 1, 20) AS substring(w_warehouse_name#9, 1, 20)#10, sm_type#2, web_name#3], functions=[partial_sum(cast(CASE WHEN ((ws_ship_date_sk#11 - ws_sold_date_sk#12) <= 30) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (((ws_ship_date_sk#11 - ws_sold_date_sk#12) > 30) && ((ws_ship_date_sk#11 - ws_sold_date_sk#12) <= 60)) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (((ws_ship_date_sk#11 - ws_sold_date_sk#12) > 60) && ((ws_ship_date_sk#11 - ws_sold_date_sk#12) <= 90)) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (((ws_ship_date_sk#11 - ws_sold_date_sk#12) > 90) && ((ws_ship_date_sk#11 - ws_sold_date_sk#12) <= 120)) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN ((ws_ship_date_sk#11 - ws_sold_date_sk#12) > 120) THEN 1 ELSE 0 END as bigint))]) + +- *(5) Project 
[ws_sold_date_sk#12, ws_ship_date_sk#11, w_warehouse_name#9, sm_type#2, web_name#3] + +- *(5) BroadcastHashJoin [ws_ship_date_sk#11], [d_date_sk#13], Inner, BuildRight + :- *(5) Project [ws_sold_date_sk#12, ws_ship_date_sk#11, w_warehouse_name#9, sm_type#2, web_name#3] + : +- *(5) BroadcastHashJoin [ws_web_site_sk#14], [web_site_sk#15], Inner, BuildRight + : :- *(5) Project [ws_sold_date_sk#12, ws_ship_date_sk#11, ws_web_site_sk#14, w_warehouse_name#9, sm_type#2] + : : +- *(5) BroadcastHashJoin [ws_ship_mode_sk#16], [sm_ship_mode_sk#17], Inner, BuildRight + : : :- *(5) Project [ws_sold_date_sk#12, ws_ship_date_sk#11, ws_web_site_sk#14, ws_ship_mode_sk#16, w_warehouse_name#9] + : : : +- *(5) BroadcastHashJoin [ws_warehouse_sk#18], [w_warehouse_sk#19], Inner, BuildRight + : : : :- *(5) Project [ws_sold_date_sk#12, ws_ship_date_sk#11, ws_web_site_sk#14, ws_ship_mode_sk#16, ws_warehouse_sk#18] + : : : : +- *(5) Filter (((isnotnull(ws_warehouse_sk#18) && isnotnull(ws_ship_mode_sk#16)) && isnotnull(ws_web_site_sk#14)) && isnotnull(ws_ship_date_sk#11)) + : : : : +- *(5) FileScan parquet default.web_sales[ws_sold_date_sk#12,ws_ship_date_sk#11,ws_web_site_sk#14,ws_ship_mode_sk#16,ws_warehouse_sk#18] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_warehouse_sk), IsNotNull(ws_ship_mode_sk), IsNotNull(ws_web_site_sk), IsNotNull(ws_..., ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(2) Project [sm_ship_mode_sk#17, sm_type#2] + : : +- *(2) Filter isnotnull(sm_ship_mode_sk#17) + : : +- *(2) FileScan parquet default.ship_mode[sm_ship_mode_sk#17,sm_type#2] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/ship_mode], PartitionFilters: [], PushedFilters: [IsNotNull(sm_ship_mode_sk)], ReadSchema: struct + : 
+- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(3) Project [web_site_sk#15, web_name#3] + : +- *(3) Filter isnotnull(web_site_sk#15) + : +- *(3) FileScan parquet default.web_site[web_site_sk#15,web_name#3] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_site], PartitionFilters: [], PushedFilters: [IsNotNull(web_site_sk)], ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(4) Project [d_date_sk#13] + +- *(4) Filter (((isnotnull(d_month_seq#20) && (d_month_seq#20 >= 1200)) && (d_month_seq#20 <= 1211)) && isnotnull(d_date_sk#13)) + +- *(4) FileScan parquet default.date_dim[d_date_sk#13,d_month_seq#20] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1200), LessThanOrEqual(d_month_seq,1211),..., ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q62/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q62/simplified.txt new file mode 100644 index 000000000..a6a12999a --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q62/simplified.txt @@ -0,0 +1,42 @@ +TakeOrderedAndProject [web_name,substring(w_warehouse_name, 1, 20),91 - 120 days ,31 - 60 days ,>120 days ,30 days ,sm_type,61 - 90 days ] + WholeStageCodegen + HashAggregate [web_name,sum,sum,sum,sum(cast(CASE WHEN ((ws_ship_date_sk - ws_sold_date_sk) <= 30) THEN 1 ELSE 0 END as bigint)),sum,substring(w_warehouse_name, 1, 20),sum(cast(CASE WHEN (((ws_ship_date_sk - ws_sold_date_sk) > 60) && ((ws_ship_date_sk - ws_sold_date_sk) <= 90)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN ((ws_ship_date_sk - ws_sold_date_sk) > 120) THEN 1 ELSE 
0 END as bigint)),sum,sm_type,sum(cast(CASE WHEN (((ws_ship_date_sk - ws_sold_date_sk) > 30) && ((ws_ship_date_sk - ws_sold_date_sk) <= 60)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (((ws_ship_date_sk - ws_sold_date_sk) > 90) && ((ws_ship_date_sk - ws_sold_date_sk) <= 120)) THEN 1 ELSE 0 END as bigint))] [substring(w_warehouse_name, 1, 20),sum,sum,91 - 120 days ,31 - 60 days ,>120 days ,sum,30 days ,sum(cast(CASE WHEN ((ws_ship_date_sk - ws_sold_date_sk) <= 30) THEN 1 ELSE 0 END as bigint)),sum,sum(cast(CASE WHEN (((ws_ship_date_sk - ws_sold_date_sk) > 60) && ((ws_ship_date_sk - ws_sold_date_sk) <= 90)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN ((ws_ship_date_sk - ws_sold_date_sk) > 120) THEN 1 ELSE 0 END as bigint)),sum,sum(cast(CASE WHEN (((ws_ship_date_sk - ws_sold_date_sk) > 30) && ((ws_ship_date_sk - ws_sold_date_sk) <= 60)) THEN 1 ELSE 0 END as bigint)),61 - 90 days ,sum(cast(CASE WHEN (((ws_ship_date_sk - ws_sold_date_sk) > 90) && ((ws_ship_date_sk - ws_sold_date_sk) <= 120)) THEN 1 ELSE 0 END as bigint))] + InputAdapter + Exchange [substring(w_warehouse_name, 1, 20),sm_type,web_name] #1 + WholeStageCodegen + HashAggregate [web_name,sum,sum,ws_sold_date_sk,w_warehouse_name,sum,sum,sum,sum,sum,substring(w_warehouse_name, 1, 20),sum,sm_type,sum,sum,ws_ship_date_sk] [sum,sum,sum,sum,sum,sum,sum,substring(w_warehouse_name, 1, 20),sum,sum,sum] + Project [web_name,ws_sold_date_sk,sm_type,ws_ship_date_sk,w_warehouse_name] + BroadcastHashJoin [ws_ship_date_sk,d_date_sk] + Project [web_name,ws_sold_date_sk,sm_type,ws_ship_date_sk,w_warehouse_name] + BroadcastHashJoin [ws_web_site_sk,web_site_sk] + Project [ws_web_site_sk,ws_sold_date_sk,sm_type,ws_ship_date_sk,w_warehouse_name] + BroadcastHashJoin [ws_ship_mode_sk,sm_ship_mode_sk] + Project [ws_web_site_sk,ws_ship_mode_sk,ws_sold_date_sk,ws_ship_date_sk,w_warehouse_name] + BroadcastHashJoin [ws_warehouse_sk,w_warehouse_sk] + Project 
[ws_web_site_sk,ws_ship_mode_sk,ws_sold_date_sk,ws_ship_date_sk,ws_warehouse_sk] + Filter [ws_warehouse_sk,ws_ship_mode_sk,ws_web_site_sk,ws_ship_date_sk] + Scan parquet default.web_sales [ws_web_site_sk,ws_ship_mode_sk,ws_sold_date_sk,ws_ship_date_sk,ws_warehouse_sk] [ws_web_site_sk,ws_ship_mode_sk,ws_sold_date_sk,ws_ship_date_sk,ws_warehouse_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen + Project [w_warehouse_sk,w_warehouse_name] + Filter [w_warehouse_sk] + Scan parquet default.warehouse [w_warehouse_sk,w_warehouse_name] [w_warehouse_sk,w_warehouse_name] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen + Project [sm_ship_mode_sk,sm_type] + Filter [sm_ship_mode_sk] + Scan parquet default.ship_mode [sm_ship_mode_sk,sm_type] [sm_ship_mode_sk,sm_type] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen + Project [web_site_sk,web_name] + Filter [web_site_sk] + Scan parquet default.web_site [web_site_sk,web_name] [web_site_sk,web_name] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen + Project [d_date_sk] + Filter [d_month_seq,d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_month_seq] [d_date_sk,d_month_seq] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q63/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q63/explain.txt new file mode 100644 index 000000000..c1e90579f --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q63/explain.txt @@ -0,0 +1,31 @@ +== Physical Plan == +TakeOrderedAndProject(limit=100, orderBy=[i_manager_id#1 ASC NULLS FIRST,avg_monthly_sales#2 ASC NULLS FIRST,sum_sales#3 ASC NULLS FIRST], output=[i_manager_id#1,sum_sales#3,avg_monthly_sales#2]) ++- *(7) Project [i_manager_id#1, sum_sales#3, avg_monthly_sales#2] + +- *(7) Filter (CASE WHEN (avg_monthly_sales#2 > 0.000000) THEN CheckOverflow((promote_precision(abs(CheckOverflow((promote_precision(cast(sum_sales#3 as decimal(22,6))) - 
promote_precision(cast(avg_monthly_sales#2 as decimal(22,6)))), DecimalType(22,6)))) / promote_precision(cast(avg_monthly_sales#2 as decimal(22,6)))), DecimalType(38,16)) ELSE null END > 0.1000000000000000) + +- Window [avg(_w0#4) windowspecdefinition(i_manager_id#1, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS avg_monthly_sales#2], [i_manager_id#1] + +- *(6) Sort [i_manager_id#1 ASC NULLS FIRST], false, 0 + +- Exchange hashpartitioning(i_manager_id#1, 200) + +- *(5) HashAggregate(keys=[i_manager_id#1, d_moy#5], functions=[sum(UnscaledValue(ss_sales_price#6))]) + +- Exchange hashpartitioning(i_manager_id#1, d_moy#5, 200) + +- *(4) HashAggregate(keys=[i_manager_id#1, d_moy#5], functions=[partial_sum(UnscaledValue(ss_sales_price#6))]) + +- *(4) Project [i_manager_id#1, ss_sales_price#6, d_moy#5] + +- *(4) BroadcastHashJoin [ss_store_sk#7], [s_store_sk#8], Inner, BuildRight + :- *(4) Project [i_manager_id#1, ss_store_sk#7, ss_sales_price#6, d_moy#5] + : +- *(4) BroadcastHashJoin [ss_sold_date_sk#9], [d_date_sk#10], Inner, BuildRight + : :- *(4) Project [i_manager_id#1, ss_sold_date_sk#9, ss_store_sk#7, ss_sales_price#6] + : : +- *(4) BroadcastHashJoin [i_item_sk#11], [ss_item_sk#12], Inner, BuildRight + : : :- *(4) Project [i_item_sk#11, i_manager_id#1] + : : : +- *(4) Filter ((((i_category#13 IN (Books,Children,Electronics) && i_class#14 IN (personal,portable,refernece,self-help)) && i_brand#15 IN (scholaramalgamalg #16,scholaramalgamalg #17,exportiunivamalg #18,scholaramalgamalg #18)) || ((i_category#13 IN (Women,Music,Men) && i_class#14 IN (accessories,classical,fragrances,pants)) && i_brand#15 IN (amalgimporto #19,edu packscholar #19,exportiimporto #19,importoamalg #19))) && isnotnull(i_item_sk#11)) + : : : +- *(4) FileScan parquet default.item[i_item_sk#11,i_brand#15,i_class#14,i_category#13,i_manager_id#1] Batched: true, Format: Parquet, Location: 
InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [Or(And(And(In(i_category, [Books,Children,Electronics]),In(i_class, [personal,portable,refernece..., ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[1, int, true] as bigint))) + : : +- *(1) Project [ss_sold_date_sk#9, ss_item_sk#12, ss_store_sk#7, ss_sales_price#6] + : : +- *(1) Filter ((isnotnull(ss_item_sk#12) && isnotnull(ss_sold_date_sk#9)) && isnotnull(ss_store_sk#7)) + : : +- *(1) FileScan parquet default.store_sales[ss_sold_date_sk#9,ss_item_sk#12,ss_store_sk#7,ss_sales_price#6] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(2) Project [d_date_sk#10, d_moy#5] + : +- *(2) Filter (d_month_seq#20 INSET (1200,1211,1205,1201,1206,1210,1207,1202,1209,1203,1208,1204) && isnotnull(d_date_sk#10)) + : +- *(2) FileScan parquet default.date_dim[d_date_sk#10,d_month_seq#20,d_moy#5] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [In(d_month_seq, [1200,1211,1205,1201,1206,1210,1207,1202,1209,1203,1208,1204]), IsNotNull(d_date..., ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(3) Project [s_store_sk#8] + +- *(3) Filter isnotnull(s_store_sk#8) + +- *(3) FileScan parquet default.store[s_store_sk#8] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_store_sk)], ReadSchema: struct \ No 
newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q63/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q63/simplified.txt new file mode 100644 index 000000000..941d3fe37 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q63/simplified.txt @@ -0,0 +1,43 @@ +TakeOrderedAndProject [i_manager_id,avg_monthly_sales,sum_sales] + WholeStageCodegen + Project [i_manager_id,sum_sales,avg_monthly_sales] + Filter [avg_monthly_sales,sum_sales] + InputAdapter + Window [_w0,i_manager_id] + WholeStageCodegen + Sort [i_manager_id] + InputAdapter + Exchange [i_manager_id] #1 + WholeStageCodegen + HashAggregate [i_manager_id,d_moy,sum,sum(UnscaledValue(ss_sales_price))] [sum(UnscaledValue(ss_sales_price)),sum_sales,_w0,sum] + InputAdapter + Exchange [i_manager_id,d_moy] #2 + WholeStageCodegen + HashAggregate [d_moy,sum,ss_sales_price,i_manager_id,sum] [sum,sum] + Project [i_manager_id,ss_sales_price,d_moy] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [i_manager_id,ss_store_sk,ss_sales_price,d_moy] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [i_manager_id,ss_sold_date_sk,ss_store_sk,ss_sales_price] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Project [i_item_sk,i_manager_id] + Filter [i_category,i_class,i_brand,i_item_sk] + Scan parquet default.item [i_class,i_item_sk,i_manager_id,i_category,i_brand] [i_class,i_item_sk,i_manager_id,i_category,i_brand] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen + Project [ss_sold_date_sk,ss_item_sk,ss_store_sk,ss_sales_price] + Filter [ss_item_sk,ss_sold_date_sk,ss_store_sk] + Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_store_sk,ss_sales_price] [ss_sold_date_sk,ss_item_sk,ss_store_sk,ss_sales_price] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen + Project [d_date_sk,d_moy] + Filter [d_month_seq,d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_month_seq,d_moy] 
[d_date_sk,d_month_seq,d_moy] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen + Project [s_store_sk] + Filter [s_store_sk] + Scan parquet default.store [s_store_sk] [s_store_sk] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q64/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q64/explain.txt new file mode 100644 index 000000000..fde217dca --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q64/explain.txt @@ -0,0 +1,170 @@ +== Physical Plan == +*(43) Sort [product_name#1 ASC NULLS FIRST, store_name#2 ASC NULLS FIRST, cnt#3 ASC NULLS FIRST], true, 0 ++- Exchange rangepartitioning(product_name#1 ASC NULLS FIRST, store_name#2 ASC NULLS FIRST, cnt#3 ASC NULLS FIRST, 200) + +- *(42) Project [product_name#1, store_name#2, store_zip#4, b_street_number#5, b_streen_name#6, b_city#7, b_zip#8, c_street_number#9, c_street_name#10, c_city#11, c_zip#12, syear#13, cnt#14, s1#15, s2#16, s3#17, s1#18, s2#19, s3#20, syear#21, cnt#3] + +- *(42) BroadcastHashJoin [item_sk#22, store_name#2, store_zip#4], [item_sk#23, store_name#24, store_zip#25], Inner, BuildRight, (cnt#3 <= cnt#14) + :- *(42) HashAggregate(keys=[i_product_name#26, i_item_sk#27, s_store_name#28, s_zip#29, ca_street_number#30, ca_street_name#31, ca_city#32, ca_zip#33, ca_street_number#34, ca_street_name#35, ca_city#36, ca_zip#37, d_year#38, d_year#39, d_year#40], functions=[count(1), sum(UnscaledValue(ss_wholesale_cost#41)), sum(UnscaledValue(ss_list_price#42)), sum(UnscaledValue(ss_coupon_amt#43))]) + : +- Exchange hashpartitioning(i_product_name#26, i_item_sk#27, s_store_name#28, s_zip#29, ca_street_number#30, ca_street_name#31, ca_city#32, ca_zip#33, ca_street_number#34, ca_street_name#35, ca_city#36, ca_zip#37, d_year#38, d_year#39, d_year#40, 200) + : +- *(20) HashAggregate(keys=[i_product_name#26, i_item_sk#27, s_store_name#28, s_zip#29, ca_street_number#30, ca_street_name#31, ca_city#32, ca_zip#33, ca_street_number#34, 
ca_street_name#35, ca_city#36, ca_zip#37, d_year#38, d_year#39, d_year#40], functions=[partial_count(1), partial_sum(UnscaledValue(ss_wholesale_cost#41)), partial_sum(UnscaledValue(ss_list_price#42)), partial_sum(UnscaledValue(ss_coupon_amt#43))]) + : +- *(20) Project [ss_wholesale_cost#41, ss_list_price#42, ss_coupon_amt#43, d_year#38, d_year#39, d_year#40, s_store_name#28, s_zip#29, ca_street_number#30, ca_street_name#31, ca_city#32, ca_zip#33, ca_street_number#34, ca_street_name#35, ca_city#36, ca_zip#37, i_item_sk#27, i_product_name#26] + : +- *(20) BroadcastHashJoin [ss_item_sk#44], [i_item_sk#27], Inner, BuildRight + : :- *(20) Project [ss_item_sk#44, ss_wholesale_cost#41, ss_list_price#42, ss_coupon_amt#43, d_year#38, s_store_name#28, s_zip#29, d_year#39, d_year#40, ca_street_number#30, ca_street_name#31, ca_city#32, ca_zip#33, ca_street_number#34, ca_street_name#35, ca_city#36, ca_zip#37] + : : +- *(20) BroadcastHashJoin [hd_income_band_sk#45], [ib_income_band_sk#46], Inner, BuildRight + : : :- *(20) Project [ss_item_sk#44, ss_wholesale_cost#41, ss_list_price#42, ss_coupon_amt#43, d_year#38, s_store_name#28, s_zip#29, d_year#39, d_year#40, hd_income_band_sk#45, ca_street_number#30, ca_street_name#31, ca_city#32, ca_zip#33, ca_street_number#34, ca_street_name#35, ca_city#36, ca_zip#37] + : : : +- *(20) BroadcastHashJoin [hd_income_band_sk#47], [ib_income_band_sk#48], Inner, BuildRight + : : : :- *(20) Project [ss_item_sk#44, ss_wholesale_cost#41, ss_list_price#42, ss_coupon_amt#43, d_year#38, s_store_name#28, s_zip#29, d_year#39, d_year#40, hd_income_band_sk#47, hd_income_band_sk#45, ca_street_number#30, ca_street_name#31, ca_city#32, ca_zip#33, ca_street_number#34, ca_street_name#35, ca_city#36, ca_zip#37] + : : : : +- *(20) BroadcastHashJoin [c_current_addr_sk#49], [ca_address_sk#50], Inner, BuildRight + : : : : :- *(20) Project [ss_item_sk#44, ss_wholesale_cost#41, ss_list_price#42, ss_coupon_amt#43, d_year#38, s_store_name#28, s_zip#29, 
c_current_addr_sk#49, d_year#39, d_year#40, hd_income_band_sk#47, hd_income_band_sk#45, ca_street_number#30, ca_street_name#31, ca_city#32, ca_zip#33] + : : : : : +- *(20) BroadcastHashJoin [ss_addr_sk#51], [ca_address_sk#52], Inner, BuildRight + : : : : : :- *(20) Project [ss_item_sk#44, ss_addr_sk#51, ss_wholesale_cost#41, ss_list_price#42, ss_coupon_amt#43, d_year#38, s_store_name#28, s_zip#29, c_current_addr_sk#49, d_year#39, d_year#40, hd_income_band_sk#47, hd_income_band_sk#45] + : : : : : : +- *(20) BroadcastHashJoin [c_current_hdemo_sk#53], [hd_demo_sk#54], Inner, BuildRight + : : : : : : :- *(20) Project [ss_item_sk#44, ss_addr_sk#51, ss_wholesale_cost#41, ss_list_price#42, ss_coupon_amt#43, d_year#38, s_store_name#28, s_zip#29, c_current_hdemo_sk#53, c_current_addr_sk#49, d_year#39, d_year#40, hd_income_band_sk#47] + : : : : : : : +- *(20) BroadcastHashJoin [ss_hdemo_sk#55], [hd_demo_sk#56], Inner, BuildRight + : : : : : : : :- *(20) Project [ss_item_sk#44, ss_hdemo_sk#55, ss_addr_sk#51, ss_wholesale_cost#41, ss_list_price#42, ss_coupon_amt#43, d_year#38, s_store_name#28, s_zip#29, c_current_hdemo_sk#53, c_current_addr_sk#49, d_year#39, d_year#40] + : : : : : : : : +- *(20) BroadcastHashJoin [ss_promo_sk#57], [p_promo_sk#58], Inner, BuildRight + : : : : : : : : :- *(20) Project [ss_item_sk#44, ss_hdemo_sk#55, ss_addr_sk#51, ss_promo_sk#57, ss_wholesale_cost#41, ss_list_price#42, ss_coupon_amt#43, d_year#38, s_store_name#28, s_zip#29, c_current_hdemo_sk#53, c_current_addr_sk#49, d_year#39, d_year#40] + : : : : : : : : : +- *(20) BroadcastHashJoin [c_current_cdemo_sk#59], [cd_demo_sk#60], Inner, BuildRight, NOT (cd_marital_status#61 = cd_marital_status#62) + : : : : : : : : : :- *(20) Project [ss_item_sk#44, ss_hdemo_sk#55, ss_addr_sk#51, ss_promo_sk#57, ss_wholesale_cost#41, ss_list_price#42, ss_coupon_amt#43, d_year#38, s_store_name#28, s_zip#29, c_current_cdemo_sk#59, c_current_hdemo_sk#53, c_current_addr_sk#49, d_year#39, d_year#40, 
cd_marital_status#61] + : : : : : : : : : : +- *(20) BroadcastHashJoin [ss_cdemo_sk#63], [cd_demo_sk#64], Inner, BuildRight + : : : : : : : : : : :- *(20) Project [ss_item_sk#44, ss_cdemo_sk#63, ss_hdemo_sk#55, ss_addr_sk#51, ss_promo_sk#57, ss_wholesale_cost#41, ss_list_price#42, ss_coupon_amt#43, d_year#38, s_store_name#28, s_zip#29, c_current_cdemo_sk#59, c_current_hdemo_sk#53, c_current_addr_sk#49, d_year#39, d_year#40] + : : : : : : : : : : : +- *(20) BroadcastHashJoin [c_first_shipto_date_sk#65], [d_date_sk#66], Inner, BuildRight + : : : : : : : : : : : :- *(20) Project [ss_item_sk#44, ss_cdemo_sk#63, ss_hdemo_sk#55, ss_addr_sk#51, ss_promo_sk#57, ss_wholesale_cost#41, ss_list_price#42, ss_coupon_amt#43, d_year#38, s_store_name#28, s_zip#29, c_current_cdemo_sk#59, c_current_hdemo_sk#53, c_current_addr_sk#49, c_first_shipto_date_sk#65, d_year#39] + : : : : : : : : : : : : +- *(20) BroadcastHashJoin [c_first_sales_date_sk#67], [d_date_sk#68], Inner, BuildRight + : : : : : : : : : : : : :- *(20) Project [ss_item_sk#44, ss_cdemo_sk#63, ss_hdemo_sk#55, ss_addr_sk#51, ss_promo_sk#57, ss_wholesale_cost#41, ss_list_price#42, ss_coupon_amt#43, d_year#38, s_store_name#28, s_zip#29, c_current_cdemo_sk#59, c_current_hdemo_sk#53, c_current_addr_sk#49, c_first_shipto_date_sk#65, c_first_sales_date_sk#67] + : : : : : : : : : : : : : +- *(20) BroadcastHashJoin [ss_customer_sk#69], [c_customer_sk#70], Inner, BuildRight + : : : : : : : : : : : : : :- *(20) Project [ss_item_sk#44, ss_customer_sk#69, ss_cdemo_sk#63, ss_hdemo_sk#55, ss_addr_sk#51, ss_promo_sk#57, ss_wholesale_cost#41, ss_list_price#42, ss_coupon_amt#43, d_year#38, s_store_name#28, s_zip#29] + : : : : : : : : : : : : : : +- *(20) BroadcastHashJoin [ss_store_sk#71], [s_store_sk#72], Inner, BuildRight + : : : : : : : : : : : : : : :- *(20) Project [ss_item_sk#44, ss_customer_sk#69, ss_cdemo_sk#63, ss_hdemo_sk#55, ss_addr_sk#51, ss_store_sk#71, ss_promo_sk#57, ss_wholesale_cost#41, ss_list_price#42, ss_coupon_amt#43, 
d_year#38] + : : : : : : : : : : : : : : : +- *(20) BroadcastHashJoin [ss_sold_date_sk#73], [d_date_sk#74], Inner, BuildRight + : : : : : : : : : : : : : : : :- *(20) Project [ss_sold_date_sk#73, ss_item_sk#44, ss_customer_sk#69, ss_cdemo_sk#63, ss_hdemo_sk#55, ss_addr_sk#51, ss_store_sk#71, ss_promo_sk#57, ss_wholesale_cost#41, ss_list_price#42, ss_coupon_amt#43] + : : : : : : : : : : : : : : : : +- *(20) BroadcastHashJoin [ss_item_sk#44], [cs_item_sk#75], Inner, BuildRight + : : : : : : : : : : : : : : : : :- *(20) Project [ss_sold_date_sk#73, ss_item_sk#44, ss_customer_sk#69, ss_cdemo_sk#63, ss_hdemo_sk#55, ss_addr_sk#51, ss_store_sk#71, ss_promo_sk#57, ss_wholesale_cost#41, ss_list_price#42, ss_coupon_amt#43] + : : : : : : : : : : : : : : : : : +- *(20) BroadcastHashJoin [cast(ss_item_sk#44 as bigint), cast(ss_ticket_number#76 as bigint)], [sr_item_sk#77, sr_ticket_number#78], Inner, BuildRight + : : : : : : : : : : : : : : : : : :- *(20) Project [ss_sold_date_sk#73, ss_item_sk#44, ss_customer_sk#69, ss_cdemo_sk#63, ss_hdemo_sk#55, ss_addr_sk#51, ss_store_sk#71, ss_promo_sk#57, ss_ticket_number#76, ss_wholesale_cost#41, ss_list_price#42, ss_coupon_amt#43] + : : : : : : : : : : : : : : : : : : +- *(20) Filter ((((((((isnotnull(ss_item_sk#44) && isnotnull(ss_ticket_number#76)) && isnotnull(ss_sold_date_sk#73)) && isnotnull(ss_store_sk#71)) && isnotnull(ss_customer_sk#69)) && isnotnull(ss_cdemo_sk#63)) && isnotnull(ss_promo_sk#57)) && isnotnull(ss_hdemo_sk#55)) && isnotnull(ss_addr_sk#51)) + : : : : : : : : : : : : : : : : : : +- *(20) FileScan parquet default.store_sales[ss_sold_date_sk#73,ss_item_sk#44,ss_customer_sk#69,ss_cdemo_sk#63,ss_hdemo_sk#55,ss_addr_sk#51,ss_store_sk#71,ss_promo_sk#57,ss_ticket_number#76,ss_wholesale_cost#41,ss_list_price#42,ss_coupon_amt#43] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_item_sk), 
IsNotNull(ss_ticket_number), IsNotNull(ss_sold_date_sk), IsNotNull(ss_sto..., ReadSchema: struct + : : : : : : : : : : : : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : : : : : : : : : : : : : +- *(4) Project [cs_item_sk#75] + : : : : : : : : : : : : : : : : +- *(4) Filter (isnotnull(sum(cs_ext_list_price#79)#80) && (cast(sum(cs_ext_list_price#79)#80 as decimal(21,2)) > CheckOverflow((2.00 * promote_precision(sum(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cr_refunded_cash#81 as decimal(8,2))) + promote_precision(cast(cr_reversed_charge#82 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) + promote_precision(cast(cr_store_credit#83 as decimal(9,2)))), DecimalType(9,2)))#84)), DecimalType(21,2)))) + : : : : : : : : : : : : : : : : +- *(4) HashAggregate(keys=[cs_item_sk#75], functions=[sum(UnscaledValue(cs_ext_list_price#79)), sum(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cr_refunded_cash#81 as decimal(8,2))) + promote_precision(cast(cr_reversed_charge#82 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) + promote_precision(cast(cr_store_credit#83 as decimal(9,2)))), DecimalType(9,2)))]) + : : : : : : : : : : : : : : : : +- Exchange hashpartitioning(cs_item_sk#75, 200) + : : : : : : : : : : : : : : : : +- *(3) HashAggregate(keys=[cs_item_sk#75], functions=[partial_sum(UnscaledValue(cs_ext_list_price#79)), partial_sum(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cr_refunded_cash#81 as decimal(8,2))) + promote_precision(cast(cr_reversed_charge#82 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) + promote_precision(cast(cr_store_credit#83 as decimal(9,2)))), DecimalType(9,2)))]) + : : : : : : : : : : : : : : : : +- *(3) Project [cs_item_sk#75, cs_ext_list_price#79, cr_refunded_cash#81, cr_reversed_charge#82, cr_store_credit#83] + : : : : : : : : : : : : : : : : +- *(3) BroadcastHashJoin 
[cs_item_sk#75, cs_order_number#85], [cr_item_sk#86, cr_order_number#87], Inner, BuildRight + : : : : : : : : : : : : : : : : :- *(3) Project [cs_item_sk#75, cs_order_number#85, cs_ext_list_price#79] + : : : : : : : : : : : : : : : : : +- *(3) Filter (isnotnull(cs_item_sk#75) && isnotnull(cs_order_number#85)) + : : : : : : : : : : : : : : : : : +- *(3) FileScan parquet default.catalog_sales[cs_item_sk#75,cs_order_number#85,cs_ext_list_price#79] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_item_sk), IsNotNull(cs_order_number)], ReadSchema: struct + : : : : : : : : : : : : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List((shiftleft(cast(input[0, int, true] as bigint), 32) | (cast(input[1, int, true] as bigint) & 4294967295)))) + : : : : : : : : : : : : : : : : +- *(2) Project [cr_item_sk#86, cr_order_number#87, cr_refunded_cash#81, cr_reversed_charge#82, cr_store_credit#83] + : : : : : : : : : : : : : : : : +- *(2) Filter (isnotnull(cr_item_sk#86) && isnotnull(cr_order_number#87)) + : : : : : : : : : : : : : : : : +- *(2) FileScan parquet default.catalog_returns[cr_item_sk#86,cr_order_number#87,cr_refunded_cash#81,cr_reversed_charge#82,cr_store_credit#83] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_returns], PartitionFilters: [], PushedFilters: [IsNotNull(cr_item_sk), IsNotNull(cr_order_number)], ReadSchema: struct + : : : : : : : : : : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : : : : : : : : : : : +- *(6) Project [s_store_sk#72, s_store_name#28, s_zip#29] + : : : : : : : : : : : : : : +- *(6) Filter ((isnotnull(s_store_sk#72) && isnotnull(s_zip#29)) && isnotnull(s_store_name#28)) + : : : : : : : : : : : : : : +- *(6) FileScan parquet 
default.store[s_store_sk#72,s_store_name#28,s_zip#29] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_store_sk), IsNotNull(s_zip), IsNotNull(s_store_name)], ReadSchema: struct + : : : : : : : : : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : : : : : : : : : : +- *(7) Project [c_customer_sk#70, c_current_cdemo_sk#59, c_current_hdemo_sk#53, c_current_addr_sk#49, c_first_shipto_date_sk#65, c_first_sales_date_sk#67] + : : : : : : : : : : : : : +- *(7) Filter (((((isnotnull(c_customer_sk#70) && isnotnull(c_first_sales_date_sk#67)) && isnotnull(c_first_shipto_date_sk#65)) && isnotnull(c_current_cdemo_sk#59)) && isnotnull(c_current_hdemo_sk#53)) && isnotnull(c_current_addr_sk#49)) + : : : : : : : : : : : : : +- *(7) FileScan parquet default.customer[c_customer_sk#70,c_current_cdemo_sk#59,c_current_hdemo_sk#53,c_current_addr_sk#49,c_first_shipto_date_sk#65,c_first_sales_date_sk#67] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_first_sales_date_sk), IsNotNull(c_first_shipto_date_sk), I..., ReadSchema: struct + : : : : : : : : : : : +- ReusedExchange [d_date_sk#66, d_year#40], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : : : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : : : : : : : +- *(10) Project [cd_demo_sk#64, cd_marital_status#61] + : : : : : : : : : : +- *(10) Filter (isnotnull(cd_demo_sk#64) && isnotnull(cd_marital_status#61)) + : : : : : : : : : : +- *(10) FileScan parquet default.customer_demographics[cd_demo_sk#64,cd_marital_status#61] Batched: true, Format: Parquet, Location: 
InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_demographics], PartitionFilters: [], PushedFilters: [IsNotNull(cd_demo_sk), IsNotNull(cd_marital_status)], ReadSchema: struct + : : : : : : : : : +- ReusedExchange [cd_demo_sk#60, cd_marital_status#62], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : : : : : +- *(12) Project [p_promo_sk#58] + : : : : : : : : +- *(12) Filter isnotnull(p_promo_sk#58) + : : : : : : : : +- *(12) FileScan parquet default.promotion[p_promo_sk#58] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/promotion], PartitionFilters: [], PushedFilters: [IsNotNull(p_promo_sk)], ReadSchema: struct + : : : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : : : : +- *(13) Project [hd_demo_sk#56, hd_income_band_sk#47] + : : : : : : : +- *(13) Filter (isnotnull(hd_demo_sk#56) && isnotnull(hd_income_band_sk#47)) + : : : : : : : +- *(13) FileScan parquet default.household_demographics[hd_demo_sk#56,hd_income_band_sk#47] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/household_demographics], PartitionFilters: [], PushedFilters: [IsNotNull(hd_demo_sk), IsNotNull(hd_income_band_sk)], ReadSchema: struct + : : : : : : +- ReusedExchange [hd_demo_sk#54, hd_income_band_sk#45], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : : +- *(15) Project [ca_address_sk#52, ca_street_number#30, ca_street_name#31, ca_city#32, ca_zip#33] + : : : : : +- *(15) Filter isnotnull(ca_address_sk#52) + : : : : : +- *(15) FileScan parquet 
default.customer_address[ca_address_sk#52,ca_street_number#30,ca_street_name#31,ca_city#32,ca_zip#33] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_address_sk)], ReadSchema: struct + : : : : +- ReusedExchange [ca_address_sk#50, ca_street_number#34, ca_street_name#35, ca_city#36, ca_zip#37], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : +- *(17) Project [ib_income_band_sk#48] + : : : +- *(17) Filter isnotnull(ib_income_band_sk#48) + : : : +- *(17) FileScan parquet default.income_band[ib_income_band_sk#48] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/income_band], PartitionFilters: [], PushedFilters: [IsNotNull(ib_income_band_sk)], ReadSchema: struct + : : +- ReusedExchange [ib_income_band_sk#46], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(19) Project [i_item_sk#27, i_product_name#26] + : +- *(19) Filter ((((((isnotnull(i_current_price#88) && i_color#89 IN (purple,burlywood,indian,spring,floral,medium)) && (i_current_price#88 >= 64.00)) && (cast(i_current_price#88 as decimal(12,2)) <= 74.00)) && (cast(i_current_price#88 as decimal(12,2)) >= 65.00)) && (cast(i_current_price#88 as decimal(12,2)) <= 79.00)) && isnotnull(i_item_sk#27)) + : +- *(19) FileScan parquet default.item[i_item_sk#27,i_current_price#88,i_color#89,i_product_name#26] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_current_price), In(i_color, 
[purple,burlywood,indian,spring,floral,medium]), Greater..., ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, int, true], input[1, string, true], input[2, string, true])) + +- *(41) HashAggregate(keys=[i_product_name#26, i_item_sk#27, s_store_name#28, s_zip#29, ca_street_number#30, ca_street_name#31, ca_city#32, ca_zip#33, ca_street_number#34, ca_street_name#35, ca_city#36, ca_zip#37, d_year#38, d_year#39, d_year#40], functions=[count(1), sum(UnscaledValue(ss_wholesale_cost#41)), sum(UnscaledValue(ss_list_price#42)), sum(UnscaledValue(ss_coupon_amt#43))]) + +- Exchange hashpartitioning(i_product_name#26, i_item_sk#27, s_store_name#28, s_zip#29, ca_street_number#30, ca_street_name#31, ca_city#32, ca_zip#33, ca_street_number#34, ca_street_name#35, ca_city#36, ca_zip#37, d_year#38, d_year#39, d_year#40, 200) + +- *(40) HashAggregate(keys=[i_product_name#26, i_item_sk#27, s_store_name#28, s_zip#29, ca_street_number#30, ca_street_name#31, ca_city#32, ca_zip#33, ca_street_number#34, ca_street_name#35, ca_city#36, ca_zip#37, d_year#38, d_year#39, d_year#40], functions=[partial_count(1), partial_sum(UnscaledValue(ss_wholesale_cost#41)), partial_sum(UnscaledValue(ss_list_price#42)), partial_sum(UnscaledValue(ss_coupon_amt#43))]) + +- *(40) Project [ss_wholesale_cost#41, ss_list_price#42, ss_coupon_amt#43, d_year#38, d_year#39, d_year#40, s_store_name#28, s_zip#29, ca_street_number#30, ca_street_name#31, ca_city#32, ca_zip#33, ca_street_number#34, ca_street_name#35, ca_city#36, ca_zip#37, i_item_sk#27, i_product_name#26] + +- *(40) BroadcastHashJoin [ss_item_sk#44], [i_item_sk#27], Inner, BuildRight + :- *(40) Project [ss_item_sk#44, ss_wholesale_cost#41, ss_list_price#42, ss_coupon_amt#43, d_year#38, s_store_name#28, s_zip#29, d_year#39, d_year#40, ca_street_number#30, ca_street_name#31, ca_city#32, ca_zip#33, ca_street_number#34, ca_street_name#35, ca_city#36, ca_zip#37] + : +- *(40) BroadcastHashJoin [hd_income_band_sk#45], 
[ib_income_band_sk#46], Inner, BuildRight + : :- *(40) Project [ss_item_sk#44, ss_wholesale_cost#41, ss_list_price#42, ss_coupon_amt#43, d_year#38, s_store_name#28, s_zip#29, d_year#39, d_year#40, hd_income_band_sk#45, ca_street_number#30, ca_street_name#31, ca_city#32, ca_zip#33, ca_street_number#34, ca_street_name#35, ca_city#36, ca_zip#37] + : : +- *(40) BroadcastHashJoin [hd_income_band_sk#47], [ib_income_band_sk#48], Inner, BuildRight + : : :- *(40) Project [ss_item_sk#44, ss_wholesale_cost#41, ss_list_price#42, ss_coupon_amt#43, d_year#38, s_store_name#28, s_zip#29, d_year#39, d_year#40, hd_income_band_sk#47, hd_income_band_sk#45, ca_street_number#30, ca_street_name#31, ca_city#32, ca_zip#33, ca_street_number#34, ca_street_name#35, ca_city#36, ca_zip#37] + : : : +- *(40) BroadcastHashJoin [c_current_addr_sk#49], [ca_address_sk#50], Inner, BuildRight + : : : :- *(40) Project [ss_item_sk#44, ss_wholesale_cost#41, ss_list_price#42, ss_coupon_amt#43, d_year#38, s_store_name#28, s_zip#29, c_current_addr_sk#49, d_year#39, d_year#40, hd_income_band_sk#47, hd_income_band_sk#45, ca_street_number#30, ca_street_name#31, ca_city#32, ca_zip#33] + : : : : +- *(40) BroadcastHashJoin [ss_addr_sk#51], [ca_address_sk#52], Inner, BuildRight + : : : : :- *(40) Project [ss_item_sk#44, ss_addr_sk#51, ss_wholesale_cost#41, ss_list_price#42, ss_coupon_amt#43, d_year#38, s_store_name#28, s_zip#29, c_current_addr_sk#49, d_year#39, d_year#40, hd_income_band_sk#47, hd_income_band_sk#45] + : : : : : +- *(40) BroadcastHashJoin [c_current_hdemo_sk#53], [hd_demo_sk#54], Inner, BuildRight + : : : : : :- *(40) Project [ss_item_sk#44, ss_addr_sk#51, ss_wholesale_cost#41, ss_list_price#42, ss_coupon_amt#43, d_year#38, s_store_name#28, s_zip#29, c_current_hdemo_sk#53, c_current_addr_sk#49, d_year#39, d_year#40, hd_income_band_sk#47] + : : : : : : +- *(40) BroadcastHashJoin [ss_hdemo_sk#55], [hd_demo_sk#56], Inner, BuildRight + : : : : : : :- *(40) Project [ss_item_sk#44, ss_hdemo_sk#55, 
ss_addr_sk#51, ss_wholesale_cost#41, ss_list_price#42, ss_coupon_amt#43, d_year#38, s_store_name#28, s_zip#29, c_current_hdemo_sk#53, c_current_addr_sk#49, d_year#39, d_year#40] + : : : : : : : +- *(40) BroadcastHashJoin [ss_promo_sk#57], [p_promo_sk#58], Inner, BuildRight + : : : : : : : :- *(40) Project [ss_item_sk#44, ss_hdemo_sk#55, ss_addr_sk#51, ss_promo_sk#57, ss_wholesale_cost#41, ss_list_price#42, ss_coupon_amt#43, d_year#38, s_store_name#28, s_zip#29, c_current_hdemo_sk#53, c_current_addr_sk#49, d_year#39, d_year#40] + : : : : : : : : +- *(40) BroadcastHashJoin [c_current_cdemo_sk#59], [cd_demo_sk#60], Inner, BuildRight, NOT (cd_marital_status#61 = cd_marital_status#62) + : : : : : : : : :- *(40) Project [ss_item_sk#44, ss_hdemo_sk#55, ss_addr_sk#51, ss_promo_sk#57, ss_wholesale_cost#41, ss_list_price#42, ss_coupon_amt#43, d_year#38, s_store_name#28, s_zip#29, c_current_cdemo_sk#59, c_current_hdemo_sk#53, c_current_addr_sk#49, d_year#39, d_year#40, cd_marital_status#61] + : : : : : : : : : +- *(40) BroadcastHashJoin [ss_cdemo_sk#63], [cd_demo_sk#64], Inner, BuildRight + : : : : : : : : : :- *(40) Project [ss_item_sk#44, ss_cdemo_sk#63, ss_hdemo_sk#55, ss_addr_sk#51, ss_promo_sk#57, ss_wholesale_cost#41, ss_list_price#42, ss_coupon_amt#43, d_year#38, s_store_name#28, s_zip#29, c_current_cdemo_sk#59, c_current_hdemo_sk#53, c_current_addr_sk#49, d_year#39, d_year#40] + : : : : : : : : : : +- *(40) BroadcastHashJoin [c_first_shipto_date_sk#65], [d_date_sk#66], Inner, BuildRight + : : : : : : : : : : :- *(40) Project [ss_item_sk#44, ss_cdemo_sk#63, ss_hdemo_sk#55, ss_addr_sk#51, ss_promo_sk#57, ss_wholesale_cost#41, ss_list_price#42, ss_coupon_amt#43, d_year#38, s_store_name#28, s_zip#29, c_current_cdemo_sk#59, c_current_hdemo_sk#53, c_current_addr_sk#49, c_first_shipto_date_sk#65, d_year#39] + : : : : : : : : : : : +- *(40) BroadcastHashJoin [c_first_sales_date_sk#67], [d_date_sk#68], Inner, BuildRight + : : : : : : : : : : : :- *(40) Project [ss_item_sk#44, 
ss_cdemo_sk#63, ss_hdemo_sk#55, ss_addr_sk#51, ss_promo_sk#57, ss_wholesale_cost#41, ss_list_price#42, ss_coupon_amt#43, d_year#38, s_store_name#28, s_zip#29, c_current_cdemo_sk#59, c_current_hdemo_sk#53, c_current_addr_sk#49, c_first_shipto_date_sk#65, c_first_sales_date_sk#67] + : : : : : : : : : : : : +- *(40) BroadcastHashJoin [ss_customer_sk#69], [c_customer_sk#70], Inner, BuildRight + : : : : : : : : : : : : :- *(40) Project [ss_item_sk#44, ss_customer_sk#69, ss_cdemo_sk#63, ss_hdemo_sk#55, ss_addr_sk#51, ss_promo_sk#57, ss_wholesale_cost#41, ss_list_price#42, ss_coupon_amt#43, d_year#38, s_store_name#28, s_zip#29] + : : : : : : : : : : : : : +- *(40) BroadcastHashJoin [ss_store_sk#71], [s_store_sk#72], Inner, BuildRight + : : : : : : : : : : : : : :- *(40) Project [ss_item_sk#44, ss_customer_sk#69, ss_cdemo_sk#63, ss_hdemo_sk#55, ss_addr_sk#51, ss_store_sk#71, ss_promo_sk#57, ss_wholesale_cost#41, ss_list_price#42, ss_coupon_amt#43, d_year#38] + : : : : : : : : : : : : : : +- *(40) BroadcastHashJoin [ss_sold_date_sk#73], [d_date_sk#74], Inner, BuildRight + : : : : : : : : : : : : : : :- *(40) Project [ss_sold_date_sk#73, ss_item_sk#44, ss_customer_sk#69, ss_cdemo_sk#63, ss_hdemo_sk#55, ss_addr_sk#51, ss_store_sk#71, ss_promo_sk#57, ss_wholesale_cost#41, ss_list_price#42, ss_coupon_amt#43] + : : : : : : : : : : : : : : : +- *(40) BroadcastHashJoin [ss_item_sk#44], [cs_item_sk#75], Inner, BuildRight + : : : : : : : : : : : : : : : :- *(40) Project [ss_sold_date_sk#73, ss_item_sk#44, ss_customer_sk#69, ss_cdemo_sk#63, ss_hdemo_sk#55, ss_addr_sk#51, ss_store_sk#71, ss_promo_sk#57, ss_wholesale_cost#41, ss_list_price#42, ss_coupon_amt#43] + : : : : : : : : : : : : : : : : +- *(40) BroadcastHashJoin [cast(ss_item_sk#44 as bigint), cast(ss_ticket_number#76 as bigint)], [sr_item_sk#77, sr_ticket_number#78], Inner, BuildRight + : : : : : : : : : : : : : : : : :- *(40) Project [ss_sold_date_sk#73, ss_item_sk#44, ss_customer_sk#69, ss_cdemo_sk#63, ss_hdemo_sk#55, 
ss_addr_sk#51, ss_store_sk#71, ss_promo_sk#57, ss_ticket_number#76, ss_wholesale_cost#41, ss_list_price#42, ss_coupon_amt#43] + : : : : : : : : : : : : : : : : : +- *(40) Filter ((((((((isnotnull(ss_item_sk#44) && isnotnull(ss_ticket_number#76)) && isnotnull(ss_sold_date_sk#73)) && isnotnull(ss_store_sk#71)) && isnotnull(ss_customer_sk#69)) && isnotnull(ss_cdemo_sk#63)) && isnotnull(ss_promo_sk#57)) && isnotnull(ss_hdemo_sk#55)) && isnotnull(ss_addr_sk#51)) + : : : : : : : : : : : : : : : : : +- *(40) FileScan parquet default.store_sales[ss_sold_date_sk#73,ss_item_sk#44,ss_customer_sk#69,ss_cdemo_sk#63,ss_hdemo_sk#55,ss_addr_sk#51,ss_store_sk#71,ss_promo_sk#57,ss_ticket_number#76,ss_wholesale_cost#41,ss_list_price#42,ss_coupon_amt#43] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_ticket_number), IsNotNull(ss_sold_date_sk), IsNotNull(ss_sto..., ReadSchema: struct + : : : : : : : : : : : : : +- ReusedExchange [s_store_sk#72, s_store_name#28, s_zip#29], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : : : : : : : : : +- ReusedExchange [c_customer_sk#70, c_current_cdemo_sk#59, c_current_hdemo_sk#53, c_current_addr_sk#49, c_first_shipto_date_sk#65, c_first_sales_date_sk#67], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : : : : : : : : +- ReusedExchange [d_date_sk#68, d_year#39], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : : : : : : : +- ReusedExchange [d_date_sk#66, d_year#40], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : : : : : : +- ReusedExchange [cd_demo_sk#64, cd_marital_status#61], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : : : : : +- 
ReusedExchange [cd_demo_sk#60, cd_marital_status#62], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : : : : +- ReusedExchange [p_promo_sk#58], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : : : +- ReusedExchange [hd_demo_sk#56, hd_income_band_sk#47], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : : +- ReusedExchange [hd_demo_sk#54, hd_income_band_sk#45], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : +- ReusedExchange [ca_address_sk#52, ca_street_number#30, ca_street_name#31, ca_city#32, ca_zip#33], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : +- ReusedExchange [ca_address_sk#50, ca_street_number#34, ca_street_name#35, ca_city#36, ca_zip#37], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- ReusedExchange [ib_income_band_sk#48], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- ReusedExchange [ib_income_band_sk#46], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- ReusedExchange [i_item_sk#27, i_product_name#26], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q64/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q64/simplified.txt new file mode 100644 index 000000000..7336416f6 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q64/simplified.txt @@ -0,0 +1,229 @@ +WholeStageCodegen + Sort [product_name,store_name,cnt] + InputAdapter + Exchange [product_name,store_name,cnt] #1 + WholeStageCodegen + Project 
[cnt,b_streen_name,product_name,b_city,b_zip,syear,c_street_name,store_zip,c_street_number,s1,s3,c_zip,s3,s1,b_street_number,s2,store_name,cnt,s2,syear,c_city] + BroadcastHashJoin [cnt,store_zip,store_name,store_zip,store_name,item_sk,cnt,item_sk] + HashAggregate [d_year,ca_street_name,count,sum(UnscaledValue(ss_coupon_amt)),sum,ca_street_number,ca_street_name,sum(UnscaledValue(ss_list_price)),ca_zip,count(1),d_year,ca_city,sum,i_item_sk,sum,ca_street_number,d_year,ca_city,s_store_name,s_zip,sum(UnscaledValue(ss_wholesale_cost)),ca_zip,i_product_name] [c_city,b_streen_name,count,b_zip,sum(UnscaledValue(ss_coupon_amt)),sum,b_city,c_street_name,s3,sum(UnscaledValue(ss_list_price)),cnt,b_street_number,c_street_number,item_sk,count(1),store_zip,s2,product_name,sum,sum,store_name,c_zip,sum(UnscaledValue(ss_wholesale_cost)),s1,syear] + InputAdapter + Exchange [d_year,ca_street_name,ca_street_number,ca_street_name,ca_zip,d_year,ca_city,i_item_sk,ca_street_number,d_year,ca_city,s_store_name,s_zip,ca_zip,i_product_name] #2 + WholeStageCodegen + HashAggregate [d_year,ca_street_name,count,sum,sum,ca_street_number,ss_wholesale_cost,ca_street_name,sum,count,ca_zip,ss_coupon_amt,d_year,sum,ca_city,ss_list_price,sum,i_item_sk,sum,ca_street_number,d_year,ca_city,s_store_name,s_zip,ca_zip,i_product_name] [count,sum,sum,sum,count,sum,sum,sum] + Project [ca_city,ss_coupon_amt,s_store_name,ss_list_price,d_year,ca_zip,ca_street_number,ca_zip,d_year,i_item_sk,ca_street_name,ca_city,i_product_name,s_zip,ca_street_number,d_year,ss_wholesale_cost,ca_street_name] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [ca_city,ss_coupon_amt,s_store_name,ss_list_price,ss_item_sk,d_year,ca_zip,ca_street_number,ca_zip,d_year,ca_street_name,ca_city,s_zip,ca_street_number,d_year,ss_wholesale_cost,ca_street_name] + BroadcastHashJoin [hd_income_band_sk,ib_income_band_sk] + Project 
[ca_city,ss_coupon_amt,s_store_name,ss_list_price,ss_item_sk,d_year,ca_zip,ca_street_number,ca_zip,d_year,ca_street_name,ca_city,s_zip,hd_income_band_sk,ca_street_number,d_year,ss_wholesale_cost,ca_street_name] + BroadcastHashJoin [hd_income_band_sk,ib_income_band_sk] + Project [ca_city,ss_coupon_amt,s_store_name,ss_list_price,ss_item_sk,d_year,ca_zip,ca_street_number,ca_zip,hd_income_band_sk,d_year,ca_street_name,ca_city,s_zip,hd_income_band_sk,ca_street_number,d_year,ss_wholesale_cost,ca_street_name] + BroadcastHashJoin [c_current_addr_sk,ca_address_sk] + Project [c_current_addr_sk,ss_coupon_amt,s_store_name,ss_list_price,ss_item_sk,d_year,ca_zip,hd_income_band_sk,d_year,ca_city,s_zip,hd_income_band_sk,ca_street_number,d_year,ss_wholesale_cost,ca_street_name] + BroadcastHashJoin [ss_addr_sk,ca_address_sk] + Project [ss_addr_sk,c_current_addr_sk,ss_coupon_amt,s_store_name,ss_list_price,ss_item_sk,d_year,hd_income_band_sk,d_year,s_zip,hd_income_band_sk,d_year,ss_wholesale_cost] + BroadcastHashJoin [c_current_hdemo_sk,hd_demo_sk] + Project [ss_addr_sk,c_current_addr_sk,ss_coupon_amt,s_store_name,ss_list_price,ss_item_sk,c_current_hdemo_sk,d_year,hd_income_band_sk,d_year,s_zip,d_year,ss_wholesale_cost] + BroadcastHashJoin [ss_hdemo_sk,hd_demo_sk] + Project [ss_addr_sk,c_current_addr_sk,ss_coupon_amt,s_store_name,ss_list_price,ss_item_sk,c_current_hdemo_sk,d_year,d_year,s_zip,d_year,ss_hdemo_sk,ss_wholesale_cost] + BroadcastHashJoin [ss_promo_sk,p_promo_sk] + Project [ss_addr_sk,c_current_addr_sk,ss_coupon_amt,s_store_name,ss_list_price,ss_item_sk,c_current_hdemo_sk,d_year,d_year,s_zip,d_year,ss_promo_sk,ss_hdemo_sk,ss_wholesale_cost] + BroadcastHashJoin [c_current_cdemo_sk,cd_demo_sk,cd_marital_status,cd_marital_status] + Project [c_current_cdemo_sk,ss_addr_sk,c_current_addr_sk,ss_coupon_amt,s_store_name,ss_list_price,ss_item_sk,c_current_hdemo_sk,d_year,cd_marital_status,d_year,s_zip,d_year,ss_promo_sk,ss_hdemo_sk,ss_wholesale_cost] + BroadcastHashJoin 
[ss_cdemo_sk,cd_demo_sk] + Project [c_current_cdemo_sk,ss_addr_sk,c_current_addr_sk,ss_coupon_amt,s_store_name,ss_list_price,ss_item_sk,c_current_hdemo_sk,d_year,d_year,s_zip,d_year,ss_cdemo_sk,ss_promo_sk,ss_hdemo_sk,ss_wholesale_cost] + BroadcastHashJoin [c_first_shipto_date_sk,d_date_sk] + Project [c_current_cdemo_sk,c_first_shipto_date_sk,ss_addr_sk,c_current_addr_sk,ss_coupon_amt,s_store_name,ss_list_price,ss_item_sk,c_current_hdemo_sk,d_year,d_year,s_zip,ss_cdemo_sk,ss_promo_sk,ss_hdemo_sk,ss_wholesale_cost] + BroadcastHashJoin [c_first_sales_date_sk,d_date_sk] + Project [c_current_cdemo_sk,c_first_shipto_date_sk,ss_addr_sk,c_current_addr_sk,ss_coupon_amt,s_store_name,ss_list_price,ss_item_sk,c_current_hdemo_sk,d_year,s_zip,ss_cdemo_sk,ss_promo_sk,c_first_sales_date_sk,ss_hdemo_sk,ss_wholesale_cost] + BroadcastHashJoin [ss_customer_sk,c_customer_sk] + Project [ss_addr_sk,ss_coupon_amt,s_store_name,ss_list_price,ss_item_sk,d_year,ss_customer_sk,s_zip,ss_cdemo_sk,ss_promo_sk,ss_hdemo_sk,ss_wholesale_cost] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_addr_sk,ss_coupon_amt,ss_list_price,ss_item_sk,d_year,ss_store_sk,ss_customer_sk,ss_cdemo_sk,ss_promo_sk,ss_hdemo_sk,ss_wholesale_cost] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_addr_sk,ss_coupon_amt,ss_list_price,ss_item_sk,ss_store_sk,ss_customer_sk,ss_cdemo_sk,ss_sold_date_sk,ss_promo_sk,ss_hdemo_sk,ss_wholesale_cost] + BroadcastHashJoin [ss_item_sk,cs_item_sk] + Project [ss_addr_sk,ss_coupon_amt,ss_list_price,ss_item_sk,ss_store_sk,ss_customer_sk,ss_cdemo_sk,ss_sold_date_sk,ss_promo_sk,ss_hdemo_sk,ss_wholesale_cost] + BroadcastHashJoin [ss_item_sk,ss_ticket_number,sr_item_sk,sr_ticket_number] + Project [ss_addr_sk,ss_coupon_amt,ss_list_price,ss_item_sk,ss_store_sk,ss_customer_sk,ss_cdemo_sk,ss_sold_date_sk,ss_promo_sk,ss_hdemo_sk,ss_wholesale_cost,ss_ticket_number] + Filter 
[ss_cdemo_sk,ss_hdemo_sk,ss_sold_date_sk,ss_customer_sk,ss_promo_sk,ss_store_sk,ss_ticket_number,ss_addr_sk,ss_item_sk] + Scan parquet default.store_sales [ss_addr_sk,ss_coupon_amt,ss_list_price,ss_item_sk,ss_store_sk,ss_customer_sk,ss_cdemo_sk,ss_sold_date_sk,ss_promo_sk,ss_hdemo_sk,ss_wholesale_cost,ss_ticket_number] [ss_addr_sk,ss_coupon_amt,ss_list_price,ss_item_sk,ss_store_sk,ss_customer_sk,ss_cdemo_sk,ss_sold_date_sk,ss_promo_sk,ss_hdemo_sk,ss_wholesale_cost,ss_ticket_number] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen + Project [sr_item_sk,sr_ticket_number] + Filter [sr_item_sk,sr_ticket_number] + Scan parquet default.store_returns [sr_item_sk,sr_ticket_number] [sr_item_sk,sr_ticket_number] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen + Project [cs_item_sk] + Filter [sum(cs_ext_list_price),sum(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cr_refunded_cash as decimal(8,2))) + promote_precision(cast(cr_reversed_charge as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) + promote_precision(cast(cr_store_credit as decimal(9,2)))), DecimalType(9,2)))] + HashAggregate [sum(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cr_refunded_cash as decimal(8,2))) + promote_precision(cast(cr_reversed_charge as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) + promote_precision(cast(cr_store_credit as decimal(9,2)))), DecimalType(9,2))),cs_item_sk,sum,sum(UnscaledValue(cs_ext_list_price)),sum] [sum(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cr_refunded_cash as decimal(8,2))) + promote_precision(cast(cr_reversed_charge as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) + promote_precision(cast(cr_store_credit as decimal(9,2)))), DecimalType(9,2))),sum(cs_ext_list_price),sum(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cr_refunded_cash as decimal(8,2))) + promote_precision(cast(cr_reversed_charge as decimal(8,2)))), 
DecimalType(8,2)) as decimal(9,2))) + promote_precision(cast(cr_store_credit as decimal(9,2)))), DecimalType(9,2))),sum,sum(UnscaledValue(cs_ext_list_price)),sum] + InputAdapter + Exchange [cs_item_sk] #5 + WholeStageCodegen + HashAggregate [sum,sum,cs_item_sk,sum,cr_refunded_cash,cr_reversed_charge,cr_store_credit,sum,cs_ext_list_price] [sum,sum,sum,sum] + Project [cr_store_credit,cs_item_sk,cr_refunded_cash,cs_ext_list_price,cr_reversed_charge] + BroadcastHashJoin [cs_item_sk,cs_order_number,cr_item_sk,cr_order_number] + Project [cs_item_sk,cs_order_number,cs_ext_list_price] + Filter [cs_item_sk,cs_order_number] + Scan parquet default.catalog_sales [cs_item_sk,cs_order_number,cs_ext_list_price] [cs_item_sk,cs_order_number,cs_ext_list_price] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen + Project [cr_order_number,cr_store_credit,cr_refunded_cash,cr_reversed_charge,cr_item_sk] + Filter [cr_item_sk,cr_order_number] + Scan parquet default.catalog_returns [cr_order_number,cr_store_credit,cr_refunded_cash,cr_reversed_charge,cr_item_sk] [cr_order_number,cr_store_credit,cr_refunded_cash,cr_reversed_charge,cr_item_sk] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen + Project [d_date_sk,d_year] + Filter [d_year,d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_year] [d_date_sk,d_year] + InputAdapter + BroadcastExchange #8 + WholeStageCodegen + Project [s_store_sk,s_store_name,s_zip] + Filter [s_store_sk,s_zip,s_store_name] + Scan parquet default.store [s_store_sk,s_store_name,s_zip] [s_store_sk,s_store_name,s_zip] + InputAdapter + BroadcastExchange #9 + WholeStageCodegen + Project [c_current_cdemo_sk,c_first_shipto_date_sk,c_current_addr_sk,c_current_hdemo_sk,c_customer_sk,c_first_sales_date_sk] + Filter [c_current_cdemo_sk,c_customer_sk,c_current_hdemo_sk,c_first_sales_date_sk,c_first_shipto_date_sk,c_current_addr_sk] + Scan parquet default.customer 
[c_current_cdemo_sk,c_first_shipto_date_sk,c_current_addr_sk,c_current_hdemo_sk,c_customer_sk,c_first_sales_date_sk] [c_current_cdemo_sk,c_first_shipto_date_sk,c_current_addr_sk,c_current_hdemo_sk,c_customer_sk,c_first_sales_date_sk] + InputAdapter + BroadcastExchange #10 + WholeStageCodegen + Project [d_date_sk,d_year] + Filter [d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_year] [d_date_sk,d_year] + InputAdapter + ReusedExchange [d_date_sk,d_year] [d_date_sk,d_year] #10 + InputAdapter + BroadcastExchange #11 + WholeStageCodegen + Project [cd_demo_sk,cd_marital_status] + Filter [cd_demo_sk,cd_marital_status] + Scan parquet default.customer_demographics [cd_demo_sk,cd_marital_status] [cd_demo_sk,cd_marital_status] + InputAdapter + ReusedExchange [cd_demo_sk,cd_marital_status] [cd_demo_sk,cd_marital_status] #11 + InputAdapter + BroadcastExchange #12 + WholeStageCodegen + Project [p_promo_sk] + Filter [p_promo_sk] + Scan parquet default.promotion [p_promo_sk] [p_promo_sk] + InputAdapter + BroadcastExchange #13 + WholeStageCodegen + Project [hd_demo_sk,hd_income_band_sk] + Filter [hd_demo_sk,hd_income_band_sk] + Scan parquet default.household_demographics [hd_demo_sk,hd_income_band_sk] [hd_demo_sk,hd_income_band_sk] + InputAdapter + ReusedExchange [hd_demo_sk,hd_income_band_sk] [hd_demo_sk,hd_income_band_sk] #13 + InputAdapter + BroadcastExchange #14 + WholeStageCodegen + Project [ca_zip,ca_city,ca_address_sk,ca_street_number,ca_street_name] + Filter [ca_address_sk] + Scan parquet default.customer_address [ca_zip,ca_city,ca_address_sk,ca_street_number,ca_street_name] [ca_zip,ca_city,ca_address_sk,ca_street_number,ca_street_name] + InputAdapter + ReusedExchange [ca_city,ca_address_sk,ca_street_number,ca_zip,ca_street_name] [ca_city,ca_address_sk,ca_street_number,ca_zip,ca_street_name] #14 + InputAdapter + BroadcastExchange #15 + WholeStageCodegen + Project [ib_income_band_sk] + Filter [ib_income_band_sk] + Scan parquet default.income_band [ib_income_band_sk] 
[ib_income_band_sk] + InputAdapter + ReusedExchange [ib_income_band_sk] [ib_income_band_sk] #15 + InputAdapter + BroadcastExchange #16 + WholeStageCodegen + Project [i_item_sk,i_product_name] + Filter [i_current_price,i_color,i_item_sk] + Scan parquet default.item [i_item_sk,i_current_price,i_color,i_product_name] [i_item_sk,i_current_price,i_color,i_product_name] + InputAdapter + BroadcastExchange #17 + WholeStageCodegen + HashAggregate [d_year,ca_street_name,count,sum(UnscaledValue(ss_coupon_amt)),ca_street_number,sum,ca_street_name,sum(UnscaledValue(ss_list_price)),ca_zip,count(1),d_year,ca_city,sum,i_item_sk,ca_street_number,sum,d_year,ca_city,s_store_name,s_zip,sum(UnscaledValue(ss_wholesale_cost)),ca_zip,i_product_name] [count,sum(UnscaledValue(ss_coupon_amt)),syear,s3,sum,sum(UnscaledValue(ss_list_price)),count(1),store_name,cnt,sum,s2,item_sk,s1,sum,store_zip,sum(UnscaledValue(ss_wholesale_cost))] + InputAdapter + Exchange [d_year,ca_street_name,ca_street_number,ca_street_name,ca_zip,d_year,ca_city,i_item_sk,ca_street_number,d_year,ca_city,s_store_name,s_zip,ca_zip,i_product_name] #18 + WholeStageCodegen + HashAggregate [d_year,ca_street_name,count,ca_street_number,ss_wholesale_cost,sum,sum,ca_street_name,count,ca_zip,ss_coupon_amt,d_year,ca_city,ss_list_price,sum,sum,i_item_sk,sum,ca_street_number,sum,d_year,ca_city,s_store_name,s_zip,ca_zip,i_product_name] [count,sum,sum,count,sum,sum,sum,sum] + Project [ca_city,ss_coupon_amt,s_store_name,ss_list_price,d_year,ca_zip,ca_street_number,ca_zip,d_year,i_item_sk,ca_street_name,ca_city,i_product_name,s_zip,ca_street_number,d_year,ss_wholesale_cost,ca_street_name] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [ca_city,ss_coupon_amt,s_store_name,ss_list_price,ss_item_sk,d_year,ca_zip,ca_street_number,ca_zip,d_year,ca_street_name,ca_city,s_zip,ca_street_number,d_year,ss_wholesale_cost,ca_street_name] + BroadcastHashJoin [hd_income_band_sk,ib_income_band_sk] + Project 
[ca_city,ss_coupon_amt,s_store_name,ss_list_price,ss_item_sk,d_year,ca_zip,ca_street_number,ca_zip,d_year,ca_street_name,ca_city,s_zip,hd_income_band_sk,ca_street_number,d_year,ss_wholesale_cost,ca_street_name] + BroadcastHashJoin [hd_income_band_sk,ib_income_band_sk] + Project [ca_city,ss_coupon_amt,s_store_name,ss_list_price,ss_item_sk,d_year,ca_zip,ca_street_number,ca_zip,hd_income_band_sk,d_year,ca_street_name,ca_city,s_zip,hd_income_band_sk,ca_street_number,d_year,ss_wholesale_cost,ca_street_name] + BroadcastHashJoin [c_current_addr_sk,ca_address_sk] + Project [c_current_addr_sk,ss_coupon_amt,s_store_name,ss_list_price,ss_item_sk,d_year,ca_zip,hd_income_band_sk,d_year,ca_city,s_zip,hd_income_band_sk,ca_street_number,d_year,ss_wholesale_cost,ca_street_name] + BroadcastHashJoin [ss_addr_sk,ca_address_sk] + Project [ss_addr_sk,c_current_addr_sk,ss_coupon_amt,s_store_name,ss_list_price,ss_item_sk,d_year,hd_income_band_sk,d_year,s_zip,hd_income_band_sk,d_year,ss_wholesale_cost] + BroadcastHashJoin [c_current_hdemo_sk,hd_demo_sk] + Project [ss_addr_sk,c_current_addr_sk,ss_coupon_amt,s_store_name,ss_list_price,ss_item_sk,c_current_hdemo_sk,d_year,hd_income_band_sk,d_year,s_zip,d_year,ss_wholesale_cost] + BroadcastHashJoin [ss_hdemo_sk,hd_demo_sk] + Project [ss_addr_sk,c_current_addr_sk,ss_coupon_amt,s_store_name,ss_list_price,ss_item_sk,c_current_hdemo_sk,d_year,d_year,s_zip,d_year,ss_hdemo_sk,ss_wholesale_cost] + BroadcastHashJoin [ss_promo_sk,p_promo_sk] + Project [ss_addr_sk,c_current_addr_sk,ss_coupon_amt,s_store_name,ss_list_price,ss_item_sk,c_current_hdemo_sk,d_year,d_year,s_zip,d_year,ss_promo_sk,ss_hdemo_sk,ss_wholesale_cost] + BroadcastHashJoin [c_current_cdemo_sk,cd_demo_sk,cd_marital_status,cd_marital_status] + Project [c_current_cdemo_sk,ss_addr_sk,c_current_addr_sk,ss_coupon_amt,s_store_name,ss_list_price,ss_item_sk,c_current_hdemo_sk,d_year,cd_marital_status,d_year,s_zip,d_year,ss_promo_sk,ss_hdemo_sk,ss_wholesale_cost] + BroadcastHashJoin 
[ss_cdemo_sk,cd_demo_sk] + Project [c_current_cdemo_sk,ss_addr_sk,c_current_addr_sk,ss_coupon_amt,s_store_name,ss_list_price,ss_item_sk,c_current_hdemo_sk,d_year,d_year,s_zip,d_year,ss_cdemo_sk,ss_promo_sk,ss_hdemo_sk,ss_wholesale_cost] + BroadcastHashJoin [c_first_shipto_date_sk,d_date_sk] + Project [c_current_cdemo_sk,c_first_shipto_date_sk,ss_addr_sk,c_current_addr_sk,ss_coupon_amt,s_store_name,ss_list_price,ss_item_sk,c_current_hdemo_sk,d_year,d_year,s_zip,ss_cdemo_sk,ss_promo_sk,ss_hdemo_sk,ss_wholesale_cost] + BroadcastHashJoin [c_first_sales_date_sk,d_date_sk] + Project [c_current_cdemo_sk,c_first_shipto_date_sk,ss_addr_sk,c_current_addr_sk,ss_coupon_amt,s_store_name,ss_list_price,ss_item_sk,c_current_hdemo_sk,d_year,s_zip,ss_cdemo_sk,ss_promo_sk,c_first_sales_date_sk,ss_hdemo_sk,ss_wholesale_cost] + BroadcastHashJoin [ss_customer_sk,c_customer_sk] + Project [ss_addr_sk,ss_coupon_amt,s_store_name,ss_list_price,ss_item_sk,d_year,ss_customer_sk,s_zip,ss_cdemo_sk,ss_promo_sk,ss_hdemo_sk,ss_wholesale_cost] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_addr_sk,ss_coupon_amt,ss_list_price,ss_item_sk,d_year,ss_store_sk,ss_customer_sk,ss_cdemo_sk,ss_promo_sk,ss_hdemo_sk,ss_wholesale_cost] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_addr_sk,ss_coupon_amt,ss_list_price,ss_item_sk,ss_store_sk,ss_customer_sk,ss_cdemo_sk,ss_sold_date_sk,ss_promo_sk,ss_hdemo_sk,ss_wholesale_cost] + BroadcastHashJoin [ss_item_sk,cs_item_sk] + Project [ss_addr_sk,ss_coupon_amt,ss_list_price,ss_item_sk,ss_store_sk,ss_customer_sk,ss_cdemo_sk,ss_sold_date_sk,ss_promo_sk,ss_hdemo_sk,ss_wholesale_cost] + BroadcastHashJoin [ss_item_sk,ss_ticket_number,sr_item_sk,sr_ticket_number] + Project [ss_addr_sk,ss_coupon_amt,ss_list_price,ss_item_sk,ss_store_sk,ss_customer_sk,ss_cdemo_sk,ss_sold_date_sk,ss_promo_sk,ss_hdemo_sk,ss_wholesale_cost,ss_ticket_number] + Filter 
[ss_cdemo_sk,ss_hdemo_sk,ss_sold_date_sk,ss_customer_sk,ss_promo_sk,ss_store_sk,ss_ticket_number,ss_addr_sk,ss_item_sk] + Scan parquet default.store_sales [ss_addr_sk,ss_coupon_amt,ss_list_price,ss_item_sk,ss_store_sk,ss_customer_sk,ss_cdemo_sk,ss_sold_date_sk,ss_promo_sk,ss_hdemo_sk,ss_wholesale_cost,ss_ticket_number] [ss_addr_sk,ss_coupon_amt,ss_list_price,ss_item_sk,ss_store_sk,ss_customer_sk,ss_cdemo_sk,ss_sold_date_sk,ss_promo_sk,ss_hdemo_sk,ss_wholesale_cost,ss_ticket_number] + InputAdapter + ReusedExchange [sr_item_sk,sr_ticket_number] [sr_item_sk,sr_ticket_number] #3 + InputAdapter + ReusedExchange [cs_item_sk] [cs_item_sk] #4 + InputAdapter + BroadcastExchange #19 + WholeStageCodegen + Project [d_date_sk,d_year] + Filter [d_year,d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_year] [d_date_sk,d_year] + InputAdapter + ReusedExchange [s_store_sk,s_store_name,s_zip] [s_store_sk,s_store_name,s_zip] #8 + InputAdapter + ReusedExchange [c_current_cdemo_sk,c_first_shipto_date_sk,c_current_addr_sk,c_current_hdemo_sk,c_customer_sk,c_first_sales_date_sk] [c_current_cdemo_sk,c_first_shipto_date_sk,c_current_addr_sk,c_current_hdemo_sk,c_customer_sk,c_first_sales_date_sk] #9 + InputAdapter + ReusedExchange [d_date_sk,d_year] [d_date_sk,d_year] #10 + InputAdapter + ReusedExchange [d_date_sk,d_year] [d_date_sk,d_year] #10 + InputAdapter + ReusedExchange [cd_demo_sk,cd_marital_status] [cd_demo_sk,cd_marital_status] #11 + InputAdapter + ReusedExchange [cd_demo_sk,cd_marital_status] [cd_demo_sk,cd_marital_status] #11 + InputAdapter + ReusedExchange [p_promo_sk] [p_promo_sk] #12 + InputAdapter + ReusedExchange [hd_demo_sk,hd_income_band_sk] [hd_demo_sk,hd_income_band_sk] #13 + InputAdapter + ReusedExchange [hd_demo_sk,hd_income_band_sk] [hd_demo_sk,hd_income_band_sk] #13 + InputAdapter + ReusedExchange [ca_zip,ca_city,ca_address_sk,ca_street_number,ca_street_name] [ca_zip,ca_city,ca_address_sk,ca_street_number,ca_street_name] #14 + InputAdapter + ReusedExchange 
[ca_city,ca_address_sk,ca_street_number,ca_zip,ca_street_name] [ca_city,ca_address_sk,ca_street_number,ca_zip,ca_street_name] #14 + InputAdapter + ReusedExchange [ib_income_band_sk] [ib_income_band_sk] #15 + InputAdapter + ReusedExchange [ib_income_band_sk] [ib_income_band_sk] #15 + InputAdapter + ReusedExchange [i_item_sk,i_product_name] [i_item_sk,i_product_name] #16 diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q65/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q65/explain.txt new file mode 100644 index 000000000..b06bc7ded --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q65/explain.txt @@ -0,0 +1,42 @@ +== Physical Plan == +TakeOrderedAndProject(limit=100, orderBy=[s_store_name#1 ASC NULLS FIRST,i_item_desc#2 ASC NULLS FIRST], output=[s_store_name#1,i_item_desc#2,revenue#3,i_current_price#4,i_wholesale_cost#5,i_brand#6]) ++- *(9) Project [s_store_name#1, i_item_desc#2, revenue#3, i_current_price#4, i_wholesale_cost#5, i_brand#6] + +- *(9) BroadcastHashJoin [ss_store_sk#7], [ss_store_sk#8], Inner, BuildRight, (cast(revenue#3 as decimal(23,7)) <= CheckOverflow((0.100000 * promote_precision(ave#9)), DecimalType(23,7))) + :- *(9) Project [s_store_name#1, ss_store_sk#7, revenue#3, i_item_desc#2, i_current_price#4, i_wholesale_cost#5, i_brand#6] + : +- *(9) BroadcastHashJoin [ss_item_sk#10], [i_item_sk#11], Inner, BuildRight + : :- *(9) Project [s_store_name#1, ss_store_sk#7, ss_item_sk#10, revenue#3] + : : +- *(9) BroadcastHashJoin [s_store_sk#12], [ss_store_sk#7], Inner, BuildRight + : : :- *(9) Project [s_store_sk#12, s_store_name#1] + : : : +- *(9) Filter isnotnull(s_store_sk#12) + : : : +- *(9) FileScan parquet default.store[s_store_sk#12,s_store_name#1] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_store_sk)], ReadSchema: struct + : : +- 
BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(3) Filter isnotnull(revenue#3) + : : +- *(3) HashAggregate(keys=[ss_store_sk#7, ss_item_sk#10], functions=[sum(UnscaledValue(ss_sales_price#13))]) + : : +- Exchange hashpartitioning(ss_store_sk#7, ss_item_sk#10, 200) + : : +- *(2) HashAggregate(keys=[ss_store_sk#7, ss_item_sk#10], functions=[partial_sum(UnscaledValue(ss_sales_price#13))]) + : : +- *(2) Project [ss_item_sk#10, ss_store_sk#7, ss_sales_price#13] + : : +- *(2) BroadcastHashJoin [ss_sold_date_sk#14], [d_date_sk#15], Inner, BuildRight + : : :- *(2) Project [ss_sold_date_sk#14, ss_item_sk#10, ss_store_sk#7, ss_sales_price#13] + : : : +- *(2) Filter ((isnotnull(ss_sold_date_sk#14) && isnotnull(ss_store_sk#7)) && isnotnull(ss_item_sk#10)) + : : : +- *(2) FileScan parquet default.store_sales[ss_sold_date_sk#14,ss_item_sk#10,ss_store_sk#7,ss_sales_price#13] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk), IsNotNull(ss_item_sk)], ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(1) Project [d_date_sk#15] + : : +- *(1) Filter (((isnotnull(d_month_seq#16) && (d_month_seq#16 >= 1176)) && (d_month_seq#16 <= 1187)) && isnotnull(d_date_sk#15)) + : : +- *(1) FileScan parquet default.date_dim[d_date_sk#15,d_month_seq#16] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1176), LessThanOrEqual(d_month_seq,1187),..., ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(4) Project [i_item_sk#11, i_item_desc#2, i_current_price#4, 
i_wholesale_cost#5, i_brand#6] + : +- *(4) Filter isnotnull(i_item_sk#11) + : +- *(4) FileScan parquet default.item[i_item_sk#11,i_item_desc#2,i_current_price#4,i_wholesale_cost#5,i_brand#6] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct + +- ReusedExchange [d_date_sk#15], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q65/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q65/simplified.txt new file mode 100644 index 000000000..a83319f19 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q65/simplified.txt @@ -0,0 +1,57 @@ +TakeOrderedAndProject [i_item_desc,i_brand,revenue,i_wholesale_cost,i_current_price,s_store_name] + WholeStageCodegen + Project [i_item_desc,i_brand,revenue,i_wholesale_cost,i_current_price,s_store_name] + BroadcastHashJoin [ss_store_sk,ss_store_sk,revenue,ave] + Project [s_store_name,i_current_price,ss_store_sk,revenue,i_brand,i_wholesale_cost,i_item_desc] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [s_store_name,ss_store_sk,ss_item_sk,revenue] + BroadcastHashJoin [s_store_sk,ss_store_sk] + Project [s_store_sk,s_store_name] + Filter [s_store_sk] + Scan parquet default.store [s_store_sk,s_store_name] [s_store_sk,s_store_name] + InputAdapter + BroadcastExchange #1 + WholeStageCodegen + Filter [revenue] + HashAggregate [ss_store_sk,ss_item_sk,sum,sum(UnscaledValue(ss_sales_price))] [sum(UnscaledValue(ss_sales_price)),revenue,sum] + InputAdapter + Exchange [ss_store_sk,ss_item_sk] #2 + WholeStageCodegen + HashAggregate [ss_item_sk,sum,sum,ss_store_sk,ss_sales_price] [sum,sum] + Project [ss_item_sk,ss_store_sk,ss_sales_price] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project 
[ss_sold_date_sk,ss_item_sk,ss_store_sk,ss_sales_price] + Filter [ss_sold_date_sk,ss_store_sk,ss_item_sk] + Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_store_sk,ss_sales_price] [ss_sold_date_sk,ss_item_sk,ss_store_sk,ss_sales_price] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen + Project [d_date_sk] + Filter [d_month_seq,d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_month_seq] [d_date_sk,d_month_seq] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen + Project [i_current_price,i_item_sk,i_brand,i_wholesale_cost,i_item_desc] + Filter [i_item_sk] + Scan parquet default.item [i_current_price,i_item_sk,i_brand,i_wholesale_cost,i_item_desc] [i_current_price,i_item_sk,i_brand,i_wholesale_cost,i_item_desc] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen + HashAggregate [ss_store_sk,sum,count,avg(revenue)] [avg(revenue),ave,sum,count] + InputAdapter + Exchange [ss_store_sk] #6 + WholeStageCodegen + HashAggregate [sum,sum,count,revenue,ss_store_sk,count] [sum,count,sum,count] + HashAggregate [ss_store_sk,ss_item_sk,sum,sum(UnscaledValue(ss_sales_price))] [sum(UnscaledValue(ss_sales_price)),revenue,sum] + InputAdapter + Exchange [ss_store_sk,ss_item_sk] #7 + WholeStageCodegen + HashAggregate [sum,ss_store_sk,sum,ss_sales_price,ss_item_sk] [sum,sum] + Project [ss_item_sk,ss_store_sk,ss_sales_price] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_sold_date_sk,ss_item_sk,ss_store_sk,ss_sales_price] + Filter [ss_sold_date_sk,ss_store_sk] + Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_store_sk,ss_sales_price] [ss_sold_date_sk,ss_item_sk,ss_store_sk,ss_sales_price] + InputAdapter + ReusedExchange [d_date_sk] [d_date_sk] #3 diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q66/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q66/explain.txt new file mode 100644 index 000000000..14d9e4ec9 --- /dev/null +++ 
b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q66/explain.txt @@ -0,0 +1,54 @@ +== Physical Plan == +TakeOrderedAndProject(limit=100, orderBy=[w_warehouse_name#1 ASC NULLS FIRST], output=[w_warehouse_name#1,w_warehouse_sq_ft#2,w_city#3,w_county#4,w_state#5,w_country#6,ship_carriers#7,year#8,jan_sales#9,feb_sales#10,mar_sales#11,apr_sales#12,may_sales#13,jun_sales#14,jul_sales#15,aug_sales#16,sep_sales#17,oct_sales#18,nov_sales#19,dec_sales#20,jan_sales_per_sq_foot#21,feb_sales_per_sq_foot#22,mar_sales_per_sq_foot#23,apr_sales_per_sq_foot#24,... 20 more fields]) ++- *(14) HashAggregate(keys=[w_warehouse_name#1, w_warehouse_sq_ft#2, w_city#3, w_county#4, w_state#5, w_country#6, ship_carriers#7, year#8], functions=[sum(jan_sales#25), sum(feb_sales#26), sum(mar_sales#27), sum(apr_sales#28), sum(may_sales#29), sum(jun_sales#30), sum(jul_sales#31), sum(aug_sales#32), sum(sep_sales#33), sum(oct_sales#34), sum(nov_sales#35), sum(dec_sales#36), sum(CheckOverflow((promote_precision(jan_sales#25) / promote_precision(cast(cast(w_warehouse_sq_ft#2 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))), sum(CheckOverflow((promote_precision(feb_sales#26) / promote_precision(cast(cast(w_warehouse_sq_ft#2 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))), sum(CheckOverflow((promote_precision(mar_sales#27) / promote_precision(cast(cast(w_warehouse_sq_ft#2 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))), sum(CheckOverflow((promote_precision(apr_sales#28) / promote_precision(cast(cast(w_warehouse_sq_ft#2 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))), sum(CheckOverflow((promote_precision(may_sales#29) / promote_precision(cast(cast(w_warehouse_sq_ft#2 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))), sum(CheckOverflow((promote_precision(jun_sales#30) / promote_precision(cast(cast(w_warehouse_sq_ft#2 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))), sum(CheckOverflow((promote_precision(jul_sales#31) / 
promote_precision(cast(cast(w_warehouse_sq_ft#2 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))), sum(CheckOverflow((promote_precision(aug_sales#32) / promote_precision(cast(cast(w_warehouse_sq_ft#2 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))), sum(CheckOverflow((promote_precision(sep_sales#33) / promote_precision(cast(cast(w_warehouse_sq_ft#2 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))), sum(CheckOverflow((promote_precision(oct_sales#34) / promote_precision(cast(cast(w_warehouse_sq_ft#2 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))), sum(CheckOverflow((promote_precision(nov_sales#35) / promote_precision(cast(cast(w_warehouse_sq_ft#2 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))), sum(CheckOverflow((promote_precision(dec_sales#36) / promote_precision(cast(cast(w_warehouse_sq_ft#2 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))), ... 12 more fields]) + +- Exchange hashpartitioning(w_warehouse_name#1, w_warehouse_sq_ft#2, w_city#3, w_county#4, w_state#5, w_country#6, ship_carriers#7, year#8, 200) + +- *(13) HashAggregate(keys=[w_warehouse_name#1, w_warehouse_sq_ft#2, w_city#3, w_county#4, w_state#5, w_country#6, ship_carriers#7, year#8], functions=[partial_sum(jan_sales#25), partial_sum(feb_sales#26), partial_sum(mar_sales#27), partial_sum(apr_sales#28), partial_sum(may_sales#29), partial_sum(jun_sales#30), partial_sum(jul_sales#31), partial_sum(aug_sales#32), partial_sum(sep_sales#33), partial_sum(oct_sales#34), partial_sum(nov_sales#35), partial_sum(dec_sales#36), partial_sum(CheckOverflow((promote_precision(jan_sales#25) / promote_precision(cast(cast(w_warehouse_sq_ft#2 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))), partial_sum(CheckOverflow((promote_precision(feb_sales#26) / promote_precision(cast(cast(w_warehouse_sq_ft#2 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))), partial_sum(CheckOverflow((promote_precision(mar_sales#27) / 
promote_precision(cast(cast(w_warehouse_sq_ft#2 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))), partial_sum(CheckOverflow((promote_precision(apr_sales#28) / promote_precision(cast(cast(w_warehouse_sq_ft#2 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))), partial_sum(CheckOverflow((promote_precision(may_sales#29) / promote_precision(cast(cast(w_warehouse_sq_ft#2 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))), partial_sum(CheckOverflow((promote_precision(jun_sales#30) / promote_precision(cast(cast(w_warehouse_sq_ft#2 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))), partial_sum(CheckOverflow((promote_precision(jul_sales#31) / promote_precision(cast(cast(w_warehouse_sq_ft#2 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))), partial_sum(CheckOverflow((promote_precision(aug_sales#32) / promote_precision(cast(cast(w_warehouse_sq_ft#2 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))), partial_sum(CheckOverflow((promote_precision(sep_sales#33) / promote_precision(cast(cast(w_warehouse_sq_ft#2 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))), partial_sum(CheckOverflow((promote_precision(oct_sales#34) / promote_precision(cast(cast(w_warehouse_sq_ft#2 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))), partial_sum(CheckOverflow((promote_precision(nov_sales#35) / promote_precision(cast(cast(w_warehouse_sq_ft#2 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))), partial_sum(CheckOverflow((promote_precision(dec_sales#36) / promote_precision(cast(cast(w_warehouse_sq_ft#2 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))), ... 
12 more fields]) + +- Union + :- *(6) HashAggregate(keys=[w_warehouse_name#1, w_warehouse_sq_ft#2, w_city#3, w_county#4, w_state#5, w_country#6, d_year#37], functions=[sum(CASE WHEN (d_moy#38 = 1) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#39 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 2) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#39 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 3) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#39 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 4) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#39 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 5) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#39 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 6) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#39 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 7) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#39 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 8) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#39 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), 
DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 9) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#39 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 10) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#39 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 11) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#39 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 12) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#39 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 1) THEN CheckOverflow((promote_precision(cast(ws_net_paid#41 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 2) THEN CheckOverflow((promote_precision(cast(ws_net_paid#41 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 3) THEN CheckOverflow((promote_precision(cast(ws_net_paid#41 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 4) THEN CheckOverflow((promote_precision(cast(ws_net_paid#41 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 5) THEN CheckOverflow((promote_precision(cast(ws_net_paid#41 as decimal(12,2))) * 
promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 6) THEN CheckOverflow((promote_precision(cast(ws_net_paid#41 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 7) THEN CheckOverflow((promote_precision(cast(ws_net_paid#41 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 8) THEN CheckOverflow((promote_precision(cast(ws_net_paid#41 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 9) THEN CheckOverflow((promote_precision(cast(ws_net_paid#41 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 10) THEN CheckOverflow((promote_precision(cast(ws_net_paid#41 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 11) THEN CheckOverflow((promote_precision(cast(ws_net_paid#41 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 12) THEN CheckOverflow((promote_precision(cast(ws_net_paid#41 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)]) + : +- Exchange hashpartitioning(w_warehouse_name#1, w_warehouse_sq_ft#2, w_city#3, w_county#4, w_state#5, w_country#6, d_year#37, 200) + : +- *(5) HashAggregate(keys=[w_warehouse_name#1, w_warehouse_sq_ft#2, w_city#3, w_county#4, w_state#5, w_country#6, d_year#37], functions=[partial_sum(CASE WHEN 
(d_moy#38 = 1) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#39 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 2) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#39 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 3) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#39 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 4) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#39 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 5) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#39 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 6) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#39 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 7) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#39 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 8) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#39 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 9) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#39 
as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 10) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#39 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 11) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#39 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 12) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#39 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 1) THEN CheckOverflow((promote_precision(cast(ws_net_paid#41 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 2) THEN CheckOverflow((promote_precision(cast(ws_net_paid#41 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 3) THEN CheckOverflow((promote_precision(cast(ws_net_paid#41 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 4) THEN CheckOverflow((promote_precision(cast(ws_net_paid#41 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 5) THEN CheckOverflow((promote_precision(cast(ws_net_paid#41 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), 
DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 6) THEN CheckOverflow((promote_precision(cast(ws_net_paid#41 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 7) THEN CheckOverflow((promote_precision(cast(ws_net_paid#41 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 8) THEN CheckOverflow((promote_precision(cast(ws_net_paid#41 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 9) THEN CheckOverflow((promote_precision(cast(ws_net_paid#41 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 10) THEN CheckOverflow((promote_precision(cast(ws_net_paid#41 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 11) THEN CheckOverflow((promote_precision(cast(ws_net_paid#41 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 12) THEN CheckOverflow((promote_precision(cast(ws_net_paid#41 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)]) + : +- *(5) Project [ws_quantity#40, ws_ext_sales_price#39, ws_net_paid#41, w_warehouse_name#1, w_warehouse_sq_ft#2, w_city#3, w_county#4, w_state#5, w_country#6, d_year#37, d_moy#38] + : +- *(5) BroadcastHashJoin [ws_ship_mode_sk#42], [sm_ship_mode_sk#43], Inner, BuildRight + : :- *(5) Project [ws_ship_mode_sk#42, 
ws_quantity#40, ws_ext_sales_price#39, ws_net_paid#41, w_warehouse_name#1, w_warehouse_sq_ft#2, w_city#3, w_county#4, w_state#5, w_country#6, d_year#37, d_moy#38] + : : +- *(5) BroadcastHashJoin [ws_sold_time_sk#44], [t_time_sk#45], Inner, BuildRight + : : :- *(5) Project [ws_sold_time_sk#44, ws_ship_mode_sk#42, ws_quantity#40, ws_ext_sales_price#39, ws_net_paid#41, w_warehouse_name#1, w_warehouse_sq_ft#2, w_city#3, w_county#4, w_state#5, w_country#6, d_year#37, d_moy#38] + : : : +- *(5) BroadcastHashJoin [ws_sold_date_sk#46], [d_date_sk#47], Inner, BuildRight + : : : :- *(5) Project [ws_sold_date_sk#46, ws_sold_time_sk#44, ws_ship_mode_sk#42, ws_quantity#40, ws_ext_sales_price#39, ws_net_paid#41, w_warehouse_name#1, w_warehouse_sq_ft#2, w_city#3, w_county#4, w_state#5, w_country#6] + : : : : +- *(5) BroadcastHashJoin [ws_warehouse_sk#48], [w_warehouse_sk#49], Inner, BuildRight + : : : : :- *(5) Project [ws_sold_date_sk#46, ws_sold_time_sk#44, ws_ship_mode_sk#42, ws_warehouse_sk#48, ws_quantity#40, ws_ext_sales_price#39, ws_net_paid#41] + : : : : : +- *(5) Filter (((isnotnull(ws_warehouse_sk#48) && isnotnull(ws_sold_date_sk#46)) && isnotnull(ws_sold_time_sk#44)) && isnotnull(ws_ship_mode_sk#42)) + : : : : : +- *(5) FileScan parquet default.web_sales[ws_sold_date_sk#46,ws_sold_time_sk#44,ws_ship_mode_sk#42,ws_warehouse_sk#48,ws_quantity#40,ws_ext_sales_price#39,ws_net_paid#41] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_warehouse_sk), IsNotNull(ws_sold_date_sk), IsNotNull(ws_sold_time_sk), IsNotNull(ws..., ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(3) Project [t_time_sk#45] + : : +- *(3) Filter (((isnotnull(t_time#50) && (t_time#50 >= 30838)) && (t_time#50 <= 59638)) && isnotnull(t_time_sk#45)) + : : +- *(3) FileScan parquet 
default.time_dim[t_time_sk#45,t_time#50] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/time_dim], PartitionFilters: [], PushedFilters: [IsNotNull(t_time), GreaterThanOrEqual(t_time,30838), LessThanOrEqual(t_time,59638), IsNotNull(t_..., ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(4) Project [sm_ship_mode_sk#43] + : +- *(4) Filter (sm_carrier#51 IN (DHL,BARIAN) && isnotnull(sm_ship_mode_sk#43)) + : +- *(4) FileScan parquet default.ship_mode[sm_ship_mode_sk#43,sm_carrier#51] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/ship_mode], PartitionFilters: [], PushedFilters: [In(sm_carrier, [DHL,BARIAN]), IsNotNull(sm_ship_mode_sk)], ReadSchema: struct + +- *(12) HashAggregate(keys=[w_warehouse_name#1, w_warehouse_sq_ft#2, w_city#3, w_county#4, w_state#5, w_country#6, d_year#37], functions=[sum(CASE WHEN (d_moy#38 = 1) THEN CheckOverflow((promote_precision(cast(cs_sales_price#52 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 2) THEN CheckOverflow((promote_precision(cast(cs_sales_price#52 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 3) THEN CheckOverflow((promote_precision(cast(cs_sales_price#52 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 4) THEN CheckOverflow((promote_precision(cast(cs_sales_price#52 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 5) THEN 
CheckOverflow((promote_precision(cast(cs_sales_price#52 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 6) THEN CheckOverflow((promote_precision(cast(cs_sales_price#52 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 7) THEN CheckOverflow((promote_precision(cast(cs_sales_price#52 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 8) THEN CheckOverflow((promote_precision(cast(cs_sales_price#52 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 9) THEN CheckOverflow((promote_precision(cast(cs_sales_price#52 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 10) THEN CheckOverflow((promote_precision(cast(cs_sales_price#52 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 11) THEN CheckOverflow((promote_precision(cast(cs_sales_price#52 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 12) THEN CheckOverflow((promote_precision(cast(cs_sales_price#52 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 1) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#54 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), 
DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 2) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#54 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 3) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#54 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 4) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#54 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 5) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#54 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 6) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#54 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 7) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#54 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 8) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#54 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 9) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#54 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 10) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#54 as 
decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 11) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#54 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 12) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#54 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)]) + +- Exchange hashpartitioning(w_warehouse_name#1, w_warehouse_sq_ft#2, w_city#3, w_county#4, w_state#5, w_country#6, d_year#37, 200) + +- *(11) HashAggregate(keys=[w_warehouse_name#1, w_warehouse_sq_ft#2, w_city#3, w_county#4, w_state#5, w_country#6, d_year#37], functions=[partial_sum(CASE WHEN (d_moy#38 = 1) THEN CheckOverflow((promote_precision(cast(cs_sales_price#52 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 2) THEN CheckOverflow((promote_precision(cast(cs_sales_price#52 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 3) THEN CheckOverflow((promote_precision(cast(cs_sales_price#52 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 4) THEN CheckOverflow((promote_precision(cast(cs_sales_price#52 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 5) THEN CheckOverflow((promote_precision(cast(cs_sales_price#52 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as 
decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 6) THEN CheckOverflow((promote_precision(cast(cs_sales_price#52 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 7) THEN CheckOverflow((promote_precision(cast(cs_sales_price#52 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 8) THEN CheckOverflow((promote_precision(cast(cs_sales_price#52 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 9) THEN CheckOverflow((promote_precision(cast(cs_sales_price#52 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 10) THEN CheckOverflow((promote_precision(cast(cs_sales_price#52 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 11) THEN CheckOverflow((promote_precision(cast(cs_sales_price#52 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 12) THEN CheckOverflow((promote_precision(cast(cs_sales_price#52 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 1) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#54 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 
= 2) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#54 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 3) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#54 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 4) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#54 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 5) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#54 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 6) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#54 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 7) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#54 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 8) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#54 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 9) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#54 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 10) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#54 
as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 11) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#54 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 12) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#54 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)]) + +- *(11) Project [cs_quantity#53, cs_sales_price#52, cs_net_paid_inc_tax#54, w_warehouse_name#1, w_warehouse_sq_ft#2, w_city#3, w_county#4, w_state#5, w_country#6, d_year#37, d_moy#38] + +- *(11) BroadcastHashJoin [cs_ship_mode_sk#55], [sm_ship_mode_sk#43], Inner, BuildRight + :- *(11) Project [cs_ship_mode_sk#55, cs_quantity#53, cs_sales_price#52, cs_net_paid_inc_tax#54, w_warehouse_name#1, w_warehouse_sq_ft#2, w_city#3, w_county#4, w_state#5, w_country#6, d_year#37, d_moy#38] + : +- *(11) BroadcastHashJoin [cs_sold_time_sk#56], [t_time_sk#45], Inner, BuildRight + : :- *(11) Project [cs_sold_time_sk#56, cs_ship_mode_sk#55, cs_quantity#53, cs_sales_price#52, cs_net_paid_inc_tax#54, w_warehouse_name#1, w_warehouse_sq_ft#2, w_city#3, w_county#4, w_state#5, w_country#6, d_year#37, d_moy#38] + : : +- *(11) BroadcastHashJoin [cs_sold_date_sk#57], [d_date_sk#47], Inner, BuildRight + : : :- *(11) Project [cs_sold_date_sk#57, cs_sold_time_sk#56, cs_ship_mode_sk#55, cs_quantity#53, cs_sales_price#52, cs_net_paid_inc_tax#54, w_warehouse_name#1, w_warehouse_sq_ft#2, w_city#3, w_county#4, w_state#5, w_country#6] + : : : +- *(11) BroadcastHashJoin [cs_warehouse_sk#58], [w_warehouse_sk#49], Inner, BuildRight + : : : :- *(11) Project [cs_sold_date_sk#57, cs_sold_time_sk#56, cs_ship_mode_sk#55, cs_warehouse_sk#58, cs_quantity#53, cs_sales_price#52, 
cs_net_paid_inc_tax#54] + : : : : +- *(11) Filter (((isnotnull(cs_warehouse_sk#58) && isnotnull(cs_sold_date_sk#57)) && isnotnull(cs_sold_time_sk#56)) && isnotnull(cs_ship_mode_sk#55)) + : : : : +- *(11) FileScan parquet default.catalog_sales[cs_sold_date_sk#57,cs_sold_time_sk#56,cs_ship_mode_sk#55,cs_warehouse_sk#58,cs_quantity#53,cs_sales_price#52,cs_net_paid_inc_tax#54] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_warehouse_sk), IsNotNull(cs_sold_date_sk), IsNotNull(cs_sold_time_sk), IsNotNull(cs..., ReadSchema: struct= 1200)) && (d_month_seq#33 <= 1211)) && isnotnull(d_date_sk#32)) + : : +- *(1) FileScan parquet default.date_dim[d_date_sk#32,d_month_seq#33,d_year#18,d_moy#20,d_qoy#19] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1200), LessThanOrEqual(d_month_seq,1211),..., ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(2) Project [s_store_sk#30, s_store_id#26] + : +- *(2) Filter isnotnull(s_store_sk#30) + : +- *(2) FileScan parquet default.store[s_store_sk#30,s_store_id#26] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_store_sk)], ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(3) Project [i_item_sk#28, i_brand#24, i_class#23, i_category#22, i_product_name#25] + +- *(3) Filter isnotnull(i_item_sk#28) + +- *(3) FileScan parquet default.item[i_item_sk#28,i_brand#24,i_class#23,i_category#22,i_product_name#25] Batched: true, Format: Parquet, Location: 
InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q67/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q67/simplified.txt new file mode 100644 index 000000000..9381a8892 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q67/simplified.txt @@ -0,0 +1,43 @@ +TakeOrderedAndProject [d_qoy,i_category,i_brand,i_class,d_year,d_moy,rk,i_product_name,s_store_id,sumsales] + WholeStageCodegen + Filter [rk] + InputAdapter + Window [sumsales,i_category] + WholeStageCodegen + Sort [i_category,sumsales] + InputAdapter + Exchange [i_category] #1 + WholeStageCodegen + HashAggregate [s_store_id,sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price as decimal(12,2))) * promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)), 0.00)),i_category,sum,i_brand,i_product_name,spark_grouping_id,d_moy,i_class,d_year,d_qoy] [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price as decimal(12,2))) * promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)), 0.00)),sumsales,sum] + InputAdapter + Exchange [s_store_id,i_category,i_brand,i_product_name,spark_grouping_id,d_moy,i_class,d_year,d_qoy] #2 + WholeStageCodegen + HashAggregate [s_store_id,i_category,sum,i_brand,ss_sales_price,i_product_name,spark_grouping_id,d_moy,i_class,ss_quantity,d_year,d_qoy,sum] [sum,sum] + Expand [d_year,i_class,d_qoy,d_moy,i_brand,i_category,s_store_id,i_product_name,ss_sales_price,ss_quantity] + Project [d_year,d_qoy,d_moy,i_brand,s_store_id,ss_sales_price,i_category,ss_quantity,i_class,i_product_name] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [s_store_id,d_qoy,ss_quantity,ss_item_sk,d_moy,d_year,ss_sales_price] + BroadcastHashJoin 
[ss_store_sk,s_store_sk] + Project [d_qoy,ss_quantity,ss_item_sk,d_moy,d_year,ss_store_sk,ss_sales_price] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_quantity,ss_item_sk,ss_store_sk,ss_sales_price,ss_sold_date_sk] + Filter [ss_sold_date_sk,ss_store_sk,ss_item_sk] + Scan parquet default.store_sales [ss_quantity,ss_item_sk,ss_store_sk,ss_sales_price,ss_sold_date_sk] [ss_quantity,ss_item_sk,ss_store_sk,ss_sales_price,ss_sold_date_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen + Project [d_date_sk,d_year,d_moy,d_qoy] + Filter [d_month_seq,d_date_sk] + Scan parquet default.date_dim [d_qoy,d_moy,d_year,d_month_seq,d_date_sk] [d_qoy,d_moy,d_year,d_month_seq,d_date_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen + Project [s_store_sk,s_store_id] + Filter [s_store_sk] + Scan parquet default.store [s_store_sk,s_store_id] [s_store_sk,s_store_id] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen + Project [i_class,i_item_sk,i_product_name,i_category,i_brand] + Filter [i_item_sk] + Scan parquet default.item [i_class,i_item_sk,i_product_name,i_category,i_brand] [i_class,i_item_sk,i_product_name,i_category,i_brand] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q68/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q68/explain.txt new file mode 100644 index 000000000..c0d696ed3 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q68/explain.txt @@ -0,0 +1,41 @@ +== Physical Plan == +TakeOrderedAndProject(limit=100, orderBy=[c_last_name#1 ASC NULLS FIRST,ss_ticket_number#2 ASC NULLS FIRST], output=[c_last_name#1,c_first_name#3,ca_city#4,bought_city#5,ss_ticket_number#2,extended_price#6,extended_tax#7,list_price#8]) ++- *(8) Project [c_last_name#1, c_first_name#3, ca_city#4, bought_city#5, ss_ticket_number#2, extended_price#6, extended_tax#7, list_price#8] + +- *(8) BroadcastHashJoin [c_current_addr_sk#9], [ca_address_sk#10], Inner, BuildRight, NOT 
(ca_city#4 = bought_city#5) + :- *(8) Project [ss_ticket_number#2, bought_city#5, extended_price#6, list_price#8, extended_tax#7, c_current_addr_sk#9, c_first_name#3, c_last_name#1] + : +- *(8) BroadcastHashJoin [ss_customer_sk#11], [c_customer_sk#12], Inner, BuildRight + : :- *(8) HashAggregate(keys=[ss_ticket_number#2, ss_customer_sk#11, ss_addr_sk#13, ca_city#4], functions=[sum(UnscaledValue(ss_ext_sales_price#14)), sum(UnscaledValue(ss_ext_list_price#15)), sum(UnscaledValue(ss_ext_tax#16))]) + : : +- Exchange hashpartitioning(ss_ticket_number#2, ss_customer_sk#11, ss_addr_sk#13, ca_city#4, 200) + : : +- *(5) HashAggregate(keys=[ss_ticket_number#2, ss_customer_sk#11, ss_addr_sk#13, ca_city#4], functions=[partial_sum(UnscaledValue(ss_ext_sales_price#14)), partial_sum(UnscaledValue(ss_ext_list_price#15)), partial_sum(UnscaledValue(ss_ext_tax#16))]) + : : +- *(5) Project [ss_customer_sk#11, ss_addr_sk#13, ss_ticket_number#2, ss_ext_sales_price#14, ss_ext_list_price#15, ss_ext_tax#16, ca_city#4] + : : +- *(5) BroadcastHashJoin [ss_addr_sk#13], [ca_address_sk#10], Inner, BuildRight + : : :- *(5) Project [ss_customer_sk#11, ss_addr_sk#13, ss_ticket_number#2, ss_ext_sales_price#14, ss_ext_list_price#15, ss_ext_tax#16] + : : : +- *(5) BroadcastHashJoin [ss_hdemo_sk#17], [hd_demo_sk#18], Inner, BuildRight + : : : :- *(5) Project [ss_customer_sk#11, ss_hdemo_sk#17, ss_addr_sk#13, ss_ticket_number#2, ss_ext_sales_price#14, ss_ext_list_price#15, ss_ext_tax#16] + : : : : +- *(5) BroadcastHashJoin [ss_store_sk#19], [s_store_sk#20], Inner, BuildRight + : : : : :- *(5) Project [ss_customer_sk#11, ss_hdemo_sk#17, ss_addr_sk#13, ss_store_sk#19, ss_ticket_number#2, ss_ext_sales_price#14, ss_ext_list_price#15, ss_ext_tax#16] + : : : : : +- *(5) BroadcastHashJoin [ss_sold_date_sk#21], [d_date_sk#22], Inner, BuildRight + : : : : : :- *(5) Project [ss_sold_date_sk#21, ss_customer_sk#11, ss_hdemo_sk#17, ss_addr_sk#13, ss_store_sk#19, ss_ticket_number#2, ss_ext_sales_price#14, 
ss_ext_list_price#15, ss_ext_tax#16] + : : : : : : +- *(5) Filter ((((isnotnull(ss_sold_date_sk#21) && isnotnull(ss_store_sk#19)) && isnotnull(ss_hdemo_sk#17)) && isnotnull(ss_addr_sk#13)) && isnotnull(ss_customer_sk#11)) + : : : : : : +- *(5) FileScan parquet default.store_sales[ss_sold_date_sk#21,ss_customer_sk#11,ss_hdemo_sk#17,ss_addr_sk#13,ss_store_sk#19,ss_ticket_number#2,ss_ext_sales_price#14,ss_ext_list_price#15,ss_ext_tax#16] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk), IsNotNull(ss_hdemo_sk), IsNotNull(ss_addr_sk..., ReadSchema: struct= 1)) && (d_dom#23 <= 2)) && d_year#24 IN (1999,2000,2001)) && isnotnull(d_date_sk#22)) + : : : : : +- *(1) FileScan parquet default.date_dim[d_date_sk#22,d_year#24,d_dom#23] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_dom), GreaterThanOrEqual(d_dom,1), LessThanOrEqual(d_dom,2), In(d_year, [1999,2000,2..., ReadSchema: struct + : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : +- *(2) Project [s_store_sk#20] + : : : : +- *(2) Filter (s_city#25 IN (Midway,Fairview) && isnotnull(s_store_sk#20)) + : : : : +- *(2) FileScan parquet default.store[s_store_sk#20,s_city#25] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [In(s_city, [Midway,Fairview]), IsNotNull(s_store_sk)], ReadSchema: struct + : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : +- *(3) Project [hd_demo_sk#18] + : : : +- *(3) Filter (((hd_dep_count#26 = 4) || (hd_vehicle_count#27 = 3)) && isnotnull(hd_demo_sk#18)) + : : : +- *(3) 
FileScan parquet default.household_demographics[hd_demo_sk#18,hd_dep_count#26,hd_vehicle_count#27] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/household_demographics], PartitionFilters: [], PushedFilters: [Or(EqualTo(hd_dep_count,4),EqualTo(hd_vehicle_count,3)), IsNotNull(hd_demo_sk)], ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(4) Project [ca_address_sk#10, ca_city#4] + : : +- *(4) Filter (isnotnull(ca_address_sk#10) && isnotnull(ca_city#4)) + : : +- *(4) FileScan parquet default.customer_address[ca_address_sk#10,ca_city#4] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_address_sk), IsNotNull(ca_city)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(6) Project [c_customer_sk#12, c_current_addr_sk#9, c_first_name#3, c_last_name#1] + : +- *(6) Filter (isnotnull(c_customer_sk#12) && isnotnull(c_current_addr_sk#9)) + : +- *(6) FileScan parquet default.customer[c_customer_sk#12,c_current_addr_sk#9,c_first_name#3,c_last_name#1] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_current_addr_sk)], ReadSchema: struct + +- ReusedExchange [ca_address_sk#10, ca_city#4], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q68/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q68/simplified.txt new file mode 100644 index 000000000..1e9ce42b5 --- /dev/null +++ 
b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q68/simplified.txt @@ -0,0 +1,54 @@ +TakeOrderedAndProject [bought_city,ss_ticket_number,extended_price,list_price,c_last_name,c_first_name,extended_tax,ca_city] + WholeStageCodegen + Project [bought_city,ss_ticket_number,extended_price,list_price,c_last_name,c_first_name,extended_tax,ca_city] + BroadcastHashJoin [c_current_addr_sk,ca_address_sk,ca_city,bought_city] + Project [c_current_addr_sk,extended_price,c_last_name,list_price,c_first_name,ss_ticket_number,extended_tax,bought_city] + BroadcastHashJoin [ss_customer_sk,c_customer_sk] + HashAggregate [sum,sum,ca_city,ss_customer_sk,sum(UnscaledValue(ss_ext_tax)),sum(UnscaledValue(ss_ext_sales_price)),ss_ticket_number,ss_addr_sk,sum(UnscaledValue(ss_ext_list_price)),sum] [sum,extended_price,sum,list_price,sum(UnscaledValue(ss_ext_tax)),sum(UnscaledValue(ss_ext_sales_price)),extended_tax,sum(UnscaledValue(ss_ext_list_price)),sum,bought_city] + InputAdapter + Exchange [ss_ticket_number,ss_customer_sk,ss_addr_sk,ca_city] #1 + WholeStageCodegen + HashAggregate [sum,ss_ext_tax,sum,ca_city,ss_customer_sk,sum,sum,sum,ss_ext_sales_price,ss_ext_list_price,ss_ticket_number,ss_addr_sk,sum] [sum,sum,sum,sum,sum,sum] + Project [ss_addr_sk,ss_ext_tax,ss_customer_sk,ca_city,ss_ext_sales_price,ss_ext_list_price,ss_ticket_number] + BroadcastHashJoin [ss_addr_sk,ca_address_sk] + Project [ss_addr_sk,ss_ext_tax,ss_customer_sk,ss_ext_sales_price,ss_ext_list_price,ss_ticket_number] + BroadcastHashJoin [ss_hdemo_sk,hd_demo_sk] + Project [ss_addr_sk,ss_ext_tax,ss_customer_sk,ss_ext_sales_price,ss_hdemo_sk,ss_ext_list_price,ss_ticket_number] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_addr_sk,ss_ext_tax,ss_store_sk,ss_customer_sk,ss_ext_sales_price,ss_hdemo_sk,ss_ext_list_price,ss_ticket_number] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project 
[ss_addr_sk,ss_ext_tax,ss_store_sk,ss_customer_sk,ss_ext_sales_price,ss_sold_date_sk,ss_hdemo_sk,ss_ext_list_price,ss_ticket_number] + Filter [ss_hdemo_sk,ss_sold_date_sk,ss_customer_sk,ss_store_sk,ss_addr_sk] + Scan parquet default.store_sales [ss_addr_sk,ss_ext_tax,ss_store_sk,ss_customer_sk,ss_ext_sales_price,ss_sold_date_sk,ss_hdemo_sk,ss_ext_list_price,ss_ticket_number] [ss_addr_sk,ss_ext_tax,ss_store_sk,ss_customer_sk,ss_ext_sales_price,ss_sold_date_sk,ss_hdemo_sk,ss_ext_list_price,ss_ticket_number] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen + Project [d_date_sk] + Filter [d_dom,d_year,d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_year,d_dom] [d_date_sk,d_year,d_dom] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen + Project [s_store_sk] + Filter [s_city,s_store_sk] + Scan parquet default.store [s_store_sk,s_city] [s_store_sk,s_city] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen + Project [hd_demo_sk] + Filter [hd_dep_count,hd_vehicle_count,hd_demo_sk] + Scan parquet default.household_demographics [hd_demo_sk,hd_dep_count,hd_vehicle_count] [hd_demo_sk,hd_dep_count,hd_vehicle_count] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen + Project [ca_address_sk,ca_city] + Filter [ca_address_sk,ca_city] + Scan parquet default.customer_address [ca_address_sk,ca_city] [ca_address_sk,ca_city] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen + Project [c_customer_sk,c_current_addr_sk,c_first_name,c_last_name] + Filter [c_customer_sk,c_current_addr_sk] + Scan parquet default.customer [c_customer_sk,c_current_addr_sk,c_first_name,c_last_name] [c_customer_sk,c_current_addr_sk,c_first_name,c_last_name] + InputAdapter + ReusedExchange [ca_address_sk,ca_city] [ca_address_sk,ca_city] #5 diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q69/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q69/explain.txt new file mode 100644 index 000000000..76eb92a0d --- /dev/null +++ 
b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q69/explain.txt @@ -0,0 +1,48 @@ +== Physical Plan == +TakeOrderedAndProject(limit=100, orderBy=[cd_gender#1 ASC NULLS FIRST,cd_marital_status#2 ASC NULLS FIRST,cd_education_status#3 ASC NULLS FIRST,cd_purchase_estimate#4 ASC NULLS FIRST,cd_credit_rating#5 ASC NULLS FIRST], output=[cd_gender#1,cd_marital_status#2,cd_education_status#3,cnt1#6,cd_purchase_estimate#4,cnt2#7,cd_credit_rating#5,cnt3#8]) ++- *(10) HashAggregate(keys=[cd_gender#1, cd_marital_status#2, cd_education_status#3, cd_purchase_estimate#4, cd_credit_rating#5], functions=[count(1)]) + +- Exchange hashpartitioning(cd_gender#1, cd_marital_status#2, cd_education_status#3, cd_purchase_estimate#4, cd_credit_rating#5, 200) + +- *(9) HashAggregate(keys=[cd_gender#1, cd_marital_status#2, cd_education_status#3, cd_purchase_estimate#4, cd_credit_rating#5], functions=[partial_count(1)]) + +- *(9) Project [cd_gender#1, cd_marital_status#2, cd_education_status#3, cd_purchase_estimate#4, cd_credit_rating#5] + +- *(9) BroadcastHashJoin [c_current_cdemo_sk#9], [cd_demo_sk#10], Inner, BuildRight + :- *(9) Project [c_current_cdemo_sk#9] + : +- *(9) BroadcastHashJoin [c_current_addr_sk#11], [ca_address_sk#12], Inner, BuildRight + : :- *(9) Project [c_current_cdemo_sk#9, c_current_addr_sk#11] + : : +- *(9) BroadcastHashJoin [c_customer_sk#13], [cs_ship_customer_sk#14], LeftAnti, BuildRight + : : :- *(9) BroadcastHashJoin [c_customer_sk#13], [ws_bill_customer_sk#15], LeftAnti, BuildRight + : : : :- *(9) BroadcastHashJoin [c_customer_sk#13], [ss_customer_sk#16], LeftSemi, BuildRight + : : : : :- *(9) Project [c_customer_sk#13, c_current_cdemo_sk#9, c_current_addr_sk#11] + : : : : : +- *(9) Filter (isnotnull(c_current_addr_sk#11) && isnotnull(c_current_cdemo_sk#9)) + : : : : : +- *(9) FileScan parquet default.customer[c_customer_sk#13,c_current_cdemo_sk#9,c_current_addr_sk#11] Batched: true, Format: Parquet, Location: 
InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_current_addr_sk), IsNotNull(c_current_cdemo_sk)], ReadSchema: struct + : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : +- *(2) Project [ss_customer_sk#16] + : : : : +- *(2) BroadcastHashJoin [ss_sold_date_sk#17], [d_date_sk#18], Inner, BuildRight + : : : : :- *(2) Project [ss_sold_date_sk#17, ss_customer_sk#16] + : : : : : +- *(2) Filter isnotnull(ss_sold_date_sk#17) + : : : : : +- *(2) FileScan parquet default.store_sales[ss_sold_date_sk#17,ss_customer_sk#16] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk)], ReadSchema: struct + : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : +- *(1) Project [d_date_sk#18] + : : : : +- *(1) Filter (((((isnotnull(d_year#19) && isnotnull(d_moy#20)) && (d_year#19 = 2001)) && (d_moy#20 >= 4)) && (d_moy#20 <= 6)) && isnotnull(d_date_sk#18)) + : : : : +- *(1) FileScan parquet default.date_dim[d_date_sk#18,d_year#19,d_moy#20] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2001), GreaterThanOrEqual(d_moy,4), LessThan..., ReadSchema: struct + : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : +- *(4) Project [ws_bill_customer_sk#15] + : : : +- *(4) BroadcastHashJoin [ws_sold_date_sk#21], [d_date_sk#18], Inner, BuildRight + : : : :- *(4) Project [ws_sold_date_sk#21, ws_bill_customer_sk#15] + : : : : +- *(4) Filter isnotnull(ws_sold_date_sk#21) + : : : : +- *(4) FileScan parquet 
default.web_sales[ws_sold_date_sk#21,ws_bill_customer_sk#15] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_sold_date_sk)], ReadSchema: struct + : : : +- ReusedExchange [d_date_sk#18], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(6) Project [cs_ship_customer_sk#14] + : : +- *(6) BroadcastHashJoin [cs_sold_date_sk#22], [d_date_sk#18], Inner, BuildRight + : : :- *(6) Project [cs_sold_date_sk#22, cs_ship_customer_sk#14] + : : : +- *(6) Filter isnotnull(cs_sold_date_sk#22) + : : : +- *(6) FileScan parquet default.catalog_sales[cs_sold_date_sk#22,cs_ship_customer_sk#14] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_sold_date_sk)], ReadSchema: struct + : : +- ReusedExchange [d_date_sk#18], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(7) Project [ca_address_sk#12] + : +- *(7) Filter (ca_state#23 IN (KY,GA,NM) && isnotnull(ca_address_sk#12)) + : +- *(7) FileScan parquet default.customer_address[ca_address_sk#12,ca_state#23] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_address], PartitionFilters: [], PushedFilters: [In(ca_state, [KY,GA,NM]), IsNotNull(ca_address_sk)], ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(8) Project [cd_demo_sk#10, cd_gender#1, cd_marital_status#2, cd_education_status#3, cd_purchase_estimate#4, cd_credit_rating#5] + +- *(8) Filter 
isnotnull(cd_demo_sk#10) + +- *(8) FileScan parquet default.customer_demographics[cd_demo_sk#10,cd_gender#1,cd_marital_status#2,cd_education_status#3,cd_purchase_estimate#4,cd_credit_rating#5] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_demographics], PartitionFilters: [], PushedFilters: [IsNotNull(cd_demo_sk)], ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(2) Project [d_date_sk#15] + : : +- *(2) Filter ((isnotnull(d_year#21) && (d_year#21 = 2000)) && isnotnull(d_date_sk#15)) + : : +- *(2) FileScan parquet default.date_dim[d_date_sk#15,d_year#21] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2000), IsNotNull(d_date_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(3) Project [i_item_sk#13, i_item_id#1] + : +- *(3) Filter isnotnull(i_item_sk#13) + : +- *(3) FileScan parquet default.item[i_item_sk#13,i_item_id#1] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(4) Project [p_promo_sk#11] + +- *(4) Filter (((p_channel_email#22 = N) || (p_channel_event#23 = N)) && isnotnull(p_promo_sk#11)) + +- *(4) FileScan parquet default.promotion[p_promo_sk#11,p_channel_email#22,p_channel_event#23] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/promotion], PartitionFilters: [], PushedFilters: [Or(EqualTo(p_channel_email,N),EqualTo(p_channel_event,N)), 
IsNotNull(p_promo_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q7/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q7/simplified.txt new file mode 100644 index 000000000..b72978a57 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q7/simplified.txt @@ -0,0 +1,42 @@ +TakeOrderedAndProject [agg2,i_item_id,agg1,agg4,agg3] + WholeStageCodegen + HashAggregate [i_item_id,count,sum,count,avg(UnscaledValue(ss_sales_price)),sum,avg(cast(ss_quantity as bigint)),sum,sum,avg(UnscaledValue(ss_coupon_amt)),count,count,avg(UnscaledValue(ss_list_price))] [agg2,agg1,count,sum,count,avg(UnscaledValue(ss_sales_price)),sum,agg4,avg(cast(ss_quantity as bigint)),sum,sum,avg(UnscaledValue(ss_coupon_amt)),count,agg3,count,avg(UnscaledValue(ss_list_price))] + InputAdapter + Exchange [i_item_id] #1 + WholeStageCodegen + HashAggregate [count,i_item_id,count,count,sum,count,sum,count,sum,sum,sum,ss_coupon_amt,count,ss_sales_price,sum,sum,ss_list_price,sum,count,ss_quantity,count] [count,count,count,sum,count,sum,count,sum,sum,sum,count,sum,sum,sum,count,count] + Project [ss_coupon_amt,ss_quantity,ss_list_price,ss_sales_price,i_item_id] + BroadcastHashJoin [ss_promo_sk,p_promo_sk] + Project [ss_coupon_amt,ss_quantity,ss_list_price,ss_sales_price,ss_promo_sk,i_item_id] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [ss_coupon_amt,ss_quantity,ss_list_price,ss_item_sk,ss_sales_price,ss_promo_sk] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_coupon_amt,ss_quantity,ss_list_price,ss_item_sk,ss_sales_price,ss_sold_date_sk,ss_promo_sk] + BroadcastHashJoin [ss_cdemo_sk,cd_demo_sk] + Project [ss_coupon_amt,ss_quantity,ss_list_price,ss_item_sk,ss_sales_price,ss_cdemo_sk,ss_sold_date_sk,ss_promo_sk] + Filter [ss_cdemo_sk,ss_sold_date_sk,ss_item_sk,ss_promo_sk] + Scan parquet default.store_sales 
[ss_coupon_amt,ss_quantity,ss_list_price,ss_item_sk,ss_sales_price,ss_cdemo_sk,ss_sold_date_sk,ss_promo_sk] [ss_coupon_amt,ss_quantity,ss_list_price,ss_item_sk,ss_sales_price,ss_cdemo_sk,ss_sold_date_sk,ss_promo_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen + Project [cd_demo_sk] + Filter [cd_gender,cd_marital_status,cd_education_status,cd_demo_sk] + Scan parquet default.customer_demographics [cd_demo_sk,cd_gender,cd_marital_status,cd_education_status] [cd_demo_sk,cd_gender,cd_marital_status,cd_education_status] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen + Project [d_date_sk] + Filter [d_year,d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_year] [d_date_sk,d_year] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen + Project [i_item_sk,i_item_id] + Filter [i_item_sk] + Scan parquet default.item [i_item_sk,i_item_id] [i_item_sk,i_item_id] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen + Project [p_promo_sk] + Filter [p_channel_email,p_channel_event,p_promo_sk] + Scan parquet default.promotion [p_promo_sk,p_channel_email,p_channel_event] [p_promo_sk,p_channel_email,p_channel_event] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q70/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q70/explain.txt new file mode 100644 index 000000000..030bc811b --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q70/explain.txt @@ -0,0 +1,46 @@ +== Physical Plan == +TakeOrderedAndProject(limit=100, orderBy=[lochierarchy#1 DESC NULLS LAST,CASE WHEN (cast(lochierarchy#1 as int) = 0) THEN s_state#2 END ASC NULLS FIRST,rank_within_parent#3 ASC NULLS FIRST], output=[total_sum#4,s_state#2,s_county#5,lochierarchy#1,rank_within_parent#3]) ++- *(11) Project [total_sum#4, s_state#2, s_county#5, lochierarchy#1, rank_within_parent#3] + +- Window [rank(_w3#6) windowspecdefinition(_w1#7, _w2#8, _w3#6 DESC NULLS LAST, specifiedwindowframe(RowFrame, 
unboundedpreceding$(), currentrow$())) AS rank_within_parent#3], [_w1#7, _w2#8], [_w3#6 DESC NULLS LAST] + +- *(10) Sort [_w1#7 ASC NULLS FIRST, _w2#8 ASC NULLS FIRST, _w3#6 DESC NULLS LAST], false, 0 + +- Exchange hashpartitioning(_w1#7, _w2#8, 200) + +- *(9) HashAggregate(keys=[s_state#2, s_county#5, spark_grouping_id#9], functions=[sum(UnscaledValue(ss_net_profit#10))]) + +- Exchange hashpartitioning(s_state#2, s_county#5, spark_grouping_id#9, 200) + +- *(8) HashAggregate(keys=[s_state#2, s_county#5, spark_grouping_id#9], functions=[partial_sum(UnscaledValue(ss_net_profit#10))]) + +- *(8) Expand [List(ss_net_profit#10, s_state#11, s_county#12, 0), List(ss_net_profit#10, s_state#11, null, 1), List(ss_net_profit#10, null, null, 3)], [ss_net_profit#10, s_state#2, s_county#5, spark_grouping_id#9] + +- *(8) Project [ss_net_profit#10, s_state#13 AS s_state#11, s_county#14 AS s_county#12] + +- *(8) BroadcastHashJoin [ss_store_sk#15], [s_store_sk#16], Inner, BuildRight + :- *(8) Project [ss_store_sk#15, ss_net_profit#10] + : +- *(8) BroadcastHashJoin [ss_sold_date_sk#17], [d_date_sk#18], Inner, BuildRight + : :- *(8) Project [ss_sold_date_sk#17, ss_store_sk#15, ss_net_profit#10] + : : +- *(8) Filter (isnotnull(ss_sold_date_sk#17) && isnotnull(ss_store_sk#15)) + : : +- *(8) FileScan parquet default.store_sales[ss_sold_date_sk#17,ss_store_sk#15,ss_net_profit#10] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(1) Project [d_date_sk#18] + : +- *(1) Filter (((isnotnull(d_month_seq#19) && (d_month_seq#19 >= 1200)) && (d_month_seq#19 <= 1211)) && isnotnull(d_date_sk#18)) + : +- *(1) FileScan parquet default.date_dim[d_date_sk#18,d_month_seq#19] Batched: true, Format: Parquet, 
Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1200), LessThanOrEqual(d_month_seq,1211),..., ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(7) BroadcastHashJoin [s_state#13], [s_state#20], LeftSemi, BuildRight + :- *(7) Project [s_store_sk#16, s_county#14, s_state#13] + : +- *(7) Filter isnotnull(s_store_sk#16) + : +- *(7) FileScan parquet default.store[s_store_sk#16,s_county#14,s_state#13] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_store_sk)], ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true])) + +- *(6) Project [s_state#20] + +- *(6) Filter (isnotnull(ranking#21) && (ranking#21 <= 5)) + +- Window [rank(_w2#22) windowspecdefinition(s_state#13, _w2#22 DESC NULLS LAST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS ranking#21], [s_state#13], [_w2#22 DESC NULLS LAST] + +- *(5) Sort [s_state#13 ASC NULLS FIRST, _w2#22 DESC NULLS LAST], false, 0 + +- *(5) HashAggregate(keys=[s_state#13], functions=[sum(UnscaledValue(ss_net_profit#10))]) + +- Exchange hashpartitioning(s_state#13, 200) + +- *(4) HashAggregate(keys=[s_state#13], functions=[partial_sum(UnscaledValue(ss_net_profit#10))]) + +- *(4) Project [ss_net_profit#10, s_state#13] + +- *(4) BroadcastHashJoin [ss_sold_date_sk#17], [d_date_sk#18], Inner, BuildRight + :- *(4) Project [ss_sold_date_sk#17, ss_net_profit#10, s_state#13] + : +- *(4) BroadcastHashJoin [ss_store_sk#15], [s_store_sk#16], Inner, BuildRight + : :- *(4) Project [ss_sold_date_sk#17, ss_store_sk#15, ss_net_profit#10] + : : +- *(4) Filter (isnotnull(ss_store_sk#15) && isnotnull(ss_sold_date_sk#17)) + : : +- *(4) FileScan parquet 
default.store_sales[ss_sold_date_sk#17,ss_store_sk#15,ss_net_profit#10] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_store_sk), IsNotNull(ss_sold_date_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(2) Project [s_store_sk#16, s_state#13] + : +- *(2) Filter isnotnull(s_store_sk#16) + : +- *(2) FileScan parquet default.store[s_store_sk#16,s_state#13] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_store_sk)], ReadSchema: struct + +- ReusedExchange [d_date_sk#18], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q70/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q70/simplified.txt new file mode 100644 index 000000000..2ef724de4 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q70/simplified.txt @@ -0,0 +1,65 @@ +TakeOrderedAndProject [total_sum,lochierarchy,s_state,s_county,rank_within_parent] + WholeStageCodegen + Project [total_sum,lochierarchy,s_state,s_county,rank_within_parent] + InputAdapter + Window [_w3,_w1,_w2] + WholeStageCodegen + Sort [_w1,_w2,_w3] + InputAdapter + Exchange [_w1,_w2] #1 + WholeStageCodegen + HashAggregate [sum,s_state,s_county,spark_grouping_id,sum(UnscaledValue(ss_net_profit))] [total_sum,sum,lochierarchy,_w3,sum(UnscaledValue(ss_net_profit)),_w2,_w1] + InputAdapter + Exchange [s_state,s_county,spark_grouping_id] #2 + WholeStageCodegen + HashAggregate [sum,s_state,s_county,spark_grouping_id,sum,ss_net_profit] [sum,sum] + Expand [ss_net_profit,s_state,s_county] + Project 
[ss_net_profit,s_state,s_county] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_store_sk,ss_net_profit] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_sold_date_sk,ss_store_sk,ss_net_profit] + Filter [ss_sold_date_sk,ss_store_sk] + Scan parquet default.store_sales [ss_sold_date_sk,ss_store_sk,ss_net_profit] [ss_sold_date_sk,ss_store_sk,ss_net_profit] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen + Project [d_date_sk] + Filter [d_month_seq,d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_month_seq] [d_date_sk,d_month_seq] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen + BroadcastHashJoin [s_state,s_state] + Project [s_store_sk,s_county,s_state] + Filter [s_store_sk] + Scan parquet default.store [s_store_sk,s_county,s_state] [s_store_sk,s_county,s_state] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen + Project [s_state] + Filter [ranking] + InputAdapter + Window [_w2,s_state] + WholeStageCodegen + Sort [s_state,_w2] + HashAggregate [s_state,sum,sum(UnscaledValue(ss_net_profit))] [sum(UnscaledValue(ss_net_profit)),s_state,_w2,sum] + InputAdapter + Exchange [s_state] #6 + WholeStageCodegen + HashAggregate [s_state,ss_net_profit,sum,sum] [sum,sum] + Project [ss_net_profit,s_state] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_sold_date_sk,ss_net_profit,s_state] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_sold_date_sk,ss_store_sk,ss_net_profit] + Filter [ss_store_sk,ss_sold_date_sk] + Scan parquet default.store_sales [ss_sold_date_sk,ss_store_sk,ss_net_profit] [ss_sold_date_sk,ss_store_sk,ss_net_profit] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen + Project [s_store_sk,s_state] + Filter [s_store_sk] + Scan parquet default.store [s_store_sk,s_state] [s_store_sk,s_state] + InputAdapter + ReusedExchange [d_date_sk] [d_date_sk] #3 diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q71/explain.txt 
b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q71/explain.txt new file mode 100644 index 000000000..32f2c64c4 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q71/explain.txt @@ -0,0 +1,40 @@ +== Physical Plan == +*(11) Sort [ext_price#1 DESC NULLS LAST, brand_id#2 ASC NULLS FIRST], true, 0 ++- Exchange rangepartitioning(ext_price#1 DESC NULLS LAST, brand_id#2 ASC NULLS FIRST, 200) + +- *(10) HashAggregate(keys=[i_brand#3, i_brand_id#4, t_hour#5, t_minute#6], functions=[sum(UnscaledValue(ext_price#7))]) + +- Exchange hashpartitioning(i_brand#3, i_brand_id#4, t_hour#5, t_minute#6, 200) + +- *(9) HashAggregate(keys=[i_brand#3, i_brand_id#4, t_hour#5, t_minute#6], functions=[partial_sum(UnscaledValue(ext_price#7))]) + +- *(9) Project [i_brand_id#4, i_brand#3, ext_price#7, t_hour#5, t_minute#6] + +- *(9) BroadcastHashJoin [time_sk#8], [t_time_sk#9], Inner, BuildRight + :- *(9) Project [i_brand_id#4, i_brand#3, ext_price#7, time_sk#8] + : +- *(9) BroadcastHashJoin [i_item_sk#10], [sold_item_sk#11], Inner, BuildLeft + : :- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(1) Project [i_item_sk#10, i_brand_id#4, i_brand#3] + : : +- *(1) Filter ((isnotnull(i_manager_id#12) && (i_manager_id#12 = 1)) && isnotnull(i_item_sk#10)) + : : +- *(1) FileScan parquet default.item[i_item_sk#10,i_brand_id#4,i_brand#3,i_manager_id#12] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_manager_id), EqualTo(i_manager_id,1), IsNotNull(i_item_sk)], ReadSchema: struct + : +- Union + : :- *(3) Project [ws_ext_sales_price#13 AS ext_price#7, ws_item_sk#14 AS sold_item_sk#11, ws_sold_time_sk#15 AS time_sk#8] + : : +- *(3) BroadcastHashJoin [ws_sold_date_sk#16], [d_date_sk#17], Inner, BuildRight + : : :- *(3) Project [ws_sold_date_sk#16, ws_sold_time_sk#15, ws_item_sk#14, 
ws_ext_sales_price#13] + : : : +- *(3) Filter ((isnotnull(ws_sold_date_sk#16) && isnotnull(ws_item_sk#14)) && isnotnull(ws_sold_time_sk#15)) + : : : +- *(3) FileScan parquet default.web_sales[ws_sold_date_sk#16,ws_sold_time_sk#15,ws_item_sk#14,ws_ext_sales_price#13] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_sold_date_sk), IsNotNull(ws_item_sk), IsNotNull(ws_sold_time_sk)], ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(2) Project [d_date_sk#17] + : : +- *(2) Filter ((((isnotnull(d_moy#18) && isnotnull(d_year#19)) && (d_moy#18 = 11)) && (d_year#19 = 1999)) && isnotnull(d_date_sk#17)) + : : +- *(2) FileScan parquet default.date_dim[d_date_sk#17,d_year#19,d_moy#18] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), EqualTo(d_moy,11), EqualTo(d_year,1999), IsNotNull(d_date_sk)], ReadSchema: struct + : :- *(5) Project [cs_ext_sales_price#20 AS ext_price#21, cs_item_sk#22 AS sold_item_sk#23, cs_sold_time_sk#24 AS time_sk#25] + : : +- *(5) BroadcastHashJoin [cs_sold_date_sk#26], [d_date_sk#17], Inner, BuildRight + : : :- *(5) Project [cs_sold_date_sk#26, cs_sold_time_sk#24, cs_item_sk#22, cs_ext_sales_price#20] + : : : +- *(5) Filter ((isnotnull(cs_sold_date_sk#26) && isnotnull(cs_item_sk#22)) && isnotnull(cs_sold_time_sk#24)) + : : : +- *(5) FileScan parquet default.catalog_sales[cs_sold_date_sk#26,cs_sold_time_sk#24,cs_item_sk#22,cs_ext_sales_price#20] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_sold_date_sk), IsNotNull(cs_item_sk), IsNotNull(cs_sold_time_sk)], 
ReadSchema: struct + : : +- ReusedExchange [d_date_sk#17], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(7) Project [ss_ext_sales_price#27 AS ext_price#28, ss_item_sk#29 AS sold_item_sk#30, ss_sold_time_sk#31 AS time_sk#32] + : +- *(7) BroadcastHashJoin [ss_sold_date_sk#33], [d_date_sk#17], Inner, BuildRight + : :- *(7) Project [ss_sold_date_sk#33, ss_sold_time_sk#31, ss_item_sk#29, ss_ext_sales_price#27] + : : +- *(7) Filter ((isnotnull(ss_sold_date_sk#33) && isnotnull(ss_item_sk#29)) && isnotnull(ss_sold_time_sk#31)) + : : +- *(7) FileScan parquet default.store_sales[ss_sold_date_sk#33,ss_sold_time_sk#31,ss_item_sk#29,ss_ext_sales_price#27] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_item_sk), IsNotNull(ss_sold_time_sk)], ReadSchema: struct + : +- ReusedExchange [d_date_sk#17], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(8) Project [t_time_sk#9, t_hour#5, t_minute#6] + +- *(8) Filter (((t_meal_time#34 = breakfast) || (t_meal_time#34 = dinner)) && isnotnull(t_time_sk#9)) + +- *(8) FileScan parquet default.time_dim[t_time_sk#9,t_hour#5,t_minute#6,t_meal_time#34] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/time_dim], PartitionFilters: [], PushedFilters: [Or(EqualTo(t_meal_time,breakfast),EqualTo(t_meal_time,dinner)), IsNotNull(t_time_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q71/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q71/simplified.txt new file mode 100644 index 000000000..0153205a5 --- /dev/null +++ 
b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q71/simplified.txt @@ -0,0 +1,56 @@ +WholeStageCodegen + Sort [ext_price,brand_id] + InputAdapter + Exchange [ext_price,brand_id] #1 + WholeStageCodegen + HashAggregate [sum(UnscaledValue(ext_price)),sum,t_minute,t_hour,i_brand,i_brand_id] [sum(UnscaledValue(ext_price)),sum,brand,ext_price,brand_id] + InputAdapter + Exchange [i_brand,i_brand_id,t_hour,t_minute] #2 + WholeStageCodegen + HashAggregate [ext_price,sum,t_minute,t_hour,i_brand,i_brand_id,sum] [sum,sum] + Project [ext_price,t_minute,i_brand_id,i_brand,t_hour] + BroadcastHashJoin [time_sk,t_time_sk] + Project [i_brand_id,i_brand,ext_price,time_sk] + BroadcastHashJoin [i_item_sk,sold_item_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen + Project [i_item_sk,i_brand_id,i_brand] + Filter [i_manager_id,i_item_sk] + Scan parquet default.item [i_item_sk,i_brand_id,i_brand,i_manager_id] [i_item_sk,i_brand_id,i_brand,i_manager_id] + InputAdapter + Union + WholeStageCodegen + Project [ws_ext_sales_price,ws_item_sk,ws_sold_time_sk] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Project [ws_sold_date_sk,ws_sold_time_sk,ws_item_sk,ws_ext_sales_price] + Filter [ws_sold_date_sk,ws_item_sk,ws_sold_time_sk] + Scan parquet default.web_sales [ws_sold_date_sk,ws_sold_time_sk,ws_item_sk,ws_ext_sales_price] [ws_sold_date_sk,ws_sold_time_sk,ws_item_sk,ws_ext_sales_price] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen + Project [d_date_sk] + Filter [d_moy,d_year,d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_year,d_moy] [d_date_sk,d_year,d_moy] + WholeStageCodegen + Project [cs_ext_sales_price,cs_item_sk,cs_sold_time_sk] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_sold_date_sk,cs_sold_time_sk,cs_item_sk,cs_ext_sales_price] + Filter [cs_sold_date_sk,cs_item_sk,cs_sold_time_sk] + Scan parquet default.catalog_sales [cs_sold_date_sk,cs_sold_time_sk,cs_item_sk,cs_ext_sales_price] 
[cs_sold_date_sk,cs_sold_time_sk,cs_item_sk,cs_ext_sales_price] + InputAdapter + ReusedExchange [d_date_sk] [d_date_sk] #4 + WholeStageCodegen + Project [ss_ext_sales_price,ss_item_sk,ss_sold_time_sk] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_sold_date_sk,ss_sold_time_sk,ss_item_sk,ss_ext_sales_price] + Filter [ss_sold_date_sk,ss_item_sk,ss_sold_time_sk] + Scan parquet default.store_sales [ss_sold_date_sk,ss_sold_time_sk,ss_item_sk,ss_ext_sales_price] [ss_sold_date_sk,ss_sold_time_sk,ss_item_sk,ss_ext_sales_price] + InputAdapter + ReusedExchange [d_date_sk] [d_date_sk] #4 + InputAdapter + BroadcastExchange #5 + WholeStageCodegen + Project [t_time_sk,t_hour,t_minute] + Filter [t_meal_time,t_time_sk] + Scan parquet default.time_dim [t_time_sk,t_hour,t_minute,t_meal_time] [t_time_sk,t_hour,t_minute,t_meal_time] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q72/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q72/explain.txt new file mode 100644 index 000000000..5ad7c14ac --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q72/explain.txt @@ -0,0 +1,68 @@ +== Physical Plan == +TakeOrderedAndProject(limit=100, orderBy=[total_cnt#1 DESC NULLS LAST,i_item_desc#2 ASC NULLS FIRST,w_warehouse_name#3 ASC NULLS FIRST,d_week_seq#4 ASC NULLS FIRST], output=[i_item_desc#2,w_warehouse_name#3,d_week_seq#4,no_promo#5,promo#6,total_cnt#1]) ++- *(12) HashAggregate(keys=[i_item_desc#2, w_warehouse_name#3, d_week_seq#4], functions=[count(1)]) + +- Exchange hashpartitioning(i_item_desc#2, w_warehouse_name#3, d_week_seq#4, 200) + +- *(11) HashAggregate(keys=[i_item_desc#2, w_warehouse_name#3, d_week_seq#4], functions=[partial_count(1)]) + +- *(11) Project [w_warehouse_name#3, i_item_desc#2, d_week_seq#4] + +- *(11) BroadcastHashJoin [cs_item_sk#7, cs_order_number#8], [cr_item_sk#9, cr_order_number#10], LeftOuter, BuildRight + :- *(11) Project [cs_item_sk#7, cs_order_number#8, 
w_warehouse_name#3, i_item_desc#2, d_week_seq#4] + : +- *(11) BroadcastHashJoin [cs_promo_sk#11], [p_promo_sk#12], LeftOuter, BuildRight + : :- *(11) Project [cs_item_sk#7, cs_promo_sk#11, cs_order_number#8, w_warehouse_name#3, i_item_desc#2, d_week_seq#4] + : : +- *(11) BroadcastHashJoin [cs_ship_date_sk#13], [d_date_sk#14], Inner, BuildRight, (d_date#15 > cast(cast(d_date#16 as timestamp) + interval 5 days as date)) + : : :- *(11) Project [cs_ship_date_sk#13, cs_item_sk#7, cs_promo_sk#11, cs_order_number#8, w_warehouse_name#3, i_item_desc#2, d_date#16, d_week_seq#4] + : : : +- *(11) BroadcastHashJoin [d_week_seq#4, inv_date_sk#17], [d_week_seq#18, d_date_sk#19], Inner, BuildRight + : : : :- *(11) Project [cs_ship_date_sk#13, cs_item_sk#7, cs_promo_sk#11, cs_order_number#8, inv_date_sk#17, w_warehouse_name#3, i_item_desc#2, d_date#16, d_week_seq#4] + : : : : +- *(11) BroadcastHashJoin [cs_sold_date_sk#20], [d_date_sk#21], Inner, BuildRight + : : : : :- *(11) Project [cs_sold_date_sk#20, cs_ship_date_sk#13, cs_item_sk#7, cs_promo_sk#11, cs_order_number#8, inv_date_sk#17, w_warehouse_name#3, i_item_desc#2] + : : : : : +- *(11) BroadcastHashJoin [cs_bill_hdemo_sk#22], [hd_demo_sk#23], Inner, BuildRight + : : : : : :- *(11) Project [cs_sold_date_sk#20, cs_ship_date_sk#13, cs_bill_hdemo_sk#22, cs_item_sk#7, cs_promo_sk#11, cs_order_number#8, inv_date_sk#17, w_warehouse_name#3, i_item_desc#2] + : : : : : : +- *(11) BroadcastHashJoin [cs_bill_cdemo_sk#24], [cd_demo_sk#25], Inner, BuildRight + : : : : : : :- *(11) Project [cs_sold_date_sk#20, cs_ship_date_sk#13, cs_bill_cdemo_sk#24, cs_bill_hdemo_sk#22, cs_item_sk#7, cs_promo_sk#11, cs_order_number#8, inv_date_sk#17, w_warehouse_name#3, i_item_desc#2] + : : : : : : : +- *(11) BroadcastHashJoin [cs_item_sk#7], [i_item_sk#26], Inner, BuildRight + : : : : : : : :- *(11) Project [cs_sold_date_sk#20, cs_ship_date_sk#13, cs_bill_cdemo_sk#24, cs_bill_hdemo_sk#22, cs_item_sk#7, cs_promo_sk#11, cs_order_number#8, inv_date_sk#17, 
w_warehouse_name#3] + : : : : : : : : +- *(11) BroadcastHashJoin [inv_warehouse_sk#27], [w_warehouse_sk#28], Inner, BuildRight + : : : : : : : : :- *(11) Project [cs_sold_date_sk#20, cs_ship_date_sk#13, cs_bill_cdemo_sk#24, cs_bill_hdemo_sk#22, cs_item_sk#7, cs_promo_sk#11, cs_order_number#8, inv_date_sk#17, inv_warehouse_sk#27] + : : : : : : : : : +- *(11) BroadcastHashJoin [cs_item_sk#7], [inv_item_sk#29], Inner, BuildRight, (inv_quantity_on_hand#30 < cs_quantity#31) + : : : : : : : : : :- *(11) Project [cs_sold_date_sk#20, cs_ship_date_sk#13, cs_bill_cdemo_sk#24, cs_bill_hdemo_sk#22, cs_item_sk#7, cs_promo_sk#11, cs_order_number#8, cs_quantity#31] + : : : : : : : : : : +- *(11) Filter (((((isnotnull(cs_quantity#31) && isnotnull(cs_item_sk#7)) && isnotnull(cs_bill_cdemo_sk#24)) && isnotnull(cs_bill_hdemo_sk#22)) && isnotnull(cs_sold_date_sk#20)) && isnotnull(cs_ship_date_sk#13)) + : : : : : : : : : : +- *(11) FileScan parquet default.catalog_sales[cs_sold_date_sk#20,cs_ship_date_sk#13,cs_bill_cdemo_sk#24,cs_bill_hdemo_sk#22,cs_item_sk#7,cs_promo_sk#11,cs_order_number#8,cs_quantity#31] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_quantity), IsNotNull(cs_item_sk), IsNotNull(cs_bill_cdemo_sk), IsNotNull(cs_bill_hd..., ReadSchema: struct + : : : : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : : : : : +- *(2) Project [w_warehouse_sk#28, w_warehouse_name#3] + : : : : : : : : +- *(2) Filter isnotnull(w_warehouse_sk#28) + : : : : : : : : +- *(2) FileScan parquet default.warehouse[w_warehouse_sk#28,w_warehouse_name#3] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/warehouse], PartitionFilters: [], PushedFilters: [IsNotNull(w_warehouse_sk)], ReadSchema: struct + : : : : : : : +- BroadcastExchange 
HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : : : : +- *(3) Project [i_item_sk#26, i_item_desc#2] + : : : : : : : +- *(3) Filter isnotnull(i_item_sk#26) + : : : : : : : +- *(3) FileScan parquet default.item[i_item_sk#26,i_item_desc#2] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct + : : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : : : +- *(4) Project [cd_demo_sk#25] + : : : : : : +- *(4) Filter ((isnotnull(cd_marital_status#32) && (cd_marital_status#32 = D)) && isnotnull(cd_demo_sk#25)) + : : : : : : +- *(4) FileScan parquet default.customer_demographics[cd_demo_sk#25,cd_marital_status#32] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_demographics], PartitionFilters: [], PushedFilters: [IsNotNull(cd_marital_status), EqualTo(cd_marital_status,D), IsNotNull(cd_demo_sk)], ReadSchema: struct + : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : : +- *(5) Project [hd_demo_sk#23] + : : : : : +- *(5) Filter ((isnotnull(hd_buy_potential#33) && (hd_buy_potential#33 = >10000)) && isnotnull(hd_demo_sk#23)) + : : : : : +- *(5) FileScan parquet default.household_demographics[hd_demo_sk#23,hd_buy_potential#33] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/household_demographics], PartitionFilters: [], PushedFilters: [IsNotNull(hd_buy_potential), EqualTo(hd_buy_potential,>10000), IsNotNull(hd_demo_sk)], ReadSchema: struct + : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : +- *(6) Project [d_date_sk#21, d_date#16, d_week_seq#4] + : : : : +- *(6) Filter 
(((isnotnull(d_year#34) && (d_year#34 = 1999)) && isnotnull(d_date_sk#21)) && isnotnull(d_week_seq#4)) + : : : : +- *(6) FileScan parquet default.date_dim[d_date_sk#21,d_date#16,d_week_seq#4,d_year#34] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), EqualTo(d_year,1999), IsNotNull(d_date_sk), IsNotNull(d_week_seq)], ReadSchema: struct + : : : +- BroadcastExchange HashedRelationBroadcastMode(List((shiftleft(cast(input[1, int, true] as bigint), 32) | (cast(input[0, int, true] as bigint) & 4294967295)))) + : : : +- *(7) Project [d_date_sk#19, d_week_seq#18] + : : : +- *(7) Filter (isnotnull(d_week_seq#18) && isnotnull(d_date_sk#19)) + : : : +- *(7) FileScan parquet default.date_dim[d_date_sk#19,d_week_seq#18] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_week_seq), IsNotNull(d_date_sk)], ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(8) Project [d_date_sk#14, d_date#15] + : : +- *(8) Filter (isnotnull(d_date#15) && isnotnull(d_date_sk#14)) + : : +- *(8) FileScan parquet default.date_dim[d_date_sk#14,d_date#15] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date), IsNotNull(d_date_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(9) Project [p_promo_sk#12] + : +- *(9) Filter isnotnull(p_promo_sk#12) + : +- *(9) FileScan parquet default.promotion[p_promo_sk#12] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/promotion], PartitionFilters: [], 
PushedFilters: [IsNotNull(p_promo_sk)], ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List((shiftleft(cast(input[0, int, true] as bigint), 32) | (cast(input[1, int, true] as bigint) & 4294967295)))) + +- *(10) Project [cr_item_sk#9, cr_order_number#10] + +- *(10) Filter (isnotnull(cr_item_sk#9) && isnotnull(cr_order_number#10)) + +- *(10) FileScan parquet default.catalog_returns[cr_item_sk#9,cr_order_number#10] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_returns], PartitionFilters: [], PushedFilters: [IsNotNull(cr_item_sk), IsNotNull(cr_order_number)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q72/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q72/simplified.txt new file mode 100644 index 000000000..ee1925612 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q72/simplified.txt @@ -0,0 +1,90 @@ +TakeOrderedAndProject [i_item_desc,w_warehouse_name,promo,no_promo,d_week_seq,total_cnt] + WholeStageCodegen + HashAggregate [i_item_desc,count(1),w_warehouse_name,d_week_seq,count] [count(1),promo,no_promo,count,total_cnt] + InputAdapter + Exchange [i_item_desc,w_warehouse_name,d_week_seq] #1 + WholeStageCodegen + HashAggregate [i_item_desc,w_warehouse_name,d_week_seq,count,count] [count,count] + Project [w_warehouse_name,i_item_desc,d_week_seq] + BroadcastHashJoin [cs_item_sk,cs_order_number,cr_item_sk,cr_order_number] + Project [d_week_seq,cs_order_number,cs_item_sk,w_warehouse_name,i_item_desc] + BroadcastHashJoin [cs_promo_sk,p_promo_sk] + Project [d_week_seq,cs_promo_sk,cs_order_number,cs_item_sk,w_warehouse_name,i_item_desc] + BroadcastHashJoin [cs_ship_date_sk,d_date_sk,d_date,d_date] + Project [d_date,d_week_seq,cs_promo_sk,cs_ship_date_sk,cs_order_number,cs_item_sk,w_warehouse_name,i_item_desc] + BroadcastHashJoin 
[d_week_seq,inv_date_sk,d_week_seq,d_date_sk] + Project [d_date,d_week_seq,cs_promo_sk,cs_ship_date_sk,cs_order_number,cs_item_sk,w_warehouse_name,inv_date_sk,i_item_desc] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_promo_sk,cs_ship_date_sk,cs_sold_date_sk,cs_order_number,cs_item_sk,w_warehouse_name,inv_date_sk,i_item_desc] + BroadcastHashJoin [cs_bill_hdemo_sk,hd_demo_sk] + Project [cs_promo_sk,cs_ship_date_sk,cs_sold_date_sk,cs_order_number,cs_item_sk,w_warehouse_name,inv_date_sk,i_item_desc,cs_bill_hdemo_sk] + BroadcastHashJoin [cs_bill_cdemo_sk,cd_demo_sk] + Project [cs_promo_sk,cs_bill_cdemo_sk,cs_ship_date_sk,cs_sold_date_sk,cs_order_number,cs_item_sk,w_warehouse_name,inv_date_sk,i_item_desc,cs_bill_hdemo_sk] + BroadcastHashJoin [cs_item_sk,i_item_sk] + Project [cs_promo_sk,cs_bill_cdemo_sk,cs_ship_date_sk,cs_sold_date_sk,cs_order_number,cs_item_sk,w_warehouse_name,inv_date_sk,cs_bill_hdemo_sk] + BroadcastHashJoin [inv_warehouse_sk,w_warehouse_sk] + Project [cs_promo_sk,cs_bill_cdemo_sk,cs_ship_date_sk,inv_warehouse_sk,cs_sold_date_sk,cs_order_number,cs_item_sk,inv_date_sk,cs_bill_hdemo_sk] + BroadcastHashJoin [cs_item_sk,inv_item_sk,inv_quantity_on_hand,cs_quantity] + Project [cs_promo_sk,cs_bill_cdemo_sk,cs_quantity,cs_ship_date_sk,cs_sold_date_sk,cs_order_number,cs_item_sk,cs_bill_hdemo_sk] + Filter [cs_bill_cdemo_sk,cs_sold_date_sk,cs_bill_hdemo_sk,cs_item_sk,cs_quantity,cs_ship_date_sk] + Scan parquet default.catalog_sales [cs_promo_sk,cs_bill_cdemo_sk,cs_quantity,cs_ship_date_sk,cs_sold_date_sk,cs_order_number,cs_item_sk,cs_bill_hdemo_sk] [cs_promo_sk,cs_bill_cdemo_sk,cs_quantity,cs_ship_date_sk,cs_sold_date_sk,cs_order_number,cs_item_sk,cs_bill_hdemo_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen + Project [inv_date_sk,inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand] + Filter [inv_quantity_on_hand,inv_item_sk,inv_warehouse_sk,inv_date_sk] + Scan parquet default.inventory 
[inv_date_sk,inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand] [inv_date_sk,inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen + Project [w_warehouse_sk,w_warehouse_name] + Filter [w_warehouse_sk] + Scan parquet default.warehouse [w_warehouse_sk,w_warehouse_name] [w_warehouse_sk,w_warehouse_name] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen + Project [i_item_sk,i_item_desc] + Filter [i_item_sk] + Scan parquet default.item [i_item_sk,i_item_desc] [i_item_sk,i_item_desc] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen + Project [cd_demo_sk] + Filter [cd_marital_status,cd_demo_sk] + Scan parquet default.customer_demographics [cd_demo_sk,cd_marital_status] [cd_demo_sk,cd_marital_status] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen + Project [hd_demo_sk] + Filter [hd_buy_potential,hd_demo_sk] + Scan parquet default.household_demographics [hd_demo_sk,hd_buy_potential] [hd_demo_sk,hd_buy_potential] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen + Project [d_date_sk,d_date,d_week_seq] + Filter [d_year,d_date_sk,d_week_seq] + Scan parquet default.date_dim [d_date_sk,d_date,d_week_seq,d_year] [d_date_sk,d_date,d_week_seq,d_year] + InputAdapter + BroadcastExchange #8 + WholeStageCodegen + Project [d_date_sk,d_week_seq] + Filter [d_date_sk,d_week_seq] + Scan parquet default.date_dim [d_date_sk,d_week_seq] [d_date_sk,d_week_seq] + InputAdapter + BroadcastExchange #9 + WholeStageCodegen + Project [d_date_sk,d_date] + Filter [d_date,d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_date] [d_date_sk,d_date] + InputAdapter + BroadcastExchange #10 + WholeStageCodegen + Project [p_promo_sk] + Filter [p_promo_sk] + Scan parquet default.promotion [p_promo_sk] [p_promo_sk] + InputAdapter + BroadcastExchange #11 + WholeStageCodegen + Project [cr_item_sk,cr_order_number] + Filter [cr_item_sk,cr_order_number] + Scan parquet default.catalog_returns [cr_item_sk,cr_order_number] 
[cr_item_sk,cr_order_number] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q73/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q73/explain.txt new file mode 100644 index 000000000..1f917c14b --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q73/explain.txt @@ -0,0 +1,34 @@ +== Physical Plan == +*(7) Sort [cnt#1 DESC NULLS LAST], true, 0 ++- Exchange rangepartitioning(cnt#1 DESC NULLS LAST, 200) + +- *(6) Project [c_last_name#2, c_first_name#3, c_salutation#4, c_preferred_cust_flag#5, ss_ticket_number#6, cnt#1] + +- *(6) BroadcastHashJoin [ss_customer_sk#7], [c_customer_sk#8], Inner, BuildRight + :- *(6) Filter ((cnt#1 >= 1) && (cnt#1 <= 5)) + : +- *(6) HashAggregate(keys=[ss_ticket_number#6, ss_customer_sk#7], functions=[count(1)]) + : +- Exchange hashpartitioning(ss_ticket_number#6, ss_customer_sk#7, 200) + : +- *(4) HashAggregate(keys=[ss_ticket_number#6, ss_customer_sk#7], functions=[partial_count(1)]) + : +- *(4) Project [ss_customer_sk#7, ss_ticket_number#6] + : +- *(4) BroadcastHashJoin [ss_hdemo_sk#9], [hd_demo_sk#10], Inner, BuildRight + : :- *(4) Project [ss_customer_sk#7, ss_hdemo_sk#9, ss_ticket_number#6] + : : +- *(4) BroadcastHashJoin [ss_store_sk#11], [s_store_sk#12], Inner, BuildRight + : : :- *(4) Project [ss_customer_sk#7, ss_hdemo_sk#9, ss_store_sk#11, ss_ticket_number#6] + : : : +- *(4) BroadcastHashJoin [ss_sold_date_sk#13], [d_date_sk#14], Inner, BuildRight + : : : :- *(4) Project [ss_sold_date_sk#13, ss_customer_sk#7, ss_hdemo_sk#9, ss_store_sk#11, ss_ticket_number#6] + : : : : +- *(4) Filter (((isnotnull(ss_sold_date_sk#13) && isnotnull(ss_store_sk#11)) && isnotnull(ss_hdemo_sk#9)) && isnotnull(ss_customer_sk#7)) + : : : : +- *(4) FileScan parquet default.store_sales[ss_sold_date_sk#13,ss_customer_sk#7,ss_hdemo_sk#9,ss_store_sk#11,ss_ticket_number#6] Batched: true, Format: Parquet, Location: 
InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk), IsNotNull(ss_hdemo_sk), IsNotNull(ss_custome..., ReadSchema: struct + : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : +- *(1) Project [d_date_sk#14] + : : : +- *(1) Filter ((((isnotnull(d_dom#15) && (d_dom#15 >= 1)) && (d_dom#15 <= 2)) && d_year#16 IN (1999,2000,2001)) && isnotnull(d_date_sk#14)) + : : : +- *(1) FileScan parquet default.date_dim[d_date_sk#14,d_year#16,d_dom#15] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_dom), GreaterThanOrEqual(d_dom,1), LessThanOrEqual(d_dom,2), In(d_year, [1999,2000,2..., ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(2) Project [s_store_sk#12] + : : +- *(2) Filter (s_county#17 IN (Williamson County,Franklin Parish,Bronx County,Orange County) && isnotnull(s_store_sk#12)) + : : +- *(2) FileScan parquet default.store[s_store_sk#12,s_county#17] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [In(s_county, [Williamson County,Franklin Parish,Bronx County,Orange County]), IsNotNull(s_store_..., ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(3) Project [hd_demo_sk#10] + : +- *(3) Filter ((((isnotnull(hd_vehicle_count#18) && ((hd_buy_potential#19 = >10000) || (hd_buy_potential#19 = unknown))) && (hd_vehicle_count#18 > 0)) && (CASE WHEN (hd_vehicle_count#18 > 0) THEN (cast(hd_dep_count#20 as double) / cast(hd_vehicle_count#18 as double)) ELSE null END > 1.0)) && isnotnull(hd_demo_sk#10)) + : +- *(3) 
FileScan parquet default.household_demographics[hd_demo_sk#10,hd_buy_potential#19,hd_dep_count#20,hd_vehicle_count#18] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/household_demographics], PartitionFilters: [], PushedFilters: [IsNotNull(hd_vehicle_count), Or(EqualTo(hd_buy_potential,>10000),EqualTo(hd_buy_potential,unknow..., ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(5) Project [c_customer_sk#8, c_salutation#4, c_first_name#3, c_last_name#2, c_preferred_cust_flag#5] + +- *(5) Filter isnotnull(c_customer_sk#8) + +- *(5) FileScan parquet default.customer[c_customer_sk#8,c_salutation#4,c_first_name#3,c_last_name#2,c_preferred_cust_flag#5] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk)], ReadSchema: struct 0.00) THEN CheckOverflow((promote_precision(year_total#7) / promote_precision(year_total#6)), DecimalType(37,20)) ELSE null END > CASE WHEN (year_total#8 > 0.00) THEN CheckOverflow((promote_precision(year_total#9) / promote_precision(year_total#8)), DecimalType(37,20)) ELSE null END) + :- *(17) Project [customer_id#4, year_total#8, customer_id#1, customer_first_name#2, customer_last_name#3, year_total#9, year_total#6] + : +- *(17) BroadcastHashJoin [customer_id#4], [customer_id#10], Inner, BuildRight + : :- *(17) BroadcastHashJoin [customer_id#4], [customer_id#1], Inner, BuildRight + : : :- Union + : : : :- *(4) Filter (isnotnull(year_total#8) && (year_total#8 > 0.00)) + : : : : +- *(4) HashAggregate(keys=[c_customer_id#11, c_first_name#12, c_last_name#13, d_year#14], functions=[sum(UnscaledValue(ss_net_paid#15))]) + : : : : +- Exchange hashpartitioning(c_customer_id#11, c_first_name#12, c_last_name#13, d_year#14, 200) + : : : : +- *(3) HashAggregate(keys=[c_customer_id#11, 
c_first_name#12, c_last_name#13, d_year#14], functions=[partial_sum(UnscaledValue(ss_net_paid#15))]) + : : : : +- *(3) Project [c_customer_id#11, c_first_name#12, c_last_name#13, ss_net_paid#15, d_year#14] + : : : : +- *(3) BroadcastHashJoin [ss_sold_date_sk#16], [d_date_sk#17], Inner, BuildRight + : : : : :- *(3) Project [c_customer_id#11, c_first_name#12, c_last_name#13, ss_sold_date_sk#16, ss_net_paid#15] + : : : : : +- *(3) BroadcastHashJoin [c_customer_sk#18], [ss_customer_sk#19], Inner, BuildRight + : : : : : :- *(3) Project [c_customer_sk#18, c_customer_id#11, c_first_name#12, c_last_name#13] + : : : : : : +- *(3) Filter (isnotnull(c_customer_sk#18) && isnotnull(c_customer_id#11)) + : : : : : : +- *(3) FileScan parquet default.customer[c_customer_sk#18,c_customer_id#11,c_first_name#12,c_last_name#13] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)], ReadSchema: struct + : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[1, int, true] as bigint))) + : : : : : +- *(1) Project [ss_sold_date_sk#16, ss_customer_sk#19, ss_net_paid#15] + : : : : : +- *(1) Filter (isnotnull(ss_customer_sk#19) && isnotnull(ss_sold_date_sk#16)) + : : : : : +- *(1) FileScan parquet default.store_sales[ss_sold_date_sk#16,ss_customer_sk#19,ss_net_paid#15] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_customer_sk), IsNotNull(ss_sold_date_sk)], ReadSchema: struct + : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : +- *(2) Project [d_date_sk#17, d_year#14] + : : : : +- *(2) Filter (((isnotnull(d_year#14) && d_year#14 IN (2001,2002)) && (d_year#14 = 2001)) && isnotnull(d_date_sk#17)) + : : : : +- *(2) 
FileScan parquet default.date_dim[d_date_sk#17,d_year#14] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), In(d_year, [2001,2002]), EqualTo(d_year,2001), IsNotNull(d_date_sk)], ReadSchema: struct + : : : +- LocalTableScan , [customer_id#20, year_total#21] + : : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true])) + : : +- Union + : : :- *(8) HashAggregate(keys=[c_customer_id#11, c_first_name#12, c_last_name#13, d_year#14], functions=[sum(UnscaledValue(ss_net_paid#15))]) + : : : +- Exchange hashpartitioning(c_customer_id#11, c_first_name#12, c_last_name#13, d_year#14, 200) + : : : +- *(7) HashAggregate(keys=[c_customer_id#11, c_first_name#12, c_last_name#13, d_year#14], functions=[partial_sum(UnscaledValue(ss_net_paid#15))]) + : : : +- *(7) Project [c_customer_id#11, c_first_name#12, c_last_name#13, ss_net_paid#15, d_year#14] + : : : +- *(7) BroadcastHashJoin [ss_sold_date_sk#16], [d_date_sk#17], Inner, BuildRight + : : : :- *(7) Project [c_customer_id#11, c_first_name#12, c_last_name#13, ss_sold_date_sk#16, ss_net_paid#15] + : : : : +- *(7) BroadcastHashJoin [c_customer_sk#18], [ss_customer_sk#19], Inner, BuildRight + : : : : :- *(7) Project [c_customer_sk#18, c_customer_id#11, c_first_name#12, c_last_name#13] + : : : : : +- *(7) Filter (isnotnull(c_customer_sk#18) && isnotnull(c_customer_id#11)) + : : : : : +- *(7) FileScan parquet default.customer[c_customer_sk#18,c_customer_id#11,c_first_name#12,c_last_name#13] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)], ReadSchema: struct + : : : : +- ReusedExchange [ss_sold_date_sk#16, ss_customer_sk#19, ss_net_paid#15], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[1, int, 
true] as bigint))) + : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : +- *(6) Project [d_date_sk#17, d_year#14] + : : : +- *(6) Filter (((isnotnull(d_year#14) && d_year#14 IN (2001,2002)) && (d_year#14 = 2002)) && isnotnull(d_date_sk#17)) + : : : +- *(6) FileScan parquet default.date_dim[d_date_sk#17,d_year#14] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), In(d_year, [2001,2002]), EqualTo(d_year,2002), IsNotNull(d_date_sk)], ReadSchema: struct + : : +- LocalTableScan , [customer_id#20, customer_first_name#22, customer_last_name#23, year_total#21] + : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true])) + : +- Union + : :- LocalTableScan , [customer_id#10, year_total#6] + : +- *(12) Filter (isnotnull(year_total#21) && (year_total#21 > 0.00)) + : +- *(12) HashAggregate(keys=[c_customer_id#11, c_first_name#12, c_last_name#13, d_year#14], functions=[sum(UnscaledValue(ws_net_paid#24))]) + : +- Exchange hashpartitioning(c_customer_id#11, c_first_name#12, c_last_name#13, d_year#14, 200) + : +- *(11) HashAggregate(keys=[c_customer_id#11, c_first_name#12, c_last_name#13, d_year#14], functions=[partial_sum(UnscaledValue(ws_net_paid#24))]) + : +- *(11) Project [c_customer_id#11, c_first_name#12, c_last_name#13, ws_net_paid#24, d_year#14] + : +- *(11) BroadcastHashJoin [ws_sold_date_sk#25], [d_date_sk#17], Inner, BuildRight + : :- *(11) Project [c_customer_id#11, c_first_name#12, c_last_name#13, ws_sold_date_sk#25, ws_net_paid#24] + : : +- *(11) BroadcastHashJoin [c_customer_sk#18], [ws_bill_customer_sk#26], Inner, BuildRight + : : :- *(11) Project [c_customer_sk#18, c_customer_id#11, c_first_name#12, c_last_name#13] + : : : +- *(11) Filter (isnotnull(c_customer_sk#18) && isnotnull(c_customer_id#11)) + : : : +- *(11) FileScan parquet 
default.customer[c_customer_sk#18,c_customer_id#11,c_first_name#12,c_last_name#13] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)], ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[1, int, true] as bigint))) + : : +- *(9) Project [ws_sold_date_sk#25, ws_bill_customer_sk#26, ws_net_paid#24] + : : +- *(9) Filter (isnotnull(ws_bill_customer_sk#26) && isnotnull(ws_sold_date_sk#25)) + : : +- *(9) FileScan parquet default.web_sales[ws_sold_date_sk#25,ws_bill_customer_sk#26,ws_net_paid#24] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_bill_customer_sk), IsNotNull(ws_sold_date_sk)], ReadSchema: struct + : +- ReusedExchange [d_date_sk#17, d_year#14], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true])) + +- Union + :- LocalTableScan , [customer_id#5, year_total#7] + +- *(16) HashAggregate(keys=[c_customer_id#11, c_first_name#12, c_last_name#13, d_year#14], functions=[sum(UnscaledValue(ws_net_paid#24))]) + +- Exchange hashpartitioning(c_customer_id#11, c_first_name#12, c_last_name#13, d_year#14, 200) + +- *(15) HashAggregate(keys=[c_customer_id#11, c_first_name#12, c_last_name#13, d_year#14], functions=[partial_sum(UnscaledValue(ws_net_paid#24))]) + +- *(15) Project [c_customer_id#11, c_first_name#12, c_last_name#13, ws_net_paid#24, d_year#14] + +- *(15) BroadcastHashJoin [ws_sold_date_sk#25], [d_date_sk#17], Inner, BuildRight + :- *(15) Project [c_customer_id#11, c_first_name#12, c_last_name#13, ws_sold_date_sk#25, ws_net_paid#24] + : +- *(15) BroadcastHashJoin [c_customer_sk#18], [ws_bill_customer_sk#26], Inner, 
BuildRight + : :- *(15) Project [c_customer_sk#18, c_customer_id#11, c_first_name#12, c_last_name#13] + : : +- *(15) Filter (isnotnull(c_customer_sk#18) && isnotnull(c_customer_id#11)) + : : +- *(15) FileScan parquet default.customer[c_customer_sk#18,c_customer_id#11,c_first_name#12,c_last_name#13] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)], ReadSchema: struct + : +- ReusedExchange [ws_sold_date_sk#25, ws_bill_customer_sk#26, ws_net_paid#24], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[1, int, true] as bigint))) + +- ReusedExchange [d_date_sk#17, d_year#14], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q74/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q74/simplified.txt new file mode 100644 index 000000000..c3144e1d9 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q74/simplified.txt @@ -0,0 +1,108 @@ +TakeOrderedAndProject [customer_id,customer_first_name,customer_last_name] + WholeStageCodegen + Project [customer_id,customer_first_name,customer_last_name] + BroadcastHashJoin [year_total,year_total,year_total,year_total,customer_id,customer_id] + Project [customer_first_name,year_total,year_total,customer_last_name,year_total,customer_id,customer_id] + BroadcastHashJoin [customer_id,customer_id] + BroadcastHashJoin [customer_id,customer_id] + InputAdapter + Union + WholeStageCodegen + Filter [year_total] + HashAggregate [d_year,sum(UnscaledValue(ss_net_paid)),c_customer_id,c_last_name,sum,c_first_name] [sum(UnscaledValue(ss_net_paid)),customer_id,year_total,sum] + InputAdapter + Exchange [c_customer_id,c_first_name,c_last_name,d_year] #1 + WholeStageCodegen + 
HashAggregate [d_year,sum,ss_net_paid,c_customer_id,c_last_name,sum,c_first_name] [sum,sum] + Project [c_customer_id,d_year,ss_net_paid,c_last_name,c_first_name] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [c_customer_id,ss_net_paid,c_last_name,c_first_name,ss_sold_date_sk] + BroadcastHashJoin [c_customer_sk,ss_customer_sk] + Project [c_customer_sk,c_customer_id,c_first_name,c_last_name] + Filter [c_customer_sk,c_customer_id] + Scan parquet default.customer [c_customer_sk,c_customer_id,c_first_name,c_last_name] [c_customer_sk,c_customer_id,c_first_name,c_last_name] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen + Project [ss_sold_date_sk,ss_customer_sk,ss_net_paid] + Filter [ss_customer_sk,ss_sold_date_sk] + Scan parquet default.store_sales [ss_sold_date_sk,ss_customer_sk,ss_net_paid] [ss_sold_date_sk,ss_customer_sk,ss_net_paid] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen + Project [d_date_sk,d_year] + Filter [d_year,d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_year] [d_date_sk,d_year] + LocalTableScan [customer_id,year_total] [customer_id,year_total] + InputAdapter + BroadcastExchange #4 + Union + WholeStageCodegen + HashAggregate [d_year,sum(UnscaledValue(ss_net_paid)),sum,c_customer_id,c_last_name,c_first_name] [sum(UnscaledValue(ss_net_paid)),customer_first_name,sum,customer_last_name,year_total,customer_id] + InputAdapter + Exchange [c_customer_id,c_first_name,c_last_name,d_year] #5 + WholeStageCodegen + HashAggregate [d_year,ss_net_paid,sum,c_customer_id,c_last_name,c_first_name,sum] [sum,sum] + Project [c_customer_id,d_year,ss_net_paid,c_last_name,c_first_name] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [c_customer_id,ss_net_paid,c_last_name,c_first_name,ss_sold_date_sk] + BroadcastHashJoin [c_customer_sk,ss_customer_sk] + Project [c_customer_sk,c_customer_id,c_first_name,c_last_name] + Filter [c_customer_sk,c_customer_id] + Scan parquet default.customer 
[c_customer_sk,c_customer_id,c_first_name,c_last_name] [c_customer_sk,c_customer_id,c_first_name,c_last_name] + InputAdapter + ReusedExchange [ss_sold_date_sk,ss_customer_sk,ss_net_paid] [ss_sold_date_sk,ss_customer_sk,ss_net_paid] #2 + InputAdapter + BroadcastExchange #6 + WholeStageCodegen + Project [d_date_sk,d_year] + Filter [d_year,d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_year] [d_date_sk,d_year] + LocalTableScan [customer_id,customer_first_name,customer_last_name,year_total] [customer_id,customer_first_name,customer_last_name,year_total] + InputAdapter + BroadcastExchange #7 + Union + LocalTableScan [customer_id,year_total] [customer_id,year_total] + WholeStageCodegen + Filter [year_total] + HashAggregate [d_year,sum,sum(UnscaledValue(ws_net_paid)),c_customer_id,c_last_name,c_first_name] [sum(UnscaledValue(ws_net_paid)),customer_id,year_total,sum] + InputAdapter + Exchange [c_customer_id,c_first_name,c_last_name,d_year] #8 + WholeStageCodegen + HashAggregate [d_year,sum,sum,ws_net_paid,c_customer_id,c_last_name,c_first_name] [sum,sum] + Project [c_customer_id,ws_net_paid,d_year,c_last_name,c_first_name] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Project [c_customer_id,ws_net_paid,c_last_name,ws_sold_date_sk,c_first_name] + BroadcastHashJoin [c_customer_sk,ws_bill_customer_sk] + Project [c_customer_sk,c_customer_id,c_first_name,c_last_name] + Filter [c_customer_sk,c_customer_id] + Scan parquet default.customer [c_customer_sk,c_customer_id,c_first_name,c_last_name] [c_customer_sk,c_customer_id,c_first_name,c_last_name] + InputAdapter + BroadcastExchange #9 + WholeStageCodegen + Project [ws_sold_date_sk,ws_bill_customer_sk,ws_net_paid] + Filter [ws_bill_customer_sk,ws_sold_date_sk] + Scan parquet default.web_sales [ws_sold_date_sk,ws_bill_customer_sk,ws_net_paid] [ws_sold_date_sk,ws_bill_customer_sk,ws_net_paid] + InputAdapter + ReusedExchange [d_date_sk,d_year] [d_date_sk,d_year] #3 + InputAdapter + BroadcastExchange #10 + Union + 
LocalTableScan [customer_id,year_total] [customer_id,year_total] + WholeStageCodegen + HashAggregate [d_year,sum(UnscaledValue(ws_net_paid)),c_customer_id,c_last_name,c_first_name,sum] [sum(UnscaledValue(ws_net_paid)),customer_id,year_total,sum] + InputAdapter + Exchange [c_customer_id,c_first_name,c_last_name,d_year] #11 + WholeStageCodegen + HashAggregate [d_year,ws_net_paid,c_customer_id,sum,c_last_name,c_first_name,sum] [sum,sum] + Project [c_customer_id,ws_net_paid,d_year,c_last_name,c_first_name] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Project [c_customer_id,ws_net_paid,c_last_name,ws_sold_date_sk,c_first_name] + BroadcastHashJoin [c_customer_sk,ws_bill_customer_sk] + Project [c_customer_sk,c_customer_id,c_first_name,c_last_name] + Filter [c_customer_sk,c_customer_id] + Scan parquet default.customer [c_customer_sk,c_customer_id,c_first_name,c_last_name] [c_customer_sk,c_customer_id,c_first_name,c_last_name] + InputAdapter + ReusedExchange [ws_sold_date_sk,ws_bill_customer_sk,ws_net_paid] [ws_sold_date_sk,ws_bill_customer_sk,ws_net_paid] #9 + InputAdapter + ReusedExchange [d_date_sk,d_year] [d_date_sk,d_year] #6 diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q75/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q75/explain.txt new file mode 100644 index 000000000..80af7fa5e --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q75/explain.txt @@ -0,0 +1,117 @@ +== Physical Plan == +TakeOrderedAndProject(limit=100, orderBy=[sales_cnt_diff#1 ASC NULLS FIRST], output=[prev_year#2,year#3,i_brand_id#4,i_class_id#5,i_category_id#6,i_manufact_id#7,prev_yr_cnt#8,curr_yr_cnt#9,sales_cnt_diff#1,sales_amt_diff#10]) ++- *(34) Project [d_year#11 AS prev_year#2, d_year#12 AS year#3, i_brand_id#4, i_class_id#5, i_category_id#6, i_manufact_id#7, sales_cnt#13 AS prev_yr_cnt#8, sales_cnt#14 AS curr_yr_cnt#9, (sales_cnt#14 - sales_cnt#13) AS sales_cnt_diff#1, 
CheckOverflow((promote_precision(cast(sales_amt#15 as decimal(19,2))) - promote_precision(cast(sales_amt#16 as decimal(19,2)))), DecimalType(19,2)) AS sales_amt_diff#10] + +- *(34) BroadcastHashJoin [i_brand_id#4, i_class_id#5, i_category_id#6, i_manufact_id#7], [i_brand_id#17, i_class_id#18, i_category_id#19, i_manufact_id#20], Inner, BuildRight, (CheckOverflow((promote_precision(cast(sales_cnt#14 as decimal(17,2))) / promote_precision(cast(sales_cnt#13 as decimal(17,2)))), DecimalType(37,20)) < 0.90000000000000000000) + :- *(34) HashAggregate(keys=[d_year#12, i_brand_id#4, i_class_id#5, i_category_id#6, i_manufact_id#7], functions=[sum(cast(sales_cnt#21 as bigint)), sum(UnscaledValue(sales_amt#22))]) + : +- Exchange hashpartitioning(d_year#12, i_brand_id#4, i_class_id#5, i_category_id#6, i_manufact_id#7, 200) + : +- *(16) HashAggregate(keys=[d_year#12, i_brand_id#4, i_class_id#5, i_category_id#6, i_manufact_id#7], functions=[partial_sum(cast(sales_cnt#21 as bigint)), partial_sum(UnscaledValue(sales_amt#22))]) + : +- *(16) HashAggregate(keys=[d_year#12, i_brand_id#4, i_class_id#5, i_category_id#6, i_manufact_id#7, sales_cnt#21, sales_amt#22], functions=[]) + : +- Exchange hashpartitioning(d_year#12, i_brand_id#4, i_class_id#5, i_category_id#6, i_manufact_id#7, sales_cnt#21, sales_amt#22, 200) + : +- *(15) HashAggregate(keys=[d_year#12, i_brand_id#4, i_class_id#5, i_category_id#6, i_manufact_id#7, sales_cnt#21, sales_amt#22], functions=[]) + : +- Union + : :- *(10) HashAggregate(keys=[d_year#12, i_brand_id#4, i_class_id#5, i_category_id#6, i_manufact_id#7, sales_cnt#21, sales_amt#22], functions=[]) + : : +- Exchange hashpartitioning(d_year#12, i_brand_id#4, i_class_id#5, i_category_id#6, i_manufact_id#7, sales_cnt#21, sales_amt#22, 200) + : : +- *(9) HashAggregate(keys=[d_year#12, i_brand_id#4, i_class_id#5, i_category_id#6, i_manufact_id#7, sales_cnt#21, sales_amt#22], functions=[]) + : : +- Union + : : :- *(4) Project [d_year#12, i_brand_id#4, i_class_id#5, 
i_category_id#6, i_manufact_id#7, (cs_quantity#23 - coalesce(cr_return_quantity#24, 0)) AS sales_cnt#21, CheckOverflow((promote_precision(cast(cs_ext_sales_price#25 as decimal(8,2))) - promote_precision(cast(coalesce(cr_return_amount#26, 0.00) as decimal(8,2)))), DecimalType(8,2)) AS sales_amt#22] + : : : +- *(4) BroadcastHashJoin [cs_order_number#27, cs_item_sk#28], [cr_order_number#29, cr_item_sk#30], LeftOuter, BuildRight + : : : :- *(4) Project [cs_item_sk#28, cs_order_number#27, cs_quantity#23, cs_ext_sales_price#25, i_brand_id#4, i_class_id#5, i_category_id#6, i_manufact_id#7, d_year#12] + : : : : +- *(4) BroadcastHashJoin [cs_sold_date_sk#31], [d_date_sk#32], Inner, BuildRight + : : : : :- *(4) Project [cs_sold_date_sk#31, cs_item_sk#28, cs_order_number#27, cs_quantity#23, cs_ext_sales_price#25, i_brand_id#4, i_class_id#5, i_category_id#6, i_manufact_id#7] + : : : : : +- *(4) BroadcastHashJoin [cs_item_sk#28], [i_item_sk#33], Inner, BuildRight + : : : : : :- *(4) Project [cs_sold_date_sk#31, cs_item_sk#28, cs_order_number#27, cs_quantity#23, cs_ext_sales_price#25] + : : : : : : +- *(4) Filter (isnotnull(cs_item_sk#28) && isnotnull(cs_sold_date_sk#31)) + : : : : : : +- *(4) FileScan parquet default.catalog_sales[cs_sold_date_sk#31,cs_item_sk#28,cs_order_number#27,cs_quantity#23,cs_ext_sales_price#25] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_item_sk), IsNotNull(cs_sold_date_sk)], ReadSchema: struct + : : : +- BroadcastExchange HashedRelationBroadcastMode(List((shiftleft(cast(input[1, int, true] as bigint), 32) | (cast(input[0, int, true] as bigint) & 4294967295)))) + : : : +- *(3) Project [cr_item_sk#30, cr_order_number#29, cr_return_quantity#24, cr_return_amount#26] + : : : +- *(3) Filter (isnotnull(cr_item_sk#30) && isnotnull(cr_order_number#29)) + : : : +- *(3) FileScan parquet 
default.catalog_returns[cr_item_sk#30,cr_order_number#29,cr_return_quantity#24,cr_return_amount#26] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_returns], PartitionFilters: [], PushedFilters: [IsNotNull(cr_item_sk), IsNotNull(cr_order_number)], ReadSchema: struct + : : +- *(8) Project [d_year#12, i_brand_id#4, i_class_id#5, i_category_id#6, i_manufact_id#7, (ss_quantity#35 - coalesce(sr_return_quantity#36, 0)) AS sales_cnt#37, CheckOverflow((promote_precision(cast(ss_ext_sales_price#38 as decimal(8,2))) - promote_precision(cast(coalesce(sr_return_amt#39, 0.00) as decimal(8,2)))), DecimalType(8,2)) AS sales_amt#40] + : : +- *(8) BroadcastHashJoin [cast(ss_ticket_number#41 as bigint), cast(ss_item_sk#42 as bigint)], [sr_ticket_number#43, sr_item_sk#44], LeftOuter, BuildRight + : : :- *(8) Project [ss_item_sk#42, ss_ticket_number#41, ss_quantity#35, ss_ext_sales_price#38, i_brand_id#4, i_class_id#5, i_category_id#6, i_manufact_id#7, d_year#12] + : : : +- *(8) BroadcastHashJoin [ss_sold_date_sk#45], [d_date_sk#32], Inner, BuildRight + : : : :- *(8) Project [ss_sold_date_sk#45, ss_item_sk#42, ss_ticket_number#41, ss_quantity#35, ss_ext_sales_price#38, i_brand_id#4, i_class_id#5, i_category_id#6, i_manufact_id#7] + : : : : +- *(8) BroadcastHashJoin [ss_item_sk#42], [i_item_sk#33], Inner, BuildRight + : : : : :- *(8) Project [ss_sold_date_sk#45, ss_item_sk#42, ss_ticket_number#41, ss_quantity#35, ss_ext_sales_price#38] + : : : : : +- *(8) Filter (isnotnull(ss_item_sk#42) && isnotnull(ss_sold_date_sk#45)) + : : : : : +- *(8) FileScan parquet default.store_sales[ss_sold_date_sk#45,ss_item_sk#42,ss_ticket_number#41,ss_quantity#35,ss_ext_sales_price#38] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk)], ReadSchema: struct + : +- 
*(14) Project [d_year#12, i_brand_id#4, i_class_id#5, i_category_id#6, i_manufact_id#7, (ws_quantity#46 - coalesce(wr_return_quantity#47, 0)) AS sales_cnt#48, CheckOverflow((promote_precision(cast(ws_ext_sales_price#49 as decimal(8,2))) - promote_precision(cast(coalesce(wr_return_amt#50, 0.00) as decimal(8,2)))), DecimalType(8,2)) AS sales_amt#51] + : +- *(14) BroadcastHashJoin [cast(ws_order_number#52 as bigint), cast(ws_item_sk#53 as bigint)], [wr_order_number#54, wr_item_sk#55], LeftOuter, BuildRight + : :- *(14) Project [ws_item_sk#53, ws_order_number#52, ws_quantity#46, ws_ext_sales_price#49, i_brand_id#4, i_class_id#5, i_category_id#6, i_manufact_id#7, d_year#12] + : : +- *(14) BroadcastHashJoin [ws_sold_date_sk#56], [d_date_sk#32], Inner, BuildRight + : : :- *(14) Project [ws_sold_date_sk#56, ws_item_sk#53, ws_order_number#52, ws_quantity#46, ws_ext_sales_price#49, i_brand_id#4, i_class_id#5, i_category_id#6, i_manufact_id#7] + : : : +- *(14) BroadcastHashJoin [ws_item_sk#53], [i_item_sk#33], Inner, BuildRight + : : : :- *(14) Project [ws_sold_date_sk#56, ws_item_sk#53, ws_order_number#52, ws_quantity#46, ws_ext_sales_price#49] + : : : : +- *(14) Filter (isnotnull(ws_item_sk#53) && isnotnull(ws_sold_date_sk#56)) + : : : : +- *(14) FileScan parquet default.web_sales[ws_sold_date_sk#56,ws_item_sk#53,ws_order_number#52,ws_quantity#46,ws_ext_sales_price#49] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_item_sk), IsNotNull(ws_sold_date_sk)], ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(input[1, int, true], input[2, int, true], input[3, int, true], input[4, int, true])) + +- *(33) HashAggregate(keys=[d_year#11, i_brand_id#17, i_class_id#18, i_category_id#19, i_manufact_id#20], functions=[sum(cast(sales_cnt#21 as bigint)), sum(UnscaledValue(sales_amt#22))]) + +- Exchange hashpartitioning(d_year#11, 
i_brand_id#17, i_class_id#18, i_category_id#19, i_manufact_id#20, 200) + +- *(32) HashAggregate(keys=[d_year#11, i_brand_id#17, i_class_id#18, i_category_id#19, i_manufact_id#20], functions=[partial_sum(cast(sales_cnt#21 as bigint)), partial_sum(UnscaledValue(sales_amt#22))]) + +- *(32) HashAggregate(keys=[d_year#11, i_brand_id#17, i_class_id#18, i_category_id#19, i_manufact_id#20, sales_cnt#21, sales_amt#22], functions=[]) + +- Exchange hashpartitioning(d_year#11, i_brand_id#17, i_class_id#18, i_category_id#19, i_manufact_id#20, sales_cnt#21, sales_amt#22, 200) + +- *(31) HashAggregate(keys=[d_year#11, i_brand_id#17, i_class_id#18, i_category_id#19, i_manufact_id#20, sales_cnt#21, sales_amt#22], functions=[]) + +- Union + :- *(26) HashAggregate(keys=[d_year#11, i_brand_id#17, i_class_id#18, i_category_id#19, i_manufact_id#20, sales_cnt#21, sales_amt#22], functions=[]) + : +- Exchange hashpartitioning(d_year#11, i_brand_id#17, i_class_id#18, i_category_id#19, i_manufact_id#20, sales_cnt#21, sales_amt#22, 200) + : +- *(25) HashAggregate(keys=[d_year#11, i_brand_id#17, i_class_id#18, i_category_id#19, i_manufact_id#20, sales_cnt#21, sales_amt#22], functions=[]) + : +- Union + : :- *(20) Project [d_year#11, i_brand_id#17, i_class_id#18, i_category_id#19, i_manufact_id#20, (cs_quantity#23 - coalesce(cr_return_quantity#24, 0)) AS sales_cnt#21, CheckOverflow((promote_precision(cast(cs_ext_sales_price#25 as decimal(8,2))) - promote_precision(cast(coalesce(cr_return_amount#26, 0.00) as decimal(8,2)))), DecimalType(8,2)) AS sales_amt#22] + : : +- *(20) BroadcastHashJoin [cs_order_number#27, cs_item_sk#28], [cr_order_number#29, cr_item_sk#30], LeftOuter, BuildRight + : : :- *(20) Project [cs_item_sk#28, cs_order_number#27, cs_quantity#23, cs_ext_sales_price#25, i_brand_id#17, i_class_id#18, i_category_id#19, i_manufact_id#20, d_year#11] + : : : +- *(20) BroadcastHashJoin [cs_sold_date_sk#31], [d_date_sk#57], Inner, BuildRight + : : : :- *(20) Project [cs_sold_date_sk#31, 
cs_item_sk#28, cs_order_number#27, cs_quantity#23, cs_ext_sales_price#25, i_brand_id#17, i_class_id#18, i_category_id#19, i_manufact_id#20] + : : : : +- *(20) BroadcastHashJoin [cs_item_sk#28], [i_item_sk#58], Inner, BuildRight + : : : : :- *(20) Project [cs_sold_date_sk#31, cs_item_sk#28, cs_order_number#27, cs_quantity#23, cs_ext_sales_price#25] + : : : : : +- *(20) Filter (isnotnull(cs_item_sk#28) && isnotnull(cs_sold_date_sk#31)) + : : : : : +- *(20) FileScan parquet default.catalog_sales[cs_sold_date_sk#31,cs_item_sk#28,cs_order_number#27,cs_quantity#23,cs_ext_sales_price#25] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_item_sk), IsNotNull(cs_sold_date_sk)], ReadSchema: struct + : : +- ReusedExchange [cr_item_sk#30, cr_order_number#29, cr_return_quantity#24, cr_return_amount#26], BroadcastExchange HashedRelationBroadcastMode(List((shiftleft(cast(input[1, int, true] as bigint), 32) | (cast(input[0, int, true] as bigint) & 4294967295)))) + : +- *(24) Project [d_year#11, i_brand_id#17, i_class_id#18, i_category_id#19, i_manufact_id#20, (ss_quantity#35 - coalesce(sr_return_quantity#36, 0)) AS sales_cnt#37, CheckOverflow((promote_precision(cast(ss_ext_sales_price#38 as decimal(8,2))) - promote_precision(cast(coalesce(sr_return_amt#39, 0.00) as decimal(8,2)))), DecimalType(8,2)) AS sales_amt#40] + : +- *(24) BroadcastHashJoin [cast(ss_ticket_number#41 as bigint), cast(ss_item_sk#42 as bigint)], [sr_ticket_number#43, sr_item_sk#44], LeftOuter, BuildRight + : :- *(24) Project [ss_item_sk#42, ss_ticket_number#41, ss_quantity#35, ss_ext_sales_price#38, i_brand_id#17, i_class_id#18, i_category_id#19, i_manufact_id#20, d_year#11] + : : +- *(24) BroadcastHashJoin [ss_sold_date_sk#45], [d_date_sk#57], Inner, BuildRight + : : :- *(24) Project [ss_sold_date_sk#45, ss_item_sk#42, ss_ticket_number#41, ss_quantity#35, ss_ext_sales_price#38, 
i_brand_id#17, i_class_id#18, i_category_id#19, i_manufact_id#20] + : : : +- *(24) BroadcastHashJoin [ss_item_sk#42], [i_item_sk#58], Inner, BuildRight + : : : :- *(24) Project [ss_sold_date_sk#45, ss_item_sk#42, ss_ticket_number#41, ss_quantity#35, ss_ext_sales_price#38] + : : : : +- *(24) Filter (isnotnull(ss_item_sk#42) && isnotnull(ss_sold_date_sk#45)) + : : : : +- *(24) FileScan parquet default.store_sales[ss_sold_date_sk#45,ss_item_sk#42,ss_ticket_number#41,ss_quantity#35,ss_ext_sales_price#38] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk)], ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(1) Project [i_item_sk#14, i_category#5] + : : +- *(1) Filter isnotnull(i_item_sk#14) + : : +- *(1) FileScan parquet default.item[i_item_sk#14,i_category#5] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(2) Project [d_date_sk#12, d_year#3, d_qoy#4] + : +- *(2) Filter isnotnull(d_date_sk#12) + : +- *(2) FileScan parquet default.date_dim[d_date_sk#12,d_year#3,d_qoy#4] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date_sk)], ReadSchema: struct + :- *(6) Project [web AS channel#15, ws_ship_customer_sk#16 AS col_name#17, d_year#3, d_qoy#4, i_category#5, ws_ext_sales_price#18 AS ext_sales_price#19] + : +- *(6) BroadcastHashJoin [ws_sold_date_sk#20], [d_date_sk#12], Inner, BuildRight + : :- *(6) Project [ws_sold_date_sk#20, 
ws_ship_customer_sk#16, ws_ext_sales_price#18, i_category#5] + : : +- *(6) BroadcastHashJoin [ws_item_sk#21], [i_item_sk#14], Inner, BuildRight + : : :- *(6) Project [ws_sold_date_sk#20, ws_item_sk#21, ws_ship_customer_sk#16, ws_ext_sales_price#18] + : : : +- *(6) Filter ((isnull(ws_ship_customer_sk#16) && isnotnull(ws_item_sk#21)) && isnotnull(ws_sold_date_sk#20)) + : : : +- *(6) FileScan parquet default.web_sales[ws_sold_date_sk#20,ws_item_sk#21,ws_ship_customer_sk#16,ws_ext_sales_price#18] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNull(ws_ship_customer_sk), IsNotNull(ws_item_sk), IsNotNull(ws_sold_date_sk)], ReadSchema: struct + : : +- ReusedExchange [i_item_sk#14, i_category#5], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- ReusedExchange [d_date_sk#12, d_year#3, d_qoy#4], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(9) Project [catalog AS channel#22, cs_ship_addr_sk#23 AS col_name#24, d_year#3, d_qoy#4, i_category#5, cs_ext_sales_price#25 AS ext_sales_price#26] + +- *(9) BroadcastHashJoin [cs_sold_date_sk#27], [d_date_sk#12], Inner, BuildRight + :- *(9) Project [cs_sold_date_sk#27, cs_ship_addr_sk#23, cs_ext_sales_price#25, i_category#5] + : +- *(9) BroadcastHashJoin [cs_item_sk#28], [i_item_sk#14], Inner, BuildRight + : :- *(9) Project [cs_sold_date_sk#27, cs_ship_addr_sk#23, cs_item_sk#28, cs_ext_sales_price#25] + : : +- *(9) Filter ((isnull(cs_ship_addr_sk#23) && isnotnull(cs_item_sk#28)) && isnotnull(cs_sold_date_sk#27)) + : : +- *(9) FileScan parquet default.catalog_sales[cs_sold_date_sk#27,cs_ship_addr_sk#23,cs_item_sk#28,cs_ext_sales_price#25] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: 
[IsNull(cs_ship_addr_sk), IsNotNull(cs_item_sk), IsNotNull(cs_sold_date_sk)], ReadSchema: struct + : +- ReusedExchange [i_item_sk#14, i_category#5], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- ReusedExchange [d_date_sk#12, d_year#3, d_qoy#4], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q76/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q76/simplified.txt new file mode 100644 index 000000000..e40976f52 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q76/simplified.txt @@ -0,0 +1,53 @@ +TakeOrderedAndProject [d_year,channel,sales_cnt,col_name,d_qoy,i_category,sales_amt] + WholeStageCodegen + HashAggregate [d_year,channel,sum,sum(UnscaledValue(ext_sales_price)),count,count(1),col_name,d_qoy,i_category] [sum,sum(UnscaledValue(ext_sales_price)),sales_cnt,count,count(1),sales_amt] + InputAdapter + Exchange [d_year,channel,col_name,d_qoy,i_category] #1 + WholeStageCodegen + HashAggregate [d_year,count,channel,sum,ext_sales_price,count,col_name,d_qoy,i_category,sum] [count,sum,count,sum] + InputAdapter + Union + WholeStageCodegen + Project [d_year,d_qoy,ss_store_sk,i_category,ss_ext_sales_price] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_sold_date_sk,ss_store_sk,ss_ext_sales_price,i_category] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [ss_sold_date_sk,ss_item_sk,ss_store_sk,ss_ext_sales_price] + Filter [ss_store_sk,ss_item_sk,ss_sold_date_sk] + Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_store_sk,ss_ext_sales_price] [ss_sold_date_sk,ss_item_sk,ss_store_sk,ss_ext_sales_price] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen + Project [i_item_sk,i_category] + Filter [i_item_sk] + Scan parquet default.item [i_item_sk,i_category] [i_item_sk,i_category] + InputAdapter 
+ BroadcastExchange #3 + WholeStageCodegen + Project [d_date_sk,d_year,d_qoy] + Filter [d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_year,d_qoy] [d_date_sk,d_year,d_qoy] + WholeStageCodegen + Project [d_year,d_qoy,ws_ship_customer_sk,ws_ext_sales_price,i_category] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Project [ws_sold_date_sk,ws_ship_customer_sk,ws_ext_sales_price,i_category] + BroadcastHashJoin [ws_item_sk,i_item_sk] + Project [ws_sold_date_sk,ws_item_sk,ws_ship_customer_sk,ws_ext_sales_price] + Filter [ws_ship_customer_sk,ws_item_sk,ws_sold_date_sk] + Scan parquet default.web_sales [ws_sold_date_sk,ws_item_sk,ws_ship_customer_sk,ws_ext_sales_price] [ws_sold_date_sk,ws_item_sk,ws_ship_customer_sk,ws_ext_sales_price] + InputAdapter + ReusedExchange [i_item_sk,i_category] [i_item_sk,i_category] #2 + InputAdapter + ReusedExchange [d_date_sk,d_year,d_qoy] [d_date_sk,d_year,d_qoy] #3 + WholeStageCodegen + Project [d_year,d_qoy,i_category,cs_ship_addr_sk,cs_ext_sales_price] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_sold_date_sk,cs_ship_addr_sk,cs_ext_sales_price,i_category] + BroadcastHashJoin [cs_item_sk,i_item_sk] + Project [cs_sold_date_sk,cs_ship_addr_sk,cs_item_sk,cs_ext_sales_price] + Filter [cs_ship_addr_sk,cs_item_sk,cs_sold_date_sk] + Scan parquet default.catalog_sales [cs_sold_date_sk,cs_ship_addr_sk,cs_item_sk,cs_ext_sales_price] [cs_sold_date_sk,cs_ship_addr_sk,cs_item_sk,cs_ext_sales_price] + InputAdapter + ReusedExchange [i_item_sk,i_category] [i_item_sk,i_category] #2 + InputAdapter + ReusedExchange [d_date_sk,d_year,d_qoy] [d_date_sk,d_year,d_qoy] #3 diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q77/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q77/explain.txt new file mode 100644 index 000000000..0199d72c5 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q77/explain.txt @@ -0,0 +1,100 @@ +== Physical Plan == 
+TakeOrderedAndProject(limit=100, orderBy=[channel#1 ASC NULLS FIRST,id#2 ASC NULLS FIRST], output=[channel#1,id#2,sales#3,returns#4,profit#5]) ++- *(25) HashAggregate(keys=[channel#1, id#2, spark_grouping_id#6], functions=[sum(sales#7), sum(returns#8), sum(profit#9)]) + +- Exchange hashpartitioning(channel#1, id#2, spark_grouping_id#6, 200) + +- *(24) HashAggregate(keys=[channel#1, id#2, spark_grouping_id#6], functions=[partial_sum(sales#7), partial_sum(returns#8), partial_sum(profit#9)]) + +- *(24) Expand [List(sales#7, returns#8, profit#9, channel#10, id#11, 0), List(sales#7, returns#8, profit#9, channel#10, null, 1), List(sales#7, returns#8, profit#9, null, null, 3)], [sales#7, returns#8, profit#9, channel#1, id#2, spark_grouping_id#6] + +- Union + :- *(8) Project [sales#7, coalesce(returns#12, 0.00) AS returns#8, CheckOverflow((promote_precision(cast(profit#13 as decimal(18,2))) - promote_precision(cast(coalesce(profit_loss#14, 0.00) as decimal(18,2)))), DecimalType(18,2)) AS profit#9, store channel AS channel#10, s_store_sk#15 AS id#11] + : +- *(8) BroadcastHashJoin [s_store_sk#15], [s_store_sk#16], LeftOuter, BuildRight + : :- *(8) HashAggregate(keys=[s_store_sk#15], functions=[sum(UnscaledValue(ss_ext_sales_price#17)), sum(UnscaledValue(ss_net_profit#18))]) + : : +- Exchange hashpartitioning(s_store_sk#15, 200) + : : +- *(3) HashAggregate(keys=[s_store_sk#15], functions=[partial_sum(UnscaledValue(ss_ext_sales_price#17)), partial_sum(UnscaledValue(ss_net_profit#18))]) + : : +- *(3) Project [ss_ext_sales_price#17, ss_net_profit#18, s_store_sk#15] + : : +- *(3) BroadcastHashJoin [ss_store_sk#19], [s_store_sk#15], Inner, BuildRight + : : :- *(3) Project [ss_store_sk#19, ss_ext_sales_price#17, ss_net_profit#18] + : : : +- *(3) BroadcastHashJoin [ss_sold_date_sk#20], [d_date_sk#21], Inner, BuildRight + : : : :- *(3) Project [ss_sold_date_sk#20, ss_store_sk#19, ss_ext_sales_price#17, ss_net_profit#18] + : : : : +- *(3) Filter (isnotnull(ss_sold_date_sk#20) && 
isnotnull(ss_store_sk#19)) + : : : : +- *(3) FileScan parquet default.store_sales[ss_sold_date_sk#20,ss_store_sk#19,ss_ext_sales_price#17,ss_net_profit#18] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk)], ReadSchema: struct= 11172)) && (d_date#22 <= 11202)) && isnotnull(d_date_sk#21)) + : : : +- *(1) FileScan parquet default.date_dim[d_date_sk#21,d_date#22] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,2000-08-03), LessThanOrEqual(d_date,2000-09-02), Is..., ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(2) Project [s_store_sk#15] + : : +- *(2) Filter isnotnull(s_store_sk#15) + : : +- *(2) FileScan parquet default.store[s_store_sk#15] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_store_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(7) HashAggregate(keys=[s_store_sk#16], functions=[sum(UnscaledValue(sr_return_amt#23)), sum(UnscaledValue(sr_net_loss#24))]) + : +- Exchange hashpartitioning(s_store_sk#16, 200) + : +- *(6) HashAggregate(keys=[s_store_sk#16], functions=[partial_sum(UnscaledValue(sr_return_amt#23)), partial_sum(UnscaledValue(sr_net_loss#24))]) + : +- *(6) Project [sr_return_amt#23, sr_net_loss#24, s_store_sk#16] + : +- *(6) BroadcastHashJoin [sr_store_sk#25], [cast(s_store_sk#16 as bigint)], Inner, BuildRight + : :- *(6) Project [sr_store_sk#25, sr_return_amt#23, sr_net_loss#24] + : : +- *(6) BroadcastHashJoin 
[sr_returned_date_sk#26], [cast(d_date_sk#21 as bigint)], Inner, BuildRight + : : :- *(6) Project [sr_returned_date_sk#26, sr_store_sk#25, sr_return_amt#23, sr_net_loss#24] + : : : +- *(6) Filter (isnotnull(sr_returned_date_sk#26) && isnotnull(sr_store_sk#25)) + : : : +- *(6) FileScan parquet default.store_returns[sr_returned_date_sk#26,sr_store_sk#25,sr_return_amt#23,sr_net_loss#24] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_returns], PartitionFilters: [], PushedFilters: [IsNotNull(sr_returned_date_sk), IsNotNull(sr_store_sk)], ReadSchema: struct= 11172)) && (d_date#22 <= 11202)) && isnotnull(d_date_sk#21)) + : : +- *(4) FileScan parquet default.date_dim[d_date_sk#21,d_date#22] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,2000-08-03), LessThanOrEqual(d_date,2000-09-02), Is..., ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(5) Project [s_store_sk#16] + : +- *(5) Filter isnotnull(s_store_sk#16) + : +- *(5) FileScan parquet default.store[s_store_sk#16] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_store_sk)], ReadSchema: struct + :- *(15) Project [sales#27, returns#28, CheckOverflow((promote_precision(cast(profit#29 as decimal(18,2))) - promote_precision(cast(profit_loss#30 as decimal(18,2)))), DecimalType(18,2)) AS profit#31, catalog channel AS channel#32, cs_call_center_sk#33 AS id#34] + : +- BroadcastNestedLoopJoin BuildLeft, Inner + : :- BroadcastExchange IdentityBroadcastMode + : : +- *(11) HashAggregate(keys=[cs_call_center_sk#33], functions=[sum(UnscaledValue(cs_ext_sales_price#35)), 
sum(UnscaledValue(cs_net_profit#36))]) + : : +- Exchange hashpartitioning(cs_call_center_sk#33, 200) + : : +- *(10) HashAggregate(keys=[cs_call_center_sk#33], functions=[partial_sum(UnscaledValue(cs_ext_sales_price#35)), partial_sum(UnscaledValue(cs_net_profit#36))]) + : : +- *(10) Project [cs_call_center_sk#33, cs_ext_sales_price#35, cs_net_profit#36] + : : +- *(10) BroadcastHashJoin [cs_sold_date_sk#37], [d_date_sk#21], Inner, BuildRight + : : :- *(10) Project [cs_sold_date_sk#37, cs_call_center_sk#33, cs_ext_sales_price#35, cs_net_profit#36] + : : : +- *(10) Filter isnotnull(cs_sold_date_sk#37) + : : : +- *(10) FileScan parquet default.catalog_sales[cs_sold_date_sk#37,cs_call_center_sk#33,cs_ext_sales_price#35,cs_net_profit#36] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_sold_date_sk)], ReadSchema: struct + : +- ReusedExchange [d_date_sk#21], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(23) Project [sales#41, coalesce(returns#42, 0.00) AS returns#43, CheckOverflow((promote_precision(cast(profit#44 as decimal(18,2))) - promote_precision(cast(coalesce(profit_loss#45, 0.00) as decimal(18,2)))), DecimalType(18,2)) AS profit#46, web channel AS channel#47, wp_web_page_sk#48 AS id#49] + +- *(23) BroadcastHashJoin [wp_web_page_sk#48], [wp_web_page_sk#50], LeftOuter, BuildRight + :- *(23) HashAggregate(keys=[wp_web_page_sk#48], functions=[sum(UnscaledValue(ws_ext_sales_price#51)), sum(UnscaledValue(ws_net_profit#52))]) + : +- Exchange hashpartitioning(wp_web_page_sk#48, 200) + : +- *(18) HashAggregate(keys=[wp_web_page_sk#48], functions=[partial_sum(UnscaledValue(ws_ext_sales_price#51)), partial_sum(UnscaledValue(ws_net_profit#52))]) + : +- *(18) Project [ws_ext_sales_price#51, ws_net_profit#52, wp_web_page_sk#48] + : +- *(18) BroadcastHashJoin [ws_web_page_sk#53], 
[wp_web_page_sk#48], Inner, BuildRight + : :- *(18) Project [ws_web_page_sk#53, ws_ext_sales_price#51, ws_net_profit#52] + : : +- *(18) BroadcastHashJoin [ws_sold_date_sk#54], [d_date_sk#21], Inner, BuildRight + : : :- *(18) Project [ws_sold_date_sk#54, ws_web_page_sk#53, ws_ext_sales_price#51, ws_net_profit#52] + : : : +- *(18) Filter (isnotnull(ws_sold_date_sk#54) && isnotnull(ws_web_page_sk#53)) + : : : +- *(18) FileScan parquet default.web_sales[ws_sold_date_sk#54,ws_web_page_sk#53,ws_ext_sales_price#51,ws_net_profit#52] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_sold_date_sk), IsNotNull(ws_web_page_sk)], ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(22) HashAggregate(keys=[wp_web_page_sk#50], functions=[sum(UnscaledValue(wr_return_amt#55)), sum(UnscaledValue(wr_net_loss#56))]) + +- Exchange hashpartitioning(wp_web_page_sk#50, 200) + +- *(21) HashAggregate(keys=[wp_web_page_sk#50], functions=[partial_sum(UnscaledValue(wr_return_amt#55)), partial_sum(UnscaledValue(wr_net_loss#56))]) + +- *(21) Project [wr_return_amt#55, wr_net_loss#56, wp_web_page_sk#50] + +- *(21) BroadcastHashJoin [wr_web_page_sk#57], [cast(wp_web_page_sk#50 as bigint)], Inner, BuildRight + :- *(21) Project [wr_web_page_sk#57, wr_return_amt#55, wr_net_loss#56] + : +- *(21) BroadcastHashJoin [wr_returned_date_sk#58], [cast(d_date_sk#21 as bigint)], Inner, BuildRight + : :- *(21) Project [wr_returned_date_sk#58, wr_web_page_sk#57, wr_return_amt#55, wr_net_loss#56] + : : +- *(21) Filter (isnotnull(wr_returned_date_sk#58) && isnotnull(wr_web_page_sk#57)) + : : +- *(21) FileScan parquet default.web_returns[wr_returned_date_sk#58,wr_web_page_sk#57,wr_return_amt#55,wr_net_loss#56] Batched: true, Format: Parquet, Location: 
InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_returns], PartitionFilters: [], PushedFilters: [IsNotNull(wr_returned_date_sk), IsNotNull(wr_web_page_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q77/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q77/simplified.txt new file mode 100644 index 000000000..1c29af5ef --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q77/simplified.txt @@ -0,0 +1,141 @@ +TakeOrderedAndProject [channel,profit,id,sales,returns] + WholeStageCodegen + HashAggregate [channel,sum,sum,id,sum(profit),spark_grouping_id,sum(sales),sum,sum(returns)] [sum,profit,sum,sum(profit),sales,sum(sales),sum,returns,sum(returns)] + InputAdapter + Exchange [channel,id,spark_grouping_id] #1 + WholeStageCodegen + HashAggregate [profit,channel,sum,sum,sales,sum,sum,id,sum,spark_grouping_id,returns,sum] [sum,sum,sum,sum,sum,sum] + Expand [profit,channel,sales,id,returns] + InputAdapter + Union + WholeStageCodegen + Project [profit,s_store_sk,sales,returns,profit_loss] + BroadcastHashJoin [s_store_sk,s_store_sk] + HashAggregate [sum(UnscaledValue(ss_ext_sales_price)),sum,sum,s_store_sk,sum(UnscaledValue(ss_net_profit))] [sales,sum(UnscaledValue(ss_ext_sales_price)),sum,sum,sum(UnscaledValue(ss_net_profit)),profit] + InputAdapter + Exchange [s_store_sk] #2 + WholeStageCodegen + HashAggregate [sum,sum,sum,sum,s_store_sk,ss_ext_sales_price,ss_net_profit] [sum,sum,sum,sum] + Project [ss_ext_sales_price,ss_net_profit,s_store_sk] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_store_sk,ss_ext_sales_price,ss_net_profit] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_sold_date_sk,ss_store_sk,ss_ext_sales_price,ss_net_profit] + Filter [ss_sold_date_sk,ss_store_sk] + Scan parquet default.store_sales [ss_sold_date_sk,ss_store_sk,ss_ext_sales_price,ss_net_profit] 
[ss_sold_date_sk,ss_store_sk,ss_ext_sales_price,ss_net_profit] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen + Project [d_date_sk] + Filter [d_date,d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_date] [d_date_sk,d_date] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen + Project [s_store_sk] + Filter [s_store_sk] + Scan parquet default.store [s_store_sk] [s_store_sk] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen + HashAggregate [sum,sum(UnscaledValue(sr_return_amt)),sum(UnscaledValue(sr_net_loss)),s_store_sk,sum] [sum,sum(UnscaledValue(sr_return_amt)),sum(UnscaledValue(sr_net_loss)),profit_loss,returns,sum] + InputAdapter + Exchange [s_store_sk] #6 + WholeStageCodegen + HashAggregate [sum,sum,sr_return_amt,s_store_sk,sum,sum,sr_net_loss] [sum,sum,sum,sum] + Project [sr_return_amt,sr_net_loss,s_store_sk] + BroadcastHashJoin [sr_store_sk,s_store_sk] + Project [sr_store_sk,sr_return_amt,sr_net_loss] + BroadcastHashJoin [sr_returned_date_sk,d_date_sk] + Project [sr_returned_date_sk,sr_store_sk,sr_return_amt,sr_net_loss] + Filter [sr_returned_date_sk,sr_store_sk] + Scan parquet default.store_returns [sr_returned_date_sk,sr_store_sk,sr_return_amt,sr_net_loss] [sr_returned_date_sk,sr_store_sk,sr_return_amt,sr_net_loss] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen + Project [d_date_sk] + Filter [d_date,d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_date] [d_date_sk,d_date] + InputAdapter + BroadcastExchange #8 + WholeStageCodegen + Project [s_store_sk] + Filter [s_store_sk] + Scan parquet default.store [s_store_sk] [s_store_sk] + WholeStageCodegen + Project [sales,returns,profit,cs_call_center_sk,profit_loss] + InputAdapter + BroadcastNestedLoopJoin + BroadcastExchange #9 + WholeStageCodegen + HashAggregate [sum,sum(UnscaledValue(cs_net_profit)),sum,sum(UnscaledValue(cs_ext_sales_price)),cs_call_center_sk] [sales,sum,profit,sum(UnscaledValue(cs_net_profit)),sum,sum(UnscaledValue(cs_ext_sales_price))] + 
InputAdapter + Exchange [cs_call_center_sk] #10 + WholeStageCodegen + HashAggregate [sum,sum,cs_net_profit,sum,cs_ext_sales_price,cs_call_center_sk,sum] [sum,sum,sum,sum] + Project [cs_call_center_sk,cs_ext_sales_price,cs_net_profit] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_sold_date_sk,cs_call_center_sk,cs_ext_sales_price,cs_net_profit] + Filter [cs_sold_date_sk] + Scan parquet default.catalog_sales [cs_sold_date_sk,cs_call_center_sk,cs_ext_sales_price,cs_net_profit] [cs_sold_date_sk,cs_call_center_sk,cs_ext_sales_price,cs_net_profit] + InputAdapter + ReusedExchange [d_date_sk] [d_date_sk] #3 + WholeStageCodegen + HashAggregate [sum,sum,sum(UnscaledValue(cr_return_amount)),sum(UnscaledValue(cr_net_loss))] [profit_loss,sum,returns,sum(UnscaledValue(cr_return_amount)),sum,sum(UnscaledValue(cr_net_loss))] + InputAdapter + Exchange #11 + WholeStageCodegen + HashAggregate [cr_return_amount,sum,sum,sum,cr_net_loss,sum] [sum,sum,sum,sum] + Project [cr_return_amount,cr_net_loss] + BroadcastHashJoin [cr_returned_date_sk,d_date_sk] + Project [cr_returned_date_sk,cr_return_amount,cr_net_loss] + Filter [cr_returned_date_sk] + Scan parquet default.catalog_returns [cr_returned_date_sk,cr_return_amount,cr_net_loss] [cr_returned_date_sk,cr_return_amount,cr_net_loss] + InputAdapter + ReusedExchange [d_date_sk] [d_date_sk] #3 + WholeStageCodegen + Project [profit,returns,sales,wp_web_page_sk,profit_loss] + BroadcastHashJoin [wp_web_page_sk,wp_web_page_sk] + HashAggregate [sum(UnscaledValue(ws_ext_sales_price)),sum,sum(UnscaledValue(ws_net_profit)),sum,wp_web_page_sk] [sum(UnscaledValue(ws_ext_sales_price)),profit,sales,sum,sum(UnscaledValue(ws_net_profit)),sum] + InputAdapter + Exchange [wp_web_page_sk] #12 + WholeStageCodegen + HashAggregate [sum,sum,ws_ext_sales_price,ws_net_profit,sum,sum,wp_web_page_sk] [sum,sum,sum,sum] + Project [ws_ext_sales_price,ws_net_profit,wp_web_page_sk] + BroadcastHashJoin [ws_web_page_sk,wp_web_page_sk] + Project 
[ws_web_page_sk,ws_ext_sales_price,ws_net_profit] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Project [ws_sold_date_sk,ws_web_page_sk,ws_ext_sales_price,ws_net_profit] + Filter [ws_sold_date_sk,ws_web_page_sk] + Scan parquet default.web_sales [ws_sold_date_sk,ws_web_page_sk,ws_ext_sales_price,ws_net_profit] [ws_sold_date_sk,ws_web_page_sk,ws_ext_sales_price,ws_net_profit] + InputAdapter + ReusedExchange [d_date_sk] [d_date_sk] #3 + InputAdapter + BroadcastExchange #13 + WholeStageCodegen + Project [wp_web_page_sk] + Filter [wp_web_page_sk] + Scan parquet default.web_page [wp_web_page_sk] [wp_web_page_sk] + InputAdapter + BroadcastExchange #14 + WholeStageCodegen + HashAggregate [sum,wp_web_page_sk,sum(UnscaledValue(wr_return_amt)),sum(UnscaledValue(wr_net_loss)),sum] [sum,returns,sum(UnscaledValue(wr_return_amt)),sum(UnscaledValue(wr_net_loss)),profit_loss,sum] + InputAdapter + Exchange [wp_web_page_sk] #15 + WholeStageCodegen + HashAggregate [sum,wp_web_page_sk,wr_return_amt,sum,wr_net_loss,sum,sum] [sum,sum,sum,sum] + Project [wr_return_amt,wr_net_loss,wp_web_page_sk] + BroadcastHashJoin [wr_web_page_sk,wp_web_page_sk] + Project [wr_web_page_sk,wr_return_amt,wr_net_loss] + BroadcastHashJoin [wr_returned_date_sk,d_date_sk] + Project [wr_returned_date_sk,wr_web_page_sk,wr_return_amt,wr_net_loss] + Filter [wr_returned_date_sk,wr_web_page_sk] + Scan parquet default.web_returns [wr_returned_date_sk,wr_web_page_sk,wr_return_amt,wr_net_loss] [wr_returned_date_sk,wr_web_page_sk,wr_return_amt,wr_net_loss] + InputAdapter + ReusedExchange [d_date_sk] [d_date_sk] #7 + InputAdapter + BroadcastExchange #16 + WholeStageCodegen + Project [wp_web_page_sk] + Filter [wp_web_page_sk] + Scan parquet default.web_page [wp_web_page_sk] [wp_web_page_sk] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q78/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q78/explain.txt new file mode 100644 index 000000000..70080e59a --- /dev/null +++ 
b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q78/explain.txt @@ -0,0 +1,61 @@ +== Physical Plan == +TakeOrderedAndProject(limit=100, orderBy=[ratio#1 ASC NULLS FIRST,ss_qty#2 DESC NULLS LAST,ss_wc#3 DESC NULLS LAST,ss_sp#4 DESC NULLS LAST,other_chan_qty#5 ASC NULLS FIRST,other_chan_wholesale_cost#6 ASC NULLS FIRST,other_chan_sales_price#7 ASC NULLS FIRST,round((cast(ss_qty#2 as double) / cast(coalesce((ws_qty#8 + cs_qty#9), 1) as double)), 2) ASC NULLS FIRST], output=[ratio#1,store_qty#10,store_wholesale_cost#11,store_sales_price#12,other_chan_qty#5,other_chan_wholesale_cost#6,other_chan_sales_price#7]) ++- *(12) Project [round((cast(ss_qty#2 as double) / cast(coalesce((ws_qty#8 + cs_qty#9), 1) as double)), 2) AS ratio#1, ss_qty#2 AS store_qty#10, ss_wc#3 AS store_wholesale_cost#11, ss_sp#4 AS store_sales_price#12, (coalesce(ws_qty#8, 0) + coalesce(cs_qty#9, 0)) AS other_chan_qty#5, CheckOverflow((promote_precision(cast(coalesce(ws_wc#13, 0.00) as decimal(18,2))) + promote_precision(cast(coalesce(cs_wc#14, 0.00) as decimal(18,2)))), DecimalType(18,2)) AS other_chan_wholesale_cost#6, CheckOverflow((promote_precision(cast(coalesce(ws_sp#15, 0.00) as decimal(18,2))) + promote_precision(cast(coalesce(cs_sp#16, 0.00) as decimal(18,2)))), DecimalType(18,2)) AS other_chan_sales_price#7, cs_qty#9, ws_qty#8, ss_qty#2, ss_sp#4, ss_wc#3] + +- *(12) BroadcastHashJoin [ss_sold_year#17, ss_item_sk#18, ss_customer_sk#19], [cs_sold_year#20, cs_item_sk#21, cs_customer_sk#22], Inner, BuildRight + :- *(12) Project [ss_sold_year#17, ss_item_sk#18, ss_customer_sk#19, ss_qty#2, ss_wc#3, ss_sp#4, ws_qty#8, ws_wc#13, ws_sp#15] + : +- *(12) BroadcastHashJoin [ss_sold_year#17, ss_item_sk#18, ss_customer_sk#19], [ws_sold_year#23, ws_item_sk#24, ws_customer_sk#25], Inner, BuildRight + : :- *(12) HashAggregate(keys=[d_year#26, ss_item_sk#18, ss_customer_sk#19], functions=[sum(cast(ss_quantity#27 as bigint)), sum(UnscaledValue(ss_wholesale_cost#28)), 
sum(UnscaledValue(ss_sales_price#29))]) + : : +- Exchange hashpartitioning(d_year#26, ss_item_sk#18, ss_customer_sk#19, 200) + : : +- *(3) HashAggregate(keys=[d_year#26, ss_item_sk#18, ss_customer_sk#19], functions=[partial_sum(cast(ss_quantity#27 as bigint)), partial_sum(UnscaledValue(ss_wholesale_cost#28)), partial_sum(UnscaledValue(ss_sales_price#29))]) + : : +- *(3) Project [ss_item_sk#18, ss_customer_sk#19, ss_quantity#27, ss_wholesale_cost#28, ss_sales_price#29, d_year#26] + : : +- *(3) BroadcastHashJoin [ss_sold_date_sk#30], [d_date_sk#31], Inner, BuildRight + : : :- *(3) Project [ss_sold_date_sk#30, ss_item_sk#18, ss_customer_sk#19, ss_quantity#27, ss_wholesale_cost#28, ss_sales_price#29] + : : : +- *(3) Filter isnull(sr_ticket_number#32) + : : : +- *(3) BroadcastHashJoin [cast(ss_ticket_number#33 as bigint), cast(ss_item_sk#18 as bigint)], [sr_ticket_number#32, sr_item_sk#34], LeftOuter, BuildRight + : : : :- *(3) Project [ss_sold_date_sk#30, ss_item_sk#18, ss_customer_sk#19, ss_ticket_number#33, ss_quantity#27, ss_wholesale_cost#28, ss_sales_price#29] + : : : : +- *(3) Filter ((isnotnull(ss_sold_date_sk#30) && isnotnull(ss_item_sk#18)) && isnotnull(ss_customer_sk#19)) + : : : : +- *(3) FileScan parquet default.store_sales[ss_sold_date_sk#30,ss_item_sk#18,ss_customer_sk#19,ss_ticket_number#33,ss_quantity#27,ss_wholesale_cost#28,ss_sales_price#29] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_item_sk), IsNotNull(ss_customer_sk)], ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(2) Project [d_date_sk#31, d_year#26] + : : +- *(2) Filter ((isnotnull(d_year#26) && (d_year#26 = 2000)) && isnotnull(d_date_sk#31)) + : : +- *(2) FileScan parquet default.date_dim[d_date_sk#31,d_year#26] Batched: true, Format: Parquet, 
Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2000), IsNotNull(d_date_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, int, true], input[1, int, true], input[2, int, true])) + : +- *(7) Filter (coalesce(ws_qty#8, 0) > 0) + : +- *(7) HashAggregate(keys=[d_year#26, ws_item_sk#24, ws_bill_customer_sk#35], functions=[sum(cast(ws_quantity#36 as bigint)), sum(UnscaledValue(ws_wholesale_cost#37)), sum(UnscaledValue(ws_sales_price#38))]) + : +- Exchange hashpartitioning(d_year#26, ws_item_sk#24, ws_bill_customer_sk#35, 200) + : +- *(6) HashAggregate(keys=[d_year#26, ws_item_sk#24, ws_bill_customer_sk#35], functions=[partial_sum(cast(ws_quantity#36 as bigint)), partial_sum(UnscaledValue(ws_wholesale_cost#37)), partial_sum(UnscaledValue(ws_sales_price#38))]) + : +- *(6) Project [ws_item_sk#24, ws_bill_customer_sk#35, ws_quantity#36, ws_wholesale_cost#37, ws_sales_price#38, d_year#26] + : +- *(6) BroadcastHashJoin [ws_sold_date_sk#39], [d_date_sk#31], Inner, BuildRight + : :- *(6) Project [ws_sold_date_sk#39, ws_item_sk#24, ws_bill_customer_sk#35, ws_quantity#36, ws_wholesale_cost#37, ws_sales_price#38] + : : +- *(6) Filter isnull(wr_order_number#40) + : : +- *(6) BroadcastHashJoin [cast(ws_order_number#41 as bigint), cast(ws_item_sk#24 as bigint)], [wr_order_number#40, wr_item_sk#42], LeftOuter, BuildRight + : : :- *(6) Project [ws_sold_date_sk#39, ws_item_sk#24, ws_bill_customer_sk#35, ws_order_number#41, ws_quantity#36, ws_wholesale_cost#37, ws_sales_price#38] + : : : +- *(6) Filter ((isnotnull(ws_sold_date_sk#39) && isnotnull(ws_item_sk#24)) && isnotnull(ws_bill_customer_sk#35)) + : : : +- *(6) FileScan parquet default.web_sales[ws_sold_date_sk#39,ws_item_sk#24,ws_bill_customer_sk#35,ws_order_number#41,ws_quantity#36,ws_wholesale_cost#37,ws_sales_price#38] Batched: true, Format: Parquet, Location: 
InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_sold_date_sk), IsNotNull(ws_item_sk), IsNotNull(ws_bill_customer_sk)], ReadSchema: struct + : +- ReusedExchange [d_date_sk#31, d_year#26], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, int, true], input[1, int, true], input[2, int, true])) + +- *(11) Filter (coalesce(cs_qty#9, 0) > 0) + +- *(11) HashAggregate(keys=[d_year#26, cs_item_sk#21, cs_bill_customer_sk#43], functions=[sum(cast(cs_quantity#44 as bigint)), sum(UnscaledValue(cs_wholesale_cost#45)), sum(UnscaledValue(cs_sales_price#46))]) + +- Exchange hashpartitioning(d_year#26, cs_item_sk#21, cs_bill_customer_sk#43, 200) + +- *(10) HashAggregate(keys=[d_year#26, cs_item_sk#21, cs_bill_customer_sk#43], functions=[partial_sum(cast(cs_quantity#44 as bigint)), partial_sum(UnscaledValue(cs_wholesale_cost#45)), partial_sum(UnscaledValue(cs_sales_price#46))]) + +- *(10) Project [cs_bill_customer_sk#43, cs_item_sk#21, cs_quantity#44, cs_wholesale_cost#45, cs_sales_price#46, d_year#26] + +- *(10) BroadcastHashJoin [cs_sold_date_sk#47], [d_date_sk#31], Inner, BuildRight + :- *(10) Project [cs_sold_date_sk#47, cs_bill_customer_sk#43, cs_item_sk#21, cs_quantity#44, cs_wholesale_cost#45, cs_sales_price#46] + : +- *(10) Filter isnull(cr_order_number#48) + : +- *(10) BroadcastHashJoin [cs_order_number#49, cs_item_sk#21], [cr_order_number#48, cr_item_sk#50], LeftOuter, BuildRight + : :- *(10) Project [cs_sold_date_sk#47, cs_bill_customer_sk#43, cs_item_sk#21, cs_order_number#49, cs_quantity#44, cs_wholesale_cost#45, cs_sales_price#46] + : : +- *(10) Filter ((isnotnull(cs_sold_date_sk#47) && isnotnull(cs_item_sk#21)) && isnotnull(cs_bill_customer_sk#43)) + : : +- *(10) FileScan parquet 
default.catalog_sales[cs_sold_date_sk#47,cs_bill_customer_sk#43,cs_item_sk#21,cs_order_number#49,cs_quantity#44,cs_wholesale_cost#45,cs_sales_price#46] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_sold_date_sk), IsNotNull(cs_item_sk), IsNotNull(cs_bill_customer_sk)], ReadSchema: struct + +- ReusedExchange [d_date_sk#31, d_year#26], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q78/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q78/simplified.txt new file mode 100644 index 000000000..64b375073 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q78/simplified.txt @@ -0,0 +1,81 @@ +TakeOrderedAndProject [cs_qty,store_wholesale_cost,store_sales_price,ws_qty,ss_qty,ss_sp,other_chan_qty,store_qty,ratio,other_chan_sales_price,other_chan_wholesale_cost,ss_wc] + WholeStageCodegen + Project [ws_wc,cs_qty,ws_qty,cs_sp,ss_qty,ss_sp,ws_sp,cs_wc,ss_wc] + BroadcastHashJoin [ss_item_sk,ss_customer_sk,cs_customer_sk,cs_item_sk,cs_sold_year,ss_sold_year] + Project [ss_customer_sk,ws_sp,ws_wc,ss_wc,ss_sold_year,ss_sp,ws_qty,ss_qty,ss_item_sk] + BroadcastHashJoin [ss_item_sk,ss_customer_sk,ws_sold_year,ws_item_sk,ws_customer_sk,ss_sold_year] + HashAggregate [d_year,ss_customer_sk,sum(UnscaledValue(ss_sales_price)),sum,sum,sum(cast(ss_quantity as bigint)),sum,sum(UnscaledValue(ss_wholesale_cost)),ss_item_sk] [sum(UnscaledValue(ss_sales_price)),sum,ss_wc,sum,sum(cast(ss_quantity as bigint)),ss_sold_year,ss_sp,ss_qty,sum,sum(UnscaledValue(ss_wholesale_cost))] + InputAdapter + Exchange [d_year,ss_item_sk,ss_customer_sk] #1 + WholeStageCodegen + HashAggregate 
[d_year,sum,ss_wholesale_cost,ss_customer_sk,sum,sum,ss_sales_price,sum,sum,ss_quantity,sum,ss_item_sk] [sum,sum,sum,sum,sum,sum] + Project [ss_quantity,ss_item_sk,d_year,ss_customer_sk,ss_sales_price,ss_wholesale_cost] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_quantity,ss_item_sk,ss_customer_sk,ss_sales_price,ss_sold_date_sk,ss_wholesale_cost] + Filter [sr_ticket_number] + BroadcastHashJoin [ss_ticket_number,ss_item_sk,sr_ticket_number,sr_item_sk] + Project [ss_quantity,ss_item_sk,ss_customer_sk,ss_sales_price,ss_sold_date_sk,ss_wholesale_cost,ss_ticket_number] + Filter [ss_sold_date_sk,ss_item_sk,ss_customer_sk] + Scan parquet default.store_sales [ss_quantity,ss_item_sk,ss_customer_sk,ss_sales_price,ss_sold_date_sk,ss_wholesale_cost,ss_ticket_number] [ss_quantity,ss_item_sk,ss_customer_sk,ss_sales_price,ss_sold_date_sk,ss_wholesale_cost,ss_ticket_number] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen + Project [sr_item_sk,sr_ticket_number] + Filter [sr_ticket_number,sr_item_sk] + Scan parquet default.store_returns [sr_item_sk,sr_ticket_number] [sr_item_sk,sr_ticket_number] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen + Project [d_date_sk,d_year] + Filter [d_year,d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_year] [d_date_sk,d_year] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen + Filter [ws_qty] + HashAggregate [d_year,sum(cast(ws_quantity as bigint)),ws_item_sk,sum(UnscaledValue(ws_sales_price)),ws_bill_customer_sk,sum,sum,sum(UnscaledValue(ws_wholesale_cost)),sum] [sum(cast(ws_quantity as bigint)),sum(UnscaledValue(ws_sales_price)),ws_sp,ws_wc,sum,ws_sold_year,sum,sum(UnscaledValue(ws_wholesale_cost)),sum,ws_customer_sk,ws_qty] + InputAdapter + Exchange [d_year,ws_item_sk,ws_bill_customer_sk] #5 + WholeStageCodegen + HashAggregate [d_year,ws_wholesale_cost,ws_item_sk,sum,ws_bill_customer_sk,sum,sum,sum,sum,sum,ws_sales_price,ws_quantity] [sum,sum,sum,sum,sum,sum] + Project 
[d_year,ws_quantity,ws_wholesale_cost,ws_bill_customer_sk,ws_sales_price,ws_item_sk] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Project [ws_quantity,ws_wholesale_cost,ws_sold_date_sk,ws_bill_customer_sk,ws_sales_price,ws_item_sk] + Filter [wr_order_number] + BroadcastHashJoin [ws_order_number,ws_item_sk,wr_order_number,wr_item_sk] + Project [ws_quantity,ws_wholesale_cost,ws_order_number,ws_sold_date_sk,ws_bill_customer_sk,ws_sales_price,ws_item_sk] + Filter [ws_sold_date_sk,ws_item_sk,ws_bill_customer_sk] + Scan parquet default.web_sales [ws_quantity,ws_wholesale_cost,ws_order_number,ws_sold_date_sk,ws_bill_customer_sk,ws_sales_price,ws_item_sk] [ws_quantity,ws_wholesale_cost,ws_order_number,ws_sold_date_sk,ws_bill_customer_sk,ws_sales_price,ws_item_sk] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen + Project [wr_item_sk,wr_order_number] + Filter [wr_order_number,wr_item_sk] + Scan parquet default.web_returns [wr_item_sk,wr_order_number] [wr_item_sk,wr_order_number] + InputAdapter + ReusedExchange [d_date_sk,d_year] [d_date_sk,d_year] #3 + InputAdapter + BroadcastExchange #7 + WholeStageCodegen + Filter [cs_qty] + HashAggregate [d_year,sum,sum(UnscaledValue(cs_sales_price)),sum(cast(cs_quantity as bigint)),sum(UnscaledValue(cs_wholesale_cost)),cs_item_sk,cs_bill_customer_sk,sum,sum] [cs_qty,cs_wc,sum,sum(UnscaledValue(cs_sales_price)),sum(cast(cs_quantity as bigint)),cs_sold_year,cs_sp,sum(UnscaledValue(cs_wholesale_cost)),sum,cs_customer_sk,sum] + InputAdapter + Exchange [d_year,cs_item_sk,cs_bill_customer_sk] #8 + WholeStageCodegen + HashAggregate [d_year,sum,sum,sum,cs_item_sk,cs_bill_customer_sk,sum,sum,cs_sales_price,cs_quantity,sum,cs_wholesale_cost] [sum,sum,sum,sum,sum,sum] + Project [cs_wholesale_cost,cs_quantity,d_year,cs_bill_customer_sk,cs_sales_price,cs_item_sk] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_wholesale_cost,cs_quantity,cs_bill_customer_sk,cs_sales_price,cs_sold_date_sk,cs_item_sk] + Filter 
[cr_order_number] + BroadcastHashJoin [cs_order_number,cs_item_sk,cr_order_number,cr_item_sk] + Project [cs_wholesale_cost,cs_quantity,cs_bill_customer_sk,cs_sales_price,cs_sold_date_sk,cs_order_number,cs_item_sk] + Filter [cs_sold_date_sk,cs_item_sk,cs_bill_customer_sk] + Scan parquet default.catalog_sales [cs_wholesale_cost,cs_quantity,cs_bill_customer_sk,cs_sales_price,cs_sold_date_sk,cs_order_number,cs_item_sk] [cs_wholesale_cost,cs_quantity,cs_bill_customer_sk,cs_sales_price,cs_sold_date_sk,cs_order_number,cs_item_sk] + InputAdapter + BroadcastExchange #9 + WholeStageCodegen + Project [cr_item_sk,cr_order_number] + Filter [cr_order_number,cr_item_sk] + Scan parquet default.catalog_returns [cr_item_sk,cr_order_number] [cr_item_sk,cr_order_number] + InputAdapter + ReusedExchange [d_date_sk,d_year] [d_date_sk,d_year] #3 diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q79/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q79/explain.txt new file mode 100644 index 000000000..c0fa08c66 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q79/explain.txt @@ -0,0 +1,32 @@ +== Physical Plan == +TakeOrderedAndProject(limit=100, orderBy=[c_last_name#1 ASC NULLS FIRST,c_first_name#2 ASC NULLS FIRST,substring(s_city#3, 1, 30) ASC NULLS FIRST,profit#4 ASC NULLS FIRST], output=[c_last_name#1,c_first_name#2,substring(s_city, 1, 30)#5,ss_ticket_number#6,amt#7,profit#4]) ++- *(6) Project [c_last_name#1, c_first_name#2, substring(s_city#3, 1, 30) AS substring(s_city, 1, 30)#5, ss_ticket_number#6, amt#7, profit#4, s_city#3] + +- *(6) BroadcastHashJoin [ss_customer_sk#8], [c_customer_sk#9], Inner, BuildRight + :- *(6) HashAggregate(keys=[ss_ticket_number#6, ss_customer_sk#8, ss_addr_sk#10, s_city#3], functions=[sum(UnscaledValue(ss_coupon_amt#11)), sum(UnscaledValue(ss_net_profit#12))]) + : +- Exchange hashpartitioning(ss_ticket_number#6, ss_customer_sk#8, ss_addr_sk#10, s_city#3, 200) + : +- *(4) 
HashAggregate(keys=[ss_ticket_number#6, ss_customer_sk#8, ss_addr_sk#10, s_city#3], functions=[partial_sum(UnscaledValue(ss_coupon_amt#11)), partial_sum(UnscaledValue(ss_net_profit#12))]) + : +- *(4) Project [ss_customer_sk#8, ss_addr_sk#10, ss_ticket_number#6, ss_coupon_amt#11, ss_net_profit#12, s_city#3] + : +- *(4) BroadcastHashJoin [ss_hdemo_sk#13], [hd_demo_sk#14], Inner, BuildRight + : :- *(4) Project [ss_customer_sk#8, ss_hdemo_sk#13, ss_addr_sk#10, ss_ticket_number#6, ss_coupon_amt#11, ss_net_profit#12, s_city#3] + : : +- *(4) BroadcastHashJoin [ss_store_sk#15], [s_store_sk#16], Inner, BuildRight + : : :- *(4) Project [ss_customer_sk#8, ss_hdemo_sk#13, ss_addr_sk#10, ss_store_sk#15, ss_ticket_number#6, ss_coupon_amt#11, ss_net_profit#12] + : : : +- *(4) BroadcastHashJoin [ss_sold_date_sk#17], [d_date_sk#18], Inner, BuildRight + : : : :- *(4) Project [ss_sold_date_sk#17, ss_customer_sk#8, ss_hdemo_sk#13, ss_addr_sk#10, ss_store_sk#15, ss_ticket_number#6, ss_coupon_amt#11, ss_net_profit#12] + : : : : +- *(4) Filter (((isnotnull(ss_sold_date_sk#17) && isnotnull(ss_store_sk#15)) && isnotnull(ss_hdemo_sk#13)) && isnotnull(ss_customer_sk#8)) + : : : : +- *(4) FileScan parquet default.store_sales[ss_sold_date_sk#17,ss_customer_sk#8,ss_hdemo_sk#13,ss_addr_sk#10,ss_store_sk#15,ss_ticket_number#6,ss_coupon_amt#11,ss_net_profit#12] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk), IsNotNull(ss_hdemo_sk), IsNotNull(ss_custome..., ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(2) Project [s_store_sk#16, s_city#3] + : : +- *(2) Filter (((isnotnull(s_number_employees#21) && (s_number_employees#21 >= 200)) && (s_number_employees#21 <= 295)) && isnotnull(s_store_sk#16)) + : : +- *(2) FileScan parquet 
default.store[s_store_sk#16,s_number_employees#21,s_city#3] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_number_employees), GreaterThanOrEqual(s_number_employees,200), LessThanOrEqual(s_num..., ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(3) Project [hd_demo_sk#14] + : +- *(3) Filter (((hd_dep_count#22 = 6) || (hd_vehicle_count#23 > 2)) && isnotnull(hd_demo_sk#14)) + : +- *(3) FileScan parquet default.household_demographics[hd_demo_sk#14,hd_dep_count#22,hd_vehicle_count#23] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/household_demographics], PartitionFilters: [], PushedFilters: [Or(EqualTo(hd_dep_count,6),GreaterThan(hd_vehicle_count,2)), IsNotNull(hd_demo_sk)], ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(5) Project [c_customer_sk#9, c_first_name#2, c_last_name#1] + +- *(5) Filter isnotnull(c_customer_sk#9) + +- *(5) FileScan parquet default.customer[c_customer_sk#9,c_first_name#2,c_last_name#1] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q79/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q79/simplified.txt new file mode 100644 index 000000000..cee3e4c79 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q79/simplified.txt @@ -0,0 +1,42 @@ +TakeOrderedAndProject [profit,amt,substring(s_city, 1, 30),s_city,c_last_name,ss_ticket_number,c_first_name] + WholeStageCodegen + Project 
[profit,amt,s_city,c_last_name,ss_ticket_number,c_first_name] + BroadcastHashJoin [ss_customer_sk,c_customer_sk] + HashAggregate [sum(UnscaledValue(ss_net_profit)),ss_customer_sk,sum(UnscaledValue(ss_coupon_amt)),sum,sum,s_city,ss_ticket_number,ss_addr_sk] [amt,sum(UnscaledValue(ss_net_profit)),profit,sum(UnscaledValue(ss_coupon_amt)),sum,sum] + InputAdapter + Exchange [ss_ticket_number,ss_customer_sk,ss_addr_sk,s_city] #1 + WholeStageCodegen + HashAggregate [sum,ss_customer_sk,ss_coupon_amt,sum,sum,sum,s_city,ss_ticket_number,ss_addr_sk,ss_net_profit] [sum,sum,sum,sum] + Project [ss_addr_sk,ss_coupon_amt,ss_customer_sk,ss_net_profit,s_city,ss_ticket_number] + BroadcastHashJoin [ss_hdemo_sk,hd_demo_sk] + Project [ss_addr_sk,ss_coupon_amt,ss_customer_sk,ss_net_profit,s_city,ss_hdemo_sk,ss_ticket_number] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_addr_sk,ss_coupon_amt,ss_store_sk,ss_customer_sk,ss_net_profit,ss_hdemo_sk,ss_ticket_number] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_addr_sk,ss_coupon_amt,ss_store_sk,ss_customer_sk,ss_net_profit,ss_sold_date_sk,ss_hdemo_sk,ss_ticket_number] + Filter [ss_sold_date_sk,ss_store_sk,ss_hdemo_sk,ss_customer_sk] + Scan parquet default.store_sales [ss_addr_sk,ss_coupon_amt,ss_store_sk,ss_customer_sk,ss_net_profit,ss_sold_date_sk,ss_hdemo_sk,ss_ticket_number] [ss_addr_sk,ss_coupon_amt,ss_store_sk,ss_customer_sk,ss_net_profit,ss_sold_date_sk,ss_hdemo_sk,ss_ticket_number] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen + Project [d_date_sk] + Filter [d_dow,d_year,d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_year,d_dow] [d_date_sk,d_year,d_dow] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen + Project [s_store_sk,s_city] + Filter [s_number_employees,s_store_sk] + Scan parquet default.store [s_store_sk,s_number_employees,s_city] [s_store_sk,s_number_employees,s_city] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen + Project [hd_demo_sk] + Filter 
[hd_dep_count,hd_vehicle_count,hd_demo_sk] + Scan parquet default.household_demographics [hd_demo_sk,hd_dep_count,hd_vehicle_count] [hd_demo_sk,hd_dep_count,hd_vehicle_count] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen + Project [c_customer_sk,c_first_name,c_last_name] + Filter [c_customer_sk] + Scan parquet default.customer [c_customer_sk,c_first_name,c_last_name] [c_customer_sk,c_first_name,c_last_name] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q8/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q8/explain.txt new file mode 100644 index 000000000..8ae1eb34e --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q8/explain.txt @@ -0,0 +1,45 @@ +== Physical Plan == +TakeOrderedAndProject(limit=100, orderBy=[s_store_name#1 ASC NULLS FIRST], output=[s_store_name#1,sum(ss_net_profit)#2]) ++- *(9) HashAggregate(keys=[s_store_name#1], functions=[sum(UnscaledValue(ss_net_profit#3))]) + +- Exchange hashpartitioning(s_store_name#1, 200) + +- *(8) HashAggregate(keys=[s_store_name#1], functions=[partial_sum(UnscaledValue(ss_net_profit#3))]) + +- *(8) Project [ss_net_profit#3, s_store_name#1] + +- *(8) BroadcastHashJoin [substring(s_zip#4, 1, 2)], [substring(ca_zip#5, 1, 2)], Inner, BuildRight + :- *(8) Project [ss_net_profit#3, s_store_name#1, s_zip#4] + : +- *(8) BroadcastHashJoin [ss_store_sk#6], [s_store_sk#7], Inner, BuildRight + : :- *(8) Project [ss_store_sk#6, ss_net_profit#3] + : : +- *(8) BroadcastHashJoin [ss_sold_date_sk#8], [d_date_sk#9], Inner, BuildRight + : : :- *(8) Project [ss_sold_date_sk#8, ss_store_sk#6, ss_net_profit#3] + : : : +- *(8) Filter (isnotnull(ss_sold_date_sk#8) && isnotnull(ss_store_sk#6)) + : : : +- *(8) FileScan parquet default.store_sales[ss_sold_date_sk#8,ss_store_sk#6,ss_net_profit#3] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], 
PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk)], ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(1) Project [d_date_sk#9] + : : +- *(1) Filter ((((isnotnull(d_qoy#10) && isnotnull(d_year#11)) && (d_qoy#10 = 2)) && (d_year#11 = 1998)) && isnotnull(d_date_sk#9)) + : : +- *(1) FileScan parquet default.date_dim[d_date_sk#9,d_year#11,d_qoy#10] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_qoy), IsNotNull(d_year), EqualTo(d_qoy,2), EqualTo(d_year,1998), IsNotNull(d_date_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(2) Project [s_store_sk#7, s_store_name#1, s_zip#4] + : +- *(2) Filter (isnotnull(s_store_sk#7) && isnotnull(s_zip#4)) + : +- *(2) FileScan parquet default.store[s_store_sk#7,s_store_name#1,s_zip#4] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_store_sk), IsNotNull(s_zip)], ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(substring(input[0, string, true], 1, 2))) + +- *(7) HashAggregate(keys=[ca_zip#5], functions=[]) + +- Exchange hashpartitioning(ca_zip#5, 200) + +- *(6) HashAggregate(keys=[ca_zip#5], functions=[]) + +- *(6) BroadcastHashJoin [coalesce(ca_zip#5, )], [coalesce(ca_zip#12, )], LeftSemi, BuildRight, (ca_zip#5 <=> ca_zip#12) + :- *(6) Project [substring(ca_zip#13, 1, 5) AS ca_zip#5] + : +- *(6) Filter (substring(ca_zip#13, 1, 5) INSET 
(56910,69952,63792,39371,74351,11101,25003,97189,57834,73134,62377,51200,32754,22752,86379,14171,91110,40162,98569,28709,13394,66162,25733,25782,26065,18383,51949,87343,50298,83849,33786,64528,23470,67030,46136,25280,46820,77721,99076,18426,31880,17871,98235,45748,49156,18652,72013,51622,43848,78567,41248,13695,44165,67853,54917,53179,64034,10567,71791,68908,55565,59402,64147,85816,57855,61547,27700,68100,28810,58263,15723,83933,51103,58058,90578,82276,81096,81426,96451,77556,38607,76638,18906,62971,57047,48425,35576,11928,30625,83444,73520,51650,57647,60099,30122,94983,24128,10445,41368,26233,26859,21756,24676,19849,36420,38193,58470,39127,13595,87501,24317,15455,69399,98025,81019,48033,11376,39516,67875,92712,14867,38122,29741,42961,30469,51211,56458,15559,16021,33123,33282,33515,72823,54601,76698,56240,72175,60279,20004,68806,72325,28488,43933,50412,45200,22246,78668,79777,96765,67301,73273,49448,82636,23932,47305,29839,39192,18799,61265,37125,58943,64457,88424,24610,84935,89360,68893,30431,28898,10336,90257,59166,46081,26105,96888,36634,86284,35258,39972,22927,73241,53268,24206,27385,99543,31671,14663,30903,39861,24996,63089,88086,83921,21076,67897,66708,45721,60576,25103,52867,30450,36233,30010,96576,73171,56571,56575,64544,13955,78451,43285,18119,16725,83041,76107,79994,54364,35942,56691,19769,63435,34102,18845,22744,13354,75691,45549,23968,31387,83144,13375,15765,28577,88190,19736,73650,37930,25989,83926,94898,51798,39736,22437,55253,38415,71256,18376,42029,25858,44438,19515,38935,51649,71954,15882,18767,63193,25486,49130,37126,40604,34425,17043,12305,11634,26653,94167,36446,10516,67473,66864,72425,63981,18842,22461,42666,47770,69035,70372,28587,45266,15371,15798,45375,90225,16807,31016,68014,21337,19505,50016,10144,84093,21286,19430,34322,91068,94945,72305,24671,58048,65084,28545,21195,20548,22245,77191,96976,48583,76231,15734,61810,11356,68621,68786,98359,41367,26689,69913,76614,68101,88885,50308,79077,18270,28915,29178,53672,62878,10390,14922,68341,56529,4
1766,68309,56616,15126,61860,97789,11489,45692,41918,72151,72550,27156,36495,70738,17879,53535,17920,68880,78890,35850,14089,58078,65164,27068,26231,13376,57665,32213,77610,87816,21309,15146,86198,91137,55307,67467,40558,94627,82136,22351,89091,20260,23006,91393,47537,62496,98294,18840,71286,81312,31029,70466,35458,14060,22685,28286,25631,19512,40081,63837,14328,35474,22152,76232,51061,86057,17183) && isnotnull(substring(ca_zip#13, 1, 5))) + : +- *(6) FileScan parquet default.customer_address[ca_zip#13] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_address], PartitionFilters: [], PushedFilters: [], ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(coalesce(input[0, string, true], ))) + +- *(5) Project [ca_zip#12] + +- *(5) Filter (count(1)#14 > 10) + +- *(5) HashAggregate(keys=[ca_zip#13], functions=[count(1)]) + +- Exchange hashpartitioning(ca_zip#13, 200) + +- *(4) HashAggregate(keys=[ca_zip#13], functions=[partial_count(1)]) + +- *(4) Project [ca_zip#13] + +- *(4) BroadcastHashJoin [ca_address_sk#15], [c_current_addr_sk#16], Inner, BuildRight + :- *(4) Project [ca_address_sk#15, ca_zip#13] + : +- *(4) Filter isnotnull(ca_address_sk#15) + : +- *(4) FileScan parquet default.customer_address[ca_address_sk#15,ca_zip#13] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_address_sk)], ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(3) Project [c_current_addr_sk#16] + +- *(3) Filter ((isnotnull(c_preferred_cust_flag#17) && (c_preferred_cust_flag#17 = Y)) && isnotnull(c_current_addr_sk#16)) + +- *(3) FileScan parquet default.customer[c_current_addr_sk#16,c_preferred_cust_flag#17] Batched: true, Format: Parquet, Location: 
InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_preferred_cust_flag), EqualTo(c_preferred_cust_flag,Y), IsNotNull(c_current_addr_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q8/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q8/simplified.txt new file mode 100644 index 000000000..855f6d67d --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q8/simplified.txt @@ -0,0 +1,61 @@ +TakeOrderedAndProject [s_store_name,sum(ss_net_profit)] + WholeStageCodegen + HashAggregate [s_store_name,sum,sum(UnscaledValue(ss_net_profit))] [sum(UnscaledValue(ss_net_profit)),sum(ss_net_profit),sum] + InputAdapter + Exchange [s_store_name] #1 + WholeStageCodegen + HashAggregate [s_store_name,ss_net_profit,sum,sum] [sum,sum] + Project [ss_net_profit,s_store_name] + BroadcastHashJoin [s_zip,ca_zip] + Project [ss_net_profit,s_store_name,s_zip] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_store_sk,ss_net_profit] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_sold_date_sk,ss_store_sk,ss_net_profit] + Filter [ss_sold_date_sk,ss_store_sk] + Scan parquet default.store_sales [ss_sold_date_sk,ss_store_sk,ss_net_profit] [ss_sold_date_sk,ss_store_sk,ss_net_profit] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen + Project [d_date_sk] + Filter [d_qoy,d_year,d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_year,d_qoy] [d_date_sk,d_year,d_qoy] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen + Project [s_store_sk,s_store_name,s_zip] + Filter [s_store_sk,s_zip] + Scan parquet default.store [s_store_sk,s_store_name,s_zip] [s_store_sk,s_store_name,s_zip] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen + HashAggregate [ca_zip] + InputAdapter + Exchange [ca_zip] #5 + WholeStageCodegen + HashAggregate [ca_zip] + 
BroadcastHashJoin [ca_zip,ca_zip] + Project [ca_zip] + Filter [ca_zip] + Scan parquet default.customer_address [ca_zip] [ca_zip] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen + Project [ca_zip] + Filter [count(1)] + HashAggregate [ca_zip,count,count(1)] [count(1),ca_zip,count(1),count] + InputAdapter + Exchange [ca_zip] #7 + WholeStageCodegen + HashAggregate [ca_zip,count,count] [count,count] + Project [ca_zip] + BroadcastHashJoin [ca_address_sk,c_current_addr_sk] + Project [ca_address_sk,ca_zip] + Filter [ca_address_sk] + Scan parquet default.customer_address [ca_address_sk,ca_zip] [ca_address_sk,ca_zip] + InputAdapter + BroadcastExchange #8 + WholeStageCodegen + Project [c_current_addr_sk] + Filter [c_preferred_cust_flag,c_current_addr_sk] + Scan parquet default.customer [c_current_addr_sk,c_preferred_cust_flag] [c_current_addr_sk,c_preferred_cust_flag] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q80/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q80/explain.txt new file mode 100644 index 000000000..ec6c9ab88 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q80/explain.txt @@ -0,0 +1,97 @@ +== Physical Plan == +TakeOrderedAndProject(limit=100, orderBy=[channel#1 ASC NULLS FIRST,id#2 ASC NULLS FIRST], output=[channel#1,id#2,sales#3,returns#4,profit#5]) ++- *(23) HashAggregate(keys=[channel#1, id#2, spark_grouping_id#6], functions=[sum(sales#7), sum(returns#8), sum(profit#9)]) + +- Exchange hashpartitioning(channel#1, id#2, spark_grouping_id#6, 200) + +- *(22) HashAggregate(keys=[channel#1, id#2, spark_grouping_id#6], functions=[partial_sum(sales#7), partial_sum(returns#8), partial_sum(profit#9)]) + +- *(22) Expand [List(sales#7, returns#8, profit#9, channel#10, id#11, 0), List(sales#7, returns#8, profit#9, channel#10, null, 1), List(sales#7, returns#8, profit#9, null, null, 3)], [sales#7, returns#8, profit#9, channel#1, id#2, spark_grouping_id#6] + +- Union + :- *(7) 
HashAggregate(keys=[s_store_id#12], functions=[sum(UnscaledValue(ss_ext_sales_price#13)), sum(coalesce(cast(sr_return_amt#14 as decimal(12,2)), 0.00)), sum(CheckOverflow((promote_precision(cast(ss_net_profit#15 as decimal(13,2))) - promote_precision(cast(coalesce(cast(sr_net_loss#16 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)))]) + : +- Exchange hashpartitioning(s_store_id#12, 200) + : +- *(6) HashAggregate(keys=[s_store_id#12], functions=[partial_sum(UnscaledValue(ss_ext_sales_price#13)), partial_sum(coalesce(cast(sr_return_amt#14 as decimal(12,2)), 0.00)), partial_sum(CheckOverflow((promote_precision(cast(ss_net_profit#15 as decimal(13,2))) - promote_precision(cast(coalesce(cast(sr_net_loss#16 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)))]) + : +- *(6) Project [ss_ext_sales_price#13, ss_net_profit#15, sr_return_amt#14, sr_net_loss#16, s_store_id#12] + : +- *(6) BroadcastHashJoin [ss_promo_sk#17], [p_promo_sk#18], Inner, BuildRight + : :- *(6) Project [ss_promo_sk#17, ss_ext_sales_price#13, ss_net_profit#15, sr_return_amt#14, sr_net_loss#16, s_store_id#12] + : : +- *(6) BroadcastHashJoin [ss_item_sk#19], [i_item_sk#20], Inner, BuildRight + : : :- *(6) Project [ss_item_sk#19, ss_promo_sk#17, ss_ext_sales_price#13, ss_net_profit#15, sr_return_amt#14, sr_net_loss#16, s_store_id#12] + : : : +- *(6) BroadcastHashJoin [ss_store_sk#21], [s_store_sk#22], Inner, BuildRight + : : : :- *(6) Project [ss_item_sk#19, ss_store_sk#21, ss_promo_sk#17, ss_ext_sales_price#13, ss_net_profit#15, sr_return_amt#14, sr_net_loss#16] + : : : : +- *(6) BroadcastHashJoin [ss_sold_date_sk#23], [d_date_sk#24], Inner, BuildRight + : : : : :- *(6) Project [ss_sold_date_sk#23, ss_item_sk#19, ss_store_sk#21, ss_promo_sk#17, ss_ext_sales_price#13, ss_net_profit#15, sr_return_amt#14, sr_net_loss#16] + : : : : : +- *(6) BroadcastHashJoin [cast(ss_item_sk#19 as bigint), cast(ss_ticket_number#25 as bigint)], [sr_item_sk#26, sr_ticket_number#27], LeftOuter, 
BuildRight + : : : : : :- *(6) Project [ss_sold_date_sk#23, ss_item_sk#19, ss_store_sk#21, ss_promo_sk#17, ss_ticket_number#25, ss_ext_sales_price#13, ss_net_profit#15] + : : : : : : +- *(6) Filter (((isnotnull(ss_sold_date_sk#23) && isnotnull(ss_store_sk#21)) && isnotnull(ss_item_sk#19)) && isnotnull(ss_promo_sk#17)) + : : : : : : +- *(6) FileScan parquet default.store_sales[ss_sold_date_sk#23,ss_item_sk#19,ss_store_sk#21,ss_promo_sk#17,ss_ticket_number#25,ss_ext_sales_price#13,ss_net_profit#15] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk), IsNotNull(ss_item_sk), IsNotNull(ss_promo_sk)], ReadSchema: struct= 11192)) && (d_date#28 <= 11222)) && isnotnull(d_date_sk#24)) + : : : : +- *(2) FileScan parquet default.date_dim[d_date_sk#24,d_date#28] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,2000-08-23), LessThanOrEqual(d_date,2000-09-22), Is..., ReadSchema: struct + : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : +- *(3) Project [s_store_sk#22, s_store_id#12] + : : : +- *(3) Filter isnotnull(s_store_sk#22) + : : : +- *(3) FileScan parquet default.store[s_store_sk#22,s_store_id#12] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_store_sk)], ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(4) Project [i_item_sk#20] + : : +- *(4) Filter ((isnotnull(i_current_price#29) && (i_current_price#29 > 50.00)) && isnotnull(i_item_sk#20)) + : : +- *(4) FileScan parquet 
default.item[i_item_sk#20,i_current_price#29] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_current_price), GreaterThan(i_current_price,50.00), IsNotNull(i_item_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(5) Project [p_promo_sk#18] + : +- *(5) Filter ((isnotnull(p_channel_tv#30) && (p_channel_tv#30 = N)) && isnotnull(p_promo_sk#18)) + : +- *(5) FileScan parquet default.promotion[p_promo_sk#18,p_channel_tv#30] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/promotion], PartitionFilters: [], PushedFilters: [IsNotNull(p_channel_tv), EqualTo(p_channel_tv,N), IsNotNull(p_promo_sk)], ReadSchema: struct + :- *(14) HashAggregate(keys=[cp_catalog_page_id#31], functions=[sum(UnscaledValue(cs_ext_sales_price#32)), sum(coalesce(cast(cr_return_amount#33 as decimal(12,2)), 0.00)), sum(CheckOverflow((promote_precision(cast(cs_net_profit#34 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_net_loss#35 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)))]) + : +- Exchange hashpartitioning(cp_catalog_page_id#31, 200) + : +- *(13) HashAggregate(keys=[cp_catalog_page_id#31], functions=[partial_sum(UnscaledValue(cs_ext_sales_price#32)), partial_sum(coalesce(cast(cr_return_amount#33 as decimal(12,2)), 0.00)), partial_sum(CheckOverflow((promote_precision(cast(cs_net_profit#34 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_net_loss#35 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)))]) + : +- *(13) Project [cs_ext_sales_price#32, cs_net_profit#34, cr_return_amount#33, cr_net_loss#35, cp_catalog_page_id#31] + : +- *(13) BroadcastHashJoin [cs_promo_sk#36], [p_promo_sk#18], Inner, BuildRight + : :- *(13) Project [cs_promo_sk#36, cs_ext_sales_price#32, 
cs_net_profit#34, cr_return_amount#33, cr_net_loss#35, cp_catalog_page_id#31] + : : +- *(13) BroadcastHashJoin [cs_item_sk#37], [i_item_sk#20], Inner, BuildRight + : : :- *(13) Project [cs_item_sk#37, cs_promo_sk#36, cs_ext_sales_price#32, cs_net_profit#34, cr_return_amount#33, cr_net_loss#35, cp_catalog_page_id#31] + : : : +- *(13) BroadcastHashJoin [cs_catalog_page_sk#38], [cp_catalog_page_sk#39], Inner, BuildRight + : : : :- *(13) Project [cs_catalog_page_sk#38, cs_item_sk#37, cs_promo_sk#36, cs_ext_sales_price#32, cs_net_profit#34, cr_return_amount#33, cr_net_loss#35] + : : : : +- *(13) BroadcastHashJoin [cs_sold_date_sk#40], [d_date_sk#24], Inner, BuildRight + : : : : :- *(13) Project [cs_sold_date_sk#40, cs_catalog_page_sk#38, cs_item_sk#37, cs_promo_sk#36, cs_ext_sales_price#32, cs_net_profit#34, cr_return_amount#33, cr_net_loss#35] + : : : : : +- *(13) BroadcastHashJoin [cs_item_sk#37, cs_order_number#41], [cr_item_sk#42, cr_order_number#43], LeftOuter, BuildRight + : : : : : :- *(13) Project [cs_sold_date_sk#40, cs_catalog_page_sk#38, cs_item_sk#37, cs_promo_sk#36, cs_order_number#41, cs_ext_sales_price#32, cs_net_profit#34] + : : : : : : +- *(13) Filter (((isnotnull(cs_sold_date_sk#40) && isnotnull(cs_catalog_page_sk#38)) && isnotnull(cs_item_sk#37)) && isnotnull(cs_promo_sk#36)) + : : : : : : +- *(13) FileScan parquet default.catalog_sales[cs_sold_date_sk#40,cs_catalog_page_sk#38,cs_item_sk#37,cs_promo_sk#36,cs_order_number#41,cs_ext_sales_price#32,cs_net_profit#34] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_sold_date_sk), IsNotNull(cs_catalog_page_sk), IsNotNull(cs_item_sk), IsNotNull(cs_p..., ReadSchema: struct + : : : : +- ReusedExchange [d_date_sk#24], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : +- BroadcastExchange 
HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : +- *(10) Project [cp_catalog_page_sk#39, cp_catalog_page_id#31] + : : : +- *(10) Filter isnotnull(cp_catalog_page_sk#39) + : : : +- *(10) FileScan parquet default.catalog_page[cp_catalog_page_sk#39,cp_catalog_page_id#31] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_page], PartitionFilters: [], PushedFilters: [IsNotNull(cp_catalog_page_sk)], ReadSchema: struct + : : +- ReusedExchange [i_item_sk#20], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- ReusedExchange [p_promo_sk#18], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(21) HashAggregate(keys=[web_site_id#44], functions=[sum(UnscaledValue(ws_ext_sales_price#45)), sum(coalesce(cast(wr_return_amt#46 as decimal(12,2)), 0.00)), sum(CheckOverflow((promote_precision(cast(ws_net_profit#47 as decimal(13,2))) - promote_precision(cast(coalesce(cast(wr_net_loss#48 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)))]) + +- Exchange hashpartitioning(web_site_id#44, 200) + +- *(20) HashAggregate(keys=[web_site_id#44], functions=[partial_sum(UnscaledValue(ws_ext_sales_price#45)), partial_sum(coalesce(cast(wr_return_amt#46 as decimal(12,2)), 0.00)), partial_sum(CheckOverflow((promote_precision(cast(ws_net_profit#47 as decimal(13,2))) - promote_precision(cast(coalesce(cast(wr_net_loss#48 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)))]) + +- *(20) Project [ws_ext_sales_price#45, ws_net_profit#47, wr_return_amt#46, wr_net_loss#48, web_site_id#44] + +- *(20) BroadcastHashJoin [ws_promo_sk#49], [p_promo_sk#18], Inner, BuildRight + :- *(20) Project [ws_promo_sk#49, ws_ext_sales_price#45, ws_net_profit#47, wr_return_amt#46, wr_net_loss#48, web_site_id#44] + : +- *(20) BroadcastHashJoin [ws_item_sk#50], [i_item_sk#20], Inner, BuildRight + : :- *(20) 
Project [ws_item_sk#50, ws_promo_sk#49, ws_ext_sales_price#45, ws_net_profit#47, wr_return_amt#46, wr_net_loss#48, web_site_id#44] + : : +- *(20) BroadcastHashJoin [ws_web_site_sk#51], [web_site_sk#52], Inner, BuildRight + : : :- *(20) Project [ws_item_sk#50, ws_web_site_sk#51, ws_promo_sk#49, ws_ext_sales_price#45, ws_net_profit#47, wr_return_amt#46, wr_net_loss#48] + : : : +- *(20) BroadcastHashJoin [ws_sold_date_sk#53], [d_date_sk#24], Inner, BuildRight + : : : :- *(20) Project [ws_sold_date_sk#53, ws_item_sk#50, ws_web_site_sk#51, ws_promo_sk#49, ws_ext_sales_price#45, ws_net_profit#47, wr_return_amt#46, wr_net_loss#48] + : : : : +- *(20) BroadcastHashJoin [cast(ws_item_sk#50 as bigint), cast(ws_order_number#54 as bigint)], [wr_item_sk#55, wr_order_number#56], LeftOuter, BuildRight + : : : : :- *(20) Project [ws_sold_date_sk#53, ws_item_sk#50, ws_web_site_sk#51, ws_promo_sk#49, ws_order_number#54, ws_ext_sales_price#45, ws_net_profit#47] + : : : : : +- *(20) Filter (((isnotnull(ws_sold_date_sk#53) && isnotnull(ws_web_site_sk#51)) && isnotnull(ws_item_sk#50)) && isnotnull(ws_promo_sk#49)) + : : : : : +- *(20) FileScan parquet default.web_sales[ws_sold_date_sk#53,ws_item_sk#50,ws_web_site_sk#51,ws_promo_sk#49,ws_order_number#54,ws_ext_sales_price#45,ws_net_profit#47] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_sold_date_sk), IsNotNull(ws_web_site_sk), IsNotNull(ws_item_sk), IsNotNull(ws_promo..., ReadSchema: struct + : : : +- ReusedExchange [d_date_sk#24], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(17) Project [web_site_sk#52, web_site_id#44] + : : +- *(17) Filter isnotnull(web_site_sk#52) + : : +- *(17) FileScan parquet default.web_site[web_site_sk#52,web_site_id#44] Batched: 
true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_site], PartitionFilters: [], PushedFilters: [IsNotNull(web_site_sk)], ReadSchema: struct + : +- ReusedExchange [i_item_sk#20], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- ReusedExchange [p_promo_sk#18], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q80/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q80/simplified.txt new file mode 100644 index 000000000..e16daec1c --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q80/simplified.txt @@ -0,0 +1,133 @@ +TakeOrderedAndProject [profit,sales,id,channel,returns] + WholeStageCodegen + HashAggregate [sum,sum(sales),sum(returns),spark_grouping_id,id,channel,sum(profit),sum,sum] [profit,sum,sales,sum(sales),sum(returns),returns,sum(profit),sum,sum] + InputAdapter + Exchange [channel,id,spark_grouping_id] #1 + WholeStageCodegen + HashAggregate [profit,sum,sum,returns,spark_grouping_id,sum,id,channel,sales,sum,sum,sum] [sum,sum,sum,sum,sum,sum] + Expand [profit,id,returns,sales,channel] + InputAdapter + Union + WholeStageCodegen + HashAggregate [sum(coalesce(cast(sr_return_amt as decimal(12,2)), 0.00)),sum,sum(CheckOverflow((promote_precision(cast(ss_net_profit as decimal(13,2))) - promote_precision(cast(coalesce(cast(sr_net_loss as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2))),s_store_id,sum,sum(UnscaledValue(ss_ext_sales_price)),sum] [id,sum(coalesce(cast(sr_return_amt as decimal(12,2)), 0.00)),sales,sum,sum(CheckOverflow((promote_precision(cast(ss_net_profit as decimal(13,2))) - promote_precision(cast(coalesce(cast(sr_net_loss as decimal(12,2)), 0.00) as decimal(13,2)))), 
DecimalType(13,2))),profit,sum,returns,sum(UnscaledValue(ss_ext_sales_price)),channel,sum] + InputAdapter + Exchange [s_store_id] #2 + WholeStageCodegen + HashAggregate [sum,sum,sum,sum,sr_return_amt,s_store_id,ss_ext_sales_price,sum,sr_net_loss,ss_net_profit,sum] [sum,sum,sum,sum,sum,sum] + Project [s_store_id,ss_ext_sales_price,ss_net_profit,sr_return_amt,sr_net_loss] + BroadcastHashJoin [ss_promo_sk,p_promo_sk] + Project [s_store_id,ss_ext_sales_price,ss_net_profit,ss_promo_sk,sr_return_amt,sr_net_loss] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [s_store_id,ss_item_sk,ss_ext_sales_price,ss_net_profit,ss_promo_sk,sr_return_amt,sr_net_loss] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_item_sk,ss_store_sk,ss_ext_sales_price,ss_net_profit,ss_promo_sk,sr_return_amt,sr_net_loss] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_item_sk,ss_store_sk,ss_ext_sales_price,ss_net_profit,ss_sold_date_sk,ss_promo_sk,sr_return_amt,sr_net_loss] + BroadcastHashJoin [ss_item_sk,ss_ticket_number,sr_item_sk,sr_ticket_number] + Project [ss_item_sk,ss_store_sk,ss_ext_sales_price,ss_net_profit,ss_sold_date_sk,ss_promo_sk,ss_ticket_number] + Filter [ss_sold_date_sk,ss_store_sk,ss_item_sk,ss_promo_sk] + Scan parquet default.store_sales [ss_item_sk,ss_store_sk,ss_ext_sales_price,ss_net_profit,ss_sold_date_sk,ss_promo_sk,ss_ticket_number] [ss_item_sk,ss_store_sk,ss_ext_sales_price,ss_net_profit,ss_sold_date_sk,ss_promo_sk,ss_ticket_number] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen + Project [sr_item_sk,sr_ticket_number,sr_return_amt,sr_net_loss] + Filter [sr_item_sk,sr_ticket_number] + Scan parquet default.store_returns [sr_item_sk,sr_ticket_number,sr_return_amt,sr_net_loss] [sr_item_sk,sr_ticket_number,sr_return_amt,sr_net_loss] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen + Project [d_date_sk] + Filter [d_date,d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_date] [d_date_sk,d_date] + InputAdapter + BroadcastExchange 
#5 + WholeStageCodegen + Project [s_store_sk,s_store_id] + Filter [s_store_sk] + Scan parquet default.store [s_store_sk,s_store_id] [s_store_sk,s_store_id] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen + Project [i_item_sk] + Filter [i_current_price,i_item_sk] + Scan parquet default.item [i_item_sk,i_current_price] [i_item_sk,i_current_price] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen + Project [p_promo_sk] + Filter [p_channel_tv,p_promo_sk] + Scan parquet default.promotion [p_promo_sk,p_channel_tv] [p_promo_sk,p_channel_tv] + WholeStageCodegen + HashAggregate [sum(UnscaledValue(cs_ext_sales_price)),cp_catalog_page_id,sum,sum,sum(CheckOverflow((promote_precision(cast(cs_net_profit as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_net_loss as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2))),sum,sum(coalesce(cast(cr_return_amount as decimal(12,2)), 0.00))] [sum(UnscaledValue(cs_ext_sales_price)),sales,sum,sum,sum(CheckOverflow((promote_precision(cast(cs_net_profit as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_net_loss as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2))),channel,sum,sum(coalesce(cast(cr_return_amount as decimal(12,2)), 0.00)),returns,id,profit] + InputAdapter + Exchange [cp_catalog_page_id] #8 + WholeStageCodegen + HashAggregate [cp_catalog_page_id,sum,sum,sum,sum,cr_return_amount,sum,cs_net_profit,sum,cr_net_loss,cs_ext_sales_price] [sum,sum,sum,sum,sum,sum] + Project [cr_net_loss,cp_catalog_page_id,cr_return_amount,cs_net_profit,cs_ext_sales_price] + BroadcastHashJoin [cs_promo_sk,p_promo_sk] + Project [cs_promo_sk,cr_net_loss,cp_catalog_page_id,cr_return_amount,cs_net_profit,cs_ext_sales_price] + BroadcastHashJoin [cs_item_sk,i_item_sk] + Project [cs_promo_sk,cr_net_loss,cp_catalog_page_id,cr_return_amount,cs_item_sk,cs_net_profit,cs_ext_sales_price] + BroadcastHashJoin [cs_catalog_page_sk,cp_catalog_page_sk] + Project 
[cs_promo_sk,cr_net_loss,cs_catalog_page_sk,cr_return_amount,cs_item_sk,cs_net_profit,cs_ext_sales_price] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_promo_sk,cr_net_loss,cs_catalog_page_sk,cs_sold_date_sk,cr_return_amount,cs_item_sk,cs_net_profit,cs_ext_sales_price] + BroadcastHashJoin [cs_item_sk,cs_order_number,cr_item_sk,cr_order_number] + Project [cs_promo_sk,cs_catalog_page_sk,cs_sold_date_sk,cs_order_number,cs_item_sk,cs_net_profit,cs_ext_sales_price] + Filter [cs_sold_date_sk,cs_catalog_page_sk,cs_item_sk,cs_promo_sk] + Scan parquet default.catalog_sales [cs_promo_sk,cs_catalog_page_sk,cs_sold_date_sk,cs_order_number,cs_item_sk,cs_net_profit,cs_ext_sales_price] [cs_promo_sk,cs_catalog_page_sk,cs_sold_date_sk,cs_order_number,cs_item_sk,cs_net_profit,cs_ext_sales_price] + InputAdapter + BroadcastExchange #9 + WholeStageCodegen + Project [cr_item_sk,cr_order_number,cr_return_amount,cr_net_loss] + Filter [cr_item_sk,cr_order_number] + Scan parquet default.catalog_returns [cr_item_sk,cr_order_number,cr_return_amount,cr_net_loss] [cr_item_sk,cr_order_number,cr_return_amount,cr_net_loss] + InputAdapter + ReusedExchange [d_date_sk] [d_date_sk] #4 + InputAdapter + BroadcastExchange #10 + WholeStageCodegen + Project [cp_catalog_page_sk,cp_catalog_page_id] + Filter [cp_catalog_page_sk] + Scan parquet default.catalog_page [cp_catalog_page_sk,cp_catalog_page_id] [cp_catalog_page_sk,cp_catalog_page_id] + InputAdapter + ReusedExchange [i_item_sk] [i_item_sk] #6 + InputAdapter + ReusedExchange [p_promo_sk] [p_promo_sk] #7 + WholeStageCodegen + HashAggregate [web_site_id,sum,sum,sum(CheckOverflow((promote_precision(cast(ws_net_profit as decimal(13,2))) - promote_precision(cast(coalesce(cast(wr_net_loss as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2))),sum(UnscaledValue(ws_ext_sales_price)),sum,sum(coalesce(cast(wr_return_amt as decimal(12,2)), 0.00))] [sum,sales,id,returns,sum,sum(CheckOverflow((promote_precision(cast(ws_net_profit as 
decimal(13,2))) - promote_precision(cast(coalesce(cast(wr_net_loss as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2))),sum(UnscaledValue(ws_ext_sales_price)),profit,sum,sum(coalesce(cast(wr_return_amt as decimal(12,2)), 0.00)),channel] + InputAdapter + Exchange [web_site_id] #11 + WholeStageCodegen + HashAggregate [web_site_id,sum,sum,sum,sum,ws_ext_sales_price,ws_net_profit,sum,wr_return_amt,wr_net_loss,sum] [sum,sum,sum,sum,sum,sum] + Project [web_site_id,wr_return_amt,ws_ext_sales_price,ws_net_profit,wr_net_loss] + BroadcastHashJoin [ws_promo_sk,p_promo_sk] + Project [web_site_id,wr_return_amt,ws_ext_sales_price,ws_net_profit,wr_net_loss,ws_promo_sk] + BroadcastHashJoin [ws_item_sk,i_item_sk] + Project [web_site_id,wr_return_amt,ws_ext_sales_price,ws_net_profit,wr_net_loss,ws_promo_sk,ws_item_sk] + BroadcastHashJoin [ws_web_site_sk,web_site_sk] + Project [ws_web_site_sk,wr_return_amt,ws_ext_sales_price,ws_net_profit,wr_net_loss,ws_promo_sk,ws_item_sk] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Project [ws_web_site_sk,wr_return_amt,ws_ext_sales_price,ws_net_profit,wr_net_loss,ws_sold_date_sk,ws_promo_sk,ws_item_sk] + BroadcastHashJoin [ws_item_sk,ws_order_number,wr_item_sk,wr_order_number] + Project [ws_web_site_sk,ws_ext_sales_price,ws_net_profit,ws_order_number,ws_sold_date_sk,ws_promo_sk,ws_item_sk] + Filter [ws_sold_date_sk,ws_web_site_sk,ws_item_sk,ws_promo_sk] + Scan parquet default.web_sales [ws_web_site_sk,ws_ext_sales_price,ws_net_profit,ws_order_number,ws_sold_date_sk,ws_promo_sk,ws_item_sk] [ws_web_site_sk,ws_ext_sales_price,ws_net_profit,ws_order_number,ws_sold_date_sk,ws_promo_sk,ws_item_sk] + InputAdapter + BroadcastExchange #12 + WholeStageCodegen + Project [wr_item_sk,wr_order_number,wr_return_amt,wr_net_loss] + Filter [wr_item_sk,wr_order_number] + Scan parquet default.web_returns [wr_item_sk,wr_order_number,wr_return_amt,wr_net_loss] [wr_item_sk,wr_order_number,wr_return_amt,wr_net_loss] + InputAdapter + ReusedExchange 
[d_date_sk] [d_date_sk] #4 + InputAdapter + BroadcastExchange #13 + WholeStageCodegen + Project [web_site_sk,web_site_id] + Filter [web_site_sk] + Scan parquet default.web_site [web_site_sk,web_site_id] [web_site_sk,web_site_id] + InputAdapter + ReusedExchange [i_item_sk] [i_item_sk] #6 + InputAdapter + ReusedExchange [p_promo_sk] [p_promo_sk] #7 diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q81/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q81/explain.txt new file mode 100644 index 000000000..92713c532 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q81/explain.txt @@ -0,0 +1,52 @@ +== Physical Plan == +TakeOrderedAndProject(limit=100, orderBy=[c_customer_id#1 ASC NULLS FIRST,c_salutation#2 ASC NULLS FIRST,c_first_name#3 ASC NULLS FIRST,c_last_name#4 ASC NULLS FIRST,ca_street_number#5 ASC NULLS FIRST,ca_street_name#6 ASC NULLS FIRST,ca_street_type#7 ASC NULLS FIRST,ca_suite_number#8 ASC NULLS FIRST,ca_city#9 ASC NULLS FIRST,ca_county#10 ASC NULLS FIRST,ca_state#11 ASC NULLS FIRST,ca_zip#12 ASC NULLS FIRST,ca_country#13 ASC NULLS FIRST,ca_gmt_offset#14 ASC NULLS FIRST,ca_location_type#15 ASC NULLS FIRST,ctr_total_return#16 ASC NULLS FIRST], output=[c_customer_id#1,c_salutation#2,c_first_name#3,c_last_name#4,ca_street_number#5,ca_street_name#6,ca_street_type#7,ca_suite_number#8,ca_city#9,ca_county#10,ca_state#11,ca_zip#12,ca_country#13,ca_gmt_offset#14,ca_location_type#15,ctr_total_return#16]) ++- *(11) Project [c_customer_id#1, c_salutation#2, c_first_name#3, c_last_name#4, ca_street_number#5, ca_street_name#6, ca_street_type#7, ca_suite_number#8, ca_city#9, ca_county#10, ca_state#11, ca_zip#12, ca_country#13, ca_gmt_offset#14, ca_location_type#15, ctr_total_return#16] + +- *(11) BroadcastHashJoin [c_current_addr_sk#17], [ca_address_sk#18], Inner, BuildRight + :- *(11) Project [ctr_total_return#16, c_customer_id#1, c_current_addr_sk#17, c_salutation#2, c_first_name#3, 
c_last_name#4] + : +- *(11) BroadcastHashJoin [ctr_customer_sk#19], [c_customer_sk#20], Inner, BuildRight + : :- *(11) Project [ctr_customer_sk#19, ctr_total_return#16] + : : +- *(11) BroadcastHashJoin [ctr_state#21], [ctr_state#21#22], Inner, BuildRight, (cast(ctr_total_return#16 as decimal(24,7)) > (CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6)))#23) + : : :- *(11) Filter isnotnull(ctr_total_return#16) + : : : +- *(11) HashAggregate(keys=[cr_returning_customer_sk#24, ca_state#11], functions=[sum(UnscaledValue(cr_return_amt_inc_tax#25))]) + : : : +- Exchange hashpartitioning(cr_returning_customer_sk#24, ca_state#11, 200) + : : : +- *(3) HashAggregate(keys=[cr_returning_customer_sk#24, ca_state#11], functions=[partial_sum(UnscaledValue(cr_return_amt_inc_tax#25))]) + : : : +- *(3) Project [cr_returning_customer_sk#24, cr_return_amt_inc_tax#25, ca_state#11] + : : : +- *(3) BroadcastHashJoin [cr_returning_addr_sk#26], [ca_address_sk#18], Inner, BuildRight + : : : :- *(3) Project [cr_returning_customer_sk#24, cr_returning_addr_sk#26, cr_return_amt_inc_tax#25] + : : : : +- *(3) BroadcastHashJoin [cr_returned_date_sk#27], [d_date_sk#28], Inner, BuildRight + : : : : :- *(3) Project [cr_returned_date_sk#27, cr_returning_customer_sk#24, cr_returning_addr_sk#26, cr_return_amt_inc_tax#25] + : : : : : +- *(3) Filter ((isnotnull(cr_returned_date_sk#27) && isnotnull(cr_returning_addr_sk#26)) && isnotnull(cr_returning_customer_sk#24)) + : : : : : +- *(3) FileScan parquet default.catalog_returns[cr_returned_date_sk#27,cr_returning_customer_sk#24,cr_returning_addr_sk#26,cr_return_amt_inc_tax#25] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_returns], PartitionFilters: [], PushedFilters: [IsNotNull(cr_returned_date_sk), IsNotNull(cr_returning_addr_sk), IsNotNull(cr_returning_customer..., ReadSchema: struct + : : : +- BroadcastExchange 
HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : +- *(2) Project [ca_address_sk#18, ca_state#11] + : : : +- *(2) Filter (isnotnull(ca_address_sk#18) && isnotnull(ca_state#11)) + : : : +- *(2) FileScan parquet default.customer_address[ca_address_sk#18,ca_state#11] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_address_sk), IsNotNull(ca_state)], ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(input[1, string, true])) + : : +- *(8) Filter isnotnull((CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6)))#23) + : : +- *(8) HashAggregate(keys=[ctr_state#21], functions=[avg(ctr_total_return#16)]) + : : +- Exchange hashpartitioning(ctr_state#21, 200) + : : +- *(7) HashAggregate(keys=[ctr_state#21], functions=[partial_avg(ctr_total_return#16)]) + : : +- *(7) HashAggregate(keys=[cr_returning_customer_sk#24, ca_state#11], functions=[sum(UnscaledValue(cr_return_amt_inc_tax#25))]) + : : +- Exchange hashpartitioning(cr_returning_customer_sk#24, ca_state#11, 200) + : : +- *(6) HashAggregate(keys=[cr_returning_customer_sk#24, ca_state#11], functions=[partial_sum(UnscaledValue(cr_return_amt_inc_tax#25))]) + : : +- *(6) Project [cr_returning_customer_sk#24, cr_return_amt_inc_tax#25, ca_state#11] + : : +- *(6) BroadcastHashJoin [cr_returning_addr_sk#26], [ca_address_sk#18], Inner, BuildRight + : : :- *(6) Project [cr_returning_customer_sk#24, cr_returning_addr_sk#26, cr_return_amt_inc_tax#25] + : : : +- *(6) BroadcastHashJoin [cr_returned_date_sk#27], [d_date_sk#28], Inner, BuildRight + : : : :- *(6) Project [cr_returned_date_sk#27, cr_returning_customer_sk#24, cr_returning_addr_sk#26, cr_return_amt_inc_tax#25] + : : : : +- *(6) Filter (isnotnull(cr_returned_date_sk#27) && isnotnull(cr_returning_addr_sk#26)) + : : : : +- *(6) FileScan parquet 
default.catalog_returns[cr_returned_date_sk#27,cr_returning_customer_sk#24,cr_returning_addr_sk#26,cr_return_amt_inc_tax#25] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_returns], PartitionFilters: [], PushedFilters: [IsNotNull(cr_returned_date_sk), IsNotNull(cr_returning_addr_sk)], ReadSchema: struct= 62.00)) && (cast(i_current_price#3 as decimal(12,2)) <= 92.00)) && i_manufact_id#9 IN (129,270,821,423)) && isnotnull(i_item_sk#4)) + : : : +- *(4) FileScan parquet default.item[i_item_sk#4,i_item_id#1,i_item_desc#2,i_current_price#3,i_manufact_id#9] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_current_price), GreaterThanOrEqual(i_current_price,62.00), In(i_manufact_id, [129,27..., ReadSchema: struct= 100)) && (inv_quantity_on_hand#10 <= 500)) && isnotnull(inv_item_sk#8)) && isnotnull(inv_date_sk#6)) + : : +- *(1) FileScan parquet default.inventory[inv_date_sk#6,inv_item_sk#8,inv_quantity_on_hand#10] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/inventory], PartitionFilters: [], PushedFilters: [IsNotNull(inv_quantity_on_hand), GreaterThanOrEqual(inv_quantity_on_hand,100), LessThanOrEqual(i..., ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(2) Project [d_date_sk#7] + : +- *(2) Filter (((isnotnull(d_date#11) && (d_date#11 >= 11102)) && (d_date#11 <= 11162)) && isnotnull(d_date_sk#7)) + : +- *(2) FileScan parquet default.date_dim[d_date_sk#7,d_date#11] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,2000-05-25), LessThanOrEqual(d_date,2000-07-24), Is..., 
ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(3) Project [ss_item_sk#5] + +- *(3) Filter isnotnull(ss_item_sk#5) + +- *(3) FileScan parquet default.store_sales[ss_item_sk#5] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_item_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q82/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q82/simplified.txt new file mode 100644 index 000000000..e7c48e44c --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q82/simplified.txt @@ -0,0 +1,34 @@ +TakeOrderedAndProject [i_item_id,i_item_desc,i_current_price] + WholeStageCodegen + HashAggregate [i_item_id,i_item_desc,i_current_price] + InputAdapter + Exchange [i_item_id,i_item_desc,i_current_price] #1 + WholeStageCodegen + HashAggregate [i_item_id,i_item_desc,i_current_price] + Project [i_item_id,i_item_desc,i_current_price] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Project [i_item_sk,i_item_id,i_item_desc,i_current_price] + BroadcastHashJoin [inv_date_sk,d_date_sk] + Project [i_current_price,i_item_sk,inv_date_sk,i_item_desc,i_item_id] + BroadcastHashJoin [i_item_sk,inv_item_sk] + Project [i_item_sk,i_item_id,i_item_desc,i_current_price] + Filter [i_current_price,i_manufact_id,i_item_sk] + Scan parquet default.item [i_current_price,i_manufact_id,i_item_sk,i_item_desc,i_item_id] [i_current_price,i_manufact_id,i_item_sk,i_item_desc,i_item_id] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen + Project [inv_date_sk,inv_item_sk] + Filter [inv_quantity_on_hand,inv_item_sk,inv_date_sk] + Scan parquet default.inventory [inv_date_sk,inv_item_sk,inv_quantity_on_hand] [inv_date_sk,inv_item_sk,inv_quantity_on_hand] + InputAdapter + 
BroadcastExchange #3 + WholeStageCodegen + Project [d_date_sk] + Filter [d_date,d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_date] [d_date_sk,d_date] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen + Project [ss_item_sk] + Filter [ss_item_sk] + Scan parquet default.store_sales [ss_item_sk] [ss_item_sk] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q83/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q83/explain.txt new file mode 100644 index 000000000..46662f99a --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q83/explain.txt @@ -0,0 +1,69 @@ +== Physical Plan == +TakeOrderedAndProject(limit=100, orderBy=[item_id#1 ASC NULLS FIRST,sr_item_qty#2 ASC NULLS FIRST], output=[item_id#1,sr_item_qty#2,sr_dev#3,cr_item_qty#4,cr_dev#5,wr_item_qty#6,wr_dev#7,average#8]) ++- *(18) Project [item_id#1, sr_item_qty#2, (((cast(sr_item_qty#2 as double) / cast(((sr_item_qty#2 + cr_item_qty#4) + wr_item_qty#6) as double)) / 3.0) * 100.0) AS sr_dev#3, cr_item_qty#4, (((cast(cr_item_qty#4 as double) / cast(((sr_item_qty#2 + cr_item_qty#4) + wr_item_qty#6) as double)) / 3.0) * 100.0) AS cr_dev#5, wr_item_qty#6, (((cast(wr_item_qty#6 as double) / cast(((sr_item_qty#2 + cr_item_qty#4) + wr_item_qty#6) as double)) / 3.0) * 100.0) AS wr_dev#7, CheckOverflow((promote_precision(cast(cast(((sr_item_qty#2 + cr_item_qty#4) + wr_item_qty#6) as decimal(20,0)) as decimal(21,1))) / 3.0), DecimalType(27,6)) AS average#8] + +- *(18) BroadcastHashJoin [item_id#1], [item_id#9], Inner, BuildRight + :- *(18) Project [item_id#1, sr_item_qty#2, cr_item_qty#4] + : +- *(18) BroadcastHashJoin [item_id#1], [item_id#10], Inner, BuildRight + : :- *(18) HashAggregate(keys=[i_item_id#11], functions=[sum(cast(sr_return_quantity#12 as bigint))]) + : : +- Exchange hashpartitioning(i_item_id#11, 200) + : : +- *(5) HashAggregate(keys=[i_item_id#11], functions=[partial_sum(cast(sr_return_quantity#12 as bigint))]) + : : 
+- *(5) Project [sr_return_quantity#12, i_item_id#11] + : : +- *(5) BroadcastHashJoin [sr_returned_date_sk#13], [cast(d_date_sk#14 as bigint)], Inner, BuildRight + : : :- *(5) Project [sr_returned_date_sk#13, sr_return_quantity#12, i_item_id#11] + : : : +- *(5) BroadcastHashJoin [sr_item_sk#15], [cast(i_item_sk#16 as bigint)], Inner, BuildRight + : : : :- *(5) Project [sr_returned_date_sk#13, sr_item_sk#15, sr_return_quantity#12] + : : : : +- *(5) Filter (isnotnull(sr_item_sk#15) && isnotnull(sr_returned_date_sk#13)) + : : : : +- *(5) FileScan parquet default.store_returns[sr_returned_date_sk#13,sr_item_sk#15,sr_return_quantity#12] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_returns], PartitionFilters: [], PushedFilters: [IsNotNull(sr_item_sk), IsNotNull(sr_returned_date_sk)], ReadSchema: struct + : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : +- *(1) Project [i_item_sk#16, i_item_id#11] + : : : +- *(1) Filter (isnotnull(i_item_sk#16) && isnotnull(i_item_id#11)) + : : : +- *(1) FileScan parquet default.item[i_item_sk#16,i_item_id#11] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk), IsNotNull(i_item_id)], ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(4) Project [d_date_sk#14] + : : +- *(4) BroadcastHashJoin [d_date#17], [d_date#17#18], LeftSemi, BuildRight + : : :- *(4) Project [d_date_sk#14, d_date#17] + : : : +- *(4) Filter isnotnull(d_date_sk#14) + : : : +- *(4) FileScan parquet default.date_dim[d_date_sk#14,d_date#17] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date_sk)], 
ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, date, true])) + : : +- *(3) Project [d_date#17 AS d_date#17#18] + : : +- *(3) BroadcastHashJoin [d_week_seq#19], [d_week_seq#19#20], LeftSemi, BuildRight + : : :- *(3) FileScan parquet default.date_dim[d_date#17,d_week_seq#19] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [], ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(2) Project [d_week_seq#19 AS d_week_seq#19#20] + : : +- *(2) Filter cast(d_date#17 as string) IN (2000-06-30,2000-09-27,2000-11-17) + : : +- *(2) FileScan parquet default.date_dim[d_date#17,d_week_seq#19] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true])) + : +- *(11) HashAggregate(keys=[i_item_id#11], functions=[sum(cast(cr_return_quantity#21 as bigint))]) + : +- Exchange hashpartitioning(i_item_id#11, 200) + : +- *(10) HashAggregate(keys=[i_item_id#11], functions=[partial_sum(cast(cr_return_quantity#21 as bigint))]) + : +- *(10) Project [cr_return_quantity#21, i_item_id#11] + : +- *(10) BroadcastHashJoin [cr_returned_date_sk#22], [d_date_sk#14], Inner, BuildRight + : :- *(10) Project [cr_returned_date_sk#22, cr_return_quantity#21, i_item_id#11] + : : +- *(10) BroadcastHashJoin [cr_item_sk#23], [i_item_sk#16], Inner, BuildRight + : : :- *(10) Project [cr_returned_date_sk#22, cr_item_sk#23, cr_return_quantity#21] + : : : +- *(10) Filter (isnotnull(cr_item_sk#23) && isnotnull(cr_returned_date_sk#22)) + : : : +- *(10) FileScan parquet default.catalog_returns[cr_returned_date_sk#22,cr_item_sk#23,cr_return_quantity#21] Batched: true, Format: Parquet, 
Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_returns], PartitionFilters: [], PushedFilters: [IsNotNull(cr_item_sk), IsNotNull(cr_returned_date_sk)], ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(6) Project [i_item_sk#16, i_item_id#11] + : : +- *(6) Filter (isnotnull(i_item_sk#16) && isnotnull(i_item_id#11)) + : : +- *(6) FileScan parquet default.item[i_item_sk#16,i_item_id#11] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk), IsNotNull(i_item_id)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(9) Project [d_date_sk#14] + : +- *(9) BroadcastHashJoin [d_date#17], [d_date#17#24], LeftSemi, BuildRight + : :- *(9) Project [d_date_sk#14, d_date#17] + : : +- *(9) Filter isnotnull(d_date_sk#14) + : : +- *(9) FileScan parquet default.date_dim[d_date_sk#14,d_date#17] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date_sk)], ReadSchema: struct + : +- ReusedExchange [d_date#17#24], BroadcastExchange HashedRelationBroadcastMode(List(input[0, date, true])) + +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true])) + +- *(17) HashAggregate(keys=[i_item_id#11], functions=[sum(cast(wr_return_quantity#25 as bigint))]) + +- Exchange hashpartitioning(i_item_id#11, 200) + +- *(16) HashAggregate(keys=[i_item_id#11], functions=[partial_sum(cast(wr_return_quantity#25 as bigint))]) + +- *(16) Project [wr_return_quantity#25, i_item_id#11] + +- *(16) BroadcastHashJoin [wr_returned_date_sk#26], [cast(d_date_sk#14 as bigint)], Inner, BuildRight + :- *(16) Project [wr_returned_date_sk#26, 
wr_return_quantity#25, i_item_id#11] + : +- *(16) BroadcastHashJoin [wr_item_sk#27], [cast(i_item_sk#16 as bigint)], Inner, BuildRight + : :- *(16) Project [wr_returned_date_sk#26, wr_item_sk#27, wr_return_quantity#25] + : : +- *(16) Filter (isnotnull(wr_item_sk#27) && isnotnull(wr_returned_date_sk#26)) + : : +- *(16) FileScan parquet default.web_returns[wr_returned_date_sk#26,wr_item_sk#27,wr_return_quantity#25] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_returns], PartitionFilters: [], PushedFilters: [IsNotNull(wr_item_sk), IsNotNull(wr_returned_date_sk)], ReadSchema: struct + : +- ReusedExchange [i_item_sk#16, i_item_id#11], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- ReusedExchange [d_date_sk#14], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q83/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q83/simplified.txt new file mode 100644 index 000000000..027dc649e --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q83/simplified.txt @@ -0,0 +1,94 @@ +TakeOrderedAndProject [wr_item_qty,cr_item_qty,wr_dev,sr_dev,sr_item_qty,average,item_id,cr_dev] + WholeStageCodegen + Project [item_id,sr_item_qty,cr_item_qty,wr_item_qty] + BroadcastHashJoin [item_id,item_id] + Project [item_id,sr_item_qty,cr_item_qty] + BroadcastHashJoin [item_id,item_id] + HashAggregate [i_item_id,sum,sum(cast(sr_return_quantity as bigint))] [sum(cast(sr_return_quantity as bigint)),item_id,sr_item_qty,sum] + InputAdapter + Exchange [i_item_id] #1 + WholeStageCodegen + HashAggregate [i_item_id,sr_return_quantity,sum,sum] [sum,sum] + Project [sr_return_quantity,i_item_id] + BroadcastHashJoin [sr_returned_date_sk,d_date_sk] + Project 
[sr_returned_date_sk,sr_return_quantity,i_item_id] + BroadcastHashJoin [sr_item_sk,i_item_sk] + Project [sr_returned_date_sk,sr_item_sk,sr_return_quantity] + Filter [sr_item_sk,sr_returned_date_sk] + Scan parquet default.store_returns [sr_returned_date_sk,sr_item_sk,sr_return_quantity] [sr_returned_date_sk,sr_item_sk,sr_return_quantity] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen + Project [i_item_sk,i_item_id] + Filter [i_item_sk,i_item_id] + Scan parquet default.item [i_item_sk,i_item_id] [i_item_sk,i_item_id] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen + Project [d_date_sk] + BroadcastHashJoin [d_date,d_date] + Project [d_date_sk,d_date] + Filter [d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_date] [d_date_sk,d_date] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen + Project [d_date] + BroadcastHashJoin [d_week_seq,d_week_seq] + Scan parquet default.date_dim [d_date,d_week_seq] [d_date,d_week_seq] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen + Project [d_week_seq] + Filter [d_date] + Scan parquet default.date_dim [d_date,d_week_seq] [d_date,d_week_seq] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen + HashAggregate [i_item_id,sum,sum(cast(cr_return_quantity as bigint))] [sum(cast(cr_return_quantity as bigint)),item_id,cr_item_qty,sum] + InputAdapter + Exchange [i_item_id] #7 + WholeStageCodegen + HashAggregate [i_item_id,cr_return_quantity,sum,sum] [sum,sum] + Project [cr_return_quantity,i_item_id] + BroadcastHashJoin [cr_returned_date_sk,d_date_sk] + Project [cr_returned_date_sk,cr_return_quantity,i_item_id] + BroadcastHashJoin [cr_item_sk,i_item_sk] + Project [cr_returned_date_sk,cr_item_sk,cr_return_quantity] + Filter [cr_item_sk,cr_returned_date_sk] + Scan parquet default.catalog_returns [cr_returned_date_sk,cr_item_sk,cr_return_quantity] [cr_returned_date_sk,cr_item_sk,cr_return_quantity] + InputAdapter + BroadcastExchange #8 + WholeStageCodegen + Project [i_item_sk,i_item_id] + Filter 
[i_item_sk,i_item_id] + Scan parquet default.item [i_item_sk,i_item_id] [i_item_sk,i_item_id] + InputAdapter + BroadcastExchange #9 + WholeStageCodegen + Project [d_date_sk] + BroadcastHashJoin [d_date,d_date] + Project [d_date_sk,d_date] + Filter [d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_date] [d_date_sk,d_date] + InputAdapter + ReusedExchange [d_date] [d_date] #4 + InputAdapter + BroadcastExchange #10 + WholeStageCodegen + HashAggregate [i_item_id,sum,sum(cast(wr_return_quantity as bigint))] [sum(cast(wr_return_quantity as bigint)),item_id,wr_item_qty,sum] + InputAdapter + Exchange [i_item_id] #11 + WholeStageCodegen + HashAggregate [i_item_id,wr_return_quantity,sum,sum] [sum,sum] + Project [wr_return_quantity,i_item_id] + BroadcastHashJoin [wr_returned_date_sk,d_date_sk] + Project [wr_returned_date_sk,wr_return_quantity,i_item_id] + BroadcastHashJoin [wr_item_sk,i_item_sk] + Project [wr_returned_date_sk,wr_item_sk,wr_return_quantity] + Filter [wr_item_sk,wr_returned_date_sk] + Scan parquet default.web_returns [wr_returned_date_sk,wr_item_sk,wr_return_quantity] [wr_returned_date_sk,wr_item_sk,wr_return_quantity] + InputAdapter + ReusedExchange [i_item_sk,i_item_id] [i_item_sk,i_item_id] #2 + InputAdapter + ReusedExchange [d_date_sk] [d_date_sk] #3 diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q84/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q84/explain.txt new file mode 100644 index 000000000..c0cde7a5d --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q84/explain.txt @@ -0,0 +1,35 @@ +== Physical Plan == +TakeOrderedAndProject(limit=100, orderBy=[c_customer_id#1 ASC NULLS FIRST], output=[customer_id#2,customername#3]) ++- *(6) Project [c_customer_id#1 AS customer_id#2, concat(c_last_name#4, , , c_first_name#5) AS customername#3, c_customer_id#1] + +- *(6) BroadcastHashJoin [cast(cd_demo_sk#6 as bigint)], [sr_cdemo_sk#7], Inner, BuildRight + :- *(6) Project 
[c_customer_id#1, c_first_name#5, c_last_name#4, cd_demo_sk#6] + : +- *(6) BroadcastHashJoin [hd_income_band_sk#8], [ib_income_band_sk#9], Inner, BuildRight + : :- *(6) Project [c_customer_id#1, c_first_name#5, c_last_name#4, cd_demo_sk#6, hd_income_band_sk#8] + : : +- *(6) BroadcastHashJoin [c_current_hdemo_sk#10], [hd_demo_sk#11], Inner, BuildRight + : : :- *(6) Project [c_customer_id#1, c_current_hdemo_sk#10, c_first_name#5, c_last_name#4, cd_demo_sk#6] + : : : +- *(6) BroadcastHashJoin [c_current_cdemo_sk#12], [cd_demo_sk#6], Inner, BuildRight + : : : :- *(6) Project [c_customer_id#1, c_current_cdemo_sk#12, c_current_hdemo_sk#10, c_first_name#5, c_last_name#4] + : : : : +- *(6) BroadcastHashJoin [c_current_addr_sk#13], [ca_address_sk#14], Inner, BuildRight + : : : : :- *(6) Project [c_customer_id#1, c_current_cdemo_sk#12, c_current_hdemo_sk#10, c_current_addr_sk#13, c_first_name#5, c_last_name#4] + : : : : : +- *(6) Filter ((isnotnull(c_current_addr_sk#13) && isnotnull(c_current_cdemo_sk#12)) && isnotnull(c_current_hdemo_sk#10)) + : : : : : +- *(6) FileScan parquet default.customer[c_customer_id#1,c_current_cdemo_sk#12,c_current_hdemo_sk#10,c_current_addr_sk#13,c_first_name#5,c_last_name#4] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_current_addr_sk), IsNotNull(c_current_cdemo_sk), IsNotNull(c_current_hdemo_sk)], ReadSchema: struct + : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : +- *(2) Project [cd_demo_sk#6] + : : : +- *(2) Filter isnotnull(cd_demo_sk#6) + : : : +- *(2) FileScan parquet default.customer_demographics[cd_demo_sk#6] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_demographics], PartitionFilters: [], PushedFilters: [IsNotNull(cd_demo_sk)], ReadSchema: struct + : : +- 
BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(3) Project [hd_demo_sk#11, hd_income_band_sk#8] + : : +- *(3) Filter (isnotnull(hd_demo_sk#11) && isnotnull(hd_income_band_sk#8)) + : : +- *(3) FileScan parquet default.household_demographics[hd_demo_sk#11,hd_income_band_sk#8] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/household_demographics], PartitionFilters: [], PushedFilters: [IsNotNull(hd_demo_sk), IsNotNull(hd_income_band_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(4) Project [ib_income_band_sk#9] + : +- *(4) Filter ((((isnotnull(ib_lower_bound#16) && isnotnull(ib_upper_bound#17)) && (ib_lower_bound#16 >= 38128)) && (ib_upper_bound#17 <= 88128)) && isnotnull(ib_income_band_sk#9)) + : +- *(4) FileScan parquet default.income_band[ib_income_band_sk#9,ib_lower_bound#16,ib_upper_bound#17] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/income_band], PartitionFilters: [], PushedFilters: [IsNotNull(ib_lower_bound), IsNotNull(ib_upper_bound), GreaterThanOrEqual(ib_lower_bound,38128), ..., ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, bigint, true])) + +- *(5) Project [sr_cdemo_sk#7] + +- *(5) Filter isnotnull(sr_cdemo_sk#7) + +- *(5) FileScan parquet default.store_returns[sr_cdemo_sk#7] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_returns], PartitionFilters: [], PushedFilters: [IsNotNull(sr_cdemo_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q84/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q84/simplified.txt new file mode 100644 index 000000000..95001b56d --- 
/dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q84/simplified.txt @@ -0,0 +1,45 @@ +TakeOrderedAndProject [c_customer_id,customer_id,customername] + WholeStageCodegen + Project [c_customer_id,c_last_name,c_first_name] + BroadcastHashJoin [cd_demo_sk,sr_cdemo_sk] + Project [c_customer_id,c_first_name,c_last_name,cd_demo_sk] + BroadcastHashJoin [hd_income_band_sk,ib_income_band_sk] + Project [cd_demo_sk,c_customer_id,hd_income_band_sk,c_last_name,c_first_name] + BroadcastHashJoin [c_current_hdemo_sk,hd_demo_sk] + Project [cd_demo_sk,c_customer_id,c_current_hdemo_sk,c_last_name,c_first_name] + BroadcastHashJoin [c_current_cdemo_sk,cd_demo_sk] + Project [c_current_cdemo_sk,c_customer_id,c_current_hdemo_sk,c_last_name,c_first_name] + BroadcastHashJoin [c_current_addr_sk,ca_address_sk] + Project [c_current_cdemo_sk,c_current_addr_sk,c_customer_id,c_current_hdemo_sk,c_last_name,c_first_name] + Filter [c_current_addr_sk,c_current_cdemo_sk,c_current_hdemo_sk] + Scan parquet default.customer [c_current_cdemo_sk,c_current_addr_sk,c_customer_id,c_current_hdemo_sk,c_last_name,c_first_name] [c_current_cdemo_sk,c_current_addr_sk,c_customer_id,c_current_hdemo_sk,c_last_name,c_first_name] + InputAdapter + BroadcastExchange #1 + WholeStageCodegen + Project [ca_address_sk] + Filter [ca_city,ca_address_sk] + Scan parquet default.customer_address [ca_address_sk,ca_city] [ca_address_sk,ca_city] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen + Project [cd_demo_sk] + Filter [cd_demo_sk] + Scan parquet default.customer_demographics [cd_demo_sk] [cd_demo_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen + Project [hd_demo_sk,hd_income_band_sk] + Filter [hd_demo_sk,hd_income_band_sk] + Scan parquet default.household_demographics [hd_demo_sk,hd_income_band_sk] [hd_demo_sk,hd_income_band_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen + Project [ib_income_band_sk] + Filter [ib_lower_bound,ib_upper_bound,ib_income_band_sk] + Scan 
parquet default.income_band [ib_income_band_sk,ib_lower_bound,ib_upper_bound] [ib_income_band_sk,ib_lower_bound,ib_upper_bound] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen + Project [sr_cdemo_sk] + Filter [sr_cdemo_sk] + Scan parquet default.store_returns [sr_cdemo_sk] [sr_cdemo_sk] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q85/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q85/explain.txt new file mode 100644 index 000000000..560b1eba0 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q85/explain.txt @@ -0,0 +1,50 @@ +== Physical Plan == +TakeOrderedAndProject(limit=100, orderBy=[substring(r_reason_desc, 1, 20)#1 ASC NULLS FIRST,aggOrder#2 ASC NULLS FIRST,avg(wr_refunded_cash)#3 ASC NULLS FIRST,avg(wr_fee)#4 ASC NULLS FIRST], output=[substring(r_reason_desc, 1, 20)#1,avg(ws_quantity)#5,avg(wr_refunded_cash)#3,avg(wr_fee)#4]) ++- *(9) HashAggregate(keys=[r_reason_desc#6], functions=[avg(cast(ws_quantity#7 as bigint)), avg(UnscaledValue(wr_refunded_cash#8)), avg(UnscaledValue(wr_fee#9))]) + +- Exchange hashpartitioning(r_reason_desc#6, 200) + +- *(8) HashAggregate(keys=[r_reason_desc#6], functions=[partial_avg(cast(ws_quantity#7 as bigint)), partial_avg(UnscaledValue(wr_refunded_cash#8)), partial_avg(UnscaledValue(wr_fee#9))]) + +- *(8) Project [ws_quantity#7, wr_fee#9, wr_refunded_cash#8, r_reason_desc#6] + +- *(8) BroadcastHashJoin [wr_reason_sk#10], [cast(r_reason_sk#11 as bigint)], Inner, BuildRight + :- *(8) Project [ws_quantity#7, wr_reason_sk#10, wr_fee#9, wr_refunded_cash#8] + : +- *(8) BroadcastHashJoin [ws_sold_date_sk#12], [d_date_sk#13], Inner, BuildRight + : :- *(8) Project [ws_sold_date_sk#12, ws_quantity#7, wr_reason_sk#10, wr_fee#9, wr_refunded_cash#8] + : : +- *(8) BroadcastHashJoin [wr_refunded_addr_sk#14], [cast(ca_address_sk#15 as bigint)], Inner, BuildRight, ((((ca_state#16 IN (IN,OH,NJ) && (ws_net_profit#17 >= 100.00)) && (ws_net_profit#17 <= 
200.00)) || ((ca_state#16 IN (WI,CT,KY) && (ws_net_profit#17 >= 150.00)) && (ws_net_profit#17 <= 300.00))) || ((ca_state#16 IN (LA,IA,AR) && (ws_net_profit#17 >= 50.00)) && (ws_net_profit#17 <= 250.00))) + : : :- *(8) Project [ws_sold_date_sk#12, ws_quantity#7, ws_net_profit#17, wr_refunded_addr_sk#14, wr_reason_sk#10, wr_fee#9, wr_refunded_cash#8] + : : : +- *(8) BroadcastHashJoin [wr_returning_cdemo_sk#18, cd_marital_status#19, cd_education_status#20], [cast(cd_demo_sk#21 as bigint), cd_marital_status#22, cd_education_status#23], Inner, BuildRight + : : : :- *(8) Project [ws_sold_date_sk#12, ws_quantity#7, ws_net_profit#17, wr_refunded_addr_sk#14, wr_returning_cdemo_sk#18, wr_reason_sk#10, wr_fee#9, wr_refunded_cash#8, cd_marital_status#19, cd_education_status#20] + : : : : +- *(8) BroadcastHashJoin [wr_refunded_cdemo_sk#24], [cast(cd_demo_sk#25 as bigint)], Inner, BuildRight, ((((((cd_marital_status#19 = M) && (cd_education_status#20 = Advanced Degree)) && (ws_sales_price#26 >= 100.00)) && (ws_sales_price#26 <= 150.00)) || ((((cd_marital_status#19 = S) && (cd_education_status#20 = College)) && (ws_sales_price#26 >= 50.00)) && (ws_sales_price#26 <= 100.00))) || ((((cd_marital_status#19 = W) && (cd_education_status#20 = 2 yr Degree)) && (ws_sales_price#26 >= 150.00)) && (ws_sales_price#26 <= 200.00))) + : : : : :- *(8) Project [ws_sold_date_sk#12, ws_quantity#7, ws_sales_price#26, ws_net_profit#17, wr_refunded_cdemo_sk#24, wr_refunded_addr_sk#14, wr_returning_cdemo_sk#18, wr_reason_sk#10, wr_fee#9, wr_refunded_cash#8] + : : : : : +- *(8) BroadcastHashJoin [ws_web_page_sk#27], [wp_web_page_sk#28], Inner, BuildRight + : : : : : :- *(8) Project [ws_sold_date_sk#12, ws_web_page_sk#27, ws_quantity#7, ws_sales_price#26, ws_net_profit#17, wr_refunded_cdemo_sk#24, wr_refunded_addr_sk#14, wr_returning_cdemo_sk#18, wr_reason_sk#10, wr_fee#9, wr_refunded_cash#8] + : : : : : : +- *(8) BroadcastHashJoin [cast(ws_item_sk#29 as bigint), cast(ws_order_number#30 as bigint)], 
[wr_item_sk#31, wr_order_number#32], Inner, BuildRight + : : : : : : :- *(8) Project [ws_sold_date_sk#12, ws_item_sk#29, ws_web_page_sk#27, ws_order_number#30, ws_quantity#7, ws_sales_price#26, ws_net_profit#17] + : : : : : : : +- *(8) Filter (((isnotnull(ws_item_sk#29) && isnotnull(ws_order_number#30)) && isnotnull(ws_web_page_sk#27)) && isnotnull(ws_sold_date_sk#12)) + : : : : : : : +- *(8) FileScan parquet default.web_sales[ws_sold_date_sk#12,ws_item_sk#29,ws_web_page_sk#27,ws_order_number#30,ws_quantity#7,ws_sales_price#26,ws_net_profit#17] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_item_sk), IsNotNull(ws_order_number), IsNotNull(ws_web_page_sk), IsNotNull(ws_sold_..., ReadSchema: struct + : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : +- *(3) Project [cd_demo_sk#25, cd_marital_status#19, cd_education_status#20] + : : : : +- *(3) Filter ((isnotnull(cd_demo_sk#25) && isnotnull(cd_education_status#20)) && isnotnull(cd_marital_status#19)) + : : : : +- *(3) FileScan parquet default.customer_demographics[cd_demo_sk#25,cd_marital_status#19,cd_education_status#20] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_demographics], PartitionFilters: [], PushedFilters: [IsNotNull(cd_demo_sk), IsNotNull(cd_education_status), IsNotNull(cd_marital_status)], ReadSchema: struct + : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint), input[1, string, true], input[2, string, true])) + : : : +- *(4) Project [cd_demo_sk#21, cd_marital_status#22, cd_education_status#23] + : : : +- *(4) Filter ((isnotnull(cd_education_status#23) && isnotnull(cd_marital_status#22)) && isnotnull(cd_demo_sk#21)) + : : : +- *(4) FileScan parquet 
default.customer_demographics[cd_demo_sk#21,cd_marital_status#22,cd_education_status#23] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_demographics], PartitionFilters: [], PushedFilters: [IsNotNull(cd_education_status), IsNotNull(cd_marital_status), IsNotNull(cd_demo_sk)], ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(5) Project [ca_address_sk#15, ca_state#16] + : : +- *(5) Filter ((isnotnull(ca_country#33) && (ca_country#33 = United States)) && isnotnull(ca_address_sk#15)) + : : +- *(5) FileScan parquet default.customer_address[ca_address_sk#15,ca_state#16,ca_country#33] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_country), EqualTo(ca_country,United States), IsNotNull(ca_address_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(6) Project [d_date_sk#13] + : +- *(6) Filter ((isnotnull(d_year#34) && (d_year#34 = 2000)) && isnotnull(d_date_sk#13)) + : +- *(6) FileScan parquet default.date_dim[d_date_sk#13,d_year#34] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2000), IsNotNull(d_date_sk)], ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(7) Project [r_reason_sk#11, r_reason_desc#6] + +- *(7) Filter isnotnull(r_reason_sk#11) + +- *(7) FileScan parquet default.reason[r_reason_sk#11,r_reason_desc#6] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/reason], PartitionFilters: [], PushedFilters: 
[IsNotNull(r_reason_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q85/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q85/simplified.txt new file mode 100644 index 000000000..b9fdfc26f --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q85/simplified.txt @@ -0,0 +1,66 @@ +TakeOrderedAndProject [avg(wr_fee),aggOrder,avg(ws_quantity),substring(r_reason_desc, 1, 20),avg(wr_refunded_cash)] + WholeStageCodegen + HashAggregate [r_reason_desc,count,sum,avg(UnscaledValue(wr_fee)),avg(UnscaledValue(wr_refunded_cash)),sum,count,count,avg(cast(ws_quantity as bigint)),sum] [count,avg(wr_fee),aggOrder,avg(ws_quantity),substring(r_reason_desc, 1, 20),sum,avg(UnscaledValue(wr_fee)),avg(wr_refunded_cash),avg(UnscaledValue(wr_refunded_cash)),sum,count,count,avg(cast(ws_quantity as bigint)),sum] + InputAdapter + Exchange [r_reason_desc] #1 + WholeStageCodegen + HashAggregate [r_reason_desc,count,wr_refunded_cash,sum,count,wr_fee,sum,sum,sum,count,count,count,count,sum,ws_quantity,sum] [count,sum,count,sum,sum,sum,count,count,count,count,sum,sum] + Project [ws_quantity,wr_fee,wr_refunded_cash,r_reason_desc] + BroadcastHashJoin [wr_reason_sk,r_reason_sk] + Project [ws_quantity,wr_reason_sk,wr_fee,wr_refunded_cash] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Project [wr_reason_sk,ws_quantity,wr_refunded_cash,ws_sold_date_sk,wr_fee] + BroadcastHashJoin [wr_refunded_addr_sk,ca_address_sk,ca_state,ws_net_profit] + Project [wr_refunded_addr_sk,wr_reason_sk,ws_quantity,ws_net_profit,wr_refunded_cash,ws_sold_date_sk,wr_fee] + BroadcastHashJoin [cd_education_status,cd_demo_sk,wr_returning_cdemo_sk,cd_education_status,cd_marital_status,cd_marital_status] + Project [wr_refunded_addr_sk,wr_reason_sk,ws_quantity,ws_net_profit,cd_marital_status,wr_refunded_cash,ws_sold_date_sk,wr_fee,wr_returning_cdemo_sk,cd_education_status] + BroadcastHashJoin 
[cd_education_status,ws_sales_price,cd_demo_sk,cd_marital_status,wr_refunded_cdemo_sk] + Project [wr_refunded_addr_sk,wr_reason_sk,ws_quantity,ws_net_profit,wr_refunded_cdemo_sk,wr_refunded_cash,ws_sold_date_sk,wr_fee,wr_returning_cdemo_sk,ws_sales_price] + BroadcastHashJoin [ws_web_page_sk,wp_web_page_sk] + Project [wr_refunded_addr_sk,wr_reason_sk,ws_web_page_sk,ws_quantity,ws_net_profit,wr_refunded_cdemo_sk,wr_refunded_cash,ws_sold_date_sk,wr_fee,wr_returning_cdemo_sk,ws_sales_price] + BroadcastHashJoin [ws_item_sk,ws_order_number,wr_item_sk,wr_order_number] + Project [ws_web_page_sk,ws_quantity,ws_net_profit,ws_order_number,ws_sold_date_sk,ws_sales_price,ws_item_sk] + Filter [ws_item_sk,ws_order_number,ws_web_page_sk,ws_sold_date_sk] + Scan parquet default.web_sales [ws_web_page_sk,ws_quantity,ws_net_profit,ws_order_number,ws_sold_date_sk,ws_sales_price,ws_item_sk] [ws_web_page_sk,ws_quantity,ws_net_profit,ws_order_number,ws_sold_date_sk,ws_sales_price,ws_item_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen + Project [wr_order_number,wr_refunded_addr_sk,wr_reason_sk,wr_refunded_cdemo_sk,wr_refunded_cash,wr_fee,wr_returning_cdemo_sk,wr_item_sk] + Filter [wr_item_sk,wr_reason_sk,wr_order_number,wr_returning_cdemo_sk,wr_refunded_addr_sk,wr_refunded_cdemo_sk] + Scan parquet default.web_returns [wr_order_number,wr_refunded_addr_sk,wr_reason_sk,wr_refunded_cdemo_sk,wr_refunded_cash,wr_fee,wr_returning_cdemo_sk,wr_item_sk] [wr_order_number,wr_refunded_addr_sk,wr_reason_sk,wr_refunded_cdemo_sk,wr_refunded_cash,wr_fee,wr_returning_cdemo_sk,wr_item_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen + Project [wp_web_page_sk] + Filter [wp_web_page_sk] + Scan parquet default.web_page [wp_web_page_sk] [wp_web_page_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen + Project [cd_demo_sk,cd_marital_status,cd_education_status] + Filter [cd_demo_sk,cd_education_status,cd_marital_status] + Scan parquet default.customer_demographics 
[cd_demo_sk,cd_marital_status,cd_education_status] [cd_demo_sk,cd_marital_status,cd_education_status] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen + Project [cd_demo_sk,cd_marital_status,cd_education_status] + Filter [cd_education_status,cd_demo_sk,cd_marital_status] + Scan parquet default.customer_demographics [cd_demo_sk,cd_marital_status,cd_education_status] [cd_demo_sk,cd_marital_status,cd_education_status] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen + Project [ca_address_sk,ca_state] + Filter [ca_country,ca_address_sk] + Scan parquet default.customer_address [ca_address_sk,ca_state,ca_country] [ca_address_sk,ca_state,ca_country] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen + Project [d_date_sk] + Filter [d_year,d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_year] [d_date_sk,d_year] + InputAdapter + BroadcastExchange #8 + WholeStageCodegen + Project [r_reason_sk,r_reason_desc] + Filter [r_reason_sk] + Scan parquet default.reason [r_reason_sk,r_reason_desc] [r_reason_sk,r_reason_desc] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q86/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q86/explain.txt new file mode 100644 index 000000000..9a562efa3 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q86/explain.txt @@ -0,0 +1,25 @@ +== Physical Plan == +TakeOrderedAndProject(limit=100, orderBy=[lochierarchy#1 DESC NULLS LAST,CASE WHEN (cast(lochierarchy#1 as int) = 0) THEN i_category#2 END ASC NULLS FIRST,rank_within_parent#3 ASC NULLS FIRST], output=[total_sum#4,i_category#2,i_class#5,lochierarchy#1,rank_within_parent#3]) ++- *(6) Project [total_sum#4, i_category#2, i_class#5, lochierarchy#1, rank_within_parent#3] + +- Window [rank(_w3#6) windowspecdefinition(_w1#7, _w2#8, _w3#6 DESC NULLS LAST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rank_within_parent#3], [_w1#7, _w2#8], [_w3#6 DESC NULLS LAST] + +- *(5) 
Sort [_w1#7 ASC NULLS FIRST, _w2#8 ASC NULLS FIRST, _w3#6 DESC NULLS LAST], false, 0 + +- Exchange hashpartitioning(_w1#7, _w2#8, 200) + +- *(4) HashAggregate(keys=[i_category#2, i_class#5, spark_grouping_id#9], functions=[sum(UnscaledValue(ws_net_paid#10))]) + +- Exchange hashpartitioning(i_category#2, i_class#5, spark_grouping_id#9, 200) + +- *(3) HashAggregate(keys=[i_category#2, i_class#5, spark_grouping_id#9], functions=[partial_sum(UnscaledValue(ws_net_paid#10))]) + +- *(3) Expand [List(ws_net_paid#10, i_category#11, i_class#12, 0), List(ws_net_paid#10, i_category#11, null, 1), List(ws_net_paid#10, null, null, 3)], [ws_net_paid#10, i_category#2, i_class#5, spark_grouping_id#9] + +- *(3) Project [ws_net_paid#10, i_category#13 AS i_category#11, i_class#14 AS i_class#12] + +- *(3) BroadcastHashJoin [ws_item_sk#15], [i_item_sk#16], Inner, BuildRight + :- *(3) Project [ws_item_sk#15, ws_net_paid#10] + : +- *(3) BroadcastHashJoin [ws_sold_date_sk#17], [d_date_sk#18], Inner, BuildRight + : :- *(3) Project [ws_sold_date_sk#17, ws_item_sk#15, ws_net_paid#10] + : : +- *(3) Filter (isnotnull(ws_sold_date_sk#17) && isnotnull(ws_item_sk#15)) + : : +- *(3) FileScan parquet default.web_sales[ws_sold_date_sk#17,ws_item_sk#15,ws_net_paid#10] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_sold_date_sk), IsNotNull(ws_item_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(1) Project [d_date_sk#18] + : +- *(1) Filter (((isnotnull(d_month_seq#19) && (d_month_seq#19 >= 1200)) && (d_month_seq#19 <= 1211)) && isnotnull(d_date_sk#18)) + : +- *(1) FileScan parquet default.date_dim[d_date_sk#18,d_month_seq#19] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: 
[IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1200), LessThanOrEqual(d_month_seq,1211),..., ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(2) Project [i_item_sk#16, i_class#14, i_category#13] + +- *(2) Filter isnotnull(i_item_sk#16) + +- *(2) FileScan parquet default.item[i_item_sk#16,i_class#14,i_category#13] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q86/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q86/simplified.txt new file mode 100644 index 000000000..71aae564f --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q86/simplified.txt @@ -0,0 +1,35 @@ +TakeOrderedAndProject [i_category,total_sum,rank_within_parent,lochierarchy,i_class] + WholeStageCodegen + Project [i_category,total_sum,rank_within_parent,lochierarchy,i_class] + InputAdapter + Window [_w3,_w1,_w2] + WholeStageCodegen + Sort [_w1,_w2,_w3] + InputAdapter + Exchange [_w1,_w2] #1 + WholeStageCodegen + HashAggregate [i_category,sum(UnscaledValue(ws_net_paid)),sum,i_class,spark_grouping_id] [_w1,total_sum,sum(UnscaledValue(ws_net_paid)),sum,_w2,lochierarchy,_w3] + InputAdapter + Exchange [i_category,i_class,spark_grouping_id] #2 + WholeStageCodegen + HashAggregate [i_category,sum,ws_net_paid,i_class,sum,spark_grouping_id] [sum,sum] + Expand [ws_net_paid,i_category,i_class] + Project [ws_net_paid,i_category,i_class] + BroadcastHashJoin [ws_item_sk,i_item_sk] + Project [ws_item_sk,ws_net_paid] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Project [ws_sold_date_sk,ws_item_sk,ws_net_paid] + Filter [ws_sold_date_sk,ws_item_sk] + Scan parquet default.web_sales [ws_sold_date_sk,ws_item_sk,ws_net_paid] 
[ws_sold_date_sk,ws_item_sk,ws_net_paid] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen + Project [d_date_sk] + Filter [d_month_seq,d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_month_seq] [d_date_sk,d_month_seq] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen + Project [i_item_sk,i_class,i_category] + Filter [i_item_sk] + Scan parquet default.item [i_item_sk,i_class,i_category] [i_item_sk,i_class,i_category] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q87/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q87/explain.txt new file mode 100644 index 000000000..2868a7ac8 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q87/explain.txt @@ -0,0 +1,54 @@ +== Physical Plan == +*(13) HashAggregate(keys=[], functions=[count(1)]) ++- Exchange SinglePartition + +- *(12) HashAggregate(keys=[], functions=[partial_count(1)]) + +- *(12) HashAggregate(keys=[c_last_name#1, c_first_name#2, d_date#3], functions=[]) + +- *(12) HashAggregate(keys=[c_last_name#1, c_first_name#2, d_date#3], functions=[]) + +- *(12) BroadcastHashJoin [coalesce(c_last_name#1, ), coalesce(c_first_name#2, ), coalesce(d_date#3, 0)], [coalesce(c_last_name#4, ), coalesce(c_first_name#5, ), coalesce(d_date#6, 0)], LeftAnti, BuildRight, (((c_last_name#1 <=> c_last_name#4) && (c_first_name#2 <=> c_first_name#5)) && (d_date#3 <=> d_date#6)) + :- *(12) HashAggregate(keys=[c_last_name#1, c_first_name#2, d_date#3], functions=[]) + : +- *(12) HashAggregate(keys=[c_last_name#1, c_first_name#2, d_date#3], functions=[]) + : +- *(12) BroadcastHashJoin [coalesce(c_last_name#1, ), coalesce(c_first_name#2, ), coalesce(d_date#3, 0)], [coalesce(c_last_name#7, ), coalesce(c_first_name#8, ), coalesce(d_date#9, 0)], LeftAnti, BuildRight, (((c_last_name#1 <=> c_last_name#7) && (c_first_name#2 <=> c_first_name#8)) && (d_date#3 <=> d_date#9)) + : :- *(12) HashAggregate(keys=[c_last_name#1, c_first_name#2, d_date#3], 
functions=[]) + : : +- Exchange hashpartitioning(c_last_name#1, c_first_name#2, d_date#3, 200) + : : +- *(3) HashAggregate(keys=[c_last_name#1, c_first_name#2, d_date#3], functions=[]) + : : +- *(3) Project [c_last_name#1, c_first_name#2, d_date#3] + : : +- *(3) BroadcastHashJoin [ss_customer_sk#10], [c_customer_sk#11], Inner, BuildRight + : : :- *(3) Project [ss_customer_sk#10, d_date#3] + : : : +- *(3) BroadcastHashJoin [ss_sold_date_sk#12], [d_date_sk#13], Inner, BuildRight + : : : :- *(3) Project [ss_sold_date_sk#12, ss_customer_sk#10] + : : : : +- *(3) Filter (isnotnull(ss_sold_date_sk#12) && isnotnull(ss_customer_sk#10)) + : : : : +- *(3) FileScan parquet default.store_sales[ss_sold_date_sk#12,ss_customer_sk#10] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_customer_sk)], ReadSchema: struct + : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : +- *(1) Project [d_date_sk#13, d_date#3] + : : : +- *(1) Filter (((isnotnull(d_month_seq#14) && (d_month_seq#14 >= 1200)) && (d_month_seq#14 <= 1211)) && isnotnull(d_date_sk#13)) + : : : +- *(1) FileScan parquet default.date_dim[d_date_sk#13,d_date#3,d_month_seq#14] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1200), LessThanOrEqual(d_month_seq,1211),..., ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(2) Project [c_customer_sk#11, c_first_name#2, c_last_name#1] + : : +- *(2) Filter isnotnull(c_customer_sk#11) + : : +- *(2) FileScan parquet default.customer[c_customer_sk#11,c_first_name#2,c_last_name#1] Batched: true, Format: Parquet, Location: 
InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(coalesce(input[0, string, true], ), coalesce(input[1, string, true], ), coalesce(input[2, date, true], 0))) + : +- *(7) HashAggregate(keys=[c_last_name#7, c_first_name#8, d_date#9], functions=[]) + : +- Exchange hashpartitioning(c_last_name#7, c_first_name#8, d_date#9, 200) + : +- *(6) HashAggregate(keys=[c_last_name#7, c_first_name#8, d_date#9], functions=[]) + : +- *(6) Project [c_last_name#7, c_first_name#8, d_date#9] + : +- *(6) BroadcastHashJoin [cs_bill_customer_sk#15], [c_customer_sk#16], Inner, BuildRight + : :- *(6) Project [cs_bill_customer_sk#15, d_date#9] + : : +- *(6) BroadcastHashJoin [cs_sold_date_sk#17], [d_date_sk#18], Inner, BuildRight + : : :- *(6) Project [cs_sold_date_sk#17, cs_bill_customer_sk#15] + : : : +- *(6) Filter (isnotnull(cs_sold_date_sk#17) && isnotnull(cs_bill_customer_sk#15)) + : : : +- *(6) FileScan parquet default.catalog_sales[cs_sold_date_sk#17,cs_bill_customer_sk#15] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_sold_date_sk), IsNotNull(cs_bill_customer_sk)], ReadSchema: struct + : : +- ReusedExchange [d_date_sk#18, d_date#9], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- ReusedExchange [c_customer_sk#16, c_first_name#8, c_last_name#7], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- BroadcastExchange HashedRelationBroadcastMode(List(coalesce(input[0, string, true], ), coalesce(input[1, string, true], ), coalesce(input[2, date, true], 0))) + +- *(11) HashAggregate(keys=[c_last_name#4, c_first_name#5, d_date#6], functions=[]) + +- Exchange hashpartitioning(c_last_name#4, 
c_first_name#5, d_date#6, 200) + +- *(10) HashAggregate(keys=[c_last_name#4, c_first_name#5, d_date#6], functions=[]) + +- *(10) Project [c_last_name#4, c_first_name#5, d_date#6] + +- *(10) BroadcastHashJoin [ws_bill_customer_sk#19], [c_customer_sk#20], Inner, BuildRight + :- *(10) Project [ws_bill_customer_sk#19, d_date#6] + : +- *(10) BroadcastHashJoin [ws_sold_date_sk#21], [d_date_sk#22], Inner, BuildRight + : :- *(10) Project [ws_sold_date_sk#21, ws_bill_customer_sk#19] + : : +- *(10) Filter (isnotnull(ws_sold_date_sk#21) && isnotnull(ws_bill_customer_sk#19)) + : : +- *(10) FileScan parquet default.web_sales[ws_sold_date_sk#21,ws_bill_customer_sk#19] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_sold_date_sk), IsNotNull(ws_bill_customer_sk)], ReadSchema: struct + : +- ReusedExchange [d_date_sk#22, d_date#6], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- ReusedExchange [c_customer_sk#20, c_first_name#5, c_last_name#4], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q87/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q87/simplified.txt new file mode 100644 index 000000000..c5008bdd8 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q87/simplified.txt @@ -0,0 +1,74 @@ +WholeStageCodegen + HashAggregate [count,count(1)] [count(1),count(1),count] + InputAdapter + Exchange #1 + WholeStageCodegen + HashAggregate [count,count] [count,count] + HashAggregate [c_last_name,c_first_name,d_date] + HashAggregate [c_last_name,c_first_name,d_date] + BroadcastHashJoin [c_first_name,d_date,d_date,c_last_name,c_last_name,c_first_name] + HashAggregate [c_last_name,c_first_name,d_date] + HashAggregate 
[c_last_name,c_first_name,d_date] + BroadcastHashJoin [d_date,c_last_name,c_first_name,c_last_name,c_first_name,d_date] + HashAggregate [c_last_name,c_first_name,d_date] + InputAdapter + Exchange [c_last_name,c_first_name,d_date] #2 + WholeStageCodegen + HashAggregate [c_last_name,c_first_name,d_date] + Project [c_last_name,c_first_name,d_date] + BroadcastHashJoin [ss_customer_sk,c_customer_sk] + Project [ss_customer_sk,d_date] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_sold_date_sk,ss_customer_sk] + Filter [ss_sold_date_sk,ss_customer_sk] + Scan parquet default.store_sales [ss_sold_date_sk,ss_customer_sk] [ss_sold_date_sk,ss_customer_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen + Project [d_date_sk,d_date] + Filter [d_month_seq,d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_date,d_month_seq] [d_date_sk,d_date,d_month_seq] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen + Project [c_customer_sk,c_first_name,c_last_name] + Filter [c_customer_sk] + Scan parquet default.customer [c_customer_sk,c_first_name,c_last_name] [c_customer_sk,c_first_name,c_last_name] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen + HashAggregate [c_last_name,c_first_name,d_date] + InputAdapter + Exchange [c_last_name,c_first_name,d_date] #6 + WholeStageCodegen + HashAggregate [c_last_name,c_first_name,d_date] + Project [c_last_name,c_first_name,d_date] + BroadcastHashJoin [cs_bill_customer_sk,c_customer_sk] + Project [cs_bill_customer_sk,d_date] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_sold_date_sk,cs_bill_customer_sk] + Filter [cs_sold_date_sk,cs_bill_customer_sk] + Scan parquet default.catalog_sales [cs_sold_date_sk,cs_bill_customer_sk] [cs_sold_date_sk,cs_bill_customer_sk] + InputAdapter + ReusedExchange [d_date_sk,d_date] [d_date_sk,d_date] #3 + InputAdapter + ReusedExchange [c_customer_sk,c_first_name,c_last_name] [c_customer_sk,c_first_name,c_last_name] #4 + InputAdapter + BroadcastExchange #7 + 
WholeStageCodegen + HashAggregate [c_last_name,c_first_name,d_date] + InputAdapter + Exchange [c_last_name,c_first_name,d_date] #8 + WholeStageCodegen + HashAggregate [c_last_name,c_first_name,d_date] + Project [c_last_name,c_first_name,d_date] + BroadcastHashJoin [ws_bill_customer_sk,c_customer_sk] + Project [ws_bill_customer_sk,d_date] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Project [ws_sold_date_sk,ws_bill_customer_sk] + Filter [ws_sold_date_sk,ws_bill_customer_sk] + Scan parquet default.web_sales [ws_sold_date_sk,ws_bill_customer_sk] [ws_sold_date_sk,ws_bill_customer_sk] + InputAdapter + ReusedExchange [d_date_sk,d_date] [d_date_sk,d_date] #3 + InputAdapter + ReusedExchange [c_customer_sk,c_first_name,c_last_name] [c_customer_sk,c_first_name,c_last_name] #4 diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q88/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q88/explain.txt new file mode 100644 index 000000000..d729f0b3f --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q88/explain.txt @@ -0,0 +1,165 @@ +== Physical Plan == +BroadcastNestedLoopJoin BuildRight, Inner +:- BroadcastNestedLoopJoin BuildRight, Inner +: :- BroadcastNestedLoopJoin BuildRight, Inner +: : :- BroadcastNestedLoopJoin BuildRight, Inner +: : : :- BroadcastNestedLoopJoin BuildRight, Inner +: : : : :- BroadcastNestedLoopJoin BuildRight, Inner +: : : : : :- BroadcastNestedLoopJoin BuildRight, Inner +: : : : : : :- *(5) HashAggregate(keys=[], functions=[count(1)]) +: : : : : : : +- Exchange SinglePartition +: : : : : : : +- *(4) HashAggregate(keys=[], functions=[partial_count(1)]) +: : : : : : : +- *(4) Project +: : : : : : : +- *(4) BroadcastHashJoin [ss_store_sk#1], [s_store_sk#2], Inner, BuildRight +: : : : : : : :- *(4) Project [ss_store_sk#1] +: : : : : : : : +- *(4) BroadcastHashJoin [ss_sold_time_sk#3], [t_time_sk#4], Inner, BuildRight +: : : : : : : : :- *(4) Project [ss_sold_time_sk#3, 
ss_store_sk#1] +: : : : : : : : : +- *(4) BroadcastHashJoin [ss_hdemo_sk#5], [hd_demo_sk#6], Inner, BuildRight +: : : : : : : : : :- *(4) Project [ss_sold_time_sk#3, ss_hdemo_sk#5, ss_store_sk#1] +: : : : : : : : : : +- *(4) Filter ((isnotnull(ss_hdemo_sk#5) && isnotnull(ss_sold_time_sk#3)) && isnotnull(ss_store_sk#1)) +: : : : : : : : : : +- *(4) FileScan parquet default.store_sales[ss_sold_time_sk#3,ss_hdemo_sk#5,ss_store_sk#1] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_hdemo_sk), IsNotNull(ss_sold_time_sk), IsNotNull(ss_store_sk)], ReadSchema: struct +: : : : : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) +: : : : : : : : : +- *(1) Project [hd_demo_sk#6] +: : : : : : : : : +- *(1) Filter (((((hd_dep_count#7 = 4) && (hd_vehicle_count#8 <= 6)) || ((hd_dep_count#7 = 2) && (hd_vehicle_count#8 <= 4))) || ((hd_dep_count#7 = 0) && (hd_vehicle_count#8 <= 2))) && isnotnull(hd_demo_sk#6)) +: : : : : : : : : +- *(1) FileScan parquet default.household_demographics[hd_demo_sk#6,hd_dep_count#7,hd_vehicle_count#8] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/household_demographics], PartitionFilters: [], PushedFilters: [Or(Or(And(EqualTo(hd_dep_count,4),LessThanOrEqual(hd_vehicle_count,6)),And(EqualTo(hd_dep_count,..., ReadSchema: struct +: : : : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) +: : : : : : : : +- *(2) Project [t_time_sk#4] +: : : : : : : : +- *(2) Filter ((((isnotnull(t_hour#9) && isnotnull(t_minute#10)) && (t_hour#9 = 8)) && (t_minute#10 >= 30)) && isnotnull(t_time_sk#4)) +: : : : : : : : +- *(2) FileScan parquet default.time_dim[t_time_sk#4,t_hour#9,t_minute#10] Batched: true, Format: Parquet, Location: 
InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/time_dim], PartitionFilters: [], PushedFilters: [IsNotNull(t_hour), IsNotNull(t_minute), EqualTo(t_hour,8), GreaterThanOrEqual(t_minute,30), IsNo..., ReadSchema: struct +: : : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) +: : : : : : : +- *(3) Project [s_store_sk#2] +: : : : : : : +- *(3) Filter ((isnotnull(s_store_name#11) && (s_store_name#11 = ese)) && isnotnull(s_store_sk#2)) +: : : : : : : +- *(3) FileScan parquet default.store[s_store_sk#2,s_store_name#11] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_store_name), EqualTo(s_store_name,ese), IsNotNull(s_store_sk)], ReadSchema: struct +: : : : : : +- BroadcastExchange IdentityBroadcastMode +: : : : : : +- *(10) HashAggregate(keys=[], functions=[count(1)]) +: : : : : : +- Exchange SinglePartition +: : : : : : +- *(9) HashAggregate(keys=[], functions=[partial_count(1)]) +: : : : : : +- *(9) Project +: : : : : : +- *(9) BroadcastHashJoin [ss_store_sk#1], [s_store_sk#2], Inner, BuildRight +: : : : : : :- *(9) Project [ss_store_sk#1] +: : : : : : : +- *(9) BroadcastHashJoin [ss_sold_time_sk#3], [t_time_sk#4], Inner, BuildRight +: : : : : : : :- *(9) Project [ss_sold_time_sk#3, ss_store_sk#1] +: : : : : : : : +- *(9) BroadcastHashJoin [ss_hdemo_sk#5], [hd_demo_sk#6], Inner, BuildRight +: : : : : : : : :- *(9) Project [ss_sold_time_sk#3, ss_hdemo_sk#5, ss_store_sk#1] +: : : : : : : : : +- *(9) Filter ((isnotnull(ss_hdemo_sk#5) && isnotnull(ss_sold_time_sk#3)) && isnotnull(ss_store_sk#1)) +: : : : : : : : : +- *(9) FileScan parquet default.store_sales[ss_sold_time_sk#3,ss_hdemo_sk#5,ss_store_sk#1] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: 
[IsNotNull(ss_hdemo_sk), IsNotNull(ss_sold_time_sk), IsNotNull(ss_store_sk)], ReadSchema: struct +: : : : : : : : +- ReusedExchange [hd_demo_sk#6], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) +: : : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) +: : : : : : : +- *(7) Project [t_time_sk#4] +: : : : : : : +- *(7) Filter ((((isnotnull(t_hour#9) && isnotnull(t_minute#10)) && (t_hour#9 = 9)) && (t_minute#10 < 30)) && isnotnull(t_time_sk#4)) +: : : : : : : +- *(7) FileScan parquet default.time_dim[t_time_sk#4,t_hour#9,t_minute#10] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/time_dim], PartitionFilters: [], PushedFilters: [IsNotNull(t_hour), IsNotNull(t_minute), EqualTo(t_hour,9), LessThan(t_minute,30), IsNotNull(t_ti..., ReadSchema: struct +: : : : : : +- ReusedExchange [s_store_sk#2], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) +: : : : : +- BroadcastExchange IdentityBroadcastMode +: : : : : +- *(15) HashAggregate(keys=[], functions=[count(1)]) +: : : : : +- Exchange SinglePartition +: : : : : +- *(14) HashAggregate(keys=[], functions=[partial_count(1)]) +: : : : : +- *(14) Project +: : : : : +- *(14) BroadcastHashJoin [ss_store_sk#1], [s_store_sk#2], Inner, BuildRight +: : : : : :- *(14) Project [ss_store_sk#1] +: : : : : : +- *(14) BroadcastHashJoin [ss_sold_time_sk#3], [t_time_sk#4], Inner, BuildRight +: : : : : : :- *(14) Project [ss_sold_time_sk#3, ss_store_sk#1] +: : : : : : : +- *(14) BroadcastHashJoin [ss_hdemo_sk#5], [hd_demo_sk#6], Inner, BuildRight +: : : : : : : :- *(14) Project [ss_sold_time_sk#3, ss_hdemo_sk#5, ss_store_sk#1] +: : : : : : : : +- *(14) Filter ((isnotnull(ss_hdemo_sk#5) && isnotnull(ss_sold_time_sk#3)) && isnotnull(ss_store_sk#1)) +: : : : : : : : +- *(14) FileScan parquet 
default.store_sales[ss_sold_time_sk#3,ss_hdemo_sk#5,ss_store_sk#1] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_hdemo_sk), IsNotNull(ss_sold_time_sk), IsNotNull(ss_store_sk)], ReadSchema: struct +: : : : : : : +- ReusedExchange [hd_demo_sk#6], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) +: : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) +: : : : : : +- *(12) Project [t_time_sk#4] +: : : : : : +- *(12) Filter ((((isnotnull(t_hour#9) && isnotnull(t_minute#10)) && (t_hour#9 = 9)) && (t_minute#10 >= 30)) && isnotnull(t_time_sk#4)) +: : : : : : +- *(12) FileScan parquet default.time_dim[t_time_sk#4,t_hour#9,t_minute#10] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/time_dim], PartitionFilters: [], PushedFilters: [IsNotNull(t_hour), IsNotNull(t_minute), EqualTo(t_hour,9), GreaterThanOrEqual(t_minute,30), IsNo..., ReadSchema: struct +: : : : : +- ReusedExchange [s_store_sk#2], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) +: : : : +- BroadcastExchange IdentityBroadcastMode +: : : : +- *(20) HashAggregate(keys=[], functions=[count(1)]) +: : : : +- Exchange SinglePartition +: : : : +- *(19) HashAggregate(keys=[], functions=[partial_count(1)]) +: : : : +- *(19) Project +: : : : +- *(19) BroadcastHashJoin [ss_store_sk#1], [s_store_sk#2], Inner, BuildRight +: : : : :- *(19) Project [ss_store_sk#1] +: : : : : +- *(19) BroadcastHashJoin [ss_sold_time_sk#3], [t_time_sk#4], Inner, BuildRight +: : : : : :- *(19) Project [ss_sold_time_sk#3, ss_store_sk#1] +: : : : : : +- *(19) BroadcastHashJoin [ss_hdemo_sk#5], [hd_demo_sk#6], Inner, BuildRight +: : : : : : :- *(19) Project [ss_sold_time_sk#3, ss_hdemo_sk#5, ss_store_sk#1] +: : : : : : : 
+- *(19) Filter ((isnotnull(ss_hdemo_sk#5) && isnotnull(ss_sold_time_sk#3)) && isnotnull(ss_store_sk#1)) +: : : : : : : +- *(19) FileScan parquet default.store_sales[ss_sold_time_sk#3,ss_hdemo_sk#5,ss_store_sk#1] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_hdemo_sk), IsNotNull(ss_sold_time_sk), IsNotNull(ss_store_sk)], ReadSchema: struct +: : : : : : +- ReusedExchange [hd_demo_sk#6], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) +: : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) +: : : : : +- *(17) Project [t_time_sk#4] +: : : : : +- *(17) Filter ((((isnotnull(t_hour#9) && isnotnull(t_minute#10)) && (t_hour#9 = 10)) && (t_minute#10 < 30)) && isnotnull(t_time_sk#4)) +: : : : : +- *(17) FileScan parquet default.time_dim[t_time_sk#4,t_hour#9,t_minute#10] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/time_dim], PartitionFilters: [], PushedFilters: [IsNotNull(t_hour), IsNotNull(t_minute), EqualTo(t_hour,10), LessThan(t_minute,30), IsNotNull(t_t..., ReadSchema: struct +: : : : +- ReusedExchange [s_store_sk#2], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) +: : : +- BroadcastExchange IdentityBroadcastMode +: : : +- *(25) HashAggregate(keys=[], functions=[count(1)]) +: : : +- Exchange SinglePartition +: : : +- *(24) HashAggregate(keys=[], functions=[partial_count(1)]) +: : : +- *(24) Project +: : : +- *(24) BroadcastHashJoin [ss_store_sk#1], [s_store_sk#2], Inner, BuildRight +: : : :- *(24) Project [ss_store_sk#1] +: : : : +- *(24) BroadcastHashJoin [ss_sold_time_sk#3], [t_time_sk#4], Inner, BuildRight +: : : : :- *(24) Project [ss_sold_time_sk#3, ss_store_sk#1] +: : : : : +- *(24) BroadcastHashJoin [ss_hdemo_sk#5], 
[hd_demo_sk#6], Inner, BuildRight +: : : : : :- *(24) Project [ss_sold_time_sk#3, ss_hdemo_sk#5, ss_store_sk#1] +: : : : : : +- *(24) Filter ((isnotnull(ss_hdemo_sk#5) && isnotnull(ss_sold_time_sk#3)) && isnotnull(ss_store_sk#1)) +: : : : : : +- *(24) FileScan parquet default.store_sales[ss_sold_time_sk#3,ss_hdemo_sk#5,ss_store_sk#1] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_hdemo_sk), IsNotNull(ss_sold_time_sk), IsNotNull(ss_store_sk)], ReadSchema: struct +: : : : : +- ReusedExchange [hd_demo_sk#6], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) +: : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) +: : : : +- *(22) Project [t_time_sk#4] +: : : : +- *(22) Filter ((((isnotnull(t_hour#9) && isnotnull(t_minute#10)) && (t_hour#9 = 10)) && (t_minute#10 >= 30)) && isnotnull(t_time_sk#4)) +: : : : +- *(22) FileScan parquet default.time_dim[t_time_sk#4,t_hour#9,t_minute#10] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/time_dim], PartitionFilters: [], PushedFilters: [IsNotNull(t_hour), IsNotNull(t_minute), EqualTo(t_hour,10), GreaterThanOrEqual(t_minute,30), IsN..., ReadSchema: struct +: : : +- ReusedExchange [s_store_sk#2], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) +: : +- BroadcastExchange IdentityBroadcastMode +: : +- *(30) HashAggregate(keys=[], functions=[count(1)]) +: : +- Exchange SinglePartition +: : +- *(29) HashAggregate(keys=[], functions=[partial_count(1)]) +: : +- *(29) Project +: : +- *(29) BroadcastHashJoin [ss_store_sk#1], [s_store_sk#2], Inner, BuildRight +: : :- *(29) Project [ss_store_sk#1] +: : : +- *(29) BroadcastHashJoin [ss_sold_time_sk#3], [t_time_sk#4], Inner, BuildRight +: : : :- *(29) Project 
[ss_sold_time_sk#3, ss_store_sk#1] +: : : : +- *(29) BroadcastHashJoin [ss_hdemo_sk#5], [hd_demo_sk#6], Inner, BuildRight +: : : : :- *(29) Project [ss_sold_time_sk#3, ss_hdemo_sk#5, ss_store_sk#1] +: : : : : +- *(29) Filter ((isnotnull(ss_hdemo_sk#5) && isnotnull(ss_sold_time_sk#3)) && isnotnull(ss_store_sk#1)) +: : : : : +- *(29) FileScan parquet default.store_sales[ss_sold_time_sk#3,ss_hdemo_sk#5,ss_store_sk#1] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_hdemo_sk), IsNotNull(ss_sold_time_sk), IsNotNull(ss_store_sk)], ReadSchema: struct +: : : : +- ReusedExchange [hd_demo_sk#6], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) +: : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) +: : : +- *(27) Project [t_time_sk#4] +: : : +- *(27) Filter ((((isnotnull(t_hour#9) && isnotnull(t_minute#10)) && (t_hour#9 = 11)) && (t_minute#10 < 30)) && isnotnull(t_time_sk#4)) +: : : +- *(27) FileScan parquet default.time_dim[t_time_sk#4,t_hour#9,t_minute#10] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/time_dim], PartitionFilters: [], PushedFilters: [IsNotNull(t_hour), IsNotNull(t_minute), EqualTo(t_hour,11), LessThan(t_minute,30), IsNotNull(t_t..., ReadSchema: struct +: : +- ReusedExchange [s_store_sk#2], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) +: +- BroadcastExchange IdentityBroadcastMode +: +- *(35) HashAggregate(keys=[], functions=[count(1)]) +: +- Exchange SinglePartition +: +- *(34) HashAggregate(keys=[], functions=[partial_count(1)]) +: +- *(34) Project +: +- *(34) BroadcastHashJoin [ss_store_sk#1], [s_store_sk#2], Inner, BuildRight +: :- *(34) Project [ss_store_sk#1] +: : +- *(34) BroadcastHashJoin [ss_sold_time_sk#3], [t_time_sk#4], 
Inner, BuildRight +: : :- *(34) Project [ss_sold_time_sk#3, ss_store_sk#1] +: : : +- *(34) BroadcastHashJoin [ss_hdemo_sk#5], [hd_demo_sk#6], Inner, BuildRight +: : : :- *(34) Project [ss_sold_time_sk#3, ss_hdemo_sk#5, ss_store_sk#1] +: : : : +- *(34) Filter ((isnotnull(ss_hdemo_sk#5) && isnotnull(ss_sold_time_sk#3)) && isnotnull(ss_store_sk#1)) +: : : : +- *(34) FileScan parquet default.store_sales[ss_sold_time_sk#3,ss_hdemo_sk#5,ss_store_sk#1] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_hdemo_sk), IsNotNull(ss_sold_time_sk), IsNotNull(ss_store_sk)], ReadSchema: struct +: : : +- ReusedExchange [hd_demo_sk#6], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) +: : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) +: : +- *(32) Project [t_time_sk#4] +: : +- *(32) Filter ((((isnotnull(t_hour#9) && isnotnull(t_minute#10)) && (t_hour#9 = 11)) && (t_minute#10 >= 30)) && isnotnull(t_time_sk#4)) +: : +- *(32) FileScan parquet default.time_dim[t_time_sk#4,t_hour#9,t_minute#10] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/time_dim], PartitionFilters: [], PushedFilters: [IsNotNull(t_hour), IsNotNull(t_minute), EqualTo(t_hour,11), GreaterThanOrEqual(t_minute,30), IsN..., ReadSchema: struct +: +- ReusedExchange [s_store_sk#2], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) ++- BroadcastExchange IdentityBroadcastMode + +- *(40) HashAggregate(keys=[], functions=[count(1)]) + +- Exchange SinglePartition + +- *(39) HashAggregate(keys=[], functions=[partial_count(1)]) + +- *(39) Project + +- *(39) BroadcastHashJoin [ss_store_sk#1], [s_store_sk#2], Inner, BuildRight + :- *(39) Project [ss_store_sk#1] + : +- *(39) BroadcastHashJoin [ss_sold_time_sk#3], 
[t_time_sk#4], Inner, BuildRight + : :- *(39) Project [ss_sold_time_sk#3, ss_store_sk#1] + : : +- *(39) BroadcastHashJoin [ss_hdemo_sk#5], [hd_demo_sk#6], Inner, BuildRight + : : :- *(39) Project [ss_sold_time_sk#3, ss_hdemo_sk#5, ss_store_sk#1] + : : : +- *(39) Filter ((isnotnull(ss_hdemo_sk#5) && isnotnull(ss_sold_time_sk#3)) && isnotnull(ss_store_sk#1)) + : : : +- *(39) FileScan parquet default.store_sales[ss_sold_time_sk#3,ss_hdemo_sk#5,ss_store_sk#1] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_hdemo_sk), IsNotNull(ss_sold_time_sk), IsNotNull(ss_store_sk)], ReadSchema: struct + : : +- ReusedExchange [hd_demo_sk#6], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(37) Project [t_time_sk#4] + : +- *(37) Filter ((((isnotnull(t_hour#9) && isnotnull(t_minute#10)) && (t_hour#9 = 12)) && (t_minute#10 < 30)) && isnotnull(t_time_sk#4)) + : +- *(37) FileScan parquet default.time_dim[t_time_sk#4,t_hour#9,t_minute#10] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/time_dim], PartitionFilters: [], PushedFilters: [IsNotNull(t_hour), IsNotNull(t_minute), EqualTo(t_hour,12), LessThan(t_minute,30), IsNotNull(t_t..., ReadSchema: struct + +- ReusedExchange [s_store_sk#2], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q88/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q88/simplified.txt new file mode 100644 index 000000000..0a4bce5c6 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q88/simplified.txt @@ -0,0 +1,222 @@ 
+BroadcastNestedLoopJoin + BroadcastNestedLoopJoin + BroadcastNestedLoopJoin + BroadcastNestedLoopJoin + BroadcastNestedLoopJoin + BroadcastNestedLoopJoin + BroadcastNestedLoopJoin + WholeStageCodegen + HashAggregate [count,count(1)] [count(1),h8_30_to_9,count] + InputAdapter + Exchange #1 + WholeStageCodegen + HashAggregate [count,count] [count,count] + Project + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_store_sk] + BroadcastHashJoin [ss_sold_time_sk,t_time_sk] + Project [ss_sold_time_sk,ss_store_sk] + BroadcastHashJoin [ss_hdemo_sk,hd_demo_sk] + Project [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk] + Filter [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] + Scan parquet default.store_sales [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk] [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen + Project [hd_demo_sk] + Filter [hd_dep_count,hd_vehicle_count,hd_demo_sk] + Scan parquet default.household_demographics [hd_demo_sk,hd_dep_count,hd_vehicle_count] [hd_demo_sk,hd_dep_count,hd_vehicle_count] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen + Project [t_time_sk] + Filter [t_hour,t_minute,t_time_sk] + Scan parquet default.time_dim [t_time_sk,t_hour,t_minute] [t_time_sk,t_hour,t_minute] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen + Project [s_store_sk] + Filter [s_store_name,s_store_sk] + Scan parquet default.store [s_store_sk,s_store_name] [s_store_sk,s_store_name] + BroadcastExchange #5 + WholeStageCodegen + HashAggregate [count,count(1)] [count(1),h9_to_9_30,count] + InputAdapter + Exchange #6 + WholeStageCodegen + HashAggregate [count,count] [count,count] + Project + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_store_sk] + BroadcastHashJoin [ss_sold_time_sk,t_time_sk] + Project [ss_sold_time_sk,ss_store_sk] + BroadcastHashJoin [ss_hdemo_sk,hd_demo_sk] + Project [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk] + Filter [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] + Scan parquet 
default.store_sales [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk] [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk] + InputAdapter + ReusedExchange [hd_demo_sk] [hd_demo_sk] #2 + InputAdapter + BroadcastExchange #7 + WholeStageCodegen + Project [t_time_sk] + Filter [t_hour,t_minute,t_time_sk] + Scan parquet default.time_dim [t_time_sk,t_hour,t_minute] [t_time_sk,t_hour,t_minute] + InputAdapter + ReusedExchange [s_store_sk] [s_store_sk] #4 + BroadcastExchange #8 + WholeStageCodegen + HashAggregate [count,count(1)] [count(1),h9_30_to_10,count] + InputAdapter + Exchange #9 + WholeStageCodegen + HashAggregate [count,count] [count,count] + Project + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_store_sk] + BroadcastHashJoin [ss_sold_time_sk,t_time_sk] + Project [ss_sold_time_sk,ss_store_sk] + BroadcastHashJoin [ss_hdemo_sk,hd_demo_sk] + Project [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk] + Filter [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] + Scan parquet default.store_sales [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk] [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk] + InputAdapter + ReusedExchange [hd_demo_sk] [hd_demo_sk] #2 + InputAdapter + BroadcastExchange #10 + WholeStageCodegen + Project [t_time_sk] + Filter [t_hour,t_minute,t_time_sk] + Scan parquet default.time_dim [t_time_sk,t_hour,t_minute] [t_time_sk,t_hour,t_minute] + InputAdapter + ReusedExchange [s_store_sk] [s_store_sk] #4 + BroadcastExchange #11 + WholeStageCodegen + HashAggregate [count,count(1)] [count(1),h10_to_10_30,count] + InputAdapter + Exchange #12 + WholeStageCodegen + HashAggregate [count,count] [count,count] + Project + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_store_sk] + BroadcastHashJoin [ss_sold_time_sk,t_time_sk] + Project [ss_sold_time_sk,ss_store_sk] + BroadcastHashJoin [ss_hdemo_sk,hd_demo_sk] + Project [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk] + Filter [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] + Scan parquet default.store_sales [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk] 
[ss_sold_time_sk,ss_hdemo_sk,ss_store_sk] + InputAdapter + ReusedExchange [hd_demo_sk] [hd_demo_sk] #2 + InputAdapter + BroadcastExchange #13 + WholeStageCodegen + Project [t_time_sk] + Filter [t_hour,t_minute,t_time_sk] + Scan parquet default.time_dim [t_time_sk,t_hour,t_minute] [t_time_sk,t_hour,t_minute] + InputAdapter + ReusedExchange [s_store_sk] [s_store_sk] #4 + BroadcastExchange #14 + WholeStageCodegen + HashAggregate [count,count(1)] [count(1),h10_30_to_11,count] + InputAdapter + Exchange #15 + WholeStageCodegen + HashAggregate [count,count] [count,count] + Project + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_store_sk] + BroadcastHashJoin [ss_sold_time_sk,t_time_sk] + Project [ss_sold_time_sk,ss_store_sk] + BroadcastHashJoin [ss_hdemo_sk,hd_demo_sk] + Project [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk] + Filter [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] + Scan parquet default.store_sales [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk] [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk] + InputAdapter + ReusedExchange [hd_demo_sk] [hd_demo_sk] #2 + InputAdapter + BroadcastExchange #16 + WholeStageCodegen + Project [t_time_sk] + Filter [t_hour,t_minute,t_time_sk] + Scan parquet default.time_dim [t_time_sk,t_hour,t_minute] [t_time_sk,t_hour,t_minute] + InputAdapter + ReusedExchange [s_store_sk] [s_store_sk] #4 + BroadcastExchange #17 + WholeStageCodegen + HashAggregate [count,count(1)] [count(1),h11_to_11_30,count] + InputAdapter + Exchange #18 + WholeStageCodegen + HashAggregate [count,count] [count,count] + Project + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_store_sk] + BroadcastHashJoin [ss_sold_time_sk,t_time_sk] + Project [ss_sold_time_sk,ss_store_sk] + BroadcastHashJoin [ss_hdemo_sk,hd_demo_sk] + Project [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk] + Filter [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] + Scan parquet default.store_sales [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk] [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk] + InputAdapter + ReusedExchange 
[hd_demo_sk] [hd_demo_sk] #2 + InputAdapter + BroadcastExchange #19 + WholeStageCodegen + Project [t_time_sk] + Filter [t_hour,t_minute,t_time_sk] + Scan parquet default.time_dim [t_time_sk,t_hour,t_minute] [t_time_sk,t_hour,t_minute] + InputAdapter + ReusedExchange [s_store_sk] [s_store_sk] #4 + BroadcastExchange #20 + WholeStageCodegen + HashAggregate [count,count(1)] [count(1),h11_30_to_12,count] + InputAdapter + Exchange #21 + WholeStageCodegen + HashAggregate [count,count] [count,count] + Project + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_store_sk] + BroadcastHashJoin [ss_sold_time_sk,t_time_sk] + Project [ss_sold_time_sk,ss_store_sk] + BroadcastHashJoin [ss_hdemo_sk,hd_demo_sk] + Project [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk] + Filter [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] + Scan parquet default.store_sales [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk] [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk] + InputAdapter + ReusedExchange [hd_demo_sk] [hd_demo_sk] #2 + InputAdapter + BroadcastExchange #22 + WholeStageCodegen + Project [t_time_sk] + Filter [t_hour,t_minute,t_time_sk] + Scan parquet default.time_dim [t_time_sk,t_hour,t_minute] [t_time_sk,t_hour,t_minute] + InputAdapter + ReusedExchange [s_store_sk] [s_store_sk] #4 + BroadcastExchange #23 + WholeStageCodegen + HashAggregate [count,count(1)] [count(1),h12_to_12_30,count] + InputAdapter + Exchange #24 + WholeStageCodegen + HashAggregate [count,count] [count,count] + Project + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_store_sk] + BroadcastHashJoin [ss_sold_time_sk,t_time_sk] + Project [ss_sold_time_sk,ss_store_sk] + BroadcastHashJoin [ss_hdemo_sk,hd_demo_sk] + Project [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk] + Filter [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] + Scan parquet default.store_sales [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk] [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk] + InputAdapter + ReusedExchange [hd_demo_sk] [hd_demo_sk] #2 + InputAdapter + BroadcastExchange #25 + 
WholeStageCodegen + Project [t_time_sk] + Filter [t_hour,t_minute,t_time_sk] + Scan parquet default.time_dim [t_time_sk,t_hour,t_minute] [t_time_sk,t_hour,t_minute] + InputAdapter + ReusedExchange [s_store_sk] [s_store_sk] #4 diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q89/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q89/explain.txt new file mode 100644 index 000000000..4a69feebe --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q89/explain.txt @@ -0,0 +1,31 @@ +== Physical Plan == +TakeOrderedAndProject(limit=100, orderBy=[CheckOverflow((promote_precision(cast(sum_sales#1 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#2 as decimal(22,6)))), DecimalType(22,6)) ASC NULLS FIRST,s_store_name#3 ASC NULLS FIRST], output=[i_category#4,i_class#5,i_brand#6,s_store_name#3,s_company_name#7,d_moy#8,sum_sales#1,avg_monthly_sales#2]) ++- *(7) Project [i_category#4, i_class#5, i_brand#6, s_store_name#3, s_company_name#7, d_moy#8, sum_sales#1, avg_monthly_sales#2] + +- *(7) Filter (CASE WHEN NOT (avg_monthly_sales#2 = 0.000000) THEN CheckOverflow((promote_precision(abs(CheckOverflow((promote_precision(cast(sum_sales#1 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#2 as decimal(22,6)))), DecimalType(22,6)))) / promote_precision(cast(avg_monthly_sales#2 as decimal(22,6)))), DecimalType(38,16)) ELSE null END > 0.1000000000000000) + +- Window [avg(_w0#9) windowspecdefinition(i_category#4, i_brand#6, s_store_name#3, s_company_name#7, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS avg_monthly_sales#2], [i_category#4, i_brand#6, s_store_name#3, s_company_name#7] + +- *(6) Sort [i_category#4 ASC NULLS FIRST, i_brand#6 ASC NULLS FIRST, s_store_name#3 ASC NULLS FIRST, s_company_name#7 ASC NULLS FIRST], false, 0 + +- Exchange hashpartitioning(i_category#4, i_brand#6, s_store_name#3, s_company_name#7, 200) + +- *(5) 
HashAggregate(keys=[i_category#4, i_class#5, i_brand#6, s_store_name#3, s_company_name#7, d_moy#8], functions=[sum(UnscaledValue(ss_sales_price#10))]) + +- Exchange hashpartitioning(i_category#4, i_class#5, i_brand#6, s_store_name#3, s_company_name#7, d_moy#8, 200) + +- *(4) HashAggregate(keys=[i_category#4, i_class#5, i_brand#6, s_store_name#3, s_company_name#7, d_moy#8], functions=[partial_sum(UnscaledValue(ss_sales_price#10))]) + +- *(4) Project [i_brand#6, i_class#5, i_category#4, ss_sales_price#10, d_moy#8, s_store_name#3, s_company_name#7] + +- *(4) BroadcastHashJoin [ss_store_sk#11], [s_store_sk#12], Inner, BuildRight + :- *(4) Project [i_brand#6, i_class#5, i_category#4, ss_store_sk#11, ss_sales_price#10, d_moy#8] + : +- *(4) BroadcastHashJoin [ss_sold_date_sk#13], [d_date_sk#14], Inner, BuildRight + : :- *(4) Project [i_brand#6, i_class#5, i_category#4, ss_sold_date_sk#13, ss_store_sk#11, ss_sales_price#10] + : : +- *(4) BroadcastHashJoin [i_item_sk#15], [ss_item_sk#16], Inner, BuildRight + : : :- *(4) Project [i_item_sk#15, i_brand#6, i_class#5, i_category#4] + : : : +- *(4) Filter (((i_category#4 IN (Books,Electronics,Sports) && i_class#5 IN (computers,stereo,football)) || (i_category#4 IN (Men,Jewelry,Women) && i_class#5 IN (shirts,birdal,dresses))) && isnotnull(i_item_sk#15)) + : : : +- *(4) FileScan parquet default.item[i_item_sk#15,i_brand#6,i_class#5,i_category#4] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [Or(And(In(i_category, [Books,Electronics,Sports]),In(i_class, [computers,stereo,football])),And(..., ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[1, int, true] as bigint))) + : : +- *(1) Project [ss_sold_date_sk#13, ss_item_sk#16, ss_store_sk#11, ss_sales_price#10] + : : +- *(1) Filter ((isnotnull(ss_item_sk#16) && isnotnull(ss_sold_date_sk#13)) && isnotnull(ss_store_sk#11)) + : : +- 
*(1) FileScan parquet default.store_sales[ss_sold_date_sk#13,ss_item_sk#16,ss_store_sk#11,ss_sales_price#10] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(2) Project [d_date_sk#14, d_moy#8] + : +- *(2) Filter ((isnotnull(d_year#17) && (d_year#17 = 1999)) && isnotnull(d_date_sk#14)) + : +- *(2) FileScan parquet default.date_dim[d_date_sk#14,d_year#17,d_moy#8] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), EqualTo(d_year,1999), IsNotNull(d_date_sk)], ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(3) Project [s_store_sk#12, s_store_name#3, s_company_name#7] + +- *(3) Filter isnotnull(s_store_sk#12) + +- *(3) FileScan parquet default.store[s_store_sk#12,s_store_name#3,s_company_name#7] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_store_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q89/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q89/simplified.txt new file mode 100644 index 000000000..31fd0675f --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q89/simplified.txt @@ -0,0 +1,43 @@ +TakeOrderedAndProject [s_company_name,d_moy,sum_sales,i_brand,i_category,avg_monthly_sales,s_store_name,i_class] + WholeStageCodegen + Project 
[s_company_name,d_moy,sum_sales,i_brand,avg_monthly_sales,i_category,i_class,s_store_name] + Filter [avg_monthly_sales,sum_sales] + InputAdapter + Window [s_company_name,i_brand,i_category,s_store_name,_w0] + WholeStageCodegen + Sort [i_category,i_brand,s_store_name,s_company_name] + InputAdapter + Exchange [i_category,i_brand,s_store_name,s_company_name] #1 + WholeStageCodegen + HashAggregate [s_company_name,d_moy,i_brand,sum(UnscaledValue(ss_sales_price)),i_category,sum,i_class,s_store_name] [sum(UnscaledValue(ss_sales_price)),sum_sales,_w0,sum] + InputAdapter + Exchange [s_company_name,d_moy,i_brand,i_category,i_class,s_store_name] #2 + WholeStageCodegen + HashAggregate [s_company_name,d_moy,i_brand,ss_sales_price,i_category,sum,sum,i_class,s_store_name] [sum,sum] + Project [i_class,s_store_name,s_company_name,d_moy,i_category,ss_sales_price,i_brand] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [i_class,d_moy,ss_store_sk,i_category,ss_sales_price,i_brand] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [i_class,ss_store_sk,i_category,ss_sales_price,ss_sold_date_sk,i_brand] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Project [i_item_sk,i_brand,i_class,i_category] + Filter [i_category,i_class,i_item_sk] + Scan parquet default.item [i_item_sk,i_brand,i_class,i_category] [i_item_sk,i_brand,i_class,i_category] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen + Project [ss_sold_date_sk,ss_item_sk,ss_store_sk,ss_sales_price] + Filter [ss_item_sk,ss_sold_date_sk,ss_store_sk] + Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_store_sk,ss_sales_price] [ss_sold_date_sk,ss_item_sk,ss_store_sk,ss_sales_price] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen + Project [d_date_sk,d_moy] + Filter [d_year,d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_year,d_moy] [d_date_sk,d_year,d_moy] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen + Project [s_store_sk,s_store_name,s_company_name] + Filter [s_store_sk] 
+ Scan parquet default.store [s_store_sk,s_store_name,s_company_name] [s_store_sk,s_store_name,s_company_name] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q9/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q9/explain.txt new file mode 100644 index 000000000..0ed6b1727 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q9/explain.txt @@ -0,0 +1,109 @@ +== Physical Plan == +*(1) Project [CASE WHEN (Subquery subquery1150 > 62316685) THEN Subquery subquery1151 ELSE Subquery subquery1152 END AS bucket1#1, CASE WHEN (Subquery subquery1154 > 19045798) THEN Subquery subquery1155 ELSE Subquery subquery1156 END AS bucket2#2, CASE WHEN (Subquery subquery1158 > 365541424) THEN Subquery subquery1159 ELSE Subquery subquery1160 END AS bucket3#3, CASE WHEN (Subquery subquery1162 > 216357808) THEN Subquery subquery1163 ELSE Subquery subquery1164 END AS bucket4#4, CASE WHEN (Subquery subquery1166 > 184483884) THEN Subquery subquery1167 ELSE Subquery subquery1168 END AS bucket5#5] +: :- Subquery subquery1150 +: : +- *(2) HashAggregate(keys=[], functions=[count(1)]) +: : +- Exchange SinglePartition +: : +- *(1) HashAggregate(keys=[], functions=[partial_count(1)]) +: : +- *(1) Project +: : +- *(1) Filter ((isnotnull(ss_quantity#6) && (ss_quantity#6 >= 1)) && (ss_quantity#6 <= 20)) +: : +- *(1) FileScan parquet default.store_sales[ss_quantity#6] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,1), LessThanOrEqual(ss_quantity,20)], ReadSchema: struct +: :- Subquery subquery1151 +: : +- *(2) HashAggregate(keys=[], functions=[avg(UnscaledValue(ss_ext_discount_amt#7))]) +: : +- Exchange SinglePartition +: : +- *(1) HashAggregate(keys=[], functions=[partial_avg(UnscaledValue(ss_ext_discount_amt#7))]) +: : +- *(1) Project 
[ss_ext_discount_amt#7] +: : +- *(1) Filter ((isnotnull(ss_quantity#6) && (ss_quantity#6 >= 1)) && (ss_quantity#6 <= 20)) +: : +- *(1) FileScan parquet default.store_sales[ss_quantity#6,ss_ext_discount_amt#7] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,1), LessThanOrEqual(ss_quantity,20)], ReadSchema: struct +: :- Subquery subquery1152 +: : +- *(2) HashAggregate(keys=[], functions=[avg(UnscaledValue(ss_net_paid#8))]) +: : +- Exchange SinglePartition +: : +- *(1) HashAggregate(keys=[], functions=[partial_avg(UnscaledValue(ss_net_paid#8))]) +: : +- *(1) Project [ss_net_paid#8] +: : +- *(1) Filter ((isnotnull(ss_quantity#6) && (ss_quantity#6 >= 1)) && (ss_quantity#6 <= 20)) +: : +- *(1) FileScan parquet default.store_sales[ss_quantity#6,ss_net_paid#8] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,1), LessThanOrEqual(ss_quantity,20)], ReadSchema: struct +: :- Subquery subquery1154 +: : +- *(2) HashAggregate(keys=[], functions=[count(1)]) +: : +- Exchange SinglePartition +: : +- *(1) HashAggregate(keys=[], functions=[partial_count(1)]) +: : +- *(1) Project +: : +- *(1) Filter ((isnotnull(ss_quantity#6) && (ss_quantity#6 >= 21)) && (ss_quantity#6 <= 40)) +: : +- *(1) FileScan parquet default.store_sales[ss_quantity#6] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,21), LessThanOrEqual(ss_quantity,40)], ReadSchema: struct +: :- Subquery subquery1155 +: : +- *(2) HashAggregate(keys=[], functions=[avg(UnscaledValue(ss_ext_discount_amt#7))]) +: : 
+- Exchange SinglePartition +: : +- *(1) HashAggregate(keys=[], functions=[partial_avg(UnscaledValue(ss_ext_discount_amt#7))]) +: : +- *(1) Project [ss_ext_discount_amt#7] +: : +- *(1) Filter ((isnotnull(ss_quantity#6) && (ss_quantity#6 >= 21)) && (ss_quantity#6 <= 40)) +: : +- *(1) FileScan parquet default.store_sales[ss_quantity#6,ss_ext_discount_amt#7] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,21), LessThanOrEqual(ss_quantity,40)], ReadSchema: struct +: :- Subquery subquery1156 +: : +- *(2) HashAggregate(keys=[], functions=[avg(UnscaledValue(ss_net_paid#8))]) +: : +- Exchange SinglePartition +: : +- *(1) HashAggregate(keys=[], functions=[partial_avg(UnscaledValue(ss_net_paid#8))]) +: : +- *(1) Project [ss_net_paid#8] +: : +- *(1) Filter ((isnotnull(ss_quantity#6) && (ss_quantity#6 >= 21)) && (ss_quantity#6 <= 40)) +: : +- *(1) FileScan parquet default.store_sales[ss_quantity#6,ss_net_paid#8] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,21), LessThanOrEqual(ss_quantity,40)], ReadSchema: struct +: :- Subquery subquery1158 +: : +- *(2) HashAggregate(keys=[], functions=[count(1)]) +: : +- Exchange SinglePartition +: : +- *(1) HashAggregate(keys=[], functions=[partial_count(1)]) +: : +- *(1) Project +: : +- *(1) Filter ((isnotnull(ss_quantity#6) && (ss_quantity#6 >= 41)) && (ss_quantity#6 <= 60)) +: : +- *(1) FileScan parquet default.store_sales[ss_quantity#6] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,41), 
LessThanOrEqual(ss_quantity,60)], ReadSchema: struct +: :- Subquery subquery1159 +: : +- *(2) HashAggregate(keys=[], functions=[avg(UnscaledValue(ss_ext_discount_amt#7))]) +: : +- Exchange SinglePartition +: : +- *(1) HashAggregate(keys=[], functions=[partial_avg(UnscaledValue(ss_ext_discount_amt#7))]) +: : +- *(1) Project [ss_ext_discount_amt#7] +: : +- *(1) Filter ((isnotnull(ss_quantity#6) && (ss_quantity#6 >= 41)) && (ss_quantity#6 <= 60)) +: : +- *(1) FileScan parquet default.store_sales[ss_quantity#6,ss_ext_discount_amt#7] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,41), LessThanOrEqual(ss_quantity,60)], ReadSchema: struct +: :- Subquery subquery1160 +: : +- *(2) HashAggregate(keys=[], functions=[avg(UnscaledValue(ss_net_paid#8))]) +: : +- Exchange SinglePartition +: : +- *(1) HashAggregate(keys=[], functions=[partial_avg(UnscaledValue(ss_net_paid#8))]) +: : +- *(1) Project [ss_net_paid#8] +: : +- *(1) Filter ((isnotnull(ss_quantity#6) && (ss_quantity#6 >= 41)) && (ss_quantity#6 <= 60)) +: : +- *(1) FileScan parquet default.store_sales[ss_quantity#6,ss_net_paid#8] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,41), LessThanOrEqual(ss_quantity,60)], ReadSchema: struct +: :- Subquery subquery1162 +: : +- *(2) HashAggregate(keys=[], functions=[count(1)]) +: : +- Exchange SinglePartition +: : +- *(1) HashAggregate(keys=[], functions=[partial_count(1)]) +: : +- *(1) Project +: : +- *(1) Filter ((isnotnull(ss_quantity#6) && (ss_quantity#6 >= 61)) && (ss_quantity#6 <= 80)) +: : +- *(1) FileScan parquet default.store_sales[ss_quantity#6] Batched: true, Format: Parquet, Location: 
InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,61), LessThanOrEqual(ss_quantity,80)], ReadSchema: struct +: :- Subquery subquery1163 +: : +- *(2) HashAggregate(keys=[], functions=[avg(UnscaledValue(ss_ext_discount_amt#7))]) +: : +- Exchange SinglePartition +: : +- *(1) HashAggregate(keys=[], functions=[partial_avg(UnscaledValue(ss_ext_discount_amt#7))]) +: : +- *(1) Project [ss_ext_discount_amt#7] +: : +- *(1) Filter ((isnotnull(ss_quantity#6) && (ss_quantity#6 >= 61)) && (ss_quantity#6 <= 80)) +: : +- *(1) FileScan parquet default.store_sales[ss_quantity#6,ss_ext_discount_amt#7] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,61), LessThanOrEqual(ss_quantity,80)], ReadSchema: struct +: :- Subquery subquery1164 +: : +- *(2) HashAggregate(keys=[], functions=[avg(UnscaledValue(ss_net_paid#8))]) +: : +- Exchange SinglePartition +: : +- *(1) HashAggregate(keys=[], functions=[partial_avg(UnscaledValue(ss_net_paid#8))]) +: : +- *(1) Project [ss_net_paid#8] +: : +- *(1) Filter ((isnotnull(ss_quantity#6) && (ss_quantity#6 >= 61)) && (ss_quantity#6 <= 80)) +: : +- *(1) FileScan parquet default.store_sales[ss_quantity#6,ss_net_paid#8] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,61), LessThanOrEqual(ss_quantity,80)], ReadSchema: struct +: :- Subquery subquery1166 +: : +- *(2) HashAggregate(keys=[], functions=[count(1)]) +: : +- Exchange SinglePartition +: : +- *(1) HashAggregate(keys=[], functions=[partial_count(1)]) +: : +- *(1) Project +: : +- *(1) Filter ((isnotnull(ss_quantity#6) && 
(ss_quantity#6 >= 81)) && (ss_quantity#6 <= 100)) +: : +- *(1) FileScan parquet default.store_sales[ss_quantity#6] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,81), LessThanOrEqual(ss_quantity,100)], ReadSchema: struct +: :- Subquery subquery1167 +: : +- *(2) HashAggregate(keys=[], functions=[avg(UnscaledValue(ss_ext_discount_amt#7))]) +: : +- Exchange SinglePartition +: : +- *(1) HashAggregate(keys=[], functions=[partial_avg(UnscaledValue(ss_ext_discount_amt#7))]) +: : +- *(1) Project [ss_ext_discount_amt#7] +: : +- *(1) Filter ((isnotnull(ss_quantity#6) && (ss_quantity#6 >= 81)) && (ss_quantity#6 <= 100)) +: : +- *(1) FileScan parquet default.store_sales[ss_quantity#6,ss_ext_discount_amt#7] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,81), LessThanOrEqual(ss_quantity,100)], ReadSchema: struct +: +- Subquery subquery1168 +: +- *(2) HashAggregate(keys=[], functions=[avg(UnscaledValue(ss_net_paid#8))]) +: +- Exchange SinglePartition +: +- *(1) HashAggregate(keys=[], functions=[partial_avg(UnscaledValue(ss_net_paid#8))]) +: +- *(1) Project [ss_net_paid#8] +: +- *(1) Filter ((isnotnull(ss_quantity#6) && (ss_quantity#6 >= 81)) && (ss_quantity#6 <= 100)) +: +- *(1) FileScan parquet default.store_sales[ss_quantity#6,ss_net_paid#8] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,81), LessThanOrEqual(ss_quantity,100)], ReadSchema: struct ++- *(1) Filter (isnotnull(r_reason_sk#9) && (r_reason_sk#9 = 1)) + +- *(1) FileScan parquet 
default.reason[r_reason_sk#9] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/reason], PartitionFilters: [], PushedFilters: [IsNotNull(r_reason_sk), EqualTo(r_reason_sk,1)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q9/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q9/simplified.txt new file mode 100644 index 000000000..1851df752 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q9/simplified.txt @@ -0,0 +1,154 @@ +WholeStageCodegen + Project + Subquery #1 + WholeStageCodegen + HashAggregate [count,count(1)] [count(1),count(1),count] + InputAdapter + Exchange #1 + WholeStageCodegen + HashAggregate [count,count] [count,count] + Project + Filter [ss_quantity] + Scan parquet default.store_sales [ss_quantity] [ss_quantity] + Subquery #2 + WholeStageCodegen + HashAggregate [sum,count,avg(UnscaledValue(ss_ext_discount_amt))] [avg(UnscaledValue(ss_ext_discount_amt)),avg(ss_ext_discount_amt),sum,count] + InputAdapter + Exchange #2 + WholeStageCodegen + HashAggregate [sum,ss_ext_discount_amt,sum,count,count] [sum,count,sum,count] + Project [ss_ext_discount_amt] + Filter [ss_quantity] + Scan parquet default.store_sales [ss_quantity,ss_ext_discount_amt] [ss_quantity,ss_ext_discount_amt] + Subquery #3 + WholeStageCodegen + HashAggregate [sum,count,avg(UnscaledValue(ss_net_paid))] [avg(UnscaledValue(ss_net_paid)),avg(ss_net_paid),sum,count] + InputAdapter + Exchange #3 + WholeStageCodegen + HashAggregate [sum,count,ss_net_paid,sum,count] [sum,count,sum,count] + Project [ss_net_paid] + Filter [ss_quantity] + Scan parquet default.store_sales [ss_quantity,ss_net_paid] [ss_quantity,ss_net_paid] + Subquery #4 + WholeStageCodegen + HashAggregate [count,count(1)] [count(1),count(1),count] + InputAdapter + Exchange #4 + WholeStageCodegen + HashAggregate [count,count] [count,count] 
+ Project + Filter [ss_quantity] + Scan parquet default.store_sales [ss_quantity] [ss_quantity] + Subquery #5 + WholeStageCodegen + HashAggregate [sum,count,avg(UnscaledValue(ss_ext_discount_amt))] [avg(UnscaledValue(ss_ext_discount_amt)),avg(ss_ext_discount_amt),sum,count] + InputAdapter + Exchange #5 + WholeStageCodegen + HashAggregate [count,sum,count,ss_ext_discount_amt,sum] [sum,count,sum,count] + Project [ss_ext_discount_amt] + Filter [ss_quantity] + Scan parquet default.store_sales [ss_quantity,ss_ext_discount_amt] [ss_quantity,ss_ext_discount_amt] + Subquery #6 + WholeStageCodegen + HashAggregate [sum,count,avg(UnscaledValue(ss_net_paid))] [avg(UnscaledValue(ss_net_paid)),avg(ss_net_paid),sum,count] + InputAdapter + Exchange #6 + WholeStageCodegen + HashAggregate [sum,sum,ss_net_paid,count,count] [sum,count,sum,count] + Project [ss_net_paid] + Filter [ss_quantity] + Scan parquet default.store_sales [ss_quantity,ss_net_paid] [ss_quantity,ss_net_paid] + Subquery #7 + WholeStageCodegen + HashAggregate [count,count(1)] [count(1),count(1),count] + InputAdapter + Exchange #7 + WholeStageCodegen + HashAggregate [count,count] [count,count] + Project + Filter [ss_quantity] + Scan parquet default.store_sales [ss_quantity] [ss_quantity] + Subquery #8 + WholeStageCodegen + HashAggregate [sum,count,avg(UnscaledValue(ss_ext_discount_amt))] [avg(UnscaledValue(ss_ext_discount_amt)),avg(ss_ext_discount_amt),sum,count] + InputAdapter + Exchange #8 + WholeStageCodegen + HashAggregate [sum,ss_ext_discount_amt,count,count,sum] [sum,count,sum,count] + Project [ss_ext_discount_amt] + Filter [ss_quantity] + Scan parquet default.store_sales [ss_quantity,ss_ext_discount_amt] [ss_quantity,ss_ext_discount_amt] + Subquery #9 + WholeStageCodegen + HashAggregate [sum,count,avg(UnscaledValue(ss_net_paid))] [avg(UnscaledValue(ss_net_paid)),avg(ss_net_paid),sum,count] + InputAdapter + Exchange #9 + WholeStageCodegen + HashAggregate [sum,count,count,sum,ss_net_paid] [sum,count,sum,count] + 
Project [ss_net_paid] + Filter [ss_quantity] + Scan parquet default.store_sales [ss_quantity,ss_net_paid] [ss_quantity,ss_net_paid] + Subquery #10 + WholeStageCodegen + HashAggregate [count,count(1)] [count(1),count(1),count] + InputAdapter + Exchange #10 + WholeStageCodegen + HashAggregate [count,count] [count,count] + Project + Filter [ss_quantity] + Scan parquet default.store_sales [ss_quantity] [ss_quantity] + Subquery #11 + WholeStageCodegen + HashAggregate [sum,count,avg(UnscaledValue(ss_ext_discount_amt))] [avg(UnscaledValue(ss_ext_discount_amt)),avg(ss_ext_discount_amt),sum,count] + InputAdapter + Exchange #11 + WholeStageCodegen + HashAggregate [ss_ext_discount_amt,sum,count,count,sum] [sum,count,sum,count] + Project [ss_ext_discount_amt] + Filter [ss_quantity] + Scan parquet default.store_sales [ss_quantity,ss_ext_discount_amt] [ss_quantity,ss_ext_discount_amt] + Subquery #12 + WholeStageCodegen + HashAggregate [sum,count,avg(UnscaledValue(ss_net_paid))] [avg(UnscaledValue(ss_net_paid)),avg(ss_net_paid),sum,count] + InputAdapter + Exchange #12 + WholeStageCodegen + HashAggregate [count,sum,ss_net_paid,count,sum] [sum,count,sum,count] + Project [ss_net_paid] + Filter [ss_quantity] + Scan parquet default.store_sales [ss_quantity,ss_net_paid] [ss_quantity,ss_net_paid] + Subquery #13 + WholeStageCodegen + HashAggregate [count,count(1)] [count(1),count(1),count] + InputAdapter + Exchange #13 + WholeStageCodegen + HashAggregate [count,count] [count,count] + Project + Filter [ss_quantity] + Scan parquet default.store_sales [ss_quantity] [ss_quantity] + Subquery #14 + WholeStageCodegen + HashAggregate [sum,count,avg(UnscaledValue(ss_ext_discount_amt))] [avg(UnscaledValue(ss_ext_discount_amt)),avg(ss_ext_discount_amt),sum,count] + InputAdapter + Exchange #14 + WholeStageCodegen + HashAggregate [sum,ss_ext_discount_amt,sum,count,count] [sum,count,sum,count] + Project [ss_ext_discount_amt] + Filter [ss_quantity] + Scan parquet default.store_sales 
[ss_quantity,ss_ext_discount_amt] [ss_quantity,ss_ext_discount_amt] + Subquery #15 + WholeStageCodegen + HashAggregate [sum,count,avg(UnscaledValue(ss_net_paid))] [avg(UnscaledValue(ss_net_paid)),avg(ss_net_paid),sum,count] + InputAdapter + Exchange #15 + WholeStageCodegen + HashAggregate [count,ss_net_paid,sum,count,sum] [sum,count,sum,count] + Project [ss_net_paid] + Filter [ss_quantity] + Scan parquet default.store_sales [ss_quantity,ss_net_paid] [ss_quantity,ss_net_paid] + Filter [r_reason_sk] + Scan parquet default.reason [r_reason_sk] [r_reason_sk] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q90/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q90/explain.txt new file mode 100644 index 000000000..0d0c65f33 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q90/explain.txt @@ -0,0 +1,47 @@ +== Physical Plan == +TakeOrderedAndProject(limit=100, orderBy=[am_pm_ratio#1 ASC NULLS FIRST], output=[am_pm_ratio#1]) ++- *(11) Project [CheckOverflow((promote_precision(cast(amc#2 as decimal(15,4))) / promote_precision(cast(pmc#3 as decimal(15,4)))), DecimalType(35,20)) AS am_pm_ratio#1] + +- BroadcastNestedLoopJoin BuildRight, Inner + :- *(5) HashAggregate(keys=[], functions=[count(1)]) + : +- Exchange SinglePartition + : +- *(4) HashAggregate(keys=[], functions=[partial_count(1)]) + : +- *(4) Project + : +- *(4) BroadcastHashJoin [ws_web_page_sk#4], [wp_web_page_sk#5], Inner, BuildRight + : :- *(4) Project [ws_web_page_sk#4] + : : +- *(4) BroadcastHashJoin [ws_sold_time_sk#6], [t_time_sk#7], Inner, BuildRight + : : :- *(4) Project [ws_sold_time_sk#6, ws_web_page_sk#4] + : : : +- *(4) BroadcastHashJoin [ws_ship_hdemo_sk#8], [hd_demo_sk#9], Inner, BuildRight + : : : :- *(4) Project [ws_sold_time_sk#6, ws_ship_hdemo_sk#8, ws_web_page_sk#4] + : : : : +- *(4) Filter ((isnotnull(ws_ship_hdemo_sk#8) && isnotnull(ws_sold_time_sk#6)) && isnotnull(ws_web_page_sk#4)) + : : : : +- *(4) FileScan 
parquet default.web_sales[ws_sold_time_sk#6,ws_ship_hdemo_sk#8,ws_web_page_sk#4] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_ship_hdemo_sk), IsNotNull(ws_sold_time_sk), IsNotNull(ws_web_page_sk)], ReadSchema: struct + : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : +- *(1) Project [hd_demo_sk#9] + : : : +- *(1) Filter ((isnotnull(hd_dep_count#10) && (hd_dep_count#10 = 6)) && isnotnull(hd_demo_sk#9)) + : : : +- *(1) FileScan parquet default.household_demographics[hd_demo_sk#9,hd_dep_count#10] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/household_demographics], PartitionFilters: [], PushedFilters: [IsNotNull(hd_dep_count), EqualTo(hd_dep_count,6), IsNotNull(hd_demo_sk)], ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(2) Project [t_time_sk#7] + : : +- *(2) Filter (((isnotnull(t_hour#11) && (t_hour#11 >= 8)) && (t_hour#11 <= 9)) && isnotnull(t_time_sk#7)) + : : +- *(2) FileScan parquet default.time_dim[t_time_sk#7,t_hour#11] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/time_dim], PartitionFilters: [], PushedFilters: [IsNotNull(t_hour), GreaterThanOrEqual(t_hour,8), LessThanOrEqual(t_hour,9), IsNotNull(t_time_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(3) Project [wp_web_page_sk#5] + : +- *(3) Filter (((isnotnull(wp_char_count#12) && (wp_char_count#12 >= 5000)) && (wp_char_count#12 <= 5200)) && isnotnull(wp_web_page_sk#5)) + : +- *(3) FileScan parquet default.web_page[wp_web_page_sk#5,wp_char_count#12] Batched: true, Format: Parquet, Location: 
InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_page], PartitionFilters: [], PushedFilters: [IsNotNull(wp_char_count), GreaterThanOrEqual(wp_char_count,5000), LessThanOrEqual(wp_char_count,..., ReadSchema: struct + +- BroadcastExchange IdentityBroadcastMode + +- *(10) HashAggregate(keys=[], functions=[count(1)]) + +- Exchange SinglePartition + +- *(9) HashAggregate(keys=[], functions=[partial_count(1)]) + +- *(9) Project + +- *(9) BroadcastHashJoin [ws_web_page_sk#4], [wp_web_page_sk#5], Inner, BuildRight + :- *(9) Project [ws_web_page_sk#4] + : +- *(9) BroadcastHashJoin [ws_sold_time_sk#6], [t_time_sk#7], Inner, BuildRight + : :- *(9) Project [ws_sold_time_sk#6, ws_web_page_sk#4] + : : +- *(9) BroadcastHashJoin [ws_ship_hdemo_sk#8], [hd_demo_sk#9], Inner, BuildRight + : : :- *(9) Project [ws_sold_time_sk#6, ws_ship_hdemo_sk#8, ws_web_page_sk#4] + : : : +- *(9) Filter ((isnotnull(ws_ship_hdemo_sk#8) && isnotnull(ws_sold_time_sk#6)) && isnotnull(ws_web_page_sk#4)) + : : : +- *(9) FileScan parquet default.web_sales[ws_sold_time_sk#6,ws_ship_hdemo_sk#8,ws_web_page_sk#4] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_ship_hdemo_sk), IsNotNull(ws_sold_time_sk), IsNotNull(ws_web_page_sk)], ReadSchema: struct + : : +- ReusedExchange [hd_demo_sk#9], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(7) Project [t_time_sk#7] + : +- *(7) Filter (((isnotnull(t_hour#11) && (t_hour#11 >= 19)) && (t_hour#11 <= 20)) && isnotnull(t_time_sk#7)) + : +- *(7) FileScan parquet default.time_dim[t_time_sk#7,t_hour#11] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/time_dim], PartitionFilters: [], PushedFilters: 
[IsNotNull(t_hour), GreaterThanOrEqual(t_hour,19), LessThanOrEqual(t_hour,20), IsNotNull(t_time_sk)], ReadSchema: struct + +- ReusedExchange [wp_web_page_sk#5], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q90/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q90/simplified.txt new file mode 100644 index 000000000..c14580e27 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q90/simplified.txt @@ -0,0 +1,64 @@ +TakeOrderedAndProject [am_pm_ratio] + WholeStageCodegen + Project [amc,pmc] + InputAdapter + BroadcastNestedLoopJoin + WholeStageCodegen + HashAggregate [count,count(1)] [count(1),amc,count] + InputAdapter + Exchange #1 + WholeStageCodegen + HashAggregate [count,count] [count,count] + Project + BroadcastHashJoin [ws_web_page_sk,wp_web_page_sk] + Project [ws_web_page_sk] + BroadcastHashJoin [ws_sold_time_sk,t_time_sk] + Project [ws_sold_time_sk,ws_web_page_sk] + BroadcastHashJoin [ws_ship_hdemo_sk,hd_demo_sk] + Project [ws_sold_time_sk,ws_ship_hdemo_sk,ws_web_page_sk] + Filter [ws_ship_hdemo_sk,ws_sold_time_sk,ws_web_page_sk] + Scan parquet default.web_sales [ws_sold_time_sk,ws_ship_hdemo_sk,ws_web_page_sk] [ws_sold_time_sk,ws_ship_hdemo_sk,ws_web_page_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen + Project [hd_demo_sk] + Filter [hd_dep_count,hd_demo_sk] + Scan parquet default.household_demographics [hd_demo_sk,hd_dep_count] [hd_demo_sk,hd_dep_count] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen + Project [t_time_sk] + Filter [t_hour,t_time_sk] + Scan parquet default.time_dim [t_time_sk,t_hour] [t_time_sk,t_hour] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen + Project [wp_web_page_sk] + Filter [wp_char_count,wp_web_page_sk] + Scan parquet default.web_page [wp_web_page_sk,wp_char_count] [wp_web_page_sk,wp_char_count] + 
BroadcastExchange #5 + WholeStageCodegen + HashAggregate [count,count(1)] [count(1),pmc,count] + InputAdapter + Exchange #6 + WholeStageCodegen + HashAggregate [count,count] [count,count] + Project + BroadcastHashJoin [ws_web_page_sk,wp_web_page_sk] + Project [ws_web_page_sk] + BroadcastHashJoin [ws_sold_time_sk,t_time_sk] + Project [ws_sold_time_sk,ws_web_page_sk] + BroadcastHashJoin [ws_ship_hdemo_sk,hd_demo_sk] + Project [ws_sold_time_sk,ws_ship_hdemo_sk,ws_web_page_sk] + Filter [ws_ship_hdemo_sk,ws_sold_time_sk,ws_web_page_sk] + Scan parquet default.web_sales [ws_sold_time_sk,ws_ship_hdemo_sk,ws_web_page_sk] [ws_sold_time_sk,ws_ship_hdemo_sk,ws_web_page_sk] + InputAdapter + ReusedExchange [hd_demo_sk] [hd_demo_sk] #2 + InputAdapter + BroadcastExchange #7 + WholeStageCodegen + Project [t_time_sk] + Filter [t_hour,t_time_sk] + Scan parquet default.time_dim [t_time_sk,t_hour] [t_time_sk,t_hour] + InputAdapter + ReusedExchange [wp_web_page_sk] [wp_web_page_sk] #4 diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q91/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q91/explain.txt new file mode 100644 index 000000000..90c7f4cff --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q91/explain.txt @@ -0,0 +1,45 @@ +== Physical Plan == +*(9) Sort [Returns_Loss#1 DESC NULLS LAST], true, 0 ++- Exchange rangepartitioning(Returns_Loss#1 DESC NULLS LAST, 200) + +- *(8) HashAggregate(keys=[cc_call_center_id#2, cc_name#3, cc_manager#4, cd_marital_status#5, cd_education_status#6], functions=[sum(UnscaledValue(cr_net_loss#7))]) + +- Exchange hashpartitioning(cc_call_center_id#2, cc_name#3, cc_manager#4, cd_marital_status#5, cd_education_status#6, 200) + +- *(7) HashAggregate(keys=[cc_call_center_id#2, cc_name#3, cc_manager#4, cd_marital_status#5, cd_education_status#6], functions=[partial_sum(UnscaledValue(cr_net_loss#7))]) + +- *(7) Project [cc_call_center_id#2, cc_name#3, cc_manager#4, cr_net_loss#7, 
cd_marital_status#5, cd_education_status#6] + +- *(7) BroadcastHashJoin [c_current_hdemo_sk#8], [hd_demo_sk#9], Inner, BuildRight + :- *(7) Project [cc_call_center_id#2, cc_name#3, cc_manager#4, cr_net_loss#7, c_current_hdemo_sk#8, cd_marital_status#5, cd_education_status#6] + : +- *(7) BroadcastHashJoin [c_current_cdemo_sk#10], [cd_demo_sk#11], Inner, BuildRight + : :- *(7) Project [cc_call_center_id#2, cc_name#3, cc_manager#4, cr_net_loss#7, c_current_cdemo_sk#10, c_current_hdemo_sk#8] + : : +- *(7) BroadcastHashJoin [c_current_addr_sk#12], [ca_address_sk#13], Inner, BuildRight + : : :- *(7) Project [cc_call_center_id#2, cc_name#3, cc_manager#4, cr_net_loss#7, c_current_cdemo_sk#10, c_current_hdemo_sk#8, c_current_addr_sk#12] + : : : +- *(7) BroadcastHashJoin [cr_returning_customer_sk#14], [c_customer_sk#15], Inner, BuildRight + : : : :- *(7) Project [cc_call_center_id#2, cc_name#3, cc_manager#4, cr_returning_customer_sk#14, cr_net_loss#7] + : : : : +- *(7) BroadcastHashJoin [cr_returned_date_sk#16], [d_date_sk#17], Inner, BuildRight + : : : : :- *(7) Project [cc_call_center_id#2, cc_name#3, cc_manager#4, cr_returned_date_sk#16, cr_returning_customer_sk#14, cr_net_loss#7] + : : : : : +- *(7) BroadcastHashJoin [cc_call_center_sk#18], [cr_call_center_sk#19], Inner, BuildRight + : : : : : :- *(7) Project [cc_call_center_sk#18, cc_call_center_id#2, cc_name#3, cc_manager#4] + : : : : : : +- *(7) Filter isnotnull(cc_call_center_sk#18) + : : : : : : +- *(7) FileScan parquet default.call_center[cc_call_center_sk#18,cc_call_center_id#2,cc_name#3,cc_manager#4] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/call_center], PartitionFilters: [], PushedFilters: [IsNotNull(cc_call_center_sk)], ReadSchema: struct + : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[2, int, true] as bigint))) + : : : : : +- *(1) Project [cr_returned_date_sk#16, cr_returning_customer_sk#14, 
cr_call_center_sk#19, cr_net_loss#7] + : : : : : +- *(1) Filter ((isnotnull(cr_call_center_sk#19) && isnotnull(cr_returned_date_sk#16)) && isnotnull(cr_returning_customer_sk#14)) + : : : : : +- *(1) FileScan parquet default.catalog_returns[cr_returned_date_sk#16,cr_returning_customer_sk#14,cr_call_center_sk#19,cr_net_loss#7] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_returns], PartitionFilters: [], PushedFilters: [IsNotNull(cr_call_center_sk), IsNotNull(cr_returned_date_sk), IsNotNull(cr_returning_customer_sk)], ReadSchema: struct + : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : +- *(3) Project [c_customer_sk#15, c_current_cdemo_sk#10, c_current_hdemo_sk#8, c_current_addr_sk#12] + : : : +- *(3) Filter (((isnotnull(c_customer_sk#15) && isnotnull(c_current_addr_sk#12)) && isnotnull(c_current_cdemo_sk#10)) && isnotnull(c_current_hdemo_sk#8)) + : : : +- *(3) FileScan parquet default.customer[c_customer_sk#15,c_current_cdemo_sk#10,c_current_hdemo_sk#8,c_current_addr_sk#12] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_current_addr_sk), IsNotNull(c_current_cdemo_sk), IsNotNull..., ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(4) Project [ca_address_sk#13] + : : +- *(4) Filter ((isnotnull(ca_gmt_offset#22) && (ca_gmt_offset#22 = -7.00)) && isnotnull(ca_address_sk#13)) + : : +- *(4) FileScan parquet default.customer_address[ca_address_sk#13,ca_gmt_offset#22] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_gmt_offset), EqualTo(ca_gmt_offset,-7.00), 
IsNotNull(ca_address_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(5) Project [cd_demo_sk#11, cd_marital_status#5, cd_education_status#6] + : +- *(5) Filter ((((cd_marital_status#5 = M) && (cd_education_status#6 = Unknown)) || ((cd_marital_status#5 = W) && (cd_education_status#6 = Advanced Degree))) && isnotnull(cd_demo_sk#11)) + : +- *(5) FileScan parquet default.customer_demographics[cd_demo_sk#11,cd_marital_status#5,cd_education_status#6] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_demographics], PartitionFilters: [], PushedFilters: [Or(And(EqualTo(cd_marital_status,M),EqualTo(cd_education_status,Unknown)),And(EqualTo(cd_marital..., ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(6) Project [hd_demo_sk#9] + +- *(6) Filter ((isnotnull(hd_buy_potential#23) && StartsWith(hd_buy_potential#23, Unknown)) && isnotnull(hd_demo_sk#9)) + +- *(6) FileScan parquet default.household_demographics[hd_demo_sk#9,hd_buy_potential#23] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/household_demographics], PartitionFilters: [], PushedFilters: [IsNotNull(hd_buy_potential), StringStartsWith(hd_buy_potential,Unknown), IsNotNull(hd_demo_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q91/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q91/simplified.txt new file mode 100644 index 000000000..4b2c97c9f --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q91/simplified.txt @@ -0,0 +1,61 @@ +WholeStageCodegen + Sort [Returns_Loss] + InputAdapter + Exchange [Returns_Loss] #1 + WholeStageCodegen + HashAggregate 
[cc_call_center_id,sum(UnscaledValue(cr_net_loss)),sum,cc_name,cd_education_status,cc_manager,cd_marital_status] [sum(UnscaledValue(cr_net_loss)),sum,Manager,Call_Center_Name,Returns_Loss,Call_Center] + InputAdapter + Exchange [cc_call_center_id,cc_name,cd_education_status,cc_manager,cd_marital_status] #2 + WholeStageCodegen + HashAggregate [cc_call_center_id,sum,cc_name,cd_education_status,cc_manager,cr_net_loss,sum,cd_marital_status] [sum,sum] + Project [cr_net_loss,cd_marital_status,cc_call_center_id,cc_name,cc_manager,cd_education_status] + BroadcastHashJoin [c_current_hdemo_sk,hd_demo_sk] + Project [cr_net_loss,c_current_hdemo_sk,cd_marital_status,cc_call_center_id,cc_name,cc_manager,cd_education_status] + BroadcastHashJoin [c_current_cdemo_sk,cd_demo_sk] + Project [c_current_cdemo_sk,cr_net_loss,c_current_hdemo_sk,cc_call_center_id,cc_name,cc_manager] + BroadcastHashJoin [c_current_addr_sk,ca_address_sk] + Project [c_current_cdemo_sk,c_current_addr_sk,cr_net_loss,c_current_hdemo_sk,cc_call_center_id,cc_name,cc_manager] + BroadcastHashJoin [cr_returning_customer_sk,c_customer_sk] + Project [cr_returning_customer_sk,cr_net_loss,cc_call_center_id,cc_name,cc_manager] + BroadcastHashJoin [cr_returned_date_sk,d_date_sk] + Project [cr_returning_customer_sk,cr_net_loss,cc_call_center_id,cr_returned_date_sk,cc_name,cc_manager] + BroadcastHashJoin [cc_call_center_sk,cr_call_center_sk] + Project [cc_call_center_sk,cc_call_center_id,cc_name,cc_manager] + Filter [cc_call_center_sk] + Scan parquet default.call_center [cc_call_center_sk,cc_call_center_id,cc_name,cc_manager] [cc_call_center_sk,cc_call_center_id,cc_name,cc_manager] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen + Project [cr_returned_date_sk,cr_returning_customer_sk,cr_call_center_sk,cr_net_loss] + Filter [cr_call_center_sk,cr_returned_date_sk,cr_returning_customer_sk] + Scan parquet default.catalog_returns [cr_returned_date_sk,cr_returning_customer_sk,cr_call_center_sk,cr_net_loss] 
[cr_returned_date_sk,cr_returning_customer_sk,cr_call_center_sk,cr_net_loss] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen + Project [d_date_sk] + Filter [d_year,d_moy,d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_year,d_moy] [d_date_sk,d_year,d_moy] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen + Project [c_customer_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk] + Filter [c_customer_sk,c_current_addr_sk,c_current_cdemo_sk,c_current_hdemo_sk] + Scan parquet default.customer [c_customer_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk] [c_customer_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen + Project [ca_address_sk] + Filter [ca_gmt_offset,ca_address_sk] + Scan parquet default.customer_address [ca_address_sk,ca_gmt_offset] [ca_address_sk,ca_gmt_offset] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen + Project [cd_demo_sk,cd_marital_status,cd_education_status] + Filter [cd_marital_status,cd_education_status,cd_demo_sk] + Scan parquet default.customer_demographics [cd_demo_sk,cd_marital_status,cd_education_status] [cd_demo_sk,cd_marital_status,cd_education_status] + InputAdapter + BroadcastExchange #8 + WholeStageCodegen + Project [hd_demo_sk] + Filter [hd_buy_potential,hd_demo_sk] + Scan parquet default.household_demographics [hd_demo_sk,hd_buy_potential] [hd_demo_sk,hd_buy_potential] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92/explain.txt new file mode 100644 index 000000000..4a18c2b2d --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92/explain.txt @@ -0,0 +1,33 @@ +== Physical Plan == +TakeOrderedAndProject(limit=100, orderBy=[Excess Discount Amount #1 ASC NULLS FIRST], output=[Excess Discount Amount #1]) ++- *(7) HashAggregate(keys=[], 
functions=[sum(UnscaledValue(ws_ext_discount_amt#2))]) + +- Exchange SinglePartition + +- *(6) HashAggregate(keys=[], functions=[partial_sum(UnscaledValue(ws_ext_discount_amt#2))]) + +- *(6) Project [ws_ext_discount_amt#2] + +- *(6) BroadcastHashJoin [ws_sold_date_sk#3], [d_date_sk#4], Inner, BuildRight + :- *(6) Project [ws_sold_date_sk#3, ws_ext_discount_amt#2] + : +- *(6) BroadcastHashJoin [i_item_sk#5], [ws_item_sk#6#7], Inner, BuildRight, (cast(ws_ext_discount_amt#2 as decimal(14,7)) > (CAST(1.3 AS DECIMAL(11,6)) * CAST(avg(ws_ext_discount_amt) AS DECIMAL(11,6)))#8) + : :- *(6) Project [ws_sold_date_sk#3, ws_ext_discount_amt#2, i_item_sk#5] + : : +- *(6) BroadcastHashJoin [ws_item_sk#6], [i_item_sk#5], Inner, BuildRight + : : :- *(6) Project [ws_sold_date_sk#3, ws_item_sk#6, ws_ext_discount_amt#2] + : : : +- *(6) Filter ((isnotnull(ws_item_sk#6) && isnotnull(ws_ext_discount_amt#2)) && isnotnull(ws_sold_date_sk#3)) + : : : +- *(6) FileScan parquet default.web_sales[ws_sold_date_sk#3,ws_item_sk#6,ws_ext_discount_amt#2] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_item_sk), IsNotNull(ws_ext_discount_amt), IsNotNull(ws_sold_date_sk)], ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(1) Project [i_item_sk#5] + : : +- *(1) Filter ((isnotnull(i_manufact_id#9) && (i_manufact_id#9 = 350)) && isnotnull(i_item_sk#5)) + : : +- *(1) FileScan parquet default.item[i_item_sk#5,i_manufact_id#9] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_manufact_id), EqualTo(i_manufact_id,350), IsNotNull(i_item_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[1, int, true] as bigint))) + : +- *(4) Filter 
isnotnull((CAST(1.3 AS DECIMAL(11,6)) * CAST(avg(ws_ext_discount_amt) AS DECIMAL(11,6)))#8) + : +- *(4) HashAggregate(keys=[ws_item_sk#6], functions=[avg(UnscaledValue(ws_ext_discount_amt#2))]) + : +- Exchange hashpartitioning(ws_item_sk#6, 200) + : +- *(3) HashAggregate(keys=[ws_item_sk#6], functions=[partial_avg(UnscaledValue(ws_ext_discount_amt#2))]) + : +- *(3) Project [ws_item_sk#6, ws_ext_discount_amt#2] + : +- *(3) BroadcastHashJoin [ws_sold_date_sk#3], [d_date_sk#4], Inner, BuildRight + : :- *(3) Project [ws_sold_date_sk#3, ws_item_sk#6, ws_ext_discount_amt#2] + : : +- *(3) Filter (isnotnull(ws_sold_date_sk#3) && isnotnull(ws_item_sk#6)) + : : +- *(3) FileScan parquet default.web_sales[ws_sold_date_sk#3,ws_item_sk#6,ws_ext_discount_amt#2] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_sold_date_sk), IsNotNull(ws_item_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(2) Project [d_date_sk#4] + : +- *(2) Filter (((isnotnull(d_date#10) && (cast(d_date#10 as string) >= 2000-01-27)) && (d_date#10 <= 11073)) && isnotnull(d_date_sk#4)) + : +- *(2) FileScan parquet default.date_dim[d_date_sk#4,d_date#10] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date), LessThanOrEqual(d_date,2000-04-26), IsNotNull(d_date_sk)], ReadSchema: struct + +- ReusedExchange [d_date_sk#4], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92/simplified.txt new file mode 100644 index 000000000..6137607f6 --- /dev/null +++ 
b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92/simplified.txt @@ -0,0 +1,44 @@ +TakeOrderedAndProject [Excess Discount Amount ] + WholeStageCodegen + HashAggregate [sum,sum(UnscaledValue(ws_ext_discount_amt))] [sum(UnscaledValue(ws_ext_discount_amt)),Excess Discount Amount ,sum] + InputAdapter + Exchange #1 + WholeStageCodegen + HashAggregate [ws_ext_discount_amt,sum,sum] [sum,sum] + Project [ws_ext_discount_amt] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Project [ws_sold_date_sk,ws_ext_discount_amt] + BroadcastHashJoin [i_item_sk,ws_item_sk,ws_ext_discount_amt,(CAST(1.3 AS DECIMAL(11,6)) * CAST(avg(ws_ext_discount_amt) AS DECIMAL(11,6)))] + Project [ws_sold_date_sk,ws_ext_discount_amt,i_item_sk] + BroadcastHashJoin [ws_item_sk,i_item_sk] + Project [ws_sold_date_sk,ws_item_sk,ws_ext_discount_amt] + Filter [ws_item_sk,ws_ext_discount_amt,ws_sold_date_sk] + Scan parquet default.web_sales [ws_sold_date_sk,ws_item_sk,ws_ext_discount_amt] [ws_sold_date_sk,ws_item_sk,ws_ext_discount_amt] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen + Project [i_item_sk] + Filter [i_manufact_id,i_item_sk] + Scan parquet default.item [i_item_sk,i_manufact_id] [i_item_sk,i_manufact_id] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen + Filter [(CAST(1.3 AS DECIMAL(11,6)) * CAST(avg(ws_ext_discount_amt) AS DECIMAL(11,6)))] + HashAggregate [ws_item_sk,sum,count,avg(UnscaledValue(ws_ext_discount_amt))] [sum,(CAST(1.3 AS DECIMAL(11,6)) * CAST(avg(ws_ext_discount_amt) AS DECIMAL(11,6))),avg(UnscaledValue(ws_ext_discount_amt)),count,ws_item_sk] + InputAdapter + Exchange [ws_item_sk] #4 + WholeStageCodegen + HashAggregate [ws_ext_discount_amt,ws_item_sk,sum,sum,count,count] [sum,count,sum,count] + Project [ws_item_sk,ws_ext_discount_amt] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Project [ws_sold_date_sk,ws_item_sk,ws_ext_discount_amt] + Filter [ws_sold_date_sk,ws_item_sk] + Scan parquet default.web_sales 
[ws_sold_date_sk,ws_item_sk,ws_ext_discount_amt] [ws_sold_date_sk,ws_item_sk,ws_ext_discount_amt] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen + Project [d_date_sk] + Filter [d_date,d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_date] [d_date_sk,d_date] + InputAdapter + ReusedExchange [d_date_sk] [d_date_sk] #5 diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q93/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q93/explain.txt new file mode 100644 index 000000000..4788a00a3 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q93/explain.txt @@ -0,0 +1,18 @@ +== Physical Plan == +TakeOrderedAndProject(limit=100, orderBy=[sumsales#1 ASC NULLS FIRST,ss_customer_sk#2 ASC NULLS FIRST], output=[ss_customer_sk#2,sumsales#1]) ++- *(4) HashAggregate(keys=[ss_customer_sk#2], functions=[sum(act_sales#3)]) + +- Exchange hashpartitioning(ss_customer_sk#2, 200) + +- *(3) HashAggregate(keys=[ss_customer_sk#2], functions=[partial_sum(act_sales#3)]) + +- *(3) Project [ss_customer_sk#2, CASE WHEN isnotnull(sr_return_quantity#4) THEN CheckOverflow((promote_precision(cast(cast((ss_quantity#5 - sr_return_quantity#4) as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#6 as decimal(12,2)))), DecimalType(18,2)) ELSE CheckOverflow((promote_precision(cast(cast(ss_quantity#5 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#6 as decimal(12,2)))), DecimalType(18,2)) END AS act_sales#3] + +- *(3) BroadcastHashJoin [sr_reason_sk#7], [cast(r_reason_sk#8 as bigint)], Inner, BuildRight + :- *(3) Project [ss_customer_sk#2, ss_quantity#5, ss_sales_price#6, sr_reason_sk#7, sr_return_quantity#4] + : +- *(3) BroadcastHashJoin [cast(ss_item_sk#9 as bigint), cast(ss_ticket_number#10 as bigint)], [sr_item_sk#11, sr_ticket_number#12], Inner, BuildRight + : :- *(3) FileScan parquet 
default.store_sales[ss_item_sk#9,ss_customer_sk#2,ss_ticket_number#10,ss_quantity#5,ss_sales_price#6] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [], ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(2) Project [r_reason_sk#8] + +- *(2) Filter ((isnotnull(r_reason_desc#13) && (r_reason_desc#13 = reason 28)) && isnotnull(r_reason_sk#8)) + +- *(2) FileScan parquet default.reason[r_reason_sk#8,r_reason_desc#13] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/reason], PartitionFilters: [], PushedFilters: [IsNotNull(r_reason_desc), EqualTo(r_reason_desc,reason 28), IsNotNull(r_reason_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q93/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q93/simplified.txt new file mode 100644 index 000000000..5c7f07a1b --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q93/simplified.txt @@ -0,0 +1,24 @@ +TakeOrderedAndProject [sumsales,ss_customer_sk] + WholeStageCodegen + HashAggregate [ss_customer_sk,sum,sum(act_sales)] [sum(act_sales),sumsales,sum] + InputAdapter + Exchange [ss_customer_sk] #1 + WholeStageCodegen + HashAggregate [ss_customer_sk,act_sales,sum,sum] [sum,sum] + Project [ss_customer_sk,sr_return_quantity,ss_quantity,ss_sales_price] + BroadcastHashJoin [sr_reason_sk,r_reason_sk] + Project [ss_quantity,sr_return_quantity,ss_customer_sk,sr_reason_sk,ss_sales_price] + BroadcastHashJoin [ss_item_sk,ss_ticket_number,sr_item_sk,sr_ticket_number] + Scan parquet default.store_sales [ss_quantity,ss_item_sk,ss_customer_sk,ss_sales_price,ss_ticket_number] [ss_quantity,ss_item_sk,ss_customer_sk,ss_sales_price,ss_ticket_number] + InputAdapter + 
BroadcastExchange #2 + WholeStageCodegen + Project [sr_item_sk,sr_reason_sk,sr_ticket_number,sr_return_quantity] + Filter [sr_item_sk,sr_ticket_number,sr_reason_sk] + Scan parquet default.store_returns [sr_item_sk,sr_reason_sk,sr_ticket_number,sr_return_quantity] [sr_item_sk,sr_reason_sk,sr_ticket_number,sr_return_quantity] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen + Project [r_reason_sk] + Filter [r_reason_desc,r_reason_sk] + Scan parquet default.reason [r_reason_sk,r_reason_desc] [r_reason_sk,r_reason_desc] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94/explain.txt new file mode 100644 index 000000000..8034c8beb --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94/explain.txt @@ -0,0 +1,37 @@ +== Physical Plan == +TakeOrderedAndProject(limit=100, orderBy=[order count #1 ASC NULLS FIRST], output=[order count #1,total shipping cost #2,total net profit #3]) ++- *(8) HashAggregate(keys=[], functions=[sum(UnscaledValue(ws_ext_ship_cost#4)), sum(UnscaledValue(ws_net_profit#5)), count(distinct ws_order_number#6)]) + +- Exchange SinglePartition + +- *(7) HashAggregate(keys=[], functions=[merge_sum(UnscaledValue(ws_ext_ship_cost#4)), merge_sum(UnscaledValue(ws_net_profit#5)), partial_count(distinct ws_order_number#6)]) + +- *(7) HashAggregate(keys=[ws_order_number#6], functions=[merge_sum(UnscaledValue(ws_ext_ship_cost#4)), merge_sum(UnscaledValue(ws_net_profit#5))]) + +- Exchange hashpartitioning(ws_order_number#6, 200) + +- *(6) HashAggregate(keys=[ws_order_number#6], functions=[partial_sum(UnscaledValue(ws_ext_ship_cost#4)), partial_sum(UnscaledValue(ws_net_profit#5))]) + +- *(6) Project [ws_order_number#6, ws_ext_ship_cost#4, ws_net_profit#5] + +- *(6) BroadcastHashJoin [ws_web_site_sk#7], [web_site_sk#8], Inner, BuildRight + :- *(6) Project [ws_web_site_sk#7, ws_order_number#6, ws_ext_ship_cost#4, ws_net_profit#5] 
+ : +- *(6) BroadcastHashJoin [ws_ship_addr_sk#9], [ca_address_sk#10], Inner, BuildRight + : :- *(6) Project [ws_ship_addr_sk#9, ws_web_site_sk#7, ws_order_number#6, ws_ext_ship_cost#4, ws_net_profit#5] + : : +- *(6) BroadcastHashJoin [ws_ship_date_sk#11], [d_date_sk#12], Inner, BuildRight + : : :- *(6) BroadcastHashJoin [cast(ws_order_number#6 as bigint)], [wr_order_number#13], LeftAnti, BuildRight + : : : :- *(6) Project [ws_ship_date_sk#11, ws_ship_addr_sk#9, ws_web_site_sk#7, ws_order_number#6, ws_ext_ship_cost#4, ws_net_profit#5] + : : : : +- *(6) BroadcastHashJoin [ws_order_number#6], [ws_order_number#6#14], LeftSemi, BuildRight, NOT (ws_warehouse_sk#15 = ws_warehouse_sk#15#16) + : : : : :- *(6) Project [ws_ship_date_sk#11, ws_ship_addr_sk#9, ws_web_site_sk#7, ws_warehouse_sk#15, ws_order_number#6, ws_ext_ship_cost#4, ws_net_profit#5] + : : : : : +- *(6) Filter ((isnotnull(ws_ship_date_sk#11) && isnotnull(ws_ship_addr_sk#9)) && isnotnull(ws_web_site_sk#7)) + : : : : : +- *(6) FileScan parquet default.web_sales[ws_ship_date_sk#11,ws_ship_addr_sk#9,ws_web_site_sk#7,ws_warehouse_sk#15,ws_order_number#6,ws_ext_ship_cost#4,ws_net_profit#5] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_ship_date_sk), IsNotNull(ws_ship_addr_sk), IsNotNull(ws_web_site_sk)], ReadSchema: struct + : : : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, bigint, true])) + : : : +- *(2) FileScan parquet default.web_returns[wr_order_number#13] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_returns], PartitionFilters: [], PushedFilters: [], ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(3) Project [d_date_sk#12] + : : +- *(3) Filter (((isnotnull(d_date#17) && (cast(d_date#17 as string) >= 
1999-02-01)) && (d_date#17 <= 10683)) && isnotnull(d_date_sk#12)) + : : +- *(3) FileScan parquet default.date_dim[d_date_sk#12,d_date#17] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date), LessThanOrEqual(d_date,1999-04-02), IsNotNull(d_date_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(4) Project [ca_address_sk#10] + : +- *(4) Filter ((isnotnull(ca_state#18) && (ca_state#18 = IL)) && isnotnull(ca_address_sk#10)) + : +- *(4) FileScan parquet default.customer_address[ca_address_sk#10,ca_state#18] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_state), EqualTo(ca_state,IL), IsNotNull(ca_address_sk)], ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(5) Project [web_site_sk#8] + +- *(5) Filter ((isnotnull(web_company_name#19) && (web_company_name#19 = pri)) && isnotnull(web_site_sk#8)) + +- *(5) FileScan parquet default.web_site[web_site_sk#8,web_company_name#19] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_site], PartitionFilters: [], PushedFilters: [IsNotNull(web_company_name), EqualTo(web_company_name,pri), IsNotNull(web_site_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94/simplified.txt new file mode 100644 index 000000000..cc709ac3c --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94/simplified.txt @@ -0,0 +1,51 @@ +TakeOrderedAndProject [order count ,total shipping cost 
,total net profit ] + WholeStageCodegen + HashAggregate [sum,sum(UnscaledValue(ws_net_profit)),count(ws_order_number),sum(UnscaledValue(ws_ext_ship_cost)),count,sum] [sum,sum(UnscaledValue(ws_net_profit)),order count ,total net profit ,count(ws_order_number),sum(UnscaledValue(ws_ext_ship_cost)),count,sum,total shipping cost ] + InputAdapter + Exchange #1 + WholeStageCodegen + HashAggregate [sum,count,sum,sum(UnscaledValue(ws_net_profit)),count(ws_order_number),sum(UnscaledValue(ws_ext_ship_cost)),ws_order_number,sum,sum] [sum,count,sum,sum(UnscaledValue(ws_net_profit)),count(ws_order_number),sum(UnscaledValue(ws_ext_ship_cost)),sum,count,sum] + HashAggregate [sum,sum,sum(UnscaledValue(ws_net_profit)),sum(UnscaledValue(ws_ext_ship_cost)),ws_order_number,sum,sum] [sum,sum,sum(UnscaledValue(ws_net_profit)),sum(UnscaledValue(ws_ext_ship_cost)),sum,sum] + InputAdapter + Exchange [ws_order_number] #2 + WholeStageCodegen + HashAggregate [sum,sum(UnscaledValue(ws_net_profit)),sum(UnscaledValue(ws_ext_ship_cost)),ws_order_number,sum,ws_net_profit,ws_ext_ship_cost] [sum,sum,sum(UnscaledValue(ws_net_profit)),sum(UnscaledValue(ws_ext_ship_cost)),sum,sum] + Project [ws_order_number,ws_ext_ship_cost,ws_net_profit] + BroadcastHashJoin [ws_web_site_sk,web_site_sk] + Project [ws_web_site_sk,ws_order_number,ws_ext_ship_cost,ws_net_profit] + BroadcastHashJoin [ws_ship_addr_sk,ca_address_sk] + Project [ws_web_site_sk,ws_ship_addr_sk,ws_net_profit,ws_ext_ship_cost,ws_order_number] + BroadcastHashJoin [ws_ship_date_sk,d_date_sk] + BroadcastHashJoin [ws_order_number,wr_order_number] + Project [ws_web_site_sk,ws_ship_addr_sk,ws_net_profit,ws_ext_ship_cost,ws_order_number,ws_ship_date_sk] + BroadcastHashJoin [ws_order_number,ws_order_number,ws_warehouse_sk,ws_warehouse_sk] + Project [ws_web_site_sk,ws_ship_addr_sk,ws_net_profit,ws_ext_ship_cost,ws_order_number,ws_ship_date_sk,ws_warehouse_sk] + Filter [ws_ship_date_sk,ws_ship_addr_sk,ws_web_site_sk] + Scan parquet default.web_sales 
[ws_web_site_sk,ws_ship_addr_sk,ws_net_profit,ws_ext_ship_cost,ws_order_number,ws_ship_date_sk,ws_warehouse_sk] [ws_web_site_sk,ws_ship_addr_sk,ws_net_profit,ws_ext_ship_cost,ws_order_number,ws_ship_date_sk,ws_warehouse_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen + Project [ws_warehouse_sk,ws_order_number] + Scan parquet default.web_sales [ws_warehouse_sk,ws_order_number] [ws_warehouse_sk,ws_order_number] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen + Scan parquet default.web_returns [wr_order_number] [wr_order_number] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen + Project [d_date_sk] + Filter [d_date,d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_date] [d_date_sk,d_date] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen + Project [ca_address_sk] + Filter [ca_state,ca_address_sk] + Scan parquet default.customer_address [ca_address_sk,ca_state] [ca_address_sk,ca_state] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen + Project [web_site_sk] + Filter [web_company_name,web_site_sk] + Scan parquet default.web_site [web_site_sk,web_company_name] [web_site_sk,web_company_name] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95/explain.txt new file mode 100644 index 000000000..a3de90e6d --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95/explain.txt @@ -0,0 +1,54 @@ +== Physical Plan == +TakeOrderedAndProject(limit=100, orderBy=[order count #1 ASC NULLS FIRST], output=[order count #1,total shipping cost #2,total net profit #3]) ++- *(11) HashAggregate(keys=[], functions=[sum(UnscaledValue(ws_ext_ship_cost#4)), sum(UnscaledValue(ws_net_profit#5)), count(distinct ws_order_number#6)]) + +- Exchange SinglePartition + +- *(10) HashAggregate(keys=[], functions=[merge_sum(UnscaledValue(ws_ext_ship_cost#4)), merge_sum(UnscaledValue(ws_net_profit#5)), partial_count(distinct 
ws_order_number#6)]) + +- *(10) HashAggregate(keys=[ws_order_number#6], functions=[merge_sum(UnscaledValue(ws_ext_ship_cost#4)), merge_sum(UnscaledValue(ws_net_profit#5))]) + +- Exchange hashpartitioning(ws_order_number#6, 200) + +- *(9) HashAggregate(keys=[ws_order_number#6], functions=[partial_sum(UnscaledValue(ws_ext_ship_cost#4)), partial_sum(UnscaledValue(ws_net_profit#5))]) + +- *(9) Project [ws_order_number#6, ws_ext_ship_cost#4, ws_net_profit#5] + +- *(9) BroadcastHashJoin [ws_web_site_sk#7], [web_site_sk#8], Inner, BuildRight + :- *(9) Project [ws_web_site_sk#7, ws_order_number#6, ws_ext_ship_cost#4, ws_net_profit#5] + : +- *(9) BroadcastHashJoin [ws_ship_addr_sk#9], [ca_address_sk#10], Inner, BuildRight + : :- *(9) Project [ws_ship_addr_sk#9, ws_web_site_sk#7, ws_order_number#6, ws_ext_ship_cost#4, ws_net_profit#5] + : : +- *(9) BroadcastHashJoin [ws_ship_date_sk#11], [d_date_sk#12], Inner, BuildRight + : : :- *(9) BroadcastHashJoin [cast(ws_order_number#6 as bigint)], [wr_order_number#13], LeftSemi, BuildRight + : : : :- *(9) BroadcastHashJoin [ws_order_number#6], [ws_order_number#6#14], LeftSemi, BuildRight + : : : : :- *(9) Project [ws_ship_date_sk#11, ws_ship_addr_sk#9, ws_web_site_sk#7, ws_order_number#6, ws_ext_ship_cost#4, ws_net_profit#5] + : : : : : +- *(9) Filter ((isnotnull(ws_ship_date_sk#11) && isnotnull(ws_ship_addr_sk#9)) && isnotnull(ws_web_site_sk#7)) + : : : : : +- *(9) FileScan parquet default.web_sales[ws_ship_date_sk#11,ws_ship_addr_sk#9,ws_web_site_sk#7,ws_order_number#6,ws_ext_ship_cost#4,ws_net_profit#5] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_ship_date_sk), IsNotNull(ws_ship_addr_sk), IsNotNull(ws_web_site_sk)], ReadSchema: struct + : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[1, int, true] as bigint))) + : : : : +- *(1) Project [ws_warehouse_sk#17, 
ws_order_number#15] + : : : : +- *(1) Filter (isnotnull(ws_order_number#15) && isnotnull(ws_warehouse_sk#17)) + : : : : +- *(1) FileScan parquet default.web_sales[ws_warehouse_sk#17,ws_order_number#15] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_order_number), IsNotNull(ws_warehouse_sk)], ReadSchema: struct + : : : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, bigint, true])) + : : : +- *(5) Project [wr_order_number#13] + : : : +- *(5) BroadcastHashJoin [wr_order_number#13], [cast(ws_order_number#6 as bigint)], Inner, BuildRight + : : : :- *(5) Project [wr_order_number#13] + : : : : +- *(5) Filter isnotnull(wr_order_number#13) + : : : : +- *(5) FileScan parquet default.web_returns[wr_order_number#13] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_returns], PartitionFilters: [], PushedFilters: [IsNotNull(wr_order_number)], ReadSchema: struct + : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : +- *(4) Project [ws_order_number#6] + : : : +- *(4) BroadcastHashJoin [ws_order_number#6], [ws_order_number#15], Inner, BuildRight, NOT (ws_warehouse_sk#16 = ws_warehouse_sk#17) + : : : :- *(4) Project [ws_warehouse_sk#16, ws_order_number#6] + : : : : +- *(4) Filter (isnotnull(ws_order_number#6) && isnotnull(ws_warehouse_sk#16)) + : : : : +- *(4) FileScan parquet default.web_sales[ws_warehouse_sk#16,ws_order_number#6] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_order_number), IsNotNull(ws_warehouse_sk)], ReadSchema: struct + : : : +- ReusedExchange [ws_warehouse_sk#17, ws_order_number#15], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[1, int, true] 
as bigint))) + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(6) Project [d_date_sk#12] + : : +- *(6) Filter (((isnotnull(d_date#18) && (cast(d_date#18 as string) >= 1999-02-01)) && (d_date#18 <= 10683)) && isnotnull(d_date_sk#12)) + : : +- *(6) FileScan parquet default.date_dim[d_date_sk#12,d_date#18] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date), LessThanOrEqual(d_date,1999-04-02), IsNotNull(d_date_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(7) Project [ca_address_sk#10] + : +- *(7) Filter ((isnotnull(ca_state#19) && (ca_state#19 = IL)) && isnotnull(ca_address_sk#10)) + : +- *(7) FileScan parquet default.customer_address[ca_address_sk#10,ca_state#19] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_state), EqualTo(ca_state,IL), IsNotNull(ca_address_sk)], ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(8) Project [web_site_sk#8] + +- *(8) Filter ((isnotnull(web_company_name#20) && (web_company_name#20 = pri)) && isnotnull(web_site_sk#8)) + +- *(8) FileScan parquet default.web_site[web_site_sk#8,web_company_name#20] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_site], PartitionFilters: [], PushedFilters: [IsNotNull(web_company_name), EqualTo(web_company_name,pri), IsNotNull(web_site_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95/simplified.txt 
new file mode 100644 index 000000000..5b6f33ebd --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95/simplified.txt @@ -0,0 +1,73 @@ +TakeOrderedAndProject [order count ,total shipping cost ,total net profit ] + WholeStageCodegen + HashAggregate [count(ws_order_number),sum(UnscaledValue(ws_net_profit)),count,sum,sum(UnscaledValue(ws_ext_ship_cost)),sum] [order count ,total shipping cost ,count(ws_order_number),sum(UnscaledValue(ws_net_profit)),count,sum,total net profit ,sum(UnscaledValue(ws_ext_ship_cost)),sum] + InputAdapter + Exchange #1 + WholeStageCodegen + HashAggregate [sum,sum,count,count(ws_order_number),sum(UnscaledValue(ws_net_profit)),sum,ws_order_number,sum(UnscaledValue(ws_ext_ship_cost)),sum] [sum,sum,count,count(ws_order_number),sum(UnscaledValue(ws_net_profit)),count,sum,sum(UnscaledValue(ws_ext_ship_cost)),sum] + HashAggregate [sum,sum,sum(UnscaledValue(ws_net_profit)),sum,ws_order_number,sum(UnscaledValue(ws_ext_ship_cost)),sum] [sum,sum,sum(UnscaledValue(ws_net_profit)),sum,sum(UnscaledValue(ws_ext_ship_cost)),sum] + InputAdapter + Exchange [ws_order_number] #2 + WholeStageCodegen + HashAggregate [sum,sum,sum(UnscaledValue(ws_net_profit)),ws_order_number,sum(UnscaledValue(ws_ext_ship_cost)),ws_net_profit,ws_ext_ship_cost] [sum,sum,sum(UnscaledValue(ws_net_profit)),sum,sum(UnscaledValue(ws_ext_ship_cost)),sum] + Project [ws_order_number,ws_ext_ship_cost,ws_net_profit] + BroadcastHashJoin [ws_web_site_sk,web_site_sk] + Project [ws_web_site_sk,ws_order_number,ws_ext_ship_cost,ws_net_profit] + BroadcastHashJoin [ws_ship_addr_sk,ca_address_sk] + Project [ws_web_site_sk,ws_ship_addr_sk,ws_net_profit,ws_ext_ship_cost,ws_order_number] + BroadcastHashJoin [ws_ship_date_sk,d_date_sk] + BroadcastHashJoin [ws_order_number,wr_order_number] + BroadcastHashJoin [ws_order_number,ws_order_number] + Project [ws_web_site_sk,ws_ship_addr_sk,ws_net_profit,ws_ext_ship_cost,ws_order_number,ws_ship_date_sk] + Filter 
[ws_ship_date_sk,ws_ship_addr_sk,ws_web_site_sk] + Scan parquet default.web_sales [ws_web_site_sk,ws_ship_addr_sk,ws_net_profit,ws_ext_ship_cost,ws_order_number,ws_ship_date_sk] [ws_web_site_sk,ws_ship_addr_sk,ws_net_profit,ws_ext_ship_cost,ws_order_number,ws_ship_date_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen + Project [ws_order_number] + BroadcastHashJoin [ws_order_number,ws_order_number,ws_warehouse_sk,ws_warehouse_sk] + Project [ws_warehouse_sk,ws_order_number] + Filter [ws_order_number,ws_warehouse_sk] + Scan parquet default.web_sales [ws_warehouse_sk,ws_order_number] [ws_warehouse_sk,ws_order_number] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen + Project [ws_warehouse_sk,ws_order_number] + Filter [ws_order_number,ws_warehouse_sk] + Scan parquet default.web_sales [ws_warehouse_sk,ws_order_number] [ws_warehouse_sk,ws_order_number] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen + Project [wr_order_number] + BroadcastHashJoin [wr_order_number,ws_order_number] + Project [wr_order_number] + Filter [wr_order_number] + Scan parquet default.web_returns [wr_order_number] [wr_order_number] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen + Project [ws_order_number] + BroadcastHashJoin [ws_order_number,ws_order_number,ws_warehouse_sk,ws_warehouse_sk] + Project [ws_warehouse_sk,ws_order_number] + Filter [ws_order_number,ws_warehouse_sk] + Scan parquet default.web_sales [ws_warehouse_sk,ws_order_number] [ws_warehouse_sk,ws_order_number] + InputAdapter + ReusedExchange [ws_warehouse_sk,ws_order_number] [ws_warehouse_sk,ws_order_number] #4 + InputAdapter + BroadcastExchange #7 + WholeStageCodegen + Project [d_date_sk] + Filter [d_date,d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_date] [d_date_sk,d_date] + InputAdapter + BroadcastExchange #8 + WholeStageCodegen + Project [ca_address_sk] + Filter [ca_state,ca_address_sk] + Scan parquet default.customer_address [ca_address_sk,ca_state] [ca_address_sk,ca_state] + 
InputAdapter + BroadcastExchange #9 + WholeStageCodegen + Project [web_site_sk] + Filter [web_company_name,web_site_sk] + Scan parquet default.web_site [web_site_sk,web_company_name] [web_site_sk,web_company_name] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q96/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q96/explain.txt new file mode 100644 index 000000000..c66e7e331 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q96/explain.txt @@ -0,0 +1,26 @@ +== Physical Plan == +TakeOrderedAndProject(limit=100, orderBy=[count(1)#1 ASC NULLS FIRST], output=[count(1)#1]) ++- *(5) HashAggregate(keys=[], functions=[count(1)]) + +- Exchange SinglePartition + +- *(4) HashAggregate(keys=[], functions=[partial_count(1)]) + +- *(4) Project + +- *(4) BroadcastHashJoin [ss_store_sk#2], [s_store_sk#3], Inner, BuildRight + :- *(4) Project [ss_store_sk#2] + : +- *(4) BroadcastHashJoin [ss_sold_time_sk#4], [t_time_sk#5], Inner, BuildRight + : :- *(4) Project [ss_sold_time_sk#4, ss_store_sk#2] + : : +- *(4) BroadcastHashJoin [ss_hdemo_sk#6], [hd_demo_sk#7], Inner, BuildRight + : : :- *(4) Project [ss_sold_time_sk#4, ss_hdemo_sk#6, ss_store_sk#2] + : : : +- *(4) Filter ((isnotnull(ss_hdemo_sk#6) && isnotnull(ss_sold_time_sk#4)) && isnotnull(ss_store_sk#2)) + : : : +- *(4) FileScan parquet default.store_sales[ss_sold_time_sk#4,ss_hdemo_sk#6,ss_store_sk#2] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_hdemo_sk), IsNotNull(ss_sold_time_sk), IsNotNull(ss_store_sk)], ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(1) Project [hd_demo_sk#7] + : : +- *(1) Filter ((isnotnull(hd_dep_count#8) && (hd_dep_count#8 = 7)) && isnotnull(hd_demo_sk#7)) + : : +- *(1) FileScan parquet 
default.household_demographics[hd_demo_sk#7,hd_dep_count#8] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/household_demographics], PartitionFilters: [], PushedFilters: [IsNotNull(hd_dep_count), EqualTo(hd_dep_count,7), IsNotNull(hd_demo_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(2) Project [t_time_sk#5] + : +- *(2) Filter ((((isnotnull(t_hour#9) && isnotnull(t_minute#10)) && (t_hour#9 = 20)) && (t_minute#10 >= 30)) && isnotnull(t_time_sk#5)) + : +- *(2) FileScan parquet default.time_dim[t_time_sk#5,t_hour#9,t_minute#10] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/time_dim], PartitionFilters: [], PushedFilters: [IsNotNull(t_hour), IsNotNull(t_minute), EqualTo(t_hour,20), GreaterThanOrEqual(t_minute,30), IsN..., ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(3) Project [s_store_sk#3] + +- *(3) Filter ((isnotnull(s_store_name#11) && (s_store_name#11 = ese)) && isnotnull(s_store_sk#3)) + +- *(3) FileScan parquet default.store[s_store_sk#3,s_store_name#11] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_store_name), EqualTo(s_store_name,ese), IsNotNull(s_store_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q96/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q96/simplified.txt new file mode 100644 index 000000000..fada2566d --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q96/simplified.txt @@ -0,0 +1,34 @@ +TakeOrderedAndProject [count(1)] + WholeStageCodegen + HashAggregate [count,count(1)] 
[count(1),count(1),count] + InputAdapter + Exchange #1 + WholeStageCodegen + HashAggregate [count,count] [count,count] + Project + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_store_sk] + BroadcastHashJoin [ss_sold_time_sk,t_time_sk] + Project [ss_sold_time_sk,ss_store_sk] + BroadcastHashJoin [ss_hdemo_sk,hd_demo_sk] + Project [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk] + Filter [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] + Scan parquet default.store_sales [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk] [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen + Project [hd_demo_sk] + Filter [hd_dep_count,hd_demo_sk] + Scan parquet default.household_demographics [hd_demo_sk,hd_dep_count] [hd_demo_sk,hd_dep_count] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen + Project [t_time_sk] + Filter [t_hour,t_minute,t_time_sk] + Scan parquet default.time_dim [t_time_sk,t_hour,t_minute] [t_time_sk,t_hour,t_minute] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen + Project [s_store_sk] + Filter [s_store_name,s_store_sk] + Scan parquet default.store [s_store_sk,s_store_name] [s_store_sk,s_store_name] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q97/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q97/explain.txt new file mode 100644 index 000000000..136c5a89f --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q97/explain.txt @@ -0,0 +1,32 @@ +== Physical Plan == +CollectLimit 100 ++- *(10) HashAggregate(keys=[], functions=[sum(cast(CASE WHEN (isnotnull(customer_sk#1) && isnull(customer_sk#2)) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (isnull(customer_sk#1) && isnotnull(customer_sk#2)) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (isnotnull(customer_sk#1) && isnotnull(customer_sk#2)) THEN 1 ELSE 0 END as bigint))]) + +- Exchange SinglePartition + +- *(9) HashAggregate(keys=[], functions=[partial_sum(cast(CASE WHEN 
(isnotnull(customer_sk#1) && isnull(customer_sk#2)) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (isnull(customer_sk#1) && isnotnull(customer_sk#2)) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (isnotnull(customer_sk#1) && isnotnull(customer_sk#2)) THEN 1 ELSE 0 END as bigint))]) + +- *(9) Project [customer_sk#1, customer_sk#2] + +- SortMergeJoin [customer_sk#1, item_sk#3], [customer_sk#2, item_sk#4], FullOuter + :- *(4) Sort [customer_sk#1 ASC NULLS FIRST, item_sk#3 ASC NULLS FIRST], false, 0 + : +- Exchange hashpartitioning(customer_sk#1, item_sk#3, 200) + : +- *(3) HashAggregate(keys=[ss_customer_sk#5, ss_item_sk#6], functions=[]) + : +- Exchange hashpartitioning(ss_customer_sk#5, ss_item_sk#6, 200) + : +- *(2) HashAggregate(keys=[ss_customer_sk#5, ss_item_sk#6], functions=[]) + : +- *(2) Project [ss_item_sk#6, ss_customer_sk#5] + : +- *(2) BroadcastHashJoin [ss_sold_date_sk#7], [d_date_sk#8], Inner, BuildRight + : :- *(2) Project [ss_sold_date_sk#7, ss_item_sk#6, ss_customer_sk#5] + : : +- *(2) Filter isnotnull(ss_sold_date_sk#7) + : : +- *(2) FileScan parquet default.store_sales[ss_sold_date_sk#7,ss_item_sk#6,ss_customer_sk#5] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(1) Project [d_date_sk#8] + : +- *(1) Filter (((isnotnull(d_month_seq#9) && (d_month_seq#9 >= 1200)) && (d_month_seq#9 <= 1211)) && isnotnull(d_date_sk#8)) + : +- *(1) FileScan parquet default.date_dim[d_date_sk#8,d_month_seq#9] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1200), LessThanOrEqual(d_month_seq,1211),..., 
ReadSchema: struct + +- *(8) Sort [customer_sk#2 ASC NULLS FIRST, item_sk#4 ASC NULLS FIRST], false, 0 + +- Exchange hashpartitioning(customer_sk#2, item_sk#4, 200) + +- *(7) HashAggregate(keys=[cs_bill_customer_sk#10, cs_item_sk#11], functions=[]) + +- Exchange hashpartitioning(cs_bill_customer_sk#10, cs_item_sk#11, 200) + +- *(6) HashAggregate(keys=[cs_bill_customer_sk#10, cs_item_sk#11], functions=[]) + +- *(6) Project [cs_bill_customer_sk#10, cs_item_sk#11] + +- *(6) BroadcastHashJoin [cs_sold_date_sk#12], [d_date_sk#8], Inner, BuildRight + :- *(6) Project [cs_sold_date_sk#12, cs_bill_customer_sk#10, cs_item_sk#11] + : +- *(6) Filter isnotnull(cs_sold_date_sk#12) + : +- *(6) FileScan parquet default.catalog_sales[cs_sold_date_sk#12,cs_bill_customer_sk#10,cs_item_sk#11] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_sold_date_sk)], ReadSchema: struct + +- ReusedExchange [d_date_sk#8], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q97/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q97/simplified.txt new file mode 100644 index 000000000..a3cc25d23 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q97/simplified.txt @@ -0,0 +1,48 @@ +CollectLimit + WholeStageCodegen + HashAggregate [sum,sum,sum(cast(CASE WHEN (isnotnull(customer_sk) && isnotnull(customer_sk)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (isnotnull(customer_sk) && isnull(customer_sk)) THEN 1 ELSE 0 END as bigint)),sum,sum(cast(CASE WHEN (isnull(customer_sk) && isnotnull(customer_sk)) THEN 1 ELSE 0 END as bigint))] [store_only,sum,catalog_only,sum,sum(cast(CASE WHEN (isnotnull(customer_sk) && isnotnull(customer_sk)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE 
WHEN (isnotnull(customer_sk) && isnull(customer_sk)) THEN 1 ELSE 0 END as bigint)),sum,sum(cast(CASE WHEN (isnull(customer_sk) && isnotnull(customer_sk)) THEN 1 ELSE 0 END as bigint)),store_and_catalog] + InputAdapter + Exchange #1 + WholeStageCodegen + HashAggregate [sum,sum,sum,customer_sk,sum,customer_sk,sum,sum] [sum,sum,sum,sum,sum,sum] + Project [customer_sk,customer_sk] + InputAdapter + SortMergeJoin [customer_sk,item_sk,customer_sk,item_sk] + WholeStageCodegen + Sort [customer_sk,item_sk] + InputAdapter + Exchange [customer_sk,item_sk] #2 + WholeStageCodegen + HashAggregate [ss_customer_sk,ss_item_sk] [customer_sk,item_sk] + InputAdapter + Exchange [ss_customer_sk,ss_item_sk] #3 + WholeStageCodegen + HashAggregate [ss_customer_sk,ss_item_sk] + Project [ss_item_sk,ss_customer_sk] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_sold_date_sk,ss_item_sk,ss_customer_sk] + Filter [ss_sold_date_sk] + Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_customer_sk] [ss_sold_date_sk,ss_item_sk,ss_customer_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen + Project [d_date_sk] + Filter [d_month_seq,d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_month_seq] [d_date_sk,d_month_seq] + WholeStageCodegen + Sort [customer_sk,item_sk] + InputAdapter + Exchange [customer_sk,item_sk] #5 + WholeStageCodegen + HashAggregate [cs_bill_customer_sk,cs_item_sk] [customer_sk,item_sk] + InputAdapter + Exchange [cs_bill_customer_sk,cs_item_sk] #6 + WholeStageCodegen + HashAggregate [cs_bill_customer_sk,cs_item_sk] + Project [cs_bill_customer_sk,cs_item_sk] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_sold_date_sk,cs_bill_customer_sk,cs_item_sk] + Filter [cs_sold_date_sk] + Scan parquet default.catalog_sales [cs_sold_date_sk,cs_bill_customer_sk,cs_item_sk] [cs_sold_date_sk,cs_bill_customer_sk,cs_item_sk] + InputAdapter + ReusedExchange [d_date_sk] [d_date_sk] #4 diff --git 
a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q98/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q98/explain.txt new file mode 100644 index 000000000..eee60f541 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q98/explain.txt @@ -0,0 +1,26 @@ +== Physical Plan == +*(7) Project [i_item_desc#1, i_category#2, i_class#3, i_current_price#4, itemrevenue#5, revenueratio#6] ++- *(7) Sort [i_category#2 ASC NULLS FIRST, i_class#3 ASC NULLS FIRST, i_item_id#7 ASC NULLS FIRST, i_item_desc#1 ASC NULLS FIRST, revenueratio#6 ASC NULLS FIRST], true, 0 + +- Exchange rangepartitioning(i_category#2 ASC NULLS FIRST, i_class#3 ASC NULLS FIRST, i_item_id#7 ASC NULLS FIRST, i_item_desc#1 ASC NULLS FIRST, revenueratio#6 ASC NULLS FIRST, 200) + +- *(6) Project [i_item_desc#1, i_category#2, i_class#3, i_current_price#4, itemrevenue#5, CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(_w0#8) * 100.00), DecimalType(21,2)) as decimal(27,2))) / promote_precision(_we0#9)), DecimalType(38,17)) AS revenueratio#6, i_item_id#7] + +- Window [sum(_w1#10) windowspecdefinition(i_class#3, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS _we0#9], [i_class#3] + +- *(5) Sort [i_class#3 ASC NULLS FIRST], false, 0 + +- Exchange hashpartitioning(i_class#3, 200) + +- *(4) HashAggregate(keys=[i_item_id#7, i_item_desc#1, i_category#2, i_class#3, i_current_price#4], functions=[sum(UnscaledValue(ss_ext_sales_price#11))]) + +- Exchange hashpartitioning(i_item_id#7, i_item_desc#1, i_category#2, i_class#3, i_current_price#4, 200) + +- *(3) HashAggregate(keys=[i_item_id#7, i_item_desc#1, i_category#2, i_class#3, i_current_price#4], functions=[partial_sum(UnscaledValue(ss_ext_sales_price#11))]) + +- *(3) Project [ss_ext_sales_price#11, i_item_id#7, i_item_desc#1, i_current_price#4, i_class#3, i_category#2] + +- *(3) BroadcastHashJoin [ss_sold_date_sk#12], [d_date_sk#13], Inner, BuildRight + :- 
*(3) Project [ss_sold_date_sk#12, ss_ext_sales_price#11, i_item_id#7, i_item_desc#1, i_current_price#4, i_class#3, i_category#2] + : +- *(3) BroadcastHashJoin [ss_item_sk#14], [i_item_sk#15], Inner, BuildRight + : :- *(3) Project [ss_sold_date_sk#12, ss_item_sk#14, ss_ext_sales_price#11] + : : +- *(3) Filter (isnotnull(ss_item_sk#14) && isnotnull(ss_sold_date_sk#12)) + : : +- *(3) FileScan parquet default.store_sales[ss_sold_date_sk#12,ss_item_sk#14,ss_ext_sales_price#11] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(1) Project [i_item_sk#15, i_item_id#7, i_item_desc#1, i_current_price#4, i_class#3, i_category#2] + : +- *(1) Filter (i_category#2 IN (Sports,Books,Home) && isnotnull(i_item_sk#15)) + : +- *(1) FileScan parquet default.item[i_item_sk#15,i_item_id#7,i_item_desc#1,i_current_price#4,i_class#3,i_category#2] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [In(i_category, [Sports,Books,Home]), IsNotNull(i_item_sk)], ReadSchema: struct= 10644)) && (d_date#16 <= 10674)) && isnotnull(d_date_sk#13)) + +- *(2) FileScan parquet default.date_dim[d_date_sk#13,d_date#16] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,1999-02-22), LessThanOrEqual(d_date,1999-03-24), Is..., ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q98/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q98/simplified.txt new file 
mode 100644 index 000000000..be26ab7ec --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q98/simplified.txt @@ -0,0 +1,38 @@ +WholeStageCodegen + Project [revenueratio,i_item_desc,itemrevenue,i_category,i_current_price,i_class] + Sort [revenueratio,i_item_id,i_item_desc,i_category,i_class] + InputAdapter + Exchange [revenueratio,i_item_id,i_item_desc,i_category,i_class] #1 + WholeStageCodegen + Project [i_item_id,i_item_desc,_w0,itemrevenue,i_category,_we0,i_current_price,i_class] + InputAdapter + Window [_w1,i_class] + WholeStageCodegen + Sort [i_class] + InputAdapter + Exchange [i_class] #2 + WholeStageCodegen + HashAggregate [i_item_id,i_item_desc,sum,sum(UnscaledValue(ss_ext_sales_price)),i_category,i_current_price,i_class] [sum,sum(UnscaledValue(ss_ext_sales_price)),_w0,itemrevenue,_w1] + InputAdapter + Exchange [i_item_id,i_item_desc,i_category,i_current_price,i_class] #3 + WholeStageCodegen + HashAggregate [i_item_id,i_item_desc,sum,i_category,ss_ext_sales_price,sum,i_current_price,i_class] [sum,sum] + Project [i_class,i_current_price,ss_ext_sales_price,i_category,i_item_desc,i_item_id] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [i_class,i_current_price,ss_ext_sales_price,i_category,ss_sold_date_sk,i_item_desc,i_item_id] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [ss_sold_date_sk,ss_item_sk,ss_ext_sales_price] + Filter [ss_item_sk,ss_sold_date_sk] + Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_ext_sales_price] [ss_sold_date_sk,ss_item_sk,ss_ext_sales_price] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen + Project [i_class,i_current_price,i_item_sk,i_category,i_item_desc,i_item_id] + Filter [i_category,i_item_sk] + Scan parquet default.item [i_class,i_current_price,i_item_sk,i_category,i_item_desc,i_item_id] [i_class,i_current_price,i_item_sk,i_category,i_item_desc,i_item_id] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen + Project [d_date_sk] + Filter 
[d_date,d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_date] [d_date_sk,d_date] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q99/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q99/explain.txt new file mode 100644 index 000000000..fbd4c2dc5 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q99/explain.txt @@ -0,0 +1,32 @@ +== Physical Plan == +TakeOrderedAndProject(limit=100, orderBy=[substring(w_warehouse_name, 1, 20)#1 ASC NULLS FIRST,sm_type#2 ASC NULLS FIRST,cc_name#3 ASC NULLS FIRST], output=[substring(w_warehouse_name, 1, 20)#1,sm_type#2,cc_name#3,30 days #4,31 - 60 days #5,61 - 90 days #6,91 - 120 days #7,>120 days #8]) ++- *(6) HashAggregate(keys=[substring(w_warehouse_name#9, 1, 20)#10, sm_type#2, cc_name#3], functions=[sum(cast(CASE WHEN ((cs_ship_date_sk#11 - cs_sold_date_sk#12) <= 30) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (((cs_ship_date_sk#11 - cs_sold_date_sk#12) > 30) && ((cs_ship_date_sk#11 - cs_sold_date_sk#12) <= 60)) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (((cs_ship_date_sk#11 - cs_sold_date_sk#12) > 60) && ((cs_ship_date_sk#11 - cs_sold_date_sk#12) <= 90)) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (((cs_ship_date_sk#11 - cs_sold_date_sk#12) > 90) && ((cs_ship_date_sk#11 - cs_sold_date_sk#12) <= 120)) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN ((cs_ship_date_sk#11 - cs_sold_date_sk#12) > 120) THEN 1 ELSE 0 END as bigint))]) + +- Exchange hashpartitioning(substring(w_warehouse_name#9, 1, 20)#10, sm_type#2, cc_name#3, 200) + +- *(5) HashAggregate(keys=[substring(w_warehouse_name#9, 1, 20) AS substring(w_warehouse_name#9, 1, 20)#10, sm_type#2, cc_name#3], functions=[partial_sum(cast(CASE WHEN ((cs_ship_date_sk#11 - cs_sold_date_sk#12) <= 30) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (((cs_ship_date_sk#11 - cs_sold_date_sk#12) > 30) && ((cs_ship_date_sk#11 - cs_sold_date_sk#12) <= 60)) THEN 1 ELSE 0 END as 
bigint)), partial_sum(cast(CASE WHEN (((cs_ship_date_sk#11 - cs_sold_date_sk#12) > 60) && ((cs_ship_date_sk#11 - cs_sold_date_sk#12) <= 90)) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (((cs_ship_date_sk#11 - cs_sold_date_sk#12) > 90) && ((cs_ship_date_sk#11 - cs_sold_date_sk#12) <= 120)) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN ((cs_ship_date_sk#11 - cs_sold_date_sk#12) > 120) THEN 1 ELSE 0 END as bigint))]) + +- *(5) Project [cs_sold_date_sk#12, cs_ship_date_sk#11, w_warehouse_name#9, sm_type#2, cc_name#3] + +- *(5) BroadcastHashJoin [cs_ship_date_sk#11], [d_date_sk#13], Inner, BuildRight + :- *(5) Project [cs_sold_date_sk#12, cs_ship_date_sk#11, w_warehouse_name#9, sm_type#2, cc_name#3] + : +- *(5) BroadcastHashJoin [cs_call_center_sk#14], [cc_call_center_sk#15], Inner, BuildRight + : :- *(5) Project [cs_sold_date_sk#12, cs_ship_date_sk#11, cs_call_center_sk#14, w_warehouse_name#9, sm_type#2] + : : +- *(5) BroadcastHashJoin [cs_ship_mode_sk#16], [sm_ship_mode_sk#17], Inner, BuildRight + : : :- *(5) Project [cs_sold_date_sk#12, cs_ship_date_sk#11, cs_call_center_sk#14, cs_ship_mode_sk#16, w_warehouse_name#9] + : : : +- *(5) BroadcastHashJoin [cs_warehouse_sk#18], [w_warehouse_sk#19], Inner, BuildRight + : : : :- *(5) Project [cs_sold_date_sk#12, cs_ship_date_sk#11, cs_call_center_sk#14, cs_ship_mode_sk#16, cs_warehouse_sk#18] + : : : : +- *(5) Filter (((isnotnull(cs_warehouse_sk#18) && isnotnull(cs_ship_mode_sk#16)) && isnotnull(cs_call_center_sk#14)) && isnotnull(cs_ship_date_sk#11)) + : : : : +- *(5) FileScan parquet default.catalog_sales[cs_sold_date_sk#12,cs_ship_date_sk#11,cs_call_center_sk#14,cs_ship_mode_sk#16,cs_warehouse_sk#18] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_warehouse_sk), IsNotNull(cs_ship_mode_sk), IsNotNull(cs_call_center_sk), IsNotNull(..., ReadSchema: struct + : : +- 
BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(2) Project [sm_ship_mode_sk#17, sm_type#2] + : : +- *(2) Filter isnotnull(sm_ship_mode_sk#17) + : : +- *(2) FileScan parquet default.ship_mode[sm_ship_mode_sk#17,sm_type#2] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/ship_mode], PartitionFilters: [], PushedFilters: [IsNotNull(sm_ship_mode_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(3) Project [cc_call_center_sk#15, cc_name#3] + : +- *(3) Filter isnotnull(cc_call_center_sk#15) + : +- *(3) FileScan parquet default.call_center[cc_call_center_sk#15,cc_name#3] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/call_center], PartitionFilters: [], PushedFilters: [IsNotNull(cc_call_center_sk)], ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(4) Project [d_date_sk#13] + +- *(4) Filter (((isnotnull(d_month_seq#20) && (d_month_seq#20 >= 1200)) && (d_month_seq#20 <= 1211)) && isnotnull(d_date_sk#13)) + +- *(4) FileScan parquet default.date_dim[d_date_sk#13,d_month_seq#20] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1200), LessThanOrEqual(d_month_seq,1211),..., ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q99/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q99/simplified.txt new file mode 100644 index 000000000..934bf3e47 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q99/simplified.txt @@ -0,0 +1,42 @@ +TakeOrderedAndProject 
[substring(w_warehouse_name, 1, 20),61 - 90 days ,cc_name,>120 days ,91 - 120 days ,30 days ,sm_type,31 - 60 days ] + WholeStageCodegen + HashAggregate [sum,sum(cast(CASE WHEN (((cs_ship_date_sk - cs_sold_date_sk) > 30) && ((cs_ship_date_sk - cs_sold_date_sk) <= 60)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN ((cs_ship_date_sk - cs_sold_date_sk) <= 30) THEN 1 ELSE 0 END as bigint)),sum,substring(w_warehouse_name, 1, 20),sum(cast(CASE WHEN (((cs_ship_date_sk - cs_sold_date_sk) > 90) && ((cs_ship_date_sk - cs_sold_date_sk) <= 120)) THEN 1 ELSE 0 END as bigint)),sum,cc_name,sum,sm_type,sum(cast(CASE WHEN (((cs_ship_date_sk - cs_sold_date_sk) > 60) && ((cs_ship_date_sk - cs_sold_date_sk) <= 90)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN ((cs_ship_date_sk - cs_sold_date_sk) > 120) THEN 1 ELSE 0 END as bigint)),sum] [sum,substring(w_warehouse_name, 1, 20),sum(cast(CASE WHEN (((cs_ship_date_sk - cs_sold_date_sk) > 30) && ((cs_ship_date_sk - cs_sold_date_sk) <= 60)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN ((cs_ship_date_sk - cs_sold_date_sk) <= 30) THEN 1 ELSE 0 END as bigint)),sum,sum(cast(CASE WHEN (((cs_ship_date_sk - cs_sold_date_sk) > 90) && ((cs_ship_date_sk - cs_sold_date_sk) <= 120)) THEN 1 ELSE 0 END as bigint)),61 - 90 days ,sum,>120 days ,sum,91 - 120 days ,30 days ,31 - 60 days ,sum(cast(CASE WHEN (((cs_ship_date_sk - cs_sold_date_sk) > 60) && ((cs_ship_date_sk - cs_sold_date_sk) <= 90)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN ((cs_ship_date_sk - cs_sold_date_sk) > 120) THEN 1 ELSE 0 END as bigint)),sum] + InputAdapter + Exchange [substring(w_warehouse_name, 1, 20),sm_type,cc_name] #1 + WholeStageCodegen + HashAggregate [sum,sum,sum,substring(w_warehouse_name, 1, 20),w_warehouse_name,sum,sum,cs_sold_date_sk,sum,cc_name,sum,sum,sm_type,cs_ship_date_sk,sum,sum] [sum,sum,sum,substring(w_warehouse_name, 1, 20),sum,sum,sum,sum,sum,sum,sum] + Project [cs_ship_date_sk,cs_sold_date_sk,sm_type,w_warehouse_name,cc_name] + BroadcastHashJoin 
[cs_ship_date_sk,d_date_sk] + Project [cs_ship_date_sk,cs_sold_date_sk,sm_type,w_warehouse_name,cc_name] + BroadcastHashJoin [cs_call_center_sk,cc_call_center_sk] + Project [cs_ship_date_sk,cs_sold_date_sk,sm_type,w_warehouse_name,cs_call_center_sk] + BroadcastHashJoin [cs_ship_mode_sk,sm_ship_mode_sk] + Project [cs_ship_date_sk,cs_sold_date_sk,cs_ship_mode_sk,w_warehouse_name,cs_call_center_sk] + BroadcastHashJoin [cs_warehouse_sk,w_warehouse_sk] + Project [cs_ship_date_sk,cs_warehouse_sk,cs_sold_date_sk,cs_ship_mode_sk,cs_call_center_sk] + Filter [cs_warehouse_sk,cs_ship_mode_sk,cs_call_center_sk,cs_ship_date_sk] + Scan parquet default.catalog_sales [cs_ship_date_sk,cs_warehouse_sk,cs_sold_date_sk,cs_ship_mode_sk,cs_call_center_sk] [cs_ship_date_sk,cs_warehouse_sk,cs_sold_date_sk,cs_ship_mode_sk,cs_call_center_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen + Project [w_warehouse_sk,w_warehouse_name] + Filter [w_warehouse_sk] + Scan parquet default.warehouse [w_warehouse_sk,w_warehouse_name] [w_warehouse_sk,w_warehouse_name] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen + Project [sm_ship_mode_sk,sm_type] + Filter [sm_ship_mode_sk] + Scan parquet default.ship_mode [sm_ship_mode_sk,sm_type] [sm_ship_mode_sk,sm_type] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen + Project [cc_call_center_sk,cc_name] + Filter [cc_call_center_sk] + Scan parquet default.call_center [cc_call_center_sk,cc_name] [cc_call_center_sk,cc_name] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen + Project [d_date_sk] + Filter [d_month_seq,d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_month_seq] [d_date_sk,d_month_seq] diff --git a/src/test/resources/tpcds/q1.sql b/src/test/resources/tpcds/q1.sql new file mode 100644 index 000000000..4d20faad8 --- /dev/null +++ b/src/test/resources/tpcds/q1.sql @@ -0,0 +1,19 @@ +WITH customer_total_return AS +( SELECT + sr_customer_sk AS ctr_customer_sk, + sr_store_sk AS ctr_store_sk, + sum(sr_return_amt) 
AS ctr_total_return + FROM store_returns, date_dim + WHERE sr_returned_date_sk = d_date_sk AND d_year = 2000 + GROUP BY sr_customer_sk, sr_store_sk) +SELECT c_customer_id +FROM customer_total_return ctr1, store, customer +WHERE ctr1.ctr_total_return > + (SELECT avg(ctr_total_return) * 1.2 + FROM customer_total_return ctr2 + WHERE ctr1.ctr_store_sk = ctr2.ctr_store_sk) + AND s_store_sk = ctr1.ctr_store_sk + AND s_state = 'TN' + AND ctr1.ctr_customer_sk = c_customer_sk +ORDER BY c_customer_id +LIMIT 100 diff --git a/src/test/resources/tpcds/q10.sql b/src/test/resources/tpcds/q10.sql new file mode 100644 index 000000000..5500e1aea --- /dev/null +++ b/src/test/resources/tpcds/q10.sql @@ -0,0 +1,57 @@ +SELECT + cd_gender, + cd_marital_status, + cd_education_status, + count(*) cnt1, + cd_purchase_estimate, + count(*) cnt2, + cd_credit_rating, + count(*) cnt3, + cd_dep_count, + count(*) cnt4, + cd_dep_employed_count, + count(*) cnt5, + cd_dep_college_count, + count(*) cnt6 +FROM + customer c, customer_address ca, customer_demographics +WHERE + c.c_current_addr_sk = ca.ca_address_sk AND + ca_county IN ('Rush County', 'Toole County', 'Jefferson County', + 'Dona Ana County', 'La Porte County') AND + cd_demo_sk = c.c_current_cdemo_sk AND + exists(SELECT * + FROM store_sales, date_dim + WHERE c.c_customer_sk = ss_customer_sk AND + ss_sold_date_sk = d_date_sk AND + d_year = 2002 AND + d_moy BETWEEN 1 AND 1 + 3) AND + (exists(SELECT * + FROM web_sales, date_dim + WHERE c.c_customer_sk = ws_bill_customer_sk AND + ws_sold_date_sk = d_date_sk AND + d_year = 2002 AND + d_moy BETWEEN 1 AND 1 + 3) OR + exists(SELECT * + FROM catalog_sales, date_dim + WHERE c.c_customer_sk = cs_ship_customer_sk AND + cs_sold_date_sk = d_date_sk AND + d_year = 2002 AND + d_moy BETWEEN 1 AND 1 + 3)) +GROUP BY cd_gender, + cd_marital_status, + cd_education_status, + cd_purchase_estimate, + cd_credit_rating, + cd_dep_count, + cd_dep_employed_count, + cd_dep_college_count +ORDER BY cd_gender, + 
cd_marital_status, + cd_education_status, + cd_purchase_estimate, + cd_credit_rating, + cd_dep_count, + cd_dep_employed_count, + cd_dep_college_count +LIMIT 100 diff --git a/src/test/resources/tpcds/q11.sql b/src/test/resources/tpcds/q11.sql new file mode 100644 index 000000000..3618fb14f --- /dev/null +++ b/src/test/resources/tpcds/q11.sql @@ -0,0 +1,68 @@ +WITH year_total AS ( + SELECT + c_customer_id customer_id, + c_first_name customer_first_name, + c_last_name customer_last_name, + c_preferred_cust_flag customer_preferred_cust_flag, + c_birth_country customer_birth_country, + c_login customer_login, + c_email_address customer_email_address, + d_year dyear, + sum(ss_ext_list_price - ss_ext_discount_amt) year_total, + 's' sale_type + FROM customer, store_sales, date_dim + WHERE c_customer_sk = ss_customer_sk + AND ss_sold_date_sk = d_date_sk + GROUP BY c_customer_id + , c_first_name + , c_last_name + , d_year + , c_preferred_cust_flag + , c_birth_country + , c_login + , c_email_address + , d_year + UNION ALL + SELECT + c_customer_id customer_id, + c_first_name customer_first_name, + c_last_name customer_last_name, + c_preferred_cust_flag customer_preferred_cust_flag, + c_birth_country customer_birth_country, + c_login customer_login, + c_email_address customer_email_address, + d_year dyear, + sum(ws_ext_list_price - ws_ext_discount_amt) year_total, + 'w' sale_type + FROM customer, web_sales, date_dim + WHERE c_customer_sk = ws_bill_customer_sk + AND ws_sold_date_sk = d_date_sk + GROUP BY + c_customer_id, c_first_name, c_last_name, c_preferred_cust_flag, c_birth_country, + c_login, c_email_address, d_year) +SELECT t_s_secyear.customer_preferred_cust_flag +FROM year_total t_s_firstyear + , year_total t_s_secyear + , year_total t_w_firstyear + , year_total t_w_secyear +WHERE t_s_secyear.customer_id = t_s_firstyear.customer_id + AND t_s_firstyear.customer_id = t_w_secyear.customer_id + AND t_s_firstyear.customer_id = t_w_firstyear.customer_id + AND 
t_s_firstyear.sale_type = 's' + AND t_w_firstyear.sale_type = 'w' + AND t_s_secyear.sale_type = 's' + AND t_w_secyear.sale_type = 'w' + AND t_s_firstyear.dyear = 2001 + AND t_s_secyear.dyear = 2001 + 1 + AND t_w_firstyear.dyear = 2001 + AND t_w_secyear.dyear = 2001 + 1 + AND t_s_firstyear.year_total > 0 + AND t_w_firstyear.year_total > 0 + AND CASE WHEN t_w_firstyear.year_total > 0 + THEN t_w_secyear.year_total / t_w_firstyear.year_total + ELSE NULL END + > CASE WHEN t_s_firstyear.year_total > 0 + THEN t_s_secyear.year_total / t_s_firstyear.year_total + ELSE NULL END +ORDER BY t_s_secyear.customer_preferred_cust_flag +LIMIT 100 diff --git a/src/test/resources/tpcds/q12.sql b/src/test/resources/tpcds/q12.sql new file mode 100644 index 000000000..0382737f5 --- /dev/null +++ b/src/test/resources/tpcds/q12.sql @@ -0,0 +1,22 @@ +SELECT + i_item_desc, + i_category, + i_class, + i_current_price, + sum(ws_ext_sales_price) AS itemrevenue, + sum(ws_ext_sales_price) * 100 / sum(sum(ws_ext_sales_price)) + OVER + (PARTITION BY i_class) AS revenueratio +FROM + web_sales, item, date_dim +WHERE + ws_item_sk = i_item_sk + AND i_category IN ('Sports', 'Books', 'Home') + AND ws_sold_date_sk = d_date_sk + AND d_date BETWEEN cast('1999-02-22' AS DATE) + AND (cast('1999-02-22' AS DATE) + INTERVAL 30 days) +GROUP BY + i_item_id, i_item_desc, i_category, i_class, i_current_price +ORDER BY + i_category, i_class, i_item_id, i_item_desc, revenueratio +LIMIT 100 diff --git a/src/test/resources/tpcds/q13.sql b/src/test/resources/tpcds/q13.sql new file mode 100644 index 000000000..32dc9e260 --- /dev/null +++ b/src/test/resources/tpcds/q13.sql @@ -0,0 +1,49 @@ +SELECT + avg(ss_quantity), + avg(ss_ext_sales_price), + avg(ss_ext_wholesale_cost), + sum(ss_ext_wholesale_cost) +FROM store_sales + , store + , customer_demographics + , household_demographics + , customer_address + , date_dim +WHERE s_store_sk = ss_store_sk + AND ss_sold_date_sk = d_date_sk AND d_year = 2001 + AND ((ss_hdemo_sk = 
hd_demo_sk + AND cd_demo_sk = ss_cdemo_sk + AND cd_marital_status = 'M' + AND cd_education_status = 'Advanced Degree' + AND ss_sales_price BETWEEN 100.00 AND 150.00 + AND hd_dep_count = 3 +) OR + (ss_hdemo_sk = hd_demo_sk + AND cd_demo_sk = ss_cdemo_sk + AND cd_marital_status = 'S' + AND cd_education_status = 'College' + AND ss_sales_price BETWEEN 50.00 AND 100.00 + AND hd_dep_count = 1 + ) OR + (ss_hdemo_sk = hd_demo_sk + AND cd_demo_sk = ss_cdemo_sk + AND cd_marital_status = 'W' + AND cd_education_status = '2 yr Degree' + AND ss_sales_price BETWEEN 150.00 AND 200.00 + AND hd_dep_count = 1 + )) + AND ((ss_addr_sk = ca_address_sk + AND ca_country = 'United States' + AND ca_state IN ('TX', 'OH', 'TX') + AND ss_net_profit BETWEEN 100 AND 200 +) OR + (ss_addr_sk = ca_address_sk + AND ca_country = 'United States' + AND ca_state IN ('OR', 'NM', 'KY') + AND ss_net_profit BETWEEN 150 AND 300 + ) OR + (ss_addr_sk = ca_address_sk + AND ca_country = 'United States' + AND ca_state IN ('VA', 'TX', 'MS') + AND ss_net_profit BETWEEN 50 AND 250 + )) diff --git a/src/test/resources/tpcds/q14a.sql b/src/test/resources/tpcds/q14a.sql new file mode 100644 index 000000000..954ddd41b --- /dev/null +++ b/src/test/resources/tpcds/q14a.sql @@ -0,0 +1,120 @@ +WITH cross_items AS +(SELECT i_item_sk ss_item_sk + FROM item, + (SELECT + iss.i_brand_id brand_id, + iss.i_class_id class_id, + iss.i_category_id category_id + FROM store_sales, item iss, date_dim d1 + WHERE ss_item_sk = iss.i_item_sk + AND ss_sold_date_sk = d1.d_date_sk + AND d1.d_year BETWEEN 1999 AND 1999 + 2 + INTERSECT + SELECT + ics.i_brand_id, + ics.i_class_id, + ics.i_category_id + FROM catalog_sales, item ics, date_dim d2 + WHERE cs_item_sk = ics.i_item_sk + AND cs_sold_date_sk = d2.d_date_sk + AND d2.d_year BETWEEN 1999 AND 1999 + 2 + INTERSECT + SELECT + iws.i_brand_id, + iws.i_class_id, + iws.i_category_id + FROM web_sales, item iws, date_dim d3 + WHERE ws_item_sk = iws.i_item_sk + AND ws_sold_date_sk = d3.d_date_sk + AND 
d3.d_year BETWEEN 1999 AND 1999 + 2) x + WHERE i_brand_id = brand_id + AND i_class_id = class_id + AND i_category_id = category_id +), + avg_sales AS + (SELECT avg(quantity * list_price) average_sales + FROM ( + SELECT + ss_quantity quantity, + ss_list_price list_price + FROM store_sales, date_dim + WHERE ss_sold_date_sk = d_date_sk + AND d_year BETWEEN 1999 AND 2001 + UNION ALL + SELECT + cs_quantity quantity, + cs_list_price list_price + FROM catalog_sales, date_dim + WHERE cs_sold_date_sk = d_date_sk + AND d_year BETWEEN 1999 AND 1999 + 2 + UNION ALL + SELECT + ws_quantity quantity, + ws_list_price list_price + FROM web_sales, date_dim + WHERE ws_sold_date_sk = d_date_sk + AND d_year BETWEEN 1999 AND 1999 + 2) x) +SELECT + channel, + i_brand_id, + i_class_id, + i_category_id, + sum(sales), + sum(number_sales) +FROM ( + SELECT + 'store' channel, + i_brand_id, + i_class_id, + i_category_id, + sum(ss_quantity * ss_list_price) sales, + count(*) number_sales + FROM store_sales, item, date_dim + WHERE ss_item_sk IN (SELECT ss_item_sk + FROM cross_items) + AND ss_item_sk = i_item_sk + AND ss_sold_date_sk = d_date_sk + AND d_year = 1999 + 2 + AND d_moy = 11 + GROUP BY i_brand_id, i_class_id, i_category_id + HAVING sum(ss_quantity * ss_list_price) > (SELECT average_sales + FROM avg_sales) + UNION ALL + SELECT + 'catalog' channel, + i_brand_id, + i_class_id, + i_category_id, + sum(cs_quantity * cs_list_price) sales, + count(*) number_sales + FROM catalog_sales, item, date_dim + WHERE cs_item_sk IN (SELECT ss_item_sk + FROM cross_items) + AND cs_item_sk = i_item_sk + AND cs_sold_date_sk = d_date_sk + AND d_year = 1999 + 2 + AND d_moy = 11 + GROUP BY i_brand_id, i_class_id, i_category_id + HAVING sum(cs_quantity * cs_list_price) > (SELECT average_sales FROM avg_sales) + UNION ALL + SELECT + 'web' channel, + i_brand_id, + i_class_id, + i_category_id, + sum(ws_quantity * ws_list_price) sales, + count(*) number_sales + FROM web_sales, item, date_dim + WHERE ws_item_sk IN 
(SELECT ss_item_sk + FROM cross_items) + AND ws_item_sk = i_item_sk + AND ws_sold_date_sk = d_date_sk + AND d_year = 1999 + 2 + AND d_moy = 11 + GROUP BY i_brand_id, i_class_id, i_category_id + HAVING sum(ws_quantity * ws_list_price) > (SELECT average_sales + FROM avg_sales) + ) y +GROUP BY ROLLUP (channel, i_brand_id, i_class_id, i_category_id) +ORDER BY channel, i_brand_id, i_class_id, i_category_id +LIMIT 100 diff --git a/src/test/resources/tpcds/q14b.sql b/src/test/resources/tpcds/q14b.sql new file mode 100644 index 000000000..929a8484b --- /dev/null +++ b/src/test/resources/tpcds/q14b.sql @@ -0,0 +1,95 @@ +WITH cross_items AS +(SELECT i_item_sk ss_item_sk + FROM item, + (SELECT + iss.i_brand_id brand_id, + iss.i_class_id class_id, + iss.i_category_id category_id + FROM store_sales, item iss, date_dim d1 + WHERE ss_item_sk = iss.i_item_sk + AND ss_sold_date_sk = d1.d_date_sk + AND d1.d_year BETWEEN 1999 AND 1999 + 2 + INTERSECT + SELECT + ics.i_brand_id, + ics.i_class_id, + ics.i_category_id + FROM catalog_sales, item ics, date_dim d2 + WHERE cs_item_sk = ics.i_item_sk + AND cs_sold_date_sk = d2.d_date_sk + AND d2.d_year BETWEEN 1999 AND 1999 + 2 + INTERSECT + SELECT + iws.i_brand_id, + iws.i_class_id, + iws.i_category_id + FROM web_sales, item iws, date_dim d3 + WHERE ws_item_sk = iws.i_item_sk + AND ws_sold_date_sk = d3.d_date_sk + AND d3.d_year BETWEEN 1999 AND 1999 + 2) x + WHERE i_brand_id = brand_id + AND i_class_id = class_id + AND i_category_id = category_id +), + avg_sales AS + (SELECT avg(quantity * list_price) average_sales + FROM (SELECT + ss_quantity quantity, + ss_list_price list_price + FROM store_sales, date_dim + WHERE ss_sold_date_sk = d_date_sk AND d_year BETWEEN 1999 AND 1999 + 2 + UNION ALL + SELECT + cs_quantity quantity, + cs_list_price list_price + FROM catalog_sales, date_dim + WHERE cs_sold_date_sk = d_date_sk AND d_year BETWEEN 1999 AND 1999 + 2 + UNION ALL + SELECT + ws_quantity quantity, + ws_list_price list_price + FROM web_sales, 
date_dim + WHERE ws_sold_date_sk = d_date_sk AND d_year BETWEEN 1999 AND 1999 + 2) x) +SELECT * +FROM + (SELECT + 'store' channel, + i_brand_id, + i_class_id, + i_category_id, + sum(ss_quantity * ss_list_price) sales, + count(*) number_sales + FROM store_sales, item, date_dim + WHERE ss_item_sk IN (SELECT ss_item_sk + FROM cross_items) + AND ss_item_sk = i_item_sk + AND ss_sold_date_sk = d_date_sk + AND d_week_seq = (SELECT d_week_seq + FROM date_dim + WHERE d_year = 1999 + 1 AND d_moy = 12 AND d_dom = 11) + GROUP BY i_brand_id, i_class_id, i_category_id + HAVING sum(ss_quantity * ss_list_price) > (SELECT average_sales + FROM avg_sales)) this_year, + (SELECT + 'store' channel, + i_brand_id, + i_class_id, + i_category_id, + sum(ss_quantity * ss_list_price) sales, + count(*) number_sales + FROM store_sales, item, date_dim + WHERE ss_item_sk IN (SELECT ss_item_sk + FROM cross_items) + AND ss_item_sk = i_item_sk + AND ss_sold_date_sk = d_date_sk + AND d_week_seq = (SELECT d_week_seq + FROM date_dim + WHERE d_year = 1999 AND d_moy = 12 AND d_dom = 11) + GROUP BY i_brand_id, i_class_id, i_category_id + HAVING sum(ss_quantity * ss_list_price) > (SELECT average_sales + FROM avg_sales)) last_year +WHERE this_year.i_brand_id = last_year.i_brand_id + AND this_year.i_class_id = last_year.i_class_id + AND this_year.i_category_id = last_year.i_category_id +ORDER BY this_year.channel, this_year.i_brand_id, this_year.i_class_id, this_year.i_category_id +LIMIT 100 diff --git a/src/test/resources/tpcds/q15.sql b/src/test/resources/tpcds/q15.sql new file mode 100644 index 000000000..b8182e23b --- /dev/null +++ b/src/test/resources/tpcds/q15.sql @@ -0,0 +1,15 @@ +SELECT + ca_zip, + sum(cs_sales_price) +FROM catalog_sales, customer, customer_address, date_dim +WHERE cs_bill_customer_sk = c_customer_sk + AND c_current_addr_sk = ca_address_sk + AND (substr(ca_zip, 1, 5) IN ('85669', '86197', '88274', '83405', '86475', + '85392', '85460', '80348', '81792') + OR ca_state IN ('CA', 'WA', 
'GA') + OR cs_sales_price > 500) + AND cs_sold_date_sk = d_date_sk + AND d_qoy = 2 AND d_year = 2001 +GROUP BY ca_zip +ORDER BY ca_zip +LIMIT 100 diff --git a/src/test/resources/tpcds/q16.sql b/src/test/resources/tpcds/q16.sql new file mode 100644 index 000000000..732ad0d84 --- /dev/null +++ b/src/test/resources/tpcds/q16.sql @@ -0,0 +1,23 @@ +SELECT + count(DISTINCT cs_order_number) AS `order count `, + sum(cs_ext_ship_cost) AS `total shipping cost `, + sum(cs_net_profit) AS `total net profit ` +FROM + catalog_sales cs1, date_dim, customer_address, call_center +WHERE + d_date BETWEEN '2002-02-01' AND (CAST('2002-02-01' AS DATE) + INTERVAL 60 days) + AND cs1.cs_ship_date_sk = d_date_sk + AND cs1.cs_ship_addr_sk = ca_address_sk + AND ca_state = 'GA' + AND cs1.cs_call_center_sk = cc_call_center_sk + AND cc_county IN + ('Williamson County', 'Williamson County', 'Williamson County', 'Williamson County', 'Williamson County') + AND EXISTS(SELECT * + FROM catalog_sales cs2 + WHERE cs1.cs_order_number = cs2.cs_order_number + AND cs1.cs_warehouse_sk <> cs2.cs_warehouse_sk) + AND NOT EXISTS(SELECT * + FROM catalog_returns cr1 + WHERE cs1.cs_order_number = cr1.cr_order_number) +ORDER BY count(DISTINCT cs_order_number) +LIMIT 100 diff --git a/src/test/resources/tpcds/q17.sql b/src/test/resources/tpcds/q17.sql new file mode 100644 index 000000000..4d647f795 --- /dev/null +++ b/src/test/resources/tpcds/q17.sql @@ -0,0 +1,33 @@ +SELECT + i_item_id, + i_item_desc, + s_state, + count(ss_quantity) AS store_sales_quantitycount, + avg(ss_quantity) AS store_sales_quantityave, + stddev_samp(ss_quantity) AS store_sales_quantitystdev, + stddev_samp(ss_quantity) / avg(ss_quantity) AS store_sales_quantitycov, + count(sr_return_quantity) as_store_returns_quantitycount, + avg(sr_return_quantity) as_store_returns_quantityave, + stddev_samp(sr_return_quantity) as_store_returns_quantitystdev, + stddev_samp(sr_return_quantity) / avg(sr_return_quantity) AS store_returns_quantitycov, + 
count(cs_quantity) AS catalog_sales_quantitycount, + avg(cs_quantity) AS catalog_sales_quantityave, + stddev_samp(cs_quantity) / avg(cs_quantity) AS catalog_sales_quantitystdev, + stddev_samp(cs_quantity) / avg(cs_quantity) AS catalog_sales_quantitycov +FROM store_sales, store_returns, catalog_sales, date_dim d1, date_dim d2, date_dim d3, store, item +WHERE d1.d_quarter_name = '2001Q1' + AND d1.d_date_sk = ss_sold_date_sk + AND i_item_sk = ss_item_sk + AND s_store_sk = ss_store_sk + AND ss_customer_sk = sr_customer_sk + AND ss_item_sk = sr_item_sk + AND ss_ticket_number = sr_ticket_number + AND sr_returned_date_sk = d2.d_date_sk + AND d2.d_quarter_name IN ('2001Q1', '2001Q2', '2001Q3') + AND sr_customer_sk = cs_bill_customer_sk + AND sr_item_sk = cs_item_sk + AND cs_sold_date_sk = d3.d_date_sk + AND d3.d_quarter_name IN ('2001Q1', '2001Q2', '2001Q3') +GROUP BY i_item_id, i_item_desc, s_state +ORDER BY i_item_id, i_item_desc, s_state +LIMIT 100 diff --git a/src/test/resources/tpcds/q18.sql b/src/test/resources/tpcds/q18.sql new file mode 100644 index 000000000..4055c80fd --- /dev/null +++ b/src/test/resources/tpcds/q18.sql @@ -0,0 +1,28 @@ +SELECT + i_item_id, + ca_country, + ca_state, + ca_county, + avg(cast(cs_quantity AS DECIMAL(12, 2))) agg1, + avg(cast(cs_list_price AS DECIMAL(12, 2))) agg2, + avg(cast(cs_coupon_amt AS DECIMAL(12, 2))) agg3, + avg(cast(cs_sales_price AS DECIMAL(12, 2))) agg4, + avg(cast(cs_net_profit AS DECIMAL(12, 2))) agg5, + avg(cast(c_birth_year AS DECIMAL(12, 2))) agg6, + avg(cast(cd1.cd_dep_count AS DECIMAL(12, 2))) agg7 +FROM catalog_sales, customer_demographics cd1, + customer_demographics cd2, customer, customer_address, date_dim, item +WHERE cs_sold_date_sk = d_date_sk AND + cs_item_sk = i_item_sk AND + cs_bill_cdemo_sk = cd1.cd_demo_sk AND + cs_bill_customer_sk = c_customer_sk AND + cd1.cd_gender = 'F' AND + cd1.cd_education_status = 'Unknown' AND + c_current_cdemo_sk = cd2.cd_demo_sk AND + c_current_addr_sk = ca_address_sk AND + 
c_birth_month IN (1, 6, 8, 9, 12, 2) AND + d_year = 1998 AND + ca_state IN ('MS', 'IN', 'ND', 'OK', 'NM', 'VA', 'MS') +GROUP BY ROLLUP (i_item_id, ca_country, ca_state, ca_county) +ORDER BY ca_country, ca_state, ca_county, i_item_id +LIMIT 100 diff --git a/src/test/resources/tpcds/q19.sql b/src/test/resources/tpcds/q19.sql new file mode 100644 index 000000000..e38ab7f26 --- /dev/null +++ b/src/test/resources/tpcds/q19.sql @@ -0,0 +1,19 @@ +SELECT + i_brand_id brand_id, + i_brand brand, + i_manufact_id, + i_manufact, + sum(ss_ext_sales_price) ext_price +FROM date_dim, store_sales, item, customer, customer_address, store +WHERE d_date_sk = ss_sold_date_sk + AND ss_item_sk = i_item_sk + AND i_manager_id = 8 + AND d_moy = 11 + AND d_year = 1998 + AND ss_customer_sk = c_customer_sk + AND c_current_addr_sk = ca_address_sk + AND substr(ca_zip, 1, 5) <> substr(s_zip, 1, 5) + AND ss_store_sk = s_store_sk +GROUP BY i_brand, i_brand_id, i_manufact_id, i_manufact +ORDER BY ext_price DESC, brand, brand_id, i_manufact_id, i_manufact +LIMIT 100 diff --git a/src/test/resources/tpcds/q2.sql b/src/test/resources/tpcds/q2.sql new file mode 100644 index 000000000..52c0e90c4 --- /dev/null +++ b/src/test/resources/tpcds/q2.sql @@ -0,0 +1,81 @@ +WITH wscs AS +( SELECT + sold_date_sk, + sales_price + FROM (SELECT + ws_sold_date_sk sold_date_sk, + ws_ext_sales_price sales_price + FROM web_sales) x + UNION ALL + (SELECT + cs_sold_date_sk sold_date_sk, + cs_ext_sales_price sales_price + FROM catalog_sales)), + wswscs AS + ( SELECT + d_week_seq, + sum(CASE WHEN (d_day_name = 'Sunday') + THEN sales_price + ELSE NULL END) + sun_sales, + sum(CASE WHEN (d_day_name = 'Monday') + THEN sales_price + ELSE NULL END) + mon_sales, + sum(CASE WHEN (d_day_name = 'Tuesday') + THEN sales_price + ELSE NULL END) + tue_sales, + sum(CASE WHEN (d_day_name = 'Wednesday') + THEN sales_price + ELSE NULL END) + wed_sales, + sum(CASE WHEN (d_day_name = 'Thursday') + THEN sales_price + ELSE NULL END) + thu_sales, + 
sum(CASE WHEN (d_day_name = 'Friday') + THEN sales_price + ELSE NULL END) + fri_sales, + sum(CASE WHEN (d_day_name = 'Saturday') + THEN sales_price + ELSE NULL END) + sat_sales + FROM wscs, date_dim + WHERE d_date_sk = sold_date_sk + GROUP BY d_week_seq) +SELECT + d_week_seq1, + round(sun_sales1 / sun_sales2, 2), + round(mon_sales1 / mon_sales2, 2), + round(tue_sales1 / tue_sales2, 2), + round(wed_sales1 / wed_sales2, 2), + round(thu_sales1 / thu_sales2, 2), + round(fri_sales1 / fri_sales2, 2), + round(sat_sales1 / sat_sales2, 2) +FROM + (SELECT + wswscs.d_week_seq d_week_seq1, + sun_sales sun_sales1, + mon_sales mon_sales1, + tue_sales tue_sales1, + wed_sales wed_sales1, + thu_sales thu_sales1, + fri_sales fri_sales1, + sat_sales sat_sales1 + FROM wswscs, date_dim + WHERE date_dim.d_week_seq = wswscs.d_week_seq AND d_year = 2001) y, + (SELECT + wswscs.d_week_seq d_week_seq2, + sun_sales sun_sales2, + mon_sales mon_sales2, + tue_sales tue_sales2, + wed_sales wed_sales2, + thu_sales thu_sales2, + fri_sales fri_sales2, + sat_sales sat_sales2 + FROM wswscs, date_dim + WHERE date_dim.d_week_seq = wswscs.d_week_seq AND d_year = 2001 + 1) z +WHERE d_week_seq1 = d_week_seq2 - 53 +ORDER BY d_week_seq1 diff --git a/src/test/resources/tpcds/q20.sql b/src/test/resources/tpcds/q20.sql new file mode 100644 index 000000000..7ac6c7a75 --- /dev/null +++ b/src/test/resources/tpcds/q20.sql @@ -0,0 +1,18 @@ +SELECT + i_item_desc, + i_category, + i_class, + i_current_price, + sum(cs_ext_sales_price) AS itemrevenue, + sum(cs_ext_sales_price) * 100 / sum(sum(cs_ext_sales_price)) + OVER + (PARTITION BY i_class) AS revenueratio +FROM catalog_sales, item, date_dim +WHERE cs_item_sk = i_item_sk + AND i_category IN ('Sports', 'Books', 'Home') + AND cs_sold_date_sk = d_date_sk + AND d_date BETWEEN cast('1999-02-22' AS DATE) +AND (cast('1999-02-22' AS DATE) + INTERVAL 30 days) +GROUP BY i_item_id, i_item_desc, i_category, i_class, i_current_price +ORDER BY i_category, i_class, i_item_id, 
i_item_desc, revenueratio +LIMIT 100 diff --git a/src/test/resources/tpcds/q21.sql b/src/test/resources/tpcds/q21.sql new file mode 100644 index 000000000..550881143 --- /dev/null +++ b/src/test/resources/tpcds/q21.sql @@ -0,0 +1,25 @@ +SELECT * +FROM ( + SELECT + w_warehouse_name, + i_item_id, + sum(CASE WHEN (cast(d_date AS DATE) < cast('2000-03-11' AS DATE)) + THEN inv_quantity_on_hand + ELSE 0 END) AS inv_before, + sum(CASE WHEN (cast(d_date AS DATE) >= cast('2000-03-11' AS DATE)) + THEN inv_quantity_on_hand + ELSE 0 END) AS inv_after + FROM inventory, warehouse, item, date_dim + WHERE i_current_price BETWEEN 0.99 AND 1.49 + AND i_item_sk = inv_item_sk + AND inv_warehouse_sk = w_warehouse_sk + AND inv_date_sk = d_date_sk + AND d_date BETWEEN (cast('2000-03-11' AS DATE) - INTERVAL 30 days) + AND (cast('2000-03-11' AS DATE) + INTERVAL 30 days) + GROUP BY w_warehouse_name, i_item_id) x +WHERE (CASE WHEN inv_before > 0 + THEN inv_after / inv_before + ELSE NULL + END) BETWEEN 2.0 / 3.0 AND 3.0 / 2.0 +ORDER BY w_warehouse_name, i_item_id +LIMIT 100 diff --git a/src/test/resources/tpcds/q22.sql b/src/test/resources/tpcds/q22.sql new file mode 100644 index 000000000..add3b41f7 --- /dev/null +++ b/src/test/resources/tpcds/q22.sql @@ -0,0 +1,14 @@ +SELECT + i_product_name, + i_brand, + i_class, + i_category, + avg(inv_quantity_on_hand) qoh +FROM inventory, date_dim, item, warehouse +WHERE inv_date_sk = d_date_sk + AND inv_item_sk = i_item_sk + AND inv_warehouse_sk = w_warehouse_sk + AND d_month_seq BETWEEN 1200 AND 1200 + 11 +GROUP BY ROLLUP (i_product_name, i_brand, i_class, i_category) +ORDER BY qoh, i_product_name, i_brand, i_class, i_category +LIMIT 100 diff --git a/src/test/resources/tpcds/q23a.sql b/src/test/resources/tpcds/q23a.sql new file mode 100644 index 000000000..37791f643 --- /dev/null +++ b/src/test/resources/tpcds/q23a.sql @@ -0,0 +1,53 @@ +WITH frequent_ss_items AS +(SELECT + substr(i_item_desc, 1, 30) itemdesc, + i_item_sk item_sk, + d_date solddate, + 
count(*) cnt + FROM store_sales, date_dim, item + WHERE ss_sold_date_sk = d_date_sk + AND ss_item_sk = i_item_sk + AND d_year IN (2000, 2000 + 1, 2000 + 2, 2000 + 3) + GROUP BY substr(i_item_desc, 1, 30), i_item_sk, d_date + HAVING count(*) > 4), + max_store_sales AS + (SELECT max(csales) tpcds_cmax + FROM (SELECT + c_customer_sk, + sum(ss_quantity * ss_sales_price) csales + FROM store_sales, customer, date_dim + WHERE ss_customer_sk = c_customer_sk + AND ss_sold_date_sk = d_date_sk + AND d_year IN (2000, 2000 + 1, 2000 + 2, 2000 + 3) + GROUP BY c_customer_sk) x), + best_ss_customer AS + (SELECT + c_customer_sk, + sum(ss_quantity * ss_sales_price) ssales + FROM store_sales, customer + WHERE ss_customer_sk = c_customer_sk + GROUP BY c_customer_sk + HAVING sum(ss_quantity * ss_sales_price) > (50 / 100.0) * + (SELECT * + FROM max_store_sales)) +SELECT sum(sales) +FROM ((SELECT cs_quantity * cs_list_price sales +FROM catalog_sales, date_dim +WHERE d_year = 2000 + AND d_moy = 2 + AND cs_sold_date_sk = d_date_sk + AND cs_item_sk IN (SELECT item_sk +FROM frequent_ss_items) + AND cs_bill_customer_sk IN (SELECT c_customer_sk +FROM best_ss_customer)) + UNION ALL + (SELECT ws_quantity * ws_list_price sales + FROM web_sales, date_dim + WHERE d_year = 2000 + AND d_moy = 2 + AND ws_sold_date_sk = d_date_sk + AND ws_item_sk IN (SELECT item_sk + FROM frequent_ss_items) + AND ws_bill_customer_sk IN (SELECT c_customer_sk + FROM best_ss_customer))) y +LIMIT 100 diff --git a/src/test/resources/tpcds/q23b.sql b/src/test/resources/tpcds/q23b.sql new file mode 100644 index 000000000..01150197a --- /dev/null +++ b/src/test/resources/tpcds/q23b.sql @@ -0,0 +1,68 @@ +WITH frequent_ss_items AS +(SELECT + substr(i_item_desc, 1, 30) itemdesc, + i_item_sk item_sk, + d_date solddate, + count(*) cnt + FROM store_sales, date_dim, item + WHERE ss_sold_date_sk = d_date_sk + AND ss_item_sk = i_item_sk + AND d_year IN (2000, 2000 + 1, 2000 + 2, 2000 + 3) + GROUP BY substr(i_item_desc, 1, 30), 
i_item_sk, d_date + HAVING count(*) > 4), + max_store_sales AS + (SELECT max(csales) tpcds_cmax + FROM (SELECT + c_customer_sk, + sum(ss_quantity * ss_sales_price) csales + FROM store_sales, customer, date_dim + WHERE ss_customer_sk = c_customer_sk + AND ss_sold_date_sk = d_date_sk + AND d_year IN (2000, 2000 + 1, 2000 + 2, 2000 + 3) + GROUP BY c_customer_sk) x), + best_ss_customer AS + (SELECT + c_customer_sk, + sum(ss_quantity * ss_sales_price) ssales + FROM store_sales + , customer + WHERE ss_customer_sk = c_customer_sk + GROUP BY c_customer_sk + HAVING sum(ss_quantity * ss_sales_price) > (50 / 100.0) * + (SELECT * + FROM max_store_sales)) +SELECT + c_last_name, + c_first_name, + sales +FROM ((SELECT + c_last_name, + c_first_name, + sum(cs_quantity * cs_list_price) sales +FROM catalog_sales, customer, date_dim +WHERE d_year = 2000 + AND d_moy = 2 + AND cs_sold_date_sk = d_date_sk + AND cs_item_sk IN (SELECT item_sk +FROM frequent_ss_items) + AND cs_bill_customer_sk IN (SELECT c_customer_sk +FROM best_ss_customer) + AND cs_bill_customer_sk = c_customer_sk +GROUP BY c_last_name, c_first_name) + UNION ALL + (SELECT + c_last_name, + c_first_name, + sum(ws_quantity * ws_list_price) sales + FROM web_sales, customer, date_dim + WHERE d_year = 2000 + AND d_moy = 2 + AND ws_sold_date_sk = d_date_sk + AND ws_item_sk IN (SELECT item_sk + FROM frequent_ss_items) + AND ws_bill_customer_sk IN (SELECT c_customer_sk + FROM best_ss_customer) + AND ws_bill_customer_sk = c_customer_sk + GROUP BY c_last_name, c_first_name)) y +ORDER BY c_last_name, c_first_name, sales +LIMIT 100 diff --git a/src/test/resources/tpcds/q24a.sql b/src/test/resources/tpcds/q24a.sql new file mode 100644 index 000000000..bcc189486 --- /dev/null +++ b/src/test/resources/tpcds/q24a.sql @@ -0,0 +1,34 @@ +WITH ssales AS +(SELECT + c_last_name, + c_first_name, + s_store_name, + ca_state, + s_state, + i_color, + i_current_price, + i_manager_id, + i_units, + i_size, + sum(ss_net_paid) netpaid + FROM store_sales, 
store_returns, store, item, customer, customer_address + WHERE ss_ticket_number = sr_ticket_number + AND ss_item_sk = sr_item_sk + AND ss_customer_sk = c_customer_sk + AND ss_item_sk = i_item_sk + AND ss_store_sk = s_store_sk + AND c_birth_country = upper(ca_country) + AND s_zip = ca_zip + AND s_market_id = 8 + GROUP BY c_last_name, c_first_name, s_store_name, ca_state, s_state, i_color, + i_current_price, i_manager_id, i_units, i_size) +SELECT + c_last_name, + c_first_name, + s_store_name, + sum(netpaid) paid +FROM ssales +WHERE i_color = 'pale' +GROUP BY c_last_name, c_first_name, s_store_name +HAVING sum(netpaid) > (SELECT 0.05 * avg(netpaid) +FROM ssales) diff --git a/src/test/resources/tpcds/q24b.sql b/src/test/resources/tpcds/q24b.sql new file mode 100644 index 000000000..830eb670b --- /dev/null +++ b/src/test/resources/tpcds/q24b.sql @@ -0,0 +1,34 @@ +WITH ssales AS +(SELECT + c_last_name, + c_first_name, + s_store_name, + ca_state, + s_state, + i_color, + i_current_price, + i_manager_id, + i_units, + i_size, + sum(ss_net_paid) netpaid + FROM store_sales, store_returns, store, item, customer, customer_address + WHERE ss_ticket_number = sr_ticket_number + AND ss_item_sk = sr_item_sk + AND ss_customer_sk = c_customer_sk + AND ss_item_sk = i_item_sk + AND ss_store_sk = s_store_sk + AND c_birth_country = upper(ca_country) + AND s_zip = ca_zip + AND s_market_id = 8 + GROUP BY c_last_name, c_first_name, s_store_name, ca_state, s_state, + i_color, i_current_price, i_manager_id, i_units, i_size) +SELECT + c_last_name, + c_first_name, + s_store_name, + sum(netpaid) paid +FROM ssales +WHERE i_color = 'chiffon' +GROUP BY c_last_name, c_first_name, s_store_name +HAVING sum(netpaid) > (SELECT 0.05 * avg(netpaid) +FROM ssales) diff --git a/src/test/resources/tpcds/q25.sql b/src/test/resources/tpcds/q25.sql new file mode 100644 index 000000000..a4d78a3c5 --- /dev/null +++ b/src/test/resources/tpcds/q25.sql @@ -0,0 +1,33 @@ +SELECT + i_item_id, + i_item_desc, + s_store_id, 
+ s_store_name, + sum(ss_net_profit) AS store_sales_profit, + sum(sr_net_loss) AS store_returns_loss, + sum(cs_net_profit) AS catalog_sales_profit +FROM + store_sales, store_returns, catalog_sales, date_dim d1, date_dim d2, date_dim d3, + store, item +WHERE + d1.d_moy = 4 + AND d1.d_year = 2001 + AND d1.d_date_sk = ss_sold_date_sk + AND i_item_sk = ss_item_sk + AND s_store_sk = ss_store_sk + AND ss_customer_sk = sr_customer_sk + AND ss_item_sk = sr_item_sk + AND ss_ticket_number = sr_ticket_number + AND sr_returned_date_sk = d2.d_date_sk + AND d2.d_moy BETWEEN 4 AND 10 + AND d2.d_year = 2001 + AND sr_customer_sk = cs_bill_customer_sk + AND sr_item_sk = cs_item_sk + AND cs_sold_date_sk = d3.d_date_sk + AND d3.d_moy BETWEEN 4 AND 10 + AND d3.d_year = 2001 +GROUP BY + i_item_id, i_item_desc, s_store_id, s_store_name +ORDER BY + i_item_id, i_item_desc, s_store_id, s_store_name +LIMIT 100 \ No newline at end of file diff --git a/src/test/resources/tpcds/q26.sql b/src/test/resources/tpcds/q26.sql new file mode 100644 index 000000000..6d395a1d7 --- /dev/null +++ b/src/test/resources/tpcds/q26.sql @@ -0,0 +1,19 @@ +SELECT + i_item_id, + avg(cs_quantity) agg1, + avg(cs_list_price) agg2, + avg(cs_coupon_amt) agg3, + avg(cs_sales_price) agg4 +FROM catalog_sales, customer_demographics, date_dim, item, promotion +WHERE cs_sold_date_sk = d_date_sk AND + cs_item_sk = i_item_sk AND + cs_bill_cdemo_sk = cd_demo_sk AND + cs_promo_sk = p_promo_sk AND + cd_gender = 'M' AND + cd_marital_status = 'S' AND + cd_education_status = 'College' AND + (p_channel_email = 'N' OR p_channel_event = 'N') AND + d_year = 2000 +GROUP BY i_item_id +ORDER BY i_item_id +LIMIT 100 diff --git a/src/test/resources/tpcds/q27.sql b/src/test/resources/tpcds/q27.sql new file mode 100644 index 000000000..b0e2fd95f --- /dev/null +++ b/src/test/resources/tpcds/q27.sql @@ -0,0 +1,21 @@ +SELECT + i_item_id, + s_state, + grouping(s_state) g_state, + avg(ss_quantity) agg1, + avg(ss_list_price) agg2, + 
avg(ss_coupon_amt) agg3, + avg(ss_sales_price) agg4 +FROM store_sales, customer_demographics, date_dim, store, item +WHERE ss_sold_date_sk = d_date_sk AND + ss_item_sk = i_item_sk AND + ss_store_sk = s_store_sk AND + ss_cdemo_sk = cd_demo_sk AND + cd_gender = 'M' AND + cd_marital_status = 'S' AND + cd_education_status = 'College' AND + d_year = 2002 AND + s_state IN ('TN', 'TN', 'TN', 'TN', 'TN', 'TN') +GROUP BY ROLLUP (i_item_id, s_state) +ORDER BY i_item_id, s_state +LIMIT 100 diff --git a/src/test/resources/tpcds/q28.sql b/src/test/resources/tpcds/q28.sql new file mode 100644 index 000000000..f34c2bb0e --- /dev/null +++ b/src/test/resources/tpcds/q28.sql @@ -0,0 +1,56 @@ +SELECT * +FROM (SELECT + avg(ss_list_price) B1_LP, + count(ss_list_price) B1_CNT, + count(DISTINCT ss_list_price) B1_CNTD +FROM store_sales +WHERE ss_quantity BETWEEN 0 AND 5 + AND (ss_list_price BETWEEN 8 AND 8 + 10 + OR ss_coupon_amt BETWEEN 459 AND 459 + 1000 + OR ss_wholesale_cost BETWEEN 57 AND 57 + 20)) B1, + (SELECT + avg(ss_list_price) B2_LP, + count(ss_list_price) B2_CNT, + count(DISTINCT ss_list_price) B2_CNTD + FROM store_sales + WHERE ss_quantity BETWEEN 6 AND 10 + AND (ss_list_price BETWEEN 90 AND 90 + 10 + OR ss_coupon_amt BETWEEN 2323 AND 2323 + 1000 + OR ss_wholesale_cost BETWEEN 31 AND 31 + 20)) B2, + (SELECT + avg(ss_list_price) B3_LP, + count(ss_list_price) B3_CNT, + count(DISTINCT ss_list_price) B3_CNTD + FROM store_sales + WHERE ss_quantity BETWEEN 11 AND 15 + AND (ss_list_price BETWEEN 142 AND 142 + 10 + OR ss_coupon_amt BETWEEN 12214 AND 12214 + 1000 + OR ss_wholesale_cost BETWEEN 79 AND 79 + 20)) B3, + (SELECT + avg(ss_list_price) B4_LP, + count(ss_list_price) B4_CNT, + count(DISTINCT ss_list_price) B4_CNTD + FROM store_sales + WHERE ss_quantity BETWEEN 16 AND 20 + AND (ss_list_price BETWEEN 135 AND 135 + 10 + OR ss_coupon_amt BETWEEN 6071 AND 6071 + 1000 + OR ss_wholesale_cost BETWEEN 38 AND 38 + 20)) B4, + (SELECT + avg(ss_list_price) B5_LP, + count(ss_list_price) 
B5_CNT, + count(DISTINCT ss_list_price) B5_CNTD + FROM store_sales + WHERE ss_quantity BETWEEN 21 AND 25 + AND (ss_list_price BETWEEN 122 AND 122 + 10 + OR ss_coupon_amt BETWEEN 836 AND 836 + 1000 + OR ss_wholesale_cost BETWEEN 17 AND 17 + 20)) B5, + (SELECT + avg(ss_list_price) B6_LP, + count(ss_list_price) B6_CNT, + count(DISTINCT ss_list_price) B6_CNTD + FROM store_sales + WHERE ss_quantity BETWEEN 26 AND 30 + AND (ss_list_price BETWEEN 154 AND 154 + 10 + OR ss_coupon_amt BETWEEN 7326 AND 7326 + 1000 + OR ss_wholesale_cost BETWEEN 7 AND 7 + 20)) B6 +LIMIT 100 diff --git a/src/test/resources/tpcds/q29.sql b/src/test/resources/tpcds/q29.sql new file mode 100644 index 000000000..3f1fd553f --- /dev/null +++ b/src/test/resources/tpcds/q29.sql @@ -0,0 +1,32 @@ +SELECT + i_item_id, + i_item_desc, + s_store_id, + s_store_name, + sum(ss_quantity) AS store_sales_quantity, + sum(sr_return_quantity) AS store_returns_quantity, + sum(cs_quantity) AS catalog_sales_quantity +FROM + store_sales, store_returns, catalog_sales, date_dim d1, date_dim d2, + date_dim d3, store, item +WHERE + d1.d_moy = 9 + AND d1.d_year = 1999 + AND d1.d_date_sk = ss_sold_date_sk + AND i_item_sk = ss_item_sk + AND s_store_sk = ss_store_sk + AND ss_customer_sk = sr_customer_sk + AND ss_item_sk = sr_item_sk + AND ss_ticket_number = sr_ticket_number + AND sr_returned_date_sk = d2.d_date_sk + AND d2.d_moy BETWEEN 9 AND 9 + 3 + AND d2.d_year = 1999 + AND sr_customer_sk = cs_bill_customer_sk + AND sr_item_sk = cs_item_sk + AND cs_sold_date_sk = d3.d_date_sk + AND d3.d_year IN (1999, 1999 + 1, 1999 + 2) +GROUP BY + i_item_id, i_item_desc, s_store_id, s_store_name +ORDER BY + i_item_id, i_item_desc, s_store_id, s_store_name +LIMIT 100 diff --git a/src/test/resources/tpcds/q3.sql b/src/test/resources/tpcds/q3.sql new file mode 100644 index 000000000..181509df9 --- /dev/null +++ b/src/test/resources/tpcds/q3.sql @@ -0,0 +1,13 @@ +SELECT + dt.d_year, + item.i_brand_id brand_id, + item.i_brand brand, + 
SUM(ss_ext_sales_price) sum_agg +FROM date_dim dt, store_sales, item +WHERE dt.d_date_sk = store_sales.ss_sold_date_sk + AND store_sales.ss_item_sk = item.i_item_sk + AND item.i_manufact_id = 128 + AND dt.d_moy = 11 +GROUP BY dt.d_year, item.i_brand, item.i_brand_id +ORDER BY dt.d_year, sum_agg DESC, brand_id +LIMIT 100 diff --git a/src/test/resources/tpcds/q30.sql b/src/test/resources/tpcds/q30.sql new file mode 100644 index 000000000..986bef566 --- /dev/null +++ b/src/test/resources/tpcds/q30.sql @@ -0,0 +1,35 @@ +WITH customer_total_return AS +(SELECT + wr_returning_customer_sk AS ctr_customer_sk, + ca_state AS ctr_state, + sum(wr_return_amt) AS ctr_total_return + FROM web_returns, date_dim, customer_address + WHERE wr_returned_date_sk = d_date_sk + AND d_year = 2002 + AND wr_returning_addr_sk = ca_address_sk + GROUP BY wr_returning_customer_sk, ca_state) +SELECT + c_customer_id, + c_salutation, + c_first_name, + c_last_name, + c_preferred_cust_flag, + c_birth_day, + c_birth_month, + c_birth_year, + c_birth_country, + c_login, + c_email_address, + c_last_review_date, + ctr_total_return +FROM customer_total_return ctr1, customer_address, customer +WHERE ctr1.ctr_total_return > (SELECT avg(ctr_total_return) * 1.2 +FROM customer_total_return ctr2 +WHERE ctr1.ctr_state = ctr2.ctr_state) + AND ca_address_sk = c_current_addr_sk + AND ca_state = 'GA' + AND ctr1.ctr_customer_sk = c_customer_sk +ORDER BY c_customer_id, c_salutation, c_first_name, c_last_name, c_preferred_cust_flag + , c_birth_day, c_birth_month, c_birth_year, c_birth_country, c_login, c_email_address + , c_last_review_date, ctr_total_return +LIMIT 100 diff --git a/src/test/resources/tpcds/q31.sql b/src/test/resources/tpcds/q31.sql new file mode 100644 index 000000000..3e543d543 --- /dev/null +++ b/src/test/resources/tpcds/q31.sql @@ -0,0 +1,60 @@ +WITH ss AS +(SELECT + ca_county, + d_qoy, + d_year, + sum(ss_ext_sales_price) AS store_sales + FROM store_sales, date_dim, customer_address + WHERE 
ss_sold_date_sk = d_date_sk + AND ss_addr_sk = ca_address_sk + GROUP BY ca_county, d_qoy, d_year), + ws AS + (SELECT + ca_county, + d_qoy, + d_year, + sum(ws_ext_sales_price) AS web_sales + FROM web_sales, date_dim, customer_address + WHERE ws_sold_date_sk = d_date_sk + AND ws_bill_addr_sk = ca_address_sk + GROUP BY ca_county, d_qoy, d_year) +SELECT + ss1.ca_county, + ss1.d_year, + ws2.web_sales / ws1.web_sales web_q1_q2_increase, + ss2.store_sales / ss1.store_sales store_q1_q2_increase, + ws3.web_sales / ws2.web_sales web_q2_q3_increase, + ss3.store_sales / ss2.store_sales store_q2_q3_increase +FROM + ss ss1, ss ss2, ss ss3, ws ws1, ws ws2, ws ws3 +WHERE + ss1.d_qoy = 1 + AND ss1.d_year = 2000 + AND ss1.ca_county = ss2.ca_county + AND ss2.d_qoy = 2 + AND ss2.d_year = 2000 + AND ss2.ca_county = ss3.ca_county + AND ss3.d_qoy = 3 + AND ss3.d_year = 2000 + AND ss1.ca_county = ws1.ca_county + AND ws1.d_qoy = 1 + AND ws1.d_year = 2000 + AND ws1.ca_county = ws2.ca_county + AND ws2.d_qoy = 2 + AND ws2.d_year = 2000 + AND ws1.ca_county = ws3.ca_county + AND ws3.d_qoy = 3 + AND ws3.d_year = 2000 + AND CASE WHEN ws1.web_sales > 0 + THEN ws2.web_sales / ws1.web_sales + ELSE NULL END + > CASE WHEN ss1.store_sales > 0 + THEN ss2.store_sales / ss1.store_sales + ELSE NULL END + AND CASE WHEN ws2.web_sales > 0 + THEN ws3.web_sales / ws2.web_sales + ELSE NULL END + > CASE WHEN ss2.store_sales > 0 + THEN ss3.store_sales / ss2.store_sales + ELSE NULL END +ORDER BY ss1.ca_county diff --git a/src/test/resources/tpcds/q32.sql b/src/test/resources/tpcds/q32.sql new file mode 100644 index 000000000..a6f59ecb8 --- /dev/null +++ b/src/test/resources/tpcds/q32.sql @@ -0,0 +1,15 @@ +SELECT 1 AS `excess discount amount ` +FROM + catalog_sales, item, date_dim +WHERE + i_manufact_id = 977 + AND i_item_sk = cs_item_sk + AND d_date BETWEEN '2000-01-27' AND (cast('2000-01-27' AS DATE) + interval 90 days) + AND d_date_sk = cs_sold_date_sk + AND cs_ext_discount_amt > ( + SELECT 1.3 * 
avg(cs_ext_discount_amt) + FROM catalog_sales, date_dim + WHERE cs_item_sk = i_item_sk + AND d_date BETWEEN '2000-01-27' AND (cast('2000-01-27' AS DATE) + interval 90 days) + AND d_date_sk = cs_sold_date_sk) +LIMIT 100 diff --git a/src/test/resources/tpcds/q33.sql b/src/test/resources/tpcds/q33.sql new file mode 100644 index 000000000..d24856aa5 --- /dev/null +++ b/src/test/resources/tpcds/q33.sql @@ -0,0 +1,65 @@ +WITH ss AS ( + SELECT + i_manufact_id, + sum(ss_ext_sales_price) total_sales + FROM + store_sales, date_dim, customer_address, item + WHERE + i_manufact_id IN (SELECT i_manufact_id + FROM item + WHERE i_category IN ('Electronics')) + AND ss_item_sk = i_item_sk + AND ss_sold_date_sk = d_date_sk + AND d_year = 1998 + AND d_moy = 5 + AND ss_addr_sk = ca_address_sk + AND ca_gmt_offset = -5 + GROUP BY i_manufact_id), cs AS +(SELECT + i_manufact_id, + sum(cs_ext_sales_price) total_sales + FROM catalog_sales, date_dim, customer_address, item + WHERE + i_manufact_id IN ( + SELECT i_manufact_id + FROM item + WHERE + i_category IN ('Electronics')) + AND cs_item_sk = i_item_sk + AND cs_sold_date_sk = d_date_sk + AND d_year = 1998 + AND d_moy = 5 + AND cs_bill_addr_sk = ca_address_sk + AND ca_gmt_offset = -5 + GROUP BY i_manufact_id), + ws AS ( + SELECT + i_manufact_id, + sum(ws_ext_sales_price) total_sales + FROM + web_sales, date_dim, customer_address, item + WHERE + i_manufact_id IN (SELECT i_manufact_id + FROM item + WHERE i_category IN ('Electronics')) + AND ws_item_sk = i_item_sk + AND ws_sold_date_sk = d_date_sk + AND d_year = 1998 + AND d_moy = 5 + AND ws_bill_addr_sk = ca_address_sk + AND ca_gmt_offset = -5 + GROUP BY i_manufact_id) +SELECT + i_manufact_id, + sum(total_sales) total_sales +FROM (SELECT * + FROM ss + UNION ALL + SELECT * + FROM cs + UNION ALL + SELECT * + FROM ws) tmp1 +GROUP BY i_manufact_id +ORDER BY total_sales +LIMIT 100 diff --git a/src/test/resources/tpcds/q34.sql b/src/test/resources/tpcds/q34.sql new file mode 100644 index 
000000000..33396bf16 --- /dev/null +++ b/src/test/resources/tpcds/q34.sql @@ -0,0 +1,32 @@ +SELECT + c_last_name, + c_first_name, + c_salutation, + c_preferred_cust_flag, + ss_ticket_number, + cnt +FROM + (SELECT + ss_ticket_number, + ss_customer_sk, + count(*) cnt + FROM store_sales, date_dim, store, household_demographics + WHERE store_sales.ss_sold_date_sk = date_dim.d_date_sk + AND store_sales.ss_store_sk = store.s_store_sk + AND store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk + AND (date_dim.d_dom BETWEEN 1 AND 3 OR date_dim.d_dom BETWEEN 25 AND 28) + AND (household_demographics.hd_buy_potential = '>10000' OR + household_demographics.hd_buy_potential = 'unknown') + AND household_demographics.hd_vehicle_count > 0 + AND (CASE WHEN household_demographics.hd_vehicle_count > 0 + THEN household_demographics.hd_dep_count / household_demographics.hd_vehicle_count + ELSE NULL + END) > 1.2 + AND date_dim.d_year IN (1999, 1999 + 1, 1999 + 2) + AND store.s_county IN + ('Williamson County', 'Williamson County', 'Williamson County', 'Williamson County', + 'Williamson County', 'Williamson County', 'Williamson County', 'Williamson County') + GROUP BY ss_ticket_number, ss_customer_sk) dn, customer +WHERE ss_customer_sk = c_customer_sk + AND cnt BETWEEN 15 AND 20 +ORDER BY c_last_name, c_first_name, c_salutation, c_preferred_cust_flag DESC diff --git a/src/test/resources/tpcds/q35.sql b/src/test/resources/tpcds/q35.sql new file mode 100644 index 000000000..cfe4342d8 --- /dev/null +++ b/src/test/resources/tpcds/q35.sql @@ -0,0 +1,46 @@ +SELECT + ca_state, + cd_gender, + cd_marital_status, + count(*) cnt1, + min(cd_dep_count), + max(cd_dep_count), + avg(cd_dep_count), + cd_dep_employed_count, + count(*) cnt2, + min(cd_dep_employed_count), + max(cd_dep_employed_count), + avg(cd_dep_employed_count), + cd_dep_college_count, + count(*) cnt3, + min(cd_dep_college_count), + max(cd_dep_college_count), + avg(cd_dep_college_count) +FROM + customer c, customer_address ca, 
customer_demographics +WHERE + c.c_current_addr_sk = ca.ca_address_sk AND + cd_demo_sk = c.c_current_cdemo_sk AND + exists(SELECT * + FROM store_sales, date_dim + WHERE c.c_customer_sk = ss_customer_sk AND + ss_sold_date_sk = d_date_sk AND + d_year = 2002 AND + d_qoy < 4) AND + (exists(SELECT * + FROM web_sales, date_dim + WHERE c.c_customer_sk = ws_bill_customer_sk AND + ws_sold_date_sk = d_date_sk AND + d_year = 2002 AND + d_qoy < 4) OR + exists(SELECT * + FROM catalog_sales, date_dim + WHERE c.c_customer_sk = cs_ship_customer_sk AND + cs_sold_date_sk = d_date_sk AND + d_year = 2002 AND + d_qoy < 4)) +GROUP BY ca_state, cd_gender, cd_marital_status, cd_dep_count, + cd_dep_employed_count, cd_dep_college_count +ORDER BY ca_state, cd_gender, cd_marital_status, cd_dep_count, + cd_dep_employed_count, cd_dep_college_count +LIMIT 100 diff --git a/src/test/resources/tpcds/q36.sql b/src/test/resources/tpcds/q36.sql new file mode 100644 index 000000000..a8f93df76 --- /dev/null +++ b/src/test/resources/tpcds/q36.sql @@ -0,0 +1,26 @@ +SELECT + sum(ss_net_profit) / sum(ss_ext_sales_price) AS gross_margin, + i_category, + i_class, + grouping(i_category) + grouping(i_class) AS lochierarchy, + rank() + OVER ( + PARTITION BY grouping(i_category) + grouping(i_class), + CASE WHEN grouping(i_class) = 0 + THEN i_category END + ORDER BY sum(ss_net_profit) / sum(ss_ext_sales_price) ASC) AS rank_within_parent +FROM + store_sales, date_dim d1, item, store +WHERE + d1.d_year = 2001 + AND d1.d_date_sk = ss_sold_date_sk + AND i_item_sk = ss_item_sk + AND s_store_sk = ss_store_sk + AND s_state IN ('TN', 'TN', 'TN', 'TN', 'TN', 'TN', 'TN', 'TN') +GROUP BY ROLLUP (i_category, i_class) +ORDER BY + lochierarchy DESC + , CASE WHEN lochierarchy = 0 + THEN i_category END + , rank_within_parent +LIMIT 100 diff --git a/src/test/resources/tpcds/q37.sql b/src/test/resources/tpcds/q37.sql new file mode 100644 index 000000000..11b3821fa --- /dev/null +++ b/src/test/resources/tpcds/q37.sql @@ -0,0 +1,15 
@@ +SELECT + i_item_id, + i_item_desc, + i_current_price +FROM item, inventory, date_dim, catalog_sales +WHERE i_current_price BETWEEN 68 AND 68 + 30 + AND inv_item_sk = i_item_sk + AND d_date_sk = inv_date_sk + AND d_date BETWEEN cast('2000-02-01' AS DATE) AND (cast('2000-02-01' AS DATE) + INTERVAL 60 days) + AND i_manufact_id IN (677, 940, 694, 808) + AND inv_quantity_on_hand BETWEEN 100 AND 500 + AND cs_item_sk = i_item_sk +GROUP BY i_item_id, i_item_desc, i_current_price +ORDER BY i_item_id +LIMIT 100 diff --git a/src/test/resources/tpcds/q38.sql b/src/test/resources/tpcds/q38.sql new file mode 100644 index 000000000..1c8d53ee2 --- /dev/null +++ b/src/test/resources/tpcds/q38.sql @@ -0,0 +1,30 @@ +SELECT count(*) +FROM ( + SELECT DISTINCT + c_last_name, + c_first_name, + d_date + FROM store_sales, date_dim, customer + WHERE store_sales.ss_sold_date_sk = date_dim.d_date_sk + AND store_sales.ss_customer_sk = customer.c_customer_sk + AND d_month_seq BETWEEN 1200 AND 1200 + 11 + INTERSECT + SELECT DISTINCT + c_last_name, + c_first_name, + d_date + FROM catalog_sales, date_dim, customer + WHERE catalog_sales.cs_sold_date_sk = date_dim.d_date_sk + AND catalog_sales.cs_bill_customer_sk = customer.c_customer_sk + AND d_month_seq BETWEEN 1200 AND 1200 + 11 + INTERSECT + SELECT DISTINCT + c_last_name, + c_first_name, + d_date + FROM web_sales, date_dim, customer + WHERE web_sales.ws_sold_date_sk = date_dim.d_date_sk + AND web_sales.ws_bill_customer_sk = customer.c_customer_sk + AND d_month_seq BETWEEN 1200 AND 1200 + 11 + ) hot_cust +LIMIT 100 diff --git a/src/test/resources/tpcds/q39a.sql b/src/test/resources/tpcds/q39a.sql new file mode 100644 index 000000000..9fc4c1701 --- /dev/null +++ b/src/test/resources/tpcds/q39a.sql @@ -0,0 +1,47 @@ +WITH inv AS +(SELECT + w_warehouse_name, + w_warehouse_sk, + i_item_sk, + d_moy, + stdev, + mean, + CASE mean + WHEN 0 + THEN NULL + ELSE stdev / mean END cov + FROM (SELECT + w_warehouse_name, + w_warehouse_sk, + i_item_sk, + 
d_moy, + stddev_samp(inv_quantity_on_hand) stdev, + avg(inv_quantity_on_hand) mean + FROM inventory, item, warehouse, date_dim + WHERE inv_item_sk = i_item_sk + AND inv_warehouse_sk = w_warehouse_sk + AND inv_date_sk = d_date_sk + AND d_year = 2001 + GROUP BY w_warehouse_name, w_warehouse_sk, i_item_sk, d_moy) foo + WHERE CASE mean + WHEN 0 + THEN 0 + ELSE stdev / mean END > 1) +SELECT + inv1.w_warehouse_sk, + inv1.i_item_sk, + inv1.d_moy, + inv1.mean, + inv1.cov, + inv2.w_warehouse_sk, + inv2.i_item_sk, + inv2.d_moy, + inv2.mean, + inv2.cov +FROM inv inv1, inv inv2 +WHERE inv1.i_item_sk = inv2.i_item_sk + AND inv1.w_warehouse_sk = inv2.w_warehouse_sk + AND inv1.d_moy = 1 + AND inv2.d_moy = 1 + 1 +ORDER BY inv1.w_warehouse_sk, inv1.i_item_sk, inv1.d_moy, inv1.mean, inv1.cov + , inv2.d_moy, inv2.mean, inv2.cov diff --git a/src/test/resources/tpcds/q39b.sql b/src/test/resources/tpcds/q39b.sql new file mode 100644 index 000000000..6f8493029 --- /dev/null +++ b/src/test/resources/tpcds/q39b.sql @@ -0,0 +1,48 @@ +WITH inv AS +(SELECT + w_warehouse_name, + w_warehouse_sk, + i_item_sk, + d_moy, + stdev, + mean, + CASE mean + WHEN 0 + THEN NULL + ELSE stdev / mean END cov + FROM (SELECT + w_warehouse_name, + w_warehouse_sk, + i_item_sk, + d_moy, + stddev_samp(inv_quantity_on_hand) stdev, + avg(inv_quantity_on_hand) mean + FROM inventory, item, warehouse, date_dim + WHERE inv_item_sk = i_item_sk + AND inv_warehouse_sk = w_warehouse_sk + AND inv_date_sk = d_date_sk + AND d_year = 2001 + GROUP BY w_warehouse_name, w_warehouse_sk, i_item_sk, d_moy) foo + WHERE CASE mean + WHEN 0 + THEN 0 + ELSE stdev / mean END > 1) +SELECT + inv1.w_warehouse_sk, + inv1.i_item_sk, + inv1.d_moy, + inv1.mean, + inv1.cov, + inv2.w_warehouse_sk, + inv2.i_item_sk, + inv2.d_moy, + inv2.mean, + inv2.cov +FROM inv inv1, inv inv2 +WHERE inv1.i_item_sk = inv2.i_item_sk + AND inv1.w_warehouse_sk = inv2.w_warehouse_sk + AND inv1.d_moy = 1 + AND inv2.d_moy = 1 + 1 + AND inv1.cov > 1.5 +ORDER BY 
inv1.w_warehouse_sk, inv1.i_item_sk, inv1.d_moy, inv1.mean, inv1.cov + , inv2.d_moy, inv2.mean, inv2.cov diff --git a/src/test/resources/tpcds/q4.sql b/src/test/resources/tpcds/q4.sql new file mode 100644 index 000000000..b9f27fbc9 --- /dev/null +++ b/src/test/resources/tpcds/q4.sql @@ -0,0 +1,120 @@ +WITH year_total AS ( + SELECT + c_customer_id customer_id, + c_first_name customer_first_name, + c_last_name customer_last_name, + c_preferred_cust_flag customer_preferred_cust_flag, + c_birth_country customer_birth_country, + c_login customer_login, + c_email_address customer_email_address, + d_year dyear, + sum(((ss_ext_list_price - ss_ext_wholesale_cost - ss_ext_discount_amt) + + ss_ext_sales_price) / 2) year_total, + 's' sale_type + FROM customer, store_sales, date_dim + WHERE c_customer_sk = ss_customer_sk AND ss_sold_date_sk = d_date_sk + GROUP BY c_customer_id, + c_first_name, + c_last_name, + c_preferred_cust_flag, + c_birth_country, + c_login, + c_email_address, + d_year + UNION ALL + SELECT + c_customer_id customer_id, + c_first_name customer_first_name, + c_last_name customer_last_name, + c_preferred_cust_flag customer_preferred_cust_flag, + c_birth_country customer_birth_country, + c_login customer_login, + c_email_address customer_email_address, + d_year dyear, + sum((((cs_ext_list_price - cs_ext_wholesale_cost - cs_ext_discount_amt) + + cs_ext_sales_price) / 2)) year_total, + 'c' sale_type + FROM customer, catalog_sales, date_dim + WHERE c_customer_sk = cs_bill_customer_sk AND cs_sold_date_sk = d_date_sk + GROUP BY c_customer_id, + c_first_name, + c_last_name, + c_preferred_cust_flag, + c_birth_country, + c_login, + c_email_address, + d_year + UNION ALL + SELECT + c_customer_id customer_id, + c_first_name customer_first_name, + c_last_name customer_last_name, + c_preferred_cust_flag customer_preferred_cust_flag, + c_birth_country customer_birth_country, + c_login customer_login, + c_email_address customer_email_address, + d_year dyear, + 
sum((((ws_ext_list_price - ws_ext_wholesale_cost - ws_ext_discount_amt) + ws_ext_sales_price) / + 2)) year_total, + 'w' sale_type + FROM customer, web_sales, date_dim + WHERE c_customer_sk = ws_bill_customer_sk AND ws_sold_date_sk = d_date_sk + GROUP BY c_customer_id, + c_first_name, + c_last_name, + c_preferred_cust_flag, + c_birth_country, + c_login, + c_email_address, + d_year) +SELECT + t_s_secyear.customer_id, + t_s_secyear.customer_first_name, + t_s_secyear.customer_last_name, + t_s_secyear.customer_preferred_cust_flag, + t_s_secyear.customer_birth_country, + t_s_secyear.customer_login, + t_s_secyear.customer_email_address +FROM year_total t_s_firstyear, year_total t_s_secyear, year_total t_c_firstyear, + year_total t_c_secyear, year_total t_w_firstyear, year_total t_w_secyear +WHERE t_s_secyear.customer_id = t_s_firstyear.customer_id + AND t_s_firstyear.customer_id = t_c_secyear.customer_id + AND t_s_firstyear.customer_id = t_c_firstyear.customer_id + AND t_s_firstyear.customer_id = t_w_firstyear.customer_id + AND t_s_firstyear.customer_id = t_w_secyear.customer_id + AND t_s_firstyear.sale_type = 's' + AND t_c_firstyear.sale_type = 'c' + AND t_w_firstyear.sale_type = 'w' + AND t_s_secyear.sale_type = 's' + AND t_c_secyear.sale_type = 'c' + AND t_w_secyear.sale_type = 'w' + AND t_s_firstyear.dyear = 2001 + AND t_s_secyear.dyear = 2001 + 1 + AND t_c_firstyear.dyear = 2001 + AND t_c_secyear.dyear = 2001 + 1 + AND t_w_firstyear.dyear = 2001 + AND t_w_secyear.dyear = 2001 + 1 + AND t_s_firstyear.year_total > 0 + AND t_c_firstyear.year_total > 0 + AND t_w_firstyear.year_total > 0 + AND CASE WHEN t_c_firstyear.year_total > 0 + THEN t_c_secyear.year_total / t_c_firstyear.year_total + ELSE NULL END + > CASE WHEN t_s_firstyear.year_total > 0 + THEN t_s_secyear.year_total / t_s_firstyear.year_total + ELSE NULL END + AND CASE WHEN t_c_firstyear.year_total > 0 + THEN t_c_secyear.year_total / t_c_firstyear.year_total + ELSE NULL END + > CASE WHEN t_w_firstyear.year_total 
> 0 + THEN t_w_secyear.year_total / t_w_firstyear.year_total + ELSE NULL END +ORDER BY + t_s_secyear.customer_id, + t_s_secyear.customer_first_name, + t_s_secyear.customer_last_name, + t_s_secyear.customer_preferred_cust_flag, + t_s_secyear.customer_birth_country, + t_s_secyear.customer_login, + t_s_secyear.customer_email_address +LIMIT 100 diff --git a/src/test/resources/tpcds/q40.sql b/src/test/resources/tpcds/q40.sql new file mode 100644 index 000000000..66d8b73ac --- /dev/null +++ b/src/test/resources/tpcds/q40.sql @@ -0,0 +1,25 @@ +SELECT + w_state, + i_item_id, + sum(CASE WHEN (cast(d_date AS DATE) < cast('2000-03-11' AS DATE)) + THEN cs_sales_price - coalesce(cr_refunded_cash, 0) + ELSE 0 END) AS sales_before, + sum(CASE WHEN (cast(d_date AS DATE) >= cast('2000-03-11' AS DATE)) + THEN cs_sales_price - coalesce(cr_refunded_cash, 0) + ELSE 0 END) AS sales_after +FROM + catalog_sales + LEFT OUTER JOIN catalog_returns ON + (cs_order_number = cr_order_number + AND cs_item_sk = cr_item_sk) + , warehouse, item, date_dim +WHERE + i_current_price BETWEEN 0.99 AND 1.49 + AND i_item_sk = cs_item_sk + AND cs_warehouse_sk = w_warehouse_sk + AND cs_sold_date_sk = d_date_sk + AND d_date BETWEEN (cast('2000-03-11' AS DATE) - INTERVAL 30 days) + AND (cast('2000-03-11' AS DATE) + INTERVAL 30 days) +GROUP BY w_state, i_item_id +ORDER BY w_state, i_item_id +LIMIT 100 diff --git a/src/test/resources/tpcds/q41.sql b/src/test/resources/tpcds/q41.sql new file mode 100644 index 000000000..25e317e0e --- /dev/null +++ b/src/test/resources/tpcds/q41.sql @@ -0,0 +1,49 @@ +SELECT DISTINCT (i_product_name) +FROM item i1 +WHERE i_manufact_id BETWEEN 738 AND 738 + 40 + AND (SELECT count(*) AS item_cnt +FROM item +WHERE (i_manufact = i1.i_manufact AND + ((i_category = 'Women' AND + (i_color = 'powder' OR i_color = 'khaki') AND + (i_units = 'Ounce' OR i_units = 'Oz') AND + (i_size = 'medium' OR i_size = 'extra large') + ) OR + (i_category = 'Women' AND + (i_color = 'brown' OR i_color = 
'honeydew') AND + (i_units = 'Bunch' OR i_units = 'Ton') AND + (i_size = 'N/A' OR i_size = 'small') + ) OR + (i_category = 'Men' AND + (i_color = 'floral' OR i_color = 'deep') AND + (i_units = 'N/A' OR i_units = 'Dozen') AND + (i_size = 'petite' OR i_size = 'large') + ) OR + (i_category = 'Men' AND + (i_color = 'light' OR i_color = 'cornflower') AND + (i_units = 'Box' OR i_units = 'Pound') AND + (i_size = 'medium' OR i_size = 'extra large') + ))) OR + (i_manufact = i1.i_manufact AND + ((i_category = 'Women' AND + (i_color = 'midnight' OR i_color = 'snow') AND + (i_units = 'Pallet' OR i_units = 'Gross') AND + (i_size = 'medium' OR i_size = 'extra large') + ) OR + (i_category = 'Women' AND + (i_color = 'cyan' OR i_color = 'papaya') AND + (i_units = 'Cup' OR i_units = 'Dram') AND + (i_size = 'N/A' OR i_size = 'small') + ) OR + (i_category = 'Men' AND + (i_color = 'orange' OR i_color = 'frosted') AND + (i_units = 'Each' OR i_units = 'Tbl') AND + (i_size = 'petite' OR i_size = 'large') + ) OR + (i_category = 'Men' AND + (i_color = 'forest' OR i_color = 'ghost') AND + (i_units = 'Lb' OR i_units = 'Bundle') AND + (i_size = 'medium' OR i_size = 'extra large') + )))) > 0 +ORDER BY i_product_name +LIMIT 100 diff --git a/src/test/resources/tpcds/q42.sql b/src/test/resources/tpcds/q42.sql new file mode 100644 index 000000000..4d2e71760 --- /dev/null +++ b/src/test/resources/tpcds/q42.sql @@ -0,0 +1,18 @@ +SELECT + dt.d_year, + item.i_category_id, + item.i_category, + sum(ss_ext_sales_price) +FROM date_dim dt, store_sales, item +WHERE dt.d_date_sk = store_sales.ss_sold_date_sk + AND store_sales.ss_item_sk = item.i_item_sk + AND item.i_manager_id = 1 + AND dt.d_moy = 11 + AND dt.d_year = 2000 +GROUP BY dt.d_year + , item.i_category_id + , item.i_category +ORDER BY sum(ss_ext_sales_price) DESC, dt.d_year + , item.i_category_id + , item.i_category +LIMIT 100 diff --git a/src/test/resources/tpcds/q43.sql b/src/test/resources/tpcds/q43.sql new file mode 100644 index 
000000000..45411772c --- /dev/null +++ b/src/test/resources/tpcds/q43.sql @@ -0,0 +1,33 @@ +SELECT + s_store_name, + s_store_id, + sum(CASE WHEN (d_day_name = 'Sunday') + THEN ss_sales_price + ELSE NULL END) sun_sales, + sum(CASE WHEN (d_day_name = 'Monday') + THEN ss_sales_price + ELSE NULL END) mon_sales, + sum(CASE WHEN (d_day_name = 'Tuesday') + THEN ss_sales_price + ELSE NULL END) tue_sales, + sum(CASE WHEN (d_day_name = 'Wednesday') + THEN ss_sales_price + ELSE NULL END) wed_sales, + sum(CASE WHEN (d_day_name = 'Thursday') + THEN ss_sales_price + ELSE NULL END) thu_sales, + sum(CASE WHEN (d_day_name = 'Friday') + THEN ss_sales_price + ELSE NULL END) fri_sales, + sum(CASE WHEN (d_day_name = 'Saturday') + THEN ss_sales_price + ELSE NULL END) sat_sales +FROM date_dim, store_sales, store +WHERE d_date_sk = ss_sold_date_sk AND + s_store_sk = ss_store_sk AND + s_gmt_offset = -5 AND + d_year = 2000 +GROUP BY s_store_name, s_store_id +ORDER BY s_store_name, s_store_id, sun_sales, mon_sales, tue_sales, wed_sales, + thu_sales, fri_sales, sat_sales +LIMIT 100 diff --git a/src/test/resources/tpcds/q44.sql b/src/test/resources/tpcds/q44.sql new file mode 100644 index 000000000..379e60478 --- /dev/null +++ b/src/test/resources/tpcds/q44.sql @@ -0,0 +1,46 @@ +SELECT + asceding.rnk, + i1.i_product_name best_performing, + i2.i_product_name worst_performing +FROM (SELECT * +FROM (SELECT + item_sk, + rank() + OVER ( + ORDER BY rank_col ASC) rnk +FROM (SELECT + ss_item_sk item_sk, + avg(ss_net_profit) rank_col +FROM store_sales ss1 +WHERE ss_store_sk = 4 +GROUP BY ss_item_sk +HAVING avg(ss_net_profit) > 0.9 * (SELECT avg(ss_net_profit) rank_col +FROM store_sales +WHERE ss_store_sk = 4 + AND ss_addr_sk IS NULL +GROUP BY ss_store_sk)) V1) V11 +WHERE rnk < 11) asceding, + (SELECT * + FROM (SELECT + item_sk, + rank() + OVER ( + ORDER BY rank_col DESC) rnk + FROM (SELECT + ss_item_sk item_sk, + avg(ss_net_profit) rank_col + FROM store_sales ss1 + WHERE ss_store_sk = 4 + GROUP BY 
ss_item_sk + HAVING avg(ss_net_profit) > 0.9 * (SELECT avg(ss_net_profit) rank_col + FROM store_sales + WHERE ss_store_sk = 4 + AND ss_addr_sk IS NULL + GROUP BY ss_store_sk)) V2) V21 + WHERE rnk < 11) descending, + item i1, item i2 +WHERE asceding.rnk = descending.rnk + AND i1.i_item_sk = asceding.item_sk + AND i2.i_item_sk = descending.item_sk +ORDER BY asceding.rnk +LIMIT 100 diff --git a/src/test/resources/tpcds/q45.sql b/src/test/resources/tpcds/q45.sql new file mode 100644 index 000000000..907438f19 --- /dev/null +++ b/src/test/resources/tpcds/q45.sql @@ -0,0 +1,21 @@ +SELECT + ca_zip, + ca_city, + sum(ws_sales_price) +FROM web_sales, customer, customer_address, date_dim, item +WHERE ws_bill_customer_sk = c_customer_sk + AND c_current_addr_sk = ca_address_sk + AND ws_item_sk = i_item_sk + AND (substr(ca_zip, 1, 5) IN + ('85669', '86197', '88274', '83405', '86475', '85392', '85460', '80348', '81792') + OR + i_item_id IN (SELECT i_item_id + FROM item + WHERE i_item_sk IN (2, 3, 5, 7, 11, 13, 17, 19, 23, 29) + ) +) + AND ws_sold_date_sk = d_date_sk + AND d_qoy = 2 AND d_year = 2001 +GROUP BY ca_zip, ca_city +ORDER BY ca_zip, ca_city +LIMIT 100 diff --git a/src/test/resources/tpcds/q46.sql b/src/test/resources/tpcds/q46.sql new file mode 100644 index 000000000..0911677df --- /dev/null +++ b/src/test/resources/tpcds/q46.sql @@ -0,0 +1,32 @@ +SELECT + c_last_name, + c_first_name, + ca_city, + bought_city, + ss_ticket_number, + amt, + profit +FROM + (SELECT + ss_ticket_number, + ss_customer_sk, + ca_city bought_city, + sum(ss_coupon_amt) amt, + sum(ss_net_profit) profit + FROM store_sales, date_dim, store, household_demographics, customer_address + WHERE store_sales.ss_sold_date_sk = date_dim.d_date_sk + AND store_sales.ss_store_sk = store.s_store_sk + AND store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk + AND store_sales.ss_addr_sk = customer_address.ca_address_sk + AND (household_demographics.hd_dep_count = 4 OR + household_demographics.hd_vehicle_count 
= 3) + AND date_dim.d_dow IN (6, 0) + AND date_dim.d_year IN (1999, 1999 + 1, 1999 + 2) + AND store.s_city IN ('Fairview', 'Midway', 'Fairview', 'Fairview', 'Fairview') + GROUP BY ss_ticket_number, ss_customer_sk, ss_addr_sk, ca_city) dn, customer, + customer_address current_addr +WHERE ss_customer_sk = c_customer_sk + AND customer.c_current_addr_sk = current_addr.ca_address_sk + AND current_addr.ca_city <> bought_city +ORDER BY c_last_name, c_first_name, ca_city, bought_city, ss_ticket_number +LIMIT 100 diff --git a/src/test/resources/tpcds/q47.sql b/src/test/resources/tpcds/q47.sql new file mode 100644 index 000000000..cfc37a4ce --- /dev/null +++ b/src/test/resources/tpcds/q47.sql @@ -0,0 +1,63 @@ +WITH v1 AS ( + SELECT + i_category, + i_brand, + s_store_name, + s_company_name, + d_year, + d_moy, + sum(ss_sales_price) sum_sales, + avg(sum(ss_sales_price)) + OVER + (PARTITION BY i_category, i_brand, + s_store_name, s_company_name, d_year) + avg_monthly_sales, + rank() + OVER + (PARTITION BY i_category, i_brand, + s_store_name, s_company_name + ORDER BY d_year, d_moy) rn + FROM item, store_sales, date_dim, store + WHERE ss_item_sk = i_item_sk AND + ss_sold_date_sk = d_date_sk AND + ss_store_sk = s_store_sk AND + ( + d_year = 1999 OR + (d_year = 1999 - 1 AND d_moy = 12) OR + (d_year = 1999 + 1 AND d_moy = 1) + ) + GROUP BY i_category, i_brand, + s_store_name, s_company_name, + d_year, d_moy), + v2 AS ( + SELECT + v1.i_category, + v1.i_brand, + v1.s_store_name, + v1.s_company_name, + v1.d_year, + v1.d_moy, + v1.avg_monthly_sales, + v1.sum_sales, + v1_lag.sum_sales psum, + v1_lead.sum_sales nsum + FROM v1, v1 v1_lag, v1 v1_lead + WHERE v1.i_category = v1_lag.i_category AND + v1.i_category = v1_lead.i_category AND + v1.i_brand = v1_lag.i_brand AND + v1.i_brand = v1_lead.i_brand AND + v1.s_store_name = v1_lag.s_store_name AND + v1.s_store_name = v1_lead.s_store_name AND + v1.s_company_name = v1_lag.s_company_name AND + v1.s_company_name = v1_lead.s_company_name AND + 
v1.rn = v1_lag.rn + 1 AND + v1.rn = v1_lead.rn - 1) +SELECT * +FROM v2 +WHERE d_year = 1999 AND + avg_monthly_sales > 0 AND + CASE WHEN avg_monthly_sales > 0 + THEN abs(sum_sales - avg_monthly_sales) / avg_monthly_sales + ELSE NULL END > 0.1 +ORDER BY sum_sales - avg_monthly_sales, 3 +LIMIT 100 diff --git a/src/test/resources/tpcds/q48.sql b/src/test/resources/tpcds/q48.sql new file mode 100644 index 000000000..fdb9f38e2 --- /dev/null +++ b/src/test/resources/tpcds/q48.sql @@ -0,0 +1,63 @@ +SELECT sum(ss_quantity) +FROM store_sales, store, customer_demographics, customer_address, date_dim +WHERE s_store_sk = ss_store_sk + AND ss_sold_date_sk = d_date_sk AND d_year = 2001 + AND + ( + ( + cd_demo_sk = ss_cdemo_sk + AND + cd_marital_status = 'M' + AND + cd_education_status = '4 yr Degree' + AND + ss_sales_price BETWEEN 100.00 AND 150.00 + ) + OR + ( + cd_demo_sk = ss_cdemo_sk + AND + cd_marital_status = 'D' + AND + cd_education_status = '2 yr Degree' + AND + ss_sales_price BETWEEN 50.00 AND 100.00 + ) + OR + ( + cd_demo_sk = ss_cdemo_sk + AND + cd_marital_status = 'S' + AND + cd_education_status = 'College' + AND + ss_sales_price BETWEEN 150.00 AND 200.00 + ) + ) + AND + ( + ( + ss_addr_sk = ca_address_sk + AND + ca_country = 'United States' + AND + ca_state IN ('CO', 'OH', 'TX') + AND ss_net_profit BETWEEN 0 AND 2000 + ) + OR + (ss_addr_sk = ca_address_sk + AND + ca_country = 'United States' + AND + ca_state IN ('OR', 'MN', 'KY') + AND ss_net_profit BETWEEN 150 AND 3000 + ) + OR + (ss_addr_sk = ca_address_sk + AND + ca_country = 'United States' + AND + ca_state IN ('VA', 'CA', 'MS') + AND ss_net_profit BETWEEN 50 AND 25000 + ) + ) diff --git a/src/test/resources/tpcds/q49.sql b/src/test/resources/tpcds/q49.sql new file mode 100644 index 000000000..9568d8b92 --- /dev/null +++ b/src/test/resources/tpcds/q49.sql @@ -0,0 +1,126 @@ +SELECT + 'web' AS channel, + web.item, + web.return_ratio, + web.return_rank, + web.currency_rank +FROM ( + SELECT + item, + return_ratio, + 
currency_ratio, + rank() + OVER ( + ORDER BY return_ratio) AS return_rank, + rank() + OVER ( + ORDER BY currency_ratio) AS currency_rank + FROM + (SELECT + ws.ws_item_sk AS item, + (cast(sum(coalesce(wr.wr_return_quantity, 0)) AS DECIMAL(15, 4)) / + cast(sum(coalesce(ws.ws_quantity, 0)) AS DECIMAL(15, 4))) AS return_ratio, + (cast(sum(coalesce(wr.wr_return_amt, 0)) AS DECIMAL(15, 4)) / + cast(sum(coalesce(ws.ws_net_paid, 0)) AS DECIMAL(15, 4))) AS currency_ratio + FROM + web_sales ws LEFT OUTER JOIN web_returns wr + ON (ws.ws_order_number = wr.wr_order_number AND + ws.ws_item_sk = wr.wr_item_sk) + , date_dim + WHERE + wr.wr_return_amt > 10000 + AND ws.ws_net_profit > 1 + AND ws.ws_net_paid > 0 + AND ws.ws_quantity > 0 + AND ws_sold_date_sk = d_date_sk + AND d_year = 2001 + AND d_moy = 12 + GROUP BY ws.ws_item_sk + ) in_web + ) web +WHERE (web.return_rank <= 10 OR web.currency_rank <= 10) +UNION +SELECT + 'catalog' AS channel, + catalog.item, + catalog.return_ratio, + catalog.return_rank, + catalog.currency_rank +FROM ( + SELECT + item, + return_ratio, + currency_ratio, + rank() + OVER ( + ORDER BY return_ratio) AS return_rank, + rank() + OVER ( + ORDER BY currency_ratio) AS currency_rank + FROM + (SELECT + cs.cs_item_sk AS item, + (cast(sum(coalesce(cr.cr_return_quantity, 0)) AS DECIMAL(15, 4)) / + cast(sum(coalesce(cs.cs_quantity, 0)) AS DECIMAL(15, 4))) AS return_ratio, + (cast(sum(coalesce(cr.cr_return_amount, 0)) AS DECIMAL(15, 4)) / + cast(sum(coalesce(cs.cs_net_paid, 0)) AS DECIMAL(15, 4))) AS currency_ratio + FROM + catalog_sales cs LEFT OUTER JOIN catalog_returns cr + ON (cs.cs_order_number = cr.cr_order_number AND + cs.cs_item_sk = cr.cr_item_sk) + , date_dim + WHERE + cr.cr_return_amount > 10000 + AND cs.cs_net_profit > 1 + AND cs.cs_net_paid > 0 + AND cs.cs_quantity > 0 + AND cs_sold_date_sk = d_date_sk + AND d_year = 2001 + AND d_moy = 12 + GROUP BY cs.cs_item_sk + ) in_cat + ) catalog +WHERE (catalog.return_rank <= 10 OR catalog.currency_rank <= 10) 
+UNION +SELECT + 'store' AS channel, + store.item, + store.return_ratio, + store.return_rank, + store.currency_rank +FROM ( + SELECT + item, + return_ratio, + currency_ratio, + rank() + OVER ( + ORDER BY return_ratio) AS return_rank, + rank() + OVER ( + ORDER BY currency_ratio) AS currency_rank + FROM + (SELECT + sts.ss_item_sk AS item, + (cast(sum(coalesce(sr.sr_return_quantity, 0)) AS DECIMAL(15, 4)) / + cast(sum(coalesce(sts.ss_quantity, 0)) AS DECIMAL(15, 4))) AS return_ratio, + (cast(sum(coalesce(sr.sr_return_amt, 0)) AS DECIMAL(15, 4)) / + cast(sum(coalesce(sts.ss_net_paid, 0)) AS DECIMAL(15, 4))) AS currency_ratio + FROM + store_sales sts LEFT OUTER JOIN store_returns sr + ON (sts.ss_ticket_number = sr.sr_ticket_number AND sts.ss_item_sk = sr.sr_item_sk) + , date_dim + WHERE + sr.sr_return_amt > 10000 + AND sts.ss_net_profit > 1 + AND sts.ss_net_paid > 0 + AND sts.ss_quantity > 0 + AND ss_sold_date_sk = d_date_sk + AND d_year = 2001 + AND d_moy = 12 + GROUP BY sts.ss_item_sk + ) in_store + ) store +WHERE (store.return_rank <= 10 OR store.currency_rank <= 10) +ORDER BY 1, 4, 5 +LIMIT 100 diff --git a/src/test/resources/tpcds/q5.sql b/src/test/resources/tpcds/q5.sql new file mode 100644 index 000000000..b87cf3a44 --- /dev/null +++ b/src/test/resources/tpcds/q5.sql @@ -0,0 +1,131 @@ +WITH ssr AS +( SELECT + s_store_id, + sum(sales_price) AS sales, + sum(profit) AS profit, + sum(return_amt) AS RETURNS, + sum(net_loss) AS profit_loss + FROM + (SELECT + ss_store_sk AS store_sk, + ss_sold_date_sk AS date_sk, + ss_ext_sales_price AS sales_price, + ss_net_profit AS profit, + cast(0 AS DECIMAL(7, 2)) AS return_amt, + cast(0 AS DECIMAL(7, 2)) AS net_loss + FROM store_sales + UNION ALL + SELECT + sr_store_sk AS store_sk, + sr_returned_date_sk AS date_sk, + cast(0 AS DECIMAL(7, 2)) AS sales_price, + cast(0 AS DECIMAL(7, 2)) AS profit, + sr_return_amt AS return_amt, + sr_net_loss AS net_loss + FROM store_returns) + salesreturns, date_dim, store + WHERE date_sk = d_date_sk 
+ AND d_date BETWEEN cast('2000-08-23' AS DATE) + AND ((cast('2000-08-23' AS DATE) + INTERVAL 14 days)) + AND store_sk = s_store_sk + GROUP BY s_store_id), + csr AS + ( SELECT + cp_catalog_page_id, + sum(sales_price) AS sales, + sum(profit) AS profit, + sum(return_amt) AS RETURNS, + sum(net_loss) AS profit_loss + FROM + (SELECT + cs_catalog_page_sk AS page_sk, + cs_sold_date_sk AS date_sk, + cs_ext_sales_price AS sales_price, + cs_net_profit AS profit, + cast(0 AS DECIMAL(7, 2)) AS return_amt, + cast(0 AS DECIMAL(7, 2)) AS net_loss + FROM catalog_sales + UNION ALL + SELECT + cr_catalog_page_sk AS page_sk, + cr_returned_date_sk AS date_sk, + cast(0 AS DECIMAL(7, 2)) AS sales_price, + cast(0 AS DECIMAL(7, 2)) AS profit, + cr_return_amount AS return_amt, + cr_net_loss AS net_loss + FROM catalog_returns + ) salesreturns, date_dim, catalog_page + WHERE date_sk = d_date_sk + AND d_date BETWEEN cast('2000-08-23' AS DATE) + AND ((cast('2000-08-23' AS DATE) + INTERVAL 14 days)) + AND page_sk = cp_catalog_page_sk + GROUP BY cp_catalog_page_id) + , + wsr AS + ( SELECT + web_site_id, + sum(sales_price) AS sales, + sum(profit) AS profit, + sum(return_amt) AS RETURNS, + sum(net_loss) AS profit_loss + FROM + (SELECT + ws_web_site_sk AS wsr_web_site_sk, + ws_sold_date_sk AS date_sk, + ws_ext_sales_price AS sales_price, + ws_net_profit AS profit, + cast(0 AS DECIMAL(7, 2)) AS return_amt, + cast(0 AS DECIMAL(7, 2)) AS net_loss + FROM web_sales + UNION ALL + SELECT + ws_web_site_sk AS wsr_web_site_sk, + wr_returned_date_sk AS date_sk, + cast(0 AS DECIMAL(7, 2)) AS sales_price, + cast(0 AS DECIMAL(7, 2)) AS profit, + wr_return_amt AS return_amt, + wr_net_loss AS net_loss + FROM web_returns + LEFT OUTER JOIN web_sales ON + (wr_item_sk = ws_item_sk + AND wr_order_number = ws_order_number) + ) salesreturns, date_dim, web_site + WHERE date_sk = d_date_sk + AND d_date BETWEEN cast('2000-08-23' AS DATE) + AND ((cast('2000-08-23' AS DATE) + INTERVAL 14 days)) + AND wsr_web_site_sk = 
web_site_sk + GROUP BY web_site_id) +SELECT + channel, + id, + sum(sales) AS sales, + sum(returns) AS returns, + sum(profit) AS profit +FROM + (SELECT + 'store channel' AS channel, + concat('store', s_store_id) AS id, + sales, + returns, + (profit - profit_loss) AS profit + FROM ssr + UNION ALL + SELECT + 'catalog channel' AS channel, + concat('catalog_page', cp_catalog_page_id) AS id, + sales, + returns, + (profit - profit_loss) AS profit + FROM csr + UNION ALL + SELECT + 'web channel' AS channel, + concat('web_site', web_site_id) AS id, + sales, + returns, + (profit - profit_loss) AS profit + FROM wsr + ) x +GROUP BY ROLLUP (channel, id) +ORDER BY channel, id +LIMIT 100 diff --git a/src/test/resources/tpcds/q50.sql b/src/test/resources/tpcds/q50.sql new file mode 100644 index 000000000..f1d4b1544 --- /dev/null +++ b/src/test/resources/tpcds/q50.sql @@ -0,0 +1,47 @@ +SELECT + s_store_name, + s_company_id, + s_street_number, + s_street_name, + s_street_type, + s_suite_number, + s_city, + s_county, + s_state, + s_zip, + sum(CASE WHEN (sr_returned_date_sk - ss_sold_date_sk <= 30) + THEN 1 + ELSE 0 END) AS `30 days `, + sum(CASE WHEN (sr_returned_date_sk - ss_sold_date_sk > 30) AND + (sr_returned_date_sk - ss_sold_date_sk <= 60) + THEN 1 + ELSE 0 END) AS `31 - 60 days `, + sum(CASE WHEN (sr_returned_date_sk - ss_sold_date_sk > 60) AND + (sr_returned_date_sk - ss_sold_date_sk <= 90) + THEN 1 + ELSE 0 END) AS `61 - 90 days `, + sum(CASE WHEN (sr_returned_date_sk - ss_sold_date_sk > 90) AND + (sr_returned_date_sk - ss_sold_date_sk <= 120) + THEN 1 + ELSE 0 END) AS `91 - 120 days `, + sum(CASE WHEN (sr_returned_date_sk - ss_sold_date_sk > 120) + THEN 1 + ELSE 0 END) AS `>120 days ` +FROM + store_sales, store_returns, store, date_dim d1, date_dim d2 +WHERE + d2.d_year = 2001 + AND d2.d_moy = 8 + AND ss_ticket_number = sr_ticket_number + AND ss_item_sk = sr_item_sk + AND ss_sold_date_sk = d1.d_date_sk + AND sr_returned_date_sk = d2.d_date_sk + AND ss_customer_sk = 
sr_customer_sk + AND ss_store_sk = s_store_sk +GROUP BY + s_store_name, s_company_id, s_street_number, s_street_name, s_street_type, + s_suite_number, s_city, s_county, s_state, s_zip +ORDER BY + s_store_name, s_company_id, s_street_number, s_street_name, s_street_type, + s_suite_number, s_city, s_county, s_state, s_zip +LIMIT 100 diff --git a/src/test/resources/tpcds/q51.sql b/src/test/resources/tpcds/q51.sql new file mode 100644 index 000000000..62b003eb6 --- /dev/null +++ b/src/test/resources/tpcds/q51.sql @@ -0,0 +1,55 @@ +WITH web_v1 AS ( + SELECT + ws_item_sk item_sk, + d_date, + sum(sum(ws_sales_price)) + OVER (PARTITION BY ws_item_sk + ORDER BY d_date + ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) cume_sales + FROM web_sales, date_dim + WHERE ws_sold_date_sk = d_date_sk + AND d_month_seq BETWEEN 1200 AND 1200 + 11 + AND ws_item_sk IS NOT NULL + GROUP BY ws_item_sk, d_date), + store_v1 AS ( + SELECT + ss_item_sk item_sk, + d_date, + sum(sum(ss_sales_price)) + OVER (PARTITION BY ss_item_sk + ORDER BY d_date + ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) cume_sales + FROM store_sales, date_dim + WHERE ss_sold_date_sk = d_date_sk + AND d_month_seq BETWEEN 1200 AND 1200 + 11 + AND ss_item_sk IS NOT NULL + GROUP BY ss_item_sk, d_date) +SELECT * +FROM (SELECT + item_sk, + d_date, + web_sales, + store_sales, + max(web_sales) + OVER (PARTITION BY item_sk + ORDER BY d_date + ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) web_cumulative, + max(store_sales) + OVER (PARTITION BY item_sk + ORDER BY d_date + ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) store_cumulative +FROM (SELECT + CASE WHEN web.item_sk IS NOT NULL + THEN web.item_sk + ELSE store.item_sk END item_sk, + CASE WHEN web.d_date IS NOT NULL + THEN web.d_date + ELSE store.d_date END d_date, + web.cume_sales web_sales, + store.cume_sales store_sales +FROM web_v1 web FULL OUTER JOIN store_v1 store ON (web.item_sk = store.item_sk + AND web.d_date = store.d_date) + ) x) y +WHERE web_cumulative > 
store_cumulative +ORDER BY item_sk, d_date +LIMIT 100 diff --git a/src/test/resources/tpcds/q52.sql b/src/test/resources/tpcds/q52.sql new file mode 100644 index 000000000..467d1ae05 --- /dev/null +++ b/src/test/resources/tpcds/q52.sql @@ -0,0 +1,14 @@ +SELECT + dt.d_year, + item.i_brand_id brand_id, + item.i_brand brand, + sum(ss_ext_sales_price) ext_price +FROM date_dim dt, store_sales, item +WHERE dt.d_date_sk = store_sales.ss_sold_date_sk + AND store_sales.ss_item_sk = item.i_item_sk + AND item.i_manager_id = 1 + AND dt.d_moy = 11 + AND dt.d_year = 2000 +GROUP BY dt.d_year, item.i_brand, item.i_brand_id +ORDER BY dt.d_year, ext_price DESC, brand_id +LIMIT 100 diff --git a/src/test/resources/tpcds/q53.sql b/src/test/resources/tpcds/q53.sql new file mode 100644 index 000000000..b42c68dcf --- /dev/null +++ b/src/test/resources/tpcds/q53.sql @@ -0,0 +1,30 @@ +SELECT * +FROM + (SELECT + i_manufact_id, + sum(ss_sales_price) sum_sales, + avg(sum(ss_sales_price)) + OVER (PARTITION BY i_manufact_id) avg_quarterly_sales + FROM item, store_sales, date_dim, store + WHERE ss_item_sk = i_item_sk AND + ss_sold_date_sk = d_date_sk AND + ss_store_sk = s_store_sk AND + d_month_seq IN (1200, 1200 + 1, 1200 + 2, 1200 + 3, 1200 + 4, 1200 + 5, 1200 + 6, + 1200 + 7, 1200 + 8, 1200 + 9, 1200 + 10, 1200 + 11) AND + ((i_category IN ('Books', 'Children', 'Electronics') AND + i_class IN ('personal', 'portable', 'reference', 'self-help') AND + i_brand IN ('scholaramalgamalg #14', 'scholaramalgamalg #7', + 'exportiunivamalg #9', 'scholaramalgamalg #9')) + OR + (i_category IN ('Women', 'Music', 'Men') AND + i_class IN ('accessories', 'classical', 'fragrances', 'pants') AND + i_brand IN ('amalgimporto #1', 'edu packscholar #1', 'exportiimporto #1', + 'importoamalg #1'))) + GROUP BY i_manufact_id, d_qoy) tmp1 +WHERE CASE WHEN avg_quarterly_sales > 0 + THEN abs(sum_sales - avg_quarterly_sales) / avg_quarterly_sales + ELSE NULL END > 0.1 +ORDER BY avg_quarterly_sales, + sum_sales, + 
i_manufact_id +LIMIT 100 diff --git a/src/test/resources/tpcds/q54.sql b/src/test/resources/tpcds/q54.sql new file mode 100644 index 000000000..897237fb6 --- /dev/null +++ b/src/test/resources/tpcds/q54.sql @@ -0,0 +1,61 @@ +WITH my_customers AS ( + SELECT DISTINCT + c_customer_sk, + c_current_addr_sk + FROM + (SELECT + cs_sold_date_sk sold_date_sk, + cs_bill_customer_sk customer_sk, + cs_item_sk item_sk + FROM catalog_sales + UNION ALL + SELECT + ws_sold_date_sk sold_date_sk, + ws_bill_customer_sk customer_sk, + ws_item_sk item_sk + FROM web_sales + ) cs_or_ws_sales, + item, + date_dim, + customer + WHERE sold_date_sk = d_date_sk + AND item_sk = i_item_sk + AND i_category = 'Women' + AND i_class = 'maternity' + AND c_customer_sk = cs_or_ws_sales.customer_sk + AND d_moy = 12 + AND d_year = 1998 +) + , my_revenue AS ( + SELECT + c_customer_sk, + sum(ss_ext_sales_price) AS revenue + FROM my_customers, + store_sales, + customer_address, + store, + date_dim + WHERE c_current_addr_sk = ca_address_sk + AND ca_county = s_county + AND ca_state = s_state + AND ss_sold_date_sk = d_date_sk + AND c_customer_sk = ss_customer_sk + AND d_month_seq BETWEEN (SELECT DISTINCT d_month_seq + 1 + FROM date_dim + WHERE d_year = 1998 AND d_moy = 12) + AND (SELECT DISTINCT d_month_seq + 3 + FROM date_dim + WHERE d_year = 1998 AND d_moy = 12) + GROUP BY c_customer_sk +) + , segments AS +(SELECT cast((revenue / 50) AS INT) AS segment + FROM my_revenue) +SELECT + segment, + count(*) AS num_customers, + segment * 50 AS segment_base +FROM segments +GROUP BY segment +ORDER BY segment, num_customers +LIMIT 100 diff --git a/src/test/resources/tpcds/q55.sql b/src/test/resources/tpcds/q55.sql new file mode 100644 index 000000000..bc5d888c9 --- /dev/null +++ b/src/test/resources/tpcds/q55.sql @@ -0,0 +1,13 @@ +SELECT + i_brand_id brand_id, + i_brand brand, + sum(ss_ext_sales_price) ext_price +FROM date_dim, store_sales, item +WHERE d_date_sk = ss_sold_date_sk + AND ss_item_sk = i_item_sk + AND 
i_manager_id = 28 + AND d_moy = 11 + AND d_year = 1999 +GROUP BY i_brand, i_brand_id +ORDER BY ext_price DESC, brand_id +LIMIT 100 diff --git a/src/test/resources/tpcds/q56.sql b/src/test/resources/tpcds/q56.sql new file mode 100644 index 000000000..2fa1738dc --- /dev/null +++ b/src/test/resources/tpcds/q56.sql @@ -0,0 +1,65 @@ +WITH ss AS ( + SELECT + i_item_id, + sum(ss_ext_sales_price) total_sales + FROM + store_sales, date_dim, customer_address, item + WHERE + i_item_id IN (SELECT i_item_id + FROM item + WHERE i_color IN ('slate', 'blanched', 'burnished')) + AND ss_item_sk = i_item_sk + AND ss_sold_date_sk = d_date_sk + AND d_year = 2001 + AND d_moy = 2 + AND ss_addr_sk = ca_address_sk + AND ca_gmt_offset = -5 + GROUP BY i_item_id), + cs AS ( + SELECT + i_item_id, + sum(cs_ext_sales_price) total_sales + FROM + catalog_sales, date_dim, customer_address, item + WHERE + i_item_id IN (SELECT i_item_id + FROM item + WHERE i_color IN ('slate', 'blanched', 'burnished')) + AND cs_item_sk = i_item_sk + AND cs_sold_date_sk = d_date_sk + AND d_year = 2001 + AND d_moy = 2 + AND cs_bill_addr_sk = ca_address_sk + AND ca_gmt_offset = -5 + GROUP BY i_item_id), + ws AS ( + SELECT + i_item_id, + sum(ws_ext_sales_price) total_sales + FROM + web_sales, date_dim, customer_address, item + WHERE + i_item_id IN (SELECT i_item_id + FROM item + WHERE i_color IN ('slate', 'blanched', 'burnished')) + AND ws_item_sk = i_item_sk + AND ws_sold_date_sk = d_date_sk + AND d_year = 2001 + AND d_moy = 2 + AND ws_bill_addr_sk = ca_address_sk + AND ca_gmt_offset = -5 + GROUP BY i_item_id) +SELECT + i_item_id, + sum(total_sales) total_sales +FROM (SELECT * + FROM ss + UNION ALL + SELECT * + FROM cs + UNION ALL + SELECT * + FROM ws) tmp1 +GROUP BY i_item_id +ORDER BY total_sales +LIMIT 100 diff --git a/src/test/resources/tpcds/q57.sql b/src/test/resources/tpcds/q57.sql new file mode 100644 index 000000000..cf70d4b90 --- /dev/null +++ b/src/test/resources/tpcds/q57.sql @@ -0,0 +1,56 @@ +WITH v1 AS ( + 
SELECT + i_category, + i_brand, + cc_name, + d_year, + d_moy, + sum(cs_sales_price) sum_sales, + avg(sum(cs_sales_price)) + OVER + (PARTITION BY i_category, i_brand, cc_name, d_year) + avg_monthly_sales, + rank() + OVER + (PARTITION BY i_category, i_brand, cc_name + ORDER BY d_year, d_moy) rn + FROM item, catalog_sales, date_dim, call_center + WHERE cs_item_sk = i_item_sk AND + cs_sold_date_sk = d_date_sk AND + cc_call_center_sk = cs_call_center_sk AND + ( + d_year = 1999 OR + (d_year = 1999 - 1 AND d_moy = 12) OR + (d_year = 1999 + 1 AND d_moy = 1) + ) + GROUP BY i_category, i_brand, + cc_name, d_year, d_moy), + v2 AS ( + SELECT + v1.i_category, + v1.i_brand, + v1.cc_name, + v1.d_year, + v1.d_moy, + v1.avg_monthly_sales, + v1.sum_sales, + v1_lag.sum_sales psum, + v1_lead.sum_sales nsum + FROM v1, v1 v1_lag, v1 v1_lead + WHERE v1.i_category = v1_lag.i_category AND + v1.i_category = v1_lead.i_category AND + v1.i_brand = v1_lag.i_brand AND + v1.i_brand = v1_lead.i_brand AND + v1.cc_name = v1_lag.cc_name AND + v1.cc_name = v1_lead.cc_name AND + v1.rn = v1_lag.rn + 1 AND + v1.rn = v1_lead.rn - 1) +SELECT * +FROM v2 +WHERE d_year = 1999 AND + avg_monthly_sales > 0 AND + CASE WHEN avg_monthly_sales > 0 + THEN abs(sum_sales - avg_monthly_sales) / avg_monthly_sales + ELSE NULL END > 0.1 +ORDER BY sum_sales - avg_monthly_sales, 3 +LIMIT 100 diff --git a/src/test/resources/tpcds/q58.sql b/src/test/resources/tpcds/q58.sql new file mode 100644 index 000000000..5f63f33dc --- /dev/null +++ b/src/test/resources/tpcds/q58.sql @@ -0,0 +1,59 @@ +WITH ss_items AS +(SELECT + i_item_id item_id, + sum(ss_ext_sales_price) ss_item_rev + FROM store_sales, item, date_dim + WHERE ss_item_sk = i_item_sk + AND d_date IN (SELECT d_date + FROM date_dim + WHERE d_week_seq = (SELECT d_week_seq + FROM date_dim + WHERE d_date = '2000-01-03')) + AND ss_sold_date_sk = d_date_sk + GROUP BY i_item_id), + cs_items AS + (SELECT + i_item_id item_id, + sum(cs_ext_sales_price) cs_item_rev + FROM 
catalog_sales, item, date_dim + WHERE cs_item_sk = i_item_sk + AND d_date IN (SELECT d_date + FROM date_dim + WHERE d_week_seq = (SELECT d_week_seq + FROM date_dim + WHERE d_date = '2000-01-03')) + AND cs_sold_date_sk = d_date_sk + GROUP BY i_item_id), + ws_items AS + (SELECT + i_item_id item_id, + sum(ws_ext_sales_price) ws_item_rev + FROM web_sales, item, date_dim + WHERE ws_item_sk = i_item_sk + AND d_date IN (SELECT d_date + FROM date_dim + WHERE d_week_seq = (SELECT d_week_seq + FROM date_dim + WHERE d_date = '2000-01-03')) + AND ws_sold_date_sk = d_date_sk + GROUP BY i_item_id) +SELECT + ss_items.item_id, + ss_item_rev, + ss_item_rev / (ss_item_rev + cs_item_rev + ws_item_rev) / 3 * 100 ss_dev, + cs_item_rev, + cs_item_rev / (ss_item_rev + cs_item_rev + ws_item_rev) / 3 * 100 cs_dev, + ws_item_rev, + ws_item_rev / (ss_item_rev + cs_item_rev + ws_item_rev) / 3 * 100 ws_dev, + (ss_item_rev + cs_item_rev + ws_item_rev) / 3 average +FROM ss_items, cs_items, ws_items +WHERE ss_items.item_id = cs_items.item_id + AND ss_items.item_id = ws_items.item_id + AND ss_item_rev BETWEEN 0.9 * cs_item_rev AND 1.1 * cs_item_rev + AND ss_item_rev BETWEEN 0.9 * ws_item_rev AND 1.1 * ws_item_rev + AND cs_item_rev BETWEEN 0.9 * ss_item_rev AND 1.1 * ss_item_rev + AND cs_item_rev BETWEEN 0.9 * ws_item_rev AND 1.1 * ws_item_rev + AND ws_item_rev BETWEEN 0.9 * ss_item_rev AND 1.1 * ss_item_rev + AND ws_item_rev BETWEEN 0.9 * cs_item_rev AND 1.1 * cs_item_rev +ORDER BY item_id, ss_item_rev +LIMIT 100 diff --git a/src/test/resources/tpcds/q59.sql b/src/test/resources/tpcds/q59.sql new file mode 100644 index 000000000..3cef20276 --- /dev/null +++ b/src/test/resources/tpcds/q59.sql @@ -0,0 +1,75 @@ +WITH wss AS +(SELECT + d_week_seq, + ss_store_sk, + sum(CASE WHEN (d_day_name = 'Sunday') + THEN ss_sales_price + ELSE NULL END) sun_sales, + sum(CASE WHEN (d_day_name = 'Monday') + THEN ss_sales_price + ELSE NULL END) mon_sales, + sum(CASE WHEN (d_day_name = 'Tuesday') + THEN ss_sales_price 
+ ELSE NULL END) tue_sales, + sum(CASE WHEN (d_day_name = 'Wednesday') + THEN ss_sales_price + ELSE NULL END) wed_sales, + sum(CASE WHEN (d_day_name = 'Thursday') + THEN ss_sales_price + ELSE NULL END) thu_sales, + sum(CASE WHEN (d_day_name = 'Friday') + THEN ss_sales_price + ELSE NULL END) fri_sales, + sum(CASE WHEN (d_day_name = 'Saturday') + THEN ss_sales_price + ELSE NULL END) sat_sales + FROM store_sales, date_dim + WHERE d_date_sk = ss_sold_date_sk + GROUP BY d_week_seq, ss_store_sk +) +SELECT + s_store_name1, + s_store_id1, + d_week_seq1, + sun_sales1 / sun_sales2, + mon_sales1 / mon_sales2, + tue_sales1 / tue_sales2, + wed_sales1 / wed_sales2, + thu_sales1 / thu_sales2, + fri_sales1 / fri_sales2, + sat_sales1 / sat_sales2 +FROM + (SELECT + s_store_name s_store_name1, + wss.d_week_seq d_week_seq1, + s_store_id s_store_id1, + sun_sales sun_sales1, + mon_sales mon_sales1, + tue_sales tue_sales1, + wed_sales wed_sales1, + thu_sales thu_sales1, + fri_sales fri_sales1, + sat_sales sat_sales1 + FROM wss, store, date_dim d + WHERE d.d_week_seq = wss.d_week_seq AND + ss_store_sk = s_store_sk AND + d_month_seq BETWEEN 1212 AND 1212 + 11) y, + (SELECT + s_store_name s_store_name2, + wss.d_week_seq d_week_seq2, + s_store_id s_store_id2, + sun_sales sun_sales2, + mon_sales mon_sales2, + tue_sales tue_sales2, + wed_sales wed_sales2, + thu_sales thu_sales2, + fri_sales fri_sales2, + sat_sales sat_sales2 + FROM wss, store, date_dim d + WHERE d.d_week_seq = wss.d_week_seq AND + ss_store_sk = s_store_sk AND + d_month_seq BETWEEN 1212 + 12 AND 1212 + 23) x +WHERE s_store_id1 = s_store_id2 + AND d_week_seq1 = d_week_seq2 - 52 +ORDER BY s_store_name1, s_store_id1, d_week_seq1 +LIMIT 100 diff --git a/src/test/resources/tpcds/q6.sql b/src/test/resources/tpcds/q6.sql new file mode 100644 index 000000000..f0f5cf05a --- /dev/null +++ b/src/test/resources/tpcds/q6.sql @@ -0,0 +1,21 @@ +SELECT + a.ca_state state, + count(*) cnt +FROM + customer_address a, customer c, store_sales s, 
date_dim d, item i +WHERE a.ca_address_sk = c.c_current_addr_sk + AND c.c_customer_sk = s.ss_customer_sk + AND s.ss_sold_date_sk = d.d_date_sk + AND s.ss_item_sk = i.i_item_sk + AND d.d_month_seq = + (SELECT DISTINCT (d_month_seq) + FROM date_dim + WHERE d_year = 2000 AND d_moy = 1) + AND i.i_current_price > 1.2 * + (SELECT avg(j.i_current_price) + FROM item j + WHERE j.i_category = i.i_category) +GROUP BY a.ca_state +HAVING count(*) >= 10 +ORDER BY cnt +LIMIT 100 diff --git a/src/test/resources/tpcds/q60.sql b/src/test/resources/tpcds/q60.sql new file mode 100644 index 000000000..41b963f44 --- /dev/null +++ b/src/test/resources/tpcds/q60.sql @@ -0,0 +1,62 @@ +WITH ss AS ( + SELECT + i_item_id, + sum(ss_ext_sales_price) total_sales + FROM store_sales, date_dim, customer_address, item + WHERE + i_item_id IN (SELECT i_item_id + FROM item + WHERE i_category IN ('Music')) + AND ss_item_sk = i_item_sk + AND ss_sold_date_sk = d_date_sk + AND d_year = 1998 + AND d_moy = 9 + AND ss_addr_sk = ca_address_sk + AND ca_gmt_offset = -5 + GROUP BY i_item_id), + cs AS ( + SELECT + i_item_id, + sum(cs_ext_sales_price) total_sales + FROM catalog_sales, date_dim, customer_address, item + WHERE + i_item_id IN (SELECT i_item_id + FROM item + WHERE i_category IN ('Music')) + AND cs_item_sk = i_item_sk + AND cs_sold_date_sk = d_date_sk + AND d_year = 1998 + AND d_moy = 9 + AND cs_bill_addr_sk = ca_address_sk + AND ca_gmt_offset = -5 + GROUP BY i_item_id), + ws AS ( + SELECT + i_item_id, + sum(ws_ext_sales_price) total_sales + FROM web_sales, date_dim, customer_address, item + WHERE + i_item_id IN (SELECT i_item_id + FROM item + WHERE i_category IN ('Music')) + AND ws_item_sk = i_item_sk + AND ws_sold_date_sk = d_date_sk + AND d_year = 1998 + AND d_moy = 9 + AND ws_bill_addr_sk = ca_address_sk + AND ca_gmt_offset = -5 + GROUP BY i_item_id) +SELECT + i_item_id, + sum(total_sales) total_sales +FROM (SELECT * + FROM ss + UNION ALL + SELECT * + FROM cs + UNION ALL + SELECT * + FROM ws) tmp1 
+GROUP BY i_item_id +ORDER BY i_item_id, total_sales +LIMIT 100 diff --git a/src/test/resources/tpcds/q61.sql b/src/test/resources/tpcds/q61.sql new file mode 100644 index 000000000..b0a872b4b --- /dev/null +++ b/src/test/resources/tpcds/q61.sql @@ -0,0 +1,33 @@ +SELECT + promotions, + total, + cast(promotions AS DECIMAL(15, 4)) / cast(total AS DECIMAL(15, 4)) * 100 +FROM + (SELECT sum(ss_ext_sales_price) promotions + FROM store_sales, store, promotion, date_dim, customer, customer_address, item + WHERE ss_sold_date_sk = d_date_sk + AND ss_store_sk = s_store_sk + AND ss_promo_sk = p_promo_sk + AND ss_customer_sk = c_customer_sk + AND ca_address_sk = c_current_addr_sk + AND ss_item_sk = i_item_sk + AND ca_gmt_offset = -5 + AND i_category = 'Jewelry' + AND (p_channel_dmail = 'Y' OR p_channel_email = 'Y' OR p_channel_tv = 'Y') + AND s_gmt_offset = -5 + AND d_year = 1998 + AND d_moy = 11) promotional_sales, + (SELECT sum(ss_ext_sales_price) total + FROM store_sales, store, date_dim, customer, customer_address, item + WHERE ss_sold_date_sk = d_date_sk + AND ss_store_sk = s_store_sk + AND ss_customer_sk = c_customer_sk + AND ca_address_sk = c_current_addr_sk + AND ss_item_sk = i_item_sk + AND ca_gmt_offset = -5 + AND i_category = 'Jewelry' + AND s_gmt_offset = -5 + AND d_year = 1998 + AND d_moy = 11) all_sales +ORDER BY promotions, total +LIMIT 100 diff --git a/src/test/resources/tpcds/q62.sql b/src/test/resources/tpcds/q62.sql new file mode 100644 index 000000000..8a414f154 --- /dev/null +++ b/src/test/resources/tpcds/q62.sql @@ -0,0 +1,35 @@ +SELECT + substr(w_warehouse_name, 1, 20), + sm_type, + web_name, + sum(CASE WHEN (ws_ship_date_sk - ws_sold_date_sk <= 30) + THEN 1 + ELSE 0 END) AS `30 days `, + sum(CASE WHEN (ws_ship_date_sk - ws_sold_date_sk > 30) AND + (ws_ship_date_sk - ws_sold_date_sk <= 60) + THEN 1 + ELSE 0 END) AS `31 - 60 days `, + sum(CASE WHEN (ws_ship_date_sk - ws_sold_date_sk > 60) AND + (ws_ship_date_sk - ws_sold_date_sk <= 90) + THEN 1 + ELSE 0 
END) AS `61 - 90 days `, + sum(CASE WHEN (ws_ship_date_sk - ws_sold_date_sk > 90) AND + (ws_ship_date_sk - ws_sold_date_sk <= 120) + THEN 1 + ELSE 0 END) AS `91 - 120 days `, + sum(CASE WHEN (ws_ship_date_sk - ws_sold_date_sk > 120) + THEN 1 + ELSE 0 END) AS `>120 days ` +FROM + web_sales, warehouse, ship_mode, web_site, date_dim +WHERE + d_month_seq BETWEEN 1200 AND 1200 + 11 + AND ws_ship_date_sk = d_date_sk + AND ws_warehouse_sk = w_warehouse_sk + AND ws_ship_mode_sk = sm_ship_mode_sk + AND ws_web_site_sk = web_site_sk +GROUP BY + substr(w_warehouse_name, 1, 20), sm_type, web_name +ORDER BY + substr(w_warehouse_name, 1, 20), sm_type, web_name +LIMIT 100 diff --git a/src/test/resources/tpcds/q63.sql b/src/test/resources/tpcds/q63.sql new file mode 100644 index 000000000..ef6867e0a --- /dev/null +++ b/src/test/resources/tpcds/q63.sql @@ -0,0 +1,31 @@ +SELECT * +FROM (SELECT + i_manager_id, + sum(ss_sales_price) sum_sales, + avg(sum(ss_sales_price)) + OVER (PARTITION BY i_manager_id) avg_monthly_sales +FROM item + , store_sales + , date_dim + , store +WHERE ss_item_sk = i_item_sk + AND ss_sold_date_sk = d_date_sk + AND ss_store_sk = s_store_sk + AND d_month_seq IN (1200, 1200 + 1, 1200 + 2, 1200 + 3, 1200 + 4, 1200 + 5, 1200 + 6, 1200 + 7, + 1200 + 8, 1200 + 9, 1200 + 10, 1200 + 11) + AND ((i_category IN ('Books', 'Children', 'Electronics') + AND i_class IN ('personal', 'portable', 'refernece', 'self-help') + AND i_brand IN ('scholaramalgamalg #14', 'scholaramalgamalg #7', + 'exportiunivamalg #9', 'scholaramalgamalg #9')) + OR (i_category IN ('Women', 'Music', 'Men') + AND i_class IN ('accessories', 'classical', 'fragrances', 'pants') + AND i_brand IN ('amalgimporto #1', 'edu packscholar #1', 'exportiimporto #1', + 'importoamalg #1'))) +GROUP BY i_manager_id, d_moy) tmp1 +WHERE CASE WHEN avg_monthly_sales > 0 + THEN abs(sum_sales - avg_monthly_sales) / avg_monthly_sales + ELSE NULL END > 0.1 +ORDER BY i_manager_id + , avg_monthly_sales + , sum_sales +LIMIT 100 diff 
--git a/src/test/resources/tpcds/q64.sql b/src/test/resources/tpcds/q64.sql new file mode 100644 index 000000000..8ec1d31b6 --- /dev/null +++ b/src/test/resources/tpcds/q64.sql @@ -0,0 +1,92 @@ +WITH cs_ui AS +(SELECT + cs_item_sk, + sum(cs_ext_list_price) AS sale, + sum(cr_refunded_cash + cr_reversed_charge + cr_store_credit) AS refund + FROM catalog_sales + , catalog_returns + WHERE cs_item_sk = cr_item_sk + AND cs_order_number = cr_order_number + GROUP BY cs_item_sk + HAVING sum(cs_ext_list_price) > 2 * sum(cr_refunded_cash + cr_reversed_charge + cr_store_credit)), + cross_sales AS + (SELECT + i_product_name product_name, + i_item_sk item_sk, + s_store_name store_name, + s_zip store_zip, + ad1.ca_street_number b_street_number, + ad1.ca_street_name b_streen_name, + ad1.ca_city b_city, + ad1.ca_zip b_zip, + ad2.ca_street_number c_street_number, + ad2.ca_street_name c_street_name, + ad2.ca_city c_city, + ad2.ca_zip c_zip, + d1.d_year AS syear, + d2.d_year AS fsyear, + d3.d_year s2year, + count(*) cnt, + sum(ss_wholesale_cost) s1, + sum(ss_list_price) s2, + sum(ss_coupon_amt) s3 + FROM store_sales, store_returns, cs_ui, date_dim d1, date_dim d2, date_dim d3, + store, customer, customer_demographics cd1, customer_demographics cd2, + promotion, household_demographics hd1, household_demographics hd2, + customer_address ad1, customer_address ad2, income_band ib1, income_band ib2, item + WHERE ss_store_sk = s_store_sk AND + ss_sold_date_sk = d1.d_date_sk AND + ss_customer_sk = c_customer_sk AND + ss_cdemo_sk = cd1.cd_demo_sk AND + ss_hdemo_sk = hd1.hd_demo_sk AND + ss_addr_sk = ad1.ca_address_sk AND + ss_item_sk = i_item_sk AND + ss_item_sk = sr_item_sk AND + ss_ticket_number = sr_ticket_number AND + ss_item_sk = cs_ui.cs_item_sk AND + c_current_cdemo_sk = cd2.cd_demo_sk AND + c_current_hdemo_sk = hd2.hd_demo_sk AND + c_current_addr_sk = ad2.ca_address_sk AND + c_first_sales_date_sk = d2.d_date_sk AND + c_first_shipto_date_sk = d3.d_date_sk AND + ss_promo_sk = p_promo_sk 
AND + hd1.hd_income_band_sk = ib1.ib_income_band_sk AND + hd2.hd_income_band_sk = ib2.ib_income_band_sk AND + cd1.cd_marital_status <> cd2.cd_marital_status AND + i_color IN ('purple', 'burlywood', 'indian', 'spring', 'floral', 'medium') AND + i_current_price BETWEEN 64 AND 64 + 10 AND + i_current_price BETWEEN 64 + 1 AND 64 + 15 + GROUP BY i_product_name, i_item_sk, s_store_name, s_zip, ad1.ca_street_number, + ad1.ca_street_name, ad1.ca_city, ad1.ca_zip, ad2.ca_street_number, + ad2.ca_street_name, ad2.ca_city, ad2.ca_zip, d1.d_year, d2.d_year, d3.d_year + ) +SELECT + cs1.product_name, + cs1.store_name, + cs1.store_zip, + cs1.b_street_number, + cs1.b_streen_name, + cs1.b_city, + cs1.b_zip, + cs1.c_street_number, + cs1.c_street_name, + cs1.c_city, + cs1.c_zip, + cs1.syear, + cs1.cnt, + cs1.s1, + cs1.s2, + cs1.s3, + cs2.s1, + cs2.s2, + cs2.s3, + cs2.syear, + cs2.cnt +FROM cross_sales cs1, cross_sales cs2 +WHERE cs1.item_sk = cs2.item_sk AND + cs1.syear = 1999 AND + cs2.syear = 1999 + 1 AND + cs2.cnt <= cs1.cnt AND + cs1.store_name = cs2.store_name AND + cs1.store_zip = cs2.store_zip +ORDER BY cs1.product_name, cs1.store_name, cs2.cnt diff --git a/src/test/resources/tpcds/q65.sql b/src/test/resources/tpcds/q65.sql new file mode 100644 index 000000000..aad04be1b --- /dev/null +++ b/src/test/resources/tpcds/q65.sql @@ -0,0 +1,33 @@ +SELECT + s_store_name, + i_item_desc, + sc.revenue, + i_current_price, + i_wholesale_cost, + i_brand +FROM store, item, + (SELECT + ss_store_sk, + avg(revenue) AS ave + FROM + (SELECT + ss_store_sk, + ss_item_sk, + sum(ss_sales_price) AS revenue + FROM store_sales, date_dim + WHERE ss_sold_date_sk = d_date_sk AND d_month_seq BETWEEN 1176 AND 1176 + 11 + GROUP BY ss_store_sk, ss_item_sk) sa + GROUP BY ss_store_sk) sb, + (SELECT + ss_store_sk, + ss_item_sk, + sum(ss_sales_price) AS revenue + FROM store_sales, date_dim + WHERE ss_sold_date_sk = d_date_sk AND d_month_seq BETWEEN 1176 AND 1176 + 11 + GROUP BY ss_store_sk, ss_item_sk) sc +WHERE 
sb.ss_store_sk = sc.ss_store_sk AND + sc.revenue <= 0.1 * sb.ave AND + s_store_sk = sc.ss_store_sk AND + i_item_sk = sc.ss_item_sk +ORDER BY s_store_name, i_item_desc +LIMIT 100 diff --git a/src/test/resources/tpcds/q66.sql b/src/test/resources/tpcds/q66.sql new file mode 100644 index 000000000..f826b4164 --- /dev/null +++ b/src/test/resources/tpcds/q66.sql @@ -0,0 +1,240 @@ +SELECT + w_warehouse_name, + w_warehouse_sq_ft, + w_city, + w_county, + w_state, + w_country, + ship_carriers, + year, + sum(jan_sales) AS jan_sales, + sum(feb_sales) AS feb_sales, + sum(mar_sales) AS mar_sales, + sum(apr_sales) AS apr_sales, + sum(may_sales) AS may_sales, + sum(jun_sales) AS jun_sales, + sum(jul_sales) AS jul_sales, + sum(aug_sales) AS aug_sales, + sum(sep_sales) AS sep_sales, + sum(oct_sales) AS oct_sales, + sum(nov_sales) AS nov_sales, + sum(dec_sales) AS dec_sales, + sum(jan_sales / w_warehouse_sq_ft) AS jan_sales_per_sq_foot, + sum(feb_sales / w_warehouse_sq_ft) AS feb_sales_per_sq_foot, + sum(mar_sales / w_warehouse_sq_ft) AS mar_sales_per_sq_foot, + sum(apr_sales / w_warehouse_sq_ft) AS apr_sales_per_sq_foot, + sum(may_sales / w_warehouse_sq_ft) AS may_sales_per_sq_foot, + sum(jun_sales / w_warehouse_sq_ft) AS jun_sales_per_sq_foot, + sum(jul_sales / w_warehouse_sq_ft) AS jul_sales_per_sq_foot, + sum(aug_sales / w_warehouse_sq_ft) AS aug_sales_per_sq_foot, + sum(sep_sales / w_warehouse_sq_ft) AS sep_sales_per_sq_foot, + sum(oct_sales / w_warehouse_sq_ft) AS oct_sales_per_sq_foot, + sum(nov_sales / w_warehouse_sq_ft) AS nov_sales_per_sq_foot, + sum(dec_sales / w_warehouse_sq_ft) AS dec_sales_per_sq_foot, + sum(jan_net) AS jan_net, + sum(feb_net) AS feb_net, + sum(mar_net) AS mar_net, + sum(apr_net) AS apr_net, + sum(may_net) AS may_net, + sum(jun_net) AS jun_net, + sum(jul_net) AS jul_net, + sum(aug_net) AS aug_net, + sum(sep_net) AS sep_net, + sum(oct_net) AS oct_net, + sum(nov_net) AS nov_net, + sum(dec_net) AS dec_net +FROM ( + (SELECT + w_warehouse_name, + 
w_warehouse_sq_ft, + w_city, + w_county, + w_state, + w_country, + concat('DHL', ',', 'BARIAN') AS ship_carriers, + d_year AS year, + sum(CASE WHEN d_moy = 1 + THEN ws_ext_sales_price * ws_quantity + ELSE 0 END) AS jan_sales, + sum(CASE WHEN d_moy = 2 + THEN ws_ext_sales_price * ws_quantity + ELSE 0 END) AS feb_sales, + sum(CASE WHEN d_moy = 3 + THEN ws_ext_sales_price * ws_quantity + ELSE 0 END) AS mar_sales, + sum(CASE WHEN d_moy = 4 + THEN ws_ext_sales_price * ws_quantity + ELSE 0 END) AS apr_sales, + sum(CASE WHEN d_moy = 5 + THEN ws_ext_sales_price * ws_quantity + ELSE 0 END) AS may_sales, + sum(CASE WHEN d_moy = 6 + THEN ws_ext_sales_price * ws_quantity + ELSE 0 END) AS jun_sales, + sum(CASE WHEN d_moy = 7 + THEN ws_ext_sales_price * ws_quantity + ELSE 0 END) AS jul_sales, + sum(CASE WHEN d_moy = 8 + THEN ws_ext_sales_price * ws_quantity + ELSE 0 END) AS aug_sales, + sum(CASE WHEN d_moy = 9 + THEN ws_ext_sales_price * ws_quantity + ELSE 0 END) AS sep_sales, + sum(CASE WHEN d_moy = 10 + THEN ws_ext_sales_price * ws_quantity + ELSE 0 END) AS oct_sales, + sum(CASE WHEN d_moy = 11 + THEN ws_ext_sales_price * ws_quantity + ELSE 0 END) AS nov_sales, + sum(CASE WHEN d_moy = 12 + THEN ws_ext_sales_price * ws_quantity + ELSE 0 END) AS dec_sales, + sum(CASE WHEN d_moy = 1 + THEN ws_net_paid * ws_quantity + ELSE 0 END) AS jan_net, + sum(CASE WHEN d_moy = 2 + THEN ws_net_paid * ws_quantity + ELSE 0 END) AS feb_net, + sum(CASE WHEN d_moy = 3 + THEN ws_net_paid * ws_quantity + ELSE 0 END) AS mar_net, + sum(CASE WHEN d_moy = 4 + THEN ws_net_paid * ws_quantity + ELSE 0 END) AS apr_net, + sum(CASE WHEN d_moy = 5 + THEN ws_net_paid * ws_quantity + ELSE 0 END) AS may_net, + sum(CASE WHEN d_moy = 6 + THEN ws_net_paid * ws_quantity + ELSE 0 END) AS jun_net, + sum(CASE WHEN d_moy = 7 + THEN ws_net_paid * ws_quantity + ELSE 0 END) AS jul_net, + sum(CASE WHEN d_moy = 8 + THEN ws_net_paid * ws_quantity + ELSE 0 END) AS aug_net, + sum(CASE WHEN d_moy = 9 + THEN ws_net_paid * 
ws_quantity + ELSE 0 END) AS sep_net, + sum(CASE WHEN d_moy = 10 + THEN ws_net_paid * ws_quantity + ELSE 0 END) AS oct_net, + sum(CASE WHEN d_moy = 11 + THEN ws_net_paid * ws_quantity + ELSE 0 END) AS nov_net, + sum(CASE WHEN d_moy = 12 + THEN ws_net_paid * ws_quantity + ELSE 0 END) AS dec_net + FROM + web_sales, warehouse, date_dim, time_dim, ship_mode + WHERE + ws_warehouse_sk = w_warehouse_sk + AND ws_sold_date_sk = d_date_sk + AND ws_sold_time_sk = t_time_sk + AND ws_ship_mode_sk = sm_ship_mode_sk + AND d_year = 2001 + AND t_time BETWEEN 30838 AND 30838 + 28800 + AND sm_carrier IN ('DHL', 'BARIAN') + GROUP BY + w_warehouse_name, w_warehouse_sq_ft, w_city, w_county, w_state, w_country, d_year) + UNION ALL + (SELECT + w_warehouse_name, + w_warehouse_sq_ft, + w_city, + w_county, + w_state, + w_country, + concat('DHL', ',', 'BARIAN') AS ship_carriers, + d_year AS year, + sum(CASE WHEN d_moy = 1 + THEN cs_sales_price * cs_quantity + ELSE 0 END) AS jan_sales, + sum(CASE WHEN d_moy = 2 + THEN cs_sales_price * cs_quantity + ELSE 0 END) AS feb_sales, + sum(CASE WHEN d_moy = 3 + THEN cs_sales_price * cs_quantity + ELSE 0 END) AS mar_sales, + sum(CASE WHEN d_moy = 4 + THEN cs_sales_price * cs_quantity + ELSE 0 END) AS apr_sales, + sum(CASE WHEN d_moy = 5 + THEN cs_sales_price * cs_quantity + ELSE 0 END) AS may_sales, + sum(CASE WHEN d_moy = 6 + THEN cs_sales_price * cs_quantity + ELSE 0 END) AS jun_sales, + sum(CASE WHEN d_moy = 7 + THEN cs_sales_price * cs_quantity + ELSE 0 END) AS jul_sales, + sum(CASE WHEN d_moy = 8 + THEN cs_sales_price * cs_quantity + ELSE 0 END) AS aug_sales, + sum(CASE WHEN d_moy = 9 + THEN cs_sales_price * cs_quantity + ELSE 0 END) AS sep_sales, + sum(CASE WHEN d_moy = 10 + THEN cs_sales_price * cs_quantity + ELSE 0 END) AS oct_sales, + sum(CASE WHEN d_moy = 11 + THEN cs_sales_price * cs_quantity + ELSE 0 END) AS nov_sales, + sum(CASE WHEN d_moy = 12 + THEN cs_sales_price * cs_quantity + ELSE 0 END) AS dec_sales, + sum(CASE WHEN d_moy = 1 + THEN 
cs_net_paid_inc_tax * cs_quantity + ELSE 0 END) AS jan_net, + sum(CASE WHEN d_moy = 2 + THEN cs_net_paid_inc_tax * cs_quantity + ELSE 0 END) AS feb_net, + sum(CASE WHEN d_moy = 3 + THEN cs_net_paid_inc_tax * cs_quantity + ELSE 0 END) AS mar_net, + sum(CASE WHEN d_moy = 4 + THEN cs_net_paid_inc_tax * cs_quantity + ELSE 0 END) AS apr_net, + sum(CASE WHEN d_moy = 5 + THEN cs_net_paid_inc_tax * cs_quantity + ELSE 0 END) AS may_net, + sum(CASE WHEN d_moy = 6 + THEN cs_net_paid_inc_tax * cs_quantity + ELSE 0 END) AS jun_net, + sum(CASE WHEN d_moy = 7 + THEN cs_net_paid_inc_tax * cs_quantity + ELSE 0 END) AS jul_net, + sum(CASE WHEN d_moy = 8 + THEN cs_net_paid_inc_tax * cs_quantity + ELSE 0 END) AS aug_net, + sum(CASE WHEN d_moy = 9 + THEN cs_net_paid_inc_tax * cs_quantity + ELSE 0 END) AS sep_net, + sum(CASE WHEN d_moy = 10 + THEN cs_net_paid_inc_tax * cs_quantity + ELSE 0 END) AS oct_net, + sum(CASE WHEN d_moy = 11 + THEN cs_net_paid_inc_tax * cs_quantity + ELSE 0 END) AS nov_net, + sum(CASE WHEN d_moy = 12 + THEN cs_net_paid_inc_tax * cs_quantity + ELSE 0 END) AS dec_net + FROM + catalog_sales, warehouse, date_dim, time_dim, ship_mode + WHERE + cs_warehouse_sk = w_warehouse_sk + AND cs_sold_date_sk = d_date_sk + AND cs_sold_time_sk = t_time_sk + AND cs_ship_mode_sk = sm_ship_mode_sk + AND d_year = 2001 + AND t_time BETWEEN 30838 AND 30838 + 28800 + AND sm_carrier IN ('DHL', 'BARIAN') + GROUP BY + w_warehouse_name, w_warehouse_sq_ft, w_city, w_county, w_state, w_country, d_year + ) + ) x +GROUP BY + w_warehouse_name, w_warehouse_sq_ft, w_city, w_county, w_state, w_country, + ship_carriers, year +ORDER BY w_warehouse_name +LIMIT 100 diff --git a/src/test/resources/tpcds/q67.sql b/src/test/resources/tpcds/q67.sql new file mode 100644 index 000000000..f66e2252b --- /dev/null +++ b/src/test/resources/tpcds/q67.sql @@ -0,0 +1,38 @@ +SELECT * +FROM + (SELECT + i_category, + i_class, + i_brand, + i_product_name, + d_year, + d_qoy, + d_moy, + s_store_id, + sumsales, + rank() + 
OVER (PARTITION BY i_category + ORDER BY sumsales DESC) rk + FROM + (SELECT + i_category, + i_class, + i_brand, + i_product_name, + d_year, + d_qoy, + d_moy, + s_store_id, + sum(coalesce(ss_sales_price * ss_quantity, 0)) sumsales + FROM store_sales, date_dim, store, item + WHERE ss_sold_date_sk = d_date_sk + AND ss_item_sk = i_item_sk + AND ss_store_sk = s_store_sk + AND d_month_seq BETWEEN 1200 AND 1200 + 11 + GROUP BY ROLLUP (i_category, i_class, i_brand, i_product_name, d_year, d_qoy, + d_moy, s_store_id)) dw1) dw2 +WHERE rk <= 100 +ORDER BY + i_category, i_class, i_brand, i_product_name, d_year, + d_qoy, d_moy, s_store_id, sumsales, rk +LIMIT 100 diff --git a/src/test/resources/tpcds/q68.sql b/src/test/resources/tpcds/q68.sql new file mode 100644 index 000000000..adb8a7189 --- /dev/null +++ b/src/test/resources/tpcds/q68.sql @@ -0,0 +1,34 @@ +SELECT + c_last_name, + c_first_name, + ca_city, + bought_city, + ss_ticket_number, + extended_price, + extended_tax, + list_price +FROM (SELECT + ss_ticket_number, + ss_customer_sk, + ca_city bought_city, + sum(ss_ext_sales_price) extended_price, + sum(ss_ext_list_price) list_price, + sum(ss_ext_tax) extended_tax +FROM store_sales, date_dim, store, household_demographics, customer_address +WHERE store_sales.ss_sold_date_sk = date_dim.d_date_sk + AND store_sales.ss_store_sk = store.s_store_sk + AND store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk + AND store_sales.ss_addr_sk = customer_address.ca_address_sk + AND date_dim.d_dom BETWEEN 1 AND 2 + AND (household_demographics.hd_dep_count = 4 OR + household_demographics.hd_vehicle_count = 3) + AND date_dim.d_year IN (1999, 1999 + 1, 1999 + 2) + AND store.s_city IN ('Midway', 'Fairview') +GROUP BY ss_ticket_number, ss_customer_sk, ss_addr_sk, ca_city) dn, + customer, + customer_address current_addr +WHERE ss_customer_sk = c_customer_sk + AND customer.c_current_addr_sk = current_addr.ca_address_sk + AND current_addr.ca_city <> bought_city +ORDER BY c_last_name, 
ss_ticket_number +LIMIT 100 diff --git a/src/test/resources/tpcds/q69.sql b/src/test/resources/tpcds/q69.sql new file mode 100644 index 000000000..1f0ee64f5 --- /dev/null +++ b/src/test/resources/tpcds/q69.sql @@ -0,0 +1,38 @@ +SELECT + cd_gender, + cd_marital_status, + cd_education_status, + count(*) cnt1, + cd_purchase_estimate, + count(*) cnt2, + cd_credit_rating, + count(*) cnt3 +FROM + customer c, customer_address ca, customer_demographics +WHERE + c.c_current_addr_sk = ca.ca_address_sk AND + ca_state IN ('KY', 'GA', 'NM') AND + cd_demo_sk = c.c_current_cdemo_sk AND + exists(SELECT * + FROM store_sales, date_dim + WHERE c.c_customer_sk = ss_customer_sk AND + ss_sold_date_sk = d_date_sk AND + d_year = 2001 AND + d_moy BETWEEN 4 AND 4 + 2) AND + (NOT exists(SELECT * + FROM web_sales, date_dim + WHERE c.c_customer_sk = ws_bill_customer_sk AND + ws_sold_date_sk = d_date_sk AND + d_year = 2001 AND + d_moy BETWEEN 4 AND 4 + 2) AND + NOT exists(SELECT * + FROM catalog_sales, date_dim + WHERE c.c_customer_sk = cs_ship_customer_sk AND + cs_sold_date_sk = d_date_sk AND + d_year = 2001 AND + d_moy BETWEEN 4 AND 4 + 2)) +GROUP BY cd_gender, cd_marital_status, cd_education_status, + cd_purchase_estimate, cd_credit_rating +ORDER BY cd_gender, cd_marital_status, cd_education_status, + cd_purchase_estimate, cd_credit_rating +LIMIT 100 diff --git a/src/test/resources/tpcds/q7.sql b/src/test/resources/tpcds/q7.sql new file mode 100644 index 000000000..6630a0054 --- /dev/null +++ b/src/test/resources/tpcds/q7.sql @@ -0,0 +1,19 @@ +SELECT + i_item_id, + avg(ss_quantity) agg1, + avg(ss_list_price) agg2, + avg(ss_coupon_amt) agg3, + avg(ss_sales_price) agg4 +FROM store_sales, customer_demographics, date_dim, item, promotion +WHERE ss_sold_date_sk = d_date_sk AND + ss_item_sk = i_item_sk AND + ss_cdemo_sk = cd_demo_sk AND + ss_promo_sk = p_promo_sk AND + cd_gender = 'M' AND + cd_marital_status = 'S' AND + cd_education_status = 'College' AND + (p_channel_email = 'N' OR 
p_channel_event = 'N') AND + d_year = 2000 +GROUP BY i_item_id +ORDER BY i_item_id +LIMIT 100 diff --git a/src/test/resources/tpcds/q70.sql b/src/test/resources/tpcds/q70.sql new file mode 100644 index 000000000..625011b21 --- /dev/null +++ b/src/test/resources/tpcds/q70.sql @@ -0,0 +1,38 @@ +SELECT + sum(ss_net_profit) AS total_sum, + s_state, + s_county, + grouping(s_state) + grouping(s_county) AS lochierarchy, + rank() + OVER ( + PARTITION BY grouping(s_state) + grouping(s_county), + CASE WHEN grouping(s_county) = 0 + THEN s_state END + ORDER BY sum(ss_net_profit) DESC) AS rank_within_parent +FROM + store_sales, date_dim d1, store +WHERE + d1.d_month_seq BETWEEN 1200 AND 1200 + 11 + AND d1.d_date_sk = ss_sold_date_sk + AND s_store_sk = ss_store_sk + AND s_state IN + (SELECT s_state + FROM + (SELECT + s_state AS s_state, + rank() + OVER (PARTITION BY s_state + ORDER BY sum(ss_net_profit) DESC) AS ranking + FROM store_sales, store, date_dim + WHERE d_month_seq BETWEEN 1200 AND 1200 + 11 + AND d_date_sk = ss_sold_date_sk + AND s_store_sk = ss_store_sk + GROUP BY s_state) tmp1 + WHERE ranking <= 5) +GROUP BY ROLLUP (s_state, s_county) +ORDER BY + lochierarchy DESC + , CASE WHEN lochierarchy = 0 + THEN s_state END + , rank_within_parent +LIMIT 100 diff --git a/src/test/resources/tpcds/q71.sql b/src/test/resources/tpcds/q71.sql new file mode 100644 index 000000000..8d724b924 --- /dev/null +++ b/src/test/resources/tpcds/q71.sql @@ -0,0 +1,44 @@ +SELECT + i_brand_id brand_id, + i_brand brand, + t_hour, + t_minute, + sum(ext_price) ext_price +FROM item, + (SELECT + ws_ext_sales_price AS ext_price, + ws_sold_date_sk AS sold_date_sk, + ws_item_sk AS sold_item_sk, + ws_sold_time_sk AS time_sk + FROM web_sales, date_dim + WHERE d_date_sk = ws_sold_date_sk + AND d_moy = 11 + AND d_year = 1999 + UNION ALL + SELECT + cs_ext_sales_price AS ext_price, + cs_sold_date_sk AS sold_date_sk, + cs_item_sk AS sold_item_sk, + cs_sold_time_sk AS time_sk + FROM catalog_sales, date_dim + 
WHERE d_date_sk = cs_sold_date_sk + AND d_moy = 11 + AND d_year = 1999 + UNION ALL + SELECT + ss_ext_sales_price AS ext_price, + ss_sold_date_sk AS sold_date_sk, + ss_item_sk AS sold_item_sk, + ss_sold_time_sk AS time_sk + FROM store_sales, date_dim + WHERE d_date_sk = ss_sold_date_sk + AND d_moy = 11 + AND d_year = 1999 + ) AS tmp, time_dim +WHERE + sold_item_sk = i_item_sk + AND i_manager_id = 1 + AND time_sk = t_time_sk + AND (t_meal_time = 'breakfast' OR t_meal_time = 'dinner') +GROUP BY i_brand, i_brand_id, t_hour, t_minute +ORDER BY ext_price DESC, brand_id diff --git a/src/test/resources/tpcds/q72.sql b/src/test/resources/tpcds/q72.sql new file mode 100644 index 000000000..99b3eee54 --- /dev/null +++ b/src/test/resources/tpcds/q72.sql @@ -0,0 +1,33 @@ +SELECT + i_item_desc, + w_warehouse_name, + d1.d_week_seq, + count(CASE WHEN p_promo_sk IS NULL + THEN 1 + ELSE 0 END) no_promo, + count(CASE WHEN p_promo_sk IS NOT NULL + THEN 1 + ELSE 0 END) promo, + count(*) total_cnt +FROM catalog_sales + JOIN inventory ON (cs_item_sk = inv_item_sk) + JOIN warehouse ON (w_warehouse_sk = inv_warehouse_sk) + JOIN item ON (i_item_sk = cs_item_sk) + JOIN customer_demographics ON (cs_bill_cdemo_sk = cd_demo_sk) + JOIN household_demographics ON (cs_bill_hdemo_sk = hd_demo_sk) + JOIN date_dim d1 ON (cs_sold_date_sk = d1.d_date_sk) + JOIN date_dim d2 ON (inv_date_sk = d2.d_date_sk) + JOIN date_dim d3 ON (cs_ship_date_sk = d3.d_date_sk) + LEFT OUTER JOIN promotion ON (cs_promo_sk = p_promo_sk) + LEFT OUTER JOIN catalog_returns ON (cr_item_sk = cs_item_sk AND cr_order_number = cs_order_number) +WHERE d1.d_week_seq = d2.d_week_seq + AND inv_quantity_on_hand < cs_quantity + AND d3.d_date > (cast(d1.d_date AS DATE) + interval 5 days) + AND hd_buy_potential = '>10000' + AND d1.d_year = 1999 + AND hd_buy_potential = '>10000' + AND cd_marital_status = 'D' + AND d1.d_year = 1999 +GROUP BY i_item_desc, w_warehouse_name, d1.d_week_seq +ORDER BY total_cnt DESC, i_item_desc, w_warehouse_name, 
d_week_seq +LIMIT 100 diff --git a/src/test/resources/tpcds/q73.sql b/src/test/resources/tpcds/q73.sql new file mode 100644 index 000000000..881be2e90 --- /dev/null +++ b/src/test/resources/tpcds/q73.sql @@ -0,0 +1,30 @@ +SELECT + c_last_name, + c_first_name, + c_salutation, + c_preferred_cust_flag, + ss_ticket_number, + cnt +FROM + (SELECT + ss_ticket_number, + ss_customer_sk, + count(*) cnt + FROM store_sales, date_dim, store, household_demographics + WHERE store_sales.ss_sold_date_sk = date_dim.d_date_sk + AND store_sales.ss_store_sk = store.s_store_sk + AND store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk + AND date_dim.d_dom BETWEEN 1 AND 2 + AND (household_demographics.hd_buy_potential = '>10000' OR + household_demographics.hd_buy_potential = 'unknown') + AND household_demographics.hd_vehicle_count > 0 + AND CASE WHEN household_demographics.hd_vehicle_count > 0 + THEN + household_demographics.hd_dep_count / household_demographics.hd_vehicle_count + ELSE NULL END > 1 + AND date_dim.d_year IN (1999, 1999 + 1, 1999 + 2) + AND store.s_county IN ('Williamson County', 'Franklin Parish', 'Bronx County', 'Orange County') + GROUP BY ss_ticket_number, ss_customer_sk) dj, customer +WHERE ss_customer_sk = c_customer_sk + AND cnt BETWEEN 1 AND 5 +ORDER BY cnt DESC diff --git a/src/test/resources/tpcds/q74.sql b/src/test/resources/tpcds/q74.sql new file mode 100644 index 000000000..154b26d68 --- /dev/null +++ b/src/test/resources/tpcds/q74.sql @@ -0,0 +1,58 @@ +WITH year_total AS ( + SELECT + c_customer_id customer_id, + c_first_name customer_first_name, + c_last_name customer_last_name, + d_year AS year, + sum(ss_net_paid) year_total, + 's' sale_type + FROM + customer, store_sales, date_dim + WHERE c_customer_sk = ss_customer_sk + AND ss_sold_date_sk = d_date_sk + AND d_year IN (2001, 2001 + 1) + GROUP BY + c_customer_id, c_first_name, c_last_name, d_year + UNION ALL + SELECT + c_customer_id customer_id, + c_first_name customer_first_name, + c_last_name 
customer_last_name, + d_year AS year, + sum(ws_net_paid) year_total, + 'w' sale_type + FROM + customer, web_sales, date_dim + WHERE c_customer_sk = ws_bill_customer_sk + AND ws_sold_date_sk = d_date_sk + AND d_year IN (2001, 2001 + 1) + GROUP BY + c_customer_id, c_first_name, c_last_name, d_year) +SELECT + t_s_secyear.customer_id, + t_s_secyear.customer_first_name, + t_s_secyear.customer_last_name +FROM + year_total t_s_firstyear, year_total t_s_secyear, + year_total t_w_firstyear, year_total t_w_secyear +WHERE t_s_secyear.customer_id = t_s_firstyear.customer_id + AND t_s_firstyear.customer_id = t_w_secyear.customer_id + AND t_s_firstyear.customer_id = t_w_firstyear.customer_id + AND t_s_firstyear.sale_type = 's' + AND t_w_firstyear.sale_type = 'w' + AND t_s_secyear.sale_type = 's' + AND t_w_secyear.sale_type = 'w' + AND t_s_firstyear.year = 2001 + AND t_s_secyear.year = 2001 + 1 + AND t_w_firstyear.year = 2001 + AND t_w_secyear.year = 2001 + 1 + AND t_s_firstyear.year_total > 0 + AND t_w_firstyear.year_total > 0 + AND CASE WHEN t_w_firstyear.year_total > 0 + THEN t_w_secyear.year_total / t_w_firstyear.year_total + ELSE NULL END + > CASE WHEN t_s_firstyear.year_total > 0 + THEN t_s_secyear.year_total / t_s_firstyear.year_total + ELSE NULL END +ORDER BY 1, 1, 1 +LIMIT 100 diff --git a/src/test/resources/tpcds/q75.sql b/src/test/resources/tpcds/q75.sql new file mode 100644 index 000000000..2a143232b --- /dev/null +++ b/src/test/resources/tpcds/q75.sql @@ -0,0 +1,76 @@ +WITH all_sales AS ( + SELECT + d_year, + i_brand_id, + i_class_id, + i_category_id, + i_manufact_id, + SUM(sales_cnt) AS sales_cnt, + SUM(sales_amt) AS sales_amt + FROM ( + SELECT + d_year, + i_brand_id, + i_class_id, + i_category_id, + i_manufact_id, + cs_quantity - COALESCE(cr_return_quantity, 0) AS sales_cnt, + cs_ext_sales_price - COALESCE(cr_return_amount, 0.0) AS sales_amt + FROM catalog_sales + JOIN item ON i_item_sk = cs_item_sk + JOIN date_dim ON d_date_sk = cs_sold_date_sk + LEFT JOIN 
catalog_returns ON (cs_order_number = cr_order_number + AND cs_item_sk = cr_item_sk) + WHERE i_category = 'Books' + UNION + SELECT + d_year, + i_brand_id, + i_class_id, + i_category_id, + i_manufact_id, + ss_quantity - COALESCE(sr_return_quantity, 0) AS sales_cnt, + ss_ext_sales_price - COALESCE(sr_return_amt, 0.0) AS sales_amt + FROM store_sales + JOIN item ON i_item_sk = ss_item_sk + JOIN date_dim ON d_date_sk = ss_sold_date_sk + LEFT JOIN store_returns ON (ss_ticket_number = sr_ticket_number + AND ss_item_sk = sr_item_sk) + WHERE i_category = 'Books' + UNION + SELECT + d_year, + i_brand_id, + i_class_id, + i_category_id, + i_manufact_id, + ws_quantity - COALESCE(wr_return_quantity, 0) AS sales_cnt, + ws_ext_sales_price - COALESCE(wr_return_amt, 0.0) AS sales_amt + FROM web_sales + JOIN item ON i_item_sk = ws_item_sk + JOIN date_dim ON d_date_sk = ws_sold_date_sk + LEFT JOIN web_returns ON (ws_order_number = wr_order_number + AND ws_item_sk = wr_item_sk) + WHERE i_category = 'Books') sales_detail + GROUP BY d_year, i_brand_id, i_class_id, i_category_id, i_manufact_id) +SELECT + prev_yr.d_year AS prev_year, + curr_yr.d_year AS year, + curr_yr.i_brand_id, + curr_yr.i_class_id, + curr_yr.i_category_id, + curr_yr.i_manufact_id, + prev_yr.sales_cnt AS prev_yr_cnt, + curr_yr.sales_cnt AS curr_yr_cnt, + curr_yr.sales_cnt - prev_yr.sales_cnt AS sales_cnt_diff, + curr_yr.sales_amt - prev_yr.sales_amt AS sales_amt_diff +FROM all_sales curr_yr, all_sales prev_yr +WHERE curr_yr.i_brand_id = prev_yr.i_brand_id + AND curr_yr.i_class_id = prev_yr.i_class_id + AND curr_yr.i_category_id = prev_yr.i_category_id + AND curr_yr.i_manufact_id = prev_yr.i_manufact_id + AND curr_yr.d_year = 2002 + AND prev_yr.d_year = 2002 - 1 + AND CAST(curr_yr.sales_cnt AS DECIMAL(17, 2)) / CAST(prev_yr.sales_cnt AS DECIMAL(17, 2)) < 0.9 +ORDER BY sales_cnt_diff +LIMIT 100 diff --git a/src/test/resources/tpcds/q76.sql b/src/test/resources/tpcds/q76.sql new file mode 100644 index 000000000..815fa922b 
--- /dev/null +++ b/src/test/resources/tpcds/q76.sql @@ -0,0 +1,47 @@ +SELECT + channel, + col_name, + d_year, + d_qoy, + i_category, + COUNT(*) sales_cnt, + SUM(ext_sales_price) sales_amt +FROM ( + SELECT + 'store' AS channel, + ss_store_sk col_name, + d_year, + d_qoy, + i_category, + ss_ext_sales_price ext_sales_price + FROM store_sales, item, date_dim + WHERE ss_store_sk IS NULL + AND ss_sold_date_sk = d_date_sk + AND ss_item_sk = i_item_sk + UNION ALL + SELECT + 'web' AS channel, + ws_ship_customer_sk col_name, + d_year, + d_qoy, + i_category, + ws_ext_sales_price ext_sales_price + FROM web_sales, item, date_dim + WHERE ws_ship_customer_sk IS NULL + AND ws_sold_date_sk = d_date_sk + AND ws_item_sk = i_item_sk + UNION ALL + SELECT + 'catalog' AS channel, + cs_ship_addr_sk col_name, + d_year, + d_qoy, + i_category, + cs_ext_sales_price ext_sales_price + FROM catalog_sales, item, date_dim + WHERE cs_ship_addr_sk IS NULL + AND cs_sold_date_sk = d_date_sk + AND cs_item_sk = i_item_sk) foo +GROUP BY channel, col_name, d_year, d_qoy, i_category +ORDER BY channel, col_name, d_year, d_qoy, i_category +LIMIT 100 diff --git a/src/test/resources/tpcds/q77.sql b/src/test/resources/tpcds/q77.sql new file mode 100644 index 000000000..a69df9fbc --- /dev/null +++ b/src/test/resources/tpcds/q77.sql @@ -0,0 +1,100 @@ +WITH ss AS +(SELECT + s_store_sk, + sum(ss_ext_sales_price) AS sales, + sum(ss_net_profit) AS profit + FROM store_sales, date_dim, store + WHERE ss_sold_date_sk = d_date_sk + AND d_date BETWEEN cast('2000-08-03' AS DATE) AND + (cast('2000-08-03' AS DATE) + INTERVAL 30 days) + AND ss_store_sk = s_store_sk + GROUP BY s_store_sk), + sr AS + (SELECT + s_store_sk, + sum(sr_return_amt) AS returns, + sum(sr_net_loss) AS profit_loss + FROM store_returns, date_dim, store + WHERE sr_returned_date_sk = d_date_sk + AND d_date BETWEEN cast('2000-08-03' AS DATE) AND + (cast('2000-08-03' AS DATE) + INTERVAL 30 days) + AND sr_store_sk = s_store_sk + GROUP BY s_store_sk), + cs AS + 
(SELECT + cs_call_center_sk, + sum(cs_ext_sales_price) AS sales, + sum(cs_net_profit) AS profit + FROM catalog_sales, date_dim + WHERE cs_sold_date_sk = d_date_sk + AND d_date BETWEEN cast('2000-08-03' AS DATE) AND + (cast('2000-08-03' AS DATE) + INTERVAL 30 days) + GROUP BY cs_call_center_sk), + cr AS + (SELECT + sum(cr_return_amount) AS returns, + sum(cr_net_loss) AS profit_loss + FROM catalog_returns, date_dim + WHERE cr_returned_date_sk = d_date_sk + AND d_date BETWEEN cast('2000-08-03' AS DATE) AND + (cast('2000-08-03' AS DATE) + INTERVAL 30 days)), + ws AS + (SELECT + wp_web_page_sk, + sum(ws_ext_sales_price) AS sales, + sum(ws_net_profit) AS profit + FROM web_sales, date_dim, web_page + WHERE ws_sold_date_sk = d_date_sk + AND d_date BETWEEN cast('2000-08-03' AS DATE) AND + (cast('2000-08-03' AS DATE) + INTERVAL 30 days) + AND ws_web_page_sk = wp_web_page_sk + GROUP BY wp_web_page_sk), + wr AS + (SELECT + wp_web_page_sk, + sum(wr_return_amt) AS returns, + sum(wr_net_loss) AS profit_loss + FROM web_returns, date_dim, web_page + WHERE wr_returned_date_sk = d_date_sk + AND d_date BETWEEN cast('2000-08-03' AS DATE) AND + (cast('2000-08-03' AS DATE) + INTERVAL 30 days) + AND wr_web_page_sk = wp_web_page_sk + GROUP BY wp_web_page_sk) +SELECT + channel, + id, + sum(sales) AS sales, + sum(returns) AS returns, + sum(profit) AS profit +FROM + (SELECT + 'store channel' AS channel, + ss.s_store_sk AS id, + sales, + coalesce(returns, 0) AS returns, + (profit - coalesce(profit_loss, 0)) AS profit + FROM ss + LEFT JOIN sr + ON ss.s_store_sk = sr.s_store_sk + UNION ALL + SELECT + 'catalog channel' AS channel, + cs_call_center_sk AS id, + sales, + returns, + (profit - profit_loss) AS profit + FROM cs, cr + UNION ALL + SELECT + 'web channel' AS channel, + ws.wp_web_page_sk AS id, + sales, + coalesce(returns, 0) returns, + (profit - coalesce(profit_loss, 0)) AS profit + FROM ws + LEFT JOIN wr + ON ws.wp_web_page_sk = wr.wp_web_page_sk + ) x +GROUP BY ROLLUP (channel, id) +ORDER 
BY channel, id +LIMIT 100 diff --git a/src/test/resources/tpcds/q78.sql b/src/test/resources/tpcds/q78.sql new file mode 100644 index 000000000..07b0940e2 --- /dev/null +++ b/src/test/resources/tpcds/q78.sql @@ -0,0 +1,64 @@ +WITH ws AS +(SELECT + d_year AS ws_sold_year, + ws_item_sk, + ws_bill_customer_sk ws_customer_sk, + sum(ws_quantity) ws_qty, + sum(ws_wholesale_cost) ws_wc, + sum(ws_sales_price) ws_sp + FROM web_sales + LEFT JOIN web_returns ON wr_order_number = ws_order_number AND ws_item_sk = wr_item_sk + JOIN date_dim ON ws_sold_date_sk = d_date_sk + WHERE wr_order_number IS NULL + GROUP BY d_year, ws_item_sk, ws_bill_customer_sk +), + cs AS + (SELECT + d_year AS cs_sold_year, + cs_item_sk, + cs_bill_customer_sk cs_customer_sk, + sum(cs_quantity) cs_qty, + sum(cs_wholesale_cost) cs_wc, + sum(cs_sales_price) cs_sp + FROM catalog_sales + LEFT JOIN catalog_returns ON cr_order_number = cs_order_number AND cs_item_sk = cr_item_sk + JOIN date_dim ON cs_sold_date_sk = d_date_sk + WHERE cr_order_number IS NULL + GROUP BY d_year, cs_item_sk, cs_bill_customer_sk + ), + ss AS + (SELECT + d_year AS ss_sold_year, + ss_item_sk, + ss_customer_sk, + sum(ss_quantity) ss_qty, + sum(ss_wholesale_cost) ss_wc, + sum(ss_sales_price) ss_sp + FROM store_sales + LEFT JOIN store_returns ON sr_ticket_number = ss_ticket_number AND ss_item_sk = sr_item_sk + JOIN date_dim ON ss_sold_date_sk = d_date_sk + WHERE sr_ticket_number IS NULL + GROUP BY d_year, ss_item_sk, ss_customer_sk + ) +SELECT + round(ss_qty / (coalesce(ws_qty + cs_qty, 1)), 2) ratio, + ss_qty store_qty, + ss_wc store_wholesale_cost, + ss_sp store_sales_price, + coalesce(ws_qty, 0) + coalesce(cs_qty, 0) other_chan_qty, + coalesce(ws_wc, 0) + coalesce(cs_wc, 0) other_chan_wholesale_cost, + coalesce(ws_sp, 0) + coalesce(cs_sp, 0) other_chan_sales_price +FROM ss + LEFT JOIN ws + ON (ws_sold_year = ss_sold_year AND ws_item_sk = ss_item_sk AND ws_customer_sk = ss_customer_sk) + LEFT JOIN cs + ON (cs_sold_year = ss_sold_year 
AND cs_item_sk = ss_item_sk AND cs_customer_sk = ss_customer_sk) +WHERE coalesce(ws_qty, 0) > 0 AND coalesce(cs_qty, 0) > 0 AND ss_sold_year = 2000 +ORDER BY + ratio, + ss_qty DESC, ss_wc DESC, ss_sp DESC, + other_chan_qty, + other_chan_wholesale_cost, + other_chan_sales_price, + round(ss_qty / (coalesce(ws_qty + cs_qty, 1)), 2) +LIMIT 100 diff --git a/src/test/resources/tpcds/q79.sql b/src/test/resources/tpcds/q79.sql new file mode 100644 index 000000000..08f86dc20 --- /dev/null +++ b/src/test/resources/tpcds/q79.sql @@ -0,0 +1,27 @@ +SELECT + c_last_name, + c_first_name, + substr(s_city, 1, 30), + ss_ticket_number, + amt, + profit +FROM + (SELECT + ss_ticket_number, + ss_customer_sk, + store.s_city, + sum(ss_coupon_amt) amt, + sum(ss_net_profit) profit + FROM store_sales, date_dim, store, household_demographics + WHERE store_sales.ss_sold_date_sk = date_dim.d_date_sk + AND store_sales.ss_store_sk = store.s_store_sk + AND store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk + AND (household_demographics.hd_dep_count = 6 OR + household_demographics.hd_vehicle_count > 2) + AND date_dim.d_dow = 1 + AND date_dim.d_year IN (1999, 1999 + 1, 1999 + 2) + AND store.s_number_employees BETWEEN 200 AND 295 + GROUP BY ss_ticket_number, ss_customer_sk, ss_addr_sk, store.s_city) ms, customer +WHERE ss_customer_sk = c_customer_sk +ORDER BY c_last_name, c_first_name, substr(s_city, 1, 30), profit +LIMIT 100 diff --git a/src/test/resources/tpcds/q8.sql b/src/test/resources/tpcds/q8.sql new file mode 100644 index 000000000..497725111 --- /dev/null +++ b/src/test/resources/tpcds/q8.sql @@ -0,0 +1,87 @@ +SELECT + s_store_name, + sum(ss_net_profit) +FROM store_sales, date_dim, store, + (SELECT ca_zip + FROM ( + (SELECT substr(ca_zip, 1, 5) ca_zip + FROM customer_address + WHERE substr(ca_zip, 1, 5) IN ( + '24128','76232','65084','87816','83926','77556','20548', + '26231','43848','15126','91137','61265','98294','25782', + '17920','18426','98235','40081','84093','28577','55565', + 
'17183','54601','67897','22752','86284','18376','38607', + '45200','21756','29741','96765','23932','89360','29839', + '25989','28898','91068','72550','10390','18845','47770', + '82636','41367','76638','86198','81312','37126','39192', + '88424','72175','81426','53672','10445','42666','66864', + '66708','41248','48583','82276','18842','78890','49448', + '14089','38122','34425','79077','19849','43285','39861', + '66162','77610','13695','99543','83444','83041','12305', + '57665','68341','25003','57834','62878','49130','81096', + '18840','27700','23470','50412','21195','16021','76107', + '71954','68309','18119','98359','64544','10336','86379', + '27068','39736','98569','28915','24206','56529','57647', + '54917','42961','91110','63981','14922','36420','23006', + '67467','32754','30903','20260','31671','51798','72325', + '85816','68621','13955','36446','41766','68806','16725', + '15146','22744','35850','88086','51649','18270','52867', + '39972','96976','63792','11376','94898','13595','10516', + '90225','58943','39371','94945','28587','96576','57855', + '28488','26105','83933','25858','34322','44438','73171', + '30122','34102','22685','71256','78451','54364','13354', + '45375','40558','56458','28286','45266','47305','69399', + '83921','26233','11101','15371','69913','35942','15882', + '25631','24610','44165','99076','33786','70738','26653', + '14328','72305','62496','22152','10144','64147','48425', + '14663','21076','18799','30450','63089','81019','68893', + '24996','51200','51211','45692','92712','70466','79994', + '22437','25280','38935','71791','73134','56571','14060', + '19505','72425','56575','74351','68786','51650','20004', + '18383','76614','11634','18906','15765','41368','73241', + '76698','78567','97189','28545','76231','75691','22246', + '51061','90578','56691','68014','51103','94167','57047', + '14867','73520','15734','63435','25733','35474','24676', + '94627','53535','17879','15559','53268','59166','11928', + 
'59402','33282','45721','43933','68101','33515','36634', + '71286','19736','58058','55253','67473','41918','19515', + '36495','19430','22351','77191','91393','49156','50298', + '87501','18652','53179','18767','63193','23968','65164', + '68880','21286','72823','58470','67301','13394','31016', + '70372','67030','40604','24317','45748','39127','26065', + '77721','31029','31880','60576','24671','45549','13376', + '50016','33123','19769','22927','97789','46081','72151', + '15723','46136','51949','68100','96888','64528','14171', + '79777','28709','11489','25103','32213','78668','22245', + '15798','27156','37930','62971','21337','51622','67853', + '10567','38415','15455','58263','42029','60279','37125', + '56240','88190','50308','26859','64457','89091','82136', + '62377','36233','63837','58078','17043','30010','60099', + '28810','98025','29178','87343','73273','30469','64034', + '39516','86057','21309','90257','67875','40162','11356', + '73650','61810','72013','30431','22461','19512','13375', + '55307','30625','83849','68908','26689','96451','38193', + '46820','88885','84935','69035','83144','47537','56616', + '94983','48033','69952','25486','61547','27385','61860', + '58048','56910','16807','17871','35258','31387','35458', + '35576')) + INTERSECT + (SELECT ca_zip + FROM + (SELECT + substr(ca_zip, 1, 5) ca_zip, + count(*) cnt + FROM customer_address, customer + WHERE ca_address_sk = c_current_addr_sk AND + c_preferred_cust_flag = 'Y' + GROUP BY ca_zip + HAVING count(*) > 10) A1) + ) A2 + ) V1 +WHERE ss_store_sk = s_store_sk + AND ss_sold_date_sk = d_date_sk + AND d_qoy = 2 AND d_year = 1998 + AND (substr(s_zip, 1, 2) = substr(V1.ca_zip, 1, 2)) +GROUP BY s_store_name +ORDER BY s_store_name +LIMIT 100 diff --git a/src/test/resources/tpcds/q80.sql b/src/test/resources/tpcds/q80.sql new file mode 100644 index 000000000..433db87d2 --- /dev/null +++ b/src/test/resources/tpcds/q80.sql @@ -0,0 +1,94 @@ +WITH ssr AS +(SELECT + s_store_id AS store_id, + sum(ss_ext_sales_price) AS 
sales, + sum(coalesce(sr_return_amt, 0)) AS returns, + sum(ss_net_profit - coalesce(sr_net_loss, 0)) AS profit + FROM store_sales + LEFT OUTER JOIN store_returns ON + (ss_item_sk = sr_item_sk AND + ss_ticket_number = sr_ticket_number) + , + date_dim, store, item, promotion + WHERE ss_sold_date_sk = d_date_sk + AND d_date BETWEEN cast('2000-08-23' AS DATE) + AND (cast('2000-08-23' AS DATE) + INTERVAL 30 days) + AND ss_store_sk = s_store_sk + AND ss_item_sk = i_item_sk + AND i_current_price > 50 + AND ss_promo_sk = p_promo_sk + AND p_channel_tv = 'N' + GROUP BY s_store_id), + csr AS + (SELECT + cp_catalog_page_id AS catalog_page_id, + sum(cs_ext_sales_price) AS sales, + sum(coalesce(cr_return_amount, 0)) AS returns, + sum(cs_net_profit - coalesce(cr_net_loss, 0)) AS profit + FROM catalog_sales + LEFT OUTER JOIN catalog_returns ON + (cs_item_sk = cr_item_sk AND + cs_order_number = cr_order_number) + , + date_dim, catalog_page, item, promotion + WHERE cs_sold_date_sk = d_date_sk + AND d_date BETWEEN cast('2000-08-23' AS DATE) + AND (cast('2000-08-23' AS DATE) + INTERVAL 30 days) + AND cs_catalog_page_sk = cp_catalog_page_sk + AND cs_item_sk = i_item_sk + AND i_current_price > 50 + AND cs_promo_sk = p_promo_sk + AND p_channel_tv = 'N' + GROUP BY cp_catalog_page_id), + wsr AS + (SELECT + web_site_id, + sum(ws_ext_sales_price) AS sales, + sum(coalesce(wr_return_amt, 0)) AS returns, + sum(ws_net_profit - coalesce(wr_net_loss, 0)) AS profit + FROM web_sales + LEFT OUTER JOIN web_returns ON + (ws_item_sk = wr_item_sk AND ws_order_number = wr_order_number) + , + date_dim, web_site, item, promotion + WHERE ws_sold_date_sk = d_date_sk + AND d_date BETWEEN cast('2000-08-23' AS DATE) + AND (cast('2000-08-23' AS DATE) + INTERVAL 30 days) + AND ws_web_site_sk = web_site_sk + AND ws_item_sk = i_item_sk + AND i_current_price > 50 + AND ws_promo_sk = p_promo_sk + AND p_channel_tv = 'N' + GROUP BY web_site_id) +SELECT + channel, + id, + sum(sales) AS sales, + sum(returns) AS returns, + 
sum(profit) AS profit +FROM (SELECT + 'store channel' AS channel, + concat('store', store_id) AS id, + sales, + returns, + profit + FROM ssr + UNION ALL + SELECT + 'catalog channel' AS channel, + concat('catalog_page', catalog_page_id) AS id, + sales, + returns, + profit + FROM csr + UNION ALL + SELECT + 'web channel' AS channel, + concat('web_site', web_site_id) AS id, + sales, + returns, + profit + FROM wsr) x +GROUP BY ROLLUP (channel, id) +ORDER BY channel, id +LIMIT 100 diff --git a/src/test/resources/tpcds/q81.sql b/src/test/resources/tpcds/q81.sql new file mode 100644 index 000000000..18f0ffa7e --- /dev/null +++ b/src/test/resources/tpcds/q81.sql @@ -0,0 +1,38 @@ +WITH customer_total_return AS +(SELECT + cr_returning_customer_sk AS ctr_customer_sk, + ca_state AS ctr_state, + sum(cr_return_amt_inc_tax) AS ctr_total_return + FROM catalog_returns, date_dim, customer_address + WHERE cr_returned_date_sk = d_date_sk + AND d_year = 2000 + AND cr_returning_addr_sk = ca_address_sk + GROUP BY cr_returning_customer_sk, ca_state ) +SELECT + c_customer_id, + c_salutation, + c_first_name, + c_last_name, + ca_street_number, + ca_street_name, + ca_street_type, + ca_suite_number, + ca_city, + ca_county, + ca_state, + ca_zip, + ca_country, + ca_gmt_offset, + ca_location_type, + ctr_total_return +FROM customer_total_return ctr1, customer_address, customer +WHERE ctr1.ctr_total_return > (SELECT avg(ctr_total_return) * 1.2 +FROM customer_total_return ctr2 +WHERE ctr1.ctr_state = ctr2.ctr_state) + AND ca_address_sk = c_current_addr_sk + AND ca_state = 'GA' + AND ctr1.ctr_customer_sk = c_customer_sk +ORDER BY c_customer_id, c_salutation, c_first_name, c_last_name, ca_street_number, ca_street_name + , ca_street_type, ca_suite_number, ca_city, ca_county, ca_state, ca_zip, ca_country, ca_gmt_offset + , ca_location_type, ctr_total_return +LIMIT 100 diff --git a/src/test/resources/tpcds/q82.sql b/src/test/resources/tpcds/q82.sql new file mode 100644 index 000000000..20942cfeb --- 
/dev/null +++ b/src/test/resources/tpcds/q82.sql @@ -0,0 +1,15 @@ +SELECT + i_item_id, + i_item_desc, + i_current_price +FROM item, inventory, date_dim, store_sales +WHERE i_current_price BETWEEN 62 AND 62 + 30 + AND inv_item_sk = i_item_sk + AND d_date_sk = inv_date_sk + AND d_date BETWEEN cast('2000-05-25' AS DATE) AND (cast('2000-05-25' AS DATE) + INTERVAL 60 days) + AND i_manufact_id IN (129, 270, 821, 423) + AND inv_quantity_on_hand BETWEEN 100 AND 500 + AND ss_item_sk = i_item_sk +GROUP BY i_item_id, i_item_desc, i_current_price +ORDER BY i_item_id +LIMIT 100 diff --git a/src/test/resources/tpcds/q83.sql b/src/test/resources/tpcds/q83.sql new file mode 100644 index 000000000..53c10c7de --- /dev/null +++ b/src/test/resources/tpcds/q83.sql @@ -0,0 +1,56 @@ +WITH sr_items AS +(SELECT + i_item_id item_id, + sum(sr_return_quantity) sr_item_qty + FROM store_returns, item, date_dim + WHERE sr_item_sk = i_item_sk + AND d_date IN (SELECT d_date + FROM date_dim + WHERE d_week_seq IN + (SELECT d_week_seq + FROM date_dim + WHERE d_date IN ('2000-06-30', '2000-09-27', '2000-11-17'))) + AND sr_returned_date_sk = d_date_sk + GROUP BY i_item_id), + cr_items AS + (SELECT + i_item_id item_id, + sum(cr_return_quantity) cr_item_qty + FROM catalog_returns, item, date_dim + WHERE cr_item_sk = i_item_sk + AND d_date IN (SELECT d_date + FROM date_dim + WHERE d_week_seq IN + (SELECT d_week_seq + FROM date_dim + WHERE d_date IN ('2000-06-30', '2000-09-27', '2000-11-17'))) + AND cr_returned_date_sk = d_date_sk + GROUP BY i_item_id), + wr_items AS + (SELECT + i_item_id item_id, + sum(wr_return_quantity) wr_item_qty + FROM web_returns, item, date_dim + WHERE wr_item_sk = i_item_sk AND d_date IN + (SELECT d_date + FROM date_dim + WHERE d_week_seq IN + (SELECT d_week_seq + FROM date_dim + WHERE d_date IN ('2000-06-30', '2000-09-27', '2000-11-17'))) + AND wr_returned_date_sk = d_date_sk + GROUP BY i_item_id) +SELECT + sr_items.item_id, + sr_item_qty, + sr_item_qty / (sr_item_qty + 
cr_item_qty + wr_item_qty) / 3.0 * 100 sr_dev, + cr_item_qty, + cr_item_qty / (sr_item_qty + cr_item_qty + wr_item_qty) / 3.0 * 100 cr_dev, + wr_item_qty, + wr_item_qty / (sr_item_qty + cr_item_qty + wr_item_qty) / 3.0 * 100 wr_dev, + (sr_item_qty + cr_item_qty + wr_item_qty) / 3.0 average +FROM sr_items, cr_items, wr_items +WHERE sr_items.item_id = cr_items.item_id + AND sr_items.item_id = wr_items.item_id +ORDER BY sr_items.item_id, sr_item_qty +LIMIT 100 diff --git a/src/test/resources/tpcds/q84.sql b/src/test/resources/tpcds/q84.sql new file mode 100644 index 000000000..a1076b57c --- /dev/null +++ b/src/test/resources/tpcds/q84.sql @@ -0,0 +1,19 @@ +SELECT + c_customer_id AS customer_id, + concat(c_last_name, ', ', c_first_name) AS customername +FROM customer + , customer_address + , customer_demographics + , household_demographics + , income_band + , store_returns +WHERE ca_city = 'Edgewood' + AND c_current_addr_sk = ca_address_sk + AND ib_lower_bound >= 38128 + AND ib_upper_bound <= 38128 + 50000 + AND ib_income_band_sk = hd_income_band_sk + AND cd_demo_sk = c_current_cdemo_sk + AND hd_demo_sk = c_current_hdemo_sk + AND sr_cdemo_sk = cd_demo_sk +ORDER BY c_customer_id +LIMIT 100 diff --git a/src/test/resources/tpcds/q85.sql b/src/test/resources/tpcds/q85.sql new file mode 100644 index 000000000..cf718b0f8 --- /dev/null +++ b/src/test/resources/tpcds/q85.sql @@ -0,0 +1,82 @@ +SELECT + substr(r_reason_desc, 1, 20), + avg(ws_quantity), + avg(wr_refunded_cash), + avg(wr_fee) +FROM web_sales, web_returns, web_page, customer_demographics cd1, + customer_demographics cd2, customer_address, date_dim, reason +WHERE ws_web_page_sk = wp_web_page_sk + AND ws_item_sk = wr_item_sk + AND ws_order_number = wr_order_number + AND ws_sold_date_sk = d_date_sk AND d_year = 2000 + AND cd1.cd_demo_sk = wr_refunded_cdemo_sk + AND cd2.cd_demo_sk = wr_returning_cdemo_sk + AND ca_address_sk = wr_refunded_addr_sk + AND r_reason_sk = wr_reason_sk + AND + ( + ( + cd1.cd_marital_status = 
'M' + AND + cd1.cd_marital_status = cd2.cd_marital_status + AND + cd1.cd_education_status = 'Advanced Degree' + AND + cd1.cd_education_status = cd2.cd_education_status + AND + ws_sales_price BETWEEN 100.00 AND 150.00 + ) + OR + ( + cd1.cd_marital_status = 'S' + AND + cd1.cd_marital_status = cd2.cd_marital_status + AND + cd1.cd_education_status = 'College' + AND + cd1.cd_education_status = cd2.cd_education_status + AND + ws_sales_price BETWEEN 50.00 AND 100.00 + ) + OR + ( + cd1.cd_marital_status = 'W' + AND + cd1.cd_marital_status = cd2.cd_marital_status + AND + cd1.cd_education_status = '2 yr Degree' + AND + cd1.cd_education_status = cd2.cd_education_status + AND + ws_sales_price BETWEEN 150.00 AND 200.00 + ) + ) + AND + ( + ( + ca_country = 'United States' + AND + ca_state IN ('IN', 'OH', 'NJ') + AND ws_net_profit BETWEEN 100 AND 200 + ) + OR + ( + ca_country = 'United States' + AND + ca_state IN ('WI', 'CT', 'KY') + AND ws_net_profit BETWEEN 150 AND 300 + ) + OR + ( + ca_country = 'United States' + AND + ca_state IN ('LA', 'IA', 'AR') + AND ws_net_profit BETWEEN 50 AND 250 + ) + ) +GROUP BY r_reason_desc +ORDER BY substr(r_reason_desc, 1, 20) + , avg(ws_quantity) + , avg(wr_refunded_cash) + , avg(wr_fee) +LIMIT 100 diff --git a/src/test/resources/tpcds/q86.sql b/src/test/resources/tpcds/q86.sql new file mode 100644 index 000000000..789a4abf7 --- /dev/null +++ b/src/test/resources/tpcds/q86.sql @@ -0,0 +1,24 @@ +SELECT + sum(ws_net_paid) AS total_sum, + i_category, + i_class, + grouping(i_category) + grouping(i_class) AS lochierarchy, + rank() + OVER ( + PARTITION BY grouping(i_category) + grouping(i_class), + CASE WHEN grouping(i_class) = 0 + THEN i_category END + ORDER BY sum(ws_net_paid) DESC) AS rank_within_parent +FROM + web_sales, date_dim d1, item +WHERE + d1.d_month_seq BETWEEN 1200 AND 1200 + 11 + AND d1.d_date_sk = ws_sold_date_sk + AND i_item_sk = ws_item_sk +GROUP BY ROLLUP (i_category, i_class) +ORDER BY + lochierarchy DESC, + CASE WHEN lochierarchy 
= 0 + THEN i_category END, + rank_within_parent +LIMIT 100 diff --git a/src/test/resources/tpcds/q87.sql b/src/test/resources/tpcds/q87.sql new file mode 100644 index 000000000..4aaa9f39d --- /dev/null +++ b/src/test/resources/tpcds/q87.sql @@ -0,0 +1,28 @@ +SELECT count(*) +FROM ((SELECT DISTINCT + c_last_name, + c_first_name, + d_date +FROM store_sales, date_dim, customer +WHERE store_sales.ss_sold_date_sk = date_dim.d_date_sk + AND store_sales.ss_customer_sk = customer.c_customer_sk + AND d_month_seq BETWEEN 1200 AND 1200 + 11) + EXCEPT + (SELECT DISTINCT + c_last_name, + c_first_name, + d_date + FROM catalog_sales, date_dim, customer + WHERE catalog_sales.cs_sold_date_sk = date_dim.d_date_sk + AND catalog_sales.cs_bill_customer_sk = customer.c_customer_sk + AND d_month_seq BETWEEN 1200 AND 1200 + 11) + EXCEPT + (SELECT DISTINCT + c_last_name, + c_first_name, + d_date + FROM web_sales, date_dim, customer + WHERE web_sales.ws_sold_date_sk = date_dim.d_date_sk + AND web_sales.ws_bill_customer_sk = customer.c_customer_sk + AND d_month_seq BETWEEN 1200 AND 1200 + 11) + ) cool_cust diff --git a/src/test/resources/tpcds/q88.sql b/src/test/resources/tpcds/q88.sql new file mode 100644 index 000000000..25bcd90f4 --- /dev/null +++ b/src/test/resources/tpcds/q88.sql @@ -0,0 +1,122 @@ +SELECT * +FROM + (SELECT count(*) h8_30_to_9 + FROM store_sales, household_demographics, time_dim, store + WHERE ss_sold_time_sk = time_dim.t_time_sk + AND ss_hdemo_sk = household_demographics.hd_demo_sk + AND ss_store_sk = s_store_sk + AND time_dim.t_hour = 8 + AND time_dim.t_minute >= 30 + AND ( + (household_demographics.hd_dep_count = 4 AND household_demographics.hd_vehicle_count <= 4 + 2) + OR + (household_demographics.hd_dep_count = 2 AND household_demographics.hd_vehicle_count <= 2 + 2) + OR + (household_demographics.hd_dep_count = 0 AND + household_demographics.hd_vehicle_count <= 0 + 2)) + AND store.s_store_name = 'ese') s1, + (SELECT count(*) h9_to_9_30 + FROM store_sales, 
household_demographics, time_dim, store + WHERE ss_sold_time_sk = time_dim.t_time_sk + AND ss_hdemo_sk = household_demographics.hd_demo_sk + AND ss_store_sk = s_store_sk + AND time_dim.t_hour = 9 + AND time_dim.t_minute < 30 + AND ( + (household_demographics.hd_dep_count = 4 AND household_demographics.hd_vehicle_count <= 4 + 2) + OR + (household_demographics.hd_dep_count = 2 AND household_demographics.hd_vehicle_count <= 2 + 2) + OR + (household_demographics.hd_dep_count = 0 AND + household_demographics.hd_vehicle_count <= 0 + 2)) + AND store.s_store_name = 'ese') s2, + (SELECT count(*) h9_30_to_10 + FROM store_sales, household_demographics, time_dim, store + WHERE ss_sold_time_sk = time_dim.t_time_sk + AND ss_hdemo_sk = household_demographics.hd_demo_sk + AND ss_store_sk = s_store_sk + AND time_dim.t_hour = 9 + AND time_dim.t_minute >= 30 + AND ( + (household_demographics.hd_dep_count = 4 AND household_demographics.hd_vehicle_count <= 4 + 2) + OR + (household_demographics.hd_dep_count = 2 AND household_demographics.hd_vehicle_count <= 2 + 2) + OR + (household_demographics.hd_dep_count = 0 AND + household_demographics.hd_vehicle_count <= 0 + 2)) + AND store.s_store_name = 'ese') s3, + (SELECT count(*) h10_to_10_30 + FROM store_sales, household_demographics, time_dim, store + WHERE ss_sold_time_sk = time_dim.t_time_sk + AND ss_hdemo_sk = household_demographics.hd_demo_sk + AND ss_store_sk = s_store_sk + AND time_dim.t_hour = 10 + AND time_dim.t_minute < 30 + AND ( + (household_demographics.hd_dep_count = 4 AND household_demographics.hd_vehicle_count <= 4 + 2) + OR + (household_demographics.hd_dep_count = 2 AND household_demographics.hd_vehicle_count <= 2 + 2) + OR + (household_demographics.hd_dep_count = 0 AND + household_demographics.hd_vehicle_count <= 0 + 2)) + AND store.s_store_name = 'ese') s4, + (SELECT count(*) h10_30_to_11 + FROM store_sales, household_demographics, time_dim, store + WHERE ss_sold_time_sk = time_dim.t_time_sk + AND ss_hdemo_sk = 
household_demographics.hd_demo_sk + AND ss_store_sk = s_store_sk + AND time_dim.t_hour = 10 + AND time_dim.t_minute >= 30 + AND ( + (household_demographics.hd_dep_count = 4 AND household_demographics.hd_vehicle_count <= 4 + 2) + OR + (household_demographics.hd_dep_count = 2 AND household_demographics.hd_vehicle_count <= 2 + 2) + OR + (household_demographics.hd_dep_count = 0 AND + household_demographics.hd_vehicle_count <= 0 + 2)) + AND store.s_store_name = 'ese') s5, + (SELECT count(*) h11_to_11_30 + FROM store_sales, household_demographics, time_dim, store + WHERE ss_sold_time_sk = time_dim.t_time_sk + AND ss_hdemo_sk = household_demographics.hd_demo_sk + AND ss_store_sk = s_store_sk + AND time_dim.t_hour = 11 + AND time_dim.t_minute < 30 + AND ( + (household_demographics.hd_dep_count = 4 AND household_demographics.hd_vehicle_count <= 4 + 2) + OR + (household_demographics.hd_dep_count = 2 AND household_demographics.hd_vehicle_count <= 2 + 2) + OR + (household_demographics.hd_dep_count = 0 AND + household_demographics.hd_vehicle_count <= 0 + 2)) + AND store.s_store_name = 'ese') s6, + (SELECT count(*) h11_30_to_12 + FROM store_sales, household_demographics, time_dim, store + WHERE ss_sold_time_sk = time_dim.t_time_sk + AND ss_hdemo_sk = household_demographics.hd_demo_sk + AND ss_store_sk = s_store_sk + AND time_dim.t_hour = 11 + AND time_dim.t_minute >= 30 + AND ( + (household_demographics.hd_dep_count = 4 AND household_demographics.hd_vehicle_count <= 4 + 2) + OR + (household_demographics.hd_dep_count = 2 AND household_demographics.hd_vehicle_count <= 2 + 2) + OR + (household_demographics.hd_dep_count = 0 AND + household_demographics.hd_vehicle_count <= 0 + 2)) + AND store.s_store_name = 'ese') s7, + (SELECT count(*) h12_to_12_30 + FROM store_sales, household_demographics, time_dim, store + WHERE ss_sold_time_sk = time_dim.t_time_sk + AND ss_hdemo_sk = household_demographics.hd_demo_sk + AND ss_store_sk = s_store_sk + AND time_dim.t_hour = 12 + AND 
time_dim.t_minute < 30 + AND ( + (household_demographics.hd_dep_count = 4 AND household_demographics.hd_vehicle_count <= 4 + 2) + OR + (household_demographics.hd_dep_count = 2 AND household_demographics.hd_vehicle_count <= 2 + 2) + OR + (household_demographics.hd_dep_count = 0 AND + household_demographics.hd_vehicle_count <= 0 + 2)) + AND store.s_store_name = 'ese') s8 diff --git a/src/test/resources/tpcds/q89.sql b/src/test/resources/tpcds/q89.sql new file mode 100644 index 000000000..75408cb03 --- /dev/null +++ b/src/test/resources/tpcds/q89.sql @@ -0,0 +1,30 @@ +SELECT * +FROM ( + SELECT + i_category, + i_class, + i_brand, + s_store_name, + s_company_name, + d_moy, + sum(ss_sales_price) sum_sales, + avg(sum(ss_sales_price)) + OVER + (PARTITION BY i_category, i_brand, s_store_name, s_company_name) + avg_monthly_sales + FROM item, store_sales, date_dim, store + WHERE ss_item_sk = i_item_sk AND + ss_sold_date_sk = d_date_sk AND + ss_store_sk = s_store_sk AND + d_year IN (1999) AND + ((i_category IN ('Books', 'Electronics', 'Sports') AND + i_class IN ('computers', 'stereo', 'football')) + OR (i_category IN ('Men', 'Jewelry', 'Women') AND + i_class IN ('shirts', 'birdal', 'dresses'))) + GROUP BY i_category, i_class, i_brand, + s_store_name, s_company_name, d_moy) tmp1 +WHERE CASE WHEN (avg_monthly_sales <> 0) + THEN (abs(sum_sales - avg_monthly_sales) / avg_monthly_sales) + ELSE NULL END > 0.1 +ORDER BY sum_sales - avg_monthly_sales, s_store_name +LIMIT 100 diff --git a/src/test/resources/tpcds/q9.sql b/src/test/resources/tpcds/q9.sql new file mode 100644 index 000000000..de3db9d98 --- /dev/null +++ b/src/test/resources/tpcds/q9.sql @@ -0,0 +1,48 @@ +SELECT + CASE WHEN (SELECT count(*) + FROM store_sales + WHERE ss_quantity BETWEEN 1 AND 20) > 62316685 + THEN (SELECT avg(ss_ext_discount_amt) + FROM store_sales + WHERE ss_quantity BETWEEN 1 AND 20) + ELSE (SELECT avg(ss_net_paid) + FROM store_sales + WHERE ss_quantity BETWEEN 1 AND 20) END bucket1, + CASE WHEN (SELECT 
count(*) + FROM store_sales + WHERE ss_quantity BETWEEN 21 AND 40) > 19045798 + THEN (SELECT avg(ss_ext_discount_amt) + FROM store_sales + WHERE ss_quantity BETWEEN 21 AND 40) + ELSE (SELECT avg(ss_net_paid) + FROM store_sales + WHERE ss_quantity BETWEEN 21 AND 40) END bucket2, + CASE WHEN (SELECT count(*) + FROM store_sales + WHERE ss_quantity BETWEEN 41 AND 60) > 365541424 + THEN (SELECT avg(ss_ext_discount_amt) + FROM store_sales + WHERE ss_quantity BETWEEN 41 AND 60) + ELSE (SELECT avg(ss_net_paid) + FROM store_sales + WHERE ss_quantity BETWEEN 41 AND 60) END bucket3, + CASE WHEN (SELECT count(*) + FROM store_sales + WHERE ss_quantity BETWEEN 61 AND 80) > 216357808 + THEN (SELECT avg(ss_ext_discount_amt) + FROM store_sales + WHERE ss_quantity BETWEEN 61 AND 80) + ELSE (SELECT avg(ss_net_paid) + FROM store_sales + WHERE ss_quantity BETWEEN 61 AND 80) END bucket4, + CASE WHEN (SELECT count(*) + FROM store_sales + WHERE ss_quantity BETWEEN 81 AND 100) > 184483884 + THEN (SELECT avg(ss_ext_discount_amt) + FROM store_sales + WHERE ss_quantity BETWEEN 81 AND 100) + ELSE (SELECT avg(ss_net_paid) + FROM store_sales + WHERE ss_quantity BETWEEN 81 AND 100) END bucket5 +FROM reason +WHERE r_reason_sk = 1 diff --git a/src/test/resources/tpcds/q90.sql b/src/test/resources/tpcds/q90.sql new file mode 100644 index 000000000..85e35bf8b --- /dev/null +++ b/src/test/resources/tpcds/q90.sql @@ -0,0 +1,19 @@ +SELECT cast(amc AS DECIMAL(15, 4)) / cast(pmc AS DECIMAL(15, 4)) am_pm_ratio +FROM (SELECT count(*) amc +FROM web_sales, household_demographics, time_dim, web_page +WHERE ws_sold_time_sk = time_dim.t_time_sk + AND ws_ship_hdemo_sk = household_demographics.hd_demo_sk + AND ws_web_page_sk = web_page.wp_web_page_sk + AND time_dim.t_hour BETWEEN 8 AND 8 + 1 + AND household_demographics.hd_dep_count = 6 + AND web_page.wp_char_count BETWEEN 5000 AND 5200) at, + (SELECT count(*) pmc + FROM web_sales, household_demographics, time_dim, web_page + WHERE ws_sold_time_sk = 
time_dim.t_time_sk + AND ws_ship_hdemo_sk = household_demographics.hd_demo_sk + AND ws_web_page_sk = web_page.wp_web_page_sk + AND time_dim.t_hour BETWEEN 19 AND 19 + 1 + AND household_demographics.hd_dep_count = 6 + AND web_page.wp_char_count BETWEEN 5000 AND 5200) pt +ORDER BY am_pm_ratio +LIMIT 100 diff --git a/src/test/resources/tpcds/q91.sql b/src/test/resources/tpcds/q91.sql new file mode 100644 index 000000000..9ca7ce00a --- /dev/null +++ b/src/test/resources/tpcds/q91.sql @@ -0,0 +1,23 @@ +SELECT + cc_call_center_id Call_Center, + cc_name Call_Center_Name, + cc_manager Manager, + sum(cr_net_loss) Returns_Loss +FROM + call_center, catalog_returns, date_dim, customer, customer_address, + customer_demographics, household_demographics +WHERE + cr_call_center_sk = cc_call_center_sk + AND cr_returned_date_sk = d_date_sk + AND cr_returning_customer_sk = c_customer_sk + AND cd_demo_sk = c_current_cdemo_sk + AND hd_demo_sk = c_current_hdemo_sk + AND ca_address_sk = c_current_addr_sk + AND d_year = 1998 + AND d_moy = 11 + AND ((cd_marital_status = 'M' AND cd_education_status = 'Unknown') + OR (cd_marital_status = 'W' AND cd_education_status = 'Advanced Degree')) + AND hd_buy_potential LIKE 'Unknown%' + AND ca_gmt_offset = -7 +GROUP BY cc_call_center_id, cc_name, cc_manager, cd_marital_status, cd_education_status +ORDER BY sum(cr_net_loss) DESC diff --git a/src/test/resources/tpcds/q92.sql b/src/test/resources/tpcds/q92.sql new file mode 100644 index 000000000..99129c3bd --- /dev/null +++ b/src/test/resources/tpcds/q92.sql @@ -0,0 +1,16 @@ +SELECT sum(ws_ext_discount_amt) AS `Excess Discount Amount ` +FROM web_sales, item, date_dim +WHERE i_manufact_id = 350 + AND i_item_sk = ws_item_sk + AND d_date BETWEEN '2000-01-27' AND (cast('2000-01-27' AS DATE) + INTERVAL 90 days) + AND d_date_sk = ws_sold_date_sk + AND ws_ext_discount_amt > + ( + SELECT 1.3 * avg(ws_ext_discount_amt) + FROM web_sales, date_dim + WHERE ws_item_sk = i_item_sk + AND d_date BETWEEN '2000-01-27' 
AND (cast('2000-01-27' AS DATE) + INTERVAL 90 days) + AND d_date_sk = ws_sold_date_sk + ) +ORDER BY sum(ws_ext_discount_amt) +LIMIT 100 diff --git a/src/test/resources/tpcds/q93.sql b/src/test/resources/tpcds/q93.sql new file mode 100644 index 000000000..222dc31c1 --- /dev/null +++ b/src/test/resources/tpcds/q93.sql @@ -0,0 +1,19 @@ +SELECT + ss_customer_sk, + sum(act_sales) sumsales +FROM (SELECT + ss_item_sk, + ss_ticket_number, + ss_customer_sk, + CASE WHEN sr_return_quantity IS NOT NULL + THEN (ss_quantity - sr_return_quantity) * ss_sales_price + ELSE (ss_quantity * ss_sales_price) END act_sales +FROM store_sales + LEFT OUTER JOIN store_returns + ON (sr_item_sk = ss_item_sk AND sr_ticket_number = ss_ticket_number) + , + reason +WHERE sr_reason_sk = r_reason_sk AND r_reason_desc = 'reason 28') t +GROUP BY ss_customer_sk +ORDER BY sumsales, ss_customer_sk +LIMIT 100 diff --git a/src/test/resources/tpcds/q94.sql b/src/test/resources/tpcds/q94.sql new file mode 100644 index 000000000..d6de3d75b --- /dev/null +++ b/src/test/resources/tpcds/q94.sql @@ -0,0 +1,23 @@ +SELECT + count(DISTINCT ws_order_number) AS `order count `, + sum(ws_ext_ship_cost) AS `total shipping cost `, + sum(ws_net_profit) AS `total net profit ` +FROM + web_sales ws1, date_dim, customer_address, web_site +WHERE + d_date BETWEEN '1999-02-01' AND + (CAST('1999-02-01' AS DATE) + INTERVAL 60 days) + AND ws1.ws_ship_date_sk = d_date_sk + AND ws1.ws_ship_addr_sk = ca_address_sk + AND ca_state = 'IL' + AND ws1.ws_web_site_sk = web_site_sk + AND web_company_name = 'pri' + AND EXISTS(SELECT * + FROM web_sales ws2 + WHERE ws1.ws_order_number = ws2.ws_order_number + AND ws1.ws_warehouse_sk <> ws2.ws_warehouse_sk) + AND NOT EXISTS(SELECT * + FROM web_returns wr1 + WHERE ws1.ws_order_number = wr1.wr_order_number) +ORDER BY count(DISTINCT ws_order_number) +LIMIT 100 diff --git a/src/test/resources/tpcds/q95.sql b/src/test/resources/tpcds/q95.sql new file mode 100644 index 000000000..df71f00bd --- /dev/null 
+++ b/src/test/resources/tpcds/q95.sql @@ -0,0 +1,29 @@ +WITH ws_wh AS +(SELECT + ws1.ws_order_number, + ws1.ws_warehouse_sk wh1, + ws2.ws_warehouse_sk wh2 + FROM web_sales ws1, web_sales ws2 + WHERE ws1.ws_order_number = ws2.ws_order_number + AND ws1.ws_warehouse_sk <> ws2.ws_warehouse_sk) +SELECT + count(DISTINCT ws_order_number) AS `order count `, + sum(ws_ext_ship_cost) AS `total shipping cost `, + sum(ws_net_profit) AS `total net profit ` +FROM + web_sales ws1, date_dim, customer_address, web_site +WHERE + d_date BETWEEN '1999-02-01' AND + (CAST('1999-02-01' AS DATE) + INTERVAL 60 DAY) + AND ws1.ws_ship_date_sk = d_date_sk + AND ws1.ws_ship_addr_sk = ca_address_sk + AND ca_state = 'IL' + AND ws1.ws_web_site_sk = web_site_sk + AND web_company_name = 'pri' + AND ws1.ws_order_number IN (SELECT ws_order_number + FROM ws_wh) + AND ws1.ws_order_number IN (SELECT wr_order_number + FROM web_returns, ws_wh + WHERE wr_order_number = ws_wh.ws_order_number) +ORDER BY count(DISTINCT ws_order_number) +LIMIT 100 diff --git a/src/test/resources/tpcds/q96.sql b/src/test/resources/tpcds/q96.sql new file mode 100644 index 000000000..7ab17e7bc --- /dev/null +++ b/src/test/resources/tpcds/q96.sql @@ -0,0 +1,11 @@ +SELECT count(*) +FROM store_sales, household_demographics, time_dim, store +WHERE ss_sold_time_sk = time_dim.t_time_sk + AND ss_hdemo_sk = household_demographics.hd_demo_sk + AND ss_store_sk = s_store_sk + AND time_dim.t_hour = 20 + AND time_dim.t_minute >= 30 + AND household_demographics.hd_dep_count = 7 + AND store.s_store_name = 'ese' +ORDER BY count(*) +LIMIT 100 diff --git a/src/test/resources/tpcds/q97.sql b/src/test/resources/tpcds/q97.sql new file mode 100644 index 000000000..e7e0b1a05 --- /dev/null +++ b/src/test/resources/tpcds/q97.sql @@ -0,0 +1,30 @@ +WITH ssci AS ( + SELECT + ss_customer_sk customer_sk, + ss_item_sk item_sk + FROM store_sales, date_dim + WHERE ss_sold_date_sk = d_date_sk + AND d_month_seq BETWEEN 1200 AND 1200 + 11 + GROUP BY ss_customer_sk, 
ss_item_sk), + csci AS ( + SELECT + cs_bill_customer_sk customer_sk, + cs_item_sk item_sk + FROM catalog_sales, date_dim + WHERE cs_sold_date_sk = d_date_sk + AND d_month_seq BETWEEN 1200 AND 1200 + 11 + GROUP BY cs_bill_customer_sk, cs_item_sk) +SELECT + sum(CASE WHEN ssci.customer_sk IS NOT NULL AND csci.customer_sk IS NULL + THEN 1 + ELSE 0 END) store_only, + sum(CASE WHEN ssci.customer_sk IS NULL AND csci.customer_sk IS NOT NULL + THEN 1 + ELSE 0 END) catalog_only, + sum(CASE WHEN ssci.customer_sk IS NOT NULL AND csci.customer_sk IS NOT NULL + THEN 1 + ELSE 0 END) store_and_catalog +FROM ssci + FULL OUTER JOIN csci ON (ssci.customer_sk = csci.customer_sk + AND ssci.item_sk = csci.item_sk) +LIMIT 100 diff --git a/src/test/resources/tpcds/q98.sql b/src/test/resources/tpcds/q98.sql new file mode 100644 index 000000000..bb10d4bf8 --- /dev/null +++ b/src/test/resources/tpcds/q98.sql @@ -0,0 +1,21 @@ +SELECT + i_item_desc, + i_category, + i_class, + i_current_price, + sum(ss_ext_sales_price) AS itemrevenue, + sum(ss_ext_sales_price) * 100 / sum(sum(ss_ext_sales_price)) + OVER + (PARTITION BY i_class) AS revenueratio +FROM + store_sales, item, date_dim +WHERE + ss_item_sk = i_item_sk + AND i_category IN ('Sports', 'Books', 'Home') + AND ss_sold_date_sk = d_date_sk + AND d_date BETWEEN cast('1999-02-22' AS DATE) + AND (cast('1999-02-22' AS DATE) + INTERVAL 30 days) +GROUP BY + i_item_id, i_item_desc, i_category, i_class, i_current_price +ORDER BY + i_category, i_class, i_item_id, i_item_desc, revenueratio diff --git a/src/test/resources/tpcds/q99.sql b/src/test/resources/tpcds/q99.sql new file mode 100644 index 000000000..f1a3d4d2b --- /dev/null +++ b/src/test/resources/tpcds/q99.sql @@ -0,0 +1,34 @@ +SELECT + substr(w_warehouse_name, 1, 20), + sm_type, + cc_name, + sum(CASE WHEN (cs_ship_date_sk - cs_sold_date_sk <= 30) + THEN 1 + ELSE 0 END) AS `30 days `, + sum(CASE WHEN (cs_ship_date_sk - cs_sold_date_sk > 30) AND + (cs_ship_date_sk - cs_sold_date_sk <= 60) + THEN 
1 + ELSE 0 END) AS `31 - 60 days `, + sum(CASE WHEN (cs_ship_date_sk - cs_sold_date_sk > 60) AND + (cs_ship_date_sk - cs_sold_date_sk <= 90) + THEN 1 + ELSE 0 END) AS `61 - 90 days `, + sum(CASE WHEN (cs_ship_date_sk - cs_sold_date_sk > 90) AND + (cs_ship_date_sk - cs_sold_date_sk <= 120) + THEN 1 + ELSE 0 END) AS `91 - 120 days `, + sum(CASE WHEN (cs_ship_date_sk - cs_sold_date_sk > 120) + THEN 1 + ELSE 0 END) AS `>120 days ` +FROM + catalog_sales, warehouse, ship_mode, call_center, date_dim +WHERE + d_month_seq BETWEEN 1200 AND 1200 + 11 + AND cs_ship_date_sk = d_date_sk + AND cs_warehouse_sk = w_warehouse_sk + AND cs_ship_mode_sk = sm_ship_mode_sk + AND cs_call_center_sk = cc_call_center_sk +GROUP BY + substr(w_warehouse_name, 1, 20), sm_type, cc_name +ORDER BY substr(w_warehouse_name, 1, 20), sm_type, cc_name +LIMIT 100 diff --git a/src/test/scala/com/microsoft/hyperspace/goldstandard/PlanStabilitySuite.scala b/src/test/scala/com/microsoft/hyperspace/goldstandard/PlanStabilitySuite.scala new file mode 100644 index 000000000..755822967 --- /dev/null +++ b/src/test/scala/com/microsoft/hyperspace/goldstandard/PlanStabilitySuite.scala @@ -0,0 +1,277 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package com.microsoft.hyperspace.goldstandard + +import java.io.File +import java.nio.charset.StandardCharsets + +import scala.collection.mutable + +import org.apache.commons.io.FileUtils +import org.apache.spark.internal.Logging +import org.apache.spark.sql.catalyst.expressions.AttributeSet +import org.apache.spark.sql.catalyst.util._ +import org.apache.spark.sql.execution._ +import org.apache.spark.sql.execution.command.ExplainCommand +import org.apache.spark.sql.execution.exchange.{Exchange, ReusedExchangeExec} + +// scalastyle:off filelinelengthchecker +/** + * Check that TPC-DS SparkPlans don't change. + * If there are plan differences, the error message looks like this: + * Plans did not match: + * last approved simplified plan: /path/to/tpcds-plan-stability/approved-plans-xxx/q1/simplified.txt + * last approved explain plan: /path/to/tpcds-plan-stability/approved-plans-xxx/q1/explain.txt + * [last approved simplified plan] + * + * actual simplified plan: /path/to/tmp/q1.actual.simplified.txt + * actual explain plan: /path/to/tmp/q1.actual.explain.txt + * [actual simplified plan] + * + * The explain files are saved to help debug later, they are not checked. Only the simplified + * plans are checked (by string comparison). 
+ * + * + * To run the entire test suite: + * {{{ + * build/sbt "sql/testOnly *PlanStabilitySuite" + * }}} + * + * To run a single test file upon change: + * {{{ + * build/sbt "sql/testOnly *PlanStabilitySuite -- -z (tpcds-v1.4/q49)" + * }}} + * + * To re-generate golden files for entire suite, run: + * {{{ + * SPARK_GENERATE_GOLDEN_FILES=1 build/sbt "sql/testOnly *PlanStabilitySuite" + * }}} + * + * To re-generate golden file for a single test, run: + * {{{ + * SPARK_GENERATE_GOLDEN_FILES=1 build/sbt "sql/testOnly *PlanStabilitySuite -- -z (tpcds-v1.4/q49)" + * }}} + */ +// scalastyle:on filelinelengthchecker +trait PlanStabilitySuite extends TPCDSBase with Logging { + + override def beforeAll(): Unit = { + spark.conf.set("spark.sql.crossJoin.enabled", "true") + super.beforeAll() + } + + override def afterAll(): Unit = { + super.afterAll() + } + + private val regenerateGoldenFiles: Boolean = System.getenv("SPARK_GENERATE_GOLDEN_FILES") == "1" + + protected val baseResourcePath = { + // use the same way as `SQLQueryTestSuite` to get the resource path + java.nio.file.Paths.get("src", "test", "resources", "tpcds-plan-stability").toFile + } + + private val referenceRegex = "#\\d+".r + private val normalizeRegex = "#\\d+L?".r + + private val clsName = this.getClass.getCanonicalName + + def goldenFilePath: String + + private def getDirForTest(name: String): File = { + new File(goldenFilePath, name) + } + + private def isApproved(dir: File, actualSimplifiedPlan: String): Boolean = { + val file = new File(dir, "simplified.txt") + val expected = FileUtils.readFileToString(file, StandardCharsets.UTF_8) + expected == actualSimplifiedPlan + } + + /** + * Serialize and save this SparkPlan. + * The resulting file is used by [[checkWithApproved]] to check stability. 
+ * + * @param plan the SparkPlan + * @param name the name of the query + * @param explain the full explain output; this is saved to help debug later as the simplified + * plan is not too useful for debugging + */ + private def generateGoldenFile(plan: SparkPlan, name: String, explain: String): Unit = { + val dir = getDirForTest(name) + val simplified = getSimplifiedPlan(plan) + val foundMatch = dir.exists() && isApproved(dir, simplified) + + if (!foundMatch) { + FileUtils.deleteDirectory(dir) + assert(dir.mkdirs()) + + val file = new File(dir, "simplified.txt") + FileUtils.writeStringToFile(file, simplified, StandardCharsets.UTF_8) + val fileOriginalPlan = new File(dir, "explain.txt") + FileUtils.writeStringToFile(fileOriginalPlan, explain, StandardCharsets.UTF_8) + logDebug(s"APPROVED: $file $fileOriginalPlan") + } + } + + private def checkWithApproved(plan: SparkPlan, name: String, explain: String): Unit = { + val dir = getDirForTest(name) + val tempDir = FileUtils.getTempDirectory + val actualSimplified = getSimplifiedPlan(plan) + val foundMatch = isApproved(dir, actualSimplified) + + if (!foundMatch) { + // show diff with last approved + val approvedSimplifiedFile = new File(dir, "simplified.txt") + val approvedExplainFile = new File(dir, "explain.txt") + + val actualSimplifiedFile = new File(tempDir, s"$name.actual.simplified.txt") + val actualExplainFile = new File(tempDir, s"$name.actual.explain.txt") + + val approvedSimplified = + FileUtils.readFileToString(approvedSimplifiedFile, StandardCharsets.UTF_8) + // write out for debugging + FileUtils.writeStringToFile(actualSimplifiedFile, actualSimplified, StandardCharsets.UTF_8) + FileUtils.writeStringToFile(actualExplainFile, explain, StandardCharsets.UTF_8) + + fail(s""" + |Plans did not match: + |last approved simplified plan: ${approvedSimplifiedFile.getAbsolutePath} + |last approved explain plan: ${approvedExplainFile.getAbsolutePath} + | + |$approvedSimplified + | + |actual simplified plan: 
${actualSimplifiedFile.getAbsolutePath} + |actual explain plan: ${actualExplainFile.getAbsolutePath} + | + |$actualSimplified + """.stripMargin) + } + } + + /** + * Get the simplified plan for a specific SparkPlan. In the simplified plan, the node only has + * its name and all the sorted reference and produced attribute names (without ExprId) and its + * simplified children as well. And we'll only identify the performance-sensitive nodes, e.g., + * Exchange, Subquery, in the simplified plan. Given such an identical but simplified plan, we'd + * expect to avoid frequent plan changes and to catch possible meaningful regressions. + */ + private def getSimplifiedPlan(plan: SparkPlan): String = { + val exchangeIdMap = new mutable.HashMap[SparkPlan, Int]() + val subqueriesMap = new mutable.HashMap[SparkPlan, Int]() + + def getId(plan: SparkPlan): Int = plan match { + case exchange: Exchange => exchangeIdMap.getOrElseUpdate(exchange, exchangeIdMap.size + 1) + case ReusedExchangeExec(_, exchange) => + exchangeIdMap.getOrElseUpdate(exchange, exchangeIdMap.size + 1) + case subquery: SubqueryExec => + subqueriesMap.getOrElseUpdate(subquery, subqueriesMap.size + 1) + case _ => -1 + } + + /** + * Some expression names have ExprId in them due to using things such as + * "sum(sr_return_amt#14)", so we remove all of these using regex + */ + def cleanUpReferences(references: AttributeSet): String = { + referenceRegex.replaceAllIn(references.map(_.name).mkString(","), "") + } + + /** + * Generate a simplified plan as a string + * Example output: + * TakeOrderedAndProject [c_customer_id] + * WholeStageCodegen + * Project [c_customer_id] + */ + def simplifyNode(node: SparkPlan, depth: Int): String = { + val padding = "  " * depth + var thisNode = node.nodeName + if (node.references.nonEmpty) { + thisNode += s" [${cleanUpReferences(node.references)}]" + } + if (node.producedAttributes.nonEmpty) { + thisNode += s" [${cleanUpReferences(node.producedAttributes)}]" + } + val id = 
getId(node) + if (id > 0) { + thisNode += s" #$id" + } + val childrenSimplified = node.children.map(simplifyNode(_, depth + 1)) + val subqueriesSimplified = node.subqueries.map(simplifyNode(_, depth + 1)) + s"$padding$thisNode\n${subqueriesSimplified.mkString("")}${childrenSimplified.mkString("")}" + } + + simplifyNode(plan, 0) + } + + private def normalizeIds(plan: String): String = { + val map = new mutable.HashMap[String, String]() + normalizeRegex + .findAllMatchIn(plan) + .map(_.toString) + .foreach(map.getOrElseUpdate(_, (map.size + 1).toString)) + normalizeRegex.replaceAllIn(plan, regexMatch => s"#${map(regexMatch.toString)}") + } + + private def normalizeLocation(plan: String): String = { + plan.replaceAll( + s"Location.*$clsName/", + "Location [not included in comparison]/{warehouse_dir}/") + } + + /** + * Test a TPC-DS query. Depending on the settings this test will either check if the plan matches + * a golden file or it will create a new golden file. + */ + protected def testQuery(tpcdsGroup: String, query: String, suffix: String = ""): Unit = { + val queryString = resourceToString( + s"$tpcdsGroup/$query.sql", + classLoader = Thread.currentThread().getContextClassLoader) + val qe = spark.sql(queryString).queryExecution + val plan = qe.executedPlan + val explain = normalizeLocation(normalizeIds(explainString(qe))) + + if (regenerateGoldenFiles) { + generateGoldenFile(plan, query + suffix, explain) + } else { + checkWithApproved(plan, query + suffix, explain) + } + } + + def explainString(queryExecution: QueryExecution): String = { + val explain = ExplainCommand(queryExecution.logical, extended = false) + spark.sessionState + .executePlan(explain) + .executedPlan + .executeCollect() + .map(_.getString(0)) + .mkString("\n") + } +} + +class TPCDSV1_4_PlanStabilitySuite extends PlanStabilitySuite { + override val goldenFilePath: String = + new File(baseResourcePath, s"approved-plans-v1_4").getAbsolutePath + + tpcdsQueries.foreach { q => + test(s"check 
simplified (tpcds-v1.4/$q)") { + testQuery("tpcds", q) + } + } +} diff --git a/src/test/scala/com/microsoft/hyperspace/goldstandard/TPCDSBase.scala b/src/test/scala/com/microsoft/hyperspace/goldstandard/TPCDSBase.scala new file mode 100644 index 000000000..d8670bd48 --- /dev/null +++ b/src/test/scala/com/microsoft/hyperspace/goldstandard/TPCDSBase.scala @@ -0,0 +1,588 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package com.microsoft.hyperspace.goldstandard + +import org.apache.spark.SparkFunSuite +import org.apache.spark.sql.SparkSession +import org.apache.spark.sql.catalyst.TableIdentifier +import org.apache.spark.sql.internal.SQLConf + +import com.microsoft.hyperspace.SparkInvolvedSuite + +trait TPCDSBase extends SparkFunSuite with SparkInvolvedSuite { + + val conf = SQLConf.get + + // The TPCDS queries below are based on v1.4 + val tpcdsQueries = Seq( + "q1", "q2", "q3", "q4", "q5", "q6", "q7", "q8", "q9", "q10", "q11", + "q12", "q13", "q14a", "q14b", "q15", "q16", "q17", "q18", "q19", "q20", + "q21", "q22", "q23a", "q23b", "q24a", "q24b", "q25", "q26", "q27", "q28", "q29", "q30", + "q31", "q32", "q33", "q34", "q35", "q36", "q37", "q38", "q39a", "q39b", "q40", + "q41", "q42", "q43", "q44", "q45", "q46", "q47", "q48", "q49", "q50", + "q51", "q52", "q53", "q54", "q55", "q56", "q57", "q58", "q59", "q60", + "q61", "q62", "q63", "q64", "q65", "q66", "q67", "q68", "q69", "q70", + "q71", "q72", "q73", "q74", "q75", "q76", "q77", "q78", "q79", "q80", + "q81", "q82", "q83", "q84", "q85", "q86", "q87", "q88", "q89", "q90", + "q91", "q92", "q93", "q94", "q95", "q96", "q97", "q98", "q99") + + // This list only includes TPCDS v2.7 queries that are different from v1.4 ones + val tpcdsQueriesV2_7_0 = Seq( + "q5a", "q6", "q10a", "q11", "q12", "q14", "q14a", "q18a", + "q20", "q22", "q22a", "q24", "q27a", "q34", "q35", "q35a", "q36a", "q47", "q49", + "q51a", "q57", "q64", "q67a", "q70a", "q72", "q74", "q75", "q77a", "q78", + "q80a", "q86a", "q98") + + // These queries are from https://github.com/cloudera/impala-tpcds-kit/tree/master/queries + val modifiedTPCDSQueries = Seq( + "q3", "q7", "q10", "q19", "q27", "q34", "q42", "q43", "q46", "q52", "q53", "q55", "q59", + "q63", "q65", "q68", "q73", "q79", "q89", "q98", "ss_max") + + private val tableColumns = Map( + "store_sales" -> + """ + |`ss_sold_date_sk` INT, + |`ss_sold_time_sk` INT, + |`ss_item_sk` INT, + |`ss_customer_sk` INT, 
+ |`ss_cdemo_sk` INT, + |`ss_hdemo_sk` INT, + |`ss_addr_sk` INT, + |`ss_store_sk` INT, + |`ss_promo_sk` INT, + |`ss_ticket_number` INT, + |`ss_quantity` INT, + |`ss_wholesale_cost` DECIMAL(7,2), + |`ss_list_price` DECIMAL(7,2), + |`ss_sales_price` DECIMAL(7,2), + |`ss_ext_discount_amt` DECIMAL(7,2), + |`ss_ext_sales_price` DECIMAL(7,2), + |`ss_ext_wholesale_cost` DECIMAL(7,2), + |`ss_ext_list_price` DECIMAL(7,2), + |`ss_ext_tax` DECIMAL(7,2), + |`ss_coupon_amt` DECIMAL(7,2), + |`ss_net_paid` DECIMAL(7,2), + |`ss_net_paid_inc_tax` DECIMAL(7,2), + |`ss_net_profit` DECIMAL(7,2) + """.stripMargin, + "store_returns" -> + """ + |`sr_returned_date_sk` BIGINT, + |`sr_return_time_sk` BIGINT, + |`sr_item_sk` BIGINT, + |`sr_customer_sk` BIGINT, + |`sr_cdemo_sk` BIGINT, + |`sr_hdemo_sk` BIGINT, + |`sr_addr_sk` BIGINT, + |`sr_store_sk` BIGINT, + |`sr_reason_sk` BIGINT, + |`sr_ticket_number` BIGINT, + |`sr_return_quantity` INT, + |`sr_return_amt` DECIMAL(7,2), + |`sr_return_tax` DECIMAL(7,2), + |`sr_return_amt_inc_tax` DECIMAL(7,2), + |`sr_fee` DECIMAL(7,2), + |`sr_return_ship_cost` DECIMAL(7,2), + |`sr_refunded_cash` DECIMAL(7,2), + |`sr_reversed_charge` DECIMAL(7,2), + |`sr_store_credit` DECIMAL(7,2), + |`sr_net_loss` DECIMAL(7,2) + """.stripMargin, + "catalog_sales" -> + """ + |`cs_sold_date_sk` INT, + |`cs_sold_time_sk` INT, + |`cs_ship_date_sk` INT, + |`cs_bill_customer_sk` INT, + |`cs_bill_cdemo_sk` INT, + |`cs_bill_hdemo_sk` INT, + |`cs_bill_addr_sk` INT, + |`cs_ship_customer_sk` INT, + |`cs_ship_cdemo_sk` INT, + |`cs_ship_hdemo_sk` INT, + |`cs_ship_addr_sk` INT, + |`cs_call_center_sk` INT, + |`cs_catalog_page_sk` INT, + |`cs_ship_mode_sk` INT, + |`cs_warehouse_sk` INT, + |`cs_item_sk` INT, + |`cs_promo_sk` INT, + |`cs_order_number` INT, + |`cs_quantity` INT, + |`cs_wholesale_cost` DECIMAL(7,2), + |`cs_list_price` DECIMAL(7,2), + |`cs_sales_price` DECIMAL(7,2), + |`cs_ext_discount_amt` DECIMAL(7,2), + |`cs_ext_sales_price` DECIMAL(7,2), + |`cs_ext_wholesale_cost` 
DECIMAL(7,2), + |`cs_ext_list_price` DECIMAL(7,2), + |`cs_ext_tax` DECIMAL(7,2), + |`cs_coupon_amt` DECIMAL(7,2), + |`cs_ext_ship_cost` DECIMAL(7,2), + |`cs_net_paid` DECIMAL(7,2), + |`cs_net_paid_inc_tax` DECIMAL(7,2), + |`cs_net_paid_inc_ship` DECIMAL(7,2), + |`cs_net_paid_inc_ship_tax` DECIMAL(7,2), + |`cs_net_profit` DECIMAL(7,2) + """.stripMargin, + "catalog_returns" -> + """ + |`cr_returned_date_sk` INT, + |`cr_returned_time_sk` INT, + |`cr_item_sk` INT, + |`cr_refunded_customer_sk` INT, + |`cr_refunded_cdemo_sk` INT, + |`cr_refunded_hdemo_sk` INT, + |`cr_refunded_addr_sk` INT, + |`cr_returning_customer_sk` INT, + |`cr_returning_cdemo_sk` INT, + |`cr_returning_hdemo_sk` INT, + |`cr_returning_addr_sk` INT, + |`cr_call_center_sk` INT, + |`cr_catalog_page_sk` INT, + |`cr_ship_mode_sk` INT, + |`cr_warehouse_sk` INT, + |`cr_reason_sk` INT,`cr_order_number` INT, + |`cr_return_quantity` INT, + |`cr_return_amount` DECIMAL(7,2), + |`cr_return_tax` DECIMAL(7,2), + |`cr_return_amt_inc_tax` DECIMAL(7,2), + |`cr_fee` DECIMAL(7,2), + |`cr_return_ship_cost` DECIMAL(7,2), + |`cr_refunded_cash` DECIMAL(7,2), + |`cr_reversed_charge` DECIMAL(7,2), + |`cr_store_credit` DECIMAL(7,2), + |`cr_net_loss` DECIMAL(7,2) + """.stripMargin, + "web_sales" -> + """ + |`ws_sold_date_sk` INT, + |`ws_sold_time_sk` INT, + |`ws_ship_date_sk` INT, + |`ws_item_sk` INT, + |`ws_bill_customer_sk` INT, + |`ws_bill_cdemo_sk` INT, + |`ws_bill_hdemo_sk` INT, + |`ws_bill_addr_sk` INT, + |`ws_ship_customer_sk` INT, + |`ws_ship_cdemo_sk` INT, + |`ws_ship_hdemo_sk` INT, + |`ws_ship_addr_sk` INT, + |`ws_web_page_sk` INT, + |`ws_web_site_sk` INT, + |`ws_ship_mode_sk` INT, + |`ws_warehouse_sk` INT, + |`ws_promo_sk` INT, + |`ws_order_number` INT, + |`ws_quantity` INT, + |`ws_wholesale_cost` DECIMAL(7,2), + |`ws_list_price` DECIMAL(7,2), + |`ws_sales_price` DECIMAL(7,2), + |`ws_ext_discount_amt` DECIMAL(7,2), + |`ws_ext_sales_price` DECIMAL(7,2), + |`ws_ext_wholesale_cost` DECIMAL(7,2), + |`ws_ext_list_price` 
DECIMAL(7,2), + |`ws_ext_tax` DECIMAL(7,2), + |`ws_coupon_amt` DECIMAL(7,2), + |`ws_ext_ship_cost` DECIMAL(7,2), + |`ws_net_paid` DECIMAL(7,2), + |`ws_net_paid_inc_tax` DECIMAL(7,2), + |`ws_net_paid_inc_ship` DECIMAL(7,2), + |`ws_net_paid_inc_ship_tax` DECIMAL(7,2), + |`ws_net_profit` DECIMAL(7,2) + """.stripMargin, + "web_returns" -> + """ + |`wr_returned_date_sk` BIGINT, + |`wr_returned_time_sk` BIGINT, + |`wr_item_sk` BIGINT, + |`wr_refunded_customer_sk` BIGINT, + |`wr_refunded_cdemo_sk` BIGINT, + |`wr_refunded_hdemo_sk` BIGINT, + |`wr_refunded_addr_sk` BIGINT, + |`wr_returning_customer_sk` BIGINT, + |`wr_returning_cdemo_sk` BIGINT, + |`wr_returning_hdemo_sk` BIGINT, + |`wr_returning_addr_sk` BIGINT, + |`wr_web_page_sk` BIGINT, + |`wr_reason_sk` BIGINT, + |`wr_order_number` BIGINT, + |`wr_return_quantity` INT, + |`wr_return_amt` DECIMAL(7,2), + |`wr_return_tax` DECIMAL(7,2), + |`wr_return_amt_inc_tax` DECIMAL(7,2), + |`wr_fee` DECIMAL(7,2), + |`wr_return_ship_cost` DECIMAL(7,2), + |`wr_refunded_cash` DECIMAL(7,2), + |`wr_reversed_charge` DECIMAL(7,2), + |`wr_account_credit` DECIMAL(7,2), + |`wr_net_loss` DECIMAL(7,2) + """.stripMargin, + "inventory" -> + """ + |`inv_date_sk` INT, + |`inv_item_sk` INT, + |`inv_warehouse_sk` INT, + |`inv_quantity_on_hand` INT + """.stripMargin, + "store" -> + """ + |`s_store_sk` INT, + |`s_store_id` CHAR(16), + |`s_rec_start_date` DATE, + |`s_rec_end_date` DATE, + |`s_closed_date_sk` INT, + |`s_store_name` VARCHAR(50), + |`s_number_employees` INT, + |`s_floor_space` INT, + |`s_hours` CHAR(20), + |`s_manager` VARCHAR(40), + |`s_market_id` INT, + |`s_geography_class` VARCHAR(100), + |`s_market_desc` VARCHAR(100), + |`s_market_manager` VARCHAR(40), + |`s_division_id` INT, + |`s_division_name` VARCHAR(50), + |`s_company_id` INT, + |`s_company_name` VARCHAR(50), + |`s_street_number` VARCHAR(10), + |`s_street_name` VARCHAR(60), + |`s_street_type` CHAR(15), + |`s_suite_number` CHAR(10), + |`s_city` VARCHAR(60), + |`s_county` VARCHAR(30), 
+ |`s_state` CHAR(2), + |`s_zip` CHAR(10), + |`s_country` VARCHAR(20), + |`s_gmt_offset` DECIMAL(5,2), + |`s_tax_percentage` DECIMAL(5,2) + """.stripMargin, + "call_center" -> + """ + |`cc_call_center_sk` INT, + |`cc_call_center_id` CHAR(16), + |`cc_rec_start_date` DATE, + |`cc_rec_end_date` DATE, + |`cc_closed_date_sk` INT, + |`cc_open_date_sk` INT, + |`cc_name` VARCHAR(50), + |`cc_class` VARCHAR(50), + |`cc_employees` INT, + |`cc_sq_ft` INT, + |`cc_hours` CHAR(20), + |`cc_manager` VARCHAR(40), + |`cc_mkt_id` INT, + |`cc_mkt_class` CHAR(50), + |`cc_mkt_desc` VARCHAR(100), + |`cc_market_manager` VARCHAR(40), + |`cc_division` INT, + |`cc_division_name` VARCHAR(50), + |`cc_company` INT, + |`cc_company_name` CHAR(50), + |`cc_street_number` CHAR(10), + |`cc_street_name` VARCHAR(60), + |`cc_street_type` CHAR(15), + |`cc_suite_number` CHAR(10), + |`cc_city` VARCHAR(60), + |`cc_county` VARCHAR(30), + |`cc_state` CHAR(2), + |`cc_zip` CHAR(10), + |`cc_country` VARCHAR(20), + |`cc_gmt_offset` DECIMAL(5,2), + |`cc_tax_percentage` DECIMAL(5,2) + """.stripMargin, + "catalog_page" -> + """ + |`cp_catalog_page_sk` INT, + |`cp_catalog_page_id` CHAR(16), + |`cp_start_date_sk` INT, + |`cp_end_date_sk` INT, + |`cp_department` VARCHAR(50), + |`cp_catalog_number` INT, + |`cp_catalog_page_number` INT, + |`cp_description` VARCHAR(100), + |`cp_type` VARCHAR(100) + """.stripMargin, + "web_site" -> + """ + |`web_site_sk` INT, + |`web_site_id` CHAR(16), + |`web_rec_start_date` DATE, + |`web_rec_end_date` DATE, + |`web_name` VARCHAR(50), + |`web_open_date_sk` INT, + |`web_close_date_sk` INT, + |`web_class` VARCHAR(50), + |`web_manager` VARCHAR(40), + |`web_mkt_id` INT, + |`web_mkt_class` VARCHAR(50), + |`web_mkt_desc` VARCHAR(100), + |`web_market_manager` VARCHAR(40), + |`web_company_id` INT, + |`web_company_name` CHAR(50), + |`web_street_number` CHAR(10), + |`web_street_name` VARCHAR(60), + |`web_street_type` CHAR(15), + |`web_suite_number` CHAR(10), + |`web_city` VARCHAR(60), + 
|`web_county` VARCHAR(30), + |`web_state` CHAR(2), + |`web_zip` CHAR(10), + |`web_country` VARCHAR(20), + |`web_gmt_offset` DECIMAL(5,2), + |`web_tax_percentage` DECIMAL(5,2) + """.stripMargin, + "web_page" -> + """ + |`wp_web_page_sk` INT, + |`wp_web_page_id` CHAR(16), + |`wp_rec_start_date` DATE, + |`wp_rec_end_date` DATE, + |`wp_creation_date_sk` INT, + |`wp_access_date_sk` INT, + |`wp_autogen_flag` CHAR(1), + |`wp_customer_sk` INT, + |`wp_url` VARCHAR(100), + |`wp_type` CHAR(50), + |`wp_char_count` INT, + |`wp_link_count` INT, + |`wp_image_count` INT, + |`wp_max_ad_count` INT + """.stripMargin, + "warehouse" -> + """ + |`w_warehouse_sk` INT, + |`w_warehouse_id` CHAR(16), + |`w_warehouse_name` VARCHAR(20), + |`w_warehouse_sq_ft` INT, + |`w_street_number` CHAR(10), + |`w_street_name` VARCHAR(20), + |`w_street_type` CHAR(15), + |`w_suite_number` CHAR(10), + |`w_city` VARCHAR(60), + |`w_county` VARCHAR(30), + |`w_state` CHAR(2), + |`w_zip` CHAR(10), + |`w_country` VARCHAR(20), + |`w_gmt_offset` DECIMAL(5,2) + """.stripMargin, + "customer" -> + """ + |`c_customer_sk` INT, + |`c_customer_id` CHAR(16), + |`c_current_cdemo_sk` INT, + |`c_current_hdemo_sk` INT, + |`c_current_addr_sk` INT, + |`c_first_shipto_date_sk` INT, + |`c_first_sales_date_sk` INT, + |`c_salutation` CHAR(10), + |`c_first_name` CHAR(20), + |`c_last_name` CHAR(30), + |`c_preferred_cust_flag` CHAR(1), + |`c_birth_day` INT, + |`c_birth_month` INT, + |`c_birth_year` INT, + |`c_birth_country` VARCHAR(20), + |`c_login` CHAR(13), + |`c_email_address` CHAR(50), + |`c_last_review_date` INT + """.stripMargin, + "customer_address" -> + """ + |`ca_address_sk` INT, + |`ca_address_id` CHAR(16), + |`ca_street_number` CHAR(10), + |`ca_street_name` VARCHAR(60), + |`ca_street_type` CHAR(15), + |`ca_suite_number` CHAR(10), + |`ca_city` VARCHAR(60), + |`ca_county` VARCHAR(30), + |`ca_state` CHAR(2), + |`ca_zip` CHAR(10), + |`ca_country` VARCHAR(20), + |`ca_gmt_offset` DECIMAL(5,2), + |`ca_location_type` CHAR(20) + 
""".stripMargin, + "customer_demographics" -> + """ + |`cd_demo_sk` INT, + |`cd_gender` CHAR(1), + |`cd_marital_status` CHAR(1), + |`cd_education_status` CHAR(20), + |`cd_purchase_estimate` INT, + |`cd_credit_rating` CHAR(10), + |`cd_dep_count` INT, + |`cd_dep_employed_count` INT, + |`cd_dep_college_count` INT + """.stripMargin, + "date_dim" -> + """ + |`d_date_sk` INT, + |`d_date_id` CHAR(16), + |`d_date` DATE, + |`d_month_seq` INT, + |`d_week_seq` INT, + |`d_quarter_seq` INT, + |`d_year` INT, + |`d_dow` INT, + |`d_moy` INT, + |`d_dom` INT, + |`d_qoy` INT, + |`d_fy_year` INT, + |`d_fy_quarter_seq` INT, + |`d_fy_week_seq` INT, + |`d_day_name` CHAR(9), + |`d_quarter_name` CHAR(1), + |`d_holiday` CHAR(1), + |`d_weekend` CHAR(1), + |`d_following_holiday` CHAR(1), + |`d_first_dom` INT, + |`d_last_dom` INT, + |`d_same_day_ly` INT, + |`d_same_day_lq` INT, + |`d_current_day` CHAR(1), + |`d_current_week` CHAR(1), + |`d_current_month` CHAR(1), + |`d_current_quarter` CHAR(1), + |`d_current_year` CHAR(1) + """.stripMargin, + "household_demographics" -> + """ + |`hd_demo_sk` INT, + |`hd_income_band_sk` INT, + |`hd_buy_potential` CHAR(15), + |`hd_dep_count` INT, + |`hd_vehicle_count` INT + """.stripMargin, + "item" -> + """ + |`i_item_sk` INT, + |`i_item_id` CHAR(16), + |`i_rec_start_date` DATE, + |`i_rec_end_date` DATE, + |`i_item_desc` VARCHAR(200), + |`i_current_price` DECIMAL(7,2), + |`i_wholesale_cost` DECIMAL(7,2), + |`i_brand_id` INT, + |`i_brand` CHAR(50), + |`i_class_id` INT, + |`i_class` CHAR(50), + |`i_category_id` INT, + |`i_category` CHAR(50), + |`i_manufact_id` INT, + |`i_manufact` CHAR(50), + |`i_size` CHAR(20), + |`i_formulation` CHAR(20), + |`i_color` CHAR(20), + |`i_units` CHAR(10), + |`i_container` CHAR(10), + |`i_manager_id` INT, + |`i_product_name` CHAR(50) + """.stripMargin, + "income_band" -> + """ + |`ib_income_band_sk` INT, + |`ib_lower_bound` INT, + |`ib_upper_bound` INT + """.stripMargin, + "promotion" -> + """ + |`p_promo_sk` INT, + |`p_promo_id` 
CHAR(16), + |`p_start_date_sk` INT, + |`p_end_date_sk` INT, + |`p_item_sk` INT, + |`p_cost` DECIMAL(15,2), + |`p_response_target` INT, + |`p_promo_name` CHAR(50), + |`p_channel_dmail` CHAR(1), + |`p_channel_email` CHAR(1), + |`p_channel_catalog` CHAR(1), + |`p_channel_tv` CHAR(1), + |`p_channel_radio` CHAR(1), + |`p_channel_press` CHAR(1), + |`p_channel_event` CHAR(1), + |`p_channel_demo` CHAR(1), + |`p_channel_details` VARCHAR(100), + |`p_purpose` CHAR(15), + |`p_discount_active` CHAR(1) + """.stripMargin, + "reason" -> + """ + |`r_reason_sk` INT, + |`r_reason_id` CHAR(16), + |`r_reason_desc` CHAR(100) + """.stripMargin, + "ship_mode" -> + """ + |`sm_ship_mode_sk` INT, + |`sm_ship_mode_id` CHAR(16), + |`sm_type` CHAR(30), + |`sm_code` CHAR(10), + |`sm_carrier` CHAR(20), + |`sm_contract` CHAR(20) + """.stripMargin, + "time_dim" -> + """ + |`t_time_sk` INT, + |`t_time_id` CHAR(16), + |`t_time` INT, + |`t_hour` INT, + |`t_minute` INT, + |`t_second` INT, + |`t_am_pm` CHAR(2), + |`t_shift` CHAR(20), + |`t_sub_shift` CHAR(20), + |`t_meal_time` CHAR(20) + """.stripMargin + ) + + val tableNames: Iterable[String] = tableColumns.keys + + def createTable( + spark: SparkSession, + tableName: String, + format: String = "parquet", + options: Seq[String] = Nil): Unit = { + spark.sql( + s""" + |CREATE TABLE `$tableName` (${tableColumns(tableName)}) + |USING $format + |${options.mkString("\n")} + """.stripMargin) + } + + private val originalCBCEnabled = conf.cboEnabled + private val originalJoinReorderEnabled = conf.joinReorderEnabled + + override def beforeAll(): Unit = { + super.beforeAll() + tableNames.foreach { tableName => + createTable(spark, tableName) + } + } + + override def afterAll(): Unit = { + conf.setConf(SQLConf.CBO_ENABLED, originalCBCEnabled) + conf.setConf(SQLConf.JOIN_REORDER_ENABLED, originalJoinReorderEnabled) + tableNames.foreach { tableName => + spark.sessionState.catalog.dropTable(TableIdentifier(tableName), true, true) + } + super.afterAll() + } +}