
Commit ea86140: introduce join pushdown for dsv2
Committed May 16, 2025 · 1 parent 5b07e52

19 files changed: +840 −18 lines
 

‎common/utils/src/main/resources/error/error-conditions.json

Lines changed: 5 additions & 0 deletions
@@ -9403,6 +9403,11 @@
       "The number of fields (<numFields>) in the partition identifier is not equal to the partition schema length (<schemaLen>). The identifier might not refer to one partition."
     ]
   },
+  "_LEGACY_ERROR_TEMP_3209" : {
+    "message" : [
+      "Unexpected join type: <joinType>"
+    ]
+  },
   "_LEGACY_ERROR_TEMP_3215" : {
     "message" : [
       "Expected a Boolean type expression in replaceNullWithFalse, but got the type <dataType> in <expr>."
Inner.java (new file)

Lines changed: 28 additions & 0 deletions
@@ -0,0 +1,28 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.connector.join;
+
+import org.apache.spark.annotation.Evolving;
+
+/**
+ * Represents an INNER join in the public Join type API.
+ *
+ * @since 4.0.0
+ */
+@Evolving
+public final class Inner implements JoinType { }
JoinColumn.java (new file)

Lines changed: 51 additions & 0 deletions
@@ -0,0 +1,51 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.connector.join;
+
+import org.apache.spark.annotation.Evolving;
+import org.apache.spark.sql.connector.expressions.Expression;
+import org.apache.spark.sql.connector.expressions.NamedReference;
+
+/**
+ * Represents a column reference used in DSv2 Join pushdown.
+ *
+ * @since 4.0.0
+ */
+@Evolving
+public final class JoinColumn implements NamedReference {
+  public JoinColumn(String[] qualifier, String name, Boolean isInLeftSideOfJoin) {
+    this.qualifier = qualifier;
+    this.name = name;
+    this.isInLeftSideOfJoin = isInLeftSideOfJoin;
+  }
+
+  public String[] qualifier;
+  public String name;
+  public Boolean isInLeftSideOfJoin;
+
+  @Override
+  public String[] fieldNames() {
+    String[] fullyQualified = new String[qualifier.length + 1];
+    System.arraycopy(qualifier, 0, fullyQualified, 0, qualifier.length);
+    fullyQualified[qualifier.length] = name;
+    return fullyQualified;
+  }
+
+  @Override
+  public Expression[] children() { return EMPTY_EXPRESSION; }
+}
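
For illustration only (not part of the commit, and assuming the fieldNames() fix above that returns the fully qualified array), a minimal Scala sketch of how a JoinColumn combines its qualifier and name; the alias "subquery_0" is hypothetical:

import org.apache.spark.sql.connector.join.JoinColumn

// Hypothetical column DEPT coming from the left join subquery, qualified with
// the generated subquery alias "subquery_0".
val col = new JoinColumn(Array("subquery_0"), "DEPT", true)
// fieldNames() appends the name to the qualifier.
assert(col.fieldNames().sameElements(Array("subquery_0", "DEPT")))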
JoinType.java (new file)

Lines changed: 28 additions & 0 deletions
@@ -0,0 +1,28 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.connector.join;
+
+import org.apache.spark.annotation.Evolving;
+
+/**
+ * Base interface of the public Join type API.
+ *
+ * @since 4.0.0
+ */
+@Evolving
+public interface JoinType { }
SupportsPushDownJoin.java (new file)

Lines changed: 44 additions & 0 deletions
@@ -0,0 +1,44 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.connector.read;
+
+import java.util.Optional;
+
+import org.apache.spark.annotation.Evolving;
+import org.apache.spark.sql.connector.expressions.filter.Predicate;
+import org.apache.spark.sql.connector.join.JoinType;
+import org.apache.spark.sql.types.StructType;
+
+/**
+ * A mix-in interface for {@link ScanBuilder}. Data sources can implement this interface to
+ * push down join operators.
+ *
+ * @since 4.0.0
+ */
+@Evolving
+public interface SupportsPushDownJoin extends ScanBuilder {
+  boolean isRightSideCompatibleForJoin(SupportsPushDownJoin other);
+
+  boolean pushJoin(
+    SupportsPushDownJoin other,
+    JoinType joinType,
+    Optional<Predicate> condition,
+    StructType leftRequiredSchema,
+    StructType rightRequiredSchema
+  );
+}
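
A hedged sketch (not part of the commit) of how a connector's ScanBuilder might wire up this mix-in. The class MyScanBuilder and its endpoint field are hypothetical; only the two interface methods matter here:

import java.util.Optional

import org.apache.spark.sql.connector.expressions.filter.Predicate
import org.apache.spark.sql.connector.join.JoinType
import org.apache.spark.sql.connector.read.{Scan, ScanBuilder, SupportsPushDownJoin}
import org.apache.spark.sql.types.StructType

// Hypothetical connector: a join is only accepted when both sides talk to the
// same endpoint; the pushed join is remembered so build() could later emit a
// single joined query.
class MyScanBuilder(val endpoint: String) extends ScanBuilder with SupportsPushDownJoin {
  private var pushedJoin: Option[(MyScanBuilder, JoinType, Option[Predicate])] = None

  override def isRightSideCompatibleForJoin(other: SupportsPushDownJoin): Boolean =
    other match {
      case o: MyScanBuilder => o.endpoint == endpoint
      case _ => false
    }

  override def pushJoin(
      other: SupportsPushDownJoin,
      joinType: JoinType,
      condition: Optional[Predicate],
      leftRequiredSchema: StructType,
      rightRequiredSchema: StructType): Boolean = {
    // A real connector would also fold the required schemas into its output schema.
    pushedJoin = Some((other.asInstanceOf[MyScanBuilder], joinType,
      if (condition.isPresent) Some(condition.get) else None))
    true
  }

  // Minimal Scan so the sketch compiles; a real implementation would return a
  // scan over the joined query.
  override def build(): Scan = new Scan {
    override def readSchema(): StructType = new StructType()
  }
}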
JoinTypeSQLBuilder.java (new file)

Lines changed: 49 additions & 0 deletions
@@ -0,0 +1,49 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.connector.util;
+
+import org.apache.spark.SparkIllegalArgumentException;
+import org.apache.spark.sql.connector.join.*;
+
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * The builder to generate SQL for a specific join type.
+ *
+ * @since 4.0.0
+ */
+public class JoinTypeSQLBuilder {
+  public String build(JoinType joinType) {
+    if (joinType instanceof Inner inner) {
+      return visitInnerJoin(inner);
+    } else {
+      return visitUnexpectedJoinType(joinType);
+    }
+  }
+
+  protected String visitInnerJoin(Inner inner) {
+    return "INNER JOIN";
+  }
+
+  protected String visitUnexpectedJoinType(JoinType joinType) throws IllegalArgumentException {
+    Map<String, String> params = new HashMap<>();
+    params.put("joinType", String.valueOf(joinType));
+    throw new SparkIllegalArgumentException("_LEGACY_ERROR_TEMP_3209", params);
+  }
+}
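
For illustration, the builder can be exercised directly (this snippet is not in the diff); only the inner join type is translated by this commit, everything else goes through visitUnexpectedJoinType and throws:

import org.apache.spark.sql.connector.join.Inner
import org.apache.spark.sql.connector.util.JoinTypeSQLBuilder

val builder = new JoinTypeSQLBuilder
// The only join type supported by this commit compiles to "INNER JOIN".
assert(builder.build(new Inner) == "INNER JOIN")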

‎sql/catalyst/src/main/java/org/apache/spark/sql/connector/util/V2ExpressionSQLBuilder.java

Lines changed: 10 additions & 0 deletions
@@ -18,6 +18,7 @@
 package org.apache.spark.sql.connector.util;
 
 import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.List;
 import java.util.Map;
 import java.util.StringJoiner;
@@ -42,6 +43,7 @@
 import org.apache.spark.sql.connector.expressions.aggregate.GeneralAggregateFunc;
 import org.apache.spark.sql.connector.expressions.aggregate.Sum;
 import org.apache.spark.sql.connector.expressions.aggregate.UserDefinedAggregateFunc;
+import org.apache.spark.sql.connector.join.JoinColumn;
 import org.apache.spark.sql.types.DataType;
 
 /**
@@ -75,6 +77,8 @@ protected String escapeSpecialCharsForLikePattern(String str) {
   public String build(Expression expr) {
     if (expr instanceof Literal literal) {
       return visitLiteral(literal);
+    } else if (expr instanceof JoinColumn column) {
+      return visitJoinColumn(column);
     } else if (expr instanceof NamedReference namedReference) {
       return visitNamedReference(namedReference);
     } else if (expr instanceof Cast cast) {
@@ -174,6 +178,12 @@ protected String visitNamedReference(NamedReference namedRef) {
     return namedRef.toString();
   }
 
+  protected String visitJoinColumn(JoinColumn column) {
+    List<String> fullyQualifiedName = new ArrayList<>(Arrays.asList(column.qualifier));
+    fullyQualifiedName.add(column.name);
+    return joinListToString(fullyQualifiedName, ".", "", "");
+  }
+
   protected String visitIn(String v, List<String> list) {
     if (list.isEmpty()) {
       return "CASE WHEN " + v + " IS NULL THEN NULL ELSE FALSE END";

‎sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/namedExpressions.scala

Lines changed: 8 additions & 0 deletions
@@ -394,6 +394,14 @@ case class AttributeReference(
   }
 }
 
+case class JoinColumnReference(
+    originalReference: AttributeReference,
+    isReferringColumnFromLeftSubquery: Boolean = true)
+  extends LeafExpression with Unevaluable {
+  override def nullable: Boolean = originalReference.nullable
+  override def dataType: DataType = originalReference.dataType
+}
+
 /**
  * A place holder used when printing expressions without debugging information such as the
  * expression id or the unresolved indicator.

‎sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/V2ExpressionBuilder.scala

Lines changed: 7 additions & 0 deletions
@@ -26,6 +26,7 @@ import org.apache.spark.sql.connector.catalog.functions.ScalarFunction
 import org.apache.spark.sql.connector.expressions.{Cast => V2Cast, Expression => V2Expression, Extract => V2Extract, FieldReference, GeneralScalarExpression, LiteralValue, NullOrdering, SortDirection, SortValue, UserDefinedScalarFunc}
 import org.apache.spark.sql.connector.expressions.aggregate.{AggregateFunc, Avg, Count, CountStar, GeneralAggregateFunc, Max, Min, Sum, UserDefinedAggregateFunc}
 import org.apache.spark.sql.connector.expressions.filter.{AlwaysFalse, AlwaysTrue, And => V2And, Not => V2Not, Or => V2Or, Predicate => V2Predicate}
+import org.apache.spark.sql.connector.join.JoinColumn
 import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.sql.types.{BooleanType, DataType, IntegerType, StringType}
 
@@ -79,6 +80,12 @@ class V2ExpressionBuilder(e: Expression, isPredicate: Boolean = false) extends L
     case Literal(true, BooleanType) => Some(new AlwaysTrue())
     case Literal(false, BooleanType) => Some(new AlwaysFalse())
     case Literal(value, dataType) => Some(LiteralValue(value, dataType))
+    case joinRefColumn: JoinColumnReference =>
+      Some(new JoinColumn(
+        Array(),
+        joinRefColumn.originalReference.name,
+        joinRefColumn.isReferringColumnFromLeftSubquery
+      ))
     case col @ ColumnOrField(nameParts) =>
       val ref = FieldReference(nameParts)
       if (isPredicate && col.dataType.isInstanceOf[BooleanType]) {

‎sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala

Lines changed: 10 additions & 0 deletions
@@ -1640,6 +1640,14 @@ object SQLConf {
     .booleanConf
     .createWithDefault(!Utils.isTesting)
 
+  val DATA_SOURCE_V2_JOIN_PUSHDOWN =
+    buildConf("spark.sql.optimizer.datasourceV2JoinPushdown")
+      .internal()
+      .doc("When this config is set to true, Spark tries to push down joins " +
+        "for DSv2 data sources in the V2ScanRelationPushdown optimization rule.")
+      .booleanConf
+      .createWithDefault(false)
+
   // This is used to set the default data source
   val DEFAULT_DATA_SOURCE_NAME = buildConf("spark.sql.sources.default")
     .doc("The default data source to use in input/output.")
@@ -5988,6 +5996,8 @@ class SQLConf extends Serializable with Logging with SqlApiConf {
 
   def expressionTreeChangeLogLevel: Level = getConf(EXPRESSION_TREE_CHANGE_LOG_LEVEL)
 
+  def dataSourceV2JoinPushdown: Boolean = getConf(DATA_SOURCE_V2_JOIN_PUSHDOWN)
+
  def dynamicPartitionPruningEnabled: Boolean = getConf(DYNAMIC_PARTITION_PRUNING_ENABLED)
 
  def dynamicPartitionPruningUseStats: Boolean = getConf(DYNAMIC_PARTITION_PRUNING_USE_STATS)
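
As a usage note (not part of the diff, and assuming an active `spark` session), the flag can be toggled per session with the key defined above:

// Join pushdown for DSv2 sources is off by default; opt in per session.
spark.conf.set("spark.sql.optimizer.datasourceV2JoinPushdown", "true")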

‎sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala

Lines changed: 8 additions & 1 deletion
@@ -189,6 +189,12 @@ case class RowDataSourceScanExec(
       seqToString(markedFilters.toSeq)
     }
 
+    val pushedJoins = if (pushedDownOperators.pushedJoins.nonEmpty) {
+      Map("PushedJoins" -> seqToString(pushedDownOperators.pushedJoins))
+    } else {
+      Map()
+    }
+
     Map("ReadSchema" -> requiredSchema.catalogString,
       "PushedFilters" -> pushedFilters) ++
       pushedDownOperators.aggregation.fold(Map[String, String]()) { v =>
@@ -200,7 +206,8 @@ case class RowDataSourceScanExec(
       offsetInfo ++
       pushedDownOperators.sample.map(v => "PushedSample" ->
         s"SAMPLE (${(v.upperBound - v.lowerBound) * 100}) ${v.withReplacement} SEED(${v.seed})"
-      )
+      ) ++
+      pushedJoins
   }
 
   // Don't care about `rdd` and `tableIdentifier`, and `stream` when canonicalizing.

‎sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategy.scala

Lines changed: 9 additions & 0 deletions
@@ -37,6 +37,7 @@ import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder
 import org.apache.spark.sql.catalyst.expressions._
 import org.apache.spark.sql.catalyst.expressions.aggregate.AggregateExpression
 import org.apache.spark.sql.catalyst.planning.PhysicalOperation
+import org.apache.spark.sql.catalyst.plans.{Inner, JoinType}
 import org.apache.spark.sql.catalyst.plans.logical.{AppendData, InsertIntoDir, InsertIntoStatement, LogicalPlan, Project}
 import org.apache.spark.sql.catalyst.rules.Rule
 import org.apache.spark.sql.catalyst.streaming.StreamingRelationV2
@@ -47,6 +48,7 @@ import org.apache.spark.sql.connector.catalog.{SupportsRead, V1Table}
 import org.apache.spark.sql.connector.catalog.TableCapability._
 import org.apache.spark.sql.connector.expressions.{Expression => V2Expression, NullOrdering, SortDirection, SortOrder => V2SortOrder, SortValue}
 import org.apache.spark.sql.connector.expressions.aggregate.{AggregateFunc, Aggregation}
+import org.apache.spark.sql.connector.join.{Inner => V2Inner, JoinType => V2JoinType}
 import org.apache.spark.sql.errors.QueryCompilationErrors
 import org.apache.spark.sql.execution
 import org.apache.spark.sql.execution.{RowDataSourceScanExec, SparkPlan}
@@ -501,6 +503,13 @@ object DataSourceStrategy
     }
   }
 
+  def translateJoinType(joinType: JoinType): Option[V2JoinType] = {
+    joinType match {
+      case Inner => Some(new V2Inner)
+      case _ => None
+    }
+  }
+
   /**
   * Convert RDD of Row into RDD of InternalRow with objects in catalyst types
   */
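
A small illustration (not in the diff) of how the translation behaves; only inner joins map to the DSv2 join type, everything else falls through the catch-all and stays in Spark:

import org.apache.spark.sql.catalyst.plans.{FullOuter, Inner}
import org.apache.spark.sql.execution.datasources.DataSourceStrategy

// Inner translates to the V2 join type; unsupported types return None, so the
// join is executed by Spark instead of being pushed to the source.
assert(DataSourceStrategy.translateJoinType(Inner).isDefined)
assert(DataSourceStrategy.translateJoinType(FullOuter).isEmpty)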

‎sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCOptions.scala

Lines changed: 7 additions & 0 deletions
@@ -104,6 +104,8 @@ class JDBCOptions(
     }
   }
 
+  var containsJoinInQuery: Boolean = false
+
   // ------------------------------------------------------------
   // Optional parameters
   // ------------------------------------------------------------
@@ -215,6 +217,10 @@ class JDBCOptions(
   // This only applies to Data Source V2 JDBC
   val pushDownTableSample = parameters.getOrElse(JDBC_PUSHDOWN_TABLESAMPLE, "true").toBoolean
 
+  // An option to allow/disallow pushing down JOIN into JDBC data source
+  // This only applies to Data Source V2 JDBC
+  val pushDownJoin = parameters.getOrElse(JDBC_PUSHDOWN_JOIN, "true").toBoolean
+
   // The local path of user's keytab file, which is assumed to be pre-uploaded to all nodes either
   // by --files option of spark-submit or manually
   val keytab = {
@@ -321,6 +327,7 @@ object JDBCOptions {
   val JDBC_PUSHDOWN_LIMIT = newOption("pushDownLimit")
   val JDBC_PUSHDOWN_OFFSET = newOption("pushDownOffset")
   val JDBC_PUSHDOWN_TABLESAMPLE = newOption("pushDownTableSample")
+  val JDBC_PUSHDOWN_JOIN = newOption("pushDownJoin")
   val JDBC_KEYTAB = newOption("keytab")
   val JDBC_PRINCIPAL = newOption("principal")
   val JDBC_TABLE_COMMENT = newOption("tableComment")

‎sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/PushedDownOperators.scala

Lines changed: 2 additions & 1 deletion
@@ -30,6 +30,7 @@ case class PushedDownOperators(
     limit: Option[Int],
     offset: Option[Int],
     sortValues: Seq[SortOrder],
-    pushedPredicates: Seq[Predicate]) {
+    pushedPredicates: Seq[Predicate],
+    pushedJoins: Seq[String] = Seq()) {
   assert((limit.isEmpty && sortValues.isEmpty) || limit.isDefined)
 }

‎sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2ScanRelationPushDown.scala

Lines changed: 147 additions & 12 deletions
@@ -18,20 +18,21 @@
 package org.apache.spark.sql.execution.datasources.v2
 
 import scala.collection.mutable
+import scala.jdk.OptionConverters._
 
 import org.apache.spark.internal.LogKeys.{AGGREGATE_FUNCTIONS, GROUP_BY_EXPRS, POST_SCAN_FILTERS, PUSHED_FILTERS, RELATION_NAME, RELATION_OUTPUT}
 import org.apache.spark.internal.MDC
-import org.apache.spark.sql.catalyst.expressions.{aggregate, Alias, And, Attribute, AttributeMap, AttributeReference, AttributeSet, Cast, Expression, IntegerLiteral, Literal, NamedExpression, PredicateHelper, ProjectionOverSchema, SortOrder, SubqueryExpression}
+import org.apache.spark.sql.catalyst.expressions.{aggregate, Alias, And, Attribute, AttributeMap, AttributeReference, AttributeSet, Cast, Expression, ExprId, IntegerLiteral, JoinColumnReference, Literal, NamedExpression, PredicateHelper, ProjectionOverSchema, SortOrder, SubqueryExpression}
 import org.apache.spark.sql.catalyst.expressions.aggregate.AggregateExpression
 import org.apache.spark.sql.catalyst.optimizer.CollapseProject
 import org.apache.spark.sql.catalyst.planning.{PhysicalOperation, ScanOperation}
-import org.apache.spark.sql.catalyst.plans.logical.{Aggregate, Filter, LeafNode, Limit, LimitAndOffset, LocalLimit, LogicalPlan, Offset, OffsetAndLimit, Project, Sample, Sort}
+import org.apache.spark.sql.catalyst.plans.logical.{Aggregate, Filter, Join, LeafNode, Limit, LimitAndOffset, LocalLimit, LogicalPlan, Offset, OffsetAndLimit, Project, Sample, Sort}
 import org.apache.spark.sql.catalyst.rules.Rule
-import org.apache.spark.sql.catalyst.types.DataTypeUtils.toAttributes
+import org.apache.spark.sql.catalyst.types.DataTypeUtils.{fromAttributes, toAttributes}
 import org.apache.spark.sql.connector.expressions.{SortOrder => V2SortOrder}
 import org.apache.spark.sql.connector.expressions.aggregate.{Aggregation, Avg, Count, CountStar, Max, Min, Sum}
 import org.apache.spark.sql.connector.expressions.filter.Predicate
-import org.apache.spark.sql.connector.read.{Scan, ScanBuilder, SupportsPushDownAggregates, SupportsPushDownFilters, V1Scan}
+import org.apache.spark.sql.connector.read.{Scan, ScanBuilder, SupportsPushDownAggregates, SupportsPushDownFilters, SupportsPushDownJoin, V1Scan}
 import org.apache.spark.sql.execution.datasources.DataSourceStrategy
 import org.apache.spark.sql.sources
 import org.apache.spark.sql.types.{DataType, DecimalType, IntegerType, StructType}
@@ -46,9 +47,11 @@ object V2ScanRelationPushDown extends Rule[LogicalPlan] with PredicateHelper {
       createScanBuilder,
       pushDownSample,
       pushDownFilters,
+      pushDownJoin,
       pushDownAggregates,
       pushDownLimitAndOffset,
       buildScanWithPushedAggregate,
+      buildScanWithPushedJoin,
       pruneColumns)
 
     pushdownRules.foldLeft(plan) { (newPlan, pushDownRule) =>
@@ -58,7 +61,14 @@ object V2ScanRelationPushDown extends Rule[LogicalPlan] with PredicateHelper {
 
   private def createScanBuilder(plan: LogicalPlan) = plan.transform {
     case r: DataSourceV2Relation =>
-      ScanBuilderHolder(r.output, r, r.table.asReadable.newScanBuilder(r.options))
+      val sHolder = ScanBuilderHolder(r.output, r, r.table.asReadable.newScanBuilder(r.options))
+      sHolder.output.foreach { e =>
+        // Join column names change when joins are pushed down. At the end, we need to keep the
+        // original names of the plan, so we store the original name for each exprId.
+        sHolder.exprIdToOriginalName.put(e.exprId, e.name)
+      }
+
+      sHolder
   }
 
   private def pushDownFilters(plan: LogicalPlan) = plan.transform {
@@ -98,6 +108,89 @@ object V2ScanRelationPushDown extends Rule[LogicalPlan] with PredicateHelper {
       filterCondition.map(Filter(_, sHolder)).getOrElse(sHolder)
   }
 
+  def pushDownJoin(plan: LogicalPlan): LogicalPlan = plan.transformUp {
+    // A join can be pushed down only if the left and right sides of the join are
+    // compatible (same data source, for example). Another requirement is that any
+    // projections between the Join and the ScanBuilderHolder are simple attribute references.
+    case node @ Join(
+        PhysicalOperation(
+          leftProjections,
+          Nil,
+          leftHolder @ ScanBuilderHolder(_, _, lBuilder: SupportsPushDownJoin)
+        ),
+        PhysicalOperation(
+          rightProjections,
+          Nil,
+          rightHolder @ ScanBuilderHolder(_, _, rBuilder: SupportsPushDownJoin)
+        ),
+        joinType,
+        condition,
+        _) if conf.dataSourceV2JoinPushdown &&
+          // TODO: projections should always be Seq[AttributeReference] because when
+          // SELECT tbl1.col + 2, tbl2.* FROM tbl1 JOIN tbl2
+          // is executed, col is pruned down, but col + 2 is projected on top of the join.
+          leftProjections.forall(_.isInstanceOf[AttributeReference]) &&
+          rightProjections.forall(_.isInstanceOf[AttributeReference]) &&
+          lBuilder.isRightSideCompatibleForJoin(rBuilder) =>
+      val normalizedLeftProjections = DataSourceStrategy.normalizeExprs(
+        leftProjections,
+        leftHolder.output
+      ).asInstanceOf[Seq[AttributeReference]]
+      val leftRequiredSchema = fromAttributes(normalizedLeftProjections)
+
+      val normalizedRightProjections = DataSourceStrategy.normalizeExprs(
+        rightProjections,
+        rightHolder.output
+      ).asInstanceOf[Seq[AttributeReference]]
+      val rightRequiredSchema = fromAttributes(normalizedRightProjections)
+
+      val normalizedCondition = condition.map { e =>
+        DataSourceStrategy.normalizeExprs(
+          Seq(e),
+          leftHolder.output ++ rightHolder.output
+        ).head
+      }
+
+      val conditionWithJoinColumns = normalizedCondition.map { cond =>
+        cond.transformUp {
+          case a: AttributeReference =>
+            val isInLeftSide = leftProjections.exists(_.exprId == a.exprId)
+            JoinColumnReference(a, isInLeftSide)
+        }
+      }
+
+      val translatedCondition =
+        conditionWithJoinColumns.flatMap(DataSourceV2Strategy.translateFilterV2(_))
+      val translatedJoinType = DataSourceStrategy.translateJoinType(joinType)
+
+      if (translatedCondition.isDefined == condition.isDefined &&
+        translatedJoinType.isDefined &&
+        lBuilder.pushJoin(
+          rBuilder,
+          translatedJoinType.get,
+          translatedCondition.toJava,
+          leftRequiredSchema,
+          rightRequiredSchema
+        )) {
+        leftHolder.joinedRelations = leftHolder.joinedRelations :+ rightHolder.relation
+
+        val newSchema = leftHolder.builder.build().readSchema()
+        val newOutput = (leftProjections ++ rightProjections).asInstanceOf[Seq[AttributeReference]]
+          .zip(newSchema.fields)
+          .map { case (attr, schemaField) =>
+            attr.withName(schemaField.name)
+          }
+
+        leftHolder.exprIdToOriginalName ++= rightHolder.exprIdToOriginalName
+        leftHolder.output = newOutput
+        leftHolder.isJoinPushed = true
+        leftHolder
+      } else {
+        node
+      }
+  }
+
   def pushDownAggregates(plan: LogicalPlan): LogicalPlan = plan.transform {
     // update the scan builder with agg pushdown and return a new plan with agg pushed
     case agg: Aggregate => rewriteAggregate(agg)
@@ -113,10 +206,19 @@ object V2ScanRelationPushDown extends Rule[LogicalPlan] with PredicateHelper {
 
     val aggExprToOutputOrdinal = mutable.HashMap.empty[Expression, Int]
     val aggregates = collectAggregates(actualResultExprs, aggExprToOutputOrdinal)
-    val normalizedAggExprs = DataSourceStrategy.normalizeExprs(
-      aggregates, holder.relation.output).asInstanceOf[Seq[AggregateExpression]]
-    val normalizedGroupingExpr = DataSourceStrategy.normalizeExprs(
-      actualGroupExprs, holder.relation.output)
+    val normalizedAggExprs = if (holder.isJoinPushed) {
+      DataSourceStrategy.normalizeExprs(aggregates, holder.output)
+        .asInstanceOf[Seq[AggregateExpression]]
+    } else {
+      DataSourceStrategy.normalizeExprs(aggregates, holder.relation.output)
+        .asInstanceOf[Seq[AggregateExpression]]
+    }
+    val normalizedGroupingExpr =
+      if (holder.isJoinPushed) {
+        DataSourceStrategy.normalizeExprs(actualGroupExprs, holder.output)
+      } else {
+        DataSourceStrategy.normalizeExprs(actualGroupExprs, holder.relation.output)
+      }
     val translatedAggOpt = DataSourceStrategy.translateAggregation(
       normalizedAggExprs, normalizedGroupingExpr)
     if (translatedAggOpt.isEmpty) {
@@ -356,6 +458,26 @@ object V2ScanRelationPushDown extends Rule[LogicalPlan] with PredicateHelper {
       Project(projectList, scanRelation)
   }
 
+  def buildScanWithPushedJoin(plan: LogicalPlan): LogicalPlan = plan.transform {
+    case holder: ScanBuilderHolder if holder.isJoinPushed && !holder.isStreaming =>
+      val scan = holder.builder.build()
+      val realOutput = toAttributes(scan.readSchema())
+      assert(realOutput.length == holder.output.length,
+        "The data source returns unexpected number of columns")
+      val wrappedScan = getWrappedScan(scan, holder)
+      val scanRelation = DataSourceV2ScanRelation(holder.relation, wrappedScan, realOutput)
+
+      // When a join is pushed down, the output of ScanBuilderHolder is, for example,
+      // subquery_2_col_0#0, subquery_2_col_1#1, subquery_2_col_2#2.
+      // We should revert these names back to the original names, for example
+      // SALARY#0, NAME#1, DEPT#2. This is done by adding a projection with appropriate aliases.
+      val projectList = realOutput.zip(holder.output).map { case (a1, a2) =>
+        val originalName = holder.exprIdToOriginalName(a2.exprId)
+        Alias(a1, originalName)(a2.exprId)
+      }
+      Project(projectList, scanRelation)
+  }
+
   def pruneColumns(plan: LogicalPlan): LogicalPlan = plan.transform {
     case ScanOperation(project, filtersStayUp, filtersPushDown, sHolder: ScanBuilderHolder) =>
       // column pruning
@@ -441,8 +563,13 @@ object V2ScanRelationPushDown extends Rule[LogicalPlan] with PredicateHelper {
       } else {
         aliasReplacedOrder.asInstanceOf[Seq[SortOrder]]
       }
-      val normalizedOrders = DataSourceStrategy.normalizeExprs(
-        newOrder, sHolder.relation.output).asInstanceOf[Seq[SortOrder]]
+      val normalizedOrders = if (sHolder.isJoinPushed) {
+        DataSourceStrategy.normalizeExprs(
+          newOrder, sHolder.output).asInstanceOf[Seq[SortOrder]]
+      } else {
+        DataSourceStrategy.normalizeExprs(
+          newOrder, sHolder.relation.output).asInstanceOf[Seq[SortOrder]]
+      }
       val orders = DataSourceStrategy.translateSortOrders(normalizedOrders)
       if (orders.length == order.length) {
         val (isPushed, isPartiallyPushed) =
@@ -549,7 +676,8 @@ object V2ScanRelationPushDown extends Rule[LogicalPlan] with PredicateHelper {
         case _ => Array.empty[sources.Filter]
       }
       val pushedDownOperators = PushedDownOperators(sHolder.pushedAggregate, sHolder.pushedSample,
-        sHolder.pushedLimit, sHolder.pushedOffset, sHolder.sortOrders, sHolder.pushedPredicates)
+        sHolder.pushedLimit, sHolder.pushedOffset, sHolder.sortOrders, sHolder.pushedPredicates,
+        sHolder.joinedRelations.map(_.name))
       V1ScanWrapper(v1, pushedFilters.toImmutableArraySeq, pushedDownOperators)
     case _ => scan
   }
@@ -573,6 +701,13 @@ case class ScanBuilderHolder(
   var pushedAggregate: Option[Aggregation] = None
 
   var pushedAggOutputMap: AttributeMap[Expression] = AttributeMap.empty[Expression]
+
+  var joinedRelations: Seq[DataSourceV2RelationBase] = Seq()
+
+  var isJoinPushed: Boolean = false
+
+  var exprIdToOriginalName: scala.collection.mutable.Map[ExprId, String] =
+    scala.collection.mutable.Map.empty[ExprId, String]
 }
 
 // A wrapper for v1 scan to carry the translated filters and the handled ones, along with

‎sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCScanBuilder.scala

Lines changed: 141 additions & 2 deletions
@@ -16,14 +16,18 @@
  */
 package org.apache.spark.sql.execution.datasources.v2.jdbc
 
+import java.util.Optional
+
+import scala.jdk.OptionConverters._
 import scala.util.control.NonFatal
 
 import org.apache.spark.internal.Logging
 import org.apache.spark.sql.SparkSession
 import org.apache.spark.sql.connector.expressions.{FieldReference, SortOrder}
 import org.apache.spark.sql.connector.expressions.aggregate.Aggregation
 import org.apache.spark.sql.connector.expressions.filter.Predicate
-import org.apache.spark.sql.connector.read.{ScanBuilder, SupportsPushDownAggregates, SupportsPushDownLimit, SupportsPushDownOffset, SupportsPushDownRequiredColumns, SupportsPushDownTableSample, SupportsPushDownTopN, SupportsPushDownV2Filters}
+import org.apache.spark.sql.connector.join.{JoinColumn, JoinType}
+import org.apache.spark.sql.connector.read.{ScanBuilder, SupportsPushDownAggregates, SupportsPushDownJoin, SupportsPushDownLimit, SupportsPushDownOffset, SupportsPushDownRequiredColumns, SupportsPushDownTableSample, SupportsPushDownTopN, SupportsPushDownV2Filters}
 import org.apache.spark.sql.execution.datasources.PartitioningUtils
 import org.apache.spark.sql.execution.datasources.jdbc.{JDBCOptions, JDBCRDD, JDBCRelation}
 import org.apache.spark.sql.execution.datasources.v2.TableSampleInfo
@@ -33,7 +37,7 @@ import org.apache.spark.sql.types.StructType
 case class JDBCScanBuilder(
     session: SparkSession,
     schema: StructType,
-    jdbcOptions: JDBCOptions)
+    var jdbcOptions: JDBCOptions)
   extends ScanBuilder
   with SupportsPushDownV2Filters
   with SupportsPushDownRequiredColumns
@@ -42,6 +46,7 @@ case class JDBCScanBuilder(
   with SupportsPushDownOffset
   with SupportsPushDownTableSample
   with SupportsPushDownTopN
+  with SupportsPushDownJoin
   with Logging {
 
   private val dialect = JdbcDialects.get(jdbcOptions.url)
@@ -121,6 +126,128 @@ case class JDBCScanBuilder(
     }
   }
 
+  override def isRightSideCompatibleForJoin(other: SupportsPushDownJoin): Boolean = {
+    other.isInstanceOf[JDBCScanBuilder] &&
+      jdbcOptions.url == other.asInstanceOf[JDBCScanBuilder].jdbcOptions.url
+  }
+
+  override def pushJoin(
+      other: SupportsPushDownJoin,
+      joinType: JoinType,
+      condition: Optional[Predicate],
+      leftRequiredSchema: StructType,
+      rightRequiredSchema: StructType
+  ): Boolean = {
+    if (!jdbcOptions.pushDownJoin || !dialect.supportsJoin) return false
+
+    val leftNodeSQLQuery = buildSQLQuery()
+    val rightNodeSQLQuery = other.asInstanceOf[JDBCScanBuilder].buildSQLQuery()
+
+    val leftSideQualifier = JoinOutputAliasIterator.get
+    val rightSideQualifier = JoinOutputAliasIterator.get
+
+    val leftProjections: Seq[JoinColumn] = leftRequiredSchema.fields.map { e =>
+      new JoinColumn(Array(leftSideQualifier), e.name, true)
+    }.toSeq
+    val rightProjections: Seq[JoinColumn] = rightRequiredSchema.fields.map { e =>
+      new JoinColumn(Array(rightSideQualifier), e.name, false)
+    }.toSeq
+
+    var aliasedLeftSchema = StructType(Seq())
+    var aliasedRightSchema = StructType(Seq())
+    val outputAliasPrefix = JoinOutputAliasIterator.get
+
+    val aliasedOutput = (leftProjections ++ rightProjections)
+      .zipWithIndex
+      .map { case (proj, i) =>
+        val name = s"${outputAliasPrefix}_col_$i"
+        val output = FieldReference(name)
+        if (i < leftProjections.length) {
+          val field = leftRequiredSchema.fields(i)
+          aliasedLeftSchema =
+            aliasedLeftSchema.add(name, field.dataType, field.nullable, field.metadata)
+        } else {
+          val field = rightRequiredSchema.fields(i - leftRequiredSchema.fields.length)
+          aliasedRightSchema =
+            aliasedRightSchema.add(name, field.dataType, field.nullable, field.metadata)
+        }
+
+        s"""${dialect.compileExpression(proj).get} AS ${dialect.compileExpression(output).get}"""
+      }.mkString(",")
+
+    val compiledJoinType = dialect.compileJoinType(joinType)
+    if (!compiledJoinType.isDefined) return false
+
+    val conditionString = condition.toScala match {
+      case Some(cond) =>
+        qualifyCondition(cond, leftSideQualifier, rightSideQualifier)
+        s"ON ${dialect.compileExpression(cond).get}"
+      case _ => ""
+    }
+
+    val subqueryASKeyword = if (dialect.needsASKeywordForJoinSubquery) {
+      " AS "
+    } else {
+      ""
+    }
+
+    val compiledLeftSideQualifier =
+      dialect.compileExpression(FieldReference(leftSideQualifier)).get
+    val compiledRightSideQualifier =
+      dialect.compileExpression(FieldReference(rightSideQualifier)).get
+
+    val joinQuery =
+      s"""
+        |SELECT $aliasedOutput FROM
+        |($leftNodeSQLQuery)$subqueryASKeyword$compiledLeftSideQualifier
+        |${compiledJoinType.get}
+        |($rightNodeSQLQuery)$subqueryASKeyword$compiledRightSideQualifier
+        |$conditionString
+        |""".stripMargin
+
+    val newMap = jdbcOptions.parameters.originalMap +
+      (JDBCOptions.JDBC_QUERY_STRING -> joinQuery) - (JDBCOptions.JDBC_TABLE_NAME)
+
+    jdbcOptions = new JDBCOptions(newMap)
+    jdbcOptions.containsJoinInQuery = true
+
+    // We can merge schemas since there are no fields with duplicate names
+    finalSchema = aliasedLeftSchema.merge(aliasedRightSchema)
+    pushedPredicate = Array.empty[Predicate]
+    pushedAggregateList = Array()
+    pushedGroupBys = None
+    tableSample = None
+    pushedLimit = 0
+    sortOrders = Array.empty[String]
+    pushedOffset = 0
+
+    true
+  }
+
+  def buildSQLQuery(): String = {
+    build()
+      .toV1TableScan(session.sqlContext).asInstanceOf[JDBCV1RelationFromV2Scan]
+      .buildScan().asInstanceOf[JDBCRDD]
+      .getExternalEngineQuery
+  }
+
+  // Fully qualify the condition. For example:
+  // DEPT = SALARY turns into leftSideQualifier.DEPT = rightSideQualifier.SALARY
+  def qualifyCondition(condition: Predicate, leftSideQualifier: String, rightSideQualifier: String)
+      : Unit = {
+    condition.references()
+      .filter(_.isInstanceOf[JoinColumn])
+      .foreach { e =>
+        val qualifier = if (e.asInstanceOf[JoinColumn].isInLeftSideOfJoin) {
+          leftSideQualifier
+        } else {
+          rightSideQualifier
+        }
+
+        e.asInstanceOf[JoinColumn].qualifier = Array(qualifier)
+      }
+  }
+
   override def pushTableSample(
       lowerBound: Double,
       upperBound: Double,
@@ -195,3 +322,15 @@ case class JDBCScanBuilder(
       pushedAggregateList, pushedGroupBys, tableSample, pushedLimit, sortOrders, pushedOffset)
   }
 }
+
+object JoinOutputAliasIterator {
+  private var curId = new java.util.concurrent.atomic.AtomicLong()
+
+  def get: String = {
+    "subquery_" + curId.getAndIncrement()
+  }
+
+  def reset(): Unit = {
+    curId = new java.util.concurrent.atomic.AtomicLong()
+  }
+}
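
For orientation only (this example is not in the diff, the table name and aliases are hypothetical, and the exact quoting depends on the dialect), the query that pushJoin assembles for two JDBC-side scans has roughly this shape:

// Shape of the SQL stored in JDBC_QUERY_STRING after pushJoin, assuming two
// scans over a table named "employee" joined on an equality condition.
val exampleJoinQuery =
  """SELECT "subquery_0"."DEPT" AS "subquery_2_col_0", "subquery_1"."DEPT" AS "subquery_2_col_1" FROM
    |(SELECT "DEPT" FROM "employee") AS "subquery_0"
    |INNER JOIN
    |(SELECT "DEPT" FROM "employee") AS "subquery_1"
    |ON "subquery_0"."DEPT" = "subquery_1"."DEPT"
    |""".stripMargin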

‎sql/core/src/main/scala/org/apache/spark/sql/jdbc/H2Dialect.scala

Lines changed: 2 additions & 0 deletions
@@ -298,4 +298,6 @@ private[sql] case class H2Dialect() extends JdbcDialect with NoLegacyJDBCError {
   override def supportsLimit: Boolean = true
 
   override def supportsOffset: Boolean = true
+
+  override def supportsJoin: Boolean = true
 }

‎sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala

Lines changed: 35 additions & 1 deletion
@@ -43,7 +43,8 @@ import org.apache.spark.sql.connector.catalog.index.TableIndex
 import org.apache.spark.sql.connector.expressions.{Expression, Literal, NamedReference}
 import org.apache.spark.sql.connector.expressions.aggregate.AggregateFunc
 import org.apache.spark.sql.connector.expressions.filter.Predicate
-import org.apache.spark.sql.connector.util.V2ExpressionSQLBuilder
+import org.apache.spark.sql.connector.join.{JoinColumn, JoinType}
+import org.apache.spark.sql.connector.util.{JoinTypeSQLBuilder, V2ExpressionSQLBuilder}
 import org.apache.spark.sql.errors.QueryCompilationErrors
 import org.apache.spark.sql.execution.datasources.jdbc.{DriverRegistry, JDBCOptions, JdbcOptionsInWrite, JdbcUtils}
 import org.apache.spark.sql.execution.datasources.jdbc.connection.ConnectionProvider
@@ -405,6 +406,10 @@ abstract class JdbcDialect extends Serializable with Logging {
       quoteIdentifier(namedRef.fieldNames.head)
     }
 
+    override def visitJoinColumn(column: JoinColumn): String = {
+      (column.qualifier.toSeq ++ Seq(column.name)).map(quoteIdentifier(_)).mkString(".")
+    }
+
    override def visitCast(expr: String, exprDataType: DataType, dataType: DataType): String = {
      val databaseTypeDefinition =
        getJDBCType(dataType).map(_.databaseTypeDefinition).getOrElse(dataType.typeName)
@@ -491,6 +496,8 @@ abstract class JdbcDialect extends Serializable with Logging {
     }
   }
 
+  private[jdbc] class JDBCJoinTypeSQLBuilder extends JoinTypeSQLBuilder {}
+
   /**
   * Returns whether the database supports function.
   * @param funcName Upper-cased function name
@@ -516,6 +523,18 @@ abstract class JdbcDialect extends Serializable with Logging {
     }
   }
 
+  @Since("4.0.0")
+  def compileJoinType(joinType: JoinType): Option[String] = {
+    val joinTypeBuilder = new JDBCJoinTypeSQLBuilder()
+    try {
+      Some(joinTypeBuilder.build(joinType))
+    } catch {
+      case NonFatal(e) =>
+        logWarning("Error occurred while compiling join type", e)
+        None
+    }
+  }
+
   /**
   * Converts aggregate function to String representing a SQL expression.
   * @param aggFunction The aggregate function to be converted.
@@ -837,6 +856,21 @@ abstract class JdbcDialect extends Serializable with Logging {
 
   def supportsHint: Boolean = false
 
+  /**
+   * Returns true if the dialect supports the JOIN operator.
+   */
+  def supportsJoin: Boolean = false
+
+  /**
+   * If true, the left/right subqueries of a JOIN need the AS keyword before their alias.
+   * For example:
+   *   SELECT * FROM (subquery1) AS alias1 JOIN ...
+   *
+   * If false, the generated SQL omits the AS keyword, so the query looks like:
+   *   SELECT * FROM (subquery1) alias1 JOIN ...
+   */
+  def needsASKeywordForJoinSubquery: Boolean = true
+
   /**
   * Return the DB-specific quoted and fully qualified table name
   */
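
A hedged sketch (not in the commit) of how another dialect could opt in; the dialect class and URL prefix here are hypothetical, only the two new hooks are shown:

import org.apache.spark.sql.jdbc.JdbcDialect

// Hypothetical dialect that accepts join pushdown and whose SQL engine does
// not want the AS keyword in front of subquery aliases.
case class MyDialect() extends JdbcDialect {
  override def canHandle(url: String): Boolean = url.startsWith("jdbc:mydb")

  // Advertise join support so JDBCScanBuilder.pushJoin does not bail out early.
  override def supportsJoin: Boolean = true

  // Generate "(subquery) alias" instead of "(subquery) AS alias".
  override def needsASKeywordForJoinSubquery: Boolean = false
}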

‎sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCV2Suite.scala

Lines changed: 249 additions & 1 deletion
@@ -29,7 +29,7 @@ import org.apache.spark.{SparkConf, SparkException, SparkIllegalArgumentExceptio
 import org.apache.spark.sql.{AnalysisException, DataFrame, ExplainSuiteHelper, QueryTest, Row}
 import org.apache.spark.sql.catalyst.InternalRow
 import org.apache.spark.sql.catalyst.analysis.{CannotReplaceMissingTableException, IndexAlreadyExistsException, NoSuchIndexException}
-import org.apache.spark.sql.catalyst.plans.logical.{Aggregate, Filter, GlobalLimit, LocalLimit, Offset, Sort}
+import org.apache.spark.sql.catalyst.plans.logical.{Aggregate, Filter, GlobalLimit, Join, LocalLimit, Offset, Sort}
 import org.apache.spark.sql.connector.{IntegralAverage, StrLen}
 import org.apache.spark.sql.connector.catalog.{Catalogs, Identifier, TableCatalog}
 import org.apache.spark.sql.connector.catalog.functions.{ScalarFunction, UnboundFunction}
@@ -141,6 +141,7 @@ class JDBCV2Suite extends QueryTest with SharedSparkSession with ExplainSuiteHel
     .set("spark.sql.catalog.h2.pushDownAggregate", "true")
     .set("spark.sql.catalog.h2.pushDownLimit", "true")
     .set("spark.sql.catalog.h2.pushDownOffset", "true")
+    .set("spark.sql.catalog.h2.pushDownJoin", "true")
 
   private def withConnection[T](f: Connection => T): T = {
     val conn = DriverManager.getConnection(url, new Properties())
@@ -265,6 +266,253 @@ class JDBCV2Suite extends QueryTest with SharedSparkSession with ExplainSuiteHel
     super.afterAll()
   }
 
+  test("Test 2-way join") {
+    val rows = withSQLConf(
+      SQLConf.DATA_SOURCE_V2_JOIN_PUSHDOWN.key -> "false") {
+      sql("SELECT * FROM h2.test.employee a, h2.test.employee b").collect().toSeq
+    }
+
+    withSQLConf(
+      SQLConf.DATA_SOURCE_V2_JOIN_PUSHDOWN.key -> "true") {
+      val df = sql("SELECT * FROM h2.test.employee a, h2.test.employee b")
+      val joinNodes = df.queryExecution.optimizedPlan.collect {
+        case j: Join => j
+      }
+
+      assert(joinNodes.isEmpty)
+      checkAnswer(df, rows)
+    }
+  }
+
+  test("Test multi way join") {
+    val rows = withSQLConf(
+      SQLConf.DATA_SOURCE_V2_JOIN_PUSHDOWN.key -> "false") {
+      sql("SELECT * FROM " +
+        "h2.test.employee a, " +
+        "h2.test.employee b, " +
+        "h2.test.employee c, " +
+        "h2.test.employee d, " +
+        "h2.test.employee e")
+        .collect().toSeq
+    }
+
+    withSQLConf(
+      SQLConf.DATA_SOURCE_V2_JOIN_PUSHDOWN.key -> "true") {
+      val df = sql("SELECT * FROM " +
+        "h2.test.employee a, " +
+        "h2.test.employee b, " +
+        "h2.test.employee c, " +
+        "h2.test.employee d, " +
+        "h2.test.employee e")
+
+      val joinNodes = df.queryExecution.optimizedPlan.collect {
+        case j: Join => j
+      }
+
+      assert(joinNodes.isEmpty)
+      checkPushedInfo(df,
+        "PushedJoins: [h2.test.employee, h2.test.employee, h2.test.employee, h2.test.employee]")
+      checkAnswer(df, rows)
+    }
+  }
+
+  test("Test join with condition") {
+    val rows = withSQLConf(
+      SQLConf.DATA_SOURCE_V2_JOIN_PUSHDOWN.key -> "false") {
+      sql("SELECT * FROM h2.test.employee a join h2.test.employee b on a.dept = b.dept + 1")
+        .collect().toSeq
+    }
+
+    withSQLConf(
+      SQLConf.DATA_SOURCE_V2_JOIN_PUSHDOWN.key -> "true") {
+      val df = sql(
+        "SELECT * FROM h2.test.employee a join h2.test.employee b on a.dept = b.dept + 1"
+      )
+      val joinNodes = df.queryExecution.optimizedPlan.collect {
+        case j: Join => j
+      }
+
+      assert(joinNodes.isEmpty)
+      checkAnswer(df, rows)
+    }
+  }
+
+  test("Test multi-way-join with conditions") {
+    val rows = withSQLConf(
+      SQLConf.DATA_SOURCE_V2_JOIN_PUSHDOWN.key -> "false") {
+      sql("SELECT * FROM " +
+        "h2.test.employee a " +
+        "join h2.test.employee b on b.dept = a.dept + 1 " +
+        "join h2.test.employee c on c.dept = b.dept - 1 ")
+        .collect().toSeq
+    }
+
+    assert(!rows.isEmpty)
+
+    withSQLConf(
+      SQLConf.DATA_SOURCE_V2_JOIN_PUSHDOWN.key -> "true") {
+      val df = sql("SELECT * FROM " +
+        "h2.test.employee a " +
+        "join h2.test.employee b on b.dept = a.dept + 1 " +
+        "join h2.test.employee c on c.dept = b.dept - 1 ")
+      val joinNodes = df.queryExecution.optimizedPlan.collect {
+        case j: Join => j
+      }
+
+      assert(joinNodes.isEmpty)
+      checkAnswer(df, rows)
+    }
+  }
+
+  test("Test join with column pruning") {
+    val rows = withSQLConf(
+      SQLConf.DATA_SOURCE_V2_JOIN_PUSHDOWN.key -> "false") {
+      sql("SELECT a.dept + 2, b.dept, b.salary FROM " +
+        "h2.test.employee a join h2.test.employee b " +
+        "on a.dept = b.dept + 1")
+        .collect().toSeq
+    }
+
+    withSQLConf(
+      SQLConf.DATA_SOURCE_V2_JOIN_PUSHDOWN.key -> "true") {
+      val df = sql("" +
+        "SELECT a.dept + 2, b.dept, b.salary FROM " +
+        "h2.test.employee a join h2.test.employee b " +
+        "on a.dept = b.dept + 1")
+
+      val joinNodes = df.queryExecution.optimizedPlan.collect {
+        case j: Join => j
+      }
+
+      assert(joinNodes.isEmpty)
+      checkAnswer(df, rows)
+    }
+  }
+
+  test("Test multi way join with column pruning") {
+    val rows = withSQLConf(
+      SQLConf.DATA_SOURCE_V2_JOIN_PUSHDOWN.key -> "false") {
+      sql("SELECT a.dept, b.*, c.dept, c.salary + a.salary FROM " +
+        "h2.test.employee a " +
+        "join h2.test.employee b on b.dept = a.dept + 1 " +
+        "join h2.test.employee c on c.dept = b.dept - 1 ")
+        .collect().toSeq
+    }
+
+    withSQLConf(
+      SQLConf.DATA_SOURCE_V2_JOIN_PUSHDOWN.key -> "true") {
+      val df = sql("" +
+        "SELECT a.dept, b.*, c.dept, c.salary + a.salary FROM " +
+        "h2.test.employee a " +
+        "join h2.test.employee b on b.dept = a.dept + 1 " +
+        "join h2.test.employee c on c.dept = b.dept - 1 ")
+
+      val joinNodes = df.queryExecution.optimizedPlan.collect {
+        case j: Join => j
+      }
+
+      assert(joinNodes.isEmpty)
+      checkAnswer(df, rows)
+    }
+  }
+
+  test("Test aggregate on top of 2 way join") {
+    val rows = withSQLConf(
+      SQLConf.DATA_SOURCE_V2_JOIN_PUSHDOWN.key -> "false") {
+      sql("SELECT min(a.dept + b.dept), min(a.dept) " +
+        "FROM h2.test.employee a " +
+        "join h2.test.employee b on a.dept = b.dept + 1")
+        .collect().toSeq
+    }
+
+    withSQLConf(
+      SQLConf.DATA_SOURCE_V2_JOIN_PUSHDOWN.key -> "true") {
+      val df = sql("SELECT min(a.dept + b.dept), min(a.dept) " +
+        "FROM h2.test.employee a " +
+        "join h2.test.employee b on a.dept = b.dept + 1")
+      val joinNodes = df.queryExecution.optimizedPlan.collect {
+        case j: Join => j
+      }
+
+      val aggNodes = df.queryExecution.optimizedPlan.collect {
+        case a: Aggregate => a
+      }
+
+      assert(joinNodes.isEmpty)
+      assert(aggNodes.isEmpty)
+      checkAnswer(df, rows)
+    }
+  }
+
+  test("Test aggregate on top of multi way join") {
+    val rows = withSQLConf(
+      SQLConf.DATA_SOURCE_V2_JOIN_PUSHDOWN.key -> "false") {
+      sql("SELECT min(a.dept + b.dept), min(a.dept), min(c.dept - 2) " +
+        "from h2.test.employee a " +
+        "join h2.test.employee b on b.dept = a.dept + 1 " +
+        "join h2.test.employee c on c.dept = b.dept - 1 ")
+        .collect().toSeq
+    }
+
+    withSQLConf(
+      SQLConf.DATA_SOURCE_V2_JOIN_PUSHDOWN.key -> "true") {
+      val df = sql("SELECT min(a.dept + b.dept), min(a.dept), min(c.dept - 2) " +
+        "from h2.test.employee a " +
+        "join h2.test.employee b on b.dept = a.dept + 1 " +
+        "join h2.test.employee c on c.dept = b.dept - 1 ")
+      val joinNodes = df.queryExecution.optimizedPlan.collect {
+        case j: Join => j
+      }
+
+      val aggNodes = df.queryExecution.optimizedPlan.collect {
+        case a: Aggregate => a
+      }
+
+      assert(joinNodes.isEmpty)
+      assert(aggNodes.isEmpty)
+      checkAnswer(df, rows)
+    }
+  }
+
+  test("Test sort limit on top of join is pushed down") {
+    val rows = withSQLConf(
+      SQLConf.DATA_SOURCE_V2_JOIN_PUSHDOWN.key -> "false") {
+      sql("SELECT min(a.dept + b.dept), a.dept, b.dept " +
+        "from h2.test.employee a " +
+        "join h2.test.employee b on b.dept = a.dept + 1 " +
+        "GROUP BY a.dept, b.dept " +
+        "ORDER BY a.dept " +
+        "LIMIT 1")
+        .collect().toSeq
+    }
+
+    withSQLConf(
+      SQLConf.DATA_SOURCE_V2_JOIN_PUSHDOWN.key -> "true") {
+      val df = sql("SELECT min(a.dept + b.dept), a.dept, b.dept " +
+        "from h2.test.employee a " +
+        "join h2.test.employee b on b.dept = a.dept + 1 " +
+        "GROUP BY a.dept, b.dept " +
+        "ORDER BY a.dept " +
+        "LIMIT 1")
+      val joinNodes = df.queryExecution.optimizedPlan.collect {
+        case j: Join => j
+      }
+
+      val sortNodes = df.queryExecution.optimizedPlan.collect {
+        case s: Sort => s
+      }
+
+      val limitNodes = df.queryExecution.optimizedPlan.collect {
+        case l: GlobalLimit => l
+      }
+
+      assert(joinNodes.isEmpty)
+      assert(sortNodes.isEmpty)
+      assert(limitNodes.isEmpty)
+      checkAnswer(df, rows)
+    }
+  }
+
   test("simple scan") {
     checkAnswer(sql("SELECT * FROM h2.test.empty_table"), Seq())
     checkAnswer(sql("SELECT * FROM h2.test.people"), Seq(Row("fred", 1), Row("mary", 2)))
