Skip to content

Commit

Permalink
Merge branch 'main' into arnavb/range-1
Browse files Browse the repository at this point in the history
  • Loading branch information
ArnavBalyan authored Jan 15, 2025
2 parents 24564df + a96e0d2 commit 41e2ccf
Show file tree
Hide file tree
Showing 39 changed files with 352 additions and 472 deletions.
23 changes: 21 additions & 2 deletions .github/workflows/docker_image.yml
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@ on:
- '.github/workflows/docker_image.yml'
- 'dev/docker/Dockerfile.centos7-static-build'
- 'dev/docker/Dockerfile.centos8-dynamic-build'
- 'dev/docker/Dockerfile.centos8-dynamic-build-jdk11'
- 'dev/docker/Dockerfile.centos8-dynamic-build-jdk17'
schedule:
- cron: '0 20 * * 0'

Expand Down Expand Up @@ -69,10 +71,27 @@ jobs:
username: ${{ secrets.DOCKERHUB_USER }}
password: ${{ secrets.DOCKERHUB_TOKEN }}

- name: Build and push Docker image
- name: Build and push Docker image Centos8
uses: docker/build-push-action@v2
with:
context: .
file: dev/docker/Dockerfile.centos8-dynamic-build
push: true
tags: apache/gluten:centos-8
tags: apache/gluten:centos-8 # JDK8 based

- name: Build and push Docker image Centos8 + JDK11
uses: docker/build-push-action@v2
with:
context: .
file: dev/docker/Dockerfile.centos8-dynamic-build-jdk11
push: true
tags: apache/gluten:centos-8-jdk11

- name: Build and push Docker image Centos8 + JDK17
uses: docker/build-push-action@v2
with:
context: .
file: dev/docker/Dockerfile.centos8-dynamic-build-jdk17
push: true
tags: apache/gluten:centos-8-jdk17

Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import org.apache.gluten.execution.CHBroadcastBuildSideCache
import org.apache.gluten.execution.datasource.GlutenFormatFactory
import org.apache.gluten.expression.UDFMappings
import org.apache.gluten.extension.ExpressionExtensionTrait
import org.apache.gluten.extension.columnar.transition.Convention
import org.apache.gluten.jni.JniLibLoader
import org.apache.gluten.vectorized.CHNativeExpressionEvaluator

Expand Down Expand Up @@ -70,7 +71,8 @@ class CHListenerApi extends ListenerApi with Logging {
override def onExecutorShutdown(): Unit = shutdown()

private def initialize(conf: SparkConf, isDriver: Boolean): Unit = {
// Force batch type initializations.
// Do row / batch type initializations.
Convention.ensureSparkRowAndBatchTypesRegistered()
CHBatch.ensureRegistered()
SparkDirectoryUtil.init(conf)
val libPath = conf.get(GlutenConfig.GLUTEN_LIB_PATH, StringUtils.EMPTY)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ import org.apache.gluten.config.GlutenConfig
import org.apache.gluten.extension._
import org.apache.gluten.extension.columnar._
import org.apache.gluten.extension.columnar.MiscColumnarRules.{RemoveGlutenTableCacheColumnarToRow, RemoveTopmostColumnarToRow, RewriteSubqueryBroadcast}
import org.apache.gluten.extension.columnar.enumerated.planner.cost.LegacyCoster
import org.apache.gluten.extension.columnar.heuristic.{ExpandFallbackPolicy, HeuristicTransform}
import org.apache.gluten.extension.columnar.offload.{OffloadExchange, OffloadJoin, OffloadOthers}
import org.apache.gluten.extension.columnar.rewrite._
Expand Down Expand Up @@ -142,6 +143,9 @@ object CHRuleApi {
}

private def injectRas(injector: RasInjector): Unit = {
// Register legacy coster for transition planner.
injector.injectCoster(_ => LegacyCoster)

// CH backend doesn't work with RAS at the moment. Inject a rule that aborts any
// execution calls.
injector.injectPreTransform(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ import org.apache.gluten.columnarbatch.VeloxBatch
import org.apache.gluten.config.GlutenConfig
import org.apache.gluten.execution.datasource.GlutenFormatFactory
import org.apache.gluten.expression.UDFMappings
import org.apache.gluten.extension.columnar.transition.Convention
import org.apache.gluten.init.NativeBackendInitializer
import org.apache.gluten.jni.{JniLibLoader, JniWorkspace}
import org.apache.gluten.udf.UdfJniWrapper
Expand Down Expand Up @@ -126,10 +127,11 @@ class VeloxListenerApi extends ListenerApi with Logging {
override def onExecutorShutdown(): Unit = shutdown()

private def initialize(conf: SparkConf, isDriver: Boolean): Unit = {
// Force batch type initializations.
VeloxBatch.ensureRegistered()
// Do row / batch type initializations.
Convention.ensureSparkRowAndBatchTypesRegistered()
ArrowJavaBatch.ensureRegistered()
ArrowNativeBatch.ensureRegistered()
VeloxBatch.ensureRegistered()

// Register columnar shuffle so that it can be considered when
// `org.apache.spark.shuffle.GlutenShuffleManager` is set as Spark shuffle manager.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ import org.apache.gluten.extension._
import org.apache.gluten.extension.columnar._
import org.apache.gluten.extension.columnar.MiscColumnarRules.{RemoveGlutenTableCacheColumnarToRow, RemoveTopmostColumnarToRow, RewriteSubqueryBroadcast}
import org.apache.gluten.extension.columnar.enumerated.{RasOffload, RemoveSort}
import org.apache.gluten.extension.columnar.enumerated.planner.cost.{LegacyCoster, RoughCoster, RoughCoster2}
import org.apache.gluten.extension.columnar.enumerated.planner.cost.{LegacyCoster, RoughCoster}
import org.apache.gluten.extension.columnar.heuristic.{ExpandFallbackPolicy, HeuristicTransform}
import org.apache.gluten.extension.columnar.offload.{OffloadExchange, OffloadJoin, OffloadOthers}
import org.apache.gluten.extension.columnar.rewrite._
Expand Down Expand Up @@ -120,6 +120,10 @@ object VeloxRuleApi {
}

private def injectRas(injector: RasInjector): Unit = {
// Gluten RAS: Costers.
injector.injectCoster(_ => LegacyCoster)
injector.injectCoster(_ => RoughCoster)

// Gluten RAS: Pre rules.
injector.injectPreTransform(_ => RemoveTransitions)
injector.injectPreTransform(_ => PushDownInputFileExpression.PreOffload)
Expand All @@ -131,6 +135,7 @@ object VeloxRuleApi {

// Gluten RAS: The RAS rule.
val validatorBuilder: GlutenConfig => Validator = conf => Validators.newValidator(conf)
injector.injectRasRule(_ => RemoveSort)
val rewrites =
Seq(
RewriteIn,
Expand All @@ -139,10 +144,6 @@ object VeloxRuleApi {
PullOutPreProject,
PullOutPostProject,
ProjectColumnPruning)
injector.injectCoster(_ => LegacyCoster)
injector.injectCoster(_ => RoughCoster)
injector.injectCoster(_ => RoughCoster2)
injector.injectRasRule(_ => RemoveSort)
val offloads: Seq[RasOffload] = Seq(
RasOffload.from[Exchange](OffloadExchange()),
RasOffload.from[BaseJoinExec](OffloadJoin()),
Expand Down

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,10 @@ package org.apache.gluten.extension.columnar.enumerated.planner

import org.apache.gluten.config.GlutenConfig
import org.apache.gluten.extension.columnar.enumerated.EnumeratedTransform
import org.apache.gluten.extension.columnar.enumerated.planner.cost.{LegacyCoster, LongCostModel}
import org.apache.gluten.extension.columnar.enumerated.planner.cost.{GlutenCostModel, LegacyCoster, LongCostModel}
import org.apache.gluten.extension.columnar.enumerated.planner.property.Conv
import org.apache.gluten.extension.columnar.transition.ConventionReq
import org.apache.gluten.ras.{Cost, CostModel, Ras}
import org.apache.gluten.extension.columnar.transition.{Convention, ConventionReq}
import org.apache.gluten.ras.{Cost, Ras}
import org.apache.gluten.ras.RasSuiteBase._
import org.apache.gluten.ras.path.RasPath
import org.apache.gluten.ras.property.PropertySet
Expand All @@ -37,6 +37,11 @@ import org.apache.spark.sql.types.StringType
class VeloxRasSuite extends SharedSparkSession {
import VeloxRasSuite._

// Suite-level setup: runs the parent class's beforeAll() first, then calls
// Convention.ensureSparkRowAndBatchTypesRegistered() so the row / batch
// convention types are set up for this suite.
override protected def beforeAll(): Unit = {
super.beforeAll()
Convention.ensureSparkRowAndBatchTypesRegistered()
}

test("C2R, R2C - basic") {
val in = RowUnary(RowLeaf(TRIVIAL_SCHEMA))
val planner = newRas().newPlanner(in)
Expand Down Expand Up @@ -153,14 +158,14 @@ object VeloxRasSuite {
.asInstanceOf[Ras[SparkPlan]]
}

private def legacyCostModel(): CostModel[SparkPlan] = {
// Builds a cost model backed by LegacyCoster: obtains a registry from
// LongCostModel.registry(), registers LegacyCoster in it, and returns the
// model the registry yields for that coster's kind.
private def legacyCostModel(): GlutenCostModel = {
val registry = LongCostModel.registry()
val coster = LegacyCoster
registry.register(coster)
// Look the model up by the kind reported by the coster that was just registered.
registry.get(coster.kind())
}

private def sessionCostModel(): CostModel[SparkPlan] = {
// Returns the cost model exposed by the transform that
// EnumeratedTransform.static() produces.
// NOTE(review): presumably this reflects the active session's configuration,
// as the method name suggests — confirm against EnumeratedTransform.
private def sessionCostModel(): GlutenCostModel = {
val transform = EnumeratedTransform.static()
transform.costModel
}
Expand Down Expand Up @@ -198,23 +203,29 @@ object VeloxRasSuite {
override def shape(): Shape[SparkPlan] = Shapes.fixedHeight(1)
}

class UserCostModel1 extends CostModel[SparkPlan] {
// Cost model that wraps the model returned by legacyCostModel(). RowUnary nodes
// are assigned base.makeInfCost(); every other node, and every other cost
// operation, is forwarded to the wrapped model unchanged.
class UserCostModel1 extends GlutenCostModel {
// Wrapped model that all calls delegate to; created once per instance.
private val base = legacyCostModel()
override def costOf(node: SparkPlan): Cost = node match {
// The only override of the base behavior: RowUnary gets the base model's "inf" cost.
case _: RowUnary => base.makeInfCost()
case other => base.costOf(other)
}
// Pure pass-through delegations to the wrapped model.
override def costComparator(): Ordering[Cost] = base.costComparator()
override def makeInfCost(): Cost = base.makeInfCost()
override def sum(one: Cost, other: Cost): Cost = base.sum(one, other)
override def diff(one: Cost, other: Cost): Cost = base.diff(one, other)
override def makeZeroCost(): Cost = base.makeZeroCost()
}

class UserCostModel2 extends CostModel[SparkPlan] {
// Cost model that wraps the model returned by legacyCostModel(). ColumnarUnary
// nodes are assigned base.makeInfCost(); every other node, and every other cost
// operation, is forwarded to the wrapped model unchanged.
class UserCostModel2 extends GlutenCostModel {
// Wrapped model that all calls delegate to; created once per instance.
private val base = legacyCostModel()
override def costOf(node: SparkPlan): Cost = node match {
// The only override of the base behavior: ColumnarUnary gets the base model's "inf" cost.
case _: ColumnarUnary => base.makeInfCost()
case other => base.costOf(other)
}
// Pure pass-through delegations to the wrapped model.
override def costComparator(): Ordering[Cost] = base.costComparator()
override def makeInfCost(): Cost = base.makeInfCost()
override def sum(one: Cost, other: Cost): Cost = base.sum(one, other)
override def diff(one: Cost, other: Cost): Cost = base.diff(one, other)
override def makeZeroCost(): Cost = base.makeZeroCost()
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -200,13 +200,13 @@ class VeloxTransitionSuite extends SharedSparkSession {
}

override protected def beforeAll(): Unit = {
api.onExecutorStart(MockVeloxBackend.mockPluginContext())
super.beforeAll()
api.onExecutorStart(MockVeloxBackend.mockPluginContext())
}

override protected def afterAll(): Unit = {
super.afterAll()
api.onExecutorShutdown()
super.afterAll()
}
}

Expand Down
4 changes: 2 additions & 2 deletions cpp/core/utils/qat/QatCodec.cc
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,9 @@

#include "QatCodec.h"

#define QZ_INIT_FAIL(rc) (QZ_OK != rc && QZ_DUPLICATE != rc)
#define QZ_INIT_FAIL(rc) ((QZ_OK != (rc)) && (QZ_DUPLICATE != (rc)))

#define QZ_SETUP_SESSION_FAIL(rc) (QZ_PARAMS == rc || QZ_NOSW_NO_HW == rc || QZ_NOSW_LOW_MEM == rc)
#define QZ_SETUP_SESSION_FAIL(rc) (QZ_PARAMS == (rc) || QZ_NOSW_NO_HW == (rc) || QZ_NOSW_LOW_MEM == (rc))

namespace gluten {
namespace qat {
Expand Down
14 changes: 7 additions & 7 deletions dev/build-thirdparty.sh
Original file line number Diff line number Diff line change
Expand Up @@ -12,40 +12,40 @@ ARCH=`uname -m`
mkdir -p $THIRDPARTY_LIB
function process_setup_ubuntu_2004 {
cp /usr/lib/${ARCH}-linux-gnu/{libroken.so.18,libasn1.so.8,libcrypto.so.1.1,libnghttp2.so.14,libnettle.so.7,libhogweed.so.5,librtmp.so.1,libssh.so.4,libssl.so.1.1,liblber-2.4.so.2,libsasl2.so.2,libwind.so.0,libheimbase.so.1,libhcrypto.so.4,libhx509.so.5,libkrb5.so.26,libheimntlm.so.0,libgssapi.so.3,libldap_r-2.4.so.2,libcurl.so.4,libdouble-conversion.so.3,libevent-2.1.so.7,libgflags.so.2.2,libunwind.so.8,libglog.so.0,libidn.so.11,libntlm.so.0,libgsasl.so.7,libicudata.so.66,libicuuc.so.66,libxml2.so.2,libre2.so.5,libsnappy.so.1,libpsl.so.5,libbrotlidec.so.1,libbrotlicommon.so.1,libthrift-0.13.0.so} $THIRDPARTY_LIB/
cp /usr/local/lib/{libprotobuf.so.32,libhdfs3.so.1,libboost_context.so.1.84.0,libboost_regex.so.1.84.0} $THIRDPARTY_LIB/
cp /usr/local/lib/{libprotobuf.so.32,libboost_context.so.1.84.0,libboost_regex.so.1.84.0} $THIRDPARTY_LIB/
}

function process_setup_ubuntu_2204 {
cp /usr/lib/${ARCH}-linux-gnu/{libre2.so.9,libdouble-conversion.so.3,libidn.so.12,libglog.so.0,libgflags.so.2.2,libevent-2.1.so.7,libsnappy.so.1,libunwind.so.8,libcurl.so.4,libxml2.so.2,libgsasl.so.7,libicui18n.so.70,libicuuc.so.70,libnghttp2.so.14,libldap-2.5.so.0,liblber-2.5.so.0,libntlm.so.0,librtmp.so.1,libsasl2.so.2,libssh.so.4,libicudata.so.70,libthrift-0.16.0.so} $THIRDPARTY_LIB/
cp /usr/local/lib/{libhdfs3.so.1,libprotobuf.so.32,libboost_context.so.1.84.0,libboost_regex.so.1.84.0} $THIRDPARTY_LIB/
cp /usr/local/lib/{libprotobuf.so.32,libboost_context.so.1.84.0,libboost_regex.so.1.84.0} $THIRDPARTY_LIB/
}

function process_setup_centos_9 {
cp /lib64/{libre2.so.9,libdouble-conversion.so.3,libevent-2.1.so.7,libdwarf.so.0,libgsasl.so.7,libicudata.so.67,libicui18n.so.67,libicuuc.so.67,libidn.so.12,libntlm.so.0,libsodium.so.23} $THIRDPARTY_LIB/
cp /usr/local/lib/{libhdfs3.so.1,libboost_context.so.1.84.0,libboost_filesystem.so.1.84.0,libboost_program_options.so.1.84.0,libboost_regex.so.1.84.0,libboost_system.so.1.84.0,libboost_thread.so.1.84.0,libboost_atomic.so.1.84.0,libprotobuf.so.32} $THIRDPARTY_LIB/
cp /usr/local/lib/{libboost_context.so.1.84.0,libboost_filesystem.so.1.84.0,libboost_program_options.so.1.84.0,libboost_regex.so.1.84.0,libboost_system.so.1.84.0,libboost_thread.so.1.84.0,libboost_atomic.so.1.84.0,libprotobuf.so.32} $THIRDPARTY_LIB/
cp /usr/local/lib64/{libgflags.so.2.2,libglog.so.1} $THIRDPARTY_LIB/
}

function process_setup_centos_8 {
cp /usr/lib64/{libre2.so.0,libdouble-conversion.so.3,libevent-2.1.so.6,libdwarf.so.1,libgsasl.so.7,libicudata.so.60,libicui18n.so.60,libicuuc.so.60,libidn.so.11,libntlm.so.0,libsodium.so.23} $THIRDPARTY_LIB/
cp /usr/local/lib/{libhdfs3.so.1,libboost_context.so.1.84.0,libboost_filesystem.so.1.84.0,libboost_program_options.so.1.84.0,libboost_regex.so.1.84.0,libboost_system.so.1.84.0,libboost_thread.so.1.84.0,libboost_atomic.so.1.84.0,libprotobuf.so.32} $THIRDPARTY_LIB/
cp /usr/local/lib/{libboost_context.so.1.84.0,libboost_filesystem.so.1.84.0,libboost_program_options.so.1.84.0,libboost_regex.so.1.84.0,libboost_system.so.1.84.0,libboost_thread.so.1.84.0,libboost_atomic.so.1.84.0,libprotobuf.so.32} $THIRDPARTY_LIB/
cp /usr/local/lib64/{libgflags.so.2.2,libglog.so.1} $THIRDPARTY_LIB/
}

function process_setup_centos_7 {
cp /usr/local/lib64/{libgflags.so.2.2,libglog.so.0} $THIRDPARTY_LIB/
cp /usr/lib64/{libdouble-conversion.so.1,libevent-2.0.so.5,libzstd.so.1,libntlm.so.0,libgsasl.so.7,liblz4.so.1} $THIRDPARTY_LIB/
cp /usr/local/lib/{libre2.so.10,libhdfs3.so.1,libboost_context.so.1.84.0,libboost_filesystem.so.1.84.0,libboost_program_options.so.1.84.0,libboost_system.so.1.84.0,libboost_thread.so.1.84.0,libboost_regex.so.1.84.0,libboost_atomic.so.1.84.0,libprotobuf.so.32} $THIRDPARTY_LIB/
cp /usr/local/lib/{libre2.so.10,libboost_context.so.1.84.0,libboost_filesystem.so.1.84.0,libboost_program_options.so.1.84.0,libboost_system.so.1.84.0,libboost_thread.so.1.84.0,libboost_regex.so.1.84.0,libboost_atomic.so.1.84.0,libprotobuf.so.32} $THIRDPARTY_LIB/
}

function process_setup_debian_11 {
cp /usr/lib/x86_64-linux-gnu/{libre2.so.9,libthrift-0.13.0.so,libdouble-conversion.so.3,libevent-2.1.so.7,libgflags.so.2.2,libglog.so.0,libsnappy.so.1,libunwind.so.8,libcurl.so.4,libicui18n.so.67,libicuuc.so.67,libnghttp2.so.14,librtmp.so.1,libssh2.so.1,libpsl.so.5,libldap_r-2.4.so.2,liblber-2.4.so.2,libbrotlidec.so.1,libicudata.so.67,libsasl2.so.2,libbrotlicommon.so.1} $THIRDPARTY_LIB/
cp /usr/local/lib/{libhdfs3.so.1,libprotobuf.so.32,libboost_context.so.1.84.0,libboost_regex.so.1.84.0} $THIRDPARTY_LIB/
cp /usr/local/lib/{libprotobuf.so.32,libboost_context.so.1.84.0,libboost_regex.so.1.84.0} $THIRDPARTY_LIB/
}

function process_setup_debian_12 {
cp /usr/lib/x86_64-linux-gnu/{libthrift-0.17.0.so,libdouble-conversion.so.3,libevent-2.1.so.7,libgflags.so.2.2,libglog.so.1,libsnappy.so.1,libunwind.so.8,libcurl.so.4,libicui18n.so.72,libicuuc.so.72,libnghttp2.so.14,librtmp.so.1,libssh2.so.1,libpsl.so.5,libldap-2.5.so.0,liblber-2.5.so.0,libbrotlidec.so.1,libicudata.so.72,libsasl2.so.2,libbrotlicommon.so.1,libcrypto.so.3,libssl.so.3,libgssapi_krb5.so.2,libkrb5.so.3,libk5crypto.so.3,libkrb5support.so.0,libkeyutils.so.1} $THIRDPARTY_LIB/
cp /usr/local/lib/{libprotobuf.so.32,libhdfs3.so.1,libboost_context.so.1.84.0,libboost_regex.so.1.84.0} $THIRDPARTY_LIB/
cp /usr/local/lib/{libprotobuf.so.32,libboost_context.so.1.84.0,libboost_regex.so.1.84.0} $THIRDPARTY_LIB/
}

if [[ "$LINUX_OS" == "ubuntu" || "$LINUX_OS" == "pop" ]]; then
Expand Down
6 changes: 0 additions & 6 deletions dev/build_helper_functions.sh
Original file line number Diff line number Diff line change
Expand Up @@ -208,9 +208,3 @@ function setup_linux {
exit 1
fi
}

# Installs libhdfs3: calls github_checkout with "oap-project/libhdfs3" and
# "master", then runs cmake_install.
# NOTE(review): github_checkout and cmake_install are defined outside this view;
# presumably the arguments are <repo> <ref> and cmake_install builds in the
# checked-out directory — confirm against their definitions.
function install_libhdfs3 {
github_checkout oap-project/libhdfs3 master
cmake_install
}

Loading

0 comments on commit 41e2ccf

Please sign in to comment.