From 0ce97b65a14c9a38e126c44622feafd35e31793e Mon Sep 17 00:00:00 2001
From: James Verbus
Date: Mon, 12 Feb 2024 11:48:41 -0800
Subject: [PATCH] Fixed issue with avro r/w. Removed build for Spark 2.3.0.
 Added build for Spark 3.5.0. (#44)

---
 .github/workflows/ci.yml                        | 12 ++++++------
 README.md                                       |  2 +-
 isolation-forest/build.gradle                   |  4 ++--
 .../IsolationForestModelReadWrite.scala         |  4 ++--
 4 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 965e4be..c3c4aa7 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -21,8 +21,6 @@ jobs:
     strategy:
       matrix:
         include:
-          - scala-version: 2.11.12
-            spark-version: 2.3.0
           - scala-version: 2.11.12
             spark-version: 2.4.3
           - scala-version: 2.12.18
@@ -37,12 +35,14 @@ jobs:
             spark-version: 3.2.4
           - scala-version: 2.12.18
             spark-version: 3.3.3
-          - scala-version: 2.13.11
+          - scala-version: 2.13.12
             spark-version: 3.3.3
           - scala-version: 2.12.18
-            spark-version: 3.4.1
-          - scala-version: 2.13.11
-            spark-version: 3.4.1
+            spark-version: 3.4.2
+          - scala-version: 2.13.12
+            spark-version: 3.4.2
+          - scala-version: 2.13.12
+            spark-version: 3.5.0
     if: "! contains(toJSON(github.event.commits.*.message), '[skip ci]')"
     steps:
       - name: Check out code
diff --git a/README.md b/README.md
index ee1f964..9097ae8 100644
--- a/README.md
+++ b/README.md
@@ -37,7 +37,7 @@ If you want to use the library with arbitrary Spark and Scala versions, you can
 build command.
 
 ```bash
-./gradlew build -PsparkVersion=3.4.1 -PscalaVersion=2.13.11
+./gradlew build -PsparkVersion=3.4.1 -PscalaVersion=2.13.12
 ```
 
 To force a rebuild of the library, you can use:
diff --git a/isolation-forest/build.gradle b/isolation-forest/build.gradle
index fe5770c..d6ba60b 100644
--- a/isolation-forest/build.gradle
+++ b/isolation-forest/build.gradle
@@ -4,12 +4,12 @@ plugins {
     id 'scala'
 }
 
-def scalaVersion = findProperty("scalaVersion") ?: "2.11.8" // Scala 2.11.8 is the default Scala build version.
+def scalaVersion = findProperty("scalaVersion") ?: "2.13.12"
 println "Scala version: " + scalaVersion
 // If scalaVersion == "2.11.8", then scalaVersionShort == "2.11".
 def scalaVersionShort = VersionNumber.parse(scalaVersion).getMajor() + "." + VersionNumber.parse(scalaVersion).getMinor()
 
-def sparkVersion = findProperty("sparkVersion") ?: "2.3.0" // Spark 2.3.0 is the default Spark build version.
+def sparkVersion = findProperty("sparkVersion") ?: "3.4.1"
 println "Spark version: " + sparkVersion
 
 dependencies {
diff --git a/isolation-forest/src/main/scala/com/linkedin/relevance/isolationforest/IsolationForestModelReadWrite.scala b/isolation-forest/src/main/scala/com/linkedin/relevance/isolationforest/IsolationForestModelReadWrite.scala
index 0e2a687..4b593c3 100644
--- a/isolation-forest/src/main/scala/com/linkedin/relevance/isolationforest/IsolationForestModelReadWrite.scala
+++ b/isolation-forest/src/main/scala/com/linkedin/relevance/isolationforest/IsolationForestModelReadWrite.scala
@@ -87,7 +87,7 @@ private[isolationforest] case object IsolationForestModelReadWrite extends Loggi
     val dataPath = new Path(path, "data").toString
     logInfo(s"Loading IsolationForestModel tree data from path ${dataPath}")
     val nodeData = spark.read
-      .format("com.databricks.spark.avro")
+      .format("avro")
       .load(dataPath)
       .as[EnsembleNodeData]
     val rootNodesRDD = nodeData.rdd
@@ -260,7 +260,7 @@ private[isolationforest] case object IsolationForestModelReadWrite extends Loggi
     spark.createDataFrame(nodeDataRDD)
       .repartition(1)
       .write
-      .format("com.databricks.spark.avro")
+      .format("avro")
       .save(dataPath)
   }
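
For context on the `avro` change above: since Spark 2.4 the Avro data source is built into the Spark project and addressed by the short name `avro` (provided by the `org.apache.spark:spark-avro` artifact matching your Spark and Scala versions), whereas the old `com.databricks.spark.avro` name refers to the retired external package. Below is a minimal sketch of the short-name usage, assuming `spark-avro` is on the classpath; the object name and output path are illustrative only, not part of this patch:

```scala
import org.apache.spark.sql.SparkSession

// Illustrative sketch only: demonstrates the built-in "avro" format name
// that the patched reader/writer uses. Assumes org.apache.spark:spark-avro
// (matching your Spark/Scala versions) is on the classpath.
object AvroShortNameExample {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .appName("avro-short-name-example")
      .master("local[*]")
      .getOrCreate()
    import spark.implicits._

    // Write with the built-in short format name (Spark 2.4+); this avoids
    // depending on the retired com.databricks:spark-avro package or the
    // legacy name-mapping configuration.
    val df = Seq((1L, 0.5), (2L, 1.5)).toDF("id", "value")
    df.write.format("avro").mode("overwrite").save("/tmp/avro-example")

    // Read the data back with the same short format name.
    spark.read.format("avro").load("/tmp/avro-example").show()

    spark.stop()
  }
}
```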