diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 210b84b..449037e 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -33,6 +33,8 @@ jobs: spark-version: 3.1.1 - scala-version: 2.12.11 spark-version: 3.2.0 + - scala-version: 2.13.10 + spark-version: 3.2.0 if: "! contains(toJSON(github.event.commits.*.message), '[skip ci]')" steps: - name: Check out code diff --git a/isolation-forest/src/main/scala/com/linkedin/relevance/isolationforest/BaggedPoint.scala b/isolation-forest/src/main/scala/com/linkedin/relevance/isolationforest/BaggedPoint.scala index b469e24..254b5ac 100644 --- a/isolation-forest/src/main/scala/com/linkedin/relevance/isolationforest/BaggedPoint.scala +++ b/isolation-forest/src/main/scala/com/linkedin/relevance/isolationforest/BaggedPoint.scala @@ -164,10 +164,10 @@ private[isolationforest] case object BaggedPoint { subsampleWeightBase + (if (rnd.nextFloat() < subsampleWeightDiff) 1 else 0) } - baggedRdd.flatMap { - baggedPoint => baggedPoint.subsampleWeights.zipWithIndex.flatMap { + baggedRdd.flatMap { baggedPoint => + baggedPoint.subsampleWeights.zipWithIndex.flatMap { case (subsampleWeight, subsampleId) => { - Array.fill(roundWeight(subsampleWeight))((subsampleId, baggedPoint.datum)) + Seq.fill(roundWeight(subsampleWeight))((subsampleId, baggedPoint.datum)) } } } diff --git a/isolation-forest/src/main/scala/com/linkedin/relevance/isolationforest/IsolationTree.scala b/isolation-forest/src/main/scala/com/linkedin/relevance/isolationforest/IsolationTree.scala index 0554276..94075cb 100644 --- a/isolation-forest/src/main/scala/com/linkedin/relevance/isolationforest/IsolationTree.scala +++ b/isolation-forest/src/main/scala/com/linkedin/relevance/isolationforest/IsolationTree.scala @@ -105,7 +105,7 @@ private[isolationforest] case object IsolationTree extends Logging { */ def getFeatureToSplit(data: Array[DataPoint]): (Int, Double) = { - val availableFeatures = featureIndices.to[ListBuffer] + val availableFeatures = ListBuffer.empty[Int] ++ featureIndices var foundFeature = false var featureIndex = -1 var featureSplitValue = 0.0 diff --git a/isolation-forest/src/test/scala/com/linkedin/relevance/isolationforest/BaggedPointTest.scala b/isolation-forest/src/test/scala/com/linkedin/relevance/isolationforest/BaggedPointTest.scala index befa704..0e52016 100644 --- a/isolation-forest/src/test/scala/com/linkedin/relevance/isolationforest/BaggedPointTest.scala +++ b/isolation-forest/src/test/scala/com/linkedin/relevance/isolationforest/BaggedPointTest.scala @@ -175,7 +175,7 @@ class BaggedPointTest { val expectedSumArray = expectedResult.map(x => x._1 + x._2.features.sum).sorted val actualSumArray = flattenedBaggedPointArray.map(x => x._1 + x._2.features.sum).sorted - Assert.assertTrue(expectedSumArray.deep == actualSumArray.deep) + Assert.assertEquals(expectedSumArray.toSeq, actualSumArray.toSeq) } @Test(description = "flattenBaggedRDDNonIntegerWeightTest") diff --git a/isolation-forest/src/test/scala/com/linkedin/relevance/isolationforest/IsolationForestModelWriteReadTest.scala b/isolation-forest/src/test/scala/com/linkedin/relevance/isolationforest/IsolationForestModelWriteReadTest.scala index 3127712..96d3d8c 100644 --- a/isolation-forest/src/test/scala/com/linkedin/relevance/isolationforest/IsolationForestModelWriteReadTest.scala +++ b/isolation-forest/src/test/scala/com/linkedin/relevance/isolationforest/IsolationForestModelWriteReadTest.scala @@ -66,7 +66,7 @@ class IsolationForestModelWriteReadTest extends Logging { // Assert the predicted labels are equal val predictedLabels1 = scores1.map(x => x.predictedLabel).collect val predictedLabels2 = scores2.map(x => x.predictedLabel).collect - Assert.assertEquals(predictedLabels1.deep, predictedLabels2.deep) + Assert.assertEquals(predictedLabels1.toSeq, predictedLabels2.toSeq) // Compare each tree in the original and saved/loaded model and assert they are equal isolationForestModel1.isolationTrees @@ -131,8 +131,8 @@ class IsolationForestModelWriteReadTest extends Logging { val predictedLabels1 = scores1.map(x => x.predictedLabel).collect val predictedLabels2 = scores2.map(x => x.predictedLabel).collect val expectedLabels = Array.fill[Double](predictedLabels1.length)(0.0) - Assert.assertEquals(predictedLabels1.deep, predictedLabels2.deep) - Assert.assertEquals(predictedLabels2.deep, expectedLabels.deep) + Assert.assertEquals(predictedLabels1.toSeq, predictedLabels2.toSeq) + Assert.assertEquals(predictedLabels2.toSeq, expectedLabels.toSeq) // Compare each tree in the original and saved/loaded model and assert they are equal isolationForestModel1.isolationTrees @@ -197,8 +197,8 @@ class IsolationForestModelWriteReadTest extends Logging { val scores2 = isolationForestModel2.transform(data).as[ScoringResult] Assert.assertEquals( - scores1.map(x => x.outlierScore).collect.deep, - scores2.map(x => x.outlierScore).collect.deep) + scores1.map(x => x.outlierScore).collect.toSeq, + scores2.map(x => x.outlierScore).collect.toSeq) spark.stop() } diff --git a/isolation-forest/src/test/scala/com/linkedin/relevance/isolationforest/IsolationForestTest.scala b/isolation-forest/src/test/scala/com/linkedin/relevance/isolationforest/IsolationForestTest.scala index 5e3a968..0ba5371 100644 --- a/isolation-forest/src/test/scala/com/linkedin/relevance/isolationforest/IsolationForestTest.scala +++ b/isolation-forest/src/test/scala/com/linkedin/relevance/isolationforest/IsolationForestTest.scala @@ -150,9 +150,10 @@ class IsolationForestTest { val scores = isolationForestModel.transform(data).as[ScoringResult] val predictedLabels = scores.map(x => x.predictedLabel).collect val expectedLabels = Array.fill[Double](predictedLabels.length)(0.0) + Assert.assertEquals( - predictedLabels.deep, - expectedLabels.deep, + predictedLabels.toSeq, + expectedLabels.toSeq, "expected all predicted labels to be 0.0") spark.stop()