Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixed 39 build warnings due to upcoming Scala 3 changes and previously deprecated methods. #52

Merged
merged 1 commit into from
May 30, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ class IsolationForest(override val uid: String) extends Estimator[IsolationFores

// Validate $(maxFeatures) and $(maxSamples) against input dataset and determine the values
// actually used to train the model: numFeatures and numSamples
val totalNumFeatures = dataset.head.features.length
val totalNumFeatures = dataset.head().features.length
val numFeatures = if ($(maxFeatures) > 1.0) {
math.floor($(maxFeatures)).toInt
} else {
Expand Down Expand Up @@ -94,7 +94,7 @@ class IsolationForest(override val uid: String) extends Estimator[IsolationFores
.partitionBy(new HashPartitioner($(numEstimators)))
val repartitionedFlattenedSampledDataset = repartitionedFlattenedSampledRdd
.mapPartitions(x => x.map(y => y._2), preservesPartitioning = true)
.toDS
.toDS()
logInfo(s"Training ${$(numEstimators)} isolation trees using" +
s" ${repartitionedFlattenedSampledDataset.rdd.getNumPartitions} partitions.")

Expand All @@ -106,7 +106,7 @@ class IsolationForest(override val uid: String) extends Estimator[IsolationFores
// Use a different seed for each partition to ensure each partition has an independent set of
// random numbers. This ensures each tree is truly trained independently and doing so has a
// measurable effect on the results.
val seed = $(randomSeed) + TaskContext.get.partitionId() + 2
val seed = $(randomSeed) + TaskContext.get().partitionId() + 2
val rnd = new scala.util.Random(seed)

val dataForTree = rnd.shuffle(x.toSeq).slice(0, numSamples).toArray
Expand All @@ -124,7 +124,7 @@ class IsolationForest(override val uid: String) extends Estimator[IsolationFores
// random numbers. This ensures each tree is truly trained independently and doing so has a
// measurable effect on the results.
Iterator(IsolationTree
.fit(dataForTree, $(randomSeed) + $(numEstimators) + TaskContext.get.partitionId() + 2, featureIndices))
.fit(dataForTree, $(randomSeed) + $(numEstimators) + TaskContext.get().partitionId() + 2, featureIndices))
}).collect()

val isolationForestModel = copyValues(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -254,7 +254,7 @@ private[isolationforest] case object IsolationForestModelReadWrite extends Loggi

saveMetadata(instance, path, spark, Some(extraMetadata))
val dataPath = new Path(path, "data").toString
val nodeDataRDD = spark.sparkContext.parallelize(instance.isolationTrees.zipWithIndex)
val nodeDataRDD = spark.sparkContext.parallelize(instance.isolationTrees.zipWithIndex.toIndexedSeq)
.flatMap { case (tree, treeID) => EnsembleNodeData.build(tree, treeID) }
logInfo(s"Saving IsolationForestModel tree data to path ${dataPath}")
spark.createDataFrame(nodeDataRDD)
Expand Down Expand Up @@ -299,7 +299,7 @@ private[isolationforest] case object IsolationForestModelReadWrite extends Loggi

val uid = instance.uid
val cls = instance.getClass.getName
val params = instance.extractParamMap.toSeq
val params = instance.extractParamMap().toSeq
val jsonParams = render(params.map { case ParamPair(p, v) =>
p.name -> parse(p.jsonEncode(v))
}.toList)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ private[isolationforest] case object IsolationTree extends Logging {
def fit(data: Array[DataPoint], randomSeed: Long, featureIndices: Array[Int]): IsolationTree = {

logInfo(s"Fitting isolation tree with random seed ${randomSeed} on" +
s" ${featureIndices.seq.toString} features (indices) using ${data.length} data points.")
s" ${featureIndices.toIndexedSeq.toString} features (indices) using ${data.length} data points.")

def log2(x: Double): Double = math.log10(x) / math.log10(2.0)
val heightLimit = math.ceil(log2(data.length.toDouble)).toInt
Expand Down Expand Up @@ -124,7 +124,7 @@ private[isolationforest] case object IsolationTree extends Logging {
if (minFeatureValue != maxFeatureValue) {
foundFeature = true
featureIndex = featureIndexTrial
featureSplitValue = ((maxFeatureValue - minFeatureValue) * randomState.nextDouble
featureSplitValue = ((maxFeatureValue - minFeatureValue) * randomState.nextDouble()
+ minFeatureValue)
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ class BaggedPointTest {
numCols: Int,
expectedMean: Double,
expectedStddev: Double,
epsilon: Double) {
epsilon: Double): Unit = {

val values = new mutable.ArrayBuffer[Double]()
data.foreach { row =>
Expand All @@ -63,7 +63,7 @@ class BaggedPointTest {
val spark = getSparkSession

val dataPointArray = generateDataPoints(1, 1000)
val rdd = spark.sparkContext.parallelize(dataPointArray)
val rdd = spark.sparkContext.parallelize(dataPointArray.toIndexedSeq)
val baggedRDD = BaggedPoint.convertToBaggedRDD(rdd, 1.0, 1, false, 42)
baggedRDD.collect().foreach { baggedPoint =>
assert(baggedPoint.subsampleWeights.size == 1 && baggedPoint.subsampleWeights(0) == 1)
Expand All @@ -80,7 +80,7 @@ class BaggedPointTest {

val seeds = Array(123, 5354, 230, 349867, 23987)
val arr = generateDataPoints(1, 1000)
val rdd = spark.sparkContext.parallelize(arr)
val rdd = spark.sparkContext.parallelize(arr.toIndexedSeq)
seeds.foreach { seed =>
val baggedRDD = BaggedPoint.convertToBaggedRDD(rdd, 1.0, numSubsamples, true, seed)
val subsampleCounts: Array[Array[Double]] = baggedRDD
Expand All @@ -101,7 +101,7 @@ class BaggedPointTest {

val seeds = Array(123, 5354, 230, 349867, 23987)
val arr = generateDataPoints(1, 1000)
val rdd = spark.sparkContext.parallelize(arr)
val rdd = spark.sparkContext.parallelize(arr.toIndexedSeq)
seeds.foreach { seed =>
val baggedRDD = BaggedPoint.convertToBaggedRDD(rdd, subsample, numSubsamples, true, seed)
val subsampleCounts: Array[Array[Double]] = baggedRDD
Expand All @@ -121,7 +121,7 @@ class BaggedPointTest {

val seeds = Array(123, 5354, 230, 349867, 23987)
val arr = generateDataPoints(1, 1000)
val rdd = spark.sparkContext.parallelize(arr)
val rdd = spark.sparkContext.parallelize(arr.toIndexedSeq)
seeds.foreach { seed =>
val baggedRDD = BaggedPoint.convertToBaggedRDD(rdd, 1.0, numSubsamples, false, seed)
val subsampleCounts: Array[Array[Double]] = baggedRDD
Expand All @@ -142,7 +142,7 @@ class BaggedPointTest {

val seeds = Array(123, 5354, 230, 349867, 23987)
val arr = generateDataPoints(1, 1000)
val rdd = spark.sparkContext.parallelize(arr)
val rdd = spark.sparkContext.parallelize(arr.toIndexedSeq)
seeds.foreach { seed =>
val baggedRDD = BaggedPoint.convertToBaggedRDD(rdd, subsample, numSubsamples, false, seed)
val subsampleCounts: Array[Array[Double]] = baggedRDD
Expand All @@ -168,7 +168,7 @@ class BaggedPointTest {
(1, dataPointArray(1)),
(1, dataPointArray(1)))

val dataPointRDD = spark.sparkContext.parallelize(dataPointArray)
val dataPointRDD = spark.sparkContext.parallelize(dataPointArray.toIndexedSeq)
val baggedPointRDD = dataPointRDD.map(x => new BaggedPoint(x, subsampleWeights))
val flattenedBaggedPointRDD = BaggedPoint.flattenBaggedRDD(baggedPointRDD, 1L)
val flattenedBaggedPointArray = flattenedBaggedPointRDD.collect()
Expand All @@ -187,7 +187,7 @@ class BaggedPointTest {
val dataPointArray = generateDataPoints(10, numRecords)
val subsampleWeights = Array(1.3, 1.75)

val dataPointRDD = spark.sparkContext.parallelize(dataPointArray)
val dataPointRDD = spark.sparkContext.parallelize(dataPointArray.toIndexedSeq)
val baggedPointRDD = dataPointRDD.map(x => new BaggedPoint(x, subsampleWeights))
val flattenedBaggedPointRDD = BaggedPoint.flattenBaggedRDD(baggedPointRDD, 1L)
val flattenedBaggedPointArray = flattenedBaggedPointRDD.collect()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,14 +38,14 @@ class IsolationForestModelWriteReadTest extends Logging {

// Write the trained model to disk and then read it back from disk
val savePath = System.getProperty("java.io.tmpdir") + "/savedIsolationForestModel"
isolationForestModel1.write.overwrite.save(savePath)
isolationForestModel1.write.overwrite().save(savePath)
val isolationForestModel2 = IsolationForestModel.load(savePath)
deleteDirectory(new File(savePath))

// Assert that all parameter values are equal
Assert.assertEquals(
isolationForestModel1.extractParamMap.toString,
isolationForestModel2.extractParamMap.toString)
isolationForestModel1.extractParamMap().toString,
isolationForestModel2.extractParamMap().toString)
Assert.assertEquals(isolationForestModel1.getNumSamples, isolationForestModel2.getNumSamples)
Assert.assertEquals(isolationForestModel1.getNumFeatures, isolationForestModel2.getNumFeatures)
Assert.assertEquals(
Expand All @@ -64,8 +64,8 @@ class IsolationForestModelWriteReadTest extends Logging {
Assert.assertEquals(auroc1, auroc2)

// Assert the predicted labels are equal
val predictedLabels1 = scores1.map(x => x.predictedLabel).collect
val predictedLabels2 = scores2.map(x => x.predictedLabel).collect
val predictedLabels1 = scores1.map(x => x.predictedLabel).collect()
val predictedLabels2 = scores2.map(x => x.predictedLabel).collect()
Assert.assertEquals(predictedLabels1.toSeq, predictedLabels2.toSeq)

// Compare each tree in the original and saved/loaded model and assert they are equal
Expand Down Expand Up @@ -102,14 +102,14 @@ class IsolationForestModelWriteReadTest extends Logging {

// Write the trained model to disk and then read it back from disk
val savePath = System.getProperty("java.io.tmpdir") + "/savedIsolationForestModelZeroContamination"
isolationForestModel1.write.overwrite.save(savePath)
isolationForestModel1.write.overwrite().save(savePath)
val isolationForestModel2 = IsolationForestModel.load(savePath)
deleteDirectory(new File(savePath))

// Assert that all parameter values are equal
Assert.assertEquals(
isolationForestModel1.extractParamMap.toString,
isolationForestModel2.extractParamMap.toString)
isolationForestModel1.extractParamMap().toString,
isolationForestModel2.extractParamMap().toString)
Assert.assertEquals(isolationForestModel1.getNumSamples, isolationForestModel2.getNumSamples)
Assert.assertEquals(isolationForestModel1.getNumFeatures, isolationForestModel2.getNumFeatures)
Assert.assertEquals(
Expand All @@ -128,8 +128,8 @@ class IsolationForestModelWriteReadTest extends Logging {
Assert.assertEquals(auroc1, auroc2)

// Assert the predicted labels are equal and always 0.0
val predictedLabels1 = scores1.map(x => x.predictedLabel).collect
val predictedLabels2 = scores2.map(x => x.predictedLabel).collect
val predictedLabels1 = scores1.map(x => x.predictedLabel).collect()
val predictedLabels2 = scores2.map(x => x.predictedLabel).collect()
val expectedLabels = Array.fill[Double](predictedLabels1.length)(0.0)
Assert.assertEquals(predictedLabels1.toSeq, predictedLabels2.toSeq)
Assert.assertEquals(predictedLabels2.toSeq, expectedLabels.toSeq)
Expand Down Expand Up @@ -182,7 +182,7 @@ class IsolationForestModelWriteReadTest extends Logging {

// Write the trained model to disk and then read it back from disk
val savePath = System.getProperty("java.io.tmpdir") + "/savedIsolationForestModelIdenticalFeatures"
isolationForestModel1.write.overwrite.save(savePath)
isolationForestModel1.write.overwrite().save(savePath)
val isolationForestModel2 = IsolationForestModel.load(savePath)
deleteDirectory(new File(savePath))

Expand All @@ -197,8 +197,8 @@ class IsolationForestModelWriteReadTest extends Logging {
val scores2 = isolationForestModel2.transform(data).as[ScoringResult]

Assert.assertEquals(
scores1.map(x => x.outlierScore).collect.toSeq,
scores2.map(x => x.outlierScore).collect.toSeq)
scores1.map(x => x.outlierScore).collect().toSeq,
scores2.map(x => x.outlierScore).collect().toSeq)

spark.stop()
}
Expand All @@ -214,14 +214,14 @@ class IsolationForestModelWriteReadTest extends Logging {

// Write the trained model to disk and then read it back from disk
val savePath = System.getProperty("java.io.tmpdir") + "/emptyIsolationForestModelWriteReadTest"
isolationForestModel1.write.overwrite.save(savePath)
isolationForestModel1.write.overwrite().save(savePath)
val isolationForestModel2 = IsolationForestModel.load(savePath)
deleteDirectory(new File(savePath))

// Assert that all parameter values are equal
Assert.assertEquals(
isolationForestModel1.extractParamMap.toString,
isolationForestModel2.extractParamMap.toString)
isolationForestModel1.extractParamMap().toString,
isolationForestModel2.extractParamMap().toString)
Assert.assertEquals(isolationForestModel1.getNumSamples, isolationForestModel2.getNumSamples)
Assert.assertEquals(isolationForestModel1.getNumFeatures, isolationForestModel2.getNumFeatures)
Assert.assertEquals(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,13 +32,13 @@ class IsolationForestTest {
.setContaminationError(contamination * 0.01)
.setRandomSeed(1)

isolationForest1.write.overwrite.save(savePath)
isolationForest1.write.overwrite().save(savePath)
val isolationForest2 = IsolationForest.load(savePath)
deleteDirectory(new File(savePath))

Assert.assertEquals(
isolationForest1.extractParamMap.toString,
isolationForest2.extractParamMap.toString)
isolationForest1.extractParamMap().toString,
isolationForest2.extractParamMap().toString)

spark.stop()
}
Expand Down Expand Up @@ -148,7 +148,7 @@ class IsolationForestTest {

// Calculate area under ROC curve and assert
val scores = isolationForestModel.transform(data).as[ScoringResult]
val predictedLabels = scores.map(x => x.predictedLabel).collect
val predictedLabels = scores.map(x => x.predictedLabel).collect()
val expectedLabels = Array.fill[Double](predictedLabels.length)(0.0)

Assert.assertEquals(
Expand Down Expand Up @@ -195,10 +195,10 @@ class IsolationForestTest {

val labeledOutlierScoresMean = labeledOutlierScores
.map(_.outlierScore)
.reduce(_+_) / labeledOutlierScores.count
.reduce(_+_) / labeledOutlierScores.count()
val labeledInlierScoresMean = labeledInlierScores
.map(_.outlierScore)
.reduce(_+_) / labeledInlierScores.count
.reduce(_+_) / labeledInlierScores.count()

val uncert = 0.02
val expectedOutlierScoreMean = 0.61
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ object TestUtils {
}

// local context with 4 threads
SparkSession.builder
SparkSession.builder()
.master("local[4]")
.appName("testing-spark")
.config(sparkConf)
Expand Down
Loading