Skip to content

Commit 7a2125a

Browse files
derrickburns and claude committed
Fix CI: format code and update CodeQL workflow
- Run scalafmt to format all test files
- Fix CodeQL workflow by adding build-mode: none for Scala projects

This resolves the "no source code seen" error in CodeQL analysis by explicitly setting the build mode for manual compilation.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
1 parent 0913ddb commit 7a2125a

37 files changed

Lines changed: 617 additions & 838 deletions

.github/workflows/codeql.yml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,8 @@ jobs:
4949
with:
5050
languages: java
5151
queries: security-and-quality
52+
# Use none build-mode for Scala projects with manual build
53+
build-mode: none
5254

5355
# MANUAL BUILD: compile Scala so the extractor can see .class files
5456
- name: Compile (Scala -> JVM bytecode)

src/test/scala/com/massivedatascience/clusterer/AssignmentPlanSuite.scala

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@ class AssignmentPlanSuite extends AnyFunSuite with Matchers {
6262

6363
test("ConditionalAssignmentPlan should be created correctly") {
6464
val defaultPlan = RDDMapAssignmentPlan("squaredEuclidean")
65-
val plan = ConditionalAssignmentPlan(
65+
val plan = ConditionalAssignmentPlan(
6666
defaultPlan = defaultPlan,
6767
featuresCol = "features",
6868
predictionCol = "prediction"
@@ -87,7 +87,7 @@ class AssignmentPlanSuite extends AnyFunSuite with Matchers {
8787

8888
test("AssignmentPlan.crossJoin factory should accept custom parameters") {
8989
val customProvider = RowIdProvider.fromColumn("id")
90-
val plan = AssignmentPlan.crossJoin(
90+
val plan = AssignmentPlan.crossJoin(
9191
featuresCol = "data",
9292
predictionCol = "cluster",
9393
rowIdProvider = customProvider
@@ -140,11 +140,11 @@ class AssignmentPlanSuite extends AnyFunSuite with Matchers {
140140
val result = plan match {
141141
case CrossJoinAssignmentPlan(div, _, feat, pred) =>
142142
s"CrossJoin: $div, $feat -> $pred"
143-
case RDDMapAssignmentPlan(div, feat, pred) =>
143+
case RDDMapAssignmentPlan(div, feat, pred) =>
144144
s"RDDMap: $div, $feat -> $pred"
145-
case UDFAssignmentPlan(div, feat, pred) =>
145+
case UDFAssignmentPlan(div, feat, pred) =>
146146
s"UDF: $div, $feat -> $pred"
147-
case ConditionalAssignmentPlan(_, feat, pred) =>
147+
case ConditionalAssignmentPlan(_, feat, pred) =>
148148
s"Conditional: $feat -> $pred"
149149
}
150150

src/test/scala/com/massivedatascience/clusterer/BisectingKMeansSuite.scala

Lines changed: 19 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,14 @@
11
package com.massivedatascience.clusterer
22

3-
import org.apache.spark.ml.linalg.{Vector, Vectors}
3+
import org.apache.spark.ml.linalg.{ Vector, Vectors }
44
import org.apache.spark.sql.SparkSession
55
import org.scalatest.BeforeAndAfterAll
66
import org.scalatest.funsuite.AnyFunSuite
77

88
/** Test suite for Bisecting K-Means clustering.
99
*
10-
* Tests hierarchical divisive clustering with various configurations, comparing behavior with standard K-Means.
10+
* Tests hierarchical divisive clustering with various configurations, comparing behavior with
11+
* standard K-Means.
1112
*/
1213
class BisectingKMeansSuite extends AnyFunSuite with BeforeAndAfterAll {
1314

@@ -102,7 +103,7 @@ class BisectingKMeansSuite extends AnyFunSuite with BeforeAndAfterAll {
102103

103104
// Set minDivisibleClusterSize to 5, so only the large cluster can be split
104105
val bisecting = new ml.BisectingKMeans()
105-
.setK(4) // Request 4 clusters
106+
.setK(4) // Request 4 clusters
106107
.setDivergence("squaredEuclidean")
107108
.setMaxIter(10)
108109
.setMinDivisibleClusterSize(5) // Minimum size to split
@@ -136,11 +137,8 @@ class BisectingKMeansSuite extends AnyFunSuite with BeforeAndAfterAll {
136137
assert(modelKL.numClusters === 2)
137138

138139
// Test with L1 divergence
139-
val bisectingL1 = new ml.BisectingKMeans()
140-
.setK(2)
141-
.setDivergence("l1")
142-
.setMaxIter(10)
143-
.setSeed(42)
140+
val bisectingL1 =
141+
new ml.BisectingKMeans().setK(2).setDivergence("l1").setMaxIter(10).setSeed(42)
144142

145143
val modelL1 = bisectingL1.fit(df)
146144
assert(modelL1.numClusters === 2)
@@ -161,11 +159,8 @@ class BisectingKMeansSuite extends AnyFunSuite with BeforeAndAfterAll {
161159

162160
// Run bisecting K-Means multiple times with different seeds
163161
val runs = Range.inclusive(1, 5).map { i =>
164-
val bisecting = new ml.BisectingKMeans()
165-
.setK(2)
166-
.setDivergence("squaredEuclidean")
167-
.setMaxIter(10)
168-
.setSeed(i)
162+
val bisecting =
163+
new ml.BisectingKMeans().setK(2).setDivergence("squaredEuclidean").setMaxIter(10).setSeed(i)
169164

170165
val model = bisecting.fit(df)
171166
val predictions = model.transform(df)
@@ -193,11 +188,8 @@ class BisectingKMeansSuite extends AnyFunSuite with BeforeAndAfterAll {
193188

194189
val df = spark.createDataFrame(data.map(Tuple1.apply)).toDF("features")
195190

196-
val bisecting = new ml.BisectingKMeans()
197-
.setK(2)
198-
.setDivergence("squaredEuclidean")
199-
.setMaxIter(10)
200-
.setSeed(42)
191+
val bisecting =
192+
new ml.BisectingKMeans().setK(2).setDivergence("squaredEuclidean").setMaxIter(10).setSeed(42)
201193

202194
val model = bisecting.fit(df)
203195

@@ -233,7 +225,7 @@ class BisectingKMeansSuite extends AnyFunSuite with BeforeAndAfterAll {
233225
assert(model.numClusters === 2)
234226

235227
// The heavy point should influence the center of its cluster
236-
val centers = model.clusterCentersAsVectors
228+
val centers = model.clusterCentersAsVectors
237229
val hasNearZeroCenter = centers.exists { center =>
238230
val arr = center.toArray
239231
math.sqrt(arr(0) * arr(0) + arr(1) * arr(1)) < 1.0
@@ -259,11 +251,8 @@ class BisectingKMeansSuite extends AnyFunSuite with BeforeAndAfterAll {
259251
val df = spark.createDataFrame(data.map(Tuple1.apply)).toDF("features")
260252

261253
// First split into 2 clusters (should be A vs B)
262-
val bisecting2 = new ml.BisectingKMeans()
263-
.setK(2)
264-
.setDivergence("squaredEuclidean")
265-
.setMaxIter(10)
266-
.setSeed(42)
254+
val bisecting2 =
255+
new ml.BisectingKMeans().setK(2).setDivergence("squaredEuclidean").setMaxIter(10).setSeed(42)
267256

268257
val model2 = bisecting2.fit(df)
269258
val predictions2 = model2.transform(df).select("prediction").collect().map(_.getInt(0))
@@ -277,11 +266,8 @@ class BisectingKMeansSuite extends AnyFunSuite with BeforeAndAfterAll {
277266
)
278267

279268
// Now split into 4 clusters (should be A1, A2, B1, B2)
280-
val bisecting4 = new ml.BisectingKMeans()
281-
.setK(4)
282-
.setDivergence("squaredEuclidean")
283-
.setMaxIter(10)
284-
.setSeed(42)
269+
val bisecting4 =
270+
new ml.BisectingKMeans().setK(4).setDivergence("squaredEuclidean").setMaxIter(10).setSeed(42)
285271

286272
val model4 = bisecting4.fit(df)
287273
assert(model4.numClusters === 4)
@@ -295,11 +281,8 @@ class BisectingKMeansSuite extends AnyFunSuite with BeforeAndAfterAll {
295281

296282
val df = spark.createDataFrame(data.map(Tuple1.apply)).toDF("features")
297283

298-
val bisecting = new ml.BisectingKMeans()
299-
.setK(2)
300-
.setDivergence("squaredEuclidean")
301-
.setMaxIter(10)
302-
.setSeed(42)
284+
val bisecting =
285+
new ml.BisectingKMeans().setK(2).setDivergence("squaredEuclidean").setMaxIter(10).setSeed(42)
303286

304287
val model = bisecting.fit(df)
305288
assert(model.numClusters === 2)
@@ -315,11 +298,8 @@ class BisectingKMeansSuite extends AnyFunSuite with BeforeAndAfterAll {
315298

316299
val df = spark.createDataFrame(data.map(Tuple1.apply)).toDF("features")
317300

318-
val bisecting = new ml.BisectingKMeans()
319-
.setK(2)
320-
.setDivergence("squaredEuclidean")
321-
.setMaxIter(10)
322-
.setSeed(42)
301+
val bisecting =
302+
new ml.BisectingKMeans().setK(2).setDivergence("squaredEuclidean").setMaxIter(10).setSeed(42)
323303

324304
val model = bisecting.fit(df)
325305
val cost = model.computeCost(df)

src/test/scala/com/massivedatascience/clusterer/BregmanDivergenceEdgeCasesSuite.scala

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -280,8 +280,9 @@ class BregmanDivergenceEdgeCasesSuite extends AnyFunSuite {
280280
val expectedInhomogeneous = Vectors.dense(1.0, 2.0) // (2.0, 4.0) / 2.0
281281
val actualInhomogeneous = point.inhomogeneous
282282

283-
assert(expectedInhomogeneous.toArray.zip(actualInhomogeneous.toArray).forall { case (expected, actual) =>
284-
math.abs(expected - actual) < 1e-8
283+
assert(expectedInhomogeneous.toArray.zip(actualInhomogeneous.toArray).forall {
284+
case (expected, actual) =>
285+
math.abs(expected - actual) < 1e-8
285286
})
286287
}
287288

@@ -304,7 +305,7 @@ class BregmanDivergenceEdgeCasesSuite extends AnyFunSuite {
304305
test("sparse vector handling") {
305306
val ops = BregmanPointOps(BregmanPointOps.EUCLIDEAN)
306307
val sparseVector = WeightedVector(Vectors.sparse(10, Seq((1, 2.0), (5, 3.0), (9, 1.0))), 1.0)
307-
val denseVector =
308+
val denseVector =
308309
WeightedVector(Vectors.dense(0.0, 2.0, 0.0, 0.0, 0.0, 3.0, 0.0, 0.0, 0.0, 1.0), 1.0)
309310

310311
val sparsePoint = ops.toPoint(sparseVector)

src/test/scala/com/massivedatascience/clusterer/BregmanMixtureModelTestSuite.scala

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -142,7 +142,7 @@ class BregmanMixtureModelTestSuite extends AnyFunSuite with LocalClusterSparkCon
142142
}
143143

144144
// Points that are close should likely have the same assignment
145-
val assignmentMap = mapAssignments.toMap
145+
val assignmentMap = mapAssignments.toMap
146146
val expectedSameCluster = Set(
147147
(
148148
BregmanPoint(WeightedVector(Vectors.dense(0.0, 0.0)), 0.0),
@@ -292,7 +292,7 @@ class BregmanMixtureModelTestSuite extends AnyFunSuite with LocalClusterSparkCon
292292
val model = BregmanMixtureModel()
293293
val result = model.fit(points, 2, pointOps)
294294

295-
val stats = result.getStats
295+
val stats = result.getStats
296296
val expectedKeys = Set(
297297
"logLikelihood",
298298
"numComponents",

src/test/scala/com/massivedatascience/clusterer/BregmanSoftKMeansTestSuite.scala

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -84,12 +84,12 @@ class BregmanSoftKMeansTestSuite extends AnyFunSuite with LocalClusterSparkConte
8484
val initialCenters = selector.init(pointOps, points, 2, None, 1, 42L).head
8585

8686
// Test with low beta (soft assignments)
87-
val softResult =
87+
val softResult =
8888
BregmanSoftKMeans.verySoft(beta = 0.1).clusterSoft(30, pointOps, points, initialCenters)
8989
val softMemberships = softResult.memberships.collect()
9090

9191
// Test with high beta (sharp assignments)
92-
val sharpResult =
92+
val sharpResult =
9393
BregmanSoftKMeans.sharp(beta = 10.0).clusterSoft(30, pointOps, points, initialCenters)
9494
val sharpMemberships = sharpResult.memberships.collect()
9595

@@ -210,7 +210,7 @@ class BregmanSoftKMeansTestSuite extends AnyFunSuite with LocalClusterSparkConte
210210
val effectiveNumClusters = result.effectiveNumberOfClusters
211211

212212
// Debug: Check actual memberships
213-
val sampleMemberships = result.memberships.take(5)
213+
val sampleMemberships = result.memberships.take(5)
214214
val hasMultipleClusters = sampleMemberships.exists { case (_, probs) =>
215215
probs.count(p => p > 0.01) > 1
216216
}
@@ -270,7 +270,7 @@ class BregmanSoftKMeansTestSuite extends AnyFunSuite with LocalClusterSparkConte
270270
points.cache()
271271

272272
// Use very tight convergence threshold
273-
val config = BregmanSoftKMeansConfig(
273+
val config = BregmanSoftKMeansConfig(
274274
beta = 5.0,
275275
convergenceThreshold = 1e-12,
276276
maxIterations = 5

src/test/scala/com/massivedatascience/clusterer/BregmanTestSuite.scala

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ class BregmanTestSuite extends AnyFunSuite {
1919
def g(d: Vector): Vector = {
2020
Vectors.dense(d.toArray.map { _ * 2.0 })
2121
}
22-
val div = BregmanDivergence(f, g)
22+
val div = BregmanDivergence(f, g)
2323

2424
val input = Vectors.dense(1.0, 2.0, 4.0)
2525

src/test/scala/com/massivedatascience/clusterer/CenterStoreSuite.scala

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -53,9 +53,9 @@ class CenterStoreSuite extends AnyFunSuite with Matchers with BeforeAndAfterAll
5353
// Helper to create centers - directly construct BregmanCenter
5454
// For testing purposes, we use simple Euclidean-like values
5555
def makeCenter(values: Array[Double], weight: Double = 1.0): BregmanCenter = {
56-
val homogeneous = Vectors.dense(values.map(_ * weight))
57-
val gradient = Vectors.dense(values) // For Euclidean: gradient = inhomogeneous
58-
val dotGradMinusF = 0.5 * values.map(x => x * x).sum // For Euclidean: F(x) = 0.5 ||x||^2
56+
val homogeneous = Vectors.dense(values.map(_ * weight))
57+
val gradient = Vectors.dense(values) // For Euclidean: gradient = inhomogeneous
58+
val dotGradMinusF = 0.5 * values.map(x => x * x).sum // For Euclidean: F(x) = 0.5 ||x||^2
5959
BregmanCenter(homogeneous, weight, dotGradMinusF, gradient)
6060
}
6161

@@ -103,9 +103,9 @@ class CenterStoreSuite extends AnyFunSuite with Matchers with BeforeAndAfterAll
103103
makeCenter(Array(3.0, 4.0))
104104
)
105105

106-
val store = ArrayCenterStore(centers)
107-
val newCenter = makeCenter(Array(10.0, 20.0))
108-
val newStore = store.updated(0, newCenter)
106+
val store = ArrayCenterStore(centers)
107+
val newCenter = makeCenter(Array(10.0, 20.0))
108+
val newStore = store.updated(0, newCenter)
109109

110110
assert(newStore(0) == newCenter)
111111
assert(newStore(1) == centers(1))
@@ -173,7 +173,7 @@ class CenterStoreSuite extends AnyFunSuite with Matchers with BeforeAndAfterAll
173173
makeCenter(Array(3.0, 4.0), weight = 2.0)
174174
)
175175

176-
val store = ArrayCenterStore(centers)
176+
val store = ArrayCenterStore(centers)
177177
val mapped = store.map { c =>
178178
// Double the weight
179179
makeCenter(c.inhomogeneous.toArray, c.weight * 2.0)

src/test/scala/com/massivedatascience/clusterer/IntegrationTestSuite.scala

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -101,7 +101,7 @@ class IntegrationTestSuite extends AnyFunSuite with LocalClusterSparkContext {
101101
val timeSeries = sc.parallelize((0 until 50).map { i =>
102102
// Create time series with different patterns
103103
val pattern = i % 3
104-
val values = (0 until 16).map { t =>
104+
val values = (0 until 16).map { t =>
105105
pattern match {
106106
case 0 => math.sin(t * 0.5) + scala.util.Random.nextGaussian() * 0.1
107107
case 1 => math.cos(t * 0.3) + scala.util.Random.nextGaussian() * 0.1
@@ -175,13 +175,15 @@ class IntegrationTestSuite extends AnyFunSuite with LocalClusterSparkContext {
175175
val cost = model.computeCostWeighted(data)
176176
assert(cost >= 0.0 && java.lang.Double.isFinite(cost))
177177
} catch {
178-
case e: IllegalArgumentException if e.getMessage.contains("requires at least one valid center") =>
178+
case e: IllegalArgumentException
179+
if e.getMessage.contains("requires at least one valid center") =>
179180
// Acceptable if extreme conditions cause invalid centers
180181
succeed
181182
case e: IllegalArgumentException if e.getMessage.contains("requirement failed") =>
182183
// Acceptable if RDD caching requirement fails during multi-stage training
183184
succeed
184-
case e: org.apache.spark.SparkException if e.getMessage.contains("does not match requested numClusters") =>
185+
case e: org.apache.spark.SparkException
186+
if e.getMessage.contains("does not match requested numClusters") =>
185187
// Acceptable if fewer unique clusters are produced due to data characteristics
186188
succeed
187189
} finally {
@@ -280,8 +282,7 @@ class IntegrationTestSuite extends AnyFunSuite with LocalClusterSparkContext {
280282
.groupBy(_._1) // Group by predicted cluster
281283
.mapValues { pairs =>
282284
// Find the most common true label in this predicted cluster
283-
pairs
284-
.map { case (_, idx) => trueLabels(idx) }
285+
pairs.map { case (_, idx) => trueLabels(idx) }
285286
.groupBy(identity)
286287
.mapValues(_.length)
287288
.maxBy(_._2)
@@ -292,7 +293,7 @@ class IntegrationTestSuite extends AnyFunSuite with LocalClusterSparkContext {
292293
val correctAssignments = predictions.zipWithIndex.count { case (prediction, index) =>
293294
clusterToTrueLabel.get(prediction).contains(trueLabels(index))
294295
}
295-
val accuracy = correctAssignments.toDouble / numPoints
296+
val accuracy = correctAssignments.toDouble / numPoints
296297
assert(accuracy > 0.5, s"Poor clustering accuracy: $accuracy")
297298
}
298299

0 commit comments

Comments (0)