From 380b6b167d5c3a220604aabaf3bf101859582633 Mon Sep 17 00:00:00 2001 From: Martin Mauch Date: Wed, 8 Nov 2017 18:26:04 +0100 Subject: [PATCH 1/9] Compare Java and Scala BigDecimal with tolerance --- .../holdenkarau/spark/testing/DataFrameSuiteBase.scala | 8 +++++++- .../holdenkarau/spark/testing/DataFrameSuiteBase.scala | 6 +++++- .../holdenkarau/spark/testing/SampleDataFrameTest.scala | 6 ++++++ 3 files changed, 18 insertions(+), 2 deletions(-) diff --git a/core/src/main/2.0/scala/com/holdenkarau/spark/testing/DataFrameSuiteBase.scala b/core/src/main/2.0/scala/com/holdenkarau/spark/testing/DataFrameSuiteBase.scala index c546db10..0e343958 100644 --- a/core/src/main/2.0/scala/com/holdenkarau/spark/testing/DataFrameSuiteBase.scala +++ b/core/src/main/2.0/scala/com/holdenkarau/spark/testing/DataFrameSuiteBase.scala @@ -207,7 +207,13 @@ object DataFrameSuiteBase { } case d1: java.math.BigDecimal => - if (d1.compareTo(o2.asInstanceOf[java.math.BigDecimal]) != 0) { + if (d1.subtract(o2.asInstanceOf[java.math.BigDecimal]).abs + .compareTo(new java.math.BigDecimal(tol)) > 0) { + return false + } + + case d1: scala.math.BigDecimal => + if ((d1 - o2.asInstanceOf[scala.math.BigDecimal]).abs > tol) { return false } diff --git a/core/src/main/pre-2.0/scala/com/holdenkarau/spark/testing/DataFrameSuiteBase.scala b/core/src/main/pre-2.0/scala/com/holdenkarau/spark/testing/DataFrameSuiteBase.scala index 64fe68c1..a66f9423 100644 --- a/core/src/main/pre-2.0/scala/com/holdenkarau/spark/testing/DataFrameSuiteBase.scala +++ b/core/src/main/pre-2.0/scala/com/holdenkarau/spark/testing/DataFrameSuiteBase.scala @@ -171,7 +171,11 @@ object DataFrameSuiteBase { if (abs(d1 - o2.asInstanceOf[Double]) > tol) return false case d1: java.math.BigDecimal => - if (d1.compareTo(o2.asInstanceOf[java.math.BigDecimal]) != 0) return false + if (d1.subtract(o2.asInstanceOf[java.math.BigDecimal]).abs + .compareTo(new java.math.BigDecimal(tol)) > 0) return false + + case d1: scala.math.BigDecimal => + if ((d1 - o2.asInstanceOf[scala.math.BigDecimal]).abs > tol) return false case t1: Timestamp => if (abs(t1.getTime - o2.asInstanceOf[Timestamp].getTime) > tol) { diff --git a/core/src/test/1.3/scala/com/holdenkarau/spark/testing/SampleDataFrameTest.scala b/core/src/test/1.3/scala/com/holdenkarau/spark/testing/SampleDataFrameTest.scala index 8177abbb..1ca7faae 100644 --- a/core/src/test/1.3/scala/com/holdenkarau/spark/testing/SampleDataFrameTest.scala +++ b/core/src/test/1.3/scala/com/holdenkarau/spark/testing/SampleDataFrameTest.scala @@ -70,6 +70,10 @@ class SampleDataFrameTest extends FunSuite with DataFrameSuiteBase { val row8 = Row(Timestamp.valueOf("2018-01-12 20:22:13")) val row9 = Row(Timestamp.valueOf("2018-01-12 20:22:18")) val row10 = Row(Timestamp.valueOf("2018-01-12 20:23:13")) + val row11 = Row(new java.math.BigDecimal(1.0)) + val row11a = Row(new java.math.BigDecimal(1.0 + 1.0E-6)) + val row12 = Row(scala.math.BigDecimal(1.0)) + val row12a = Row(scala.math.BigDecimal(1.0 + 1.0E-6)) assert(false === approxEquals(row, row2, 1E-7)) assert(true === approxEquals(row, row2, 1E-5)) assert(true === approxEquals(row3, row3, 1E-5)) @@ -84,6 +88,8 @@ class SampleDataFrameTest extends FunSuite with DataFrameSuiteBase { assert(false === approxEquals(row9, row8, 3000)) assert(true === approxEquals(row9, row10, 60000)) assert(false === approxEquals(row9, row10, 53000)) + assert(true === approxEquals(row11, row11a, 1.0E-6)) + assert(true === approxEquals(row12, row12a, 1.0E-6)) } test("verify hive function support") { From 
468a1d6b27d41c7a45b02c5a6af7544023264da0 Mon Sep 17 00:00:00 2001 From: Martin Mauch Date: Thu, 9 Nov 2017 13:35:51 +0100 Subject: [PATCH 2/9] Add relative tolerance checks to approxEquals Fixes #214 --- .../spark/testing/DataFrameSuiteBase.scala | 78 ++++++++++++++++--- .../spark/testing/SampleDataFrameTest.scala | 45 ++++++++++- 2 files changed, 109 insertions(+), 14 deletions(-) diff --git a/core/src/main/2.0/scala/com/holdenkarau/spark/testing/DataFrameSuiteBase.scala b/core/src/main/2.0/scala/com/holdenkarau/spark/testing/DataFrameSuiteBase.scala index 0e343958..d1cef8b1 100644 --- a/core/src/main/2.0/scala/com/holdenkarau/spark/testing/DataFrameSuiteBase.scala +++ b/core/src/main/2.0/scala/com/holdenkarau/spark/testing/DataFrameSuiteBase.scala @@ -128,7 +128,11 @@ trait DataFrameSuiteBaseLike extends SparkContextProvider * @param tol max acceptable tolerance, should be less than 1. */ def assertDataFrameApproximateEquals( - expected: DataFrame, result: DataFrame, tol: Double) { + expected: DataFrame, + result: DataFrame, + tol: Double = 0.0, + relTol: Double = 0.0 + ) { assert(expected.schema, result.schema) @@ -142,7 +146,7 @@ trait DataFrameSuiteBaseLike extends SparkContextProvider val unequalRDD = expectedIndexValue.join(resultIndexValue). filter{case (idx, (r1, r2)) => - !(r1.equals(r2) || DataFrameSuiteBase.approxEquals(r1, r2, tol))} + !(r1.equals(r2) || DataFrameSuiteBase.approxEquals(r1, r2, tol, relTol))} assertEmpty(unequalRDD.take(maxUnequalRowsToShow)) } finally { @@ -160,15 +164,67 @@ trait DataFrameSuiteBaseLike extends SparkContextProvider rdd.zipWithIndex().map{ case (row, idx) => (idx, row) } } - def approxEquals(r1: Row, r2: Row, tol: Double): Boolean = { - DataFrameSuiteBase.approxEquals(r1, r2, tol) + def approxEquals( + r1: Row, + r2: Row, + tol: Double = 0.0, + relTol: Double = 0.0 + ): Boolean = { + DataFrameSuiteBase.approxEquals(r1, r2, tol, relTol) } } object DataFrameSuiteBase { + trait WithinToleranceChecker { + def apply(a: Double, b: Double): Boolean + def apply(a: BigDecimal, b: BigDecimal): Boolean + } + object WithinToleranceChecker { + def apply(tol: Double = 0.0, relTol: Double = 0.0) = + if(tol != 0.0 || relTol == 0.0) { + new WithinAbsoluteToleranceChecker(tol) + } else { + new WithinRelativeToleranceChecker(relTol) + } + } + + class WithinAbsoluteToleranceChecker(tolerance: Double) + extends WithinToleranceChecker { + def apply(a: Double, b: Double): Boolean = + (a - b).abs <= tolerance + def apply(a: BigDecimal, b: BigDecimal): Boolean = + (a - b).abs <= tolerance + } + + class WithinRelativeToleranceChecker(relativeTolerance: Double) + extends WithinToleranceChecker { + def apply(a: Double, b: Double): Boolean = { + val max = (a.abs max b.abs) + if (max == 0.0) { + true + } else { + (a - b).abs / max <= relativeTolerance + } + } + def apply(a: BigDecimal, b: BigDecimal): Boolean = { + val max = (a.abs max b.abs) + if (max == 0.0) { + true + } else { + (a - b).abs / max <= relativeTolerance + } + } + } /** Approximate equality, based on equals from [[Row]] */ - def approxEquals(r1: Row, r2: Row, tol: Double): Boolean = { + def approxEquals( + r1: Row, + r2: Row, + tol: Double = 0.0, + relTol: Double = 0.0 + ): Boolean = { + val withinTolerance = WithinToleranceChecker(tol, relTol) + if (r1.length != r2.length) { return false } else { @@ -192,7 +248,7 @@ object DataFrameSuiteBase { { return false } - if (abs(f1 - o2.asInstanceOf[Float]) > tol) { + if (!withinTolerance(f1, o2.asInstanceOf[Float])) { return false } @@ -202,18 +258,20 @@ object 
DataFrameSuiteBase { { return false } - if (abs(d1 - o2.asInstanceOf[Double]) > tol) { + if (!withinTolerance(d1, o2.asInstanceOf[Double])) { return false } case d1: java.math.BigDecimal => - if (d1.subtract(o2.asInstanceOf[java.math.BigDecimal]).abs - .compareTo(new java.math.BigDecimal(tol)) > 0) { + if (!withinTolerance( + BigDecimal(d1), + BigDecimal(o2.asInstanceOf[java.math.BigDecimal] + ))) { return false } case d1: scala.math.BigDecimal => - if ((d1 - o2.asInstanceOf[scala.math.BigDecimal]).abs > tol) { + if (!withinTolerance(d1, o2.asInstanceOf[scala.math.BigDecimal])) { return false } diff --git a/core/src/test/1.3/scala/com/holdenkarau/spark/testing/SampleDataFrameTest.scala b/core/src/test/1.3/scala/com/holdenkarau/spark/testing/SampleDataFrameTest.scala index 1ca7faae..4e6ec4b4 100644 --- a/core/src/test/1.3/scala/com/holdenkarau/spark/testing/SampleDataFrameTest.scala +++ b/core/src/test/1.3/scala/com/holdenkarau/spark/testing/SampleDataFrameTest.scala @@ -21,6 +21,7 @@ import java.sql.Timestamp import org.apache.spark.sql.Row import org.apache.spark.sql.types._ import org.scalatest.FunSuite +import java.math.{ BigDecimal => JBigDecimal } class SampleDataFrameTest extends FunSuite with DataFrameSuiteBase { val byteArray = new Array[Byte](1) @@ -70,10 +71,10 @@ class SampleDataFrameTest extends FunSuite with DataFrameSuiteBase { val row8 = Row(Timestamp.valueOf("2018-01-12 20:22:13")) val row9 = Row(Timestamp.valueOf("2018-01-12 20:22:18")) val row10 = Row(Timestamp.valueOf("2018-01-12 20:23:13")) - val row11 = Row(new java.math.BigDecimal(1.0)) - val row11a = Row(new java.math.BigDecimal(1.0 + 1.0E-6)) - val row12 = Row(scala.math.BigDecimal(1.0)) - val row12a = Row(scala.math.BigDecimal(1.0 + 1.0E-6)) + val row11 = Row(new JBigDecimal(1.0)) + val row11a = Row(new JBigDecimal(1.0 + 1.0E-6)) + val row12 = Row(BigDecimal(1.0)) + val row12a = Row(BigDecimal(1.0 + 1.0E-6)) assert(false === approxEquals(row, row2, 1E-7)) assert(true === approxEquals(row, row2, 1E-5)) assert(true === approxEquals(row3, row3, 1E-5)) @@ -92,6 +93,42 @@ class SampleDataFrameTest extends FunSuite with DataFrameSuiteBase { assert(true === approxEquals(row12, row12a, 1.0E-6)) } + test("dataframe approxEquals on rows with relative tolerance") { + import sqlContext.implicits._ + // Use 1 / 2^n as example numbers to avoid numeric errors + val relTol = scala.math.pow(2, -6) + val orig = 0.25 + val within = orig - relTol * orig + val outside = within - 1.0E-4 + def assertRelativeApproxEqualsWorksFor[T](constructor: Double => T) = { + assertResult(true) { + approxEquals( + Row(constructor(orig)), + Row(constructor(within)), + relTol = relTol + ) + } + assertResult(false) { + approxEquals( + Row(constructor(orig)), + Row(constructor(outside)), + relTol = relTol + ) + } + assertResult(true) { + approxEquals( + Row(constructor(0.0)), + Row(constructor(0.0)), + relTol = relTol + ) + } + } + assertRelativeApproxEqualsWorksFor[Double](identity) + assertRelativeApproxEqualsWorksFor[Float](_.toFloat) + assertRelativeApproxEqualsWorksFor[BigDecimal](BigDecimal.apply) + assertRelativeApproxEqualsWorksFor[JBigDecimal](new JBigDecimal(_)) + } + test("verify hive function support") { import sqlContext.implicits._ // Convert to int since old versions of hive only support percentile on From 479516d9c2a2115a93f41257e9531dea2a1010fb Mon Sep 17 00:00:00 2001 From: Martin Mauch Date: Sun, 29 Dec 2019 02:47:35 +0100 Subject: [PATCH 3/9] Update SBT to 1.3.6 --- project/build.properties | 2 +- 1 file changed, 1 insertion(+), 1 
deletion(-) diff --git a/project/build.properties b/project/build.properties index 72f90289..00b48d97 100644 --- a/project/build.properties +++ b/project/build.properties @@ -1 +1 @@ -sbt.version=1.2.7 +sbt.version=1.3.6 From 770896b535b3d193430845acdcc0b646623f6505 Mon Sep 17 00:00:00 2001 From: Martin Mauch Date: Sun, 29 Dec 2019 03:05:57 +0100 Subject: [PATCH 4/9] Update Scalatest to 3.1.0 --- build.sbt | 9 +++++++-- .../spark/testing/ArtisinalStreamingTest.scala | 4 ++-- .../holdenkarau/spark/testing/HDFSClusterTest.scala | 4 ++-- .../spark/testing/MultipleDataFrameSuites.scala | 4 ++-- .../spark/testing/PerTestSampleTest.scala | 4 ++-- .../holdenkarau/spark/testing/PerfSampleTest.scala | 4 ++-- .../spark/testing/SampleDataFrameTest.scala | 4 ++-- .../holdenkarau/spark/testing/SampleRDDTest.scala | 4 ++-- .../spark/testing/SampleScalaCheckTest.scala | 12 ++++++------ .../spark/testing/SampleStreamingActionTest.scala | 4 ++-- .../spark/testing/SampleStreamingTest.scala | 4 ++-- .../com/holdenkarau/spark/testing/UtilsTest.scala | 4 ++-- .../com/holdenkarau/spark/testing/PrettifyTest.scala | 8 ++++---- .../spark/testing/SampleDatasetGeneratorTest.scala | 6 +++--- .../spark/testing/SampleDatasetTest.scala | 4 ++-- .../spark/testing/SampleMiniClusterTest.scala | 4 ++-- .../holdenkarau/spark/testing/YARNClusterTest.scala | 5 +++-- .../spark/testing/DatasetGeneratorSizeSpecial.scala | 6 +++--- .../holdenkarau/spark/testing/MLScalaCheckTest.scala | 6 +++--- .../spark/testing/PerTestSparkContextTest.scala | 4 ++-- .../testing/StructuredStreamingSampleTests.scala | 4 ++-- project/plugins.sbt | 2 ++ 22 files changed, 59 insertions(+), 51 deletions(-) diff --git a/build.sbt b/build.sbt index 6ce41531..cd980cf5 100644 --- a/build.sbt +++ b/build.sbt @@ -211,9 +211,10 @@ val coreTestSources = unmanagedSourceDirectories in Test := { // additional libraries lazy val commonDependencies = Seq( - "org.scalatest" %% "scalatest" % "3.0.5", + "org.scalatest" %% "scalatest" % "3.1.0", + "org.scalatestplus" %% "scalatestplus-scalacheck" % "3.1.0.0-RC2", "io.github.nicolasstucki" %% "multisets" % "0.4", - "org.scalacheck" %% "scalacheck" % "1.14.0", + "org.scalacheck" %% "scalacheck" % "1.14.3", "junit" % "junit" % "4.12", "org.eclipse.jetty" % "jetty-util" % "9.3.11.v20160721", "com.novocode" % "junit-interface" % "0.11" % "test->default") @@ -290,3 +291,7 @@ lazy val publishSettings = Seq( lazy val noPublishSettings = skip in publish := true + +scalafixDependencies in ThisBuild += "org.scalatest" %% "autofix" % "3.1.0.0" + +addCompilerPlugin(scalafixSemanticdb) diff --git a/core/src/test/1.3/scala/com/holdenkarau/spark/testing/ArtisinalStreamingTest.scala b/core/src/test/1.3/scala/com/holdenkarau/spark/testing/ArtisinalStreamingTest.scala index 8e302529..ea368fe0 100644 --- a/core/src/test/1.3/scala/com/holdenkarau/spark/testing/ArtisinalStreamingTest.scala +++ b/core/src/test/1.3/scala/com/holdenkarau/spark/testing/ArtisinalStreamingTest.scala @@ -25,8 +25,8 @@ import org.apache.spark._ import org.apache.spark.rdd.RDD import org.apache.spark.SparkContext._ -import org.scalatest.FunSuite import org.scalatest.exceptions.TestFailedException +import org.scalatest.funsuite.AnyFunSuite /** * ArtisinalStreamingTest illustrates how to write a streaming test @@ -36,7 +36,7 @@ import org.scalatest.exceptions.TestFailedException * This does not use a manual clock and instead uses the kind of sketchy * sleep approach. Instead please look at [[SampleStreamingTest]]. 
*/ -class ArtisinalStreamingTest extends FunSuite with SharedSparkContext { +class ArtisinalStreamingTest extends AnyFunSuite with SharedSparkContext { // tag::createQueueStream[] def makeSimpleQueueStream(ssc: StreamingContext) = { val input = List(List("hi"), List("happy pandas", "sad pandas")) diff --git a/core/src/test/1.3/scala/com/holdenkarau/spark/testing/HDFSClusterTest.scala b/core/src/test/1.3/scala/com/holdenkarau/spark/testing/HDFSClusterTest.scala index 3555ae82..217aaaa1 100644 --- a/core/src/test/1.3/scala/com/holdenkarau/spark/testing/HDFSClusterTest.scala +++ b/core/src/test/1.3/scala/com/holdenkarau/spark/testing/HDFSClusterTest.scala @@ -7,9 +7,9 @@ import com.holdenkarau.spark.testing.{RDDComparisons, SharedSparkContext} import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.{FileSystem, Path} import org.apache.spark.rdd.RDD -import org.scalatest.FunSuite +import org.scalatest.funsuite.AnyFunSuite -class HDFSClusterTest extends FunSuite with SharedSparkContext with RDDComparisons { +class HDFSClusterTest extends AnyFunSuite with SharedSparkContext with RDDComparisons { var hdfsCluster: HDFSCluster = null diff --git a/core/src/test/1.3/scala/com/holdenkarau/spark/testing/MultipleDataFrameSuites.scala b/core/src/test/1.3/scala/com/holdenkarau/spark/testing/MultipleDataFrameSuites.scala index fd9a7a41..6352d67c 100644 --- a/core/src/test/1.3/scala/com/holdenkarau/spark/testing/MultipleDataFrameSuites.scala +++ b/core/src/test/1.3/scala/com/holdenkarau/spark/testing/MultipleDataFrameSuites.scala @@ -1,8 +1,8 @@ package com.holdenkarau.spark.testing -import org.scalatest.FunSuite +import org.scalatest.funsuite.AnyFunSuite -class MultipleDataFrameSuites extends FunSuite with DataFrameSuiteBase { +class MultipleDataFrameSuites extends AnyFunSuite with DataFrameSuiteBase { test("test nothing") { assert(1 === 1) } diff --git a/core/src/test/1.3/scala/com/holdenkarau/spark/testing/PerTestSampleTest.scala b/core/src/test/1.3/scala/com/holdenkarau/spark/testing/PerTestSampleTest.scala index 44328056..0ef017a3 100644 --- a/core/src/test/1.3/scala/com/holdenkarau/spark/testing/PerTestSampleTest.scala +++ b/core/src/test/1.3/scala/com/holdenkarau/spark/testing/PerTestSampleTest.scala @@ -16,14 +16,14 @@ */ package com.holdenkarau.spark.testing -import org.scalatest.FunSuite +import org.scalatest.funsuite.AnyFunSuite /** * Illustrate using per-test sample test. This is the one to use * when your tests may be destructive to the Spark context * (e.g. stopping it) */ -class PerTestSampleTest extends FunSuite with PerTestSparkContext { +class PerTestSampleTest extends AnyFunSuite with PerTestSparkContext { test("sample test stops a context") { sc.stop() diff --git a/core/src/test/1.3/scala/com/holdenkarau/spark/testing/PerfSampleTest.scala b/core/src/test/1.3/scala/com/holdenkarau/spark/testing/PerfSampleTest.scala index 5ef11f8f..75e867c9 100644 --- a/core/src/test/1.3/scala/com/holdenkarau/spark/testing/PerfSampleTest.scala +++ b/core/src/test/1.3/scala/com/holdenkarau/spark/testing/PerfSampleTest.scala @@ -19,14 +19,14 @@ package com.holdenkarau.spark.testing import java.nio.file.Files import org.apache.spark._ -import org.scalatest.FunSuite +import org.scalatest.funsuite.AnyFunSuite /** * Illustrate using per-test sample test. This is the one to use * when your tests may be destructive to the Spark context * (e.g. 
stopping it) */ -class PerfSampleTest extends FunSuite with PerTestSparkContext { +class PerfSampleTest extends AnyFunSuite with PerTestSparkContext { val tempPath = Files.createTempDirectory(null).toString() //tag::samplePerfTest[] diff --git a/core/src/test/1.3/scala/com/holdenkarau/spark/testing/SampleDataFrameTest.scala b/core/src/test/1.3/scala/com/holdenkarau/spark/testing/SampleDataFrameTest.scala index 4e6ec4b4..2d3d923b 100644 --- a/core/src/test/1.3/scala/com/holdenkarau/spark/testing/SampleDataFrameTest.scala +++ b/core/src/test/1.3/scala/com/holdenkarau/spark/testing/SampleDataFrameTest.scala @@ -20,10 +20,10 @@ import java.sql.Timestamp import org.apache.spark.sql.Row import org.apache.spark.sql.types._ -import org.scalatest.FunSuite import java.math.{ BigDecimal => JBigDecimal } +import org.scalatest.funsuite.AnyFunSuite -class SampleDataFrameTest extends FunSuite with DataFrameSuiteBase { +class SampleDataFrameTest extends AnyFunSuite with DataFrameSuiteBase { val byteArray = new Array[Byte](1) val diffByteArray = Array[Byte](192.toByte) val inputList = List( diff --git a/core/src/test/1.3/scala/com/holdenkarau/spark/testing/SampleRDDTest.scala b/core/src/test/1.3/scala/com/holdenkarau/spark/testing/SampleRDDTest.scala index a30e0719..36158466 100644 --- a/core/src/test/1.3/scala/com/holdenkarau/spark/testing/SampleRDDTest.scala +++ b/core/src/test/1.3/scala/com/holdenkarau/spark/testing/SampleRDDTest.scala @@ -19,9 +19,9 @@ package com.holdenkarau.spark.testing import scala.util.Random import org.apache.spark.rdd.RDD -import org.scalatest.FunSuite +import org.scalatest.funsuite.AnyFunSuite -class SampleRDDTest extends FunSuite with SharedSparkContext with RDDComparisons { +class SampleRDDTest extends AnyFunSuite with SharedSparkContext with RDDComparisons { test("really simple transformation") { val input = List("hi", "hi holden", "bye") val expected = List(List("hi"), List("hi", "holden"), List("bye")) diff --git a/core/src/test/1.3/scala/com/holdenkarau/spark/testing/SampleScalaCheckTest.scala b/core/src/test/1.3/scala/com/holdenkarau/spark/testing/SampleScalaCheckTest.scala index e64a1946..eaaec08b 100644 --- a/core/src/test/1.3/scala/com/holdenkarau/spark/testing/SampleScalaCheckTest.scala +++ b/core/src/test/1.3/scala/com/holdenkarau/spark/testing/SampleScalaCheckTest.scala @@ -21,10 +21,10 @@ import org.apache.spark.sql.{DataFrame, Row, SQLContext} import org.apache.spark.sql.types._ import org.scalacheck.{Arbitrary, Gen} import org.scalacheck.Prop.forAll -import org.scalatest.FunSuite -import org.scalatest.prop.Checkers +import org.scalatestplus.scalacheck.Checkers +import org.scalatest.funsuite.AnyFunSuite -class SampleScalaCheckTest extends FunSuite +class SampleScalaCheckTest extends AnyFunSuite with SharedSparkContext with RDDComparisons with Checkers { // tag::propertySample[] // A trivial property that the map doesn't change the number of elements @@ -257,7 +257,7 @@ class SampleScalaCheckTest extends FunSuite test("generate rdd of specific size") { implicit val generatorDrivenConfig = - PropertyCheckConfig(minSize = 10, maxSize = 20) + PropertyCheckConfiguration(minSize = 10, sizeRange = 20) val prop = forAll(RDDGenerator.genRDD[String](sc)(Arbitrary.arbitrary[String])){ rdd => rdd.count() <= 20 } @@ -333,7 +333,7 @@ class SampleScalaCheckTest extends FunSuite StructType(StructField("timestampType", TimestampType) :: Nil)) :: Nil test("second dataframe's evaluation has the same values as first") { implicit val generatorDrivenConfig = - 
PropertyCheckConfig(minSize = 1, maxSize = 1) + PropertyCheckConfiguration(minSize = 1, sizeRange = 1) val sqlContext = new SQLContext(sc) val dataframeGen = @@ -354,7 +354,7 @@ class SampleScalaCheckTest extends FunSuite } test("nullable fields contain null values as well") { implicit val generatorDrivenConfig = - PropertyCheckConfig(minSize = 1, maxSize = 1) + PropertyCheckConfiguration(minSize = 1, sizeRange = 1) val nullableFields = fields.map(f => f.copy(nullable = true, name = s"${f.name}Nullable")) val sqlContext = new SQLContext(sc) val allFields = fields ::: nullableFields diff --git a/core/src/test/1.3/scala/com/holdenkarau/spark/testing/SampleStreamingActionTest.scala b/core/src/test/1.3/scala/com/holdenkarau/spark/testing/SampleStreamingActionTest.scala index 9e1a99f7..26d6b15e 100644 --- a/core/src/test/1.3/scala/com/holdenkarau/spark/testing/SampleStreamingActionTest.scala +++ b/core/src/test/1.3/scala/com/holdenkarau/spark/testing/SampleStreamingActionTest.scala @@ -19,9 +19,9 @@ package com.holdenkarau.spark.testing import org.apache.spark._ import org.apache.spark.rdd.RDD import org.apache.spark.streaming.dstream._ -import org.scalatest.FunSuite +import org.scalatest.funsuite.AnyFunSuite -class SampleStreamingActionTest extends FunSuite with StreamingActionBase { +class SampleStreamingActionTest extends AnyFunSuite with StreamingActionBase { test("a simple action") { val input = List(List("hi"), List("bye")) diff --git a/core/src/test/1.3/scala/com/holdenkarau/spark/testing/SampleStreamingTest.scala b/core/src/test/1.3/scala/com/holdenkarau/spark/testing/SampleStreamingTest.scala index af065ed7..0fdeb384 100644 --- a/core/src/test/1.3/scala/com/holdenkarau/spark/testing/SampleStreamingTest.scala +++ b/core/src/test/1.3/scala/com/holdenkarau/spark/testing/SampleStreamingTest.scala @@ -21,9 +21,9 @@ import org.apache.spark.streaming.Seconds import org.apache.spark.streaming.dstream._ import org.scalactic.Equality import org.scalatest.exceptions.TestFailedException -import org.scalatest.FunSuite +import org.scalatest.funsuite.AnyFunSuite -class SampleStreamingTest extends FunSuite with StreamingSuiteBase { +class SampleStreamingTest extends AnyFunSuite with StreamingSuiteBase { //tag::simpleStreamingTest[] test("really simple transformation") { diff --git a/core/src/test/1.3/scala/com/holdenkarau/spark/testing/UtilsTest.scala b/core/src/test/1.3/scala/com/holdenkarau/spark/testing/UtilsTest.scala index daa7f683..d4e7f85e 100644 --- a/core/src/test/1.3/scala/com/holdenkarau/spark/testing/UtilsTest.scala +++ b/core/src/test/1.3/scala/com/holdenkarau/spark/testing/UtilsTest.scala @@ -19,9 +19,9 @@ package com.holdenkarau.spark.testing import java.io._ import java.nio.file.Files -import org.scalatest.FunSuite +import org.scalatest.funsuite.AnyFunSuite -class UtilsTest extends FunSuite { +class UtilsTest extends AnyFunSuite { test("test utils cleanup") { val tempDir = Utils.createTempDir() val tempPath = tempDir.toPath().toAbsolutePath().toString() diff --git a/core/src/test/1.6/scala/com/holdenkarau/spark/testing/PrettifyTest.scala b/core/src/test/1.6/scala/com/holdenkarau/spark/testing/PrettifyTest.scala index 5b41a808..b57b4297 100644 --- a/core/src/test/1.6/scala/com/holdenkarau/spark/testing/PrettifyTest.scala +++ b/core/src/test/1.6/scala/com/holdenkarau/spark/testing/PrettifyTest.scala @@ -5,12 +5,12 @@ import org.apache.spark.sql.types.{IntegerType, StringType, StructField, StructT import org.scalacheck.Gen import org.scalacheck.Prop._ import org.scalacheck.util.Pretty 
-import org.scalatest.FunSuite import org.scalatest.exceptions.GeneratorDrivenPropertyCheckFailedException -import org.scalatest.prop.Checkers +import org.scalatestplus.scalacheck.Checkers +import org.scalatest.funsuite.AnyFunSuite -class PrettifyTest extends FunSuite with SharedSparkContext with Checkers with Prettify { - implicit val propertyCheckConfig = PropertyCheckConfig(minSize = 2, maxSize = 2) +class PrettifyTest extends AnyFunSuite with SharedSparkContext with Checkers with Prettify { + implicit val propertyCheckConfig = PropertyCheckConfiguration(minSize = 2, sizeRange = 2) test("pretty output of DataFrame's check") { val schema = StructType(List(StructField("name", StringType), StructField("age", IntegerType))) diff --git a/core/src/test/1.6/scala/com/holdenkarau/spark/testing/SampleDatasetGeneratorTest.scala b/core/src/test/1.6/scala/com/holdenkarau/spark/testing/SampleDatasetGeneratorTest.scala index c703bb66..c1fb1c88 100644 --- a/core/src/test/1.6/scala/com/holdenkarau/spark/testing/SampleDatasetGeneratorTest.scala +++ b/core/src/test/1.6/scala/com/holdenkarau/spark/testing/SampleDatasetGeneratorTest.scala @@ -3,10 +3,10 @@ package com.holdenkarau.spark.testing import org.apache.spark.sql.{Dataset, SQLContext} import org.scalacheck.{Gen, Arbitrary} import org.scalacheck.Prop.forAll -import org.scalatest.FunSuite -import org.scalatest.prop.Checkers +import org.scalatestplus.scalacheck.Checkers +import org.scalatest.funsuite.AnyFunSuite -class SampleDatasetGeneratorTest extends FunSuite +class SampleDatasetGeneratorTest extends AnyFunSuite with SharedSparkContext with Checkers { test("test generating Datasets[String]") { diff --git a/core/src/test/1.6/scala/com/holdenkarau/spark/testing/SampleDatasetTest.scala b/core/src/test/1.6/scala/com/holdenkarau/spark/testing/SampleDatasetTest.scala index 55ea5573..c7cab011 100644 --- a/core/src/test/1.6/scala/com/holdenkarau/spark/testing/SampleDatasetTest.scala +++ b/core/src/test/1.6/scala/com/holdenkarau/spark/testing/SampleDatasetTest.scala @@ -2,9 +2,9 @@ package com.holdenkarau.spark.testing import java.sql.Timestamp -import org.scalatest.FunSuite +import org.scalatest.funsuite.AnyFunSuite -class SampleDatasetTest extends FunSuite with DatasetSuiteBase { +class SampleDatasetTest extends AnyFunSuite with DatasetSuiteBase { test("equal empty dataset") { import sqlContext.implicits._ diff --git a/core/src/test/1.6/scala/com/holdenkarau/spark/testing/SampleMiniClusterTest.scala b/core/src/test/1.6/scala/com/holdenkarau/spark/testing/SampleMiniClusterTest.scala index ee239cad..cd674a4f 100644 --- a/core/src/test/1.6/scala/com/holdenkarau/spark/testing/SampleMiniClusterTest.scala +++ b/core/src/test/1.6/scala/com/holdenkarau/spark/testing/SampleMiniClusterTest.scala @@ -17,10 +17,10 @@ package com.holdenkarau.spark.testing import org.apache.spark.rdd.RDD -import org.scalatest.FunSuite +import org.scalatest.funsuite.AnyFunSuite -class SampleMiniClusterTest extends FunSuite with SharedMiniCluster { +class SampleMiniClusterTest extends AnyFunSuite with SharedMiniCluster { test("really simple transformation") { val input = List("hi", "hi holden", "bye") diff --git a/core/src/test/1.6/scala/com/holdenkarau/spark/testing/YARNClusterTest.scala b/core/src/test/1.6/scala/com/holdenkarau/spark/testing/YARNClusterTest.scala index 33156887..bb0ce4f1 100644 --- a/core/src/test/1.6/scala/com/holdenkarau/spark/testing/YARNClusterTest.scala +++ b/core/src/test/1.6/scala/com/holdenkarau/spark/testing/YARNClusterTest.scala @@ -1,10 +1,11 @@ package 
com.holdenkarau.spark.testing import org.apache.spark.{SparkConf, SparkContext} -import org.scalatest.{BeforeAndAfterAll, FunSuite} +import org.scalatest.BeforeAndAfterAll +import org.scalatest.funsuite.AnyFunSuite -class YARNClusterTest extends FunSuite with BeforeAndAfterAll { +class YARNClusterTest extends AnyFunSuite with BeforeAndAfterAll { var yarnCluster: YARNCluster = null var sc: SparkContext = null diff --git a/core/src/test/2.0/scala/com/holdenkarau/spark/testing/DatasetGeneratorSizeSpecial.scala b/core/src/test/2.0/scala/com/holdenkarau/spark/testing/DatasetGeneratorSizeSpecial.scala index ef6ee912..331247ce 100644 --- a/core/src/test/2.0/scala/com/holdenkarau/spark/testing/DatasetGeneratorSizeSpecial.scala +++ b/core/src/test/2.0/scala/com/holdenkarau/spark/testing/DatasetGeneratorSizeSpecial.scala @@ -3,10 +3,10 @@ package com.holdenkarau.spark.testing import org.apache.spark.sql.{Dataset, SQLContext} import org.scalacheck.{Gen, Arbitrary} import org.scalacheck.Prop.forAll -import org.scalatest.FunSuite -import org.scalatest.prop.Checkers +import org.scalatestplus.scalacheck.Checkers +import org.scalatest.funsuite.AnyFunSuite -class DatasetGeneratorSizeSpecial extends FunSuite +class DatasetGeneratorSizeSpecial extends AnyFunSuite with SharedSparkContext with Checkers { test("test generating sized Datasets[Custom Class]") { diff --git a/core/src/test/2.0/scala/com/holdenkarau/spark/testing/MLScalaCheckTest.scala b/core/src/test/2.0/scala/com/holdenkarau/spark/testing/MLScalaCheckTest.scala index 578038ff..9a6ba910 100644 --- a/core/src/test/2.0/scala/com/holdenkarau/spark/testing/MLScalaCheckTest.scala +++ b/core/src/test/2.0/scala/com/holdenkarau/spark/testing/MLScalaCheckTest.scala @@ -4,10 +4,10 @@ import org.apache.spark.ml.linalg.SQLDataTypes.{MatrixType, VectorType} import org.apache.spark.sql.SQLContext import org.apache.spark.sql.types.{StructField, StructType} import org.scalacheck.Prop.forAll -import org.scalatest.FunSuite -import org.scalatest.prop.Checkers +import org.scalatestplus.scalacheck.Checkers +import org.scalatest.funsuite.AnyFunSuite -class MLScalaCheckTest extends FunSuite with SharedSparkContext with Checkers { +class MLScalaCheckTest extends AnyFunSuite with SharedSparkContext with Checkers { // re-use the spark context override implicit def reuseContextIfPossible: Boolean = false diff --git a/core/src/test/2.0/scala/com/holdenkarau/spark/testing/PerTestSparkContextTest.scala b/core/src/test/2.0/scala/com/holdenkarau/spark/testing/PerTestSparkContextTest.scala index 54b36a69..59ed2cc3 100644 --- a/core/src/test/2.0/scala/com/holdenkarau/spark/testing/PerTestSparkContextTest.scala +++ b/core/src/test/2.0/scala/com/holdenkarau/spark/testing/PerTestSparkContextTest.scala @@ -19,14 +19,14 @@ package com.holdenkarau.spark.testing import java.nio.file.Files import org.apache.spark._ -import org.scalatest.FunSuite +import org.scalatest.funsuite.AnyFunSuite /** * Illustrate using per-test sample test. This is the one to use * when your tests may be destructive to the Spark context * (e.g. 
stopping it) */ -class PerTestSparkContextTest extends FunSuite with PerTestSparkContext { +class PerTestSparkContextTest extends AnyFunSuite with PerTestSparkContext { val tempPath = Files.createTempDirectory(null).toString() //tag::samplePerfTest[] diff --git a/core/src/test/2.2/scala/com/holdenkarau/spark/testing/StructuredStreamingSampleTests.scala b/core/src/test/2.2/scala/com/holdenkarau/spark/testing/StructuredStreamingSampleTests.scala index 49138ea2..99ad3cad 100644 --- a/core/src/test/2.2/scala/com/holdenkarau/spark/testing/StructuredStreamingSampleTests.scala +++ b/core/src/test/2.2/scala/com/holdenkarau/spark/testing/StructuredStreamingSampleTests.scala @@ -2,10 +2,10 @@ package com.holdenkarau.spark.testing import org.apache.spark.sql._ -import org.scalatest.FunSuite +import org.scalatest.funsuite.AnyFunSuite class StructuredStreamingTests - extends FunSuite with SharedSparkContext with StructuredStreamingBase { + extends AnyFunSuite with SharedSparkContext with StructuredStreamingBase { // re-use the spark context override implicit def reuseContextIfPossible: Boolean = true diff --git a/project/plugins.sbt b/project/plugins.sbt index f3affb0f..8f29656b 100644 --- a/project/plugins.sbt +++ b/project/plugins.sbt @@ -5,3 +5,5 @@ addSbtPlugin("com.jsuereth" % "sbt-pgp" % "1.1.1") addSbtPlugin("org.scoverage" % "sbt-scoverage" % "1.5.1") addSbtPlugin("net.virtual-void" % "sbt-dependency-graph" % "0.9.2") + +addSbtPlugin("ch.epfl.scala" % "sbt-scalafix" % "0.9.11") \ No newline at end of file From 9fabed72d3fb3809f83de93e0451b909fa222462 Mon Sep 17 00:00:00 2001 From: Martin Mauch Date: Fri, 10 May 2019 20:00:47 +0200 Subject: [PATCH 5/9] Temporarily remove Kafka 0.8 project --- build.sbt | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/build.sbt b/build.sbt index cd980cf5..1b35b168 100644 --- a/build.sbt +++ b/build.sbt @@ -1,5 +1,5 @@ lazy val root = (project in file(".")) - .aggregate(core, kafka_0_8) + .aggregate(core) .settings(noPublishSettings, commonSettings) val sparkVersion = settingKey[String]("Spark version") @@ -32,6 +32,7 @@ lazy val core = (project in file("core")) ) ++ commonDependencies ++ miniClusterDependencies ) +/* lazy val kafka_0_8 = { Project("kafka_0_8", file("kafka-0.8")) .dependsOn(core) @@ -64,6 +65,7 @@ lazy val kafka_0_8 = { ) ) } +*/ val commonSettings = Seq( organization := "com.holdenkarau", From 9d65b9d820e8abd8274831a5b7d0ecb234c62814 Mon Sep 17 00:00:00 2001 From: Martin Mauch Date: Sun, 29 Dec 2019 02:47:58 +0100 Subject: [PATCH 6/9] Exclude Kafka if Scala >= 2.12.0 --- build.sbt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build.sbt b/build.sbt index 1b35b168..68161560 100644 --- a/build.sbt +++ b/build.sbt @@ -234,7 +234,7 @@ def excludeJpountz(items: Seq[ModuleID]) = libraryDependencies ++= excludeJpountz( // For Spark 2.4 w/ Scala 2.12 we're going to need some special logic - if (sparkVersion.value >= "2.3.0") { + if (sparkVersion.value >= "2.3.0" && scalaVersion.value < "2.12.0") { Seq( "org.apache.spark" %% "spark-streaming-kafka-0-8" % sparkVersion.value ) From a800908ce1d8e034147ea0a4d11e6ebee6ebb545 Mon Sep 17 00:00:00 2001 From: Martin Mauch Date: Sun, 29 Dec 2019 03:38:28 +0100 Subject: [PATCH 7/9] Cross-publish to 2.11 even for Spark 2.4.0 --- build.sbt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build.sbt b/build.sbt index 68161560..2db2699f 100644 --- a/build.sbt +++ b/build.sbt @@ -14,7 +14,7 @@ lazy val core = (project in file("core")) coreTestSources, 
crossScalaVersions := { if (sparkVersion.value >= "2.4.0") { - Seq("2.12.8") + Seq("2.12.10", "2.11.12") } else if (sparkVersion.value >= "2.3.0") { Seq("2.11.11") } else { From 4886163b14935776c6f0fdaec26ce45f3cff09aa Mon Sep 17 00:00:00 2001 From: Martin Mauch Date: Fri, 10 May 2019 16:56:00 +0200 Subject: [PATCH 8/9] Add custom Jitpack build --- jitpack.yml | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 jitpack.yml diff --git a/jitpack.yml b/jitpack.yml new file mode 100644 index 00000000..91f84bed --- /dev/null +++ b/jitpack.yml @@ -0,0 +1,2 @@ +install: + - sbt publishM2 From c2bc44caf41e64d2be810807fdef6deab384ab29 Mon Sep 17 00:00:00 2001 From: Martin Mauch Date: Sun, 29 Dec 2019 03:58:52 +0100 Subject: [PATCH 9/9] Manually add Scala version to Jitpack sbt command --- jitpack.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/jitpack.yml b/jitpack.yml index 91f84bed..95548a71 100644 --- a/jitpack.yml +++ b/jitpack.yml @@ -1,2 +1,2 @@ install: - - sbt publishM2 + - sbt `echo $ARTIFACT | sed 's/.*2.11$/++2.11.12/;s/.*2.12$/++2.12.10/'` publishM2
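Taken together, patches 1/9 and 2/9 let a caller compare DataFrames with either an absolute tolerance (tol) or a relative one (relTol); per WithinToleranceChecker.apply in patch 2, tol takes precedence, and relTol is only consulted when tol is left at its 0.0 default. Below is a minimal usage sketch against the patched 2.0 DataFrameSuiteBase. It is not part of the series: the suite name and the price data are illustrative, and only the tol/relTol behavior shown is taken from the patches.

import com.holdenkarau.spark.testing.DataFrameSuiteBase
import org.scalatest.funsuite.AnyFunSuite

// Hypothetical suite illustrating the tolerance options added in patches 1-2.
class ToleranceUsageExample extends AnyFunSuite with DataFrameSuiteBase {
  test("prices agree within 0.5% relative or 0.5 absolute tolerance") {
    import sqlContext.implicits._
    // BigDecimal columns exercise the java.math.BigDecimal branch of approxEquals.
    val expected = Seq(("a", BigDecimal(100.00)), ("b", BigDecimal(0.2500))).toDF("id", "price")
    val result   = Seq(("a", BigDecimal(100.40)), ("b", BigDecimal(0.2490))).toDF("id", "price")

    // Relative check: |a - b| / max(|a|, |b|) <= relTol (WithinRelativeToleranceChecker).
    // Here 0.40 / 100.40 ~= 0.004 and 0.001 / 0.25 = 0.004, both within 0.005.
    assertDataFrameApproximateEquals(expected, result, relTol = 0.005)

    // Absolute check: |a - b| <= tol (WithinAbsoluteToleranceChecker),
    // selected whenever tol is nonzero, regardless of relTol.
    assertDataFrameApproximateEquals(expected, result, tol = 0.5)
  }
}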