Skip to content

Commit

Permalink
Merge branch 'main' into gh129-lockfile
Browse files Browse the repository at this point in the history
  • Loading branch information
colindean authored Apr 11, 2024
2 parents aa1f1f7 + 98cf101 commit fdcb056
Show file tree
Hide file tree
Showing 18 changed files with 114 additions and 63 deletions.
2 changes: 1 addition & 1 deletion .github/CODEOWNERS
Validating CODEOWNERS rules …
Original file line number Diff line number Diff line change
@@ -1 +1 @@
* @colindean @c-horn @samratmitra-0812
* @target/data-validator-members @c-horn
4 changes: 4 additions & 0 deletions .github/dependabot.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,7 @@ updates:
directory: "/"
schedule:
interval: "daily"

# Until Dependabot provides Scala updates, they are handled by the Scala Steward action.
# https://github.com/target/data-validator/blob/master/.github/workflows/scala-steward.yaml
# Check periodically for updates: https://docs.github.com/en/code-security/dependabot/dependabot-version-updates/configuration-options-for-the-dependabot.yml-file#package-ecosystem
20 changes: 9 additions & 11 deletions .github/workflows/ci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -16,15 +16,11 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Checkout current branch (full)
uses: actions/checkout@v3

- name: Setup Java and Scala
uses: olafurpg/setup-scala@v14
uses: actions/checkout@v4
- uses: coursier/cache-action@v6
with:
java-version: [email protected]

- name: Cache sbt
uses: actions/cache@v3
extraKey: ${{ runner.os }}
- uses: coursier/setup-action@v1
with:
path: |
~/.sbt
Expand All @@ -34,6 +30,8 @@ jobs:
~/AppData/Local/Coursier/Cache/v1
~/Library/Caches/Coursier/v1
key: ${{ runner.os }}-sbt-cache-v2-${{ hashFiles('**/*.sbt') }}-${{ hashFiles('project/build.properties') }}

- name: Build, test, and package project
run: bin/sbt clean update dependencyLockCheck compile test package makePom
jvm: adopt:1.8
- name: Build, test, and package project on Spark 3.5
run: bin/sbt clean update dependencyLockCheck compile test package makePom -DsparkVersion=3.5.1
- name: Build and package project on "legacy" Spark
run: bin/sbt clean update dependencyLockCheck compile package makePom
16 changes: 7 additions & 9 deletions .github/workflows/release.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,15 +15,11 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Checkout current branch (full)
uses: actions/checkout@v3

- name: Setup Java and Scala
uses: olafurpg/setup-scala@v14
uses: actions/checkout@v4
- uses: coursier/cache-action@v6
with:
java-version: [email protected]

- name: Cache sbt
uses: actions/cache@v3
extraKey: ${{ runner.os }}
- uses: coursier/setup-action@v1
with:
path: |
~/.sbt
Expand All @@ -33,7 +29,9 @@ jobs:
~/AppData/Local/Coursier/Cache/v1
~/Library/Caches/Coursier/v1
key: ${{ runner.os }}-sbt-cache-v2-${{ hashFiles('**/*.sbt') }}-${{ hashFiles('project/build.properties') }}

jvm: adopt:1.8
# uses sbt-github-packages, see build.sbt
- name: Publish with SBT
run: bin/sbt dependencyLockCheck publish
- name: Publish with SBT
run: bin/sbt dependencyLockCheck publish -DsparkVersion=3.5.1
2 changes: 1 addition & 1 deletion .github/workflows/scala-steward.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ jobs:
name: Launch Scala Steward
steps:
- name: Install JDK for Scala Steward use
uses: actions/setup-java@v3
uses: actions/setup-java@v4
with:
distribution: 'temurin'
java-version: '11'
Expand Down
2 changes: 1 addition & 1 deletion .scalafmt.conf
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
version = 3.7.3
version = 3.8.1
runner.dialect = scala211
project.git = true
align.preset = none
Expand Down
3 changes: 3 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,9 @@ Usage: data-validator [options]
--help Show this help message and exit.
```
If you want to build with Java 11 or newer, set the "MODERN_JAVA" environment variable.
This may become the default in the future.
## Example Run
With the JAR directly:
Expand Down
8 changes: 4 additions & 4 deletions bin/sbt
Original file line number Diff line number Diff line change
Expand Up @@ -34,11 +34,11 @@

set -o pipefail

declare -r sbt_release_version="1.6.2"
declare -r sbt_unreleased_version="1.7.0-M2"
declare -r sbt_release_version="1.9.9"
declare -r sbt_unreleased_version="1.9.9"

declare -r latest_213="2.13.8"
declare -r latest_212="2.12.15"
declare -r latest_213="2.13.13"
declare -r latest_212="2.12.19"
declare -r latest_211="2.11.12"
declare -r latest_210="2.10.7"
declare -r latest_29="2.9.3"
Expand Down
68 changes: 57 additions & 11 deletions build.sbt
Original file line number Diff line number Diff line change
@@ -1,11 +1,42 @@
name := "data-validator"
organization := "com.target"

scalaVersion := "2.11.12"
val sparkVersion = settingKey[String]("Spark version")

val sparkVersion = "2.3.4"
sparkVersion := System.getProperty("sparkVersion", "2.3.4")

val circeVersion = "0.11.2"
scalaVersion := {
if (sparkVersion.value > "3.0") {
"2.12.19"
} else {
"2.11.12"
}
}

val sparkValidationVersion = settingKey[String]("Version of package")

sparkValidationVersion := "0.15.0"

version := sparkVersion.value + "_" + sparkValidationVersion.value

val circeVersion = settingKey[String]("Circe version")
val circeYamlVersion = settingKey[String]("Circe YAML version")

circeVersion := {
if (sparkVersion.value > "3.0") {
"0.14.6"
} else {
"0.11.2"
}
}

circeYamlVersion := {
if (sparkVersion.value > "3.0") {
"0.15.1"
} else {
"0.10.1"
}
}

//addDependencyTreePlugin
enablePlugins(GitVersioning)
Expand Down Expand Up @@ -35,18 +66,28 @@ libraryDependencies ++= Seq(
"com.github.scopt" %% "scopt" % "4.1.0",
"com.sun.mail" % "javax.mail" % "1.6.2",
"com.lihaoyi" %% "scalatags" % "0.12.0",
"io.circe" %% "circe-yaml" % "0.10.1",
"io.circe" %% "circe-core" % circeVersion,
"io.circe" %% "circe-generic" % circeVersion,
"io.circe" %% "circe-parser" % circeVersion,
"org.apache.spark" %% "spark-sql" % sparkVersion % Provided,
"io.circe" %% "circe-yaml" % circeYamlVersion.value,
"io.circe" %% "circe-core" % circeVersion.value,
"io.circe" %% "circe-generic" % circeVersion.value,
"io.circe" %% "circe-parser" % circeVersion.value,
"org.apache.spark" %% "spark-sql" % sparkVersion.value % Provided,
"junit" % "junit" % "4.13.2" % Test,
"org.scalatest" %% "scalatest" % "3.2.15" % Test,
"org.scalatest" %% "scalatest" % "3.2.18" % Test,
"com.github.sbt" % "junit-interface" % "0.13.3" % Test exclude ("junit", "junit-dep")
)

Test / fork := true
javaOptions ++= Seq("-Xms512M", "-Xmx2048M", "-XX:+CMSClassUnloadingEnabled")
javaOptions ++= (if (sparkVersion.value > "3.0" && System.getenv("MODERN_JAVA") == "TRUE") {
// For modern Java we need to open up a lot of config options.
Seq("-Xms4048M", "-Xmx4048M",
// these were added in JDK 11 and newer, apparently.
"-Dio.netty.tryReflectionSetAccessible=true",
"--add-opens=java.base/java.lang=ALL-UNNAMED",
"--add-opens=java.base/java.io=ALL-UNNAMED",
"--add-opens=java.base/sun.nio.ch=ALL-UNNAMED")
} else {
Seq("-Xms4048M", "-Xmx4048M")
})
Test / parallelExecution := false
// required for unit tests, but not set in some environments
Test / envVars ++= Map(
Expand All @@ -57,6 +98,11 @@ Test / envVars ++= Map(

assembly / mainClass := Some("com.target.data_validator.Main")

assembly / assemblyShadeRules := Seq(
ShadeRule.rename("shapeless.**" -> "new_shapeless.@1").inAll,
ShadeRule.rename("cats.kernel.**" -> s"new_cats.kernel.@1").inAll
)

// Enforces scalastyle checks
val compileScalastyle = TaskKey[Unit]("compileScalastyle")
scalastyleFailOnWarning := true
Expand All @@ -75,6 +121,6 @@ compileScalastyle := (Compile / scalastyle).toTask("").value

(Compile / runMain) := Defaults.runMainTask(Compile / fullClasspath, Compile / run / runner).evaluated
TaskKey[Unit]("generateTestData") := {
libraryDependencies += "org.apache.spark" %% "spark-sql" % sparkVersion
libraryDependencies += "org.apache.spark" %% "spark-sql" % sparkVersion.value
(Compile / runMain).toTask(" com.target.data_validator.GenTestData").value
}
2 changes: 1 addition & 1 deletion project/assembly.sbt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "2.1.1")
addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "2.2.0")
2 changes: 1 addition & 1 deletion project/build.properties
Original file line number Diff line number Diff line change
@@ -1 +1 @@
sbt.version = 1.8.2
sbt.version = 1.9.9
5 changes: 3 additions & 2 deletions project/plugins.sbt
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
addSbtPlugin("org.scalastyle" %% "scalastyle-sbt-plugin" % "1.0.0")
addSbtPlugin("com.github.sbt" % "sbt-git" % "2.0.1")
addSbtPlugin("com.eed3si9n" % "sbt-buildinfo" % "0.11.0")
addSbtPlugin("com.eed3si9n" % "sbt-buildinfo" % "0.12.0")
addSbtPlugin("com.codecommit" % "sbt-github-packages" % "0.5.3")
addSbtPlugin("org.scalameta" % "sbt-scalafmt" % "2.5.0")
addSbtPlugin("ch.epfl.scala" % "sbt-scalafix" % "0.10.4")
addSbtPlugin("org.scalameta" % "sbt-scalafmt" % "2.5.2")
addSbtPlugin("software.purpledragon" % "sbt-dependency-lock" % "1.5.0")
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ case class MinNumRows(minNumRows: Json) extends ColumnBased("", ValidatorBase.L0
}

case class ColumnMaxCheck(column: String, value: Json)
extends ColumnBased(column, Max(UnresolvedAttribute(column)).toAggregateExpression()) {
extends ColumnBased(column, Max(UnresolvedAttribute.quoted(column)).toAggregateExpression()) {

override def substituteVariables(dict: VarSubstitution): ValidatorBase = {
val ret = copy(column = getVarSub(column, "column", dict), value = getVarSubJson(value, "value", dict))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ case class ColumnSumCheck(
minValue: Option[Json] = None,
maxValue: Option[Json] = None,
inclusive: Option[Json] = None
) extends ColumnBased(column, Sum(UnresolvedAttribute(column)).toAggregateExpression()) {
) extends ColumnBased(column, Sum(UnresolvedAttribute.quoted(column)).toAggregateExpression()) {

private val minOrMax: Either[String, Unit] = if (minValue.isEmpty && maxValue.isEmpty) {
Left("'minValue' or 'maxValue' or both must be defined")
Expand Down Expand Up @@ -78,13 +78,14 @@ case class ColumnSumCheck(
}

def getData(pctError: String): ListMap[String, String] = {
((minValue, maxValue) match {
val initial: ListMap[String, String] = ((minValue, maxValue) match {
case (Some(x), Some(y)) =>
ListMap("lower_bound" -> x.asNumber.get.toString, "upper_bound" -> y.asNumber.get.toString)
case (None, Some(y)) => ListMap("upper_bound" -> y.asNumber.get.toString)
case (Some(x), None) => ListMap("lower_bound" -> x.asNumber.get.toString)
case (None, None) => throw new RuntimeException("Must define at least one of minValue or maxValue.")
}) + ("inclusive" -> isInclusive.toString, "actual" -> r(idx).toString, "relative_error" -> pctError)
})
initial ++ List("inclusive" -> isInclusive.toString, "actual" -> r(idx).toString, "relative_error" -> pctError)
}

val actualSum: Double = dataType match {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -141,8 +141,8 @@ object ValidatorBase extends LazyLogging {
private val backtick = "`"
val I0: Literal = Literal.create(0, IntegerType)
val D0: Literal = Literal.create(0.0, DoubleType)
val L0: Literal = Literal.create(0, LongType)
val L1: Literal = Literal.create(1, LongType)
val L0: Literal = Literal.create(0L, LongType)
val L1: Literal = Literal.create(1L, LongType)

def isValueColumn(v: String): Boolean = v.startsWith(backtick)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -305,7 +305,7 @@ class RangeCheckSpec extends AnyFunSpec with Matchers with TestingSparkSession {
ValidatorQuickCheckError(
("item", "Eggs") :: Nil,
5.99,
"RangeCheck failed! max = 5.99 and (('max <= 6.0) || ('max >= 10.0))"
"RangeCheck failed! max = 5.99 and (('max <= 6.0) OR ('max >= 10.0))"
)
)
}
Expand Down Expand Up @@ -341,7 +341,7 @@ class RangeCheckSpec extends AnyFunSpec with Matchers with TestingSparkSession {
val dict = new VarSubstitution
val df = mkDataFrame(spark, defData)
assert(!sut.configCheck(df))
assert(sut.colTest(df.schema, dict).sql == "((`avg` < `min`) OR (`avg` > `max`))")
assert(sut.colTest(df.schema, dict).sql == "((avg < min) OR (avg > max))")
}

it("bad minValue column") {
Expand Down Expand Up @@ -395,7 +395,7 @@ class RangeCheckSpec extends AnyFunSpec with Matchers with TestingSparkSession {
ValidatorQuickCheckError(
("item", "Bread") :: Nil,
0.99,
"RangeCheck failed! avg = 0.99 and (('avg <= 'min) || ('avg >= 'max))"
"RangeCheck failed! avg = 0.99 and (('avg <= 'min) OR ('avg >= 'max))"
)
)
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -327,13 +327,13 @@ class StringLengthCheckSpec extends AnyFunSpec with Matchers with TestingSparkSe
ValidatorQuickCheckError(
("item", "I") :: Nil,
"I",
"StringLengthCheck failed! item = I and ((length('item) < 5) || (length('item) > 6))"
"StringLengthCheck failed! item = I and ((length('item) < 5) OR (length('item) > 6))"
)) ^
(sut.getEvents contains
ValidatorQuickCheckError(
("item", "") :: Nil,
"",
"StringLengthCheck failed! item = and ((length('item) < 5) || (length('item) > 6))"
"StringLengthCheck failed! item = and ((length('item) < 5) OR (length('item) > 6))"
))
)
}
Expand Down Expand Up @@ -364,7 +364,7 @@ class StringLengthCheckSpec extends AnyFunSpec with Matchers with TestingSparkSe
ValidatorQuickCheckError(
("item", "I") :: Nil,
"I",
"StringLengthCheck failed! item = I and ((length('item) < 5) || (length('item) > 6))"
"StringLengthCheck failed! item = I and ((length('item) < 5) OR (length('item) > 6))"
)
)

Expand All @@ -373,7 +373,7 @@ class StringLengthCheckSpec extends AnyFunSpec with Matchers with TestingSparkSe
ValidatorQuickCheckError(
("item", "") :: Nil,
"",
"StringLengthCheck failed! item = and ((length('item) < 5) || (length('item) > 6))"
"StringLengthCheck failed! item = and ((length('item) < 5) OR (length('item) > 6))"
)
)
}
Expand Down Expand Up @@ -404,7 +404,7 @@ class StringLengthCheckSpec extends AnyFunSpec with Matchers with TestingSparkSe
ValidatorQuickCheckError(
("item", "I") :: Nil,
"I",
"StringLengthCheck failed! item = I and ((length('item) < 5) || (length('item) > 5))"
"StringLengthCheck failed! item = I and ((length('item) < 5) OR (length('item) > 5))"
)
)

Expand All @@ -413,7 +413,7 @@ class StringLengthCheckSpec extends AnyFunSpec with Matchers with TestingSparkSe
ValidatorQuickCheckError(
("item", "") :: Nil,
"",
"StringLengthCheck failed! item = and ((length('item) < 5) || (length('item) > 5))"
"StringLengthCheck failed! item = and ((length('item) < 5) OR (length('item) > 5))"
)
)

Expand All @@ -422,7 +422,7 @@ class StringLengthCheckSpec extends AnyFunSpec with Matchers with TestingSparkSe
ValidatorQuickCheckError(
("item", "Item23") :: Nil,
"Item23",
"StringLengthCheck failed! item = Item23 and ((length('item) < 5) || (length('item) > 5))"
"StringLengthCheck failed! item = Item23 and ((length('item) < 5) OR (length('item) > 5))"
)
)
}
Expand Down
Loading

0 comments on commit fdcb056

Please sign in to comment.