From 2280d7d31911e5b7370f30e76f702b9dbf89bb43 Mon Sep 17 00:00:00 2001 From: David Benedeki Date: Thu, 7 Nov 2024 13:43:29 +0100 Subject: [PATCH 01/10] #291: Project improvements after 0.3.0 release * set credentials persistence to `false` in GitHub checkout actions * fixed release notes presence check GitHub workflow * added supported Atum Agent control functions list to documentation * added new grouping of issues into release notes draft * added badges to `README.md` --- .github/workflows/build.yml | 6 +- .github/workflows/check_pr_release_notes.yml | 90 ++++++++++++++++++ .github/workflows/format_check.yml | 3 +- .github/workflows/jacoco_report.yml | 2 + .github/workflows/license_check.yml | 4 +- .../pr_release_note_comment_check.yml | 94 ------------------- .github/workflows/release_draft.yml | 11 ++- .github/workflows/release_publish.yml | 2 + .github/workflows/test_filenames_check.yml | 4 +- README.md | 74 +++++++++++++++ 10 files changed, 190 insertions(+), 100 deletions(-) create mode 100644 .github/workflows/check_pr_release_notes.yml delete mode 100644 .github/workflows/pr_release_note_comment_check.yml diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 31ee0e50b..2340472d6 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -30,7 +30,8 @@ jobs: - name: Checkout code uses: actions/checkout@v4 with: - fetch-depth: 0 + fetch-depth: 1 + persist-credentials: false - uses: coursier/cache-action@v5 - name: Setup Scala @@ -64,7 +65,8 @@ jobs: - name: Checkout code uses: actions/checkout@v4 with: - fetch-depth: 0 + fetch-depth: 1 + persist-credentials: false - uses: coursier/cache-action@v5 - name: Setup Scala diff --git a/.github/workflows/check_pr_release_notes.yml b/.github/workflows/check_pr_release_notes.yml new file mode 100644 index 000000000..c4db978cd --- /dev/null +++ b/.github/workflows/check_pr_release_notes.yml @@ -0,0 +1,90 @@ +# +# Copyright 2021 ABSA Group Limited +# +# Licensed under the Apache 
License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +name: Check PR Release Notes in Description + +on: + pull_request: + types: [opened, synchronize, reopened, edited, labeled, unlabeled] + branches: [ master ] + +env: + SKIP_LABEL: 'no RN' + RLS_NOTES_TAG_REGEX: 'Release Notes:' + +jobs: + check-pr-release-notes: + runs-on: ubuntu-latest + + steps: + - name: Get Pull Request Info + id: pr_info + uses: actions/github-script@v7 + with: + script: | + const pr_number = context.payload.pull_request.number; + const pr = await github.rest.pulls.get({ + owner: context.repo.owner, + repo: context.repo.repo, + pull_number: pr_number + }); + const labels = pr.data.labels ? pr.data.labels.map(label => label.name) : []; + + if (labels.includes("${{ env.SKIP_LABEL }}")) { + console.log("Skipping release notes check because '${{ env.SKIP_LABEL }}' label is present."); + core.setOutput("skip_check", 'true'); + core.setOutput("pr_body", ""); + return; + } + + const pr_body = pr.data.body; + if (!pr_body) { + core.setFailed("Pull request description is empty."); + core.setOutput("pr_body", ""); + core.setOutput("skip_check", 'false'); + return; + } + core.setOutput("pr_body", pr_body); + core.setOutput("skip_check", 'false'); + return; + + - name: Skip check if SKIP_LABEL is present + if: steps.pr_info.outputs.skip_check == 'true' + run: echo "Skipping release notes validation." 
+ + - name: Check for 'Release Notes:' and bullet list + if: steps.pr_info.outputs.skip_check == 'false' + run: | + # Extract the body from the previous step + PR_BODY=$(cat <<-'EOF' + ${{ steps.pr_info.outputs.pr_body }} + EOF + ) + + # Check if "Release Notes:" exists + if ! echo "$PR_BODY" | grep -q '${{ env.RLS_NOTES_TAG_REGEX }}'; then + echo "Error: release notes tag not found in pull request description. Has to adhere to format '${{ env.RLS_NOTES_TAG_REGEX }}'." + exit 1 + fi + + # Extract text after "Release Notes:" line + TEXT_BELOW_RELEASE_NOTES_TAG=$(echo "$PR_BODY" | sed -n '/${{ env.RLS_NOTES_TAG_REGEX }}/,$p' | tail -n +2) + + # Check if there's a bullet list (lines starting with '-', '+' or '*') + if ! echo "$TEXT_BELOW_RELEASE_NOTES_TAG" | grep -qE '^\s*[-+*]\s+.+$'; then + echo "Error: No bullet list found under release notes tag." + exit 1 + fi diff --git a/.github/workflows/format_check.yml b/.github/workflows/format_check.yml index a93ce1783..12090ccfe 100644 --- a/.github/workflows/format_check.yml +++ b/.github/workflows/format_check.yml @@ -27,8 +27,9 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout code - uses: actions/checkout@v2 + uses: actions/checkout@v4 with: + persist-credentials: false fetch-depth: 0 ref: ${{ github.event.pull_request.head.ref }} diff --git a/.github/workflows/jacoco_report.yml b/.github/workflows/jacoco_report.yml index 80f08b2f2..0f3157b95 100644 --- a/.github/workflows/jacoco_report.yml +++ b/.github/workflows/jacoco_report.yml @@ -50,6 +50,8 @@ jobs: steps: - name: Checkout code uses: actions/checkout@v4 + with: + persist-credentials: false - name: Setup Scala uses: olafurpg/setup-scala@v14 with: diff --git a/.github/workflows/license_check.yml b/.github/workflows/license_check.yml index 36a4f4d5f..3113d4886 100644 --- a/.github/workflows/license_check.yml +++ b/.github/workflows/license_check.yml @@ -27,7 +27,9 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout code - uses: actions/checkout@v2 + uses: 
actions/checkout@v4 + with: + persist-credentials: false - name: Setup Scala uses: olafurpg/setup-scala@v10 with: diff --git a/.github/workflows/pr_release_note_comment_check.yml b/.github/workflows/pr_release_note_comment_check.yml deleted file mode 100644 index 4dc08f526..000000000 --- a/.github/workflows/pr_release_note_comment_check.yml +++ /dev/null @@ -1,94 +0,0 @@ -# -# Copyright 2021 ABSA Group Limited -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -name: PR Release Note Comment Check - -on: - issue_comment: - types: - - created - - edited - - deleted - pull_request: - types: - - opened - - synchronize - - reopened - - edited - - labeled - - unlabeled - branches: [ master ] - -jobs: - check-for-release-notes-comments: - if: ${{ ( github.event_name == 'pull_request') || (github.event.issue.pull_request) }} - name: Check For Release Notes Comments - runs-on: ubuntu-latest - steps: - - name: Get PR branch - uses: xt0rted/pull-request-comment-branch@v1 - id: comment-branch - - - name: Set latest commit status as pending - uses: myrotvorets/set-commit-status-action@master - with: - sha: ${{ steps.comment-branch.outputs.head_sha }} - token: ${{ secrets.GITHUB_TOKEN }} - status: pending - - - name: Fetch all PR comments - if: ${{ ! 
contains( github.event.pull_request.labels.*.name, 'no RN') }} - id: get-comments - uses: actions/github-script@v7 - with: - github-token: ${{secrets.GITHUB_TOKEN}} - script: | - const issueNumber = context.issue.number; - const repoName = context.repo.repo; - const repoOwner = context.repo.owner; - - const comments = await github.rest.issues.listComments({ - owner: repoOwner, - repo: repoName, - issue_number: issueNumber, - }); - - return comments.data.map(comment => comment.body); - - - name: Check for 'Release Notes' in comments - if: ${{ ! contains( github.event.pull_request.labels.*.name, 'no RN') }} - uses: actions/github-script@v7 - with: - script: | - const comments = ${{ steps.get-comments.outputs.result }}; - console.log("Comments:"); - console.log(comments); - const releaseNotesRegex = /release notes?:?/i; - const hasReleaseNotes = comments.some(comment => releaseNotesRegex.test(comment)); - - if (!hasReleaseNotes) { - console.log('No "Release notes" found in PR comments'); - core.setFailed('No "Release notes" found in PR comments') - } else { - console.log('"Release notes" found in comments'); - } - - name: Set latest commit status as ${{ job.status }} - uses: myrotvorets/set-commit-status-action@master - if: always() - with: - sha: ${{ steps.comment-branch.outputs.head_sha }} - token: ${{ secrets.GITHUB_TOKEN }} - status: ${{ job.status }} diff --git a/.github/workflows/release_draft.yml b/.github/workflows/release_draft.yml index aa303469c..95055bfa1 100644 --- a/.github/workflows/release_draft.yml +++ b/.github/workflows/release_draft.yml @@ -28,6 +28,7 @@ jobs: steps: - uses: actions/checkout@v4 with: + persist-credentials: false fetch-depth: 0 # the following step is disabled because it doesn't order the version tags correctly # - name: Validate format of received tag @@ -104,6 +105,7 @@ jobs: steps: - uses: actions/checkout@v4 with: + persist-credentials: false fetch-depth: 0 ref: refs/tags/${{ github.event.inputs.tagName }} @@ -119,10 +121,17 @@ 
jobs: with: tag-name: ${{ github.event.inputs.tagName }} chapters: '[ + {"title": "No entry 🚫", "label": "duplicate"}, + {"title": "No entry 🚫", "label": "invalid"}, + {"title": "No entry 🚫", "label": "wontfix"}, + {"title": "No entry 🚫", "label": "no RN"}, {"title": "Breaking Changes 💥", "label": "breaking-change"}, {"title": "New Features 🎉", "label": "enhancement"}, {"title": "New Features 🎉", "label": "feature"}, - {"title": "Bugfixes 🛠", "label": "bug"} + {"title": "Bugfixes 🛠", "label": "bug"}, + {"title": "Infrastructure ⚙️", "label": "infrastructure"}, + {"title": "Silent-live 🤫", "label": "silent-live"}, + {"title": "Documentation 📜", "label": "documentation"} ]' duplicity-scope: 'service' duplicity-icon: '🔁' diff --git a/.github/workflows/release_publish.yml b/.github/workflows/release_publish.yml index b349a8ff6..3a68d8cf2 100644 --- a/.github/workflows/release_publish.yml +++ b/.github/workflows/release_publish.yml @@ -27,6 +27,7 @@ jobs: - name: Checkout code uses: actions/checkout@v4 with: + persist-credentials: false fetch-depth: 0 - uses: coursier/cache-action@v5 @@ -51,6 +52,7 @@ jobs: - name: Checkout code uses: actions/checkout@v4 with: + persist-credentials: false fetch-depth: 0 - uses: coursier/cache-action@v5 diff --git a/.github/workflows/test_filenames_check.yml b/.github/workflows/test_filenames_check.yml index d3e24ee2f..6e35228e2 100644 --- a/.github/workflows/test_filenames_check.yml +++ b/.github/workflows/test_filenames_check.yml @@ -27,7 +27,9 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout code - uses: actions/checkout@v2 + uses: actions/checkout@v4 + with: + persist-credentials: false - name: Filename Inspector id: scan-test-files diff --git a/README.md b/README.md index 46dfc975b..2b1185990 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,19 @@ # Atum Service +[![Build](https://github.com/AbsaOSS/spark-commons/actions/workflows/build.yml/badge.svg)](https://github.com/AbsaOSS/spark-commons/actions/workflows/build.yml) 
+[![License](http://img.shields.io/:license-apache-blue.svg)](http://www.apache.org/licenses/LICENSE-2.0.html) +[![Maintenance](https://img.shields.io/badge/Maintained%3F-yes-green.svg)](https://GitHub.com/Naereen/StrapDown.js/graphs/commit-activity) + +| Atum Server | Atum Agent | Atum Model | Atum Reader | +|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|---|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| [![GitHub release](https://img.shields.io/github/release/AbsaOSS/atum-service.svg)](https://GitHub.com/AbsaOSS/atum-service/releases/) | [![Maven Central](https://maven-badges.herokuapp.com/maven-central/za.co.absa.atum-service/atum-agent-spark3_2.13/badge.svg)](https://central.sonatype.com/search?q=atum-agent&namespace=za.co.absa.atum-service) | [![Maven Central](https://maven-badges.herokuapp.com/maven-central/za.co.absa.atum-service/atum-model_2.13/badge.svg)](https://central.sonatype.com/search?q=atum-model&namespace=za.co.absa.atum-service) | [![Maven Central](https://maven-badges.herokuapp.com/maven-central/za.co.absa.atum-service/atum-reader_2.13/badge.svg)](https://central.sonatype.com/search?q=atum-reader&namespace=za.co.absa.atum-service) | + + + + - [Atum Service](#atum-service) + - [Motivation](#motivation) + - [Features](#features) - [Modules](#modules) - [Agent `agent/`](#agent-agent) - [Reader `reader/`](#agent-agent) @@ -15,6 +28,9 @@ - [Measurement](#measurement) - [Checkpoint](#checkpoint) - [Data 
Flow](#data-flow) + - [Usage](#usage) + - [Atum Agent routines](#atum-agent-routines) + - [Control measurement types](#control-measurement-types) - [How to generate Code coverage report](#how-to-generate-code-coverage-report) - [How to Run in IntelliJ](#how-to-run-in-intellij) - [How to Run Tests](#how-to-run-tests) @@ -41,6 +57,39 @@ functions and are stored on a single central place, in a relational database. Co checkpoints is not only helpful for complying with strict regulatory frameworks, but also helps during development and debugging of your Spark-based data processing. +## Motivation + +Big Data strategy for a company usually includes data gathering and ingestion processes. +That is the definition of how data from different systems operating inside a company +are gathered and stored for further analysis and reporting. An ingestion process can involve +various transformations like: +* Converting between data formats (XML, CSV, etc.) +* Data type casting, for example converting XML strings to numeric values +* Joining reference tables. For example this can include enriching existing + data with additional information available through dictionary mappings. + This constitutes a common ETL (Extract, Transform and Load) process. + +During such transformations, sometimes data can get corrupted (e.g. during casting), records can +get added or lost. For instance, *outer joining* a table holding duplicate keys can result in records explosion. +And *inner joining* a table which has no matching keys for some records will result in loss of records. + +In regulated industries it is crucial to ensure data integrity and accuracy. For instance, in the banking industry +the BCBS set of regulations requires analysis and reporting to be based on data accuracy and integrity principles. +Thus it is critical at the ingestion stage to preserve the accuracy and integrity of the data gathered from a +source system.
+ +The purpose of Atum is to provide means of ensuring no critical fields have been modified during +the processing and no records are added or lost. To do this the library provides an ability +to calculate *hash sums* of explicitly specified columns. We call the set of hash sums at a given time +a *checkpoint* and each hash sum we call a *control measurement*. Checkpoints can be calculated anytime +between Spark transformations and actions. + +We assume the data for ETL are processed in a series of batch jobs. Let's call each data set for a given batch +job a *batch*. All checkpoints are calculated for a specific batch. + +## Features + +TBD ## Modules @@ -157,6 +206,31 @@ The journey of a dataset throughout various data transformations and pipelines. even if it involves multiple applications or ETL pipelines. +## Usage + +### Atum Agent routines + +TBD + +### Control measurement types + +The control measurement of a column is a hash sum. It can be calculated differently depending on the column's data type and +on business requirements. 
This table represents all currently supported measurement types: + +| Type | Description | +|------------------------------------|:--------------------------------------------------------------| +| AtumMeasure.RecordCount | Calculates the number of rows in the dataset | +| AtumMeasure.DistinctRecordCount | Calculates DISTINCT(COUNT(()) of the specified column | +| AtumMeasure.SumOfValuesOfColumn | Calculates SUM() of the specified column | +| AtumMeasure.AbsSumOfValuesOfColumn | Calculates SUM(ABS()) of the specified column | +| AtumMeasure.SumOfHashesOfColumn | Calculates SUM(CRC32()) of the specified column | +| Measure.UnknownMeasure | Custom measure where the data are provided by the application | + +[//]: # (| controlType.aggregatedTruncTotal | Calculates SUM(TRUNC()) of the specified column |) + +[//]: # (| controlType.absAggregatedTruncTotal | Calculates SUM(TRUNC(ABS())) of the specified column |) + + ## How to generate Code coverage report ```sbt sbt jacoco From cfa6964bbe8a3ddd01ce9dfabb95f4b40cdc42dc Mon Sep 17 00:00:00 2001 From: David Benedeki Date: Mon, 18 Nov 2024 12:06:11 +0100 Subject: [PATCH 02/10] * fixed suggested README.md inaccuracy * better `build.sbt` --- README.md | 10 +++++----- build.sbt | 2 -- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index 2b1185990..e8e6a305b 100644 --- a/README.md +++ b/README.md @@ -78,11 +78,11 @@ the BCBS set of regulations requires analysis and reporting to be based on data Thus it is critical at the ingestion stage to preserve the accuracy and integrity of the data gathered from a source system. -The purpose of Atum is to provide means of ensuring no critical fields have been modified during -the processing and no records are added or lost. To do this the library provides an ability -to calculate *hash sums* of explicitly specified columns. We call the set of hash sums at a given time -a *checkpoint* and each hash sum we call a *control measurement*. 
Checkpoints can be calculated anytime -between Spark transformations and actions. +The purpose of Atum is to provide means of ensuring no critical fields have been modified during the processing and no +records are added or lost. To do this the library provides an ability to calculate *control numbers* of explicitly +specified columns using a selection of aggregate functions. We call the set of such measurements at a given time +a *checkpoint* and each value - a result of the function computation - we call a *control measurement*. Checkpoints can +be calculated anytime between Spark transformations and actions, as well as at the start of the process or after its end. We assume the data for ETL are processed in a series of batch jobs. Let's call each data set for a given batch job a *batch*. All checkpoints are calculated for a specific batch. diff --git a/build.sbt b/build.sbt index 0c2f6b1ee..c9ab64c9c 100644 --- a/build.sbt +++ b/build.sbt @@ -20,8 +20,6 @@ import Dependencies.* import Dependencies.Versions.spark3 import VersionAxes.* -ThisBuild / scalaVersion := Setup.scala213.asString // default version TODO - ThisBuild / versionScheme := Some("early-semver") Global / onChangedBuildSource := ReloadOnSourceChanges From 3fb3af41fcfa3918a3966a344be9a27b2ba2385e Mon Sep 17 00:00:00 2001 From: David Benedeki Date: Mon, 18 Nov 2024 12:09:59 +0100 Subject: [PATCH 03/10] * CODEOWNERS update --- .github/CODEOWNERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index cec71281c..ddd8ebeb0 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -1 +1 @@ -* @benedeki @lsulak @TebaleloS @Zejnilovic @dk1844 @salamonpavel +* @benedeki @lsulak @Zejnilovic @dk1844 @salamonpavel @abll256 From 45e137f3b02e6247c00f642875c9173014216e90 Mon Sep 17 00:00:00 2001 From: David Benedeki Date: Mon, 18 Nov 2024 17:21:12 +0100 Subject: [PATCH 04/10] * ignoring `AgentServerCompatibilityTests` ---
.../za/co/absa/atum/agent/AgentServerCompatibilityTests.scala | 2 ++ 1 file changed, 2 insertions(+) diff --git a/agent/src/test/scala/za/co/absa/atum/agent/AgentServerCompatibilityTests.scala b/agent/src/test/scala/za/co/absa/atum/agent/AgentServerCompatibilityTests.scala index 992aabe12..591b5c779 100644 --- a/agent/src/test/scala/za/co/absa/atum/agent/AgentServerCompatibilityTests.scala +++ b/agent/src/test/scala/za/co/absa/atum/agent/AgentServerCompatibilityTests.scala @@ -22,10 +22,12 @@ import za.co.absa.atum.agent.model.AtumMeasure.RecordCount import za.co.absa.balta.DBTestSuite import za.co.absa.balta.classes.JsonBString import com.typesafe.config.{ConfigFactory, ConfigValueFactory} +import org.scalatest.Ignore import za.co.absa.atum.agent.dispatcher.HttpDispatcher import scala.collection.immutable.ListMap +@Ignore class AgentServerCompatibilityTests extends DBTestSuite { private val testDataForRDD = Seq( From 7e86f6cb8923e24d1d5488136c1418b4297467a0 Mon Sep 17 00:00:00 2001 From: David Benedeki Date: Thu, 21 Nov 2024 05:15:51 +0100 Subject: [PATCH 05/10] * conditional load of some modules based on Java version --- build.sbt | 85 ++++++++++++++++++++++++++++----------------- project/Setup.scala | 5 +++ 2 files changed, 59 insertions(+), 31 deletions(-) diff --git a/build.sbt b/build.sbt index c9ab64c9c..839e11b56 100644 --- a/build.sbt +++ b/build.sbt @@ -20,17 +20,25 @@ import Dependencies.* import Dependencies.Versions.spark3 import VersionAxes.* +ThisBuild / scalaVersion := Setup.scala213.asString + ThisBuild / versionScheme := Some("early-semver") Global / onChangedBuildSource := ReloadOnSourceChanges +val limitedProject: Boolean = Setup.currentJava < Setup.recommendedJava + initialize := { val _ = initialize.value // Ensure previous initializations are run - val requiredJavaVersion = VersionNumber("11") - val currentJavaVersion = VersionNumber(sys.props("java.specification.version")) - println(s"Running on Java version $currentJavaVersion, required is 
at least version $requiredJavaVersion") - //this routine can be used to assert the required Java version + assert(Setup.currentJava >= Setup.requiredJava, + s"Running on Java version ${Setup.currentJava}, required is at least version ${Setup.requiredJava}, recommended is ${Setup.recommendedJava}") + + if (limitedProject) { + val log = Keys.sLog.value + log.warn(s"Some nodules will not be loaded, because they require at least Java ${Setup.recommendedJava} while Java ${Setup.currentJava} has been found") + log.warn("""Affected modules are: "atum-server", "atum-database"""") + } } enablePlugins(FlywayPlugin) @@ -45,23 +53,31 @@ libraryDependencies ++= flywayDependencies /** * Module `server` is the service application that collects and stores measured data And upo request retrives them */ -lazy val server = (projectMatrix in file("server")) - .settings( - Setup.commonSettings ++ Seq( - name := "atum-server", - javacOptions ++= Setup.serverAndDbJavacOptions, - Compile / packageBin / publishArtifact := false, - packageBin := (Compile / assembly).value, - artifactPath / (Compile / packageBin) := baseDirectory.value / s"target/${name.value}-${version.value}.jar", - testFrameworks += new TestFramework("zio.test.sbt.ZTestFramework"), - Setup.serverMergeStrategy, - publish / skip := true - ): _* - ) - .enablePlugins(AssemblyPlugin) - .enablePlugins(AutomateHeaderPlugin) - .addSingleScalaBuild(Setup.serverAndDbScalaVersion, Dependencies.serverDependencies) - .dependsOn(model) +lazy val server = { + val server = (projectMatrix in file("server")) + .settings( + Setup.commonSettings ++ Seq( + name := "atum-server", + javacOptions ++= Setup.serverAndDbJavacOptions, + Compile / packageBin / publishArtifact := false, + packageBin := (Compile / assembly).value, + artifactPath / (Compile / packageBin) := baseDirectory.value / s"target/${name.value}-${version.value}.jar", + testFrameworks += new TestFramework("zio.test.sbt.ZTestFramework"), + Setup.serverMergeStrategy, + publish / 
skip := true + ): _* + ) + .enablePlugins(AssemblyPlugin) + .enablePlugins(AutomateHeaderPlugin) + .addSingleScalaBuild(Setup.serverAndDbScalaVersion, Dependencies.serverDependencies) + .dependsOn(model) + + if (limitedProject) { + null // if value other than null is returned, the condition doesn't seem to work. + } else { + server + } +} /** * Module `agent` is the library to be plugged into the Spark application to measure the data and send it to the server @@ -93,16 +109,23 @@ lazy val model = (projectMatrix in file("model")) /** * Module `database` is the source of database structures of the service */ -lazy val database = (projectMatrix in file("database")) - .disablePlugins(sbtassembly.AssemblyPlugin) - .settings( - Setup.commonSettings ++ Seq( - name := "atum-database", - javacOptions ++= Setup.serverAndDbJavacOptions, - publish / skip := true - ): _* - ) - .addSingleScalaBuild(Setup.serverAndDbScalaVersion, Dependencies.databaseDependencies) +lazy val database = { + val database = (projectMatrix in file("database")) + .disablePlugins(sbtassembly.AssemblyPlugin) + .settings( + Setup.commonSettings ++ Seq( + name := "atum-database", + javacOptions ++= Setup.serverAndDbJavacOptions, + publish / skip := true + ): _* + ) + .addSingleScalaBuild(Setup.serverAndDbScalaVersion, Dependencies.databaseDependencies) + if (limitedProject) { + null // if value other than null is returned, the condition doesn't seem to work.
+ } else { + database + } +} /** * Module `reader` is the library to be plugged into application which wants to easily read the measured data stored on diff --git a/project/Setup.scala b/project/Setup.scala index 14c3f8927..c10270589 100644 --- a/project/Setup.scala +++ b/project/Setup.scala @@ -24,6 +24,11 @@ import za.co.absa.commons.version.Version object Setup { + //supprtted Java versions + val requiredJava: Double = "1.8".toDouble + val recommendedJava: Double = "11".toDouble + val currentJava: Double = sys.props("java.specification.version").toDouble + //supported Scala versions val scala211: Version = Version.asSemVer("2.11.12") val scala212: Version = Version.asSemVer("2.12.18") From 00ebc9e6ac34f8c9ffb6e50fdea1fee7d7c37906 Mon Sep 17 00:00:00 2001 From: David Benedeki Date: Thu, 21 Nov 2024 05:20:34 +0100 Subject: [PATCH 06/10] * Run tests on all Scala versions --- .github/workflows/build.yml | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 2340472d6..849bd9e0c 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -39,11 +39,11 @@ jobs: with: java-version: "adopt@1.8" - - name: Build and run unit tests - run: sbt "project model" test doc "project reader" test doc "project agent_spark3" test doc + - name: Build and run tests + run: sbt testAll - - name: Build and run integration tests - run: sbt "project model" testIT "project reader" testIT "project agent_spark3" testIT + - name: Generate documenation + run: sbt doc test-database-and-server: name: Test Database and Server @@ -75,10 +75,14 @@ jobs: java-version: "adopt@1.11.0-11" - name: Build and run unit tests - run: sbt "project database" test doc "project server" test doc + run: sbt "project database" test "project server" test - name: Prepare testing database run: sbt flywayMigrate - name: Build and run integration tests run: sbt "project database" testIT "project server" testIT + + 
- name: Generate documentation + run: sbt "project database" doc "project server" doc + From 2cb34428e53f43eb7a5c0bf2277148441f48cbaa Mon Sep 17 00:00:00 2001 From: David Benedeki Date: Thu, 21 Nov 2024 10:12:34 +0100 Subject: [PATCH 07/10] * Release Notes Presence Check updated --- .github/workflows/check_pr_release_notes.yml | 90 ------------------- .../release-notes-presence-check.yml | 44 +++++++++ .../agent/AgentServerCompatibilityTests.scala | 4 +- 3 files changed, 45 insertions(+), 93 deletions(-) delete mode 100644 .github/workflows/check_pr_release_notes.yml create mode 100644 .github/workflows/release-notes-presence-check.yml diff --git a/.github/workflows/check_pr_release_notes.yml b/.github/workflows/check_pr_release_notes.yml deleted file mode 100644 index c4db978cd..000000000 --- a/.github/workflows/check_pr_release_notes.yml +++ /dev/null @@ -1,90 +0,0 @@ -# -# Copyright 2021 ABSA Group Limited -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# - -name: Check PR Release Notes in Description - -on: - pull_request: - types: [opened, synchronize, reopened, edited, labeled, unlabeled] - branches: [ master ] - -env: - SKIP_LABEL: 'no RN' - RLS_NOTES_TAG_REGEX: 'Release Notes:' - -jobs: - check-pr-release-notes: - runs-on: ubuntu-latest - - steps: - - name: Get Pull Request Info - id: pr_info - uses: actions/github-script@v7 - with: - script: | - const pr_number = context.payload.pull_request.number; - const pr = await github.rest.pulls.get({ - owner: context.repo.owner, - repo: context.repo.repo, - pull_number: pr_number - }); - const labels = pr.data.labels ? pr.data.labels.map(label => label.name) : []; - - if (labels.includes("${{ env.SKIP_LABEL }}")) { - console.log("Skipping release notes check because '${{ env.SKIP_LABEL }}' label is present."); - core.setOutput("skip_check", 'true'); - core.setOutput("pr_body", ""); - return; - } - - const pr_body = pr.data.body; - if (!pr_body) { - core.setFailed("Pull request description is empty."); - core.setOutput("pr_body", ""); - core.setOutput("skip_check", 'false'); - return; - } - core.setOutput("pr_body", pr_body); - core.setOutput("skip_check", 'false'); - return; - - - name: Skip check if SKIP_LABEL is present - if: steps.pr_info.outputs.skip_check == 'true' - run: echo "Skipping release notes validation." - - - name: Check for 'Release Notes:' and bullet list - if: steps.pr_info.outputs.skip_check == 'false' - run: | - # Extract the body from the previous step - PR_BODY=$(cat <<-'EOF' - ${{ steps.pr_info.outputs.pr_body }} - EOF - ) - - # Check if "Release Notes:" exists - if ! echo "$PR_BODY" | grep -q '${{ env.RLS_NOTES_TAG_REGEX }}'; then - echo "Error: release notes tag not found in pull request description. Has to adhere to format '${{ env.RLS_NOTES_TAG_REGEX }}'." 
- exit 1 - fi - - # Extract text after "Release Notes:" line - TEXT_BELOW_RELEASE_NOTES_TAG=$(echo "$PR_BODY" | sed -n '/${{ env.RLS_NOTES_TAG_REGEX }}/,$p' | tail -n +2) - - # Check if there's a bullet list (lines starting with '-', '+' or '*') - if ! echo "$TEXT_BELOW_RELEASE_NOTES_TAG" | grep -qE '^\s*[-+*]\s+.+$'; then - echo "Error: No bullet list found under release notes tag." - exit 1 - fi diff --git a/.github/workflows/release-notes-presence-check.yml b/.github/workflows/release-notes-presence-check.yml new file mode 100644 index 000000000..f60618756 --- /dev/null +++ b/.github/workflows/release-notes-presence-check.yml @@ -0,0 +1,44 @@ +# +# Copyright 2021 ABSA Group Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +name: Release Notes Presence Check + +on: + pull_request: + types: [opened, synchronize, reopened, edited, labeled, unlabeled] + branches: [ master ] + +env: + SKIP_LABEL: 'no RN' + RLS_NOTES_TAG_REGEX: 'Release Notes:' + +jobs: + release-notes-presence-check: + name: Release Notes Presence Check + runs-on: ubuntu-latest + + steps: + - uses: actions/setup-python@v5.1.1 + with: + python-version: '3.11' + + - name: Check presence of release notes in PR description + uses: AbsaOSS/release-notes-presence-check@v0.1.0 + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + with: + github-repository: ${{ github.repository }} + pr-number: ${{ github.event.number }} diff --git a/agent/src/test/scala/za/co/absa/atum/agent/AgentServerCompatibilityTests.scala b/agent/src/test/scala/za/co/absa/atum/agent/AgentServerCompatibilityTests.scala index 591b5c779..d720100f1 100644 --- a/agent/src/test/scala/za/co/absa/atum/agent/AgentServerCompatibilityTests.scala +++ b/agent/src/test/scala/za/co/absa/atum/agent/AgentServerCompatibilityTests.scala @@ -22,12 +22,10 @@ import za.co.absa.atum.agent.model.AtumMeasure.RecordCount import za.co.absa.balta.DBTestSuite import za.co.absa.balta.classes.JsonBString import com.typesafe.config.{ConfigFactory, ConfigValueFactory} -import org.scalatest.Ignore import za.co.absa.atum.agent.dispatcher.HttpDispatcher import scala.collection.immutable.ListMap -@Ignore class AgentServerCompatibilityTests extends DBTestSuite { private val testDataForRDD = Seq( @@ -42,7 +40,7 @@ class AgentServerCompatibilityTests extends DBTestSuite { .add(StructField("columnForSum", DoubleType)) // Need to add service & pg run in CI - test("Agent should be compatible with server") { + ignore("Agent should be compatible with server") { val expectedMeasurement = JsonBString( """{"mainValue": {"value": "4", "valueType": "Long"}, "supportValues": {}}""".stripMargin From e7e75acbdd66f0ec210ab7dac65fdf7404127ade Mon Sep 17 00:00:00 2001 From: David Benedeki 
<14905969+benedeki@users.noreply.github.com> Date: Thu, 21 Nov 2024 10:21:01 +0100 Subject: [PATCH 08/10] Update CODEOWNERS --- .github/CODEOWNERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index ddd8ebeb0..ab470496d 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -1 +1 @@ -* @benedeki @lsulak @Zejnilovic @dk1844 @salamonpavel @abll256 +* @benedeki @lsulak @Zejnilovic @dk1844 @salamonpavel @ABLL526 From 3add844346e62727bb931e0531fb0d8dbc30b2cc Mon Sep 17 00:00:00 2001 From: David Benedeki <14905969+benedeki@users.noreply.github.com> Date: Thu, 21 Nov 2024 10:48:36 +0100 Subject: [PATCH 09/10] Update project/Setup.scala Co-authored-by: Ladislav Sulak --- project/Setup.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/project/Setup.scala b/project/Setup.scala index c10270589..319a06bcd 100644 --- a/project/Setup.scala +++ b/project/Setup.scala @@ -24,7 +24,7 @@ import za.co.absa.commons.version.Version object Setup { - //supprtted Java versions + //supported Java versions val requiredJava: Double = "1.8".toDouble val recommendedJava: Double = "11".toDouble val currentJava: Double = sys.props("java.specification.version").toDouble From 59ef2a9db676fa42816f71c786eafe81c5e398fb Mon Sep 17 00:00:00 2001 From: David Benedeki Date: Thu, 21 Nov 2024 10:53:03 +0100 Subject: [PATCH 10/10] * Text improvement --- README.md | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index e8e6a305b..5703b5954 100644 --- a/README.md +++ b/README.md @@ -214,8 +214,9 @@ TBD ### Control measurement types -The control measurement of a column is a hash sum. It can be calculated differently depending on the column's data type and -on business requirements. This table represents all currently supported measurement types: +The control measurement of one or more columns is an aggregation function result executed over the dataset. 
It can be +calculated differently depending on the column's data type, on business requirements, and on the function used. This table +represents all currently supported measurement types (aka measures): | Type | Description | |------------------------------------|:--------------------------------------------------------------|