diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index ded5fc0..a0c33c4 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -1 +1 @@ -* @tmikula-dev @OlivieFranklova +* @petr-pokorny-absa @lsulak @tmikula-dev diff --git a/.github/copilot-instructions.md b/.github/copilot-instructions.md new file mode 100644 index 0000000..732b6f6 --- /dev/null +++ b/.github/copilot-instructions.md @@ -0,0 +1,23 @@ +Tool for exact comparison of two Parquet/CSV datasets, detecting row and column-level differences. +Monorepo: `bigfiles/` (Scala+Spark, files not fitting RAM) and `smallfiles/` (Python, files fitting RAM). + +## Scala (bigfiles/) +- Scala 2.12.20 default, 2.11.12 cross-compiled via `sbt +`. Spark 3.5.3/2.4.7, Hadoop 3.3.5/2.6.5, Java 8. +- SBT 1.10.2. All sbt commands run from `bigfiles/`. Entry point: `za.co.absa.DatasetComparison`. +- `sbt test` — unit + integration (local Spark `local[*]`). `sbt jacoco` — coverage. `sbt assembly` — fat JAR. +- JaCoCo online mode: `sbt-jacoco` + `jacoco-method-filter-sbt`, rules in `bigfiles/jmf-rules.txt`, aliases in `bigfiles/.sbtrc`. +- scalafmt: dialect scala211 (cross-compat), maxColumn 120. `assemblyMergeStrategy` discards META-INF. +- No runtime services — pure Spark batch job. + +## Python (smallfiles/) +- Python 3.13. Entry point: `smallfiles/main.py`. Deps pinned in `smallfiles/requirements.txt`. + +## Quality gates +- Scala: JaCoCo overall >= 67% ( >= 80% is goal), changed files >= 80%. PR comments via `MoranaApps/jacoco-report`. +- Python: pytest >= 80%, pylint >= 9.5, black formatting, mypy type checking. + +## Conventions +- Apache 2.0 license headers on all source files. +- Organization: `za.co.absa`. Git versioning via `sbt-git`. +- GH Actions: pinned commit SHAs for all third-party actions. +- `bigfiles/project/` — sbt build definitions only, excluded from CI change detection. 
diff --git a/.github/workflows/ci_python.yml b/.github/workflows/ci_python.yml index f80c74b..9b8ec01 100644 --- a/.github/workflows/ci_python.yml +++ b/.github/workflows/ci_python.yml @@ -17,105 +17,170 @@ name: CI Python on: pull_request: + types: [ opened, synchronize, reopened ] + push: + branches: [ master ] + workflow_dispatch: + +concurrency: + group: static-python-check-${{ github.ref }} + cancel-in-progress: true + +permissions: + contents: read + security-events: write jobs: - test-smallfiles: - name: Test Small files + detect: + name: Python Changes Detection + runs-on: ubuntu-latest + outputs: + python_changed: ${{ steps.changes.outputs.python_changed }} + steps: + - name: Checkout repository + uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 + with: + persist-credentials: false + fetch-depth: 0 + + - name: Check if Python files changed + id: changes + shell: bash + env: + GH_TOKEN: ${{ github.token }} + run: | + set -euo pipefail + + if [[ "${{ github.event_name }}" == "pull_request" ]]; then + # --paginate: the PR files endpoint returns only 30 files per page by default. + CHANGED_FILES=$(gh api \ + --paginate \ + "repos/${{ github.repository }}/pulls/${{ github.event.pull_request.number }}/files" \ + --jq '.[].filename | select(endswith(".py") or . 
== "smallfiles/requirements.txt")') + else + CHANGED_FILES=$(git diff --name-only "${{ github.sha }}~1" "${{ github.sha }}" -- '*.py' 'smallfiles/requirements.txt') + fi + + if [[ -n "$CHANGED_FILES" ]]; then + echo "python_changed=true" >> "$GITHUB_OUTPUT" + else + echo "python_changed=false" >> "$GITHUB_OUTPUT" + fi + + pylint-analysis: + name: Pylint Static Code Analysis + needs: detect + if: needs.detect.outputs.python_changed == 'true' runs-on: ubuntu-latest - defaults: - run: - working-directory: smallfiles steps: - - name: Checkout code - uses: actions/checkout@v4 + - name: Checkout repository + uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 with: persist-credentials: false + fetch-depth: 0 - name: Set up Python - uses: actions/setup-python@v5 + uses: actions/setup-python@83679a892e2d95755f2dac6acb0bfd1e9ac5d548 with: - python-version: '3.11' + python-version: '3.13' cache: 'pip' - name: Install dependencies - run: | - pip install -r requirements.txt - pip install coverage pytest - - - name: Run tests - run: coverage run -m pytest test/ + run: pip install -r smallfiles/requirements.txt - - name: Show coverage - run: coverage report -m --omit=".*.ipynb" - - - name: Create coverage file - if: github.event_name == 'pull_request' - run: coverage xml + - name: Analyze code with Pylint + id: analyze-code + run: | + pylint_score=$(pylint $(git ls-files '*.py')| grep 'rated at' | awk '{print $7}' | cut -d'/' -f1) + echo "PYLINT_SCORE=$pylint_score" >> $GITHUB_ENV - - name: Get Cover - if: github.event_name == 'pull_request' - uses: orgoro/coverage@v3.1 + - name: Check Pylint score + run: | + if (( $(echo "$PYLINT_SCORE < 9.5" | bc -l) )); then + echo "Failure: Pylint score is below 9.5 (project score: $PYLINT_SCORE)." + exit 1 + else + echo "Success: Pylint score is above 9.5 (project score: $PYLINT_SCORE)." 
+ fi + + black-check: + name: Black Format Check + needs: detect + if: needs.detect.outputs.python_changed == 'true' + runs-on: ubuntu-latest + steps: + - name: Checkout repository + uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 with: - coverageFile: smallfiles/coverage.xml - token: ${{ secrets.GITHUB_TOKEN }} - thresholdAll: 0.7 - thresholdNew: 0.9 + persist-credentials: false + fetch-depth: 0 - - uses: actions/upload-artifact@v4 - if: github.event_name == 'pull_request' + - name: Set up Python + uses: actions/setup-python@83679a892e2d95755f2dac6acb0bfd1e9ac5d548 with: - name: coverage - path: coverage.xml - retention-days: 1 + python-version: '3.13' + cache: 'pip' + + - name: Install dependencies + run: pip install -r smallfiles/requirements.txt - python-format-check: - name: Python Format Check + - name: Check code format with Black + id: check-format + run: black --check $(git ls-files '*.py') + + mypy-check: + name: Mypy Type Check + needs: detect + if: needs.detect.outputs.python_changed == 'true' runs-on: ubuntu-latest steps: - name: Checkout repository - uses: actions/checkout@v4 + uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 with: persist-credentials: false + fetch-depth: 0 - name: Set up Python - uses: actions/setup-python@v5 + uses: actions/setup-python@83679a892e2d95755f2dac6acb0bfd1e9ac5d548 with: - python-version: '3.11' + python-version: '3.13' cache: 'pip' - name: Install dependencies - run: | - pip install black - - - name: Check code format with Black - run: | - black --check $(git ls-files '*.py') - + run: pip install -r smallfiles/requirements.txt + - name: Check types with Mypy + id: check-types + run: mypy . 
- python-static-analysis: - name: Python Static Analysis + unit-tests: + name: Pytest Tests with Coverage + needs: detect + if: needs.detect.outputs.python_changed == 'true' runs-on: ubuntu-latest - defaults: - run: - working-directory: smallfiles steps: - name: Checkout repository - uses: actions/checkout@v4 + uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 with: persist-credentials: false + fetch-depth: 0 - name: Set up Python - uses: actions/setup-python@v5 + uses: actions/setup-python@83679a892e2d95755f2dac6acb0bfd1e9ac5d548 with: - python-version: '3.11' + python-version: '3.13' cache: 'pip' - - name: Install dependencies - run: | - pip install -r requirements.txt - pip install pylint + - name: Install Python dependencies + run: pip install -r smallfiles/requirements.txt - - name: Analysing the code with pylint - run: | - pylint $(git ls-files '*.py') + - name: Check code coverage with Pytest + run: pytest --cov=smallfiles -v smallfiles/test/ --cov-fail-under=80 + + noop: + name: No Operation + needs: detect + if: needs.detect.outputs.python_changed != 'true' + runs-on: ubuntu-latest + steps: + - run: echo "No changes in the *.py files — passing." 
diff --git a/.github/workflows/ci_scala.yml b/.github/workflows/ci_scala.yml index 5d89cef..454f950 100644 --- a/.github/workflows/ci_scala.yml +++ b/.github/workflows/ci_scala.yml @@ -17,84 +17,125 @@ name: CI Scala on: pull_request: + branches: [ master ] + +env: + coverage-overall: 67.0 + coverage-changed-files: 80.0 + coverage-per-changed-file: 0.0 jobs: - test: - name: Test - runs-on: ubuntu-22.04 - env: - overall: 0.0 - changed: 80.0 + detect: + name: Scala Changes Detection + runs-on: ubuntu-latest + outputs: + has_scala_changed: ${{ steps.changes.outputs.has_scala_changed }} + steps: + - name: Checkout code + uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 + with: + persist-credentials: false + fetch-depth: 0 + + - name: Check if Scala files changed (excluding project/) + id: changes + shell: bash + env: + GH_TOKEN: ${{ github.token }} + run: | + set -euo pipefail + + # Changed-file paths are relative to the repo root, so the sbt build dir is bigfiles/project/. + if [[ "${{ github.event_name }}" == "pull_request" ]]; then + CHANGED_FILES=$(gh api \ + --paginate \ + "repos/${{ github.repository }}/pulls/${{ github.event.pull_request.number }}/files" \ + --jq '.[].filename + | select(endswith(".scala")) + | select(startswith("bigfiles/project/") | not)') + else + CHANGED_FILES=$(git diff --name-only "${{ github.sha }}~1" "${{ github.sha }}" -- '**/*.scala' | grep -v '^bigfiles/project/' || true) + fi + + if [[ -n "${CHANGED_FILES}" ]]; then + echo "has_scala_changed=true" >> "$GITHUB_OUTPUT" + else + echo "has_scala_changed=false" >> "$GITHUB_OUTPUT" + fi + + jacoco-report: + name: JaCoCo Report + needs: detect + if: needs.detect.outputs.has_scala_changed == 'true' + runs-on: ubuntu-latest + permissions: + contents: read + pull-requests: write defaults: run: working-directory: bigfiles - strategy: - matrix: - include: - - scala: 2.12.17 - scalaShort: "2.12" - - scala: 2.11.12 - scalaShort: "2.11" steps: - name: Checkout code - uses: actions/checkout@v4 + uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 with: persist-credentials: false + 
fetch-depth: 0 - name: Setup Scala - uses: olafurpg/setup-scala@v14 + uses: olafurpg/setup-scala@32ffa16635ff8f19cc21ea253a987f0fdf29844c with: java-version: "adopt@1.8" - name: Build and run tests continue-on-error: true id: jacocorun - run: sbt ++${{ matrix.scala }} jacoco - - - name: Add coverage to PR - if: steps.jacocorun.outcome == 'success' - id: jacoco-coverage - uses: madrapps/jacoco-report@v1.7.1 - with: - paths: ${{ github.workspace }}/bigfiles/target/scala-${{ matrix.scalaShort }}/jacoco/report/jacoco.xml - token: ${{ secrets.GITHUB_TOKEN }} - min-coverage-overall: ${{ env.overall }} - min-coverage-changed-files: ${{ env.changed }} - title: JaCoCo code coverage report - scala ${{ matrix.scala }} - update-comment: true - - - name: Get the Coverage info - run: | - echo "Total coverage ${{ steps.jacoco-coverage.outputs.coverage-overall }}" - echo "Changed Files coverage ${{ steps.jacoco-coverage.outputs.coverage-changed-files }}" + run: sbt jacoco - - name: Fail PR if changed files coverage is less than ${{ env.changed }}% - if: ${{ steps.jacoco-coverage.outputs.coverage-changed-files < 80.0 }} - uses: actions/github-script@v6 + - name: Publish JaCoCo Report in PR comments + uses: MoranaApps/jacoco-report@54bfe284d1119dc917dddba80517c54c5bcf3627 with: - script: | - core.setFailed('Changed files coverage is less than ${{ env.changed }}%!') + token: '${{ secrets.GITHUB_TOKEN }}' + paths: bigfiles/target/scala-2.12/jacoco-report/jacoco.xml + sensitivity: "detail" + comment-mode: 'single' + min-coverage-overall: ${{ env.coverage-overall }} + min-coverage-changed-files: ${{ env.coverage-changed-files }} + min-coverage-per-changed-file: ${{ env.coverage-per-changed-file }} + skip-unchanged: false + - name: Fail if tests failed + if: steps.jacocorun.outcome != 'success' + run: exit 1 format-check: name: Format Check + needs: detect + if: needs.detect.outputs.has_scala_changed == 'true' runs-on: ubuntu-latest defaults: run: working-directory: bigfiles steps: - 
name: Checkout code - uses: actions/checkout@v4 + uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 with: persist-credentials: false fetch-depth: 0 ref: ${{ github.event.pull_request.head.ref }} - name: Setup Scala - uses: olafurpg/setup-scala@v14 + uses: olafurpg/setup-scala@32ffa16635ff8f19cc21ea253a987f0fdf29844c with: java-version: "adopt@1.8" - name: Run scalafmt And Print Diff continue-on-error: false run: sbt scalafmt scalafmtSbt && git diff --exit-code + + noop: + name: No Operation + needs: detect + if: needs.detect.outputs.has_scala_changed != 'true' + runs-on: ubuntu-latest + steps: + - run: echo "No changes in the *.scala files (excluding project/) — passing." diff --git a/smallfiles/test/test_version.py b/bigfiles/.sbtrc similarity index 70% rename from smallfiles/test/test_version.py rename to bigfiles/.sbtrc index 54f82c4..33bebbd 100644 --- a/smallfiles/test/test_version.py +++ b/bigfiles/.sbtrc @@ -1,4 +1,5 @@ -# Copyright 2020 ABSA Group Limited +# +# Copyright 2026 ABSA Group Limited # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,9 +12,9 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
+# -import sys - - -def test_version(): - assert sys.version_info[0] == 3 +# JaCoCo Aliases +alias jacoco=; jacocoOn; clean; test; jacocoReportAll; jacocoOff +alias jacocoOff=; set every jacocoPluginEnabled := false +alias jacocoOn=; set every jacocoPluginEnabled := true diff --git a/bigfiles/README.md b/bigfiles/README.md index e3bdd5a..2fb984a 100644 --- a/bigfiles/README.md +++ b/bigfiles/README.md @@ -59,9 +59,14 @@ spark.hadoop.fs.defaultFS hdfs://localhost:9999/ # set your hdfs uri ### Requirements -- scala 2.12 -- spark 3.5.1 -- java 11.0.24-amzn +Two Scala versions are supported, each paired with a different Spark version: + +| Scala | Spark | Java | Hadoop | +|-------|-------|----------------|--------| +| 2.12.20 (default) | 3.5.3 | 11.0.24-amzn | 3.3.5 | +| 2.11.12 | 2.4.7 | 8.0.422-amzn | 2.6.5 | + +The default build targets **Scala 2.12**. more requirements are in [Dependency](project/Dependencies.scala) file @@ -84,9 +89,10 @@ sdk env install ## How to run tests -| sbt command | test type | info | -| ----------- |-----------|----------------------------------------| -| `sbt test` | ... | It will run tests in test/scala folder | +| sbt command | Test | Info | +|--------------|----------------------|----------------------------------------------------------------------------------------------------------------------| +| `sbt test` | Unit & Integration | It will run tests in bigfiles/src/test/scala folder | +| `sbt jacoco` | Jacoco code coverage | Runs all possible tests with code coverage - i.e. you need environment setup for all previous unit/integration tests | --------- diff --git a/bigfiles/build.sbt b/bigfiles/build.sbt index cb9ce33..fa1f744 100644 --- a/bigfiles/build.sbt +++ b/bigfiles/build.sbt @@ -1,12 +1,25 @@ +/* + * Copyright 2024 ABSA Group Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + import Dependencies.* import sbt.Package.ManifestAttributes import sbtassembly.MergeStrategy import java.time.LocalDateTime -enablePlugins(GitVersioning, GitBranchPrompt) -enablePlugins(ScalafmtPlugin) - lazy val scala212 = "2.12.20" lazy val scala211 = "2.11.12" lazy val supportedScalaVersions = List(scala211, scala212) @@ -16,6 +29,9 @@ ThisBuild / scalaVersion := scala212 ThisBuild / organization := "za.co.absa" lazy val root = (project in file(".")) + .enablePlugins(JacocoFilterPlugin) + .enablePlugins(GitVersioning, GitBranchPrompt) + .enablePlugins(ScalafmtPlugin) .settings( name := "dataset-comparison", crossScalaVersions := supportedScalaVersions, @@ -42,14 +58,6 @@ lazy val root = (project in file(".")) ) ) -// JaCoCo code coverage -Test / jacocoReportSettings := JacocoReportSettings( - title = s"{project} Jacoco Report - scala:${scalaVersion.value}", - formats = Seq(JacocoReportFormats.HTML, JacocoReportFormats.XML) -) - -Test / jacocoExcludes := Seq("za.co.absa.DatasetComparison*") - ThisBuild / assemblyMergeStrategy := { case PathList("META-INF", xs @ _*) => MergeStrategy.discard case x => MergeStrategy.first diff --git a/bigfiles/jmf-rules.txt b/bigfiles/jmf-rules.txt new file mode 100644 index 0000000..7c2ade2 --- /dev/null +++ b/bigfiles/jmf-rules.txt @@ -0,0 +1,155 @@ +# jacoco-method-filter — Default Rules & HowTo (Scala) +# [jmf:1.0.0] +# +# This file defines which methods should be annotated as *Generated so JaCoCo ignores them. +# One rule per line. 
+# +# ───────────────────────────────────────────────────────────────────────────── +# HOW TO USE (quick) +# 1) Replace YOUR.PACKAGE.ROOT with your project’s package root (e.g., com.example.app). +# 2) Start with the CONSERVATIVE section only. +# 3) If clean, enable STANDARD. Use AGGRESSIVE only inside DTO/auto‑generated packages. +# 4) Keep rules narrow (by package), prefer flags (synthetic/bridge) for compiler artifacts, +# and add `id:` labels so logs are easy to read. +# +# ───────────────────────────────────────────────────────────────────────────── +# ALLOWED SYNTAX (cheat sheet) +# +# General form: +# <FQCN_glob>#<method_glob>(<descriptor_glob>) [FLAGS and PREDICATES...] +# +# FQCN_glob (dot form; $ allowed for inner classes): +# Examples: *.model.*, com.example.*, * +# +# method_glob (glob on method name): +# Examples: copy | $anonfun$* | get* | *_$eq +# +# descriptor_glob (JVM descriptor in (args)ret). You may omit it entirely. +# • Omitting descriptor ⇒ treated as "(*)*" (any args, any return). +# • Short/empty forms "", "()", "(*)" normalize to "(*)*". +# Examples: +# (I)I # takes int, returns int +# (Ljava/lang/String;)V # takes String, returns void +# () or (*) or omitted # any args, any return +# +# FLAGS (optional) — space or comma separated: +# public | protected | private | synthetic | bridge | static | abstract +# +# PREDICATES (optional): +# ret:<glob> # match return type only (e.g., ret:V, ret:I, ret:Lcom/example/*;) +# id:<string> # identifier shown in logs/reports +# name-contains:<s> # method name must contain <s> +# name-starts:<s> # method name must start with <s> +# name-ends:<s> # method name must end with <s> +# +# Notes +# - Always use dot-form (com.example.Foo) for class names. +# - Comments (# …) and blank lines are ignored. +# +# ───────────────────────────────────────────────────────────────────────────── +# QUICK EXAMPLES +# +# Simple wildcards +# *#*(*) +# → Match EVERY method in EVERY class (any package). Useful only for diagnostics. +# "(*)" normalizes to "(*)*" ⇒ any args, any return. 
+# *.dto.*#*(*) +# → Match every method on any class under any package segment named "dto". +# Good when you treat DTOs as generated/boilerplate. + +# Scala case class helpers +# *.model.*#copy(*) +# → Matches Scala case-class `copy` methods under `*.model.*`. +# Hides boilerplate clones with any parameter list and any return. +# *.model.*#productArity() +# → Matches zero-arg `productArity` (case-class/Product API). +# *.model.*#productElement(*) +# → Matches `productElement(int)` (or any descriptor form) on case classes. +# *.model.*#productPrefix() +# → Matches `productPrefix()`; returns the case class' constructor name. + +# Companion objects and defaults +# *.model.*$*#apply(*) +# → Matches companion `apply` factories under `*.model.*` (any args). +# BE CAREFUL: can hide real factory logic; keep the package scope narrow. +# *.model.*$*#unapply(*) +# → Matches extractor `unapply` methods in companions under `*.model.*`. +# *#*$default$*(*) +# → Matches Scala-generated default-argument helpers everywhere. +# Safe to keep enabled; they’re compiler-synthesized. + +# Anonymous / synthetic / bridge +# *#$anonfun$* +# → Matches any method whose name contains `$anonfun$` (Scala lambdas). +# Consider adding `synthetic` and/or a package scope in real configs. +# *#*(*):synthetic # any synthetic +# → Matches ANY method marked `synthetic` (compiler-generated). +# Powerful; scope by package to avoid hiding intentional glue code. +# *#*(*):bridge # any bridge +# → Matches Java generic bridge methods the compiler inserts. +# Usually safe globally, but scoping is still recommended. + +# Setters / fluent APIs +# *.dto.*#*_$eq(*) +# → Matches Scala var setters in DTO packages (e.g., `name_=(...)`). +# Good for excluding trivial field writes. +# *.builder.*#with*(*) +# → Matches builder-style fluent setters (`withXxx(...)`) in builder pkgs. +# Treats chainable configuration as boilerplate. 
+# *.client.*#with*(*) ret:Lcom/api/client/* +# → Like above but ONLY when the return type matches your client package. +# The `ret:` predicate protects real logic that returns other types. + +# Return-type constraints +# *.jobs.*#*(*):ret:V +# → Any method under `*.jobs.*` returning `void` (`V`). Often orchestration. +# *.math.*#*(*):ret:I +# → Any method under `*.math.*` returning primitive int (`I`). +# *.model.*#*(*):ret:Lcom/example/model/* +# → Any method under `*.model.*` that returns a type in `com.example.model`. +# Handy when the *return type* uniquely identifies boilerplate. + +# ───────────────────────────────────────────────────────────────────────────── +# GLOBALS RULES +# ───────────────────────────────────────────────────────────────────────────── +# ** all case class boilerplate + +# Scala case class helpers +*#canEqual(*) id:case-canequal +*#equals(*) id:case-equals +*#apply(*) id:case-apply +*#unapply(*) id:case-unapply +*#hashCode(*) id:case-hashcode +*#copy(*) id:case-copy +*#copy$default$*(*) id:case-copy-defaults +*#productElement() id:case-prod-element +*#productArity() id:case-prod-arity +*#productPrefix() id:case-prod-prefix +*#productIterator() id:case-prod-iterator +*#tupled() id:case-tupled +*#curried() id:case-curried +*#toString() id:case-tostring +*#name() id:case-name +*#groups() id:case-groups +*#optionalAttributes() id:case-optionalAttributes + +# Companion objects, constructors, and static definitions +*$#<init>(*) id:gen-ctor # constructors +*$#<clinit>() id:gen-clinit # static initializer blocks + +# Companion objects and defaults +*$*#apply(*) id:comp-apply +*$*#unapply(*) id:comp-unapply +*$*#toString(*) id:comp-tostring +*$*#readResolve(*) id:comp-readresolve + +# anonymous class created by a macro expansion +*$macro$*#$anonfun$inst$macro$* id:macro-inst +*$macro$*#inst$macro$* id:macro-inst + +# lambda +*#* synthetic name-contains:$anonfun$ id:scala-anonfun + +# 
───────────────────────────────────────────────────────────────────────────── +# PROJECT RULES +# ───────────────────────────────────────────────────────────────────────────── \ No newline at end of file diff --git a/bigfiles/project/plugins.sbt b/bigfiles/project/plugins.sbt index 28851e0..f4a5006 100644 --- a/bigfiles/project/plugins.sbt +++ b/bigfiles/project/plugins.sbt @@ -11,35 +11,9 @@ */ addSbtPlugin("org.jetbrains.scala" % "sbt-ide-settings" % "1.1.2") +addSbtPlugin("com.github.sbt" % "sbt-git" % "2.0.0") +addSbtPlugin("org.scalameta" % "sbt-scalafmt" % "2.4.6") +addSbtPlugin("io.github.moranaapps" % "jacoco-method-filter-sbt" % "2.0.1") // Plugins to build the server module as a jar file addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "2.2.0") - -// sbt-jacoco dependency downloading -lazy val ow2Version = "9.5" -lazy val jacocoVersion = "0.8.11-absa.1" - -def jacocoUrl(artifactName: String): String = - s"https://github.com/AbsaOSS/jacoco/releases/download/$jacocoVersion/org.jacoco.$artifactName-$jacocoVersion.jar" -def ow2Url(artifactName: String): String = - s"https://repo1.maven.org/maven2/org/ow2/asm/$artifactName/$ow2Version/$artifactName-$ow2Version.jar" - -addSbtPlugin( - "com.jsuereth" %% "scala-arm" % "2.0" from "https://repo1.maven.org/maven2/com/jsuereth/scala-arm_2.11/2.0/scala-arm_2.11-2.0.jar" -) -addSbtPlugin( - "com.jsuereth" %% "scala-arm" % "2.0" from "https://repo1.maven.org/maven2/com/jsuereth/scala-arm_2.12/2.0/scala-arm_2.12-2.0.jar" -) - -addSbtPlugin("za.co.absa.jacoco" % "report" % jacocoVersion from jacocoUrl("report")) -addSbtPlugin("za.co.absa.jacoco" % "core" % jacocoVersion from jacocoUrl("core")) -addSbtPlugin("za.co.absa.jacoco" % "agent" % jacocoVersion from jacocoUrl("agent")) -addSbtPlugin("org.ow2.asm" % "asm" % ow2Version from ow2Url("asm")) -addSbtPlugin("org.ow2.asm" % "asm-commons" % ow2Version from ow2Url("asm-commons")) -addSbtPlugin("org.ow2.asm" % "asm-tree" % ow2Version from ow2Url("asm-tree")) - -addSbtPlugin( - 
"za.co.absa.sbt" % "sbt-jacoco" % "3.4.1-absa.4" from "https://github.com/AbsaOSS/sbt-jacoco/releases/download/3.4.1-absa.4/sbt-jacoco-3.4.1-absa.4.jar" -) -addSbtPlugin("com.github.sbt" % "sbt-git" % "2.0.0") -addSbtPlugin("org.scalameta" % "sbt-scalafmt" % "2.4.6") diff --git a/smallfiles/README.md b/smallfiles/README.md index 435f24e..d2552c6 100644 --- a/smallfiles/README.md +++ b/smallfiles/README.md @@ -44,4 +44,13 @@ For specific test ```bash # pytest name_of_test_file.py pytest test/test_version.py -``` \ No newline at end of file +``` + +## Quality gates + +| Tool | Command | Gate | +|------|---------|------| +| pytest | `pytest --cov=. --cov-fail-under=80` | coverage >= 80% | +| pylint | `pylint $(git ls-files '*.py')` | score >= 9.5 | +| black | `black --check $(git ls-files '*.py')` | formatting enforced | +| mypy | `mypy .` | type checking enforced | diff --git a/smallfiles/pyproject.toml b/smallfiles/pyproject.toml index 05da954..02a8485 100644 --- a/smallfiles/pyproject.toml +++ b/smallfiles/pyproject.toml @@ -2,3 +2,7 @@ line-length = 160 target-version = ['py311'] force-exclude = '''test''' + +[tool.coverage.run] +omit = ["test/*"] + diff --git a/smallfiles/requirements.txt b/smallfiles/requirements.txt index 3470d0d..a95be60 100644 --- a/smallfiles/requirements.txt +++ b/smallfiles/requirements.txt @@ -1,10 +1,14 @@ -coverage==7.6.1 +black==26.1.0 +coverage==7.10.6 iniconfig==2.0.0 +mypy==1.15.0 numpy==2.1.2 packaging==24.1 pandas==2.2.3 +pandas-stubs==3.0.0.260204 pluggy==1.5.0 pytest==8.3.3 +pytest-cov==7.0.0 python-dateutil==2.9.0.post0 pytz==2024.2 six==1.16.0 diff --git a/smallfiles/test/test_main.py b/smallfiles/test/test_main.py new file mode 100644 index 0000000..6b3dd89 --- /dev/null +++ b/smallfiles/test/test_main.py @@ -0,0 +1,23 @@ +# Copyright 2026 ABSA Group Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from main import main + + +def test_main_outputs_only_unique_rows(capsys): + main() + out = capsys.readouterr().out + assert "Alice" in out + assert "Bob" not in out + assert "David" not in out