Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Spark 3.4 #611

Merged
merged 18 commits into from
Oct 4, 2023
5 changes: 5 additions & 0 deletions .github/actions/init-python-env/action.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,9 @@ inputs:
poetry_version:
description: 'Version of Poetry to configure'
default: '1.3.2'
spark_version:
description: 'Version of Spark to configure'
default: '3.4.0'

runs:
using: "composite"
Expand Down Expand Up @@ -36,5 +39,7 @@ runs:

- name: Install Poetry project dependencies
# if: steps.cached-poetry-dependencies.outputs.cache-hit != 'true'
env:
SPARK_VERSION: ${{ inputs.spark_version }}
shell: bash
run: make init-python
86 changes: 78 additions & 8 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,13 @@ jobs:
build-scala:
runs-on: ubuntu-20.04

strategy:
matrix:
spark_version:
- "3.2.4"
- "3.3.2"
- "3.4.0"

steps:
- name: Checkout Repository
uses: actions/checkout@v3
Expand All @@ -25,22 +32,30 @@ jobs:
uses: ./.github/actions/init-scala-env

- name: Compile Scala Project
env:
SPARK_VERSION: ${{ matrix.spark_version }}
run: make compile-scala

- name: Test Scala Project
# python/* branches are not supposed to change scala code, trust them
if: ${{ !startsWith(github.event.inputs.from_branch, 'python/') }}
run: make test-scala
env:
SPARK_VERSION: ${{ matrix.spark_version }}
# Literal block scalar: keeps each command below on its own line.
# A plain multi-line scalar would be folded into one command line.
run: |
ulimit -c unlimited
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@pomadchin, without this, the test suite started crashing for Spark versions < 3.4 after I added frameless 0.14.1. Any idea why?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nope, that's a good observation though

make test-scala

- name: Build Spark Assembly
env:
SPARK_VERSION: ${{ matrix.spark_version }}
shell: bash
run: make build-scala

- name: Cache Spark Assembly
uses: actions/cache@v3
with:
path: ./dist/*
key: dist-${{ github.sha }}
key: dist-${{ matrix.spark_version }}-${{ github.sha }}

build-python:
# scala/* branches are not supposed to change python code, trust them
Expand All @@ -50,7 +65,13 @@ jobs:

strategy:
matrix:
python: [ "3.8" ]
python:
- "3.8"
- "3.9"
spark_version:
- "3.2.4"
- "3.3.2"
- "3.4.0"

steps:
- name: Checkout Repository
Expand All @@ -61,6 +82,7 @@ jobs:
- uses: ./.github/actions/init-python-env
with:
python_version: ${{ matrix.python }}
spark_version: ${{ matrix.spark_version }}

- name: Static checks
shell: bash
Expand All @@ -69,18 +91,25 @@ jobs:
- uses: actions/cache@v3
with:
path: ./dist/*
key: dist-${{ github.sha }}
key: dist-${{ matrix.spark_version }}-${{ github.sha }}

- name: Run tests
shell: bash
run: make test-python-quick

publish:
name: Publish Artifacts
publish-scala:
name: Publish Scala Artifacts
needs: [ build-scala, build-python ]
runs-on: ubuntu-20.04
if: (github.event_name != 'pull_request') && startsWith(github.ref, 'refs/tags/v')

strategy:
matrix:
spark_version:
- "3.2.4"
- "3.3.2"
- "3.4.0"

steps:
- name: Checkout Repository
uses: actions/checkout@v3
Expand All @@ -94,17 +123,58 @@ jobs:
shell: bash
env:
GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}}
SPARK_VERSION: ${{ matrix.spark_version }}
run: make publish-scala

- name: Build Spark Assembly
env:
SPARK_VERSION: ${{ matrix.spark_version }}
shell: bash
run: make build-scala

- name: Cache Spark Assembly
uses: actions/cache@v3
with:
path: ./dist/*
key: dist-${{ matrix.spark_version }}-${{ github.ref }}


# Publishes the Python artifacts; runs only for non-pull-request events on
# refs that start with 'refs/tags/v', after publish-scala has completed.
publish-python:
name: Publish Python Artifacts
needs: [ publish-scala ]
runs-on: ubuntu-20.04
if: (github.event_name != 'pull_request') && startsWith(github.ref, 'refs/tags/v')

strategy:
matrix:
python:
- "3.8"
- "3.9"
spark_version:
- "3.2.4"
- "3.3.2"
- "3.4.0"

steps:
- name: Checkout Repository
uses: actions/checkout@v3
with:
fetch-depth: 0

- uses: ./.github/actions/init-python-env
with:
python_version: "3.8"
python_version: ${{ matrix.python }}
spark_version: ${{ matrix.spark_version }}

- uses: actions/cache@v3
with:
path: ./dist/*
key: dist-${{ matrix.spark_version }}-${{ github.ref }}

- name: Build Python whl
shell: bash
run: make build-python


# TODO: Where does this go, do we need it?
# - name: upload artefacts
# uses: ./.github/actions/upload_artefacts
Expand Down
34 changes: 23 additions & 11 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
SHELL := /usr/bin/env bash
# Default Spark version; a value from the environment or the make command
# line takes precedence over this `?=` default.
# It must be defined before SHELL: `:=` expands $(SPARK_VERSION) immediately,
# so the default has to exist by the time the SHELL line is read.
SPARK_VERSION ?= 3.4.0
SHELL := env SPARK_VERSION=$(SPARK_VERSION) /usr/bin/env bash

.PHONY: init test lint build docs notebooks help

DIST_DIR = ./dist

help:
@echo "init - Setup the repository"
@echo "clean - clean all compiled python files, build artifacts and virtual envs. Run \`make init\` anew afterwards."
Expand All @@ -18,27 +21,32 @@ test: test-scala test-python
###############

compile-scala:
sbt -v -batch compile test:compile it:compile
sbt -v -batch compile test:compile it:compile -DrfSparkVersion=${SPARK_VERSION}

test-scala: test-core-scala test-datasource-scala test-experimental-scala

test-core-scala:
sbt -batch core/test
sbt -batch core/test -DrfSparkVersion=${SPARK_VERSION}

test-datasource-scala:
sbt -batch datasource/test
sbt -batch datasource/test -DrfSparkVersion=${SPARK_VERSION}

test-experimental-scala:
sbt -batch experimental/test
sbt -batch experimental/test -DrfSparkVersion=${SPARK_VERSION}

build-scala: clean-build-scala
sbt "pyrasterframes/assembly" -DrfSparkVersion=${SPARK_VERSION}

build-scala:
sbt "pyrasterframes/assembly"
# Remove assembly jars matching the selected Spark version from $(DIST_DIR).
# The directory test skips `find` when $(DIST_DIR) does not exist.
clean-build-scala:
	if [ -d "$(DIST_DIR)" ]; then \
		find "$(DIST_DIR)" -name 'pyrasterframes-assembly-${SPARK_VERSION}*.jar' -exec rm -fr {} +; \
	fi

clean-scala:
sbt clean
sbt clean -DrfSparkVersion=${SPARK_VERSION}

publish-scala:
sbt publish
sbt publish -DrfSparkVersion=${SPARK_VERSION}

################
# PYTHON
Expand All @@ -49,9 +57,11 @@ init-python:
./.venv/bin/python -m pip install --upgrade pip
poetry self add "poetry-dynamic-versioning[plugin]"
poetry install
poetry add pyspark@${SPARK_VERSION}
poetry run pre-commit install

test-python: build-scala
poetry add pyspark@${SPARK_VERSION}
poetry run pytest -vv python/tests --cov=python/pyrasterframes --cov=python/geomesa_pyspark --cov-report=term-missing

test-python-quick:
Expand All @@ -72,8 +82,10 @@ notebooks-python: clean-notebooks-python
clean-python: clean-build-python clean-test-python clean-venv-python clean-docs-python clean-notebooks-python

clean-build-python:
find ./dist -name 'pyrasterframes*.whl' -exec rm -fr {} +
find ./dist -name 'pyrasterframes*.tar.gz' -exec rm -fr {} +
if [ -d "$(DIST_DIR)" ]; then \
find ./dist -name 'pyrasterframes*.whl' -exec rm -fr {} +; \
find ./dist -name 'pyrasterframes*.tar.gz' -exec rm -fr {} +; \
fi

clean-test-python:
rm -f .coverage
Expand Down
9 changes: 6 additions & 3 deletions build.sbt
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ ThisBuild / dynverSonatypeSnapshots := true
ThisBuild / publishMavenStyle := true
ThisBuild / Test / publishArtifact := false


addCommandAlias("makeSite", "docs/makeSite")
addCommandAlias("previewSite", "docs/previewSite")
addCommandAlias("ghpagesPushSite", "docs/ghpagesPushSite")
Expand All @@ -38,13 +39,15 @@ lazy val IntegrationTest = config("it") extend Test
lazy val root = project
.withId("RasterFrames")
.aggregate(core, datasource)
.settings(publish / skip := true)
.settings(
publish / skip := true)

lazy val `rf-notebook` = project
.dependsOn(pyrasterframes)
.disablePlugins(CiReleasePlugin)
.enablePlugins(RFAssemblyPlugin, DockerPlugin)
.settings(publish / skip := true)
.settings(
publish / skip := true)

lazy val core = project
.enablePlugins(BuildInfoPlugin)
Expand Down Expand Up @@ -79,7 +82,7 @@ lazy val core = project
ExclusionRule(organization = "com.github.mpilquist")
),
scaffeine,
sparktestingbase excludeAll ExclusionRule("org.scala-lang.modules", "scala-xml_2.12"),
sparktestingbase().value % Test excludeAll ExclusionRule("org.scala-lang.modules", "scala-xml_2.12"),
`scala-logging`
),
libraryDependencies ++= {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,6 @@ class SlippyDataSourceSpec extends TestEnvironment with TestData with BeforeAndA

def tileFilesCount(dir: File): Long = {
val r = countFiles(dir, ".png")
println(dir, r)
r
}

Expand Down
Loading