# Docker builds #2365
# This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below.
# To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters.
name: Docker builds

# Triggers:
#   - push to `master` or any `release/*` branch
#   - pull requests targeting those branches, limited to changes that touch the listed paths
#     (docs requirements and Markdown files are excluded by the `!` patterns)
#   - a daily schedule
#   - a published release
#   - manual dispatch
on:
  push:
    branches: [master, "release/*"]
  pull_request:
    branches: [master, "release/*"]
    types: [opened, reopened, ready_for_review, synchronize] # added `ready_for_review` since draft is skipped
    paths:
      - ".actions/*"
      - ".github/workflows/docker-build.yml"
      - "dockers/**"
      - "requirements/*.txt"
      - "requirements/pytorch/**"
      - "requirements/fabric/**"
      - "setup.py"
      # negated patterns: changes matching these do not trigger the workflow on their own
      - "!requirements/*/docs.txt"
      - "!*.md"
      - "!**/*.md"
  schedule:
    - cron: "0 0 * * *" # at the end of every day
  release:
    types: [published]
  workflow_dispatch: {}
# Runs are grouped by workflow name, ref, pull-request head ref and triggering event.
# Only runs triggered by `pull_request` cancel an already in-progress run of the same group.
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}-${{ github.head_ref }}-${{ github.event_name }}
  cancel-in-progress: ${{ github.event_name == 'pull_request' }}
# Workflow-wide flags; steps in this workflow compare them against the string 'true'.
env:
  # set for scheduled runs and manual dispatches
  PUSH_NIGHTLY: ${{ github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' }}
  # set when the ref is a tag or the run was triggered by a release event
  PUSH_RELEASE: ${{ startsWith(github.ref, 'refs/tags/') || github.event_name == 'release' }}
jobs:
  build-pl:
    # the images generated by this job are not used anywhere in this repository. they are just meant to be available
    # for users
    # skip draft pull requests
    if: github.event.pull_request.draft == false
    runs-on: ubuntu-latest
    strategy:
      fail-fast: false
      matrix:
        include:
          # We only release one docker image per PyTorch version.
          # Make sure the matrix here matches the one below.
          - { python_version: "3.10", pytorch_version: "2.1", cuda_version: "12.1.1" }
          - { python_version: "3.11", pytorch_version: "2.2", cuda_version: "12.1.1" }
          - { python_version: "3.11", pytorch_version: "2.3", cuda_version: "12.1.1" }
          - { python_version: "3.11", pytorch_version: "2.4", cuda_version: "12.1.1" }
          - { python_version: "3.12", pytorch_version: "2.5", cuda_version: "12.1.1", latest: "true" }
    steps:
      - uses: actions/checkout@v4
        with:
          submodules: true
      - uses: docker/setup-buildx-action@v3
      # log in only when this run is going to push release images from the Lightning-AI organisation
      - uses: docker/login-action@v3
        if: env.PUSH_RELEASE == 'true' && github.repository_owner == 'Lightning-AI'
        with:
          username: ${{ secrets.DOCKER_USERNAME }}
          password: ${{ secrets.DOCKER_PASSWORD }}
      - name: Get release version
        if: github.event_name == 'release'
        # For workflows triggered by release, `GITHUB_REF` is the release tag created.
        # `${GITHUB_REF##*/}` keeps only the part after the last `/`.
        run: echo "RELEASE_VERSION=${GITHUB_REF##*/}" >> "$GITHUB_ENV"
      - name: Set tags
        # RELEASE_VERSION is only exported by the previous step on release events; otherwise it expands to "".
        run: |
          pip install -q -r .actions/requirements.txt
          tags=$(python .actions/assistant.py generate_docker_tags \
            --release_version="${{ env.RELEASE_VERSION }}" \
            --python_version="${{ matrix.python_version }}" \
            --torch_version="${{ matrix.pytorch_version }}" \
            --cuda_version="${{ matrix.cuda_version }}" \
            --add_latest="${{ matrix.latest || 'false' }}")
          echo "DOCKER_TAGS=$tags" >> "$GITHUB_ENV"
      - uses: docker/build-push-action@v6
        with:
          build-args: |
            PYTHON_VERSION=${{ matrix.python_version }}
            PYTORCH_VERSION=${{ matrix.pytorch_version }}
            CUDA_VERSION=${{ matrix.cuda_version }}
            LIGHTNING_VERSION=${{ env.RELEASE_VERSION }}
          file: dockers/release/Dockerfile
          push: ${{ env.PUSH_RELEASE }} # pushed in release-docker.yml only when PL is released
          tags: ${{ env.DOCKER_TAGS }}
        timeout-minutes: 35

  build-cuda:
    # skip draft pull requests
    if: github.event.pull_request.draft == false
    runs-on: ubuntu-latest
    strategy:
      fail-fast: false
      matrix:
        include:
          # These are the base images for PL release docker images.
          # Make sure the matrix here matches the one above.
          - { python_version: "3.10", pytorch_version: "2.1.2", cuda_version: "12.1.1" }
          - { python_version: "3.11", pytorch_version: "2.2.2", cuda_version: "12.1.1" }
          - { python_version: "3.11", pytorch_version: "2.3.1", cuda_version: "12.1.1" }
          - { python_version: "3.11", pytorch_version: "2.4.1", cuda_version: "12.1.1" }
          - { python_version: "3.12", pytorch_version: "2.5.1", cuda_version: "12.1.1" }
    steps:
      - uses: actions/checkout@v4
      - uses: docker/setup-buildx-action@v3
      # log in only when this run is going to push nightly images from the Lightning-AI organisation
      - uses: docker/login-action@v3
        if: env.PUSH_NIGHTLY == 'true' && github.repository_owner == 'Lightning-AI'
        with:
          username: ${{ secrets.DOCKER_USERNAME }}
          password: ${{ secrets.DOCKER_PASSWORD }}
      - name: shorten Torch version
        run: |
          # convert 1.10.2 to 1.10
          pt_version=$(echo ${{ matrix.pytorch_version }} | cut -d. -f1,2)
          echo "PT_VERSION=$pt_version" >> "$GITHUB_ENV"
      - uses: docker/build-push-action@v6
        with:
          build-args: |
            PYTHON_VERSION=${{ matrix.python_version }}
            PYTORCH_VERSION=${{ matrix.pytorch_version }}
            CUDA_VERSION=${{ matrix.cuda_version }}
          file: dockers/base-cuda/Dockerfile
          push: ${{ env.PUSH_NIGHTLY }}
          # the tag uses the shortened (major.minor) Torch version exported by the previous step
          tags: "pytorchlightning/pytorch_lightning:base-cuda-py${{ matrix.python_version }}-torch${{ env.PT_VERSION }}-cuda${{ matrix.cuda_version }}"
        timeout-minutes: 95
      # notify Slack only when a nightly (scheduled or manually dispatched) run fails
      - uses: ravsamhq/notify-slack-action@v2
        if: failure() && env.PUSH_NIGHTLY == 'true'
        with:
          status: ${{ job.status }}
          token: ${{ secrets.GITHUB_TOKEN }}
          notification_title: ${{ format('CUDA; {0} py{1} for *{2}*', runner.os, matrix.python_version, matrix.pytorch_version) }}
          message_format: "{emoji} *{workflow}* {status_message}, see <{run_url}|detail>, cc: <@U01A5T7EY9M>" # akihironitta
        env:
          SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL }}

  build-NGC:
    # skip draft pull requests
    if: github.event.pull_request.draft == false
    # fixme: use larger machine or optimize image size
    # runs-on: ubuntu-latest-4-cores
    # then drop continue-on-error
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        uses: actions/checkout@v4
      # NOTE(review): the step is named "Conda" but builds dockers/nvidia/Dockerfile; confirm the name is intended
      - name: Build Conda Docker
        # publish master/release
        continue-on-error: true
        uses: docker/build-push-action@v6
        with:
          file: dockers/nvidia/Dockerfile
          push: false
        timeout-minutes: 55