Skip to content

Apply forwarding in pjit linearization rule to avoid intermediate copies #1277

Apply forwarding in pjit linearization rule to avoid intermediate copies

Apply forwarding in pjit linearization rule to avoid intermediate copies #1277

name: CI - Bazel Optional CUDA tests
on:
workflow_dispatch:
inputs:
halt-for-connection:
description: 'Should this workflow run wait for a remote connection?'
type: choice
required: true
default: 'no'
options:
- 'yes'
- 'no'
pull_request:
branches:
- main
types: [ labeled, synchronize ]
schedule:
- cron: "0 */2 * * *" # Run once every 2 hours
permissions:
contents: read
concurrency:
group: ${{ github.workflow }}-${{ github.head_ref || github.ref }}
# Don't cancel in-progress jobs for main/release branches.
cancel-in-progress: ${{ !contains(github.ref, 'release/') && github.ref != 'main' }}
jobs:
run_tests:
if: ${{ github.event.repository.fork == false && (github.event_name == 'schedule' || contains(github.event.pull_request.labels.*.name, 'CI Optional GPU Presubmit')) }}
runs-on: ${{ matrix.runner }}
container: 'us-central1-docker.pkg.dev/tensorflow-sigs/tensorflow/ml-build-cuda12.8-cudnn9.8:latest'
strategy:
matrix:
# Optional gpus to run against
runner: ["linux-x86-a4-224-b200-1gpu"]
name: "Bazel single accelerator CUDA tests (${{ matrix.runner }})"
# End Presubmit Naming Check github-cuda-presubmits
steps:
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
- name: Wait For Connection
uses: google-ml-infra/actions/ci_connection@main
with:
halt-dispatch-input: ${{ inputs.halt-for-connection }}
- name: Run Bazel CUDA Tests
run: |
nvidia-smi
bazel test --config=ci_linux_x86_64_cuda \
--config=resultstore \
--config=rbe_cache \
--repo_env=HERMETIC_CUDA_VERSION="12.8.0" \
--repo_env=HERMETIC_CUDNN_VERSION="9.8.0" \
--repo_env=HERMETIC_PYTHON_VERSION="3.13" \
--test_env=XLA_PYTHON_CLIENT_ALLOCATOR=platform \
--run_under "$(pwd)/build/parallel_accelerator_execute.sh" \
--test_output=errors \
--test_env=JAX_ACCELERATOR_COUNT=1 \
--test_env=JAX_TESTS_PER_ACCELERATOR=32 \
--local_test_jobs=32 \
--test_env=JAX_EXCLUDE_TEST_TARGETS=PmapTest.testSizeOverflow \
--test_tag_filters=-multiaccelerator \
--test_env=TF_CPP_MIN_LOG_LEVEL=0 \
--test_env=JAX_SKIP_SLOW_TESTS=true \
--action_env=JAX_ENABLE_X64="1" \
--action_env=NCCL_DEBUG=WARN \
--color=yes \
//tests:gpu_tests //tests:backend_independent_tests \
//tests/pallas:gpu_tests //tests/pallas:backend_independent_tests