diff --git a/.github/workflows/gpu-tests.yml b/.github/workflows/gpu-tests.yml
index a12bdad57f..c6e075399e 100644
--- a/.github/workflows/gpu-tests.yml
+++ b/.github/workflows/gpu-tests.yml
@@ -3,18 +3,19 @@ name: GPU Tests
 on:
   workflow_dispatch:
   push:
-    branches: [main]
+    branches:
+      - main
+      - pull-request/*
     tags:
       - "v[0-9]+.[0-9]+.[0-9]+"
-  pull_request:
-    branches: [main]
-    types: [opened, synchronize, reopened, closed]
 
 concurrency:
   group: ${{ github.workflow }}-${{ github.ref }}
   cancel-in-progress: true
 
 jobs:
+  # Multi-GPU tests
+
   gpu-tests:
     runs-on: 2GPU
 
@@ -31,4 +32,97 @@ jobs:
             raw=$(git branch -r --contains ${{ github.ref_name }})
             branch=${raw/origin\/}
           fi
-          cd ${{ github.workspace }}; tox -e test-gpu -- $branch
+          cd ${{ github.workspace }}; MERLIN_BRANCH=$branch tox -e test-gpu
+
+  # Single GPU tests
+
+  gpu-tests-conda-cu12:
+    runs-on: linux-amd64-gpu-p100-latest-1
+    container:
+      image: nvidia/cuda:12.1.1-devel-ubuntu22.04
+      env:
+        NVIDIA_VISIBLE_DEVICES: ${{ env.NVIDIA_VISIBLE_DEVICES }}
+    steps:
+      - uses: actions/checkout@v3
+        with:
+          fetch-depth: 0
+      - name: Install Ubuntu packages
+        run: |
+          apt-get update -y
+          apt-get install -y git lsb-release
+      - uses: conda-incubator/setup-miniconda@v2
+        with:
+          miniforge-variant: Mambaforge
+          use-mamba: true
+          activate-environment: cu12-env
+          environment-file: conda/environments/test-cu12.yaml
+          python-version: "3.10"
+      - name: Get Branch name
+        id: get-branch-name
+        uses: NVIDIA-Merlin/.github/actions/branch-name@9f82e25a18e2b4a3f4350e9f287c2c31e906d89e
+      - name: Run tests
+        # setup-miniconda only activates cu12-env in login shells; tox is
+        # installed in that environment, so this step must use one.
+        shell: bash -el {0}
+        run: |
+          merlin_branch="${{ steps.get-branch-name.outputs.branch }}"
+          MERLIN_BRANCH=$merlin_branch tox -e test-gpu
+
+  gpu-tests-cu11:
+    runs-on: linux-amd64-gpu-p100-latest-1
+    container:
+      image: nvidia/cuda:11.8.0-devel-ubuntu22.04
+      env:
+        NVIDIA_VISIBLE_DEVICES: ${{ env.NVIDIA_VISIBLE_DEVICES }}
+    steps:
+      - uses: actions/checkout@v3
+        with:
+          fetch-depth: 0
+      - name: Install Ubuntu packages
+        run: |
+          apt-get update -y
+          # libcudnn8 installed for tensorflow GPU support
+          apt-get install -y git lsb-release 'libcudnn8=*cuda11.8'
+      - name: Set up Python 3.8
+        uses: actions/setup-python@v4
+        with:
+          python-version: "3.8"
+      - name: Install and upgrade python packages
+        run: |
+          python -m pip install --upgrade pip tox
+      - name: Get Branch name
+        id: get-branch-name
+        uses: NVIDIA-Merlin/.github/actions/branch-name@9f82e25a18e2b4a3f4350e9f287c2c31e906d89e
+      - name: Run tests
+        run: |
+          merlin_branch="${{ steps.get-branch-name.outputs.branch }}"
+          RAPIDS_VERSION=23.04 MERLIN_BRANCH=$merlin_branch tox -e test-gpu-cu11
+
+  gpu-tests-cu12:
+    runs-on: linux-amd64-gpu-p100-latest-1
+    container:
+      image: nvidia/cuda:12.1.1-devel-ubuntu22.04
+      env:
+        NVIDIA_VISIBLE_DEVICES: ${{ env.NVIDIA_VISIBLE_DEVICES }}
+    steps:
+      - uses: actions/checkout@v3
+        with:
+          fetch-depth: 0
+      - name: Install Ubuntu packages
+        run: |
+          apt-get update -y
+          apt-get install -y git lsb-release
+      - name: Set up Python 3.9
+        uses: actions/setup-python@v4
+        with:
+          python-version: "3.9"
+      - name: Install and upgrade python packages
+        run: |
+          python -m pip install --upgrade pip tox
+      - name: Get Branch name
+        id: get-branch-name
+        uses: NVIDIA-Merlin/.github/actions/branch-name@9f82e25a18e2b4a3f4350e9f287c2c31e906d89e
+      - name: Run tests
+        run: |
+          merlin_branch="${{ steps.get-branch-name.outputs.branch }}"
+          RAPIDS_VERSION=23.06 MERLIN_BRANCH=$merlin_branch tox -e test-gpu-cu12
diff --git a/conda/environments/test-cu12.yaml b/conda/environments/test-cu12.yaml
new file mode 100644
index 0000000000..aa94329a09
--- /dev/null
+++ b/conda/environments/test-cu12.yaml
@@ -0,0 +1,11 @@
+name: cu12-env
+channels:
+  - conda-forge
+  - rapidsai-nightly
+  - nvidia
+dependencies:
+  - cuda-version=12
+  - cuda-nvcc=12
+  - cudf=23.08
+  - dask-cudf=23.08
+  - tox=4
diff --git a/requirements/test.txt b/requirements/test.txt
index 0a29deeed9..93b974e66a 100644
--- a/requirements/test.txt
+++ b/requirements/test.txt
@@ -19,7 +19,7 @@ s3fs>=2021.4
 aiobotocore>=1.3.3
 
 # required for synthetic data `merlin.datasets` and notebook tests using merlin models
-merlin-models[tensorflow]@git+https://github.com/NVIDIA-Merlin/models.git
+merlin-models[tensorflow]
 
 # needed to run notebook tests
 nest-asyncio
diff --git a/tests/unit/examples/test_01-Getting-started.py b/tests/unit/examples/test_01-Getting-started.py
index cf28fbcf56..0781367497 100644
--- a/tests/unit/examples/test_01-Getting-started.py
+++ b/tests/unit/examples/test_01-Getting-started.py
@@ -23,6 +23,7 @@
 nest_asyncio.apply()
 
 
+@pytest.mark.tensorflow
 def test_example_01_getting_started():
     with testbook(
         REPO_ROOT / "examples" / "01-Getting-started.ipynb",
diff --git a/tests/unit/examples/test_02-Advanced-NVTabular-workflow.py b/tests/unit/examples/test_02-Advanced-NVTabular-workflow.py
index 0e9bc2a962..6b231bc9a3 100644
--- a/tests/unit/examples/test_02-Advanced-NVTabular-workflow.py
+++ b/tests/unit/examples/test_02-Advanced-NVTabular-workflow.py
@@ -27,6 +27,7 @@
 nest_asyncio.apply()
 
 
+@pytest.mark.tensorflow
 def test_example_02_advanced_workflow():
     with testbook(
         REPO_ROOT / "examples" / "02-Advanced-NVTabular-workflow.ipynb",
diff --git a/tox.ini b/tox.ini
index da0c1239a3..39afffd2e5 100644
--- a/tox.ini
+++ b/tox.ini
@@ -39,14 +39,54 @@ sitepackages=true
 ; to install requirements.txt yet. As we get better at python environment isolation, we will
 ; need to add some back.
+; MERLIN_BRANCH selects the git ref of the Merlin libraries to install; it falls back to main.
 deps =
-    pytest
-    pytest-cov
+    -rrequirements/test.txt
+    git+https://github.com/NVIDIA-Merlin/core.git@{env:MERLIN_BRANCH:main}
+    git+https://github.com/NVIDIA-Merlin/dataloader.git@{env:MERLIN_BRANCH:main}
+    git+https://github.com/NVIDIA-Merlin/models.git@{env:MERLIN_BRANCH:main}
 commands =
-    python -m pip install --upgrade git+https://github.com/NVIDIA-Merlin/core.git
-    python -m pip install --upgrade git+https://github.com/NVIDIA-Merlin/dataloader.git
-    python -m pip install --upgrade git+https://github.com/NVIDIA-Merlin/models.git
-    python -m pip install --upgrade git+https://github.com/NVIDIA-Merlin/core.git@{posargs:main}
-    python -m pytest --cov-report term --cov merlin -rxs tests/unit
+    python -m pytest --cov-report term --cov merlin -rxs {posargs:tests/unit}
+
+[testenv:test-gpu-cu11]
+; Runs in: GitHub Actions
+; Runs GPU-based tests.
+; MERLIN_BRANCH and RAPIDS_VERSION fall back to main and 23.04 when unset.
+setenv =
+    TF_GPU_ALLOCATOR=cuda_malloc_async
+    PIP_EXTRA_INDEX_URL=https://pypi.nvidia.com
+    PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python
+passenv =
+    CUDA_VISIBLE_DEVICES
+deps =
+    -rrequirements/test.txt
+    git+https://github.com/NVIDIA-Merlin/core.git@{env:MERLIN_BRANCH:main}
+    git+https://github.com/NVIDIA-Merlin/dataloader.git@{env:MERLIN_BRANCH:main}
+    git+https://github.com/NVIDIA-Merlin/models.git@{env:MERLIN_BRANCH:main}
+    nvidia-cudnn-cu11==8.6.0.163
+    cudf-cu11=={env:RAPIDS_VERSION:23.04}
+    dask-cudf-cu11=={env:RAPIDS_VERSION:23.04}
+commands =
+    python -m pytest --cov-report term --cov merlin -rxs -s {posargs:tests/unit}
+
+[testenv:test-gpu-cu12]
+; Runs in: GitHub Actions
+; Runs GPU-based tests.
+; MERLIN_BRANCH and RAPIDS_VERSION fall back to main and 23.06 when unset.
+setenv =
+    PIP_EXTRA_INDEX_URL=https://pypi.nvidia.com
+    PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python
+passenv =
+    CUDA_VISIBLE_DEVICES
+deps =
+    -rrequirements/test.txt
+    git+https://github.com/NVIDIA-Merlin/core.git@{env:MERLIN_BRANCH:main}
+    git+https://github.com/NVIDIA-Merlin/dataloader.git@{env:MERLIN_BRANCH:main}
+    git+https://github.com/NVIDIA-Merlin/models.git@{env:MERLIN_BRANCH:main}
+    cudf-cu12=={env:RAPIDS_VERSION:23.06}
+    dask-cudf-cu12=={env:RAPIDS_VERSION:23.06}
+commands =
+    ; Latest TensorFlow PyPI package does not currently support CUDA 12
+    python -m pytest --cov-report term --cov merlin -rxs -s -m 'not tensorflow' {posargs:tests/unit}
 
 [testenv:test-merlin]
 ; Runs in: Internal Jenkins