From 70cbf047e5d608c5eeedca449805d80b143748e1 Mon Sep 17 00:00:00 2001
From: saienduri <77521230+saienduri@users.noreply.github.com>
Date: Fri, 26 Apr 2024 18:24:38 -0700
Subject: [PATCH] Add initial CI (#4)

This commit brings over all the CI from SHARK-Turbine and configures it
properly for iree-turbine. I also parallelize the model testing so that
the CPU model tests run on the ubuntu 64-core runner and the GPU tests
run on the mi250. This way we only occupy the mi250 for ~4 minutes,
compared to ~30 minutes in SHARK-Turbine (the mi250 is also used for GPU
model regression testing on IREE PRs, so I wanted to keep our use of it
to the necessary minimum). I didn't see anything in iree-org that depends
solely on the ubuntu 64-core runner, so I don't think we would block
anyone by holding it for a while. But let me know if we want further
parallelization due to the GitHub runner cost of the 64-core machine
(I can check whether some of the sd tests can run on a 32- or 16-core
runner instead).
---
 .github/workflows/test_models.yml | 95 +++++++++++++++++++++++++++++++
 .github/workflows/test_shark.yml  | 56 ++++++++++++++++++
 mypy-requirements.txt             |  3 +
 requirements.txt                  |  2 +-
 4 files changed, 155 insertions(+), 1 deletion(-)
 create mode 100644 .github/workflows/test_models.yml
 create mode 100644 .github/workflows/test_shark.yml
 create mode 100644 mypy-requirements.txt

diff --git a/.github/workflows/test_models.yml b/.github/workflows/test_models.yml
new file mode 100644
index 000000000..8af872301
--- /dev/null
+++ b/.github/workflows/test_models.yml
@@ -0,0 +1,95 @@
+name: Test Turbine Models
+
+on:
+  workflow_dispatch:
+  pull_request:
+  push:
+    branches:
+      - main
+
+concurrency:
+  # A PR number if a pull request and otherwise the commit hash. This cancels
+  # queued and in-progress runs for the same PR (presubmit) or commit
+  # (postsubmit). The workflow name is prepended to avoid conflicts between
+  # different workflows.
+  group: ${{ github.workflow }}-${{ github.event.number || github.sha }}
+  cancel-in-progress: true
+
+jobs:
+  test-turbine-models-cpu:
+    runs-on:
+      - self-hosted
+      - runner-group=presubmit
+      - environment=prod
+      - cpu
+      - os-family=Linux
+    steps:
+      - name: "Setting up Python"
+        uses: actions/setup-python@75f3110429a8c05be0e1bf360334e4cced2b63fa # v2.3.3
+        with:
+          python-version: 3.11
+      - name: "Checkout This Repo"
+        uses: actions/checkout@v4
+      - name: "Checkout SHARK-Turbine repo"
+        uses: actions/checkout@v4
+        with:
+          repository: nod-ai/SHARK-Turbine
+          # TODO: Let the ref be passed as a parameter to run integration tests.
+          path: SHARK-Turbine
+      - name: Sync source deps
+        # Build IREE from source with -DIREE_BUILD_TRACY=ON if you need a Tracy profile.
+        run: |
+          python3 -m venv turbine_venv
+          source turbine_venv/bin/activate
+          python3 -m pip install --upgrade pip
+          # Note: We install in three steps in order to satisfy requirements
+          # from non-default locations first. Installing the PyTorch CPU
+          # wheels saves multiple minutes and a lot of bandwidth on runner setup.
+          pip install --no-compile -r pytorch-cpu-requirements.txt
+          pip install --no-compile --pre --upgrade -r requirements.txt
+          pip install --no-compile --pre -e .[testing]
+          pip install --no-compile --pre --upgrade -e ${{ github.workspace }}/SHARK-Turbine/models -r ${{ github.workspace }}/SHARK-Turbine/models/requirements.txt
+      - name: Show current free memory
+        run: |
+          free -mh
+      - name: Run stateless_llama tests
+        run: |
+          source turbine_venv/bin/activate
+          pytest -v ${{ github.workspace }}/SHARK-Turbine/models/turbine_models/tests/stateless_llama_test.py
+      - name: Run sd tests
+        run: |
+          source turbine_venv/bin/activate
+          pytest -v ${{ github.workspace }}/SHARK-Turbine/models/turbine_models/tests/sd_test.py
+          pytest -v ${{ github.workspace }}/SHARK-Turbine/models/turbine_models/tests/sdxl_test.py --device cpu --rt_device local-task --iree_target_triple x86_64-linux-gnu
+  test-turbine-models-gpu:
+    runs-on: nodai-amdgpu-mi250-x86-64
+    steps:
+      - name: "Checkout This Repo"
+        uses: actions/checkout@v4
+      - name: "Checkout SHARK-Turbine repo"
+        uses: actions/checkout@v4
+        with:
+          repository: nod-ai/SHARK-Turbine
+          # TODO: Let the ref be passed as a parameter to run integration tests.
+          path: SHARK-Turbine
+      - name: Sync source deps
+        # Build IREE from source with -DIREE_BUILD_TRACY=ON if you need a Tracy profile.
+        run: |
+          python3 -m venv turbine_venv
+          source turbine_venv/bin/activate
+          python3 -m pip install --upgrade pip
+          # Note: We install in three steps in order to satisfy requirements
+          # from non-default locations first. Installing the PyTorch CPU
+          # wheels saves multiple minutes and a lot of bandwidth on runner setup.
+          pip install --no-compile -r pytorch-cpu-requirements.txt
+          pip install --no-compile --pre --upgrade -r requirements.txt
+          pip install --no-compile --pre -e .[testing]
+          pip install --no-compile --pre --upgrade -e ${{ github.workspace }}/SHARK-Turbine/models -r ${{ github.workspace }}/SHARK-Turbine/models/requirements.txt
+      - name: Show current free memory
+        run: |
+          free -mh
+      - name: Run sdxl gpu tests
+        run: |
+          source turbine_venv/bin/activate
+          pytest -v ${{ github.workspace }}/SHARK-Turbine/models/turbine_models/tests/sdxl_test.py --device vulkan --rt_device vulkan --iree_target_triple rdna3-unknown-linux
+          pytest -v ${{ github.workspace }}/SHARK-Turbine/models/turbine_models/tests/sdxl_test.py --device rocm --rt_device hip --iree_target_triple gfx90a --precision fp16

diff --git a/.github/workflows/test_shark.yml b/.github/workflows/test_shark.yml
new file mode 100644
index 000000000..284d76b1f
--- /dev/null
+++ b/.github/workflows/test_shark.yml
@@ -0,0 +1,56 @@
+name: Test SHARK
+
+on:
+  workflow_dispatch:
+  pull_request:
+  push:
+    branches:
+      - main
+
+concurrency:
+  # A PR number if a pull request and otherwise the commit hash. This cancels
+  # queued and in-progress runs for the same PR (presubmit) or commit
+  # (postsubmit). The workflow name is prepended to avoid conflicts between
+  # different workflows.
+  group: ${{ github.workflow }}-${{ github.event.number || github.sha }}
+  cancel-in-progress: true
+
+jobs:
+  test-shark:
+    strategy:
+      matrix:
+        version: [3.11]
+
+    runs-on:
+      - self-hosted
+      - runner-group=presubmit
+      - environment=prod
+      - cpu
+      - os-family=Linux
+    steps:
+      - name: "Setting up Python"
+        uses: actions/setup-python@75f3110429a8c05be0e1bf360334e4cced2b63fa # v2.3.3
+        with:
+          python-version: ${{matrix.version}}
+
+      - name: "Checkout SHARK"
+        uses: actions/checkout@v4
+        with:
+          repository: "nod-ai/SHARK.git"
+          path: SHARK
+          ref: "iree-turbine-switch"
+
+      # TODO: Replace with a shell script from the SHARK repo.
+      - name: "Install SHARK"
+        run: |
+          cd $GITHUB_WORKSPACE/SHARK
+          python${{ matrix.version }} -m venv shark.venv
+          source shark.venv/bin/activate
+          sed -i 's/iree-turbine#/iree-turbine.git@${{github.sha}}#/g' requirements.txt
+          pip install -r requirements.txt --no-cache-dir
+          pip install -e .
+          pip uninstall -y torch
+          pip install torch==2.1.0+cpu -f https://download.pytorch.org/whl/torch_stable.html
+          pip uninstall -y mpmath
+          pip install mpmath==1.3.0
+          python apps/shark_studio/tests/api_test.py

diff --git a/mypy-requirements.txt b/mypy-requirements.txt
new file mode 100644
index 000000000..f2484e486
--- /dev/null
+++ b/mypy-requirements.txt
@@ -0,0 +1,3 @@
+# Typing packages needed for full mypy execution at the project level.
+mypy==1.8.0
+types-requests

diff --git a/requirements.txt b/requirements.txt
index b70793240..c74e08cea 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -6,7 +6,7 @@ mypy==1.8.0
 # It is expected that you have installed a PyTorch version/variant specific
 # to your needs, so we only include a minimum version spec.
 # TODO: Use a versioned release once 2.3.0 drops.
-torch>=2.3.0.dev1
+torch==2.3.0
 torchaudio
 torchvision
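
Note: for anyone reproducing the CPU model-test job outside CI, it reduces
to roughly the following shell session. This is a sketch, not part of the
patch: it assumes an iree-turbine checkout as the current directory with
SHARK-Turbine cloned alongside at ./SHARK-Turbine, standing in for the
workflow's ${{ github.workspace }}/SHARK-Turbine path.

    # Isolated environment, mirroring the "Sync source deps" step.
    python3 -m venv turbine_venv
    source turbine_venv/bin/activate
    python3 -m pip install --upgrade pip
    # Same install order as CI: PyTorch CPU wheels first, so requirements
    # from non-default locations are satisfied before the editable installs.
    pip install --no-compile -r pytorch-cpu-requirements.txt
    pip install --no-compile --pre --upgrade -r requirements.txt
    pip install --no-compile --pre -e .[testing]
    pip install --no-compile --pre --upgrade -e ./SHARK-Turbine/models -r ./SHARK-Turbine/models/requirements.txt
    # The same test invocations the workflow runs on the 64-core CPU runner.
    pytest -v ./SHARK-Turbine/models/turbine_models/tests/stateless_llama_test.py
    pytest -v ./SHARK-Turbine/models/turbine_models/tests/sd_test.py
    pytest -v ./SHARK-Turbine/models/turbine_models/tests/sdxl_test.py --device cpu --rt_device local-task --iree_target_triple x86_64-linux-gnu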