Memory usage: new dynamic cache for models supporting sliding window attention #7617
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
name: PR slow CI

# Runs for pull requests that touch a model implementation file or a test file.
on:
  pull_request:
    paths:
      - "src/transformers/models/*/modeling_*.py"
      - "tests/**/test_*.py"

# One active run per workflow and PR head branch (falls back to the run id when
# `github.head_ref` is empty); a newer run cancels the one still in progress.
concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
  cancel-in-progress: true
# Workflow-wide environment. Every literal is quoted so that it is a string
# regardless of how the YAML loader would otherwise type `yes`, `true`, `8` or `0,1`.
env:
  HF_HOME: /mnt/cache
  TRANSFORMERS_IS_CI: "yes"
  OMP_NUM_THREADS: "8"
  MKL_NUM_THREADS: "8"
  RUN_SLOW: "yes"
  # For gated repositories, we still need to agree to share information on the Hub repo page in order to get access.
  # This token is created under the bot `hf-transformers-bot`.
  HF_HUB_READ_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }}
  SIGOPT_API_TOKEN: ${{ secrets.SIGOPT_API_TOKEN }}
  TF_FORCE_GPU_ALLOW_GROWTH: "true"
  RUN_PT_TF_CROSS_TESTS: "1"
  CUDA_VISIBLE_DEVICES: "0,1"
jobs:
  # Works out which model test folders the slow tests should run for and
  # publishes that list as the job output `models`.
  find_models_to_run:
    runs-on: ubuntu-22.04
    name: Find models to run slow tests
    # Triggered only if the required label `run-slow` is added
    if: ${{ contains(github.event.pull_request.labels.*.name, 'run-slow') }}
    outputs:
      models: ${{ steps.models_to_run.outputs.models }}
    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: "0"
          ref: ${{ github.event.pull_request.head.sha }}

      - name: Get commit message
        run: |
          echo "commit_message=$(git show -s --format=%s)" >> "$GITHUB_ENV"

      - name: Get models to run slow tests
        # `commit_message` is author-controlled text. It is read as a shell variable
        # (exported by the previous step through GITHUB_ENV) instead of being substituted
        # into the script with `${{ }}`, so its content cannot be executed as shell code.
        run: |
          # Without pipefail, `tee` would hide a failure of the Python script.
          set -o pipefail
          echo "$commit_message"
          python -m pip install GitPython
          python utils/pr_slow_ci_models.py --commit_message "$commit_message" | tee output.txt
          echo "models=$(tail -n 1 output.txt)" >> "$GITHUB_ENV"

      - name: Models to run slow tests
        id: models_to_run
        # `models` comes from the previous step via GITHUB_ENV; read it as a shell
        # variable so that quotes inside the list are preserved verbatim.
        run: |
          echo "$models"
          echo "models=$models" >> "$GITHUB_OUTPUT"

  # Runs the test folder of every selected model on each machine type of the matrix.
  run_models_gpu:
    name: Run all tests for the model
    # Triggered only if `find_models_to_run` is triggered (label `run-slow` is added), which gives the models to run
    # (either a new model PR or via a commit message)
    if: ${{ needs.find_models_to_run.outputs.models != '[]' }}
    needs: find_models_to_run
    strategy:
      fail-fast: false
      matrix:
        folders: ${{ fromJson(needs.find_models_to_run.outputs.models) }}
        machine_type: [aws-g4dn-2xlarge-cache, aws-g4dn-12xlarge-cache]
    runs-on:
      group: '${{ matrix.machine_type }}'
    container:
      image: huggingface/transformers-all-latest-gpu
      options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
    env:
      # The matrix folder is exposed as an environment variable so that shell steps read it
      # as data (`$TEST_FOLDER`) rather than having the expression pasted into the script text.
      TEST_FOLDER: ${{ matrix.folders }}
    steps:
      - name: Echo input and matrix info
        shell: bash
        run: |
          echo "$TEST_FOLDER"

      - name: Echo folder ${{ matrix.folders }}
        shell: bash
        # For folders like `models/bert`, set an env. var. (`matrix_folders`) to `models_bert`, which will be used to
        # set the artifact folder names (because the character `/` is not allowed).
        run: |
          echo "$TEST_FOLDER"
          matrix_folders="$TEST_FOLDER"
          matrix_folders=${matrix_folders/'models/'/'models_'}
          echo "$matrix_folders"
          echo "matrix_folders=$matrix_folders" >> "$GITHUB_ENV"

      - name: Update clone
        working-directory: /transformers
        run: git fetch && git fetch origin pull/${{ github.event.pull_request.number }}/head:pull/${{ github.event.pull_request.number }}/merge && git checkout pull/${{ github.event.pull_request.number }}/merge

      - name: Reinstall transformers in edit mode (remove the one installed during docker image build)
        working-directory: /transformers
        run: python3 -m pip uninstall -y transformers && python3 -m pip install -e . && python3 -m pip install --upgrade torch torchaudio torchvision

      - name: NVIDIA-SMI
        run: |
          nvidia-smi

      - name: Set `machine_type` for report and artifact names
        working-directory: /transformers
        shell: bash
        # Maps the runner group name to the short label used in report and artifact names;
        # any other value is passed through unchanged.
        run: |
          echo "${{ matrix.machine_type }}"

          if [ "${{ matrix.machine_type }}" = "aws-g4dn-2xlarge-cache" ]; then
            machine_type=single-gpu
          elif [ "${{ matrix.machine_type }}" = "aws-g4dn-12xlarge-cache" ]; then
            machine_type=multi-gpu
          else
            machine_type=${{ matrix.machine_type }}
          fi

          echo "$machine_type"
          echo "machine_type=$machine_type" >> "$GITHUB_ENV"

      - name: Environment
        working-directory: /transformers
        run: |
          python3 utils/print_env.py

      - name: Show installed libraries and their versions
        working-directory: /transformers
        run: pip freeze

      - name: Run all tests on GPU
        working-directory: /transformers
        run: |
          export CUDA_VISIBLE_DEVICES="$(python3 utils/set_cuda_devices_for_ci.py --test_folder "$TEST_FOLDER")"
          echo "$CUDA_VISIBLE_DEVICES"
          python3 -m pytest -v -rsfE --make-reports="${{ env.machine_type }}_run_models_gpu_${TEST_FOLDER}_test_reports" "tests/$TEST_FOLDER"

      - name: Failure short reports
        if: ${{ failure() }}
        continue-on-error: true
        run: cat "/transformers/reports/${{ env.machine_type }}_run_models_gpu_${TEST_FOLDER}_test_reports/failures_short.txt"

      - name: Make sure report directory exists
        shell: bash
        run: |
          mkdir -p "/transformers/reports/${{ env.machine_type }}_run_models_gpu_${TEST_FOLDER}_test_reports"
          echo "hello" > "/transformers/reports/${{ env.machine_type }}_run_models_gpu_${TEST_FOLDER}_test_reports/hello.txt"
          echo "${{ env.machine_type }}_run_models_gpu_${TEST_FOLDER}_test_reports"

      # The artifact name uses `env.matrix_folders` (with `/` replaced by `_`), while the
      # uploaded path uses the original folder value, matching the directory used above.
      - name: "Test suite reports artifacts: ${{ env.machine_type }}_run_models_gpu_${{ env.matrix_folders }}_test_reports"
        if: ${{ always() }}
        uses: actions/upload-artifact@v4
        with:
          name: ${{ env.machine_type }}_run_models_gpu_${{ env.matrix_folders }}_test_reports
          path: /transformers/reports/${{ env.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports