Memory usage: new dynamic cache for models supporting sliding window attention #7617

Workflow file for this run

.github/workflows/self-pr-slow-ci.yml at fe8a625

	name: PR slow CI

	on:
	pull_request:
	paths:
	- "src/transformers/models//modeling_.py"
	- "tests/*/test_.py"

	concurrency:
	group: ${{ github.workflow }}-${{ github.head_ref \|\| github.run_id }}
	cancel-in-progress: true

	env:
	HF_HOME: /mnt/cache
	TRANSFORMERS_IS_CI: yes
	OMP_NUM_THREADS: 8
	MKL_NUM_THREADS: 8
	RUN_SLOW: yes
	# For gated repositories, we still need to agree to share information on the Hub repo. page in order to get access.
	# This token is created under the bot `hf-transformers-bot`.
	HF_HUB_READ_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }}
	SIGOPT_API_TOKEN: ${{ secrets.SIGOPT_API_TOKEN }}
	TF_FORCE_GPU_ALLOW_GROWTH: true
	RUN_PT_TF_CROSS_TESTS: 1
	CUDA_VISIBLE_DEVICES: 0,1

	jobs:
	find_models_to_run:
	runs-on: ubuntu-22.04
	name: Find models to run slow tests
	# Triggered only if the required label `run-slow` is added
	if: ${{ contains(github.event.pull_request.labels.*.name, 'run-slow') }}
	outputs:
	models: ${{ steps.models_to_run.outputs.models }}
	steps:
	- uses: actions/checkout@v4
	with:
	fetch-depth: "0"
	ref: ${{ github.event.pull_request.head.sha }}

	- name: Get commit message
	run: \|
	echo "commit_message=$(git show -s --format=%s)" >> $GITHUB_ENV

	- name: Get models to run slow tests
	run: \|
	echo "${{ env.commit_message }}"
	python -m pip install GitPython
	python utils/pr_slow_ci_models.py --commit_message "${{ env.commit_message }}" \| tee output.txt
	echo "models=$(tail -n 1 output.txt)" >> $GITHUB_ENV

	- name: Models to run slow tests
	id: models_to_run
	run: \|
	echo "${{ env.models }}"
	echo "models=${{ env.models }}" >> $GITHUB_OUTPUT

	run_models_gpu:
	name: Run all tests for the model
	# Triggered only `find_models_to_run` is triggered (label `run-slow` is added) which gives the models to run
	# (either a new model PR or via a commit message)
	if: ${{ needs.find_models_to_run.outputs.models != '[]' }}
	needs: find_models_to_run
	strategy:
	fail-fast: false
	matrix:
	folders: ${{ fromJson(needs.find_models_to_run.outputs.models) }}
	machine_type: [aws-g4dn-2xlarge-cache, aws-g4dn-12xlarge-cache]
	runs-on:
	group: '${{ matrix.machine_type }}'
	container:
	image: huggingface/transformers-all-latest-gpu
	options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
	steps:
	- name: Echo input and matrix info
	shell: bash
	run: \|
	echo "${{ matrix.folders }}"

	- name: Echo folder ${{ matrix.folders }}
	shell: bash
	# For folders like `models/bert`, set an env. var. (`matrix_folders`) to `models_bert`, which will be used to
	# set the artifact folder names (because the character `/` is not allowed).
	run: \|
	echo "${{ matrix.folders }}"
	matrix_folders=${{ matrix.folders }}
	matrix_folders=${matrix_folders/'models/'/'models_'}
	echo "$matrix_folders"
	echo "matrix_folders=$matrix_folders" >> $GITHUB_ENV

	- name: Update clone
	working-directory: /transformers
	run: git fetch && git fetch origin pull/${{ github.event.pull_request.number }}/head:pull/${{ github.event.pull_request.number }}/merge && git checkout pull/${{ github.event.pull_request.number }}/merge

	- name: Reinstall transformers in edit mode (remove the one installed during docker image build)
	working-directory: /transformers
	run: python3 -m pip uninstall -y transformers && python3 -m pip install -e . && python3 -m pip install --upgrade torch torchaudio torchvision

	- name: NVIDIA-SMI
	run: \|
	nvidia-smi

	- name: Set `machine_type` for report and artifact names
	working-directory: /transformers
	shell: bash
	run: \|
	echo "${{ matrix.machine_type }}"
	if [ "${{ matrix.machine_type }}" = "aws-g4dn-2xlarge-cache" ]; then
	machine_type=single-gpu
	elif [ "${{ matrix.machine_type }}" = "aws-g4dn-12xlarge-cache" ]; then
	machine_type=multi-gpu
	else
	machine_type=${{ matrix.machine_type }}
	fi
	echo "$machine_type"
	echo "machine_type=$machine_type" >> $GITHUB_ENV

	- name: Environment
	working-directory: /transformers
	run: \|
	python3 utils/print_env.py

	- name: Show installed libraries and their versions
	working-directory: /transformers
	run: pip freeze

	- name: Run all tests on GPU
	working-directory: /transformers
	run: \|
	export CUDA_VISIBLE_DEVICES="$(python3 utils/set_cuda_devices_for_ci.py --test_folder ${{ matrix.folders }})"
	echo $CUDA_VISIBLE_DEVICES
	python3 -m pytest -v -rsfE --make-reports=${{ env.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports tests/${{ matrix.folders }}

	- name: Failure short reports
	if: ${{ failure() }}
	continue-on-error: true
	run: cat /transformers/reports/${{ env.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports/failures_short.txt

	- name: Make sure report directory exists
	shell: bash
	run: \|
	mkdir -p /transformers/reports/${{ env.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports
	echo "hello" > /transformers/reports/${{ env.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports/hello.txt
	echo "${{ env.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports"

	- name: "Test suite reports artifacts: ${{ env.machine_type }}_run_models_gpu_${{ env.matrix_folders }}_test_reports"
	if: ${{ always() }}
	uses: actions/upload-artifact@v4
	with:
	name: ${{ env.machine_type }}_run_models_gpu_${{ env.matrix_folders }}_test_reports
	path: /transformers/reports/${{ env.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Memory usage: new dynamic cache for models supporting sliding window attention #7617

Workflow file

Memory usage: new dynamic cache for models supporting sliding window attention #7617

Jobs

Run details

Workflow file for this run