-
Notifications
You must be signed in to change notification settings - Fork 661
[ci] add legacy test workflow and test config #4387
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
c437a1e
b6e078c
f7f12f6
10066e0
1a6cec8
d34a919
63e1bef
29fc00b
ed3869a
5da1b7f
c263e94
c2c02f0
bce92a2
5014764
f0fe949
e54d331
480a813
7db3c9f
98e713d
8c70ee3
44d9140
a0fbf89
2090d93
47e2b7c
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,4 +1,4 @@ | ||
| name: api_eval_h800 | ||
| name: api_eval_legacy | ||
|
|
||
| on: | ||
| workflow_dispatch: | ||
|
|
@@ -32,31 +32,32 @@ on: | |
| description: 'Set custom run ID. If not provided, github.run_id will be used' | ||
| type: string | ||
| default: '' | ||
| offline_mode: | ||
| required: true | ||
| description: 'Whether start a offline mode, if true, you should prepare code and whl package by yourself' | ||
| type: boolean | ||
| default: false | ||
|
|
||
|
|
||
| env: | ||
| HOST_PIP_CACHE_DIR: /nvme/github-actions/pip-cache | ||
| HOST_LOCALTIME: /usr/share/zoneinfo/Asia/Shanghai | ||
| OUTPUT_FOLDER: cuda12.8_dist_${{ github.run_id }} | ||
| ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: true | ||
| REPORT_DIR: /nvme/qa_test_models/evaluation_report/allure_report/${{ inputs.repo_ref }}_${{ github.run_id }} | ||
| COV_PARAM: --cov /opt/py3/lib/python3.10/site-packages/lmdeploy | ||
| FAIL_CONFIG: '--lf' | ||
| TEST_CODE_PATH: /nvme/qa_test_models/test_pkg/lmdeploy/${{ inputs.repo_ref }}_${{ github.run_id }} | ||
| OFFLINE_CODE_PATH: /nvme/qa_test_models/offline_pkg/lmdeploy | ||
| OFFLINE_REQUIREMENTS: /nvme/qa_test_models/offline_pkg/requirements.txt | ||
| DEEPSEEK_VL: /nvme/qa_test_models/offline_pkg/DeepSeek-VL | ||
| COMPASS_DATA_CACHE: /nvme/qa_test_models/compass_data_cache | ||
| HF_DATASETS_OFFLINE: 1 | ||
| HF_DATASETS_CACHE: /nvme/qa_test_models/hf_datasets | ||
| HF_HUB_OFFLINE: 1 | ||
| HF_EVALUATE_OFFLINE: 1 | ||
| RUN_ID: ${{ inputs.repo_ref }}_${{ github.run_id }} | ||
| TEST_ENV: h800 | ||
| TEST_ENV: legacy | ||
|
|
||
| jobs: | ||
| linux-build: | ||
| if: ${{ !cancelled() }} | ||
| if: ${{github.event_name == 'schedule' || (!cancelled() && !inputs.offline_mode)}} | ||
| strategy: | ||
| matrix: | ||
| pyver: [py310] | ||
zhulinJulia24 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
|
@@ -67,8 +68,20 @@ jobs: | |
| DOCKER_TAG: cuda12.8 | ||
| OUTPUT_FOLDER: cuda12.8_dist_${{ github.run_id }} | ||
| steps: | ||
| - name: Free disk space | ||
| uses: jlumbroso/free-disk-space@main | ||
| with: | ||
| # This might remove tools that are actually needed, if set to "true" but frees about 6 GB | ||
| tool-cache: false | ||
| docker-images: false | ||
| # All of these default to true, but feel free to set to "false" if necessary for your workflow | ||
| android: true | ||
| dotnet: true | ||
| haskell: true | ||
| large-packages: true | ||
| swap-storage: false | ||
| - name: Checkout repository | ||
| uses: actions/checkout@v3 | ||
| uses: actions/checkout@v6 | ||
| with: | ||
| repository: ${{ github.event.inputs.repo_org || 'InternLM/lmdeploy' }} | ||
| ref: ${{github.event.inputs.repo_ref || 'main'}} | ||
|
|
@@ -90,56 +103,95 @@ jobs: | |
| retention-days: 1 | ||
| name: my-artifact-${{ github.run_id }}-${{ matrix.pyver }} | ||
|
|
||
| test_evaluation: | ||
|
|
||
| download_pkgs: | ||
| needs: linux-build | ||
| if: ${{ !cancelled() }} | ||
| runs-on: [self-hosted, h800-r1] | ||
| timeout-minutes: 2400 | ||
| strategy: | ||
| fail-fast: false | ||
| matrix: | ||
| backend: ${{ fromJSON(inputs.backend || '["turbomind", "pytorch"]')}} | ||
| if: ${{!cancelled()}} | ||
| runs-on: [self-hosted, linux-a100] | ||
| timeout-minutes: 50 | ||
| container: | ||
| image: m.daocloud.io/docker.io/openmmlab/lmdeploy:latest-cu12.8 | ||
| image: openmmlab/lmdeploy:latest-cu12.8 | ||
| options: "--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip -e NVIDIA_DISABLE_REQUIRE=1 --pull never" | ||
| volumes: | ||
| - /nvme/github-actions/pip-cache:/root/.cache/pip | ||
| - /nvme/github-actions/packages:/root/packages | ||
| - /nvme/github-actions/resources:/root/resources | ||
| - /nvme/github-actions/opencompass-data:/root/opencompass-data | ||
| - /nvme/qa_test_models:/nvme/qa_test_models | ||
| - /nvme1/qa_test_models:/nvme1/qa_test_models | ||
| - /nvme2/share:/nvme2/share | ||
| - /mnt/158_nvme2:/mnt/158_nvme2 | ||
| - /mnt/158_nvme3:/mnt/158_nvme3 | ||
| - /mnt/158_nvme4:/mnt/158_nvme4 | ||
| - /mnt/121:/mnt/121 | ||
| - /mnt/104:/mnt/104 | ||
| - /usr/share/zoneinfo/Asia/Shanghai:/etc/localtime:ro | ||
| steps: | ||
| - name: Create and change to _wk directory | ||
| run: | | ||
| echo "Working directory set to: $(pwd)" | ||
| - name: Clone repository | ||
| uses: actions/checkout@v2 | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. May use "actions/checkout@v6"
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is cloning the repo needed here in |
||
| if: ${{github.event_name == 'schedule' || !inputs.offline_mode}} | ||
| with: | ||
| repository: ${{ github.event.inputs.repo_org || 'InternLM/lmdeploy' }} | ||
| ref: ${{github.event.inputs.repo_ref || 'main'}} | ||
| - name: Copy repository | ||
| if: ${{github.event_name == 'schedule' || !inputs.offline_mode}} | ||
| run: rm -rf ${{env.TEST_CODE_PATH}} && mkdir ${{env.TEST_CODE_PATH}} && chmod 777 ${{env.TEST_CODE_PATH}} && cp -r . ${{env.TEST_CODE_PATH}} | ||
| - name: Copy repository - offline | ||
| if: ${{inputs.offline_mode}} | ||
| run: rm -rf ${{env.TEST_CODE_PATH}} && mkdir ${{env.TEST_CODE_PATH}} && chmod 777 ${{env.TEST_CODE_PATH}} && cp -r ${{env.OFFLINE_CODE_PATH}}/. ${{env.TEST_CODE_PATH}} | ||
| - name: Download Artifacts | ||
| if: ${{github.event_name == 'schedule' || !inputs.offline_mode}} | ||
| uses: actions/download-artifact@v4 | ||
| with: | ||
| name: my-artifact-${{ github.run_id }}-py310 | ||
| - name: Copy Artifacts | ||
| if: ${{github.event_name == 'schedule' || !inputs.offline_mode}} | ||
zhulinJulia24 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| run: rm ${{env.TEST_CODE_PATH}}/lmdeploy-*.whl -f && cp lmdeploy-*.whl ${{env.TEST_CODE_PATH}} | ||
| - name: Copy Artifacts - offline | ||
| if: ${{inputs.offline_mode}} | ||
| run: rm ${{env.TEST_CODE_PATH}}/lmdeploy-*.whl -f && cp ${{env.OFFLINE_CODE_PATH}}/lmdeploy-*.whl ${{env.TEST_CODE_PATH}} | ||
| - name: Mark as start | ||
| run: | | ||
| chmod -R 777 ${{env.TEST_CODE_PATH}} | ||
| mkdir ${{env.REPORT_DIR}} -p | ||
| echo "starttime=$(date +%s)" > ${{env.REPORT_DIR}}/status.txt | ||
|
Comment on lines
+147
to
+148
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The mkdir and echo commands are repeated three times. In test_evaluation, the second call (L200) overwrites the first (L180). Suggest removing the redundancy and keeping only the necessary write. |
||
|
|
||
| test_evaluation: | ||
| needs: download_pkgs | ||
| if: ${{ !cancelled() }} | ||
| runs-on: [self-hosted, linux-a100] | ||
| timeout-minutes: 7200 | ||
| strategy: | ||
| fail-fast: false | ||
| matrix: | ||
| backend: ${{ fromJSON(inputs.backend || '["turbomind", "pytorch"]')}} | ||
| gpu_num: ['gpu_num_1', 'gpu_num_2', 'gpu_num_4', 'gpu_num_8'] | ||
| container: | ||
| image: openmmlab/lmdeploy:latest-cu12.8 | ||
| options: "--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip -e NVIDIA_DISABLE_REQUIRE=1 --pull never" | ||
| volumes: | ||
| - /nvme/github-actions/pip-cache:/root/.cache/pip | ||
| - /nvme/github-actions/packages:/root/packages | ||
| - /nvme/github-actions/resources:/root/resources | ||
| - /nvme/qa_test_models:/nvme/qa_test_models | ||
| - /nvme/huggingface_hub:/nvme/huggingface_hub | ||
| - /mnt/121:/mnt/121 | ||
| - /mnt/104:/mnt/104 | ||
| - /mnt/bigdisk:/mnt/bigdisk | ||
| - /usr/share/zoneinfo/Asia/Shanghai:/etc/localtime:ro | ||
| steps: | ||
| - name: Copy repository and Artifacts | ||
| run: | | ||
| cp -r ${{env.TEST_CODE_PATH}}/. . | ||
| mkdir ${{env.REPORT_DIR}} -p | ||
| echo "starttime=$(date +%s)" > ${{env.REPORT_DIR}}/status.txt | ||
| - name: Install lmdeploy - dependency | ||
| run: | | ||
| python3 -m pip install -r ${{env.OFFLINE_REQUIREMENTS}} | ||
| python3 -m pip install -r /nvme/qa_test_models/offline_pkg/requirements.txt | ||
| - name: Install lmdeploy | ||
| run: | | ||
| python3 -m pip install lmdeploy-*.whl --no-deps | ||
| python3 -m pip uninstall lmdeploy -y && python3 -m pip install lmdeploy-*.whl --no-deps | ||
zhulinJulia24 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| python3 -m pip install -r requirements/test.txt | ||
| - name: Install opencompass | ||
| run: | | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Should we consider pinning OC to a fixed version instead of using the latest? |
||
| python3 -m pip install opencompass | ||
| git clone https://github.com/open-compass/opencompass.git --depth 1 | ||
| cd opencompass | ||
| python3 -m pip install . | ||
| python3 -m pip install langdetect | ||
| - name: Check env | ||
| run: | | ||
| pip install transformers==4.57.6 | ||
| python3 -m pip list | ||
| lmdeploy check_env | ||
| mkdir ${{env.REPORT_DIR}} -p | ||
|
|
@@ -148,17 +200,15 @@ jobs: | |
| if: (matrix.backend == 'pytorch' || matrix.backend == 'turbomind') | ||
| run: | | ||
| overall_exit=0 | ||
| ln -s /nvme/qa_test_models/resource/opencompass-data/data ./data | ||
| ln -s /mnt/104/opencompass-data/data ./data | ||
| ln -s /nvme/qa_test_models/resource/nltk_data /usr/share/nltk_data | ||
| execution_mode="${{ github.event.inputs.execution_mode || 'both' }}" | ||
| ulimit -n 65535 | ||
| if [ "$execution_mode" = "both" ] || [ "$execution_mode" = "infer" ]; then | ||
| pytest autotest/evaluate/test_api_evaluate.py -m "gpu_num_1 and ${{matrix.backend}} and infer" -n 8 --alluredir=${{env.REPORT_DIR}} || overall_exit=$? | ||
| pytest autotest/evaluate/test_api_evaluate.py -m "gpu_num_2 and ${{matrix.backend}} and infer" -n 4 --alluredir=${{env.REPORT_DIR}} || overall_exit=$? | ||
| pytest autotest/evaluate/test_api_evaluate.py -m "gpu_num_4 and ${{matrix.backend}} and infer" -n 2 --alluredir=${{env.REPORT_DIR}} || overall_exit=$? | ||
| pytest autotest/evaluate/test_api_evaluate.py -m "gpu_num_8 and ${{matrix.backend}} and infer" -n 1 --alluredir=${{env.REPORT_DIR}} || overall_exit=$? | ||
| pytest autotest/evaluate/test_api_evaluate.py -m "${{matrix.gpu_num}} and ${{matrix.backend}} and infer" --alluredir=${{env.REPORT_DIR}} || overall_exit=$? | ||
| fi | ||
| if [ "$execution_mode" = "both" ] || [ "$execution_mode" = "eval" ]; then | ||
| pytest autotest/evaluate/test_api_evaluate.py -m "${{matrix.backend}} and eval" -n 4 --alluredir=${{env.REPORT_DIR}} || overall_exit=$? | ||
| pytest autotest/evaluate/test_api_evaluate.py -m "${{matrix.gpu_num}} and ${{matrix.backend}} and eval" -n 4 --alluredir=${{env.REPORT_DIR}} || overall_exit=$? | ||
| fi | ||
| exit $overall_exit | ||
| - name: Clear workspace | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION can be removed after upgrading action/checkout