name: daily_ete_test
on:
workflow_dispatch:
inputs:
repo_org:
required: false
description: 'Tested repository, in org/repo form. Default is InternLM/lmdeploy'
type: string
default: 'InternLM/lmdeploy'
repo_ref:
required: false
description: 'Set the branch, tag or commit id to test. Default is "main"'
type: string
default: 'main'
backend:
required: true
description: "Set backend testcase filter: any subset of turbomind, pytorch and turbomind-vl. Default is ['turbomind', 'pytorch', 'turbomind-vl']"
type: string
default: "['turbomind', 'pytorch', 'turbomind-vl']"
model:
required: true
description: "Set testcase module filter: quantization, convert, pipeline, restful, chat, local_case. Default contains all modules"
type: string
default: "['quantization','convert','pipeline','restful','chat','local_case']"
offline_mode:
required: true
description: 'Whether to run in offline mode; if true, prepare the code and whl package yourself'
type: boolean
default: false
dependency_pkgs:
required: true
description: 'Dependency packages; specific versions can also be pinned'
type: string
default: 'packaging transformers_stream_generator transformers==4.41.2 datasets matplotlib openai attrdict timm modelscope jmespath'
tools_regression:
required: true
description: 'Whether to run the tools regression'
type: boolean
default: true
restful_regression:
required: true
description: 'Whether to run the restful api regression'
type: boolean
default: true
triton_regression:
required: true
description: 'Whether to run the triton server api regression'
type: boolean
default: true
pipeline_regression:
required: true
description: 'Whether to run the interface pipeline regression'
type: boolean
default: true
schedule:
- cron: '00 20 * * 0-4'
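# the cron above fires at 20:00 UTC every Sunday through Thursday; scheduled runs have no
# github.event.inputs, so the jobs below fall back to the same defaults via the '||' expressions.
# a minimal sketch of a manual run via the GitHub CLI, assuming this file is saved as
# .github/workflows/daily_ete_test.yml (omitted inputs should fall back to the defaults above):
#   gh workflow run daily_ete_test.yml -f backend="['turbomind']" -f model="['chat']"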
env:
HOST_PIP_CACHE_DIR: /nvme/github-actions/pip-cache
HOST_LOCALTIME: /usr/share/zoneinfo/Asia/Shanghai
OUTPUT_FOLDER: cuda11.8_dist_${{ github.run_id }}
TRITON_PTXAS_PATH: /usr/local/cuda/bin/ptxas
ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: true
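# OUTPUT_FOLDER names the per-run wheel directory produced by linux-build; the HOST_* paths are
# locations on the self-hosted runners that get mounted into the test containers below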
jobs:
linux-build:
if: ${{!cancelled() && (github.event_name == 'schedule' || !inputs.offline_mode)}}
strategy:
matrix:
pyver: [py38, py310]
runs-on: ubuntu-latest
env:
PYTHON_VERSION: ${{ matrix.pyver }}
PLAT_NAME: manylinux2014_x86_64
DOCKER_TAG: cuda11.8
steps:
- name: Checkout repository
uses: actions/checkout@v3
with:
repository: ${{ github.event.inputs.repo_org || 'InternLM/lmdeploy' }}
ref: ${{github.event.inputs.repo_ref || 'main'}}
- name: Build
run: |
echo ${PYTHON_VERSION}
echo ${PLAT_NAME}
echo ${DOCKER_TAG}
echo ${OUTPUT_FOLDER}
echo ${GITHUB_RUN_ID}
# remove -it
sed -i 's/docker run --rm -it/docker run --rm/g' builder/manywheel/build_wheel.sh
bash builder/manywheel/build_wheel.sh ${PYTHON_VERSION} ${PLAT_NAME} ${DOCKER_TAG} ${OUTPUT_FOLDER}
- name: Upload Artifacts
uses: actions/upload-artifact@v4
with:
if-no-files-found: error
path: builder/manywheel/${{ env.OUTPUT_FOLDER }}
retention-days: 1
name: my-artifact-${{ github.run_id }}-${{ matrix.pyver }}
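# each matrix entry uploads its wheel as my-artifact-<run_id>-<pyver>; the test jobs below download
# only the py38 artifact, matching the python3.8 environment of the tritonserver test image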
test_tools:
needs: linux-build
if: ${{!cancelled() && (github.event_name == 'schedule' || inputs.tools_regression)}}
runs-on: [self-hosted, linux-a100]
timeout-minutes: 300
env:
REPORT_DIR: /nvme/qa_test_models/test-reports
PYTHONPATH: /nvme/qa_test_models/offline_pkg/LLaVA
MODELSCOPE_CACHE: /root/modelscope_hub
MODELSCOPE_MODULES_CACHE: /root/modelscope_modules
container:
image: nvcr.io/nvidia/tritonserver:22.12-py3
options: "--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip --pull never"
volumes:
- /nvme/github-actions/pip-cache:/root/.cache/pip
- /nvme/github-actions/packages:/root/packages
- /nvme/github-actions/modelscope_hub:/root/modelscope_hub
- /nvme/github-actions/modelscope_modules:/root/modelscope_modules
- /nvme/github-actions/resources/lora:/root/lora
- /nvme/qa_test_models:/nvme/qa_test_models
- /nvme/qa_test_models/lmdeploy/autotest:/local_case
- /usr/share/zoneinfo/Asia/Shanghai:/etc/localtime:ro
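# --pull never means the tritonserver image (and the caches/models mounted above) must already
# exist on the self-hosted a100 runner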
steps:
- name: Clone repository
uses: actions/checkout@v2
if: ${{github.event_name == 'schedule' || !inputs.offline_mode}}
with:
repository: ${{ github.event.inputs.repo_org || 'InternLM/lmdeploy' }}
ref: ${{github.event.inputs.repo_ref || 'main'}}
- name: Copy repository - offline
if: ${{inputs.offline_mode}}
run: cp -r /nvme/qa_test_models/offline_pkg/lmdeploy/. .
- name: Download Artifacts
if: ${{github.event_name == 'schedule' || !inputs.offline_mode}}
uses: actions/download-artifact@v4
with:
name: my-artifact-${{ github.run_id }}-py38
- name: Install pytorch
run: |
python3 -m pip cache dir
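# cu118 wheels match the CUDA 11.8 toolchain of the tritonserver:22.12 image and the prebuilt
# flash_attn cu118/torch2.2 wheel installed in the next step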
python3 -m pip install torch==2.2.2 torchvision==0.17.2 --index-url https://download.pytorch.org/whl/cu118
- name: Install lmdeploy - dependency
run: |
python3 -m pip install ${{inputs.dependency_pkgs || 'packaging transformers_stream_generator transformers==4.41.2 datasets matplotlib openai attrdict timm modelscope jmespath'}}
# manually install flash attn
# the prebuilt wheel comes from https://github.com/Dao-AILab/flash-attention/releases
python3 -m pip install /root/packages/flash_attn-2.5.7+cu118torch2.2cxx11abiFALSE-cp38-cp38-linux_x86_64.whl
python3 -m pip install -U 'xformers<=0.0.26' --index-url https://download.pytorch.org/whl/cu118
- name: Install lmdeploy
if: ${{github.event_name == 'schedule' || !inputs.offline_mode}}
run: |
python3 -m pip install lmdeploy-*.whl
python3 -m pip install -r requirements/test.txt
pip install /nvme/qa_test_models/offline_pkg/DeepSeek-VL --no-deps
- name: Install lmdeploy - offline
if: ${{inputs.offline_mode}}
run: |
python3 -m pip install /nvme/qa_test_models/offline_pkg/py38/lmdeploy-*.whl
python3 -m pip install -r requirements/test.txt
pip install /nvme/qa_test_models/offline_pkg/DeepSeek-VL --no-deps
- name: Check env
run: |
python3 -m pip list
lmdeploy check_env
cp -r /root/lora .
rm -rf allure-results
# remove tmp log in testcase
rm -rf /nvme/qa_test_models/autotest_model/log/*
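# conventions for the test steps below: -m selects pytest markers (gpu_num_1/gpu_num_2, never pr_test),
# -n is the pytest-xdist worker count (8 for single-GPU cases, 4 for two-GPU cases), '|| true' lets the
# two-GPU run start even if the single-GPU run reported failures, and continue-on-error keeps the
# remaining steps going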
- name: Test lmdeploy - quantization w4a16
continue-on-error: true
if: github.event_name == 'schedule' || (contains(fromJSON(github.event.inputs.backend), 'turbomind') && contains(fromJSON(github.event.inputs.model), 'quantization'))
run: |
pytest autotest/tools/quantization/test_quantization_w4a16.py -m 'not pr_test' -n 8 --alluredir=allure-results --clean-alluredir
- name: Test lmdeploy - quantization w8a8
continue-on-error: true
if: github.event_name == 'schedule' || (contains(fromJSON(github.event.inputs.backend), 'pytorch') && contains(fromJSON(github.event.inputs.model), 'quantization'))
run: |
pytest autotest/tools/quantization/test_quantization_w8a8.py -n 8 --alluredir=allure-results
- name: Test lmdeploy - convert
continue-on-error: true
if: github.event_name == 'schedule' || (contains(fromJSON(github.event.inputs.backend), 'turbomind') && contains(fromJSON(github.event.inputs.model), 'convert'))
run: |
pytest autotest/tools/convert -m 'not pr_test' -n 8 --alluredir=allure-results
- name: Test lmdeploy - chat workspace
continue-on-error: true
if: github.event_name == 'schedule' || (contains(fromJSON(github.event.inputs.backend), 'turbomind') && contains(fromJSON(github.event.inputs.model), 'chat'))
run: |
pytest autotest/tools/chat/test_command_chat_workspace.py -m 'gpu_num_1 and not pr_test' -n 8 --alluredir=allure-results || true
pytest autotest/tools/chat/test_command_chat_workspace.py -m 'gpu_num_2 and not pr_test' -n 4 --alluredir=allure-results
- name: Test lmdeploy - chat hf turbomind
continue-on-error: true
if: github.event_name == 'schedule' || (contains(fromJSON(github.event.inputs.backend), 'turbomind') && contains(fromJSON(github.event.inputs.model), 'chat'))
run: |
pytest autotest/tools/chat/test_command_chat_hf_turbomind.py -m 'gpu_num_1 and not pr_test' -n 8 --alluredir=allure-results || true
pytest autotest/tools/chat/test_command_chat_hf_turbomind.py -m 'gpu_num_2 and not pr_test' -n 4 --alluredir=allure-results
- name: Test lmdeploy - chat hf torch
continue-on-error: true
if: github.event_name == 'schedule' || (contains(fromJSON(github.event.inputs.backend), 'pytorch') && contains(fromJSON(github.event.inputs.model), 'chat'))
run: |
pytest autotest/tools/chat/test_command_chat_hf_pytorch.py -m 'gpu_num_1 and not pr_test' -n 8 --alluredir=allure-results || true
pytest autotest/tools/chat/test_command_chat_hf_pytorch.py -m 'gpu_num_2 and not pr_test' -n 4 --alluredir=allure-results
- name: Test lmdeploy - pipeline turbomind
continue-on-error: true
if: github.event_name == 'schedule' || (contains(fromJSON(github.event.inputs.backend), 'turbomind') && contains(fromJSON(github.event.inputs.model), 'pipeline'))
run: |
pytest autotest/tools/pipeline/test_pipeline_chat_turbomind.py -m 'gpu_num_1 and not pr_test' -n 8 --alluredir=allure-results || true
pytest autotest/tools/pipeline/test_pipeline_chat_turbomind.py -m 'gpu_num_2 and not pr_test' -n 4 --alluredir=allure-results
- name: Test lmdeploy - pipeline torch
continue-on-error: true
if: github.event_name == 'schedule' || (contains(fromJSON(github.event.inputs.backend), 'pytorch') && contains(fromJSON(github.event.inputs.model), 'pipeline'))
run: |
pytest autotest/tools/pipeline/test_pipeline_chat_pytorch.py -m 'gpu_num_1 and not pr_test' -n 8 --alluredir=allure-results || true
pytest autotest/tools/pipeline/test_pipeline_chat_pytorch.py -m 'gpu_num_2 and not pr_test' -n 4 --alluredir=allure-results
- name: Test lmdeploy - pipeline turbomind vl
continue-on-error: true
if: github.event_name == 'schedule' || (contains(fromJSON(github.event.inputs.backend), 'turbomind-vl') && contains(fromJSON(github.event.inputs.model), 'pipeline'))
run: |
pytest autotest/tools/pipeline/test_pipeline_chat_turbomind_vl.py -m 'gpu_num_1 and not pr_test' -n 8 --alluredir=allure-results || true
pytest autotest/tools/pipeline/test_pipeline_chat_turbomind_vl.py -m 'gpu_num_2 and not pr_test' -n 4 --alluredir=allure-results
- name: Test lmdeploy - restful turbomind
continue-on-error: true
if: github.event_name == 'schedule' || (contains(fromJSON(github.event.inputs.backend), 'turbomind') && contains(fromJSON(github.event.inputs.model), 'restful'))
run: |
pytest autotest/tools/restful/test_restful_chat_hf_turbomind.py -m 'gpu_num_1 and not pr_test' -n 8 --alluredir=allure-results || true
pytest autotest/tools/restful/test_restful_chat_hf_turbomind.py -m 'gpu_num_2 and not pr_test' -n 4 --alluredir=allure-results
- name: Test lmdeploy - restful turbomind vl
continue-on-error: true
if: github.event_name == 'schedule' || (contains(fromJSON(github.event.inputs.backend), 'turbomind-vl') && contains(fromJSON(github.event.inputs.model), 'restful'))
run: |
pytest autotest/tools/restful/test_restful_chat_hf_turbomind_vl.py -m 'gpu_num_1 and not pr_test' -n 8 --alluredir=allure-results || true
pytest autotest/tools/restful/test_restful_chat_hf_turbomind_vl.py -m 'gpu_num_2 and not pr_test' -n 4 --alluredir=allure-results
- name: Test lmdeploy - restful workspace
continue-on-error: true
if: github.event_name == 'schedule' || (contains(fromJSON(github.event.inputs.backend), 'turbomind') && contains(fromJSON(github.event.inputs.model), 'restful'))
run: |
pytest autotest/tools/restful/test_restful_chat_workspace.py -m 'gpu_num_1 and not pr_test' -n 8 --alluredir=allure-results || true
pytest autotest/tools/restful/test_restful_chat_workspace.py -m 'gpu_num_2 and not pr_test' -n 4 --alluredir=allure-results
- name: Test lmdeploy - restful torch
continue-on-error: true
if: github.event_name == 'schedule' || (contains(fromJSON(github.event.inputs.backend), 'pytorch') && contains(fromJSON(github.event.inputs.model), 'restful'))
run: |
pytest autotest/tools/restful/test_restful_chat_hf_pytorch.py -m 'gpu_num_1 and not pr_test' -n 8 --alluredir=allure-results || true
pytest autotest/tools/restful/test_restful_chat_hf_pytorch.py -m 'gpu_num_2 and not pr_test' -n 4 --alluredir=allure-results
- name: Test lmdeploy - local testcase
if: github.event_name == 'schedule' || contains(fromJSON(github.event.inputs.model), 'local_case')
run: |
pytest /local_case/issue_regression --alluredir=allure-results
- name: Generate reports
if: always()
run: |
export date_today="$(date +'%Y%m%d-%H%M%S')"
export report_dir="$REPORT_DIR/$date_today"
echo "Save report to $report_dir"
mv allure-results $report_dir
chmod -R 777 $report_dir
- name: Clear workfile
if: always()
run: |
export workdir=$(pwd)
cd ..
rm -rf $workdir
mkdir $workdir
chmod -R 777 $workdir
test_triton:
if: ${{!cancelled() && (github.event_name == 'schedule' || inputs.triton_regression)}}
runs-on: [self-hosted, linux-a100-2]
needs: test_tools
timeout-minutes: 30
env:
HF_MODEL: /nvme/qa_test_models/internlm-chat-20b
WORKDIR: /nvme/qa_test_models/triton_workspace
OFFLINE_PKGS: /nvme/qa_test_models/offline_pkg
TB_MODEL: internlm-chat-20b-fp16-tp2
GRPC_PORT: 33337
steps:
- name: Clone repository
uses: actions/checkout@v2
if: ${{github.event_name == 'schedule' || !inputs.offline_mode}}
with:
repository: ${{ github.event.inputs.repo_org || 'InternLM/lmdeploy' }}
ref: ${{github.event.inputs.repo_ref || 'main'}}
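# unlike the other test jobs, this one manages its own long-lived container so that later steps can
# docker exec into it and Clear workfile can stop it by CONTAINER_ID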
- name: Create test container
run: |
export CONTAINER_ID=$(docker create \
--rm \
--gpus='"device=4,5"' \
--shm-size 16g \
--cap-add=SYS_PTRACE \
--cap-add=SYS_ADMIN \
--security-opt seccomp=unconfined \
--name "lmdeploy-ci-triton-$GITHUB_RUN_ID" \
--workdir /__w/lmdeploy/lmdeploy \
--env NCCL_LAUNCH_MODE=GROUP \
--pull never \
-v $(pwd)/../../:/__w \
-v ${HF_MODEL}:/root/workspace/hf_model \
-v ${WORKDIR}:/root/workspace/workdir \
-v ${OFFLINE_PKGS}:/root/workspace/offline_pkg \
-v ${HOST_PIP_CACHE_DIR}:/root/.cache/pip \
-v ${HOST_LOCALTIME}:/etc/localtime:ro \
openmmlab/lmdeploy:latest tail -f /dev/null \
)
docker start $CONTAINER_ID
echo "CONTAINER_ID=$CONTAINER_ID"
echo "CONTAINER_ID=$CONTAINER_ID" >> $GITHUB_ENV
- name: Build lmdeploy from source
if: ${{github.event_name == 'schedule' || !inputs.offline_mode}}
run: |
docker exec $CONTAINER_ID mkdir build
docker exec --workdir /__w/lmdeploy/lmdeploy/build \
--env http_proxy=${{secrets.PROXY}} \
--env https_proxy=${{secrets.PROXY}} \
--env HTTP_PROXY=${{secrets.PROXY}} \
--env HTTPS_PROXY=${{secrets.PROXY}} \
--env no_proxy="localhost,127.0.0.1" \
--env NO_PROXY="localhost,127.0.0.1" \
$CONTAINER_ID cmake .. \
-DCMAKE_BUILD_TYPE=RelWithDebInfo \
-DCMAKE_EXPORT_COMPILE_COMMANDS=1 \
-DCMAKE_INSTALL_PREFIX=/opt/tritonserver \
-DBUILD_PY_FFI=ON \
-DBUILD_MULTI_GPU=ON \
-DCMAKE_CUDA_FLAGS="-lineinfo" \
-DUSE_NVTX=ON \
-DSM=80 \
-DCMAKE_CUDA_ARCHITECTURES=80 \
-DBUILD_TEST=OFF
docker exec --workdir /__w/lmdeploy/lmdeploy/build $CONTAINER_ID make -j$(nproc)
docker exec --workdir /__w/lmdeploy/lmdeploy/build \
--env http_proxy=${{secrets.PROXY}} \
--env https_proxy=${{secrets.PROXY}} \
--env HTTP_PROXY=${{secrets.PROXY}} \
--env HTTPS_PROXY=${{secrets.PROXY}} \
$CONTAINER_ID make install
- name: Install lmdeploy
if: ${{github.event_name == 'schedule' || !inputs.offline_mode}}
run: |
docker exec \
--env http_proxy=${{secrets.PROXY}} \
--env https_proxy=${{secrets.PROXY}} \
$CONTAINER_ID python3 -m pip install tritonclient[grpc] protobuf
docker exec \
--env http_proxy=${{secrets.PROXY}} \
--env https_proxy=${{secrets.PROXY}} \
$CONTAINER_ID python3 -m pip install -r requirements/test.txt
docker exec \
--env http_proxy=${{secrets.PROXY}} \
--env https_proxy=${{secrets.PROXY}} \
$CONTAINER_ID python3 -m pip install .
docker exec $CONTAINER_ID lmdeploy check_env
- name: Copy repository - offline
if: ${{inputs.offline_mode}}
run: |
docker exec --workdir /__w/lmdeploy $CONTAINER_ID \
cp -r /root/workspace/offline_pkg/lmdeploy .
- name: Install lmdeploy - offline
if: ${{inputs.offline_mode}}
run: |
docker exec \
--env http_proxy=${{secrets.PROXY}} \
--env https_proxy=${{secrets.PROXY}} \
$CONTAINER_ID python3 -m pip install tritonclient[grpc] protobuf
docker exec --workdir /__w/lmdeploy/lmdeploy \
--env http_proxy=${{secrets.PROXY}} \
--env https_proxy=${{secrets.PROXY}} \
$CONTAINER_ID python3 -m pip install -r requirements/test.txt
docker exec --env http_proxy=${{secrets.PROXY}} \
--env https_proxy=${{secrets.PROXY}} $CONTAINER_ID \
python3 -m pip install /root/workspace/offline_pkg/py38/lmdeploy-latest-cp38-cp38-manylinux2014_x86_64.whl
docker exec $CONTAINER_ID lmdeploy check_env
- name: Convert to turbomind model
run: |
docker exec $CONTAINER_ID \
lmdeploy convert \
internlm-chat-20b \
/root/workspace/hf_model \
--tp 2 \
--trust-remote-code \
--dst-path /root/workspace/workdir/${TB_MODEL}
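# the converted workspace lands under ${WORKDIR} on the host through the /root/workspace/workdir
# mount and is removed again in Clear workfile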
- name: Start triton server service
run: |
docker exec --detach $CONTAINER_ID bash -c \
"tritonserver \
--model-repository=/root/workspace/workdir/${TB_MODEL}/model_repository \
--allow-http=0 \
--allow-grpc=1 \
--grpc-port=${GRPC_PORT} \
--log-verbose=0 \
--allow-metrics=1 > run.log 2>&1 ; touch finish.txt"
# wait for triton server to fully start up
sleep 180s
# print triton server log file
cat run.log
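# run.log and finish.txt land in the host workdir because $(pwd)/../../ is mounted at /__w;
# finish.txt only appears if tritonserver exited, so the assert below fails when startup died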
python3 -c 'import os; assert not os.path.exists("finish.txt"), "Failed to start tritonserver"'
- name: Test triton server
run: |
docker exec \
--env no_proxy="localhost,127.0.0.1" \
--env NO_PROXY="localhost,127.0.0.1" \
$CONTAINER_ID python3 .github/scripts/test_triton_server.py --port ${GRPC_PORT}
# print triton server log file
cat run.log
- name: Clear workfile
if: always()
run: |
docker exec --workdir /__w/lmdeploy $CONTAINER_ID rm -rf /root/workspace/workdir/${TB_MODEL}
docker exec --workdir /__w/lmdeploy $CONTAINER_ID chmod -R 777 lmdeploy
docker stop $CONTAINER_ID
export workdir=$(pwd)
cd ..
rm -rf $workdir
mkdir $workdir
test_restful:
if: ${{!cancelled() && (github.event_name == 'schedule' || inputs.restful_regression)}}
runs-on: [self-hosted, linux-a100]
needs: test_tools
strategy:
fail-fast: false
matrix:
backend: ['turbomind', 'pytorch']
timeout-minutes: 300
env:
REPORT_DIR: /nvme/qa_test_models/test-reports
container:
image: nvcr.io/nvidia/tritonserver:22.12-py3
options: "--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip --pull never"
volumes:
- /nvme/github-actions/pip-cache:/root/.cache/pip
- /nvme/github-actions/packages:/root/packages
- /nvme/qa_test_models:/nvme/qa_test_models
- /usr/share/zoneinfo/Asia/Shanghai:/etc/localtime:ro
steps:
- name: Clone repository
uses: actions/checkout@v2
if: ${{github.event_name == 'schedule' || !inputs.offline_mode}}
with:
repository: ${{ github.event.inputs.repo_org || 'InternLM/lmdeploy' }}
ref: ${{github.event.inputs.repo_ref || 'main'}}
- name: Copy repository - offline
if: ${{inputs.offline_mode}}
run: cp -r /nvme/qa_test_models/offline_pkg/lmdeploy/. .
- name: Download Artifacts
if: ${{github.event_name == 'schedule' || !inputs.offline_mode}}
uses: actions/download-artifact@v4
with:
name: my-artifact-${{ github.run_id }}-py38
- name: Install pytorch
run: |
python3 -m pip cache dir
python3 -m pip install torch==2.2.2 torchvision==0.17.2 --index-url https://download.pytorch.org/whl/cu118
- name: Install lmdeploy - dependency
run: |
python3 -m pip install ${{inputs.dependency_pkgs || 'packaging transformers_stream_generator transformers==4.41.2 datasets matplotlib openai attrdict timm modelscope jmespath'}}
# manually install flash attn
# the prebuilt wheel comes from https://github.com/Dao-AILab/flash-attention/releases
python3 -m pip install /root/packages/flash_attn-2.5.7+cu118torch2.2cxx11abiFALSE-cp38-cp38-linux_x86_64.whl
- name: Install lmdeploy
if: ${{github.event_name == 'schedule' || !inputs.offline_mode}}
run: |
python3 -m pip install lmdeploy-*.whl
python3 -m pip install -r requirements/test.txt
- name: Install lmdeploy - offline
if: ${{inputs.offline_mode}}
run: |
python3 -m pip install /nvme/qa_test_models/offline_pkg/py38/lmdeploy-*.whl
python3 -m pip install -r requirements/test.txt
- name: Check env
run: |
python3 -m pip list
lmdeploy check_env
rm -rf allure-results
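# pattern for the steps below: api_server is started in the background on GPUs 6 and 7, its PID is
# exported through GITHUB_ENV so the matching 'Kill api server' step can stop it, and the sleep gives
# the 20b model time to load before pytest starts.
# once up, the server can also be probed manually, e.g. (assuming the default server port 23333):
#   curl http://localhost:23333/v1/models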
- name: Start restful api turbomind
if: matrix.backend == 'turbomind'
run: |
CUDA_VISIBLE_DEVICES=6,7 lmdeploy serve api_server /nvme/qa_test_models/internlm/internlm2-chat-20b --tp 2 > restful.log 2>&1 &
echo "restful_pid=$!" >> "$GITHUB_ENV"
sleep 120s
- name: Start restful api pytorch
if: matrix.backend == 'pytorch'
run: |
CUDA_VISIBLE_DEVICES=6,7 lmdeploy serve api_server /nvme/qa_test_models/internlm/internlm2-chat-20b --tp 2 --backend pytorch > restful.log 2>&1 &
echo "restful_pid=$!" >> "$GITHUB_ENV"
sleep 180s
- name: Test lmdeploy - restful api
timeout-minutes: 75
run: |
pytest autotest/interface/restful/test_restful_chat_func.py -n 20 --alluredir=allure-results
- name: Kill api server
if: always()
run: |
kill -15 "$restful_pid"
- name: Start restful api turbomind - base
if: matrix.backend == 'turbomind'
run: |
CUDA_VISIBLE_DEVICES=6,7 lmdeploy serve api_server /nvme/qa_test_models/internlm/internlm2-20b --tp 2 > restful.log 2>&1 &
echo "restful_pid=$!" >> "$GITHUB_ENV"
sleep 120s
- name: Start restful api pytorch - base
if: matrix.backend == 'pytorch'
run: |
CUDA_VISIBLE_DEVICES=6,7 lmdeploy serve api_server /nvme/qa_test_models/internlm/internlm2-20b --tp 2 --backend pytorch > restful.log 2>&1 &
echo "restful_pid=$!" >> "$GITHUB_ENV"
sleep 180s
- name: Test lmdeploy - restful api - base
timeout-minutes: 40
run: |
pytest autotest/interface/restful/test_restful_completions_v1.py -n 20 --alluredir=allure-results
- name: Kill api server
if: always()
run: |
kill -15 "$restful_pid"
- name: Generate reports
if: always()
run: |
export date_today="$(date +'%Y%m%d-%H%M%S')"
export report_dir="$REPORT_DIR/$date_today"
echo "Save report to $report_dir"
mv allure-results $report_dir
chmod -R 777 $report_dir
- name: Clear workfile
if: always()
run: |
export workdir=$(pwd)
cd ..
rm -rf $workdir
mkdir $workdir
chmod -R 777 $workdir
test_pipeline:
if: ${{!cancelled() && (github.event_name == 'schedule' || inputs.pipeline_regression)}}
runs-on: [self-hosted, linux-a100]
needs: test_tools
timeout-minutes: 300
env:
REPORT_DIR: /nvme/qa_test_models/test-reports
container:
image: nvcr.io/nvidia/tritonserver:22.12-py3
options: "--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip --pull never"
volumes:
- /nvme/github-actions/pip-cache:/root/.cache/pip
- /nvme/github-actions/packages:/root/packages
- /nvme/qa_test_models:/nvme/qa_test_models
- /usr/share/zoneinfo/Asia/Shanghai:/etc/localtime:ro
steps:
- name: Clone repository
uses: actions/checkout@v2
if: ${{github.event_name == 'schedule' || !inputs.offline_mode}}
with:
repository: ${{ github.event.inputs.repo_org || 'InternLM/lmdeploy' }}
ref: ${{github.event.inputs.repo_ref || 'main'}}
- name: Copy repository - offline
if: ${{inputs.offline_mode}}
run: cp -r /nvme/qa_test_models/offline_pkg/lmdeploy/. .
- name: Download Artifacts
if: ${{github.event_name == 'schedule' || !inputs.offline_mode}}
uses: actions/download-artifact@v4
with:
name: my-artifact-${{ github.run_id }}-py38
- name: Install pytorch
run: |
python3 -m pip cache dir
python3 -m pip install torch==2.2.2 torchvision==0.17.2 --index-url https://download.pytorch.org/whl/cu118
- name: Install lmdeploy - dependency
run: |
python3 -m pip install ${{inputs.dependency_pkgs || 'packaging transformers_stream_generator transformers==4.41.2 datasets matplotlib openai attrdict timm modelscope jmespath'}}
# manually install flash attn
# the prebuilt wheel comes from https://github.com/Dao-AILab/flash-attention/releases
python3 -m pip install /root/packages/flash_attn-2.5.7+cu118torch2.2cxx11abiFALSE-cp38-cp38-linux_x86_64.whl
- name: Install lmdeploy
if: ${{github.event_name == 'schedule' || !inputs.offline_mode}}
run: |
python3 -m pip install lmdeploy-*.whl
python3 -m pip install -r requirements/test.txt
- name: Install lmdeploy - offline
if: ${{inputs.offline_mode}}
run: |
python3 -m pip install /nvme/qa_test_models/offline_pkg/py38/lmdeploy-*.whl
python3 -m pip install -r requirements/test.txt
- name: Check env
run: |
python3 -m pip list
lmdeploy check_env
rm -rf allure-results
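# the long-text pipeline cases are split by gpu_num marker, with the xdist worker count scaled down
# as each case needs more GPUs (8 workers for 1-GPU cases, 4 for 2-GPU, 2 for 4-GPU)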
- name: Test lmdeploy - interface pipeline case
run: |
pytest autotest/interface/pipeline/test_pipeline_func.py -m 'not pr_test' -n 4 --alluredir=allure-results || true
pytest autotest/interface/pipeline/test_pipeline_longtext_func.py -m 'gpu_num_1 and not pr_test' -n 8 --alluredir=allure-results || true
pytest autotest/interface/pipeline/test_pipeline_longtext_func.py -m 'gpu_num_2 and not pr_test' -n 4 --alluredir=allure-results || true
pytest autotest/interface/pipeline/test_pipeline_longtext_func.py -m 'gpu_num_4 and not pr_test' -n 2 --alluredir=allure-results
- name: Generate reports
if: always()
run: |
export date_today="$(date +'%Y%m%d-%H%M%S')"
export report_dir="$REPORT_DIR/$date_today"
echo "Save report to $report_dir"
mv allure-results $report_dir
chmod -R 777 $report_dir
- name: Clear workfile
if: always()
run: |
export workdir=$(pwd)
cd ..
rm -rf $workdir
mkdir $workdir
chmod -R 777 $workdir
notify_to_feishu:
if: always() && !cancelled() && (github.ref_name == 'develop' || github.ref_name == 'main')
needs: [test_tools, test_triton, test_restful]
timeout-minutes: 5
runs-on: [self-hosted, linux-a100]
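# contains(needs.*.result, 'failure') flags any failed upstream job; note that test_pipeline is not
# listed in needs, so its result does not affect the notification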
steps:
- name: fail notify
if: contains(needs.*.result, 'failure')
run: |
curl -X POST -H "Content-Type: application/json" -d '{"msg_type":"post","content":{"post":{"zh_cn":{"title":"Lmdeploy- Daily test failed!!!","content":[[{"tag":"text","text":"branch: ${{github.ref_name}}, run action: ${{github.workflow}} failed. "},{"tag":"a","text":"Please click here for details ","href":"https://github.com/'${{ github.repository }}'/actions/runs/'${GITHUB_RUN_ID}'"},{"tag":"at","user_id":"'${{ secrets.FEISHU_USER_ID }}'"}]]}}}}' ${{ secrets.FEISHU_WEBHOOK_URL }}
- name: success notify
if: needs.test_tools.result=='success' && needs.test_triton.result=='success' && needs.test_restful.result=='success'
run: |
curl -X POST -H "Content-Type: application/json" -d '{"msg_type":"post","content":{"post":{"zh_cn":{"title":"Lmdeploy- Daily test success","content":[[{"tag":"text","text":"branch: ${{github.ref_name}}, run action: ${{github.workflow}} succeeded. "},{"tag":"a","text":"Please click here for details ","href":"https://github.com/'${{ github.repository }}'/actions/runs/'${GITHUB_RUN_ID}'"}]]}}}}' ${{ secrets.FEISHU_WEBHOOK_URL }}