From db0a28033d8bfabeb991808c7b28bcbf1f7e697a Mon Sep 17 00:00:00 2001 From: songhappy Date: Tue, 27 Aug 2024 19:12:02 -0700 Subject: [PATCH 1/6] test main 3072-384 --- .github/workflows/llm_performance_tests.yml | 610 ++++++++++---------- python/llm/dev/benchmark/all-in-one/run.py | 6 + 2 files changed, 311 insertions(+), 305 deletions(-) diff --git a/.github/workflows/llm_performance_tests.yml b/.github/workflows/llm_performance_tests.yml index 4b092ed04cf..37e6243f341 100644 --- a/.github/workflows/llm_performance_tests.yml +++ b/.github/workflows/llm_performance_tests.yml @@ -640,359 +640,359 @@ jobs: run_transformer_int4_fp16_gpu_win(repo_id, local_model_hub, in_out_pairs, warm_up, num_trials, num_beams, low_bit, cpu_embedding, batch_size, streaming) ' python/llm/dev/benchmark/all-in-one/run.py - # 32-32 int4+fp16 - - name: Prepare igpu perf test (32-32 int4+fp16) - shell: bash - run: | - # hide time info - # sed -i 's/str(end - st)/"xxxxxx"/g' python/llm/dev/benchmark/all-in-one/run.py - sed -i 's/{api}-results-{today}.csv/32-32-{api}-results-{today}_test1.csv/g' python/llm/dev/benchmark/all-in-one/run.py - sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/32-32_int4_fp16.yaml - - - name: Test on igpu (32-32 int4+fp16) - shell: cmd - run: | - call conda activate igpu-perf - set SYCL_CACHE_PERSISTENT=1 - set BIGDL_LLM_XMX_DISABLED=1 - REM for llava - set TRANSFORMERS_OFFLINE=1 - - cd python\llm\dev\benchmark\all-in-one - move ..\..\..\test\benchmark\igpu-perf\32-32_int4_fp16.yaml config.yaml - set PYTHONIOENCODING=utf-8 - python run.py >> %CSV_SAVE_PATH%\32-32_int4_fp16\log\%LOG_FILE% 2>&1 - if %ERRORLEVEL% neq 0 (exit /b 1) - python ..\..\..\test\benchmark\igpu-perf\check_csv_results.py --yaml-file config.yaml --suffix test1 - if %ERRORLEVEL% neq 0 (exit /b 1) + # # 32-32 int4+fp16 + # - name: Prepare igpu perf test (32-32 int4+fp16) + # shell: bash + # run: | + # # hide time info + # # sed -i 's/str(end - st)/"xxxxxx"/g' python/llm/dev/benchmark/all-in-one/run.py + # sed -i 's/{api}-results-{today}.csv/32-32-{api}-results-{today}_test1.csv/g' python/llm/dev/benchmark/all-in-one/run.py + # sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/32-32_int4_fp16.yaml - call conda deactivate + # - name: Test on igpu (32-32 int4+fp16) + # shell: cmd + # run: | + # call conda activate igpu-perf + # set SYCL_CACHE_PERSISTENT=1 + # set BIGDL_LLM_XMX_DISABLED=1 + # REM for llava + # set TRANSFORMERS_OFFLINE=1 + + # cd python\llm\dev\benchmark\all-in-one + # move ..\..\..\test\benchmark\igpu-perf\32-32_int4_fp16.yaml config.yaml + # set PYTHONIOENCODING=utf-8 + # python run.py >> %CSV_SAVE_PATH%\32-32_int4_fp16\log\%LOG_FILE% 2>&1 + # if %ERRORLEVEL% neq 0 (exit /b 1) + # python ..\..\..\test\benchmark\igpu-perf\check_csv_results.py --yaml-file config.yaml --suffix test1 + # if %ERRORLEVEL% neq 0 (exit /b 1) + + # call conda deactivate + + # - name: Prepare igpu perf test for transformers 4.36 (32-32 int4+fp16) + # shell: bash + # run: | + # sed -i 's/{today}_test1/{today}_test2/g' python/llm/dev/benchmark/all-in-one/run.py + # sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/32-32_int4_fp16_436.yaml - - name: Prepare igpu perf test for transformers 4.36 (32-32 int4+fp16) - shell: bash - run: | - sed -i 's/{today}_test1/{today}_test2/g' python/llm/dev/benchmark/all-in-one/run.py - sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/32-32_int4_fp16_436.yaml + # - name: Test on igpu for transformers 4.36 (32-32 int4+fp16) + # shell: cmd + # run: | + # call conda activate igpu-perf + # pip install transformers==4.36.2 - - name: Test on igpu for transformers 4.36 (32-32 int4+fp16) - shell: cmd - run: | - call conda activate igpu-perf - pip install transformers==4.36.2 + # set SYCL_CACHE_PERSISTENT=1 + # set BIGDL_LLM_XMX_DISABLED=1 - set SYCL_CACHE_PERSISTENT=1 - set BIGDL_LLM_XMX_DISABLED=1 + # cd python\llm\dev\benchmark\all-in-one + # move ..\..\..\test\benchmark\igpu-perf\32-32_int4_fp16_436.yaml config.yaml + # set PYTHONIOENCODING=utf-8 + # python run.py >> %CSV_SAVE_PATH%\32-32_int4_fp16\log\%LOG_FILE% 2>&1 + # if %ERRORLEVEL% neq 0 (exit /b 1) + # python ..\..\..\test\benchmark\igpu-perf\check_csv_results.py --yaml-file config.yaml --suffix test2 + # if %ERRORLEVEL% neq 0 (exit /b 1) - cd python\llm\dev\benchmark\all-in-one - move ..\..\..\test\benchmark\igpu-perf\32-32_int4_fp16_436.yaml config.yaml - set PYTHONIOENCODING=utf-8 - python run.py >> %CSV_SAVE_PATH%\32-32_int4_fp16\log\%LOG_FILE% 2>&1 - if %ERRORLEVEL% neq 0 (exit /b 1) - python ..\..\..\test\benchmark\igpu-perf\check_csv_results.py --yaml-file config.yaml --suffix test2 - if %ERRORLEVEL% neq 0 (exit /b 1) - - call conda deactivate + # call conda deactivate - - name: Prepare igpu perf test for transformers 4.38 (32-32 int4+fp16) - shell: bash - run: | - sed -i 's/{today}_test2/{today}_test3/g' python/llm/dev/benchmark/all-in-one/run.py - sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/32-32_int4_fp16_438.yaml - - - name: Test on igpu for transformers 4.38 (32-32 int4+fp16) - shell: cmd - run: | - call conda activate igpu-perf - pip install transformers==4.38.2 - - set SYCL_CACHE_PERSISTENT=1 - set BIGDL_LLM_XMX_DISABLED=1 - - cd python\llm\dev\benchmark\all-in-one - move ..\..\..\test\benchmark\igpu-perf\32-32_int4_fp16_438.yaml config.yaml - set PYTHONIOENCODING=utf-8 - python run.py >> %CSV_SAVE_PATH%\32-32_int4_fp16\log\%LOG_FILE% 2>&1 - if %ERRORLEVEL% neq 0 if %ERRORLEVEL% neq -1073740791 (exit /b 1) - python ..\..\..\test\benchmark\igpu-perf\check_csv_results.py --yaml-file config.yaml --suffix test3 - if %ERRORLEVEL% neq 0 (exit /b 1) - - call conda deactivate - - - name: Prepare igpu perf test for transformers 4.43 (32-32 int4+fp16) - shell: bash - run: | - sed -i 's/{today}_test3/{today}_test4/g' python/llm/dev/benchmark/all-in-one/run.py - sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/32-32_int4_fp16_443.yaml - - - name: Test on igpu for transformers 4.43 (32-32 int4+fp16) - shell: cmd - run: | - call conda activate igpu-perf - pip install transformers==4.43.1 - pip install trl - - set SYCL_CACHE_PERSISTENT=1 - set BIGDL_LLM_XMX_DISABLED=1 - - cd python\llm\dev\benchmark\all-in-one - move ..\..\..\test\benchmark\igpu-perf\32-32_int4_fp16_443.yaml config.yaml - set PYTHONIOENCODING=utf-8 - python run.py >> %CSV_SAVE_PATH%\32-32_int4_fp16\log\%LOG_FILE% 2>&1 - if %ERRORLEVEL% neq 0 if %ERRORLEVEL% neq -1073740791 (exit /b 1) - python ..\..\..\test\benchmark\igpu-perf\check_csv_results.py --yaml-file config.yaml --suffix test4 - if %ERRORLEVEL% neq 0 (exit /b 1) - - pip uninstall trl -y - call conda deactivate - - - name: Concat csv and generate html (32-32 int4+fp16) - shell: cmd - run: | - call conda activate html-gen - - cd python\llm\dev\benchmark\all-in-one - python ..\..\..\test\benchmark\concat_csv.py - if %ERRORLEVEL% neq 0 (exit /b 1) - del /q *test*.csv - move *.csv %CSV_SAVE_PATH%\32-32_int4_fp16\ - cd ..\..\..\test\benchmark - python csv_to_html.py -f %CSV_SAVE_PATH%\32-32_int4_fp16\ - if %ERRORLEVEL% neq 0 (exit /b 1) - move %CSV_SAVE_PATH%\32-32_int4_fp16\*.html %CSV_SAVE_PATH% - - call conda deactivate - - # TODO: create a action function here for different input - # 1024-128 int4+fp16 - - name: Prepare igpu perf test (1024-128 int4+fp16) - shell: bash - run: | - sed -i 's/32-32/1024-128/g' python/llm/dev/benchmark/all-in-one/run.py - sed -i 's/{today}_test4/{today}_test1/g' python/llm/dev/benchmark/all-in-one/run.py - sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/1024-128_int4_fp16.yaml - - - name: Test on igpu (1024-128 int4+fp16) - shell: cmd - run: | - call conda activate igpu-perf - pip install transformers==4.37.0 + # - name: Prepare igpu perf test for transformers 4.38 (32-32 int4+fp16) + # shell: bash + # run: | + # sed -i 's/{today}_test2/{today}_test3/g' python/llm/dev/benchmark/all-in-one/run.py + # sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/32-32_int4_fp16_438.yaml - set SYCL_CACHE_PERSISTENT=1 - set BIGDL_LLM_XMX_DISABLED=1 - REM for llava - set TRANSFORMERS_OFFLINE=1 + # - name: Test on igpu for transformers 4.38 (32-32 int4+fp16) + # shell: cmd + # run: | + # call conda activate igpu-perf + # pip install transformers==4.38.2 - cd python\llm\dev\benchmark\all-in-one - move ..\..\..\test\benchmark\igpu-perf\1024-128_int4_fp16.yaml config.yaml - set PYTHONIOENCODING=utf-8 - python run.py >> %CSV_SAVE_PATH%\1024-128_int4_fp16\log\%LOG_FILE% 2>&1 - if %ERRORLEVEL% neq 0 (exit /b 1) - python ..\..\..\test\benchmark\igpu-perf\check_csv_results.py --yaml-file config.yaml --suffix test1 - if %ERRORLEVEL% neq 0 (exit /b 1) + # set SYCL_CACHE_PERSISTENT=1 + # set BIGDL_LLM_XMX_DISABLED=1 - call conda deactivate + # cd python\llm\dev\benchmark\all-in-one + # move ..\..\..\test\benchmark\igpu-perf\32-32_int4_fp16_438.yaml config.yaml + # set PYTHONIOENCODING=utf-8 + # python run.py >> %CSV_SAVE_PATH%\32-32_int4_fp16\log\%LOG_FILE% 2>&1 + # if %ERRORLEVEL% neq 0 if %ERRORLEVEL% neq -1073740791 (exit /b 1) + # python ..\..\..\test\benchmark\igpu-perf\check_csv_results.py --yaml-file config.yaml --suffix test3 + # if %ERRORLEVEL% neq 0 (exit /b 1) - - name: Prepare igpu perf test for transformers 4.36 (1024-128 int4+fp16) - shell: bash - run: | - sed -i 's/{today}_test1/{today}_test2/g' python/llm/dev/benchmark/all-in-one/run.py - sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/1024-128_int4_fp16_436.yaml + # call conda deactivate - - name: Test on igpu for transformers 4.36 (1024-128 int4+fp16) - shell: cmd - run: | - call conda activate igpu-perf - pip install transformers==4.36.2 + # - name: Prepare igpu perf test for transformers 4.43 (32-32 int4+fp16) + # shell: bash + # run: | + # sed -i 's/{today}_test3/{today}_test4/g' python/llm/dev/benchmark/all-in-one/run.py + # sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/32-32_int4_fp16_443.yaml - set SYCL_CACHE_PERSISTENT=1 - set BIGDL_LLM_XMX_DISABLED=1 + # - name: Test on igpu for transformers 4.43 (32-32 int4+fp16) + # shell: cmd + # run: | + # call conda activate igpu-perf + # pip install transformers==4.43.1 + # pip install trl - cd python\llm\dev\benchmark\all-in-one - move ..\..\..\test\benchmark\igpu-perf\1024-128_int4_fp16_436.yaml config.yaml - set PYTHONIOENCODING=utf-8 - python run.py >> %CSV_SAVE_PATH%\1024-128_int4_fp16\log\%LOG_FILE% 2>&1 - if %ERRORLEVEL% neq 0 (exit /b 1) - python ..\..\..\test\benchmark\igpu-perf\check_csv_results.py --yaml-file config.yaml --suffix test2 - if %ERRORLEVEL% neq 0 (exit /b 1) + # set SYCL_CACHE_PERSISTENT=1 + # set BIGDL_LLM_XMX_DISABLED=1 - call conda deactivate + # cd python\llm\dev\benchmark\all-in-one + # move ..\..\..\test\benchmark\igpu-perf\32-32_int4_fp16_443.yaml config.yaml + # set PYTHONIOENCODING=utf-8 + # python run.py >> %CSV_SAVE_PATH%\32-32_int4_fp16\log\%LOG_FILE% 2>&1 + # if %ERRORLEVEL% neq 0 if %ERRORLEVEL% neq -1073740791 (exit /b 1) + # python ..\..\..\test\benchmark\igpu-perf\check_csv_results.py --yaml-file config.yaml --suffix test4 + # if %ERRORLEVEL% neq 0 (exit /b 1) - - name: Prepare igpu perf test for transformers 4.38 (1024-128 int4+fp16) - shell: bash - run: | - sed -i 's/{today}_test2/{today}_test3/g' python/llm/dev/benchmark/all-in-one/run.py - sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/1024-128_int4_fp16_438.yaml + # pip uninstall trl -y + # call conda deactivate - - name: Test on igpu for transformers 4.38 (1024-128 int4+fp16) - shell: cmd - run: | - call conda activate igpu-perf - pip install transformers==4.38.2 + # - name: Concat csv and generate html (32-32 int4+fp16) + # shell: cmd + # run: | + # call conda activate html-gen + + # cd python\llm\dev\benchmark\all-in-one + # python ..\..\..\test\benchmark\concat_csv.py + # if %ERRORLEVEL% neq 0 (exit /b 1) + # del /q *test*.csv + # move *.csv %CSV_SAVE_PATH%\32-32_int4_fp16\ + # cd ..\..\..\test\benchmark + # python csv_to_html.py -f %CSV_SAVE_PATH%\32-32_int4_fp16\ + # if %ERRORLEVEL% neq 0 (exit /b 1) + # move %CSV_SAVE_PATH%\32-32_int4_fp16\*.html %CSV_SAVE_PATH% + + # call conda deactivate + + # # TODO: create a action function here for different input + # # 1024-128 int4+fp16 + # - name: Prepare igpu perf test (1024-128 int4+fp16) + # shell: bash + # run: | + # sed -i 's/32-32/1024-128/g' python/llm/dev/benchmark/all-in-one/run.py + # sed -i 's/{today}_test4/{today}_test1/g' python/llm/dev/benchmark/all-in-one/run.py + # sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/1024-128_int4_fp16.yaml - set SYCL_CACHE_PERSISTENT=1 - set BIGDL_LLM_XMX_DISABLED=1 + # - name: Test on igpu (1024-128 int4+fp16) + # shell: cmd + # run: | + # call conda activate igpu-perf + # pip install transformers==4.37.0 + + # set SYCL_CACHE_PERSISTENT=1 + # set BIGDL_LLM_XMX_DISABLED=1 + # REM for llava + # set TRANSFORMERS_OFFLINE=1 + + # cd python\llm\dev\benchmark\all-in-one + # move ..\..\..\test\benchmark\igpu-perf\1024-128_int4_fp16.yaml config.yaml + # set PYTHONIOENCODING=utf-8 + # python run.py >> %CSV_SAVE_PATH%\1024-128_int4_fp16\log\%LOG_FILE% 2>&1 + # if %ERRORLEVEL% neq 0 (exit /b 1) + # python ..\..\..\test\benchmark\igpu-perf\check_csv_results.py --yaml-file config.yaml --suffix test1 + # if %ERRORLEVEL% neq 0 (exit /b 1) + + # call conda deactivate + + # - name: Prepare igpu perf test for transformers 4.36 (1024-128 int4+fp16) + # shell: bash + # run: | + # sed -i 's/{today}_test1/{today}_test2/g' python/llm/dev/benchmark/all-in-one/run.py + # sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/1024-128_int4_fp16_436.yaml - cd python\llm\dev\benchmark\all-in-one - move ..\..\..\test\benchmark\igpu-perf\1024-128_int4_fp16_438.yaml config.yaml - set PYTHONIOENCODING=utf-8 - python run.py >> %CSV_SAVE_PATH%\1024-128_int4_fp16\log\%LOG_FILE% 2>&1 - if %ERRORLEVEL% neq 0 (exit /b 1) - python ..\..\..\test\benchmark\igpu-perf\check_csv_results.py --yaml-file config.yaml --suffix test3 - if %ERRORLEVEL% neq 0 (exit /b 1) + # - name: Test on igpu for transformers 4.36 (1024-128 int4+fp16) + # shell: cmd + # run: | + # call conda activate igpu-perf + # pip install transformers==4.36.2 - call conda deactivate + # set SYCL_CACHE_PERSISTENT=1 + # set BIGDL_LLM_XMX_DISABLED=1 - - name: Prepare igpu perf test for transformers 4.43 (1024-128 int4+fp16) - shell: bash - run: | - sed -i 's/{today}_test3/{today}_test4/g' python/llm/dev/benchmark/all-in-one/run.py - sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/1024-128_int4_fp16_443.yaml + # cd python\llm\dev\benchmark\all-in-one + # move ..\..\..\test\benchmark\igpu-perf\1024-128_int4_fp16_436.yaml config.yaml + # set PYTHONIOENCODING=utf-8 + # python run.py >> %CSV_SAVE_PATH%\1024-128_int4_fp16\log\%LOG_FILE% 2>&1 + # if %ERRORLEVEL% neq 0 (exit /b 1) + # python ..\..\..\test\benchmark\igpu-perf\check_csv_results.py --yaml-file config.yaml --suffix test2 + # if %ERRORLEVEL% neq 0 (exit /b 1) - - name: Test on igpu for transformers 4.43 (1024-128 int4+fp16) - shell: cmd - run: | - call conda activate igpu-perf - pip install transformers==4.43.1 - pip install trl + # call conda deactivate - set SYCL_CACHE_PERSISTENT=1 - set BIGDL_LLM_XMX_DISABLED=1 + # - name: Prepare igpu perf test for transformers 4.38 (1024-128 int4+fp16) + # shell: bash + # run: | + # sed -i 's/{today}_test2/{today}_test3/g' python/llm/dev/benchmark/all-in-one/run.py + # sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/1024-128_int4_fp16_438.yaml - cd python\llm\dev\benchmark\all-in-one - move ..\..\..\test\benchmark\igpu-perf\1024-128_int4_fp16_443.yaml config.yaml - set PYTHONIOENCODING=utf-8 - python run.py >> %CSV_SAVE_PATH%\1024-128_int4_fp16\log\%LOG_FILE% 2>&1 - if %ERRORLEVEL% neq 0 (exit /b 1) - python ..\..\..\test\benchmark\igpu-perf\check_csv_results.py --yaml-file config.yaml --suffix test4 - if %ERRORLEVEL% neq 0 (exit /b 1) + # - name: Test on igpu for transformers 4.38 (1024-128 int4+fp16) + # shell: cmd + # run: | + # call conda activate igpu-perf + # pip install transformers==4.38.2 - pip uninstall trl -y - call conda deactivate + # set SYCL_CACHE_PERSISTENT=1 + # set BIGDL_LLM_XMX_DISABLED=1 - - name: Concat csv and generate html (1024-128 int4+fp16) - shell: cmd - run: | - call conda activate html-gen + # cd python\llm\dev\benchmark\all-in-one + # move ..\..\..\test\benchmark\igpu-perf\1024-128_int4_fp16_438.yaml config.yaml + # set PYTHONIOENCODING=utf-8 + # python run.py >> %CSV_SAVE_PATH%\1024-128_int4_fp16\log\%LOG_FILE% 2>&1 + # if %ERRORLEVEL% neq 0 (exit /b 1) + # python ..\..\..\test\benchmark\igpu-perf\check_csv_results.py --yaml-file config.yaml --suffix test3 + # if %ERRORLEVEL% neq 0 (exit /b 1) - cd python\llm\dev\benchmark\all-in-one - python ..\..\..\test\benchmark\concat_csv.py - if %ERRORLEVEL% neq 0 (exit /b 1) - del /q *test*.csv - move *.csv %CSV_SAVE_PATH%\1024-128_int4_fp16\ - cd ..\..\..\test\benchmark - python csv_to_html.py -f %CSV_SAVE_PATH%\1024-128_int4_fp16\ - if %ERRORLEVEL% neq 0 (exit /b 1) - move %CSV_SAVE_PATH%\1024-128_int4_fp16\*.html %CSV_SAVE_PATH% + # call conda deactivate - call conda deactivate + # - name: Prepare igpu perf test for transformers 4.43 (1024-128 int4+fp16) + # shell: bash + # run: | + # sed -i 's/{today}_test3/{today}_test4/g' python/llm/dev/benchmark/all-in-one/run.py + # sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/1024-128_int4_fp16_443.yaml - # 2048-256 int4+fp16 - - name: Prepare igpu perf test (2048-256 int4+fp16) - shell: bash - run: | - sed -i 's/1024-128/2048-256/g' python/llm/dev/benchmark/all-in-one/run.py - sed -i 's/{today}_test4/{today}_test1/g' python/llm/dev/benchmark/all-in-one/run.py - sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/2048-256_int4_fp16.yaml + # - name: Test on igpu for transformers 4.43 (1024-128 int4+fp16) + # shell: cmd + # run: | + # call conda activate igpu-perf + # pip install transformers==4.43.1 + # pip install trl - - name: Test on igpu (2048-256 int4+fp16) - shell: cmd - run: | - call conda activate igpu-perf - pip install transformers==4.37.0 + # set SYCL_CACHE_PERSISTENT=1 + # set BIGDL_LLM_XMX_DISABLED=1 - set SYCL_CACHE_PERSISTENT=1 - set BIGDL_LLM_XMX_DISABLED=1 - REM for llava - set TRANSFORMERS_OFFLINE=1 + # cd python\llm\dev\benchmark\all-in-one + # move ..\..\..\test\benchmark\igpu-perf\1024-128_int4_fp16_443.yaml config.yaml + # set PYTHONIOENCODING=utf-8 + # python run.py >> %CSV_SAVE_PATH%\1024-128_int4_fp16\log\%LOG_FILE% 2>&1 + # if %ERRORLEVEL% neq 0 (exit /b 1) + # python ..\..\..\test\benchmark\igpu-perf\check_csv_results.py --yaml-file config.yaml --suffix test4 + # if %ERRORLEVEL% neq 0 (exit /b 1) - cd python\llm\dev\benchmark\all-in-one - move ..\..\..\test\benchmark\igpu-perf\2048-256_int4_fp16.yaml config.yaml - set PYTHONIOENCODING=utf-8 - python run.py >> %CSV_SAVE_PATH%\2048-256_int4_fp16\log\%LOG_FILE% 2>&1 - if %ERRORLEVEL% neq 0 (exit /b 1) - python ..\..\..\test\benchmark\igpu-perf\check_csv_results.py --yaml-file config.yaml --suffix test1 - if %ERRORLEVEL% neq 0 (exit /b 1) + # pip uninstall trl -y + # call conda deactivate - call conda deactivate + # - name: Concat csv and generate html (1024-128 int4+fp16) + # shell: cmd + # run: | + # call conda activate html-gen + + # cd python\llm\dev\benchmark\all-in-one + # python ..\..\..\test\benchmark\concat_csv.py + # if %ERRORLEVEL% neq 0 (exit /b 1) + # del /q *test*.csv + # move *.csv %CSV_SAVE_PATH%\1024-128_int4_fp16\ + # cd ..\..\..\test\benchmark + # python csv_to_html.py -f %CSV_SAVE_PATH%\1024-128_int4_fp16\ + # if %ERRORLEVEL% neq 0 (exit /b 1) + # move %CSV_SAVE_PATH%\1024-128_int4_fp16\*.html %CSV_SAVE_PATH% + + # call conda deactivate + + # # 2048-256 int4+fp16 + # - name: Prepare igpu perf test (2048-256 int4+fp16) + # shell: bash + # run: | + # sed -i 's/1024-128/2048-256/g' python/llm/dev/benchmark/all-in-one/run.py + # sed -i 's/{today}_test4/{today}_test1/g' python/llm/dev/benchmark/all-in-one/run.py + # sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/2048-256_int4_fp16.yaml - - name: Prepare igpu perf test for transformers 4.36 (2048-256 int4+fp16) - shell: bash - run: | - sed -i 's/{today}_test1/{today}_test2/g' python/llm/dev/benchmark/all-in-one/run.py - sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/2048-256_int4_fp16_436.yaml + # - name: Test on igpu (2048-256 int4+fp16) + # shell: cmd + # run: | + # call conda activate igpu-perf + # pip install transformers==4.37.0 + + # set SYCL_CACHE_PERSISTENT=1 + # set BIGDL_LLM_XMX_DISABLED=1 + # REM for llava + # set TRANSFORMERS_OFFLINE=1 + + # cd python\llm\dev\benchmark\all-in-one + # move ..\..\..\test\benchmark\igpu-perf\2048-256_int4_fp16.yaml config.yaml + # set PYTHONIOENCODING=utf-8 + # python run.py >> %CSV_SAVE_PATH%\2048-256_int4_fp16\log\%LOG_FILE% 2>&1 + # if %ERRORLEVEL% neq 0 (exit /b 1) + # python ..\..\..\test\benchmark\igpu-perf\check_csv_results.py --yaml-file config.yaml --suffix test1 + # if %ERRORLEVEL% neq 0 (exit /b 1) + + # call conda deactivate + + # - name: Prepare igpu perf test for transformers 4.36 (2048-256 int4+fp16) + # shell: bash + # run: | + # sed -i 's/{today}_test1/{today}_test2/g' python/llm/dev/benchmark/all-in-one/run.py + # sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/2048-256_int4_fp16_436.yaml - - name: Test on igpu for transformers 4.36 (2048-256 int4+fp16) - shell: cmd - run: | - call conda activate igpu-perf - pip install transformers==4.36.2 + # - name: Test on igpu for transformers 4.36 (2048-256 int4+fp16) + # shell: cmd + # run: | + # call conda activate igpu-perf + # pip install transformers==4.36.2 - set SYCL_CACHE_PERSISTENT=1 - set BIGDL_LLM_XMX_DISABLED=1 + # set SYCL_CACHE_PERSISTENT=1 + # set BIGDL_LLM_XMX_DISABLED=1 - cd python\llm\dev\benchmark\all-in-one - move ..\..\..\test\benchmark\igpu-perf\2048-256_int4_fp16_436.yaml config.yaml - set PYTHONIOENCODING=utf-8 - python run.py >> %CSV_SAVE_PATH%\2048-256_int4_fp16\log\%LOG_FILE% 2>&1 - if %ERRORLEVEL% neq 0 (exit /b 1) - python ..\..\..\test\benchmark\igpu-perf\check_csv_results.py --yaml-file config.yaml --suffix test2 - if %ERRORLEVEL% neq 0 (exit /b 1) + # cd python\llm\dev\benchmark\all-in-one + # move ..\..\..\test\benchmark\igpu-perf\2048-256_int4_fp16_436.yaml config.yaml + # set PYTHONIOENCODING=utf-8 + # python run.py >> %CSV_SAVE_PATH%\2048-256_int4_fp16\log\%LOG_FILE% 2>&1 + # if %ERRORLEVEL% neq 0 (exit /b 1) + # python ..\..\..\test\benchmark\igpu-perf\check_csv_results.py --yaml-file config.yaml --suffix test2 + # if %ERRORLEVEL% neq 0 (exit /b 1) - call conda deactivate + # call conda deactivate - - name: Prepare igpu perf test for transformers 4.38 (2048-256 int4+fp16) - shell: bash - run: | - sed -i 's/{today}_test2/{today}_test3/g' python/llm/dev/benchmark/all-in-one/run.py - sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/2048-256_int4_fp16_438.yaml + # - name: Prepare igpu perf test for transformers 4.38 (2048-256 int4+fp16) + # shell: bash + # run: | + # sed -i 's/{today}_test2/{today}_test3/g' python/llm/dev/benchmark/all-in-one/run.py + # sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/2048-256_int4_fp16_438.yaml - - name: Test on igpu for transformers 4.38 (2048-256 int4+fp16) - shell: cmd - run: | - call conda activate igpu-perf - pip install transformers==4.38.2 + # - name: Test on igpu for transformers 4.38 (2048-256 int4+fp16) + # shell: cmd + # run: | + # call conda activate igpu-perf + # pip install transformers==4.38.2 - set SYCL_CACHE_PERSISTENT=1 - set BIGDL_LLM_XMX_DISABLED=1 + # set SYCL_CACHE_PERSISTENT=1 + # set BIGDL_LLM_XMX_DISABLED=1 - cd python\llm\dev\benchmark\all-in-one - move ..\..\..\test\benchmark\igpu-perf\2048-256_int4_fp16_438.yaml config.yaml - set PYTHONIOENCODING=utf-8 - python run.py >> %CSV_SAVE_PATH%\2048-256_int4_fp16\log\%LOG_FILE% 2>&1 - if %ERRORLEVEL% neq 0 (exit /b 1) - python ..\..\..\test\benchmark\igpu-perf\check_csv_results.py --yaml-file config.yaml --suffix test3 - if %ERRORLEVEL% neq 0 (exit /b 1) + # cd python\llm\dev\benchmark\all-in-one + # move ..\..\..\test\benchmark\igpu-perf\2048-256_int4_fp16_438.yaml config.yaml + # set PYTHONIOENCODING=utf-8 + # python run.py >> %CSV_SAVE_PATH%\2048-256_int4_fp16\log\%LOG_FILE% 2>&1 + # if %ERRORLEVEL% neq 0 (exit /b 1) + # python ..\..\..\test\benchmark\igpu-perf\check_csv_results.py --yaml-file config.yaml --suffix test3 + # if %ERRORLEVEL% neq 0 (exit /b 1) - call conda deactivate + # call conda deactivate - - name: Prepare igpu perf test for transformers 4.43 (2048-256 int4+fp16) - shell: bash - run: | - sed -i 's/{today}_test3/{today}_test4/g' python/llm/dev/benchmark/all-in-one/run.py - sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/2048-256_int4_fp16_443.yaml + # - name: Prepare igpu perf test for transformers 4.43 (2048-256 int4+fp16) + # shell: bash + # run: | + # sed -i 's/{today}_test3/{today}_test4/g' python/llm/dev/benchmark/all-in-one/run.py + # sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/2048-256_int4_fp16_443.yaml - - name: Test on igpu for transformers 4.43 (2048-256 int4+fp16) - shell: cmd - run: | - call conda activate igpu-perf - pip install transformers==4.43.1 - pip install trl + # - name: Test on igpu for transformers 4.43 (2048-256 int4+fp16) + # shell: cmd + # run: | + # call conda activate igpu-perf + # pip install transformers==4.43.1 + # pip install trl - set SYCL_CACHE_PERSISTENT=1 - set BIGDL_LLM_XMX_DISABLED=1 + # set SYCL_CACHE_PERSISTENT=1 + # set BIGDL_LLM_XMX_DISABLED=1 - cd python\llm\dev\benchmark\all-in-one - move ..\..\..\test\benchmark\igpu-perf\2048-256_int4_fp16_443.yaml config.yaml - set PYTHONIOENCODING=utf-8 - python run.py >> %CSV_SAVE_PATH%\2048-256_int4_fp16\log\%LOG_FILE% 2>&1 - if %ERRORLEVEL% neq 0 (exit /b 1) - python ..\..\..\test\benchmark\igpu-perf\check_csv_results.py --yaml-file config.yaml --suffix test4 - if %ERRORLEVEL% neq 0 (exit /b 1) + # cd python\llm\dev\benchmark\all-in-one + # move ..\..\..\test\benchmark\igpu-perf\2048-256_int4_fp16_443.yaml config.yaml + # set PYTHONIOENCODING=utf-8 + # python run.py >> %CSV_SAVE_PATH%\2048-256_int4_fp16\log\%LOG_FILE% 2>&1 + # if %ERRORLEVEL% neq 0 (exit /b 1) + # python ..\..\..\test\benchmark\igpu-perf\check_csv_results.py --yaml-file config.yaml --suffix test4 + # if %ERRORLEVEL% neq 0 (exit /b 1) - pip uninstall trl -y - call conda deactivate + # pip uninstall trl -y + # call conda deactivate - - name: Concat csv and generate html (2048-256 int4+fp16) + # - name: Concat csv and generate html (2048-256 int4+fp16) shell: cmd run: | call conda activate html-gen diff --git a/python/llm/dev/benchmark/all-in-one/run.py b/python/llm/dev/benchmark/all-in-one/run.py index 4b5a64d664a..7df128e9b59 100644 --- a/python/llm/dev/benchmark/all-in-one/run.py +++ b/python/llm/dev/benchmark/all-in-one/run.py @@ -2049,6 +2049,9 @@ def run_pipeline_parallel_gpu(repo_id, for batch_size in batch_list: for model in conf.repo_id: in_out_pairs = conf['in_out_pairs'].copy() + print("-------------------- Start running batch_size: {} --------------------".format(batch_size)) + print("-------------------- Start running model: {} --------------------".format(model)) + print("--------------------in_out_pairs: {}--------------------".format(in_out_pairs)) if excludes: for in_out in conf['in_out_pairs']: model_id_input = model + ':' + in_out.split('-')[0] @@ -2059,9 +2062,11 @@ def run_pipeline_parallel_gpu(repo_id, lookahead = True run_model(model, api, in_out_pairs, conf['local_model_hub'], conf['warm_up'], conf['num_trials'], conf['num_beams'], conf['low_bit'], conf['cpu_embedding'], batch_size, streaming, use_fp16_torch_dtype, lookahead, task, optimize_model) + print("-------------------- Finish running model: {} --------------------".format(model)) df = pd.DataFrame(results, columns=['model', '1st token avg latency (ms)', '2+ avg latency (ms/token)', 'encoder time (ms)', 'input/output tokens', 'batch_size', 'actual input/output tokens', 'num_beams', 'low_bit', 'cpu_embedding', 'model loading time (s)', 'peak mem (GB)', 'streaming', 'use_fp16_torch_dtype']) + print("-------------------- Results: {} --------------------".format(results)) if "pipeline" in api or "deepspeed" in api: if torch.distributed.get_rank() == 0: df.index += max(line_counter - 1, 0) @@ -2079,3 +2084,4 @@ def run_pipeline_parallel_gpu(repo_id, df.to_csv(csv_name, mode='a', header=None, encoding='utf-8') line_counter += len(df.index) results = [] + From 27f75f537c80b2149770e1cd17f667288a624519 Mon Sep 17 00:00:00 2001 From: songhappy Date: Tue, 27 Aug 2024 19:17:46 -0700 Subject: [PATCH 2/6] main 3k test --- .github/workflows/llm_performance_tests.yml | 28 ++++++++++----------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/.github/workflows/llm_performance_tests.yml b/.github/workflows/llm_performance_tests.yml index 37e6243f341..a4b1fd2ee55 100644 --- a/.github/workflows/llm_performance_tests.yml +++ b/.github/workflows/llm_performance_tests.yml @@ -992,22 +992,22 @@ jobs: # pip uninstall trl -y # call conda deactivate - # - name: Concat csv and generate html (2048-256 int4+fp16) - shell: cmd - run: | - call conda activate html-gen + # # - name: Concat csv and generate html (2048-256 int4+fp16) + # shell: cmd + # run: | + # call conda activate html-gen - cd python\llm\dev\benchmark\all-in-one - python ..\..\..\test\benchmark\concat_csv.py - if %ERRORLEVEL% neq 0 (exit /b 1) - del /q *test*.csv - move *.csv %CSV_SAVE_PATH%\2048-256_int4_fp16\ - cd ..\..\..\test\benchmark - python csv_to_html.py -f %CSV_SAVE_PATH%\2048-256_int4_fp16\ - if %ERRORLEVEL% neq 0 (exit /b 1) - move %CSV_SAVE_PATH%\2048-256_int4_fp16\*.html %CSV_SAVE_PATH% + # cd python\llm\dev\benchmark\all-in-one + # python ..\..\..\test\benchmark\concat_csv.py + # if %ERRORLEVEL% neq 0 (exit /b 1) + # del /q *test*.csv + # move *.csv %CSV_SAVE_PATH%\2048-256_int4_fp16\ + # cd ..\..\..\test\benchmark + # python csv_to_html.py -f %CSV_SAVE_PATH%\2048-256_int4_fp16\ + # if %ERRORLEVEL% neq 0 (exit /b 1) + # move %CSV_SAVE_PATH%\2048-256_int4_fp16\*.html %CSV_SAVE_PATH% - call conda deactivate + # call conda deactivate # 3072-384 int4+fp16 - name: Prepare igpu perf test (3072-384 int4+fp16) From 12865dd98bff58294492db6d6fef30374b36c092 Mon Sep 17 00:00:00 2001 From: songhappy Date: Tue, 27 Aug 2024 22:52:01 -0700 Subject: [PATCH 3/6] log conf --- python/llm/dev/benchmark/all-in-one/run.py | 4 ++++ python/llm/test/benchmark/igpu-perf/3072-384_int4_fp16.yaml | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/python/llm/dev/benchmark/all-in-one/run.py b/python/llm/dev/benchmark/all-in-one/run.py index 7df128e9b59..72a25691151 100644 --- a/python/llm/dev/benchmark/all-in-one/run.py +++ b/python/llm/dev/benchmark/all-in-one/run.py @@ -2038,6 +2038,8 @@ def run_pipeline_parallel_gpu(repo_id, for api in conf.test_api: global csv_name csv_name = f'{current_dir}/{api}-results-{today}.csv' + print("-------------------- csv_name: {} --------------------".format(csv_name)) + print(conf) try: line_counter = len(open(csv_name).readlines()) except: @@ -2066,6 +2068,8 @@ def run_pipeline_parallel_gpu(repo_id, df = pd.DataFrame(results, columns=['model', '1st token avg latency (ms)', '2+ avg latency (ms/token)', 'encoder time (ms)', 'input/output tokens', 'batch_size', 'actual input/output tokens', 'num_beams', 'low_bit', 'cpu_embedding', 'model loading time (s)', 'peak mem (GB)', 'streaming', 'use_fp16_torch_dtype']) + print("-------------------- Results df:--------------------") + print(df) print("-------------------- Results: {} --------------------".format(results)) if "pipeline" in api or "deepspeed" in api: if torch.distributed.get_rank() == 0: diff --git a/python/llm/test/benchmark/igpu-perf/3072-384_int4_fp16.yaml b/python/llm/test/benchmark/igpu-perf/3072-384_int4_fp16.yaml index 60202594cba..edca0e7b67d 100644 --- a/python/llm/test/benchmark/igpu-perf/3072-384_int4_fp16.yaml +++ b/python/llm/test/benchmark/igpu-perf/3072-384_int4_fp16.yaml @@ -19,7 +19,7 @@ repo_id: - 'openbmb/MiniCPM-V-2_6' local_model_hub: 'path to your local model hub' warm_up: 1 -num_trials: 3 +num_trials: 1 num_beams: 1 # default to greedy search low_bit: 'sym_int4' # default to use 'sym_int4' (i.e. symmetric int4) batch_size: 1 # default to 1 From 6a6549ff5ea26adb5669eac498e5b533df910208 Mon Sep 17 00:00:00 2001 From: songhappy Date: Tue, 27 Aug 2024 23:32:33 -0700 Subject: [PATCH 4/6] 41 new run --- .github/workflows/llm_performance_tests.yml | 667 +++++++++--------- python/llm/dev/benchmark/all-in-one/run.py | 5 +- .../src/ipex_llm/utils/benchmark_util_4_29.py | 2 +- .../igpu-perf/3072-384_int4_fp16.yaml | 2 +- 4 files changed, 339 insertions(+), 337 deletions(-) diff --git a/.github/workflows/llm_performance_tests.yml b/.github/workflows/llm_performance_tests.yml index a4b1fd2ee55..220bf2fe0fa 100644 --- a/.github/workflows/llm_performance_tests.yml +++ b/.github/workflows/llm_performance_tests.yml @@ -154,7 +154,7 @@ jobs: source /opt/intel/oneapi/setvars.sh export USE_XETLA=OFF export SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 - pip install transformers==4.36.2 + pip install transformers==4.41.2, trl cp python/llm/test/benchmark/arc-perf-transformers-436.yaml python/llm/dev/benchmark/all-in-one/config.yaml cd python/llm/dev/benchmark/all-in-one mkdir test_batch1 @@ -191,7 +191,7 @@ jobs: export USE_XETLA=OFF export SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 # upgrade for default transformers version - python -m pip install transformers==4.37.0 + python -m pip install transformers==4.41.2, trl # batch_size 1 cp python/llm/test/benchmark/arc-perf-transformers-437.yaml python/llm/dev/benchmark/all-in-one/config.yaml cd python/llm/dev/benchmark/all-in-one @@ -223,7 +223,7 @@ jobs: export USE_XETLA=OFF export SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 # upgrade transformers for model Qwen/Qwen1.5-MoE-A2.7B-Chat - python -m pip install transformers==4.40.0 + python -m pip install transformers==4.41.2 python -m pip install trl # batch_size 1 cp python/llm/test/benchmark/arc-perf-transformers-440.yaml python/llm/dev/benchmark/all-in-one/config.yaml @@ -557,7 +557,7 @@ jobs: pip install --upgrade pip pip install --upgrade wheel pip install --upgrade omegaconf pandas - pip install --upgrade tiktoken einops transformers_stream_generator matplotlib + pip install --upgrade tiktoken einops transformers_stream_generator matplotlib trl cd python\llm python setup.py clean --all bdist_wheel --win @@ -640,374 +640,375 @@ jobs: run_transformer_int4_fp16_gpu_win(repo_id, local_model_hub, in_out_pairs, warm_up, num_trials, num_beams, low_bit, cpu_embedding, batch_size, streaming) ' python/llm/dev/benchmark/all-in-one/run.py - # # 32-32 int4+fp16 - # - name: Prepare igpu perf test (32-32 int4+fp16) - # shell: bash - # run: | - # # hide time info - # # sed -i 's/str(end - st)/"xxxxxx"/g' python/llm/dev/benchmark/all-in-one/run.py - # sed -i 's/{api}-results-{today}.csv/32-32-{api}-results-{today}_test1.csv/g' python/llm/dev/benchmark/all-in-one/run.py - # sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/32-32_int4_fp16.yaml + # 32-32 int4+fp16 + - name: Prepare igpu perf test (32-32 int4+fp16) + shell: bash + run: | + # hide time info + # sed -i 's/str(end - st)/"xxxxxx"/g' python/llm/dev/benchmark/all-in-one/run.py + sed -i 's/{api}-results-{today}.csv/32-32-{api}-results-{today}_test1.csv/g' python/llm/dev/benchmark/all-in-one/run.py + sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/32-32_int4_fp16.yaml - # - name: Test on igpu (32-32 int4+fp16) - # shell: cmd - # run: | - # call conda activate igpu-perf - # set SYCL_CACHE_PERSISTENT=1 - # set BIGDL_LLM_XMX_DISABLED=1 - # REM for llava - # set TRANSFORMERS_OFFLINE=1 - - # cd python\llm\dev\benchmark\all-in-one - # move ..\..\..\test\benchmark\igpu-perf\32-32_int4_fp16.yaml config.yaml - # set PYTHONIOENCODING=utf-8 - # python run.py >> %CSV_SAVE_PATH%\32-32_int4_fp16\log\%LOG_FILE% 2>&1 - # if %ERRORLEVEL% neq 0 (exit /b 1) - # python ..\..\..\test\benchmark\igpu-perf\check_csv_results.py --yaml-file config.yaml --suffix test1 - # if %ERRORLEVEL% neq 0 (exit /b 1) - - # call conda deactivate - - # - name: Prepare igpu perf test for transformers 4.36 (32-32 int4+fp16) - # shell: bash - # run: | - # sed -i 's/{today}_test1/{today}_test2/g' python/llm/dev/benchmark/all-in-one/run.py - # sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/32-32_int4_fp16_436.yaml + - name: Test on igpu (32-32 int4+fp16) + shell: cmd + run: | + call conda activate igpu-perf + pip install transformers==4.41.2 trl + set SYCL_CACHE_PERSISTENT=1 + set BIGDL_LLM_XMX_DISABLED=1 + REM for llava + set TRANSFORMERS_OFFLINE=1 - # - name: Test on igpu for transformers 4.36 (32-32 int4+fp16) - # shell: cmd - # run: | - # call conda activate igpu-perf - # pip install transformers==4.36.2 + cd python\llm\dev\benchmark\all-in-one + move ..\..\..\test\benchmark\igpu-perf\32-32_int4_fp16.yaml config.yaml + set PYTHONIOENCODING=utf-8 + python run.py >> %CSV_SAVE_PATH%\32-32_int4_fp16\log\%LOG_FILE% 2>&1 + if %ERRORLEVEL% neq 0 (exit /b 1) + python ..\..\..\test\benchmark\igpu-perf\check_csv_results.py --yaml-file config.yaml --suffix test1 + if %ERRORLEVEL% neq 0 (exit /b 1) - # set SYCL_CACHE_PERSISTENT=1 - # set BIGDL_LLM_XMX_DISABLED=1 + call conda deactivate + + - name: Prepare igpu perf test for transformers 4.36 (32-32 int4+fp16) + shell: bash + run: | + sed -i 's/{today}_test1/{today}_test2/g' python/llm/dev/benchmark/all-in-one/run.py + sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/32-32_int4_fp16_436.yaml + + - name: Test on igpu for transformers 4.36 (32-32 int4+fp16) + shell: cmd + run: | + call conda activate igpu-perf + pip install transformers==4.41.2 trl + + set SYCL_CACHE_PERSISTENT=1 + set BIGDL_LLM_XMX_DISABLED=1 - # cd python\llm\dev\benchmark\all-in-one - # move ..\..\..\test\benchmark\igpu-perf\32-32_int4_fp16_436.yaml config.yaml - # set PYTHONIOENCODING=utf-8 - # python run.py >> %CSV_SAVE_PATH%\32-32_int4_fp16\log\%LOG_FILE% 2>&1 - # if %ERRORLEVEL% neq 0 (exit /b 1) - # python ..\..\..\test\benchmark\igpu-perf\check_csv_results.py --yaml-file config.yaml --suffix test2 - # if %ERRORLEVEL% neq 0 (exit /b 1) + cd python\llm\dev\benchmark\all-in-one + move ..\..\..\test\benchmark\igpu-perf\32-32_int4_fp16_436.yaml config.yaml + set PYTHONIOENCODING=utf-8 + python run.py >> %CSV_SAVE_PATH%\32-32_int4_fp16\log\%LOG_FILE% 2>&1 + if %ERRORLEVEL% neq 0 (exit /b 1) + python ..\..\..\test\benchmark\igpu-perf\check_csv_results.py --yaml-file config.yaml --suffix test2 + if %ERRORLEVEL% neq 0 (exit /b 1) - # call conda deactivate + call conda deactivate - # - name: Prepare igpu perf test for transformers 4.38 (32-32 int4+fp16) - # shell: bash - # run: | - # sed -i 's/{today}_test2/{today}_test3/g' python/llm/dev/benchmark/all-in-one/run.py - # sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/32-32_int4_fp16_438.yaml + - name: Prepare igpu perf test for transformers 4.38 (32-32 int4+fp16) + shell: bash + run: | + sed -i 's/{today}_test2/{today}_test3/g' python/llm/dev/benchmark/all-in-one/run.py + sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/32-32_int4_fp16_438.yaml - # - name: Test on igpu for transformers 4.38 (32-32 int4+fp16) - # shell: cmd - # run: | - # call conda activate igpu-perf - # pip install transformers==4.38.2 + - name: Test on igpu for transformers 4.38 (32-32 int4+fp16) + shell: cmd + run: | + call conda activate igpu-perf + pip install transformers==4.41.2 trl - # set SYCL_CACHE_PERSISTENT=1 - # set BIGDL_LLM_XMX_DISABLED=1 + set SYCL_CACHE_PERSISTENT=1 + set BIGDL_LLM_XMX_DISABLED=1 - # cd python\llm\dev\benchmark\all-in-one - # move ..\..\..\test\benchmark\igpu-perf\32-32_int4_fp16_438.yaml config.yaml - # set PYTHONIOENCODING=utf-8 - # python run.py >> %CSV_SAVE_PATH%\32-32_int4_fp16\log\%LOG_FILE% 2>&1 - # if %ERRORLEVEL% neq 0 if %ERRORLEVEL% neq -1073740791 (exit /b 1) - # python ..\..\..\test\benchmark\igpu-perf\check_csv_results.py --yaml-file config.yaml --suffix test3 - # if %ERRORLEVEL% neq 0 (exit /b 1) + cd python\llm\dev\benchmark\all-in-one + move ..\..\..\test\benchmark\igpu-perf\32-32_int4_fp16_438.yaml config.yaml + set PYTHONIOENCODING=utf-8 + python run.py >> %CSV_SAVE_PATH%\32-32_int4_fp16\log\%LOG_FILE% 2>&1 + if %ERRORLEVEL% neq 0 if %ERRORLEVEL% neq -1073740791 (exit /b 1) + python ..\..\..\test\benchmark\igpu-perf\check_csv_results.py --yaml-file config.yaml --suffix test3 + if %ERRORLEVEL% neq 0 (exit /b 1) - # call conda deactivate + call conda deactivate - # - name: Prepare igpu perf test for transformers 4.43 (32-32 int4+fp16) - # shell: bash - # run: | - # sed -i 's/{today}_test3/{today}_test4/g' python/llm/dev/benchmark/all-in-one/run.py - # sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/32-32_int4_fp16_443.yaml + - name: Prepare igpu perf test for transformers 4.43 (32-32 int4+fp16) + shell: bash + run: | + sed -i 's/{today}_test3/{today}_test4/g' python/llm/dev/benchmark/all-in-one/run.py + sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/32-32_int4_fp16_443.yaml - # - name: Test on igpu for transformers 4.43 (32-32 int4+fp16) - # shell: cmd - # run: | - # call conda activate igpu-perf - # pip install transformers==4.43.1 - # pip install trl + - name: Test on igpu for transformers 4.43 (32-32 int4+fp16) + shell: cmd + run: | + call conda activate igpu-perf + pip install transformers==4.43.1 + pip install trl - # set SYCL_CACHE_PERSISTENT=1 - # set BIGDL_LLM_XMX_DISABLED=1 + set SYCL_CACHE_PERSISTENT=1 + set BIGDL_LLM_XMX_DISABLED=1 - # cd python\llm\dev\benchmark\all-in-one - # move ..\..\..\test\benchmark\igpu-perf\32-32_int4_fp16_443.yaml config.yaml - # set PYTHONIOENCODING=utf-8 - # python run.py >> %CSV_SAVE_PATH%\32-32_int4_fp16\log\%LOG_FILE% 2>&1 - # if %ERRORLEVEL% neq 0 if %ERRORLEVEL% neq -1073740791 (exit /b 1) - # python ..\..\..\test\benchmark\igpu-perf\check_csv_results.py --yaml-file config.yaml --suffix test4 - # if %ERRORLEVEL% neq 0 (exit /b 1) + cd python\llm\dev\benchmark\all-in-one + move ..\..\..\test\benchmark\igpu-perf\32-32_int4_fp16_443.yaml config.yaml + set PYTHONIOENCODING=utf-8 + python run.py >> %CSV_SAVE_PATH%\32-32_int4_fp16\log\%LOG_FILE% 2>&1 + if %ERRORLEVEL% neq 0 if %ERRORLEVEL% neq -1073740791 (exit /b 1) + python ..\..\..\test\benchmark\igpu-perf\check_csv_results.py --yaml-file config.yaml --suffix test4 + if %ERRORLEVEL% neq 0 (exit /b 1) - # pip uninstall trl -y - # call conda deactivate + pip uninstall trl -y + call conda deactivate - # - name: Concat csv and generate html (32-32 int4+fp16) - # shell: cmd - # run: | - # call conda activate html-gen - - # cd python\llm\dev\benchmark\all-in-one - # python ..\..\..\test\benchmark\concat_csv.py - # if %ERRORLEVEL% neq 0 (exit /b 1) - # del /q *test*.csv - # move *.csv %CSV_SAVE_PATH%\32-32_int4_fp16\ - # cd ..\..\..\test\benchmark - # python csv_to_html.py -f %CSV_SAVE_PATH%\32-32_int4_fp16\ - # if %ERRORLEVEL% neq 0 (exit /b 1) - # move %CSV_SAVE_PATH%\32-32_int4_fp16\*.html %CSV_SAVE_PATH% - - # call conda deactivate - - # # TODO: create a action function here for different input - # # 1024-128 int4+fp16 - # - name: Prepare igpu perf test (1024-128 int4+fp16) - # shell: bash - # run: | - # sed -i 's/32-32/1024-128/g' python/llm/dev/benchmark/all-in-one/run.py - # sed -i 's/{today}_test4/{today}_test1/g' python/llm/dev/benchmark/all-in-one/run.py - # sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/1024-128_int4_fp16.yaml + - name: Concat csv and generate html (32-32 int4+fp16) + shell: cmd + run: | + call conda activate html-gen - # - name: Test on igpu (1024-128 int4+fp16) - # shell: cmd - # run: | - # call conda activate igpu-perf - # pip install transformers==4.37.0 - - # set SYCL_CACHE_PERSISTENT=1 - # set BIGDL_LLM_XMX_DISABLED=1 - # REM for llava - # set TRANSFORMERS_OFFLINE=1 - - # cd python\llm\dev\benchmark\all-in-one - # move ..\..\..\test\benchmark\igpu-perf\1024-128_int4_fp16.yaml config.yaml - # set PYTHONIOENCODING=utf-8 - # python run.py >> %CSV_SAVE_PATH%\1024-128_int4_fp16\log\%LOG_FILE% 2>&1 - # if %ERRORLEVEL% neq 0 (exit /b 1) - # python ..\..\..\test\benchmark\igpu-perf\check_csv_results.py --yaml-file config.yaml --suffix test1 - # if %ERRORLEVEL% neq 0 (exit /b 1) - - # call conda deactivate - - # - name: Prepare igpu perf test for transformers 4.36 (1024-128 int4+fp16) - # shell: bash - # run: | - # sed -i 's/{today}_test1/{today}_test2/g' python/llm/dev/benchmark/all-in-one/run.py - # sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/1024-128_int4_fp16_436.yaml + cd python\llm\dev\benchmark\all-in-one + python ..\..\..\test\benchmark\concat_csv.py + if %ERRORLEVEL% neq 0 (exit /b 1) + del /q *test*.csv + move *.csv %CSV_SAVE_PATH%\32-32_int4_fp16\ + cd ..\..\..\test\benchmark + python csv_to_html.py -f %CSV_SAVE_PATH%\32-32_int4_fp16\ + if %ERRORLEVEL% neq 0 (exit /b 1) + move %CSV_SAVE_PATH%\32-32_int4_fp16\*.html %CSV_SAVE_PATH% - # - name: Test on igpu for transformers 4.36 (1024-128 int4+fp16) - # shell: cmd - # run: | - # call conda activate igpu-perf - # pip install transformers==4.36.2 + call conda deactivate - # set SYCL_CACHE_PERSISTENT=1 - # set BIGDL_LLM_XMX_DISABLED=1 + # TODO: create a action function here for different input + # 1024-128 int4+fp16 + - name: Prepare igpu perf test (1024-128 int4+fp16) + shell: bash + run: | + sed -i 's/32-32/1024-128/g' python/llm/dev/benchmark/all-in-one/run.py + sed -i 's/{today}_test4/{today}_test1/g' python/llm/dev/benchmark/all-in-one/run.py + sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/1024-128_int4_fp16.yaml - # cd python\llm\dev\benchmark\all-in-one - # move ..\..\..\test\benchmark\igpu-perf\1024-128_int4_fp16_436.yaml config.yaml - # set PYTHONIOENCODING=utf-8 - # python run.py >> %CSV_SAVE_PATH%\1024-128_int4_fp16\log\%LOG_FILE% 2>&1 - # if %ERRORLEVEL% neq 0 (exit /b 1) - # python ..\..\..\test\benchmark\igpu-perf\check_csv_results.py --yaml-file config.yaml --suffix test2 - # if %ERRORLEVEL% neq 0 (exit /b 1) + - name: Test on igpu (1024-128 int4+fp16) + shell: cmd + run: | + call conda activate igpu-perf + pip install transformers==4.41.0 trl - # call conda deactivate + set SYCL_CACHE_PERSISTENT=1 + set BIGDL_LLM_XMX_DISABLED=1 + REM for llava + set TRANSFORMERS_OFFLINE=1 - # - name: Prepare igpu perf test for transformers 4.38 (1024-128 int4+fp16) - # shell: bash - # run: | - # sed -i 's/{today}_test2/{today}_test3/g' python/llm/dev/benchmark/all-in-one/run.py - # sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/1024-128_int4_fp16_438.yaml + cd python\llm\dev\benchmark\all-in-one + move ..\..\..\test\benchmark\igpu-perf\1024-128_int4_fp16.yaml config.yaml + set PYTHONIOENCODING=utf-8 + python run.py >> %CSV_SAVE_PATH%\1024-128_int4_fp16\log\%LOG_FILE% 2>&1 + if %ERRORLEVEL% neq 0 (exit /b 1) + python ..\..\..\test\benchmark\igpu-perf\check_csv_results.py --yaml-file config.yaml --suffix test1 + if %ERRORLEVEL% neq 0 (exit /b 1) - # - name: Test on igpu for transformers 4.38 (1024-128 int4+fp16) - # shell: cmd - # run: | - # call conda activate igpu-perf - # pip install transformers==4.38.2 + call conda deactivate - # set SYCL_CACHE_PERSISTENT=1 - # set BIGDL_LLM_XMX_DISABLED=1 + - name: Prepare igpu perf test for transformers 4.36 (1024-128 int4+fp16) + shell: bash + run: | + sed -i 's/{today}_test1/{today}_test2/g' python/llm/dev/benchmark/all-in-one/run.py + sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/1024-128_int4_fp16_436.yaml - # cd python\llm\dev\benchmark\all-in-one - # move ..\..\..\test\benchmark\igpu-perf\1024-128_int4_fp16_438.yaml config.yaml - # set PYTHONIOENCODING=utf-8 - # python run.py >> %CSV_SAVE_PATH%\1024-128_int4_fp16\log\%LOG_FILE% 2>&1 - # if %ERRORLEVEL% neq 0 (exit /b 1) - # python ..\..\..\test\benchmark\igpu-perf\check_csv_results.py --yaml-file config.yaml --suffix test3 - # if %ERRORLEVEL% neq 0 (exit /b 1) + - name: Test on igpu for transformers 4.36 (1024-128 int4+fp16) + shell: cmd + run: | + call conda activate igpu-perf + pip install transformers==4.41.2 trl - # call conda deactivate + set SYCL_CACHE_PERSISTENT=1 + set BIGDL_LLM_XMX_DISABLED=1 - # - name: Prepare igpu perf test for transformers 4.43 (1024-128 int4+fp16) - # shell: bash - # run: | - # sed -i 's/{today}_test3/{today}_test4/g' python/llm/dev/benchmark/all-in-one/run.py - # sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/1024-128_int4_fp16_443.yaml + cd python\llm\dev\benchmark\all-in-one + move ..\..\..\test\benchmark\igpu-perf\1024-128_int4_fp16_436.yaml config.yaml + set PYTHONIOENCODING=utf-8 + python run.py >> %CSV_SAVE_PATH%\1024-128_int4_fp16\log\%LOG_FILE% 2>&1 + if %ERRORLEVEL% neq 0 (exit /b 1) + python ..\..\..\test\benchmark\igpu-perf\check_csv_results.py --yaml-file config.yaml --suffix test2 + if %ERRORLEVEL% neq 0 (exit /b 1) - # - name: Test on igpu for transformers 4.43 (1024-128 int4+fp16) - # shell: cmd - # run: | - # call conda activate igpu-perf - # pip install transformers==4.43.1 - # pip install trl + call conda deactivate - # set SYCL_CACHE_PERSISTENT=1 - # set BIGDL_LLM_XMX_DISABLED=1 + - name: Prepare igpu perf test for transformers 4.38 (1024-128 int4+fp16) + shell: bash + run: | + sed -i 's/{today}_test2/{today}_test3/g' python/llm/dev/benchmark/all-in-one/run.py + sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/1024-128_int4_fp16_438.yaml - # cd python\llm\dev\benchmark\all-in-one - # move ..\..\..\test\benchmark\igpu-perf\1024-128_int4_fp16_443.yaml config.yaml - # set PYTHONIOENCODING=utf-8 - # python run.py >> %CSV_SAVE_PATH%\1024-128_int4_fp16\log\%LOG_FILE% 2>&1 - # if %ERRORLEVEL% neq 0 (exit /b 1) - # python ..\..\..\test\benchmark\igpu-perf\check_csv_results.py --yaml-file config.yaml --suffix test4 - # if %ERRORLEVEL% neq 0 (exit /b 1) + - name: Test on igpu for transformers 4.38 (1024-128 int4+fp16) + shell: cmd + run: | + call conda activate igpu-perf + pip install transformers==4.41.2 trl - # pip uninstall trl -y - # call conda deactivate + set SYCL_CACHE_PERSISTENT=1 + set BIGDL_LLM_XMX_DISABLED=1 - # - name: Concat csv and generate html (1024-128 int4+fp16) - # shell: cmd - # run: | - # call conda activate html-gen - - # cd python\llm\dev\benchmark\all-in-one - # python ..\..\..\test\benchmark\concat_csv.py - # if %ERRORLEVEL% neq 0 (exit /b 1) - # del /q *test*.csv - # move *.csv %CSV_SAVE_PATH%\1024-128_int4_fp16\ - # cd ..\..\..\test\benchmark - # python csv_to_html.py -f %CSV_SAVE_PATH%\1024-128_int4_fp16\ - # if %ERRORLEVEL% neq 0 (exit /b 1) - # move %CSV_SAVE_PATH%\1024-128_int4_fp16\*.html %CSV_SAVE_PATH% - - # call conda deactivate - - # # 2048-256 int4+fp16 - # - name: Prepare igpu perf test (2048-256 int4+fp16) - # shell: bash - # run: | - # sed -i 's/1024-128/2048-256/g' python/llm/dev/benchmark/all-in-one/run.py - # sed -i 's/{today}_test4/{today}_test1/g' python/llm/dev/benchmark/all-in-one/run.py - # sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/2048-256_int4_fp16.yaml + cd python\llm\dev\benchmark\all-in-one + move ..\..\..\test\benchmark\igpu-perf\1024-128_int4_fp16_438.yaml config.yaml + set PYTHONIOENCODING=utf-8 + python run.py >> %CSV_SAVE_PATH%\1024-128_int4_fp16\log\%LOG_FILE% 2>&1 + if %ERRORLEVEL% neq 0 (exit /b 1) + python ..\..\..\test\benchmark\igpu-perf\check_csv_results.py --yaml-file config.yaml --suffix test3 + if %ERRORLEVEL% neq 0 (exit /b 1) - # - name: Test on igpu (2048-256 int4+fp16) - # shell: cmd - # run: | - # call conda activate igpu-perf - # pip install transformers==4.37.0 - - # set SYCL_CACHE_PERSISTENT=1 - # set BIGDL_LLM_XMX_DISABLED=1 - # REM for llava - # set TRANSFORMERS_OFFLINE=1 - - # cd python\llm\dev\benchmark\all-in-one - # move ..\..\..\test\benchmark\igpu-perf\2048-256_int4_fp16.yaml config.yaml - # set PYTHONIOENCODING=utf-8 - # python run.py >> %CSV_SAVE_PATH%\2048-256_int4_fp16\log\%LOG_FILE% 2>&1 - # if %ERRORLEVEL% neq 0 (exit /b 1) - # python ..\..\..\test\benchmark\igpu-perf\check_csv_results.py --yaml-file config.yaml --suffix test1 - # if %ERRORLEVEL% neq 0 (exit /b 1) - - # call conda deactivate - - # - name: Prepare igpu perf test for transformers 4.36 (2048-256 int4+fp16) - # shell: bash - # run: | - # sed -i 's/{today}_test1/{today}_test2/g' python/llm/dev/benchmark/all-in-one/run.py - # sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/2048-256_int4_fp16_436.yaml + call conda deactivate - # - name: Test on igpu for transformers 4.36 (2048-256 int4+fp16) - # shell: cmd - # run: | - # call conda activate igpu-perf - # pip install transformers==4.36.2 + - name: Prepare igpu perf test for transformers 4.43 (1024-128 int4+fp16) + shell: bash + run: | + sed -i 's/{today}_test3/{today}_test4/g' python/llm/dev/benchmark/all-in-one/run.py + sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/1024-128_int4_fp16_443.yaml - # set SYCL_CACHE_PERSISTENT=1 - # set BIGDL_LLM_XMX_DISABLED=1 + - name: Test on igpu for transformers 4.43 (1024-128 int4+fp16) + shell: cmd + run: | + call conda activate igpu-perf + pip install transformers==4.43.1 + pip install trl - # cd python\llm\dev\benchmark\all-in-one - # move ..\..\..\test\benchmark\igpu-perf\2048-256_int4_fp16_436.yaml config.yaml - # set PYTHONIOENCODING=utf-8 - # python run.py >> %CSV_SAVE_PATH%\2048-256_int4_fp16\log\%LOG_FILE% 2>&1 - # if %ERRORLEVEL% neq 0 (exit /b 1) - # python ..\..\..\test\benchmark\igpu-perf\check_csv_results.py --yaml-file config.yaml --suffix test2 - # if %ERRORLEVEL% neq 0 (exit /b 1) + set SYCL_CACHE_PERSISTENT=1 + set BIGDL_LLM_XMX_DISABLED=1 - # call conda deactivate + cd python\llm\dev\benchmark\all-in-one + move ..\..\..\test\benchmark\igpu-perf\1024-128_int4_fp16_443.yaml config.yaml + set PYTHONIOENCODING=utf-8 + python run.py >> %CSV_SAVE_PATH%\1024-128_int4_fp16\log\%LOG_FILE% 2>&1 + if %ERRORLEVEL% neq 0 (exit /b 1) + python ..\..\..\test\benchmark\igpu-perf\check_csv_results.py --yaml-file config.yaml --suffix test4 + if %ERRORLEVEL% neq 0 (exit /b 1) - # - name: Prepare igpu perf test for transformers 4.38 (2048-256 int4+fp16) - # shell: bash - # run: | - # sed -i 's/{today}_test2/{today}_test3/g' python/llm/dev/benchmark/all-in-one/run.py - # sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/2048-256_int4_fp16_438.yaml + pip uninstall trl -y + call conda deactivate - # - name: Test on igpu for transformers 4.38 (2048-256 int4+fp16) - # shell: cmd - # run: | - # call conda activate igpu-perf - # pip install transformers==4.38.2 + - name: Concat csv and generate html (1024-128 int4+fp16) + shell: cmd + run: | + call conda activate html-gen - # set SYCL_CACHE_PERSISTENT=1 - # set BIGDL_LLM_XMX_DISABLED=1 + cd python\llm\dev\benchmark\all-in-one + python ..\..\..\test\benchmark\concat_csv.py + if %ERRORLEVEL% neq 0 (exit /b 1) + del /q *test*.csv + move *.csv %CSV_SAVE_PATH%\1024-128_int4_fp16\ + cd ..\..\..\test\benchmark + python csv_to_html.py -f %CSV_SAVE_PATH%\1024-128_int4_fp16\ + if %ERRORLEVEL% neq 0 (exit /b 1) + move %CSV_SAVE_PATH%\1024-128_int4_fp16\*.html %CSV_SAVE_PATH% - # cd python\llm\dev\benchmark\all-in-one - # move ..\..\..\test\benchmark\igpu-perf\2048-256_int4_fp16_438.yaml config.yaml - # set PYTHONIOENCODING=utf-8 - # python run.py >> %CSV_SAVE_PATH%\2048-256_int4_fp16\log\%LOG_FILE% 2>&1 - # if %ERRORLEVEL% neq 0 (exit /b 1) - # python ..\..\..\test\benchmark\igpu-perf\check_csv_results.py --yaml-file config.yaml --suffix test3 - # if %ERRORLEVEL% neq 0 (exit /b 1) + call conda deactivate - # call conda deactivate + # 2048-256 int4+fp16 + - name: Prepare igpu perf test (2048-256 int4+fp16) + shell: bash + run: | + sed -i 's/1024-128/2048-256/g' python/llm/dev/benchmark/all-in-one/run.py + sed -i 's/{today}_test4/{today}_test1/g' python/llm/dev/benchmark/all-in-one/run.py + sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/2048-256_int4_fp16.yaml - # - name: Prepare igpu perf test for transformers 4.43 (2048-256 int4+fp16) - # shell: bash - # run: | - # sed -i 's/{today}_test3/{today}_test4/g' python/llm/dev/benchmark/all-in-one/run.py - # sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/2048-256_int4_fp16_443.yaml + - name: Test on igpu (2048-256 int4+fp16) + shell: cmd + run: | + call conda activate igpu-perf + pip install transformers==4.41.2 trl - # - name: Test on igpu for transformers 4.43 (2048-256 int4+fp16) - # shell: cmd - # run: | - # call conda activate igpu-perf - # pip install transformers==4.43.1 - # pip install trl + set SYCL_CACHE_PERSISTENT=1 + set BIGDL_LLM_XMX_DISABLED=1 + REM for llava + set TRANSFORMERS_OFFLINE=1 - # set SYCL_CACHE_PERSISTENT=1 - # set BIGDL_LLM_XMX_DISABLED=1 + cd python\llm\dev\benchmark\all-in-one + move ..\..\..\test\benchmark\igpu-perf\2048-256_int4_fp16.yaml config.yaml + set PYTHONIOENCODING=utf-8 + python run.py >> %CSV_SAVE_PATH%\2048-256_int4_fp16\log\%LOG_FILE% 2>&1 + if %ERRORLEVEL% neq 0 (exit /b 1) + python ..\..\..\test\benchmark\igpu-perf\check_csv_results.py --yaml-file config.yaml --suffix test1 + if %ERRORLEVEL% neq 0 (exit /b 1) - # cd python\llm\dev\benchmark\all-in-one - # move ..\..\..\test\benchmark\igpu-perf\2048-256_int4_fp16_443.yaml config.yaml - # set PYTHONIOENCODING=utf-8 - # python run.py >> %CSV_SAVE_PATH%\2048-256_int4_fp16\log\%LOG_FILE% 2>&1 - # if %ERRORLEVEL% neq 0 (exit /b 1) - # python ..\..\..\test\benchmark\igpu-perf\check_csv_results.py --yaml-file config.yaml --suffix test4 - # if %ERRORLEVEL% neq 0 (exit /b 1) + call conda deactivate - # pip uninstall trl -y - # call conda deactivate + - name: Prepare igpu perf test for transformers 4.36 (2048-256 int4+fp16) + shell: bash + run: | + sed -i 's/{today}_test1/{today}_test2/g' python/llm/dev/benchmark/all-in-one/run.py + sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/2048-256_int4_fp16_436.yaml - # # - name: Concat csv and generate html (2048-256 int4+fp16) - # shell: cmd - # run: | - # call conda activate html-gen + - name: Test on igpu for transformers 4.36 (2048-256 int4+fp16) + shell: cmd + run: | + call conda activate igpu-perf + pip install transformers==4.41.2 trl + + set SYCL_CACHE_PERSISTENT=1 + set BIGDL_LLM_XMX_DISABLED=1 + + cd python\llm\dev\benchmark\all-in-one + move ..\..\..\test\benchmark\igpu-perf\2048-256_int4_fp16_436.yaml config.yaml + set PYTHONIOENCODING=utf-8 + python run.py >> %CSV_SAVE_PATH%\2048-256_int4_fp16\log\%LOG_FILE% 2>&1 + if %ERRORLEVEL% neq 0 (exit /b 1) + python ..\..\..\test\benchmark\igpu-perf\check_csv_results.py --yaml-file config.yaml --suffix test2 + if %ERRORLEVEL% neq 0 (exit /b 1) + + call conda deactivate + + - name: Prepare igpu perf test for transformers 4.38 (2048-256 int4+fp16) + shell: bash + run: | + sed -i 's/{today}_test2/{today}_test3/g' python/llm/dev/benchmark/all-in-one/run.py + sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/2048-256_int4_fp16_438.yaml - # cd python\llm\dev\benchmark\all-in-one - # python ..\..\..\test\benchmark\concat_csv.py - # if %ERRORLEVEL% neq 0 (exit /b 1) - # del /q *test*.csv - # move *.csv %CSV_SAVE_PATH%\2048-256_int4_fp16\ - # cd ..\..\..\test\benchmark - # python csv_to_html.py -f %CSV_SAVE_PATH%\2048-256_int4_fp16\ - # if %ERRORLEVEL% neq 0 (exit /b 1) - # move %CSV_SAVE_PATH%\2048-256_int4_fp16\*.html %CSV_SAVE_PATH% + - name: Test on igpu for transformers 4.38 (2048-256 int4+fp16) + shell: cmd + run: | + call conda activate igpu-perf + pip install transformers==4.41.2 trl - # call conda deactivate + set SYCL_CACHE_PERSISTENT=1 + set BIGDL_LLM_XMX_DISABLED=1 + + cd python\llm\dev\benchmark\all-in-one + move ..\..\..\test\benchmark\igpu-perf\2048-256_int4_fp16_438.yaml config.yaml + set PYTHONIOENCODING=utf-8 + python run.py >> %CSV_SAVE_PATH%\2048-256_int4_fp16\log\%LOG_FILE% 2>&1 + if %ERRORLEVEL% neq 0 (exit /b 1) + python ..\..\..\test\benchmark\igpu-perf\check_csv_results.py --yaml-file config.yaml --suffix test3 + if %ERRORLEVEL% neq 0 (exit /b 1) + + call conda deactivate + + - name: Prepare igpu perf test for transformers 4.43 (2048-256 int4+fp16) + shell: bash + run: | + sed -i 's/{today}_test3/{today}_test4/g' python/llm/dev/benchmark/all-in-one/run.py + sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/2048-256_int4_fp16_443.yaml + + - name: Test on igpu for transformers 4.43 (2048-256 int4+fp16) + shell: cmd + run: | + call conda activate igpu-perf + pip install transformers==4.43.1 + pip install trl + + set SYCL_CACHE_PERSISTENT=1 + set BIGDL_LLM_XMX_DISABLED=1 + + cd python\llm\dev\benchmark\all-in-one + move ..\..\..\test\benchmark\igpu-perf\2048-256_int4_fp16_443.yaml config.yaml + set PYTHONIOENCODING=utf-8 + python run.py >> %CSV_SAVE_PATH%\2048-256_int4_fp16\log\%LOG_FILE% 2>&1 + if %ERRORLEVEL% neq 0 (exit /b 1) + python ..\..\..\test\benchmark\igpu-perf\check_csv_results.py --yaml-file config.yaml --suffix test4 + if %ERRORLEVEL% neq 0 (exit /b 1) + + pip uninstall trl -y + call conda deactivate + + - name: Concat csv and generate html (2048-256 int4+fp16) + shell: cmd + run: | + call conda activate html-gen + + cd python\llm\dev\benchmark\all-in-one + python ..\..\..\test\benchmark\concat_csv.py + if %ERRORLEVEL% neq 0 (exit /b 1) + del /q *test*.csv + move *.csv %CSV_SAVE_PATH%\2048-256_int4_fp16\ + cd ..\..\..\test\benchmark + python csv_to_html.py -f %CSV_SAVE_PATH%\2048-256_int4_fp16\ + if %ERRORLEVEL% neq 0 (exit /b 1) + move %CSV_SAVE_PATH%\2048-256_int4_fp16\*.html %CSV_SAVE_PATH% + + call conda deactivate # 3072-384 int4+fp16 - name: Prepare igpu perf test (3072-384 int4+fp16) @@ -1021,7 +1022,7 @@ jobs: shell: cmd run: | call conda activate igpu-perf - pip install transformers==4.37.0 + pip install transformers==4.41.2 trl set SYCL_CACHE_PERSISTENT=1 set BIGDL_LLM_XMX_DISABLED=1 @@ -1048,7 +1049,7 @@ jobs: shell: cmd run: | call conda activate igpu-perf - pip install transformers==4.36.2 + pip install transformers==4.41.2 trl set SYCL_CACHE_PERSISTENT=1 set BIGDL_LLM_XMX_DISABLED=1 @@ -1073,7 +1074,7 @@ jobs: shell: cmd run: | call conda activate igpu-perf - pip install transformers==4.38.2 + pip install transformers==4.41.2 trl set SYCL_CACHE_PERSISTENT=1 set BIGDL_LLM_XMX_DISABLED=1 @@ -1144,7 +1145,7 @@ jobs: shell: cmd run: | call conda activate igpu-perf - pip install transformers==4.37.0 + pip install transformers==4.41.2 trl set SYCL_CACHE_PERSISTENT=1 set BIGDL_LLM_XMX_DISABLED=1 @@ -1171,7 +1172,7 @@ jobs: shell: cmd run: | call conda activate igpu-perf - pip install transformers==4.38.2 + pip install transformers==4.41.2 trl set SYCL_CACHE_PERSISTENT=1 set BIGDL_LLM_XMX_DISABLED=1 @@ -1242,7 +1243,7 @@ jobs: shell: cmd run: | call conda activate igpu-perf - pip install transformers==4.37.0 + pip install transformers==4.41.2 trl set SYCL_CACHE_PERSISTENT=1 set BIGDL_LLM_XMX_DISABLED=1 @@ -1269,7 +1270,7 @@ jobs: shell: cmd run: | call conda activate igpu-perf - pip install transformers==4.36.2 + pip install transformers==4.41.2 trl set SYCL_CACHE_PERSISTENT=1 set BIGDL_LLM_XMX_DISABLED=1 @@ -1294,7 +1295,7 @@ jobs: shell: cmd run: | call conda activate igpu-perf - pip install transformers==4.38.2 + pip install transformers==4.41.2 trl set SYCL_CACHE_PERSISTENT=1 set BIGDL_LLM_XMX_DISABLED=1 @@ -1364,7 +1365,7 @@ jobs: shell: cmd run: | call conda activate igpu-perf - pip install transformers==4.37.0 + pip install transformers==4.41.2 trl set SYCL_CACHE_PERSISTENT=1 set BIGDL_LLM_XMX_DISABLED=1 @@ -1391,7 +1392,7 @@ jobs: shell: cmd run: | call conda activate igpu-perf - pip install transformers==4.36.2 + pip install transformers==4.41.2 trl set SYCL_CACHE_PERSISTENT=1 set BIGDL_LLM_XMX_DISABLED=1 @@ -1416,7 +1417,7 @@ jobs: shell: cmd run: | call conda activate igpu-perf - pip install transformers==4.38.2 + pip install transformers==4.41.2 trl set SYCL_CACHE_PERSISTENT=1 set BIGDL_LLM_XMX_DISABLED=1 diff --git a/python/llm/dev/benchmark/all-in-one/run.py b/python/llm/dev/benchmark/all-in-one/run.py index 72a25691151..0919d1349c7 100644 --- a/python/llm/dev/benchmark/all-in-one/run.py +++ b/python/llm/dev/benchmark/all-in-one/run.py @@ -2038,8 +2038,7 @@ def run_pipeline_parallel_gpu(repo_id, for api in conf.test_api: global csv_name csv_name = f'{current_dir}/{api}-results-{today}.csv' - print("-------------------- csv_name: {} --------------------".format(csv_name)) - print(conf) + try: line_counter = len(open(csv_name).readlines()) except: @@ -2071,6 +2070,8 @@ def run_pipeline_parallel_gpu(repo_id, print("-------------------- Results df:--------------------") print(df) print("-------------------- Results: {} --------------------".format(results)) + print("-------------------- csv_name: {} --------------------".format(csv_name)) + print(conf) if "pipeline" in api or "deepspeed" in api: if torch.distributed.get_rank() == 0: df.index += max(line_counter - 1, 0) diff --git a/python/llm/src/ipex_llm/utils/benchmark_util_4_29.py b/python/llm/src/ipex_llm/utils/benchmark_util_4_29.py index d64631f1f4c..8e74b4507c5 100644 --- a/python/llm/src/ipex_llm/utils/benchmark_util_4_29.py +++ b/python/llm/src/ipex_llm/utils/benchmark_util_4_29.py @@ -2452,7 +2452,7 @@ def greedy_search( last_token_time.append(end - st) # stop if we exceed the maximum length - if stopping_criteria(input_ids, scores): + if stopping_criteria(input_ids, scores)[0]: this_peer_finished = True if this_peer_finished and not synced_gpus: diff --git a/python/llm/test/benchmark/igpu-perf/3072-384_int4_fp16.yaml b/python/llm/test/benchmark/igpu-perf/3072-384_int4_fp16.yaml index edca0e7b67d..60202594cba 100644 --- a/python/llm/test/benchmark/igpu-perf/3072-384_int4_fp16.yaml +++ b/python/llm/test/benchmark/igpu-perf/3072-384_int4_fp16.yaml @@ -19,7 +19,7 @@ repo_id: - 'openbmb/MiniCPM-V-2_6' local_model_hub: 'path to your local model hub' warm_up: 1 -num_trials: 1 +num_trials: 3 num_beams: 1 # default to greedy search low_bit: 'sym_int4' # default to use 'sym_int4' (i.e. symmetric int4) batch_size: 1 # default to 1 From f3256609dd2264fd74713ff62d94aecc36f78750 Mon Sep 17 00:00:00 2001 From: songhappy Date: Fri, 30 Aug 2024 13:22:41 -0700 Subject: [PATCH 5/6] for cpu --- .github/workflows/llm_performance_tests.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/workflows/llm_performance_tests.yml b/.github/workflows/llm_performance_tests.yml index 220bf2fe0fa..44c3fb2ec7e 100644 --- a/.github/workflows/llm_performance_tests.yml +++ b/.github/workflows/llm_performance_tests.yml @@ -419,6 +419,8 @@ jobs: export https_proxy=${HTTPS_PROXY} source ipex-llm-init -t export OMP_NUM_THREADS=48 + # upgrade for default transformers version + python -m pip install transformers==4.41.2, trl # hide time info sed -i 's/str(end - st)/"xxxxxx"/g' run.py python run.py @@ -499,6 +501,8 @@ jobs: cd python/llm/dev/benchmark/all-in-one export http_proxy=${HTTP_PROXY} export https_proxy=${HTTPS_PROXY} + # upgrade for default transformers version + python -m pip install transformers==4.41.2, trl # hide time info sed -i 's/str(end - st)/"xxxxxx"/g' run.py python run.py From 428e62b44a7d0bd7bcb170e5e2c1dc884ea39f2c Mon Sep 17 00:00:00 2001 From: songhappy Date: Wed, 4 Sep 2024 16:55:37 -0700 Subject: [PATCH 6/6] update --- python/llm/src/ipex_llm/transformers/models/llama.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/llm/src/ipex_llm/transformers/models/llama.py b/python/llm/src/ipex_llm/transformers/models/llama.py index dfbbaf003a6..873407fbddd 100644 --- a/python/llm/src/ipex_llm/transformers/models/llama.py +++ b/python/llm/src/ipex_llm/transformers/models/llama.py @@ -1579,7 +1579,7 @@ def llama_attention_forward_4_41_original( past_key_value.key_cache[self.layer_idx] = key_states past_key_value.value_cache[self.layer_idx] = value_states - if cache_position is not None: + if attention_mask is not None: new_attention_mask = attention_mask[:, :, :, 0:kv_seq_len] else: new_attention_mask = attention_mask