diff --git a/.github/workflows/cpp-graph-test.yml b/.github/workflows/cpp-graph-test.yml
index fe60897cecf..a63e2b787c5 100644
--- a/.github/workflows/cpp-graph-test.yml
+++ b/.github/workflows/cpp-graph-test.yml
@@ -37,6 +37,7 @@ jobs:
         uses: actions/checkout@v3
         with:
           submodules: "recursive"
+          fetch-tags: true
 
       - name: Env build
         run: |
diff --git a/.github/workflows/deploy-test.yml b/.github/workflows/deploy-test.yml
index 758cfa0dc4b..318b1fda042 100644
--- a/.github/workflows/deploy-test.yml
+++ b/.github/workflows/deploy-test.yml
@@ -65,6 +65,7 @@ jobs:
         uses: actions/checkout@v3
         with:
           submodules: "recursive"
+          fetch-tags: true # We need this because GitHub needs to clone the branch to pipeline
 
       - name: Docker Build
         run: |
diff --git a/.github/workflows/llm-test.yml b/.github/workflows/llm-test.yml
index 2ca234c4760..450fa455305 100644
--- a/.github/workflows/llm-test.yml
+++ b/.github/workflows/llm-test.yml
@@ -41,6 +41,7 @@ jobs:
         uses: actions/checkout@v3
         with:
           submodules: "recursive"
+          fetch-tags: true
 
       - name: Env build
         run: |
diff --git a/.github/workflows/optimize-test.yml b/.github/workflows/optimize-test.yml
index 8fe27a3f31e..8811dc91039 100644
--- a/.github/workflows/optimize-test.yml
+++ b/.github/workflows/optimize-test.yml
@@ -62,6 +62,7 @@ jobs:
         uses: actions/checkout@v3
         with:
           submodules: "recursive"
+          fetch-tags: true # We need this because GitHub needs to clone the branch to pipeline
 
       - name: Docker Build
         run: |
diff --git a/.github/workflows/script/SparseLibCI/run_sparse_lib.sh b/.github/workflows/script/SparseLibCI/run_sparse_lib.sh
index 899216ffb7a..d6001a1a458 100644
--- a/.github/workflows/script/SparseLibCI/run_sparse_lib.sh
+++ b/.github/workflows/script/SparseLibCI/run_sparse_lib.sh
@@ -1,6 +1,4 @@
 #!/bin/bash
-export COVERAGE_RCFILE="/intel-extension-for-transformers/.github/workflows/script/unitTest/coverage/.coveragerc"
-
 output_log_dir="/intel-extension-for-transformers/benchmark_log"
 WORKING_DIR="/intel-extension-for-transformers/intel_extension_for_transformers/llm/runtime/deprecated"
 mkdir ${output_log_dir}
diff --git a/.github/workflows/script/formatScan/pylint.sh b/.github/workflows/script/formatScan/pylint.sh
index 9dd77b50431..1aa9e9bec3b 100644
--- a/.github/workflows/script/formatScan/pylint.sh
+++ b/.github/workflows/script/formatScan/pylint.sh
@@ -12,7 +12,6 @@ $BOLD_YELLOW && echo "---------------- run python setup.py sdist bdist_wheel ---
 export PYTHONPATH=`pwd`
 #$BOLD_YELLOW && echo "---------------- pip install binary -------------" && $RESET
 #pip install dist/intel_extension_for_transformers*.whl
-pip list
 
 cd /intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/
diff --git a/.github/workflows/script/unitTest/coverage/.coveragerc b/.github/workflows/script/unitTest/coverage/.coveragerc
deleted file mode 100644
index c7c4fc7a42b..00000000000
--- a/.github/workflows/script/unitTest/coverage/.coveragerc
+++ /dev/null
@@ -1,8 +0,0 @@
-[run]
-branch = True
-
-[report]
-omit =
-    */**/fake*yaml
-exclude_lines =
-    pragma: no cover
diff --git a/.github/workflows/script/unitTest/coverage/.engine-coveragerc b/.github/workflows/script/unitTest/coverage/.engine-coveragerc
new file mode 100644
index 00000000000..a60803774a9
--- /dev/null
+++ b/.github/workflows/script/unitTest/coverage/.engine-coveragerc
@@ -0,0 +1,14 @@
+[run]
+branch = True
+
+[report]
+omit =
+    */**/fake*yaml
+    */intel_extension_for_transformers/neural_chat/**
+    */intel_extension_for_transformers/transformers/**
+    */intel_extension_for_transformers/llm/evaluation/**
+    */intel_extension_for_transformers/llm/finetuning/**
+    */intel_extension_for_transformers/llm/inference/**
+    */intel_extension_for_transformers/llm/quantization/**
+exclude_lines =
+    pragma: no cover
diff --git a/.github/workflows/script/unitTest/coverage/.neural-chat-coveragerc b/.github/workflows/script/unitTest/coverage/.neural-chat-coveragerc
new file mode 100644
index 00000000000..30050b5d48b
--- /dev/null
+++ b/.github/workflows/script/unitTest/coverage/.neural-chat-coveragerc
@@ -0,0 +1,24 @@
+[run]
+branch = True
+
+[report]
+omit =
+    */**/fake*yaml
+    */**/fake.py
+    */intel_extension_for_transformers/llm/amp/**
+    */intel_extension_for_transformers/llm/evaluation/**
+    */intel_extension_for_transformers/llm/quantization/**
+    */intel_extension_for_transformers/llm/library/**
+    */intel_extension_for_transformers/llm/operator/**
+    */intel_extension_for_transformers/llm/runtime/**
+    */intel_extension_for_transformers/transformers/**
+exclude_lines =
+    pragma: no cover
+    raise NotImplementedError
+    raise TypeError
+    if self.device == "gpu":
+    if device == "gpu":
+    except ImportError:
+    except Exception as e:
+    onnx_version < ONNX18_VERSION
+    onnx_version >= ONNX18_VERSION
diff --git a/.github/workflows/script/unitTest/coverage/calc_coverage.sh b/.github/workflows/script/unitTest/coverage/calc_coverage.sh
index dea595b3d2a..08e3a3bece1 100644
--- a/.github/workflows/script/unitTest/coverage/calc_coverage.sh
+++ b/.github/workflows/script/unitTest/coverage/calc_coverage.sh
@@ -87,18 +87,22 @@ function check_coverage_status() {
             $BOLD_RED && echo "Unit Test failed with ${item} coverage decrease ${decrease}%" && $RESET
         done
         $BOLD_RED && echo "compare coverage to give detail info" && $RESET
-        bash -x compare_coverage.sh ${coverage_compare} ${coverage_log_pr}/coverage.log ${coverage_log_base}/coverage.log "FAILED" ${coverage_PR_lines_rate} ${coverage_base_lines_rate} ${coverage_PR_branches_rate} ${coverage_base_branches_rate}
+        bash compare_coverage.sh ${coverage_compare} ${coverage_log_pr}/coverage.log ${coverage_log_base}/coverage.log "FAILED" ${coverage_PR_lines_rate} ${coverage_base_lines_rate} ${coverage_PR_branches_rate} ${coverage_base_branches_rate}
         exit 1
     else
         $BOLD_GREEN && echo "Unit Test success with coverage lines: ${coverage_PR_lines_rate}%, branches: ${coverage_PR_branches_rate}%" && $RESET
         $BOLD_GREEN && echo "compare coverage to give detail info" && $RESET
-        bash -x compare_coverage.sh ${coverage_compare} ${coverage_log_pr}/coverage.log ${coverage_log_base}/coverage.log "SUCCESS" ${coverage_PR_lines_rate} ${coverage_base_lines_rate} ${coverage_PR_branches_rate} ${coverage_base_branches_rate}
+        bash compare_coverage.sh ${coverage_compare} ${coverage_log_pr}/coverage.log ${coverage_log_base}/coverage.log "SUCCESS" ${coverage_PR_lines_rate} ${coverage_base_lines_rate} ${coverage_PR_branches_rate} ${coverage_base_branches_rate}
     fi
 }
 
 function main() {
     compare_coverage
     check_coverage_status
+    $BOLD_BLUE && echo "PR lines coverage: $lines_PR_covered/$lines_PR_valid ($coverage_PR_lines_rate%)" && $RESET
+    $BOLD_BLUE && echo "PR branches coverage: $branches_PR_covered/$branches_PR_valid ($coverage_PR_branches_rate%)" && $RESET
+    $BOLD_BLUE && echo "BASE lines coverage: $lines_base_covered/$lines_base_valid ($coverage_base_lines_rate%)" && $RESET
+    $BOLD_BLUE && echo "BASE branches coverage: $branches_base_covered/$branches_base_valid ($coverage_base_branches_rate%)" && $RESET
 }
 
 main
diff --git a/.github/workflows/script/unitTest/run_unit_test_engine.sh b/.github/workflows/script/unitTest/run_unit_test_engine.sh
index 0ee166c565a..543df411c19 100644
--- a/.github/workflows/script/unitTest/run_unit_test_engine.sh
+++ b/.github/workflows/script/unitTest/run_unit_test_engine.sh
@@ -1,6 +1,6 @@
 #!/bin/bash
 source /intel-extension-for-transformers/.github/workflows/script/change_color.sh
-export COVERAGE_RCFILE="/intel-extension-for-transformers/.github/workflows/script/unitTest/coverage/.coveragerc"
+export COVERAGE_RCFILE="/intel-extension-for-transformers/.github/workflows/script/unitTest/coverage/.engine-coveragerc"
 LOG_DIR=/log_dir
 mkdir -p ${LOG_DIR}
 WORKING_DIR="/intel-extension-for-transformers/intel_extension_for_transformers/llm/runtime/deprecated"
@@ -86,7 +86,13 @@ function gtest() {
 
 function main() {
     bash /intel-extension-for-transformers/.github/workflows/script/unitTest/env_setup.sh
-    echo "test on ${test_name}"
+    cd ${WORKING_DIR}/test/pytest || exit 1
+    if [ -f "requirements.txt" ]; then
+        python -m pip install --default-timeout=100 -r requirements.txt
+        pip list
+    else
+        echo "requirements.txt file not found."
+    fi
     if [[ $test_name == "PR-test" ]]; then
         pytest "${LOG_DIR}/coverage_pr"
         gtest
diff --git a/.github/workflows/script/unitTest/run_unit_test_neuralchat.sh b/.github/workflows/script/unitTest/run_unit_test_neuralchat.sh
new file mode 100644
index 00000000000..a75be157a09
--- /dev/null
+++ b/.github/workflows/script/unitTest/run_unit_test_neuralchat.sh
@@ -0,0 +1,71 @@
+#!/bin/bash
+source /intel-extension-for-transformers/.github/workflows/script/change_color.sh
+export COVERAGE_RCFILE="/intel-extension-for-transformers/.github/workflows/script/unitTest/coverage/.neural-chat-coveragerc"
+LOG_DIR=/log_dir
+mkdir -p ${LOG_DIR}
+WORKING_DIR="/intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/tests"
+# get parameters
+PATTERN='[-a-zA-Z0-9_]*='
+PERF_STABLE_CHECK=true
+
+for i in "$@"; do
+    case $i in
+        --test_name=*)
+            test_name=`echo $i | sed "s/${PATTERN}//"`;;
+        *)
+            echo "Parameter $i not recognized."; exit 1;;
+    esac
+done
+
+function pytest() {
+    local coverage_log_dir=$1
+    mkdir -p ${coverage_log_dir}
+
+    cd ${WORKING_DIR} || exit 1
+    JOB_NAME=unit_test
+    ut_log_name=${LOG_DIR}/${JOB_NAME}.log
+    export GLOG_minloglevel=2
+
+    itrex_path=$(python -c 'import intel_extension_for_transformers; import os; print(os.path.dirname(intel_extension_for_transformers.__file__))')
+    find . -name "test*.py" | sed 's,\.\/,coverage run --source='"${itrex_path}"' --append ,g' | sed 's/$/ --verbose/' >run.sh
+    coverage erase
+
+    # run UT
+    $BOLD_YELLOW && echo "cat run.sh..." && $RESET
+    cat run.sh | tee ${ut_log_name}
+    $BOLD_YELLOW && echo "------UT start-------" && $RESET
+    bash run.sh 2>&1 | tee -a ${ut_log_name}
+    $BOLD_YELLOW && echo "------UT end -------" && $RESET
+
+    # run coverage report
+    coverage report -m --rcfile=${COVERAGE_RCFILE} | tee ${coverage_log_dir}/coverage.log
+    coverage html -d ${coverage_log_dir}/htmlcov --rcfile=${COVERAGE_RCFILE}
+    coverage xml -o ${coverage_log_dir}/coverage.xml --rcfile=${COVERAGE_RCFILE}
+
+    # check UT status
+    if [ $(grep -c "FAILED" ${ut_log_name}) != 0 ] || [ $(grep -c "OK" ${ut_log_name}) == 0 ]; then
+        $BOLD_RED && echo "Found errors in UT test, please check the output..." && $RESET
+        exit 1
+    fi
+    $BOLD_GREEN && echo "UT finished successfully!" && $RESET
+}
+
+function main() {
+    bash /intel-extension-for-transformers/.github/workflows/script/unitTest/env_setup.sh
+    wget http://nz2.archive.ubuntu.com/ubuntu/pool/main/o/openssl/libssl1.1_1.1.1f-1ubuntu2.19_amd64.deb
+    dpkg -i libssl1.1_1.1.1f-1ubuntu2.19_amd64.deb
+    cd ${WORKING_DIR} || exit 1
+    if [ -f "requirements.txt" ]; then
+        python -m pip install --default-timeout=100 -r requirements.txt
+        pip list
+    else
+        echo "requirements.txt file not found."
+    fi
+    echo "test on ${test_name}"
+    if [[ $test_name == "PR-test" ]]; then
+        pytest "${LOG_DIR}/coverage_pr"
+    elif [[ $test_name == "baseline" ]]; then
+        pytest "${LOG_DIR}/coverage_base"
+    fi
+}
+main
diff --git a/.github/workflows/unit-test-engine.yml b/.github/workflows/unit-test-engine.yml
index 3d75c65142a..f21efb4ed5c 100644
--- a/.github/workflows/unit-test-engine.yml
+++ b/.github/workflows/unit-test-engine.yml
@@ -56,6 +56,7 @@ jobs:
         with:
           submodules: "recursive"
           ref: ${{ matrix.test_branch }}
+          fetch-tags: true
 
       - name: Docker Build
         run: |
diff --git a/.github/workflows/unit-test-kernel.yml b/.github/workflows/unit-test-kernel.yml
index 5839731bf34..32a88194214 100644
--- a/.github/workflows/unit-test-kernel.yml
+++ b/.github/workflows/unit-test-kernel.yml
@@ -47,6 +47,7 @@ jobs:
         uses: actions/checkout@v3
         with:
           submodules: "recursive"
+          fetch-tags: true
 
       - name: Docker Build
         run: |
diff --git a/.github/workflows/unit-test-neuralchat.yml b/.github/workflows/unit-test-neuralchat.yml
new file mode 100644
index 00000000000..540031e1d41
--- /dev/null
+++ b/.github/workflows/unit-test-neuralchat.yml
@@ -0,0 +1,141 @@
+name: NeuralChat Unit Test
+
+on:
+  pull_request:
+    branches: [main]
+    paths:
+      - intel_extension_for_transformers/neural_chat/tests/**
+      - .github/workflows/unit-test-neuralchat.yml
+      - .github/workflows/script/unitTest/**
+  workflow_dispatch:
+
+# If there is a new commit, the previous jobs will be canceled
+concurrency:
+  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
+  cancel-in-progress: true
+
+env:
+  DOCKER_CONFIG_NAME: "commonDockerConfig"
+  REPO_NAME: "intel-extension-for-transformers"
+  REPO_TAG: "py38"
+  DOCKER_FILE_NAME: "devel"
+  CONTAINER_NAME: "utTest"
+  EXTRA_CONTAINER_NAME: "modelTest"
+
+jobs:
+  unit-test:
+    runs-on: [self-hosted, Linux, X64, itrex-node]
+    strategy:
+      matrix:
+        include:
+          - test_branch: ${{ github.ref }}
+            test_name: "PR-test"
+          - test_branch: "main"
+            test_name: "baseline"
+    steps:
+      - name: Docker Clean Up
+        run: |
+          docker ps -a
+          if [[ $(docker ps -a | grep -i '${{ env.CONTAINER_NAME }}'$) ]]; then
+              docker start ${{ env.CONTAINER_NAME }}
+              echo "remove left files through container ..."
+              docker exec ${{ env.CONTAINER_NAME }} bash -c "ls -a /intel-extension-for-transformers && rm -fr /intel-extension-for-transformers/* && rm -fr /intel-extension-for-transformers/.* || true"
+          fi
+          if [[ $(docker ps -a | grep -i '${{ env.EXTRA_CONTAINER_NAME }}'$) ]]; then
+              docker start ${{ env.EXTRA_CONTAINER_NAME }}
+              echo "remove left files through container ..."
+              docker exec ${{ env.EXTRA_CONTAINER_NAME }} bash -c "ls -a /intel-extension-for-transformers && rm -fr /intel-extension-for-transformers/* && rm -fr /intel-extension-for-transformers/.* || true"
+          fi
+
+      - name: Checkout out Repo
+        uses: actions/checkout@v3
+        with:
+          submodules: "recursive"
+          ref: ${{ matrix.test_branch }}
+          fetch-tags: true
+
+      - name: Docker Build
+        run: |
+          docker build -f ${{ github.workspace }}/.github/workflows/docker/${{ env.DOCKER_FILE_NAME }}.dockerfile -t ${{ env.REPO_NAME }}:${{ env.REPO_TAG }} .
+
+      - name: Docker Run
+        run: |
+          if [[ $(docker ps -a | grep -i '${{ env.CONTAINER_NAME }}'$) ]]; then
+              docker stop ${{ env.CONTAINER_NAME }}
+              docker rm -vf ${{ env.CONTAINER_NAME }} || true
+          fi
+          docker run -dit --disable-content-trust --privileged --name=${{ env.CONTAINER_NAME }} -v /dev/shm:/dev/shm \
+            -v ${{ github.workspace }}:/intel-extension-for-transformers \
+            ${{ env.REPO_NAME }}:${{ env.REPO_TAG }}
+
+      - name: Env build
+        run: |
+          docker exec ${{ env.CONTAINER_NAME }} \
+            bash /intel-extension-for-transformers/.github/workflows/script/prepare_env.sh
+
+      - name: Binary build
+        run: |
+          docker exec ${{ env.CONTAINER_NAME }} \
+            bash -c "cd /intel-extension-for-transformers/.github/workflows/script \
+            && bash install_binary.sh \
+            && pip install intel_extension_for_pytorch wget sentencepiece \
+            && pip install transformers>=4.32.0 \
+            && pip install peft torch num2words speechbrain paddlepaddle \
+            && pip install paddlespeech==1.4.1 shortuuid gptcache evaluate \
+            && pip install fschat pydub python-multipart PyPDF2 langchain \
+            && pip install python-docx scikit-learn farm-haystack librosa beautifulsoup4 \
+            && pip install InstructorEmbedding chromadb pydantic fastapi starlette \
+            && pip install yacs uvicorn optimum optimum[habana] \
+            && pip install sentence_transformers unstructured markdown rouge_score \
+            && pip install --upgrade accelerate"
+
+      - name: Run UT
+        run: |
+          docker exec ${{ env.CONTAINER_NAME }} \
+            bash -c "cd /intel-extension-for-transformers/.github/workflows/script/unitTest \
+            && bash run_unit_test_neuralchat.sh --test_name=${{ matrix.test_name }}"
+
+      - name: Collect log
+        if: ${{ !cancelled() }}
+        run: |
+          docker exec ${{ env.CONTAINER_NAME }} \
+            bash -c "cd /intel-extension-for-transformers && \
+            mv /log_dir ."
+
+      - name: Publish pipeline artifact
+        uses: actions/upload-artifact@v3
+        if: ${{ !cancelled() }}
+        with:
+          name: "UnitTest${{ matrix.test_name }}"
+          path: ${{ github.workspace }}/log_dir
+
+
+  Generate-Report:
+    runs-on: itrex-node-spell
+    needs: [unit-test]
+    steps:
+      - name: Checkout out Repo
+        uses: actions/checkout@v3
+        with:
+          submodules: "recursive"
+
+      - name: Download UT PR Log
+        uses: actions/download-artifact@v3
+        with:
+          path: ${{ github.workspace }}/log_dir
+
+      - name: Display structure of downloaded files
+        run: cd ${{ github.workspace }}/log_dir && ls -R
+
+      - name: Calculate coverage
+        run: |
+          cd ${{ github.workspace }}/.github/workflows/script/unitTest/coverage
+          /usr/bin/bash calc_coverage.sh ${{ github.workspace }}/log_dir
+
+      - name: Publish pipeline artifact
+        uses: actions/upload-artifact@v3
+        if: ${{ !cancelled() }}
+        with:
+          name: Neural Chat Unit Test
+          path: ${{ github.workspace }}/log_dir
+          retention-days: 5
\ No newline at end of file
diff --git a/.github/workflows/unit-test-optimize.yml b/.github/workflows/unit-test-optimize.yml
index 860c887fbc7..620f249bb7c 100644
--- a/.github/workflows/unit-test-optimize.yml
+++ b/.github/workflows/unit-test-optimize.yml
@@ -56,6 +56,7 @@ jobs:
         with:
           submodules: "recursive"
           ref: ${{ matrix.test_branch }}
+          fetch-tags: true
 
       - name: Docker Build
         run: |
@@ -131,7 +132,7 @@ jobs:
         uses: actions/upload-artifact@v3
         if: ${{ !cancelled() }}
         with:
-          name: Engine Unit Test
+          name: Optimize Unit Test
           path: ${{ github.workspace }}/log_dir
           retention-days: 5
diff --git a/intel_extension_for_transformers/__init__.py b/intel_extension_for_transformers/__init__.py
index 5603667ff8f..e1bfd064448 100644
--- a/intel_extension_for_transformers/__init__.py
+++ b/intel_extension_for_transformers/__init__.py
@@ -18,4 +18,4 @@
 try:
     from ._version import __version__  # load _version file generated by setuptools_scm
 except ModuleNotFoundError:
-    __version__ = "0.0.0"
\ No newline at end of file
+    __version__ = "1.1"
\ No newline at end of file
diff --git a/intel_extension_for_transformers/llm/finetuning/finetuning.py b/intel_extension_for_transformers/llm/finetuning/finetuning.py
index de58b0ff965..7cadf1b01cc 100644
--- a/intel_extension_for_transformers/llm/finetuning/finetuning.py
+++ b/intel_extension_for_transformers/llm/finetuning/finetuning.py
@@ -78,12 +78,12 @@ def __init__(self, finetuning_config: BaseFinetuningConfig):
         else:
             finetuning_config.finetune_args.device = "cpu"
         if finetuning_config.finetune_args.device == "cpu":
-            finetuning_config.training_args.no_cuda = True
             Arguments = type(finetuning_config.training_args)
             training_args = {
                 k: getattr(finetuning_config.training_args, k) \
                     for k in Arguments.__dataclass_fields__.keys() if Arguments.__dataclass_fields__[k].init
             }
+            training_args["no_cuda"] = True
             self.training_args = Arguments(**training_args)
 
         os.environ["WANDB_DISABLED"] = "true"
diff --git a/intel_extension_for_transformers/neural_chat/assets/audio/pat.wav b/intel_extension_for_transformers/neural_chat/assets/audio/sample.wav
similarity index 100%
rename from intel_extension_for_transformers/neural_chat/assets/audio/pat.wav
rename to intel_extension_for_transformers/neural_chat/assets/audio/sample.wav
diff --git a/intel_extension_for_transformers/neural_chat/assets/docs/sample.xlsx b/intel_extension_for_transformers/neural_chat/assets/docs/sample.xlsx
index 0b1c43c9254..71abab41430 100644
Binary files a/intel_extension_for_transformers/neural_chat/assets/docs/sample.xlsx and b/intel_extension_for_transformers/neural_chat/assets/docs/sample.xlsx differ
diff --git a/intel_extension_for_transformers/neural_chat/cli/cli_commands.py b/intel_extension_for_transformers/neural_chat/cli/cli_commands.py
index 2b6edc9af04..794f388bd35 100644
--- a/intel_extension_for_transformers/neural_chat/cli/cli_commands.py
+++ b/intel_extension_for_transformers/neural_chat/cli/cli_commands.py
@@ -336,4 +336,4 @@ def __call__(self):
     command_register(
         name='neuralchat.{}'.format(com),
         description=info[0],
-        cls='neural_chat.cli.cli_commands.{}'.format(info[1]))
+        cls='intel_extension_for_transformers.neural_chat.cli.cli_commands.{}'.format(info[1]))
diff --git a/intel_extension_for_transformers/neural_chat/config.py b/intel_extension_for_transformers/neural_chat/config.py
index 56223dce62f..bd3a587109c 100644
--- a/intel_extension_for_transformers/neural_chat/config.py
+++ b/intel_extension_for_transformers/neural_chat/config.py
@@ -22,14 +22,6 @@
 from transformers.utils.versions import require_version
 from dataclasses import dataclass
 
-from .pipeline.plugins.audio.asr import AudioSpeechRecognition
-from .pipeline.plugins.audio.asr_chinese import ChineseAudioSpeechRecognition
-from .pipeline.plugins.audio.tts import TextToSpeech
-from .pipeline.plugins.audio.tts_chinese import ChineseTextToSpeech
-from .pipeline.plugins.retrieval.indexing import DocumentIndexing
-from .pipeline.plugins.retrieval import SparseBM25Retriever, ChromaRetriever
-from .pipeline.plugins.retrieval.detector import IntentDetector
-from .pipeline.plugins.security import SafetyChecker
 from .plugins import plugins
 
 from enum import Enum, auto
diff --git a/intel_extension_for_transformers/neural_chat/docs/notebooks/chatbot_on_intel_cpu.ipynb b/intel_extension_for_transformers/neural_chat/docs/notebooks/chatbot_on_intel_cpu.ipynb
index 873619cffe3..ca6c5478c15 100644
--- a/intel_extension_for_transformers/neural_chat/docs/notebooks/chatbot_on_intel_cpu.ipynb
+++ b/intel_extension_for_transformers/neural_chat/docs/notebooks/chatbot_on_intel_cpu.ipynb
@@ -120,7 +120,7 @@
     "from intel_extension_for_transformers.neural_chat import build_chatbot\n",
     "config = PipelineConfig(audio_input=True, audio_output=True)\n",
     "chatbot = build_chatbot(config)\n",
-    "result = chatbot.predict(query=\"./assets/audio/pat.wav\")"
+    "result = chatbot.predict(query=\"./assets/audio/sample.wav\")"
    ]
   },
   {
@@ -302,7 +302,7 @@
     "from neural_chat import VoiceChatClientExecutor\n",
     "executor = VoiceChatClientExecutor()\n",
     "result = executor(\n",
-    "    audio_input_path='./assets/audio/pat.wav',\n",
+    "    audio_input_path='./assets/audio/sample.wav',\n",
     "    audio_output_path='./results.wav',\n",
     "    server_ip=\"127.0.0.1\", # master server ip\n",
     "    port=8000 # master server entry point \n",
@@ -318,7 +318,7 @@
     "import IPython\n",
     "# Play input audio\n",
     "print(\" Play Input Audio ......\")\n",
-    "IPython.display.display(IPython.display.Audio(\"./assets/audio/pat.wav\"))\n",
+    "IPython.display.display(IPython.display.Audio(\"./assets/audio/sample.wav\"))\n",
     "# Play output audio\n",
     "print(\" Play Output Audio ......\")\n",
     "IPython.display.display(IPython.display.Audio(\"./assets/audio/welcome.wav\"))\n"
diff --git a/intel_extension_for_transformers/neural_chat/docs/notebooks/chatbot_on_intel_habana_hpu.ipynb b/intel_extension_for_transformers/neural_chat/docs/notebooks/chatbot_on_intel_habana_hpu.ipynb
index 2110f4dd834..efd2f5b5dc3 100644
--- a/intel_extension_for_transformers/neural_chat/docs/notebooks/chatbot_on_intel_habana_hpu.ipynb
+++ b/intel_extension_for_transformers/neural_chat/docs/notebooks/chatbot_on_intel_habana_hpu.ipynb
@@ -141,7 +141,7 @@
     "from intel_extension_for_transformers.neural_chat import build_chatbot\n",
     "config = PipelineConfig(audio_input=True, audio_output=True)\n",
     "chatbot = build_chatbot(config)\n",
-    "result = chatbot.predict(query=\"./assets/audio/pat.wav\")"
+    "result = chatbot.predict(query=\"./assets/audio/sample.wav\")"
    ]
   },
   {
@@ -323,7 +323,7 @@
     "from neural_chat import VoiceChatClientExecutor\n",
     "executor = VoiceChatClientExecutor()\n",
     "result = executor(\n",
-    "    audio_input_path='./assets/audio/pat.wav',\n",
+    "    audio_input_path='./assets/audio/sample.wav',\n",
     "    audio_output_path='./results.wav',\n",
     "    server_ip=\"127.0.0.1\", # master server ip\n",
     "    port=8000 # master server entry point \n",
@@ -339,7 +339,7 @@
     "import IPython\n",
     "# Play input audio\n",
     "print(\" Play Input Audio ......\")\n",
-    "IPython.display.display(IPython.display.Audio(\"./assets/audio/pat.wav\"))\n",
+    "IPython.display.display(IPython.display.Audio(\"./assets/audio/sample.wav\"))\n",
     "# Play output audio\n",
     "print(\" Play Output Audio ......\")\n",
     "IPython.display.display(IPython.display.Audio(\"./assets/audio/welcome.wav\"))\n"
diff --git a/intel_extension_for_transformers/neural_chat/docs/notebooks/chatbot_on_nv_gpu.ipynb b/intel_extension_for_transformers/neural_chat/docs/notebooks/chatbot_on_nv_gpu.ipynb
index a3c15037738..c31e0d367cb 100644
--- a/intel_extension_for_transformers/neural_chat/docs/notebooks/chatbot_on_nv_gpu.ipynb
+++ b/intel_extension_for_transformers/neural_chat/docs/notebooks/chatbot_on_nv_gpu.ipynb
@@ -120,7 +120,7 @@
     "from intel_extension_for_transformers.neural_chat import build_chatbot\n",
     "config = PipelineConfig(audio_input=True, audio_output=True)\n",
     "chatbot = build_chatbot(config)\n",
-    "result = chatbot.predict(query=\"./assets/audio/pat.wav\")"
+    "result = chatbot.predict(query=\"./assets/audio/sample.wav\")"
    ]
   },
   {
@@ -331,7 +331,7 @@
     "from neural_chat import VoiceChatClientExecutor\n",
     "executor = VoiceChatClientExecutor()\n",
     "result = executor(\n",
-    "    audio_input_path='./assets/audio/pat.wav',\n",
+    "    audio_input_path='./assets/audio/sample.wav',\n",
     "    audio_output_path='./results.wav',\n",
     "    server_ip=\"127.0.0.1\", # master server ip\n",
     "    port=8000 # master server entry point \n",
@@ -347,7 +347,7 @@
     "import IPython\n",
     "# Play input audio\n",
     "print(\" Play Input Audio ......\")\n",
-    "IPython.display.display(IPython.display.Audio(\"./assets/audio/pat.wav\"))\n",
+    "IPython.display.display(IPython.display.Audio(\"./assets/audio/sample.wav\"))\n",
     "# Play output audio\n",
     "print(\" Play Output Audio ......\")\n",
     "IPython.display.display(IPython.display.Audio(\"./assets/audio/welcome.wav\"))\n"
diff --git a/intel_extension_for_transformers/neural_chat/models/mpt_model.py b/intel_extension_for_transformers/neural_chat/models/mpt_model.py
index c7b204f8700..1afd55a08ed 100644
--- a/intel_extension_for_transformers/neural_chat/models/mpt_model.py
+++ b/intel_extension_for_transformers/neural_chat/models/mpt_model.py
@@ -37,7 +37,7 @@ def match(self, model_path: str):
 
         Returns:
             bool: True if the model_path matches, False otherwise.
""" - return "mpt" in model_path.lower() + return "mpt" in model_path.lower() or "opt" in model_path.lower() def get_default_conv_template(self, model_path: str) -> Conversation: """ diff --git a/intel_extension_for_transformers/neural_chat/pipeline/plugins/audio/tts.py b/intel_extension_for_transformers/neural_chat/pipeline/plugins/audio/tts.py index 8c77dcb12d4..fb0c0cec4f3 100644 --- a/intel_extension_for_transformers/neural_chat/pipeline/plugins/audio/tts.py +++ b/intel_extension_for_transformers/neural_chat/pipeline/plugins/audio/tts.py @@ -38,7 +38,8 @@ class TextToSpeech(): 2) Finetuned voice (Fine-tuned offline model of specific person, such as Pat's voice + corresponding embedding) 3) Customized voice (Original model + User's customized input voice embedding) """ - def __init__(self, output_audio_path="./response.wav", voice="default", stream_mode=False, device="cpu"): + def __init__(self, output_audio_path="./response.wav", voice="default", stream_mode=False, device="cpu", + asset_path="/intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/assets"): """Make sure your export LD_PRELOAD= beforehand.""" # default setting self.original_model = SpeechT5ForTextToSpeech.from_pretrained("microsoft/speecht5_tts") @@ -56,8 +57,13 @@ def __init__(self, output_audio_path="./response.wav", voice="default", stream_m self.vocoder = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan") self.vocoder.eval() script_dir = os.path.dirname(os.path.abspath(__file__)) - default_speaker_embedding_path = os.path.join(script_dir, - '../../../assets/speaker_embeddings/spk_embed_default.pt') + if os.path.exists(os.path.join(script_dir, '../../../assets/speaker_embeddings/spk_embed_default.pt')): + default_speaker_embedding_path = os.path.join(script_dir, + '../../../assets/speaker_embeddings/spk_embed_default.pt') + elif os.path.exists(os.path.join(asset_path, 'speaker_embeddings/spk_embed_default.pt')): + default_speaker_embedding_path = os.path.join(asset_path, 'speaker_embeddings/spk_embed_default.pt') + else: + print("Warning! Need to prepare speaker_embeddings") # load the default speaker embedding self.default_speaker_embedding = torch.load(default_speaker_embedding_path) @@ -70,6 +76,8 @@ def __init__(self, output_audio_path="./response.wav", voice="default", stream_m pat_speaker_embedding_path = os.path.join(script_dir, '../../../assets/speaker_embeddings/spk_embed_pat.pt') if os.path.exists(pat_speaker_embedding_path): self.pat_speaker_embeddings = torch.load(pat_speaker_embedding_path) + elif os.path.exists(os.path.join(asset_path, 'speaker_embeddings/spk_embed_pat.pt')): + self.pat_speaker_embeddings = torch.load(os.path.join(asset_path, 'speaker_embeddings/spk_embed_pat.pt')) self.cpu_pool = None if not torch.cuda.is_available(): @@ -96,12 +104,16 @@ def create_speaker_embedding(self, driven_audio_path): speaker_embeddings = speaker_embeddings[0] # [1,512] return speaker_embeddings.cpu() - def _lookup_voice_embedding(self, voice): + def _lookup_voice_embedding(self, voice, + asset_path="/intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/assets"): script_dir = os.path.dirname(os.path.abspath(__file__)) if os.path.exists(f"speaker_embeddings/spk_embed_{voice}.pt") == False: print("No customized speaker embedding is found! 
Use the default one") - default_speaker_embedding_path = os.path.join(script_dir, - '../../../assets/speaker_embeddings/spk_embed_default.pt') + if os.path.exists(os.path.join(script_dir, '../../../assets/speaker_embeddings/spk_embed_default.pt')): + default_speaker_embedding_path = os.path.join(script_dir, + '../../../assets/speaker_embeddings/spk_embed_default.pt') + elif os.path.exists(os.path.join(asset_path, 'speaker_embeddings/spk_embed_default.pt')): + default_speaker_embedding_path = (asset_path, 'speaker_embeddings/spk_embed_default.pt') return default_speaker_embedding_path else: specific_speaker_embedding_path = os.path.join(script_dir, diff --git a/intel_extension_for_transformers/neural_chat/pipeline/plugins/retrieval/retrieval_agent.py b/intel_extension_for_transformers/neural_chat/pipeline/plugins/retrieval/retrieval_agent.py index c877ecb160a..3eed6105ac4 100644 --- a/intel_extension_for_transformers/neural_chat/pipeline/plugins/retrieval/retrieval_agent.py +++ b/intel_extension_for_transformers/neural_chat/pipeline/plugins/retrieval/retrieval_agent.py @@ -18,9 +18,9 @@ import os import torch import transformers -from intel_extension_for_transformers.neural_chat.pipeline.plugins.retrieval import Retriever -from intel_extension_for_transformers.neural_chat.pipeline.plugins.retrieval.detector import IntentDetector -from intel_extension_for_transformers.neural_chat.pipeline.plugins.retrieval.indexing import DocumentIndexing +from .retrieval_base import Retriever +from .detector import IntentDetector +from .indexing import DocumentIndexing from intel_extension_for_transformers.neural_chat.pipeline.plugins.prompt import generate_qa_prompt, generate_prompt from intel_extension_for_transformers.neural_chat.plugins import register_plugin @@ -33,7 +33,7 @@ def __init__(self, persist_dir="./output", process=True, input_path=None, self.model = None self.tokenizer = None self.retrieval_type = retrieval_type - + self.retriever = None self.intent_detector = IntentDetector() if os.path.exists(input_path): self.doc_parser = DocumentIndexing(retrieval_type=self.retrieval_type, document_store=document_store, @@ -53,7 +53,10 @@ def pre_llm_inference_actions(self, model_name, query): prompt = generate_prompt(query) else: print("Chat with QA agent.") - context = self.retriever.get_context(query) - prompt = generate_qa_prompt(query, context) + if self.retriever: + context = self.retriever.get_context(query) + prompt = generate_qa_prompt(query, context) + else: + prompt = generate_prompt(query) return prompt diff --git a/intel_extension_for_transformers/neural_chat/pipeline/plugins/security/safety_checker.py b/intel_extension_for_transformers/neural_chat/pipeline/plugins/security/safety_checker.py index 2f221931aa9..b6cfc2f1231 100644 --- a/intel_extension_for_transformers/neural_chat/pipeline/plugins/security/safety_checker.py +++ b/intel_extension_for_transformers/neural_chat/pipeline/plugins/security/safety_checker.py @@ -20,6 +20,7 @@ import os from ....plugins import register_plugin +doc_path = "/intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/pipeline/plugins/security/" def convert_fullwidth_to_halfwidth(query): """Converting Full-width Characters to Half-width Characters.""" content = "" @@ -37,9 +38,19 @@ class SafetyChecker: def __init__(self, dict_path=None, matchType=2): if dict_path == None or (not os.path.exists(dict_path)): dict_path = os.path.dirname(os.path.abspath(__file__)) - f = open(os.path.join(dict_path, "stopword.txt"), encoding="utf8") + if 
os.path.exists(os.path.join(dict_path, "stopword.txt")): + f = open(os.path.join(dict_path, "stopword.txt"), encoding="utf8") + elif os.path.exists(os.path.join(doc_path, "stopword.txt")): + f = open(os.path.join(doc_path, "stopword.txt"), encoding="utf8") + else: + print("Can't find stopword.txt") self.Stopwords = [i.split('\n')[0] for i in f.readlines()] - f1 = open(os.path.join(dict_path, "dict.txt"), encoding="utf8") + if os.path.exists(os.path.join(dict_path, "dict.txt")): + f1 = open(os.path.join(dict_path, "dict.txt"), encoding="utf8") + elif os.path.exists(os.path.join(doc_path, "dict.txt")): + f1 = open(os.path.join(doc_path, "dict.txt"), encoding="utf8") + else: + print("Can't find dict.txt") lst = f1.readlines() self.sensitiveWordSet = [i.split("\n")[0].split("\t") for i in lst] self.sensitiveWordMap = self._initSensitiveWordMap() diff --git a/intel_extension_for_transformers/neural_chat/requirements.txt b/intel_extension_for_transformers/neural_chat/requirements.txt index 5d655e30959..5f615d3a59d 100644 --- a/intel_extension_for_transformers/neural_chat/requirements.txt +++ b/intel_extension_for_transformers/neural_chat/requirements.txt @@ -1,4 +1,4 @@ -transformers>=4.31.0 +transformers>=4.32.0 peft fschat torch @@ -19,7 +19,6 @@ scikit-learn farm-haystack numpy==1.23.5 librosa -docx beautifulsoup4 InstructorEmbedding chromadb @@ -30,6 +29,7 @@ yacs uvicorn optimum optimum[habana] -#py-cpuinfo -#oneccl_bind_pt==2.0.100 -f https://developer.intel.com/ipex-whl-stable-xpu -#deepspeed +sentence_transformers +unstructured +markdown +rouge_score diff --git a/intel_extension_for_transformers/neural_chat/tests/api/test_chatbot_build_api.py b/intel_extension_for_transformers/neural_chat/tests/api/test_chatbot_build_api.py index 2fcd7b5cfa1..838b7bdcea7 100644 --- a/intel_extension_for_transformers/neural_chat/tests/api/test_chatbot_build_api.py +++ b/intel_extension_for_transformers/neural_chat/tests/api/test_chatbot_build_api.py @@ -21,6 +21,7 @@ from intel_extension_for_transformers.neural_chat import PipelineConfig, GenerationConfig from intel_extension_for_transformers.neural_chat import plugins +# All UT cases use 'facebook/opt-125m' to reduce test time. 
 class TestChatbotBuilder(unittest.TestCase):
     def setUp(self):
         return super().setUp()
@@ -29,14 +30,17 @@ def tearDown(self) -> None:
         return super().tearDown()
 
     def test_build_chatbot_with_default_config(self):
-        chatbot = build_chatbot()
+        config = PipelineConfig(model_name_or_path="facebook/opt-125m")
+        chatbot = build_chatbot(config)
         self.assertIsNotNone(chatbot)
         response = chatbot.predict(query="Tell me about Intel Xeon Scalable Processors.")
         print(response)
         self.assertIsNotNone(response)
 
     def test_build_chatbot_with_customized_pipelinecfg(self):
-        config = PipelineConfig(model_name_or_path="mosaicml/mpt-7b-chat")
+        config = PipelineConfig(model_name_or_path="facebook/opt-125m",
+                                tokenizer_name_or_path="EleutherAI/gpt-neox-20b",
+                                device="cpu")
         chatbot = build_chatbot(config)
         self.assertIsNotNone(chatbot)
         response = chatbot.predict(query="Tell me about Intel Xeon Scalable Processors.")
@@ -44,7 +48,8 @@ def test_build_chatbot_with_customized_pipelinecfg(self):
         self.assertIsNotNone(response)
 
     def test_build_chatbot_with_customized_generationcfg(self):
-        chatbot = build_chatbot()
+        config = PipelineConfig(model_name_or_path="facebook/opt-125m")
+        chatbot = build_chatbot(config)
         self.assertIsNotNone(chatbot)
         config = GenerationConfig(max_new_tokens=512, temperature=0.1)
         response = chatbot.predict(query="Tell me about Intel Xeon Scalable Processors.", config=config)
@@ -54,18 +59,22 @@ def test_build_chatbot_with_customized_generationcfg(self):
     def test_build_chatbot_with_audio_plugin(self):
         plugins.tts.enable = True
         plugins.tts.args["output_audio_path"]="./output_audio.wav"
-        pipeline_config = PipelineConfig(plugins=plugins)
+        pipeline_config = PipelineConfig(model_name_or_path="facebook/opt-125m",
+                                         plugins=plugins)
         chatbot = build_chatbot(pipeline_config)
         self.assertIsNotNone(chatbot)
         gen_config = GenerationConfig(max_new_tokens=64)
-        response = chatbot.predict(query="../../assets/audio/pat.wav", config=gen_config)
+        response = chatbot.predict(query= \
+            "/intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/assets/audio/sample.wav", \
+            config=gen_config)
         self.assertIsNotNone(response)
         print("output audio path: ", response)
         self.assertTrue(os.path.exists(plugins.tts.args["output_audio_path"]))
 
     def test_build_chatbot_with_safety_checker_plugin(self):
         plugins.safety_checker.enable = True
-        pipeline_config = PipelineConfig(plugins=plugins)
+        pipeline_config = PipelineConfig(model_name_or_path="facebook/opt-125m",
+                                         plugins=plugins)
         chatbot = build_chatbot(pipeline_config)
         self.assertIsNotNone(chatbot)
         response = chatbot.predict(query="蔡英文是谁?")
@@ -74,11 +83,12 @@ def test_build_chatbot_with_safety_checker_plugin(self):
 
     def test_build_chatbot_with_retrieval_plugin(self):
         plugins.retrieval.enable = True
-        plugins.retrieval.args["input_path"] = "../../assets/docs/"
-        pipeline_config = PipelineConfig(plugins=plugins)
+        plugins.retrieval.args["input_path"] = "../../../../README.md"
+        pipeline_config = PipelineConfig(model_name_or_path="facebook/opt-125m",
+                                         plugins=plugins)
         chatbot = build_chatbot(pipeline_config)
         self.assertIsNotNone(chatbot)
-        response = chatbot.predict(query="What are total cores of Intel® Xeon® Platinum 8480+ Processor?")
+        response = chatbot.predict(query="What is Intel extension for transformers?")
         print("response: ", response)
 
 if __name__ == '__main__':
diff --git a/intel_extension_for_transformers/neural_chat/tests/api/test_inference.py b/intel_extension_for_transformers/neural_chat/tests/api/test_inference.py
index 3aa16d18172..1a3a28bf4ab 100644
--- a/intel_extension_for_transformers/neural_chat/tests/api/test_inference.py
+++ b/intel_extension_for_transformers/neural_chat/tests/api/test_inference.py
@@ -19,8 +19,7 @@
 import unittest
 from intel_extension_for_transformers.neural_chat.chatbot import build_chatbot, finetune_model, optimize_model
 from intel_extension_for_transformers.neural_chat.config import (
-    PipelineConfig, GenerationConfig, FinetuningConfig, OptimizationConfig,
-    ModelArguments, DataArguments, TrainingArguments, FinetuningArguments
+    PipelineConfig, GenerationConfig, AMPConfig,
 )
 from intel_extension_for_transformers.neural_chat import plugins
 
@@ -32,14 +31,17 @@ def tearDown(self) -> None:
         return super().tearDown()
 
     def test_text_chat(self):
-        config = PipelineConfig()
+        config = PipelineConfig(model_name_or_path="facebook/opt-125m")
         chatbot = build_chatbot(config)
         response = chatbot.predict("Tell me about Intel Xeon Scalable Processors.")
         print(response)
         self.assertIsNotNone(response)
 
     def test_retrieval(self):
-        config = PipelineConfig(retrieval_type="sparse", retrieval_document_path="../../assets/docs/")
+        plugins.retrieval.enable = True
+        plugins.retrieval.args["input_path"] = "../../assets/docs/"
+        config = PipelineConfig(model_name_or_path="facebook/opt-125m",
+                                plugins=plugins)
         chatbot = build_chatbot(config)
         response = chatbot.predict("Tell me about Intel Xeon Scalable Processors.")
         print(response)
@@ -48,7 +50,8 @@ def test_retrieval(self):
     def test_voice_chat(self):
         plugins.tts.enable = True
         plugins.tts.args["output_audio_path"] = "./response.wav"
-        pipeline_config = PipelineConfig(plugins=plugins)
+        pipeline_config = PipelineConfig(model_name_or_path="facebook/opt-125m",
+                                         plugins=plugins)
         chatbot = build_chatbot(config=pipeline_config)
         gen_config = GenerationConfig(max_new_tokens=64)
         response = chatbot.predict(query="Nice to meet you!", config=gen_config)
@@ -57,21 +60,9 @@ def test_voice_chat(self):
         print("output audio path: ", response)
         self.assertTrue(os.path.exists("./response.wav"))
 
-    def test_finetuning(self):
-        model_args = ModelArguments(model_name_or_path="meta-llama/Llama-2-7b-chat-hf", use_fast_tokenizer=False)
-        data_args = DataArguments(train_file='./alpaca_data.json', dataset_concatenation=True)
-        training_args = TrainingArguments(gradient_accumulation_steps=1,
-            do_train=True, learning_rate=1e-4, num_train_epochs=1,
-            logging_steps=100, save_total_limit=2, overwrite_output_dir=True,
-            log_level='info', save_strategy='epoch', max_steps=3,
-            output_dir='./saved_model', no_cuda=True)
-        finetune_args = FinetuningArguments(peft='lora')
-        config = FinetuningConfig(model_args, data_args, training_args, finetune_args)
-        finetune_model(config)
-
     def test_quantization(self):
-        config = OptimizationConfig()
-        optimize_model(config)
+        config = AMPConfig()
+        optimize_model(model="facebook/opt-125m", config=config)
 
 if __name__ == '__main__':
     unittest.main()
diff --git a/intel_extension_for_transformers/neural_chat/tests/audio/test_asr.py b/intel_extension_for_transformers/neural_chat/tests/audio/test_asr.py
index dc0fef0041d..2de1f452f5d 100644
--- a/intel_extension_for_transformers/neural_chat/tests/audio/test_asr.py
+++ b/intel_extension_for_transformers/neural_chat/tests/audio/test_asr.py
@@ -29,14 +29,14 @@ def setUpClass(self):
         self.asr_bf16 = AudioSpeechRecognition("openai/whisper-small", bf16=True)
 
     def test_audio2text(self):
-        audio_path = "../../assets/audio/pat.wav"
+        audio_path = "/intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/assets/audio/welcome.wav"
         text = self.asr.audio2text(audio_path)
         self.assertEqual(text.lower(), "Welcome to Neural Chat".lower())
 
     def test_audio2text_bf16(self):
         if torch.cuda.is_available():
             return
-        audio_path = "../../assets/audio/pat.wav"
+        audio_path = "/intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/assets/audio/welcome.wav"
         text = self.asr_bf16.audio2text(audio_path)
         self.assertEqual(text.lower(), "Welcome to Neural Chat".lower())
diff --git a/intel_extension_for_transformers/neural_chat/tests/audio/test_tts.py b/intel_extension_for_transformers/neural_chat/tests/audio/test_tts.py
index a5b5c14e050..ab6761df5b3 100644
--- a/intel_extension_for_transformers/neural_chat/tests/audio/test_tts.py
+++ b/intel_extension_for_transformers/neural_chat/tests/audio/test_tts.py
@@ -54,7 +54,8 @@ def text_generate():
         self.assertTrue(os.path.exists(result_path))
 
     def test_create_speaker_embedding(self):
-        driven_audio_path = "../../assets/audio/pat.wav"
+        driven_audio_path = \
+            "/intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/assets/audio/sample.wav"
         spk_embed = self.tts.create_speaker_embedding(driven_audio_path)
         self.assertEqual(spk_embed.shape[0], 1)
         self.assertEqual(spk_embed.shape[1], 512)
diff --git a/intel_extension_for_transformers/neural_chat/tests/cli/test_cli.py b/intel_extension_for_transformers/neural_chat/tests/cli/test_cli.py
index 173f03a77be..f3bad881386 100644
--- a/intel_extension_for_transformers/neural_chat/tests/cli/test_cli.py
+++ b/intel_extension_for_transformers/neural_chat/tests/cli/test_cli.py
@@ -26,34 +26,39 @@ def test_text_chat(self):
         logger.info(f'Testing CLI request === Text Chat ===')
         command = 'neuralchat textchat \
                     --query "Tell me about Intel." \
-                    --model_name_or_path "./Llama-2-7b-chat-hf"'
+                    --model_name_or_path "facebook/opt-125m"'
+        result = None
         try:
-            result = subprocess.run(command, check=True)
+            result = subprocess.run(command, capture_output = True, check=True,
+                                    universal_newlines=True, shell=True) # nosec
         except subprocess.CalledProcessError as e:
             print("Error while executing command:", e)
-        print(result.stdout)
-        self.assertEqual(result.stdout, 0, msg="Textchat command line test failed.")
+        self.assertIn("model loaded", result.stdout)
 
     def test_help(self):
         logger.info(f'Testing CLI request === Help ===')
         command = 'neuralchat help'
+        result = None
         try:
-            result = subprocess.run(command, check=True)
+            result = subprocess.run(command, capture_output = True, check=True,
+                                    universal_newlines=True, shell=True) # nosec
         except subprocess.CalledProcessError as e:
             print("Error while executing command:", e)
-        self.assertEqual(result.stdout, 0, msg="Textchat command line test failed.")
+        self.assertIn("Show help for neuralchat commands.", result.stdout)
 
     def test_voice_chat(self):
         logger.info(f'Testing CLI request === Voice Chat ===')
         command = 'neuralchat voicechat \
                     --query "Tell me about Intel Xeon Scalable Processors." \
                     --audio_output_path "./response.wav" \
-                    --model_name_or_path "./Llama-2-7b-chat-hf"'
+                    --model_name_or_path "facebook/opt-125m"'
+        result = None
         try:
-            result = subprocess.run(command, check=True)
+            result = subprocess.run(command, capture_output = True, check=True,
+                                    universal_newlines=True, shell=True) # nosec
         except subprocess.CalledProcessError as e:
             print("Error while executing command:", e)
-        self.assertEqual(result.stdout, 0, msg="Textchat command line test failed.")
+        self.assertIn("model loaded", result.stdout)
 
 if __name__ == "__main__":
diff --git a/intel_extension_for_transformers/neural_chat/tests/optimization/test_optimization.py b/intel_extension_for_transformers/neural_chat/tests/optimization/test_optimization.py
index 9503c3ef1c6..262f3436511 100644
--- a/intel_extension_for_transformers/neural_chat/tests/optimization/test_optimization.py
+++ b/intel_extension_for_transformers/neural_chat/tests/optimization/test_optimization.py
@@ -31,7 +31,7 @@ def tearDown(self) -> None:
         return super().tearDown()
 
     def test_build_chatbot_with_AMP(self):
-        config = PipelineConfig()
+        config = PipelineConfig(model_name_or_path="facebook/opt-125m")
         chatbot = build_chatbot(config)
         self.assertIsNotNone(chatbot)
         response = chatbot.predict(query="Tell me about Intel Xeon Scalable Processors.")
@@ -39,7 +39,7 @@ def test_build_chatbot_with_AMP(self):
         self.assertIsNotNone(response)
 
     def test_build_chatbot_with_weight_only_quant(self):
-        config = PipelineConfig(
+        config = PipelineConfig(model_name_or_path="facebook/opt-125m",
             optimization_config=WeightOnlyQuantizationConfig()
         )
         chatbot = build_chatbot(config)
@@ -51,6 +51,7 @@ def test_build_chatbot_with_weight_only_quant(self):
     def test_build_chatbot_with_bitsandbytes_quant(self):
         if is_bitsandbytes_available() and torch.cuda.is_available():
             config = PipelineConfig(
+                model_name_or_path="facebook/opt-125m",
                 device='cuda',
                 optimization_config=BitsAndBytesConfig(
                     load_in_4bit=True,
diff --git a/intel_extension_for_transformers/neural_chat/tests/restful/config.py b/intel_extension_for_transformers/neural_chat/tests/restful/config.py
deleted file mode 100644
index 964f95c2aac..00000000000
--- a/intel_extension_for_transformers/neural_chat/tests/restful/config.py
+++ /dev/null
@@ -1,41 +0,0 @@
-# !/usr/bin/env python
-# -*- coding: utf-8 -*-
-#
-# Copyright (c) 2023 Intel Corporation
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import os
-
-# Get the host and port from the environment variables
-host = os.environ.get('MY_HOST')
-port = os.environ.get('MY_PORT')
-
-# Check if the environment variables are set and not empty
-if host and port:
-    # Combine the host and port to form the full URL
-    HOST = f"http://{host}:{port}"
-    API_COMPLETION = '/v1/completions'
-    API_CHAT_COMPLETION = '/v1/chat/completions'
-    API_AUDIO = '/v1/voicechat/completions'
-    API_FINETUNE = '/v1/finetune'
-    API_TEXT2IMAGE = '/v1/text2image'
-
-    print("HOST URL:", HOST)
-    print("Completions Endpoint:", API_COMPLETION)
-    print("Chat completions Endpoint:", API_CHAT_COMPLETION)
-    print("Voice cbat Endpoint:", API_AUDIO)
-    print("Finerune Endpoint:", API_FINETUNE)
-    print("Text to image Endpoint:", API_TEXT2IMAGE)
-else:
-    raise("Please set the environment variables MY_HOST and MY_PORT.")
\ No newline at end of file
diff --git a/intel_extension_for_transformers/neural_chat/tests/restful/stress_test/README.md b/intel_extension_for_transformers/neural_chat/tests/restful/stress_test/README.md
deleted file mode 100644
index 68a784033cd..00000000000
--- a/intel_extension_for_transformers/neural_chat/tests/restful/stress_test/README.md
+++ /dev/null
@@ -1,34 +0,0 @@
-Stress Test for Restful API of Neural Chat
-====================
-
-The [Locust](https://github.com/locustio/locust) framework is used for stress test.
-
-## Preperations
-
-### Modify Configurations
-
-Before running stress test, you could modify `stress_test/locust.conf`:
-- locustfile: The stress test script to run.
-- headless: Do not use web UI, show test result in terminal directly.
-- host: The host IP and port of the backend server.
-- users: Number of users you want to mock.
-- spawn-rate: Rate to spawn users.
-- run-time: Stop after the specified amount of time, e.g. (300s,
-  20m, 3h, 1h30m, etc.).
-
-For more configuration settings, refer to the [official documentations](https://docs.locust.io/en/stable/configuration.html) of locust.
-
-### install locust package
-
-```bash
-pip install locust
-```
-
-
-
-## Run Stress Test
-```
-cd stress_test
-locust
-```
-![stress test result](https://i.imgur.com/iCWkUQ6.jpeg)
\ No newline at end of file
diff --git a/intel_extension_for_transformers/neural_chat/tests/restful/stress_test/locust.conf b/intel_extension_for_transformers/neural_chat/tests/restful/stress_test/locust.conf
deleted file mode 100644
index 0d05f42605b..00000000000
--- a/intel_extension_for_transformers/neural_chat/tests/restful/stress_test/locust.conf
+++ /dev/null
@@ -1,23 +0,0 @@
-# !/usr/bin/env python
-# -*- coding: utf-8 -*-
-#
-# Copyright (c) 2023 Intel Corporation
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-locustfile = locust.py
-headless = true
-host = http://ip_address:port # change the ip address and port number according to your server
-users = 100
-spawn-rate = 10
-run-time = 1m # set upper-bound for time of tress test
\ No newline at end of file
diff --git a/intel_extension_for_transformers/neural_chat/tests/restful/stress_test/locust.py b/intel_extension_for_transformers/neural_chat/tests/restful/stress_test/locust.py
deleted file mode 100644
index 58d5e8de444..00000000000
--- a/intel_extension_for_transformers/neural_chat/tests/restful/stress_test/locust.py
+++ /dev/null
@@ -1,71 +0,0 @@
-# !/usr/bin/env python
-# -*- coding: utf-8 -*-
-#
-# Copyright (c) 2023 Intel Corporation
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from locust import HttpUser, task, between
-from intel_extension_for_transformers.neural_chat.tests.restful.config import API_COMPLETION, API_CHAT_COMPLETION, API_ASR, API_TTS, API_FINETUNE, API_TEXT2IMAGE
-import time
-from intel_extension_for_transformers.neural_chat.server.restful.openai_protocol import CompletionRequest, ChatCompletionRequest
-from datasets import Dataset, Audio
-
-
-# locust will create a FeedbackUser instance for each user
-class FeedbackUser(HttpUser):
-    # each simulated user will wait 1~2 seconds for the next operation
-    wait_time = between(0.5, 2)
-
-    @task
-    def test_completions(self):
-        time.sleep(0.01)
-        request = CompletionRequest(
-            model="mpt-7b-chat",
-            prompt="This is a test."
-        )
-        self.client.post(API_COMPLETION, data=request)
-
-    @task
-    def test_chat_completions(self):
-        time.sleep(0.01)
-        request = ChatCompletionRequest(
-            model="mpt-7b-chat",
-            messages=[
-                {"role": "system","content": "You are a helpful assistant."},
-                {"role": "user","content": "Hello!"}
-            ]
-        )
-        self.client.post(API_CHAT_COMPLETION, data=request)
-
-    @task
-    def test_asr(self):
-        audio_path = "../../../assets/audio/pat.wav"
-        audio_dataset = Dataset.from_dict({"audio": [audio_path]}).cast_column("audio", Audio(sampling_rate=16000))
-        waveform = audio_dataset[0]["audio"]['array']
-        self.client.post(API_ASR, data=waveform)
-
-    @task
-    def test_tts(self):
-        text = "Welcome to Neural Chat"
-        self.client.post(API_TTS, data=text)
-
-    @task
-    def test_text2image(self):
-        text = "A running horse."
-        self.client.post(API_TEXT2IMAGE, data=text)
-
-    @task
-    def test_finetune(self):
-        self.client.post(API_FINETUNE)
-        time.sleep(2)
\ No newline at end of file
diff --git a/intel_extension_for_transformers/neural_chat/tests/restful/unit_test/test_finetune_api.py b/intel_extension_for_transformers/neural_chat/tests/restful/unit_test/test_finetune_api.py
deleted file mode 100644
index a75fd147c68..00000000000
--- a/intel_extension_for_transformers/neural_chat/tests/restful/unit_test/test_finetune_api.py
+++ /dev/null
@@ -1,66 +0,0 @@
-# !/usr/bin/env python
-# -*- coding: utf-8 -*-
-#
-# Copyright (c) 2023 Intel Corporation
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import requests
-import unittest
-import shutil
-import os
-import json
-from intel_extension_for_transformers.neural_chat.tests.restful.config import HOST, API_FINETUNE
-from intel_extension_for_transformers.neural_chat.cli.log import logger
-
-
-json_data = \
-"""
-[
-    {"instruction": "Generate a slogan for a software company", "input": "", "output": "The Future of Software is Here"},
-    {"instruction": "Provide the word that comes immediately after the.", "input": "He threw the ball over the fence.", "output": "fence."}
-]
-"""
-test_data_file = '/test.json'
-
-
-class UnitTest(unittest.TestCase):
-
-    @classmethod
-    def setUpClass(self):
-        with open(test_data_file, mode='w') as f:
-            f.write(json_data)
-
-    @classmethod
-    def tearDownClass(self):
-        shutil.rmtree('./tmp', ignore_errors=True)
-        os.remove(test_data_file)
-
-    def __init__(self, *args):
-        super(UnitTest, self).__init__(*args)
-        self.host = HOST
-
-    def test_finetune(self):
-        logger.info(f'Testing POST request: {self.host+API_FINETUNE}')
-        request = {
-            "model_name_or_path": "facebook/opt-125m",
-            "train_file": "/test.json"
-        }
-        response = requests.post(self.host+API_FINETUNE, json.dumps(request))
-        logger.info('Response status code: {}'.format(response.status_code))
-        logger.info('Response text: {}'.format(response.text))
-        self.assertEqual(response.status_code, 200, msg="Abnormal response status code.")
-
-
-if __name__ == "__main__":
-    unittest.main()
\ No newline at end of file
diff --git a/intel_extension_for_transformers/neural_chat/tests/restful/unit_test/test_textchat_api.py b/intel_extension_for_transformers/neural_chat/tests/restful/unit_test/test_textchat_api.py
deleted file mode 100644
index 27633ad86ff..00000000000
--- a/intel_extension_for_transformers/neural_chat/tests/restful/unit_test/test_textchat_api.py
+++ /dev/null
@@ -1,55 +0,0 @@
-# !/usr/bin/env python
-# -*- coding: utf-8 -*-
-#
-# Copyright (c) 2023 Intel Corporation
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import json
-import requests
-import unittest
-from intel_extension_for_transformers.neural_chat.tests.restful.config import HOST, API_COMPLETION, API_CHAT_COMPLETION
-from intel_extension_for_transformers.neural_chat.cli.log import logger
-
-
-class UnitTest(unittest.TestCase):
-
-    def __init__(self, *args):
-        super(UnitTest, self).__init__(*args)
-        self.host = HOST
-
-    def test_completions(self):
-        logger.info(f'Testing POST request: {self.host+API_COMPLETION}')
-        request = {
-            "prompt": "Tell me about Intel Xeon Scalable Processors."
-        }
-        response = requests.post(self.host+API_COMPLETION, json.dumps(request))
-        response_dict = response.json()
-        logger.info('Response status code: {}'.format(response.status_code))
-        logger.info('Response text: {}'.format(response_dict['response']))
-        self.assertEqual(response.status_code, 200, msg="Abnormal response status code.")
-
-    def test_chat_completions(self):
-        logger.info(f'Testing POST request: {self.host+API_CHAT_COMPLETION}')
-        request = {
-            "prompt": "Tell me about Intel Xeon Scalable Processors."
-        }
-        response = requests.post(self.host+API_CHAT_COMPLETION, json.dumps(request))
-        response_dict = response.json()
-        logger.info('Response status code: {}'.format(response.status_code))
-        logger.info('Response text: {}'.format(response_dict['response']))
-        self.assertEqual(response.status_code, 200, msg="Abnormal response status code.")
-
-
-if __name__ == "__main__":
-    unittest.main()
\ No newline at end of file
diff --git a/intel_extension_for_transformers/neural_chat/tests/restful/unit_test/test_voicechat_api.py b/intel_extension_for_transformers/neural_chat/tests/restful/unit_test/test_voicechat_api.py
deleted file mode 100644
index b2b1a8e9677..00000000000
--- a/intel_extension_for_transformers/neural_chat/tests/restful/unit_test/test_voicechat_api.py
+++ /dev/null
@@ -1,65 +0,0 @@
-# !/usr/bin/env python
-# -*- coding: utf-8 -*-
-#
-# Copyright (c) 2023 Intel Corporation
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import requests
-import unittest
-from datasets import Dataset, Audio
-from intel_extension_for_transformers.neural_chat.tests.restful.config import HOST, API_AUDIO
-from intel_extension_for_transformers.neural_chat.cli.log import logger
-
-
-class UnitTest(unittest.TestCase):
-
-    def __init__(self, *args):
-        super(UnitTest, self).__init__(*args)
-        self.host = HOST
-
-    def test_voicechat_text_out(self):
-        logger.info(f'Testing POST request: {self.host+API_AUDIO} with text output.')
-        audio_path = "../../../assets/audio/pat.wav"
-
-        with open(audio_path, "rb") as wav_file:
-            files = {
-                "file": ("audio.wav", wav_file, "audio/wav"),
-                "voice": (None, "pat"),
-                "audio_output_path": (None, " ")
-            }
-            response = requests.post(self.host+API_AUDIO, files=files)
-
-        logger.info('Response status code: {}'.format(response.status_code))
-        logger.info('Response text: {}'.format(response.text))
-        self.assertEqual(response.status_code, 200, msg="Abnormal response status code.")
-
-    def test_voicechat_audio_out(self):
-        logger.info(f'Testing POST request: {self.host+API_AUDIO} with audio output.')
-        audio_path = "../../../assets/audio/pat.wav"
-
-        with open(audio_path, "rb") as wav_file:
-            files = {
-                "file": ("audio.wav", wav_file, "audio/wav"),
-                "voice": (None, "pat"),
-                "audio_output_path": (None, "./response.wav")
-            }
-            response = requests.post(self.host+API_AUDIO, files=files)
-
-        logger.info('Response status code: {}'.format(response.status_code))
-        logger.info('Response text: {}'.format(response.text))
-        self.assertEqual(response.status_code, 200, msg="Abnormal response status code.")
-
-
-if __name__ == "__main__":
-    unittest.main()