From 95b1ea41d61f361b355cdf9008f7445e9f8bc234 Mon Sep 17 00:00:00 2001 From: Arnaud Date: Mon, 18 Sep 2023 19:38:16 +0100 Subject: [PATCH] update workshop with new titan embeddings --- 04_Chatbot/00_Chatbot_Titan.ipynb | 389 +++--------------------------- 1 file changed, 32 insertions(+), 357 deletions(-) diff --git a/04_Chatbot/00_Chatbot_Titan.ipynb b/04_Chatbot/00_Chatbot_Titan.ipynb index c00a898f..54806cb4 100644 --- a/04_Chatbot/00_Chatbot_Titan.ipynb +++ b/04_Chatbot/00_Chatbot_Titan.ipynb @@ -65,111 +65,9 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Processing /root/amazon-bedrock-workshop/dependencies/awscli-1.29.21-py3-none-any.whl\n", - "Processing /root/amazon-bedrock-workshop/dependencies/boto3-1.28.21-py3-none-any.whl\n", - "Processing /root/amazon-bedrock-workshop/dependencies/botocore-1.31.21-py3-none-any.whl\n", - "Collecting docutils<0.17,>=0.10 (from awscli==1.29.21)\n", - " Using cached docutils-0.16-py2.py3-none-any.whl (548 kB)\n", - "Collecting s3transfer<0.7.0,>=0.6.0 (from awscli==1.29.21)\n", - " Obtaining dependency information for s3transfer<0.7.0,>=0.6.0 from https://files.pythonhosted.org/packages/d9/17/a3b666f5ef9543cfd3c661d39d1e193abb9649d0cfbbfee3cf3b51d5af02/s3transfer-0.6.2-py3-none-any.whl.metadata\n", - " Using cached s3transfer-0.6.2-py3-none-any.whl.metadata (1.8 kB)\n", - "Collecting PyYAML<6.1,>=3.10 (from awscli==1.29.21)\n", - " Obtaining dependency information for PyYAML<6.1,>=3.10 from https://files.pythonhosted.org/packages/c8/6b/6600ac24725c7388255b2f5add93f91e58a5d7efaf4af244fdbcc11a541b/PyYAML-6.0.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata\n", - " Using cached PyYAML-6.0.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (2.1 kB)\n", - "Collecting colorama<0.4.5,>=0.2.5 (from awscli==1.29.21)\n", - " Using cached colorama-0.4.4-py2.py3-none-any.whl (16 kB)\n", - "Collecting rsa<4.8,>=3.1.2 (from awscli==1.29.21)\n", - " Using cached rsa-4.7.2-py3-none-any.whl (34 kB)\n", - "Collecting jmespath<2.0.0,>=0.7.1 (from botocore==1.31.21)\n", - " Using cached jmespath-1.0.1-py3-none-any.whl (20 kB)\n", - "Collecting python-dateutil<3.0.0,>=2.1 (from botocore==1.31.21)\n", - " Using cached python_dateutil-2.8.2-py2.py3-none-any.whl (247 kB)\n", - "Collecting urllib3<1.27,>=1.25.4 (from botocore==1.31.21)\n", - " Obtaining dependency information for urllib3<1.27,>=1.25.4 from https://files.pythonhosted.org/packages/c5/05/c214b32d21c0b465506f95c4f28ccbcba15022e000b043b72b3df7728471/urllib3-1.26.16-py2.py3-none-any.whl.metadata\n", - " Using cached urllib3-1.26.16-py2.py3-none-any.whl.metadata (48 kB)\n", - "Collecting six>=1.5 (from python-dateutil<3.0.0,>=2.1->botocore==1.31.21)\n", - " Using cached six-1.16.0-py2.py3-none-any.whl (11 kB)\n", - "Collecting pyasn1>=0.1.3 (from rsa<4.8,>=3.1.2->awscli==1.29.21)\n", - " Using cached pyasn1-0.5.0-py2.py3-none-any.whl (83 kB)\n", - "Using cached PyYAML-6.0.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (736 kB)\n", - "Using cached s3transfer-0.6.2-py3-none-any.whl (79 kB)\n", - "Using cached urllib3-1.26.16-py2.py3-none-any.whl (143 kB)\n", - "\u001b[33mDEPRECATION: pyodbc 4.0.0-unsupported has a non-standard version number. pip 23.3 will enforce this behaviour change. A possible replacement is to upgrade to a newer version of pyodbc or contact the author to suggest that they release a version with a conforming version number. Discussion can be found at https://github.com/pypa/pip/issues/12063\u001b[0m\u001b[33m\n", - "\u001b[0mInstalling collected packages: urllib3, six, PyYAML, pyasn1, jmespath, docutils, colorama, rsa, python-dateutil, botocore, s3transfer, boto3, awscli\n", - " Attempting uninstall: urllib3\n", - " Found existing installation: urllib3 1.26.16\n", - " Uninstalling urllib3-1.26.16:\n", - " Successfully uninstalled urllib3-1.26.16\n", - " Attempting uninstall: six\n", - " Found existing installation: six 1.16.0\n", - " Uninstalling six-1.16.0:\n", - " Successfully uninstalled six-1.16.0\n", - " Attempting uninstall: PyYAML\n", - " Found existing installation: PyYAML 6.0.1\n", - " Uninstalling PyYAML-6.0.1:\n", - " Successfully uninstalled PyYAML-6.0.1\n", - " Attempting uninstall: pyasn1\n", - " Found existing installation: pyasn1 0.5.0\n", - " Uninstalling pyasn1-0.5.0:\n", - " Successfully uninstalled pyasn1-0.5.0\n", - " Attempting uninstall: jmespath\n", - " Found existing installation: jmespath 1.0.1\n", - " Uninstalling jmespath-1.0.1:\n", - " Successfully uninstalled jmespath-1.0.1\n", - " Attempting uninstall: docutils\n", - " Found existing installation: docutils 0.16\n", - " Uninstalling docutils-0.16:\n", - " Successfully uninstalled docutils-0.16\n", - " Attempting uninstall: colorama\n", - " Found existing installation: colorama 0.4.4\n", - " Uninstalling colorama-0.4.4:\n", - " Successfully uninstalled colorama-0.4.4\n", - " Attempting uninstall: rsa\n", - " Found existing installation: rsa 4.7.2\n", - " Uninstalling rsa-4.7.2:\n", - " Successfully uninstalled rsa-4.7.2\n", - " Attempting uninstall: python-dateutil\n", - " Found existing installation: python-dateutil 2.8.2\n", - " Uninstalling python-dateutil-2.8.2:\n", - " Successfully uninstalled python-dateutil-2.8.2\n", - " Attempting uninstall: botocore\n", - " Found existing installation: botocore 1.31.21\n", - " Uninstalling botocore-1.31.21:\n", - " Successfully uninstalled botocore-1.31.21\n", - " Attempting uninstall: s3transfer\n", - " Found existing installation: s3transfer 0.6.2\n", - " Uninstalling s3transfer-0.6.2:\n", - " Successfully uninstalled s3transfer-0.6.2\n", - " Attempting uninstall: boto3\n", - " Found existing installation: boto3 1.28.21\n", - " Uninstalling boto3-1.28.21:\n", - " Successfully uninstalled boto3-1.28.21\n", - " Attempting uninstall: awscli\n", - " Found existing installation: awscli 1.29.21\n", - " Uninstalling awscli-1.29.21:\n", - " Successfully uninstalled awscli-1.29.21\n", - "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n", - "spyder 5.1.5 requires pyqt5<5.13, which is not installed.\n", - "spyder 5.1.5 requires pyqtwebengine<5.13, which is not installed.\n", - "jupyterlab 3.2.1 requires jupyter-server~=1.4, but you have jupyter-server 2.7.3 which is incompatible.\n", - "jupyterlab 3.2.1 requires nbclassic~=0.2, but you have nbclassic 1.0.0 which is incompatible.\n", - "jupyterlab-server 2.8.2 requires jupyter-server~=1.4, but you have jupyter-server 2.7.3 which is incompatible.\n", - "sagemaker-datawrangler 0.4.3 requires sagemaker-data-insights==0.4.0, but you have sagemaker-data-insights 0.3.3 which is incompatible.\n", - "spyder 5.1.5 requires pylint<2.10.0,>=2.5.0, but you have pylint 3.0.0a7 which is incompatible.\n", - "spyder-kernels 2.1.3 requires jupyter-client<7,>=5.3.4, but you have jupyter-client 7.4.9 which is incompatible.\u001b[0m\u001b[31m\n", - "\u001b[0mSuccessfully installed PyYAML-6.0.1 awscli-1.29.21 boto3-1.28.21 botocore-1.31.21 colorama-0.4.4 docutils-0.16 jmespath-1.0.1 pyasn1-0.5.0 python-dateutil-2.8.2 rsa-4.7.2 s3transfer-0.6.2 six-1.16.0 urllib3-1.26.16\n", - "\u001b[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv\u001b[0m\u001b[33m\n", - "\u001b[0mNote: you may need to restart the kernel to use updated packages.\n" - ] - } - ], + "outputs": [], "source": [ "# Make sure you ran `download-dependencies.sh` from the root of the repository first!\n", "%pip install --no-build-isolation --force-reinstall \\\n", @@ -191,39 +89,18 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u001b[33mDEPRECATION: pyodbc 4.0.0-unsupported has a non-standard version number. pip 23.3 will enforce this behaviour change. A possible replacement is to upgrade to a newer version of pyodbc or contact the author to suggest that they release a version with a conforming version number. Discussion can be found at https://github.com/pypa/pip/issues/12063\u001b[0m\u001b[33m\n", - "\u001b[0m\u001b[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv\u001b[0m\u001b[33m\n", - "\u001b[0mNote: you may need to restart the kernel to use updated packages.\n" - ] - } - ], + "outputs": [], "source": [ "%pip install --quiet \"faiss-cpu>=1.7,<2\" \"ipywidgets>=7,<8\" langchain==0.0.249 \"pypdf>=3.8,<4\"" ] }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Create new client\n", - " Using region: us-east-1\n", - "boto3 Bedrock client successfully created!\n", - "bedrock(https://bedrock.us-east-1.amazonaws.com)\n" - ] - } - ], + "outputs": [], "source": [ "import json\n", "import os\n", @@ -266,31 +143,11 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "metadata": { "tags": [] }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "\n", - "\u001b[1m> Entering new ConversationChain chain...\u001b[0m\n", - "Prompt after formatting:\n", - "\u001b[32;1m\u001b[1;3mSystem: The following is a friendly conversation between a knowledgeable helpful assistant and a customer. The assistant is talkative and provides lots of specific details from it's context.\n", - "\n", - "Current conversation:\n", - "\n", - "User: Hi there!\n", - "Bot:\u001b[0m\n", - "\n", - "\u001b[1m> Finished chain.\u001b[0m\n", - " Hello! How can I assist you today?\n" - ] - } - ], + "outputs": [], "source": [ "from langchain.chains import ConversationChain\n", "from langchain.llms.bedrock import Bedrock\n", @@ -322,33 +179,11 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "metadata": { "tags": [] }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "\n", - "\u001b[1m> Entering new ConversationChain chain...\u001b[0m\n", - "Prompt after formatting:\n", - "\u001b[32;1m\u001b[1;3mSystem: The following is a friendly conversation between a knowledgeable helpful assistant and a customer. The assistant is talkative and provides lots of specific details from it's context.\n", - "\n", - "Current conversation:\n", - "User: Hi there!\n", - "Bot: Hello! How can I assist you today?\n", - "User: Give me a few tips on how to start a new garden.\n", - "Bot:\u001b[0m\n", - "\n", - "\u001b[1m> Finished chain.\u001b[0m\n", - " Sure, I would love to help! Gardening is such a rewarding hobby. First, you will need to decide\n", - "what you would like to grow.\n" - ] - } - ], + "outputs": [], "source": [ "print_ww(conversation.predict(input=\"Give me a few tips on how to start a new garden.\"))" ] @@ -364,35 +199,11 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "metadata": { "tags": [] }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "\n", - "\u001b[1m> Entering new ConversationChain chain...\u001b[0m\n", - "Prompt after formatting:\n", - "\u001b[32;1m\u001b[1;3mSystem: The following is a friendly conversation between a knowledgeable helpful assistant and a customer. The assistant is talkative and provides lots of specific details from it's context.\n", - "\n", - "Current conversation:\n", - "User: Hi there!\n", - "Bot: Hello! How can I assist you today?\n", - "User: Give me a few tips on how to start a new garden.\n", - "Bot: Sure, I would love to help! Gardening is such a rewarding hobby. First, you will need to decide what you would like to grow.\n", - "User: Cool. Will that work with tomatoes?\n", - "Bot:\u001b[0m\n", - "\n", - "\u001b[1m> Finished chain.\u001b[0m\n", - " Absolutely! Tomatoes are a great choice for a beginner gardener. They are relatively easy to grow\n", - "and can yield a lot of fruit.\n" - ] - } - ], + "outputs": [], "source": [ "print_ww(conversation.predict(input=\"Cool. Will that work with tomatoes?\"))" ] @@ -406,36 +217,11 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": null, "metadata": { "tags": [] }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "\n", - "\u001b[1m> Entering new ConversationChain chain...\u001b[0m\n", - "Prompt after formatting:\n", - "\u001b[32;1m\u001b[1;3mSystem: The following is a friendly conversation between a knowledgeable helpful assistant and a customer. The assistant is talkative and provides lots of specific details from it's context.\n", - "\n", - "Current conversation:\n", - "User: Hi there!\n", - "Bot: Hello! How can I assist you today?\n", - "User: Give me a few tips on how to start a new garden.\n", - "Bot: Sure, I would love to help! Gardening is such a rewarding hobby. First, you will need to decide what you would like to grow.\n", - "User: Cool. Will that work with tomatoes?\n", - "Bot: Absolutely! Tomatoes are a great choice for a beginner gardener. They are relatively easy to grow and can yield a lot of fruit.\n", - "User: That's all, thank you!\n", - "Bot:\u001b[0m\n", - "\n", - "\u001b[1m> Finished chain.\u001b[0m\n", - " You're welcome! Have a great day!\n" - ] - } - ], + "outputs": [], "source": [ "print_ww(conversation.predict(input=\"That's all, thank you!\"))" ] @@ -456,24 +242,11 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": null, "metadata": { "tags": [] }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "ChatBot:DEFAULT:PROMPT:TEMPLATE: is =System: The following is a friendly conversation between a knowledgeable helpful assistant and a customer. The assistant is talkative and provides lots of specific details from it's context.\n", - "\n", - "Current conversation:\n", - "{history}\n", - "User: {input}\n", - "Bot:\n" - ] - } - ], + "outputs": [], "source": [ "from langchain.memory import ConversationBufferMemory\n", "from langchain import PromptTemplate\n", @@ -495,7 +268,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -561,31 +334,9 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Starting chat bot\n" - ] - }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "21cf954a65a0417f92356025b1968956", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "Output()" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "chat = ChatUX(qa)\n", "chat.start_chat()" @@ -609,35 +360,11 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": null, "metadata": { "tags": [] }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "\n", - "\u001b[1m> Entering new ConversationChain chain...\u001b[0m\n", - "Prompt after formatting:\n", - "\u001b[32;1m\u001b[1;3mThe following is a friendly conversation between a human and an AI. The AI is talkative and provides lots of specific details from its context. If the AI does not know the answer to a question, it truthfully says it does not know.\n", - "\n", - "Current conversation:\n", - "Human: You will be acting as a career coach. Your goal is to give career advice to users\n", - "AI: I am career coach and give career advice\n", - "Human: What are the career options in AI?\n", - "AI:\u001b[0m\n", - "\n", - "\u001b[1m> Finished chain.\u001b[0m\n", - " There are many different career options in the field of AI. Some common roles include AI engineer,\n", - "AI researcher, AI data scientist, AI business analyst, and AI project manager. These roles require\n", - "different skills and expertise, such as programming, statistics, machine learning, and problem-\n", - "solving.\n" - ] - } - ], + "outputs": [], "source": [ "memory = ConversationBufferMemory()\n", "memory.chat_memory.add_user_message(\"You will be acting as a career coach. Your goal is to give career advice to users\")\n", @@ -659,19 +386,11 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": null, "metadata": { "tags": [] }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " I do not have knowledge about cars.\n" - ] - } - ], + "outputs": [], "source": [ "conversation.verbose = False\n", "print_ww(conversation.predict(input=\"How to fix my car?\"))" @@ -699,7 +418,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": null, "metadata": { "tags": [] }, @@ -734,22 +453,11 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": null, "metadata": { "tags": [] }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "download: s3://jumpstart-cache-prod-us-east-2/training-datasets/Amazon_SageMaker_FAQs/Amazon_SageMaker_FAQs.csv to rag_data/Amazon_SageMaker_FAQs.csv\n", - "documents:loaded:size=153\n", - "Documents:after split and chunking size=154\n", - "vectorstore_faiss_aws:created=::\n" - ] - } - ], + "outputs": [], "source": [ "from langchain.document_loaders import CSVLoader\n", "from langchain.text_splitter import CharacterTextSplitter\n", @@ -786,22 +494,11 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": null, "metadata": { "tags": [] }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " R is supported with Amazon SageMaker notebook instances, which include a preinstalled R kernel and\n", - "the reticulate library. Reticulate offers an R interface for the Amazon SageMaker Python SDK,\n", - "enabling ML practitioners to build, train, tune, and deploy R models.\n", - "\n" - ] - } - ], + "outputs": [], "source": [ "wrapper_store_faiss = VectorStoreIndexWrapper(vectorstore=vectorstore_faiss_aws)\n", "print_ww(wrapper_store_faiss.query(\"R in SageMaker\", llm=titan_llm))" @@ -822,7 +519,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": null, "metadata": { "tags": [] }, @@ -867,7 +564,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": null, "metadata": { "tags": [] }, @@ -909,31 +606,9 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Starting chat bot\n" - ] - }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "1a567c533f3e47adb75247ea4bc62865", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "Output()" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "chat = ChatUX(qa, retrievalChain=True)\n", "chat.start_chat()"