From f3773afa2faeb5ce394883d02851e5560a106a82 Mon Sep 17 00:00:00 2001 From: Arnaud Date: Thu, 14 Sep 2023 18:21:43 +0100 Subject: [PATCH] update workshop with new titan embeddings --- 04_Chatbot/00_Chatbot_Titan.ipynb | 150 ++++++++++++++---------------- 1 file changed, 72 insertions(+), 78 deletions(-) diff --git a/04_Chatbot/00_Chatbot_Titan.ipynb b/04_Chatbot/00_Chatbot_Titan.ipynb index 7ae8c033..c00a898f 100644 --- a/04_Chatbot/00_Chatbot_Titan.ipynb +++ b/04_Chatbot/00_Chatbot_Titan.ipynb @@ -17,7 +17,7 @@ "source": [ "## Overview\n", "\n", - "Conversational interfaces such as chatbots and virtual assistants can be used to enhance the user experience for your customers.Chatbots uses natural language processing (NLP) and machine learning algorithms to understand and respond to user queries. Chatbots can be used in a variety of applications, such as customer service, sales, and e-commerce, to provide quick and efficient responses to users. They can be accessed through various channels such as websites, social media platforms, and messaging apps.\n", + "Conversational interfaces such as chatbots and virtual assistants can be used to enhance the user experience for your customers. Chatbots use natural language processing (NLP) and machine learning algorithms to understand and respond to user queries. Chatbots can be used in a variety of applications, such as customer service, sales, and e-commerce, to provide quick and efficient responses to users. They can be accessed through various channels such as websites, social media platforms, and messaging apps.\n", "\n", "\n", "## Chatbot using Amazon Bedrock\n", @@ -30,7 +30,7 @@ "1. **Chatbot (Basic)** - Zero Shot chatbot with a FM model\n", "2. **Chatbot using prompt** - template(Langchain) - Chatbot with some context provided in the prompt template\n", "3. **Chatbot with persona** - Chatbot with defined roles. i.e. Career Coach and Human interactions\n", - "4. 
**Contextual-aware chatbot** - Passing in context through an external file by generating embeddings.\n", + "4. **Contextual-aware chatbot** - Passing in context through an external file by generating embeddings\n", "\n", "## Langchain framework for building Chatbot with Amazon Bedrock\n", "In Conversational interfaces such as chatbots, it is highly important to remember previous interactions, both at a short term but also at a long term level.\n", @@ -40,11 +40,11 @@ "\n", "## Building Chatbot with Context - Key Elements\n", "\n", - "The first process in a building a contextual-aware chatbot is to **generate embeddings** for the context. Typically, you will have an ingestion process which will run through your embedding model and generate the embeddings which will be stored in a sort of a vector store. In this example we are using Titan Embeddings model for this\n", + "The first process in building a contextual-aware chatbot is to **generate embeddings** for the context. Typically, you will have an ingestion process which will run through your embedding model and generate the embeddings which will be stored in a sort of a vector store. In this example we are using a Titan embeddings model for this.\n", "\n", "![Embeddings](./images/embeddings_lang.png)\n", "\n", - "Second process is the user request orchestration , interaction, invoking and returing the results\n", + "The second process is the user request orchestration, interaction, invoking and returning the results.\n", "\n", "![Chatbot](./images/chatbot_lang.png)\n", "\n", @@ -279,15 +279,15 @@ "\n", "\u001b[1m> Entering new ConversationChain chain...\u001b[0m\n", "Prompt after formatting:\n", - "\u001b[32;1m\u001b[1;3mThe following is a friendly conversation between a human and an AI. The AI is talkative and provides lots of specific details from its context. 
If the AI does not know the answer to a question, it truthfully says it does not know.\n", + "\u001b[32;1m\u001b[1;3mSystem: The following is a friendly conversation between a knowledgeable helpful assistant and a customer. The assistant is talkative and provides lots of specific details from it's context.\n", "\n", "Current conversation:\n", "\n", - "Human: Hi there!\n", - "AI:\u001b[0m\n", + "User: Hi there!\n", + "Bot:\u001b[0m\n", "\n", "\u001b[1m> Finished chain.\u001b[0m\n", - " Hello! How are you today?\n" + " Hello! How can I assist you today?\n" ] } ], @@ -297,10 +297,16 @@ "from langchain.memory import ConversationBufferMemory\n", "\n", "titan_llm = Bedrock(model_id=\"amazon.titan-tg1-large\", client=boto3_bedrock)\n", + "titan_llm.model_kwargs = {'temperature': 0.5, \"maxTokenCount\": 700}\n", + "\n", "memory = ConversationBufferMemory()\n", + "memory.human_prefix = \"User\"\n", + "memory.ai_prefix = \"Bot\"\n", + "\n", "conversation = ConversationChain(\n", " llm=titan_llm, verbose=True, memory=memory\n", ")\n", + "conversation.prompt.template = \"\"\"System: The following is a friendly conversation between a knowledgeable helpful assistant and a customer. The assistant is talkative and provides lots of specific details from it's context.\\n\\nCurrent conversation:\\n{history}\\nUser: {input}\\nBot:\"\"\"\n", "\n", "print_ww(conversation.predict(input=\"Hi there!\"))" ] @@ -329,23 +335,17 @@ "\n", "\u001b[1m> Entering new ConversationChain chain...\u001b[0m\n", "Prompt after formatting:\n", - "\u001b[32;1m\u001b[1;3mThe following is a friendly conversation between a human and an AI. The AI is talkative and provides lots of specific details from its context. If the AI does not know the answer to a question, it truthfully says it does not know.\n", + "\u001b[32;1m\u001b[1;3mSystem: The following is a friendly conversation between a knowledgeable helpful assistant and a customer. 
The assistant is talkative and provides lots of specific details from it's context.\n", "\n", "Current conversation:\n", - "Human: Hi there!\n", - "AI: Hello! How are you today?\n", - "Human: Give me a few tips on how to start a new garden.\n", - "AI:\u001b[0m\n", + "User: Hi there!\n", + "Bot: Hello! How can I assist you today?\n", + "User: Give me a few tips on how to start a new garden.\n", + "Bot:\u001b[0m\n", "\n", "\u001b[1m> Finished chain.\u001b[0m\n", - " Sure, I'd be happy to help! Here are some tips for starting a new garden:\n", - "1. Choose the right location: Select a spot in your yard that receives plenty of sunlight for at\n", - "least 6-8 hours per day.\n", - "2. Prepare the soil: Clear the area of any weeds, rocks, or debris, and loosen the soil with a\n", - "tiller or garden fork.\n", - "3. Choose your plants: Select plants that are well-suited to your climate and soil type, and\n", - "consider factors like sunlight, water requirements, and space requirements.\n", - "4. Start from seeds or seedlings: You can either start\n" + " Sure, I would love to help! Gardening is such a rewarding hobby. First, you will need to decide\n", + "what you would like to grow.\n" ] } ], @@ -377,22 +377,19 @@ "\n", "\u001b[1m> Entering new ConversationChain chain...\u001b[0m\n", "Prompt after formatting:\n", - "\u001b[32;1m\u001b[1;3mThe following is a friendly conversation between a human and an AI. The AI is talkative and provides lots of specific details from its context. If the AI does not know the answer to a question, it truthfully says it does not know.\n", + "\u001b[32;1m\u001b[1;3mSystem: The following is a friendly conversation between a knowledgeable helpful assistant and a customer. The assistant is talkative and provides lots of specific details from it's context.\n", "\n", "Current conversation:\n", - "Human: Hi there!\n", - "AI: Hello! 
How are you today?\n", - "Human: Give me a few tips on how to start a new garden.\n", - "AI: Sure, I'd be happy to help! Here are some tips for starting a new garden:\n", - "1. Choose the right location: Select a spot in your yard that receives plenty of sunlight for at least 6-8 hours per day.\n", - "2. Prepare the soil: Clear the area of any weeds, rocks, or debris, and loosen the soil with a tiller or garden fork.\n", - "3. Choose your plants: Select plants that are well-suited to your climate and soil type, and consider factors like sunlight, water requirements, and space requirements.\n", - "4. Start from seeds or seedlings: You can either start\n", - "Human: Cool. Will that work with tomatoes?\n", - "AI:\u001b[0m\n", + "User: Hi there!\n", + "Bot: Hello! How can I assist you today?\n", + "User: Give me a few tips on how to start a new garden.\n", + "Bot: Sure, I would love to help! Gardening is such a rewarding hobby. First, you will need to decide what you would like to grow.\n", + "User: Cool. Will that work with tomatoes?\n", + "Bot:\u001b[0m\n", "\n", "\u001b[1m> Finished chain.\u001b[0m\n", - " I am sorry. I do not know the answer to that question.\n" + " Absolutely! Tomatoes are a great choice for a beginner gardener. They are relatively easy to grow\n", + "and can yield a lot of fruit.\n" ] } ], @@ -422,24 +419,20 @@ "\n", "\u001b[1m> Entering new ConversationChain chain...\u001b[0m\n", "Prompt after formatting:\n", - "\u001b[32;1m\u001b[1;3mThe following is a friendly conversation between a human and an AI. The AI is talkative and provides lots of specific details from its context. If the AI does not know the answer to a question, it truthfully says it does not know.\n", + "\u001b[32;1m\u001b[1;3mSystem: The following is a friendly conversation between a knowledgeable helpful assistant and a customer. 
The assistant is talkative and provides lots of specific details from it's context.\n", "\n", "Current conversation:\n", - "Human: Hi there!\n", - "AI: Hello! How are you today?\n", - "Human: Give me a few tips on how to start a new garden.\n", - "AI: Sure, I'd be happy to help! Here are some tips for starting a new garden:\n", - "1. Choose the right location: Select a spot in your yard that receives plenty of sunlight for at least 6-8 hours per day.\n", - "2. Prepare the soil: Clear the area of any weeds, rocks, or debris, and loosen the soil with a tiller or garden fork.\n", - "3. Choose your plants: Select plants that are well-suited to your climate and soil type, and consider factors like sunlight, water requirements, and space requirements.\n", - "4. Start from seeds or seedlings: You can either start\n", - "Human: Cool. Will that work with tomatoes?\n", - "AI: I am sorry. I do not know the answer to that question.\n", - "Human: That's all, thank you!\n", - "AI:\u001b[0m\n", + "User: Hi there!\n", + "Bot: Hello! How can I assist you today?\n", + "User: Give me a few tips on how to start a new garden.\n", + "Bot: Sure, I would love to help! Gardening is such a rewarding hobby. First, you will need to decide what you would like to grow.\n", + "User: Cool. Will that work with tomatoes?\n", + "Bot: Absolutely! Tomatoes are a great choice for a beginner gardener. They are relatively easy to grow and can yield a lot of fruit.\n", + "User: That's all, thank you!\n", + "Bot:\u001b[0m\n", "\n", "\u001b[1m> Finished chain.\u001b[0m\n", - " You're welcome! Feel free to ask me any additional questions.\n" + " You're welcome! Have a great day!\n" ] } ], @@ -458,7 +451,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "PromptTemplate is responsible for the construction of this input. LangChain provides several classes and functions to make constructing and working with prompts easy. We will use the default Prompt Template here. 
[PromptTemplate](https://python.langchain.com/en/latest/modules/prompts/getting_started.html)" + "PromptTemplate is responsible for the construction of this input. LangChain provides several classes and functions to make constructing and working with prompts easy. We will use the default [PromptTemplate](https://python.langchain.com/en/latest/modules/prompts/getting_started.html) here." ] }, { @@ -472,12 +465,12 @@ "name": "stdout", "output_type": "stream", "text": [ - "ChatBot:DEFAULT:PROMPT:TEMPLATE: is =The following is a friendly conversation between a human and an AI. The AI is talkative and provides lots of specific details from its context. If the AI does not know the answer to a question, it truthfully says it does not know.\n", + "ChatBot:DEFAULT:PROMPT:TEMPLATE: is =System: The following is a friendly conversation between a knowledgeable helpful assistant and a customer. The assistant is talkative and provides lots of specific details from it's context.\n", "\n", "Current conversation:\n", "{history}\n", - "Human: {input}\n", - "AI:\n" + "User: {input}\n", + "Bot:\n" ] } ], @@ -487,10 +480,15 @@ "\n", "chat_history = []\n", "\n", + "memory=ConversationBufferMemory()\n", + "memory.human_prefix = \"User\"\n", + "memory.ai_prefix = \"Bot\"\n", + "\n", "# turn verbose to true to see the full logs and documents\n", "qa= ConversationChain(\n", - " llm=titan_llm, verbose=False, memory=ConversationBufferMemory() #memory_chain\n", + " llm=titan_llm, verbose=False, memory=memory #memory_chain\n", ")\n", + "qa.prompt.template = \"\"\"System: The following is a friendly conversation between a knowledgeable helpful assistant and a customer. 
The assistant is talkative and provides lots of specific details from it's context.\\n\\nCurrent conversation:\\n{history}\\nUser: {input}\\nBot:\"\"\"\n", "\n", "print(f\"ChatBot:DEFAULT:PROMPT:TEMPLATE: is ={qa.prompt.template}\")" ] @@ -576,7 +574,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "2687f36dd08e4456942bb0abb000a118", + "model_id": "21cf954a65a0417f92356025b1968956", "version_major": 2, "version_minor": 0 }, @@ -606,7 +604,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "AI assistant will play the role of a career coach. Role Play Dialogue requires user message to be set in before starting the chat. ConversationBufferMemory is used to pre-populate the dialog" + "AI assistant will play the role of a career coach. Role Play Dialogue requires user message to be set in before starting the chat. ConversationBufferMemory is used to pre-populate the dialog." ] }, { @@ -633,21 +631,10 @@ "AI:\u001b[0m\n", "\n", "\u001b[1m> Finished chain.\u001b[0m\n", - " If you work in artificial intelligence, you can do things like:\n", - "\n", - "1. Research scientist: Work on developing new AI technology and conducting experiments.\n", - "\n", - "2. Data scientist: Gather, clean, and analyze large amounts of data to help improve AI systems.\n", - "\n", - "3. Machine learning engineer: Build and train machine learning models that can make predictions and\n", - "decisions.\n", - "\n", - "4. AI/ML consultant: Advise companies on how to use AI and machine learning to solve business\n", - "problems.\n", - "\n", - "5. Product manager: Create and manage AI-powered products, such as chatbots or autonomous vehicles.\n", - "\n", - "6. Technical writer: Write documentation and user guides\n" + " There are many different career options in the field of AI. Some common roles include AI engineer,\n", + "AI researcher, AI data scientist, AI business analyst, and AI project manager. 
These roles require\n", + "different skills and expertise, such as programming, statistics, machine learning, and problem-\n", + "solving.\n" ] } ], @@ -667,7 +654,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "##### Let's ask a question that is not specaility of this Persona and the model shouldnn't answer that question and give a reason for that" + "##### Let's ask a question that is not a speciality of this Persona; the model shouldn't answer that question and should give a reason for that." ] }, { @@ -681,7 +668,7 @@ "name": "stdout", "output_type": "stream", "text": [ - " I do not know.\n" + " I do not have knowledge about cars.\n" ] } ], @@ -707,7 +694,7 @@ "Embeddings are a way to represent words, phrases or any other discrete items as vectors in a continuous vector space. This allows machine learning models to perform mathematical operations on these representations and capture semantic relationships between them.\n", "\n", "\n", - "This will be used for the RAG [document search capability](https://labelbox.com/blog/how-vector-similarity-search-works/) \n" + "This will be used for the RAG [document search capability](https://labelbox.com/blog/how-vector-similarity-search-works/). 
\n" ] }, { @@ -725,6 +712,13 @@ "br_embeddings = BedrockEmbeddings(model_id=\"amazon.titan-embed-g1-text-02\", client=boto3_bedrock)" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Create the embeddings for document search" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -752,7 +746,7 @@ "download: s3://jumpstart-cache-prod-us-east-2/training-datasets/Amazon_SageMaker_FAQs/Amazon_SageMaker_FAQs.csv to rag_data/Amazon_SageMaker_FAQs.csv\n", "documents:loaded:size=153\n", "Documents:after split and chunking size=154\n", - "vectorstore_faiss_aws:created=::\n" + "vectorstore_faiss_aws:created=::\n" ] } ], @@ -801,10 +795,10 @@ "name": "stdout", "output_type": "stream", "text": [ - "\n", - "R in SageMaker notebook instances which include a preinstalled R kernel and the reticulate library.\n", - "Reticulate offers an R interface for the Amazon SageMaker Python SDK, enabling ML practitioners to\n", - "build, train, tune, and deploy R models.\n" + " R is supported with Amazon SageMaker notebook instances, which include a preinstalled R kernel and\n", + "the reticulate library. Reticulate offers an R interface for the Amazon SageMaker Python SDK,\n", + "enabling ML practitioners to build, train, tune, and deploy R models.\n", + "\n" ] } ], @@ -928,7 +922,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "f5bb735e4e2c41ad83eeddc12bbd1b33", + "model_id": "1a567c533f3e47adb75247ea4bc62865", "version_major": 2, "version_minor": 0 },