diff --git a/.gitignore b/.gitignore index a7aabf98..c9ac6ade 100644 --- a/.gitignore +++ b/.gitignore @@ -6,3 +6,4 @@ __pycache__ # Files generated by the workshop: /dependencies +data/ diff --git a/00_Intro/bedrock_boto3_setup.ipynb b/00_Intro/bedrock_boto3_setup.ipynb index 6684c7d4..60997daa 100644 --- a/00_Intro/bedrock_boto3_setup.ipynb +++ b/00_Intro/bedrock_boto3_setup.ipynb @@ -61,7 +61,7 @@ }, "outputs": [], "source": [ - "%pip install --quiet langchain==0.0.190" + "%pip install --quiet langchain==0.0.249" ] }, { diff --git a/01_Generation/00_generate_w_bedrock.ipynb b/01_Generation/00_generate_w_bedrock.ipynb index aeb5c793..49fbd02e 100644 --- a/01_Generation/00_generate_w_bedrock.ipynb +++ b/01_Generation/00_generate_w_bedrock.ipynb @@ -72,7 +72,7 @@ " ../dependencies/boto3-1.26.162-py3-none-any.whl \\\n", " ../dependencies/botocore-1.29.162-py3-none-any.whl\n", "\n", - "%pip install --quiet langchain==0.0.190" + "%pip install --quiet langchain==0.0.249" ] }, { diff --git a/01_Generation/01_zero_shot_generation.ipynb b/01_Generation/01_zero_shot_generation.ipynb index 7cc0afe3..58507409 100644 --- a/01_Generation/01_zero_shot_generation.ipynb +++ b/01_Generation/01_zero_shot_generation.ipynb @@ -68,7 +68,7 @@ " ../dependencies/boto3-1.26.162-py3-none-any.whl \\\n", " ../dependencies/botocore-1.29.162-py3-none-any.whl\n", "\n", - "%pip install --quiet langchain==0.0.190" + "%pip install --quiet langchain==0.0.249" ] }, { diff --git a/01_Generation/02_contextual_generation.ipynb b/01_Generation/02_contextual_generation.ipynb index 4ed69eef..224383e4 100644 --- a/01_Generation/02_contextual_generation.ipynb +++ b/01_Generation/02_contextual_generation.ipynb @@ -78,7 +78,7 @@ " ../dependencies/boto3-1.26.162-py3-none-any.whl \\\n", " ../dependencies/botocore-1.29.162-py3-none-any.whl\n", "\n", - "%pip install --quiet langchain==0.0.190 transformers==4.24.0" + "%pip install --quiet langchain==0.0.249 \"transformers>=4.24,<5\"" ] }, { diff --git 
a/02_Summarization/01.small-text-summarization-claude.ipynb b/02_Summarization/01.small-text-summarization-claude.ipynb index a205cc05..708837dc 100644 --- a/02_Summarization/01.small-text-summarization-claude.ipynb +++ b/02_Summarization/01.small-text-summarization-claude.ipynb @@ -65,7 +65,7 @@ " ../dependencies/boto3-1.26.162-py3-none-any.whl \\\n", " ../dependencies/botocore-1.29.162-py3-none-any.whl\n", "\n", - "%pip install --quiet langchain==0.0.190" + "%pip install --quiet langchain==0.0.249" ] }, { diff --git a/02_Summarization/01.small-text-summarization-titan.ipynb b/02_Summarization/01.small-text-summarization-titan.ipynb index efc64196..d98844e3 100644 --- a/02_Summarization/01.small-text-summarization-titan.ipynb +++ b/02_Summarization/01.small-text-summarization-titan.ipynb @@ -64,7 +64,7 @@ " ../dependencies/boto3-1.26.162-py3-none-any.whl \\\n", " ../dependencies/botocore-1.29.162-py3-none-any.whl\n", "\n", - "%pip install --quiet langchain==0.0.190" + "%pip install --quiet langchain==0.0.249" ] }, { diff --git a/02_Summarization/02.long-text-summarization-titan.ipynb b/02_Summarization/02.long-text-summarization-titan.ipynb index 5c1767d0..38ad13e9 100644 --- a/02_Summarization/02.long-text-summarization-titan.ipynb +++ b/02_Summarization/02.long-text-summarization-titan.ipynb @@ -65,7 +65,7 @@ " ../dependencies/boto3-1.26.162-py3-none-any.whl \\\n", " ../dependencies/botocore-1.29.162-py3-none-any.whl\n", "\n", - "%pip install --quiet langchain==0.0.190 transformers==4.24.0" + "%pip install --quiet langchain==0.0.249 \"transformers>=4.24,<5\"" ] }, { @@ -129,16 +129,16 @@ "source": [ "from langchain.llms.bedrock import Bedrock\n", "\n", - "llm = Bedrock(model_id=\"amazon.titan-tg1-large\", \n", - " model_kwargs ={\n", - " \"textGenerationConfig\": {\n", - " \"maxTokenCount\": 4096,\n", - " \"stopSequences\": [],\n", - " \"temperature\":0,\n", - " \"topP\":1\n", - " },\n", - " },\n", - " client=boto3_bedrock)" + "llm = Bedrock(\n", + " 
model_id=\"amazon.titan-tg1-large\",\n", + " model_kwargs={\n", + " \"maxTokenCount\": 4096,\n", + " \"stopSequences\": [],\n", + " \"temperature\": 0,\n", + " \"topP\": 1,\n", + " },\n", + " client=boto3_bedrock,\n", + ")" ] }, { diff --git a/03_QuestionAnswering/00_qa_w_bedrock_titan.ipynb b/03_QuestionAnswering/00_qa_w_bedrock_titan.ipynb index 1761eb4c..706edab6 100644 --- a/03_QuestionAnswering/00_qa_w_bedrock_titan.ipynb +++ b/03_QuestionAnswering/00_qa_w_bedrock_titan.ipynb @@ -49,7 +49,7 @@ " ../dependencies/boto3-1.26.162-py3-none-any.whl \\\n", " ../dependencies/botocore-1.29.162-py3-none-any.whl\n", "\n", - "%pip install --quiet langchain==0.0.190" + "%pip install --quiet langchain==0.0.249" ] }, { diff --git a/03_QuestionAnswering/01_qa_w_rag_claude.ipynb b/03_QuestionAnswering/01_qa_w_rag_claude.ipynb index 9f293e8f..96d53e78 100644 --- a/03_QuestionAnswering/01_qa_w_rag_claude.ipynb +++ b/03_QuestionAnswering/01_qa_w_rag_claude.ipynb @@ -118,7 +118,7 @@ " ../dependencies/boto3-1.26.162-py3-none-any.whl \\\n", " ../dependencies/botocore-1.29.162-py3-none-any.whl\n", "\n", - "%pip install --quiet faiss-cpu==1.7.4 langchain==0.0.190 pypdf=3.8.1" + "%pip install --quiet \"faiss-cpu>=1.7,<2\" langchain==0.0.249 \"pypdf>=3.8,<4\"" ] }, { @@ -204,13 +204,15 @@ "outputs": [], "source": [ "from urllib.request import urlretrieve\n", + "\n", + "os.makedirs(\"data\", exist_ok=True)\n", "files = [\n", - " 'https://www.irs.gov/pub/irs-pdf/p1544.pdf',\n", - " 'https://www.irs.gov/pub/irs-pdf/p15.pdf',\n", - " 'https://www.irs.gov/pub/irs-pdf/p1212.pdf'\n", + " \"https://www.irs.gov/pub/irs-pdf/p1544.pdf\",\n", + " \"https://www.irs.gov/pub/irs-pdf/p15.pdf\",\n", + " \"https://www.irs.gov/pub/irs-pdf/p1212.pdf\",\n", "]\n", "for url in files:\n", - " file_path = './data/' + url.split('/')[-1]\n", + " file_path = os.path.join(\"data\", url.rpartition(\"/\")[2])\n", " urlretrieve(url, file_path)" ] }, @@ -443,7 +445,6 @@ "metadata": {}, "outputs": [], "source": [ - 
"\n", "from langchain.chains import RetrievalQA\n", "from langchain.prompts import PromptTemplate\n", "\n", @@ -453,6 +454,7 @@ "\n", "Question: {question}\n", "Assistant:\"\"\"\n", + "\n", "PROMPT = PromptTemplate(\n", " template=prompt_template, input_variables=[\"context\", \"question\"]\n", ")\n", diff --git a/04_Chatbot/00_Chatbot_AI21.ipynb b/04_Chatbot/00_Chatbot_AI21.ipynb index a29aebf1..3ce1136a 100644 --- a/04_Chatbot/00_Chatbot_AI21.ipynb +++ b/04_Chatbot/00_Chatbot_AI21.ipynb @@ -93,7 +93,7 @@ "metadata": {}, "outputs": [], "source": [ - "%pip install --quiet faiss-cpu==1.7.4 \"ipywidgets>=7,<8\" langchain==0.0.190 pypdf==3.8.1" + "%pip install --quiet \"faiss-cpu>=1.7,<2\" \"ipywidgets>=7,<8\" langchain==0.0.249 \"pypdf>=3.8,<4\"" ] }, { diff --git a/04_Chatbot/00_Chatbot_Claude.ipynb b/04_Chatbot/00_Chatbot_Claude.ipynb index 0cafd4da..babb9ee0 100644 --- a/04_Chatbot/00_Chatbot_Claude.ipynb +++ b/04_Chatbot/00_Chatbot_Claude.ipynb @@ -93,7 +93,7 @@ "metadata": {}, "outputs": [], "source": [ - "%pip install --quiet faiss-cpu==1.7.4 \"ipywidgets>=7,<8\" langchain==0.0.190 pypdf==3.8.1" + "%pip install --quiet \"faiss-cpu>=1.7,<2\" \"ipywidgets>=7,<8\" langchain==0.0.249 \"pypdf>=3.8,<4\"" ] }, { @@ -162,7 +162,7 @@ ")\n", "memory = ConversationBufferMemory()\n", "conversation = ConversationChain(\n", - " llm=titan_llm, verbose=True, memory=memory\n", + " llm=cl_llm, verbose=True, memory=memory\n", ")\n", "\n", "print_ww(conversation.predict(input=\"Hi there!\"))" @@ -591,7 +591,6 @@ }, "outputs": [], "source": [ - "\n", "from langchain.chains.conversational_retrieval.prompts import CONDENSE_QUESTION_PROMPT\n", "\n", "print_ww(CONDENSE_QUESTION_PROMPT.template)" diff --git a/04_Chatbot/00_Chatbot_Titan.ipynb b/04_Chatbot/00_Chatbot_Titan.ipynb index 9c0c40d9..85d215b0 100644 --- a/04_Chatbot/00_Chatbot_Titan.ipynb +++ b/04_Chatbot/00_Chatbot_Titan.ipynb @@ -93,7 +93,7 @@ "metadata": {}, "outputs": [], "source": [ - "%pip install --quiet 
faiss-cpu==1.7.4 \"ipywidgets>=7,<8\" langchain==0.0.190 pypdf==3.8.1" + "%pip install --quiet \"faiss-cpu>=1.7,<2\" \"ipywidgets>=7,<8\" langchain==0.0.249 \"pypdf>=3.8,<4\"" ] }, { diff --git a/05_Image/Bedrock Stable Diffusion XL.ipynb b/05_Image/Bedrock Stable Diffusion XL.ipynb index a2c6ce9d..a55f2cbc 100644 --- a/05_Image/Bedrock Stable Diffusion XL.ipynb +++ b/05_Image/Bedrock Stable Diffusion XL.ipynb @@ -4,14 +4,13 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Introduction to Bedrock - Generating images using Stable Diffusion\n", + "# Generating images using Stable Diffusion\n", "\n", "> *This notebook should work well with the **`Data Science 2.0`** kernel in SageMaker Studio*\n", "\n", "---\n", - "In this demo notebook, we demonstrate how to use the Bedrock SDK for an image generation task. We show how to use the Stable Diffusion foundational model to create images\n", - "1. Text to Image\n", - "2. Image to Image\n", + "\n", + "In this demo notebook, we show how to use [Stable Diffusion XL](https://stability.ai/stablediffusion) (SDXL) on [Amazon Bedrock](https://aws.amazon.com/bedrock/) for image generation (text-to-image) and image editing (image-to-image).\n", "\n", "Images in Stable Diffusion are generated by these 4 main models below\n", "1. The CLIP text encoder;\n", @@ -23,11 +22,22 @@ "\n", "see this diagram below\n", "\n", - "![SD Architecture](./images/sd.png)\n", + "![SD Architecture](./images/sd.png)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Image prompting\n", + "\n", + "Writing a good prompt can be somewhat of an art. It's often difficult to predict whether a certain prompt will yield a satisfactory image with a given model. However, there are certain templates that have been observed to work. Broadly, a prompt can be roughly broken down into three pieces:\n", "\n", - "#### Image prompting\n", + "1. **Type** of image (photograph/sketch/painting etc.)\n", + "2. 
**Description** of the content (subject/object/environment/scene etc.), and\n", + "3. **Style** of the image (realistic/artistic/type of art etc.).\n", "\n", - "Writing a good prompt can sometime be an art. It is often difficult to predict whether a certain prompt will yield a satisfactory image with a given model. However, there are certain templates that have been observed to work. Broadly, a prompt can be roughly broken down into three pieces: (i) type of image (photograph/sketch/painting etc.), (ii) description (subject/object/environment/scene etc.) and (iii) the style of the image (realistic/artistic/type of art etc.). You can change each of the three parts individually to generate variations of an image. Adjectives have been known to play a significant role in the image generation process. Also, adding more details help in the generation process.\n", + "You can change each of the three parts individually to generate variations of an image. Adjectives have been known to play a significant role in the image generation process. Also, adding more details helps in the generation process.\n", "\n", "To generate a realistic image, you can use phrases such as “a photo of”, “a photograph of”, “realistic” or “hyper realistic”. To generate images by artists you can use phrases like “by Pablo Piccaso” or “oil painting by Rembrandt” or “landscape art by Frederic Edwin Church” or “pencil drawing by Albrecht Dürer”. You can also combine different artists as well. To generate artistic image by category, you can add the art category in the prompt such as “lion on a beach, abstract”. Some other categories include “oil painting”, “pencil drawing, “pop art”, “digital art”, “anime”, “cartoon”, “futurism”, “watercolor”, “manga” etc. 
You can also include details such as lighting or camera lens such as 35mm wide lens or 85mm wide lens and details about the framing (portrait/landscape/close up etc.).\n", "\n", @@ -50,7 +60,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "# Make sure you ran `download-dependencies.sh` from the root of the repository first!\n", @@ -59,7 +71,7 @@ " ../dependencies/boto3-1.26.162-py3-none-any.whl \\\n", " ../dependencies/botocore-1.29.162-py3-none-any.whl\n", "\n", - "%pip install --quiet pillow==9.5.0" + "%pip install --quiet \"pillow>=9.5,<10\"" ] }, { @@ -104,27 +116,32 @@ "metadata": {}, "source": [ "## Text to Image\n", - "In order to generate an image, a description of what needs to be generated is needed. This is called `prompt`.\n", "\n", - "You can also provide some negative prompts to guide the model to avoid certain type of outputs.\n", + "In text-to-image mode, we'll provide a text description of what image **should** be generated, called a `prompt`.\n", + "\n", + "With Stable Diffusion XL (SDXL) we can also specify certain [style presets](https://platform.stability.ai/docs/release-notes#style-presets) to help influence the generation.\n", + "\n", + "But what if we want to nudge the model to ***avoid*** specific content or style choices? Because image generation models are typically trained from *image descriptions*, trying to directly specify what you **don't** want in the prompt (for example `man without a beard`) doesn't usually work well: It would be very unusual to describe an image by the things it isn't!\n", "\n", - "Prompt acts as the input to the model and steers the model to generate a relevant output. With Stable Diffusion XL you have the option to choose certain [style presets](https://platform.stability.ai/docs/release-notes#style-presets) as well" + "Instead, SDXL lets us specify a `weight` for each prompt, which can be negative. 
We'll use this to provide `negative_prompts` as shown below:" ] }, { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "prompt = \"Dog in a forest\"\n", "negative_prompts = [\n", - " \"poorly rendered\", \n", - " \"poor background details\", \n", - " \"poorly drawn dog\", \n", - " \"disfigured dog features\"\n", - " ]\n", - "style_preset = \"photographic\" # (photographic, digital-art, cinematic, ...)\n", + " \"poorly rendered\",\n", + " \"poor background details\",\n", + " \"poorly drawn dog\",\n", + " \"disfigured dog features\",\n", + "]\n", + "style_preset = \"photographic\" # (e.g. photographic, digital-art, cinematic, ...)\n", "#prompt = \"photo taken from above of an italian landscape. cloud is clear with few clouds. Green hills and few villages, a lake\"" ] }, @@ -132,53 +149,59 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "`Bedrock` class implements a method `generate_image`. This method takes input a prompt and prepares a payload to be sent over to Bedrock API.\n", - "You can provide the following model inference parameters to control the repetitiveness of responses:\n", - "- prompt (string): Input text prompt for the model\n", - "- seed (int): Determines initial noise. Using same seed with same settings will create similar images.\n", - "- cfg_scale (float): Presence strength - Determines how much final image portrays prompts.\n", - "- steps (int): Generation step - How many times image is sampled. More steps may be more accurate." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "As an output the Bedrock generates a `base64` encoded string respresentation of the image." 
+ "The Amazon Bedrock `InvokeModel` provides access to SDXL by setting the right model ID, and returns a JSON response including a [Base64 encoded string](https://en.wikipedia.org/wiki/Base64) that represents the (PNG) image.\n", + "\n", + "For more information on available input parameters for the model, refer to the [Stability AI docs](https://platform.stability.ai/docs/api-reference#tag/v1generation/operation/textToImage).\n", + "\n", + "The cell below invokes the SDXL model through Amazon Bedrock to create an initial image string:" ] }, { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ - "model = bedrock.Bedrock(boto3_bedrock)\n", - "base_64_img_str = model.generate_image(prompt, cfg_scale=5, seed=5450, steps=70, style_preset=style_preset)" + "request = json.dumps({\n", + " \"text_prompts\": (\n", + " [{\"text\": prompt, \"weight\": 1.0}]\n", + " + [{\"text\": negprompt, \"weight\": -1.0} for negprompt in negative_prompts]\n", + " ),\n", + " \"cfg_scale\": 5,\n", + " \"seed\": 5450,\n", + " \"steps\": 70,\n", + " \"style_preset\": style_preset,\n", + "})\n", + "modelId = \"stability.stable-diffusion-xl\"\n", + "\n", + "response = boto3_bedrock.invoke_model(body=request, modelId=modelId)\n", + "response_body = json.loads(response.get(\"body\").read())\n", + "\n", + "print(response_body[\"result\"])\n", + "base_64_img_str = response_body[\"artifacts\"][0].get(\"base64\")\n", + "print(f\"{base_64_img_str[0:80]}...\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "We can convert the `base64` image to a PIL image to be displayed" + "By decoding our Base64 string to binary, and loading it with an image processing library like [Pillow](https://pillow.readthedocs.io/en/stable/) that can read PNG files, we can display and manipulate the image here in the notebook:" ] }, { "cell_type": "code", "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "image_1 = 
Image.open(io.BytesIO(base64.decodebytes(bytes(base_64_img_str, \"utf-8\"))))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ + "os.makedirs(\"data\", exist_ok=True)\n", + "image_1 = Image.open(io.BytesIO(base64.decodebytes(bytes(base_64_img_str, \"utf-8\"))))\n", + "image_1.save(\"data/image_1.png\")\n", "image_1" ] }, @@ -188,21 +211,41 @@ "source": [ "## Image to Image\n", "\n", - "Stable Diffusion let's us do some interesting stuff with our images like adding new characters or modifying scenery let's give it a try.\n", + "Generating images from text is powerful, but in some cases could need many rounds of prompt refinement to get an image \"just right\".\n", + "\n", + "Rather than starting from scratch with text each time, image-to-image generation lets us **modify an existing image** to make the specific changes we'd like.\n", "\n", - "You can use the previously generated image or use a different one to create a base64 string to be passed on as an initial image to the model." + "We'll have to pass our initial image in to the API in base64 encoding, so first let's prepare that. 
You can use either the initial image from the previous section, or a different one if you'd prefer:" ] }, { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ - "buffer = io.BytesIO()\n", - "image_1.save(buffer, format=\"JPEG\")\n", - "img_bytes = buffer.getvalue()\n", - "init_image = base64.b64encode(img_bytes).decode()" + "def image_to_base64(img) -> str:\n", + " \"\"\"Convert a PIL Image or local image file path to a base64 string for Amazon Bedrock\"\"\"\n", + " if isinstance(img, str):\n", + " if os.path.isfile(img):\n", + " print(f\"Reading image from file: {img}\")\n", + " with open(img, \"rb\") as f:\n", + " return base64.b64encode(f.read()).decode(\"utf-8\")\n", + " else:\n", + " raise FileNotFoundError(f\"File {img} does not exist\")\n", + " elif isinstance(img, Image.Image):\n", + " print(\"Converting PIL Image to base64 string\")\n", + " buffer = io.BytesIO()\n", + " img.save(buffer, format=\"PNG\")\n", + " return base64.b64encode(buffer.getvalue()).decode(\"utf-8\")\n", + " else:\n", + " raise ValueError(f\"Expected str (filename) or PIL Image. Got {type(img)}\")\n", + "\n", + "\n", + "init_image_b64 = image_to_base64(image_1)\n", + "print(init_image_b64[:80] + \"...\")" ] }, { @@ -215,7 +258,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "change_prompt = \"add some leaves around the dog\"" @@ -225,33 +270,51 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "The `generate_image` method also accepts an additional paramter `init_image` which can be used to pass the initial image to the Stable Diffusion model on Bedrock." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "base_64_img_str = model.generate_image(change_prompt, init_image=init_image, seed=321, start_schedule=0.6)" + "The existing image is then passed through to the Stable Diffusion model via the `init_image` parameter.\n", + "\n", + "Again, you can refer to the [Stable Diffusion API docs](https://platform.stability.ai/docs/api-reference#tag/v1generation/operation/imageToImage) for more tips on how to use the different parameters:" ] }, { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ - "image_2 = Image.open(io.BytesIO(base64.decodebytes(bytes(base_64_img_str, \"utf-8\"))))" + "request = json.dumps({\n", + " \"text_prompts\": (\n", + " [{\"text\": change_prompt, \"weight\": 1.0}]\n", + " + [{\"text\": negprompt, \"weight\": -1.0} for negprompt in negative_prompts]\n", + " ),\n", + " \"cfg_scale\": 10,\n", + " \"init_image\": init_image_b64,\n", + " \"seed\": 321,\n", + " \"start_schedule\": 0.6,\n", + " \"steps\": 50,\n", + " \"style_preset\": style_preset,\n", + "})\n", + "modelId = \"stability.stable-diffusion-xl\"\n", + "\n", + "response = boto3_bedrock.invoke_model(body=request, modelId=modelId)\n", + "response_body = json.loads(response.get(\"body\").read())\n", + "\n", + "print(response_body[\"result\"])\n", + "image_2_b64_str = response_body[\"artifacts\"][0].get(\"base64\")\n", + "print(f\"{image_2_b64_str[0:80]}...\")" ] }, { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ + "image_2 = Image.open(io.BytesIO(base64.decodebytes(bytes(image_2_b64_str, \"utf-8\"))))\n", + "image_2.save(\"data/image_2.png\")\n", "image_2" ] }, @@ -259,9 +322,15 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Summary\n", + "## Summary\n", + "\n", + "In this lab we demonstrated how to generate new images 
from text, and transform existing images with text instructions - using [Stable Diffusion XL](https://stability.ai/stablediffusion) on [Amazon Bedrock](https://aws.amazon.com/bedrock/).\n", + "\n", + "Through the Bedrock API, we can provide a range of parameters to influence image generation which generally correspond to those listed in the [Stable Diffusion API docs](https://platform.stability.ai/docs/api-reference#tag/v1generation).\n", + "\n", + "One key point to note when using Bedrock is that output image PNG/JPEG data is returned as a [Base64 encoded string](https://en.wikipedia.org/wiki/Base64) within the JSON API response: You can use the Python built-in [base64 library](https://docs.python.org/3/library/base64.html) to decode this image data - for example to save a `.png` file. We also showed that image processing libraries like [Pillow](https://pillow.readthedocs.io/en/stable/) can be used to load (and perhaps edit) the images within Python.\n", "\n", - "And play around with different prompts to see amazing results." + "From here you can explore more advanced image generation options - or combine GenAI with traditional image processing tools - to build the best creative workflow for your use-case." 
] } ], diff --git a/utils/bedrock.py b/utils/bedrock.py index e57cf396..1aa23ec0 100644 --- a/utils/bedrock.py +++ b/utils/bedrock.py @@ -2,16 +2,12 @@ # SPDX-License-Identifier: MIT-0 """Helper utilities for working with Amazon Bedrock from Python notebooks""" # Python Built-Ins: -from enum import Enum -import json import os -from time import sleep -from typing import Dict, Optional +from typing import Optional # External Dependencies: import boto3 from botocore.config import Config -from pydantic import root_validator def get_bedrock_client( @@ -79,131 +75,3 @@ def get_bedrock_client( print("boto3 Bedrock client successfully created!") print(bedrock_client._endpoint) return bedrock_client - - -class BedrockMode(Enum): - IMAGE = "image" - - -class BedrockModel(Enum): - STABLE_DIFFUSION = "stability.stable-diffusion-xl" - - -class Bedrock: - __DEFAULT_EMPTY_EMBEDDING = [ - 0.0 - ] * 4096 # - we need to return an array of floats 4096 in size - __RETRY_BACKOFF_SEC = 3 - __RETRY_ATTEMPTS = 3 - - def __init__(self, client=None) -> None: - if client is None: - self.client = get_bedrock_client(assumed_role=None) - else: - assert str(type(client)) == "", f"The client passed in not a valid boto3 bedrock client, got {type(client)}" - self.client = client - - @root_validator() - def validate_environment(cls, values: Dict) -> Dict: - bedrock_client = get_bedrock_client(assumed_role=None) #boto3.client("bedrock") - values["client"] = bedrock_client - return values - - def generate_image(self, prompt: str, init_image: Optional[str] = None, **kwargs): - """ - Invoke Bedrock model to generate embeddings. 
- Args: - text (str): Input text - """ - mode = BedrockMode.IMAGE - model_type = BedrockModel.STABLE_DIFFUSION - payload = self.prepare_input( - prompt, init_image=init_image, mode=mode, model_type=model_type, **kwargs - ) - response = self._invoke_model(model_id=model_type, body_string=payload) - _, _, img_base_64 = self.extract_results(response, model_type) - return img_base_64 - - @staticmethod - def extract_results(response, model_type: BedrockModel, verbose=False): - response = response["body"].read() - if verbose: - print(f"response body readlines() returns: {response}") - - json_obj = json.loads(response) - if model_type == BedrockModel.STABLE_DIFFUSION: - in_token_count, out_token_count = None, None - if json_obj["result"] == "success": - model_output = json_obj["artifacts"][0]["base64"] - else: - model_output = None - else: - raise Exception(f" This class is for Stable Diffusion ONLY::model_type={model_type}") - - return in_token_count, out_token_count, model_output - - @staticmethod - def prepare_input( - prompt_text, - negative_prompts=[], - stop_sequences=[], - cfg_scale=10, - seed=1, - steps=50, - start_schedule=0.5, - init_image=None, - style_preset='photographic', - mode=BedrockMode.IMAGE, - model_type=BedrockModel.STABLE_DIFFUSION, - **kwargs, - ): - stop_sequences = stop_sequences[ - :1 - ] # Temporary addition as Bedrock models can't take multiple stop_sequences yet. Will change later. 
- if mode == BedrockMode.IMAGE: - if model_type in [BedrockModel.STABLE_DIFFUSION]: - positives = [{"text": prompt_text, "weight": 1}] - negatives = [{"text": prompt, "weight": -1} for prompt in negative_prompts] - json_obj = { - "text_prompts": positives + negatives, - "cfg_scale": cfg_scale, - "seed": seed, - "steps": steps, - "style_preset": style_preset - } - if init_image is not None: - json_obj["init_image"] = init_image - json_obj["start_schedule"] = start_schedule - else: - raise Exception( - 'Unsupported model_type, only "STABLE_DIFFUSION" model_type is supported.' - ) - - return json.dumps(json_obj) - - def list_models(self): - response = self.client.list_foundation_models() - if response["ResponseMetadata"]["HTTPStatusCode"] == 200: - return response["modelSummaries"] - else: - raise Exception("Invalid response") - - def _invoke_model(self, model_id: BedrockModel, body_string: str): - body = bytes(body_string, "utf-8") - response = None - for attempt_no in range(self.__RETRY_ATTEMPTS): - try: - response = self.client.invoke_model( - modelId=model_id.value, - contentType="application/json", - accept="application/json", - body=body, - ) - break - except: - print( - f"bedrock:invoke_model: Attempt no. {attempt_no+1} failed:: Retrying after {self.__RETRY_BACKOFF_SEC} seconds!" - ) - sleep(self.__RETRY_BACKOFF_SEC) - continue - return response