diff --git a/06_CodeGeneration/00_code_generatation_w_bedrock.ipynb b/06_CodeGeneration/00_code_generatation_w_bedrock.ipynb index 96ccdbd3..38c3e545 100644 --- a/06_CodeGeneration/00_code_generatation_w_bedrock.ipynb +++ b/06_CodeGeneration/00_code_generatation_w_bedrock.ipynb @@ -93,12 +93,24 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 9, "id": "776fd083", "metadata": { "tags": [] }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Create new client\n", + " Using region: us-east-1\n", + " Using profile: fine-tuning-bedrock\n", + "boto3 Bedrock client successfully created!\n", + "bedrock(https://bedrock.us-east-1.amazonaws.com)\n" + ] + } + ], "source": [ "import json\n", "import os\n", @@ -136,9 +148,81 @@ "Following on the use case explained above, let's prepare an input for the Amazon Bedrock service to generate python program for our use-case." ] }, + { + "cell_type": "markdown", + "id": "e7656be8", + "metadata": {}, + "source": [ + "#### Lab setup - create sample sales.csv data for this lab.\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "89a0ad24", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "sales.csv has been created!\n" + ] + } + ], + "source": [ + "# create sales.csv file\n", + "import csv\n", + "\n", + "data = [\n", + " [\"date\", \"product_id\", \"price\", \"units_sold\"],\n", + " [\"2023-01-01\", \"P001\", 50, 20],\n", + " [\"2023-01-02\", \"P002\", 60, 15],\n", + " [\"2023-01-03\", \"P001\", 50, 18],\n", + " [\"2023-01-04\", \"P003\", 70, 30],\n", + " [\"2023-01-05\", \"P001\", 50, 25],\n", + " [\"2023-01-06\", \"P002\", 60, 22],\n", + " [\"2023-01-07\", \"P003\", 70, 24],\n", + " [\"2023-01-08\", \"P001\", 50, 28],\n", + " [\"2023-01-09\", \"P002\", 60, 17],\n", + " [\"2023-01-10\", \"P003\", 70, 29],\n", + " [\"2023-02-11\", \"P001\", 50, 23],\n", + " [\"2023-02-12\", \"P002\", 60, 
19],\n", + " [\"2023-02-13\", \"P001\", 50, 21],\n", + " [\"2023-02-14\", \"P003\", 70, 31],\n", + " [\"2023-03-15\", \"P001\", 50, 26],\n", + " [\"2023-03-16\", \"P002\", 60, 20],\n", + " [\"2023-03-17\", \"P003\", 70, 33],\n", + " [\"2023-04-18\", \"P001\", 50, 27],\n", + " [\"2023-04-19\", \"P002\", 60, 18],\n", + " [\"2023-04-20\", \"P003\", 70, 32],\n", + " [\"2023-04-21\", \"P001\", 50, 22],\n", + " [\"2023-04-22\", \"P002\", 60, 16],\n", + " [\"2023-04-23\", \"P003\", 70, 34],\n", + " [\"2023-05-24\", \"P001\", 50, 24],\n", + " [\"2023-05-25\", \"P002\", 60, 21]\n", + "]\n", + "\n", + "# Write data to sales.csv\n", + "with open('sales.csv', 'w', newline='') as csvfile:\n", + " writer = csv.writer(csvfile)\n", + " writer.writerows(data)\n", + "\n", + "print(\"sales.csv has been created!\")" + ] + }, + { + "cell_type": "markdown", + "id": "d68e8af6", + "metadata": {}, + "source": [ + "#### Analyzing sales with Amazon Bedrock generated Python program" + ] + }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 42, "id": "45ee2bae-6415-4dba-af98-a19028305c98", "metadata": { "tags": [] @@ -146,22 +230,23 @@ "outputs": [], "source": [ "# Create the prompt\n", - "# Analyzing sales with a Python Program\n", + "# Analyzing sales\n", "\n", "prompt_data = \"\"\"\n", - "Command: Human: You have a CSV, sales.csv, with columns:\n", + "Human: You have a CSV, sales.csv, with columns:\n", "- date (YYYY-MM-DD)\n", "- product_id\n", "- price\n", "- units_sold\n", "\n", - "Wrte a python program to load the data and determine \n", + "Create a python program to analyze the sales data from a CSV file. 
The program should be able to read the data, and determine below:\n", "\n", "- Total revenue for the year\n", "- The product with the highest revenue\n", "- The date with the highest revenue\n", "- Visualize monthly sales using a bar chart\n", "\n", + "Ensure the code is syntactically correct, bug-free, optimized, not span multiple lines unnessarily, and prefer to use standard libraries. Return only python code without any surrounding text, explanation or context.\n", "Assistant:\n", "\"\"\"" ] @@ -176,7 +261,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 43, "id": "8af670eb-ad02-40df-a19c-3ed835fac8d9", "metadata": { "tags": [] @@ -194,29 +279,6 @@ " }) " ] }, - { - "cell_type": "markdown", - "id": "c4ca6751", - "metadata": {}, - "source": [ - "The Amazon Bedrock API provides you with an API `invoke_model` which accepts the following:\n", - "- `modelId`: This is the model ARN for the various foundation models available under Amazon Bedrock\n", - "- `accept`: The type of input request\n", - "- `contentType`: The content type of the output\n", - "- `body`: A json string consisting of the prompt and the configurations\n", - "\n", - "Available text generation models under Amazon Bedrock have the following IDs:\n", - "- `amazon.titan-tg1-large`\n", - "- `amazon.titan-e1t-medium`\n", - "- `ai21.j2-grande-instruct`\n", - "- `ai21.j2-jumbo-instruct`\n", - "- `ai21.j2-mid`\n", - "- `ai21.j2-ultra`\n", - "- `anthropic.claude-instant-v1`\n", - "- `anthropic.claude-v1`\n", - "- `anthropic.claude-v2`" - ] - }, { "cell_type": "markdown", "id": "088cf6bf-dd73-4710-a0cc-6c11d220c431", @@ -225,19 +287,9 @@ "#### Invoke the Anthropic Claude v2 model" ] }, - { - "cell_type": "markdown", - "id": "379498f2", - "metadata": {}, - "source": [ - "First, we explore how the model generates an output based on the prompt created earlier.\n", - "\n", - "##### Complete Output Generation" - ] - }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 
import csv
from collections import defaultdict


def load_sales(path="sales.csv"):
    """Read the sales CSV at ``path`` and return its rows as a list of dicts.

    Each dict is keyed by the file's header row (date, product_id, price,
    units_sold); all values are strings as read by ``csv.DictReader``.
    """
    with open(path, newline="") as sales_file:
        return list(csv.DictReader(sales_file))


def analyze_sales(rows):
    """Aggregate revenue (price * units_sold) over an iterable of sales rows.

    Args:
        rows: iterable of mappings with the keys ``date`` (YYYY-MM-DD),
            ``product_id``, ``price`` and ``units_sold``.

    Returns:
        dict with
        - ``total_revenue``: sum of revenue over all rows (float),
        - ``top_product``: product_id with the highest summed revenue,
        - ``top_date``: date with the highest summed revenue,
        - ``monthly_revenue``: dict mapping "YYYY-MM" to summed revenue.
        ``top_product`` and ``top_date`` are ``None`` when ``rows`` is empty.

    Raises:
        KeyError: if a row lacks one of the expected columns.
        ValueError: if ``price`` or ``units_sold`` is not numeric.
    """
    total_revenue = 0.0
    revenue_by_month = defaultdict(float)
    revenue_by_product = defaultdict(float)
    revenue_by_date = defaultdict(float)

    for row in rows:
        sale_revenue = float(row["price"]) * int(row["units_sold"])
        total_revenue += sale_revenue
        revenue_by_product[row["product_id"]] += sale_revenue
        revenue_by_date[row["date"]] += sale_revenue
        # The first seven characters of a YYYY-MM-DD date are the YYYY-MM month key.
        revenue_by_month[row["date"][:7]] += sale_revenue

    # The winners are chosen from the per-product / per-date totals. Comparing
    # the running grand total against a stored maximum (as the previous version
    # did) is true on every row and therefore always reports the last row.
    top_product = max(revenue_by_product, key=revenue_by_product.get, default=None)
    top_date = max(revenue_by_date, key=revenue_by_date.get, default=None)

    return {
        "total_revenue": total_revenue,
        "top_product": top_product,
        "top_date": top_date,
        "monthly_revenue": dict(revenue_by_month),
    }


def plot_monthly_revenue(monthly_revenue):
    """Show a bar chart of revenue per month from a {"YYYY-MM": revenue} mapping."""
    # Imported here so the analysis helpers above stay usable without matplotlib.
    import matplotlib.pyplot as plt

    # "YYYY-MM" keys sort chronologically as plain strings.
    months = sorted(monthly_revenue)
    fig, ax = plt.subplots()
    ax.bar(months, [monthly_revenue[month] for month in months])
    ax.set(xlabel="Month", ylabel="Revenue", title="Monthly Revenue")
    plt.show()


# In a notebook kernel __name__ is "__main__", so this runs when the cell is
# executed; the guard only keeps the helpers importable without side effects.
if __name__ == "__main__":
    summary = analyze_sales(load_sales("sales.csv"))

    plot_monthly_revenue(summary["monthly_revenue"])

    print('Total Revenue:', summary["total_revenue"])
    print('Product with max revenue:', summary["top_product"])
    print('Date with max revenue:', summary["top_date"])