Skip to content

Commit b6a5cc2

Browse files
committed
feat: scaffold a lambda that uses an openai function
1 parent bc17919 commit b6a5cc2

File tree

18 files changed

+630
-32
lines changed

18 files changed

+630
-32
lines changed

Makefile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,7 @@ api-init:
6565
$(ACTIVATE_VENV) && \
6666
$(PIP) install --upgrade pip && \
6767
$(PIP) install -r requirements.txt && \
68+
$(PYTHON) -m spacy download en_core_web_sm
6869
deactivate && \
6970
pre-commit install
7071

File renamed without changes.
Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,104 @@
1+
#------------------------------------------------------------------------------
2+
# written by: Lawrence McDaniel
3+
# https://lawrencemcdaniel.com/
4+
#
5+
# date: sep-2023
6+
#
7+
# usage: implement a Python Lambda function to format and pass
8+
# text-based http requests directly to OpenAI API.
9+
# - create a Lambda zip archive
10+
# - pass openai api key credentials to Lambda in a safe manner
11+
# - create a Cloudwatch log for the Lambda
12+
# * note that IAM permissions are implemented on the resource(s)
13+
# that call this Lambda, rather than here.
14+
#------------------------------------------------------------------------------
15+
locals {
  # shared names/paths for the lambda_openai_function build and packaging
  openai_functionfunction_name     = "lambda_openai_function"
  openai_functionbuild_path        = "${path.module}/build/distribution_package"
  openai_functionsource_directory  = "${path.module}/python/openai_api"
  openai_functionpackaging_script  = "${local.openai_functionsource_directory}/create_pkg.sh"
  openai_functiondist_package_name = "${local.openai_functionfunction_name}_dist_pkg.zip"
}
22+
23+
###############################################################################
24+
# Python package
25+
# https://alek-cora-glez.medium.com/deploying-aws-lambda-function-with-terraform-custom-dependencies-7874407cd4fc
26+
###############################################################################
27+
resource "null_resource" "package_lambda_openai_function" {
  # timestamp() changes on every plan, so the packaging script re-runs
  # on every apply.
  triggers = {
    always_redeploy = timestamp()
  }

  # Build the Lambda distribution package with the project shell script.
  provisioner "local-exec" {
    interpreter = ["/bin/bash"]
    command     = local.openai_functionpackaging_script

    environment = {
      TERRAFORM_ROOT   = path.module
      SOURCE_CODE_PATH = local.openai_functionsource_directory
      BUILD_PATH       = local.openai_functionbuild_path
      PACKAGE_FOLDER   = local.openai_functionfunction_name
    }
  }
}
44+
45+
data "archive_file" "lambda_openai_function" {
  # see https://registry.terraform.io/providers/hashicorp/archive/latest/docs/data-sources/file
  type        = "zip"
  source_dir  = local.openai_functionbuild_path
  output_path = "${path.module}/build/${local.openai_functiondist_package_name}"
  depends_on  = [null_resource.package_lambda_openai_function]
}
52+
53+
###############################################################################
54+
# OpenAI API key and organization
55+
###############################################################################
56+
data "external" "env_lambda_openai_function" {
  # kluge to read and map the openai api key and org data contained in .env
  program = ["${local.openai_functionsource_directory}/${local.openai_functionfunction_name}/env.sh"]

  # For Windows (or Powershell core on MacOS and Linux),
  # run a Powershell script instead
  #program = ["${path.module}/scripts/env.ps1"]
}
64+
65+
###############################################################################
66+
# AWS Lambda function
67+
###############################################################################
68+
resource "aws_lambda_function" "lambda_openai_function" {
  # see https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/lambda_function.html
  # see https://docs.aws.amazon.com/lambda/latest/dg/lambda-runtimes.html
  function_name = local.openai_functionfunction_name

  # fix: the previous description, "LangChain request handler", was copied
  # from the lambda_langchain resource; this Lambda serves OpenAI
  # function-calling requests.
  description = "OpenAI API function-calling request handler"

  role    = aws_iam_role.lambda.arn
  publish = true

  runtime       = var.lambda_python_runtime
  memory_size   = var.lambda_memory_size
  timeout       = var.lambda_timeout
  handler       = "openai_api.lambda_openai_function.lambda_handler.handler"
  architectures = var.compatible_architectures

  # deployment package produced by null_resource.package_lambda_openai_function
  filename         = data.archive_file.lambda_openai_function.output_path
  source_code_hash = data.archive_file.lambda_openai_function.output_base64sha256
  layers           = [aws_lambda_layer_version.genai.arn]
  tags             = var.tags

  environment {
    variables = {
      DEBUG_MODE                 = var.debug_mode
      OPENAI_API_ORGANIZATION    = data.external.env_lambda_openai_function.result["OPENAI_API_ORGANIZATION"]
      OPENAI_API_KEY             = data.external.env_lambda_openai_function.result["OPENAI_API_KEY"]
      OPENAI_ENDPOINT_IMAGE_N    = var.openai_endpoint_image_n
      OPENAI_ENDPOINT_IMAGE_SIZE = var.openai_endpoint_image_size
      # NOTE(review): Lambda reserves several AWS_-prefixed environment
      # variable names -- confirm "AWS_DEPLOYED" deploys cleanly.
      AWS_DEPLOYED = true
    }
  }
}
96+
97+
###############################################################################
98+
# Cloudwatch logging
99+
###############################################################################
100+
resource "aws_cloudwatch_log_group" "lambda_openai_function" {
  # Lambda writes to /aws/lambda/<function_name> by convention.
  name              = "/aws/lambda/${local.openai_functionfunction_name}"
  retention_in_days = var.log_retention_days
  tags              = var.tags
}

api/terraform/python/layer_genai/Dockerfile

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,5 +9,8 @@ WORKDIR /var/task
99

1010
COPY requirements.txt .

# Make the layer's target directory importable inside this build image.
# fix: use the ENV key=value form; the space-separated form is legacy/deprecated.
ENV PYTHONPATH="${PYTHONPATH}:python/lib/python3.11/site-packages"

RUN yum install -y zip
RUN pip install -r requirements.txt --target python/lib/python3.11/site-packages
# NOTE(review): "spacy download" runs pip without --target, so
# en_core_web_sm may install into system site-packages rather than the
# layer directory -- confirm the packaged layer zip includes the model.
RUN python3 -m spacy download en_core_web_sm

api/terraform/python/layer_genai/requirements.txt

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,13 +9,20 @@
99
# -----------------------------------------------------------------------------
1010

1111
# generative AI requirements
12+
# --------------------------
1213
langchain
1314
langchain-experimental
15+
langchain-openai==0.0.3
1416
openai==1.7.2
15-
pinecone-client==2.2.4
1617

1718
# general requirements
19+
# --------------------------
1820
pydantic==2.5.3
1921
pydantic-settings==2.1.0
2022
python-dotenv==1.0.0
2123
python-hcl2==4.3.2
24+
25+
# NLP requirements
26+
# --------------------------
27+
python-Levenshtein==0.23.0
28+
spacy==3.7.2

api/terraform/python/openai_api/common/utils.py

Lines changed: 64 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,13 @@
66
import datetime
77
import json # library for interacting with JSON data https://www.json.org/json-en.html
88
import logging
9+
import re
10+
import string
911
import sys # libraries for error management
1012
import traceback # libraries for error management
1113

14+
import Levenshtein
15+
import spacy
1216
from openai_api.common.const import LANGCHAIN_MESSAGE_HISTORY_ROLES, OpenAIObjectTypes
1317
from openai_api.common.exceptions import OpenAIAPIValueError
1418
from openai_api.common.validators import (
@@ -24,6 +28,7 @@
2428

2529

2630
logger = logging.getLogger(__name__)
31+
nlp = spacy.load("en_core_web_sm")
2732

2833

2934
class DateTimeEncoder(json.JSONEncoder):
@@ -147,7 +152,7 @@ def get_request_body(event) -> dict:
147152

148153
def parse_request(request_body: dict):
149154
"""Parse the request body and return the endpoint, model, messages, and input_text"""
150-
object_type = request_body.get("object")
155+
object_type = request_body.get("object_type")
151156
model = request_body.get("model")
152157
messages = request_body.get("messages")
153158
input_text = request_body.get("input_text")
@@ -214,3 +219,61 @@ def get_messages_for_role(messages: list, role: str) -> list:
214219
"""Get the text content from the messages list for a given role"""
215220
retval = [d.get("content") for d in messages if d["role"] == role]
216221
return retval
222+
223+
224+
def request_meta_data_factory(model, object_type, temperature, max_tokens, input_text, lambda_name="lambda_openai_v2"):
    """
    Build the standard request meta data dict echoed back in Lambda responses.

    Args:
        model: OpenAI model name taken from the request body.
        object_type: request object type taken from the request body.
        temperature: sampling temperature passed through from the request.
        max_tokens: token limit passed through from the request.
        input_text: raw prompt text from the request.
        lambda_name: name of the Lambda producing the response. Defaults to
            "lambda_openai_v2" for backward compatibility; callers such as
            lambda_langchain's handler should pass their own name, otherwise
            the meta data misreports which Lambda handled the request (the
            original hard-coded "lambda_openai_v2" for every caller).

    Returns:
        dict with a single "request_meta_data" key mapping to the fields above.
    """
    return {
        "request_meta_data": {
            "lambda": lambda_name,
            "model": model,
            "object_type": object_type,
            "temperature": temperature,
            "max_tokens": max_tokens,
            "input_text": input_text,
        }
    }
238+
239+
240+
def does_refer_to(prompt: str, refers_to: str, threshold=3) -> bool:
    """Heuristically decide whether ``prompt`` refers to ``refers_to``.

    Two passes over a normalized copy of the prompt:
    1. exact containment -- every word of the target appears in the prompt;
    2. fuzzy match -- any title-cased prompt word is within ``threshold``
       Levenshtein distance of the target.
    """
    # Normalization: strip punctuation and split camel-cased tokens
    # ("WhoIsLawrenceMcDaniel" -> "Who Is Lawrence Mc Daniel"). Tokens that
    # spaCy tags as PERSON/ORG entities are kept whole, title-cased.
    normalized = []
    for raw in prompt.split():
        stripped = raw.translate(str.maketrans("", "", string.punctuation))
        parsed = nlp(stripped)
        split_words = re.sub("([A-Z][a-z]+)", r" \1", re.sub("([A-Z]+)", r" \1", stripped)).split()
        if any(ent.label_ in ["PERSON", "ORG"] for ent in parsed.ents):
            normalized.append(stripped.title())
        else:
            normalized.extend("".join(re.findall("[a-zA-Z]+", w)) for w in split_words)
    prompt = " ".join(normalized)

    # Pass 1: case-insensitive containment of every target word. The empty
    # guard preserves the original behavior for an empty ``refers_to``
    # (which never matched on this pass).
    prompt_words = prompt.lower().split()
    target_tokens = refers_to.lower().split()
    if target_tokens and all(token in prompt_words for token in target_tokens):
        return True

    # Pass 2: fuzzy-match each title-cased word (a likely name/title)
    # against the target using Levenshtein distance.
    for candidate in (word for word in prompt.split() if word.istitle()):
        if Levenshtein.distance(refers_to, candidate) <= threshold:
            return True

    # bust. we didn't find the target string in the prompt
    return False

api/terraform/python/openai_api/lambda_langchain/lambda_handler.py

Lines changed: 3 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -21,14 +21,14 @@
2121
import json
2222

2323
from langchain.chains import LLMChain
24-
from langchain.chat_models import ChatOpenAI
2524
from langchain.memory import ConversationBufferMemory
2625
from langchain.prompts import (
2726
ChatPromptTemplate,
2827
HumanMessagePromptTemplate,
2928
MessagesPlaceholder,
3029
SystemMessagePromptTemplate,
3130
)
31+
from langchain_openai import ChatOpenAI
3232
from openai_api.common.conf import settings
3333
from openai_api.common.const import ( # VALID_EMBEDDING_MODELS,
3434
VALID_CHAT_COMPLETION_MODELS,
@@ -46,6 +46,7 @@
4646
get_request_body,
4747
http_response_factory,
4848
parse_request,
49+
request_meta_data_factory,
4950
)
5051
from openai_api.common.validators import ( # validate_embedding_request,
5152
validate_completion_request,
@@ -77,17 +78,7 @@ def handler(event, context):
7778
request_body = get_request_body(event=event)
7879
validate_request_body(request_body=request_body)
7980
object_type, model, messages, input_text, temperature, max_tokens = parse_request(request_body)
80-
request_meta_data = {
81-
"request_meta_data": {
82-
"lambda": "lambda_langchain",
83-
"model": model,
84-
"object_type": object_type,
85-
"temperature": temperature,
86-
"max_tokens": max_tokens,
87-
"input_text": input_text,
88-
}
89-
}
90-
81+
request_meta_data = request_meta_data_factory(model, object_type, temperature, max_tokens, input_text)
9182
validate_messages(request_body=request_body)
9283

9384
match object_type:

api/terraform/python/openai_api/lambda_openai_function/__init__.py

Whitespace-only changes.
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
#!/bin/bash
#------------------------------------------------------------------------------
# written by: Lawrence McDaniel
# https://lawrencemcdaniel.com/
#
# date: sep-2023
#
# usage: reformat environment variables for consumption by Terraform.
#        Called by Terraform data "external" in lambda_openai_function.tf
#
# fixes: shebang was /bin/sh but the script uses the bash-only "source"
#        builtin; "+o allexport" was missing the "set" builtin and would
#        have tried to execute a command named "+o".
#------------------------------------------------------------------------------

# https://support.hashicorp.com/hc/en-us/articles/4547786359571-Reading-and-using-environment-variables-in-Terraform-runs

# Export every variable sourced from .env into the environment.
set -o allexport
source ../../.env set # this is relative to the calling Terraform resource
set +o allexport

# Change the contents of this output to get the environment variables
# of interest. The output must be valid JSON, with strings for both
# keys and values.
cat <<EOF
{
  "OPENAI_API_KEY": "$OPENAI_API_KEY",
  "OPENAI_API_ORGANIZATION": "$OPENAI_API_ORGANIZATION"
}
EOF

0 commit comments

Comments
 (0)