Skip to content

Commit

Permalink
Merge pull request #262 from parea-ai/add-lc-bedrock
Browse files Browse the repository at this point in the history
add bedrock to cookbook
json parse set
  • Loading branch information
jalexanderII committed Dec 22, 2023
2 parents 1582d7e + ac88155 commit 92dce7d
Show file tree
Hide file tree
Showing 7 changed files with 250 additions and 12 deletions.
61 changes: 61 additions & 0 deletions parea/cookbook/data/2022-letter.txt

Large diffs are not rendered by default.

28 changes: 19 additions & 9 deletions parea/cookbook/langchain/trace_langchain_RAG_evals.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from datetime import datetime
from operator import itemgetter

import boto3
from dotenv import load_dotenv

# LangChain libs
Expand All @@ -13,13 +14,13 @@
from langchain.schema.output_parser import StrOutputParser
from langchain.text_splitter import TokenTextSplitter
from langchain.vectorstores import Chroma
from langchain_community.llms.bedrock import Bedrock

# Parea libs
from parea import Parea
from parea.evals.general import answer_matches_target_llm_grader_factory
from parea.evals.rag import (
answer_context_faithfulness_binary_factory,
answer_context_faithfulness_precision_factory,
answer_context_faithfulness_statement_level_factory,
context_query_relevancy_factory,
percent_target_supported_by_context_factory,
Expand All @@ -34,6 +35,13 @@
p = Parea(api_key=os.getenv("PAREA_API_KEY"))


# Provider configurations. Each dict bundles the model name, the LangChain model
# object and a provider label under the keys "model_name", "model" and "provider".
# Bedrock: "bedrock-runtime" boto3 client in us-east-1, wrapping the
# amazon.titan-text-express-v1 model with temperature 0.
bedrock_client = boto3.client("bedrock-runtime", region_name="us-east-1")
bedrock_model = Bedrock(client=bedrock_client, model_id="amazon.titan-text-express-v1", model_kwargs={"maxTokenCount": 4096, "stopSequences": [], "temperature": 0, "topP": 1})
bedrock = {"model_name": "amazon.titan-text-express-v1", "model": bedrock_model, "provider": "bedrock"}
# OpenAI: gpt-3.5-turbo-16k chat model with temperature 0.
openai_model = ChatOpenAI(model="gpt-3.5-turbo-16k", temperature=0)
openai = {"model_name": "gpt-3.5-turbo-16k", "model": openai_model, "provider": "openai"}


class DocumentRetriever:
def __init__(self, url: str):
api_loader = RecursiveUrlLoader(url)
Expand Down Expand Up @@ -61,7 +69,8 @@ def get_retriever(self):


class DocumentationChain:
def __init__(self, retriever, model: str):
def __init__(self, retriever, model, is_bedrock=False):
self.is_bedrock = is_bedrock
prompt = ChatPromptTemplate.from_messages(
[
(
Expand All @@ -74,8 +83,6 @@ def __init__(self, retriever, model: str):
]
).partial(time=str(datetime.now()))

model = ChatOpenAI(model=model, temperature=0)

response_generator = prompt | model | StrOutputParser()

self.chain = (
Expand All @@ -89,6 +96,8 @@ def get_context(self) -> str:
return self.context

def _format_docs(self, docs) -> str:
if self.is_bedrock:
docs = docs[0::2] # avoid context window limit, get every 2nd doc
context = "\n\n".join(doc.page_content for doc in docs)
# set context as an attribute, so we can access it later
self.context = context
Expand Down Expand Up @@ -129,8 +138,6 @@ def get_chain(self):
EVALS = [
# question field only
EvalFuncTuple(name="matches_target", func=answer_matches_target_llm_grader_factory(question_field="question")),
# context field only
EvalFuncTuple(name="faithfulness_precision", func=answer_context_faithfulness_precision_factory(context_field="context")),
# questions field and single context field
EvalFuncTuple(name="faithfulness_binary", func=answer_context_faithfulness_binary_factory(question_field="question", context_field="context")),
# questions field and accepts multiple context fields
Expand All @@ -148,13 +155,16 @@ def create_log(model, question, context, output, target, provider="openai") -> L


def main():
model = "gpt-3.5-turbo-16k"
model = openai["model"]
model_name = openai["model_name"]
provider = openai["provider"]

# instantiate tracer integration
handler = PareaAILangchainTracer()
# set up retriever
retriever = DocumentRetriever("https://en.wikipedia.org/wiki/New_York_City").get_retriever()
# set up chain
dc = DocumentationChain(retriever, model)
dc = DocumentationChain(retriever, model, is_bedrock=provider == "bedrock")

# iterate through questions and answers
for question, answer in zip(eval_questions, eval_answers):
Expand All @@ -167,7 +177,7 @@ def main():
# after chain is called, get the context for evaluation metric functions
context = dc.get_context()
# build log component needed for evaluation metric functions
log = create_log(model, question, context, output, answer)
log = create_log(model_name, question, context, output, answer, provider)

# helper function to run evaluation metrics in a thread to avoid blocking return of chain
run_evals_in_thread_and_log(trace_id=str(parent_trace_id), log=log, eval_funcs=EVALS, verbose=True)
Expand Down
91 changes: 91 additions & 0 deletions parea/cookbook/langchain/trace_langchain_bedrock_rag.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
import os

import boto3
from dotenv import load_dotenv
from langchain.document_loaders import TextLoader
from langchain.llms.bedrock import Bedrock
from langchain.output_parsers import XMLOutputParser
from langchain.prompts import PromptTemplate
from langchain.schema.output_parser import StrOutputParser
from langchain.text_splitter import RecursiveCharacterTextSplitter

from parea import Parea
from parea.utils.trace_integrations.langchain import PareaAILangchainTracer

# load_dotenv() runs before os.getenv below, so PAREA_API_KEY may be supplied
# through a .env file.
load_dotenv()

# Parea client, keyed by the PAREA_API_KEY environment variable.
p = Parea(api_key=os.getenv("PAREA_API_KEY"))
# Module-level LangChain tracer.
# NOTE(review): the chain invocations in this file pass their own
# PareaAILangchainTracer() instances rather than this one -- confirm whether
# this instance is still needed.
handler = PareaAILangchainTracer()


def get_docs(path=None):
    """Load the letter text file and split it into chunks.

    Args:
        path: Optional path to the text file. When omitted, the file
            ``../data/2022-letter.txt`` is resolved relative to this script
            rather than the current working directory, so the example can be
            launched from any folder.

    Returns:
        The documents returned by ``split_documents`` for a splitter
        configured with ``chunk_size=4000`` and ``chunk_overlap=100``.
    """
    if path is None:
        script_dir = os.path.dirname(os.path.abspath(__file__))
        path = os.path.join(script_dir, "..", "data", "2022-letter.txt")
    loader = TextLoader(path)
    letter = loader.load()
    # Prefer paragraph breaks, then line breaks, when cutting chunks.
    text_splitter = RecursiveCharacterTextSplitter(separators=["\n\n", "\n"], chunk_size=4000, chunk_overlap=100)
    return text_splitter.split_documents(letter)


# xml_parser is used below only for its format instructions (tag "insight"),
# which are injected into insight_prompt.
# NOTE(review): both chains end in StrOutputParser, so the model output is not
# parsed as XML in this file.
xml_parser = XMLOutputParser(tags=["insight"])
# NOTE(review): str_parser is not referenced again in this file; the chains
# below construct their own StrOutputParser() instances.
str_parser = StrOutputParser()

# Prompt for per-chunk insights: Human/Assistant framing with the instructions,
# the quoted document text and the XML format instructions.
insight_prompt = PromptTemplate(
template="""
Human:
{instructions} : \"{document}\"
Format help: {format_instructions}.
Assistant:""",
input_variables=["instructions", "document"],
partial_variables={"format_instructions": xml_parser.get_format_instructions()},
)

# Prompt for the final summary: same framing, without format instructions.
summary_prompt = PromptTemplate(
template="""
Human:
{instructions} : \"{document}\"
Assistant:""",
input_variables=["instructions", "document"],
)

# The statements below run at import time: the document is loaded and split,
# and the boto3 "bedrock-runtime" client is created for region us-east-1.
docs = get_docs()
bedrock_client = boto3.client("bedrock-runtime", region_name="us-east-1")
bedrock_llm = Bedrock(
client=bedrock_client,
model_id="amazon.titan-text-express-v1",
model_kwargs={"maxTokenCount": 4096, "stopSequences": [], "temperature": 0, "topP": 1},
)

# prompt -> Bedrock LLM -> string output, for each of the two tasks.
insight_chain = insight_prompt | bedrock_llm | StrOutputParser()
summary_chain = summary_prompt | bedrock_llm | StrOutputParser()


def get_insights(docs):
    """Run the insight chain on every document chunk and collect the outputs.

    Args:
        docs: Iterable of documents exposing a ``page_content`` attribute.

    Returns:
        A list holding one chain output per document, in input order
        (empty when ``docs`` is empty).
    """
    instructions = "Provide Key insights from the following text"
    insights = []
    for doc in docs:
        insight = insight_chain.invoke(
            # Pass the text itself. Wrapping it in braces built a one-element
            # set, so the prompt received the set's repr and the traced input
            # was not JSON-serialisable.
            {"instructions": instructions, "document": doc.page_content},
            # One tracer instance per invocation, as before.
            config={"callbacks": [PareaAILangchainTracer()]},
        )
        insights.append(insight)
    return insights


def main():
    """Generate per-chunk insights, then summarise them into one paragraph.

    Prints the list of insights and the final summary to stdout.
    """
    print("Starting")
    insights = get_insights(docs)
    print(insights)
    summary = summary_chain.invoke(
        {
            "instructions": "You will be provided with multiple sets of insights. Compile and summarize these "
            "insights and provide key takeaways in one concise paragraph. Do not use the original xml "
            "tags. Just provide a paragraph with your compiled insights.",
            # Pass the joined text directly. Wrapping it in braces built a
            # one-element set, so the prompt received the set's repr with
            # escaped newlines instead of the insights themselves.
            "document": "\n".join(insights),
        },
        config={"callbacks": [PareaAILangchainTracer()]},
    )
    print(summary)
    print("Done")


# Run the example only when this file is executed as a script.
if __name__ == "__main__":
    main()
6 changes: 5 additions & 1 deletion parea/evals/rag/answer_context_faithfulness_precision.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,11 @@ def answer_context_faithfulness_precision(log: Log) -> float:
if provider == "openai":
import tiktoken

encoding = tiktoken.encoding_for_model(model)
try:
encoding = tiktoken.encoding_for_model(model)
except KeyError:
print("Warning: model not found. Using cl100k_base encoding.")
encoding = tiktoken.get_encoding("cl100k_base")
context_tokens = encoding.encode(context)
output_tokens = encoding.encode(log.output)
else:
Expand Down
2 changes: 2 additions & 0 deletions parea/utils/universal_encoder.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ class UniversalEncoder(json.JSONEncoder):
def default(self, obj: Any):
if isinstance(obj, str):
return obj
elif isinstance(obj, set):
return list(obj)
elif is_dataclass_instance(obj):
return dataclasses.asdict(obj)
elif is_attrs_instance(obj):
Expand Down
71 changes: 70 additions & 1 deletion poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ build-backend = "poetry.core.masonry.api"
[tool.poetry]
name = "parea-ai"
packages = [{ include = "parea" }]
version = "0.2.25a0"
version = "0.2.25"
description = "Parea python sdk"
readme = "README.md"
authors = ["joel-parea-ai <[email protected]>"]
Expand Down Expand Up @@ -75,6 +75,7 @@ langchain-experimental = "^0.0.47"
anthropic = "^0.8.0"
ragas = "^0.0.22"
html2text = "^2020.1.16"
boto3 = "^1.34.6"

[tool.black]
# https://github.com/psf/black
Expand Down

0 comments on commit 92dce7d

Please sign in to comment.