Commit 397a339: Merge branch 'main' into alpha

actions-user committed Dec 2, 2023
2 parents: 1f68a1d + 80a0897
Showing 3 changed files with 16 additions and 12 deletions.
models/const.py: 2 additions & 0 deletions

@@ -21,13 +21,15 @@
     OPENAI_CHAT_TEMPERATURE = float(os.environ.get("OPENAI_CHAT_TEMPERATURE", 0.0))
     OPENAI_CHAT_MAX_RETRIES = int(os.environ.get("OPENAI_CHAT_MAX_RETRIES", 3))
     OPENAI_CHAT_CACHE = bool(os.environ.get("OPENAI_CHAT_CACHE", True))
+    DEBUG_MODE = bool(os.environ.get("DEBUG_MODE", False))
 else:
     raise FileNotFoundError("No .env file found in root directory of repository")


 class Config:
     """Configuration parameters."""

+    DEBUG_MODE: bool = DEBUG_MODE
     OPENAI_CHAT_MODEL_NAME: str = OPENAI_CHAT_MODEL_NAME
     OPENAI_PROMPT_MODEL_NAME: str = OPENAI_PROMPT_MODEL_NAME
     OPENAI_CHAT_TEMPERATURE: float = OPENAI_CHAT_TEMPERATURE
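A caveat on the pattern above: Python's bool() returns True for any non-empty string, so an environment value like DEBUG_MODE=false would still enable debug mode. A minimal sketch of a stricter parse, using a hypothetical env_bool helper that is not part of this repository:

import os


def env_bool(name: str, default: bool = False) -> bool:
    """Parse an environment variable as a boolean.

    bool("false") evaluates to True because the string is non-empty,
    so compare against explicit truthy spellings instead.
    """
    value = os.environ.get(name)
    if value is None:
        return default
    return value.strip().lower() in ("1", "true", "yes", "on")


DEBUG_MODE = env_bool("DEBUG_MODE", default=False)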
models/hybrid_search_retreiver.py: 13 additions & 11 deletions

@@ -18,6 +18,7 @@

 # document loading
 import glob
+import logging
 import os
 import textwrap

@@ -52,6 +53,7 @@
 DEFAULT_MODEL_NAME = Config.OPENAI_PROMPT_MODEL_NAME
 pinecone.init(api_key=Credentials.PINECONE_API_KEY, environment=Credentials.PINECONE_ENVIRONMENT)
 set_llm_cache(InMemoryCache())
+logging.basicConfig(level=logging.DEBUG if Config.DEBUG_MODE else logging.INFO)


 class TextSplitter:
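The logging.basicConfig call added above is what gates every logging.debug statement in this diff: records below the configured level are discarded before any handler formats them. A standalone sketch of the same toggle, with a local DEBUG_MODE standing in for Config.DEBUG_MODE:

import logging

DEBUG_MODE = False  # stand-in for Config.DEBUG_MODE

logging.basicConfig(level=logging.DEBUG if DEBUG_MODE else logging.INFO)

logging.debug("suppressed unless DEBUG_MODE is truthy")  # filtered at INFO
logging.info("emitted at either setting")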
@@ -123,16 +125,16 @@ def load(self, filepath: str):
         https://docs.pinecone.io/docs/manage-indexes#selective-metadata-indexing
         """
         try:
-            print("Deleting index...")
+            logging.debug("Deleting index...")
             pinecone.delete_index(Credentials.PINECONE_INDEX_NAME)
         except pinecone.exceptions.PineconeException:
-            print("Index does not exist. Continuing...")
+            logging.debug("Index does not exist. Continuing...")

         metadata_config = {
             "indexed": ["lc_id", "lc_type"],
             "context": ["lc_text"],
         }
-        print("Creating index. This may take a few minutes...")
+        logging.debug("Creating index. This may take a few minutes...")
         pinecone.create_index(
             Credentials.PINECONE_INDEX_NAME, dimension=1536, metric="dotproduct", metadata_config=metadata_config
         )
@@ -142,19 +144,19 @@ def load(self, filepath: str):
         for pdf_file in pdf_files:
             i += 1
             j = len(pdf_files)
-            print(f"Loading PDF {i} of {j}: ", pdf_file)
+            logging.debug("Loading PDF %s of %s: %s", i, j, pdf_file)
             loader = PyPDFLoader(file_path=pdf_file)
             docs = loader.load()
             k = 0
             for doc in docs:
                 k += 1
-                print(k * "-", end="\r")
+                logging.debug(k * "-")
                 documents = self.text_splitter.create_documents([doc.page_content])
                 document_texts = [doc.page_content for doc in documents]
                 embeddings = self.openai_embeddings.embed_documents(document_texts)
                 self.vector_store.add_documents(documents=documents, embeddings=embeddings)

-        print("Finished loading PDFs")
+        logging.debug("Finished loading PDFs")

     def rag(self, prompt: str):
         """
@@ -176,7 +178,7 @@ def rag(self, prompt: str):
             embeddings=self.openai_embeddings, sparse_encoder=self.bm25_encoder, index=self.pinecone_index
         )
         documents = retriever.get_relevant_documents(query=prompt)
-        print(f"Retrieved {len(documents)} related documents from Pinecone")
+        logging.debug("Retrieved %i related documents from Pinecone", len(documents))

         # Extract the text from the documents
         document_texts = [doc.page_content for doc in documents]
@@ -191,14 +193,14 @@ def rag(self, prompt: str):
         # Create a prompt that includes the document texts
         prompt_with_relevant_documents = f"{prompt + leader} {'. '.join(document_texts)}"

-        print(f"Prompt contains {len(prompt_with_relevant_documents.split())} words")
-        print("Prompt:", prompt_with_relevant_documents)
+        logging.debug("Prompt contains %i words", len(prompt_with_relevant_documents.split()))
+        logging.debug("Prompt: %s", prompt_with_relevant_documents)

         # Get a response from the GPT-3.5-turbo model
         response = self.cached_chat_request(
             system_message="You are a helpful assistant.", human_message=prompt_with_relevant_documents
         )

-        print("Response:")
-        print("------------------------------------------------------")
+        logging.debug("Response:")
+        logging.debug("------------------------------------------------------")
         return response
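Note that the conversions in this file pass values as arguments (logging.debug("... %i", n)) rather than pre-formatted f-strings. The logging module interpolates arguments only when a record actually passes the level filter, so the formatting cost disappears whenever DEBUG output is disabled. A small sketch, with n_docs as a hypothetical value for illustration:

import logging

logging.basicConfig(level=logging.INFO)
n_docs = 12  # hypothetical value for illustration

# Lazy: interpolation is skipped because DEBUG records are filtered out.
logging.debug("Retrieved %i related documents from Pinecone", n_docs)

# Eager: the f-string is built before the call, even though the record is dropped.
logging.debug(f"Retrieved {n_docs} related documents from Pinecone")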
models/tests/test_prompts.py: 1 addition & 1 deletion

@@ -30,5 +30,5 @@ def test_training_services(self):
         prompt = self.templates.training_services
         result = self.hsr.prompt_with_template(prompt=prompt, concept="Microsoft certified Azure AI engineer associate")
         assert result
-        assert "Microsoft" in result
+        assert "Microsoft" in result or "Azure" in result
         assert "training" in result
