Skip to content

Commit

Permalink
Merge branch 'main' into beta
Browse files Browse the repository at this point in the history
  • Loading branch information
actions-user committed Dec 2, 2023
2 parents 936f886 + 3abb0fb commit 07679cf
Show file tree
Hide file tree
Showing 15 changed files with 135 additions and 65 deletions.
10 changes: 8 additions & 2 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,9 +1,15 @@
## [1.1.2](https://github.com/lpm0073/hybrid-search-retriever/compare/v1.1.1...v1.1.2) (2023-12-01)


### Bug Fixes

* syntax error in examples.prompt ([230b709](https://github.com/lpm0073/hybrid-search-retriever/commit/230b7090c96bdd4d7d8757b182f891ab1b82c6f4))

## [1.1.1](https://github.com/lpm0073/netec-llm/compare/v1.1.0...v1.1.1) (2023-12-01)

### Bug Fixes

* had to switch to bm25_encoder so that vector store is searchable ([bad6994](https://github.com/lpm0073/netec-llm/commit/bad699481d217dde81877d85124395529652dabe))

# [1.1.0](https://github.com/lpm0073/netec-llm/compare/v1.0.0...v1.1.0) (2023-12-01)

Expand Down
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ SHELL := /bin/bash
ifneq ("$(wildcard .env)","")
include .env
else
$(shell echo -e "OPENAI_API_ORGANIZATION=PLEASE-ADD-ME\nOPENAI_API_KEY=PLEASE-ADD-ME\nPINECONE_API_KEY=PLEASE-ADD-ME\nPINECONE_ENVIRONMENT=gcp-starter\nPINECONE_INDEX_NAME=netec-ssm\nDEBUG_MODE=True\n" >> .env)
$(shell echo -e "OPENAI_API_ORGANIZATION=PLEASE-ADD-ME\nOPENAI_API_KEY=PLEASE-ADD-ME\nPINECONE_API_KEY=PLEASE-ADD-ME\nPINECONE_ENVIRONMENT=gcp-starter\nPINECONE_INDEX_NAME=hsr\nOPENAI_CHAT_MODEL_NAME=gpt-3.5-turbo\nOPENAI_PROMPT_MODEL_NAME=text-davinci-003\nOPENAI_CHAT_TEMPERATURE=0.0\nOPENAI_CHAT_MAX_RETRIES=3\nDEBUG_MODE=True\n" >> .env)
endif

.PHONY: analyze init activate test lint clean
Expand Down
19 changes: 17 additions & 2 deletions models/const.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# pylint: disable=too-few-public-methods
"""Sales Support Model (SSM) for the LangChain project."""
"""Sales Support Model (hsr) for the LangChain project."""

import os

Expand All @@ -15,11 +15,26 @@
OPENAI_API_ORGANIZATION = os.environ["OPENAI_API_ORGANIZATION"]
PINECONE_API_KEY = os.environ["PINECONE_API_KEY"]
PINECONE_ENVIRONMENT = os.environ["PINECONE_ENVIRONMENT"]
PINECONE_INDEX_NAME = os.environ["PINECONE_INDEX_NAME"]
PINECONE_INDEX_NAME = os.environ.get("PINECONE_INDEX_NAME", "hsr")
OPENAI_CHAT_MODEL_NAME = os.environ.get("OPENAI_CHAT_MODEL_NAME", "gpt-3.5-turbo")
OPENAI_PROMPT_MODEL_NAME = os.environ.get("OPENAI_PROMPT_MODEL_NAME", "text-davinci-003")
OPENAI_CHAT_TEMPERATURE = float(os.environ.get("OPENAI_CHAT_TEMPERATURE", 0.0))
OPENAI_CHAT_MAX_RETRIES = int(os.environ.get("OPENAI_CHAT_MAX_RETRIES", 3))
OPENAI_CHAT_CACHE = bool(os.environ.get("OPENAI_CHAT_CACHE", True))
else:
raise FileNotFoundError("No .env file found in root directory of repository")


class Config:
    """Configuration parameters.

    Mirrors the module-level values read from the environment above so that
    consumers can import a single namespaced object instead of bare globals.
    """

    # chat completion model identifier passed to ChatOpenAI
    OPENAI_CHAT_MODEL_NAME: str = OPENAI_CHAT_MODEL_NAME
    # completion (non-chat) model identifier used for prompt templates
    OPENAI_PROMPT_MODEL_NAME: str = OPENAI_PROMPT_MODEL_NAME
    # sampling temperature for chat requests; 0.0 = deterministic
    OPENAI_CHAT_TEMPERATURE: float = OPENAI_CHAT_TEMPERATURE
    # how many times the OpenAI client retries a failed request
    OPENAI_CHAT_MAX_RETRIES: int = OPENAI_CHAT_MAX_RETRIES
    # whether chat responses are served from the LLM cache
    OPENAI_CHAT_CACHE: bool = OPENAI_CHAT_CACHE


class Credentials:
"""Credentials."""

Expand Down
6 changes: 3 additions & 3 deletions models/examples/load.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,15 @@
# -*- coding: utf-8 -*-
"""Sales Support Model (hsr) Retrieval Augmented Generation (RAG)"""
import argparse

from models.hybrid_search_retreiver import HybridSearchRetriever


# single retriever instance shared by the command-line entry point
hsr = HybridSearchRetriever()

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="RAG example")
    parser.add_argument("filepath", type=str, help="Location of PDF documents")
    args = parser.parse_args()

    # embed and index the PDF documents found at the given path
    hsr.load(filepath=args.filepath)
8 changes: 4 additions & 4 deletions models/examples/prompt.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,18 @@
# -*- coding: utf-8 -*-
"""Sales Support Model (hsr)"""
import argparse

from models.hybrid_search_retreiver import HybridSearchRetriever


# single retriever instance shared by the command-line entry point
hsr = HybridSearchRetriever()


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="hsr examples")
    parser.add_argument("system_prompt", type=str, help="A system prompt to send to the model.")
    parser.add_argument("human_prompt", type=str, help="A human prompt to send to the model.")
    args = parser.parse_args()

    # cached_chat_request consults the LLM cache before hitting OpenAI
    result = hsr.cached_chat_request(args.system_prompt, args.human_prompt)
    print(result)
6 changes: 3 additions & 3 deletions models/examples/rag.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,16 @@
# -*- coding: utf-8 -*-
"""Sales Support Model (hsr) Retrieval Augmented Generation (RAG)"""
import argparse

from models.hybrid_search_retreiver import HybridSearchRetriever


# single retriever instance shared by the command-line entry point
hsr = HybridSearchRetriever()

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="RAG example")
    parser.add_argument("prompt", type=str, help="A question about the PDF contents")
    args = parser.parse_args()

    # retrieval-augmented generation over the previously loaded PDF corpus
    result = hsr.rag(prompt=args.prompt)
    print(result)
8 changes: 4 additions & 4 deletions models/examples/training_services.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,19 @@
# -*- coding: utf-8 -*-
"""Sales Support Model (hsr) for the LangChain project."""
import argparse

from models.hybrid_search_retreiver import HybridSearchRetriever
from models.prompt_templates import NetecPromptTemplates


# single retriever instance and template catalog for this entry point
hsr = HybridSearchRetriever()
templates = NetecPromptTemplates()

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="hsr examples")
    parser.add_argument("concept", type=str, help="A kind of training that Netec provides.")
    args = parser.parse_args()

    # fill the training-services template with the requested concept
    prompt = templates.training_services
    result = hsr.prompt_with_template(prompt=prompt, concept=args.concept)
    print(result)
8 changes: 4 additions & 4 deletions models/examples/training_services_oracle.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,19 @@
# -*- coding: utf-8 -*-
"""Sales Support Model (hsr) for the LangChain project."""
import argparse

from models.hybrid_search_retreiver import HybridSearchRetriever
from models.prompt_templates import NetecPromptTemplates


# single retriever instance and template catalog for this entry point
hsr = HybridSearchRetriever()
templates = NetecPromptTemplates()

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="hsr Oracle examples")
    parser.add_argument("concept", type=str, help="An Oracle certification exam prep")
    args = parser.parse_args()

    # fill the Oracle-specific training-services template with the concept
    prompt = templates.oracle_training_services
    result = hsr.prompt_with_template(prompt=prompt, concept=args.concept)
    print(result)
41 changes: 11 additions & 30 deletions models/hybrid_search_retreiver.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@
import glob
import os
import textwrap
from typing import List

# pinecone integration
import pinecone
Expand All @@ -44,25 +43,23 @@
from pinecone_text.sparse import BM25Encoder

# this project
from models.const import Credentials
from models.const import Config, Credentials


###############################################################################
# initializations
###############################################################################
DEFAULT_MODEL_NAME = "text-davinci-003"
DEFAULT_MODEL_NAME = Config.OPENAI_PROMPT_MODEL_NAME
pinecone.init(api_key=Credentials.PINECONE_API_KEY, environment=Credentials.PINECONE_ENVIRONMENT)
set_llm_cache(InMemoryCache())


class TextSplitter:
"""
Custom text splitter that add metadata to the Document object
Custom text splitter that adds metadata to the Document object
which is required by PineconeHybridSearchRetriever.
"""

# ...

def create_documents(self, texts):
"""Create documents"""
documents = []
Expand All @@ -74,16 +71,16 @@ def create_documents(self, texts):


class HybridSearchRetriever:
"""Sales Support Model (SSM)."""
"""Hybrid Search Retriever (OpenAI + Pinecone)"""

# prompting wrapper
chat = ChatOpenAI(
api_key=Credentials.OPENAI_API_KEY,
organization=Credentials.OPENAI_API_ORGANIZATION,
cache=True,
max_retries=3,
model="gpt-3.5-turbo",
temperature=0.0,
cache=Config.OPENAI_CHAT_CACHE,
max_retries=Config.OPENAI_CHAT_MAX_RETRIES,
model=Config.OPENAI_CHAT_MODEL_NAME,
temperature=Config.OPENAI_CHAT_TEMPERATURE,
)

# embeddings
Expand Down Expand Up @@ -112,22 +109,6 @@ def prompt_with_template(self, prompt: PromptTemplate, concept: str, model: str
retval = llm(prompt.format(concept=concept))
return retval

def fit_tf_idf_values(self, corpus: List[str]):
"""Fit TF-IDF values.
1. Fit the BM25 encoder on the corpus
2. Encode the corpus
3. Store the encoded corpus in Pinecone
"""
corpus = ["foo", "bar", "world", "hello"]

# fit tf-idf values on your corpus
self.bm25_encoder.fit(corpus)

# persist the values to a json file
self.bm25_encoder.dump("bm25_values.json")
self.bm25_encoder = BM25Encoder().load("bm25_values.json")
self.bm25_encoder.fit(corpus)

def load(self, filepath: str):
"""
Embed PDF.
Expand Down Expand Up @@ -201,9 +182,9 @@ def rag(self, prompt: str):
document_texts = [doc.page_content for doc in documents]
leader = textwrap.dedent(
"""\
You can assume that the following is true,
and you should attempt to incorporate these facts
in your response:
\n\nYou can assume that the following is true.
You should attempt to incorporate these facts
into your response:\n\n
"""
)

Expand Down
2 changes: 1 addition & 1 deletion models/prompt_templates.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# pylint: disable=too-few-public-methods
"""Sales Support Model (SSM) prompt templates"""
"""Sales Support Model (hsr) prompt templates"""

from langchain.prompts import PromptTemplate

Expand Down
68 changes: 68 additions & 0 deletions models/tests/test_examples.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
# -*- coding: utf-8 -*-
# flake8: noqa: F401
"""
Test command line example prompts.
"""
from unittest.mock import MagicMock, patch

import pytest # pylint: disable=unused-import

from models.examples.prompt import hsr as prompt_hrs
from models.examples.rag import hsr as rag_hsr
from models.examples.training_services import hsr as training_services_hsr
from models.examples.training_services_oracle import hsr as training_services_oracle_hsr
from models.prompt_templates import NetecPromptTemplates


HUMAN_PROMPT = 'return the word "SUCCESS" in upper case.'


class TestExamples:
    """Exercise the command-line example entry points end to end.

    argparse.ArgumentParser.parse_args is patched in every test so the
    examples can be driven without real command-line arguments. Note these
    are live integration tests: they call OpenAI through the hsr instances
    imported at module level (aliased `prompt_hrs` there — typo kept).
    """

    @patch("argparse.ArgumentParser.parse_args")
    def test_prompt(self, mock_parse_args):
        """A plain system/human prompt round-trip should return SUCCESS."""
        fake_args = MagicMock()
        fake_args.system_prompt = "you are a helpful assistant"
        fake_args.human_prompt = HUMAN_PROMPT
        mock_parse_args.return_value = fake_args

        response = prompt_hrs.cached_chat_request(fake_args.system_prompt, fake_args.human_prompt)
        assert response == "SUCCESS"

    @patch("argparse.ArgumentParser.parse_args")
    def test_rag(self, mock_parse_args):
        """The RAG pipeline should return SUCCESS for the canned prompt."""
        fake_args = MagicMock()
        fake_args.human_prompt = HUMAN_PROMPT
        mock_parse_args.return_value = fake_args

        response = rag_hsr.rag(fake_args.human_prompt)
        assert response == "SUCCESS"

    @patch("argparse.ArgumentParser.parse_args")
    def test_training_services(self, mock_parse_args):
        """The training-services template should echo SUCCESS in its output."""
        fake_args = MagicMock()
        fake_args.human_prompt = HUMAN_PROMPT
        mock_parse_args.return_value = fake_args

        template = NetecPromptTemplates().training_services
        response = training_services_hsr.prompt_with_template(prompt=template, concept=fake_args.human_prompt)
        assert "SUCCESS" in response

    @patch("argparse.ArgumentParser.parse_args")
    def test_oracle_training_services(self, mock_parse_args):
        """The Oracle training-services template should echo SUCCESS."""
        fake_args = MagicMock()
        fake_args.human_prompt = HUMAN_PROMPT
        mock_parse_args.return_value = fake_args

        template = NetecPromptTemplates().oracle_training_services
        response = training_services_oracle_hsr.prompt_with_template(prompt=template, concept=fake_args.human_prompt)
        assert "SUCCESS" in response
10 changes: 5 additions & 5 deletions models/tests/test_hsr.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,8 @@ def test_01_basic(self):
def test_02_class_aatribute_types(self):
"""ensure that class attributes are of the correct type"""

ssm = HybridSearchRetriever()
assert isinstance(ssm.chat, ChatOpenAI)
assert isinstance(ssm.pinecone_index, Index)
assert isinstance(ssm.text_splitter, TextSplitter)
assert isinstance(ssm.openai_embeddings, OpenAIEmbeddings)
hsr = HybridSearchRetriever()
assert isinstance(hsr.chat, ChatOpenAI)
assert isinstance(hsr.pinecone_index, Index)
assert isinstance(hsr.text_splitter, TextSplitter)
assert isinstance(hsr.openai_embeddings, OpenAIEmbeddings)
4 changes: 2 additions & 2 deletions models/tests/test_openai.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@ class TestOpenAI:
def test_03_test_openai_connectivity(self):
"""Ensure that we have connectivity to OpenAI."""

ssm = HybridSearchRetriever()
retval = ssm.cached_chat_request(
hsr = HybridSearchRetriever()
retval = hsr.cached_chat_request(
"your are a helpful assistant", "please return the value 'CORRECT' in all upper case."
)
assert retval == "CORRECT"
6 changes: 3 additions & 3 deletions models/tests/test_prompts.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,14 +12,14 @@
class TestPrompts:
"""Test HybridSearchRetriever class."""

ssm = HybridSearchRetriever()
hsr = HybridSearchRetriever()
templates = NetecPromptTemplates()

def test_oracle_training_services(self):
"""Test a prompt with the Oracle training services template"""

prompt = self.templates.oracle_training_services
result = self.ssm.prompt_with_template(prompt=prompt, concept="Oracle database administrator")
result = self.hsr.prompt_with_template(prompt=prompt, concept="Oracle database administrator")
assert result
assert "Oracle" in result
assert "training" in result
Expand All @@ -28,7 +28,7 @@ def test_training_services(self):
"""Test a prompt with the training services template"""

prompt = self.templates.training_services
result = self.ssm.prompt_with_template(prompt=prompt, concept="Microsoft certified Azure AI engineer associate")
result = self.hsr.prompt_with_template(prompt=prompt, concept="Microsoft certified Azure AI engineer associate")
assert result
assert "Microsoft" in result
assert "training" in result
Loading

0 comments on commit 07679cf

Please sign in to comment.