
Commit 7001fa7
Merge pull request #62 from oracle-samples/cdb-fix
LLama3.2/HF TGI
corradodebari authored Dec 10, 2024
2 parents c6911aa + 360129f commit 7001fa7
Showing 5 changed files with 45 additions and 3 deletions.
app/src/modules/chatbot.py (3 additions, 2 deletions)

```diff
@@ -99,9 +99,10 @@ def langchain_rag(self, rag_params, chat_instr, context_instr, input, chat_histo

         retrieved_documents = retriever.invoke(input)
-        logger.debug("Retrieved %i documents", len(retrieved_documents))
+        logger.info("Retrieved %i documents", len(retrieved_documents))
         # Retrieve documents for inspection (Use for debugging)
         # for i, doc in enumerate(retrieved_documents):
-        #     logger.debug("Document %i %s", i + 1, doc)
+        for i, doc in enumerate(retrieved_documents):
+            logger.info("Document %i %s", i + 1, doc)

         # QA Chain
         context_messages = [("system", context_instr)]
```
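For context (not part of the commit): promoting these messages from DEBUG to INFO makes retrieval activity visible under a default logging setup, since DEBUG records are filtered out at the usual INFO threshold. A minimal, self-contained illustration:

```python
# Sketch: why the DEBUG -> INFO promotion matters. With a root logger at the
# common INFO level, logger.debug() output is suppressed, so retrieval details
# were previously invisible; logger.info() makes them appear by default.
import logging

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("chatbot")

logger.debug("Retrieved %i documents", 3)  # suppressed at INFO level
logger.info("Retrieved %i documents", 3)   # emitted
```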
app/src/modules/metadata.py (27 additions, 0 deletions)

```diff
@@ -139,6 +139,19 @@ def ll_models():
             "frequency_penalty": [0.0, 0.0, -1.0, 1.0],
             "presence_penalty": [0.0, 0.0, -2.0, 2.0],
         },
+        "tgi": {
+            "enabled": False,
+            "api": "OpenAI",
+            "url": "http://127.0.0.1:8080",
+            "api_key": "",
+            "openai_compat": True,
+            "context_length": 127072,
+            "temperature": [1.0, 1.0, 0.0, 2.0],
+            "top_p": [0.99, .99, 0.0, 0.99],
+            "max_tokens": [256, 256, 1, 8191],
+            "frequency_penalty": [0.0, 0.0, -1.0, 1.0],
+            "presence_penalty": [0.0, 0.0, -2.0, 2.0],
+        },
         "gpt-4o": {
             "enabled": os.getenv("OPENAI_API_KEY") is not None,
             "api": "OpenAI",
@@ -192,6 +205,20 @@ def ll_models():
             "frequency_penalty": [0.0, 0.0, -2.0, 2.0],
             "presence_penalty": [0.0, 0.0, -2.0, 2.0],
         },
+        # llama3.2-3b
+        "llama3.2": {
+            "enabled": os.getenv("ON_PREM_OLLAMA_URL") is not None,
+            "api": "ChatOllama",
+            "url": os.environ.get("ON_PREM_OLLAMA_URL", default="http://127.0.0.1:11434"),
+            "api_key": "",
+            "openai_compat": True,
+            "context_length": 131072,
+            "temperature": [1.0, 1.0, 0.0, 2.0],
+            "top_p": [1.0, 1.0, 0.0, 1.0],
+            "max_tokens": [256, 256, 1, 2048],
+            "frequency_penalty": [0.0, 0.0, -2.0, 2.0],
+            "presence_penalty": [0.0, 0.0, -2.0, 2.0],
+        },
     }
     return ll_models_dict
```
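For context (not part of the diff): the `tgi` entry targets a Hugging Face Text Generation Inference server, which exposes an OpenAI-compatible `/v1/chat/completions` endpoint, while the `llama3.2` entry is served through LangChain's `ChatOllama` against a local Ollama instance. A minimal sketch of talking to the endpoint that the `tgi` entry's `url` field describes; the running server, key, and prompt are assumptions, and none of this code is part of the commit:

```python
# Sketch: exercising the OpenAI-compatible API of a local HF TGI server,
# matching the "url" field of the new "tgi" entry. Assumes a TGI container
# is already serving a model on port 8080.
from openai import OpenAI

client = OpenAI(
    api_key="unused",                      # TGI does not require a key
    base_url="http://127.0.0.1:8080/v1/",  # entry "url" plus the /v1/ suffix
)

completion = client.chat.completions.create(
    model="tgi",  # TGI serves a single model; the name is effectively ignored
    messages=[{"role": "user", "content": "Hello"}],
    max_tokens=256,
)
print(completion.choices[0].message.content)
```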
app/src/modules/utilities.py (1 addition, 1 deletion)

```diff
@@ -110,7 +110,7 @@ def get_ll_model(model, ll_models_config=None, giskarded=False):
         _client = OpenAI(api_key=giskard_key, base_url=f"{llm_url}/v1/")
         client = OpenAIClient(model=model, client=_client)
     elif llm_api == "OpenAI":
-        client = ChatOpenAI(api_key=lm_params["api_key"], **common_params)
+        client = ChatOpenAI(api_key=lm_params["api_key"],base_url=f"{llm_url}/v1/", **common_params)
     elif llm_api == "Cohere":
         client = ChatCohere(cohere_api_key=lm_params["api_key"], **common_params)
     elif llm_api == "ChatPerplexity":
```
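This one-line change routes every model whose `api` is `"OpenAI"` through the entry's own `url` (with `/v1/` appended) instead of the default api.openai.com, which is what lets the new `tgi` entry reuse LangChain's OpenAI client. A minimal sketch of the resulting call pattern; the URL and model name are placeholders, not values taken from the commit:

```python
# Sketch: LangChain's ChatOpenAI aimed at an OpenAI-compatible server such as
# HF TGI. Assumes the langchain-openai package is installed and a server is
# listening at the placeholder URL below.
from langchain_openai import ChatOpenAI

llm = ChatOpenAI(
    api_key="unused",                      # local servers generally ignore it
    base_url="http://127.0.0.1:8080/v1/",  # f"{llm_url}/v1/" as in the diff
    model="tgi",                           # placeholder model name
    max_tokens=256,
)
print(llm.invoke("Hello").content)
```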
spring_ai/README.md (1 addition, 0 deletions)

```diff
@@ -122,6 +122,7 @@ ollama:
     number: 1
   models:
     - llama3.1
+    - llama3.2
     - mxbai-embed-large
     - nomic-embed-text
   nodeSelector:
```
spring_ai/ollama-values.yaml (new file, 13 additions)

```diff
@@ -0,0 +1,13 @@
+ollama:
+  gpu:
+    enabled: true
+    type: 'nvidia'
+    number: 1
+  models:
+    - llama3.1
+    - llama3.2
+    - mxbai-embed-large
+    - nomic-embed-text
+  nodeSelector:
+    node.kubernetes.io/instance-type: VM.GPU.A10.1
+
```
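For reference, a values file like this is typically supplied at install time, e.g. `helm install ollama <chart> -f spring_ai/ollama-values.yaml` (the chart reference is assumed here; see the Spring AI README section above). The values pin the pod to an OCI VM.GPU.A10.1 GPU node and pre-pull the four listed models, now including llama3.2.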