Langchain libs update (#769)
* LLMs with latest langchain dev libraries

* conflict resolved

* all llm models with latest library changes
aashipandya authored Sep 27, 2024
1 parent 501ece4 commit ba6a9d2
Showing 6 changed files with 83 additions and 34 deletions.
33 changes: 16 additions & 17 deletions backend/requirements.txt
@@ -69,22 +69,22 @@ jsonpath-python==1.0.6
jsonpointer==2.4
json-repair==0.25.2
kiwisolver==1.4.5
-langchain
-langchain-aws
-langchain-anthropic
-langchain-fireworks
-langchain-google-genai
-langchain-community
-langchain-core
-langchain-experimental
-langchain-google-vertexai
-langchain-groq
-langchain-openai
-langchain-text-splitters
+langchain==0.3.0
+langchain-aws==0.2.1
+langchain-anthropic==0.2.1
+langchain-fireworks==0.2.0
+langchain-google-genai==2.0.0
+langchain-community==0.3.0
+langchain-core==0.3.5
+langchain-experimental==0.3.1
+langchain-google-vertexai==2.0.1
+langchain-groq==0.2.0
+langchain-openai==0.2.0
+langchain-text-splitters==0.3.0
langdetect==1.0.9
-langsmith==0.1.83
+langsmith==0.1.128
layoutparser==0.3.4
-langserve==0.2.2
+langserve==0.3.0
#langchain-cli==0.0.25
lxml==5.1.0
MarkupSafe==2.1.5
@@ -100,7 +100,7 @@ numpy==1.26.4
omegaconf==2.3.0
onnx==1.16.1
onnxruntime==1.18.1
-openai==1.35.10
+openai==1.47.1
opencv-python==4.8.0.76
orjson==3.9.15
packaging==23.2
@@ -144,7 +144,6 @@ shapely==2.0.3
six==1.16.0
sniffio==1.3.1
soupsieve==2.5
-SQLAlchemy==2.0.28
starlette==0.37.2
sse-starlette==2.1.2
starlette-session==0.4.3
@@ -159,7 +158,7 @@ transformers==4.42.3
types-protobuf
types-requests
typing-inspect==0.9.0
-typing_extensions==4.9.0
+typing_extensions==4.12.2
tzdata==2024.1
unstructured==0.14.9
unstructured-client==0.23.8
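The pins above move the backend onto the langchain 0.3.x line, which runs on Pydantic 2 and deprecates the langchain_core.pydantic_v1 bridge (see the schema_extraction.py change below). A minimal sketch of a startup guard against drifting away from these pins; the helper and its name are hypothetical, not part of this commit:

```python
from importlib.metadata import version

# Hypothetical guard: fail fast if the installed langchain stack drifts from
# the 0.3.x line pinned in backend/requirements.txt.
EXPECTED_PREFIXES = {
    "langchain": "0.3.",
    "langchain-core": "0.3.",
    "langchain-community": "0.3.",
}

def check_langchain_pins() -> None:
    for package, prefix in EXPECTED_PREFIXES.items():
        installed = version(package)
        if not installed.startswith(prefix):
            raise RuntimeError(f"{package} {installed} installed, expected {prefix}x")
```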
10 changes: 7 additions & 3 deletions backend/src/llm.py
@@ -9,13 +9,14 @@
import concurrent.futures
from concurrent.futures import ThreadPoolExecutor
from langchain_experimental.graph_transformers import LLMGraphTransformer
+from langchain_core.prompts import ChatPromptTemplate
from langchain_anthropic import ChatAnthropic
from langchain_fireworks import ChatFireworks
from langchain_aws import ChatBedrock
from langchain_community.chat_models import ChatOllama
import boto3
import google.auth
-from src.shared.constants import MODEL_VERSIONS
+from src.shared.constants import MODEL_VERSIONS, PROMPT_TO_ALL_LLMs


def get_llm(model: str):
@@ -28,7 +29,7 @@ def get_llm(model: str):
model_name = MODEL_VERSIONS[model]
llm = ChatVertexAI(
model_name=model_name,
-convert_system_message_to_human=True,
+#convert_system_message_to_human=True,
credentials=credentials,
project=project_id,
temperature=0,
@@ -149,8 +150,9 @@ def get_graph_document_list(
if "diffbot_api_key" in dir(llm):
llm_transformer = llm
else:
-if "get_name" in dir(llm) and llm.get_name() == "ChatOllama":
+if "get_name" in dir(llm) and llm.get_name() not in ("ChatOpenAI", "ChatVertexAI", "AzureChatOpenAI"):
node_properties = False
+relationship_properties = False
else:
node_properties = ["description"]
relationship_properties = ["description"]
@@ -160,6 +162,8 @@
relationship_properties=relationship_properties,
allowed_nodes=allowedNodes,
allowed_relationships=allowedRelationship,
+ignore_tool_usage=True,
+#prompt = ChatPromptTemplate.from_messages(["system",PROMPT_TO_ALL_LLMs])
)
with ThreadPoolExecutor(max_workers=10) as executor:
for chunk in combined_chunk_document_list:
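For orientation, a hypothetical sketch of how a transformer configured with these options is driven; the helper name and chunk handling are illustrative, while the real call sites live in get_graph_document_list:

```python
from langchain_core.documents import Document
from langchain_experimental.graph_transformers import LLMGraphTransformer

def extract_graph_documents(llm, chunk_texts):
    """Illustrative only: run an LLMGraphTransformer over plain-text chunks."""
    transformer = LLMGraphTransformer(
        llm=llm,
        node_properties=["description"],
        relationship_properties=["description"],
        ignore_tool_usage=True,  # use prompt-based extraction instead of tool calling
    )
    documents = [Document(page_content=text) for text in chunk_texts]
    return transformer.convert_to_graph_documents(documents)
```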
16 changes: 8 additions & 8 deletions backend/src/main.py
@@ -433,14 +433,14 @@ def processing_chunks(chunkId_chunkDoc_list,graph,uri, userName, password, datab
node_type= node.type
if (node_id, node_type) not in distinct_nodes:
distinct_nodes.add((node_id, node_type))
-#get all relations
-for relation in graph_document.relationships:
-relations.append(relation.type)
-
-node_count += len(distinct_nodes)
-rel_count += len(relations)
-print(f'node count internal func:{node_count}')
-print(f'relation count internal func:{rel_count}')
+#get all relations
+for relation in graph_document.relationships:
+relations.append(relation.type)
+
+node_count += len(distinct_nodes)
+rel_count += len(relations)
+print(f'node count internal func:{node_count}')
+print(f'relation count internal func:{rel_count}')
return node_count,rel_count

def get_chunkId_chunkDoc_list(graph, file_name, pages, retry_condition):
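The changed block in processing_chunks appears to be an indentation-only move (the line text is unchanged); as a reading aid, here is a standalone sketch of the counting it performs, written as a hypothetical helper rather than the function in main.py:

```python
def count_nodes_and_relationships(graph_document_list):
    """Illustrative only: tally distinct nodes and all relationship types."""
    distinct_nodes = set()
    relations = []
    for graph_document in graph_document_list:
        for node in graph_document.nodes:
            distinct_nodes.add((node.id, node.type))
        for relation in graph_document.relationships:
            relations.append(relation.type)
    return len(distinct_nodes), len(relations)
```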
52 changes: 48 additions & 4 deletions backend/src/shared/constants.py
@@ -1,15 +1,16 @@
MODEL_VERSIONS = {
"openai-gpt-3.5": "gpt-3.5-turbo-0125",
"gemini-1.0-pro": "gemini-1.0-pro-001",
-"gemini-1.5-pro": "gemini-1.5-pro-preview-0514",
+"gemini-1.5-pro": "gemini-1.5-pro-002",
+"gemini-1.5-flash": "gemini-1.5-flash-002",
"openai-gpt-4": "gpt-4-turbo-2024-04-09",
"diffbot" : "gpt-4-turbo-2024-04-09",
"openai-gpt-4o-mini": "gpt-4o-mini-2024-07-18",
"openai-gpt-4o":"gpt-4o-2024-08-06",
"groq-llama3" : "llama3-70b-8192"
}
OPENAI_MODELS = ["openai-gpt-3.5", "openai-gpt-4o", "openai-gpt-4o-mini"]
-GEMINI_MODELS = ["gemini-1.0-pro", "gemini-1.5-pro"]
+GEMINI_MODELS = ["gemini-1.0-pro", "gemini-1.5-pro", "gemini-1.5-flash"]
GROQ_MODELS = ["groq-llama3"]
BUCKET_UPLOAD = 'llm-graph-builder-upload'
BUCKET_FAILED_FILE = 'llm-graph-builder-failed'
@@ -92,14 +93,14 @@
CHAT_DOC_SPLIT_SIZE = 3000
CHAT_EMBEDDING_FILTER_SCORE_THRESHOLD = 0.10
CHAT_TOKEN_CUT_OFF = {
-("openai-gpt-3.5",'azure_ai_gpt_35',"gemini-1.0-pro","gemini-1.5-pro","groq-llama3",'groq_llama3_70b','anthropic_claude_3_5_sonnet','fireworks_llama_v3_70b','bedrock_claude_3_5_sonnet', ) : 4,
+("openai-gpt-3.5",'azure_ai_gpt_35',"gemini-1.0-pro","gemini-1.5-pro","gemini-1.5-flash","groq-llama3",'groq_llama3_70b','anthropic_claude_3_5_sonnet','fireworks_llama_v3_70b','bedrock_claude_3_5_sonnet', ) : 4,
("openai-gpt-4","diffbot" ,'azure_ai_gpt_4o',"openai-gpt-4o", "openai-gpt-4o-mini") : 28,
("ollama_llama3") : 2
}


CHAT_TOKEN_CUT_OFF = {
-("openai-gpt-3.5",'azure_ai_gpt_35',"gemini-1.0-pro","gemini-1.5-pro","groq-llama3",'groq_llama3_70b','anthropic_claude_3_5_sonnet','fireworks_llama_v3_70b','bedrock_claude_3_5_sonnet', ) : 4,
+("openai-gpt-3.5",'azure_ai_gpt_35',"gemini-1.0-pro","gemini-1.5-pro", "gemini-1.5-flash","groq-llama3",'groq_llama3_70b','anthropic_claude_3_5_sonnet','fireworks_llama_v3_70b','bedrock_claude_3_5_sonnet', ) : 4,
("openai-gpt-4","diffbot" ,'azure_ai_gpt_4o',"openai-gpt-4o", "openai-gpt-4o-mini") : 28,
("ollama_llama3") : 2
}
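CHAT_TOKEN_CUT_OFF is keyed by tuples of model names (and, for "ollama_llama3", a bare string), so callers have to scan the keys rather than index directly. A hypothetical lookup helper, shown for illustration only:

```python
def get_chat_token_cut_off(model: str, cut_off_map: dict, default: int = 4) -> int:
    # Keys may be a tuple of model names or a single model-name string.
    for models, cut_off in cut_off_map.items():
        if model == models or (isinstance(models, tuple) and model in models):
            return cut_off
    return default
```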
@@ -476,3 +477,46 @@
START_FROM_BEGINNING = "start_from_beginning"
DELETE_ENTITIES_AND_START_FROM_BEGINNING = "delete_entities_and_start_from_beginning"
START_FROM_LAST_PROCESSED_POSITION = "start_from_last_processed_position"
+
+PROMPT_TO_ALL_LLMs = """
+"# Knowledge Graph Instructions for LLMs\n"
+"## 1. Overview\n"
+"You are a top-tier algorithm designed for extracting information in structured "
+"formats to build a knowledge graph.\n"
+"Try to capture as much information from the text as possible without "
+"sacrificing accuracy. Do not add any information that is not explicitly "
+"mentioned in the text.\n"
+"- **Nodes** represent entities and concepts.\n"
+"- The aim is to achieve simplicity and clarity in the knowledge graph, making it\n"
+"accessible for a vast audience.\n"
+"## 2. Labeling Nodes\n"
+"- **Consistency**: Ensure you use available types for node labels.\n"
+"Ensure you use basic or elementary types for node labels.\n"
+"- For example, when you identify an entity representing a person, "
+"always label it as **'person'**. Avoid using more specific terms "
+"like 'mathematician' or 'scientist'."
+"- **Node IDs**: Never utilize integers as node IDs. Node IDs should be "
+"names or human-readable identifiers found in the text.\n"
+"- **Relationships** represent connections between entities or concepts.\n"
+"Ensure consistency and generality in relationship types when constructing "
+"knowledge graphs. Instead of using specific and momentary types "
+"such as 'BECAME_PROFESSOR', use more general and timeless relationship types "
+"like 'PROFESSOR'. Make sure to use general and timeless relationship types!\n"
+"## 3. Coreference Resolution\n"
+"- **Maintain Entity Consistency**: When extracting entities, it's vital to "
+"ensure consistency.\n"
+'If an entity, such as "John Doe", is mentioned multiple times in the text '
+'but is referred to by different names or pronouns (e.g., "Joe", "he"),'
+"always use the most complete identifier for that entity throughout the "
+'knowledge graph. In this example, use "John Doe" as the entity ID.\n'
+"Remember, the knowledge graph should be coherent and easily understandable, "
+"so maintaining consistency in entity references is crucial.\n"
+"## 4. Node Properties\n"
+"- Dates, URLs, Time, and Numerical Values: Instead of creating separate nodes for
+these elements, represent them as properties of existing nodes."
+"- Example: Instead of creating a node labeled "2023-03-15" and connecting it to another node
+with the relationship "BORN_ON", add a property called "born_on" to the person node with the
+value "2023-03-15"."
+"## 5. Strict Compliance\n"
+"Adhere to the rules strictly. Non-compliance will result in termination."
+"""
3 changes: 2 additions & 1 deletion backend/src/shared/schema_extraction.py
@@ -1,5 +1,6 @@
from typing import List
-from langchain_core.pydantic_v1 import BaseModel, Field
+#from langchain_core.pydantic_v1 import BaseModel, Field
+from pydantic.v1 import BaseModel, Field
from src.llm import get_llm
from src.shared.constants import MODEL_VERSIONS
from langchain_core.prompts import ChatPromptTemplate
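langchain 0.3 deprecates the langchain_core.pydantic_v1 bridge in favour of importing v1-style models straight from pydantic, which is what the swap above does. A minimal, hypothetical sketch of the pattern; the class and field names are illustrative, not the ones defined in schema_extraction.py:

```python
from typing import List
from pydantic.v1 import BaseModel, Field  # v1-style models still work under pydantic 2

class GraphSchema(BaseModel):
    """Illustrative structured-output model in the style this file uses."""
    labels: List[str] = Field(description="Distinct node labels found in the text")
    relationship_types: List[str] = Field(description="Distinct relationship types found in the text")
```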
3 changes: 2 additions & 1 deletion frontend/src/utils/Constants.ts
@@ -45,12 +45,13 @@ export const llms =
'openai-gpt-4o-mini',
'gemini-1.0-pro',
'gemini-1.5-pro',
+'gemini-1.5-flash',
'azure_ai_gpt_35',
'azure_ai_gpt_4o',
'ollama_llama3',
'groq_llama3_70b',
'anthropic_claude_3_5_sonnet',
-'fireworks_v3p1_405b',
+'fireworks_llama_v3p2_90b',
'bedrock_claude_3_5_sonnet',
];

