Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 31 additions & 0 deletions services/docsite_explainer/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# Docsite Explainer

This is a service for explaining passages in the OpenFn documentation.

This service uses an input text to query a Claude model for a clarification. It also searches an embedding collection containing the full documentation to give the model passages from it as additional context.

## Implementation Notes

This service currently uses the Zilliz embedding storage provider for OpenAI embeddings, and requires both Zilliz (URI, Token) and OpenAI credentials to run.

## Usage

This service can be run from the services folder via the entry.py module, using a JSON file containing the text that needs clarification:

```bash
python entry.py docsite_explainer tmp/docsite_explainer_input.json tmp/docsite_output.json
```

JSON input format example:

```json
{
  "text": "JavaScript is an untyped language - which is very convenient for OpenFn jobs and usually makes life easier."
}
```

This service can also be run using `curl`:

```bash
curl -X POST http://localhost:3000/services/docsite_explainer --json @tmp/docsite_explainer_input.json
```
89 changes: 89 additions & 0 deletions services/docsite_explainer/docsite_explainer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
import os
from dotenv import load_dotenv
import anthropic
from search.search import Payload, validate_payload, connect_to_milvus, get_search_embeddings, search_database, extract_documents
from docsite_explainer.prompts import build_prompt

load_dotenv()
ANTHROPIC_API_KEY = os.getenv("ANTHROPIC_API_KEY")
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
ZILLIZ_URI = os.getenv("ZILLIZ_CLOUD_URI")
ZILLIZ_TOKEN = os.getenv("ZILLIZ_CLOUD_API_KEY")

client = anthropic.Anthropic(api_key=ANTHROPIC_API_KEY)


def query_llm(system_prompt, formatted_user_prompt, model="claude-3-5-sonnet-20241022", max_tokens=500):
message = client.messages.create(
model=model,
max_tokens=max_tokens,
temperature=0,
system=system_prompt,
messages=[
{
"role": "user",
"content": [
{
"type": "text",
"text": formatted_user_prompt
}
]
}
]
)
answer = message.content[0].text

return answer

def explain_docs(input_text):

# Set and validate database settings
settings_dict = {"api_key":OPENAI_API_KEY, "query":input_text, "partition_name":"normal_docs", "limit": 4}
data = Payload(settings_dict)
validate_payload(data)

# Connect to Milvus database
milvus_client = connect_to_milvus(db_name="openfn_docs")

# Generate embeddings for the search query
search_embeddings = get_search_embeddings(api_key=data.api_key, query=data.query)

# Perform the search
limit = int(data.limit)
res = search_database(milvus_client, search_embeddings, data.partition_name, limit)

# Extract documents from search results
documents = extract_documents(res)

# Collate llm input
context_dict = {
"input_text" : input_text,
"context" : documents[0],
"additional_context_a" : documents[1],
"additional_context_b" : documents[2],
"additional_context_c" : documents[3],
}

# Get formatted prompts
system_prompt, formatted_user_prompt = build_prompt(context_dict)

# Query llm with the prompts
result = query_llm(system_prompt, formatted_user_prompt)

return result


def main(data):

input_text = data.get("text", "")

result = explain_docs(input_text)
print(f"Input query: {input_text}")
print(f"Answer: {result}")

return {
"answer": result
}

if __name__ == "__main__":
main()
32 changes: 32 additions & 0 deletions services/docsite_explainer/prompts.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@

system_prompt = """
You are a helpful assistant for understanding the documentation for OpenFn,
the world's leading digital public good for workflow automation. You will
get a passage from the documentation which you will need to explain more clearly.
You will also get the immediate context from which the text is taken, and additional
passages from the documentation which may provide more information. You can use
this additional context or broader sector-specific knowledge (e.g. programming) to explain the passage.
Keep your answers short, friendly and professional.
"""

user_prompt = """
Passage to explain: "{input_text}"

Here's the context from which the text is from:

"{context}"

Here's additional context from the documentation which may be useful:

"...{additional_context_a}..."

"...{additional_context_b}..."

"...{additional_context_c}..."
"""


def build_prompt(context_dict):
formatted_user_prompt = user_prompt.format_map(context_dict)

return system_prompt, formatted_user_prompt
4 changes: 2 additions & 2 deletions services/search/search.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ def validate_payload(data: Payload):
if data.partition_name and data.partition_name not in ["normal_docs", "adaptor_docs"]:
raise ValueError("Invalid partition_name. Expected values are 'normal_docs' or 'adaptor_docs'.")

def connect_to_milvus() -> MilvusClient:
def connect_to_milvus(db_name="openfn_docs") -> MilvusClient:
"""
Connects to the Milvus database client using environment variables.

Expand All @@ -43,7 +43,7 @@ def connect_to_milvus() -> MilvusClient:

logger.info(f"Connecting to Milvus database...")

return MilvusClient(uri=zilliz_uri, token=zilliz_token, db_name="openfn_docs")
return MilvusClient(uri=zilliz_uri, token=zilliz_token, db_name=db_name)


def get_search_embeddings(api_key: str, query: str) -> list:
Expand Down