Skip to content

Commit

Permalink
Initial RAG client work
Browse files Browse the repository at this point in the history
  • Loading branch information
markbackman committed Sep 6, 2024
1 parent 4ce39d8 commit 04862f2
Show file tree
Hide file tree
Showing 4 changed files with 130 additions and 38 deletions.
7 changes: 3 additions & 4 deletions app/api/rag/route.ts
Original file line number Diff line number Diff line change
@@ -1,14 +1,13 @@
import { NextResponse } from "next/server";

import { generateResponse, query_similar_content } from "@/utils/rag_query";
import { hierarchicalRetrieval } from "@/utils/rag_query";

export async function POST(request: Request) {
const { query } = await request.json();

try {
const ragResults = await query_similar_content(query);
const llmResponse = await generateResponse(query, ragResults);
return NextResponse.json({ ragResults, llmResponse });
const { response, ragResults, level } = await hierarchicalRetrieval(query);
return NextResponse.json({ ragResults, llmResponse: response, level });
} catch (error) {
console.error("RAG query error:", error);
return NextResponse.json(
Expand Down
9 changes: 5 additions & 4 deletions app/page.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,8 @@ export default function Home() {
try {
if (fn.functionName === "get_rag_context" && args.query) {
console.log("get_rag_context", args.query);
setFetchingRAG(true);

const response = await fetch("/api/rag", {
method: "POST",
headers: {
Expand All @@ -65,18 +67,17 @@ export default function Home() {

console.log(data);

// Assuming the API returns both ragResults and llmResponse
// We'll return a formatted context that includes both
const formattedContext = `
Relevant Context:
Relevant Context (${data.level} level):
${data.ragResults
.map(
(result: any) =>
`Title: ${result.metadata.title}
Level: ${result.metadata.level}
Content: ${result.metadata.truncated_content}`
)
.join("\n\n")}
AI Response:
${data.llmResponse}
`;
Expand Down
54 changes: 35 additions & 19 deletions rtvi.config.ts
Original file line number Diff line number Diff line change
@@ -1,52 +1,68 @@
export const BOT_READY_TIMEOUT = 15 * 1000; // 15 seconds

export const defaultBotProfile = 'voice_2024_08';
export const defaultBotProfile = "voice_2024_08";
export const defaultMaxDuration = 600;

export const defaultServices = {
llm: 'openai',
tts: 'cartesia',
llm: "openai",
tts: "cartesia",
};

export const defaultConfig = [
{
service: 'tts',
options: [{ name: 'voice', value: 'd46abd1d-2d02-43e8-819f-51fb652c1c61' }],
service: "tts",
options: [{ name: "voice", value: "839ea677-2007-46d5-9678-e282fa5546b4" }], // Cartesia voice id: clone of Ben Thompson
},
{
service: 'llm',
service: "vad",
options: [
{ name: 'model', value: 'gpt-4o' },
{
name: 'initial_messages',
name: "params",
value: {
stop_secs: 0.5,
},
},
],
},
{
service: "llm",
options: [
{ name: "model", value: "gpt-4o" },
{
name: "initial_messages",
value: [
{
role: 'system',
content:
"YYou are Ben Thompson, the founder and writer of Stratechery. You specialize in analyzing the intersection of technology, business, and media. Use the 'get_rag_context' function to answer the user's questions on the latest tech trends, strategic business moves, or digital media developments. Also use 'get_rag_context' to answer questions about your interviews with tech and business leaders like Satya Nadella, Jensen Huang, Sam Altman and more. The function call will provide added context from Stratechery articles to provide an insightful answer to the user's question. If you're asking a follow up question on a topic that required 'get_rag_context', use the 'get_rag_context' function again to get the latest context. Be friendly and engaging. In answering questions, if the context doesn't contain relevant information, say so. Your responses will converted to audio. Please do not include any special characters in your response other than '!' or '?'.",
role: "system",
content: `You are Ben Thompson, the founder and writer of Stratechery. You specialize in analyzing the intersection of technology, business, and media. Use the 'get_rag_context' function to answer the user's questions on the latest tech trends, strategic business moves, or digital media developments. Also use 'get_rag_context' to answer questions about your interviews with tech and business leaders like Satya Nadella, Jensen Huang, Sam Altman and more. The function call will provide added context from Stratechery articles to provide an insightful answer to the user's question. If you're asking a follow up question on a topic that required 'get_rag_context', use the 'get_rag_context' function again to get the latest context. Be friendly and engaging. In answering questions, if the context doesn't contain relevant information, say so.
Start off by saying "Hi, I'm Ben Thompson, the author and founder of Stratechery. You can ask me about the latest tech trends, strategic business moves, or digital media developments or about my interviews with tech and business leaders like Satya Nadella, Jensen Huang, Sam Altman and more. How can I help you today?" Only introduce yourself once.
Anytime you output the word "Stratechery", output it phonetically as "Straw tech airy".
IMPORTANT: Your responses will be converted to audio. Only output plaintext. Do not output any markdown or special characters other than '!' or '?'.`,
},
],
},
{ name: 'run_on_config', value: true },
{ name: "run_on_config", value: true },
{
name: 'tools',
name: "tools",
value: [
{
type: 'function',
type: "function",
function: {
name: 'get_rag_context',
name: "get_rag_context",
description:
'Get relevant context for questions about Stratechery, including the latest tech trends, strategic business moves, or digital media developments.',
"Get relevant context for questions about Stratechery, including the latest tech trends, strategic business moves, or digital media developments.",
parameters: {
type: 'object',
type: "object",
properties: {
query: {
type: 'string',
type: "string",
description:
"The user's question about Stratechery, including the latest tech trends, strategic business moves, or digital media developments.",
},
},
required: ['query'],
required: ["query"],
},
},
},
Expand Down
98 changes: 87 additions & 11 deletions utils/rag_query.ts
Original file line number Diff line number Diff line change
Expand Up @@ -19,42 +19,54 @@ const chatModel = new ChatOpenAI({
openAIApiKey: process.env.OPENAI_API_KEY,
});

export async function query_similar_content(query: string, top_k: number = 5) {
async function query_similar_content(
query: string,
top_k: number = 3,
level: string = "summary"
) {
console.log(`Querying for ${level} content with query: "${query}"`);
try {
const index = pinecone.Index("stratechery-articles");

// Generate embedding for the query
const queryEmbedding = await embeddings.embedQuery(query);

// Query Pinecone directly
const queryResponse = await index.query({
vector: queryEmbedding,
topK: top_k,
includeMetadata: true,
filter: { level: level },
});

// Format the results
console.log(
`Retrieved ${queryResponse.matches?.length || 0} ${level} results`
);
return (
queryResponse.matches?.map((match) => ({
score: match.score,
metadata: {
title: match.metadata?.title ?? "Untitled",
truncated_content: match.metadata?.truncated_content ?? "",
file_name: match.metadata?.file_name ?? "",
level: match.metadata?.level ?? "",
},
})) ?? []
);
} catch (error) {
console.error("Error in query_similar_content:", error);
throw new Error("Failed to query similar content");
console.error(`Error in query_similar_content for ${level}:`, error);
throw new Error(`Failed to query similar content for ${level}`);
}
}

export async function generateResponse(query: string, ragResults: any[]) {
async function generateResponse(
query: string,
ragResults: any[],
detailLevel: string
) {
console.log(`Generating response for ${detailLevel} level`);
try {
const context = ragResults
.map(
(result) =>
`Title: ${result.metadata.title}\nContent: ${result.metadata.truncated_content}`
`Title: ${result.metadata.title}\nLevel: ${result.metadata.level}\nContent: ${result.metadata.truncated_content}`
)
.join("\n\n");

Expand All @@ -66,6 +78,13 @@ export async function generateResponse(query: string, ragResults: any[]) {
Question: {query}
Detail Level: ${detailLevel}
${
detailLevel !== "summary"
? "Please provide a more detailed answer based on the expanded context."
: ""
}
Answer:
`);

Expand All @@ -84,9 +103,66 @@ export async function generateResponse(query: string, ragResults: any[]) {
query,
});

console.log(`Generated response for ${detailLevel} level`);
return response;
} catch (error) {
console.error("Error in generateResponse:", error);
throw new Error("Failed to generate response");
console.error(`Error in generateResponse for ${detailLevel}:`, error);
throw new Error(`Failed to generate response for ${detailLevel}`);
}
}

/**
 * Ask the LLM whether `response` sufficiently answers `query`, or whether a
 * deeper retrieval pass (full sections instead of summaries) is warranted.
 *
 * @param query - The user's original question.
 * @param response - The answer generated from summary-level context.
 * @returns true when the model indicates more detailed information is needed.
 */
async function needMoreInformation(
  query: string,
  response: string
): Promise<boolean> {
  console.log("Checking if more information is needed");
  const prompt = PromptTemplate.fromTemplate(`
Analyze the following question and response, and determine if more detailed information is needed:
Question: {query}
Response: {response}
Does this response fully answer the question, or is more detailed information required?
Answer with 'Yes' if more information is needed, or 'No' if the response is sufficient.
Answer:
`);

  // The chain input shape is fully known here, so type it instead of `any`.
  type GateInput = { query: string; response: string };

  const chain = RunnableSequence.from([
    {
      query: (input: GateInput) => input.query,
      response: (input: GateInput) => input.response,
    },
    prompt,
    chatModel,
    new StringOutputParser(),
  ]);

  const result = await chain.invoke({ query, response });
  // The model is instructed to answer Yes/No; treat any "yes" in the output
  // as a request for deeper retrieval.
  const needMore = result.toLowerCase().includes("yes");
  console.log(`Need more information: ${needMore}`);
  return needMore;
}

/**
 * Two-stage RAG pipeline: answer from article summaries first, then escalate
 * to full sections only when the LLM judges the summary answer insufficient.
 *
 * @param query - The user's question.
 * @returns The generated answer, the retrieval matches it was based on, and
 *          the level ("summary" or "full") the answer was produced at.
 */
export async function hierarchicalRetrieval(query: string) {
  console.log(`Starting hierarchical retrieval for query: "${query}"`);

  // Step 1: cheap pass over summary-level embeddings (5 results).
  const summaryResults = await query_similar_content(query, 5, "summary");
  let response = await generateResponse(query, summaryResults, "summary");

  // Step 2: let the LLM decide whether the summary answer suffices.
  // (`const` — this flag is never reassigned.)
  const needMore = await needMoreInformation(query, response);

  if (needMore) {
    console.log("Moving to full content level");
    // Step 3: more expensive pass over full article sections (3 results).
    const fullResults = await query_similar_content(query, 3, "section");
    response = await generateResponse(query, fullResults, "full");
    console.log("Hierarchical retrieval complete at full level");
    return { response, ragResults: fullResults, level: "full" };
  }

  console.log("Hierarchical retrieval complete at summary level");
  return { response, ragResults: summaryResults, level: "summary" };
}

0 comments on commit 04862f2

Please sign in to comment.