Skip to content

Commit

Permalink
Initial RAG client work
Browse files Browse the repository at this point in the history
  • Loading branch information
markbackman committed Sep 6, 2024
1 parent 4ce39d8 commit 04862f2
Show file tree
Hide file tree
Showing 4 changed files with 130 additions and 38 deletions.
7 changes: 3 additions & 4 deletions app/api/rag/route.ts
Original file line number Diff line number Diff line change
@@ -1,14 +1,13 @@
import { NextResponse } from "next/server";

import { generateResponse, query_similar_content } from "@/utils/rag_query";
import { hierarchicalRetrieval } from "@/utils/rag_query";

export async function POST(request: Request) {
const { query } = await request.json();

try {
const ragResults = await query_similar_content(query);
const llmResponse = await generateResponse(query, ragResults);
return NextResponse.json({ ragResults, llmResponse });
const { response, ragResults, level } = await hierarchicalRetrieval(query);
return NextResponse.json({ ragResults, llmResponse: response, level });
} catch (error) {
console.error("RAG query error:", error);
return NextResponse.json(
Expand Down
9 changes: 5 additions & 4 deletions app/page.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,8 @@ export default function Home() {
try {
if (fn.functionName === "get_rag_context" && args.query) {
console.log("get_rag_context", args.query);
setFetchingRAG(true);

const response = await fetch("/api/rag", {
method: "POST",
headers: {
Expand All @@ -65,18 +67,17 @@ export default function Home() {

console.log(data);

// Assuming the API returns both ragResults and llmResponse
// We'll return a formatted context that includes both
const formattedContext = `
Relevant Context:
Relevant Context (${data.level} level):
${data.ragResults
.map(
(result: any) =>
`Title: ${result.metadata.title}
Level: ${result.metadata.level}
Content: ${result.metadata.truncated_content}`
)
.join("\n\n")}
AI Response:
${data.llmResponse}
`;
Expand Down
54 changes: 35 additions & 19 deletions rtvi.config.ts
Original file line number Diff line number Diff line change
@@ -1,52 +1,68 @@
export const BOT_READY_TIMEOUT = 15 * 1000; // 15 seconds

export const defaultBotProfile = 'voice_2024_08';
export const defaultBotProfile = "voice_2024_08";
export const defaultMaxDuration = 600;

export const defaultServices = {
llm: 'openai',
tts: 'cartesia',
llm: "openai",
tts: "cartesia",
};

export const defaultConfig = [
{
service: 'tts',
options: [{ name: 'voice', value: 'd46abd1d-2d02-43e8-819f-51fb652c1c61' }],
service: "tts",
options: [{ name: "voice", value: "839ea677-2007-46d5-9678-e282fa5546b4" }], // Cartesia voice id: clone of Ben Thompson
},
{
service: 'llm',
service: "vad",
options: [
{ name: 'model', value: 'gpt-4o' },
{
name: 'initial_messages',
name: "params",
value: {
stop_secs: 0.5,
},
},
],
},
{
service: "llm",
options: [
{ name: "model", value: "gpt-4o" },
{
name: "initial_messages",
value: [
{
role: 'system',
content:
"YYou are Ben Thompson, the founder and writer of Stratechery. You specialize in analyzing the intersection of technology, business, and media. Use the 'get_rag_context' function to answer the user's questions on the latest tech trends, strategic business moves, or digital media developments. Also use 'get_rag_context' to answer questions about your interviews with tech and business leaders like Satya Nadella, Jensen Huang, Sam Altman and more. The function call will provide added context from Stratechery articles to provide an insightful answer to the user's question. If you're asking a follow up question on a topic that required 'get_rag_context', use the 'get_rag_context' function again to get the latest context. Be friendly and engaging. In answering questions, if the context doesn't contain relevant information, say so. Your responses will converted to audio. Please do not include any special characters in your response other than '!' or '?'.",
role: "system",
content: `You are Ben Thompson, the founder and writer of Stratechery. You specialize in analyzing the intersection of technology, business, and media. Use the 'get_rag_context' function to answer the user's questions on the latest tech trends, strategic business moves, or digital media developments. Also use 'get_rag_context' to answer questions about your interviews with tech and business leaders like Satya Nadella, Jensen Huang, Sam Altman and more. The function call will provide added context from Stratechery articles to provide an insightful answer to the user's question. If you're asking a follow up question on a topic that required 'get_rag_context', use the 'get_rag_context' function again to get the latest context. Be friendly and engaging. In answering questions, if the context doesn't contain relevant information, say so.
Start off by saying "Hi, I'm Ben Thompson, the author and founder of Stratechery. You can ask me about the latest tech trends, strategic business moves, or digital media developments or about my interviews with tech and business leaders like Satya Nadella, Jensen Huang, Sam Altman and more. How can I help you today?" Only introduce yourself once.
Anytime you output the word "Stratechery", output it phonetically as "Straw tech airy".
IMPORTANT: Your responses will be converted to audio. Only output plaintext. Do not output any markdown or special characters other than '!' or '?'.`,
},
],
},
{ name: 'run_on_config', value: true },
{ name: "run_on_config", value: true },
{
name: 'tools',
name: "tools",
value: [
{
type: 'function',
type: "function",
function: {
name: 'get_rag_context',
name: "get_rag_context",
description:
'Get relevant context for questions about Stratechery, including the latest tech trends, strategic business moves, or digital media developments.',
"Get relevant context for questions about Stratechery, including the latest tech trends, strategic business moves, or digital media developments.",
parameters: {
type: 'object',
type: "object",
properties: {
query: {
type: 'string',
type: "string",
description:
"The user's question about Stratechery, including the latest tech trends, strategic business moves, or digital media developments.",
},
},
required: ['query'],
required: ["query"],
},
},
},
Expand Down
98 changes: 87 additions & 11 deletions utils/rag_query.ts
Original file line number Diff line number Diff line change
Expand Up @@ -19,42 +19,54 @@ const chatModel = new ChatOpenAI({
openAIApiKey: process.env.OPENAI_API_KEY,
});

export async function query_similar_content(query: string, top_k: number = 5) {
async function query_similar_content(
query: string,
top_k: number = 3,
level: string = "summary"
) {
console.log(`Querying for ${level} content with query: "${query}"`);
try {
const index = pinecone.Index("stratechery-articles");

// Generate embedding for the query
const queryEmbedding = await embeddings.embedQuery(query);

// Query Pinecone directly
const queryResponse = await index.query({
vector: queryEmbedding,
topK: top_k,
includeMetadata: true,
filter: { level: level },
});

// Format the results
console.log(
`Retrieved ${queryResponse.matches?.length || 0} ${level} results`
);
return (
queryResponse.matches?.map((match) => ({
score: match.score,
metadata: {
title: match.metadata?.title ?? "Untitled",
truncated_content: match.metadata?.truncated_content ?? "",
file_name: match.metadata?.file_name ?? "",
level: match.metadata?.level ?? "",
},
})) ?? []
);
} catch (error) {
console.error("Error in query_similar_content:", error);
throw new Error("Failed to query similar content");
console.error(`Error in query_similar_content for ${level}:`, error);
throw new Error(`Failed to query similar content for ${level}`);
}
}

export async function generateResponse(query: string, ragResults: any[]) {
async function generateResponse(
query: string,
ragResults: any[],
detailLevel: string
) {
console.log(`Generating response for ${detailLevel} level`);
try {
const context = ragResults
.map(
(result) =>
`Title: ${result.metadata.title}\nContent: ${result.metadata.truncated_content}`
`Title: ${result.metadata.title}\nLevel: ${result.metadata.level}\nContent: ${result.metadata.truncated_content}`
)
.join("\n\n");

Expand All @@ -66,6 +78,13 @@ export async function generateResponse(query: string, ragResults: any[]) {
Question: {query}
Detail Level: ${detailLevel}
${
detailLevel !== "summary"
? "Please provide a more detailed answer based on the expanded context."
: ""
}
Answer:
`);

Expand All @@ -84,9 +103,66 @@ export async function generateResponse(query: string, ragResults: any[]) {
query,
});

console.log(`Generated response for ${detailLevel} level`);
return response;
} catch (error) {
console.error("Error in generateResponse:", error);
throw new Error("Failed to generate response");
console.error(`Error in generateResponse for ${detailLevel}:`, error);
throw new Error(`Failed to generate response for ${detailLevel}`);
}
}

/**
 * Ask the LLM whether `response` sufficiently answers `query`, or whether a
 * deeper retrieval pass (full sections instead of summaries) is warranted.
 *
 * @param query - The user's original question.
 * @param response - The answer generated from summary-level context.
 * @returns true when the model indicates more detailed information is needed.
 */
async function needMoreInformation(
  query: string,
  response: string
): Promise<boolean> {
  console.log("Checking if more information is needed");
  const prompt = PromptTemplate.fromTemplate(`
Analyze the following question and response, and determine if more detailed information is needed:
Question: {query}
Response: {response}
Does this response fully answer the question, or is more detailed information required?
Answer with 'Yes' if more information is needed, or 'No' if the response is sufficient.
Answer:
`);

  // The chain input shape is fully known here, so type it instead of `any`.
  type GateInput = { query: string; response: string };

  const chain = RunnableSequence.from([
    {
      query: (input: GateInput) => input.query,
      response: (input: GateInput) => input.response,
    },
    prompt,
    chatModel,
    new StringOutputParser(),
  ]);

  const result = await chain.invoke({ query, response });
  // The model is instructed to answer Yes/No; treat any "yes" in the output
  // as a request for deeper retrieval.
  const needMore = result.toLowerCase().includes("yes");
  console.log(`Need more information: ${needMore}`);
  return needMore;
}

/**
 * Two-stage RAG pipeline: answer from article summaries first, then escalate
 * to full sections only when the LLM judges the summary answer insufficient.
 *
 * @param query - The user's question.
 * @returns The generated answer, the retrieval matches it was based on, and
 *          the level ("summary" or "full") the answer was produced at.
 */
export async function hierarchicalRetrieval(query: string) {
  console.log(`Starting hierarchical retrieval for query: "${query}"`);

  // Step 1: cheap pass over summary-level embeddings (5 results).
  const summaryResults = await query_similar_content(query, 5, "summary");
  let response = await generateResponse(query, summaryResults, "summary");

  // Step 2: let the LLM decide whether the summary answer suffices.
  // (`const` — this flag is never reassigned.)
  const needMore = await needMoreInformation(query, response);

  if (needMore) {
    console.log("Moving to full content level");
    // Step 3: more expensive pass over full article sections (3 results).
    const fullResults = await query_similar_content(query, 3, "section");
    response = await generateResponse(query, fullResults, "full");
    console.log("Hierarchical retrieval complete at full level");
    return { response, ragResults: fullResults, level: "full" };
  }

  console.log("Hierarchical retrieval complete at summary level");
  return { response, ragResults: summaryResults, level: "summary" };
}

0 comments on commit 04862f2

Please sign in to comment.