Skip to content

Commit

Permalink
Semantic chunking improvements
Browse files Browse the repository at this point in the history
  • Loading branch information
markbackman committed Sep 10, 2024
1 parent 04862f2 commit cead6ba
Show file tree
Hide file tree
Showing 4 changed files with 90 additions and 97 deletions.
18 changes: 15 additions & 3 deletions app/api/rag/route.ts
Original file line number Diff line number Diff line change
@@ -1,13 +1,25 @@
import { NextResponse } from "next/server";

import { hierarchicalRetrieval } from "@/utils/rag_query";
import { generateResponse, query_similar_content } from "@/utils/rag_query";

export async function POST(request: Request) {
const { query } = await request.json();

console.log(`Received query: "${query}"`);
console.time("total_rag_process");

try {
const { response, ragResults, level } = await hierarchicalRetrieval(query);
return NextResponse.json({ ragResults, llmResponse: response, level });
const ragResults = await query_similar_content(query);
const { response: llmResponse, usage } = await generateResponse(
query,
ragResults
);

console.timeEnd("total_rag_process");
console.log(`RAG process completed for query: "${query}"`);
console.log("Total token usage:", usage);

return NextResponse.json({ ragResults, llmResponse, usage });
} catch (error) {
console.error("RAG query error:", error);
return NextResponse.json(
Expand Down
5 changes: 2 additions & 3 deletions app/page.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -68,13 +68,12 @@ export default function Home() {
console.log(data);

const formattedContext = `
Relevant Context (${data.level} level):
Relevant Context:
${data.ragResults
.map(
(result: any) =>
`Title: ${result.metadata.title}
Level: ${result.metadata.level}
Content: ${result.metadata.truncated_content}`
Content: ${result.metadata.content}`
)
.join("\n\n")}
Expand Down
4 changes: 2 additions & 2 deletions rtvi.config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -37,9 +37,9 @@ export const defaultConfig = [
Start off by saying "Hi, I'm Ben Thompson, the author and founder of Stratechery. You can ask me about the latest tech trends, strategic business moves, or digital media developments or about my interviews with tech and business leaders like Satya Nadella, Jensen Huang, Sam Altman and more. How can I help you today?" Only introduce yourself once.
Anytime you output the word "Stratechery", output it phonetically as "Straw tech airy".
Anytime you output the word "Stratechery", output it phonetically as "Stra-tekery".
IMPORTANT: Your responses will be converted to audio. Only output plaintext. Do not output any markdown or special characters other than '!' or '?'.`,
IMPORTANT: Your responses will be converted to audio. Output in prose, not lists. ONLY OUTPUT PLAINTEXT. DO NOT OUTPUT MARKDOWN. NO ASTERISKS (*). Do not output special characters other than '!' or '?'.`,
},
],
},
Expand Down
160 changes: 71 additions & 89 deletions utils/rag_query.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,19 @@ import { OpenAIEmbeddings } from "@langchain/openai";
import { ChatOpenAI } from "@langchain/openai";
import { Pinecone } from "@pinecone-database/pinecone";

// Define types for our metadata and results
type ChunkMetadata = {
title: string;
content: string;
file_name: string;
chunk_index: number;
};

type QueryResult = {
score: number;
metadata: ChunkMetadata;
};

const embeddings = new OpenAIEmbeddings({
openAIApiKey: process.env.OPENAI_API_KEY,
modelName: "text-embedding-3-small",
Expand All @@ -15,58 +28,72 @@ const pinecone = new Pinecone({
});

const chatModel = new ChatOpenAI({
modelName: "gpt-4o",
modelName: "gpt-4o-mini",
openAIApiKey: process.env.OPENAI_API_KEY,
verbose: true,
});

async function query_similar_content(
export async function query_similar_content(
query: string,
top_k: number = 3,
level: string = "summary"
) {
console.log(`Querying for ${level} content with query: "${query}"`);
top_k: number = 5
): Promise<QueryResult[]> {
console.time("query_similar_content");
try {
const index = pinecone.Index("stratechery-articles");

// Generate embedding for the query
const queryEmbedding = await embeddings.embedQuery(query);

// Query Pinecone
const queryResponse = await index.query({
vector: queryEmbedding,
topK: top_k,
includeMetadata: true,
filter: { level: level },
});

console.log(
`Retrieved ${queryResponse.matches?.length || 0} ${level} results`
);
return (
console.timeEnd("query_similar_content");

// Format and log the results
const results: QueryResult[] =
queryResponse.matches?.map((match) => ({
score: match.score,
score: match.score || 0,
metadata: {
title: match.metadata?.title ?? "Untitled",
truncated_content: match.metadata?.truncated_content ?? "",
file_name: match.metadata?.file_name ?? "",
level: match.metadata?.level ?? "",
title: (match.metadata?.title as string) || "Untitled",
content: (match.metadata?.content as string) || "",
file_name: (match.metadata?.file_name as string) || "",
chunk_index: (match.metadata?.chunk_index as number) || 0,
},
})) ?? []
);
})) ?? [];

console.log(`RAG Results for query: "${query}"`);
results.forEach((result, index) => {
console.log(`\nResult ${index + 1}:`);
console.log(`Score: ${result.score}`);
console.log(`Title: ${result.metadata.title}`);
console.log(`File: ${result.metadata.file_name}`);
console.log(`Chunk Index: ${result.metadata.chunk_index}`);
console.log(
`Content Preview: ${result.metadata.content.substring(0, 100)}...`
);
});

return results;
} catch (error) {
console.error(`Error in query_similar_content for ${level}:`, error);
throw new Error(`Failed to query similar content for ${level}`);
console.error("Error in query_similar_content:", error);
throw new Error("Failed to query similar content");
}
}

async function generateResponse(
export async function generateResponse(
query: string,
ragResults: any[],
detailLevel: string
ragResults: QueryResult[]
) {
console.log(`Generating response for ${detailLevel} level`);
console.time("generateResponse");
try {
const context = ragResults
.map(
(result) =>
`Title: ${result.metadata.title}\nLevel: ${result.metadata.level}\nContent: ${result.metadata.truncated_content}`
`Title: ${result.metadata.title}\nContent: ${result.metadata.content}`
)
.join("\n\n");

Expand All @@ -78,13 +105,6 @@ async function generateResponse(
Question: {query}
Detail Level: ${detailLevel}
${
detailLevel !== "summary"
? "Please provide a more detailed answer based on the expanded context."
: ""
}
Answer:
`);

Expand All @@ -103,66 +123,28 @@ async function generateResponse(
query,
});

console.log(`Generated response for ${detailLevel} level`);
return response;
} catch (error) {
console.error(`Error in generateResponse for ${detailLevel}:`, error);
throw new Error(`Failed to generate response for ${detailLevel}`);
}
}

async function needMoreInformation(
query: string,
response: string
): Promise<boolean> {
console.log("Checking if more information is needed");
const prompt = PromptTemplate.fromTemplate(`
Analyze the following question and response, and determine if more detailed information is needed:
Question: {query}
Response: {response}
Does this response fully answer the question, or is more detailed information required?
Answer with 'Yes' if more information is needed, or 'No' if the response is sufficient.
Answer:
`);

const chain = RunnableSequence.from([
{
query: (input: any) => input.query,
response: (input: any) => input.response,
},
prompt,
chatModel,
new StringOutputParser(),
]);

const result = await chain.invoke({ query, response });
const needMore = result.toLowerCase().includes("yes");
console.log(`Need more information: ${needMore}`);
return needMore;
}
// Access the underlying OpenAI client to make a direct API call
const openaiClient = (chatModel as any).client;
const promptContent = await prompt.format({ context, query });

export async function hierarchicalRetrieval(query: string) {
console.log(`Starting hierarchical retrieval for query: "${query}"`);
const openaiResponse = await openaiClient.chat.completions.create({
model: "gpt-4o",
messages: [
{ role: "system", content: "You are a helpful assistant." },
{ role: "user", content: promptContent },
],
});

// Step 1: Retrieve summaries (5 results)
const summaryResults = await query_similar_content(query, 5, "summary");
let response = await generateResponse(query, summaryResults, "summary");
const usage = openaiResponse.usage;

// Step 2: Check if more information is needed
let needMore = await needMoreInformation(query, response);
console.timeEnd("generateResponse");
console.log(`Generated response for query: "${query}"`);
console.log(`Response: ${response}`);
console.log("Token Usage:", usage);

if (needMore) {
console.log("Moving to full content level");
// Step 3: Retrieve full sections (3 results)
const fullResults = await query_similar_content(query, 3, "section");
response = await generateResponse(query, fullResults, "full");
console.log("Hierarchical retrieval complete at full level");
return { response, ragResults: fullResults, level: "full" };
return { response, usage };
} catch (error) {
console.error("Error in generateResponse:", error);
throw new Error("Failed to generate response");
}

console.log("Hierarchical retrieval complete at summary level");
return { response, ragResults: summaryResults, level: "summary" };
}

0 comments on commit cead6ba

Please sign in to comment.