Commit ed18642

feat(langchain) new middleware concept for createAgent (#8851)
1 parent 427c2dd commit ed18642

36 files changed: +6404 -96 lines changed
Lines changed: 5 additions & 0 deletions
@@ -0,0 +1,5 @@
---
"langchain": patch
---

feat(langchain) new middleware concept for createAgent
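At a glance, the example files added below exercise the new API in two ways: prebuilt middlewares imported from langchain/middleware, and custom middlewares built with createMiddleware and passed to createAgent. The following is a minimal sketch assuming only the surface that appears in those example files; the middleware name, its log message, and the model string are placeholders, not part of this commit.

import { createAgent, createMiddleware } from "langchain";

// A custom middleware hooks into the agent loop; the examples below use the beforeModel hook.
const loggingMiddleware = createMiddleware({
  name: "LoggingMiddleware",
  beforeModel: (state) => {
    // Inspect the state before the model is called.
    console.log(`About to call the model with ${state.messages.length} messages`);
    // Returning nothing leaves the state unchanged.
  },
});

// Middlewares are passed to createAgent as a readonly tuple.
const agent = createAgent({
  model: "openai:gpt-4o-mini", // placeholder model string
  tools: [],
  middlewares: [loggingMiddleware] as const,
});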

examples/package.json

Lines changed: 1 addition & 0 deletions
@@ -96,6 +96,7 @@
     "ioredis": "^5.3.2",
     "js-yaml": "^4.1.0",
     "langchain": "workspace:*",
+    "lorem-ipsum": "^2.0.8",
     "lunary": "^0.8.8",
     "mariadb": "^3.4.0",
     "mem0ai": "^2.1.8",
Lines changed: 141 additions & 0 deletions
@@ -0,0 +1,141 @@
import { createAgent, HumanMessage, tool, MemorySaver } from "langchain";
import { humanInTheLoopMiddleware } from "langchain/middleware";
import { Command } from "@langchain/langgraph";
import { z } from "zod";

const checkpointSaver = new MemorySaver();

// Define a safe tool (no approval needed)
const calculateTool = tool(
  async ({ a, b, operation }: { a: number; b: number; operation: string }) => {
    console.log(
      `🛠️ calculator tool called with args: ${a}, ${b}, ${operation}`
    );
    switch (operation) {
      case "add":
        return `${a} + ${b} = ${a + b}`;
      case "multiply":
        return `${a} * ${b} = ${a * b}`;
      default:
        return "Unknown operation";
    }
  },
  {
    name: "calculator",
    description: "Perform basic math operations",
    schema: z.object({
      a: z.number().describe("First number"),
      b: z.number().describe("Second number"),
      operation: z.enum(["add", "multiply"]).describe("Math operation"),
    }),
  }
);

// Define a tool that requires approval
const writeFileTool = tool(
  async ({ filename, content }: { filename: string; content: string }) => {
    console.log(
      `🛠️ write_file tool called with args: ${filename}, ${content}`
    );
    // Simulate file writing
    return `Successfully wrote ${content.length} characters to ${filename}`;
  },
  {
    name: "write_file",
    description: "Write content to a file",
    schema: z.object({
      filename: z.string().describe("Name of the file"),
      content: z.string().describe("Content to write"),
    }),
  }
);

// Configure HITL middleware
const hitlMiddleware = humanInTheLoopMiddleware({
  toolConfigs: {
    write_file: {
      requireApproval: true,
      description: "⚠️ File write operation requires approval",
    },
    calculator: {
      requireApproval: false, // Math is safe
    },
  },
});

// Create agent with HITL middleware
const agent = createAgent({
  model: "openai:gpt-4o-mini",
  checkpointSaver,
  prompt:
    "You are a helpful assistant. Use the tools provided to help the user.",
  tools: [calculateTool, writeFileTool],
  middlewares: [hitlMiddleware] as const,
});
const config = {
  configurable: {
    thread_id: "123",
  },
};

console.log("🚀 HITL Tool Approval Example");
console.log("=============================\n");

// Example 1: Safe tool - no approval needed
console.log("📊 Example 1: Using calculator (auto-approved)");
const mathResult = await agent.invoke(
  {
    messages: [new HumanMessage("Calculate 42 * 17")],
  },
  config
);
console.log("Result:", mathResult.messages.at(-1)?.content);

// Example 2: Tool requiring approval
console.log("\n📝 Example 2: Writing to file (requires approval)");
console.log("User: Write 'Hello World' to greeting.txt\n");

// This will pause at the HITL middleware for approval
const initialResult = await agent.invoke(
  {
    messages: [new HumanMessage("Write 'Hello World' to greeting.txt")],
  },
  config
);

// Check if the agent is paused (waiting for approval)
const state = await agent.graph.getState(config);
if (state.next && state.next.length > 0) {
  console.log("⏸️ Interrupted for approval!");

  // Get the interrupt data from the task
  const task = state.tasks?.[0];
  if (task?.interrupts && task.interrupts.length > 0) {
    const requests = task.interrupts[0].value;
    console.log("Tool:", requests[0].action);
    console.log("Args:", JSON.stringify(requests[0].args, null, 2));

    console.log("\nℹ️ In a real application, you would:");
    console.log(" - Show this to the user");
    console.log(" - Get their response (accept/edit/ignore/manual)");
    console.log(
      " - Resume with: agent.invoke(new Command({ resume: response }))"
    );

    console.log("\n✅ Simulating user approval...\n");

    // Resume with approval
    const resumedResult = await agent.invoke(
      // @ts-expect-error
      new Command({
        resume: [{ type: "accept" }], // Approve the tool call
      }),
      config
    );

    console.log("Result:", resumedResult.messages.at(-1)?.content);
  }
} else {
  console.log("Agent completed without interruption");
  console.log("Result:", initialResult.messages.at(-1)?.content);
}
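The pause, inspect, resume pattern above lends itself to a small helper. The sketch below only recombines calls already used in this example (agent.invoke, agent.graph.getState, and Command), keeps the agent parameter loosely typed, and hard-codes the "accept" response; treat it as a convenience wrapper under those assumptions, not as part of the commit.

// Hypothetical helper: run one input, and if the graph paused for approval, resume with an accept.
async function invokeWithAutoApproval(
  agent: any, // kept loose for sketch purposes; the createAgent return type would be used in practice
  input: { messages: HumanMessage[] },
  config: { configurable: { thread_id: string } }
) {
  const first = await agent.invoke(input, config);
  const state = await agent.graph.getState(config);

  // No pending node means the run finished without needing approval.
  if (!state.next || state.next.length === 0) {
    return first;
  }

  // Otherwise resume the interrupted run, approving the pending tool call.
  return agent.invoke(
    // @ts-expect-error -- same typing workaround as in the example above
    new Command({ resume: [{ type: "accept" }] }),
    config
  );
}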
Lines changed: 69 additions & 0 deletions
@@ -0,0 +1,69 @@
import { createAgent, HumanMessage, AIMessage } from "langchain";
import { anthropicPromptCachingMiddleware } from "langchain/middleware";

// Example usage
const agent = createAgent({
  model: "anthropic:claude-sonnet-4-20250514",
  tools: [],
  middlewares: [anthropicPromptCachingMiddleware({ ttl: "5m" })] as const,
});

// Usage example with a long chat history for testing caching
const result = await agent.invoke({
  messages: [
    // Initial conversation about prompt caching basics
    new HumanMessage(
      "What is prompt caching and why is it important for LLM applications?"
    ),
    new AIMessage(
      "Prompt caching is a technique that stores and reuses previously processed portions of prompts to optimize LLM API usage. It's important because:\n\n1. **Cost Reduction**: By caching common prompt prefixes, you avoid reprocessing the same tokens repeatedly, significantly reducing API costs.\n2. **Latency Improvement**: Cached prompts are processed faster since the model doesn't need to recompute embeddings for cached portions.\n3. **Better Resource Utilization**: Reduces computational load on both client and server sides.\n4. **Improved User Experience**: Faster response times lead to more responsive applications.\n\nIt's particularly valuable for applications with repetitive prompts, long system messages, or extensive conversation histories."
    ),

    // Deep dive into technical details
    new HumanMessage(
      "How does prompt caching work at a technical level? What happens when a prompt is cached?"
    ),
    new AIMessage(
      "At a technical level, prompt caching works through several steps:\n\n1. **Token Processing**: When a prompt is first sent, the model processes tokens and generates internal representations (embeddings and attention states).\n\n2. **Cache Storage**: These intermediate representations are stored in a cache with a unique identifier based on the prompt prefix.\n\n3. **Cache Matching**: For subsequent requests, the system checks if any prefix of the new prompt matches cached entries.\n\n4. **Partial Processing**: If a match is found, the model skips processing the cached portion and starts from where the cache ends.\n\n5. **Cache Management**: Caches have TTL (Time To Live) settings and eviction policies to manage memory usage.\n\nThe cache typically stores:\n- Token embeddings\n- Attention matrices\n- Hidden states from transformer layers\n- Position encodings\n\nThis allows the model to resume processing from any cached point without quality loss."
    ),

    // Implementation specifics
    new HumanMessage(
      "What are the best practices for implementing prompt caching in a production environment?"
    ),
    new AIMessage(
      "Here are key best practices for production prompt caching:\n\n**1. Cache Strategy**\n- Cache stable prefixes (system prompts, common instructions)\n- Use appropriate TTL values (5-60 minutes typically)\n- Implement cache warming for frequently used prompts\n\n**2. Monitoring & Metrics**\n- Track cache hit rates\n- Monitor cache size and memory usage\n- Log cache evictions and misses\n- Measure latency improvements\n\n**3. Cache Invalidation**\n- Implement versioning for cached prompts\n- Clear caches when system prompts change\n- Use cache tags for grouped invalidation\n\n**4. Error Handling**\n- Gracefully fall back to non-cached requests\n- Implement retry logic for cache failures\n- Log cache-related errors separately\n\n**5. Security Considerations**\n- Ensure cached data is properly isolated between users\n- Implement access controls for cache management\n- Consider encryption for sensitive cached content\n\n**6. Optimization Tips**\n- Batch similar requests to maximize cache reuse\n- Structure prompts with common prefixes\n- Use consistent formatting to improve cache hits"
    ),

    // Specific use cases
    new HumanMessage(
      "Can you provide examples of applications that benefit most from prompt caching?"
    ),
    new AIMessage(
      "Several applications benefit significantly from prompt caching:\n\n**1. Customer Support Chatbots**\n- Long system prompts with company policies\n- FAQ responses with common prefixes\n- Multi-turn conversations with context retention\n- Benefit: 60-80% cost reduction on repeat queries\n\n**2. Code Generation Tools**\n- Large codebases as context\n- Common import statements and boilerplate\n- Repeated analysis of the same files\n- Benefit: Faster iteration on code changes\n\n**3. Document Analysis Systems**\n- Legal documents with standard clauses\n- Medical records with common formats\n- Financial reports with recurring sections\n- Benefit: Rapid document processing at scale\n\n**4. Educational Platforms**\n- Course materials as context\n- Standardized question formats\n- Student interaction histories\n- Benefit: Personalized learning at lower cost\n\n**5. Content Generation**\n- Brand voice guidelines\n- SEO templates\n- Style guides and tone specifications\n- Benefit: Consistent content generation\n\n**6. Multi-Agent Systems**\n- Shared context between agents\n- Common tool definitions\n- Workflow templates\n- Benefit: Efficient agent coordination"
    ),

    // Advanced caching strategies
    new HumanMessage(
      "What are some advanced caching strategies for complex conversational AI systems?"
    ),
    new AIMessage(
      "Advanced caching strategies for complex conversational AI include:\n\n**1. Hierarchical Caching**\n- **L1 Cache**: Hot prompts (< 1 minute TTL)\n- **L2 Cache**: Warm prompts (5-15 minutes TTL)\n- **L3 Cache**: Cold storage (hours to days)\n- Automatic promotion/demotion between levels\n\n**2. Semantic Caching**\n- Cache based on semantic similarity, not just exact matches\n- Use embedding models to find similar prompts\n- Cluster related prompts for better reuse\n- Implement fuzzy matching for minor variations\n\n**3. Predictive Caching**\n- Pre-cache likely next prompts based on user behavior\n- Use ML models to predict conversation flow\n- Warm caches during low-traffic periods\n- Implement speculative caching for common paths\n\n**4. Distributed Caching**\n- Use Redis or Memcached for shared caches\n- Implement cache synchronization across instances\n- Geographic distribution for global applications\n- Consistent hashing for cache distribution\n\n**5. Context-Aware Caching**\n- Cache different versions based on user segments\n- Implement role-based cache strategies\n- Time-based cache variations (business hours vs. off-hours)\n- Location-aware caching for regional differences\n\n**6. Compression Techniques**\n- Compress cached representations\n- Use quantization for embedding storage\n- Implement delta encoding for similar prompts\n- Apply pruning for less important attention weights"
    ),

    // Performance optimization
    new HumanMessage(
      "How can I measure and optimize the performance impact of prompt caching in my application?"
    ),
    new AIMessage(
      "Here's a comprehensive approach to measuring and optimizing prompt caching performance:\n\n**Performance Metrics to Track:**\n\n1. **Cache Efficiency**\n - Hit Rate: (Cache Hits / Total Requests) × 100\n - Miss Rate: (Cache Misses / Total Requests) × 100\n - Eviction Rate: Frequency of cache evictions\n - Cache Utilization: Used cache space vs. available\n\n2. **Latency Metrics**\n - P50, P95, P99 response times with/without cache\n - Time saved per cached request\n - Cache lookup time\n - Cache write time\n\n3. **Cost Metrics**\n - Tokens saved through caching\n - API cost reduction percentage\n - Infrastructure cost for cache storage\n - ROI of caching implementation\n\n**Optimization Strategies:**\n\n1. **Cache Key Optimization**\n ```typescript\n // Instead of caching entire prompts\n const key = hash(entirePrompt);\n \n // Cache stable prefixes\n const key = hash(systemPrompt + conversationPrefix);\n ```\n\n2. **Adaptive TTL**\n - Short TTL for volatile content (1-5 min)\n - Long TTL for stable content (1-24 hours)\n - Dynamic TTL based on access patterns\n\n3. **Cache Preloading**\n - Identify common patterns through analytics\n - Preload during application startup\n - Background refresh of popular entries\n\n4. **Memory Management**\n - Implement LRU (Least Recently Used) eviction\n - Set appropriate cache size limits\n - Monitor memory pressure\n - Use memory-mapped files for large caches\n\n**Monitoring Implementation:**\n```typescript\ninterface CacheMetrics {\n hits: number;\n misses: number;\n evictions: number;\n avgHitLatency: number;\n avgMissLatency: number;\n memorySizeMB: number;\n tokensSaved: number;\n costSaved: number;\n}\n```\n\n**A/B Testing Strategy:**\n- Run parallel experiments with different cache configs\n- Compare user experience metrics\n- Measure impact on conversion rates\n- Analyze cost-benefit ratios"
    ),

    // Current question for continued testing
    new HumanMessage(
      "What are the key features of prompt caching that I should focus on when implementing this middleware?"
    ),
  ],
});

console.log("\nAgent response:", result.messages.at(-1)?.content);
Lines changed: 102 additions & 0 deletions
@@ -0,0 +1,102 @@
import {
  createMiddleware,
  createAgent,
  HumanMessage,
  MemorySaver,
} from "langchain";
import { Command, interrupt } from "@langchain/langgraph";

const checkpointer = new MemorySaver();

/**
 * Simple Human in the Loop (HITL) Middleware
 *
 * This middleware demonstrates how to interrupt execution when information is missing
 * and resume with human-provided input.
 */
const humanInTheLoopMiddleware = createMiddleware({
  name: "HumanInTheLoopMiddleware",

  beforeModel: (state) => {
    // Check if the user's question is missing critical information
    const lastUserMessage = [...state.messages]
      .reverse()
      .find((msg) => msg instanceof HumanMessage);

    if (!lastUserMessage) {
      return;
    }

    const userContent = lastUserMessage.content.toString().toLowerCase();

    // Interrupt to ask for clarification
    const clarification = interrupt({
      type: "missing_information",
      question: "Which country or state's capital are you asking about?",
      originalQuery: userContent,
    });

    // Add the clarification as a new message
    console.log(`\n✅ Human provided clarification: "${clarification}"`);

    // eslint-disable-next-line consistent-return
    return {
      messages: [
        ...state.messages,
        new HumanMessage(`The capital of ${clarification}`),
      ],
      clarificationRequested: true,
    };
  },
});

const agent = createAgent({
  model: "openai:gpt-4o-mini",
  tools: [],
  checkpointer,
  middlewares: [humanInTheLoopMiddleware] as const,
});

console.log("🚀 Human in the Loop Example - Missing Information Flow");
console.log("========================================================");
console.log(
  "\nThis example shows how the agent interrupts when information is missing"
);
console.log("and resumes with human-provided input.\n");

const config = {
  configurable: {
    thread_id: "example-thread-123",
  },
};

// Step 1: Initial invocation with incomplete information
console.log("📝 Step 1: User asks incomplete question");
console.log(' User: "What\'s the capital?"');

const result = await agent.invoke(
  {
    messages: [new HumanMessage("What's the capital?")],
  },
  config
);

// This won't be reached due to interruption
console.log("\nFinal message:", result.messages.at(-1)?.content);

// Step 2: Resume with the missing information
console.log("📝 Step 2: Resuming with clarification");
console.log(' Human provides: "France"');

// Resume the graph with the clarification
// The Command is properly typed for resuming from an interrupt
const resumedResult = await agent.invoke(
  // @ts-expect-error
  new Command({
    resume: "France",
  }),
  config
);

console.log("\n✅ Agent successfully resumed!");
console.log("Final answer:", resumedResult.messages.at(-1)?.content);
