Commit ed18642

feat(langchain) new middleware concept for createAgent (#8851)
1 parent 427c2dd commit ed18642

36 files changed: +6404 -96 lines changed
Lines changed: 5 additions & 0 deletions
@@ -0,0 +1,5 @@
---
"langchain": patch
---

feat(langchain) new middleware concept for createAgent
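At a glance, the example files added below exercise the new API in two ways: prebuilt middlewares imported from langchain/middleware, and custom middlewares built with createMiddleware and passed to createAgent. The following is a minimal sketch assuming only the surface that appears in those example files; the middleware name, its log message, and the model string are placeholders, not part of this commit.

import { createAgent, createMiddleware } from "langchain";

// A custom middleware hooks into the agent loop; the examples below use the beforeModel hook.
const loggingMiddleware = createMiddleware({
  name: "LoggingMiddleware",
  beforeModel: (state) => {
    // Inspect the state before the model is called.
    console.log(`About to call the model with ${state.messages.length} messages`);
    // Returning nothing leaves the state unchanged.
  },
});

// Middlewares are passed to createAgent as a readonly tuple.
const agent = createAgent({
  model: "openai:gpt-4o-mini", // placeholder model string
  tools: [],
  middlewares: [loggingMiddleware] as const,
});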

examples/package.json

Lines changed: 1 addition & 0 deletions
@@ -96,6 +96,7 @@
     "ioredis": "^5.3.2",
     "js-yaml": "^4.1.0",
     "langchain": "workspace:*",
+    "lorem-ipsum": "^2.0.8",
     "lunary": "^0.8.8",
     "mariadb": "^3.4.0",
     "mem0ai": "^2.1.8",
Lines changed: 141 additions & 0 deletions
@@ -0,0 +1,141 @@
import { createAgent, HumanMessage, tool, MemorySaver } from "langchain";
import { humanInTheLoopMiddleware } from "langchain/middleware";
import { Command } from "@langchain/langgraph";
import { z } from "zod";

const checkpointSaver = new MemorySaver();

// Define a safe tool (no approval needed)
const calculateTool = tool(
  async ({ a, b, operation }: { a: number; b: number; operation: string }) => {
    console.log(
      `🛠️ calculator tool called with args: ${a}, ${b}, ${operation}`
    );
    switch (operation) {
      case "add":
        return `${a} + ${b} = ${a + b}`;
      case "multiply":
        return `${a} * ${b} = ${a * b}`;
      default:
        return "Unknown operation";
    }
  },
  {
    name: "calculator",
    description: "Perform basic math operations",
    schema: z.object({
      a: z.number().describe("First number"),
      b: z.number().describe("Second number"),
      operation: z.enum(["add", "multiply"]).describe("Math operation"),
    }),
  }
);

// Define a tool that requires approval
const writeFileTool = tool(
  async ({ filename, content }: { filename: string; content: string }) => {
    console.log(
      `🛠️ write_file tool called with args: ${filename}, ${content}`
    );
    // Simulate file writing
    return `Successfully wrote ${content.length} characters to ${filename}`;
  },
  {
    name: "write_file",
    description: "Write content to a file",
    schema: z.object({
      filename: z.string().describe("Name of the file"),
      content: z.string().describe("Content to write"),
    }),
  }
);

// Configure HITL middleware
const hitlMiddleware = humanInTheLoopMiddleware({
  toolConfigs: {
    write_file: {
      requireApproval: true,
      description: "⚠️ File write operation requires approval",
    },
    calculator: {
      requireApproval: false, // Math is safe
    },
  },
});

// Create agent with HITL middleware
const agent = createAgent({
  model: "openai:gpt-4o-mini",
  checkpointSaver,
  prompt:
    "You are a helpful assistant. Use the tools provided to help the user.",
  tools: [calculateTool, writeFileTool],
  middlewares: [hitlMiddleware] as const,
});
const config = {
  configurable: {
    thread_id: "123",
  },
};

console.log("🚀 HITL Tool Approval Example");
console.log("=============================\n");

// Example 1: Safe tool - no approval needed
console.log("📊 Example 1: Using calculator (auto-approved)");
const mathResult = await agent.invoke(
  {
    messages: [new HumanMessage("Calculate 42 * 17")],
  },
  config
);
console.log("Result:", mathResult.messages.at(-1)?.content);

// Example 2: Tool requiring approval
console.log("\n📝 Example 2: Writing to file (requires approval)");
console.log("User: Write 'Hello World' to greeting.txt\n");

// This will pause at the HITL middleware for approval
const initialResult = await agent.invoke(
  {
    messages: [new HumanMessage("Write 'Hello World' to greeting.txt")],
  },
  config
);

// Check if the agent is paused (waiting for approval)
const state = await agent.graph.getState(config);
if (state.next && state.next.length > 0) {
  console.log("⏸️ Interrupted for approval!");

  // Get the interrupt data from the task
  const task = state.tasks?.[0];
  if (task?.interrupts && task.interrupts.length > 0) {
    const requests = task.interrupts[0].value;
    console.log("Tool:", requests[0].action);
    console.log("Args:", JSON.stringify(requests[0].args, null, 2));

    console.log("\nℹ️ In a real application, you would:");
    console.log(" - Show this to the user");
    console.log(" - Get their response (accept/edit/ignore/manual)");
    console.log(
      " - Resume with: agent.invoke(new Command({ resume: response }))"
    );

    console.log("\n✅ Simulating user approval...\n");

    // Resume with approval
    const resumedResult = await agent.invoke(
      // @ts-expect-error
      new Command({
        resume: [{ type: "accept" }], // Approve the tool call
      }),
      config
    );

    console.log("Result:", resumedResult.messages.at(-1)?.content);
  }
} else {
  console.log("Agent completed without interruption");
  console.log("Result:", initialResult.messages.at(-1)?.content);
}
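The pause, inspect, resume pattern above lends itself to a small helper. The sketch below only recombines calls already used in this example (agent.invoke, agent.graph.getState, and Command), keeps the agent parameter loosely typed, and hard-codes the "accept" response; treat it as a convenience wrapper under those assumptions, not as part of the commit.

// Hypothetical helper: run one input, and if the graph paused for approval, resume with an accept.
async function invokeWithAutoApproval(
  agent: any, // kept loose for sketch purposes; the createAgent return type would be used in practice
  input: { messages: HumanMessage[] },
  config: { configurable: { thread_id: string } }
) {
  const first = await agent.invoke(input, config);
  const state = await agent.graph.getState(config);

  // No pending node means the run finished without needing approval.
  if (!state.next || state.next.length === 0) {
    return first;
  }

  // Otherwise resume the interrupted run, approving the pending tool call.
  return agent.invoke(
    // @ts-expect-error -- same typing workaround as in the example above
    new Command({ resume: [{ type: "accept" }] }),
    config
  );
}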
Lines changed: 69 additions & 0 deletions
@@ -0,0 +1,69 @@
import { createAgent, HumanMessage, AIMessage } from "langchain";
import { anthropicPromptCachingMiddleware } from "langchain/middleware";

// Example usage
const agent = createAgent({
  model: "anthropic:claude-sonnet-4-20250514",
  tools: [],
  middlewares: [anthropicPromptCachingMiddleware({ ttl: "5m" })] as const,
});

// Usage example with a long chat history for testing caching
const result = await agent.invoke({
  messages: [
    // Initial conversation about prompt caching basics
    new HumanMessage(
      "What is prompt caching and why is it important for LLM applications?"
    ),
    new AIMessage(
      "Prompt caching is a technique that stores and reuses previously processed portions of prompts to optimize LLM API usage. It's important because:\n\n1. **Cost Reduction**: By caching common prompt prefixes, you avoid reprocessing the same tokens repeatedly, significantly reducing API costs.\n2. **Latency Improvement**: Cached prompts are processed faster since the model doesn't need to recompute embeddings for cached portions.\n3. **Better Resource Utilization**: Reduces computational load on both client and server sides.\n4. **Improved User Experience**: Faster response times lead to more responsive applications.\n\nIt's particularly valuable for applications with repetitive prompts, long system messages, or extensive conversation histories."
    ),

    // Deep dive into technical details
    new HumanMessage(
      "How does prompt caching work at a technical level? What happens when a prompt is cached?"
    ),
    new AIMessage(
      "At a technical level, prompt caching works through several steps:\n\n1. **Token Processing**: When a prompt is first sent, the model processes tokens and generates internal representations (embeddings and attention states).\n\n2. **Cache Storage**: These intermediate representations are stored in a cache with a unique identifier based on the prompt prefix.\n\n3. **Cache Matching**: For subsequent requests, the system checks if any prefix of the new prompt matches cached entries.\n\n4. **Partial Processing**: If a match is found, the model skips processing the cached portion and starts from where the cache ends.\n\n5. **Cache Management**: Caches have TTL (Time To Live) settings and eviction policies to manage memory usage.\n\nThe cache typically stores:\n- Token embeddings\n- Attention matrices\n- Hidden states from transformer layers\n- Position encodings\n\nThis allows the model to resume processing from any cached point without quality loss."
    ),

    // Implementation specifics
    new HumanMessage(
      "What are the best practices for implementing prompt caching in a production environment?"
    ),
    new AIMessage(
      "Here are key best practices for production prompt caching:\n\n**1. Cache Strategy**\n- Cache stable prefixes (system prompts, common instructions)\n- Use appropriate TTL values (5-60 minutes typically)\n- Implement cache warming for frequently used prompts\n\n**2. Monitoring & Metrics**\n- Track cache hit rates\n- Monitor cache size and memory usage\n- Log cache evictions and misses\n- Measure latency improvements\n\n**3. Cache Invalidation**\n- Implement versioning for cached prompts\n- Clear caches when system prompts change\n- Use cache tags for grouped invalidation\n\n**4. Error Handling**\n- Gracefully fall back to non-cached requests\n- Implement retry logic for cache failures\n- Log cache-related errors separately\n\n**5. Security Considerations**\n- Ensure cached data is properly isolated between users\n- Implement access controls for cache management\n- Consider encryption for sensitive cached content\n\n**6. Optimization Tips**\n- Batch similar requests to maximize cache reuse\n- Structure prompts with common prefixes\n- Use consistent formatting to improve cache hits"
    ),

    // Specific use cases
    new HumanMessage(
      "Can you provide examples of applications that benefit most from prompt caching?"
    ),
    new AIMessage(
      "Several applications benefit significantly from prompt caching:\n\n**1. Customer Support Chatbots**\n- Long system prompts with company policies\n- FAQ responses with common prefixes\n- Multi-turn conversations with context retention\n- Benefit: 60-80% cost reduction on repeat queries\n\n**2. Code Generation Tools**\n- Large codebases as context\n- Common import statements and boilerplate\n- Repeated analysis of the same files\n- Benefit: Faster iteration on code changes\n\n**3. Document Analysis Systems**\n- Legal documents with standard clauses\n- Medical records with common formats\n- Financial reports with recurring sections\n- Benefit: Rapid document processing at scale\n\n**4. Educational Platforms**\n- Course materials as context\n- Standardized question formats\n- Student interaction histories\n- Benefit: Personalized learning at lower cost\n\n**5. Content Generation**\n- Brand voice guidelines\n- SEO templates\n- Style guides and tone specifications\n- Benefit: Consistent content generation\n\n**6. Multi-Agent Systems**\n- Shared context between agents\n- Common tool definitions\n- Workflow templates\n- Benefit: Efficient agent coordination"
    ),

    // Advanced caching strategies
    new HumanMessage(
      "What are some advanced caching strategies for complex conversational AI systems?"
    ),
    new AIMessage(
      "Advanced caching strategies for complex conversational AI include:\n\n**1. Hierarchical Caching**\n- **L1 Cache**: Hot prompts (< 1 minute TTL)\n- **L2 Cache**: Warm prompts (5-15 minutes TTL)\n- **L3 Cache**: Cold storage (hours to days)\n- Automatic promotion/demotion between levels\n\n**2. Semantic Caching**\n- Cache based on semantic similarity, not just exact matches\n- Use embedding models to find similar prompts\n- Cluster related prompts for better reuse\n- Implement fuzzy matching for minor variations\n\n**3. Predictive Caching**\n- Pre-cache likely next prompts based on user behavior\n- Use ML models to predict conversation flow\n- Warm caches during low-traffic periods\n- Implement speculative caching for common paths\n\n**4. Distributed Caching**\n- Use Redis or Memcached for shared caches\n- Implement cache synchronization across instances\n- Geographic distribution for global applications\n- Consistent hashing for cache distribution\n\n**5. Context-Aware Caching**\n- Cache different versions based on user segments\n- Implement role-based cache strategies\n- Time-based cache variations (business hours vs. off-hours)\n- Location-aware caching for regional differences\n\n**6. Compression Techniques**\n- Compress cached representations\n- Use quantization for embedding storage\n- Implement delta encoding for similar prompts\n- Apply pruning for less important attention weights"
    ),

    // Performance optimization
    new HumanMessage(
      "How can I measure and optimize the performance impact of prompt caching in my application?"
    ),
    new AIMessage(
      "Here's a comprehensive approach to measuring and optimizing prompt caching performance:\n\n**Performance Metrics to Track:**\n\n1. **Cache Efficiency**\n - Hit Rate: (Cache Hits / Total Requests) × 100\n - Miss Rate: (Cache Misses / Total Requests) × 100\n - Eviction Rate: Frequency of cache evictions\n - Cache Utilization: Used cache space vs. available\n\n2. **Latency Metrics**\n - P50, P95, P99 response times with/without cache\n - Time saved per cached request\n - Cache lookup time\n - Cache write time\n\n3. **Cost Metrics**\n - Tokens saved through caching\n - API cost reduction percentage\n - Infrastructure cost for cache storage\n - ROI of caching implementation\n\n**Optimization Strategies:**\n\n1. **Cache Key Optimization**\n ```typescript\n // Instead of caching entire prompts\n const key = hash(entirePrompt);\n \n // Cache stable prefixes\n const key = hash(systemPrompt + conversationPrefix);\n ```\n\n2. **Adaptive TTL**\n - Short TTL for volatile content (1-5 min)\n - Long TTL for stable content (1-24 hours)\n - Dynamic TTL based on access patterns\n\n3. **Cache Preloading**\n - Identify common patterns through analytics\n - Preload during application startup\n - Background refresh of popular entries\n\n4. **Memory Management**\n - Implement LRU (Least Recently Used) eviction\n - Set appropriate cache size limits\n - Monitor memory pressure\n - Use memory-mapped files for large caches\n\n**Monitoring Implementation:**\n```typescript\ninterface CacheMetrics {\n hits: number;\n misses: number;\n evictions: number;\n avgHitLatency: number;\n avgMissLatency: number;\n memorySizeMB: number;\n tokensSaved: number;\n costSaved: number;\n}\n```\n\n**A/B Testing Strategy:**\n- Run parallel experiments with different cache configs\n- Compare user experience metrics\n- Measure impact on conversion rates\n- Analyze cost-benefit ratios"
    ),

    // Current question for continued testing
    new HumanMessage(
      "What are the key features of prompt caching that I should focus on when implementing this middleware?"
    ),
  ],
});

console.log("\nAgent response:", result.messages.at(-1)?.content);
Lines changed: 102 additions & 0 deletions
@@ -0,0 +1,102 @@
import {
  createMiddleware,
  createAgent,
  HumanMessage,
  MemorySaver,
} from "langchain";
import { Command, interrupt } from "@langchain/langgraph";

const checkpointer = new MemorySaver();

/**
 * Simple Human in the Loop (HITL) Middleware
 *
 * This middleware demonstrates how to interrupt execution when information is missing
 * and resume with human-provided input.
 */
const humanInTheLoopMiddleware = createMiddleware({
  name: "HumanInTheLoopMiddleware",

  beforeModel: (state) => {
    // Check if the user's question is missing critical information
    const lastUserMessage = [...state.messages]
      .reverse()
      .find((msg) => msg instanceof HumanMessage);

    if (!lastUserMessage) {
      return;
    }

    const userContent = lastUserMessage.content.toString().toLowerCase();

    // Interrupt to ask for clarification
    const clarification = interrupt({
      type: "missing_information",
      question: "Which country or state's capital are you asking about?",
      originalQuery: userContent,
    });

    // Add the clarification as a new message
    console.log(`\n✅ Human provided clarification: "${clarification}"`);

    // eslint-disable-next-line consistent-return
    return {
      messages: [
        ...state.messages,
        new HumanMessage(`The capital of ${clarification}`),
      ],
      clarificationRequested: true,
    };
  },
});

const agent = createAgent({
  model: "openai:gpt-4o-mini",
  tools: [],
  checkpointer,
  middlewares: [humanInTheLoopMiddleware] as const,
});

console.log("🚀 Human in the Loop Example - Missing Information Flow");
console.log("========================================================");
console.log(
  "\nThis example shows how the agent interrupts when information is missing"
);
console.log("and resumes with human-provided input.\n");

const config = {
  configurable: {
    thread_id: "example-thread-123",
  },
};

// Step 1: Initial invocation with incomplete information
console.log("📝 Step 1: User asks incomplete question");
console.log(' User: "What\'s the capital?"');

const result = await agent.invoke(
  {
    messages: [new HumanMessage("What's the capital?")],
  },
  config
);

// This won't be reached due to interruption
console.log("\nFinal message:", result.messages.at(-1)?.content);

// Step 2: Resume with the missing information
console.log("📝 Step 2: Resuming with clarification");
console.log(' Human provides: "France"');

// Resume the graph with the clarification
// The Command is properly typed for resuming from an interrupt
const resumedResult = await agent.invoke(
  // @ts-expect-error
  new Command({
    resume: "France",
  }),
  config
);

console.log("\n✅ Agent successfully resumed!");
console.log("Final answer:", resumedResult.messages.at(-1)?.content);
