AsyncFuncAI · VishalYadavCF · May 8, 2025 · gemini-code-assist · May 8, 2025 · gemini-code-assist
diff --git a/api/rag.py b/api/rag.py
@@ -45,7 +45,7 @@ def append_dialog_turn(self, dialog_turn):
 logger = logging.getLogger(__name__)
 
 # Maximum token limit for embedding models
-MAX_INPUT_TOKENS = 7500  # Safe threshold below 8192 token limit
+MAX_INPUT_TOKENS = 3400  # Adjusted for embedding model with 4096 token limit
-MAX_INPUT_TOKENS = 3400  # Adjusted for embedding model with 4096 token limit
+MAX_INPUT_TOKENS = 3400  # Adjusted for embedding model with 4096 token limit. Testing indicates best performance at this level.
-MAX_INPUT_TOKENS = 3400  # Adjusted for embedding model with 4096 token limit
+MAX_INPUT_TOKENS = 3400  # Adjusted for embedding model with 4096 token limit. Testing indicates best performance at this level.
 
 class Memory(adal.core.component.DataComponent):
     """Simple conversation management with a list of dialog turns."""
@@ -232,7 +232,10 @@ def __init__(self, use_s3: bool = False, local_ollama: bool = False):  # noqa: F
         # --- Initialize Embedder ---
         self.embedder = adal.Embedder(
             model_client=embedder_config["model_client"](),
-            model_kwargs=embedder_config["model_kwargs"],
+            model_kwargs={
+                **embedder_config["model_kwargs"],
+                "max_input_tokens": MAX_INPUT_TOKENS,  # Use the configured token limit
+            },
-            model_kwargs={
-                **embedder_config["model_kwargs"],
-                "max_input_tokens": MAX_INPUT_TOKENS,  # Use the configured token limit
-            },
+            model_kwargs={
+                **embedder_config["model_kwargs"],
+                "max_input_tokens": MAX_INPUT_TOKENS,  # Prevents exceeding model's token limit
+            },
-            model_kwargs={
-                **embedder_config["model_kwargs"],
-                "max_input_tokens": MAX_INPUT_TOKENS,  # Use the configured token limit
-            },
+            model_kwargs={
+                **embedder_config["model_kwargs"],
+                "max_input_tokens": MAX_INPUT_TOKENS,  # Prevents exceeding model's token limit
+            },
         )
 
         # Patch: ensure query embedding is always single string for Ollama