2749 ollama client auth token (#3005)
* ollama auth token provision

* auth token provision

* ollama auth provision

* ollama auth token

* ollama auth provision

* token input field css fix

* Fix provider handler not using key
sensible fallback to not break existing installs
re-order of input fields
null-check for API key and header optional insert on request
linting

* apply header and auth to agent invocations

* upgrading to ollama 5.10 for passing headers to constructor

* rename Auth systemSetting key to be more descriptive
linting and copy

* remove untracked files + update gitignore

* remove debug

* patch lockfile

---------

Co-authored-by: timothycarambat <[email protected]>
ssbodapati and timothycarambat authored Feb 19, 2025
1 parent 3390ccf commit 3fd0fe8
Showing 12 changed files with 211 additions and 128 deletions.
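The change threads an optional Bearer token from the settings UI (`OllamaLLMAuthToken`) through the endpoint auto-discovery hook and into the server-side Ollama client. A minimal sketch of the server-side idea, assuming the `ollama` npm package ("5.10" in the commit message, presumably 0.5.10, which accepts headers in its constructor); the variable names below are illustrative, not the repo's actual code:

```js
// Sketch: optional Bearer auth for an Ollama server behind an
// authenticating proxy. The header is inserted only when a token is set,
// so existing installs without auth keep working (the "sensible fallback"
// the commit message describes).
const { Ollama } = require("ollama"); // commit upgrades this package so
                                      // headers can go to the constructor

const authToken = process.env.OLLAMA_AUTH_TOKEN ?? null;
const headers = authToken ? { Authorization: `Bearer ${authToken}` } : {};

const client = new Ollama({
  host: process.env.OLLAMA_BASE_PATH ?? "http://127.0.0.1:11434",
  headers,
});
```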
1 change: 1 addition & 0 deletions docker/.env.example
@@ -42,6 +42,7 @@ GID='1000'
# OLLAMA_BASE_PATH='http://host.docker.internal:11434'
# OLLAMA_MODEL_PREF='llama2'
# OLLAMA_MODEL_TOKEN_LIMIT=4096
# OLLAMA_AUTH_TOKEN='your-ollama-auth-token-here (optional, only for ollama running behind auth - Bearer token)'

# LLM_PROVIDER='togetherai'
# TOGETHER_AI_API_KEY='my-together-ai-key'
260 changes: 149 additions & 111 deletions frontend/src/components/LLMSelection/OllamaLLMOptions/index.jsx
@@ -11,12 +11,15 @@ export default function OllamaLLMOptions({ settings }) {
autoDetecting: loading,
basePath,
basePathValue,
authToken,
authTokenValue,
showAdvancedControls,
setShowAdvancedControls,
handleAutoDetectClick,
} = useProviderEndpointAutoDiscovery({
provider: "ollama",
initialBasePath: settings?.OllamaLLMBasePath,
initialAuthToken: settings?.OllamaLLMAuthToken,
ENDPOINTS: OLLAMA_COMMON_URLS,
});
const [performanceMode, setPerformanceMode] = useState(
@@ -32,6 +35,7 @@ export default function OllamaLLMOptions({ settings }) {
<OllamaLLMModelSelection
settings={settings}
basePath={basePath.value}
authToken={authToken.value}
/>
<div className="flex flex-col w-60">
<label className="text-white text-sm font-semibold block mb-2">
@@ -73,128 +77,158 @@
</div>
</div>

<div hidden={!showAdvancedControls}>
<div className="w-full flex items-start gap-4">
<div className="flex flex-col w-60">
<div className="flex justify-between items-center mb-2">
<label className="text-white text-sm font-semibold">
Ollama Base URL
</label>
{loading ? (
<PreLoader size="6" />
) : (
<>
{!basePathValue.value && (
<button
onClick={handleAutoDetectClick}
className="bg-primary-button text-xs font-medium px-2 py-1 rounded-lg hover:bg-secondary hover:text-white shadow-[0_4px_14px_rgba(0,0,0,0.25)]"
>
Auto-Detect
</button>
)}
</>
)}
<div className="flex flex-col">
<div className="w-full flex items-start gap-4">
<div className="flex flex-col w-60">
<div className="flex justify-between items-center mb-2">
<label className="text-white text-sm font-semibold">
Ollama Base URL
</label>
{loading ? (
<PreLoader size="6" />
) : (
<>
{!basePathValue.value && (
<button
onClick={handleAutoDetectClick}
className="bg-primary-button text-xs font-medium px-2 py-1 rounded-lg hover:bg-secondary hover:text-white shadow-[0_4px_14px_rgba(0,0,0,0.25)]"
>
Auto-Detect
</button>
)}
</>
)}
</div>
<input
type="url"
name="OllamaLLMBasePath"
className="border-none bg-theme-settings-input-bg text-white placeholder:text-theme-settings-input-placeholder text-sm rounded-lg focus:outline-primary-button active:outline-primary-button outline-none block w-full p-2.5"
placeholder="http://127.0.0.1:11434"
value={basePathValue.value}
required={true}
autoComplete="off"
spellCheck={false}
onChange={basePath.onChange}
onBlur={basePath.onBlur}
/>
<p className="text-xs leading-[18px] font-base text-white text-opacity-60 mt-2">
Enter the URL where Ollama is running.
</p>
</div>
<input
type="url"
name="OllamaLLMBasePath"
className="border-none bg-theme-settings-input-bg text-white placeholder:text-theme-settings-input-placeholder text-sm rounded-lg focus:outline-primary-button active:outline-primary-button outline-none block w-full p-2.5"
placeholder="http://127.0.0.1:11434"
value={basePathValue.value}
required={true}
autoComplete="off"
spellCheck={false}
onChange={basePath.onChange}
onBlur={basePath.onBlur}
/>
<p className="text-xs leading-[18px] font-base text-white text-opacity-60 mt-2">
Enter the URL where Ollama is running.
</p>
</div>

<div className="flex flex-col w-60">
<label className="text-white text-sm font-semibold block mb-2">
Ollama Keep Alive
</label>
<select
name="OllamaLLMKeepAliveSeconds"
required={true}
className="border-none bg-theme-settings-input-bg border-gray-500 text-white text-sm rounded-lg block w-full p-2.5"
defaultValue={settings?.OllamaLLMKeepAliveSeconds ?? "300"}
>
<option value="0">No cache</option>
<option value="300">5 minutes</option>
<option value="3600">1 hour</option>
<option value="-1">Forever</option>
</select>
<p className="text-xs leading-[18px] font-base text-white text-opacity-60 mt-2">
Choose how long Ollama should keep your model in memory before
unloading.
<a
className="underline text-blue-300"
href="https://github.com/ollama/ollama/blob/main/docs/faq.md#how-do-i-keep-a-model-loaded-in-memory-or-make-it-unload-immediately"
target="_blank"
rel="noreferrer"
<div className="flex flex-col w-60">
<label className="text-white text-sm font-semibold mb-2 flex items-center">
Performance Mode
<Info
size={16}
className="ml-2 text-white"
data-tooltip-id="performance-mode-tooltip"
/>
</label>
<select
name="OllamaLLMPerformanceMode"
required={true}
className="border-none bg-theme-settings-input-bg border-gray-500 text-white text-sm rounded-lg block w-full p-2.5"
value={performanceMode}
onChange={(e) => setPerformanceMode(e.target.value)}
>
{" "}
Learn more &rarr;
</a>
</p>
</div>

<div className="flex flex-col w-60">
<label className="text-white text-sm font-semibold mb-2 flex items-center">
Performance Mode
<Info
size={16}
className="ml-2 text-white"
data-tooltip-id="performance-mode-tooltip"
/>
</label>
<select
name="OllamaLLMPerformanceMode"
required={true}
className="border-none bg-theme-settings-input-bg border-gray-500 text-white text-sm rounded-lg block w-full p-2.5"
value={performanceMode}
onChange={(e) => setPerformanceMode(e.target.value)}
>
<option value="base">Base (Default)</option>
<option value="maximum">Maximum</option>
</select>
<p className="text-xs leading-[18px] font-base text-white text-opacity-60 mt-2">
Choose the performance mode for the Ollama model.
</p>
<Tooltip
id="performance-mode-tooltip"
place="bottom"
className="tooltip !text-xs max-w-xs"
>
<p className="text-red-500">
<strong>Note:</strong> Be careful with the Maximum mode. It may
increase resource usage significantly.
<option value="base">Base (Default)</option>
<option value="maximum">Maximum</option>
</select>
<p className="text-xs leading-[18px] font-base text-white text-opacity-60 mt-2">
Choose the performance mode for the Ollama model.
</p>
<br />
<p>
<strong>Base:</strong> Ollama automatically limits the context
to 2048 tokens, keeping resource usage low while maintaining
good performance. Suitable for most users and models.
<Tooltip
id="performance-mode-tooltip"
place="bottom"
className="tooltip !text-xs max-w-xs"
>
<p className="text-red-500">
<strong>Note:</strong> Be careful with the Maximum mode. It
may increase resource usage significantly.
</p>
<br />
<p>
<strong>Base:</strong> Ollama automatically limits the context
to 2048 tokens, keeping resource usage low while maintaining
good performance. Suitable for most users and models.
</p>
<br />
<p>
<strong>Maximum:</strong> Uses the full context window (up to
Max Tokens). Will result in increased resource usage but
allows for larger context conversations. <br />
<br />
This is not recommended for most users.
</p>
</Tooltip>
</div>
<div className="flex flex-col w-60">
<label className="text-white text-sm font-semibold block mb-2">
Ollama Keep Alive
</label>
<select
name="OllamaLLMKeepAliveSeconds"
required={true}
className="border-none bg-theme-settings-input-bg border-gray-500 text-white text-sm rounded-lg block w-full p-2.5"
defaultValue={settings?.OllamaLLMKeepAliveSeconds ?? "300"}
>
<option value="0">No cache</option>
<option value="300">5 minutes</option>
<option value="3600">1 hour</option>
<option value="-1">Forever</option>
</select>
<p className="text-xs leading-[18px] font-base text-white text-opacity-60 mt-2">
Choose how long Ollama should keep your model in memory before
unloading.
<a
className="underline text-blue-300"
href="https://github.com/ollama/ollama/blob/main/docs/faq.md#how-do-i-keep-a-model-loaded-in-memory-or-make-it-unload-immediately"
target="_blank"
rel="noreferrer"
>
{" "}
Learn more &rarr;
</a>
</p>
<br />
<p>
<strong>Maximum:</strong> Uses the full context window (up to
Max Tokens). Will result in increased resource usage but allows
for larger context conversations. <br />
</div>
</div>
<div className="w-full flex items-start gap-4">
<div className="flex flex-col w-100">
<label className="text-white text-sm font-semibold">
Auth Token
</label>
<p className="text-xs leading-[18px] font-base text-white text-opacity-60 mt-2">
Enter a <code>Bearer</code> Auth Token for interacting with your
Ollama server.
<br />
This is not recommended for most users.
Used <b>only</b> if running Ollama behind an authentication
server.
</p>
</Tooltip>
<input
type="password"
name="OllamaLLMAuthToken"
className="border-none bg-theme-settings-input-bg mt-2 text-white placeholder:text-theme-settings-input-placeholder text-sm rounded-lg outline-none block w-full p-2.5"
placeholder="Ollama Auth Token"
value={authTokenValue.value}
onChange={authToken.onChange}
onBlur={authToken.onBlur}
required={false}
autoComplete="off"
spellCheck={false}
/>
</div>
</div>
</div>
</div>
</div>
);
}

function OllamaLLMModelSelection({ settings, basePath = null }) {
function OllamaLLMModelSelection({
settings,
basePath = null,
authToken = null,
}) {
const [customModels, setCustomModels] = useState([]);
const [loading, setLoading] = useState(true);

@@ -207,7 +241,11 @@ function OllamaLLMModelSelection({ settings, basePath = null }) {
}
setLoading(true);
try {
const { models } = await System.customModels("ollama", null, basePath);
const { models } = await System.customModels(
"ollama",
authToken,
basePath
);
setCustomModels(models || []);
} catch (error) {
console.error("Failed to fetch custom models:", error);
Expand All @@ -216,7 +254,7 @@ function OllamaLLMModelSelection({ settings, basePath = null }) {
setLoading(false);
}
findCustomModels();
}, [basePath]);
}, [basePath, authToken]);

if (loading || customModels.length == 0) {
return (
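In the model-selection component, the token is now forwarded as the second argument of `System.customModels` (previously hard-coded to `null`). A small sketch of that call shape, inferred from the two call sites in this diff; the import path and parameter names are assumptions:

```js
import System from "@/models/system"; // assumed import path for the helper

// Shape inferred from the call sites in this diff:
//   System.customModels("ollama", authToken, basePath)
//   System.customModels(provider, authTokenValue, endpoint, 2_000)
async function listOllamaModels(authToken, basePath) {
  const { models } = await System.customModels(
    "ollama",          // provider slug
    authToken ?? null, // optional Bearer token (was always null before)
    basePath,          // configured or auto-detected endpoint
    2_000              // timeout in ms, as the auto-detect probe passes
  );
  return models || [];
}
```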
27 changes: 25 additions & 2 deletions frontend/src/hooks/useProviderEndpointAutoDiscovery.js
@@ -5,11 +5,15 @@ import showToast from "@/utils/toast";
export default function useProviderEndpointAutoDiscovery({
provider = null,
initialBasePath = "",
initialAuthToken = null,
ENDPOINTS = [],
}) {
const [loading, setLoading] = useState(false);
const [basePath, setBasePath] = useState(initialBasePath);
const [basePathValue, setBasePathValue] = useState(initialBasePath);

const [authToken, setAuthToken] = useState(initialAuthToken);
const [authTokenValue, setAuthTokenValue] = useState(initialAuthToken);
const [autoDetectAttempted, setAutoDetectAttempted] = useState(false);
const [showAdvancedControls, setShowAdvancedControls] = useState(true);

@@ -20,7 +24,7 @@ export default function useProviderEndpointAutoDiscovery({
ENDPOINTS.forEach((endpoint) => {
possibleEndpoints.push(
new Promise((resolve, reject) => {
System.customModels(provider, null, endpoint, 2_000)
System.customModels(provider, authTokenValue, endpoint, 2_000)
.then((results) => {
if (!results?.models || results.models.length === 0)
throw new Error("No models");
@@ -74,9 +78,18 @@
setBasePath(basePathValue);
}

function handleAuthTokenChange(e) {
const value = e.target.value;
setAuthTokenValue(value);
}

function handleAuthTokenBlur() {
setAuthToken(authTokenValue);
}

useEffect(() => {
if (!initialBasePath && !autoDetectAttempted) autoDetect(true);
}, [initialBasePath, autoDetectAttempted]);
}, [initialBasePath, initialAuthToken, autoDetectAttempted]);

return {
autoDetecting: loading,
@@ -93,6 +106,16 @@
value: basePathValue,
set: setBasePathValue,
},
authToken: {
value: authToken,
set: setAuthTokenValue,
onChange: handleAuthTokenChange,
onBlur: handleAuthTokenBlur,
},
authTokenValue: {
value: authTokenValue,
set: setAuthTokenValue,
},
handleAutoDetectClick,
runAutoDetect: autoDetect,
};
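Note the hook's two-tier state per field: a draft value updated on every keystroke (`authTokenValue`, `basePathValue`) and a committed value applied on blur (`authToken`, `basePath`). Per the diff, the auto-detect probes read the current draft token, while the committed token is what the component hands to the model-list fetch. A trimmed consumption sketch, mirroring the OllamaLLMOptions wiring shown earlier:

```js
// Trimmed from the component diff above; `settings` and OLLAMA_COMMON_URLS
// come from the surrounding component/module.
const {
  autoDetecting,         // true while candidate endpoints are being probed
  basePath,              // committed URL + onChange/onBlur handlers
  basePathValue,         // draft URL bound to the text input
  authToken,             // committed on blur; passed to the model-list fetch
  authTokenValue,        // draft token bound to the password input; probes use it
  handleAutoDetectClick,
} = useProviderEndpointAutoDiscovery({
  provider: "ollama",
  initialBasePath: settings?.OllamaLLMBasePath,
  initialAuthToken: settings?.OllamaLLMAuthToken,
  ENDPOINTS: OLLAMA_COMMON_URLS,
});
```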
1 change: 1 addition & 0 deletions server/.env.example
@@ -39,6 +39,7 @@ SIG_SALT='salt' # Please generate random string at least 32 chars long.
# OLLAMA_BASE_PATH='http://host.docker.internal:11434'
# OLLAMA_MODEL_PREF='llama2'
# OLLAMA_MODEL_TOKEN_LIMIT=4096
# OLLAMA_AUTH_TOKEN='your-ollama-auth-token-here (optional, only for ollama running behind auth - Bearer token)'

# LLM_PROVIDER='togetherai'
# TOGETHER_AI_API_KEY='my-together-ai-key'
1 change: 1 addition & 0 deletions server/models/systemSettings.js
@@ -471,6 +471,7 @@ const SystemSettings = {
OllamaLLMTokenLimit: process.env.OLLAMA_MODEL_TOKEN_LIMIT,
OllamaLLMKeepAliveSeconds: process.env.OLLAMA_KEEP_ALIVE_TIMEOUT ?? 300,
OllamaLLMPerformanceMode: process.env.OLLAMA_PERFORMANCE_MODE ?? "base",
OllamaLLMAuthToken: process.env.OLLAMA_AUTH_TOKEN ?? null,

// Novita LLM Keys
NovitaLLMApiKey: !!process.env.NOVITA_LLM_API_KEY,
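The remaining seven of the twelve changed files are not rendered on this page. Among them is presumably the mapping that lets the form field persist to the environment; a hypothetical entry in the style of the project's other env mappings (file location, key shape, and validators are assumptions, not confirmed by the loaded diff):

```js
// Hypothetical env mapping for the new field; shape assumed from sibling
// settings such as OllamaLLMBasePath, not shown in the loaded diff.
OllamaLLMAuthToken: {
  envKey: "OLLAMA_AUTH_TOKEN",
  checks: [], // no validators: the token is optional, so empty values
              // must pass and existing installs stay unaffected
},
```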