2749 ollama client auth token (#3005)
* ollama auth token provision

* auth token provision

* ollama auth provision

* ollama auth token

* ollama auth provision

* token input field css fix

* Fix provider handler not using key
sensible fallback to not break existing installs
re-order of input fields
null-check for API key and header optional insert on request
linting

* apply header and auth to agent invocations

* upgrading to ollama 5.10 for passing headers to constructor

* rename Auth systemSetting key to be more descriptive
linting and copy

* remove untracked files + update gitignore

* remove debug

* patch lockfile

---------

Co-authored-by: timothycarambat <[email protected]>
ssbodapati and timothycarambat authored Feb 19, 2025
1 parent 3390ccf commit 3fd0fe8
Showing 12 changed files with 211 additions and 128 deletions.
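The change threads an optional Bearer token from the settings UI (`OllamaLLMAuthToken`) through the endpoint auto-discovery hook and into the server-side Ollama client. A minimal sketch of the server-side idea, assuming the `ollama` npm package ("5.10" in the commit message, presumably 0.5.10, which accepts headers in its constructor); the variable names below are illustrative, not the repo's actual code:

```js
// Sketch: optional Bearer auth for an Ollama server behind an
// authenticating proxy. The header is inserted only when a token is set,
// so existing installs without auth keep working (the "sensible fallback"
// the commit message describes).
const { Ollama } = require("ollama"); // commit upgrades this package so
                                      // headers can go to the constructor

const authToken = process.env.OLLAMA_AUTH_TOKEN ?? null;
const headers = authToken ? { Authorization: `Bearer ${authToken}` } : {};

const client = new Ollama({
  host: process.env.OLLAMA_BASE_PATH ?? "http://127.0.0.1:11434",
  headers,
});
```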
1 change: 1 addition & 0 deletions docker/.env.example
@@ -42,6 +42,7 @@ GID='1000'
# OLLAMA_BASE_PATH='http://host.docker.internal:11434'
# OLLAMA_MODEL_PREF='llama2'
# OLLAMA_MODEL_TOKEN_LIMIT=4096
# OLLAMA_AUTH_TOKEN='your-ollama-auth-token-here (optional, only for ollama running behind auth - Bearer token)'

# LLM_PROVIDER='togetherai'
# TOGETHER_AI_API_KEY='my-together-ai-key'
260 changes: 149 additions & 111 deletions frontend/src/components/LLMSelection/OllamaLLMOptions/index.jsx
@@ -11,12 +11,15 @@ export default function OllamaLLMOptions({ settings }) {
autoDetecting: loading,
basePath,
basePathValue,
authToken,
authTokenValue,
showAdvancedControls,
setShowAdvancedControls,
handleAutoDetectClick,
} = useProviderEndpointAutoDiscovery({
provider: "ollama",
initialBasePath: settings?.OllamaLLMBasePath,
initialAuthToken: settings?.OllamaLLMAuthToken,
ENDPOINTS: OLLAMA_COMMON_URLS,
});
const [performanceMode, setPerformanceMode] = useState(
@@ -32,6 +35,7 @@ export default function OllamaLLMOptions({ settings }) {
<OllamaLLMModelSelection
settings={settings}
basePath={basePath.value}
authToken={authToken.value}
/>
<div className="flex flex-col w-60">
<label className="text-white text-sm font-semibold block mb-2">
@@ -73,128 +77,158 @@
</div>
</div>

<div hidden={!showAdvancedControls}>
<div className="w-full flex items-start gap-4">
<div className="flex flex-col w-60">
<div className="flex justify-between items-center mb-2">
<label className="text-white text-sm font-semibold">
Ollama Base URL
</label>
{loading ? (
<PreLoader size="6" />
) : (
<>
{!basePathValue.value && (
<button
onClick={handleAutoDetectClick}
className="bg-primary-button text-xs font-medium px-2 py-1 rounded-lg hover:bg-secondary hover:text-white shadow-[0_4px_14px_rgba(0,0,0,0.25)]"
>
Auto-Detect
</button>
)}
</>
)}
<div className="flex flex-col">
<div className="w-full flex items-start gap-4">
<div className="flex flex-col w-60">
<div className="flex justify-between items-center mb-2">
<label className="text-white text-sm font-semibold">
Ollama Base URL
</label>
{loading ? (
<PreLoader size="6" />
) : (
<>
{!basePathValue.value && (
<button
onClick={handleAutoDetectClick}
className="bg-primary-button text-xs font-medium px-2 py-1 rounded-lg hover:bg-secondary hover:text-white shadow-[0_4px_14px_rgba(0,0,0,0.25)]"
>
Auto-Detect
</button>
)}
</>
)}
</div>
<input
type="url"
name="OllamaLLMBasePath"
className="border-none bg-theme-settings-input-bg text-white placeholder:text-theme-settings-input-placeholder text-sm rounded-lg focus:outline-primary-button active:outline-primary-button outline-none block w-full p-2.5"
placeholder="http://127.0.0.1:11434"
value={basePathValue.value}
required={true}
autoComplete="off"
spellCheck={false}
onChange={basePath.onChange}
onBlur={basePath.onBlur}
/>
<p className="text-xs leading-[18px] font-base text-white text-opacity-60 mt-2">
Enter the URL where Ollama is running.
</p>
</div>
<input
type="url"
name="OllamaLLMBasePath"
className="border-none bg-theme-settings-input-bg text-white placeholder:text-theme-settings-input-placeholder text-sm rounded-lg focus:outline-primary-button active:outline-primary-button outline-none block w-full p-2.5"
placeholder="http://127.0.0.1:11434"
value={basePathValue.value}
required={true}
autoComplete="off"
spellCheck={false}
onChange={basePath.onChange}
onBlur={basePath.onBlur}
/>
<p className="text-xs leading-[18px] font-base text-white text-opacity-60 mt-2">
Enter the URL where Ollama is running.
</p>
</div>

<div className="flex flex-col w-60">
<label className="text-white text-sm font-semibold block mb-2">
Ollama Keep Alive
</label>
<select
name="OllamaLLMKeepAliveSeconds"
required={true}
className="border-none bg-theme-settings-input-bg border-gray-500 text-white text-sm rounded-lg block w-full p-2.5"
defaultValue={settings?.OllamaLLMKeepAliveSeconds ?? "300"}
>
<option value="0">No cache</option>
<option value="300">5 minutes</option>
<option value="3600">1 hour</option>
<option value="-1">Forever</option>
</select>
<p className="text-xs leading-[18px] font-base text-white text-opacity-60 mt-2">
Choose how long Ollama should keep your model in memory before
unloading.
<a
className="underline text-blue-300"
href="https://github.com/ollama/ollama/blob/main/docs/faq.md#how-do-i-keep-a-model-loaded-in-memory-or-make-it-unload-immediately"
target="_blank"
rel="noreferrer"
<div className="flex flex-col w-60">
<label className="text-white text-sm font-semibold mb-2 flex items-center">
Performance Mode
<Info
size={16}
className="ml-2 text-white"
data-tooltip-id="performance-mode-tooltip"
/>
</label>
<select
name="OllamaLLMPerformanceMode"
required={true}
className="border-none bg-theme-settings-input-bg border-gray-500 text-white text-sm rounded-lg block w-full p-2.5"
value={performanceMode}
onChange={(e) => setPerformanceMode(e.target.value)}
>
{" "}
Learn more &rarr;
</a>
</p>
</div>

<div className="flex flex-col w-60">
<label className="text-white text-sm font-semibold mb-2 flex items-center">
Performance Mode
<Info
size={16}
className="ml-2 text-white"
data-tooltip-id="performance-mode-tooltip"
/>
</label>
<select
name="OllamaLLMPerformanceMode"
required={true}
className="border-none bg-theme-settings-input-bg border-gray-500 text-white text-sm rounded-lg block w-full p-2.5"
value={performanceMode}
onChange={(e) => setPerformanceMode(e.target.value)}
>
<option value="base">Base (Default)</option>
<option value="maximum">Maximum</option>
</select>
<p className="text-xs leading-[18px] font-base text-white text-opacity-60 mt-2">
Choose the performance mode for the Ollama model.
</p>
<Tooltip
id="performance-mode-tooltip"
place="bottom"
className="tooltip !text-xs max-w-xs"
>
<p className="text-red-500">
<strong>Note:</strong> Be careful with the Maximum mode. It may
increase resource usage significantly.
<option value="base">Base (Default)</option>
<option value="maximum">Maximum</option>
</select>
<p className="text-xs leading-[18px] font-base text-white text-opacity-60 mt-2">
Choose the performance mode for the Ollama model.
</p>
<br />
<p>
<strong>Base:</strong> Ollama automatically limits the context
to 2048 tokens, keeping resource usage low while maintaining
good performance. Suitable for most users and models.
<Tooltip
id="performance-mode-tooltip"
place="bottom"
className="tooltip !text-xs max-w-xs"
>
<p className="text-red-500">
<strong>Note:</strong> Be careful with the Maximum mode. It
may increase resource usage significantly.
</p>
<br />
<p>
<strong>Base:</strong> Ollama automatically limits the context
to 2048 tokens, keeping resource usage low while maintaining
good performance. Suitable for most users and models.
</p>
<br />
<p>
<strong>Maximum:</strong> Uses the full context window (up to
Max Tokens). Will result in increased resource usage but
allows for larger context conversations. <br />
<br />
This is not recommended for most users.
</p>
</Tooltip>
</div>
<div className="flex flex-col w-60">
<label className="text-white text-sm font-semibold block mb-2">
Ollama Keep Alive
</label>
<select
name="OllamaLLMKeepAliveSeconds"
required={true}
className="border-none bg-theme-settings-input-bg border-gray-500 text-white text-sm rounded-lg block w-full p-2.5"
defaultValue={settings?.OllamaLLMKeepAliveSeconds ?? "300"}
>
<option value="0">No cache</option>
<option value="300">5 minutes</option>
<option value="3600">1 hour</option>
<option value="-1">Forever</option>
</select>
<p className="text-xs leading-[18px] font-base text-white text-opacity-60 mt-2">
Choose how long Ollama should keep your model in memory before
unloading.
<a
className="underline text-blue-300"
href="https://github.com/ollama/ollama/blob/main/docs/faq.md#how-do-i-keep-a-model-loaded-in-memory-or-make-it-unload-immediately"
target="_blank"
rel="noreferrer"
>
{" "}
Learn more &rarr;
</a>
</p>
<br />
<p>
<strong>Maximum:</strong> Uses the full context window (up to
Max Tokens). Will result in increased resource usage but allows
for larger context conversations. <br />
</div>
</div>
<div className="w-full flex items-start gap-4">
<div className="flex flex-col w-100">
<label className="text-white text-sm font-semibold">
Auth Token
</label>
<p className="text-xs leading-[18px] font-base text-white text-opacity-60 mt-2">
Enter a <code>Bearer</code> Auth Token for interacting with your
Ollama server.
<br />
This is not recommended for most users.
Used <b>only</b> if running Ollama behind an authentication
server.
</p>
</Tooltip>
<input
type="password"
name="OllamaLLMAuthToken"
className="border-none bg-theme-settings-input-bg mt-2 text-white placeholder:text-theme-settings-input-placeholder text-sm rounded-lg outline-none block w-full p-2.5"
placeholder="Ollama Auth Token"
value={authTokenValue.value}
onChange={authToken.onChange}
onBlur={authToken.onBlur}
required={false}
autoComplete="off"
spellCheck={false}
/>
</div>
</div>
</div>
</div>
</div>
);
}

function OllamaLLMModelSelection({ settings, basePath = null }) {
function OllamaLLMModelSelection({
settings,
basePath = null,
authToken = null,
}) {
const [customModels, setCustomModels] = useState([]);
const [loading, setLoading] = useState(true);

@@ -207,7 +241,11 @@ function OllamaLLMModelSelection({ settings, basePath = null }) {
}
setLoading(true);
try {
const { models } = await System.customModels("ollama", null, basePath);
const { models } = await System.customModels(
"ollama",
authToken,
basePath
);
setCustomModels(models || []);
} catch (error) {
console.error("Failed to fetch custom models:", error);
Expand All @@ -216,7 +254,7 @@ function OllamaLLMModelSelection({ settings, basePath = null }) {
setLoading(false);
}
findCustomModels();
}, [basePath]);
}, [basePath, authToken]);

if (loading || customModels.length == 0) {
return (
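In the model-selection component, the token is now forwarded as the second argument of `System.customModels` (previously hard-coded to `null`). A small sketch of that call shape, inferred from the two call sites in this diff; the import path and parameter names are assumptions:

```js
import System from "@/models/system"; // assumed import path for the helper

// Shape inferred from the call sites in this diff:
//   System.customModels("ollama", authToken, basePath)
//   System.customModels(provider, authTokenValue, endpoint, 2_000)
async function listOllamaModels(authToken, basePath) {
  const { models } = await System.customModels(
    "ollama",          // provider slug
    authToken ?? null, // optional Bearer token (was always null before)
    basePath,          // configured or auto-detected endpoint
    2_000              // timeout in ms, as the auto-detect probe passes
  );
  return models || [];
}
```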
27 changes: 25 additions & 2 deletions frontend/src/hooks/useProviderEndpointAutoDiscovery.js
@@ -5,11 +5,15 @@ import showToast from "@/utils/toast";
export default function useProviderEndpointAutoDiscovery({
provider = null,
initialBasePath = "",
initialAuthToken = null,
ENDPOINTS = [],
}) {
const [loading, setLoading] = useState(false);
const [basePath, setBasePath] = useState(initialBasePath);
const [basePathValue, setBasePathValue] = useState(initialBasePath);

const [authToken, setAuthToken] = useState(initialAuthToken);
const [authTokenValue, setAuthTokenValue] = useState(initialAuthToken);
const [autoDetectAttempted, setAutoDetectAttempted] = useState(false);
const [showAdvancedControls, setShowAdvancedControls] = useState(true);

@@ -20,7 +24,7 @@ export default function useProviderEndpointAutoDiscovery({
ENDPOINTS.forEach((endpoint) => {
possibleEndpoints.push(
new Promise((resolve, reject) => {
System.customModels(provider, null, endpoint, 2_000)
System.customModels(provider, authTokenValue, endpoint, 2_000)
.then((results) => {
if (!results?.models || results.models.length === 0)
throw new Error("No models");
@@ -74,9 +78,18 @@
setBasePath(basePathValue);
}

function handleAuthTokenChange(e) {
const value = e.target.value;
setAuthTokenValue(value);
}

function handleAuthTokenBlur() {
setAuthToken(authTokenValue);
}

useEffect(() => {
if (!initialBasePath && !autoDetectAttempted) autoDetect(true);
}, [initialBasePath, autoDetectAttempted]);
}, [initialBasePath, initialAuthToken, autoDetectAttempted]);

return {
autoDetecting: loading,
@@ -93,6 +106,16 @@
value: basePathValue,
set: setBasePathValue,
},
authToken: {
value: authToken,
set: setAuthTokenValue,
onChange: handleAuthTokenChange,
onBlur: handleAuthTokenBlur,
},
authTokenValue: {
value: authTokenValue,
set: setAuthTokenValue,
},
handleAutoDetectClick,
runAutoDetect: autoDetect,
};
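Note the hook's two-tier state per field: a draft value updated on every keystroke (`authTokenValue`, `basePathValue`) and a committed value applied on blur (`authToken`, `basePath`). Per the diff, the auto-detect probes read the current draft token, while the committed token is what the component hands to the model-list fetch. A trimmed consumption sketch, mirroring the OllamaLLMOptions wiring shown earlier:

```js
// Trimmed from the component diff above; `settings` and OLLAMA_COMMON_URLS
// come from the surrounding component/module.
const {
  autoDetecting,         // true while candidate endpoints are being probed
  basePath,              // committed URL + onChange/onBlur handlers
  basePathValue,         // draft URL bound to the text input
  authToken,             // committed on blur; passed to the model-list fetch
  authTokenValue,        // draft token bound to the password input; probes use it
  handleAutoDetectClick,
} = useProviderEndpointAutoDiscovery({
  provider: "ollama",
  initialBasePath: settings?.OllamaLLMBasePath,
  initialAuthToken: settings?.OllamaLLMAuthToken,
  ENDPOINTS: OLLAMA_COMMON_URLS,
});
```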
1 change: 1 addition & 0 deletions server/.env.example
@@ -39,6 +39,7 @@ SIG_SALT='salt' # Please generate random string at least 32 chars long.
# OLLAMA_BASE_PATH='http://host.docker.internal:11434'
# OLLAMA_MODEL_PREF='llama2'
# OLLAMA_MODEL_TOKEN_LIMIT=4096
# OLLAMA_AUTH_TOKEN='your-ollama-auth-token-here (optional, only for ollama running behind auth - Bearer token)'

# LLM_PROVIDER='togetherai'
# TOGETHER_AI_API_KEY='my-together-ai-key'
1 change: 1 addition & 0 deletions server/models/systemSettings.js
@@ -471,6 +471,7 @@ const SystemSettings = {
OllamaLLMTokenLimit: process.env.OLLAMA_MODEL_TOKEN_LIMIT,
OllamaLLMKeepAliveSeconds: process.env.OLLAMA_KEEP_ALIVE_TIMEOUT ?? 300,
OllamaLLMPerformanceMode: process.env.OLLAMA_PERFORMANCE_MODE ?? "base",
OllamaLLMAuthToken: process.env.OLLAMA_AUTH_TOKEN ?? null,

// Novita LLM Keys
NovitaLLMApiKey: !!process.env.NOVITA_LLM_API_KEY,
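The remaining seven of the twelve changed files are not rendered on this page. Among them is presumably the mapping that lets the form field persist to the environment; a hypothetical entry in the style of the project's other env mappings (file location, key shape, and validators are assumptions, not confirmed by the loaded diff):

```js
// Hypothetical env mapping for the new field; shape assumed from sibling
// settings such as OllamaLLMBasePath, not shown in the loaded diff.
OllamaLLMAuthToken: {
  envKey: "OLLAMA_AUTH_TOKEN",
  checks: [], // no validators: the token is optional, so empty values
              // must pass and existing installs stay unaffected
},
```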