-
-
Notifications
You must be signed in to change notification settings - Fork 3k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add support for Google Generative AI (Gemini) embedder (#2895)
* Add support for Google Generative AI (Gemini) embedder
* Add missing example in docker
  - Fix UI key elements in options
  - Add Gemini to data handling section
  - Patch issues with chunk handling during embedding
* Remove dupe in env

---------

Co-authored-by: timothycarambat <[email protected]>
- Loading branch information
1 parent
8d302c3
commit fa3079b
Showing
9 changed files
with
138 additions
and
3 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
47 changes: 47 additions & 0 deletions
47
frontend/src/components/EmbeddingSelection/GeminiOptions/index.jsx
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
export default function GeminiOptions({ settings }) { | ||
return ( | ||
<div className="w-full flex flex-col gap-y-4"> | ||
<div className="w-full flex items-center gap-[36px] mt-1.5"> | ||
<div className="flex flex-col w-60"> | ||
<label className="text-white text-sm font-semibold block mb-3"> | ||
API Key | ||
</label> | ||
<input | ||
type="password" | ||
name="GeminiEmbeddingApiKey" | ||
className="border-none bg-theme-settings-input-bg text-white placeholder:text-theme-settings-input-placeholder text-sm rounded-lg focus:outline-primary-button active:outline-primary-button outline-none block w-full p-2.5" | ||
placeholder="Gemini API Key" | ||
defaultValue={settings?.GeminiEmbeddingApiKey ? "*".repeat(20) : ""} | ||
required={true} | ||
autoComplete="off" | ||
spellCheck={false} | ||
/> | ||
</div> | ||
<div className="flex flex-col w-60"> | ||
<label className="text-white text-sm font-semibold block mb-3"> | ||
Model Preference | ||
</label> | ||
<select | ||
name="EmbeddingModelPref" | ||
required={true} | ||
className="border-none bg-theme-settings-input-bg border-gray-500 text-white text-sm rounded-lg block w-full p-2.5" | ||
> | ||
<optgroup label="Available embedding models"> | ||
{["text-embedding-004"].map((model) => { | ||
return ( | ||
<option | ||
key={model} | ||
value={model} | ||
selected={settings?.EmbeddingModelPref === model} | ||
> | ||
{model} | ||
</option> | ||
); | ||
})} | ||
</optgroup> | ||
</select> | ||
</div> | ||
</div> | ||
</div> | ||
); | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,54 @@ | ||
/**
 * Embedding provider backed by the `@google/generative-ai` SDK.
 *
 * Configuration is read from the environment when an instance is created:
 * the API key (see the constructor) and `EMBEDDING_MODEL_PREF`, which
 * defaults to "text-embedding-004".
 */
class GeminiEmbedder {
  /**
   * @throws {Error} When no Gemini API key is present in the environment.
   */
  constructor() {
    // NOTE(review): the settings form submits this key under the name
    // `GeminiEmbeddingApiKey`, so it is presumably persisted to a dedicated
    // GEMINI_EMBEDDING_API_KEY variable - confirm against the server's env
    // mapping. GEMINI_API_KEY remains as a fallback so existing setups that
    // only define that variable keep working.
    const apiKey =
      process.env.GEMINI_EMBEDDING_API_KEY || process.env.GEMINI_API_KEY;
    if (!apiKey) throw new Error("No Gemini API key was set.");

    // Required lazily so the SDK is only loaded when this embedder is used.
    const { GoogleGenerativeAI } = require("@google/generative-ai");
    const genAI = new GoogleGenerativeAI(apiKey);
    this.model = process.env.EMBEDDING_MODEL_PREF || "text-embedding-004";
    this.gemini = genAI.getGenerativeModel({ model: this.model });

    // This property is disabled as it causes issues when sending multiple chunks at once
    // since when given 4 chunks at once, the gemini api returns 1 embedding for all 4 chunks
    // instead of 4 embeddings - no idea why this is the case, but it is not how the results are
    // expected to be returned.
    // this.maxConcurrentChunks = 1;

    // https://ai.google.dev/gemini-api/docs/models/gemini#text-embedding-and-embedding
    this.embeddingMaxChunkLength = 2_048;
    this.log(`Initialized with ${this.model}`);
  }

  /**
   * Writes a message to stdout prefixed with a cyan "[GeminiEmbedder]" tag.
   * @param {string} text - The message to log
   * @param {...any} args - Extra values forwarded to `console.log`
   */
  log(text, ...args) {
    console.log(`\x1b[36m[GeminiEmbedder]\x1b[0m ${text}`, ...args);
  }

  /**
   * Embeds a single text input
   * @param {string} textInput - The text to embed
   * @returns {Promise<Array<number>>} The embedding values, or an empty array
   *   when the response carries none
   */
  async embedTextInput(textInput) {
    const result = await this.gemini.embedContent(textInput);
    // Optional chaining lets a response without `embedding` reach the
    // empty-array fallback instead of throwing a TypeError.
    return result?.embedding?.values || [];
  }

  /**
   * Embeds a list of text inputs, one request per chunk, in order.
   * @param {Array<string>} textChunks - The list of text to embed
   * @returns {Promise<Array<Array<number>>>} One embedding per input chunk
   * @throws {Error} When any response contains no embedding values
   */
  async embedChunks(textChunks = []) {
    const embeddings = [];
    // Chunks are sent one at a time on purpose: see the constructor note on
    // batched requests returning a single embedding.
    for (const chunk of textChunks) {
      const results = await this.gemini.embedContent(chunk);
      const values = results?.embedding?.values;
      if (!values) throw new Error("No embedding values returned from gemini");
      embeddings.push(values);
    }
    return embeddings;
  }
}
|
||
// CommonJS export: exposes the GeminiEmbedder class as a named property. | ||
module.exports = { | ||
GeminiEmbedder, | ||
}; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters