Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add embedding models configurable, from both transformers.js and TEI #646

Merged
merged 32 commits into from
Jan 9, 2024
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
Show all changes
32 commits
Select commit Hold shift + click to select a range
491131c
Add embedding models configurable, from both Xenova and TEI
mikelfried Dec 19, 2023
3473fc2
fix lint and format
mikelfried Dec 20, 2023
aebf653
Fix bug in sentenceSimilarity
mikelfried Dec 20, 2023
c065045
Batches for TEI using /info route
mikelfried Dec 20, 2023
8df8fd2
Fix web search disapear when finish searching
mikelfried Dec 20, 2023
cc02b4c
Fix lint and format
mikelfried Dec 20, 2023
53fa58a
Add more options for better embedding model usage
mikelfried Dec 20, 2023
9a867aa
Fixing CR issues
mikelfried Dec 22, 2023
6c6e290
Fix websearch disapear in later PR
mikelfried Dec 22, 2023
dc8d4e9
Fix lint
mikelfried Dec 22, 2023
aacbfeb
Fix more minor code CR
mikelfried Dec 22, 2023
7a9950d
Valiadate embeddingModelName field in model config
mikelfried Dec 22, 2023
bce01d4
Add embeddingModel into shared conversation
mikelfried Dec 22, 2023
f822ced
Fix lint and format
mikelfried Dec 22, 2023
421ecca
Add default embedding model, and more readme explanation
mikelfried Dec 23, 2023
e85faee
Fix minor embedding model readme detailed
mikelfried Dec 23, 2023
00a970e
Merge branch 'main' into embedding_models
mikelfried Dec 31, 2023
a36c521
Update settings.json
mikelfried Dec 31, 2023
d132375
Update README.md
mikelfried Jan 6, 2024
e105225
Update README.md
mikelfried Jan 6, 2024
f615fef
Apply suggestions from code review
mikelfried Jan 6, 2024
9ef62b9
Resolved more issues
mikelfried Jan 6, 2024
7c79582
lint
nsarrazin Jan 8, 2024
60d9b23
Fix more issues
mikelfried Jan 8, 2024
65760bc
Fix format
mikelfried Jan 8, 2024
3eb93ba
fix small typo
mikelfried Jan 8, 2024
5914529
lint
nsarrazin Jan 9, 2024
25d9600
fix default model
mishig25 Jan 9, 2024
fafecb7
Rn `maxSequenceLength` -> `chunkCharLength`
mishig25 Jan 9, 2024
ed3688c
format
mishig25 Jan 9, 2024
4ed0066
add "authorization" example
mishig25 Jan 9, 2024
669e86d
format
mishig25 Jan 9, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions .env
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,19 @@ CA_PATH=#
CLIENT_KEY_PASSWORD=#
REJECT_UNAUTHORIZED=true

TEXT_EMBEDDING_MODELS = `[
{
"name": "Xenova/gte-small",
"displayName": "Xenova/gte-small",
"description": "Local embedding model running on the server.",
"maxSequenceLength": 512,
"endpoints": [
{ "type": "xenova" }
]
}
]`


# 'name', 'userMessageToken', 'assistantMessageToken' are required
MODELS=`[
{
Expand Down
33 changes: 33 additions & 0 deletions .env.template
Original file line number Diff line number Diff line change
Expand Up @@ -196,6 +196,7 @@ MODELS=`[
"max_new_tokens" : 8192,
"stop" : ["</s>"]
},
"embeddingModelName": "thenlper/gte-base",
"promptExamples" : [
{
"title": "Write an email from bullet list",
Expand All @@ -215,6 +216,38 @@ OLD_MODELS=`[{"name":"bigcode/starcoder"}, {"name":"OpenAssistant/oasst-sft-6-ll

TASK_MODEL='mistralai/Mistral-7B-Instruct-v0.2'

# Default to using the first text embedding model when not specifying 'embeddingModelName' in the model itself.
TEXT_EMBEDDING_MODELS = `[
mikelfried marked this conversation as resolved.
Show resolved Hide resolved
{
"name": "Xenova/gte-small",
"displayName": "Xenova/gte-small",
"description": "Local embedding model running on the server.",
"maxSequenceLength": 512,
"endpoints": [
{ "type": "xenova" }
]
},
{
"name": "thenlper/gte-base",
"displayName": "thenlper/gte-base",
"description": "Hosted embedding model running on the cloud somewhere.",
"maxSequenceLength": 512,
"endpoints": [
{ "type": "tei", "url": "http://localhost:8080/" }
]
},
{
"name": "intfloat/multilingual-e5-large",
"displayName": "intfloat/multilingual-e5-large",
"description": "Hosted embedding model running on the cloud somewhere. Uses query/passage prefixes; see https://huggingface.co/intfloat/multilingual-e5-large#faq",
"maxSequenceLength": 512,
"preQuery": "query: ",
"prePassage": "passage: ",
"endpoints": [
{ "type": "tei", "url": "http://localhost:8085/" }
]
}
]`

APP_BASE="/chat"
PUBLIC_ORIGIN=https://huggingface.co
Expand Down
4 changes: 2 additions & 2 deletions src/lib/components/OpenWebSearchResults.svelte
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,8 @@
{:else}
<CarbonCheckmark class="my-auto text-gray-500" />
{/if}
<span class="px-2 font-medium" class:text-red-700={error} class:dark:text-red-500={error}
>Web search
<span class="px-2 font-medium" class:text-red-700={error} class:dark:text-red-500={error}>
Web search
</span>
<div class="my-auto transition-all" class:rotate-90={detailsOpen}>
<CarbonCaretRight />
Expand Down
38 changes: 38 additions & 0 deletions src/lib/server/embeddingEndpoints/embeddingEndpoints.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
import {
mikelfried marked this conversation as resolved.
Show resolved Hide resolved
embeddingEndpointTei,
embeddingEndpointTeiParametersSchema,
} from "./tei/embeddingEndpoints";
import { z } from "zod";
import embeddingEndpointXenova, {
embeddingEndpointXenovaParametersSchema,
} from "./xenova/embeddingEndpoints";
mikelfried marked this conversation as resolved.
Show resolved Hide resolved

// parameters passed when generating text
interface EmbeddingEndpointParameters {
inputs: string[];
}

interface CommonEmbeddingEndpoint {
weight: number;
}

// type signature for the endpoint
export type EmbeddingEndpoint = (params: EmbeddingEndpointParameters) => Promise<number[][]>;

// generator function that takes in parameters for defining the endpoint and return the endpoint
export type EmbeddingEndpointGenerator<T extends CommonEmbeddingEndpoint> = (
parameters: T
) => EmbeddingEndpoint;

// list of all endpoint generators
export const embeddingEndpoints = {
tei: embeddingEndpointTei,
xenova: embeddingEndpointXenova,
mikelfried marked this conversation as resolved.
Show resolved Hide resolved
};

export const embeddingEndpointSchema = z.discriminatedUnion("type", [
embeddingEndpointTeiParametersSchema,
embeddingEndpointXenovaParametersSchema,
]);

export default embeddingEndpoints;
65 changes: 65 additions & 0 deletions src/lib/server/embeddingEndpoints/tei/embeddingEndpoints.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
import { z } from "zod";
import type { EmbeddingEndpoint } from "../embeddingEndpoints";
import { chunk } from "$lib/utils/chunk";

export const embeddingEndpointTeiParametersSchema = z.object({
weight: z.number().int().positive().default(1),
mikelfried marked this conversation as resolved.
Show resolved Hide resolved
model: z.any(),
type: z.literal("tei"),
url: z.string().url(),
});

const getModelInfoByUrl = async (url: string) => {
const { origin } = new URL(url);

const response = await fetch(`${origin}/info`, {
headers: {
Accept: "application/json",
"Content-Type": "application/json",
},
});

const info = await response.json();

return info;
mikelfried marked this conversation as resolved.
Show resolved Hide resolved
};

export async function embeddingEndpointTei(
input: z.input<typeof embeddingEndpointTeiParametersSchema>
): Promise<EmbeddingEndpoint> {
const { url, model } = embeddingEndpointTeiParametersSchema.parse(input);

const { max_client_batch_size, max_batch_tokens } = await getModelInfoByUrl(url);
const maxBatchSize = Math.min(
max_client_batch_size,
Math.floor(max_batch_tokens / model.maxSequenceLength)
);

return async ({ inputs }) => {
const { origin } = new URL(url);

const batchesInputs = chunk(inputs, maxBatchSize);

const batchesResults = await Promise.all(
batchesInputs.map(async (batchInputs) => {
const response = await fetch(`${origin}/embed`, {
method: "POST",
headers: {
mikelfried marked this conversation as resolved.
Show resolved Hide resolved
Accept: "application/json",
"Content-Type": "application/json",
},
body: JSON.stringify({ inputs: batchInputs, normalize: true, truncate: true }),
});

const embeddings: number[][] = await response.json();
return embeddings;
})
);

const allEmbeddings = batchesResults.flatMap((embeddings) => embeddings);
mikelfried marked this conversation as resolved.
Show resolved Hide resolved

return allEmbeddings;
};
}

export default embeddingEndpointTei;
47 changes: 47 additions & 0 deletions src/lib/server/embeddingEndpoints/xenova/embeddingEndpoints.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
import { z } from "zod";
import type { EmbeddingEndpoint } from "../embeddingEndpoints";
import type { Tensor, Pipeline } from "@xenova/transformers";
import { pipeline } from "@xenova/transformers";

export const embeddingEndpointXenovaParametersSchema = z.object({
	weight: z.number().int().positive().default(1),
	model: z.any(),
	type: z.literal("xenova"),
});

// Lazily constructs and caches one feature-extraction pipeline per model name,
// so repeated calls share a single (possibly still-loading) pipeline instance.
class XenovaModelsSingleton {
	static instances = new Map<string, Promise<Pipeline>>();

	static getInstance(modelName: string): Promise<Pipeline> {
		let cached = this.instances.get(modelName);

		if (cached === undefined) {
			cached = pipeline("feature-extraction", modelName);
			this.instances.set(modelName, cached);
		}

		return cached;
	}
}

// Embed the given inputs with mean pooling and normalization enabled.
export async function calculateEmbedding(modelName: string, inputs: string[]) {
	const extractor = await XenovaModelsSingleton.getInstance(modelName);
	const output: Tensor = await extractor(inputs, { pooling: "mean", normalize: true });

	return output.tolist();
}

export function embeddingEndpointXenova(
	input: z.input<typeof embeddingEndpointXenovaParametersSchema>
): EmbeddingEndpoint {
	const { model } = embeddingEndpointXenovaParametersSchema.parse(input);

	return ({ inputs }) => calculateEmbedding(model.name, inputs);
}

export default embeddingEndpointXenova;
78 changes: 78 additions & 0 deletions src/lib/server/embeddingModels.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
import { TEXT_EMBEDDING_MODELS } from "$env/static/private";

import { z } from "zod";
import { sum } from "$lib/utils/sum";
import embeddingEndpoints, {
embeddingEndpointSchema,
type EmbeddingEndpoint,
} from "./embeddingEndpoints/embeddingEndpoints";
import embeddingEndpointXenova from "./embeddingEndpoints/xenova/embeddingEndpoints";

// Schema for one entry of the TEXT_EMBEDDING_MODELS env JSON array.
const modelConfig = z.object({
	/** Used as an identifier in DB */
	id: z.string().optional(),
	/** Used to link to the model page, and for inference */
	name: z.string().min(1),
	displayName: z.string().min(1).optional(),
	description: z.string().min(1).optional(),
	websiteUrl: z.string().url().optional(),
	modelUrl: z.string().url().optional(),
	// When omitted, a local xenova (transformers.js) endpoint is used as fallback.
	endpoints: z.array(embeddingEndpointSchema).optional(),
	maxSequenceLength: z.number().positive(),
	// Optional prefixes prepended to queries/passages (e.g. e5-style "query: " / "passage: ").
	preQuery: z.string().default(""),
	prePassage: z.string().default(""),
});

// Parse and validate the env-provided JSON list of embedding model configs.
const embeddingModelsRaw = z.array(modelConfig).parse(JSON.parse(TEXT_EMBEDDING_MODELS));

// Normalize a parsed model config: default the DB identifier to the model name.
const processEmbeddingModel = async (m: z.infer<typeof modelConfig>) => {
	return {
		...m,
		id: m.id || m.name,
	};
};

// Attach a getEndpoint() factory that picks one of the configured endpoints
// at random, weighted by each endpoint's "weight" field.
const addEndpoint = (m: Awaited<ReturnType<typeof processEmbeddingModel>>) => ({
	...m,
	getEndpoint: async (): Promise<EmbeddingEndpoint> => {
		// No endpoints configured: run the model locally via transformers.js.
		if (!m.endpoints) {
			return embeddingEndpointXenova({
				type: "xenova",
				weight: 1,
				model: m,
			});
		}

		const totalWeight = sum(m.endpoints.map((e) => e.weight));
		let remaining = Math.random() * totalWeight;

		for (const endpoint of m.endpoints) {
			if (remaining < endpoint.weight) {
				const args = { ...endpoint, model: m };

				switch (args.type) {
					case "tei":
						return embeddingEndpoints.tei(args);
					case "xenova":
						return embeddingEndpoints.xenova(args);
				}
			}

			remaining -= endpoint.weight;
		}

		// Only reachable if the endpoint list is empty or weights are inconsistent.
		throw new Error(`Failed to select endpoint`);
	},
});

export const embeddingModels = await Promise.all(
	embeddingModelsRaw.map((e) => processEmbeddingModel(e).then(addEndpoint))
);

// First configured model is the default when a chat model names no embeddingModelName.
export const defaultEmbeddingModel = embeddingModels[0];

// Build a zod validator that accepts exactly the configured embedding model ids.
export const validateEmbeddingModel = (_models: EmbeddingBackendModel[]) => {
	// Guard: z.enum requires a non-empty tuple, and _models[0].id would otherwise
	// throw an opaque TypeError when no models are configured.
	if (_models.length === 0) {
		throw new Error("No embedding models configured");
	}
	return z.enum([_models[0].id, ..._models.slice(1).map((m) => m.id)]);
};

export type EmbeddingBackendModel = typeof defaultEmbeddingModel;
1 change: 1 addition & 0 deletions src/lib/server/models.ts
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ const modelConfig = z.object({
.optional(),
multimodal: z.boolean().default(false),
unlisted: z.boolean().default(false),
embeddingModelName: z.string().optional(),
});

const modelsRaw = z.array(modelConfig).parse(JSON.parse(MODELS));
Expand Down
41 changes: 41 additions & 0 deletions src/lib/server/sentenceSimilarity.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
import { dot } from "@xenova/transformers";
import type { EmbeddingBackendModel } from "./embeddingModels";

// Inner-product distance: for normalized embeddings, smaller = more similar.
// see here: https://github.com/nmslib/hnswlib/blob/359b2ba87358224963986f709e593d799064ace6/README.md?plain=1#L34
function innerProduct(embeddingA: number[], embeddingB: number[]) {
	return 1.0 - dot(embeddingA, embeddingB);
}

/**
 * Rank `sentences` by similarity to `query` using the given embedding model.
 *
 * @returns the indexes of the `topK` closest sentences, most similar first.
 */
export async function findSimilarSentences(
	embeddingModel: EmbeddingBackendModel,
	query: string,
	sentences: string[],
	// topK is marked optional to match its default; previous type required it,
	// making the `= 5` default dead code.
	{ topK = 5 }: { topK?: number } = {}
): Promise<number[]> {
	// Some models (e.g. e5) need distinct prefixes for queries vs. passages.
	const inputs = [
		`${embeddingModel.preQuery}${query}`,
		...sentences.map((sentence) => `${embeddingModel.prePassage}${sentence}`),
	];

	const embeddingEndpoint = await embeddingModel.getEndpoint();
	const output = await embeddingEndpoint({ inputs });

	// output[0] is the query embedding; the remaining rows map 1:1 to `sentences`.
	// Bug fix: slice(1) keeps every sentence embedding — the previous
	// slice(1, inputs.length - 1) silently dropped the last sentence.
	const queryEmbedding: number[] = output[0];
	const sentencesEmbeddings: number[][] = output.slice(1);

	const distancesFromQuery = sentencesEmbeddings.map((sentenceEmbedding, index) => ({
		distance: innerProduct(queryEmbedding, sentenceEmbedding),
		index,
	}));

	// Ascending distance = most similar first.
	distancesFromQuery.sort((a, b) => a.distance - b.distance);

	// Return the indexes of the closest topK sentences.
	return distancesFromQuery.slice(0, topK).map((item) => item.index);
}
Loading
Loading