diff --git a/packages/benchmarks/src/quizQuestions/bin/makeTags.ts b/packages/benchmarks/src/quizQuestions/bin/makeTags.ts index ed2b8343f..8f42529fc 100644 --- a/packages/benchmarks/src/quizQuestions/bin/makeTags.ts +++ b/packages/benchmarks/src/quizQuestions/bin/makeTags.ts @@ -1,4 +1,4 @@ -import { mongoDbMetadata } from "chatbot-server-mongodb-public"; +import mongoDbMetadata from "mongodb-rag-core/mongoDbMetadata"; import { QuizQuestionData } from "../QuizQuestionData"; const { mongoDbProductNames, mongoDbTopics, mongoDbProgrammingLanguages } = diff --git a/packages/chatbot-server-mongodb-public/src/eval/ConversationEval.ts b/packages/chatbot-server-mongodb-public/src/eval/ConversationEval.ts index c02455256..dedf9a573 100644 --- a/packages/chatbot-server-mongodb-public/src/eval/ConversationEval.ts +++ b/packages/chatbot-server-mongodb-public/src/eval/ConversationEval.ts @@ -21,7 +21,7 @@ import { Factuality, } from "autoevals"; import { strict as assert } from "assert"; -import { MongoDbTag } from "../mongoDbMetadata"; +import { MongoDbTag } from "mongodb-rag-core/mongoDbMetadata"; import { fuzzyLinkMatch } from "./fuzzyLinkMatch"; import { binaryNdcgAtK } from "./scorers/binaryNdcgAtK"; import { ConversationEvalCase as ConversationEvalCaseSource } from "mongodb-rag-core/eval"; diff --git a/packages/chatbot-server-mongodb-public/src/eval/bin/generateEvalCasesYamlFromCSV.ts b/packages/chatbot-server-mongodb-public/src/eval/bin/generateEvalCasesYamlFromCSV.ts index 4994a5605..0cda5af28 100644 --- a/packages/chatbot-server-mongodb-public/src/eval/bin/generateEvalCasesYamlFromCSV.ts +++ b/packages/chatbot-server-mongodb-public/src/eval/bin/generateEvalCasesYamlFromCSV.ts @@ -10,13 +10,13 @@ Can be run through npm script or directly using node: ```bash - npm run generate-eval-cases -- [transformationType] + npm run generate-eval-cases -- [transformationType] [transformationOptions] ``` Or: ```bash - node generateEvalCasesYamlFromCSV.js [transformationType] + node 
generateEvalCasesYamlFromCSV.js [transformationType] [transformationOptions] ``` ### Arguments @@ -24,10 +24,12 @@ - `csvFilePath`: (Required) Absolute path to the input CSV file - `yamlFileName`: (Required) Name of the output YAML file (without .yml extension) - `transformationType`: (Optional) Type of transformation to apply to the cases + - `transformationOptions`: (Optional) Additional options for the transformation ### Available Transformations - - `web`: Adds a "web" tag to all evaluation cases + - `addTags`: Adds specified tags to all evaluation cases + - `addCustomTags`: Adds specified custom tags to all evaluation cases ### File Paths @@ -36,14 +38,14 @@ ### Example ```bash - npm run generate-eval-cases -- Users/first.lastname/Downloads/input-file.csv output-file-name web + npm run generate-eval-cases -- /path/to/input.csv output-name addTags tag1 tag2 ``` This will: - 1. Read from: /Users/first.lastname/Downloads/input-file.csv - 2. Apply the web transformation - 3. Write to: evalCases/output-file-name.yml - 4. Log missing resources to the console in a warning + 1. Read from: /path/to/input.csv + 2. Add tags "tag1" and "tag2" to all cases, after validating them against the MongoDbTags enum. + 3. Write to: evalCases/output-name.yml + 4. 
Log any missing resources to the console as warnings */ import fs from "fs"; @@ -55,6 +57,7 @@ import { } from "mongodb-rag-core/eval"; import { MONGODB_CONNECTION_URI, MONGODB_DATABASE_NAME } from "../../config"; import { makeMongoDbPageStore } from "mongodb-rag-core"; +import { validateTags } from "mongodb-rag-core"; const SRC_ROOT = path.resolve(__dirname, "../"); @@ -63,24 +66,30 @@ const pageStore = makeMongoDbPageStore({ databaseName: MONGODB_DATABASE_NAME, }); -function addWebDataSourceTag(evalCases: ConversationEvalCase[]) { - return evalCases.map((caseItem) => { - const tags = caseItem.tags || []; - if (!tags.includes("web")) { - tags.push("web"); - } - return { - ...caseItem, - tags, - }; - }); +function addTags({ + evalCases, + tagNames, + custom = false, +}: { + evalCases: ConversationEvalCase[]; + tagNames: string[]; + custom?: boolean; +}): ConversationEvalCase[] { + validateTags(tagNames, custom); + return evalCases.map((caseItem) => ({ + ...caseItem, + tags: [...(caseItem.tags || []), ...tagNames], + })); } const transformationMap: Record< string, - (cases: ConversationEvalCase[]) => ConversationEvalCase[] + (cases: ConversationEvalCase[], options?: string[]) => ConversationEvalCase[] > = { - web: addWebDataSourceTag, + addTags: (cases: ConversationEvalCase[], options?: string[]) => + addTags({ evalCases: cases, tagNames: options || [] }), + addCustomTags: (cases: ConversationEvalCase[], options?: string[]) => + addTags({ evalCases: cases, tagNames: options || [], custom: true }), // Add more transformation functions here as needed }; @@ -103,15 +112,20 @@ async function main({ csvFilePath, yamlFileName, transformationType, + transformationOptions, }: { csvFilePath: string; yamlFileName: string; transformationType?: keyof typeof transformationMap; + transformationOptions?: string[]; }): Promise { console.log(`Reading from: ${csvFilePath}`); const evalCases = await getConversationEvalCasesFromCSV( csvFilePath, - transformationType ? 
transformationMap[transformationType] : undefined + transformationType + ? (cases) => + transformationMap[transformationType](cases, transformationOptions) + : undefined ); const expectedUrls = Array.from( new Set(evalCases.flatMap((caseItem) => caseItem.expectedLinks ?? [])) @@ -142,7 +156,12 @@ async function main({ // Checks if the script is being run directly (not imported as a module) and handles command-line arguments. if (require.main === module) { const args = process.argv.slice(2); - const [csvFilePath, yamlFileName, transformationType] = args; + const [ + csvFilePath, + yamlFileName, + transformationType, + ...transformationOptions + ] = args; const availableTransformationTypes = Object.keys(transformationMap); if ( args.length < 2 || @@ -150,7 +169,7 @@ if (require.main === module) { !availableTransformationTypes.includes(transformationType)) ) { console.error( - "Usage: node generateEvalCasesYamlFromCSV.js [transformationType]\n" + + "Usage: node generateEvalCasesYamlFromCSV.js [transformationType] [tranformationOptions]\n" + "Arguments:\n" + " csvFileName: Input CSV file name (required)\n" + " yamlFileName: Output YAML file name (required)\n" + @@ -166,6 +185,7 @@ if (require.main === module) { csvFilePath, yamlFileName, transformationType, + transformationOptions, }) .catch((error) => { console.error("Error:", error); diff --git a/packages/chatbot-server-mongodb-public/src/eval/experiments/skillsQuestionsTest.eval.ts b/packages/chatbot-server-mongodb-public/src/eval/experiments/skillsQuestionsTest.eval.ts index d8059e3a2..bc054325b 100644 --- a/packages/chatbot-server-mongodb-public/src/eval/experiments/skillsQuestionsTest.eval.ts +++ b/packages/chatbot-server-mongodb-public/src/eval/experiments/skillsQuestionsTest.eval.ts @@ -17,7 +17,10 @@ async function conversationEval() { // Get dotcom question set eval cases from YAML const basePath = path.resolve(__dirname, "..", "..", "..", "evalCases"); const conversationEvalCases = 
getConversationsEvalCasesFromYaml( - fs.readFileSync(path.resolve(basePath, "uni_skills_evaluation_questions.yml"), "utf8") + fs.readFileSync( + path.resolve(basePath, "uni_skills_evaluation_questions.yml"), + "utf8" + ) ); const generateConfig = { @@ -52,4 +55,4 @@ async function conversationEval() { generate: generateConfig, }); } -conversationEval(); \ No newline at end of file +conversationEval(); diff --git a/packages/chatbot-server-mongodb-public/src/lib.ts b/packages/chatbot-server-mongodb-public/src/lib.ts index 45c3766a6..08d7bc9a6 100644 --- a/packages/chatbot-server-mongodb-public/src/lib.ts +++ b/packages/chatbot-server-mongodb-public/src/lib.ts @@ -3,4 +3,4 @@ Export some modules from the implementation for use in things like evaluation. */ export { systemPrompt } from "./systemPrompt"; -export * as mongoDbMetadata from "./mongoDbMetadata"; +export * as mongoDbMetadata from "mongodb-rag-core"; diff --git a/packages/chatbot-server-mongodb-public/src/mongoDbMetadata/index.ts b/packages/chatbot-server-mongodb-public/src/mongoDbMetadata/index.ts deleted file mode 100644 index 27f79b587..000000000 --- a/packages/chatbot-server-mongodb-public/src/mongoDbMetadata/index.ts +++ /dev/null @@ -1,4 +0,0 @@ -export * from "./products"; -export * from "./programmingLanguages"; -export * from "./tags"; -export * from "./topics"; diff --git a/packages/chatbot-server-mongodb-public/src/mongoDbMetadata/products.ts b/packages/chatbot-server-mongodb-public/src/mongoDbMetadata/products.ts deleted file mode 100644 index 9d39f85d5..000000000 --- a/packages/chatbot-server-mongodb-public/src/mongoDbMetadata/products.ts +++ /dev/null @@ -1,330 +0,0 @@ -import { z } from "zod"; -import { mongoDbProgrammingLanguageIds } from "./programmingLanguages"; - -export const MongoDbProductSchema = z.object({ - id: z.string().describe("Unique identifier for the product"), - name: z.string().describe("Human-friendly name of the product"), - description: z - .string() - .optional() - 
.describe("Brief description of the product"), - programmingLanguage: z - .enum(mongoDbProgrammingLanguageIds) - .optional() - .describe("The programming language used to interact with the product"), - parentProductId: z.string().optional().describe("`id` of the parent product"), -}); -export type MongoDbProduct = z.infer; - -/** - Available MongoDB drivers. - */ -export const mongodbDrivers = [ - { - id: "c_driver", - name: "C Driver", - description: "MongoDB C Driver", - parentProductId: "driver", - programmingLanguage: "c", - }, - { - id: "cpp_driver", - name: "C++ Driver", - description: "MongoDB C++ Driver", - parentProductId: "driver", - programmingLanguage: "cpp", - }, - { - id: "csharp_driver", - name: "C# Driver", - description: "MongoDB C# Driver", - parentProductId: "driver", - programmingLanguage: "csharp", - }, - { - id: "entity_framework_core", - name: "Entity Framework Core Provider", - description: "MongoDB Entity Framework Core Provider", - parentProductId: "driver", - programmingLanguage: "csharp", - }, - { - id: "go_driver", - name: "Go Driver", - description: "MongoDB Go Driver", - parentProductId: "driver", - programmingLanguage: "go", - }, - { - id: "java_sync_driver", - name: "Java Sync Driver", - description: "MongoDB Java Sync Driver", - parentProductId: "driver", - programmingLanguage: "java", - }, - { - id: "java_reactive_streams_driver", - name: "Java Reactive Streams Driver", - description: "MongoDB Java Reactive Streams Driver", - parentProductId: "driver", - programmingLanguage: "java", - }, - { - id: "kotlin_coroutine_driver", - name: "Kotlin Coroutine Driver", - description: "MongoDB Kotlin Coroutine Driver", - parentProductId: "driver", - programmingLanguage: "kotlin", - }, - { - id: "kotlin_sync_driver", - name: "Kotlin Sync Driver", - description: "MongoDB Kotlin Sync Driver", - parentProductId: "driver", - programmingLanguage: "kotlin", - }, - { - id: "nodejs_driver", - name: "Node.js Driver", - description: "MongoDB Node.js 
Driver", - parentProductId: "driver", - programmingLanguage: "javascript", - }, - { - id: "laravel_mongodb", - name: "Laravel MongoDB", - description: "Laravel MongoDB integration", - parentProductId: "driver", - programmingLanguage: "php", - }, - { - id: "php_library", - name: "PHP Library", - description: "MongoDB PHP Library", - parentProductId: "driver", - programmingLanguage: "php", - }, - { - id: "pymongo_driver", - name: "PyMongo Driver", - description: "MongoDB PyMongo Driver", - parentProductId: "driver", - programmingLanguage: "python", - }, - { - id: "pymongo_arrow_driver", - name: "PyMongo Arrow Driver", - description: - "MongoDB PyMongo Arrow Driver for Apache Arrow tables, NumPy arrays, and Pandas or Polars DataFrames", - parentProductId: "driver", - programmingLanguage: "python", - }, - { - id: "ruby_driver", - name: "Ruby Driver", - description: "MongoDB Ruby Driver", - parentProductId: "driver", - programmingLanguage: "ruby", - }, - { - id: "mongoid_odm", - name: "Mongoid ODM", - description: "MongoDB Mongoid ODM for Ruby on Rails", - parentProductId: "driver", - programmingLanguage: "ruby", - }, - { - id: "rust_driver", - name: "Rust Driver", - description: "MongoDB Rust Driver", - parentProductId: "driver", - programmingLanguage: "rust", - }, - { - id: "scala_driver", - name: "Scala Driver", - description: "MongoDB Scala Driver", - parentProductId: "driver", - programmingLanguage: "scala", - }, - { - id: "swift_driver", - name: "Swift Driver", - description: "MongoDB Swift Driver", - parentProductId: "driver", - programmingLanguage: "swift", - }, -] as const satisfies MongoDbProduct[]; - -/** - Available MongoDB products. 
- */ -export const mongoDbProducts = [ - { - id: "server", - name: "MongoDB Server", - description: "Core MongoDB server", - }, - { - id: "aggregation", - name: "Aggregation Framework", - description: "Process multiple documents and return computed results", - parentProductId: "server", - }, - { - id: "atlas", - name: "MongoDB Atlas", - description: "Cloud database platform-as-a-service", - }, - { - id: "atlas_charts", - name: "Atlas Charts", - description: "Visualize data stored in MongoDB Atlas", - parentProductId: "atlas", - }, - { - id: "atlas_search", - name: "Atlas Search", - description: "Full-text search on your data in MongoDB Atlas", - parentProductId: "atlas", - }, - { - id: "atlas_vector_search", - name: "Atlas Vector Search", - description: "Vector search on your data in MongoDB Atlas", - parentProductId: "atlas", - }, - { - id: "data_federation", - name: "Data Federation", - description: - "Query data across multiple MongoDB databases and cloud object stores", - }, - { - id: "atlas_cli", - name: "Atlas CLI", - description: "CLI to interact with MongoDB Atlas", - parentProductId: "atlas", - }, - { - id: "driver", - name: "Drivers", - description: "Client libraries for querying MongoDB", - parentProductId: "server", - }, - { - id: "change_streams", - name: "Change Streams", - description: "Listen to changes in MongoDB data", - parentProductId: "server", - }, - { - id: "compass", - name: "MongoDB Compass", - description: "GUI tool for MongoDB", - }, - { - id: "gridfs", - name: "GridFS", - description: "Store large files across multiple MongoDB documents", - parentProductId: "server", - }, - { - id: "bi_connector", - name: "MongoDB Connector for BI", - description: - "Query MongoDB data with SQL using business intelligence tools.", - }, - { - id: "atlas_stream_processing", - name: "Atlas Stream Processing", - parentProductId: "atlas", - description: "Real-time data processing with MongoDB Atlas", - }, - { - id: "atlas_triggers", - name: "Atlas Triggers", 
- parentProductId: "atlas", - }, - { - id: "mongodb_ops_manager", - name: "MongoDB Ops Manager", - description: "On-prem management tool for MongoDB", - }, - { - id: "mongodb_cloud_manager", - name: "MongoDB Cloud Manager", - description: "Self-hosted management tool for MongoDB in the cloud", - }, - { - id: "spark_connector", - name: "Spark Connector", - description: "MongoDB Connector for Apache Spark", - }, - { - id: "shell", - name: "MongoDB Shell (mongosh)", - description: - "JavaScript and Node.js REPL for interacting with MongoDB deployments", - }, - { - id: "atlas_gov", - name: "MongoDB Atlas for Government", - description: "MongoDB Atlas for Government", - }, - { - id: "vs_code_extension", - name: "VS Code Extension", - description: "Visual Studio Cod extension for MongoDB", - }, - { - id: "mongodb_cli", - name: "MongoDB CLI", - description: "CLI for interacting with MongoDB deployments", - }, - { - id: "visual_studio_extension", - name: "C# Analyzer", - description: "C# Roslyn Analyzer for MongoDB", - }, - { - id: "kafka_connector", - name: "Kafka Connector", - description: "MongoDB Kafka Connector", - }, - { - id: "cluster_sync", - name: "Cluster-to-Cluster Sync", - description: "Sync data between MongoDB clusters", - }, - { - id: "k8s_operator", - name: "Kubernetes Operator", - description: - "Manage the typical lifecycle events for a MongoDB cluster deployed to Kubernetes", - }, - { - id: "relational_migrator", - name: "Relational Migrator", - description: "Migrates data from relational databases to MongoDB", - }, - { - id: "mongodb_university", - name: "MongoDB University", - description: "Online platform that offers certifications, courses, labs, and skills badges", - }, - { - id: "skills", - name: "MongoDB University Skills", - description: "An educational program that allows users to earn a skill badge after taking a short course and completing an assessment", - }, -] as const satisfies MongoDbProduct[]; - -export type MongoDbProductName = (typeof 
mongoDbProducts)[number]["name"]; -export type MongoDbProductNameEnum = [ - MongoDbProductName, - ...MongoDbProductName[] -]; -export const mongoDbProductNames = mongoDbProducts.map( - (prod) => prod.name -) as MongoDbProductNameEnum; diff --git a/packages/chatbot-server-mongodb-public/src/mongoDbMetadata/programmingLanguages.ts b/packages/chatbot-server-mongodb-public/src/mongoDbMetadata/programmingLanguages.ts deleted file mode 100644 index 47ae6082d..000000000 --- a/packages/chatbot-server-mongodb-public/src/mongoDbMetadata/programmingLanguages.ts +++ /dev/null @@ -1,86 +0,0 @@ -import { z } from "zod"; - -export const mongoDbProgrammingLanguages = [ - { - id: "shell", - }, - { - id: "javascript", - name: "JavaScript", - }, - { - id: "typescript", - name: "TypeScript", - }, - { - id: "python", - name: "Python", - }, - { - id: "java", - name: "Java", - }, - { - id: "csharp", - name: "C#", - }, - { - id: "cpp", - name: "C++", - }, - { - id: "ruby", - name: "Ruby", - }, - { - id: "kotlin", - name: "Kotlin", - }, - { - id: "c", - name: "C", - }, - { - id: "dart", - name: "Dart", - }, - { - id: "go", - name: "Go", - }, - { - id: "php", - name: "PHP", - }, - { - id: "rust", - name: "Rust", - }, - { - id: "scala", - name: "Scala", - }, - { - id: "swift", - name: "Swift", - }, -] as const satisfies MongoDbProgrammingLanguage[]; - -export type MongoDbProgrammingLanguageId = - (typeof mongoDbProgrammingLanguages)[number]["id"]; -export type MongoDbProgrammingLanguageIdEnum = [ - MongoDbProgrammingLanguageId, - ...MongoDbProgrammingLanguageId[] -]; -export const mongoDbProgrammingLanguageIds = mongoDbProgrammingLanguages.map( - (language) => language.id -) as MongoDbProgrammingLanguageIdEnum; - -export const MongoDBProgrammingLanguageSchema = z.object({ - id: z.string().describe("Unique identifier for the topic"), - name: z.string().optional().describe("Human-friendly name of the topic"), - description: z.string().optional().describe("Brief description of the topic"), -}); 
-export type MongoDbProgrammingLanguage = z.infer< - typeof MongoDBProgrammingLanguageSchema ->; diff --git a/packages/chatbot-server-mongodb-public/src/mongoDbMetadata/tags.ts b/packages/chatbot-server-mongodb-public/src/mongoDbMetadata/tags.ts deleted file mode 100644 index a42538888..000000000 --- a/packages/chatbot-server-mongodb-public/src/mongoDbMetadata/tags.ts +++ /dev/null @@ -1,17 +0,0 @@ -import { mongoDbProducts, mongodbDrivers } from "./products"; -import { mongoDbProgrammingLanguageIds } from "./programmingLanguages"; -import { mongoDbTopics } from "./topics"; - -// Helpers for constructing the `MongoDbTag` union type -const mongoDbProductIds = mongoDbProducts.map((product) => product.id); -const mongoDbDriverIds = mongodbDrivers.map((driver) => driver.id); -const mongoDbTopicIds = mongoDbTopics.map((topic) => topic.id); - -/** - All possible MongoDB tags. Useful for tagging evaluations. - */ -export type MongoDbTag = - | (typeof mongoDbProgrammingLanguageIds)[number] - | (typeof mongoDbProductIds)[number] - | (typeof mongoDbDriverIds)[number] - | (typeof mongoDbTopicIds)[number]; diff --git a/packages/chatbot-server-mongodb-public/src/mongoDbMetadata/topics.ts b/packages/chatbot-server-mongodb-public/src/mongoDbMetadata/topics.ts deleted file mode 100644 index 8527745ba..000000000 --- a/packages/chatbot-server-mongodb-public/src/mongoDbMetadata/topics.ts +++ /dev/null @@ -1,37 +0,0 @@ -import { z } from "zod"; -export const MongoDbTopicSchema = z.object({ - id: z.string().describe("Unique identifier for the topic"), - name: z.string().describe("Human-friendly name of the topic").optional(), - description: z.string().optional().describe("Brief description of the topic"), -}); -export type MongoDbTopic = z.infer; - -export const mongoDbTopics = [ - { - id: "multi_cloud", - }, - { - id: "analytics", - }, - { - id: "security", - }, - { - id: "sharding", - }, - { - id: "replication", - }, - { - id: "performance", - }, - { - id: "indexes", - }, - { - id: 
"billing", - }, - { - id: "iam", - }, -] as const satisfies MongoDbTopic[]; diff --git a/packages/chatbot-server-mongodb-public/src/processors/extractMongoDbMetadataFromUserMessage.eval.ts b/packages/chatbot-server-mongodb-public/src/processors/extractMongoDbMetadataFromUserMessage.eval.ts index 34804f10d..e84156828 100644 --- a/packages/chatbot-server-mongodb-public/src/processors/extractMongoDbMetadataFromUserMessage.eval.ts +++ b/packages/chatbot-server-mongodb-public/src/processors/extractMongoDbMetadataFromUserMessage.eval.ts @@ -5,7 +5,7 @@ import { } from "./extractMongoDbMetadataFromUserMessage"; import { Eval } from "mongodb-rag-core/braintrust"; import { Scorer } from "autoevals"; -import { MongoDbTag } from "../mongoDbMetadata"; +import { MongoDbTag } from "mongodb-rag-core/mongoDbMetadata"; import { openAiClient, OPENAI_PREPROCESSOR_CHAT_COMPLETION_DEPLOYMENT, diff --git a/packages/chatbot-server-mongodb-public/src/processors/extractMongoDbMetadataFromUserMessage.ts b/packages/chatbot-server-mongodb-public/src/processors/extractMongoDbMetadataFromUserMessage.ts index 54e7e7e2c..ed6ac972a 100644 --- a/packages/chatbot-server-mongodb-public/src/processors/extractMongoDbMetadataFromUserMessage.ts +++ b/packages/chatbot-server-mongodb-public/src/processors/extractMongoDbMetadataFromUserMessage.ts @@ -8,7 +8,7 @@ import { OpenAI } from "mongodb-rag-core/openai"; import { mongoDbProductNames, mongoDbProgrammingLanguageIds, -} from "../mongoDbMetadata"; +} from "mongodb-rag-core/mongoDbMetadata"; export const ExtractMongoDbMetadataFunctionSchema = z.object({ programmingLanguage: z diff --git a/packages/chatbot-server-mongodb-public/src/processors/makeStepBackUserQuery.eval.ts b/packages/chatbot-server-mongodb-public/src/processors/makeStepBackUserQuery.eval.ts index cbcf454d5..af4163318 100644 --- a/packages/chatbot-server-mongodb-public/src/processors/makeStepBackUserQuery.eval.ts +++ 
b/packages/chatbot-server-mongodb-public/src/processors/makeStepBackUserQuery.eval.ts @@ -6,7 +6,7 @@ import { } from "./makeStepBackUserQuery"; import { Message, updateFrontMatter } from "mongodb-chatbot-server"; import { ObjectId } from "mongodb-rag-core/mongodb"; -import { MongoDbTag } from "../mongoDbMetadata"; +import { MongoDbTag } from "mongodb-rag-core/mongoDbMetadata"; import { OPENAI_PREPROCESSOR_CHAT_COMPLETION_DEPLOYMENT, OPENAI_API_KEY, diff --git a/packages/chatbot-server-mongodb-public/src/processors/makeStepBackUserQuery.ts b/packages/chatbot-server-mongodb-public/src/processors/makeStepBackUserQuery.ts index 4ea2da9c2..eb3ec846d 100644 --- a/packages/chatbot-server-mongodb-public/src/processors/makeStepBackUserQuery.ts +++ b/packages/chatbot-server-mongodb-public/src/processors/makeStepBackUserQuery.ts @@ -110,8 +110,9 @@ const fewShotExamples: OpenAI.ChatCompletionMessageParam[] = [ mongoDbProduct: "MongoDB University", }) ), - makeAssistantFunctionCallMessage(name,{ - transformedUserQuery: "What is the skill badge program on MongoDB University?", + makeAssistantFunctionCallMessage(name, { + transformedUserQuery: + "What is the skill badge program on MongoDB University?", } satisfies StepBackUserQueryMongoDbFunction), ]; diff --git a/packages/chatbot-server-mongodb-public/src/processors/retrieveRelevantContent.eval.ts b/packages/chatbot-server-mongodb-public/src/processors/retrieveRelevantContent.eval.ts index 896b04199..9e411f6e2 100644 --- a/packages/chatbot-server-mongodb-public/src/processors/retrieveRelevantContent.eval.ts +++ b/packages/chatbot-server-mongodb-public/src/processors/retrieveRelevantContent.eval.ts @@ -20,7 +20,7 @@ import { binaryNdcgAtK } from "../eval/scorers/binaryNdcgAtK"; import { f1AtK } from "../eval/scorers/f1AtK"; import { precisionAtK } from "../eval/scorers/precisionAtK"; import { recallAtK } from "../eval/scorers/recallAtK"; -import { MongoDbTag } from "../mongoDbMetadata"; +import { MongoDbTag } from 
"mongodb-rag-core/mongoDbMetadata"; import { extractMongoDbMetadataFromUserMessage, ExtractMongoDbMetadataFunction, diff --git a/packages/chatbot-server-mongodb-public/src/processors/userMessageMongoDbGuardrail.eval.ts b/packages/chatbot-server-mongodb-public/src/processors/userMessageMongoDbGuardrail.eval.ts index 4b253323b..64c9f5e29 100644 --- a/packages/chatbot-server-mongodb-public/src/processors/userMessageMongoDbGuardrail.eval.ts +++ b/packages/chatbot-server-mongodb-public/src/processors/userMessageMongoDbGuardrail.eval.ts @@ -5,7 +5,7 @@ import { } from "./userMessageMongoDbGuardrail"; import { Eval } from "mongodb-rag-core/braintrust"; import { Scorer } from "autoevals"; -import { MongoDbTag } from "../mongoDbMetadata"; +import { MongoDbTag } from "mongodb-rag-core/mongoDbMetadata"; import { OPENAI_PREPROCESSOR_CHAT_COMPLETION_DEPLOYMENT, openAiClient, diff --git a/packages/chatbot-server-mongodb-public/src/processors/userMessageMongoDbGuardrail.ts b/packages/chatbot-server-mongodb-public/src/processors/userMessageMongoDbGuardrail.ts index 340a52057..f12176726 100644 --- a/packages/chatbot-server-mongodb-public/src/processors/userMessageMongoDbGuardrail.ts +++ b/packages/chatbot-server-mongodb-public/src/processors/userMessageMongoDbGuardrail.ts @@ -123,14 +123,14 @@ const fewShotExamples: OpenAI.ChatCompletionMessageParam[] = [ "This query is unclear but could be about filtering data, which is a common operation in MongoDB. Therefore, it is relevant to MongoDB.", type: "valid", } satisfies UserMessageMongoDbGuardrailFunction), - // Example 12 + // Example 9 makeUserMessage("and"), makeAssistantFunctionCallMessage(name, { reasoning: "This query is unclear and may be a typo or incomplete. However, it could be related to the $and operator in MongoDB. It is certainly not inappropriate. 
Therefore, it is relevant to MongoDB.", type: "valid", } satisfies UserMessageMongoDbGuardrailFunction), - // Example 9 + // Example 10 makeUserMessage( "What courses do you have on generative artificial intelligence?" ), @@ -139,14 +139,14 @@ const fewShotExamples: OpenAI.ChatCompletionMessageParam[] = [ "This query asks for courses on generative artificial intelligence, which is a relevant area to MongoDB's business. Therefore, it is relevant to MongoDB.", type: "valid", } satisfies UserMessageMongoDbGuardrailFunction), - // Example 10 + // Example 11 makeUserMessage("What is an ODL?"), makeAssistantFunctionCallMessage(name, { reasoning: "This query asks about an Operational Data Layer (ODL), which is an architectural pattern that can be used with MongoDB. Therefore, it is relevant to MongoDB.", type: "valid", } satisfies UserMessageMongoDbGuardrailFunction), - // Example 11 + // Example 12 makeUserMessage("What is a skill?"), makeAssistantFunctionCallMessage(name, { reasoning: diff --git a/packages/chatbot-server-mongodb-public/src/verifiedAnswers.eval.ts b/packages/chatbot-server-mongodb-public/src/verifiedAnswers.eval.ts index 0095fe535..abd286050 100644 --- a/packages/chatbot-server-mongodb-public/src/verifiedAnswers.eval.ts +++ b/packages/chatbot-server-mongodb-public/src/verifiedAnswers.eval.ts @@ -1,5 +1,5 @@ import { Eval, EvalCase, EvalScorer } from "braintrust"; -import { MongoDbTag } from "./mongoDbMetadata"; +import { MongoDbTag } from "mongodb-rag-core/mongoDbMetadata"; import { findVerifiedAnswer, verifiedAnswerConfig, diff --git a/packages/mongodb-rag-core/src/mongoDbMetadata/tags.ts b/packages/mongodb-rag-core/src/mongoDbMetadata/tags.ts index a42538888..dc6d42460 100644 --- a/packages/mongodb-rag-core/src/mongoDbMetadata/tags.ts +++ b/packages/mongodb-rag-core/src/mongoDbMetadata/tags.ts @@ -15,3 +15,38 @@ export type MongoDbTag = | (typeof mongoDbProductIds)[number] | (typeof mongoDbDriverIds)[number] | (typeof mongoDbTopicIds)[number]; + +// 
Combine all tag arrays into a single array
+// (read-only, so validation can never reorder or extend the shared list)
+const allTags: readonly string[] = [
+  ...mongoDbProgrammingLanguageIds,
+  ...mongoDbProductIds,
+  ...mongoDbDriverIds,
+  ...mongoDbTopicIds,
+];
+
+/**
+  Validates tag names against the known `MongoDbTag` ids.
+
+  @param tagNames - Tag names to validate.
+  @param custom - If true, arbitrary tags are allowed and nothing is checked.
+  @throws {Error} If `custom` is false and any tag is not a known tag id. The
+  message lists the invalid tags and the allowed ids in sorted order.
+ */
+export const validateTags = (tagNames: string[], custom: boolean): void => {
+  // Custom tags are free-form, so there is nothing to check.
+  if (custom) {
+    return;
+  }
+  const invalidTags = tagNames.filter((tag) => !allTags.includes(tag));
+  if (invalidTags.length === 0) {
+    return;
+  }
+  // Sort a copy: `sort()` reorders in place and `allTags` is module-level state.
+  const allowedTags = [...allTags].sort().join("\n - ");
+  throw new Error(
+    `Invalid tags found: ${invalidTags.join(
+      ", "
+    )} \nUse the "addCustomTags" transformation instead or use allowed tags: \n - ${allowedTags}`
+  );
+};