diff --git a/firestore-bigquery-export/guides/GENERATE_SCHEMA_VIEWS.md b/firestore-bigquery-export/guides/GENERATE_SCHEMA_VIEWS.md index 8c28bef7a..8d8f230b6 100644 --- a/firestore-bigquery-export/guides/GENERATE_SCHEMA_VIEWS.md +++ b/firestore-bigquery-export/guides/GENERATE_SCHEMA_VIEWS.md @@ -60,6 +60,7 @@ You'll be prompted for: - BigQuery dataset ID - Table Prefix - Firestore collection path to sample +- Whether to use collection group query - Google AI API key - Directory and filename for the schema @@ -78,6 +79,34 @@ npx @firebaseextensions/fs-bq-schema-views \ --gemini-schema-file-name=user_schema ``` +For collection group queries (to query all collections with the same name across your database): + +```bash +npx @firebaseextensions/fs-bq-schema-views \ + --non-interactive \ + --project=my-firebase-project \ + --big-query-project=my-bq-project \ + --dataset=firestore_changelog \ + --table-name-prefix=user_profiles \ + --use-gemini=secure \ + --query-collection-group \ + --google-ai-key=$GOOGLE_API_KEY \ + --schema-directory=./schemas \ + --gemini-schema-file-name=user_schema +``` + +#### Understanding Collection vs Collection Group Queries + +- **Collection Query** (default): Queries documents from a specific collection path + + - Example: `users/123/orders` - queries orders for a specific user + - Use when you have a specific collection path + +- **Collection Group Query** (`--query-collection-group`): Queries all collections with the same name across your entire database + - Example: `orders` - queries all order collections regardless of their parent path + - Use when you have collections with the same name under different documents + - Useful for subcollections that appear in multiple places + ⚠️ **Important**: Always review generated schemas before using them in production. ### Option 2: Create a Schema File Manually @@ -133,6 +162,19 @@ npx @firebaseextensions/fs-bq-schema-views \ --schema-files=./test_schema.json ``` +For collection group queries with manual schemas: + +```bash +npx @firebaseextensions/fs-bq-schema-views \ + --non-interactive \ + --project=YOUR_PROJECT_ID \ + --big-query-project=YOUR_BIGQUERY_PROJECT_ID \ + --dataset=YOUR_DATASET_ID \ + --table-name-prefix=YOUR_TABLE_PREFIX \ + --schema-files=./test_schema.json \ + --query-collection-group +``` + For multiple schema files, use comma separation: ``` diff --git a/firestore-bigquery-export/scripts/gen-schema-view/src/__tests__/config/index.test.ts b/firestore-bigquery-export/scripts/gen-schema-view/src/__tests__/config/index.test.ts index b7597bca9..e84df471b 100644 --- a/firestore-bigquery-export/scripts/gen-schema-view/src/__tests__/config/index.test.ts +++ b/firestore-bigquery-export/scripts/gen-schema-view/src/__tests__/config/index.test.ts @@ -68,6 +68,7 @@ describe("parseConfig", () => { googleAiKey: undefined, schemaDirectory: undefined, useGemini: false, + isCollectionGroupQuery: undefined, }); }); @@ -107,6 +108,7 @@ describe("parseConfig", () => { googleAiKey: "test-key", geminiAnalyzeCollectionPath: "test-collection", schemaDirectory: "test-directory", + queryCollectionGroup: true, outputHelp: jest.fn(), }; @@ -120,6 +122,7 @@ describe("parseConfig", () => { expect(result.geminiAnalyzeCollectionPath).toBe("test-collection"); expect(result.schemaDirectory).toBe("test-directory"); expect(result.agentSampleSize).toBe(100); + expect(result.isCollectionGroupQuery).toBe(true); }); it("should exit if required parameters are missing", async () => { diff --git a/firestore-bigquery-export/scripts/gen-schema-view/src/__tests__/config/interactive.test.ts b/firestore-bigquery-export/scripts/gen-schema-view/src/__tests__/config/interactive.test.ts index 46fa7672a..3a5b7aba3 100644 --- a/firestore-bigquery-export/scripts/gen-schema-view/src/__tests__/config/interactive.test.ts +++ b/firestore-bigquery-export/scripts/gen-schema-view/src/__tests__/config/interactive.test.ts @@ -24,7 +24,7 @@ describe("Interactive Prompts", () => { describe("questions array", () => { it("should have the correct number of questions", () => { - expect(questions).toHaveLength(10); + expect(questions).toHaveLength(11); }); it("should have properly formatted questions with required properties", () => { diff --git a/firestore-bigquery-export/scripts/gen-schema-view/src/__tests__/config/non-interactive.test.ts b/firestore-bigquery-export/scripts/gen-schema-view/src/__tests__/config/non-interactive.test.ts index 8a4bf0942..8764736cf 100644 --- a/firestore-bigquery-export/scripts/gen-schema-view/src/__tests__/config/non-interactive.test.ts +++ b/firestore-bigquery-export/scripts/gen-schema-view/src/__tests__/config/non-interactive.test.ts @@ -69,7 +69,7 @@ describe("Command Line Parser", () => { expect(commander.version).toHaveBeenCalledWith("1.0.0"); // Check that all options are configured - expect(commander.option).toHaveBeenCalledTimes(10); + expect(commander.option).toHaveBeenCalledTimes(11); // Check specific options - just a sample to ensure we're setting up correctly expect(commander.option).toHaveBeenCalledWith( @@ -89,6 +89,12 @@ describe("Command Line Parser", () => { collect, [] ); + + expect(commander.option).toHaveBeenCalledWith( + "--query-collection-group", + "Use collection group query instead of regular collection query", + false + ); }); it("should return the configured program", () => { diff --git a/firestore-bigquery-export/scripts/gen-schema-view/src/__tests__/genkit/sampleFirestoreDocuments.test.ts b/firestore-bigquery-export/scripts/gen-schema-view/src/__tests__/genkit/sampleFirestoreDocuments.test.ts index f57ab0834..12b1a72b0 100644 --- a/firestore-bigquery-export/scripts/gen-schema-view/src/__tests__/genkit/sampleFirestoreDocuments.test.ts +++ b/firestore-bigquery-export/scripts/gen-schema-view/src/__tests__/genkit/sampleFirestoreDocuments.test.ts @@ -17,6 +17,7 @@ interface FirestoreModule { (): { collection: jest.Mock; + collectionGroup: jest.Mock; where: jest.Mock; limit: jest.Mock; get: jest.Mock; @@ -31,6 +32,7 @@ interface FirestoreModule { jest.mock("firebase-admin", () => { const mockFirestore = { collection: jest.fn().mockReturnThis(), + collectionGroup: jest.fn().mockReturnThis(), where: jest.fn().mockReturnThis(), limit: jest.fn().mockReturnThis(), get: jest.fn().mockResolvedValue({ @@ -136,4 +138,95 @@ describe("sampleFirestoreDocuments", () => { sampleFirestoreDocuments(collectionPath, sampleSize) ).rejects.toThrow("Firestore error"); }); + + describe("collection group queries", () => { + it("should sample documents from Firestore collection group", async () => { + const collectionPath = "orders"; + const sampleSize = 2; + const isCollectionGroupQuery = true; + + // Mock collection group data (subcollections from different parents) + const firebase = require("firebase-admin"); + const mockFirestore = firebase.firestore(); + + // Clear mocks and set up specific mock for this test + jest.clearAllMocks(); + mockFirestore.get.mockResolvedValueOnce({ + docs: [ + { + data: () => ({ orderId: "order1", amount: 50, userId: "user1" }), + id: "order1", + }, + { + data: () => ({ orderId: "order2", amount: 75, userId: "user2" }), + id: "order2", + }, + ], + }); + + const result = await sampleFirestoreDocuments( + collectionPath, + sampleSize, + isCollectionGroupQuery + ); + + expect(mockFirestore.collectionGroup).toHaveBeenCalledWith( + collectionPath + ); + expect(mockFirestore.collection).not.toHaveBeenCalled(); + expect(mockFirestore.where).not.toHaveBeenCalled(); + expect(mockFirestore.limit).toHaveBeenCalledWith(sampleSize); + expect(mockFirestore.get).toHaveBeenCalled(); + + expect(result).toHaveLength(2); + expect(result[0]).toHaveProperty("orderId", "order1"); + expect(result[0]).toHaveProperty("amount", 50); + expect(result[0]).toHaveProperty("userId", "user1"); + }); + + it("should default to regular collection query when isCollectionGroupQuery is false", async () => { + const collectionPath = "test-collection"; + const sampleSize = 2; + const isCollectionGroupQuery = false; + + const firebase = require("firebase-admin"); + const mockFirestore = firebase.firestore(); + + // Clear mocks for this test + jest.clearAllMocks(); + + const result = await sampleFirestoreDocuments( + collectionPath, + sampleSize, + isCollectionGroupQuery + ); + + expect(mockFirestore.collection).toHaveBeenCalledWith(collectionPath); + expect(mockFirestore.collectionGroup).not.toHaveBeenCalled(); + expect(result).toHaveLength(2); + }); + + it("should handle errors properly for collection group queries", async () => { + const firebase = require("firebase-admin"); + const mockFirestore = firebase.firestore(); + + // Clear mocks and set up error for this test + jest.clearAllMocks(); + mockFirestore.get.mockRejectedValueOnce( + new Error("Collection group error") + ); + + const collectionPath = "orders"; + const sampleSize = 2; + const isCollectionGroupQuery = true; + + await expect( + sampleFirestoreDocuments( + collectionPath, + sampleSize, + isCollectionGroupQuery + ) + ).rejects.toThrow("Collection group error"); + }); + }); }); diff --git a/firestore-bigquery-export/scripts/gen-schema-view/src/config/index.ts b/firestore-bigquery-export/scripts/gen-schema-view/src/config/index.ts index 25fbfaa82..92f01ef09 100644 --- a/firestore-bigquery-export/scripts/gen-schema-view/src/config/index.ts +++ b/firestore-bigquery-export/scripts/gen-schema-view/src/config/index.ts @@ -33,6 +33,7 @@ export interface CliConfig { googleAiKey?: string; schemaDirectory?: string; geminiSchemaFileName?: string; + isCollectionGroupQuery?: boolean; } export async function parseConfig(): Promise { @@ -55,6 +56,7 @@ export async function parseConfig(): Promise { googleAiKey: program.googleAiKey, schemaDirectory: program.schemaDirectory, geminiSchemaFileName: program.geminiSchemaFileName, + isCollectionGroupQuery: program.queryCollectionGroup, }; } const { @@ -68,6 +70,7 @@ export async function parseConfig(): Promise { googleAiKey, schemaDirectory, geminiSchemaFileName, + isCollectionGroupQuery, } = await promptInquirer(); return { @@ -82,5 +85,6 @@ export async function parseConfig(): Promise { googleAiKey, schemaDirectory, geminiSchemaFileName, + isCollectionGroupQuery, }; } diff --git a/firestore-bigquery-export/scripts/gen-schema-view/src/config/interactive.ts b/firestore-bigquery-export/scripts/gen-schema-view/src/config/interactive.ts index 24104ebb5..8329a6a38 100644 --- a/firestore-bigquery-export/scripts/gen-schema-view/src/config/interactive.ts +++ b/firestore-bigquery-export/scripts/gen-schema-view/src/config/interactive.ts @@ -90,7 +90,15 @@ export const questions = [ }, { message: - "What is the Firestore collection path you want Gemini to analyze?", + "Do you want to use a collection group query instead of a regular collection query?", + name: "isCollectionGroupQuery", + type: "confirm", + when: (answers) => answers.useGemini, + default: false, + }, + { + message: + "What is the Firestore collection or collection group path you want Gemini to analyze?", name: "geminiAnalyzeCollectionPath", type: "input", when: (answers) => answers.useGemini, diff --git a/firestore-bigquery-export/scripts/gen-schema-view/src/config/non-interactive.ts b/firestore-bigquery-export/scripts/gen-schema-view/src/config/non-interactive.ts index c6e659638..8e69e6681 100644 --- a/firestore-bigquery-export/scripts/gen-schema-view/src/config/non-interactive.ts +++ b/firestore-bigquery-export/scripts/gen-schema-view/src/config/non-interactive.ts @@ -74,6 +74,11 @@ export const configureProgram = () => { "--gemini-schema-file-name ", "Name of schema json file generated by Gemini (without .json extension)", "schema" + ) + .option( + "--query-collection-group", + "Use collection group query instead of regular collection query", + false ); return program; diff --git a/firestore-bigquery-export/scripts/gen-schema-view/src/schema/genkit.ts b/firestore-bigquery-export/scripts/gen-schema-view/src/schema/genkit.ts index 31259bb9f..8119196cc 100644 --- a/firestore-bigquery-export/scripts/gen-schema-view/src/schema/genkit.ts +++ b/firestore-bigquery-export/scripts/gen-schema-view/src/schema/genkit.ts @@ -24,16 +24,25 @@ import inquirer from "inquirer"; export async function sampleFirestoreDocuments( collectionPath: string, - sampleSize: number + sampleSize: number, + isCollectionGroupQuery: boolean = false ): Promise { const db = firebase.firestore(); try { - const snapshot = await db - .collection(collectionPath) - .where("__name__", ">=", Math.random().toString()) - .limit(sampleSize) - .get(); + const query = isCollectionGroupQuery + ? db.collectionGroup(collectionPath) + : db.collection(collectionPath); + + let snapshot = null; + if (isCollectionGroupQuery) { + snapshot = await query.limit(sampleSize).get(); + } else { + snapshot = await query + .where("__name__", ">=", Math.random().toString()) + .limit(sampleSize) + .get(); + } const documents = snapshot.docs.map((doc) => { const data = doc.data(); @@ -197,7 +206,8 @@ export const generateSchemaFilesWithGemini = async (config: CliConfig) => { // get sample data from Firestore const sampleData = await sampleFirestoreDocuments( config.geminiAnalyzeCollectionPath!, - config.agentSampleSize! + config.agentSampleSize!, + config.isCollectionGroupQuery || false ); if (sampleData.length === 0) {