diff --git a/.continue/rules/unit-testing-rules.yaml b/.continue/rules/unit-testing-rules.yaml new file mode 100644 index 0000000000..ec22e94675 --- /dev/null +++ b/.continue/rules/unit-testing-rules.yaml @@ -0,0 +1,30 @@ +name: unit-testing-rules +version: 0.0.1 +schema: v1 +rules: + - name: unit-testing-rules + rule: >- + For unit testing in this project: + + + 1. The project uses Jest as the testing framework + + 2. Run tests using `npm test` from within the specific package/module + directory + + 3. Command structure: `cd [directory] && npm test -- [test file path]` + + 4. The test script uses experimental VM modules via NODE_OPTIONS flag + + 5. Test files follow the pattern `*.test.ts` + + 6. Tests must import Jest with `import { jest } from "@jest/globals";` + + 7. Run tests from within the specific package directory (e.g., `cd core` + for core module tests) + + 8. Write tests as top-level `test()` functions - DO NOT use `describe()` + blocks + + 9. Include the function name being tested in the test description for + clarity diff --git a/core/indexing/CodeSnippetsIndex.ts b/core/indexing/CodeSnippetsIndex.ts index 9a2da5008b..bf79b093ef 100644 --- a/core/indexing/CodeSnippetsIndex.ts +++ b/core/indexing/CodeSnippetsIndex.ts @@ -7,7 +7,7 @@ import { getQueryForFile, } from "../util/treeSitter"; -import { DatabaseConnection, SqliteDb, tagToString } from "./refreshIndex"; +import { DatabaseConnection, SqliteDb } from "./refreshIndex"; import { IndexResultType, MarkCompleteCallback, @@ -29,6 +29,7 @@ import { getLastNUriRelativePathParts, getUriPathBasename, } from "../util/uri"; +import { tagToString } from "./utils"; type SnippetChunk = ChunkWithoutID & { title: string; signature: string }; diff --git a/core/indexing/FullTextSearchCodebaseIndex.ts b/core/indexing/FullTextSearchCodebaseIndex.ts index de183e158d..1bac74e4a5 100644 --- a/core/indexing/FullTextSearchCodebaseIndex.ts +++ b/core/indexing/FullTextSearchCodebaseIndex.ts @@ -3,13 +3,14 @@ import { RETRIEVAL_PARAMS } from "../util/parameters"; import { getUriPathBasename } from "../util/uri"; import { ChunkCodebaseIndex } from "./chunk/ChunkCodebaseIndex"; -import { DatabaseConnection, SqliteDb, tagToString } from "./refreshIndex"; +import { DatabaseConnection, SqliteDb } from "./refreshIndex"; import { IndexResultType, MarkCompleteCallback, RefreshIndexResults, type CodebaseIndex, } from "./types"; +import { tagToString } from "./utils"; export interface RetrieveConfig { tags: BranchAndDir[]; @@ -97,11 +98,14 @@ export class FullTextSearchCodebaseIndex implements CodebaseIndex { // Delete for (const item of results.del) { - await db.run(` + await db.run( + ` DELETE FROM fts WHERE rowid IN ( SELECT id FROM fts_metadata WHERE path = ? AND cacheKey = ? ) - `,[item.path, item.cacheKey]); + `, + [item.path, item.cacheKey], + ); await db.run("DELETE FROM fts_metadata WHERE path = ? AND cacheKey = ?", [ item.path, item.cacheKey, diff --git a/core/indexing/LanceDbIndex.ts b/core/indexing/LanceDbIndex.ts index 3bf9a645d2..cf41beaca4 100644 --- a/core/indexing/LanceDbIndex.ts +++ b/core/indexing/LanceDbIndex.ts @@ -14,7 +14,7 @@ import { getUriPathBasename } from "../util/uri"; import { basicChunker } from "./chunk/basic.js"; import { chunkDocument, shouldChunk } from "./chunk/chunk.js"; -import { DatabaseConnection, SqliteDb, tagToString } from "./refreshIndex.js"; +import { DatabaseConnection, SqliteDb } from "./refreshIndex.js"; import { CodebaseIndex, IndexResultType, @@ -24,6 +24,7 @@ import { } from "./types"; import type * as LanceType from "vectordb"; +import { tagToString } from "./utils"; interface LanceDbRow { uuid: string; diff --git a/core/indexing/chunk/ChunkCodebaseIndex.ts b/core/indexing/chunk/ChunkCodebaseIndex.ts index 9b39e506a7..412ff74800 100644 --- a/core/indexing/chunk/ChunkCodebaseIndex.ts +++ b/core/indexing/chunk/ChunkCodebaseIndex.ts @@ -4,7 +4,7 @@ import { RunResult } from "sqlite3"; import { IContinueServerClient } from "../../continueServer/interface.js"; import { Chunk, IndexTag, IndexingProgressUpdate } from "../../index.js"; -import { DatabaseConnection, SqliteDb, tagToString } from "../refreshIndex.js"; +import { DatabaseConnection, SqliteDb } from "../refreshIndex.js"; import { IndexResultType, MarkCompleteCallback, @@ -13,8 +13,9 @@ import { type CodebaseIndex, } from "../types.js"; -import { chunkDocument, shouldChunk } from "./chunk.js"; import { getUriPathBasename } from "../../util/uri.js"; +import { tagToString } from "../utils.js"; +import { chunkDocument, shouldChunk } from "./chunk.js"; export class ChunkCodebaseIndex implements CodebaseIndex { relativeExpectedTime: number = 1; diff --git a/core/indexing/refreshIndex.ts b/core/indexing/refreshIndex.ts index f76c6a7f64..ec84f631b3 100644 --- a/core/indexing/refreshIndex.ts +++ b/core/indexing/refreshIndex.ts @@ -18,10 +18,6 @@ import { export type DatabaseConnection = Database; -export function tagToString(tag: IndexTag): string { - return `${tag.directory}::${tag.branch}::${tag.artifactId}`; -} - export class SqliteDb { static db: DatabaseConnection | null = null; diff --git a/core/indexing/test/indexing.ts b/core/indexing/test/indexing.ts index 5f22d0aeb9..b90f525642 100644 --- a/core/indexing/test/indexing.ts +++ b/core/indexing/test/indexing.ts @@ -3,11 +3,11 @@ import { jest } from "@jest/globals"; import { IndexTag } from "../.."; import { IContinueServerClient } from "../../continueServer/interface"; import { ChunkCodebaseIndex } from "../chunk/ChunkCodebaseIndex"; -import { tagToString } from "../refreshIndex"; import { CodebaseIndex, RefreshIndexResults } from "../types"; import { testIde } from "../../test/fixtures"; import { addToTestDir, TEST_DIR } from "../../test/testDir"; +import { tagToString } from "../utils"; export const mockFilename = "test.py"; export const mockPathAndCacheKey = { diff --git a/core/indexing/utils.test.ts b/core/indexing/utils.test.ts new file mode 100644 index 0000000000..f432da0235 --- /dev/null +++ b/core/indexing/utils.test.ts @@ -0,0 +1,58 @@ +import { IndexTag } from ".."; +import { tagToString } from "./utils"; + +test("tagToString returns full tag string when under length limit", () => { + const tag: IndexTag = { + directory: "/normal/path/to/repo", + branch: "main", + artifactId: "12345", + }; + + expect(tagToString(tag)).toBe("/normal/path/to/repo::main::12345"); +}); + +test("tagToString truncates beginning of directory when path is too long", () => { + // Create a very long directory path that exceeds MAX_DIR_LENGTH (200) + const longPrefix = "/very/long/path/that/will/be/truncated/"; + const importantSuffix = "/user/important-project/src/feature"; + const longPath = longPrefix + "x".repeat(200) + importantSuffix; + + const tag: IndexTag = { + directory: longPath, + branch: "feature-branch", + artifactId: "67890", + }; + + const result = tagToString(tag); + + // The result should keep the important suffix part + expect(result).toContain(importantSuffix); + // The result should NOT contain the beginning of the path + expect(result).not.toContain(longPrefix); + // The result should include the branch and artifactId + expect(result).toContain("::feature-branch::67890"); + // The result should be within the MAX_TABLE_NAME_LENGTH limit (240) + expect(result.length).toBeLessThanOrEqual(240); +}); + +test("tagToString preserves branch and artifactId exactly, even when truncating", () => { + const longPath = "/a".repeat(300); // Much longer than MAX_DIR_LENGTH + const tag: IndexTag = { + directory: longPath, + branch: "release-v2.0", + artifactId: "build-123", + }; + + const result = tagToString(tag); + + // Should contain the exact branch and artifactId + expect(result).toContain("::release-v2.0::build-123"); + // Should contain the end of the path + expect(result).toContain("/a/a/a"); + // Should not contain the full original path (it should be truncated) + expect(result.length).toBeLessThan( + longPath.length + "::release-v2.0::build-123".length, + ); + // The result should be within the MAX_TABLE_NAME_LENGTH limit + expect(result.length).toBeLessThanOrEqual(240); +}); diff --git a/core/indexing/utils.ts b/core/indexing/utils.ts new file mode 100644 index 0000000000..0713507f03 --- /dev/null +++ b/core/indexing/utils.ts @@ -0,0 +1,45 @@ +import { IndexTag } from ".."; + +// Maximum length for table names to stay under OS filename limits +const MAX_TABLE_NAME_LENGTH = 240; + +// Leave room for branch and artifactId +const MAX_DIR_LENGTH = 200; + +/** + * Converts an IndexTag to a string representation, safely handling long paths. + * + * The string is used as a table name and identifier in various places, so it needs + * to stay under OS filename length limits (typically 255 chars). This is especially + * important for dev containers where the directory path can be very long due to + * containing container configuration. + * + * The format is: "{directory}::{branch}::{artifactId}" + * + * To handle long paths: + * 1. First tries the full string - most backwards compatible + * 2. If too long, truncates directory from the beginning to maintain uniqueness + * (since final parts of paths are more unique than prefixes) + * 3. Finally ensures entire string stays under MAX_TABLE_NAME_LENGTH for OS compatibility + * + * @param tag The tag containing directory, branch, and artifactId + * @returns A string representation safe for use as a table name + */ +export function tagToString(tag: IndexTag): string { + const result = `${tag.directory}::${tag.branch}::${tag.artifactId}`; + + if (result.length <= MAX_TABLE_NAME_LENGTH) { + return result; + } + + // Truncate from the beginning of directory path to preserve the more unique end parts + const dir = + tag.directory.length > MAX_DIR_LENGTH + ? tag.directory.slice(tag.directory.length - MAX_DIR_LENGTH) + : tag.directory; + + return `${dir}::${tag.branch}::${tag.artifactId}`.slice( + 0, + MAX_TABLE_NAME_LENGTH, + ); +}