Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 36 additions & 12 deletions packages/core/src/context.ts
Original file line number Diff line number Diff line change
Expand Up @@ -213,8 +213,8 @@ export class Context {
/**
* Public wrapper for prepareCollection private method
*/
async getPreparedCollection(codebasePath: string): Promise<void> {
return this.prepareCollection(codebasePath);
async getPreparedCollection(codebasePath: string, gitRepoIdentifier?: string | null): Promise<void> {
return this.prepareCollection(codebasePath, false, gitRepoIdentifier);
}

/**
Expand All @@ -230,12 +230,31 @@ export class Context {

/**
* Generate collection name based on codebase path and hybrid mode
* Optionally accepts a git repository identifier for consistent naming across different local paths
*/
public getCollectionName(codebasePath: string): string {
public getCollectionName(codebasePath: string, gitRepoIdentifier?: string | null): string {
const isHybrid = this.getIsHybrid();
const prefix = isHybrid === true ? 'hybrid_code_chunks' : 'code_chunks';

// If git repository identifier is provided, use it for collection naming
if (gitRepoIdentifier) {
// Create a clean identifier by replacing special characters
const cleanIdentifier = gitRepoIdentifier
.replace(/[^a-zA-Z0-9]/g, '_') // Replace non-alphanumeric with underscore
.toLowerCase()
.substring(0, 32); // Limit length for collection name

// Create hash from the git identifier for uniqueness
const hash = crypto.createHash('md5').update(gitRepoIdentifier).digest('hex');

console.log(`[Context] Using git-based collection naming for: ${gitRepoIdentifier}`);
return `${prefix}_git_${cleanIdentifier}_${hash.substring(0, 8)}`;
}

// Fallback to path-based naming (original behavior)
const normalizedPath = path.resolve(codebasePath);
const hash = crypto.createHash('md5').update(normalizedPath).digest('hex');
const prefix = isHybrid === true ? 'hybrid_code_chunks' : 'code_chunks';
console.log(`[Context] Using path-based collection naming for: ${normalizedPath}`);
return `${prefix}_${hash.substring(0, 8)}`;
}

Expand Down Expand Up @@ -405,13 +424,15 @@ export class Context {
* @param query Search query
* @param topK Number of results to return
* @param threshold Similarity threshold
* @param filterExpr Optional filter expression
* @param gitRepoIdentifier Optional git repository identifier for consistent collection naming
*/
async semanticSearch(codebasePath: string, query: string, topK: number = 5, threshold: number = 0.5, filterExpr?: string): Promise<SemanticSearchResult[]> {
async semanticSearch(codebasePath: string, query: string, topK: number = 5, threshold: number = 0.5, filterExpr?: string, gitRepoIdentifier?: string | null): Promise<SemanticSearchResult[]> {
const isHybrid = this.getIsHybrid();
const searchType = isHybrid === true ? 'hybrid search' : 'semantic search';
console.log(`[Context] 🔍 Executing ${searchType}: "${query}" in ${codebasePath}`);

const collectionName = this.getCollectionName(codebasePath);
const collectionName = this.getCollectionName(codebasePath, gitRepoIdentifier);
console.log(`[Context] 🔍 Using collection: ${collectionName}`);

// Check if collection exists and has data
Expand Down Expand Up @@ -518,27 +539,30 @@ export class Context {
/**
* Check if index exists for codebase
* @param codebasePath Codebase path to check
* @param gitRepoIdentifier Optional git repository identifier for consistent collection naming
* @returns Whether index exists
*/
async hasIndex(codebasePath: string): Promise<boolean> {
const collectionName = this.getCollectionName(codebasePath);
async hasIndex(codebasePath: string, gitRepoIdentifier?: string | null): Promise<boolean> {
const collectionName = this.getCollectionName(codebasePath, gitRepoIdentifier);
return await this.vectorDatabase.hasCollection(collectionName);
}

/**
* Clear index
* @param codebasePath Codebase path to clear index for
* @param progressCallback Optional progress callback function
* @param gitRepoIdentifier Optional git repository identifier for consistent collection naming
*/
async clearIndex(
codebasePath: string,
progressCallback?: (progress: { phase: string; current: number; total: number; percentage: number }) => void
progressCallback?: (progress: { phase: string; current: number; total: number; percentage: number }) => void,
gitRepoIdentifier?: string | null
): Promise<void> {
console.log(`[Context] 🧹 Cleaning index data for ${codebasePath}...`);

progressCallback?.({ phase: 'Checking existing index...', current: 0, total: 100, percentage: 0 });

const collectionName = this.getCollectionName(codebasePath);
const collectionName = this.getCollectionName(codebasePath, gitRepoIdentifier);
const collectionExists = await this.vectorDatabase.hasCollection(collectionName);

progressCallback?.({ phase: 'Removing index data...', current: 50, total: 100, percentage: 50 });
Expand Down Expand Up @@ -622,11 +646,11 @@ export class Context {
/**
* Prepare vector collection
*/
private async prepareCollection(codebasePath: string, forceReindex: boolean = false): Promise<void> {
private async prepareCollection(codebasePath: string, forceReindex: boolean = false, gitRepoIdentifier?: string | null): Promise<void> {
const isHybrid = this.getIsHybrid();
const collectionType = isHybrid === true ? 'hybrid vector' : 'vector';
console.log(`[Context] 🔧 Preparing ${collectionType} collection for codebase: ${codebasePath}${forceReindex ? ' (FORCE REINDEX)' : ''}`);
const collectionName = this.getCollectionName(codebasePath);
const collectionName = this.getCollectionName(codebasePath, gitRepoIdentifier);

// Check if collection already exists
const collectionExists = await this.vectorDatabase.hasCollection(collectionName);
Expand Down
48 changes: 40 additions & 8 deletions packages/mcp/src/handlers.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,12 @@ import * as path from "path";
import * as crypto from "crypto";
import { Context, COLLECTION_LIMIT_MESSAGE } from "@zilliz/claude-context-core";
import { SnapshotManager } from "./snapshot.js";
import { ensureAbsolutePath, truncateContent, trackCodebasePath } from "./utils.js";
import {
ensureAbsolutePath,
truncateContent,
trackCodebasePath,
getRepositoryIdentifier
} from "./utils.js";

export class ToolHandlers {
private context: Context;
Expand Down Expand Up @@ -199,8 +204,14 @@ export class ToolHandlers {
};
}

// Get git repository identifier for consistent collection naming
const gitRepoIdentifier = getRepositoryIdentifier(absolutePath);
if (gitRepoIdentifier) {
console.log(`[INDEX-VALIDATION] 🔗 Git repository detected: ${gitRepoIdentifier}`);
}

//Check if the snapshot and cloud index are in sync
if (this.snapshotManager.getIndexedCodebases().includes(absolutePath) !== await this.context.hasIndex(absolutePath)) {
if (this.snapshotManager.getIndexedCodebases().includes(absolutePath) !== await this.context.hasIndex(absolutePath, gitRepoIdentifier)) {
console.warn(`[INDEX-VALIDATION] ❌ Snapshot and cloud index mismatch: ${absolutePath}`);
}

Expand All @@ -221,9 +232,9 @@ export class ToolHandlers {
console.log(`[FORCE-REINDEX] 🔄 Removing '${absolutePath}' from indexed list for re-indexing`);
this.snapshotManager.removeIndexedCodebase(absolutePath);
}
if (await this.context.hasIndex(absolutePath)) {
if (await this.context.hasIndex(absolutePath, gitRepoIdentifier)) {
console.log(`[FORCE-REINDEX] 🔄 Clearing index for '${absolutePath}'`);
await this.context.clearIndex(absolutePath);
await this.context.clearIndex(absolutePath, undefined, gitRepoIdentifier);
}
}

Expand Down Expand Up @@ -339,6 +350,14 @@ export class ToolHandlers {
console.warn(`[BACKGROUND-INDEX] Non-AST splitter '${splitterType}' requested; falling back to AST splitter`);
}

// Get git repository identifier if available
const gitRepoIdentifier = getRepositoryIdentifier(absolutePath);
if (gitRepoIdentifier) {
console.log(`[BACKGROUND-INDEX] 🔗 Git repository detected: ${gitRepoIdentifier}`);
} else {
console.log(`[BACKGROUND-INDEX] 📁 Using path-based identification (not a git repository or no remote)`);
}

// Load ignore patterns from files first (including .ignore, .gitignore, etc.)
await this.context.getLoadedIgnorePatterns(absolutePath);

Expand All @@ -350,8 +369,8 @@ export class ToolHandlers {
await synchronizer.initialize();

// Store synchronizer in the context (let context manage collection names)
await this.context.getPreparedCollection(absolutePath);
const collectionName = this.context.getCollectionName(absolutePath);
await this.context.getPreparedCollection(absolutePath, gitRepoIdentifier);
const collectionName = this.context.getCollectionName(absolutePath, gitRepoIdentifier);
this.context.setSynchronizer(collectionName, synchronizer);
if (contextForThisTask !== this.context) {
contextForThisTask.setSynchronizer(collectionName, synchronizer);
Expand Down Expand Up @@ -447,6 +466,12 @@ export class ToolHandlers {

trackCodebasePath(absolutePath);

// Get git repository identifier if available for consistent collection naming
const gitRepoIdentifier = getRepositoryIdentifier(absolutePath);
if (gitRepoIdentifier) {
console.log(`[SEARCH] 🔗 Git repository detected: ${gitRepoIdentifier}`);
}

// Check if this codebase is indexed or being indexed
const isIndexed = this.snapshotManager.getIndexedCodebases().includes(absolutePath);
const isIndexing = this.snapshotManager.getIndexingCodebases().includes(absolutePath);
Expand Down Expand Up @@ -500,7 +525,8 @@ export class ToolHandlers {
query,
Math.min(resultLimit, 50),
0.3,
filterExpr
filterExpr,
gitRepoIdentifier
);

console.log(`[SEARCH] ✅ Search completed! Found ${searchResults.length} results using ${embeddingProvider.getProvider()} embeddings`);
Expand Down Expand Up @@ -621,10 +647,16 @@ export class ToolHandlers {
};
}

// Get git repository identifier for consistent collection naming
const gitRepoIdentifier = getRepositoryIdentifier(absolutePath);
if (gitRepoIdentifier) {
console.log(`[CLEAR] 🔗 Git repository detected: ${gitRepoIdentifier}`);
}

console.log(`[CLEAR] Clearing codebase: ${absolutePath}`);

try {
await this.context.clearIndex(absolutePath);
await this.context.clearIndex(absolutePath, undefined, gitRepoIdentifier);
console.log(`[CLEAR] Successfully cleared index for: ${absolutePath}`);
} catch (error: any) {
const errorMsg = `Failed to clear ${absolutePath}: ${error.message}`;
Expand Down
11 changes: 9 additions & 2 deletions packages/mcp/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,12 @@ Index a codebase directory to enable semantic search using a configurable code s

⚠️ **IMPORTANT**:
- You MUST provide an absolute path to the target codebase.
- If the path is a git repository with a remote URL, it will automatically use the git remote for consistent collection naming across different local paths.

🔗 **Git Repository Support**:
- Automatically detects git repositories and uses remote URL for collection naming
- Same repository cloned to different paths will share the same collection
- Ensures consistency across team members and machines

✨ **Usage Guidance**:
- This tool is typically used when search fails due to an unindexed codebase.
Expand All @@ -100,6 +106,7 @@ Search the indexed codebase using natural language queries within a specified ab

⚠️ **IMPORTANT**:
- You MUST provide an absolute path.
- If the path is a git repository, it will automatically use the correct collection based on the git remote URL.

🎯 **When to Use**:
This tool is versatile and can be used before completing various tasks to retrieve relevant context:
Expand Down Expand Up @@ -195,7 +202,7 @@ This tool is versatile and can be used before completing various tasks to retrie
},
{
name: "clear_index",
description: `Clear the search index. IMPORTANT: You MUST provide an absolute path.`,
description: `Clear the search index. IMPORTANT: You MUST provide an absolute path. Git repositories will be identified by their remote URL for accurate clearing.`,
inputSchema: {
type: "object",
properties: {
Expand All @@ -209,7 +216,7 @@ This tool is versatile and can be used before completing various tasks to retrie
},
{
name: "get_indexing_status",
description: `Get the current indexing status of a codebase. Shows progress percentage for actively indexing codebases and completion status for indexed codebases.`,
description: `Get the current indexing status of a codebase. Shows progress percentage for actively indexing codebases and completion status for indexed codebases. Git repositories are identified by their remote URL.`,
inputSchema: {
type: "object",
properties: {
Expand Down
101 changes: 101 additions & 0 deletions packages/mcp/src/utils.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
import * as path from "path";
import { execSync } from "child_process";
import * as fs from "fs";

/**
* Truncate content to specified length
Expand Down Expand Up @@ -27,4 +29,103 @@ export function ensureAbsolutePath(inputPath: string): string {
export function trackCodebasePath(codebasePath: string): void {
const absolutePath = ensureAbsolutePath(codebasePath);
console.log(`[TRACKING] Tracked codebase path: ${absolutePath} (not marked as indexed)`);
}

/**
 * Report whether `dirPath` directly contains an entry named `.git`.
 *
 * Only the given directory itself is inspected; parent directories are not
 * searched.
 *
 * @param dirPath Directory to inspect
 * @returns `true` when `<dirPath>/.git` exists, `false` otherwise (including
 *          when the check itself throws)
 */
export function isGitRepository(dirPath: string): boolean {
    let hasGitEntry = false;
    try {
        hasGitEntry = fs.existsSync(path.join(dirPath, '.git'));
    } catch {
        // Any failure while building or probing the path counts as "not a repository".
        hasGitEntry = false;
    }
    return hasGitEntry;
}

/**
 * Read the URL of the `origin` remote for the repository at `repoPath`.
 *
 * Runs `git remote get-url origin` with `repoPath` as the working directory,
 * after first checking the path with `isGitRepository`.
 *
 * @param repoPath Path to the git repository
 * @returns The trimmed remote URL, or `null` when the path is not a git
 *          repository, the command fails, or the command prints nothing
 */
export function extractGitRemoteUrl(repoPath: string): string | null {
    try {
        if (isGitRepository(repoPath)) {
            const stdout = execSync('git remote get-url origin', {
                cwd: repoPath,
                encoding: 'utf8',
                // stdin and stderr are ignored so git's own error output is not shown
                stdio: ['ignore', 'pipe', 'ignore']
            });
            const remoteUrl = stdout.trim();
            return remoteUrl.length > 0 ? remoteUrl : null;
        }
        return null;
    } catch {
        // A missing `origin` remote or any git failure is reported as "no URL"
        return null;
    }
}

/**
 * Parse and normalize a git remote URL into a stable repository identifier.
 *
 * Supported formats:
 * - https://github.com/org/repo.git
 * - https://gitlab.com/org/repo
 * - https://user:token@github.com/org/repo.git (credentials are dropped)
 * - ssh://git@github.com/org/repo.git
 * - git://github.com/org/repo.git
 * - git@github.com:org/repo.git (scp-like syntax, any user name)
 *
 * Surrounding whitespace, trailing slashes and a trailing `.git` suffix are
 * removed so that different spellings of the same remote map to the same
 * identifier. Any user or credential part is never included in the result,
 * which keeps secrets out of identifiers that callers may log.
 *
 * @param gitUrl The git remote URL
 * @returns Normalized identifier like "github.com/org/repo", or `null` when
 *          the input is not a string or is not in a recognized format
 */
export function parseGitUrl(gitUrl: string): string | null {
    // Defensive guard for untyped callers: a non-string input yields null instead of throwing.
    if (typeof gitUrl !== 'string') {
        return null;
    }

    let url = gitUrl.trim();
    // "https://host/org/repo/" and "https://host/org/repo" name the same repository.
    while (url.endsWith('/')) {
        url = url.slice(0, -1);
    }

    // URL syntax: scheme://[userinfo@]host[:port]/path[.git]
    // The optional userinfo group is matched but not captured, so it is discarded.
    const schemeMatch = url.match(/^(?:https?|ssh|git):\/\/(?:[^@\/]+@)?([^\/]+)\/(.+?)(?:\.git)?$/);
    if (schemeMatch) {
        const host = schemeMatch[1];
        const repoPath = schemeMatch[2];
        return `${host}/${repoPath}`;
    }

    // scp-like syntax: user@host:path[.git]
    const scpMatch = url.match(/^[^@\/:]+@([^:]+):(.+?)(?:\.git)?$/);
    if (scpMatch) {
        const host = scpMatch[1];
        const repoPath = scpMatch[2];
        return `${host}/${repoPath}`;
    }

    // Unrecognized format (e.g. a local filesystem path)
    return null;
}

/**
 * Derive a repository identifier for a directory from its git `origin` remote.
 *
 * The remote URL is obtained with `extractGitRemoteUrl` and normalized with
 * `parseGitUrl`. This function does not build a path-based identifier itself;
 * it returns `null` and leaves any such fallback to the caller.
 *
 * @param dirPath Directory path
 * @returns The normalized repository identifier, or `null` when no remote URL
 *          is available or the URL cannot be parsed
 */
export function getRepositoryIdentifier(dirPath: string): string | null {
    const remoteUrl = extractGitRemoteUrl(dirPath);
    if (!remoteUrl) {
        // No usable remote URL was found for this directory
        return null;
    }

    const identifier = parseGitUrl(remoteUrl);
    if (!identifier) {
        // A remote exists but its URL is not in a recognized format
        return null;
    }

    console.log(`[GIT-UTILS] Repository identified via git remote: ${identifier}`);
    return identifier;
}