diff --git a/.env.example b/.env.example
index 8eb0266a..f678dbb1 100644
--- a/.env.example
+++ b/.env.example
@@ -20,6 +20,10 @@ EMBEDDING_MODEL=text-embedding-3-small
 # You can customize it according to the throughput of your embedding model. Generally, larger batch size means less indexing time.
 EMBEDDING_BATCH_SIZE=100
 
+# Maximum number of chunks to index before stopping (default: 450000)
+# Set a lower value to limit indexing for very large codebases. Minimum value is 1000.
+# CHUNK_LIMIT=450000
+
 # =============================================================================
 # OpenAI Configuration
 # =============================================================================
diff --git a/packages/core/src/context.ts b/packages/core/src/context.ts
index 1af13058..065bce90 100644
--- a/packages/core/src/context.ts
+++ b/packages/core/src/context.ts
@@ -702,8 +702,9 @@ export class Context {
     ): Promise<{ processedFiles: number; totalChunks: number; status: 'completed' | 'limit_reached' }> {
         const isHybrid = this.getIsHybrid();
         const EMBEDDING_BATCH_SIZE = Math.max(1, parseInt(envManager.get('EMBEDDING_BATCH_SIZE') || '100', 10));
-        const CHUNK_LIMIT = 450000;
+        const CHUNK_LIMIT = Math.max(1000, parseInt(envManager.get('CHUNK_LIMIT') || '450000', 10));
         console.log(`[Context] 🔧 Using EMBEDDING_BATCH_SIZE: ${EMBEDDING_BATCH_SIZE}`);
+        console.log(`[Context] 🔧 Using CHUNK_LIMIT: ${CHUNK_LIMIT}`);
 
         let chunkBuffer: Array<{ chunk: CodeChunk; codebasePath: string }> = [];
         let processedFiles = 0;
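For reference, a minimal standalone sketch of the limit resolution this change introduces. It mirrors the new line in context.ts but reads process.env directly instead of the repo's envManager helper (an assumption made only so the snippet runs on its own):

```typescript
// Resolve CHUNK_LIMIT the same way the changed line in context.ts does:
// read the env var, fall back to 450000 when unset, clamp to the documented minimum of 1000.
const CHUNK_LIMIT = Math.max(1000, parseInt(process.env.CHUNK_LIMIT || '450000', 10));

// Example outcomes: unset -> 450000; CHUNK_LIMIT=500 -> clamped up to 1000; CHUNK_LIMIT=200000 -> 200000.
console.log(`[Context] 🔧 Using CHUNK_LIMIT: ${CHUNK_LIMIT}`);
```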