Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -386,7 +386,7 @@ Code Context is a monorepo containing three main packages:
- **Embedding Providers**: [OpenAI](https://openai.com), [VoyageAI](https://voyageai.com), [Ollama](https://ollama.ai), [Gemini](https://gemini.google.com)
- **Vector Databases**: [Milvus](https://milvus.io) or [Zilliz Cloud](https://zilliz.com/cloud) (fully managed vector database as a service)
- **Code Splitters**: AST-based splitter (with automatic fallback), LangChain character-based splitter
- **Languages**: TypeScript, JavaScript, Python, Java, C++, C#, Go, Rust, PHP, Ruby, Swift, Kotlin, Scala, Markdown
- **Languages**: TypeScript, JavaScript, Python, Java, C++, C#, Go, Rust, Zig, PHP, Ruby, Swift, Kotlin, Scala, Markdown
- **Development Tools**: VSCode, Model Context Protocol

---
Expand Down
5 changes: 4 additions & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -52,5 +52,8 @@
"url": "https://github.com/zilliztech/code-context.git"
},
"license": "MIT",
"author": "Cheney Zhang <[email protected]>"
"author": "Cheney Zhang <[email protected]>",
"dependencies": {
"@tree-sitter-grammars/tree-sitter-zig": "^1.1.2"
}
}
4 changes: 2 additions & 2 deletions packages/core/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ results.forEach(result => {

## Features

- **Multi-language Support**: Index TypeScript, JavaScript, Python, Java, C++, and many other programming languages
- **Multi-language Support**: Index TypeScript, JavaScript, Python, Java, C++, Zig, and many other programming languages
- **Semantic Search**: Find code using natural language queries powered by AI embeddings
- **Flexible Architecture**: Pluggable embedding providers and vector databases
- **Smart Chunking**: Intelligent code splitting that preserves context and structure
Expand Down Expand Up @@ -136,7 +136,7 @@ interface CodeContextConfig {
[
// Programming languages
'.ts', '.tsx', '.js', '.jsx', '.py', '.java', '.cpp', '.c', '.h', '.hpp',
'.cs', '.go', '.rs', '.php', '.rb', '.swift', '.kt', '.scala', '.m', '.mm',
'.cs', '.go', '.rs', '.zig', '.php', '.rb', '.swift', '.kt', '.scala', '.m', '.mm',
// Text and markup files
'.md', '.markdown'
]
Expand Down
1 change: 1 addition & 0 deletions packages/core/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
},
"dependencies": {
"@google/genai": "^1.9.0",
"@tree-sitter-grammars/tree-sitter-zig": "^1.1.2",
"@zilliz/milvus2-sdk-node": "^2.5.10",
"faiss-node": "^0.5.1",
"fs-extra": "^11.0.0",
Expand Down
5 changes: 3 additions & 2 deletions packages/core/src/context.ts
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ import { FileSynchronizer } from './sync/synchronizer';
const DEFAULT_SUPPORTED_EXTENSIONS = [
// Programming languages
'.ts', '.tsx', '.js', '.jsx', '.py', '.java', '.cpp', '.c', '.h', '.hpp',
'.cs', '.go', '.rs', '.php', '.rb', '.swift', '.kt', '.scala', '.m', '.mm',
'.cs', '.go', '.rs', '.php', '.rb', '.swift', '.kt', '.scala', '.m', '.mm', '.zig',
// Text and markup files
'.md', '.markdown', '.ipynb',
// '.txt', '.json', '.yaml', '.yml', '.xml', '.html', '.htm',
Expand Down Expand Up @@ -626,7 +626,8 @@ export class CodeContext {
'.scala': 'scala',
'.m': 'objective-c',
'.mm': 'objective-c',
'.ipynb': 'jupyter'
'.ipynb': 'jupyter',
'.zig': 'zig'
};
return languageMap[ext] || 'text';
}
Expand Down
22 changes: 17 additions & 5 deletions packages/core/src/splitter/ast-splitter.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ const Java = require('tree-sitter-java');
const Cpp = require('tree-sitter-cpp');
const Go = require('tree-sitter-go');
const Rust = require('tree-sitter-rust');
const Zig = require('@tree-sitter-grammars/tree-sitter-zig');

// Node types that represent logical code units
const SPLITTABLE_NODE_TYPES = {
Expand All @@ -18,7 +19,8 @@ const SPLITTABLE_NODE_TYPES = {
java: ['method_declaration', 'class_declaration', 'interface_declaration', 'constructor_declaration'],
cpp: ['function_definition', 'class_specifier', 'namespace_definition', 'declaration'],
go: ['function_declaration', 'method_declaration', 'type_declaration', 'var_declaration', 'const_declaration'],
rust: ['function_item', 'impl_item', 'struct_item', 'enum_item', 'trait_item', 'mod_item']
rust: ['function_item', 'impl_item', 'struct_item', 'enum_item', 'trait_item', 'mod_item'],
zig: ['function_declaration', 'variable_declaration', 'test_declaration', 'comptime_declaration', 'using_namespace_declaration']
};

export class AstCodeSplitter implements Splitter {
Expand Down Expand Up @@ -93,7 +95,8 @@ export class AstCodeSplitter implements Splitter {
'c': { parser: Cpp, nodeTypes: SPLITTABLE_NODE_TYPES.cpp },
'go': { parser: Go, nodeTypes: SPLITTABLE_NODE_TYPES.go },
'rust': { parser: Rust, nodeTypes: SPLITTABLE_NODE_TYPES.rust },
'rs': { parser: Rust, nodeTypes: SPLITTABLE_NODE_TYPES.rust }
'rs': { parser: Rust, nodeTypes: SPLITTABLE_NODE_TYPES.rust },
'zig': { parser: Zig, nodeTypes: SPLITTABLE_NODE_TYPES.zig }
};

return langMap[language.toLowerCase()] || null;
Expand All @@ -109,9 +112,18 @@ export class AstCodeSplitter implements Splitter {
const chunks: CodeChunk[] = [];
const codeLines = code.split('\n');

const traverse = (currentNode: Parser.SyntaxNode) => {
// For Zig, Rust, and Go, only extract top-level declarations
// to avoid emitting nested functions/types as duplicate chunks
const shouldOnlyExtractTopLevel = ['zig', 'rust', 'go'].includes(language);

const traverse = (currentNode: Parser.SyntaxNode, depth: number = 0) => {
// Check if this node type should be split into a chunk
if (splittableTypes.includes(currentNode.type)) {
// For certain languages, skip nested declarations
if (shouldOnlyExtractTopLevel && depth > 1) {
return;
}

const startLine = currentNode.startPosition.row + 1;
const endLine = currentNode.endPosition.row + 1;
const nodeText = code.slice(currentNode.startIndex, currentNode.endIndex);
Expand All @@ -132,7 +144,7 @@ export class AstCodeSplitter implements Splitter {

// Continue traversing child nodes
for (const child of currentNode.children) {
traverse(child);
traverse(child, depth + 1);
}
};

Expand Down Expand Up @@ -256,7 +268,7 @@ export class AstCodeSplitter implements Splitter {
static isLanguageSupported(language: string): boolean {
const supportedLanguages = [
'javascript', 'js', 'typescript', 'ts', 'python', 'py',
'java', 'cpp', 'c++', 'c', 'go', 'rust', 'rs'
'java', 'cpp', 'c++', 'c', 'go', 'rust', 'rs', 'zig'
];
return supportedLanguages.includes(language.toLowerCase());
}
Expand Down
6 changes: 4 additions & 2 deletions packages/core/src/splitter/langchain-splitter.ts
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ export class LangChainCodeSplitter implements Splitter {

private mapLanguage(language: string): SupportedLanguage | null {
// Map common language names to LangChain supported formats
const languageMap: Record<string, SupportedLanguage> = {
const languageMap: Record<string, SupportedLanguage | null> = {
'javascript': 'js',
'typescript': 'js',
'python': 'python',
Expand All @@ -84,9 +84,11 @@ export class LangChainCodeSplitter implements Splitter {
'tex': 'latex',
'solidity': 'sol',
'sol': 'sol',
'zig': null, // LangChain doesn't have native Zig support, will use fallback
};

return languageMap[language.toLowerCase()] || null;
const mapped = languageMap[language.toLowerCase()];
return mapped !== undefined ? mapped : null;
}

private async fallbackSplit(code: string, language: string, filePath?: string): Promise<CodeChunk[]> {
Expand Down
22 changes: 22 additions & 0 deletions pnpm-lock.yaml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.