Skip to content

Commit 89bb331

Browse files
authored
Merge pull request #58 from data-exp-lab/fe-rag
Add RAG
2 parents 7a7ebb0 + bdd026d commit 89bb331

File tree

10 files changed

+1378
-174
lines changed

10 files changed

+1378
-174
lines changed

.gitignore

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -67,4 +67,7 @@ venv.bak/
6767
.Spotlight-V100
6868
.Trashes
6969
ehthumbs.db
70-
Thumbs.db
70+
Thumbs.db
71+
72+
73+
**config.json

backend/app/config.example.json

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
{
2+
"ai_providers": {
3+
"openai": {
4+
"api_key": "sk-your-openai-api-key-here",
5+
"model": "gpt-4",
6+
"base_url": "https://api.openai.com/v1"
7+
},
8+
"azure_openai": {
9+
"api_key": "your-azure-openai-api-key-here",
10+
"endpoint": "https://your-resource-name.openai.azure.com/",
11+
"deployment_name": "your-deployment-name",
12+
"api_version": "2024-02-15-preview"
13+
},
14+
"google_genai": {
15+
"api_key": "your-google-api-key-here",
16+
"model": "gemini-pro"
17+
},
18+
"anthropic": {
19+
"api_key": "sk-ant-your-anthropic-api-key-here",
20+
"model": "claude-3-sonnet-20240229"
21+
}
22+
},
23+
"github": {
24+
"token": "ghp_your-github-personal-access-token-here",
25+
"rate_limit_per_hour": 5000
26+
},
27+
"graphrag": {
28+
"timeout_minutes": 50,
29+
"batch_size": 50,
30+
"cache_hours": 24,
31+
"max_repos_per_request": 1000
32+
},
33+
"server": {
34+
"host": "127.0.0.1",
35+
"port": 5002,
36+
"debug": false,
37+
"timeout": 3600
38+
},
39+
"database": {
40+
"path": "kuzu",
41+
"auto_cleanup": true,
42+
"backup_enabled": false
43+
}
44+
}

backend/app/config_manager.py

Lines changed: 220 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,220 @@
1+
import copy
import json
import os
from pathlib import Path
from typing import Dict, Any, Optional
5+
6+
class ConfigManager:
    """Manages application configuration from file and environment variables.

    Values are resolved in three layers, later layers winning:

    1. built-in defaults (``_get_default_config``),
    2. the JSON file at ``config_path`` (if it exists and parses),
    3. a fixed set of environment variables (``_ENV_OVERRIDES``).
    """

    # (environment variable, path of keys inside the config dict) pairs,
    # applied in order by ``_apply_env_overrides``.
    _ENV_OVERRIDES = (
        # OpenAI
        ("OPENAI_API_KEY", ("ai_providers", "openai", "api_key")),
        ("OPENAI_MODEL", ("ai_providers", "openai", "model")),
        # Azure OpenAI
        ("AZURE_OPENAI_API_KEY", ("ai_providers", "azure_openai", "api_key")),
        ("AZURE_OPENAI_DEPLOYMENT_NAME", ("ai_providers", "azure_openai", "deployment_name")),
        ("AZURE_OPENAI_ENDPOINT", ("ai_providers", "azure_openai", "endpoint")),
        # Google GenAI
        ("GOOGLE_API_KEY", ("ai_providers", "google_genai", "api_key")),
        # Anthropic
        ("ANTHROPIC_API_KEY", ("ai_providers", "anthropic", "api_key")),
        # GitHub
        ("GITHUB_TOKEN", ("github", "token")),
    )

    # Providers whose ``api_key`` is inspected by ``validate_config``.
    _AI_PROVIDERS = ("openai", "azure_openai", "google_genai", "anthropic")

    def __init__(self, config_path: Optional[str] = None):
        """Initialize the configuration manager and load the configuration.

        Args:
            config_path: Path to the JSON config file. When ``None``, defaults
                to ``config.json`` in the same directory as this module.
        """
        if config_path is None:
            # Default to config.json in the same directory as this file
            self.config_path = Path(__file__).parent / "config.json"
        else:
            self.config_path = Path(config_path)

        self.config = self._load_config()

    def _load_config(self) -> Dict[str, Any]:
        """Load configuration from defaults, file and environment variables.

        Loading the file is best-effort: if it cannot be read or parsed, a
        warning is printed and the defaults (plus environment overrides) are
        used instead.

        Returns:
            The fully merged configuration dictionary.
        """
        config = self._get_default_config()

        # Load from file if it exists
        if self.config_path.exists():
            try:
                with open(self.config_path, 'r', encoding='utf-8') as f:
                    file_config = json.load(f)
                if not isinstance(file_config, dict):
                    # A list/scalar at top level cannot be merged key-by-key.
                    raise ValueError("top-level JSON value must be an object")
                config = self._merge_config(config, file_config)
            except Exception as e:
                # Deliberately broad: a broken config file must not prevent
                # the application from starting with defaults.
                print(f"Warning: Could not load config file {self.config_path}: {e}")

        # Override with environment variables
        return self._apply_env_overrides(config)

    def _get_default_config(self) -> Dict[str, Any]:
        """Return a fresh dictionary holding the default configuration values."""
        return {
            "ai_providers": {
                "openai": {
                    "api_key": "",
                    "model": "gpt-4",
                    "base_url": "https://api.openai.com/v1"
                },
                "azure_openai": {
                    "api_key": "",
                    "endpoint": "https://your-resource-name.openai.azure.com/",
                    "deployment_name": "",
                    "api_version": "2024-02-15-preview"
                },
                "google_genai": {
                    "api_key": "",
                    "model": "gemini-pro"
                },
                "anthropic": {
                    "api_key": "",
                    "model": "claude-3-sonnet-20240229"
                }
            },
            "github": {
                "token": "",
                "rate_limit_per_hour": 5000
            },
            "graphrag": {
                "timeout_minutes": 50,
                "batch_size": 50,
                "cache_hours": 24,
                "max_repos_per_request": 1000
            },
            "server": {
                "host": "127.0.0.1",
                "port": 5002,
                "debug": False,
                "timeout": 3600
            },
            "database": {
                "path": "kuzu",
                "auto_cleanup": True,
                "backup_enabled": False
            }
        }

    def _merge_config(self, base: Dict[str, Any], override: Dict[str, Any]) -> Dict[str, Any]:
        """Recursively merge ``override`` into a copy of ``base``.

        Where both sides hold a dict under the same key the dicts are merged
        key-by-key; in every other case the value from ``override`` replaces
        the one from ``base``.

        Neither input is modified, and the result shares no mutable objects
        with either of them, so later changes to ``base`` or ``override``
        cannot leak into the merged configuration (or vice versa).

        Args:
            base: Configuration providing the starting values.
            override: Configuration whose values take precedence.

        Returns:
            A new, independent merged dictionary.
        """
        result = copy.deepcopy(base)
        self._merge_into(result, override)
        return result

    @staticmethod
    def _merge_into(target: Dict[str, Any], override: Dict[str, Any]) -> None:
        """Merge ``override`` into ``target`` in place (helper for ``_merge_config``)."""
        for key, value in override.items():
            current = target.get(key)
            if isinstance(current, dict) and isinstance(value, dict):
                ConfigManager._merge_into(current, value)
            else:
                # Copy so the caller's objects are never aliased by the config.
                target[key] = copy.deepcopy(value)

    def _apply_env_overrides(self, config: Dict[str, Any]) -> Dict[str, Any]:
        """Apply environment variable overrides to ``config`` in place.

        Each variable listed in ``_ENV_OVERRIDES`` that is set to a non-empty
        value replaces the corresponding configuration entry. Intermediate
        sections that are missing or are not dictionaries (for example a
        config file containing ``"github": null``) are replaced by fresh
        dictionaries instead of raising.

        Args:
            config: Configuration dictionary to update.

        Returns:
            The same ``config`` object, after applying the overrides.
        """
        for env_name, key_path in self._ENV_OVERRIDES:
            env_value = os.getenv(env_name)
            if not env_value:
                # Unset and empty variables leave the configured value alone.
                continue

            *parents, leaf = key_path
            section = config
            for key in parents:
                child = section.get(key)
                if not isinstance(child, dict):
                    child = {}
                    section[key] = child
                section = child
            section[leaf] = env_value

        return config

    def get(self, key_path: str, default: Any = None) -> Any:
        """Get a configuration value using dot notation.

        Args:
            key_path: Dot-separated path, e.g. ``'ai_providers.openai.api_key'``.
            default: Value returned when any segment of the path is missing
                or an intermediate value cannot be indexed by that segment.

        Returns:
            The stored value (not a copy), or ``default``.
        """
        value = self.config
        try:
            for key in key_path.split('.'):
                value = value[key]
        except (KeyError, TypeError):
            return default
        return value

    def get_ai_provider_config(self, provider: str) -> Dict[str, Any]:
        """Get configuration for a specific AI provider (``{}`` if unknown)."""
        return self.get(f"ai_providers.{provider}", {})

    def get_github_token(self) -> str:
        """Get GitHub token from configuration (``""`` if not configured)."""
        return self.get("github.token", "")

    def get_graphrag_config(self) -> Dict[str, Any]:
        """Get GraphRAG configuration."""
        return self.get("graphrag", {})

    def get_server_config(self) -> Dict[str, Any]:
        """Get server configuration."""
        return self.get("server", {})

    def get_database_config(self) -> Dict[str, Any]:
        """Get database configuration."""
        return self.get("database", {})

    def update_config(self, updates: Dict[str, Any]) -> None:
        """Update the in-memory configuration with new values.

        ``updates`` is merged recursively (see ``_merge_config``); nothing is
        written to disk until ``save_config`` is called.
        """
        self.config = self._merge_config(self.config, updates)

    def save_config(self) -> bool:
        """Save current configuration to ``config_path``.

        The configuration is serialized before the file is opened, so a value
        that cannot be represented as JSON leaves the existing file untouched
        rather than truncating it.

        NOTE: the in-memory configuration includes values taken from
        environment variables, so those (including API keys) are written too.

        Returns:
            ``True`` on success, ``False`` if serializing or writing failed
            (an error message is printed in that case).
        """
        try:
            serialized = json.dumps(self.config, indent=2, ensure_ascii=False)
            with open(self.config_path, 'w', encoding='utf-8') as f:
                f.write(serialized)
            return True
        except Exception as e:
            print(f"Error saving config: {e}")
            return False

    def create_example_config(self) -> bool:
        """Create ``config.example.json`` next to the config file.

        The example contains the default configuration values.

        Returns:
            ``True`` on success, ``False`` if the file could not be written
            (an error message is printed in that case).
        """
        example_path = self.config_path.parent / "config.example.json"
        try:
            serialized = json.dumps(self._get_default_config(), indent=2, ensure_ascii=False)
            with open(example_path, 'w', encoding='utf-8') as f:
                f.write(serialized)
            print(f"Example configuration created at: {example_path}")
            return True
        except Exception as e:
            print(f"Error creating example config: {e}")
            return False

    def validate_config(self) -> Dict[str, Any]:
        """Validate configuration and return validation results.

        A missing API key or GitHub token only produces a warning; the
        configuration is invalid only when no AI provider has an API key.

        Returns:
            A dict with the keys ``valid`` (bool), ``errors``, ``warnings``
            and ``missing_keys`` (lists of strings).
        """
        validation = {
            "valid": True,
            "errors": [],
            "warnings": [],
            "missing_keys": []
        }

        # Check required AI provider keys
        configured_providers = 0
        for provider in self._AI_PROVIDERS:
            if self.get(f"ai_providers.{provider}.api_key"):
                configured_providers += 1
            else:
                validation["missing_keys"].append(f"ai_providers.{provider}.api_key")
                validation["warnings"].append(f"No API key configured for {provider}")

        # Check GitHub token
        if not self.get_github_token():
            validation["missing_keys"].append("github.token")
            validation["warnings"].append("No GitHub token configured")

        # Check if at least one AI provider has an API key
        if configured_providers == 0:
            validation["errors"].append("At least one AI provider API key must be configured")
            validation["valid"] = False

        return validation
218+
219+
# Global configuration instance
220+
# Shared module-level instance. It is constructed at import time, so importing
# this module reads config.json next to this file (if present) and applies the
# environment-variable overrides.
config_manager = ConfigManager()

0 commit comments

Comments
 (0)