Skip to content

Commit 87b96f5

Browse files
Authored merge commit: Merge pull request #33 from FSoft-AI4Code/feat/config-maxtokens
make max depth configurable
2 parents edd8248 + 31c8f42 commit 87b96f5

File tree

7 files changed

+66
-7
lines changed

7 files changed

+66
-7
lines changed

README.md

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,9 @@ codewiki config set \
108108
# Configure max token settings
109109
codewiki config set --max-tokens 32768 --max-token-per-module 36369 --max-token-per-leaf-module 16000
110110

111+
# Configure max depth for hierarchical decomposition
112+
codewiki config set --max-depth 3
113+
111114
# Show current configuration
112115
codewiki config show
113116

@@ -216,15 +219,19 @@ codewiki config set --max-token-per-module 40000
216219
# Set max tokens for leaf modules (default: 16000)
217220
codewiki config set --max-token-per-leaf-module 20000
218221

222+
# Set max depth for hierarchical decomposition (default: 2)
223+
codewiki config set --max-depth 3
224+
219225
# Override at runtime for a single generation
220-
codewiki generate --max-tokens 16384 --max-token-per-module 40000
226+
codewiki generate --max-tokens 16384 --max-token-per-module 40000 --max-depth 3
221227
```
222228

223229
| Option | Description | Default |
224230
|--------|-------------|---------|
225231
| `--max-tokens` | Maximum output tokens for LLM response | 32768 |
226232
| `--max-token-per-module` | Input tokens threshold for module clustering | 36369 |
227233
| `--max-token-per-leaf-module` | Input tokens threshold for leaf modules | 16000 |
234+
| `--max-depth` | Maximum depth for hierarchical decomposition | 2 |
228235

229236
### Configuration Storage
230237

codewiki/cli/adapters/doc_generator.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -140,6 +140,7 @@ def generate(self) -> DocumentationJob:
140140
max_tokens=self.config.get('max_tokens', 32768),
141141
max_token_per_module=self.config.get('max_token_per_module', 36369),
142142
max_token_per_leaf_module=self.config.get('max_token_per_leaf_module', 16000),
143+
max_depth=self.config.get('max_depth', 2),
143144
agent_instructions=self.config.get('agent_instructions')
144145
)
145146

codewiki/cli/commands/config.py

Lines changed: 28 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,11 @@ def config_group():
7878
type=int,
7979
help="Maximum tokens per leaf module (default: 16000)"
8080
)
81+
@click.option(
82+
"--max-depth",
83+
type=int,
84+
help="Maximum depth for hierarchical decomposition (default: 2)"
85+
)
8186
def config_set(
8287
api_key: Optional[str],
8388
base_url: Optional[str],
@@ -86,7 +91,8 @@ def config_set(
8691
fallback_model: Optional[str],
8792
max_tokens: Optional[int],
8893
max_token_per_module: Optional[int],
89-
max_token_per_leaf_module: Optional[int]
94+
max_token_per_leaf_module: Optional[int],
95+
max_depth: Optional[int]
9096
):
9197
"""
9298
Set configuration values for CodeWiki.
@@ -114,10 +120,14 @@ def config_set(
114120
\b
115121
# Set all max token settings
116122
$ codewiki config set --max-tokens 32768 --max-token-per-module 40000 --max-token-per-leaf-module 20000
123+
124+
\b
125+
# Set max depth for hierarchical decomposition
126+
$ codewiki config set --max-depth 3
117127
"""
118128
try:
119129
# Check if at least one option is provided
120-
if not any([api_key, base_url, main_model, cluster_model, fallback_model, max_tokens, max_token_per_module, max_token_per_leaf_module]):
130+
if not any([api_key, base_url, main_model, cluster_model, fallback_model, max_tokens, max_token_per_module, max_token_per_leaf_module, max_depth]):
121131
click.echo("No options provided. Use --help for usage information.")
122132
sys.exit(EXIT_CONFIG_ERROR)
123133

@@ -154,6 +164,11 @@ def config_set(
154164
raise ConfigurationError("max_token_per_leaf_module must be a positive integer")
155165
validated_data['max_token_per_leaf_module'] = max_token_per_leaf_module
156166

167+
if max_depth is not None:
168+
if max_depth < 1:
169+
raise ConfigurationError("max_depth must be a positive integer")
170+
validated_data['max_depth'] = max_depth
171+
157172
# Create config manager and save
158173
manager = ConfigManager()
159174
manager.load() # Load existing config if present
@@ -166,7 +181,8 @@ def config_set(
166181
fallback_model=validated_data.get('fallback_model'),
167182
max_tokens=validated_data.get('max_tokens'),
168183
max_token_per_module=validated_data.get('max_token_per_module'),
169-
max_token_per_leaf_module=validated_data.get('max_token_per_leaf_module')
184+
max_token_per_leaf_module=validated_data.get('max_token_per_leaf_module'),
185+
max_depth=validated_data.get('max_depth')
170186
)
171187

172188
# Display success messages
@@ -212,6 +228,9 @@ def config_set(
212228
if max_token_per_leaf_module:
213229
click.secho(f"✓ Max token per leaf module: {max_token_per_leaf_module}", fg="green")
214230

231+
if max_depth:
232+
click.secho(f"✓ Max depth: {max_depth}", fg="green")
233+
215234
click.echo("\n" + click.style("Configuration updated successfully.", fg="green", bold=True))
216235

217236
except ConfigurationError as e:
@@ -271,6 +290,7 @@ def config_show(output_json: bool):
271290
"max_tokens": config.max_tokens if config else 32768,
272291
"max_token_per_module": config.max_token_per_module if config else 36369,
273292
"max_token_per_leaf_module": config.max_token_per_leaf_module if config else 16000,
293+
"max_depth": config.max_depth if config else 2,
274294
"agent_instructions": config.agent_instructions.to_dict() if config and config.agent_instructions else {},
275295
"config_file": str(manager.config_file_path)
276296
}
@@ -311,6 +331,11 @@ def config_show(output_json: bool):
311331
click.echo(f" Max Token/Module: {config.max_token_per_module}")
312332
click.echo(f" Max Token/Leaf Module: {config.max_token_per_leaf_module}")
313333

334+
click.echo()
335+
click.secho("Decomposition Settings", fg="cyan", bold=True)
336+
if config:
337+
click.echo(f" Max Depth: {config.max_depth}")
338+
314339
click.echo()
315340
click.secho("Agent Instructions", fg="cyan", bold=True)
316341
if config and config.agent_instructions and not config.agent_instructions.is_empty():

codewiki/cli/commands/generate.py

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -120,6 +120,12 @@ def parse_patterns(patterns_str: str) -> List[str]:
120120
default=None,
121121
help="Maximum tokens per leaf module (overrides config)",
122122
)
123+
@click.option(
124+
"--max-depth",
125+
type=int,
126+
default=None,
127+
help="Maximum depth for hierarchical decomposition (overrides config)",
128+
)
123129
@click.pass_context
124130
def generate_command(
125131
ctx,
@@ -135,7 +141,8 @@ def generate_command(
135141
verbose: bool,
136142
max_tokens: Optional[int],
137143
max_token_per_module: Optional[int],
138-
max_token_per_leaf_module: Optional[int]
144+
max_token_per_leaf_module: Optional[int],
145+
max_depth: Optional[int]
139146
):
140147
"""
141148
Generate comprehensive documentation for a code repository.
@@ -176,6 +183,10 @@ def generate_command(
176183
\b
177184
# Set all max token limits
178185
$ codewiki generate --max-tokens 32768 --max-token-per-module 40000 --max-token-per-leaf-module 20000
186+
187+
\b
188+
# Override max depth for hierarchical decomposition
189+
$ codewiki generate --max-depth 3
179190
"""
180191
logger = create_logger(verbose=verbose)
181192
start_time = time.time()
@@ -310,9 +321,11 @@ def generate_command(
310321
effective_max_tokens = max_tokens if max_tokens is not None else config.max_tokens
311322
effective_max_token_per_module = max_token_per_module if max_token_per_module is not None else config.max_token_per_module
312323
effective_max_token_per_leaf = max_token_per_leaf_module if max_token_per_leaf_module is not None else config.max_token_per_leaf_module
324+
effective_max_depth = max_depth if max_depth is not None else config.max_depth
313325
logger.debug(f"Max tokens: {effective_max_tokens}")
314326
logger.debug(f"Max token/module: {effective_max_token_per_module}")
315327
logger.debug(f"Max token/leaf module: {effective_max_token_per_leaf}")
328+
logger.debug(f"Max depth: {effective_max_depth}")
316329

317330
# Get agent instructions (merge runtime with persistent)
318331
agent_instructions_dict = None
@@ -344,6 +357,8 @@ def generate_command(
344357
'max_tokens': max_tokens if max_tokens is not None else config.max_tokens,
345358
'max_token_per_module': max_token_per_module if max_token_per_module is not None else config.max_token_per_module,
346359
'max_token_per_leaf_module': max_token_per_leaf_module if max_token_per_leaf_module is not None else config.max_token_per_leaf_module,
360+
# Max depth setting (runtime override takes precedence)
361+
'max_depth': max_depth if max_depth is not None else config.max_depth,
347362
},
348363
verbose=verbose,
349364
generate_html=github_pages

codewiki/cli/config_manager.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -91,7 +91,8 @@ def save(
9191
default_output: Optional[str] = None,
9292
max_tokens: Optional[int] = None,
9393
max_token_per_module: Optional[int] = None,
94-
max_token_per_leaf_module: Optional[int] = None
94+
max_token_per_leaf_module: Optional[int] = None,
95+
max_depth: Optional[int] = None
9596
):
9697
"""
9798
Save configuration to file and keyring.
@@ -106,6 +107,7 @@ def save(
106107
max_tokens: Maximum tokens for LLM response
107108
max_token_per_module: Maximum tokens per module for clustering
108109
max_token_per_leaf_module: Maximum tokens per leaf module
110+
max_depth: Maximum depth for hierarchical decomposition
109111
"""
110112
# Ensure config directory exists
111113
try:
@@ -145,6 +147,8 @@ def save(
145147
self._config.max_token_per_module = max_token_per_module
146148
if max_token_per_leaf_module is not None:
147149
self._config.max_token_per_leaf_module = max_token_per_leaf_module
150+
if max_depth is not None:
151+
self._config.max_depth = max_depth
148152

149153
# Validate configuration (only if base fields are set)
150154
if self._config.base_url and self._config.main_model and self._config.cluster_model:

codewiki/cli/models/config.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,7 @@ class Configuration:
116116
max_tokens: Maximum tokens for LLM response (default: 32768)
117117
max_token_per_module: Maximum tokens per module for clustering (default: 36369)
118118
max_token_per_leaf_module: Maximum tokens per leaf module (default: 16000)
119+
max_depth: Maximum depth for hierarchical decomposition (default: 2)
119120
agent_instructions: Custom agent instructions for documentation generation
120121
"""
121122
base_url: str
@@ -126,6 +127,7 @@ class Configuration:
126127
max_tokens: int = 32768
127128
max_token_per_module: int = 36369
128129
max_token_per_leaf_module: int = 16000
130+
max_depth: int = 2
129131
agent_instructions: AgentInstructions = field(default_factory=AgentInstructions)
130132

131133
def validate(self):
@@ -150,6 +152,7 @@ def to_dict(self) -> dict:
150152
'max_tokens': self.max_tokens,
151153
'max_token_per_module': self.max_token_per_module,
152154
'max_token_per_leaf_module': self.max_token_per_leaf_module,
155+
'max_depth': self.max_depth,
153156
}
154157
if self.agent_instructions and not self.agent_instructions.is_empty():
155158
result['agent_instructions'] = self.agent_instructions.to_dict()
@@ -179,6 +182,7 @@ def from_dict(cls, data: dict) -> 'Configuration':
179182
max_tokens=data.get('max_tokens', 32768),
180183
max_token_per_module=data.get('max_token_per_module', 36369),
181184
max_token_per_leaf_module=data.get('max_token_per_leaf_module', 16000),
185+
max_depth=data.get('max_depth', 2),
182186
agent_instructions=agent_instructions,
183187
)
184188

@@ -232,6 +236,7 @@ def to_backend_config(self, repo_path: str, output_dir: str, api_key: str, runti
232236
max_tokens=self.max_tokens,
233237
max_token_per_module=self.max_token_per_module,
234238
max_token_per_leaf_module=self.max_token_per_leaf_module,
239+
max_depth=self.max_depth,
235240
agent_instructions=final_instructions.to_dict() if final_instructions else None
236241
)
237242

codewiki/src/config.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -158,6 +158,7 @@ def from_cli(
158158
max_tokens: int = DEFAULT_MAX_TOKENS,
159159
max_token_per_module: int = DEFAULT_MAX_TOKEN_PER_MODULE,
160160
max_token_per_leaf_module: int = DEFAULT_MAX_TOKEN_PER_LEAF_MODULE,
161+
max_depth: int = MAX_DEPTH,
161162
agent_instructions: Optional[Dict[str, Any]] = None
162163
) -> 'Config':
163164
"""
@@ -174,6 +175,7 @@ def from_cli(
174175
max_tokens: Maximum tokens for LLM response
175176
max_token_per_module: Maximum tokens per module for clustering
176177
max_token_per_leaf_module: Maximum tokens per leaf module
178+
max_depth: Maximum depth for hierarchical decomposition
177179
agent_instructions: Custom agent instructions dict
178180
179181
Returns:
@@ -187,7 +189,7 @@ def from_cli(
187189
output_dir=base_output_dir,
188190
dependency_graph_dir=os.path.join(base_output_dir, DEPENDENCY_GRAPHS_DIR),
189191
docs_dir=output_dir,
190-
max_depth=MAX_DEPTH,
192+
max_depth=max_depth,
191193
llm_base_url=llm_base_url,
192194
llm_api_key=llm_api_key,
193195
main_model=main_model,

0 commit comments

Comments (0)