Skip to content

Commit 87b96f5

Browse files
Authored merge commit: Merge pull request #33 from FSoft-AI4Code/feat/config-maxtokens
make max depth configurable
2 parents edd8248 + 31c8f42 commit 87b96f5

File tree

7 files changed

+66
-7
lines changed

7 files changed

+66
-7
lines changed

README.md

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,9 @@ codewiki config set \
108108
# Configure max token settings
109109
codewiki config set --max-tokens 32768 --max-token-per-module 36369 --max-token-per-leaf-module 16000
110110

111+
# Configure max depth for hierarchical decomposition
112+
codewiki config set --max-depth 3
113+
111114
# Show current configuration
112115
codewiki config show
113116

@@ -216,15 +219,19 @@ codewiki config set --max-token-per-module 40000
216219
# Set max tokens for leaf modules (default: 16000)
217220
codewiki config set --max-token-per-leaf-module 20000
218221

222+
# Set max depth for hierarchical decomposition (default: 2)
223+
codewiki config set --max-depth 3
224+
219225
# Override at runtime for a single generation
220-
codewiki generate --max-tokens 16384 --max-token-per-module 40000
226+
codewiki generate --max-tokens 16384 --max-token-per-module 40000 --max-depth 3
221227
```
222228

223229
| Option | Description | Default |
224230
|--------|-------------|---------|
225231
| `--max-tokens` | Maximum output tokens for LLM response | 32768 |
226232
| `--max-token-per-module` | Input tokens threshold for module clustering | 36369 |
227233
| `--max-token-per-leaf-module` | Input tokens threshold for leaf modules | 16000 |
234+
| `--max-depth` | Maximum depth for hierarchical decomposition | 2 |
228235

229236
### Configuration Storage
230237

codewiki/cli/adapters/doc_generator.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -140,6 +140,7 @@ def generate(self) -> DocumentationJob:
140140
max_tokens=self.config.get('max_tokens', 32768),
141141
max_token_per_module=self.config.get('max_token_per_module', 36369),
142142
max_token_per_leaf_module=self.config.get('max_token_per_leaf_module', 16000),
143+
max_depth=self.config.get('max_depth', 2),
143144
agent_instructions=self.config.get('agent_instructions')
144145
)
145146

codewiki/cli/commands/config.py

Lines changed: 28 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,11 @@ def config_group():
7878
type=int,
7979
help="Maximum tokens per leaf module (default: 16000)"
8080
)
81+
@click.option(
82+
"--max-depth",
83+
type=int,
84+
help="Maximum depth for hierarchical decomposition (default: 2)"
85+
)
8186
def config_set(
8287
api_key: Optional[str],
8388
base_url: Optional[str],
@@ -86,7 +91,8 @@ def config_set(
8691
fallback_model: Optional[str],
8792
max_tokens: Optional[int],
8893
max_token_per_module: Optional[int],
89-
max_token_per_leaf_module: Optional[int]
94+
max_token_per_leaf_module: Optional[int],
95+
max_depth: Optional[int]
9096
):
9197
"""
9298
Set configuration values for CodeWiki.
@@ -114,10 +120,14 @@ def config_set(
114120
\b
115121
# Set all max token settings
116122
$ codewiki config set --max-tokens 32768 --max-token-per-module 40000 --max-token-per-leaf-module 20000
123+
124+
\b
125+
# Set max depth for hierarchical decomposition
126+
$ codewiki config set --max-depth 3
117127
"""
118128
try:
119129
# Check if at least one option is provided
120-
if not any([api_key, base_url, main_model, cluster_model, fallback_model, max_tokens, max_token_per_module, max_token_per_leaf_module]):
130+
if not any([api_key, base_url, main_model, cluster_model, fallback_model, max_tokens, max_token_per_module, max_token_per_leaf_module, max_depth]):
121131
click.echo("No options provided. Use --help for usage information.")
122132
sys.exit(EXIT_CONFIG_ERROR)
123133

@@ -154,6 +164,11 @@ def config_set(
154164
raise ConfigurationError("max_token_per_leaf_module must be a positive integer")
155165
validated_data['max_token_per_leaf_module'] = max_token_per_leaf_module
156166

167+
if max_depth is not None:
168+
if max_depth < 1:
169+
raise ConfigurationError("max_depth must be a positive integer")
170+
validated_data['max_depth'] = max_depth
171+
157172
# Create config manager and save
158173
manager = ConfigManager()
159174
manager.load() # Load existing config if present
@@ -166,7 +181,8 @@ def config_set(
166181
fallback_model=validated_data.get('fallback_model'),
167182
max_tokens=validated_data.get('max_tokens'),
168183
max_token_per_module=validated_data.get('max_token_per_module'),
169-
max_token_per_leaf_module=validated_data.get('max_token_per_leaf_module')
184+
max_token_per_leaf_module=validated_data.get('max_token_per_leaf_module'),
185+
max_depth=validated_data.get('max_depth')
170186
)
171187

172188
# Display success messages
@@ -212,6 +228,9 @@ def config_set(
212228
if max_token_per_leaf_module:
213229
click.secho(f"✓ Max token per leaf module: {max_token_per_leaf_module}", fg="green")
214230

231+
if max_depth:
232+
click.secho(f"✓ Max depth: {max_depth}", fg="green")
233+
215234
click.echo("\n" + click.style("Configuration updated successfully.", fg="green", bold=True))
216235

217236
except ConfigurationError as e:
@@ -271,6 +290,7 @@ def config_show(output_json: bool):
271290
"max_tokens": config.max_tokens if config else 32768,
272291
"max_token_per_module": config.max_token_per_module if config else 36369,
273292
"max_token_per_leaf_module": config.max_token_per_leaf_module if config else 16000,
293+
"max_depth": config.max_depth if config else 2,
274294
"agent_instructions": config.agent_instructions.to_dict() if config and config.agent_instructions else {},
275295
"config_file": str(manager.config_file_path)
276296
}
@@ -311,6 +331,11 @@ def config_show(output_json: bool):
311331
click.echo(f" Max Token/Module: {config.max_token_per_module}")
312332
click.echo(f" Max Token/Leaf Module: {config.max_token_per_leaf_module}")
313333

334+
click.echo()
335+
click.secho("Decomposition Settings", fg="cyan", bold=True)
336+
if config:
337+
click.echo(f" Max Depth: {config.max_depth}")
338+
314339
click.echo()
315340
click.secho("Agent Instructions", fg="cyan", bold=True)
316341
if config and config.agent_instructions and not config.agent_instructions.is_empty():

codewiki/cli/commands/generate.py

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -120,6 +120,12 @@ def parse_patterns(patterns_str: str) -> List[str]:
120120
default=None,
121121
help="Maximum tokens per leaf module (overrides config)",
122122
)
123+
@click.option(
124+
"--max-depth",
125+
type=int,
126+
default=None,
127+
help="Maximum depth for hierarchical decomposition (overrides config)",
128+
)
123129
@click.pass_context
124130
def generate_command(
125131
ctx,
@@ -135,7 +141,8 @@ def generate_command(
135141
verbose: bool,
136142
max_tokens: Optional[int],
137143
max_token_per_module: Optional[int],
138-
max_token_per_leaf_module: Optional[int]
144+
max_token_per_leaf_module: Optional[int],
145+
max_depth: Optional[int]
139146
):
140147
"""
141148
Generate comprehensive documentation for a code repository.
@@ -176,6 +183,10 @@ def generate_command(
176183
\b
177184
# Set all max token limits
178185
$ codewiki generate --max-tokens 32768 --max-token-per-module 40000 --max-token-per-leaf-module 20000
186+
187+
\b
188+
# Override max depth for hierarchical decomposition
189+
$ codewiki generate --max-depth 3
179190
"""
180191
logger = create_logger(verbose=verbose)
181192
start_time = time.time()
@@ -310,9 +321,11 @@ def generate_command(
310321
effective_max_tokens = max_tokens if max_tokens is not None else config.max_tokens
311322
effective_max_token_per_module = max_token_per_module if max_token_per_module is not None else config.max_token_per_module
312323
effective_max_token_per_leaf = max_token_per_leaf_module if max_token_per_leaf_module is not None else config.max_token_per_leaf_module
324+
effective_max_depth = max_depth if max_depth is not None else config.max_depth
313325
logger.debug(f"Max tokens: {effective_max_tokens}")
314326
logger.debug(f"Max token/module: {effective_max_token_per_module}")
315327
logger.debug(f"Max token/leaf module: {effective_max_token_per_leaf}")
328+
logger.debug(f"Max depth: {effective_max_depth}")
316329

317330
# Get agent instructions (merge runtime with persistent)
318331
agent_instructions_dict = None
@@ -344,6 +357,8 @@ def generate_command(
344357
'max_tokens': max_tokens if max_tokens is not None else config.max_tokens,
345358
'max_token_per_module': max_token_per_module if max_token_per_module is not None else config.max_token_per_module,
346359
'max_token_per_leaf_module': max_token_per_leaf_module if max_token_per_leaf_module is not None else config.max_token_per_leaf_module,
360+
# Max depth setting (runtime override takes precedence)
361+
'max_depth': max_depth if max_depth is not None else config.max_depth,
347362
},
348363
verbose=verbose,
349364
generate_html=github_pages

codewiki/cli/config_manager.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -91,7 +91,8 @@ def save(
9191
default_output: Optional[str] = None,
9292
max_tokens: Optional[int] = None,
9393
max_token_per_module: Optional[int] = None,
94-
max_token_per_leaf_module: Optional[int] = None
94+
max_token_per_leaf_module: Optional[int] = None,
95+
max_depth: Optional[int] = None
9596
):
9697
"""
9798
Save configuration to file and keyring.
@@ -106,6 +107,7 @@ def save(
106107
max_tokens: Maximum tokens for LLM response
107108
max_token_per_module: Maximum tokens per module for clustering
108109
max_token_per_leaf_module: Maximum tokens per leaf module
110+
max_depth: Maximum depth for hierarchical decomposition
109111
"""
110112
# Ensure config directory exists
111113
try:
@@ -145,6 +147,8 @@ def save(
145147
self._config.max_token_per_module = max_token_per_module
146148
if max_token_per_leaf_module is not None:
147149
self._config.max_token_per_leaf_module = max_token_per_leaf_module
150+
if max_depth is not None:
151+
self._config.max_depth = max_depth
148152

149153
# Validate configuration (only if base fields are set)
150154
if self._config.base_url and self._config.main_model and self._config.cluster_model:

codewiki/cli/models/config.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,7 @@ class Configuration:
116116
max_tokens: Maximum tokens for LLM response (default: 32768)
117117
max_token_per_module: Maximum tokens per module for clustering (default: 36369)
118118
max_token_per_leaf_module: Maximum tokens per leaf module (default: 16000)
119+
max_depth: Maximum depth for hierarchical decomposition (default: 2)
119120
agent_instructions: Custom agent instructions for documentation generation
120121
"""
121122
base_url: str
@@ -126,6 +127,7 @@ class Configuration:
126127
max_tokens: int = 32768
127128
max_token_per_module: int = 36369
128129
max_token_per_leaf_module: int = 16000
130+
max_depth: int = 2
129131
agent_instructions: AgentInstructions = field(default_factory=AgentInstructions)
130132

131133
def validate(self):
@@ -150,6 +152,7 @@ def to_dict(self) -> dict:
150152
'max_tokens': self.max_tokens,
151153
'max_token_per_module': self.max_token_per_module,
152154
'max_token_per_leaf_module': self.max_token_per_leaf_module,
155+
'max_depth': self.max_depth,
153156
}
154157
if self.agent_instructions and not self.agent_instructions.is_empty():
155158
result['agent_instructions'] = self.agent_instructions.to_dict()
@@ -179,6 +182,7 @@ def from_dict(cls, data: dict) -> 'Configuration':
179182
max_tokens=data.get('max_tokens', 32768),
180183
max_token_per_module=data.get('max_token_per_module', 36369),
181184
max_token_per_leaf_module=data.get('max_token_per_leaf_module', 16000),
185+
max_depth=data.get('max_depth', 2),
182186
agent_instructions=agent_instructions,
183187
)
184188

@@ -232,6 +236,7 @@ def to_backend_config(self, repo_path: str, output_dir: str, api_key: str, runti
232236
max_tokens=self.max_tokens,
233237
max_token_per_module=self.max_token_per_module,
234238
max_token_per_leaf_module=self.max_token_per_leaf_module,
239+
max_depth=self.max_depth,
235240
agent_instructions=final_instructions.to_dict() if final_instructions else None
236241
)
237242

codewiki/src/config.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -158,6 +158,7 @@ def from_cli(
158158
max_tokens: int = DEFAULT_MAX_TOKENS,
159159
max_token_per_module: int = DEFAULT_MAX_TOKEN_PER_MODULE,
160160
max_token_per_leaf_module: int = DEFAULT_MAX_TOKEN_PER_LEAF_MODULE,
161+
max_depth: int = MAX_DEPTH,
161162
agent_instructions: Optional[Dict[str, Any]] = None
162163
) -> 'Config':
163164
"""
@@ -174,6 +175,7 @@ def from_cli(
174175
max_tokens: Maximum tokens for LLM response
175176
max_token_per_module: Maximum tokens per module for clustering
176177
max_token_per_leaf_module: Maximum tokens per leaf module
178+
max_depth: Maximum depth for hierarchical decomposition
177179
agent_instructions: Custom agent instructions dict
178180
179181
Returns:
@@ -187,7 +189,7 @@ def from_cli(
187189
output_dir=base_output_dir,
188190
dependency_graph_dir=os.path.join(base_output_dir, DEPENDENCY_GRAPHS_DIR),
189191
docs_dir=output_dir,
190-
max_depth=MAX_DEPTH,
192+
max_depth=max_depth,
191193
llm_base_url=llm_base_url,
192194
llm_api_key=llm_api_key,
193195
main_model=main_model,

0 commit comments

Comments (0)