-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathexplorer.py
More file actions
95 lines (82 loc) · 3.54 KB
/
explorer.py
File metadata and controls
95 lines (82 loc) · 3.54 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
"""
explorer.py — Explorer node.
Reads relevant files, passes clean context to coder.
Zero LLM calls — pure filesystem reads.
FIXES:
- Only puts REAL file paths into files_to_edit (not "search_results" fake keys)
- Passes file contents as a single system message so the coder gets a clean conversation history
"""
from __future__ import annotations

import re

from rich.console import Console

from tools.filesystem import read_file, list_directory, search_code
from .state import AgentState
# Module-level Rich console; explorer_node uses it to print progress lines.
console = Console()
# Matches the leading "path/to/file.py:" prefix of a grep-style result line
# ("path/file.py:line:content") and captures the path.
_GREP_PY_PATH = re.compile(r"^([^:]+\.py):")


def _extract_py_paths(grep_output: str) -> list[str]:
    """Return the unique ``.py`` paths named in grep-style search output.

    Lines that do not start with ``<something>.py:`` are ignored. Paths are
    returned in order of first appearance, without duplicates.
    """
    if not grep_output:
        return []
    matches = (_GREP_PY_PATH.match(line) for line in grep_output.splitlines())
    return list(dict.fromkeys(m.group(1) for m in matches if m))


def explorer_node(state: AgentState) -> dict:
    """Read the files relevant to the issue and package them as context.

    The node performs filesystem reads and code searches only:

    1. Read every path listed in ``state["files_to_edit"]``, skipping
       entries that are empty, start with ``"_"`` or contain no ``"."``.
    2. If nothing could be read, search ``*.py`` files for the first
       keyword-like word of ``state["issue_title"]`` (falling back to
       ``"def "``) and read up to four of the files that matched.
    3. Search ``*.py`` files for ``def test_`` and read up to two of the
       matching files that are not already loaded.

    A read is treated as failed when the returned text starts with
    ``"ERROR"``.

    Args:
        state: Agent state mapping. ``"repo_path"`` is required;
            ``"files_to_edit"``, ``"issue_title"`` and ``"total_tokens"``
            are optional.

    Returns:
        A dict with ``"messages"`` (a one-element list holding a single
        ``role="system"`` message with all file contents), ``"files_to_edit"``
        (the paths that were actually read) and ``"total_tokens"`` (passed
        through from ``state``, defaulting to 0).

    Raises:
        KeyError: If ``state`` has no ``"repo_path"`` entry.
    """
    console.print("[bold cyan]Exploring codebase...[/bold cyan]")
    repo_path = state["repo_path"]
    # `or` (rather than a .get default) also covers keys present but None.
    files_hint = state.get("files_to_edit") or []
    issue_title = state.get("issue_title") or ""

    real_files: dict[str, str] = {}  # path → content, ONLY real file paths

    # 1. Read every file the planner suggested.
    for rel_path in files_hint:
        # Skip fake/synthetic keys (and anything that is not a usable string).
        # NOTE(review): the "_" prefix rule also skips real root-level files
        # such as "_utils.py" — kept as-is to preserve existing behavior.
        if not isinstance(rel_path, str) or not rel_path:
            continue
        if rel_path.startswith("_") or "." not in rel_path:
            continue
        if rel_path in real_files:
            continue  # duplicate hint; already read
        content = read_file.invoke({"repo_root": repo_path, "path": rel_path})
        if content.startswith("ERROR"):
            console.print(f"[dim] {content}[/dim]")
            continue
        real_files[rel_path] = content
        console.print(f"[dim] read: {rel_path} ({len(content)} chars)[/dim]")

    # 2. If no files found, search by function name from issue title.
    if not real_files:
        words = [
            w for w in issue_title.split()
            if len(w) > 3 and w.replace("_", "").isalpha()
        ]
        query = words[0] if words else "def "
        hits = search_code.invoke({
            "repo_root": repo_path,
            "pattern": query,
            "file_pattern": "*.py",
        })
        for found in _extract_py_paths(hits)[:4]:
            content = read_file.invoke({"repo_root": repo_path, "path": found})
            if not content.startswith("ERROR"):
                real_files[found] = content
                console.print(f"[dim] found+read: {found}[/dim]")

    # 3. Always read test files — coder needs to know what's expected.
    test_hits = search_code.invoke({
        "repo_root": repo_path,
        "pattern": "def test_",
        "file_pattern": "*.py",
    })
    for test_path in _extract_py_paths(test_hits)[:2]:
        if test_path in real_files:
            continue
        content = read_file.invoke({"repo_root": repo_path, "path": test_path})
        if not content.startswith("ERROR"):
            real_files[test_path] = content
            console.print(f"[dim] read test: {test_path}[/dim]")

    console.print(f"[dim] Explorer done — {len(real_files)} file(s) read, 0 API calls[/dim]")

    # Build a SINGLE clean system-level context block.
    # role="system" avoids creating a double-user-message issue downstream.
    context_body = "\n\n".join(
        f"### File: {path}\n```\n{content}\n```"
        for path, content in real_files.items()
    )
    context_msg = {
        "role": "system",
        "content": "## Codebase Files\n\n" + context_body,
    }

    return {
        "messages": [context_msg],  # fresh, clean message list
        "files_to_edit": list(real_files),  # ONLY real file paths
        "total_tokens": state.get("total_tokens", 0),
    }