Skip to content
14 changes: 13 additions & 1 deletion openhands-tools/openhands/tools/terminal/definition.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,17 @@ class TerminalAction(Action):
"""Schema for bash command execution."""

command: str = Field(
description="The bash command to execute. Can be empty string to view additional logs when previous exit code is `-1`. Can be `C-c` (Ctrl+C) to interrupt the currently running process. Note: You can only execute one bash command at a time. If you need to run multiple commands sequentially, you can use `&&` or `;` to chain them together." # noqa
description=(
"The bash command to execute. Can be empty string to view"
" additional logs when previous exit code is `-1`. Can be a"
" special key name when `is_input` is True: `C-c` (Ctrl+C),"
" `C-d` (Ctrl+D/EOF), `C-z` (Ctrl+Z), or any `C-<letter>`"
" for Ctrl sequences; navigation keys `UP`, `DOWN`, `LEFT`,"
" `RIGHT`, `HOME`, `END`, `PGUP`, `PGDN`; and `TAB`, `ESC`,"
" `BS` (Backspace), `ENTER`. Note: You can only execute one"
" bash command at a time. If you need to run multiple commands"
" sequentially, you can use `&&` or `;` to chain them together."
)
)
is_input: bool = Field(
default=False,
Expand Down Expand Up @@ -217,6 +227,8 @@ def visualize(self) -> Text:
- Send empty `command` to retrieve additional logs
- Send text (set `command` to the text) to STDIN of the running process
- Send control commands like `C-c` (Ctrl+C), `C-d` (Ctrl+D), or `C-z` (Ctrl+Z) to interrupt the process
- Send navigation keys like `UP`, `DOWN`, `LEFT`, `RIGHT`, `TAB`, `ESC`, `BS` (Backspace), `HOME`, `END`, `PGUP`, `PGDN`
- Any `C-<letter>` Ctrl sequence is supported (e.g. `C-a`, `C-e`, `C-l`)
- If you do C-c, you can re-start the process with a longer "timeout" parameter to let it run to completion

### Best Practices
Expand Down
4 changes: 4 additions & 0 deletions openhands-tools/openhands/tools/terminal/terminal/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,10 @@

from openhands.tools.terminal.terminal.factory import create_terminal_session
from openhands.tools.terminal.terminal.interface import (
SUPPORTED_SPECIAL_KEYS,
TerminalInterface,
TerminalSessionBase,
parse_ctrl_key,
)
from openhands.tools.terminal.terminal.terminal_session import (
TerminalCommandStatus,
Expand All @@ -27,11 +29,13 @@


# Public names of this package: what ``from <package> import *`` exposes.
__all__ = [
    "SUPPORTED_SPECIAL_KEYS",
    "TerminalInterface",
    "TerminalSessionBase",
    "TmuxTerminal",
    "SubprocessTerminal",
    "TerminalSession",
    "TerminalCommandStatus",
    "create_terminal_session",
    "parse_ctrl_key",
]
54 changes: 53 additions & 1 deletion openhands-tools/openhands/tools/terminal/terminal/interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,50 @@
)


# Named special keys that every TerminalInterface backend has to accept.
# The names are backend-neutral; each backend translates them into its own
# representation (ANSI escape bytes for the PTY backend, tmux key names for
# the tmux backend).
SUPPORTED_SPECIAL_KEYS: frozenset[str] = frozenset(
    (
        # Editing / basic keys
        "ENTER",
        "TAB",
        "BS",
        "ESC",
        # Cursor movement
        "UP",
        "DOWN",
        "LEFT",
        "RIGHT",
        # Paging / line navigation
        "HOME",
        "END",
        "PGUP",
        "PGDN",
        # Ctrl combinations that are also listed by name
        "C-L",
        "C-D",
        "C-C",
    )
)
Comment thread
sjathin marked this conversation as resolved.


def parse_ctrl_key(text: str) -> str | None:
"""Parse a Ctrl-<letter> token and return the normalized form ``C-x``.

Accepts ``C-x``, ``CTRL-x``, and ``CTRL+x`` (case-insensitive)
where *x* is a single ASCII letter. Returns ``None`` when *text*
is not a recognized Ctrl sequence.
"""
upper = text.strip().upper()
key: str | None = None
if upper.startswith("C-"):
key = upper[2:]
elif upper.startswith("CTRL-"):
key = upper[5:]
elif upper.startswith("CTRL+"):
key = upper[5:]
Comment thread
sjathin marked this conversation as resolved.
if key and len(key) == 1 and "A" <= key <= "Z":
return f"C-{key.lower()}"
return None


class TerminalInterface(ABC):
"""Abstract interface for terminal backends.

Expand Down Expand Up @@ -63,9 +107,17 @@ def close(self) -> None:
def send_keys(self, text: str, enter: bool = True) -> None:
    """Send text/keys to the terminal.

    All implementations must support:
    - Plain text (sent verbatim)
    - Named specials: ENTER, TAB, BS, ESC, UP, DOWN, LEFT, RIGHT,
      HOME, END, PGUP, PGDN, C-L, C-D, C-C
    - Generic Ctrl sequences: ``C-<letter>``, ``CTRL-<letter>``,
      ``CTRL+<letter>`` (case-insensitive, a-z)

    Args:
        text: Text or key sequence to send to the terminal.
        enter: Whether to send Enter key after the text.
            Defaults to True. Ignored for special/ctrl keys.
    """

@abstractmethod
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,12 +30,39 @@
)
from openhands.tools.terminal.metadata import CmdOutputMetadata
from openhands.tools.terminal.terminal import TerminalInterface
from openhands.tools.terminal.terminal.interface import (
SUPPORTED_SPECIAL_KEYS,
parse_ctrl_key,
)


logger = get_logger(__name__)

# Byte sequence written to the PTY for the ENTER key.
ENTER = b"\n"

# Translation table for the PTY backend: normalized special-key name ->
# raw bytes (control characters / ANSI escape sequences) written to the PTY.
_SUBPROCESS_SPECIALS: dict[str, bytes] = {
    # Editing / basic keys
    "ENTER": ENTER,
    "TAB": b"\t",
    "BS": b"\x7f",  # Backspace is sent as DEL
    "ESC": b"\x1b",
    # Cursor movement
    "UP": b"\x1b[A",
    "DOWN": b"\x1b[B",
    "RIGHT": b"\x1b[C",
    "LEFT": b"\x1b[D",
    # Paging / line navigation
    "HOME": b"\x1b[H",
    "END": b"\x1b[F",
    "PGUP": b"\x1b[5~",
    "PGDN": b"\x1b[6~",
    # Ctrl combinations listed by name
    "C-L": b"\x0c",  # Ctrl+L
    "C-D": b"\x04",  # Ctrl+D (EOF)
    "C-C": b"\x03",  # Ctrl+C (SIGINT)
}
# Import-time consistency check: the table must cover exactly the canonical
# key set, so a key added in one place cannot be forgotten in the other.
assert set(_SUBPROCESS_SPECIALS) == SUPPORTED_SPECIAL_KEYS, (
    f"_SUBPROCESS_SPECIALS keys {set(_SUBPROCESS_SPECIALS.keys())} "
    f"do not match SUPPORTED_SPECIAL_KEYS {SUPPORTED_SPECIAL_KEYS}"
)


def _normalize_eols(raw: bytes) -> bytes:
# CRLF/LF/CR -> CR, so each logical line is terminated with \r for the TTY
Expand Down Expand Up @@ -341,7 +368,7 @@ def send_keys(self, text: str, enter: bool = True) -> None:
- Plain text
- Ctrl sequences: 'C-a'..'C-z' (Ctrl+C sends ^C byte)
- Special names: 'ENTER','TAB','BS','ESC','UP','DOWN','LEFT','RIGHT',
'HOME','END','PGUP','PGDN','C-L','C-D'
'HOME','END','PGUP','PGDN','C-L','C-D','C-C'

For multi-line commands exceeding _MULTILINE_THRESHOLD lines, sends
line-by-line with pacing to prevent overwhelming the shell's input
Expand All @@ -350,40 +377,19 @@ def send_keys(self, text: str, enter: bool = True) -> None:
if not self._initialized:
raise RuntimeError("PTY terminal is not initialized")

specials = {
"ENTER": ENTER,
"TAB": b"\t",
"BS": b"\x7f", # Backspace (DEL)
"ESC": b"\x1b",
"UP": b"\x1b[A",
"DOWN": b"\x1b[B",
"RIGHT": b"\x1b[C",
"LEFT": b"\x1b[D",
"HOME": b"\x1b[H",
"END": b"\x1b[F",
"PGUP": b"\x1b[5~",
"PGDN": b"\x1b[6~",
"C-L": b"\x0c", # Ctrl+L
"C-D": b"\x04", # Ctrl+D (EOF)
}

upper = text.upper().strip()
payload: bytes | None = None

# Named specials
if upper in specials:
payload = specials[upper]
if upper in _SUBPROCESS_SPECIALS:
payload = _SUBPROCESS_SPECIALS[upper]
# Do NOT auto-append another EOL; special already includes it when needed.
append_eol = False
# Generic Ctrl-<letter>, including C-C (preferred over sending SIGINT directly)
elif upper.startswith(("C-", "CTRL-", "CTRL+")):
# last char after dash/plus is the key
key = upper.split("-", 1)[-1].split("+", 1)[-1]
if len(key) == 1 and "A" <= key <= "Z":
payload = bytes([ord(key) & 0x1F])
else:
# Unknown form; fall back to raw text
payload = text.encode("utf-8", "ignore")
# Generic Ctrl-<letter>
elif (ctrl := parse_ctrl_key(text)) is not None:
# ctrl is "C-x" — extract the letter
key_char = ctrl[-1].upper()
payload = bytes([ord(key_char) & 0x1F])
append_eol = False # ctrl combos are "instant"
else:
# Check if this is a long multi-line command that needs chunked sending
Expand Down
56 changes: 54 additions & 2 deletions openhands-tools/openhands/tools/terminal/terminal/tmux_terminal.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,37 @@
)
from openhands.tools.terminal.metadata import CmdOutputMetadata
from openhands.tools.terminal.terminal import TerminalInterface
from openhands.tools.terminal.terminal.interface import (
SUPPORTED_SPECIAL_KEYS,
parse_ctrl_key,
)


logger = get_logger(__name__)

# Translation table for the tmux backend: normalized special-key name ->
# the key name handed to tmux when sending keys to the pane.
_TMUX_SPECIALS: dict[str, str] = {
    # Editing / basic keys
    "ENTER": "Enter",
    "TAB": "Tab",
    "BS": "BSpace",
    "ESC": "Escape",
    # Cursor movement
    "UP": "Up",
    "DOWN": "Down",
    "LEFT": "Left",
    "RIGHT": "Right",
    # Paging / line navigation
    "HOME": "Home",
    "END": "End",
    "PGUP": "PPage",
    "PGDN": "NPage",
    # Ctrl combinations listed by name
    "C-L": "C-l",
    "C-D": "C-d",
    "C-C": "C-c",
}
# Import-time consistency check: the table must cover exactly the canonical
# key set, so a key added in one place cannot be forgotten in the other.
assert set(_TMUX_SPECIALS) == SUPPORTED_SPECIAL_KEYS, (
    f"_TMUX_SPECIALS keys {set(_TMUX_SPECIALS.keys())} "
    f"do not match SUPPORTED_SPECIAL_KEYS {SUPPORTED_SPECIAL_KEYS}"
)

Comment thread
xingyaoww marked this conversation as resolved.

class TmuxTerminal(TerminalInterface):
"""Tmux-based terminal backend.
Expand Down Expand Up @@ -114,14 +141,39 @@ def close(self) -> None:
def send_keys(self, text: str, enter: bool = True) -> None:
    """Send text or a single key press to the tmux pane.

    ``text`` is classified in this order:

    1. Named specials (the keys of ``_TMUX_SPECIALS``: ENTER, TAB, BS, ESC,
       UP, DOWN, LEFT, RIGHT, HOME, END, PGUP, PGDN, C-L, C-D, C-C),
       matched after stripping whitespace and upper-casing.
    2. Generic Ctrl sequences accepted by ``parse_ctrl_key``
       (``C-a``..``C-z``, ``CTRL-x``, ``CTRL+x``).
    3. Anything else is plain text and is sent literally.

    Args:
        text: Text or key sequence to send.
        enter: Whether to send Enter key after the text.
            Ignored for special/ctrl keys, and skipped when the plain
            text already ends with a newline.

    Raises:
        RuntimeError: If the terminal is not initialized or ``self.pane``
            is not a ``libtmux.Pane``.
    """
    if not self._initialized or not isinstance(self.pane, libtmux.Pane):
        raise RuntimeError("Tmux terminal is not initialized")

    pane = self.pane
    normalized = text.strip().upper()

    # 1) Named specials, then 2) generic Ctrl-<letter>. Both resolve to a
    #    tmux key name that is sent as one key press without a trailing Enter.
    tmux_key = _TMUX_SPECIALS.get(normalized)
    if tmux_key is None:
        tmux_key = parse_ctrl_key(text)
    if tmux_key is not None:
        pane.send_keys(tmux_key, enter=False)
        return

    # 3) Plain text: literal=True so tmux doesn't split on whitespace or
    #    interpret special tokens. The text is sent exactly once.
    pane.send_keys(text, enter=False, literal=True)
    # A trailing newline in the literal text already acts as Enter; sending
    # another one would submit an extra empty line.
    if enter and not text.endswith("\n"):
        pane.send_keys("Enter", enter=False)
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🟢 Acceptable: The `not text.endswith("\n")` guard correctly avoids a double Enter when the input text already contains a trailing newline (which tmux sends literally). Good edge-case handling.


def read_screen(self) -> str:
"""Read the current tmux pane content.
Expand Down
Loading
Loading