diff --git a/cmd/bbox/main.go b/cmd/bbox/main.go index d1945be..1ba5092 100644 --- a/cmd/bbox/main.go +++ b/cmd/bbox/main.go @@ -871,9 +871,14 @@ func run(parentCtx context.Context, agentName string, flags runFlags) error { } if err != nil { - // Propagate the agent's exit code without printing an error. + // Propagate the agent's exit code. var exitErr *infrassh.ExitError if errors.As(err, &exitErr) { + // Print a diagnostic for unexpected terminations (OOM, crash, etc.) + // but stay quiet for normal exits and user-initiated signals. + if hint := exitErr.SignalHint(); hint != "" { + _, _ = fmt.Fprintf(os.Stderr, "\nSession ended unexpectedly: %s\n", hint) + } // os.Exit bypasses defers, so clean up snapshot and flush // the timing summary now. if sb.Snapshot != nil { diff --git a/internal/infra/ssh/terminal.go b/internal/infra/ssh/terminal.go index cc18ddc..2c948bf 100644 --- a/internal/infra/ssh/terminal.go +++ b/internal/infra/ssh/terminal.go @@ -11,10 +11,12 @@ import ( "net" "os" "strings" + "syscall" "time" "golang.org/x/crypto/ssh" "golang.org/x/crypto/ssh/agent" + "golang.org/x/sys/unix" domainagent "github.com/stacklok/brood-box/pkg/domain/agent" "github.com/stacklok/brood-box/pkg/domain/session" @@ -46,13 +48,54 @@ func NewInteractiveSession(logger *slog.Logger) *InteractiveSession { // ExitError represents a non-zero exit code from the remote command. type ExitError struct { - Code int + Code int + Signal string // SSH signal name (e.g. "KILL", "SEGV"), empty for normal exits } func (e *ExitError) Error() string { + if e.Signal != "" { + return fmt.Sprintf("remote command killed by signal SIG%s (exit code %d)", e.Signal, e.Code) + } return fmt.Sprintf("remote command exited with code %d", e.Code) } +// SignalHint returns a human-readable hint for unexpected terminations +// (OOM kill, crash, etc.). Returns empty string for normal exits and +// user-initiated signals (INT, TERM) which are expected. 
+func (e *ExitError) SignalHint() string { + sigName := e.Signal + if sigName == "" { + // No SSH signal — check if exit code encodes one (128 + N convention). + sigName = signalNameFromCode(e.Code) + } + switch sigName { + case "KILL": + return "process was forcefully killed (likely out of memory — try increasing VM memory with --memory)" + case "SEGV": + return "process crashed with a segmentation fault" + case "ABRT": + return "process aborted (assertion failure or fatal error)" + default: + return "" + } +} + +// signalNameFromCode returns the SSH-style signal name (e.g. "KILL") for an +// exit code that follows the 128+N convention, or empty string if the code +// does not encode a known signal. +func signalNameFromCode(code int) string { + if code <= 128 { + return "" + } + sig := syscall.Signal(code - 128) + name := unix.SignalName(sig) + if name == "" { + return "" + } + // unix.SignalName returns "SIGKILL"; strip the "SIG" prefix. + return strings.TrimPrefix(name, "SIG") +} + // Run establishes an SSH connection, requests a PTY, and runs the command // interactively with full terminal forwarding. func (s *InteractiveSession) Run(ctx context.Context, opts session.SessionOpts) error { @@ -167,7 +210,18 @@ func (s *InteractiveSession) Run(ctx context.Context, opts session.SessionOpts) case err := <-done: if err != nil { if exitErr, ok := err.(*ssh.ExitError); ok { - return &ExitError{Code: exitErr.ExitStatus()} + code := exitErr.ExitStatus() + sig := exitErr.Signal() + // When killed by signal, SSH may report exit code 0 or -1. + // Use the conventional 128+N code instead. 
+ if sig != "" && code <= 0 { + if n := unix.SignalNum("SIG" + sig); n > 0 { + code = 128 + int(n) + } else { + code = 1 + } + } + return &ExitError{Code: code, Signal: sig} } return fmt.Errorf("remote command failed: %w", err) } diff --git a/internal/infra/ssh/terminal_test.go b/internal/infra/ssh/terminal_test.go index b8764be..5e0d56e 100644 --- a/internal/infra/ssh/terminal_test.go +++ b/internal/infra/ssh/terminal_test.go @@ -9,6 +9,68 @@ import ( "github.com/stretchr/testify/assert" ) +func TestExitError_SignalHint(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + err ExitError + expected string + }{ + { + name: "normal exit no hint", + err: ExitError{Code: 1}, + expected: "", + }, + { + name: "SIGKILL via signal field", + err: ExitError{Code: 137, Signal: "KILL"}, + expected: "process was forcefully killed (likely out of memory — try increasing VM memory with --memory)", + }, + { + name: "SIGKILL via exit code 137", + err: ExitError{Code: 137}, + expected: "process was forcefully killed (likely out of memory — try increasing VM memory with --memory)", + }, + { + name: "SIGSEGV via signal field", + err: ExitError{Code: 139, Signal: "SEGV"}, + expected: "process crashed with a segmentation fault", + }, + { + name: "SIGSEGV via exit code 139", + err: ExitError{Code: 139}, + expected: "process crashed with a segmentation fault", + }, + { + name: "SIGABRT via signal field", + err: ExitError{Code: 134, Signal: "ABRT"}, + expected: "process aborted (assertion failure or fatal error)", + }, + { + name: "SIGTERM is silent", + err: ExitError{Code: 143, Signal: "TERM"}, + expected: "", + }, + { + name: "SIGINT is silent", + err: ExitError{Code: 130, Signal: "INT"}, + expected: "", + }, + { + name: "exit code 143 without signal field is silent", + err: ExitError{Code: 143}, + expected: "", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + assert.Equal(t, tt.expected, tt.err.SignalHint()) + }) + } +} + func 
TestBuildCommand_EscapesArgs(t *testing.T) { t.Parallel()