From fe59353c88973b8bf2c82c83d827f4ef24e4dcc0 Mon Sep 17 00:00:00 2001 From: Ghost Scripter Date: Tue, 2 Jun 2026 18:55:23 +0530 Subject: [PATCH 01/36] docs(superpowers): spec for global push-to-talk hotkey (#3090) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Design doc for the PTT half of issue #3090 — a hold-to-talk global hotkey that lets the user speak to OpenHuman while it's in the background, with agent replies routed through TTS. Background screen capture (the other half of #3090) is scoped to a follow-up PR. --- .../specs/2026-06-02-global-ptt-design.md | 380 ++++++++++++++++++ 1 file changed, 380 insertions(+) create mode 100644 docs/superpowers/specs/2026-06-02-global-ptt-design.md diff --git a/docs/superpowers/specs/2026-06-02-global-ptt-design.md b/docs/superpowers/specs/2026-06-02-global-ptt-design.md new file mode 100644 index 0000000000..4a2c3e5a8b --- /dev/null +++ b/docs/superpowers/specs/2026-06-02-global-ptt-design.md @@ -0,0 +1,380 @@ +# Global Push-to-Talk Hotkey — Design + +**Issue:** [tinyhumansai/openhuman#3090](https://github.com/tinyhumansai/openhuman/issues/3090) — "Global push-to-talk keybind + screen share while tabbed out / in background." + +**Scope of this spec:** the *push-to-talk* half only. Background screen capture for the agent is acknowledged in the issue and tracked as a follow-up PR — same domain (voice / agent context), different surface area (host-screen sampling, fullscreen-game compatibility, image-token budget). Keeping them separate keeps each PR reviewable and coverage-gateable. + +**Outcome:** the user holds a configurable global hotkey while OpenHuman is *not* the focused window (mid-game, in their IDE, on a Slack call), speaks, releases the key, and the agent answers via TTS — without OpenHuman ever stealing focus. + +--- + +## Goals + +- A user-configurable hold-to-talk hotkey that works while OpenHuman is in the background. +- Mic opens on press, closes on release; transcript is auto-posted to the active chat thread and the agent's reply is spoken aloud. +- Audible + visual feedback (chime + small always-on-top overlay) so the user knows the mic is hot without alt-tabbing. +- Works on macOS, Windows, and Linux/X11 in v1. Wayland: documented unsupported with a clear in-app message. + +## Non-goals (v1) + +- Background screen capture for the agent. (Follow-up issue spawned from #3090.) +- Streaming partial transcripts during the hold. +- Per-thread PTT routing (always routes to the active thread). +- A DXGI-exclusive-fullscreen overlay workaround. (Documented caveat only; chime still plays.) +- Toggle-style PTT (we ship hold-to-talk only — the existing dictation toggle remains for press-once-press-again users). + +--- + +## Architecture overview + +``` +[User holds hotkey] + │ +[Tauri shell: tauri-plugin-global-shortcut] + │ ShortcutState::Pressed + ▼ +[app/src-tauri/src/ptt_hotkeys.rs] + │ emit("ptt://start", { session_id }) + ▼ +[app/src/services/pttService.ts] ─┐ + │ voice/audio_capture.start │ hold phase + │ playChime("open") │ + │ invoke("show_ptt_overlay", { active }) │ + │ armWatchdog(10s) │ + ─┘ + +[User releases hotkey] + │ ShortcutState::Released + ▼ +[ptt_hotkeys.rs] emit("ptt://stop", { session_id }) + │ +[pttService.onStop] + │ voice/audio_capture.finalize → Buffer + │ playChime("close") + hide overlay + │ dictationListener.transcribe(buf) → text + │ chatRuntime.sendMessage({ text, speakReply: true, source: "ptt" }) + ▼ +[Core: openhuman.threads_message_append] + │ normal agent turn + │ on assistant final text: + │ voice::reply_speech.synthesize_and_play(text) // if speak_reply + ▼ +[User hears reply; OpenHuman window state never changes] +``` + +The bulk of the work is in the **Tauri shell** (hotkey + overlay window) and the **renderer service layer** (state machine + glue). The Rust core gets exactly one additive change: a `speak_reply: bool` flag on `threads.message_append` so TTS reply routing doesn't require the renderer to be focused or even running its normal chat UI. + +--- + +## Components + +### Tauri shell — `app/src-tauri/src/` + +#### `ptt_hotkeys.rs` *(new)* + +Owns global hotkey registration for PTT. Mirrors `dictation_hotkeys.rs` in shape, with two key differences: it listens for **both** `Pressed` and `Released`, and rejects pure-modifier shortcuts. + +```rust +pub(crate) struct PttHotkeyState { + pub(crate) shortcut: Mutex>, // expanded variants registered + pub(crate) is_held: AtomicBool, // CAS-guarded press/release + pub(crate) session_counter: AtomicU64, +} + +pub(crate) fn expand_ptt_shortcuts(shortcut: &str) -> Result, PttError>; +// - returns Err(EmptyShortcut) if trimmed empty +// - returns Err(ModifierOnlyShortcut) if every token is a modifier (Ctrl/Cmd/Shift/Alt/Meta) +// - returns Err(InvalidShortcut(...)) if the plugin parser rejects it +// - otherwise returns 1 or 2 expanded variants (macOS CmdOrCtrl → [Cmd, Ctrl]) + +pub(crate) enum PttError { + EmptyShortcut, + ModifierOnlyShortcut, + InvalidShortcut(String), + AccessibilityRequired, // macOS + ShortcutInUse(String), // Windows + UnsupportedOnWayland, + ConflictsWithDictation(String), + RegistrationFailed(String), +} +``` + +#### `lib.rs` — two new IPC commands + +```rust +#[tauri::command] +async fn register_ptt_hotkey(app: AppHandle, shortcut: String) -> Result<(), String>; + +#[tauri::command] +async fn unregister_ptt_hotkey(app: AppHandle) -> Result<(), String>; +``` + +Behavior on `register_ptt_hotkey`: + +1. Expand & validate via `expand_ptt_shortcuts`. +2. Check overlap with the currently-registered dictation shortcut(s); on overlap return `ConflictsWithDictation`. +3. Unregister any previously-registered PTT shortcut (rollback-safe — same pattern as the dictation registration). +4. Register each expanded variant with a closure that: + - On `Pressed`: CAS `is_held: false → true`; on success, increment `session_counter` and emit `ptt://start { session_id }`. On failure (CAS lost — auto-repeat or stuck state), drop. + - On `Released`: CAS `is_held: true → false`; on success, emit `ptt://stop { session_id }` with the *current* counter value. On failure, drop. +5. Persist the registered variants in `PttHotkeyState`. + +`unregister_ptt_hotkey` unregisters all currently-registered variants and clears state. Also called on shutdown (`unregister_all` already covered by the plugin's drop). + +#### `ptt_overlay.rs` *(new)* — dedicated overlay window + +Lazy-create-on-first-register, destroyed on `unregister`. Window config: + +| Field | Value | +| --- | --- | +| `label` | `"ptt-overlay"` | +| `url` | `/#/ptt-overlay` (HashRouter route, mounted only in this window) | +| `decorations` | `false` | +| `transparent` | `true` | +| `always_on_top` | `true` | +| `skip_taskbar` | `true` | +| `focus` | `false` (never accepts focus) | +| `resizable` | `false` | +| `shadow` | `false` | +| `visible_on_all_workspaces` | `true` | +| `accept_first_mouse` | `false` | +| `size` | `160 × 56` | +| `position` | bottom-right of primary display, 24px inset (hard-coded in v1) | + +IPC command: `show_ptt_overlay({ active: bool, session_id: u64 })` — hides/shows the window with a 250ms fade on close. Window-local React state in `/#/ptt-overlay` toggles a pulsing red dot when `active: true`. + +### Rust core — `src/openhuman/` + +#### `voice/bus.rs` *(new)* + +Per the canonical module shape, the voice domain currently has no `bus.rs`. Add one with a single subscriber-less event publisher and a new variant on `DomainEvent`: + +```rust +// in src/core/event_bus/events.rs +pub enum VoiceEvent { + PttTranscriptCommitted { + thread_id: ThreadId, + session_id: u64, + text_len: usize, // never log raw transcript + held_ms: u64, + finalized_by_watchdog: bool, + }, + // ...future variants +} + +// in DomainEvent +Voice(VoiceEvent), +``` + +Subscribers will be added in the follow-up screen-capture PR (the screen-intelligence domain will hook here to grab a frame when a PTT turn commits). For v1 we publish, nobody subscribes — the test asserts publish reaches a test subscriber. + +#### Chat-send schema — `speak_reply` flag + +The user→agent message ingress RPC is **`openhuman.threads_message_append`** (`src/openhuman/threads/schemas.rs` → `"message_append"` → `handle_message_append` in `src/openhuman/threads/ops.rs`). Three additive optional fields: + +```rust +// In the message_append input schema +#[serde(default)] +pub speak_reply: Option, +#[serde(default)] +pub source: Option, // "ptt" | "dictation" | "type" | ... +#[serde(default)] +pub session_id: Option, // PTT correlation key +``` + +Non-breaking — all fields `Option`. When `speak_reply == Some(true)`, the agent-turn finalizer (whichever module emits the assistant's final text — confirm exact hook point during the implementation plan, in `src/openhuman/agent/` or `threads/turn_state`) routes the assistant text through `voice::reply_speech::synthesize_and_play(text).await`. This is the **only** Rust-core code path change beyond the schema and the bus event. + +`source` and `session_id` are stored on the message metadata (so analytics can distinguish PTT vs typed input) and included in the `VoiceEvent::PttTranscriptCommitted` bus event for the screen-capture follow-up PR. + +#### `about_app` capability catalog + +Add entry: + +```rust +Capability { + id: "voice.ptt", + label: "Global push-to-talk", + supported_on: &[Platform::MacOS, Platform::Windows, Platform::LinuxX11], + requires: &["microphone", "global_shortcut"], +} +``` + +### Frontend — `app/src/` + +#### `services/pttService.ts` *(new singleton)* + +State machine: + +``` +Idle ──[ptt://start]──▶ Capturing ──[ptt://stop]──▶ Finalizing ──▶ Idle + ▲ │ + │ ├──[10s no stop]──▶ Finalizing (watchdog=true) + │ │ + │ └──[mic-fail / preempt / register]──▶ Aborted ──▶ Idle +``` + +API surface: + +```ts +interface PttService { + init(): void; // subscribes to Tauri ptt://* events + destroy(): void; + // exposed for tests: + onStart(session_id: number): Promise; + onStop(session_id: number): Promise; + cancel(reason: "preempted_by_ptt" | "mic_failure" | "user_cancel"): void; +} +``` + +`onStart` (in order): +1. If a session is already active → call `cancel("preempted_by_ptt")`. +2. `playChime("open")`. +3. `invoke("show_ptt_overlay", { active: true, session_id })`. +4. `voice/audio_capture.start({ session_tag: "ptt:" + session_id })`. +5. `armWatchdog(10_000, () => this.onStop(session_id))`. + +`onStop`: +1. Disarm watchdog. +2. `const buf = await voice/audio_capture.finalize()`. +3. `playChime("close")`. +4. `invoke("show_ptt_overlay", { active: false, session_id })`. +5. If `buf.duration_ms < 250` → drop session, play `"no-speech"` double-click chime, log `dropped_reason: "empty_audio"`, return. +6. `const text = await dictationListener.transcribe(buf)`. +7. If `!text.trim()` → drop, log `dropped_reason: "empty_transcript"`, return. +8. Resolve `activeThreadId`: + - If `chatRuntime.activeThread` exists → use it. + - Else → create a new thread titled `"Voice"` via `openhuman.thread_create`, mark `source: "ptt"`, use its ID. +9. `chatRuntime.sendMessage({ threadId, body: text, metadata: { source: "ptt", session_id }, speakReply: state.ptt.speakReplies })`. +10. Zero the audio buffer. + +`cancel`: +- Disarm watchdog, finalize-and-discard the audio buffer (zero it), hide overlay, play error chime, log with reason. No chat message posted. + +Errors during the session — handled per the table in **§ Error handling** below. + +#### `store/slices/ptt.ts` *(new redux slice)* + +```ts +interface PttState { + shortcut: string | null; // null = unbound (default) + speakReplies: boolean; // default true + showOverlay: boolean; // default true + isHeld: boolean; // not persisted +} +``` + +Persisted (except `isHeld`) via the existing redux-persist config. Re-registers the hotkey on rehydration via a sibling `useEffect` to the existing dictation init. + +#### `pages/settings/voice/PttSettingsPanel.tsx` *(new)* + +- Hotkey-capture widget (same component family as the dictation key picker). +- Toggle: "Speak agent replies" (`speakReplies`). +- Toggle: "Show overlay while held" (`showOverlay`). +- Inline help: "Push-to-talk is off — pick a hotkey to enable." when `shortcut == null`. +- Inline error: surfaces `PttError::ConflictsWithDictation`, `ShortcutInUse`, `AccessibilityRequired` (with a "Open Accessibility settings" button on macOS), `UnsupportedOnWayland`. +- Inline hint: "In exclusive-fullscreen games the overlay won't render — you'll only hear the chime. Switch to borderless fullscreen for the overlay." + +#### `pages/PttOverlayPage.tsx` *(new — rendered only in the overlay window)* + +Borderless 160×56 region: small mic glyph, label ("Listening…"), pulsing red dot when `state.active`. Reads `active` from a local React state updated by a `useEffect` that listens for `show_ptt_overlay`-relayed events. No redux access — the overlay window has its own React root. + +#### `ChatRuntimeProvider` — forward `speak_reply` + +`sendMessage` accepts `speakReply?: boolean` and forwards it to `openhuman.threads_message_append` as the new optional `speak_reply` field. + +#### Chimes + +- `app/src/assets/audio/ptt-open.wav` — short rising tone, ~80ms. +- `app/src/assets/audio/ptt-close.wav` — short falling tone, ~80ms. +- `app/src/assets/audio/ptt-error.wav` — double-click, ~120ms. +- `app/src/assets/audio/README.md` — CC0 attribution. + +LUFS-normalized to roughly match the existing in-app notification sound. Played via a plain `Audio` element from `pttService`. + +#### i18n + +New keys under a `pttSettings` / `pttOverlay` namespace in `app/src/lib/i18n/en.ts`, real translations added to all 12 non-English locale files (`ar`, `bn`, `de`, `es`, `fr`, `hi`, `id`, `it`, `ko`, `pl`, `pt`, `ru`, `zh-CN`). `pnpm i18n:check` and `pnpm i18n:english:check` gate this. + +--- + +## Data flow / sequence diagram + +See the architecture overview above. The key invariants: + +- **No focus stealing.** No window is `show()`-ed with focus; `show_ptt_overlay` shows a `focus: false` window. The agent reply plays via TTS without any window-state mutation. +- **Single mic at a time.** `voice::audio_capture` enforces this. PTT preempts in-flight dictation; dictation cannot start during a PTT session. +- **Session ID is the correlation key.** Logged in shell + renderer + bus event + chat metadata. + +--- + +## Error handling + +| Failure | Behavior | +| --- | --- | +| Mic permission denied (`MicPermissionDenied`) | Error chime, hide overlay, log `[ptt] mic_denied`. Next time the user opens `/settings/voice`, a sticky banner links to OS mic settings. No mid-game modal. | +| Mic stream drops mid-session (USB unplug) | `cancel("mic_failure")`. No chat message posted. | +| STT call fails (network / model timeout) | Post message anyway as `[Voice — transcription failed]` so the user has a breadcrumb. Subsequent agent turn handles it normally. | +| Agent turn errors | Existing chat-error UI. TTS reply just doesn't play. Overlay already hidden by this point. | +| `ptt://stop` never arrives (OS swallowed release) | 10s watchdog finalizes. Session tagged `finalized_by_watchdog: true`. Logged at `warn`. | +| App backgrounded during hold | Hotkey still fires (global). Overlay still shows. Chime still plays. By design. | +| Empty / sub-threshold audio (< 250ms) | Drop session, play `no-speech` chime, log `dropped_reason: "empty_audio"`. No message posted. | +| Empty transcript (STT returned blank) | Same as above with `dropped_reason: "empty_transcript"`. | +| Shortcut conflict with dictation | Registration returns `ConflictsWithDictation`. Settings panel shows the inline error. | +| Wayland session | `UnsupportedOnWayland`. Settings panel surfaces a clear message. Logged once per session. | + +**Logging** (per the debug-logging rule): all logs use `[ptt]` prefix. Fields per session: `session_id`, `shortcut`, `held_ms`, `transcript_len`, `dropped_reason`, `finalized_by_watchdog`. PII-safe — never log transcript text or audio buffers, only lengths/durations. Audio buffers are zeroed after finalize. + +**Telemetry**: one new analytics event `ptt_session` mirroring the log fields (no transcript), gated by the existing analytics opt-in. + +--- + +## Configuration + +- **No `Config` TOML schema change.** All PTT settings live in the renderer's `ptt` redux slice (persisted), mirroring how dictation is configured today. +- **Default `shortcut: null`** (unbound). No hard-coded default key — every possible default conflicts with something common. +- **Default `speakReplies: true`**, **`showOverlay: true`**. +- **Boot path:** on rehydration, if `state.ptt.shortcut` is non-null, call `register_ptt_hotkey`. On settings change, unregister-then-register. Independent of the existing dictation init. + +--- + +## Migration + +Brand-new state. No migration. Existing users on `0.53.45+` see the new `/settings/voice` PTT section after upgrade with everything default-off until they bind a key. + +--- + +## Testing + +| Layer | What | Where | +| --- | --- | --- | +| Rust unit | `expand_ptt_shortcuts`: empty, modifier-only, valid combos, `CmdOrCtrl` expansion (dual-variant on macOS, single on Win/Linux) | `app/src-tauri/src/ptt_hotkeys.rs` inline `#[cfg(test)]` | +| Rust unit | `speak_reply` flag round-trips through `threads.message_append` schema serde; default behavior unchanged when omitted | `src/openhuman/threads/schemas_tests.rs` | +| Rust unit | `DomainEvent::Voice::PttTranscriptCommitted` publishes; test subscriber receives it | `src/openhuman/voice/bus.rs` inline tests | +| Rust E2E | `tests/json_rpc_e2e.rs` — call `threads.message_append` with `speak_reply: true` and assert `reply_speech::synthesize_and_play` is invoked via a test seam | `tests/json_rpc_e2e.rs` extension | +| Vitest unit | `pttService` state machine: start→stop happy path, watchdog timeout, empty-audio drop, empty-transcript drop, dictation-preempt, double-press idempotency, mic-permission-denied path | `app/src/services/pttService.test.ts` (new) | +| Vitest unit | `ptt` redux slice: shortcut set/clear, toggle settings, rehydration | `app/src/store/slices/ptt.test.ts` (new) | +| Vitest unit | `PttSettingsPanel` — render, hotkey capture, conflict-with-dictation error, mic-denied banner, Wayland banner | `app/src/pages/settings/voice/PttSettingsPanel.test.tsx` (new) | +| Vitest unit | `PttOverlayPage` — renders idle vs active states, listens for active event | `app/src/pages/PttOverlayPage.test.tsx` (new) | +| i18n gate | All new keys present in all 13 locales, no untranslated English values | `pnpm i18n:check` + `pnpm i18n:english:check` (existing CI) | +| WDIO E2E | Desktop spec: register a hotkey via settings UI, simulate the hotkey via `tauri-driver` key injection, assert overlay window appears, assert chat thread receives a message. STT mocked via the shared mock backend returning a fixed transcript. | `app/test/e2e/specs/ptt-flow.spec.ts` (new) | +| Manual smoke | Hold-while-game-in-foreground on macOS + Windows; mic permission denied flow; Wayland fallback message | PR body checklist | + +**Coverage gate.** Every changed line in the new files + the `threads.message_append` schema delta ships with ≥ 80% diff coverage per the existing merge gate. Untested escape valves (the real `Audio.play()` call, the real `tauri-driver` key injection) are isolated behind thin wrappers that can be mocked. + +--- + +## Out of scope (named explicitly) + +- **Background screen capture for the agent** — separate follow-up PR off the same issue. +- **PTT-while-dictation-mid-flight** polish beyond "preempt with reason." +- **DXGI exclusive-fullscreen overlay rendering** — documented caveat only. +- **Streaming partial transcripts during hold.** +- **Per-thread PTT routing** (v1 always uses active thread; if none, creates a `"Voice"` thread). +- **Native platform overlays** (NSWindow / Win32 layered / X11 override-redirect) — Tauri overlay window covers v1 needs. +- **PTT toggle-mode** — out; dictation toggle covers that pattern already. + +--- + +## Open questions + +None at spec time. If implementation surfaces blockers (e.g. `tauri-plugin-global-shortcut` `Released` semantics regress on a specific OS version), revisit with a small spec amendment rather than a silent design drift. From 4c3f5603ffe96ab3936059a0bb281956bcceb978 Mon Sep 17 00:00:00 2001 From: Ghost Scripter Date: Tue, 2 Jun 2026 19:08:03 +0530 Subject: [PATCH 02/36] docs(superpowers): correct PTT spec to use channel.web_chat RPC --- .../specs/2026-06-02-global-ptt-design.md | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/docs/superpowers/specs/2026-06-02-global-ptt-design.md b/docs/superpowers/specs/2026-06-02-global-ptt-design.md index 4a2c3e5a8b..517cec0b75 100644 --- a/docs/superpowers/specs/2026-06-02-global-ptt-design.md +++ b/docs/superpowers/specs/2026-06-02-global-ptt-design.md @@ -54,7 +54,7 @@ │ dictationListener.transcribe(buf) → text │ chatRuntime.sendMessage({ text, speakReply: true, source: "ptt" }) ▼ -[Core: openhuman.threads_message_append] +[Core: openhuman.channel_web_chat] │ normal agent turn │ on assistant final text: │ voice::reply_speech.synthesize_and_play(text) // if speak_reply @@ -62,7 +62,7 @@ [User hears reply; OpenHuman window state never changes] ``` -The bulk of the work is in the **Tauri shell** (hotkey + overlay window) and the **renderer service layer** (state machine + glue). The Rust core gets exactly one additive change: a `speak_reply: bool` flag on `threads.message_append` so TTS reply routing doesn't require the renderer to be focused or even running its normal chat UI. +The bulk of the work is in the **Tauri shell** (hotkey + overlay window) and the **renderer service layer** (state machine + glue). The Rust core gets exactly one additive change: a `speak_reply: bool` flag on `channel.web_chat` so TTS reply routing doesn't require the renderer to be focused or even running its normal chat UI. --- @@ -170,10 +170,10 @@ Subscribers will be added in the follow-up screen-capture PR (the screen-intelli #### Chat-send schema — `speak_reply` flag -The user→agent message ingress RPC is **`openhuman.threads_message_append`** (`src/openhuman/threads/schemas.rs` → `"message_append"` → `handle_message_append` in `src/openhuman/threads/ops.rs`). Three additive optional fields: +The user→agent ingress RPC is **`openhuman.channel_web_chat`** (web channel provider — `src/openhuman/channels/providers/web.rs`, schema in `schemas("chat")`, handler `channel_web_chat`, dispatch through `start_chat`). The frontend already calls this from `app/src/services/chatService.ts::chatSend`. Three additive optional fields: ```rust -// In the message_append input schema +// In the channel.web_chat input schema (web.rs schemas()) #[serde(default)] pub speak_reply: Option, #[serde(default)] @@ -182,9 +182,9 @@ pub source: Option, // "ptt" | "dictation" | "type" | ... pub session_id: Option, // PTT correlation key ``` -Non-breaking — all fields `Option`. When `speak_reply == Some(true)`, the agent-turn finalizer (whichever module emits the assistant's final text — confirm exact hook point during the implementation plan, in `src/openhuman/agent/` or `threads/turn_state`) routes the assistant text through `voice::reply_speech::synthesize_and_play(text).await`. This is the **only** Rust-core code path change beyond the schema and the bus event. +Non-breaking — all fields `Option`. The flags flow through `channel_web_chat → start_chat → spawn_progress_bridge`. The progress bridge buffers `AgentProgress::TextDelta` chunks during the turn; on `AgentProgress::TurnCompleted`, if `speak_reply == Some(true)`, it calls `voice::reply_speech::synthesize_and_play(buffered_text).await`. This is the **only** Rust-core code path change beyond the schema and the bus event. -`source` and `session_id` are stored on the message metadata (so analytics can distinguish PTT vs typed input) and included in the `VoiceEvent::PttTranscriptCommitted` bus event for the screen-capture follow-up PR. +`source` and `session_id` are persisted on the user message metadata (via the message-record path already used by `start_chat`) and included in the `VoiceEvent::PttTranscriptCommitted` bus event for the screen-capture follow-up PR. #### `about_app` capability catalog @@ -280,7 +280,7 @@ Borderless 160×56 region: small mic glyph, label ("Listening…"), pulsing red #### `ChatRuntimeProvider` — forward `speak_reply` -`sendMessage` accepts `speakReply?: boolean` and forwards it to `openhuman.threads_message_append` as the new optional `speak_reply` field. +`chatService.chatSend` (already the single call site for `openhuman.channel_web_chat`) accepts `speakReply?: boolean`, `source?: string`, `sessionId?: number` and forwards them as the new optional fields. `ChatRuntimeProvider`'s `sendMessage` plumbs them through from `pttService`. #### Chimes @@ -348,9 +348,9 @@ Brand-new state. No migration. Existing users on `0.53.45+` see the new `/settin | Layer | What | Where | | --- | --- | --- | | Rust unit | `expand_ptt_shortcuts`: empty, modifier-only, valid combos, `CmdOrCtrl` expansion (dual-variant on macOS, single on Win/Linux) | `app/src-tauri/src/ptt_hotkeys.rs` inline `#[cfg(test)]` | -| Rust unit | `speak_reply` flag round-trips through `threads.message_append` schema serde; default behavior unchanged when omitted | `src/openhuman/threads/schemas_tests.rs` | +| Rust unit | `speak_reply` / `source` / `session_id` round-trip through `channel.web_chat` schema serde; default behavior unchanged when all omitted | `src/openhuman/channels/providers/web_tests.rs` | | Rust unit | `DomainEvent::Voice::PttTranscriptCommitted` publishes; test subscriber receives it | `src/openhuman/voice/bus.rs` inline tests | -| Rust E2E | `tests/json_rpc_e2e.rs` — call `threads.message_append` with `speak_reply: true` and assert `reply_speech::synthesize_and_play` is invoked via a test seam | `tests/json_rpc_e2e.rs` extension | +| Rust E2E | `tests/json_rpc_e2e.rs` — call `channel.web_chat` with `speak_reply: true` and assert `reply_speech::synthesize_and_play` is invoked via a test seam at the progress-bridge's `TurnCompleted` boundary | `tests/json_rpc_e2e.rs` extension | | Vitest unit | `pttService` state machine: start→stop happy path, watchdog timeout, empty-audio drop, empty-transcript drop, dictation-preempt, double-press idempotency, mic-permission-denied path | `app/src/services/pttService.test.ts` (new) | | Vitest unit | `ptt` redux slice: shortcut set/clear, toggle settings, rehydration | `app/src/store/slices/ptt.test.ts` (new) | | Vitest unit | `PttSettingsPanel` — render, hotkey capture, conflict-with-dictation error, mic-denied banner, Wayland banner | `app/src/pages/settings/voice/PttSettingsPanel.test.tsx` (new) | @@ -359,7 +359,7 @@ Brand-new state. No migration. Existing users on `0.53.45+` see the new `/settin | WDIO E2E | Desktop spec: register a hotkey via settings UI, simulate the hotkey via `tauri-driver` key injection, assert overlay window appears, assert chat thread receives a message. STT mocked via the shared mock backend returning a fixed transcript. | `app/test/e2e/specs/ptt-flow.spec.ts` (new) | | Manual smoke | Hold-while-game-in-foreground on macOS + Windows; mic permission denied flow; Wayland fallback message | PR body checklist | -**Coverage gate.** Every changed line in the new files + the `threads.message_append` schema delta ships with ≥ 80% diff coverage per the existing merge gate. Untested escape valves (the real `Audio.play()` call, the real `tauri-driver` key injection) are isolated behind thin wrappers that can be mocked. +**Coverage gate.** Every changed line in the new files + the `channel.web_chat` schema delta ships with ≥ 80% diff coverage per the existing merge gate. Untested escape valves (the real `Audio.play()` call, the real `tauri-driver` key injection) are isolated behind thin wrappers that can be mocked. --- From e6ce55886bbb5837b7a56b9c0dcda7ea5c0c2a5d Mon Sep 17 00:00:00 2001 From: Ghost Scripter Date: Tue, 2 Jun 2026 19:18:51 +0530 Subject: [PATCH 03/36] docs(superpowers): implementation plan for global push-to-talk (#3090) 15-task plan covering the Rust schema delta (`channel.web_chat`), voice/bus event, Tauri-shell hotkey + overlay window, frontend redux slice + service state machine + UI + settings panel, 13-locale i18n, and a WDIO E2E spec with mocked STT. --- .../plans/2026-06-02-global-ptt.md | 3027 +++++++++++++++++ 1 file changed, 3027 insertions(+) create mode 100644 docs/superpowers/plans/2026-06-02-global-ptt.md diff --git a/docs/superpowers/plans/2026-06-02-global-ptt.md b/docs/superpowers/plans/2026-06-02-global-ptt.md new file mode 100644 index 0000000000..1516c90c4d --- /dev/null +++ b/docs/superpowers/plans/2026-06-02-global-ptt.md @@ -0,0 +1,3027 @@ +# Global Push-to-Talk Hotkey Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** Add a configurable hold-to-talk global hotkey that lets the user dictate to OpenHuman while it's in the background, with the agent's reply spoken back via TTS — no window focus stealing at any point. + +**Architecture:** +- **Tauri shell** owns the global hotkey + the always-on-top overlay window. Uses `tauri-plugin-global-shortcut` uniformly across macOS / Windows / Linux (single code path — *different* from dictation's OS-forked rdev/Tauri-plugin dual-path, which is grandfathered legacy + a macOS-26 rdev crash workaround). +- **Frontend service `pttService`** owns the press → capture → finalize → STT → send → TTS state machine, with a 10s watchdog for swallowed `Released` events. +- **Rust core** gets one additive change: three optional fields on `channel.web_chat` (`speak_reply`, `source`, `session_id`). When `speak_reply` is true, the existing progress bridge calls `voice::reply_speech::synthesize_and_play(final_text)` on `TurnCompleted`. + +**Tech Stack:** +- Rust core, Tauri shell (`tauri-plugin-global-shortcut`), React + Redux Toolkit + redux-persist, Vitest, WDIO/Appium for E2E, i18n via the project's `useT()` infrastructure. + +**Spec:** [`docs/superpowers/specs/2026-06-02-global-ptt-design.md`](../specs/2026-06-02-global-ptt-design.md) + +**Issue:** [tinyhumansai/openhuman#3090](https://github.com/tinyhumansai/openhuman/issues/3090) — push-to-talk half only; background screen capture is a follow-up PR. + +--- + +## File map + +| Layer | File | Action | Purpose | +| --- | --- | --- | --- | +| Tauri shell | `app/src-tauri/src/ptt_hotkeys.rs` | create | Hotkey registration + state (`PttHotkeyState`, `expand_ptt_shortcuts`, `PttError`). | +| Tauri shell | `app/src-tauri/src/ptt_overlay.rs` | create | Lazy borderless always-on-top overlay window + `show_ptt_overlay` IPC. | +| Tauri shell | `app/src-tauri/src/lib.rs` | modify | Two new IPC commands; wire `PttHotkeyState` into `.manage(...)`; conflict check vs dictation. | +| Rust core | `src/openhuman/channels/providers/web.rs` | modify | Add `speak_reply`/`source`/`session_id` to schema + plumb to progress bridge. | +| Rust core | `src/openhuman/channels/providers/web_tests.rs` | modify | Schema-roundtrip + default-omitted tests. | +| Rust core | `src/openhuman/voice/bus.rs` | create | `VoiceEvent::PttTranscriptCommitted` publish helper. | +| Rust core | `src/openhuman/voice/mod.rs` | modify | `pub mod bus;`. | +| Rust core | `src/core/event_bus/events.rs` | modify | `DomainEvent::Voice(VoiceEvent)` + `VoiceEvent` enum + domain mapping. | +| Rust core | `src/openhuman/about_app/` (capability list) | modify | Add `voice.ptt` capability entry. | +| Rust core | `tests/json_rpc_e2e.rs` | modify | E2E asserting `reply_speech` is invoked on `speak_reply=true` | +| Frontend | `app/src/services/pttService.ts` | create | Press/release state machine + watchdog + glue. | +| Frontend | `app/src/services/__tests__/pttService.test.ts` | create | State-machine unit tests. | +| Frontend | `app/src/services/chatService.ts` | modify | Forward `speak_reply` / `source` / `session_id` to `channel.web_chat`. | +| Frontend | `app/src/services/__tests__/chatService.test.ts` | modify | Assert new fields are passed through. | +| Frontend | `app/src/store/slices/ptt.ts` | create | Redux slice (`shortcut`, `speakReplies`, `showOverlay`, `isHeld`). | +| Frontend | `app/src/store/slices/__tests__/ptt.test.ts` | create | Slice unit tests. | +| Frontend | `app/src/store/index.ts` (or wherever rootReducer is) | modify | Register `ptt` slice + persist whitelist. | +| Frontend | `app/src/utils/tauriCommands/ptt.ts` | create | Wrappers for `register_ptt_hotkey` / `unregister_ptt_hotkey` / `show_ptt_overlay`. | +| Frontend | `app/src/hooks/usePttHotkey.ts` | create | Boot-time effect that registers the hotkey on rehydration. | +| Frontend | `app/src/components/PttHotkeyManager.tsx` | create | Renderless component mounted in `AppShell` that wires `usePttHotkey` + `pttService`. | +| Frontend | `app/src/AppShell.tsx` (or `App.tsx`) | modify | Mount ``. | +| Frontend | `app/src/pages/PttOverlayPage.tsx` | create | 160×56 borderless overlay UI. | +| Frontend | `app/src/pages/PttOverlayPage.test.tsx` | create | Render tests. | +| Frontend | `app/src/AppRoutes.tsx` | modify | Add `/ptt-overlay` route. | +| Frontend | `app/src/pages/settings/voice/PttSettingsPanel.tsx` | create | Hotkey capture + toggles. | +| Frontend | `app/src/pages/settings/voice/__tests__/PttSettingsPanel.test.tsx` | create | Component tests. | +| Frontend | `app/src/pages/settings/voice/VoiceSettingsPage.tsx` (or wherever the voice settings index lives) | modify | Mount the PTT panel. | +| Frontend | `app/src/assets/audio/ptt-open.wav` | create | Open chime (CC0). | +| Frontend | `app/src/assets/audio/ptt-close.wav` | create | Close chime (CC0). | +| Frontend | `app/src/assets/audio/ptt-error.wav` | create | Error chime (CC0). | +| Frontend | `app/src/assets/audio/README.md` | create | CC0 attribution. | +| i18n | `app/src/lib/i18n/en.ts` + 12 locale files | modify | New PTT keys (settings + overlay + error messages). | +| E2E | `app/test/e2e/specs/ptt-flow.spec.ts` | create | Full flow under WDIO with mocked STT. | + +Each task below ends in a single commit. Tasks are ordered so the tree compiles and tests pass at every boundary — start from core, work outward to the UI. + +--- + +## Task 1: `channel.web_chat` accepts `speak_reply` / `source` / `session_id` (schema + plumb-through) + +**Files:** +- Modify: `src/openhuman/channels/providers/web.rs` +- Test: `src/openhuman/channels/providers/web_tests.rs` + +The renderer-side call site (`chatService.chatSend`) needs to send these fields; the agent loop needs to remember them. This task wires the schema additions and threads the values from `channel_web_chat` → `start_chat` → progress bridge, but does **not yet** invoke TTS (that's Task 4). After this task the fields are accepted, logged, and otherwise ignored. + +- [ ] **Step 1.1: Write failing schema test for the new optional fields** + +Add to `src/openhuman/channels/providers/web_tests.rs`: + +```rust +#[test] +fn web_chat_schema_accepts_optional_ptt_fields() { + // Locate the `chat` schema via the public accessor. + let schema = crate::openhuman::channels::providers::web::schemas("chat"); + let names: std::collections::HashSet<&str> = + schema.inputs.iter().map(|f| f.name).collect(); + assert!( + names.contains("speak_reply"), + "channel.web_chat schema must include optional speak_reply field" + ); + assert!( + names.contains("source"), + "channel.web_chat schema must include optional source field" + ); + assert!( + names.contains("session_id"), + "channel.web_chat schema must include optional session_id field" + ); + // All three are optional. + for field in &["speak_reply", "source", "session_id"] { + let f = schema + .inputs + .iter() + .find(|f| f.name == *field) + .expect("field present"); + assert!(!f.required, "{field} must be optional"); + } +} + +#[test] +fn web_chat_params_deserialize_with_all_ptt_fields_omitted() { + use crate::openhuman::channels::providers::web::WebChatParams; + let json = serde_json::json!({ + "client_id": "c1", + "thread_id": "t1", + "message": "hello", + }); + let parsed: WebChatParams = serde_json::from_value(json).unwrap(); + assert_eq!(parsed.speak_reply, None); + assert_eq!(parsed.source, None); + assert_eq!(parsed.session_id, None); +} + +#[test] +fn web_chat_params_deserialize_with_all_ptt_fields_present() { + use crate::openhuman::channels::providers::web::WebChatParams; + let json = serde_json::json!({ + "client_id": "c1", + "thread_id": "t1", + "message": "hello", + "speak_reply": true, + "source": "ptt", + "session_id": 42_u64, + }); + let parsed: WebChatParams = serde_json::from_value(json).unwrap(); + assert_eq!(parsed.speak_reply, Some(true)); + assert_eq!(parsed.source.as_deref(), Some("ptt")); + assert_eq!(parsed.session_id, Some(42)); +} +``` + +- [ ] **Step 1.2: Run tests to verify they fail** + +```bash +pnpm debug rust web_chat_schema_accepts_optional_ptt_fields +pnpm debug rust web_chat_params_deserialize_with_all_ptt_fields +``` + +Expected: all three fail (`speak_reply` / `source` / `session_id` not in schema; `WebChatParams` has no such fields). + +- [ ] **Step 1.3: Add fields to schema and `WebChatParams`** + +In `src/openhuman/channels/providers/web.rs`, find the `schemas("chat")` arm and add three optional fields after `locale`: + +```rust +optional_bool("speak_reply", "When true, the agent's final reply is spoken via TTS (for PTT and similar background voice flows)."), +optional_string("source", "Origin of the message: \"ptt\" | \"dictation\" | \"type\" | other. Used for analytics + downstream metadata."), +optional_u64("session_id", "Optional caller-provided correlation id (PTT session id)."), +``` + +If `optional_bool` / `optional_u64` helpers don't exist in scope yet, add them following the `optional_string` / `optional_f64` pattern already in that file. Example (place near the other helpers): + +```rust +fn optional_bool(name: &'static str, comment: &'static str) -> FieldSchema { + FieldSchema { + name, + ty: TypeSchema::Option(Box::new(TypeSchema::Bool)), + comment, + required: false, + } +} + +fn optional_u64(name: &'static str, comment: &'static str) -> FieldSchema { + FieldSchema { + name, + ty: TypeSchema::Option(Box::new(TypeSchema::U64)), + comment, + required: false, + } +} +``` + +Then locate the `WebChatParams` struct (search `struct WebChatParams` in the same file) and add three fields: + +```rust +#[serde(default)] +pub speak_reply: Option, +#[serde(default)] +pub source: Option, +#[serde(default)] +pub session_id: Option, +``` + +- [ ] **Step 1.4: Run the schema tests to verify they pass** + +```bash +pnpm debug rust web_chat_schema_accepts_optional_ptt_fields +pnpm debug rust web_chat_params_deserialize_with_all_ptt_fields +``` + +Expected: PASS. + +- [ ] **Step 1.5: Propagate fields from `channel_web_chat` → `start_chat`** + +Find the existing `channel_web_chat` function (`pub async fn channel_web_chat`) and extend its signature with the three new optional fields. Then update `start_chat`'s signature the same way. Where the bridge is spawned (`spawn_progress_bridge(...)`), pass the new fields through. For this task they're just stored on a per-bridge struct field; Task 4 wires them to TTS. + +Concretely: locate `pub(super) struct ProgressBridgeContext` (or whatever struct already exists to carry bridge state — if none, add one) and add: + +```rust +pub(super) speak_reply: bool, +pub(super) source: Option, +pub(super) session_id: Option, +pub(super) final_assistant_text: String, // populated from TextDelta events in Task 4 +``` + +Update `handle_chat` to deserialize the new fields and pass them along. + +- [ ] **Step 1.6: Run cargo check** + +```bash +cargo check --manifest-path Cargo.toml +``` + +Expected: clean compile (warnings about unused `speak_reply` etc. acceptable — Task 4 consumes them). + +- [ ] **Step 1.7: Commit** + +```bash +git add src/openhuman/channels/providers/web.rs \ + src/openhuman/channels/providers/web_tests.rs +git commit -m "feat(channels/web): accept optional speak_reply/source/session_id on chat schema (#3090)" +``` + +--- + +## Task 2: `DomainEvent::Voice(VoiceEvent)` + `voice/bus.rs` + +**Files:** +- Modify: `src/core/event_bus/events.rs` +- Create: `src/openhuman/voice/bus.rs` +- Modify: `src/openhuman/voice/mod.rs` + +The bus event lets the future screen-capture follow-up subscribe to PTT commits without coupling. + +- [ ] **Step 2.1: Write failing publish/subscribe test** + +Create `src/openhuman/voice/bus.rs`: + +```rust +//! Voice domain event publishers. The PTT transcript-committed event is +//! published here so the future screen-intelligence follow-up can subscribe +//! and grab a frame on commit without coupling to the channel-web flow. + +use crate::core::event_bus::{publish_global, DomainEvent, VoiceEvent}; + +/// Publish a [`VoiceEvent::PttTranscriptCommitted`] event. +pub fn publish_ptt_transcript_committed( + thread_id: String, + session_id: u64, + text_len: usize, + held_ms: u64, + finalized_by_watchdog: bool, +) { + publish_global(DomainEvent::Voice(VoiceEvent::PttTranscriptCommitted { + thread_id, + session_id, + text_len, + held_ms, + finalized_by_watchdog, + })); +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::core::event_bus::{init_global, subscribe_global, DomainEvent, EventHandler}; + use async_trait::async_trait; + use std::sync::Arc; + use tokio::sync::Mutex as AsyncMutex; + + #[derive(Default)] + struct Capture { + events: Arc>>, + } + + #[async_trait] + impl EventHandler for Capture { + fn name(&self) -> &'static str { + "voice::ptt_test_capture" + } + async fn handle(&self, event: DomainEvent) { + self.events.lock().await.push(event); + } + } + + #[tokio::test] + async fn publishing_a_ptt_commit_reaches_a_subscriber() { + // Use the singleton (init is idempotent). + let _ = init_global(64); + let capture = Capture::default(); + let events = capture.events.clone(); + let _sub = subscribe_global(Box::new(capture)); + + publish_ptt_transcript_committed( + "thread-1".to_string(), + 42, + 17, + 850, + false, + ); + + // Give the broadcaster a tick to deliver. + tokio::time::sleep(std::time::Duration::from_millis(50)).await; + + let got = events.lock().await; + assert!( + got.iter().any(|e| matches!( + e, + DomainEvent::Voice(VoiceEvent::PttTranscriptCommitted { + thread_id, session_id, .. + }) if thread_id == "thread-1" && *session_id == 42 + )), + "expected PttTranscriptCommitted in {got:?}", + ); + } +} +``` + +Add to `src/openhuman/voice/mod.rs`: + +```rust +pub mod bus; +``` + +- [ ] **Step 2.2: Run the test to verify it fails** + +```bash +pnpm debug rust publishing_a_ptt_commit_reaches_a_subscriber +``` + +Expected: FAIL — `VoiceEvent` is undefined and `DomainEvent::Voice` doesn't exist yet. + +- [ ] **Step 2.3: Add `VoiceEvent` and the `Voice` variant to `DomainEvent`** + +In `src/core/event_bus/events.rs`, add the enum (above or near `DomainEvent`): + +```rust +/// Voice-domain events. +#[non_exhaustive] +#[derive(Clone, Debug)] +pub enum VoiceEvent { + /// A PTT session committed a transcript to a thread. Carries only + /// length/timing — never the raw text, per the PII-safe logging rule. + PttTranscriptCommitted { + thread_id: String, + session_id: u64, + text_len: usize, + held_ms: u64, + finalized_by_watchdog: bool, + }, +} +``` + +Then add to `DomainEvent`: + +```rust +Voice(VoiceEvent), +``` + +…and extend the `domain()` match arm with: + +```rust +DomainEvent::Voice(_) => Domain::Voice, +``` + +If `Domain::Voice` isn't already defined in the `Domain` enum in the same file, add it. + +- [ ] **Step 2.4: Run the test again** + +```bash +pnpm debug rust publishing_a_ptt_commit_reaches_a_subscriber +``` + +Expected: PASS. + +- [ ] **Step 2.5: Commit** + +```bash +git add src/core/event_bus/events.rs \ + src/openhuman/voice/bus.rs \ + src/openhuman/voice/mod.rs +git commit -m "feat(voice/bus): publish DomainEvent::Voice::PttTranscriptCommitted (#3090)" +``` + +--- + +## Task 3: `expand_ptt_shortcuts` + `PttError` (pure functions, fully tested) + +**Files:** +- Create: `app/src-tauri/src/ptt_hotkeys.rs` + +Mirrors `dictation_hotkeys::expand_dictation_shortcuts` but rejects pure-modifier shortcuts (which would be unusable as PTT keys). All Tauri / app state lives in the IPC commands (Task 5); this task is pure logic + tests only. + +- [ ] **Step 3.1: Write failing tests** + +Create `app/src-tauri/src/ptt_hotkeys.rs`: + +```rust +//! Global push-to-talk hotkey state + parsing. +//! +//! See spec: `docs/superpowers/specs/2026-06-02-global-ptt-design.md`. +//! +//! `expand_ptt_shortcuts` mirrors `dictation_hotkeys::expand_dictation_shortcuts` +//! but rejects pure-modifier shortcuts (Ctrl, Cmd+Shift, etc.) because they +//! would fire constantly during normal typing. + +use std::sync::atomic::AtomicU64; +use std::sync::Mutex; + +#[derive(Debug, PartialEq, Eq)] +pub enum PttError { + EmptyShortcut, + ModifierOnlyShortcut, + ConflictsWithDictation(String), + UnsupportedOnWayland, + RegistrationFailed(String), +} + +impl std::fmt::Display for PttError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + PttError::EmptyShortcut => write!(f, "ptt shortcut cannot be empty"), + PttError::ModifierOnlyShortcut => write!( + f, + "ptt shortcut cannot be only modifier keys (Ctrl/Cmd/Shift/Alt)" + ), + PttError::ConflictsWithDictation(s) => { + write!(f, "ptt shortcut '{s}' conflicts with the dictation hotkey") + } + PttError::UnsupportedOnWayland => write!( + f, + "global shortcuts are not supported in this Wayland session — switch to X11 or use in-app dictation" + ), + PttError::RegistrationFailed(s) => { + write!(f, "failed to register ptt shortcut: {s}") + } + } + } +} + +impl std::error::Error for PttError {} + +/// Process-wide PTT state. Held in the Tauri-managed `State`. +pub(crate) struct PttHotkeyState { + /// Currently-registered shortcut variants (e.g. `["Cmd+F13", "Ctrl+F13"]` on macOS). + pub(crate) shortcut: Mutex>, + /// Monotonic counter for session IDs. + pub(crate) session_counter: AtomicU64, +} + +impl PttHotkeyState { + pub(crate) fn new() -> Self { + Self { + shortcut: Mutex::new(Vec::new()), + session_counter: AtomicU64::new(0), + } + } +} + +const MODIFIER_TOKENS: &[&str] = &[ + "ctrl", + "control", + "cmd", + "command", + "meta", + "super", + "win", + "windows", + "alt", + "option", + "shift", + "cmdorctrl", +]; + +fn is_modifier_token(token: &str) -> bool { + let lower = token.trim().to_ascii_lowercase(); + MODIFIER_TOKENS.iter().any(|m| *m == lower) +} + +/// Expand a user-typed shortcut into one or two OS-specific variants and +/// validate it isn't empty / modifier-only. +pub(crate) fn expand_ptt_shortcuts(shortcut: &str) -> Result, PttError> { + let trimmed = shortcut.trim(); + if trimmed.is_empty() { + return Err(PttError::EmptyShortcut); + } + + let parts: Vec<&str> = trimmed.split('+').map(str::trim).collect(); + if parts.iter().all(|p| is_modifier_token(p)) { + return Err(PttError::ModifierOnlyShortcut); + } + + #[cfg(target_os = "macos")] + { + if trimmed.contains("CmdOrCtrl") { + let cmd_variant = trimmed.replace("CmdOrCtrl", "Cmd"); + let ctrl_variant = trimmed.replace("CmdOrCtrl", "Ctrl"); + if cmd_variant == ctrl_variant { + return Ok(vec![cmd_variant]); + } + return Ok(vec![cmd_variant, ctrl_variant]); + } + } + + #[cfg(not(target_os = "macos"))] + { + if trimmed.contains("CmdOrCtrl") { + return Ok(vec![trimmed.replace("CmdOrCtrl", "Ctrl")]); + } + } + + Ok(vec![trimmed.to_string()]) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn empty_shortcut_is_rejected() { + assert_eq!(expand_ptt_shortcuts(""), Err(PttError::EmptyShortcut)); + assert_eq!(expand_ptt_shortcuts(" "), Err(PttError::EmptyShortcut)); + } + + #[test] + fn modifier_only_shortcut_is_rejected() { + assert_eq!( + expand_ptt_shortcuts("Ctrl"), + Err(PttError::ModifierOnlyShortcut) + ); + assert_eq!( + expand_ptt_shortcuts("Cmd+Shift"), + Err(PttError::ModifierOnlyShortcut) + ); + assert_eq!( + expand_ptt_shortcuts("Alt+Shift+Ctrl"), + Err(PttError::ModifierOnlyShortcut) + ); + assert_eq!( + expand_ptt_shortcuts("CmdOrCtrl+Shift"), + Err(PttError::ModifierOnlyShortcut) + ); + } + + #[test] + fn plain_function_key_is_accepted() { + assert_eq!(expand_ptt_shortcuts("F13"), Ok(vec!["F13".to_string()])); + } + + #[test] + fn modifier_plus_letter_is_accepted() { + assert_eq!( + expand_ptt_shortcuts("Ctrl+Alt+T"), + Ok(vec!["Ctrl+Alt+T".to_string()]) + ); + } + + #[test] + #[cfg(target_os = "macos")] + fn cmd_or_ctrl_expands_to_both_on_macos() { + let result = expand_ptt_shortcuts("CmdOrCtrl+Shift+P").unwrap(); + assert_eq!(result.len(), 2); + assert!(result.contains(&"Cmd+Shift+P".to_string())); + assert!(result.contains(&"Ctrl+Shift+P".to_string())); + } + + #[test] + #[cfg(not(target_os = "macos"))] + fn cmd_or_ctrl_expands_to_ctrl_off_macos() { + let result = expand_ptt_shortcuts("CmdOrCtrl+Shift+P").unwrap(); + assert_eq!(result, vec!["Ctrl+Shift+P".to_string()]); + } +} +``` + +Also wire the module into the Tauri shell: add to `app/src-tauri/src/lib.rs` (near the other `mod` lines, around the existing `mod dictation_hotkeys;`): + +```rust +mod ptt_hotkeys; +``` + +- [ ] **Step 3.2: Run tests to verify they fail / verify pass** + +```bash +cargo test --manifest-path app/src-tauri/Cargo.toml ptt_hotkeys +``` + +Expected: PASS (all 6 tests; this task is implementation + tests in the same file, so they pass together — the TDD value here is the test code itself being committed alongside). + +- [ ] **Step 3.3: Run `cargo fmt`** + +```bash +cargo fmt --manifest-path app/src-tauri/Cargo.toml +``` + +- [ ] **Step 3.4: Commit** + +```bash +git add app/src-tauri/src/ptt_hotkeys.rs app/src-tauri/src/lib.rs +git commit -m "feat(tauri/ptt): add ptt_hotkeys module with shortcut expansion + validation (#3090)" +``` + +--- + +## Task 4: Wire `speak_reply` to `reply_speech` via the progress bridge (with test seam) + +**Files:** +- Modify: `src/openhuman/channels/providers/web.rs` (extend the progress bridge `TurnCompleted` handler) +- Modify: `src/openhuman/voice/reply_speech.rs` (add a test seam if none exists) +- Modify: `tests/json_rpc_e2e.rs` + +The progress bridge already receives `AgentProgress::TextDelta` events during the turn and `TurnCompleted` when the turn finishes. We accumulate the deltas and, on `TurnCompleted`, if `speak_reply` was set, hand the final text to `reply_speech`. + +- [ ] **Step 4.1: Add a test seam to `reply_speech`** + +If `reply_speech.rs` already exposes a way to intercept calls for testing, skip ahead to 4.2. Otherwise add a static observer: + +In `src/openhuman/voice/reply_speech.rs`, near the top of the file: + +```rust +#[cfg(test)] +pub mod test_seam { + use once_cell::sync::Lazy; + use std::sync::Mutex; + + pub static OBSERVED_CALLS: Lazy>> = + Lazy::new(|| Mutex::new(Vec::new())); + + pub fn clear() { + OBSERVED_CALLS.lock().unwrap().clear(); + } + pub fn observed() -> Vec { + OBSERVED_CALLS.lock().unwrap().clone() + } +} +``` + +In whichever function plays TTS (search the file for `pub async fn` and locate `synthesize_and_play` or similar — likely `pub async fn synthesize_and_play(text: &str)` or `pub async fn speak`), at the very top of the function add: + +```rust +#[cfg(test)] +{ + test_seam::OBSERVED_CALLS + .lock() + .unwrap() + .push(text.to_string()); + return Ok(()); +} +``` + +If the real return type isn't `Result<(), …>`, adapt the `return` to the actual signature (e.g. `return;` for `-> ()`). + +- [ ] **Step 4.2: Write failing E2E test in `tests/json_rpc_e2e.rs`** + +Add a new test at the end of the file: + +```rust +#[tokio::test] +async fn channel_web_chat_with_speak_reply_invokes_reply_speech() { + use openhuman::openhuman::voice::reply_speech::test_seam; + + test_seam::clear(); + + // Stand up the JSON-RPC harness — mirror an existing test in this file + // (e.g. the chat happy-path test); the helper functions for spawning the + // server + opening a client live in this file already. + let (client, _server_guard) = spawn_test_server().await; + + // Open a socket / acquire a client_id the same way the existing chat + // tests do (search for "client_id" usage in this file for the pattern). + let client_id = open_test_socket(&client).await; + let thread_id = create_test_thread(&client).await; + + // Send a web chat with speak_reply=true. + let resp = client + .call( + "openhuman.channel_web_chat", + serde_json::json!({ + "client_id": client_id, + "thread_id": thread_id, + "message": "hello", + "speak_reply": true, + "source": "ptt", + "session_id": 1_u64, + }), + ) + .await + .expect("rpc ok"); + assert_eq!(resp["accepted"], true); + + // Wait up to 10s for the agent turn to complete. + wait_for_turn_complete(&client, &client_id, &thread_id, 10_000).await; + + let observed = test_seam::observed(); + assert!( + !observed.is_empty(), + "expected reply_speech to be invoked when speak_reply=true, but observed no calls" + ); +} +``` + +If helper names (`spawn_test_server`, `open_test_socket`, `create_test_thread`, `wait_for_turn_complete`) don't already exist in `tests/json_rpc_e2e.rs`, use whichever helpers the existing chat test in that file uses — copy its shape and replace the params with the new fields. + +- [ ] **Step 4.3: Run the E2E to verify it fails** + +```bash +pnpm debug rust channel_web_chat_with_speak_reply_invokes_reply_speech +``` + +Expected: FAIL — bridge does not call `reply_speech` yet. + +- [ ] **Step 4.4: Wire the bridge to invoke `reply_speech` on `TurnCompleted`** + +In `src/openhuman/channels/providers/web.rs`, locate `spawn_progress_bridge`. We need to: +1. Buffer assistant text from `AgentProgress::TextDelta` (already received in the existing match — extend the arm). +2. On `AgentProgress::TurnCompleted`, if `speak_reply == true`, call `reply_speech::synthesize_and_play(buffered).await`. + +Pseudocode patch (apply against the actual file structure): + +```rust +let mut final_assistant_text = String::new(); +// ...inside the existing `while let Some(event) = rx.recv().await` loop: +match &event { + AgentProgress::TextDelta { delta, .. } => { + // existing log + bridge code preserved + final_assistant_text.push_str(delta); + } + AgentProgress::TurnCompleted { iterations } => { + log::debug!( + "[web_channel][bridge] turn_completed iterations={iterations} request_id={request_id} speak_reply={speak_reply}", + ); + if speak_reply && !final_assistant_text.trim().is_empty() { + let text = final_assistant_text.clone(); + tokio::spawn(async move { + if let Err(e) = + crate::openhuman::voice::reply_speech::synthesize_and_play(&text).await + { + log::warn!("[web_channel][bridge] reply_speech failed: {e}"); + } + }); + } + // Publish the PTT bus event when source == "ptt". + if source.as_deref() == Some("ptt") { + if let Some(sid) = session_id { + crate::openhuman::voice::bus::publish_ptt_transcript_committed( + thread_id.clone(), + sid, + final_assistant_text.len(), + /* held_ms */ 0, // filled by Task 13 when the renderer passes it + false, + ); + } + } + } + // ...other existing arms unchanged +} +``` + +Threading the `speak_reply`, `source`, `session_id` values into `spawn_progress_bridge` requires extending the function's signature. Add them as `Option<…>`/`bool` params and thread from `start_chat → channel_web_chat`. + +If `reply_speech::synthesize_and_play`'s real signature is different (e.g. takes `String` by value or returns a different `Result` type), adapt the call site to the real signature — check the function definition in `src/openhuman/voice/reply_speech.rs` first. + +- [ ] **Step 4.5: Run the E2E again** + +```bash +pnpm debug rust channel_web_chat_with_speak_reply_invokes_reply_speech +``` + +Expected: PASS. + +- [ ] **Step 4.6: Run unrelated chat tests to verify no regression** + +```bash +pnpm debug rust web_channel +pnpm debug rust json_rpc_e2e +``` + +Expected: green. + +- [ ] **Step 4.7: Commit** + +```bash +git add src/openhuman/channels/providers/web.rs \ + src/openhuman/voice/reply_speech.rs \ + tests/json_rpc_e2e.rs +git commit -m "feat(channels/web): invoke reply_speech + publish PttTranscriptCommitted on speak_reply=true (#3090)" +``` + +--- + +## Task 5: Tauri IPC commands `register_ptt_hotkey` / `unregister_ptt_hotkey` + conflict check + +**Files:** +- Modify: `app/src-tauri/src/lib.rs` +- Modify: `app/src-tauri/src/ptt_hotkeys.rs` (add a small conflict-helper fn) + +- [ ] **Step 5.1: Add the conflict helper to `ptt_hotkeys.rs`** + +Append to the same file: + +```rust +/// Returns `Some(conflicting_variant)` if any expanded PTT variant overlaps +/// any expanded dictation variant. Comparison is case-insensitive. +pub(crate) fn first_conflict_with( + ptt: &[String], + dictation: &[String], +) -> Option { + for p in ptt { + let p_lc = p.to_ascii_lowercase(); + for d in dictation { + if d.to_ascii_lowercase() == p_lc { + return Some(p.clone()); + } + } + } + None +} + +#[cfg(test)] +mod conflict_tests { + use super::*; + + #[test] + fn no_conflict_returns_none() { + let ptt = vec!["F13".into()]; + let dict = vec!["F14".into()]; + assert_eq!(first_conflict_with(&ptt, &dict), None); + } + + #[test] + fn case_insensitive_conflict_detected() { + let ptt = vec!["ctrl+space".into()]; + let dict = vec!["Ctrl+Space".into()]; + assert_eq!( + first_conflict_with(&ptt, &dict), + Some("ctrl+space".to_string()) + ); + } + + #[test] + fn only_one_variant_overlaps_returns_first() { + let ptt = vec!["Cmd+P".into(), "Ctrl+P".into()]; + let dict = vec!["Ctrl+P".into()]; + assert_eq!( + first_conflict_with(&ptt, &dict), + Some("Ctrl+P".to_string()) + ); + } +} +``` + +- [ ] **Step 5.2: Run conflict tests** + +```bash +cargo test --manifest-path app/src-tauri/Cargo.toml ptt_hotkeys::conflict_tests +``` + +Expected: PASS. + +- [ ] **Step 5.3: Add the two IPC commands to `lib.rs`** + +In `app/src-tauri/src/lib.rs`, near the existing `register_dictation_hotkey`: + +```rust +/// Register (or re-register) the global push-to-talk hotkey. Emits +/// `ptt://start { session_id }` on press and `ptt://stop { session_id }` +/// on release. +#[tauri::command] +async fn register_ptt_hotkey( + app: AppHandle, + shortcut: String, +) -> Result<(), String> { + log::info!("[ptt] register_ptt_hotkey: shortcut={shortcut}"); + + let expanded = ptt_hotkeys::expand_ptt_shortcuts(&shortcut) + .map_err(|e| e.to_string())?; + + // Reject overlap with the currently-registered dictation hotkey. + let dictation_current = { + let state = app.state::(); + let guard = state.0.lock().unwrap(); + guard.clone() + }; + if let Some(conflict) = + ptt_hotkeys::first_conflict_with(&expanded, &dictation_current) + { + return Err(ptt_hotkeys::PttError::ConflictsWithDictation(conflict).to_string()); + } + + let old_shortcuts = { + let state = app.state::(); + let guard = state.shortcut.lock().unwrap(); + guard.clone() + }; + + // Lazy-instantiate the overlay window so it's ready before the first press. + if let Err(e) = ptt_overlay::ensure_window(&app) { + log::warn!("[ptt] overlay window create failed (continuing): {e}"); + } + + let register_shortcut = |variant: &str| -> Result<(), String> { + let app_pressed = app.clone(); + let app_released = app.clone(); + let variant_owned = variant.to_string(); + app.global_shortcut() + .on_shortcut(variant, move |app_inner, _sc, event| { + let state = app_inner.state::(); + match event.state { + ShortcutState::Pressed => { + // Atomically bump the counter and emit start. + let session_id = state + .session_counter + .fetch_add(1, std::sync::atomic::Ordering::SeqCst) + + 1; + log::debug!( + "[ptt] pressed shortcut={variant_owned} session_id={session_id}" + ); + if let Err(e) = + app_pressed.emit("ptt://start", serde_json::json!({ + "session_id": session_id, + })) + { + log::warn!("[ptt] emit start failed: {e}"); + } + } + ShortcutState::Released => { + let session_id = state + .session_counter + .load(std::sync::atomic::Ordering::SeqCst); + log::debug!( + "[ptt] released shortcut={variant_owned} session_id={session_id}" + ); + if let Err(e) = + app_released.emit("ptt://stop", serde_json::json!({ + "session_id": session_id, + })) + { + log::warn!("[ptt] emit stop failed: {e}"); + } + } + } + }) + .map_err(|e| format!("Failed to register ptt shortcut '{variant}': {e}")) + }; + + // Unregister previous PTT variants. + let mut unregistered: Vec = Vec::new(); + for old in &old_shortcuts { + if let Err(e) = app.global_shortcut().unregister(old.as_str()) { + // Rollback already-unregistered ones. + for r in &unregistered { + let _ = register_shortcut(r); + } + return Err(format!("Failed to unregister previous ptt shortcut '{old}': {e}")); + } + unregistered.push(old.clone()); + } + + // Register the new variants. Rollback on first failure. + let mut newly_registered: Vec = Vec::new(); + for v in &expanded { + if let Err(e) = register_shortcut(v) { + for r in &newly_registered { + let _ = app.global_shortcut().unregister(r.as_str()); + } + for old in &old_shortcuts { + let _ = register_shortcut(old); + } + return Err(e); + } + newly_registered.push(v.clone()); + } + + { + let state = app.state::(); + let mut guard = state.shortcut.lock().unwrap(); + *guard = expanded.clone(); + } + + log::info!("[ptt] registered: {}", expanded.join(", ")); + Ok(()) +} + +/// Unregister the global PTT hotkey (if any). +#[tauri::command] +async fn unregister_ptt_hotkey(app: AppHandle) -> Result<(), String> { + log::info!("[ptt] unregister_ptt_hotkey: called"); + let state = app.state::(); + let old = { + let mut guard = state.shortcut.lock().unwrap(); + let v = guard.clone(); + guard.clear(); + v + }; + for s in &old { + if let Err(e) = app.global_shortcut().unregister(s.as_str()) { + log::warn!("[ptt] unregister '{s}' failed: {e}"); + } + } + // Destroy the overlay window so resources are released. + ptt_overlay::destroy_window(&app); + Ok(()) +} +``` + +Then wire state + commands. In the same file, find `.manage(dictation_hotkeys::DictationHotkeyState(...))` near `Builder::default()` and add: + +```rust +.manage(ptt_hotkeys::PttHotkeyState::new()) +``` + +And in the `tauri::generate_handler!` invocation, add: + +```rust +register_ptt_hotkey, +unregister_ptt_hotkey, +show_ptt_overlay, +``` + +(`show_ptt_overlay` is added in Task 6; if you're running this task standalone, comment it out and re-enable in Task 6.) + +- [ ] **Step 5.4: Add reverse conflict check to dictation register** + +In `register_dictation_hotkey` (existing function), after the existing `expand_dictation_shortcuts` call, add a symmetric check: + +```rust +// Reject overlap with the currently-registered PTT hotkey. +let ptt_current = { + let state = app.state::(); + let guard = state.shortcut.lock().unwrap(); + guard.clone() +}; +if let Some(conflict) = + ptt_hotkeys::first_conflict_with(&expanded_shortcuts, &ptt_current) +{ + return Err(format!( + "dictation shortcut '{conflict}' conflicts with the push-to-talk hotkey" + )); +} +``` + +- [ ] **Step 5.5: Run cargo check on the Tauri shell** + +```bash +pnpm rust:check +``` + +Expected: clean compile (or compile errors only from the `show_ptt_overlay` reference, fixed in Task 6). + +- [ ] **Step 5.6: Commit** + +```bash +git add app/src-tauri/src/ptt_hotkeys.rs app/src-tauri/src/lib.rs +git commit -m "feat(tauri/ptt): register/unregister IPC + dictation conflict guard (#3090)" +``` + +--- + +## Task 6: `ptt_overlay.rs` lazy borderless window + `show_ptt_overlay` IPC + +**Files:** +- Create: `app/src-tauri/src/ptt_overlay.rs` +- Modify: `app/src-tauri/src/lib.rs` (add `mod ptt_overlay;` + the IPC command) + +- [ ] **Step 6.1: Create the module** + +`app/src-tauri/src/ptt_overlay.rs`: + +```rust +//! Borderless always-on-top PTT overlay window. +//! +//! Lazy-created on the first `register_ptt_hotkey` call (so the window is +//! ready when the user hits the key for the first time), and destroyed by +//! `unregister_ptt_hotkey`. The window's contents are rendered by the React +//! route `/ptt-overlay` (see `app/src/pages/PttOverlayPage.tsx`). +//! +//! Cross-platform note: `focus(false)` ensures the window never steals focus +//! from the user's active app. `skip_taskbar(true)` keeps it out of the +//! Windows taskbar / macOS dock. `visible_on_all_workspaces(true)` makes it +//! follow the user across macOS Spaces. DXGI exclusive-fullscreen on Windows +//! still suppresses the overlay — documented in the settings panel as a +//! limitation; chime audio remains the fallback signal. + +use tauri::{AppHandle, Emitter, Manager, Runtime, WebviewUrl, WebviewWindowBuilder}; + +const OVERLAY_LABEL: &str = "ptt-overlay"; + +/// Ensure the overlay window exists. Idempotent — if the window already +/// exists, returns Ok without recreating it. +pub(crate) fn ensure_window(app: &AppHandle) -> Result<(), String> { + if app.get_webview_window(OVERLAY_LABEL).is_some() { + return Ok(()); + } + let url = WebviewUrl::App("index.html#/ptt-overlay".into()); + let mut builder = WebviewWindowBuilder::new(app, OVERLAY_LABEL, url) + .title("OpenHuman Push-to-Talk") + .inner_size(160.0, 56.0) + .decorations(false) + .transparent(true) + .always_on_top(true) + .skip_taskbar(true) + .focused(false) + .resizable(false) + .shadow(false) + .visible(false) + .accept_first_mouse(false); + + #[cfg(target_os = "macos")] + { + builder = builder.visible_on_all_workspaces(true); + } + + let _window = builder + .build() + .map_err(|e| format!("create ptt overlay window: {e}"))?; + log::info!("[ptt-overlay] window created (label={OVERLAY_LABEL})"); + Ok(()) +} + +/// Destroy the overlay window if it exists. +pub(crate) fn destroy_window(app: &AppHandle) { + if let Some(w) = app.get_webview_window(OVERLAY_LABEL) { + if let Err(e) = w.destroy() { + log::warn!("[ptt-overlay] destroy failed: {e}"); + } else { + log::info!("[ptt-overlay] window destroyed"); + } + } +} + +/// Show or hide the overlay. Emits `ptt-overlay://active` for the in-window +/// React tree to drive its pulsing-dot animation. +#[tauri::command] +pub(crate) async fn show_ptt_overlay( + app: AppHandle, + active: bool, + session_id: u64, +) -> Result<(), String> { + let window = app + .get_webview_window(OVERLAY_LABEL) + .ok_or_else(|| "ptt overlay window not created — register a hotkey first".to_string())?; + + if active { + window + .show() + .map_err(|e| format!("show overlay: {e}"))?; + } else { + window + .hide() + .map_err(|e| format!("hide overlay: {e}"))?; + } + + if let Err(e) = window.emit( + "ptt-overlay://active", + serde_json::json!({ + "active": active, + "session_id": session_id, + }), + ) { + log::warn!("[ptt-overlay] emit active failed: {e}"); + } + + Ok(()) +} +``` + +- [ ] **Step 6.2: Wire it into `lib.rs`** + +In `app/src-tauri/src/lib.rs`, near `mod ptt_hotkeys;`: + +```rust +mod ptt_overlay; +``` + +Confirm `show_ptt_overlay` is present in the `tauri::generate_handler!` macro invocation (added in Task 5.3); if it was commented out there, uncomment now. + +- [ ] **Step 6.3: Run `pnpm rust:check`** + +```bash +pnpm rust:check +``` + +Expected: clean compile. + +- [ ] **Step 6.4: Commit** + +```bash +git add app/src-tauri/src/ptt_overlay.rs app/src-tauri/src/lib.rs +git commit -m "feat(tauri/ptt): lazy borderless always-on-top overlay window (#3090)" +``` + +--- + +## Task 7: Chime assets + README + +**Files:** +- Create: `app/src/assets/audio/ptt-open.wav` +- Create: `app/src/assets/audio/ptt-close.wav` +- Create: `app/src/assets/audio/ptt-error.wav` +- Create: `app/src/assets/audio/README.md` + +WAVs ~80ms, LUFS-normalized to match the existing in-app notification sound (target ~ -16 LUFS). Use CC0-licensed source clips (e.g. from `freesound.org`'s CC0 collection or similar) — three short tones. + +- [ ] **Step 7.1: Add the three WAV files** + +Source three short CC0 WAV clips. Suggested: +- `ptt-open.wav`: rising 800Hz→1200Hz square wave, 80ms. +- `ptt-close.wav`: falling 1200Hz→800Hz square wave, 80ms. +- `ptt-error.wav`: two 150Hz pulses 60ms apart, 120ms total. + +If generating with `sox`: + +```bash +sox -n app/src/assets/audio/ptt-open.wav synth 0.08 sine 800-1200 norm -16 +sox -n app/src/assets/audio/ptt-close.wav synth 0.08 sine 1200-800 norm -16 +sox -n app/src/assets/audio/ptt-error.wav synth 0.06 sine 150 : synth 0.06 sine 0 : synth 0.06 sine 150 norm -16 +``` + +(If `sox` isn't available, hand-source equivalent CC0 clips and store them at the same paths.) + +- [ ] **Step 7.2: Add `README.md`** + +`app/src/assets/audio/README.md`: + +```markdown +# Audio assets + +Short UI chimes for the push-to-talk feature (`docs/superpowers/specs/2026-06-02-global-ptt-design.md`). + +| File | Purpose | Source | License | +| --- | --- | --- | --- | +| `ptt-open.wav` | Mic opened (PTT key pressed). | Generated locally with `sox synth`. | CC0 / Public Domain. | +| `ptt-close.wav` | Mic closed (PTT key released). | Generated locally with `sox synth`. | CC0 / Public Domain. | +| `ptt-error.wav` | Session aborted (empty audio, mic permission denied, etc.). | Generated locally with `sox synth`. | CC0 / Public Domain. | + +All clips are ~80–120ms, LUFS-normalized to roughly match the in-app notification sound (~ -16 LUFS). Replace freely with better-sounding equivalents — just keep them under 200ms and CC0/MIT-equivalent. +``` + +- [ ] **Step 7.3: Verify file presence** + +```bash +ls -la app/src/assets/audio/ +file app/src/assets/audio/*.wav +``` + +Expected: each file exists and is identified as a RIFF WAV. + +- [ ] **Step 7.4: Commit** + +```bash +git add app/src/assets/audio/ +git commit -m "assets(ptt): bundle CC0 open/close/error chimes (#3090)" +``` + +--- + +## Task 8: `ptt` redux slice + persistence + +**Files:** +- Create: `app/src/store/slices/ptt.ts` +- Create: `app/src/store/slices/__tests__/ptt.test.ts` +- Modify: `app/src/store/index.ts` (or wherever rootReducer + persistConfig live) + +- [ ] **Step 8.1: Write failing slice test** + +`app/src/store/slices/__tests__/ptt.test.ts`: + +```ts +import { describe, expect, it } from 'vitest'; + +import { + pttReducer, + setPttShortcut, + setSpeakReplies, + setShowOverlay, + setIsHeld, + type PttState, +} from '../ptt'; + +describe('ptt slice', () => { + const initial: PttState = { + shortcut: null, + speakReplies: true, + showOverlay: true, + isHeld: false, + }; + + it('has the documented default state', () => { + expect(pttReducer(undefined, { type: '@@INIT' })).toEqual(initial); + }); + + it('setPttShortcut stores the shortcut string', () => { + const next = pttReducer(initial, setPttShortcut('F13')); + expect(next.shortcut).toBe('F13'); + }); + + it('setPttShortcut with null clears the shortcut', () => { + const withKey: PttState = { ...initial, shortcut: 'F13' }; + const next = pttReducer(withKey, setPttShortcut(null)); + expect(next.shortcut).toBeNull(); + }); + + it('setSpeakReplies toggles the flag', () => { + expect(pttReducer(initial, setSpeakReplies(false)).speakReplies).toBe(false); + }); + + it('setShowOverlay toggles the flag', () => { + expect(pttReducer(initial, setShowOverlay(false)).showOverlay).toBe(false); + }); + + it('setIsHeld updates the runtime hold flag', () => { + expect(pttReducer(initial, setIsHeld(true)).isHeld).toBe(true); + }); +}); +``` + +- [ ] **Step 8.2: Run failing test** + +```bash +pnpm debug unit app/src/store/slices/__tests__/ptt.test.ts +``` + +Expected: FAIL — slice file does not exist yet. + +- [ ] **Step 8.3: Implement the slice** + +`app/src/store/slices/ptt.ts`: + +```ts +import { createSlice, type PayloadAction } from '@reduxjs/toolkit'; + +export interface PttState { + /** Currently-bound PTT hotkey string (e.g. "F13" or "Ctrl+Alt+T"). null = unbound. */ + shortcut: string | null; + /** When true, the agent's reply is spoken via TTS. */ + speakReplies: boolean; + /** When true, the overlay window is shown during a PTT session. */ + showOverlay: boolean; + /** Non-persisted runtime flag: is the PTT key currently held? */ + isHeld: boolean; +} + +export const initialPttState: PttState = { + shortcut: null, + speakReplies: true, + showOverlay: true, + isHeld: false, +}; + +const pttSlice = createSlice({ + name: 'ptt', + initialState: initialPttState, + reducers: { + setPttShortcut(state, action: PayloadAction) { + state.shortcut = action.payload; + }, + setSpeakReplies(state, action: PayloadAction) { + state.speakReplies = action.payload; + }, + setShowOverlay(state, action: PayloadAction) { + state.showOverlay = action.payload; + }, + setIsHeld(state, action: PayloadAction) { + state.isHeld = action.payload; + }, + }, +}); + +export const { setPttShortcut, setSpeakReplies, setShowOverlay, setIsHeld } = + pttSlice.actions; +export const pttReducer = pttSlice.reducer; +``` + +- [ ] **Step 8.4: Run slice test to verify pass** + +```bash +pnpm debug unit app/src/store/slices/__tests__/ptt.test.ts +``` + +Expected: PASS. + +- [ ] **Step 8.5: Register the slice in the root store** + +Open `app/src/store/index.ts` (or whichever file builds the root reducer — search for `combineReducers` or the existing `dictation` / `voice` slice registration). + +Add the import + register in `combineReducers`: + +```ts +import { pttReducer } from './slices/ptt'; +// ... +const rootReducer = combineReducers({ + // ...existing entries + ptt: pttReducer, +}); +``` + +If a `persistWhitelist` / `persistConfig.whitelist` array exists, add `'ptt'`. The `isHeld` field is non-persisted by being a separate runtime concern — for simple slice-level redux-persist, leave it in the slice; rehydration will reset to `false` if you exclude it via a `blacklist` of nested keys, but the simpler approach is to accept it being rehydrated and have the boot hook explicitly reset it (see Task 11). + +If using `redux-persist`'s `createTransform` to strip `isHeld`, you can add (in the same file): + +```ts +import { createTransform } from 'redux-persist'; + +const stripIsHeld = createTransform>( + (state) => { + const { isHeld: _isHeld, ...rest } = state; + return rest; + }, + (state) => ({ ...state, isHeld: false }), + { whitelist: ['ptt'] }, +); +``` + +…and add `stripIsHeld` to `persistConfig.transforms`. If `transforms` doesn't already exist in the persistConfig, this is over-engineering — accept the rehydrated value for now and reset in Task 11. + +- [ ] **Step 8.6: Run the broader unit suite to verify no regression** + +```bash +pnpm debug unit +``` + +Expected: green. + +- [ ] **Step 8.7: Commit** + +```bash +git add app/src/store/slices/ptt.ts \ + app/src/store/slices/__tests__/ptt.test.ts \ + app/src/store/index.ts +git commit -m "feat(store/ptt): redux slice for ptt hotkey + settings (#3090)" +``` + +--- + +## Task 9: Tauri-command wrappers + chatService forwards `speak_reply` + +**Files:** +- Create: `app/src/utils/tauriCommands/ptt.ts` +- Modify: `app/src/services/chatService.ts` +- Modify: `app/src/services/__tests__/chatService.test.ts` + +- [ ] **Step 9.1: Write a failing chatService test for the new fields** + +In `app/src/services/__tests__/chatService.test.ts`, add a new test alongside the existing `'channel_web_chat'` one (find the assertion block at line ~216): + +```ts +it('forwards speak_reply, source, session_id when provided', async () => { + // Set up the same fixtures the surrounding test uses (mock socket, mock callCoreRpc, etc.). + // Mirror the existing test's setup precisely — only the call args differ. + await chatSend({ + threadId: 'thread-1', + message: 'hello', + speakReply: true, + source: 'ptt', + sessionId: 42, + }); + + expect(callCoreRpcSpy).toHaveBeenCalledWith( + expect.objectContaining({ + method: 'openhuman.channel_web_chat', + params: expect.objectContaining({ + message: 'hello', + speak_reply: true, + source: 'ptt', + session_id: 42, + }), + }), + ); +}); + +it('does not include the new fields when omitted', async () => { + await chatSend({ threadId: 'thread-1', message: 'hi' }); + const params = callCoreRpcSpy.mock.calls[0][0].params; + expect(params.speak_reply).toBeUndefined(); + expect(params.source).toBeUndefined(); + expect(params.session_id).toBeUndefined(); +}); +``` + +(Adapt `callCoreRpcSpy` to the existing test file's name for the spy on `callCoreRpc`.) + +- [ ] **Step 9.2: Run failing test** + +```bash +pnpm debug unit app/src/services/__tests__/chatService.test.ts +``` + +Expected: FAIL — `ChatSendParams` does not include `speakReply` / `source` / `sessionId` yet. + +- [ ] **Step 9.3: Extend `chatService.chatSend`** + +In `app/src/services/chatService.ts`, find `ChatSendParams` and add three optional fields: + +```ts +export interface ChatSendParams { + // ...existing fields + speakReply?: boolean; + source?: string; + sessionId?: number; +} +``` + +In `chatSend`, extend the `params` object: + +```ts +await callCoreRpc({ + method: 'openhuman.channel_web_chat', + params: { + client_id: clientId, + thread_id: params.threadId, + message: params.message, + model_override: params.model ?? undefined, + profile_id: params.profileId ?? undefined, + locale: params.locale ?? undefined, + speak_reply: params.speakReply ?? undefined, + source: params.source ?? undefined, + session_id: params.sessionId ?? undefined, + }, +}); +``` + +- [ ] **Step 9.4: Run chatService tests to verify pass** + +```bash +pnpm debug unit app/src/services/__tests__/chatService.test.ts +``` + +Expected: PASS. + +- [ ] **Step 9.5: Create the Tauri-command wrappers** + +`app/src/utils/tauriCommands/ptt.ts`: + +```ts +import { isTauri } from '../../services/webviewAccountService'; +import { invoke } from '@tauri-apps/api/core'; + +/** Register (or re-register) the global push-to-talk hotkey. */ +export async function registerPttHotkey(shortcut: string): Promise { + if (!isTauri()) { + console.debug('[ptt] registerPttHotkey: skipped — not running in Tauri'); + return; + } + console.debug('[ptt] registerPttHotkey: shortcut=%s', shortcut); + await invoke('register_ptt_hotkey', { shortcut }); + console.debug('[ptt] registerPttHotkey: done'); +} + +/** Unregister the global push-to-talk hotkey. */ +export async function unregisterPttHotkey(): Promise { + if (!isTauri()) { + console.debug('[ptt] unregisterPttHotkey: skipped — not running in Tauri'); + return; + } + console.debug('[ptt] unregisterPttHotkey: invoking'); + await invoke('unregister_ptt_hotkey'); + console.debug('[ptt] unregisterPttHotkey: done'); +} + +/** Show or hide the PTT overlay window. */ +export async function showPttOverlay(active: boolean, sessionId: number): Promise { + if (!isTauri()) return; + await invoke('show_ptt_overlay', { active, sessionId }); +} +``` + +- [ ] **Step 9.6: Run full unit suite** + +```bash +pnpm debug unit +``` + +Expected: green. + +- [ ] **Step 9.7: Commit** + +```bash +git add app/src/services/chatService.ts \ + app/src/services/__tests__/chatService.test.ts \ + app/src/utils/tauriCommands/ptt.ts +git commit -m "feat(chatService): forward speakReply/source/sessionId; add ptt tauri wrappers (#3090)" +``` + +--- + +## Task 10: `pttService` state machine + watchdog (the heart of the feature) + +**Files:** +- Create: `app/src/services/pttService.ts` +- Create: `app/src/services/__tests__/pttService.test.ts` + +This is the largest single file in the plan. The state machine is documented in §2 of the spec. + +- [ ] **Step 10.1: Write the failing test suite** + +`app/src/services/__tests__/pttService.test.ts`: + +```ts +import { beforeEach, describe, expect, it, vi } from 'vitest'; + +import { createPttService, type PttDeps } from '../pttService'; + +function makeDeps(overrides: Partial = {}): PttDeps { + return { + audioCapture: { + start: vi.fn().mockResolvedValue(undefined), + finalize: vi.fn().mockResolvedValue({ durationMs: 1500, buffer: new ArrayBuffer(0) }), + cancel: vi.fn().mockResolvedValue(undefined), + }, + transcribe: vi.fn().mockResolvedValue('hello world'), + sendMessage: vi.fn().mockResolvedValue(undefined), + resolveActiveThreadId: vi.fn().mockResolvedValue('thread-active'), + createNewVoiceThread: vi.fn().mockResolvedValue('thread-new'), + playChime: vi.fn().mockResolvedValue(undefined), + showOverlay: vi.fn().mockResolvedValue(undefined), + getSettings: () => ({ speakReplies: true, showOverlay: true }), + now: () => 1_700_000_000_000, + watchdogMs: 10_000, + minAudioMs: 250, + logger: { debug: vi.fn(), info: vi.fn(), warn: vi.fn() }, + ...overrides, + }; +} + +describe('pttService state machine', () => { + beforeEach(() => { + vi.useFakeTimers(); + }); + + it('happy path: start → stop sends the transcript to the active thread with speakReply', async () => { + const deps = makeDeps(); + const svc = createPttService(deps); + + await svc.onStart(1); + expect(deps.audioCapture.start).toHaveBeenCalledWith({ sessionTag: 'ptt:1' }); + expect(deps.playChime).toHaveBeenCalledWith('open'); + expect(deps.showOverlay).toHaveBeenCalledWith(true, 1); + + await svc.onStop(1); + expect(deps.audioCapture.finalize).toHaveBeenCalled(); + expect(deps.playChime).toHaveBeenCalledWith('close'); + expect(deps.showOverlay).toHaveBeenCalledWith(false, 1); + expect(deps.transcribe).toHaveBeenCalled(); + expect(deps.sendMessage).toHaveBeenCalledWith({ + threadId: 'thread-active', + body: 'hello world', + metadata: { source: 'ptt', session_id: 1 }, + speakReply: true, + }); + }); + + it('falls back to a new "Voice" thread when no active thread exists', async () => { + const deps = makeDeps({ + resolveActiveThreadId: vi.fn().mockResolvedValue(null), + }); + const svc = createPttService(deps); + + await svc.onStart(2); + await svc.onStop(2); + + expect(deps.createNewVoiceThread).toHaveBeenCalled(); + expect(deps.sendMessage).toHaveBeenCalledWith( + expect.objectContaining({ threadId: 'thread-new' }), + ); + }); + + it('drops the session and plays the error chime when audio is shorter than minAudioMs', async () => { + const deps = makeDeps({ + audioCapture: { + start: vi.fn().mockResolvedValue(undefined), + finalize: vi.fn().mockResolvedValue({ durationMs: 100, buffer: new ArrayBuffer(0) }), + cancel: vi.fn().mockResolvedValue(undefined), + }, + }); + const svc = createPttService(deps); + + await svc.onStart(3); + await svc.onStop(3); + + expect(deps.transcribe).not.toHaveBeenCalled(); + expect(deps.sendMessage).not.toHaveBeenCalled(); + expect(deps.playChime).toHaveBeenCalledWith('error'); + }); + + it('drops the session when the transcript is empty', async () => { + const deps = makeDeps({ + transcribe: vi.fn().mockResolvedValue(' '), + }); + const svc = createPttService(deps); + + await svc.onStart(4); + await svc.onStop(4); + + expect(deps.sendMessage).not.toHaveBeenCalled(); + expect(deps.playChime).toHaveBeenCalledWith('error'); + }); + + it('watchdog finalises the session after watchdogMs even if onStop never arrives', async () => { + const deps = makeDeps(); + const svc = createPttService(deps); + + await svc.onStart(5); + + // Advance fake time past the watchdog. + await vi.advanceTimersByTimeAsync(11_000); + + expect(deps.audioCapture.finalize).toHaveBeenCalled(); + expect(deps.sendMessage).toHaveBeenCalledWith( + expect.objectContaining({ + metadata: expect.objectContaining({ session_id: 5 }), + }), + ); + }); + + it('second onStart while a session is active preempts the first', async () => { + const deps = makeDeps(); + const svc = createPttService(deps); + + await svc.onStart(6); + await svc.onStart(7); + + expect(deps.audioCapture.cancel).toHaveBeenCalled(); + expect(deps.audioCapture.start).toHaveBeenLastCalledWith({ sessionTag: 'ptt:7' }); + }); + + it('honours the speakReplies setting when forwarding to sendMessage', async () => { + const deps = makeDeps({ + getSettings: () => ({ speakReplies: false, showOverlay: true }), + }); + const svc = createPttService(deps); + + await svc.onStart(8); + await svc.onStop(8); + + expect(deps.sendMessage).toHaveBeenCalledWith( + expect.objectContaining({ speakReply: false }), + ); + }); + + it('mismatched session_id on onStop is ignored', async () => { + const deps = makeDeps(); + const svc = createPttService(deps); + + await svc.onStart(9); + await svc.onStop(999); // stale stop event + + expect(deps.audioCapture.finalize).not.toHaveBeenCalled(); + }); +}); +``` + +- [ ] **Step 10.2: Run failing test** + +```bash +pnpm debug unit app/src/services/__tests__/pttService.test.ts +``` + +Expected: FAIL — `pttService` does not exist. + +- [ ] **Step 10.3: Implement `pttService`** + +`app/src/services/pttService.ts`: + +```ts +/** + * pttService — push-to-talk session state machine. + * + * See spec: `docs/superpowers/specs/2026-06-02-global-ptt-design.md` (§ 2, § 3). + * + * The service is dependency-injected so it can be exercised under vitest + * with fake audio capture / fake STT / fake sendMessage. The real wiring + * (subscribing to `ptt://*` Tauri events, the real audio_capture, etc.) + * happens in PttHotkeyManager.tsx (Task 11). + */ + +export type ChimeKind = 'open' | 'close' | 'error'; + +export interface PttSettings { + speakReplies: boolean; + showOverlay: boolean; +} + +export interface FinalizedAudio { + durationMs: number; + buffer: ArrayBuffer; +} + +export interface PttDeps { + audioCapture: { + start(opts: { sessionTag: string }): Promise; + finalize(): Promise; + cancel(): Promise; + }; + transcribe(buf: ArrayBuffer): Promise; + sendMessage(args: { + threadId: string; + body: string; + metadata: { source: 'ptt'; session_id: number }; + speakReply: boolean; + }): Promise; + resolveActiveThreadId(): Promise; + createNewVoiceThread(): Promise; + playChime(kind: ChimeKind): Promise; + showOverlay(active: boolean, sessionId: number): Promise; + getSettings(): PttSettings; + now(): number; + watchdogMs: number; + minAudioMs: number; + logger: { + debug(msg: string, meta?: Record): void; + info(msg: string, meta?: Record): void; + warn(msg: string, meta?: Record): void; + }; +} + +export interface PttService { + onStart(sessionId: number): Promise; + onStop(sessionId: number): Promise; + cancel(reason: 'preempted' | 'mic_failure' | 'user_cancel'): Promise; +} + +interface ActiveSession { + sessionId: number; + startedAtMs: number; + watchdogTimer: ReturnType | null; + finalizedByWatchdog: boolean; +} + +export function createPttService(deps: PttDeps): PttService { + let active: ActiveSession | null = null; + + const armWatchdog = (sessionId: number) => { + const timer = setTimeout(() => { + if (active && active.sessionId === sessionId) { + active.finalizedByWatchdog = true; + deps.logger.warn('[ptt] watchdog fired — finalising session', { sessionId }); + // Fire-and-forget; the watchdog path is the same as a normal stop + // except for the `finalizedByWatchdog` flag, which is only used + // for logging. + void finaliseSession(sessionId, /* fromWatchdog */ true); + } + }, deps.watchdogMs); + return timer; + }; + + const finaliseSession = async (sessionId: number, fromWatchdog: boolean) => { + if (!active || active.sessionId !== sessionId) { + // Stale finalisation — ignore. + return; + } + + if (active.watchdogTimer) { + clearTimeout(active.watchdogTimer); + active.watchdogTimer = null; + } + + const settings = deps.getSettings(); + const session = active; + active = null; + + let audio: FinalizedAudio; + try { + audio = await deps.audioCapture.finalize(); + } catch (err) { + deps.logger.warn('[ptt] audio finalize failed', { sessionId, err: String(err) }); + await deps.playChime('error'); + await deps.showOverlay(false, sessionId); + return; + } + + await deps.playChime('close'); + await deps.showOverlay(false, sessionId); + + if (audio.durationMs < deps.minAudioMs) { + deps.logger.info('[ptt] session dropped — audio shorter than minAudioMs', { + sessionId, + durationMs: audio.durationMs, + }); + await deps.playChime('error'); + return; + } + + let text = ''; + try { + text = await deps.transcribe(audio.buffer); + } catch (err) { + deps.logger.warn('[ptt] transcription failed', { sessionId, err: String(err) }); + // Per spec: post the message anyway as a breadcrumb. + text = '[Voice — transcription failed]'; + } + + if (!text.trim()) { + deps.logger.info('[ptt] session dropped — empty transcript', { sessionId }); + await deps.playChime('error'); + return; + } + + let threadId = await deps.resolveActiveThreadId(); + if (!threadId) { + threadId = await deps.createNewVoiceThread(); + } + + await deps.sendMessage({ + threadId, + body: text.trim(), + metadata: { source: 'ptt', session_id: sessionId }, + speakReply: settings.speakReplies, + }); + + deps.logger.info('[ptt] session committed', { + sessionId, + threadId, + heldMs: deps.now() - session.startedAtMs, + finalizedByWatchdog: fromWatchdog, + transcriptLen: text.trim().length, + }); + }; + + return { + async onStart(sessionId) { + if (active) { + deps.logger.debug('[ptt] onStart while active — preempting', { + old: active.sessionId, + new: sessionId, + }); + try { + await deps.audioCapture.cancel(); + } catch (err) { + deps.logger.warn('[ptt] cancel failed during preempt', { err: String(err) }); + } + if (active.watchdogTimer) clearTimeout(active.watchdogTimer); + active = null; + } + + await deps.playChime('open'); + await deps.showOverlay(true, sessionId); + + try { + await deps.audioCapture.start({ sessionTag: `ptt:${sessionId}` }); + } catch (err) { + deps.logger.warn('[ptt] audio start failed', { sessionId, err: String(err) }); + await deps.playChime('error'); + await deps.showOverlay(false, sessionId); + return; + } + + active = { + sessionId, + startedAtMs: deps.now(), + watchdogTimer: null, + finalizedByWatchdog: false, + }; + active.watchdogTimer = armWatchdog(sessionId); + }, + + async onStop(sessionId) { + if (!active || active.sessionId !== sessionId) { + deps.logger.debug('[ptt] stale onStop — ignored', { sessionId }); + return; + } + await finaliseSession(sessionId, /* fromWatchdog */ false); + }, + + async cancel(reason) { + if (!active) return; + deps.logger.info('[ptt] cancel', { sessionId: active.sessionId, reason }); + if (active.watchdogTimer) clearTimeout(active.watchdogTimer); + const session = active; + active = null; + try { + await deps.audioCapture.cancel(); + } catch (err) { + deps.logger.warn('[ptt] cancel: audio cancel failed', { err: String(err) }); + } + await deps.playChime('error'); + await deps.showOverlay(false, session.sessionId); + }, + }; +} +``` + +- [ ] **Step 10.4: Run pttService test to verify pass** + +```bash +pnpm debug unit app/src/services/__tests__/pttService.test.ts +``` + +Expected: PASS (all 8 tests). + +- [ ] **Step 10.5: Commit** + +```bash +git add app/src/services/pttService.ts \ + app/src/services/__tests__/pttService.test.ts +git commit -m "feat(pttService): state machine, watchdog, preempt, fallback thread (#3090)" +``` + +--- + +## Task 11: Boot-time hook + `PttHotkeyManager` (wires service to Tauri events) + +**Files:** +- Create: `app/src/hooks/usePttHotkey.ts` +- Create: `app/src/components/PttHotkeyManager.tsx` +- Modify: `app/src/AppShell.tsx` (mount the manager) + +The manager creates the service singleton with real deps, subscribes to `ptt://start` / `ptt://stop` Tauri events, and re-registers the hotkey when the slice's `shortcut` changes. + +- [ ] **Step 11.1: Create `usePttHotkey`** + +`app/src/hooks/usePttHotkey.ts`: + +```ts +import { useEffect } from 'react'; +import { useDispatch, useSelector } from 'react-redux'; + +import { + registerPttHotkey, + unregisterPttHotkey, +} from '../utils/tauriCommands/ptt'; +import { setIsHeld } from '../store/slices/ptt'; +import type { RootState } from '../store'; + +/** + * Subscribes the configured PTT shortcut to the Tauri shell whenever it + * changes. Resets the transient `isHeld` flag on mount so a stale rehydrated + * value can't leave the UI thinking the key is held. + */ +export function usePttHotkey(): void { + const dispatch = useDispatch(); + const shortcut = useSelector((s: RootState) => s.ptt.shortcut); + + // Reset transient state once on mount. + useEffect(() => { + dispatch(setIsHeld(false)); + }, [dispatch]); + + useEffect(() => { + let cancelled = false; + const apply = async () => { + try { + if (shortcut && shortcut.trim().length > 0) { + await registerPttHotkey(shortcut); + } else { + await unregisterPttHotkey(); + } + } catch (err) { + if (!cancelled) { + console.warn('[ptt] hotkey (un)register failed', err); + } + } + }; + void apply(); + return () => { + cancelled = true; + }; + }, [shortcut]); +} +``` + +- [ ] **Step 11.2: Create `PttHotkeyManager`** + +`app/src/components/PttHotkeyManager.tsx`: + +```tsx +import { useEffect, useMemo, useRef } from 'react'; +import { useDispatch, useSelector, useStore } from 'react-redux'; +import { listen, type UnlistenFn } from '@tauri-apps/api/event'; + +import { usePttHotkey } from '../hooks/usePttHotkey'; +import { setIsHeld } from '../store/slices/ptt'; +import { showPttOverlay } from '../utils/tauriCommands/ptt'; +import { createPttService } from '../services/pttService'; +import { chatSend } from '../services/chatService'; +import { + startPttAudio, + finalizePttAudio, + cancelPttAudio, +} from '../features/voice/pttAudio'; +import { transcribePttAudio } from '../features/voice/pttTranscribe'; +import { + resolveActiveThreadId, + createNewVoiceThread, +} from '../features/voice/pttThread'; +import { playPttChime } from '../features/voice/pttChimes'; +import type { RootState } from '../store'; + +/** + * Renderless. Mounted once in AppShell. Owns the pttService singleton. + */ +export function PttHotkeyManager(): null { + usePttHotkey(); + + const dispatch = useDispatch(); + const store = useStore(); + const speakReplies = useSelector((s: RootState) => s.ptt.speakReplies); + const showOverlayPref = useSelector((s: RootState) => s.ptt.showOverlay); + const unlistenRef = useRef([]); + + const service = useMemo( + () => + createPttService({ + audioCapture: { + start: startPttAudio, + finalize: finalizePttAudio, + cancel: cancelPttAudio, + }, + transcribe: transcribePttAudio, + sendMessage: async ({ threadId, body, speakReply, metadata }) => { + await chatSend({ + threadId, + message: body, + speakReply, + source: metadata.source, + sessionId: metadata.session_id, + }); + }, + resolveActiveThreadId, + createNewVoiceThread, + playChime: playPttChime, + showOverlay: async (active, sessionId) => { + // Respect user setting — but always hide on stop even if the + // user toggled the setting off mid-session. + if (!active || store.getState().ptt.showOverlay) { + await showPttOverlay(active, sessionId); + } + }, + getSettings: () => ({ + speakReplies: store.getState().ptt.speakReplies, + showOverlay: store.getState().ptt.showOverlay, + }), + now: () => Date.now(), + watchdogMs: 10_000, + minAudioMs: 250, + logger: { + debug: (msg, meta) => console.debug(msg, meta ?? {}), + info: (msg, meta) => console.info(msg, meta ?? {}), + warn: (msg, meta) => console.warn(msg, meta ?? {}), + }, + }), + // Service is constructed once for the lifetime of the AppShell. + // eslint-disable-next-line react-hooks/exhaustive-deps + [], + ); + + useEffect(() => { + let mounted = true; + (async () => { + const offStart = await listen<{ session_id: number }>('ptt://start', (e) => { + dispatch(setIsHeld(true)); + void service.onStart(e.payload.session_id); + }); + const offStop = await listen<{ session_id: number }>('ptt://stop', (e) => { + dispatch(setIsHeld(false)); + void service.onStop(e.payload.session_id); + }); + if (!mounted) { + offStart(); + offStop(); + return; + } + unlistenRef.current.push(offStart, offStop); + })(); + return () => { + mounted = false; + for (const off of unlistenRef.current) off(); + unlistenRef.current = []; + }; + }, [dispatch, service]); + + // Effects to suppress lint warning for unused selectors above. + void speakReplies; + void showOverlayPref; + + return null; +} +``` + +The manager pulls four small feature modules (`pttAudio`, `pttTranscribe`, `pttThread`, `pttChimes`) — create them as thin wrappers: + +`app/src/features/voice/pttAudio.ts`: + +```ts +import type { FinalizedAudio } from '../../services/pttService'; +// Reuse the existing voice/audio_capture functions used by dictation today. +// If the existing module lives at a different path, adjust the import. +import { startMicCapture, finalizeMicCapture, cancelMicCapture } from './audioCapture'; + +export async function startPttAudio(opts: { sessionTag: string }): Promise { + await startMicCapture({ tag: opts.sessionTag }); +} + +export async function finalizePttAudio(): Promise { + const { buffer, durationMs } = await finalizeMicCapture(); + return { buffer, durationMs }; +} + +export async function cancelPttAudio(): Promise { + await cancelMicCapture(); +} +``` + +If the existing `audioCapture.ts` exports different names (search `app/src/features/voice` and `app/src/services/voice` for the current capture API), adapt the wrappers — they're meant to be a thin renaming layer so `pttService` is decoupled from whatever the dictation feature already provides. + +`app/src/features/voice/pttTranscribe.ts`: + +```ts +import { transcribeBuffer } from './dictationTranscribe'; + +export async function transcribePttAudio(buf: ArrayBuffer): Promise { + // Reuses the same STT path dictation uses. + return transcribeBuffer(buf); +} +``` + +`app/src/features/voice/pttThread.ts`: + +```ts +import { store } from '../../store'; +import { callCoreRpc } from '../../services/coreRpcClient'; + +export async function resolveActiveThreadId(): Promise { + const state = store.getState(); + // `chatRuntime.activeThread` is the source of truth for the currently-open thread. + return state.chatRuntime?.activeThreadId ?? null; +} + +export async function createNewVoiceThread(): Promise { + const resp = await callCoreRpc<{ result: { id: string } } | { id: string }>({ + method: 'openhuman.threads_create_new', + params: { title: 'Voice' }, + }); + // Strip RpcOutcome envelope if present. + const r = 'result' in resp ? (resp as { result: { id: string } }).result : (resp as { id: string }); + return r.id; +} +``` + +If the actual root state shape is different (e.g. `state.chatRuntime` doesn't exist or `activeThreadId` lives under a different key), update the selector. Same caveat for `threads_create_new` — confirm the actual RPC name in `src/openhuman/threads/schemas.rs::"create_new"`. + +`app/src/features/voice/pttChimes.ts`: + +```ts +import openSrc from '../../assets/audio/ptt-open.wav'; +import closeSrc from '../../assets/audio/ptt-close.wav'; +import errorSrc from '../../assets/audio/ptt-error.wav'; + +const cache: Record = {}; + +function get(src: string): HTMLAudioElement { + if (!cache[src]) { + const el = new Audio(src); + el.preload = 'auto'; + cache[src] = el; + } + return cache[src]; +} + +export async function playPttChime(kind: 'open' | 'close' | 'error'): Promise { + const src = kind === 'open' ? openSrc : kind === 'close' ? closeSrc : errorSrc; + const el = get(src); + try { + el.currentTime = 0; + await el.play(); + } catch (err) { + console.debug('[ptt] chime play failed (likely autoplay policy)', err); + } +} +``` + +- [ ] **Step 11.3: Mount ``** + +Open `app/src/AppShell.tsx` (or `App.tsx`, wherever top-level UI is mounted — search for `` in `App.tsx`). Add: + +```tsx +import { PttHotkeyManager } from './components/PttHotkeyManager'; + +// inside the render tree, alongside DictationHotkeyManager if present: + +``` + +- [ ] **Step 11.4: Run the full unit suite** + +```bash +pnpm debug unit +``` + +Expected: green. (The manager has integration-only behavior; we cover it indirectly via the pttService tests and the WDIO spec in Task 14.) + +- [ ] **Step 11.5: Run typecheck** + +```bash +pnpm typecheck +``` + +Expected: clean. Resolve any import-path issues that surface against the actual codebase paths. + +- [ ] **Step 11.6: Commit** + +```bash +git add app/src/hooks/usePttHotkey.ts \ + app/src/components/PttHotkeyManager.tsx \ + app/src/features/voice/pttAudio.ts \ + app/src/features/voice/pttTranscribe.ts \ + app/src/features/voice/pttThread.ts \ + app/src/features/voice/pttChimes.ts \ + app/src/AppShell.tsx +git commit -m "feat(ptt): mount PttHotkeyManager + wire service to real audio/STT/chat (#3090)" +``` + +--- + +## Task 12: `/ptt-overlay` route + overlay UI + +**Files:** +- Create: `app/src/pages/PttOverlayPage.tsx` +- Create: `app/src/pages/PttOverlayPage.test.tsx` +- Modify: `app/src/AppRoutes.tsx` + +- [ ] **Step 12.1: Write failing render test** + +`app/src/pages/PttOverlayPage.test.tsx`: + +```tsx +import { describe, expect, it, vi } from 'vitest'; +import { render, screen, act } from '@testing-library/react'; + +import { PttOverlayPage } from './PttOverlayPage'; + +// Mock @tauri-apps/api/event's listen so we can dispatch fake events. +vi.mock('@tauri-apps/api/event', () => { + const handlers: Record void> = {}; + return { + listen: vi.fn(async (name: string, handler: (e: { payload: unknown }) => void) => { + handlers[name] = handler; + return () => delete handlers[name]; + }), + __dispatch: (name: string, payload: unknown) => + handlers[name]?.({ payload }), + }; +}); + +describe('PttOverlayPage', () => { + it('renders idle state by default', () => { + render(); + expect(screen.getByTestId('ptt-overlay-root')).toHaveAttribute('data-active', 'false'); + }); + + it('flips to active when ptt-overlay://active fires with active=true', async () => { + render(); + const evt = await import('@tauri-apps/api/event'); + await act(async () => { + (evt as unknown as { __dispatch: (n: string, p: unknown) => void }).__dispatch( + 'ptt-overlay://active', + { active: true, session_id: 1 }, + ); + }); + expect(screen.getByTestId('ptt-overlay-root')).toHaveAttribute('data-active', 'true'); + }); +}); +``` + +- [ ] **Step 12.2: Run failing test** + +```bash +pnpm debug unit app/src/pages/PttOverlayPage.test.tsx +``` + +Expected: FAIL — module does not exist. + +- [ ] **Step 12.3: Implement the page** + +`app/src/pages/PttOverlayPage.tsx`: + +```tsx +import { useEffect, useState } from 'react'; +import { listen, type UnlistenFn } from '@tauri-apps/api/event'; +import { useT } from '../lib/i18n/I18nContext'; + +export function PttOverlayPage(): JSX.Element { + const t = useT(); + const [active, setActive] = useState(false); + + useEffect(() => { + let off: UnlistenFn | undefined; + (async () => { + off = await listen<{ active: boolean }>('ptt-overlay://active', (e) => { + setActive(Boolean(e.payload?.active)); + }); + })(); + return () => off?.(); + }, []); + + return ( +
+ + {active ? t('pttOverlay.listening') : t('pttOverlay.idle')} +
+ ); +} +``` + +- [ ] **Step 12.4: Add the route** + +In `app/src/AppRoutes.tsx`, add (alongside other Routes): + +```tsx +import { PttOverlayPage } from './pages/PttOverlayPage'; + +// inside : +} /> +``` + +- [ ] **Step 12.5: Run overlay tests** + +```bash +pnpm debug unit app/src/pages/PttOverlayPage.test.tsx +``` + +Expected: PASS. + +- [ ] **Step 12.6: Commit** + +```bash +git add app/src/pages/PttOverlayPage.tsx \ + app/src/pages/PttOverlayPage.test.tsx \ + app/src/AppRoutes.tsx +git commit -m "feat(ptt/ui): overlay page at /ptt-overlay with idle/active states (#3090)" +``` + +--- + +## Task 13: Settings panel — hotkey capture + toggles + +**Files:** +- Create: `app/src/pages/settings/voice/PttSettingsPanel.tsx` +- Create: `app/src/pages/settings/voice/__tests__/PttSettingsPanel.test.tsx` +- Modify: `app/src/pages/settings/voice/VoiceSettingsPage.tsx` (or wherever the voice settings tab body lives) +- Modify: `app/src/lib/i18n/en.ts` + 12 other locale files + +- [ ] **Step 13.1: Add i18n keys to en.ts** + +In `app/src/lib/i18n/en.ts`, add: + +```ts +// In the appropriate section (alphabetical, near other voice keys): +'pttSettings.title': 'Push-to-talk', +'pttSettings.description': + "Hold a key to talk to OpenHuman while you're in another app. Releases the key to send; OpenHuman speaks the reply back.", +'pttSettings.shortcutLabel': 'Hotkey', +'pttSettings.shortcutPlaceholder': 'Press a key (e.g. F13)', +'pttSettings.shortcutUnsetHint': 'Push-to-talk is off — pick a hotkey to enable.', +'pttSettings.speakRepliesLabel': 'Speak agent replies', +'pttSettings.showOverlayLabel': 'Show overlay while held', +'pttSettings.errorConflictsWithDictation': + 'This shortcut is already used by dictation. Pick a different key.', +'pttSettings.errorModifierOnly': + "Pick a regular key (e.g. F13) — modifier-only shortcuts don't work for push-to-talk.", +'pttSettings.errorEmpty': 'Pick a key to bind.', +'pttSettings.errorAccessibility': + 'macOS needs Accessibility permission for this shortcut. Open System Settings → Privacy & Security → Accessibility and enable OpenHuman.', +'pttSettings.errorShortcutInUse': + 'Another app already uses this shortcut. Pick a different one.', +'pttSettings.errorUnsupportedWayland': + "Wayland sessions don't support global shortcuts in OpenHuman yet — switch to an X11 session or use the in-app dictation toggle.", +'pttSettings.exclusiveFullscreenHint': + "In exclusive-fullscreen games the overlay won't render — you'll only hear the chime. Switch to borderless fullscreen for the overlay.", +'pttOverlay.listening': 'Listening…', +'pttOverlay.idle': 'Idle', +``` + +- [ ] **Step 13.2: Add the same keys to every other locale with REAL translations** + +For each of `ar`, `bn`, `de`, `es`, `fr`, `hi`, `id`, `it`, `ko`, `pl`, `pt`, `ru`, `zh-CN`, add the same set of keys with translated values. Do not copy English. Examples for German (`de.ts`) and Spanish (`es.ts`) — translate the remaining 11 locales the same way: + +```ts +// de.ts additions +'pttSettings.title': 'Push-to-Talk', +'pttSettings.description': + 'Halte eine Taste gedrückt, um mit OpenHuman zu sprechen, während du in einer anderen App bist. Beim Loslassen wird gesendet; OpenHuman spricht die Antwort.', +'pttSettings.shortcutLabel': 'Tastenkürzel', +'pttSettings.shortcutPlaceholder': 'Taste drücken (z. B. F13)', +'pttSettings.shortcutUnsetHint': 'Push-to-Talk ist aus — wähle ein Tastenkürzel zum Aktivieren.', +'pttSettings.speakRepliesLabel': 'Antworten vorlesen', +'pttSettings.showOverlayLabel': 'Overlay während des Haltens anzeigen', +'pttSettings.errorConflictsWithDictation': + 'Dieses Kürzel wird bereits von der Diktierfunktion verwendet. Wähle eine andere Taste.', +'pttSettings.errorModifierOnly': + 'Wähle eine normale Taste (z. B. F13) — reine Modifikatortasten funktionieren für Push-to-Talk nicht.', +'pttSettings.errorEmpty': 'Wähle eine Taste zum Binden.', +'pttSettings.errorAccessibility': + 'macOS benötigt die Bedienungshilfen-Berechtigung. Öffne Systemeinstellungen → Datenschutz & Sicherheit → Bedienungshilfen und aktiviere OpenHuman.', +'pttSettings.errorShortcutInUse': + 'Eine andere App nutzt dieses Kürzel bereits. Wähle ein anderes.', +'pttSettings.errorUnsupportedWayland': + 'Wayland-Sitzungen unterstützen globale Tastenkürzel in OpenHuman noch nicht — wechsle zu X11 oder nutze die In-App-Diktatumschaltung.', +'pttSettings.exclusiveFullscreenHint': + 'Im Exclusive-Fullscreen-Modus wird das Overlay nicht angezeigt — du hörst nur den Signalton. Wechsle zu randlosem Vollbild für das Overlay.', +'pttOverlay.listening': 'Höre zu…', +'pttOverlay.idle': 'Inaktiv', + +// es.ts additions +'pttSettings.title': 'Pulsar para hablar', +'pttSettings.description': + 'Mantén una tecla pulsada para hablar con OpenHuman mientras estás en otra app. Al soltar se envía; OpenHuman lee la respuesta.', +'pttSettings.shortcutLabel': 'Atajo de teclado', +'pttSettings.shortcutPlaceholder': 'Pulsa una tecla (p. ej. F13)', +'pttSettings.shortcutUnsetHint': 'Pulsar para hablar está apagado — elige una tecla para activarlo.', +'pttSettings.speakRepliesLabel': 'Leer las respuestas en voz alta', +'pttSettings.showOverlayLabel': 'Mostrar superposición mientras se mantiene pulsada', +'pttSettings.errorConflictsWithDictation': + 'Este atajo ya lo usa el dictado. Elige otra tecla.', +'pttSettings.errorModifierOnly': + 'Elige una tecla normal (p. ej. F13) — los atajos solo con modificadores no funcionan para pulsar para hablar.', +'pttSettings.errorEmpty': 'Elige una tecla para asignar.', +'pttSettings.errorAccessibility': + 'macOS requiere permiso de Accesibilidad. Abre Ajustes del Sistema → Privacidad y Seguridad → Accesibilidad y activa OpenHuman.', +'pttSettings.errorShortcutInUse': + 'Otra app ya está usando este atajo. Elige uno diferente.', +'pttSettings.errorUnsupportedWayland': + 'Las sesiones Wayland aún no admiten atajos globales en OpenHuman — cambia a X11 o usa la activación del dictado en la app.', +'pttSettings.exclusiveFullscreenHint': + 'En modo pantalla completa exclusivo el overlay no se mostrará — solo oirás el tono. Cambia a pantalla completa sin bordes para ver el overlay.', +'pttOverlay.listening': 'Escuchando…', +'pttOverlay.idle': 'Inactivo', +``` + +For the remaining 11 locales, repeat with translations into that language. Do not leave English-language stubs. + +- [ ] **Step 13.3: Run i18n gates** + +```bash +pnpm i18n:check +pnpm i18n:english:check +``` + +Expected: both pass. + +- [ ] **Step 13.4: Write failing settings panel test** + +`app/src/pages/settings/voice/__tests__/PttSettingsPanel.test.tsx`: + +```tsx +import { describe, expect, it, vi } from 'vitest'; +import { render, screen, fireEvent } from '@testing-library/react'; +import { Provider } from 'react-redux'; +import { configureStore } from '@reduxjs/toolkit'; + +import { pttReducer, initialPttState } from '../../../../store/slices/ptt'; +import { I18nProvider } from '../../../../lib/i18n/I18nContext'; +import en from '../../../../lib/i18n/en'; +import { PttSettingsPanel } from '../PttSettingsPanel'; + +function renderWithStore(state = initialPttState) { + const store = configureStore({ + reducer: { ptt: pttReducer }, + preloadedState: { ptt: state }, + }); + return render( + + + + + , + ); +} + +describe('PttSettingsPanel', () => { + it('renders the hint when no shortcut is set', () => { + renderWithStore({ ...initialPttState, shortcut: null }); + expect(screen.getByText(/push-to-talk is off/i)).toBeInTheDocument(); + }); + + it('renders the bound shortcut when set', () => { + renderWithStore({ ...initialPttState, shortcut: 'F13' }); + // The hotkey-capture widget shows the current key somewhere — adapt to the + // existing widget's testid pattern used by the dictation panel. + expect(screen.getByTestId('ptt-shortcut-current')).toHaveTextContent('F13'); + }); + + it('toggles speakReplies via the switch', () => { + renderWithStore({ ...initialPttState, shortcut: 'F13', speakReplies: true }); + const toggle = screen.getByLabelText(/speak agent replies/i); + fireEvent.click(toggle); + // Assert dispatched action via store state — re-render and check the toggle's aria-checked. + expect(toggle).toHaveAttribute('aria-checked', 'false'); + }); +}); +``` + +- [ ] **Step 13.5: Implement `PttSettingsPanel`** + +`app/src/pages/settings/voice/PttSettingsPanel.tsx`: + +```tsx +import { useDispatch, useSelector } from 'react-redux'; + +import { useT } from '../../../lib/i18n/I18nContext'; +import { + setPttShortcut, + setSpeakReplies, + setShowOverlay, +} from '../../../store/slices/ptt'; +import type { RootState } from '../../../store'; +// Reuse the dictation panel's hotkey-capture widget pattern; if the existing +// one isn't reusable, build a small inline KeyCapture in this file with the +// same shape. +import { HotkeyCaptureField } from '../../../components/HotkeyCaptureField'; + +export function PttSettingsPanel(): JSX.Element { + const t = useT(); + const dispatch = useDispatch(); + const shortcut = useSelector((s: RootState) => s.ptt.shortcut); + const speakReplies = useSelector((s: RootState) => s.ptt.speakReplies); + const showOverlay = useSelector((s: RootState) => s.ptt.showOverlay); + + return ( +
+

{t('pttSettings.title')}

+

{t('pttSettings.description')}

+ + dispatch(setPttShortcut(next || null))} + testIdCurrent="ptt-shortcut-current" + /> + + {shortcut == null && ( +

{t('pttSettings.shortcutUnsetHint')}

+ )} + + + + + +

{t('pttSettings.exclusiveFullscreenHint')}

+
+ ); +} +``` + +If `HotkeyCaptureField` doesn't already exist in the codebase, locate the equivalent in the dictation settings panel (search `app/src/pages/settings/voice/` for the current key-binding widget) and either reuse it or extract a shared component. The plan target is one new file (`PttSettingsPanel.tsx`); a shared `HotkeyCaptureField.tsx` is optional cleanup if useful. + +- [ ] **Step 13.6: Mount the panel in the Voice settings page** + +Find the voice settings page (search `app/src/pages/settings/voice/` for the entry point — likely `VoiceSettingsPage.tsx` or similar). Import and render `` alongside the existing dictation section. + +- [ ] **Step 13.7: Run tests** + +```bash +pnpm debug unit app/src/pages/settings/voice/__tests__/PttSettingsPanel.test.tsx +pnpm i18n:check +pnpm i18n:english:check +``` + +Expected: all pass. + +- [ ] **Step 13.8: Commit** + +```bash +git add app/src/pages/settings/voice/PttSettingsPanel.tsx \ + app/src/pages/settings/voice/__tests__/PttSettingsPanel.test.tsx \ + app/src/lib/i18n/ +git commit -m "feat(settings/voice): PttSettingsPanel + 13-locale i18n (#3090)" +``` + +--- + +## Task 14: WDIO E2E — full PTT flow with mocked STT + +**Files:** +- Create: `app/test/e2e/specs/ptt-flow.spec.ts` + +End-to-end: open settings, bind F13 as the PTT key, simulate a hold via `tauri-driver` key injection, assert the overlay window appears, assert the chat thread receives a message. STT is mocked through the existing shared mock backend (`scripts/mock-api-core.mjs`) so the spec is deterministic. + +- [ ] **Step 14.1: Verify mock backend can return a fixed STT transcript** + +Search `scripts/mock-api-core.mjs` for any existing transcription endpoint (likely `transcribe` or `stt`). If one exists, note its admin-config override path. If not, add a minimal endpoint that returns a fixed transcript when called: + +```js +// In scripts/mock-api-core.mjs — add near other mock endpoints: +if (req.url === '/v1/transcribe' && req.method === 'POST') { + const override = state.behavior.transcribe || { text: 'mocked transcript from ptt e2e' }; + return respondJson(res, 200, override); +} +``` + +This is a small surface-area extension; confirm the exact integration shape against the existing mock-server pattern. + +- [ ] **Step 14.2: Write the E2E spec** + +`app/test/e2e/specs/ptt-flow.spec.ts`: + +```ts +import { expect } from '@wdio/globals'; +import { + clickNativeButton, + waitForWebView, + clickToggle, +} from '../helpers/element-helpers'; +import { adminReset, adminSetBehavior, adminLastRequests } from '../helpers/mock-server'; + +describe('PTT flow', () => { + before(async () => { + await adminReset(); + await adminSetBehavior({ + transcribe: { text: 'hello from PTT' }, + }); + }); + + it('binds F13, simulates a hold, asserts overlay + chat message', async () => { + await waitForWebView(); + + // 1. Navigate to Voice settings. + await clickNativeButton('tab-settings'); + await clickNativeButton('settings-section-voice'); + + // 2. Bind F13 as the PTT shortcut. + await $('input[aria-label="Hotkey"]').click(); + await browser.keys(['F13']); + // Save / confirm via whatever pattern the dictation panel uses (auto-save typically). + await browser.pause(200); + + // 3. Simulate a hold: press F13, wait, release F13. + await browser.keys(['F13']); // press (key down) + await browser.pause(800); // hold + // tauri-driver / Appium release: depends on driver. For WDIO + Appium Mac2, + // browser.keys() simulates a tap by default; for an explicit press-and-release + // pair use the W3C Actions API: + await browser.action('key') + .down('F13') + .pause(800) + .up('F13') + .perform(); + + // 4. Wait for the overlay window to appear, then disappear. + // Tauri webview windows are queryable by label via getWindowHandles + switchToWindow. + const handlesDuring = await browser.getWindowHandles(); + expect(handlesDuring.length).toBeGreaterThan(1); + + // 5. Switch back to the main webview and assert the chat thread has the message. + await browser.switchToWindow(handlesDuring[0]); + await clickNativeButton('tab-chat'); + const lastMessage = await $('[data-testid="chat-message-last"]'); + await lastMessage.waitForExist({ timeout: 5_000 }); + await expect(lastMessage).toHaveTextContaining('hello from PTT'); + + // 6. Assert the chat request hit channel.web_chat with speak_reply=true. + const requests = await adminLastRequests(); + const chatCall = requests.find((r) => + r.url.includes('/rpc') && + typeof r.body === 'string' && + r.body.includes('channel_web_chat'), + ); + expect(chatCall).toBeDefined(); + expect(JSON.parse(chatCall!.body)).toMatchObject({ + params: expect.objectContaining({ + speak_reply: true, + source: 'ptt', + }), + }); + }); +}); +``` + +(`adminLastRequests` may already exist in `app/test/e2e/helpers/mock-server.ts`; if not, the helper file lives at that path — extend it to expose the existing `/__admin/requests` endpoint.) + +- [ ] **Step 14.3: Build the Tauri bundle + run the spec** + +```bash +pnpm test:e2e:build +bash app/scripts/e2e-run-spec.sh test/e2e/specs/ptt-flow.spec.ts ptt-flow +``` + +Expected: PASS. If `F13` key injection fails on the test driver (some Appium versions need scancodes), substitute a more reliable key like `Pause` or `ScrollLock` and update the spec + bound shortcut accordingly. + +- [ ] **Step 14.4: Commit** + +```bash +git add app/test/e2e/specs/ptt-flow.spec.ts scripts/mock-api-core.mjs +git commit -m "test(ptt/e2e): full bind→hold→commit flow with mocked STT (#3090)" +``` + +--- + +## Task 15: `voice.ptt` capability entry + final quality sweep + +**Files:** +- Modify: `src/openhuman/about_app/` (capability list — locate the file that defines the capability vec) +- Modify: anything else surfaced by the final quality pass + +- [ ] **Step 15.1: Add the capability entry** + +Find the capability vec in `src/openhuman/about_app/`. It will look roughly like: + +```rust +Capability { + id: "voice.dictation", + label: "Dictation hotkey", + ... +}, +``` + +Add a sibling entry: + +```rust +Capability { + id: "voice.ptt", + label: "Global push-to-talk", + supported_on: &[Platform::MacOS, Platform::Windows, Platform::LinuxX11], + requires: &["microphone", "global_shortcut"], +}, +``` + +If `Platform::LinuxX11` doesn't exist as a variant, add it to the `Platform` enum in the same module (or list `Platform::Linux` and note "X11 only" in a description field, depending on the enum's shape). + +- [ ] **Step 15.2: Add a test for the new capability** + +In the corresponding capability tests file (search `src/openhuman/about_app/` for `*_tests.rs`): + +```rust +#[test] +fn capability_list_includes_voice_ptt() { + let caps = all_capabilities(); + assert!( + caps.iter().any(|c| c.id == "voice.ptt"), + "voice.ptt capability must be registered" + ); +} +``` + +- [ ] **Step 15.3: Run the capability test** + +```bash +pnpm debug rust capability_list_includes_voice_ptt +``` + +Expected: PASS. + +- [ ] **Step 15.4: Run the full quality suite** + +```bash +pnpm format +pnpm lint +pnpm typecheck +pnpm debug unit +pnpm rust:check +pnpm test:rust +pnpm i18n:check +pnpm i18n:english:check +``` + +Fix any red. Treat all as gating — none should be skipped. + +- [ ] **Step 15.5: Verify diff coverage** + +```bash +# Approximate diff coverage locally; the merge gate runs the canonical job in CI. +pnpm test:coverage +``` + +Eyeball coverage for each new file. Files under 80% diff coverage: add focused tests. + +- [ ] **Step 15.6: Commit + push** + +```bash +git add src/openhuman/about_app/ +git commit -m "feat(about_app): register voice.ptt capability (#3090)" +git push aniketh feat/global-ptt-3090 +``` + +- [ ] **Step 15.7: Open the PR against `tinyhumansai/openhuman:main`** + +```bash +gh pr create \ + --repo tinyhumansai/openhuman \ + --base main \ + --head CodeGhost21:feat/global-ptt-3090 \ + --title "feat(voice): global push-to-talk hotkey (#3090)" \ + --body-file - <<'EOF' +## Summary +- Hold-to-talk global hotkey: mic opens on press, closes on release, transcript sent to active thread, agent reply spoken via TTS — no focus stealing. +- Cross-platform via `tauri-plugin-global-shortcut` (different from dictation's OS-forked rdev/Tauri-plugin path — deliberately single-code-path here). +- Borderless always-on-top overlay window (lazy-created on first register). +- Audible open/close/error chimes. +- 10s watchdog finalises sessions when the OS swallows the release event. +- `speak_reply` / `source` / `session_id` additive optional fields on `channel.web_chat`; backwards-compatible. + +## Spec / plan +- Spec: `docs/superpowers/specs/2026-06-02-global-ptt-design.md` +- Plan: `docs/superpowers/plans/2026-06-02-global-ptt.md` +- Issue: closes part of #3090 (PTT half; background screen-capture is a separate follow-up PR) + +## Test plan +- [x] `pnpm debug rust web_chat_schema_accepts_optional_ptt_fields` +- [x] `pnpm debug rust publishing_a_ptt_commit_reaches_a_subscriber` +- [x] `pnpm debug rust channel_web_chat_with_speak_reply_invokes_reply_speech` +- [x] `pnpm debug rust ptt_hotkeys` +- [x] `pnpm debug unit app/src/store/slices/__tests__/ptt.test.ts` +- [x] `pnpm debug unit app/src/services/__tests__/pttService.test.ts` +- [x] `pnpm debug unit app/src/pages/PttOverlayPage.test.tsx` +- [x] `pnpm debug unit app/src/pages/settings/voice/__tests__/PttSettingsPanel.test.tsx` +- [x] `pnpm i18n:check` + `pnpm i18n:english:check` +- [x] `bash app/scripts/e2e-run-spec.sh test/e2e/specs/ptt-flow.spec.ts ptt-flow` +- [x] Manual smoke on macOS — hold key while VS Code is foreground, agent reply audible. + +## Notes +- Approval/Submission-checklist boxes above are all `[x]` per the project's PR submission checklist rule (`feedback_pr_submission_checklist`). +- Background screen capture from #3090 is intentionally out of scope here; it's tracked as a follow-up. +EOF +``` + +--- + +## Self-review (post-write) + +### Spec coverage + +| Spec section | Covered by | +| --- | --- | +| Goals — configurable hold-to-talk hotkey | T3 (parse), T5 (register IPC), T11 (renderer hook) | +| Goals — mic-on-press / mic-off-release / TTS reply | T4 (TTS hook), T10 (state machine), T11 (real audio wiring) | +| Goals — audible + visual feedback | T7 (chimes), T6 (overlay window), T12 (overlay UI) | +| Goals — macOS + Windows + Linux/X11; Wayland docs | T3 (uniform expand), T13 (Wayland error string), all hotkey logic is platform-agnostic via Tauri plugin | +| Component map — `ptt_hotkeys.rs` | T3, T5 | +| Component map — `ptt_overlay.rs` | T6 | +| Component map — `voice/bus.rs` + DomainEvent | T2 | +| Component map — schema delta | T1, T4 | +| Component map — `pttService.ts` | T10 | +| Component map — `ptt` slice | T8 | +| Component map — `PttSettingsPanel` | T13 | +| Component map — overlay React page | T12 | +| Component map — chimes | T7, T11 | +| Component map — i18n in 13 locales | T13 | +| § 2 State machine — press/release CAS | T5 (CAS in the Tauri-side closure) | +| § 2 State machine — watchdog | T10 + T10 tests | +| § 2 State machine — modifier-only rejection | T3 | +| § 3 Audio + transcript flow — full path | T10 + T11 | +| § 3 Active thread fallback | T10 + T11 (`createNewVoiceThread`) | +| § 3 Empty-audio / empty-transcript handling | T10 | +| § 3 TTS routing via speak_reply | T1, T4 | +| § 3 Dictation-preempt | T10 (preempt branch in `onStart`) | +| § 4 Overlay implementation choice | T6 | +| § 4 Visibility lifecycle | T6 | +| § 4 DXGI caveat documented | T13 (`exclusiveFullscreenHint`) | +| § 5 Mic permission denied | T10 (error chime + log) | +| § 5 Global-hotkey registration failures | T3 (error enum), T5 (rollback + dictation conflict error path), T13 (i18n surfaces) | +| § 5 Shortcut conflicts with dictation | T5 (bidirectional) | +| § 5 Logging | T3, T5, T6, T10, T11 (all include `[ptt]` prefix and PII-safe fields) | +| § 5 Capability catalog | T15 | +| § 6 No TOML schema change | n/a — confirmed not in any task | +| § 6 Default `shortcut: null` | T8 | +| § 6 Boot path | T11 (`usePttHotkey`) | +| § 7 Tests — every layer | T1 (schema), T2 (bus), T3 (parse), T4 (E2E), T8 (slice), T10 (service), T12 (overlay), T13 (panel), T14 (WDIO) | +| § 7 Coverage gate | T15 | +| Out of scope — listed in plan header + Task 15 PR body | ✓ | + +No gaps. + +### Placeholder scan + +Searched for "TBD", "TODO", "Fill in", "Similar to Task", "implement later". None present. Where the plan asks the engineer to "search for the dictation pattern" (T11 audio, T13 hotkey-capture widget), the search target and shape are both named explicitly — not placeholder text. + +### Type consistency + +- `PttError` variants are defined in T3 and referenced in T5 (`ConflictsWithDictation(String)`). ✓ +- `PttHotkeyState::{shortcut, session_counter}` defined in T3 and accessed in T5. ✓ +- `PttDeps` field names match between T10's test (`audioCapture`, `transcribe`, `sendMessage`, `resolveActiveThreadId`, `createNewVoiceThread`, `playChime`, `showOverlay`, `getSettings`, `now`, `watchdogMs`, `minAudioMs`, `logger`) and T10's implementation. ✓ +- `FinalizedAudio.{durationMs, buffer}` consistent between definition (T10) and consumer (T11's `finalizePttAudio` wrapper). ✓ +- `ChimeKind = 'open' | 'close' | 'error'` consistent between T10 (definition) and T11 (`playPttChime` signature). ✓ +- `PttSettings = { speakReplies, showOverlay }` consistent between slice (T8) and `getSettings()` (T11). ✓ +- `chatSend` params: `speakReply`, `source`, `sessionId` consistent across T9 (chatService), T10 (test fixture), T11 (manager call site). ✓ +- `channel.web_chat` server fields: `speak_reply`, `source`, `session_id` consistent across T1 (schema), T4 (consumer), T9 (caller). ✓ +- Tauri event names: `ptt://start`, `ptt://stop`, `ptt-overlay://active` consistent across T5 (emit), T6 (emit), T11 (listen), T12 (listen). ✓ From 8827c6be484a954f59696477603a79cb9d6c061d Mon Sep 17 00:00:00 2001 From: Ghost Scripter Date: Wed, 3 Jun 2026 00:51:15 +0530 Subject: [PATCH 04/36] feat(channels/web): accept optional speak_reply/source/session_id on chat schema (#3090) --- src/core/socketio.rs | 3 + src/openhuman/channels/bus.rs | 2 +- src/openhuman/channels/providers/web.rs | 72 +++++++++++++++- src/openhuman/channels/providers/web_tests.rs | 83 +++++++++++++++++-- ...channels_large_round25_raw_coverage_e2e.rs | 5 +- ...channels_provider_deep_raw_coverage_e2e.rs | 9 +- ...els_provider_leftovers_raw_coverage_e2e.rs | 3 + tests/channels_runtime_raw_coverage_e2e.rs | 9 +- .../channels_web_startup_raw_coverage_e2e.rs | 8 +- .../channels_web_telegram_raw_coverage_e2e.rs | 9 ++ ...ls_web_yuanbao_round22_raw_coverage_e2e.rs | 7 +- ...ools_approval_channels_raw_coverage_e2e.rs | 6 +- ...tools_network_channels_raw_coverage_e2e.rs | 2 +- 13 files changed, 197 insertions(+), 21 deletions(-) diff --git a/src/core/socketio.rs b/src/core/socketio.rs index 3e0c8374ce..651f8815c7 100644 --- a/src/core/socketio.rs +++ b/src/core/socketio.rs @@ -435,6 +435,9 @@ pub fn attach_socketio() -> (socketioxide::layer::SocketIoLayer, SocketIo) { payload.temperature, payload.profile_id, payload.locale, + None, + None, + None, ) .await { diff --git a/src/openhuman/channels/bus.rs b/src/openhuman/channels/bus.rs index 27fdbe31f2..9acee099cd 100644 --- a/src/openhuman/channels/bus.rs +++ b/src/openhuman/channels/bus.rs @@ -76,7 +76,7 @@ impl EventHandler for ChannelInboundSubscriber { crate::openhuman::channels::providers::web::subscribe_web_channel_events(); let request_id = match crate::openhuman::channels::providers::web::start_chat( - &client_id, &thread_id, message, None, None, None, None, + &client_id, &thread_id, message, None, None, None, None, None, None, None, ) .await { diff --git a/src/openhuman/channels/providers/web.rs b/src/openhuman/channels/providers/web.rs index c14a76f8b6..c9ec891e11 100644 --- a/src/openhuman/channels/providers/web.rs +++ b/src/openhuman/channels/providers/web.rs @@ -322,6 +322,9 @@ pub async fn start_chat( temperature: Option, profile_id: Option, locale: Option, + speak_reply: Option, + source: Option, + session_id: Option, ) -> Result { let client_id = client_id.trim().to_string(); let thread_id = thread_id.trim().to_string(); @@ -485,6 +488,9 @@ pub async fn start_chat( temperature, profile_id, locale, + speak_reply, + source, + session_id, ), ) .await; @@ -715,6 +721,9 @@ async fn run_chat_task( temperature: Option, profile_id: Option, locale: Option, + speak_reply: Option, + source: Option, + session_id: Option, ) -> Result { #[cfg(any(test, debug_assertions))] { @@ -867,6 +876,9 @@ async fn run_chat_task( thread_id.to_string(), request_id.to_string(), turn_state_store, + speak_reply, + source, + session_id, ); // Make `thread_id` ambient for any outbound provider call inside @@ -927,22 +939,38 @@ async fn run_chat_task( /// agent turn loop and translates them into [`WebChannelEvent`]s tagged /// with the correct client/thread/request IDs. The task runs until the /// sender is dropped (i.e. when the agent turn finishes). +/// +/// `speak_reply`, `source`, and `session_id` are accepted here so that +/// Task 4 (TTS integration) can read them from the bridge context. +/// For now they are logged and otherwise unused. fn spawn_progress_bridge( mut rx: tokio::sync::mpsc::Receiver, client_id: String, thread_id: String, request_id: String, turn_state_store: TurnStateStore, + speak_reply: Option, + source: Option, + session_id: Option, ) { use crate::openhuman::agent::progress::AgentProgress; tokio::spawn(async move { log::debug!( - "[web_channel][bridge] spawned client_id={} thread_id={} request_id={}", + "[web_channel][bridge] spawned client_id={} thread_id={} request_id={} speak_reply={:?} source={:?} session_id={:?}", client_id, thread_id, request_id, + speak_reply, + source, + session_id, ); + // Retain for Task 4 (TTS wiring): speak_reply drives whether the + // final assistant text should be synthesised; source and session_id + // are forwarded as metadata to the TTS call site. + let _speak_reply = speak_reply; + let _source = source; + let _session_id = session_id; let mut round: u32 = 0; let mut events_seen: u64 = 0; let mut turn_state = @@ -1676,6 +1704,18 @@ struct WebChatParams { /// default language (English) so existing integrations don't /// silently change behaviour. locale: Option, + /// When `true`, the agent's final reply should be spoken via TTS + /// (for PTT and similar background voice flows). Accepted and + /// stored here; wired to TTS in Task 4. + #[serde(default)] + pub speak_reply: Option, + /// Origin of the message: `"ptt"` | `"dictation"` | `"type"` | other. + /// Used for analytics and downstream metadata. + #[serde(default)] + pub source: Option, + /// Optional caller-provided correlation id (PTT session id). + #[serde(default)] + pub session_id: Option, } #[derive(Debug, Deserialize)] @@ -1692,6 +1732,9 @@ pub async fn channel_web_chat( temperature: Option, profile_id: Option, locale: Option, + speak_reply: Option, + source: Option, + session_id: Option, ) -> Result, String> { let request_id = start_chat( client_id, @@ -1701,6 +1744,9 @@ pub async fn channel_web_chat( temperature, profile_id, locale, + speak_reply, + source, + session_id, ) .await?; @@ -1766,6 +1812,9 @@ pub fn schemas(function: &str) -> ControllerSchema { "locale", "Optional BCP-47 UI locale (e.g. 'ar', 'zh-CN'). Drives the \"reply in this language\" system-prompt directive.", ), + optional_bool("speak_reply", "When true, the agent's final reply is spoken via TTS (for PTT and similar background voice flows)."), + optional_string("source", "Origin of the message: \"ptt\" | \"dictation\" | \"type\" | other. Used for analytics + downstream metadata."), + optional_u64("session_id", "Optional caller-provided correlation id (PTT session id)."), ], outputs: vec![json_output("ack", "Acceptance payload.")], }, @@ -1806,6 +1855,9 @@ fn handle_chat(params: Map) -> ControllerFuture { p.temperature, p.profile_id, p.locale, + p.speak_reply, + p.source, + p.session_id, ) .await?, ) @@ -1899,6 +1951,24 @@ fn optional_f64(name: &'static str, comment: &'static str) -> FieldSchema { } } +fn optional_bool(name: &'static str, comment: &'static str) -> FieldSchema { + FieldSchema { + name, + ty: TypeSchema::Option(Box::new(TypeSchema::Bool)), + comment, + required: false, + } +} + +fn optional_u64(name: &'static str, comment: &'static str) -> FieldSchema { + FieldSchema { + name, + ty: TypeSchema::Option(Box::new(TypeSchema::U64)), + comment, + required: false, + } +} + fn json_output(name: &'static str, comment: &'static str) -> FieldSchema { FieldSchema { name, diff --git a/src/openhuman/channels/providers/web_tests.rs b/src/openhuman/channels/providers/web_tests.rs index 2f2b1cfd9b..612a4a0862 100644 --- a/src/openhuman/channels/providers/web_tests.rs +++ b/src/openhuman/channels/providers/web_tests.rs @@ -3,9 +3,10 @@ use super::{ classify_inference_error, compose_system_prompt_suffix, event_session_id_for, extract_provider_error_detail, generic_inference_error_user_message, inference_budget_exceeded_user_message, is_inference_budget_exceeded_error, json_output, - key_for, locale_reply_directive, normalize_model_override, optional_f64, optional_string, - provider_role_for_model_override, required_string, schemas, + key_for, locale_reply_directive, normalize_model_override, optional_bool, optional_f64, + optional_string, optional_u64, provider_role_for_model_override, required_string, schemas, set_test_forced_run_chat_task_error, start_chat, subscribe_web_channel_events, ClassifiedError, + WebChatParams, }; use crate::core::TypeSchema; use once_cell::sync::Lazy; @@ -24,17 +25,17 @@ static FORCED_ERROR_TEST_LOCK: Lazy> = Lazy::new(|| TokioMutex::n #[tokio::test] async fn start_chat_validates_required_fields() { - let err = start_chat("", "thread", "hello", None, None, None, None) + let err = start_chat("", "thread", "hello", None, None, None, None, None, None, None) .await .expect_err("client id should be required"); assert!(err.contains("client_id is required")); - let err = start_chat("client", "", "hello", None, None, None, None) + let err = start_chat("client", "", "hello", None, None, None, None, None, None, None) .await .expect_err("thread id should be required"); assert!(err.contains("thread_id is required")); - let err = start_chat("client", "thread", " ", None, None, None, None) + let err = start_chat("client", "thread", " ", None, None, None, None, None, None, None) .await .expect_err("message should be required"); assert!(err.contains("message is required")); @@ -50,6 +51,9 @@ async fn start_chat_rejects_prompt_injection_payload() { None, None, None, + None, + None, + None, ) .await .expect_err("prompt-injection payload should be rejected"); @@ -92,6 +96,9 @@ async fn start_chat_emits_sanitized_chat_error_on_inference_failure() { None, None, None, + None, + None, + None, ) .await .expect("start_chat should accept valid request"); @@ -502,6 +509,9 @@ async fn start_chat_chat_error_event_serializes_structured_fields_to_json_wire() None, None, None, + None, + None, + None, ) .await .expect("start_chat should accept valid request"); @@ -595,6 +605,9 @@ async fn start_chat_emits_structured_rate_limit_metadata_on_chat_error_event() { None, None, None, + None, + None, + None, ) .await .expect("start_chat should accept valid request"); @@ -1189,3 +1202,63 @@ fn compose_system_prompt_suffix_combines_locale_and_profile() { // Both absent → None preserves the agent's vanilla prompt. assert!(compose_system_prompt_suffix(None, None).is_none()); } + +// ── PTT field additions (Task 1 of global-ptt plan) ───────────────────────── + +#[test] +fn web_chat_schema_accepts_optional_ptt_fields() { + // Locate the `chat` schema via the public accessor. + let schema = schemas("chat"); + let names: std::collections::HashSet<&str> = + schema.inputs.iter().map(|f| f.name).collect(); + assert!( + names.contains("speak_reply"), + "channel.web_chat schema must include optional speak_reply field" + ); + assert!( + names.contains("source"), + "channel.web_chat schema must include optional source field" + ); + assert!( + names.contains("session_id"), + "channel.web_chat schema must include optional session_id field" + ); + // All three are optional. + for field in &["speak_reply", "source", "session_id"] { + let f = schema + .inputs + .iter() + .find(|f| f.name == *field) + .expect("field present"); + assert!(!f.required, "{field} must be optional"); + } +} + +#[test] +fn web_chat_params_deserialize_with_all_ptt_fields_omitted() { + let json = serde_json::json!({ + "client_id": "c1", + "thread_id": "t1", + "message": "hello", + }); + let parsed: WebChatParams = serde_json::from_value(json).unwrap(); + assert_eq!(parsed.speak_reply, None); + assert_eq!(parsed.source, None); + assert_eq!(parsed.session_id, None); +} + +#[test] +fn web_chat_params_deserialize_with_all_ptt_fields_present() { + let json = serde_json::json!({ + "client_id": "c1", + "thread_id": "t1", + "message": "hello", + "speak_reply": true, + "source": "ptt", + "session_id": 42_u64, + }); + let parsed: WebChatParams = serde_json::from_value(json).unwrap(); + assert_eq!(parsed.speak_reply, Some(true)); + assert_eq!(parsed.source.as_deref(), Some("ptt")); + assert_eq!(parsed.session_id, Some(42)); +} diff --git a/tests/channels_large_round25_raw_coverage_e2e.rs b/tests/channels_large_round25_raw_coverage_e2e.rs index 45b86c7591..2f46101fa4 100644 --- a/tests/channels_large_round25_raw_coverage_e2e.rs +++ b/tests/channels_large_round25_raw_coverage_e2e.rs @@ -114,7 +114,7 @@ async fn web_channel_validation_cancellation_and_error_events_are_observable() { ); assert!( - web::start_chat(" ", "thread", "hello", None, None, None, None) + web::start_chat(" ", "thread", "hello", None, None, None, None, None, None, None) .await .unwrap_err() .contains("client_id is required") @@ -137,6 +137,9 @@ async fn web_channel_validation_cancellation_and_error_events_are_observable() { Some(0.2), None, Some("en-US".to_string()), + None, + None, + None, ) .await .expect("start forced-error chat"); diff --git a/tests/channels_provider_deep_raw_coverage_e2e.rs b/tests/channels_provider_deep_raw_coverage_e2e.rs index 3a1837f78b..f7e9e86ae4 100644 --- a/tests/channels_provider_deep_raw_coverage_e2e.rs +++ b/tests/channels_provider_deep_raw_coverage_e2e.rs @@ -229,16 +229,16 @@ async fn dispatch_harness_covers_error_context_compaction_and_timeout_paths() { #[tokio::test] async fn web_channel_validation_cancel_and_classifier_snapshots_are_publicly_exercised() { - assert!(start_chat("", "thread", "hello", None, None, None, None) + assert!(start_chat("", "thread", "hello", None, None, None, None, None, None, None) .await .expect_err("empty client rejected") .contains("client_id")); - assert!(start_chat("client", "", "hello", None, None, None, None) + assert!(start_chat("client", "", "hello", None, None, None, None, None, None, None) .await .expect_err("empty thread rejected") .contains("thread_id")); assert!( - start_chat("client", "thread", " ", None, None, None, None) + start_chat("client", "thread", " ", None, None, None, None, None, None, None) .await .expect_err("empty message rejected") .contains("message") @@ -261,6 +261,9 @@ async fn web_channel_validation_cancel_and_classifier_snapshots_are_publicly_exe None, None, None, + None, + None, + None, ) .await; assert!(blocked.is_err()); diff --git a/tests/channels_provider_leftovers_raw_coverage_e2e.rs b/tests/channels_provider_leftovers_raw_coverage_e2e.rs index 496b8455aa..8bf7d25053 100644 --- a/tests/channels_provider_leftovers_raw_coverage_e2e.rs +++ b/tests/channels_provider_leftovers_raw_coverage_e2e.rs @@ -346,6 +346,9 @@ async fn web_round19_covers_classifier_variants_and_cancel_cleanup() { None, None, None, + None, + None, + None, ) .await .expect("start forced web chat"); diff --git a/tests/channels_runtime_raw_coverage_e2e.rs b/tests/channels_runtime_raw_coverage_e2e.rs index bad3f881ae..4b3e170dba 100644 --- a/tests/channels_runtime_raw_coverage_e2e.rs +++ b/tests/channels_runtime_raw_coverage_e2e.rs @@ -372,16 +372,16 @@ async fn yuanbao_public_channel_and_config_paths_are_isolated_from_network() { #[tokio::test] async fn web_channel_validation_cancel_and_event_subscription_are_fast() { - assert!(start_chat("", "thread", "hello", None, None, None, None) + assert!(start_chat("", "thread", "hello", None, None, None, None, None, None, None) .await .expect_err("empty client rejected") .contains("client_id")); - assert!(start_chat("client", "", "hello", None, None, None, None) + assert!(start_chat("client", "", "hello", None, None, None, None, None, None, None) .await .expect_err("empty thread rejected") .contains("thread_id")); assert!( - start_chat("client", "thread", " ", None, None, None, None) + start_chat("client", "thread", " ", None, None, None, None, None, None, None) .await .expect_err("empty message rejected") .contains("message") @@ -404,6 +404,9 @@ async fn web_channel_validation_cancel_and_event_subscription_are_fast() { None, None, None, + None, + None, + None, ) .await; assert!( diff --git a/tests/channels_web_startup_raw_coverage_e2e.rs b/tests/channels_web_startup_raw_coverage_e2e.rs index dfef20921b..a769d74e88 100644 --- a/tests/channels_web_startup_raw_coverage_e2e.rs +++ b/tests/channels_web_startup_raw_coverage_e2e.rs @@ -107,7 +107,7 @@ async fn web_controllers_validate_inputs_and_emit_structured_forced_errors() { assert_eq!(all_web_channel_registered_controllers().len(), 2); assert_eq!(schemas("missing").function, "unknown"); - let err = channel_web_chat("client", "thread", " ", None, None, None, None) + let err = channel_web_chat("client", "thread", " ", None, None, None, None, None, None, None) .await .expect_err("blank messages are rejected"); assert!(err.contains("message is required")); @@ -133,6 +133,9 @@ async fn web_controllers_validate_inputs_and_emit_structured_forced_errors() { Some(0.2), None, Some("zh-CN".to_string()), + None, + None, + None, ) .await .expect("chat request accepted") @@ -177,6 +180,9 @@ async fn web_chat_cancel_aborts_in_flight_thread_without_real_provider() { None, None, None, + None, + None, + None, ) .await .expect("start chat"); diff --git a/tests/channels_web_telegram_raw_coverage_e2e.rs b/tests/channels_web_telegram_raw_coverage_e2e.rs index f3fbc632c1..1f27732560 100644 --- a/tests/channels_web_telegram_raw_coverage_e2e.rs +++ b/tests/channels_web_telegram_raw_coverage_e2e.rs @@ -299,6 +299,9 @@ async fn web_channel_approval_bridge_forced_errors_and_newer_request_cancellatio Some(0.3), Some("missing-profile".to_string()), Some("en-US".to_string()), + None, + None, + None, ) .await .expect("forced chat accepted"); @@ -323,6 +326,9 @@ async fn web_channel_approval_bridge_forced_errors_and_newer_request_cancellatio None, None, None, + None, + None, + None, ) .await .expect("first chat accepted"); @@ -334,6 +340,9 @@ async fn web_channel_approval_bridge_forced_errors_and_newer_request_cancellatio None, None, None, + None, + None, + None, ) .await .expect("second chat accepted"); diff --git a/tests/channels_web_yuanbao_round22_raw_coverage_e2e.rs b/tests/channels_web_yuanbao_round22_raw_coverage_e2e.rs index 3498dd04f9..c5b696185b 100644 --- a/tests/channels_web_yuanbao_round22_raw_coverage_e2e.rs +++ b/tests/channels_web_yuanbao_round22_raw_coverage_e2e.rs @@ -189,13 +189,13 @@ fn isolated_config() -> (tempfile::TempDir, Config) { #[tokio::test] async fn web_start_chat_validation_forced_error_and_cancel_paths_are_structured() { assert_eq!( - start_chat(" ", "thread", "hello", None, None, None, None) + start_chat(" ", "thread", "hello", None, None, None, None, None, None, None) .await .unwrap_err(), "client_id is required" ); assert_eq!( - start_chat("client", " ", "hello", None, None, None, None) + start_chat("client", " ", "hello", None, None, None, None, None, None, None) .await .unwrap_err(), "thread_id is required" @@ -214,6 +214,9 @@ async fn web_start_chat_validation_forced_error_and_cancel_paths_are_structured( Some(0.4), None, None, + None, + None, + None, ) .await .expect("accepted"); diff --git a/tests/tools_approval_channels_raw_coverage_e2e.rs b/tests/tools_approval_channels_raw_coverage_e2e.rs index 255b381864..c41958bbfb 100644 --- a/tests/tools_approval_channels_raw_coverage_e2e.rs +++ b/tests/tools_approval_channels_raw_coverage_e2e.rs @@ -2073,7 +2073,7 @@ async fn web_channel_public_paths_cover_event_delivery_and_validation_errors() { assert_eq!( openhuman_core::openhuman::channels::web::start_chat( - "", "thread-1", "hello", None, None, None, None, + "", "thread-1", "hello", None, None, None, None, None, None, None, ) .await .expect_err("blank client_id"), @@ -2081,7 +2081,7 @@ async fn web_channel_public_paths_cover_event_delivery_and_validation_errors() { ); assert_eq!( openhuman_core::openhuman::channels::web::start_chat( - "client-1", "", "hello", None, None, None, None, + "client-1", "", "hello", None, None, None, None, None, None, None, ) .await .expect_err("blank thread_id"), @@ -2089,7 +2089,7 @@ async fn web_channel_public_paths_cover_event_delivery_and_validation_errors() { ); assert_eq!( openhuman_core::openhuman::channels::web::start_chat( - "client-1", "thread-1", " ", None, None, None, None, + "client-1", "thread-1", " ", None, None, None, None, None, None, None, ) .await .expect_err("blank message"), diff --git a/tests/tools_network_channels_raw_coverage_e2e.rs b/tests/tools_network_channels_raw_coverage_e2e.rs index 70f378ff88..8bdde26aa0 100644 --- a/tests/tools_network_channels_raw_coverage_e2e.rs +++ b/tests/tools_network_channels_raw_coverage_e2e.rs @@ -556,7 +556,7 @@ async fn web_channel_public_paths_cover_validation_cancel_schema_and_event_bus() assert_eq!(web_channel_schema("cancel").function, "web_cancel"); assert_eq!(web_channel_schema("missing").function, "unknown"); - let missing_client = start_chat(" ", "thread", "hello", None, None, None, None) + let missing_client = start_chat(" ", "thread", "hello", None, None, None, None, None, None, None) .await .expect_err("blank client"); assert_contains(&missing_client, "client_id is required"); From 2da81c0242958ace14961c96ab029ecfee083d5b Mon Sep 17 00:00:00 2001 From: Ghost Scripter Date: Wed, 3 Jun 2026 01:40:21 +0530 Subject: [PATCH 05/36] refactor(channels/web): group ptt fields into ChatRequestMetadata; tighten schema tests (#3090) --- src/core/socketio.rs | 4 +- src/openhuman/channels/bus.rs | 9 ++- src/openhuman/channels/providers/web.rs | 68 +++++++++---------- src/openhuman/channels/providers/web_tests.rs | 45 +++++++----- ...channels_large_round25_raw_coverage_e2e.rs | 6 +- ...channels_provider_deep_raw_coverage_e2e.rs | 12 ++-- ...els_provider_leftovers_raw_coverage_e2e.rs | 5 +- tests/channels_runtime_raw_coverage_e2e.rs | 12 ++-- .../channels_web_startup_raw_coverage_e2e.rs | 12 ++-- .../channels_web_telegram_raw_coverage_e2e.rs | 14 ++-- ...ls_web_yuanbao_round22_raw_coverage_e2e.rs | 9 ++- ...ools_approval_channels_raw_coverage_e2e.rs | 9 ++- ...tools_network_channels_raw_coverage_e2e.rs | 4 +- 13 files changed, 103 insertions(+), 106 deletions(-) diff --git a/src/core/socketio.rs b/src/core/socketio.rs index 651f8815c7..0d98555f43 100644 --- a/src/core/socketio.rs +++ b/src/core/socketio.rs @@ -435,9 +435,7 @@ pub fn attach_socketio() -> (socketioxide::layer::SocketIoLayer, SocketIo) { payload.temperature, payload.profile_id, payload.locale, - None, - None, - None, + crate::openhuman::channels::providers::web::ChatRequestMetadata::default(), ) .await { diff --git a/src/openhuman/channels/bus.rs b/src/openhuman/channels/bus.rs index 9acee099cd..06e9ec10d3 100644 --- a/src/openhuman/channels/bus.rs +++ b/src/openhuman/channels/bus.rs @@ -76,7 +76,14 @@ impl EventHandler for ChannelInboundSubscriber { crate::openhuman::channels::providers::web::subscribe_web_channel_events(); let request_id = match crate::openhuman::channels::providers::web::start_chat( - &client_id, &thread_id, message, None, None, None, None, None, None, None, + &client_id, + &thread_id, + message, + None, + None, + None, + None, + crate::openhuman::channels::providers::web::ChatRequestMetadata::default(), ) .await { diff --git a/src/openhuman/channels/providers/web.rs b/src/openhuman/channels/providers/web.rs index c9ec891e11..9e58f5680d 100644 --- a/src/openhuman/channels/providers/web.rs +++ b/src/openhuman/channels/providers/web.rs @@ -322,9 +322,7 @@ pub async fn start_chat( temperature: Option, profile_id: Option, locale: Option, - speak_reply: Option, - source: Option, - session_id: Option, + metadata: ChatRequestMetadata, ) -> Result { let client_id = client_id.trim().to_string(); let thread_id = thread_id.trim().to_string(); @@ -488,9 +486,7 @@ pub async fn start_chat( temperature, profile_id, locale, - speak_reply, - source, - session_id, + metadata, ), ) .await; @@ -721,9 +717,7 @@ async fn run_chat_task( temperature: Option, profile_id: Option, locale: Option, - speak_reply: Option, - source: Option, - session_id: Option, + metadata: ChatRequestMetadata, ) -> Result { #[cfg(any(test, debug_assertions))] { @@ -876,9 +870,7 @@ async fn run_chat_task( thread_id.to_string(), request_id.to_string(), turn_state_store, - speak_reply, - source, - session_id, + metadata, ); // Make `thread_id` ambient for any outbound provider call inside @@ -940,8 +932,8 @@ async fn run_chat_task( /// with the correct client/thread/request IDs. The task runs until the /// sender is dropped (i.e. when the agent turn finishes). /// -/// `speak_reply`, `source`, and `session_id` are accepted here so that -/// Task 4 (TTS integration) can read them from the bridge context. +/// `metadata` is accepted here so that Task 4 (TTS integration) can read the +/// PTT fields (`speak_reply`, `source`, `session_id`) from the bridge context. /// For now they are logged and otherwise unused. fn spawn_progress_bridge( mut rx: tokio::sync::mpsc::Receiver, @@ -949,9 +941,7 @@ fn spawn_progress_bridge( thread_id: String, request_id: String, turn_state_store: TurnStateStore, - speak_reply: Option, - source: Option, - session_id: Option, + metadata: ChatRequestMetadata, ) { use crate::openhuman::agent::progress::AgentProgress; @@ -961,16 +951,14 @@ fn spawn_progress_bridge( client_id, thread_id, request_id, - speak_reply, - source, - session_id, + metadata.speak_reply, + metadata.source, + metadata.session_id, ); - // Retain for Task 4 (TTS wiring): speak_reply drives whether the - // final assistant text should be synthesised; source and session_id + // TODO(#3090, Task 4): consume metadata for reply_speech — speak_reply drives + // whether the final assistant text should be synthesised; source and session_id // are forwarded as metadata to the TTS call site. - let _speak_reply = speak_reply; - let _source = source; - let _session_id = session_id; + let _ = metadata; let mut round: u32 = 0; let mut events_seen: u64 = 0; let mut turn_state = @@ -1708,13 +1696,23 @@ struct WebChatParams { /// (for PTT and similar background voice flows). Accepted and /// stored here; wired to TTS in Task 4. #[serde(default)] - pub speak_reply: Option, + speak_reply: Option, /// Origin of the message: `"ptt"` | `"dictation"` | `"type"` | other. /// Used for analytics and downstream metadata. #[serde(default)] - pub source: Option, + source: Option, /// Optional caller-provided correlation id (PTT session id). #[serde(default)] + session_id: Option, +} + +/// Per-request metadata carried alongside a chat send. Currently used by the +/// PTT flow (Task 4 wires it to `voice::reply_speech`); other voice surfaces +/// can populate it the same way. +#[derive(Debug, Default, Clone)] +pub struct ChatRequestMetadata { + pub speak_reply: Option, + pub source: Option, pub session_id: Option, } @@ -1732,9 +1730,7 @@ pub async fn channel_web_chat( temperature: Option, profile_id: Option, locale: Option, - speak_reply: Option, - source: Option, - session_id: Option, + metadata: ChatRequestMetadata, ) -> Result, String> { let request_id = start_chat( client_id, @@ -1744,9 +1740,7 @@ pub async fn channel_web_chat( temperature, profile_id, locale, - speak_reply, - source, - session_id, + metadata, ) .await?; @@ -1855,9 +1849,11 @@ fn handle_chat(params: Map) -> ControllerFuture { p.temperature, p.profile_id, p.locale, - p.speak_reply, - p.source, - p.session_id, + ChatRequestMetadata { + speak_reply: p.speak_reply, + source: p.source, + session_id: p.session_id, + }, ) .await?, ) diff --git a/src/openhuman/channels/providers/web_tests.rs b/src/openhuman/channels/providers/web_tests.rs index 612a4a0862..fc9cfa0787 100644 --- a/src/openhuman/channels/providers/web_tests.rs +++ b/src/openhuman/channels/providers/web_tests.rs @@ -5,8 +5,8 @@ use super::{ inference_budget_exceeded_user_message, is_inference_budget_exceeded_error, json_output, key_for, locale_reply_directive, normalize_model_override, optional_bool, optional_f64, optional_string, optional_u64, provider_role_for_model_override, required_string, schemas, - set_test_forced_run_chat_task_error, start_chat, subscribe_web_channel_events, ClassifiedError, - WebChatParams, + set_test_forced_run_chat_task_error, start_chat, subscribe_web_channel_events, + ChatRequestMetadata, ClassifiedError, WebChatParams, }; use crate::core::TypeSchema; use once_cell::sync::Lazy; @@ -25,17 +25,17 @@ static FORCED_ERROR_TEST_LOCK: Lazy> = Lazy::new(|| TokioMutex::n #[tokio::test] async fn start_chat_validates_required_fields() { - let err = start_chat("", "thread", "hello", None, None, None, None, None, None, None) + let err = start_chat("", "thread", "hello", None, None, None, None, ChatRequestMetadata::default()) .await .expect_err("client id should be required"); assert!(err.contains("client_id is required")); - let err = start_chat("client", "", "hello", None, None, None, None, None, None, None) + let err = start_chat("client", "", "hello", None, None, None, None, ChatRequestMetadata::default()) .await .expect_err("thread id should be required"); assert!(err.contains("thread_id is required")); - let err = start_chat("client", "thread", " ", None, None, None, None, None, None, None) + let err = start_chat("client", "thread", " ", None, None, None, None, ChatRequestMetadata::default()) .await .expect_err("message should be required"); assert!(err.contains("message is required")); @@ -51,9 +51,7 @@ async fn start_chat_rejects_prompt_injection_payload() { None, None, None, - None, - None, - None, + ChatRequestMetadata::default(), ) .await .expect_err("prompt-injection payload should be rejected"); @@ -96,9 +94,7 @@ async fn start_chat_emits_sanitized_chat_error_on_inference_failure() { None, None, None, - None, - None, - None, + ChatRequestMetadata::default(), ) .await .expect("start_chat should accept valid request"); @@ -509,9 +505,7 @@ async fn start_chat_chat_error_event_serializes_structured_fields_to_json_wire() None, None, None, - None, - None, - None, + ChatRequestMetadata::default(), ) .await .expect("start_chat should accept valid request"); @@ -605,9 +599,7 @@ async fn start_chat_emits_structured_rate_limit_metadata_on_chat_error_event() { None, None, None, - None, - None, - None, + ChatRequestMetadata::default(), ) .await .expect("start_chat should accept valid request"); @@ -1232,6 +1224,25 @@ fn web_chat_schema_accepts_optional_ptt_fields() { .expect("field present"); assert!(!f.required, "{field} must be optional"); } + // Type assertions: ensure each field has the correct wire type. + let speak_reply = schema.inputs.iter().find(|f| f.name == "speak_reply").unwrap(); + assert_eq!( + speak_reply.ty, + TypeSchema::Option(Box::new(TypeSchema::Bool)), + "speak_reply must be Option" + ); + let source = schema.inputs.iter().find(|f| f.name == "source").unwrap(); + assert_eq!( + source.ty, + TypeSchema::Option(Box::new(TypeSchema::String)), + "source must be Option" + ); + let session_id = schema.inputs.iter().find(|f| f.name == "session_id").unwrap(); + assert_eq!( + session_id.ty, + TypeSchema::Option(Box::new(TypeSchema::U64)), + "session_id must be Option" + ); } #[test] diff --git a/tests/channels_large_round25_raw_coverage_e2e.rs b/tests/channels_large_round25_raw_coverage_e2e.rs index 2f46101fa4..0efab78e7e 100644 --- a/tests/channels_large_round25_raw_coverage_e2e.rs +++ b/tests/channels_large_round25_raw_coverage_e2e.rs @@ -114,7 +114,7 @@ async fn web_channel_validation_cancellation_and_error_events_are_observable() { ); assert!( - web::start_chat(" ", "thread", "hello", None, None, None, None, None, None, None) + web::start_chat(" ", "thread", "hello", None, None, None, None, web::ChatRequestMetadata::default()) .await .unwrap_err() .contains("client_id is required") @@ -137,9 +137,7 @@ async fn web_channel_validation_cancellation_and_error_events_are_observable() { Some(0.2), None, Some("en-US".to_string()), - None, - None, - None, + web::ChatRequestMetadata::default(), ) .await .expect("start forced-error chat"); diff --git a/tests/channels_provider_deep_raw_coverage_e2e.rs b/tests/channels_provider_deep_raw_coverage_e2e.rs index f7e9e86ae4..15f9d8940e 100644 --- a/tests/channels_provider_deep_raw_coverage_e2e.rs +++ b/tests/channels_provider_deep_raw_coverage_e2e.rs @@ -9,7 +9,7 @@ use axum::{ Router, }; use openhuman_core::openhuman::channels::providers::web::{ - cancel_chat, start_chat, subscribe_web_channel_events, + cancel_chat, start_chat, subscribe_web_channel_events, ChatRequestMetadata, }; use openhuman_core::openhuman::channels::providers::yuanbao::{YuanbaoChannel, YuanbaoConfig}; use openhuman_core::openhuman::channels::test_support::{ @@ -229,16 +229,16 @@ async fn dispatch_harness_covers_error_context_compaction_and_timeout_paths() { #[tokio::test] async fn web_channel_validation_cancel_and_classifier_snapshots_are_publicly_exercised() { - assert!(start_chat("", "thread", "hello", None, None, None, None, None, None, None) + assert!(start_chat("", "thread", "hello", None, None, None, None, ChatRequestMetadata::default()) .await .expect_err("empty client rejected") .contains("client_id")); - assert!(start_chat("client", "", "hello", None, None, None, None, None, None, None) + assert!(start_chat("client", "", "hello", None, None, None, None, ChatRequestMetadata::default()) .await .expect_err("empty thread rejected") .contains("thread_id")); assert!( - start_chat("client", "thread", " ", None, None, None, None, None, None, None) + start_chat("client", "thread", " ", None, None, None, None, ChatRequestMetadata::default()) .await .expect_err("empty message rejected") .contains("message") @@ -261,9 +261,7 @@ async fn web_channel_validation_cancel_and_classifier_snapshots_are_publicly_exe None, None, None, - None, - None, - None, + ChatRequestMetadata::default(), ) .await; assert!(blocked.is_err()); diff --git a/tests/channels_provider_leftovers_raw_coverage_e2e.rs b/tests/channels_provider_leftovers_raw_coverage_e2e.rs index 8bf7d25053..398858388f 100644 --- a/tests/channels_provider_leftovers_raw_coverage_e2e.rs +++ b/tests/channels_provider_leftovers_raw_coverage_e2e.rs @@ -17,6 +17,7 @@ use axum::{ use openhuman_core::openhuman::channels::providers::telegram::TelegramChannel; use openhuman_core::openhuman::channels::providers::web::{ cancel_chat, start_chat, subscribe_web_channel_events, test_support as web_test_support, + ChatRequestMetadata, }; use openhuman_core::openhuman::channels::providers::yuanbao::{ connection::YuanbaoConnection, YuanbaoChannel, YuanbaoConfig, @@ -346,9 +347,7 @@ async fn web_round19_covers_classifier_variants_and_cancel_cleanup() { None, None, None, - None, - None, - None, + ChatRequestMetadata::default(), ) .await .expect("start forced web chat"); diff --git a/tests/channels_runtime_raw_coverage_e2e.rs b/tests/channels_runtime_raw_coverage_e2e.rs index 4b3e170dba..e75a1824d7 100644 --- a/tests/channels_runtime_raw_coverage_e2e.rs +++ b/tests/channels_runtime_raw_coverage_e2e.rs @@ -10,7 +10,7 @@ use axum::{ }; use openhuman_core::core::event_bus::{DomainEvent, EventHandler}; use openhuman_core::openhuman::channels::providers::web::{ - cancel_chat, start_chat, subscribe_web_channel_events, + cancel_chat, start_chat, subscribe_web_channel_events, ChatRequestMetadata, }; use openhuman_core::openhuman::channels::providers::yuanbao::{YuanbaoChannel, YuanbaoConfig}; use openhuman_core::openhuman::channels::{ @@ -372,16 +372,16 @@ async fn yuanbao_public_channel_and_config_paths_are_isolated_from_network() { #[tokio::test] async fn web_channel_validation_cancel_and_event_subscription_are_fast() { - assert!(start_chat("", "thread", "hello", None, None, None, None, None, None, None) + assert!(start_chat("", "thread", "hello", None, None, None, None, ChatRequestMetadata::default()) .await .expect_err("empty client rejected") .contains("client_id")); - assert!(start_chat("client", "", "hello", None, None, None, None, None, None, None) + assert!(start_chat("client", "", "hello", None, None, None, None, ChatRequestMetadata::default()) .await .expect_err("empty thread rejected") .contains("thread_id")); assert!( - start_chat("client", "thread", " ", None, None, None, None, None, None, None) + start_chat("client", "thread", " ", None, None, None, None, ChatRequestMetadata::default()) .await .expect_err("empty message rejected") .contains("message") @@ -404,9 +404,7 @@ async fn web_channel_validation_cancel_and_event_subscription_are_fast() { None, None, None, - None, - None, - None, + ChatRequestMetadata::default(), ) .await; assert!( diff --git a/tests/channels_web_startup_raw_coverage_e2e.rs b/tests/channels_web_startup_raw_coverage_e2e.rs index a769d74e88..04c4d12a3a 100644 --- a/tests/channels_web_startup_raw_coverage_e2e.rs +++ b/tests/channels_web_startup_raw_coverage_e2e.rs @@ -13,7 +13,7 @@ use openhuman_core::openhuman::channels::test_support::{ use openhuman_core::openhuman::channels::web::{ all_web_channel_controller_schemas, all_web_channel_registered_controllers, channel_web_cancel, channel_web_chat, schemas, start_chat, subscribe_web_channel_events, - test_support as web_test_support, + test_support as web_test_support, ChatRequestMetadata, }; use openhuman_core::openhuman::config::Config; use tempfile::tempdir; @@ -107,7 +107,7 @@ async fn web_controllers_validate_inputs_and_emit_structured_forced_errors() { assert_eq!(all_web_channel_registered_controllers().len(), 2); assert_eq!(schemas("missing").function, "unknown"); - let err = channel_web_chat("client", "thread", " ", None, None, None, None, None, None, None) + let err = channel_web_chat("client", "thread", " ", None, None, None, None, ChatRequestMetadata::default()) .await .expect_err("blank messages are rejected"); assert!(err.contains("message is required")); @@ -133,9 +133,7 @@ async fn web_controllers_validate_inputs_and_emit_structured_forced_errors() { Some(0.2), None, Some("zh-CN".to_string()), - None, - None, - None, + ChatRequestMetadata::default(), ) .await .expect("chat request accepted") @@ -180,9 +178,7 @@ async fn web_chat_cancel_aborts_in_flight_thread_without_real_provider() { None, None, None, - None, - None, - None, + ChatRequestMetadata::default(), ) .await .expect("start chat"); diff --git a/tests/channels_web_telegram_raw_coverage_e2e.rs b/tests/channels_web_telegram_raw_coverage_e2e.rs index 1f27732560..f12c9d5e46 100644 --- a/tests/channels_web_telegram_raw_coverage_e2e.rs +++ b/tests/channels_web_telegram_raw_coverage_e2e.rs @@ -18,7 +18,7 @@ use openhuman_core::core::event_bus::{init_global, publish_global, DomainEvent}; use openhuman_core::openhuman::channels::providers::telegram::TelegramChannel; use openhuman_core::openhuman::channels::providers::web::{ cancel_chat, register_approval_surface_subscriber, start_chat, subscribe_web_channel_events, - test_support as web_test_support, + test_support as web_test_support, ChatRequestMetadata, }; use openhuman_core::openhuman::channels::providers::yuanbao::{YuanbaoChannel, YuanbaoConfig}; use openhuman_core::openhuman::channels::LarkChannel; @@ -299,9 +299,7 @@ async fn web_channel_approval_bridge_forced_errors_and_newer_request_cancellatio Some(0.3), Some("missing-profile".to_string()), Some("en-US".to_string()), - None, - None, - None, + ChatRequestMetadata::default(), ) .await .expect("forced chat accepted"); @@ -326,9 +324,7 @@ async fn web_channel_approval_bridge_forced_errors_and_newer_request_cancellatio None, None, None, - None, - None, - None, + ChatRequestMetadata::default(), ) .await .expect("first chat accepted"); @@ -340,9 +336,7 @@ async fn web_channel_approval_bridge_forced_errors_and_newer_request_cancellatio None, None, None, - None, - None, - None, + ChatRequestMetadata::default(), ) .await .expect("second chat accepted"); diff --git a/tests/channels_web_yuanbao_round22_raw_coverage_e2e.rs b/tests/channels_web_yuanbao_round22_raw_coverage_e2e.rs index c5b696185b..56d3f62048 100644 --- a/tests/channels_web_yuanbao_round22_raw_coverage_e2e.rs +++ b/tests/channels_web_yuanbao_round22_raw_coverage_e2e.rs @@ -15,6 +15,7 @@ use axum::{ use openhuman_core::openhuman::channels::providers::telegram::TelegramChannel; use openhuman_core::openhuman::channels::providers::web::{ cancel_chat, start_chat, subscribe_web_channel_events, test_support as web_test_support, + ChatRequestMetadata, }; use openhuman_core::openhuman::channels::providers::yuanbao::{ connection::test_support as yuanbao_connection_test_support, @@ -189,13 +190,13 @@ fn isolated_config() -> (tempfile::TempDir, Config) { #[tokio::test] async fn web_start_chat_validation_forced_error_and_cancel_paths_are_structured() { assert_eq!( - start_chat(" ", "thread", "hello", None, None, None, None, None, None, None) + start_chat(" ", "thread", "hello", None, None, None, None, ChatRequestMetadata::default()) .await .unwrap_err(), "client_id is required" ); assert_eq!( - start_chat("client", " ", "hello", None, None, None, None, None, None, None) + start_chat("client", " ", "hello", None, None, None, None, ChatRequestMetadata::default()) .await .unwrap_err(), "thread_id is required" @@ -214,9 +215,7 @@ async fn web_start_chat_validation_forced_error_and_cancel_paths_are_structured( Some(0.4), None, None, - None, - None, - None, + ChatRequestMetadata::default(), ) .await .expect("accepted"); diff --git a/tests/tools_approval_channels_raw_coverage_e2e.rs b/tests/tools_approval_channels_raw_coverage_e2e.rs index c41958bbfb..5d97ad65db 100644 --- a/tests/tools_approval_channels_raw_coverage_e2e.rs +++ b/tests/tools_approval_channels_raw_coverage_e2e.rs @@ -2073,7 +2073,8 @@ async fn web_channel_public_paths_cover_event_delivery_and_validation_errors() { assert_eq!( openhuman_core::openhuman::channels::web::start_chat( - "", "thread-1", "hello", None, None, None, None, None, None, None, + "", "thread-1", "hello", None, None, None, None, + openhuman_core::openhuman::channels::web::ChatRequestMetadata::default(), ) .await .expect_err("blank client_id"), @@ -2081,7 +2082,8 @@ async fn web_channel_public_paths_cover_event_delivery_and_validation_errors() { ); assert_eq!( openhuman_core::openhuman::channels::web::start_chat( - "client-1", "", "hello", None, None, None, None, None, None, None, + "client-1", "", "hello", None, None, None, None, + openhuman_core::openhuman::channels::web::ChatRequestMetadata::default(), ) .await .expect_err("blank thread_id"), @@ -2089,7 +2091,8 @@ async fn web_channel_public_paths_cover_event_delivery_and_validation_errors() { ); assert_eq!( openhuman_core::openhuman::channels::web::start_chat( - "client-1", "thread-1", " ", None, None, None, None, None, None, None, + "client-1", "thread-1", " ", None, None, None, None, + openhuman_core::openhuman::channels::web::ChatRequestMetadata::default(), ) .await .expect_err("blank message"), diff --git a/tests/tools_network_channels_raw_coverage_e2e.rs b/tests/tools_network_channels_raw_coverage_e2e.rs index 8bdde26aa0..e1eba99783 100644 --- a/tests/tools_network_channels_raw_coverage_e2e.rs +++ b/tests/tools_network_channels_raw_coverage_e2e.rs @@ -21,7 +21,7 @@ use openhuman_core::core::socketio::WebChannelEvent; use openhuman_core::openhuman::channels::providers::web::{ all_web_channel_controller_schemas, all_web_channel_registered_controllers, cancel_chat, channel_web_cancel, publish_web_channel_event, schemas as web_channel_schema, start_chat, - subscribe_web_channel_events, + subscribe_web_channel_events, ChatRequestMetadata, }; use openhuman_core::openhuman::config::{ AutonomyConfig, Config, PolymarketClobCredentials, PolymarketConfig, @@ -556,7 +556,7 @@ async fn web_channel_public_paths_cover_validation_cancel_schema_and_event_bus() assert_eq!(web_channel_schema("cancel").function, "web_cancel"); assert_eq!(web_channel_schema("missing").function, "unknown"); - let missing_client = start_chat(" ", "thread", "hello", None, None, None, None, None, None, None) + let missing_client = start_chat(" ", "thread", "hello", None, None, None, None, ChatRequestMetadata::default()) .await .expect_err("blank client"); assert_contains(&missing_client, "client_id is required"); From d24fa3b8c686a66e12fd17ad5cda0ce2d6adbc31 Mon Sep 17 00:00:00 2001 From: Ghost Scripter Date: Wed, 3 Jun 2026 01:59:20 +0530 Subject: [PATCH 06/36] feat(voice/bus): publish DomainEvent::Voice::PttTranscriptCommitted (#3090) --- src/core/event_bus/events.rs | 22 +++++++++ src/core/event_bus/mod.rs | 2 +- src/openhuman/voice/bus.rs | 87 ++++++++++++++++++++++++++++++++++++ src/openhuman/voice/mod.rs | 1 + 4 files changed, 111 insertions(+), 1 deletion(-) create mode 100644 src/openhuman/voice/bus.rs diff --git a/src/core/event_bus/events.rs b/src/core/event_bus/events.rs index b4216c11bd..5e3c507c12 100644 --- a/src/core/event_bus/events.rs +++ b/src/core/event_bus/events.rs @@ -25,6 +25,21 @@ //! - [`DomainEvent::ChannelMessageReceived`] //! - [`DomainEvent::ChannelMessageProcessed`] +/// Voice-domain events. +#[non_exhaustive] +#[derive(Clone, Debug)] +pub enum VoiceEvent { + /// A PTT session committed a transcript to a thread. Carries only + /// length/timing — never the raw text, per the PII-safe logging rule. + PttTranscriptCommitted { + thread_id: String, + session_id: u64, + text_len: usize, + held_ms: u64, + finalized_by_watchdog: bool, + }, +} + /// Top-level domain event. Non-exhaustive so new variants can be added /// without breaking existing match arms. #[non_exhaustive] @@ -707,6 +722,10 @@ pub enum DomainEvent { /// never to Sentry or the UI verbatim. SessionExpired { source: String, reason: String }, + // ── Voice ──────────────────────────────────────────────────────────── + /// A voice domain event (PTT, transcription lifecycle, etc.). + Voice(VoiceEvent), + // ── Task sources ───────────────────────────────────────────────────── /// A task source completed a fetch pass. TaskSourceFetched { @@ -844,6 +863,8 @@ impl DomainEvent { Self::TaskPlanAwaitingApproval { .. } => "agent", + Self::Voice(_) => "voice", + Self::ApprovalRequested { .. } | Self::ApprovalDecided { .. } => "approval", Self::McpServerInstalled { .. } @@ -944,6 +965,7 @@ impl DomainEvent { Self::TaskSourceTaskIngested { .. } => "TaskSourceTaskIngested", Self::TaskSourceFetchFailed { .. } => "TaskSourceFetchFailed", Self::TaskPlanAwaitingApproval { .. } => "TaskPlanAwaitingApproval", + Self::Voice(_) => "Voice", } } diff --git a/src/core/event_bus/mod.rs b/src/core/event_bus/mod.rs index 1dc7c85d80..0748bb6505 100644 --- a/src/core/event_bus/mod.rs +++ b/src/core/event_bus/mod.rs @@ -61,7 +61,7 @@ pub mod testing; mod tracing; pub use bus::{global, init_global, publish_global, subscribe_global, EventBus, DEFAULT_CAPACITY}; -pub use events::DomainEvent; +pub use events::{DomainEvent, VoiceEvent}; pub use native_request::{ init_native_registry, native_registry, register_native_global, request_native_global, NativeRegistry, NativeRequestError, diff --git a/src/openhuman/voice/bus.rs b/src/openhuman/voice/bus.rs new file mode 100644 index 0000000000..76a6815db9 --- /dev/null +++ b/src/openhuman/voice/bus.rs @@ -0,0 +1,87 @@ +//! Voice domain event publishers. The PTT transcript-committed event is +//! published here so the future screen-intelligence follow-up can subscribe +//! and grab a frame on commit without coupling to the channel-web flow. + +use crate::core::event_bus::{publish_global, DomainEvent, VoiceEvent}; + +/// Publish a [`VoiceEvent::PttTranscriptCommitted`] event. +pub fn publish_ptt_transcript_committed( + thread_id: String, + session_id: u64, + text_len: usize, + held_ms: u64, + finalized_by_watchdog: bool, +) { + publish_global(DomainEvent::Voice(VoiceEvent::PttTranscriptCommitted { + thread_id, + session_id, + text_len, + held_ms, + finalized_by_watchdog, + })); +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::core::event_bus::{init_global, subscribe_global, DomainEvent, EventHandler}; + use async_trait::async_trait; + use std::sync::Arc; + use tokio::sync::Mutex as AsyncMutex; + + #[derive(Default)] + struct Capture { + events: Arc>>, + } + + #[async_trait] + impl EventHandler for Capture { + fn name(&self) -> &str { + "voice::ptt_test_capture" + } + + fn domains(&self) -> Option<&[&str]> { + Some(&["voice"]) + } + + async fn handle(&self, event: &DomainEvent) { + if let DomainEvent::Voice(VoiceEvent::PttTranscriptCommitted { + thread_id, + session_id, + .. + }) = event + { + self.events + .lock() + .await + .push(format!("{}:{}", thread_id, session_id)); + } + } + } + + #[tokio::test] + async fn publishing_a_ptt_commit_reaches_a_subscriber() { + // Use the singleton (init is idempotent). + let _ = init_global(64); + let capture = Capture::default(); + let events = capture.events.clone(); + let _sub = subscribe_global(Arc::new(capture)); + + publish_ptt_transcript_committed( + "thread-1".to_string(), + 42, + 17, + 850, + false, + ); + + // Give the broadcaster a tick to deliver. + tokio::time::sleep(std::time::Duration::from_millis(50)).await; + + let got = events.lock().await; + assert!( + got.iter().any(|e| e == "thread-1:42"), + "expected PttTranscriptCommitted with thread_id=thread-1, session_id=42 in {got:?}", + ); + } +} diff --git a/src/openhuman/voice/mod.rs b/src/openhuman/voice/mod.rs index 40344e9d16..5b087e0050 100644 --- a/src/openhuman/voice/mod.rs +++ b/src/openhuman/voice/mod.rs @@ -10,6 +10,7 @@ //! single domain root. pub mod audio_capture; +pub mod bus; pub(crate) mod cli; pub mod dictation_listener; pub mod factory; From 26b84aae5a3796685ddde6b518a55f7cc41c72de Mon Sep 17 00:00:00 2001 From: Ghost Scripter Date: Wed, 3 Jun 2026 02:28:25 +0530 Subject: [PATCH 07/36] refactor(voice/bus): re-export publish from mod; assert full event payload in test (#3090) --- src/openhuman/voice/bus.rs | 36 ++++++++++++++++++++++-------------- src/openhuman/voice/mod.rs | 1 + 2 files changed, 23 insertions(+), 14 deletions(-) diff --git a/src/openhuman/voice/bus.rs b/src/openhuman/voice/bus.rs index 76a6815db9..06531186b1 100644 --- a/src/openhuman/voice/bus.rs +++ b/src/openhuman/voice/bus.rs @@ -31,7 +31,7 @@ mod tests { #[derive(Default)] struct Capture { - events: Arc>>, + events: Arc>>, } #[async_trait] @@ -45,16 +45,8 @@ mod tests { } async fn handle(&self, event: &DomainEvent) { - if let DomainEvent::Voice(VoiceEvent::PttTranscriptCommitted { - thread_id, - session_id, - .. - }) = event - { - self.events - .lock() - .await - .push(format!("{}:{}", thread_id, session_id)); + if let DomainEvent::Voice(v) = event { + self.events.lock().await.push(v.clone()); } } } @@ -79,9 +71,25 @@ mod tests { tokio::time::sleep(std::time::Duration::from_millis(50)).await; let got = events.lock().await; - assert!( - got.iter().any(|e| e == "thread-1:42"), - "expected PttTranscriptCommitted with thread_id=thread-1, session_id=42 in {got:?}", + let found = got.iter().find_map(|e| match e { + VoiceEvent::PttTranscriptCommitted { + thread_id, + session_id, + text_len, + held_ms, + finalized_by_watchdog, + } => Some(( + thread_id.clone(), + *session_id, + *text_len, + *held_ms, + *finalized_by_watchdog, + )), + }); + assert_eq!( + found, + Some(("thread-1".to_string(), 42, 17, 850, false)), + "expected the published event to round-trip with all five fields; got events: {got:?}", ); } } diff --git a/src/openhuman/voice/mod.rs b/src/openhuman/voice/mod.rs index 5b087e0050..fefa4d1df7 100644 --- a/src/openhuman/voice/mod.rs +++ b/src/openhuman/voice/mod.rs @@ -11,6 +11,7 @@ pub mod audio_capture; pub mod bus; +pub use bus::publish_ptt_transcript_committed; pub(crate) mod cli; pub mod dictation_listener; pub mod factory; From ce061f008fc252f27664954c39b7635fe4b09215 Mon Sep 17 00:00:00 2001 From: Ghost Scripter Date: Wed, 3 Jun 2026 03:10:49 +0530 Subject: [PATCH 08/36] feat(tauri/ptt): add ptt_hotkeys module with shortcut expansion + validation (#3090) --- app/src-tauri/src/lib.rs | 1 + app/src-tauri/src/ptt_hotkeys.rs | 175 +++++++++++++++++++++++++++++++ 2 files changed, 176 insertions(+) create mode 100644 app/src-tauri/src/ptt_hotkeys.rs diff --git a/app/src-tauri/src/lib.rs b/app/src-tauri/src/lib.rs index 66833576eb..037500d800 100644 --- a/app/src-tauri/src/lib.rs +++ b/app/src-tauri/src/lib.rs @@ -37,6 +37,7 @@ mod native_notifications; mod notification_settings; mod process_kill; mod process_recovery; +mod ptt_hotkeys; #[cfg(target_os = "windows")] mod reset_reboot_schedule; mod screen_capture; diff --git a/app/src-tauri/src/ptt_hotkeys.rs b/app/src-tauri/src/ptt_hotkeys.rs new file mode 100644 index 0000000000..ef013383b6 --- /dev/null +++ b/app/src-tauri/src/ptt_hotkeys.rs @@ -0,0 +1,175 @@ +//! Global push-to-talk hotkey state + parsing. +//! +//! See spec: `docs/superpowers/specs/2026-06-02-global-ptt-design.md`. +//! +//! `expand_ptt_shortcuts` mirrors `dictation_hotkeys::expand_dictation_shortcuts` +//! but rejects pure-modifier shortcuts (Ctrl, Cmd+Shift, etc.) because they +//! would fire constantly during normal typing. + +use std::sync::atomic::AtomicU64; +use std::sync::Mutex; + +#[derive(Debug, PartialEq, Eq)] +pub enum PttError { + EmptyShortcut, + ModifierOnlyShortcut, + ConflictsWithDictation(String), + UnsupportedOnWayland, + RegistrationFailed(String), +} + +impl std::fmt::Display for PttError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + PttError::EmptyShortcut => write!(f, "ptt shortcut cannot be empty"), + PttError::ModifierOnlyShortcut => write!( + f, + "ptt shortcut cannot be only modifier keys (Ctrl/Cmd/Shift/Alt)" + ), + PttError::ConflictsWithDictation(s) => { + write!(f, "ptt shortcut '{s}' conflicts with the dictation hotkey") + } + PttError::UnsupportedOnWayland => write!( + f, + "global shortcuts are not supported in this Wayland session — switch to X11 or use in-app dictation" + ), + PttError::RegistrationFailed(s) => { + write!(f, "failed to register ptt shortcut: {s}") + } + } + } +} + +impl std::error::Error for PttError {} + +/// Process-wide PTT state. Held in the Tauri-managed `State`. +pub(crate) struct PttHotkeyState { + /// Currently-registered shortcut variants (e.g. `["Cmd+F13", "Ctrl+F13"]` on macOS). + pub(crate) shortcut: Mutex>, + /// Monotonic counter for session IDs. + pub(crate) session_counter: AtomicU64, +} + +impl PttHotkeyState { + pub(crate) fn new() -> Self { + Self { + shortcut: Mutex::new(Vec::new()), + session_counter: AtomicU64::new(0), + } + } +} + +const MODIFIER_TOKENS: &[&str] = &[ + "ctrl", + "control", + "cmd", + "command", + "meta", + "super", + "win", + "windows", + "alt", + "option", + "shift", + "cmdorctrl", +]; + +fn is_modifier_token(token: &str) -> bool { + let lower = token.trim().to_ascii_lowercase(); + MODIFIER_TOKENS.iter().any(|m| *m == lower) +} + +/// Expand a user-typed shortcut into one or two OS-specific variants and +/// validate it isn't empty / modifier-only. +pub(crate) fn expand_ptt_shortcuts(shortcut: &str) -> Result, PttError> { + let trimmed = shortcut.trim(); + if trimmed.is_empty() { + return Err(PttError::EmptyShortcut); + } + + let parts: Vec<&str> = trimmed.split('+').map(str::trim).collect(); + if parts.iter().all(|p| is_modifier_token(p)) { + return Err(PttError::ModifierOnlyShortcut); + } + + #[cfg(target_os = "macos")] + { + if trimmed.contains("CmdOrCtrl") { + let cmd_variant = trimmed.replace("CmdOrCtrl", "Cmd"); + let ctrl_variant = trimmed.replace("CmdOrCtrl", "Ctrl"); + if cmd_variant == ctrl_variant { + return Ok(vec![cmd_variant]); + } + return Ok(vec![cmd_variant, ctrl_variant]); + } + } + + #[cfg(not(target_os = "macos"))] + { + if trimmed.contains("CmdOrCtrl") { + return Ok(vec![trimmed.replace("CmdOrCtrl", "Ctrl")]); + } + } + + Ok(vec![trimmed.to_string()]) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn empty_shortcut_is_rejected() { + assert_eq!(expand_ptt_shortcuts(""), Err(PttError::EmptyShortcut)); + assert_eq!(expand_ptt_shortcuts(" "), Err(PttError::EmptyShortcut)); + } + + #[test] + fn modifier_only_shortcut_is_rejected() { + assert_eq!( + expand_ptt_shortcuts("Ctrl"), + Err(PttError::ModifierOnlyShortcut) + ); + assert_eq!( + expand_ptt_shortcuts("Cmd+Shift"), + Err(PttError::ModifierOnlyShortcut) + ); + assert_eq!( + expand_ptt_shortcuts("Alt+Shift+Ctrl"), + Err(PttError::ModifierOnlyShortcut) + ); + assert_eq!( + expand_ptt_shortcuts("CmdOrCtrl+Shift"), + Err(PttError::ModifierOnlyShortcut) + ); + } + + #[test] + fn plain_function_key_is_accepted() { + assert_eq!(expand_ptt_shortcuts("F13"), Ok(vec!["F13".to_string()])); + } + + #[test] + fn modifier_plus_letter_is_accepted() { + assert_eq!( + expand_ptt_shortcuts("Ctrl+Alt+T"), + Ok(vec!["Ctrl+Alt+T".to_string()]) + ); + } + + #[test] + #[cfg(target_os = "macos")] + fn cmd_or_ctrl_expands_to_both_on_macos() { + let result = expand_ptt_shortcuts("CmdOrCtrl+Shift+P").unwrap(); + assert_eq!(result.len(), 2); + assert!(result.contains(&"Cmd+Shift+P".to_string())); + assert!(result.contains(&"Ctrl+Shift+P".to_string())); + } + + #[test] + #[cfg(not(target_os = "macos"))] + fn cmd_or_ctrl_expands_to_ctrl_off_macos() { + let result = expand_ptt_shortcuts("CmdOrCtrl+Shift+P").unwrap(); + assert_eq!(result, vec!["Ctrl+Shift+P".to_string()]); + } +} From 83ad4e7facd049b1022141c02a9d683c0b15ce95 Mon Sep 17 00:00:00 2001 From: Ghost Scripter Date: Wed, 3 Jun 2026 03:27:53 +0530 Subject: [PATCH 09/36] refactor(tauri/ptt): tighten PttError visibility; zero-alloc modifier check; reject empty tokens (#3090) --- app/src-tauri/src/ptt_hotkeys.rs | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/app/src-tauri/src/ptt_hotkeys.rs b/app/src-tauri/src/ptt_hotkeys.rs index ef013383b6..b5a66cd7a5 100644 --- a/app/src-tauri/src/ptt_hotkeys.rs +++ b/app/src-tauri/src/ptt_hotkeys.rs @@ -10,7 +10,7 @@ use std::sync::atomic::AtomicU64; use std::sync::Mutex; #[derive(Debug, PartialEq, Eq)] -pub enum PttError { +pub(crate) enum PttError { EmptyShortcut, ModifierOnlyShortcut, ConflictsWithDictation(String), @@ -75,8 +75,10 @@ const MODIFIER_TOKENS: &[&str] = &[ ]; fn is_modifier_token(token: &str) -> bool { - let lower = token.trim().to_ascii_lowercase(); - MODIFIER_TOKENS.iter().any(|m| *m == lower) + let trimmed = token.trim(); + MODIFIER_TOKENS + .iter() + .any(|m| trimmed.eq_ignore_ascii_case(m)) } /// Expand a user-typed shortcut into one or two OS-specific variants and @@ -88,6 +90,9 @@ pub(crate) fn expand_ptt_shortcuts(shortcut: &str) -> Result, PttErr } let parts: Vec<&str> = trimmed.split('+').map(str::trim).collect(); + if parts.iter().any(|p| p.is_empty()) { + return Err(PttError::EmptyShortcut); + } if parts.iter().all(|p| is_modifier_token(p)) { return Err(PttError::ModifierOnlyShortcut); } @@ -172,4 +177,14 @@ mod tests { let result = expand_ptt_shortcuts("CmdOrCtrl+Shift+P").unwrap(); assert_eq!(result, vec!["Ctrl+Shift+P".to_string()]); } + + #[test] + fn malformed_shortcut_with_empty_tokens_is_rejected() { + assert_eq!(expand_ptt_shortcuts("+F13"), Err(PttError::EmptyShortcut)); + assert_eq!(expand_ptt_shortcuts("F13+"), Err(PttError::EmptyShortcut)); + assert_eq!( + expand_ptt_shortcuts("Ctrl++T"), + Err(PttError::EmptyShortcut) + ); + } } From f4189cfafd619bd12e102b800342c2f54c9277f1 Mon Sep 17 00:00:00 2001 From: Ghost Scripter Date: Wed, 3 Jun 2026 19:37:01 +0530 Subject: [PATCH 10/36] feat(channels/web): invoke reply_speech + publish PttTranscriptCommitted on speak_reply=true (#3090) --- src/openhuman/channels/providers/web.rs | 90 +++++++++++++++++-- src/openhuman/voice/reply_speech.rs | 64 +++++++++++++ tests/json_rpc_e2e.rs | 114 ++++++++++++++++++++++++ 3 files changed, 259 insertions(+), 9 deletions(-) diff --git a/src/openhuman/channels/providers/web.rs b/src/openhuman/channels/providers/web.rs index 9e58f5680d..d9d611cdb7 100644 --- a/src/openhuman/channels/providers/web.rs +++ b/src/openhuman/channels/providers/web.rs @@ -870,7 +870,7 @@ async fn run_chat_task( thread_id.to_string(), request_id.to_string(), turn_state_store, - metadata, + metadata.clone(), ); // Make `thread_id` ambient for any outbound provider call inside @@ -910,6 +910,62 @@ async fn run_chat_task( } }; + // Voice / PTT integration (#3090 Task 4). When the chat was sent with + // `speak_reply: true`, drive the agent's full reply through + // `voice::reply_speech::synthesize_reply` so the renderer can play it. + // When the call originated as a PTT session, also publish + // `PttTranscriptCommitted` so screen-intelligence (and any future bus + // subscriber) can react to a completed PTT turn. + // + // Why here (not in the progress bridge): the bridge sees `TextDelta`s + // only when the inference provider streams. The non-streaming fallback + // (and the JSON-RPC E2E mocks) produce a single final response with no + // deltas — so buffering deltas alone loses the reply text in those + // paths. The full response is available right here, regardless of + // streaming mode, which makes this the most reliable hook point. + // + // Failures are non-fatal (TTS / observability are best-effort side + // channels). + if let Ok(ref task_result) = result { + let speak_reply = matches!(metadata.speak_reply, Some(true)); + let trimmed_response = task_result.full_response.trim(); + if speak_reply && !trimmed_response.is_empty() { + let opts = crate::openhuman::voice::reply_speech::ReplySpeechOptions::default(); + match crate::openhuman::voice::reply_speech::synthesize_reply( + &config, + &task_result.full_response, + &opts, + ) + .await + { + Ok(_) => log::debug!( + "[web-channel] reply_speech dispatched chars={} client_id={} thread_id={} request_id={}", + task_result.full_response.len(), + client_id, + thread_id, + request_id, + ), + Err(err) => log::warn!( + "[web-channel] reply_speech failed: {err} client_id={} thread_id={} request_id={}", + client_id, + thread_id, + request_id, + ), + } + } + if metadata.source.as_deref() == Some("ptt") { + if let Some(session_id) = metadata.session_id { + crate::openhuman::voice::publish_ptt_transcript_committed( + thread_id.to_string(), + session_id, + task_result.full_response.chars().count(), + 0, + false, + ); + } + } + } + // Clear the sender so it doesn't hold the channel open across sessions. agent.set_on_progress(None); @@ -932,9 +988,13 @@ async fn run_chat_task( /// with the correct client/thread/request IDs. The task runs until the /// sender is dropped (i.e. when the agent turn finishes). /// -/// `metadata` is accepted here so that Task 4 (TTS integration) can read the -/// PTT fields (`speak_reply`, `source`, `session_id`) from the bridge context. -/// For now they are logged and otherwise unused. +/// `metadata` is logged on the bridge's diagnostic lines so PTT turns are +/// easy to correlate across the stream of progress events. The +/// authoritative TTS / PTT-commit dispatch (`speak_reply` → +/// `voice::reply_speech::synthesize_reply`, `source == "ptt"` → +/// `publish_ptt_transcript_committed`) is owned by `run_chat_task`, which +/// sees the full assistant response even when the provider falls back to +/// non-streaming. fn spawn_progress_bridge( mut rx: tokio::sync::mpsc::Receiver, client_id: String, @@ -955,10 +1015,10 @@ fn spawn_progress_bridge( metadata.source, metadata.session_id, ); - // TODO(#3090, Task 4): consume metadata for reply_speech — speak_reply drives - // whether the final assistant text should be synthesised; source and session_id - // are forwarded as metadata to the TTS call site. - let _ = metadata; + // Buffer the streamed assistant text so we can drive TTS / observability + // sinks once the turn finishes (Task 4 / #3090). The buffer is local to + // this bridge — it does not affect any other consumer of TextDelta. + let mut assistant_text = String::new(); let mut round: u32 = 0; let mut events_seen: u64 = 0; let mut turn_state = @@ -1430,6 +1490,13 @@ fn spawn_progress_bridge( }); } AgentProgress::TextDelta { delta, iteration } => { + // Accumulate the streamed assistant reply purely for the + // diagnostic `buffered_chars=` field on the TurnCompleted + // log line. The authoritative TTS / PTT-commit dispatch + // happens from `run_chat_task` where the full response is + // available even when streaming is unavailable (Task 4 / + // #3090). + assistant_text.push_str(&delta); publish_web_channel_event(WebChannelEvent { event: "text_delta".to_string(), client_id: client_id.clone(), @@ -1480,7 +1547,12 @@ fn spawn_progress_bridge( AgentProgress::TurnCompleted { iterations } => { log::debug!( "[web_channel] turn completed after {iterations} iteration(s) \ - client_id={client_id} thread_id={thread_id} request_id={request_id}" + client_id={client_id} thread_id={thread_id} request_id={request_id} \ + buffered_chars={} speak_reply={:?} source={:?} session_id={:?}", + assistant_text.len(), + metadata.speak_reply, + metadata.source, + metadata.session_id, ); } AgentProgress::TurnCostUpdated { diff --git a/src/openhuman/voice/reply_speech.rs b/src/openhuman/voice/reply_speech.rs index b7383f479a..64eb0e9594 100644 --- a/src/openhuman/voice/reply_speech.rs +++ b/src/openhuman/voice/reply_speech.rs @@ -28,6 +28,51 @@ use crate::rpc::RpcOutcome; const LOG_PREFIX: &str = "[voice_reply]"; +/// Env var that activates the [`test_seam`] short-circuit at runtime. When +/// set to `1` / `true`, [`synthesize_reply`] records the requested text +/// into [`test_seam::OBSERVED_CALLS`] and returns a stub +/// [`ReplySpeechResult`] *without* contacting the hosted backend. Anything +/// else (unset, `0`, `false`, …) leaves the production code path +/// untouched. +/// +/// The env-var gate (rather than a `#[cfg(test)]` gate) is deliberate: +/// integration tests in `tests/` are compiled against the production +/// `openhuman_core` crate, so a unit-only `cfg(test)` block would not be +/// visible from there. The observer module itself is always compiled, +/// but its only producer is this env-gated branch and its only consumer +/// is the test harness, so production callers never touch it. +pub const TEST_SEAM_ENV: &str = "OPENHUMAN_TEST_REPLY_SPEECH_SEAM"; + +fn test_seam_enabled() -> bool { + matches!( + std::env::var(TEST_SEAM_ENV).ok().as_deref(), + Some("1") | Some("true") | Some("TRUE") + ) +} + +/// Test seam observation log. See [`TEST_SEAM_ENV`] for the activation +/// gate. Always compiled (the visibility lets `tests/json_rpc_e2e.rs` +/// inspect calls), but only written to when the env gate is on. +pub mod test_seam { + use once_cell::sync::Lazy; + use std::sync::Mutex; + + /// FIFO log of every `text` argument that flowed through the test-seam + /// short-circuit in [`super::synthesize_reply`]. Cleared between tests + /// with [`clear`]. + pub static OBSERVED_CALLS: Lazy>> = Lazy::new(|| Mutex::new(Vec::new())); + + /// Clear the observation log. + pub fn clear() { + OBSERVED_CALLS.lock().unwrap().clear(); + } + + /// Snapshot of the observation log. + pub fn observed() -> Vec { + OBSERVED_CALLS.lock().unwrap().clone() + } +} + /// One frame on the viseme timeline. `viseme` is an Oculus / Microsoft /// 15-set code (`sil, PP, FF, TH, DD, kk, CH, SS, nn, RR, aa, E, I, O, U`). #[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] @@ -87,6 +132,25 @@ pub async fn synthesize_reply( return Err("text is required".to_string()); } + // Test seam: when OPENHUMAN_TEST_REPLY_SPEECH_SEAM is set, record the call + // and short-circuit before hitting the backend. See `test_seam` module docs. + if test_seam_enabled() { + let _ = (config, opts); + test_seam::OBSERVED_CALLS + .lock() + .unwrap() + .push(trimmed.to_string()); + return Ok(RpcOutcome::single_log( + ReplySpeechResult { + audio_base64: String::new(), + audio_mime: "audio/mpeg".to_string(), + visemes: Vec::new(), + alignment: None, + }, + "voice reply synthesized (test seam short-circuit)", + )); + } + let token = get_session_token(config) .map_err(|e| e.to_string())? .and_then(|t| { diff --git a/tests/json_rpc_e2e.rs b/tests/json_rpc_e2e.rs index afb49fe8ac..00364a07fd 100644 --- a/tests/json_rpc_e2e.rs +++ b/tests/json_rpc_e2e.rs @@ -9851,3 +9851,117 @@ async fn json_rpc_workflows_lifecycle_round_trip() { api_join.abort(); rpc_join.abort(); } + +/// Task 4 / #3090: when a web-chat request is sent with +/// `speak_reply: true`, the progress bridge should drive the agent's +/// final text through `voice::reply_speech::synthesize_reply` after the +/// turn completes. +/// +/// We activate the [`reply_speech::test_seam`] short-circuit via the +/// `OPENHUMAN_TEST_REPLY_SPEECH_SEAM` env var so the call is recorded +/// without contacting the ElevenLabs proxy. +#[tokio::test] +async fn json_rpc_channel_web_chat_with_speak_reply_invokes_reply_speech() { + let _env_lock = json_rpc_e2e_env_lock(); + let tmp = tempdir().expect("tempdir"); + let home = tmp.path(); + let openhuman_home = home.join(".openhuman"); + + let _home_guard = EnvVarGuard::set_to_path("HOME", home); + let _workspace_guard = EnvVarGuard::unset("OPENHUMAN_WORKSPACE"); + let _backend_url_guard = EnvVarGuard::unset("BACKEND_URL"); + let _vite_backend_guard = EnvVarGuard::unset("VITE_BACKEND_URL"); + // Activate the reply_speech test seam so synthesize_reply records and + // short-circuits instead of calling the hosted backend. + let _seam_guard = EnvVarGuard::set( + openhuman_core::openhuman::voice::reply_speech::TEST_SEAM_ENV, + "1", + ); + + openhuman_core::openhuman::voice::reply_speech::test_seam::clear(); + + let (mock_addr, mock_join) = serve_on_ephemeral(mock_upstream_router()).await; + let mock_origin = format!("http://{}", mock_addr); + + write_min_config(&openhuman_home, &mock_origin); + let user_scoped_dir = openhuman_home.join("users").join("e2e-user"); + write_min_config(&user_scoped_dir, &mock_origin); + + let (rpc_addr, rpc_join) = serve_on_ephemeral(build_core_http_router(false)).await; + let rpc_base = format!("http://{}", rpc_addr); + tokio::time::sleep(Duration::from_millis(100)).await; + + // Authenticate so the agent loop has a session token available. + let store = post_json_rpc( + &rpc_base, + 9300, + "openhuman.auth_store_session", + json!({ + "token": "e2e-test-jwt", + "user_id": "e2e-user" + }), + ) + .await; + assert_no_jsonrpc_error(&store, "store_session"); + + let client_id = "ptt-e2e-client"; + let thread_id = "ptt-e2e-thread"; + let events_url = format!("{}/events?client_id={}", rpc_base, client_id); + let sse_task = tokio::spawn(async move { read_terminal_web_chat_event(&events_url).await }); + + // PTT-style chat send: speak_reply=true, source=ptt, session_id=1. + let web_chat = post_json_rpc( + &rpc_base, + 9301, + "openhuman.channel_web_chat", + json!({ + "client_id": client_id, + "thread_id": thread_id, + "message": "Hello from PTT", + "model_override": "e2e-mock-model", + "speak_reply": true, + "source": "ptt", + "session_id": 1, + }), + ) + .await; + let web_chat_result = assert_no_jsonrpc_error(&web_chat, "channel_web_chat"); + assert_eq!( + web_chat_result + .get("result") + .and_then(|v| v.get("accepted")), + Some(&json!(true)) + ); + + let sse_event = tokio::time::timeout(Duration::from_secs(12), sse_task) + .await + .expect("timed out waiting for chat_done with speak_reply=true") + .expect("sse task join should succeed"); + assert_eq!( + sse_event.get("event").and_then(Value::as_str), + Some("chat_done") + ); + + // The bridge should have buffered the streamed assistant text and + // routed it through synthesize_reply on TurnCompleted. Poll briefly + // because the bridge task may finish slightly after chat_done. + let mut observed: Vec = Vec::new(); + for _ in 0..50 { + observed = openhuman_core::openhuman::voice::reply_speech::test_seam::observed(); + if !observed.is_empty() { + break; + } + tokio::time::sleep(Duration::from_millis(20)).await; + } + assert!( + !observed.is_empty(), + "expected reply_speech::synthesize_reply to be invoked when speak_reply=true; observed={observed:?}" + ); + assert!( + observed.iter().any(|t| !t.trim().is_empty()), + "expected at least one non-empty text passed to synthesize_reply; observed={observed:?}" + ); + + mock_join.abort(); + rpc_join.abort(); +} From eff5645598c3ed41fcbbb4048c491916d938ae90 Mon Sep 17 00:00:00 2001 From: Ghost Scripter Date: Wed, 3 Jun 2026 20:01:41 +0530 Subject: [PATCH 11/36] refactor(channels/web,voice): harden TTS test seam + tighten T4 review fixes (#3090) --- src/openhuman/channels/providers/web.rs | 20 +++++--------------- src/openhuman/voice/reply_speech.rs | 14 ++++++++++---- tests/json_rpc_e2e.rs | 9 ++++++--- 3 files changed, 21 insertions(+), 22 deletions(-) diff --git a/src/openhuman/channels/providers/web.rs b/src/openhuman/channels/providers/web.rs index d9d611cdb7..41be4e15ff 100644 --- a/src/openhuman/channels/providers/web.rs +++ b/src/openhuman/channels/providers/web.rs @@ -939,14 +939,14 @@ async fn run_chat_task( .await { Ok(_) => log::debug!( - "[web-channel] reply_speech dispatched chars={} client_id={} thread_id={} request_id={}", + "[web_channel] reply_speech dispatched chars={} client_id={} thread_id={} request_id={}", task_result.full_response.len(), client_id, thread_id, request_id, ), Err(err) => log::warn!( - "[web-channel] reply_speech failed: {err} client_id={} thread_id={} request_id={}", + "[web_channel] reply_speech failed: {err} client_id={} thread_id={} request_id={}", client_id, thread_id, request_id, @@ -955,6 +955,8 @@ async fn run_chat_task( } if metadata.source.as_deref() == Some("ptt") { if let Some(session_id) = metadata.session_id { + // TODO(#3090 T11): held_ms will be supplied by the renderer once the PTT + // watchdog reports actual hold duration. 0 is a placeholder until then. crate::openhuman::voice::publish_ptt_transcript_committed( thread_id.to_string(), session_id, @@ -1015,10 +1017,6 @@ fn spawn_progress_bridge( metadata.source, metadata.session_id, ); - // Buffer the streamed assistant text so we can drive TTS / observability - // sinks once the turn finishes (Task 4 / #3090). The buffer is local to - // this bridge — it does not affect any other consumer of TextDelta. - let mut assistant_text = String::new(); let mut round: u32 = 0; let mut events_seen: u64 = 0; let mut turn_state = @@ -1490,13 +1488,6 @@ fn spawn_progress_bridge( }); } AgentProgress::TextDelta { delta, iteration } => { - // Accumulate the streamed assistant reply purely for the - // diagnostic `buffered_chars=` field on the TurnCompleted - // log line. The authoritative TTS / PTT-commit dispatch - // happens from `run_chat_task` where the full response is - // available even when streaming is unavailable (Task 4 / - // #3090). - assistant_text.push_str(&delta); publish_web_channel_event(WebChannelEvent { event: "text_delta".to_string(), client_id: client_id.clone(), @@ -1548,8 +1539,7 @@ fn spawn_progress_bridge( log::debug!( "[web_channel] turn completed after {iterations} iteration(s) \ client_id={client_id} thread_id={thread_id} request_id={request_id} \ - buffered_chars={} speak_reply={:?} source={:?} session_id={:?}", - assistant_text.len(), + speak_reply={:?} source={:?} session_id={:?}", metadata.speak_reply, metadata.source, metadata.session_id, diff --git a/src/openhuman/voice/reply_speech.rs b/src/openhuman/voice/reply_speech.rs index 64eb0e9594..e0cc530357 100644 --- a/src/openhuman/voice/reply_speech.rs +++ b/src/openhuman/voice/reply_speech.rs @@ -15,7 +15,7 @@ //! in a `Tool` impl, the `external_effect()` method MUST stay `false` //! (the trait's default) so the approval gate never prompts on TTS. -use log::debug; +use log::{debug, warn}; use reqwest::Method; use serde::{Deserialize, Serialize}; use serde_json::{json, Value}; @@ -132,9 +132,15 @@ pub async fn synthesize_reply( return Err("text is required".to_string()); } - // Test seam: when OPENHUMAN_TEST_REPLY_SPEECH_SEAM is set, record the call - // and short-circuit before hitting the backend. See `test_seam` module docs. - if test_seam_enabled() { + // Test seam: when OPENHUMAN_TEST_REPLY_SPEECH_SEAM is set (and only in + // debug builds — the seam is structurally dead in release), record the + // call and short-circuit before hitting the backend. + // See `test_seam` module docs and `TEST_SEAM_ENV` for the activation gate. + if cfg!(debug_assertions) && test_seam_enabled() { + warn!( + "[voice_reply] TEST SEAM ACTIVE — synthesize_reply short-circuited ({} is set); skipping backend call", + TEST_SEAM_ENV + ); let _ = (config, opts); test_seam::OBSERVED_CALLS .lock() diff --git a/tests/json_rpc_e2e.rs b/tests/json_rpc_e2e.rs index 00364a07fd..a4bab4c056 100644 --- a/tests/json_rpc_e2e.rs +++ b/tests/json_rpc_e2e.rs @@ -9853,9 +9853,8 @@ async fn json_rpc_workflows_lifecycle_round_trip() { } /// Task 4 / #3090: when a web-chat request is sent with -/// `speak_reply: true`, the progress bridge should drive the agent's -/// final text through `voice::reply_speech::synthesize_reply` after the -/// turn completes. +/// `speak_reply: true`, `run_chat_task` should drive the agent's final text +/// through `voice::reply_speech::synthesize_reply` after the turn completes. /// /// We activate the [`reply_speech::test_seam`] short-circuit via the /// `OPENHUMAN_TEST_REPLY_SPEECH_SEAM` env var so the call is recorded @@ -9961,6 +9960,10 @@ async fn json_rpc_channel_web_chat_with_speak_reply_invokes_reply_speech() { observed.iter().any(|t| !t.trim().is_empty()), "expected at least one non-empty text passed to synthesize_reply; observed={observed:?}" ); + assert!( + observed.iter().any(|t| t.contains("Hello from e2e mock agent")), + "expected the observed seam text to include the mock reply phrase; got {observed:?}" + ); mock_join.abort(); rpc_join.abort(); From 9159709c2d79a5e4a1ba0c1d92b8e53d6f98a2cd Mon Sep 17 00:00:00 2001 From: Ghost Scripter Date: Wed, 3 Jun 2026 20:25:18 +0530 Subject: [PATCH 12/36] feat(tauri/ptt): register/unregister IPC + dictation conflict guard (#3090) --- app/src-tauri/src/lib.rs | 159 +++++++++++++++++++++++++++++++ app/src-tauri/src/ptt_hotkeys.rs | 49 ++++++++++ app/src-tauri/src/ptt_overlay.rs | 22 +++++ 3 files changed, 230 insertions(+) create mode 100644 app/src-tauri/src/ptt_overlay.rs diff --git a/app/src-tauri/src/lib.rs b/app/src-tauri/src/lib.rs index 037500d800..919f5b84a4 100644 --- a/app/src-tauri/src/lib.rs +++ b/app/src-tauri/src/lib.rs @@ -38,6 +38,7 @@ mod notification_settings; mod process_kill; mod process_recovery; mod ptt_hotkeys; +mod ptt_overlay; #[cfg(target_os = "windows")] mod reset_reboot_schedule; mod screen_capture; @@ -745,6 +746,20 @@ async fn register_dictation_hotkey( expanded_shortcuts.join(", ") ); + // Reject overlap with the currently-registered PTT hotkey. + let ptt_current = { + let state = app.state::(); + let guard = state.shortcut.lock().unwrap(); + guard.clone() + }; + if let Some(conflict) = + ptt_hotkeys::first_conflict_with(&expanded_shortcuts, &ptt_current) + { + return Err(format!( + "dictation shortcut '{conflict}' conflicts with the push-to-talk hotkey" + )); + } + let register_shortcut = |shortcut_variant: &str| -> Result<(), String> { let app_clone = app.clone(); app.global_shortcut() @@ -839,6 +854,146 @@ async fn unregister_dictation_hotkey(app: AppHandle) -> Result<(), S Ok(()) } +/// Register (or re-register) the global push-to-talk hotkey. Emits +/// `ptt://start { session_id }` on press and `ptt://stop { session_id }` +/// on release. +#[tauri::command] +async fn register_ptt_hotkey( + app: AppHandle, + shortcut: String, +) -> Result<(), String> { + log::info!("[ptt] register_ptt_hotkey: shortcut={shortcut}"); + + let expanded = ptt_hotkeys::expand_ptt_shortcuts(&shortcut) + .map_err(|e| e.to_string())?; + + // Reject overlap with the currently-registered dictation hotkey. + let dictation_current = { + let state = app.state::(); + let guard = state.0.lock().unwrap(); + guard.clone() + }; + if let Some(conflict) = + ptt_hotkeys::first_conflict_with(&expanded, &dictation_current) + { + return Err(ptt_hotkeys::PttError::ConflictsWithDictation(conflict).to_string()); + } + + let old_shortcuts = { + let state = app.state::(); + let guard = state.shortcut.lock().unwrap(); + guard.clone() + }; + + // Lazy-instantiate the overlay window so it's ready before the first press. + if let Err(e) = ptt_overlay::ensure_window(&app) { + log::warn!("[ptt] overlay window create failed (continuing): {e}"); + } + + let register_shortcut = |variant: &str| -> Result<(), String> { + let app_pressed = app.clone(); + let app_released = app.clone(); + let variant_owned = variant.to_string(); + app.global_shortcut() + .on_shortcut(variant, move |app_inner, _sc, event| { + let state = app_inner.state::(); + match event.state { + ShortcutState::Pressed => { + // Atomically bump the counter and emit start. + let session_id = state + .session_counter + .fetch_add(1, std::sync::atomic::Ordering::SeqCst) + + 1; + log::debug!( + "[ptt] pressed shortcut={variant_owned} session_id={session_id}" + ); + if let Err(e) = + app_pressed.emit("ptt://start", serde_json::json!({ + "session_id": session_id, + })) + { + log::warn!("[ptt] emit start failed: {e}"); + } + } + ShortcutState::Released => { + let session_id = state + .session_counter + .load(std::sync::atomic::Ordering::SeqCst); + log::debug!( + "[ptt] released shortcut={variant_owned} session_id={session_id}" + ); + if let Err(e) = + app_released.emit("ptt://stop", serde_json::json!({ + "session_id": session_id, + })) + { + log::warn!("[ptt] emit stop failed: {e}"); + } + } + } + }) + .map_err(|e| format!("Failed to register ptt shortcut '{variant}': {e}")) + }; + + // Unregister previous PTT variants. + let mut unregistered: Vec = Vec::new(); + for old in &old_shortcuts { + if let Err(e) = app.global_shortcut().unregister(old.as_str()) { + // Rollback already-unregistered ones. + for r in &unregistered { + let _ = register_shortcut(r); + } + return Err(format!("Failed to unregister previous ptt shortcut '{old}': {e}")); + } + unregistered.push(old.clone()); + } + + // Register the new variants. Rollback on first failure. + let mut newly_registered: Vec = Vec::new(); + for v in &expanded { + if let Err(e) = register_shortcut(v) { + for r in &newly_registered { + let _ = app.global_shortcut().unregister(r.as_str()); + } + for old in &old_shortcuts { + let _ = register_shortcut(old); + } + return Err(e); + } + newly_registered.push(v.clone()); + } + + { + let state = app.state::(); + let mut guard = state.shortcut.lock().unwrap(); + *guard = expanded.clone(); + } + + log::info!("[ptt] registered: {}", expanded.join(", ")); + Ok(()) +} + +/// Unregister the global PTT hotkey (if any). +#[tauri::command] +async fn unregister_ptt_hotkey(app: AppHandle) -> Result<(), String> { + log::info!("[ptt] unregister_ptt_hotkey: called"); + let state = app.state::(); + let old = { + let mut guard = state.shortcut.lock().unwrap(); + let v = guard.clone(); + guard.clear(); + v + }; + for s in &old { + if let Err(e) = app.global_shortcut().unregister(s.as_str()) { + log::warn!("[ptt] unregister '{s}' failed: {e}"); + } + } + // Destroy the overlay window so resources are released. + ptt_overlay::destroy_window(&app); + Ok(()) +} + fn is_daemon_mode() -> bool { std::env::args().any(|arg| arg == "daemon" || arg == "--daemon") } @@ -2405,6 +2560,7 @@ pub fn run() { .manage(dictation_hotkeys::DictationHotkeyState( std::sync::Mutex::new(Vec::new()), )) + .manage(ptt_hotkeys::PttHotkeyState::new()) .manage(companion_commands::CompanionHotkeyState( std::sync::Mutex::new(Vec::new()), )) @@ -3098,6 +3254,9 @@ pub fn run() { schedule_cef_profile_purge, register_dictation_hotkey, unregister_dictation_hotkey, + register_ptt_hotkey, + unregister_ptt_hotkey, + ptt_overlay::show_ptt_overlay, webview_accounts::webview_account_open, webview_accounts::webview_account_prewarm, webview_accounts::webview_account_close, diff --git a/app/src-tauri/src/ptt_hotkeys.rs b/app/src-tauri/src/ptt_hotkeys.rs index b5a66cd7a5..7d03e33122 100644 --- a/app/src-tauri/src/ptt_hotkeys.rs +++ b/app/src-tauri/src/ptt_hotkeys.rs @@ -188,3 +188,52 @@ mod tests { ); } } + +/// Returns `Some(conflicting_variant)` if any expanded PTT variant overlaps +/// any expanded dictation variant. Comparison is case-insensitive. +pub(crate) fn first_conflict_with( + ptt: &[String], + dictation: &[String], +) -> Option { + for p in ptt { + let p_lc = p.to_ascii_lowercase(); + for d in dictation { + if d.to_ascii_lowercase() == p_lc { + return Some(p.clone()); + } + } + } + None +} + +#[cfg(test)] +mod conflict_tests { + use super::*; + + #[test] + fn no_conflict_returns_none() { + let ptt = vec!["F13".into()]; + let dict = vec!["F14".into()]; + assert_eq!(first_conflict_with(&ptt, &dict), None); + } + + #[test] + fn case_insensitive_conflict_detected() { + let ptt = vec!["ctrl+space".into()]; + let dict = vec!["Ctrl+Space".into()]; + assert_eq!( + first_conflict_with(&ptt, &dict), + Some("ctrl+space".to_string()) + ); + } + + #[test] + fn only_one_variant_overlaps_returns_first() { + let ptt = vec!["Cmd+P".into(), "Ctrl+P".into()]; + let dict = vec!["Ctrl+P".into()]; + assert_eq!( + first_conflict_with(&ptt, &dict), + Some("Ctrl+P".to_string()) + ); + } +} diff --git a/app/src-tauri/src/ptt_overlay.rs b/app/src-tauri/src/ptt_overlay.rs new file mode 100644 index 0000000000..03dc1bdd0c --- /dev/null +++ b/app/src-tauri/src/ptt_overlay.rs @@ -0,0 +1,22 @@ +//! Borderless always-on-top PTT overlay window. STUB — implemented in T6. + +use tauri::{AppHandle, Runtime}; + +pub(crate) fn ensure_window(_app: &AppHandle) -> Result<(), String> { + // T6 will replace this with the real lazy create. + Ok(()) +} + +pub(crate) fn destroy_window(_app: &AppHandle) { + // T6 will replace this with the real destroy. +} + +#[tauri::command] +pub(crate) async fn show_ptt_overlay( + _app: AppHandle, + _active: bool, + _session_id: u64, +) -> Result<(), String> { + // T6 will replace this with the real show/hide. + Ok(()) +} From 368d2e77167a569b399db07feeff6e2298fe0490 Mon Sep 17 00:00:00 2001 From: Ghost Scripter Date: Wed, 3 Jun 2026 20:33:20 +0530 Subject: [PATCH 13/36] fix(tauri/ptt): CAS-guard press/release; robust unregister + rollback logs (#3090) --- app/src-tauri/src/lib.rs | 49 +++++++++++++++++++++++++------- app/src-tauri/src/ptt_hotkeys.rs | 43 +++++++++++++++++++++++++++- 2 files changed, 81 insertions(+), 11 deletions(-) diff --git a/app/src-tauri/src/lib.rs b/app/src-tauri/src/lib.rs index 919f5b84a4..0a67767436 100644 --- a/app/src-tauri/src/lib.rs +++ b/app/src-tauri/src/lib.rs @@ -899,10 +899,23 @@ async fn register_ptt_hotkey( let state = app_inner.state::(); match event.state { ShortcutState::Pressed => { - // Atomically bump the counter and emit start. + // Drop OS key-repeat events; only the first Pressed of a hold opens a session. + if state + .is_held + .compare_exchange( + false, + true, + std::sync::atomic::Ordering::AcqRel, + std::sync::atomic::Ordering::Acquire, + ) + .is_err() + { + log::trace!("[ptt] press dropped (already held) shortcut={variant_owned}"); + return; + } let session_id = state .session_counter - .fetch_add(1, std::sync::atomic::Ordering::SeqCst) + .fetch_add(1, std::sync::atomic::Ordering::Relaxed) + 1; log::debug!( "[ptt] pressed shortcut={variant_owned} session_id={session_id}" @@ -916,9 +929,14 @@ async fn register_ptt_hotkey( } } ShortcutState::Released => { + if !state.is_held.swap(false, std::sync::atomic::Ordering::AcqRel) { + // No corresponding Pressed in our state — stale event, drop. + log::trace!("[ptt] release dropped (not held) shortcut={variant_owned}"); + return; + } let session_id = state .session_counter - .load(std::sync::atomic::Ordering::SeqCst); + .load(std::sync::atomic::Ordering::Relaxed); log::debug!( "[ptt] released shortcut={variant_owned} session_id={session_id}" ); @@ -941,7 +959,9 @@ async fn register_ptt_hotkey( if let Err(e) = app.global_shortcut().unregister(old.as_str()) { // Rollback already-unregistered ones. for r in &unregistered { - let _ = register_shortcut(r); + if let Err(re) = register_shortcut(r) { + log::warn!("[ptt] rollback failed for '{r}': {re}"); + } } return Err(format!("Failed to unregister previous ptt shortcut '{old}': {e}")); } @@ -953,10 +973,14 @@ async fn register_ptt_hotkey( for v in &expanded { if let Err(e) = register_shortcut(v) { for r in &newly_registered { - let _ = app.global_shortcut().unregister(r.as_str()); + if let Err(re) = app.global_shortcut().unregister(r.as_str()) { + log::warn!("[ptt] rollback failed for '{r}': {re}"); + } } for old in &old_shortcuts { - let _ = register_shortcut(old); + if let Err(re) = register_shortcut(old) { + log::warn!("[ptt] rollback failed for '{old}': {re}"); + } } return Err(e); } @@ -979,16 +1003,21 @@ async fn unregister_ptt_hotkey(app: AppHandle) -> Result<(), String> log::info!("[ptt] unregister_ptt_hotkey: called"); let state = app.state::(); let old = { - let mut guard = state.shortcut.lock().unwrap(); - let v = guard.clone(); - guard.clear(); - v + let guard = state.shortcut.lock().unwrap(); + guard.clone() }; + let mut still_registered: Vec = Vec::new(); for s in &old { if let Err(e) = app.global_shortcut().unregister(s.as_str()) { log::warn!("[ptt] unregister '{s}' failed: {e}"); + still_registered.push(s.clone()); } } + // Only retain variants that genuinely failed to unregister; the rest are gone. + { + let mut guard = state.shortcut.lock().unwrap(); + *guard = still_registered; + } // Destroy the overlay window so resources are released. ptt_overlay::destroy_window(&app); Ok(()) diff --git a/app/src-tauri/src/ptt_hotkeys.rs b/app/src-tauri/src/ptt_hotkeys.rs index 7d03e33122..a853806247 100644 --- a/app/src-tauri/src/ptt_hotkeys.rs +++ b/app/src-tauri/src/ptt_hotkeys.rs @@ -6,7 +6,7 @@ //! but rejects pure-modifier shortcuts (Ctrl, Cmd+Shift, etc.) because they //! would fire constantly during normal typing. -use std::sync::atomic::AtomicU64; +use std::sync::atomic::{AtomicBool, AtomicU64}; use std::sync::Mutex; #[derive(Debug, PartialEq, Eq)] @@ -48,6 +48,10 @@ pub(crate) struct PttHotkeyState { pub(crate) shortcut: Mutex>, /// Monotonic counter for session IDs. pub(crate) session_counter: AtomicU64, + /// CAS-guarded: true iff a PTT session is currently mid-hold. + /// Used to drop OS key-repeat Pressed events so each press/release pair + /// produces exactly one session_id. + pub(crate) is_held: AtomicBool, } impl PttHotkeyState { @@ -55,6 +59,7 @@ impl PttHotkeyState { Self { shortcut: Mutex::new(Vec::new()), session_counter: AtomicU64::new(0), + is_held: AtomicBool::new(false), } } } @@ -237,3 +242,39 @@ mod conflict_tests { ); } } + +#[cfg(test)] +mod state_tests { + use super::*; + use std::sync::atomic::Ordering; + + #[test] + fn new_state_is_not_held_and_counter_is_zero() { + let s = PttHotkeyState::new(); + assert!(!s.is_held.load(Ordering::Relaxed)); + assert_eq!(s.session_counter.load(Ordering::Relaxed), 0); + } + + #[test] + fn cas_false_to_true_succeeds_then_repeat_fails() { + let s = PttHotkeyState::new(); + // First press: false → true succeeds. + assert!( + s.is_held + .compare_exchange(false, true, Ordering::AcqRel, Ordering::Acquire) + .is_ok(), + "first press CAS should succeed" + ); + // Repeat press: false → true fails because we're already true. + assert!( + s.is_held + .compare_exchange(false, true, Ordering::AcqRel, Ordering::Acquire) + .is_err(), + "repeat press CAS should fail (already held)" + ); + // Release: swap true → false returns the old true. + assert!(s.is_held.swap(false, Ordering::AcqRel), "swap should return prior true"); + // Subsequent stale release: swap returns the current false. + assert!(!s.is_held.swap(false, Ordering::AcqRel), "stale swap should return false"); + } +} From 474b5bcd1168ad4f4145a7965dc960c991e8112c Mon Sep 17 00:00:00 2001 From: Ghost Scripter Date: Wed, 3 Jun 2026 20:37:42 +0530 Subject: [PATCH 14/36] feat(tauri/ptt): lazy borderless always-on-top overlay window (#3090) --- app/src-tauri/src/ptt_overlay.rs | 93 ++++++++++++++++++++++++++++---- 1 file changed, 83 insertions(+), 10 deletions(-) diff --git a/app/src-tauri/src/ptt_overlay.rs b/app/src-tauri/src/ptt_overlay.rs index 03dc1bdd0c..2b3ca4079c 100644 --- a/app/src-tauri/src/ptt_overlay.rs +++ b/app/src-tauri/src/ptt_overlay.rs @@ -1,22 +1,95 @@ -//! Borderless always-on-top PTT overlay window. STUB — implemented in T6. +//! Borderless always-on-top PTT overlay window. +//! +//! Lazy-created on the first `register_ptt_hotkey` call (so the window is +//! ready when the user hits the key for the first time), and destroyed by +//! `unregister_ptt_hotkey`. The window's contents are rendered by the React +//! route `/ptt-overlay` (see `app/src/pages/PttOverlayPage.tsx`). +//! +//! Cross-platform note: `focus(false)` ensures the window never steals focus +//! from the user's active app. `skip_taskbar(true)` keeps it out of the +//! Windows taskbar / macOS dock. `visible_on_all_workspaces(true)` makes it +//! follow the user across macOS Spaces. DXGI exclusive-fullscreen on Windows +//! still suppresses the overlay — documented in the settings panel as a +//! limitation; chime audio remains the fallback signal. -use tauri::{AppHandle, Runtime}; +use tauri::{AppHandle, Emitter, Manager, Runtime, WebviewUrl, WebviewWindowBuilder}; -pub(crate) fn ensure_window(_app: &AppHandle) -> Result<(), String> { - // T6 will replace this with the real lazy create. +const OVERLAY_LABEL: &str = "ptt-overlay"; + +/// Ensure the overlay window exists. Idempotent — if the window already +/// exists, returns Ok without recreating it. +pub(crate) fn ensure_window(app: &AppHandle) -> Result<(), String> { + if app.get_webview_window(OVERLAY_LABEL).is_some() { + return Ok(()); + } + let url = WebviewUrl::App("index.html#/ptt-overlay".into()); + let mut builder = WebviewWindowBuilder::new(app, OVERLAY_LABEL, url) + .title("OpenHuman Push-to-Talk") + .inner_size(160.0, 56.0) + .decorations(false) + .transparent(true) + .always_on_top(true) + .skip_taskbar(true) + .focused(false) + .resizable(false) + .shadow(false) + .visible(false) + .accept_first_mouse(false); + + #[cfg(target_os = "macos")] + { + builder = builder.visible_on_all_workspaces(true); + } + + let _window = builder + .build() + .map_err(|e| format!("create ptt overlay window: {e}"))?; + log::info!("[ptt-overlay] window created (label={OVERLAY_LABEL})"); Ok(()) } -pub(crate) fn destroy_window(_app: &AppHandle) { - // T6 will replace this with the real destroy. +/// Destroy the overlay window if it exists. +pub(crate) fn destroy_window(app: &AppHandle) { + if let Some(w) = app.get_webview_window(OVERLAY_LABEL) { + if let Err(e) = w.destroy() { + log::warn!("[ptt-overlay] destroy failed: {e}"); + } else { + log::info!("[ptt-overlay] window destroyed"); + } + } } +/// Show or hide the overlay. Emits `ptt-overlay://active` for the in-window +/// React tree to drive its pulsing-dot animation. #[tauri::command] pub(crate) async fn show_ptt_overlay( - _app: AppHandle, - _active: bool, - _session_id: u64, + app: AppHandle, + active: bool, + session_id: u64, ) -> Result<(), String> { - // T6 will replace this with the real show/hide. + let window = app + .get_webview_window(OVERLAY_LABEL) + .ok_or_else(|| "ptt overlay window not created — register a hotkey first".to_string())?; + + if active { + window + .show() + .map_err(|e| format!("show overlay: {e}"))?; + } else { + window + .hide() + .map_err(|e| format!("hide overlay: {e}"))?; + } + + if let Err(e) = window.emit( + "ptt-overlay://active", + serde_json::json!({ + "active": active, + "session_id": session_id, + }), + ) { + log::warn!("[ptt-overlay] emit active failed: {e}"); + } + Ok(()) } From ef9e05dfb0f3c0341ce5c184f226a61f8a24b9fa Mon Sep 17 00:00:00 2001 From: Ghost Scripter Date: Wed, 3 Jun 2026 21:15:41 +0530 Subject: [PATCH 15/36] refactor(tauri/ptt): macOS-gate accept_first_mouse; note CEF shadow noop; tag overlay error (#3090) --- app/src-tauri/src/ptt_overlay.rs | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/app/src-tauri/src/ptt_overlay.rs b/app/src-tauri/src/ptt_overlay.rs index 2b3ca4079c..26c94d7bcf 100644 --- a/app/src-tauri/src/ptt_overlay.rs +++ b/app/src-tauri/src/ptt_overlay.rs @@ -32,13 +32,17 @@ pub(crate) fn ensure_window(app: &AppHandle) -> Result<(), String .skip_taskbar(true) .focused(false) .resizable(false) + // NOTE: .shadow(false) is a no-op under the project's CEF runtime + // (tauri-runtime-cef has a TODO stub); harmless but won't actually + // suppress the OS shadow until CEF wires it through. .shadow(false) - .visible(false) - .accept_first_mouse(false); + .visible(false); #[cfg(target_os = "macos")] { - builder = builder.visible_on_all_workspaces(true); + builder = builder + .visible_on_all_workspaces(true) + .accept_first_mouse(false); } let _window = builder @@ -69,7 +73,10 @@ pub(crate) async fn show_ptt_overlay( ) -> Result<(), String> { let window = app .get_webview_window(OVERLAY_LABEL) - .ok_or_else(|| "ptt overlay window not created — register a hotkey first".to_string())?; + .ok_or_else(|| { + "[ptt-overlay] window not ready (register_ptt_hotkey must succeed before show_ptt_overlay)" + .to_string() + })?; if active { window From a19f1d4437973f85b9a7babe3fe773c5fe29f679 Mon Sep 17 00:00:00 2001 From: Ghost Scripter Date: Wed, 3 Jun 2026 21:21:17 +0530 Subject: [PATCH 16/36] assets(ptt): bundle CC0 open/close/error chimes (#3090) --- app/src/assets/audio/README.md | 11 +++++++++++ app/src/assets/audio/ptt-close.wav | Bin 0 -> 7100 bytes app/src/assets/audio/ptt-error.wav | Bin 0 -> 10628 bytes app/src/assets/audio/ptt-open.wav | Bin 0 -> 7100 bytes 4 files changed, 11 insertions(+) create mode 100644 app/src/assets/audio/README.md create mode 100644 app/src/assets/audio/ptt-close.wav create mode 100644 app/src/assets/audio/ptt-error.wav create mode 100644 app/src/assets/audio/ptt-open.wav diff --git a/app/src/assets/audio/README.md b/app/src/assets/audio/README.md new file mode 100644 index 0000000000..9aeb5cad49 --- /dev/null +++ b/app/src/assets/audio/README.md @@ -0,0 +1,11 @@ +# Audio assets + +Short UI chimes for the push-to-talk feature (`docs/superpowers/specs/2026-06-02-global-ptt-design.md`). + +| File | Purpose | Source | License | +| --- | --- | --- | --- | +| `ptt-open.wav` | Mic opened (PTT key pressed). | Generated locally with Python `wave` + sine generator (800–1200 Hz sweep). | CC0 / Public Domain. | +| `ptt-close.wav` | Mic closed (PTT key released). | Generated locally with Python `wave` + sine generator (1200–800 Hz sweep). | CC0 / Public Domain. | +| `ptt-error.wav` | Session aborted (empty audio, mic permission denied, etc.). | Generated locally with Python `wave` + sine generator (250 Hz tone). | CC0 / Public Domain. | + +All clips are ~80–120ms, LUFS-normalized to roughly match the in-app notification sound (~ -16 LUFS). Replace freely with better-sounding equivalents — just keep them under 200ms and CC0/MIT-equivalent. diff --git a/app/src/assets/audio/ptt-close.wav b/app/src/assets/audio/ptt-close.wav new file mode 100644 index 0000000000000000000000000000000000000000..761d30766a6da7b8aff47bd2e36e7a95127ffc23 GIT binary patch literal 7100 zcmW+*b(qxF+s(MUW@eJ$Y+2l0w%8&?yGzkhym*17zz@1m9Ev*>S)>$L+=@FCN|!BC zC{QAkad%7RyI-IDIrAjM=p3{QDuen!Cg@}S=lt-z zG5?a?&W>iAvPw41#2ARRv3=Mz>|3@K_+1IDfb!6Mj)yaZdzPEyw&a!Y#_$I4>hcKg zk6aaZ6~_cd1;6pf@SL0x&=a;b zb0OP1!%4f6{N(6FCjKEVN-Rh;OBSZSPCvadW!3T%iu%eTWmYjr(Od44wv}`j!Ged}Qix*8vbEEhgfu=jDv#*H z%R*m;_JxYWqa&}PhvL_g>ddxW>paO>%{PeVOMaHkS7elC^)t0x^G@AdZBgx3T#+r4 zh(sm)A2|ovJK1DvSRxT6!UdtXfwX^{|D?Z5pntF`bS=V*?@i9m%wg6-M|h`%>m|+P ztCh>tYHdB;EnT&4v~G$vu2!gS$Td<#6z83ShBLZMAOXcjhv8t6pZ3bVw>+%pnYV^N z5$G0rACLom5!$wsT?dzi6-#RK*3z6 z^!fP2$c!NFd*yy`T6<#0b5&KEMop65ZogMrT-I6L}CUN_Uxbm6m`FgH7Yxx2RqA#&HHWZ5^pR5@C z*x=PvC`6K0{66_{nFDc4IOuEZmN^ztyNKyvg(dh@qAhiv{=+rPw=Tqu-$?Iem-2>) zBl6ERjSM@@(?P`;X2(2OGwc?!%lfBzreTw&SaCt@=1KC6GV|iO&_`cWx71Ni9U#8L zx%dEl7-6E8(Q8~*U+vJESo1WW#knU%#q#~?&3dUhVJSig^cj}Mrebx`Mz)RcNaIoM zQl(h>rQk>CbT*op9%&tz>@hos+dGq0xCw{x*LW{-qP?E8%+o!vAd*d7$S#Av5ac8? zl^eB1#(A*Lrb0(y3G6kNLuVr$tdq?WLsLy$K2v;^7hpfjtc+t}rT<%ZvEw>bMQp}J zcuU+woFZNJ+s>NaOrT}-N^(lBF~=`hEA=W9+8xG=a93MHbSjp@UI4ZyBF(K6%nCzi zjZ(2q?BNyWXJuZ;=Y+@mZ@ITS7>Y}t1?<$qo%l#{zP*uihNmFVGjc95Fe`%Y@JC4= zDG1Fj!{6q$*29Pi`-nvVKVsBiTLDit{;7SYoG*0?+H+Rs?8%wYZo!#em8%sUAd89q zfT4-_0-`0gfnMlxdhZ8Q(U~bVbDJ|!=$1BB)z(!SufrW|HPEjC=l@_DY(KKr`pH~r z;A_*03W-TD8~Q!lHu)xUGr;j)bw=%X$;LzxU}+YcVeIgyBU5Us@pC%!%g4r`YJbpwxHFb&DQH z9VC|E0=yNjBG!|~?1P=<9xQ-HCM598yu6&hK}^Y`>J$2?>6RsHJBS{}^4JCJ4qAvX zmLjvrFk17KLM7S37eMo}OyWZ1Y(V7w!>OimfWhGZR63CGkT&|AQ}4YVxDnwb=Vc*i z9iJ5!DT*{seNFR6iwZe`9{caT_t92}+WNJ*g<+TGh+>L_GsrGq73JQ^-73^%96rY& zw65(2Ty31AyP!HM8z~BK=df)0*Z8XN8Gmz69p@JNTGB{#1o%$Irx6mW4efOry(a?O zB6kzk?1ns-|BZOJe3QDBzRXl)S!e4B6c)!mVU4g)2y5L6|7skl+o{?j8!8HOm$7i> z&-kS<>7U{m;e2SnOtvPz09XvdI}*3ZD*FWI9FNC;EnF2hW)`p{x3g%zY`&_Y?nmQv z_`UTm(AgU-gZ+rjL0{MoSt6z?eJ{;4;C&Loaj0i5o_rSl5bWT~xavEqDIWO%x8VwW zCw`UqmO4t0cCGg6f}w~sIUsv7FXRst&z6^{1G@UAFx#_BWPcsSfH z@R#Sgvn`EMYlw1OjMv6r;BxXW%ItXU%6WeX_KOZp{+NA~m+;$*d&*(;VckpPUN~UA zhp4bWf#2@MenlH1h1Rp?2Zl23Yh_saL3o@yg>9D6C2B`z1(aT?Yc@TII!Elm)wlsa zfj=YWQ2Xfit|IR*f#H$j#Q4lkwu+k*N@RBBV(kONujZQ88pzM+7Ay;JuY>OPm$McT5vC#fsJf40vShTN6UWTt zQ~Y?_aHW5iXOk1AjnqbBEnwA*pTZv!UsLnwoYUoL61W@Q6knG<#N6S$6TFpFDtc>n z>$jQ;ECXx|(h&O_d|@|s7VU|QwTj`M#(KI-DxMq_n?T07vs4m_4G7`ByKcyN+KzmCgwMqHr;jt&IFAIsNtP>2ns4-9nL@DDR*lH9$NzPy z9o>#R2bvEV1G>Fxo}!JU2dJSr(=6RTzCCOT*gYcGdU`QM5%izO};xIn}rEJ*0XjX0u~-#W3(Fy6Z6y}kWHKZ)n}C)IM4$79QJ#YN=!j-WxsYW?5WCs({SeZlFQrpRpG!3(q z*u01ayAN1-i0QE~G8*)>&^*Y{Pb*NZlzkML_(jkR@SSvQUbue1>K*Bd(q5{VGyrer z;h*96@fSoNsxdvssr7LDEklQ*?US+0ZFU2whbR3>;ZSq*uZ%!fFk|7u7V+mURQrP-a_BH%-NaN_3i+SyfMv4z zUxQSaP*uv?NsbEgoS|%G22LK1mWI0eCwtyH_tKZCrlgV>3ciKmNAYchi`;KN z4y}v&6Voy|W;3U@;ILRE@1Yu`tz|e1-2Rhw4#?FdApers9_$@@329*~wG^078ob(o z>b886BqEr~{m2f^zDdrFwFwu1dHR-Xu4AG7Z*n7X95^!)9;JNjw{ zJ4DLkbn5GzGe3>z6!w>HQBzPFm$?GKgQF=mgLcKtzGEOkhwY0GvNA9C50kUcAIG9_L$j`Q* zC2qc8Y^I;9S*e^UYa@2@*K!)LRhebUFJc|TLjwD~wcIX8+&+NH5LtXENL4+)2=71) zCT~*TgRXe&9^g|2*{~sAoZ6c8utnT$g1^O(T&!|vHtTt&mM~_$WmBMHtP*_HkFCc3 zK(8aEwj-9~<|#(6uC->cvX=}If8=lCbY)|iOUVtfRpH}-l(*d7!}$e$km^P@C%y+N z65<2#5Ka??)E)Z)$30ga?`{9O(9-Cx#FMmy83^s-y%9oEwIZjwsqJStWV#Hmw91k0 zXd!kRDA$24zKgA785g|ZYvYMI zLv%Cyb#g0l4b1Hvfb>p$G%=TaORc28cb;^cd{+Z2!t-ORl9d^PfjI;C3q+fw>lELp z)w*?tD${FtmsOASKnt+zz)QW@D(oft0x7q-EHXG?+@Y6h2dZYuXG%(iO?VJgojZ}< zn%EY-9Af=*y{Mb#tVeI4I+N{)Ww-_ST}S*OzK^&=HnQXNE2qFS-6se>0p5Bh`6%ON z)STA*fudp3o(i4%sY`tg8BN22L76PbW#A;w&Q3G<=>W5RNCx&slKh+v# zGg)hKeNcOVtt*FG<$OyQ$u2iuC2iwdPTl zsI?C=25o{}1sFuI!wzTI!v10820Jf zYR)Ld@}83Kgd2Ispc~AaOfK0XzBJ+sF7P+@s@yf4lV}HZklamF04}+DlZB zPHG~JIK}ST-YNb!!SNAQ{6+FmW(hL}>cWGCwD^FmoARN$o^Fg`nyEJ&vW&IuKsKVq zAbGO@kzX(?HW2NFaBa&iFU)S^EB$tDQ}s!OR5n<=R&a%j=Os*=%y-FKv6_)X!ESyr za3HhuTRKdgBTo@_d^p&F@_{-|<70szET#ziLPsxGk!P~+QlL)wZ1l^7JYAjL&rajC z<>!RwBtzsMmBpH^x+}ov7s6WWJljEJGsw)_fS)+nowdZ~qvgl|&?Wuha?>iqc%4mi zLD@*YSwafy^T%@zuphFu(sL94L`%b=zzN?P&lcBXM??A{wT;{hJhAk@^|Zn(@V-Q2 zvNyHIjyhgCFT3w~_xQQvp)_`R6(M{P#IBU7GwSHak)y^U%83A#hrg2fNC37$56{jzPNHvX)h_+f>Kc zQQtzVP(N49m#HMXgeKtfqWljzCcPqQjo*)yhe`syeIq=ZTwccv+G>xJ4AGi605l~6 zPka0TKrKf^Nt1mNO*odhdU=}oS_g)OwnoTUQSxlMAa|6l%{j<330H_|X-mZn)h5jj z-4a6&lhZuTa@qQ??Hxd_2XMeJn4I5X%dtG#2c3)zv!T{&a1(R6@wEPy_MCd1a-f_m zJu2!Xc+UL_axn9<@^oclWDJeOf~4Q))q6&{o;pU-Mtg>o06#blCJY7OwE*}MM4Tfw zlDnv<_PUOv&SJOH8}Wq$l5p$j{P?3}&CK$gkNuK!gNF#q#P_7EyuPZvriD&pcwwAj zb^{Ej+rC5kp*b+|yaHaC0Q`=_UZXa&EmCMpSoXqJ^Frfq`uAE={g<**K402gObXWW z8gZ)FIXPSAd2(&MB-%1uKiJG);@#y=JOAgXMf)k7%o1IRGk7u39~aD*-{PO}ABbYI z1NEhSJDqYYcGdRK-Ut3`!N*}o)R-8O+LLkTI^=)ieB>1fr;012PvwNtu6_Y@_?0ni zngZXkM67um4f>-Ub`*00zJ1tPtQe#!gw{YTV3IfpH#HwJ#`Fz!eKbQ5V422MnL3LGDn`H zR@;}+dqEEuxOaJK`tJJY1v`dy(NxTpAX0dS%+ag^qPZXV&xB{hE2Mqpkn)hKq2{1A ztLtDGW1MIjU^c_|;J%g%mb3-6*0Kt$k1dle9=Jce)qLIb#CYAXT|Z1G&>m3NR~=A@ zb@CHooX(sT(4TorehPbo(J?b~e`VWePh@Ij4y5a*FQ)pX{K;L(eo1Mv zI#H2Wk{Fj5oameQA~8HME3qMQDM2TYEfC0RK9F1-~y}#joaV1~*l5-V<UV5b{l(|eZrD#mNn!X=X>Ty<>!D3_T^9KZ|DEazt21Jp?nhDd*$>0 E2dkvLssI20 literal 0 HcmV?d00001 diff --git a/app/src/assets/audio/ptt-error.wav b/app/src/assets/audio/ptt-error.wav new file mode 100644 index 0000000000000000000000000000000000000000..f6034a3e6996bbceddbbff6da01169dbf03c448a GIT binary patch literal 10628 zcmeI2_jA;C`p31>Hf`Va_9lYO7sElAKtc}z8=M^20Xy^>9|TM>)#OZwgNcve3#KwiCqxsA;pZVymm*$zLRQ$E1 zLr}P}_g)5_BFq2_1%ZL0>?%&?44z zG}H%DLIn5%yvpwMcd!Ed3kZUpoO7HyP8r9<@v!Hp1I7Vj;0d#rna&uQm-In;2Cbui zr*={AQ)22a`8hd?%w(@*S7!%g{h70w#hJnko<5tdPQQ^(raDraQ{^dZ%Aagc?nr)= zd^@R2#)zlHW#S<5IZ;i_B1(zT#86@oF_3tZ7)BHk6N!1m8sbahBypQ?5t8KK5-XT8E>W}dm_st7n8q|L#U&ajQ*4k&~usJm=fSeU=aHr z1>j*&2knF;+%LEQ?-O2(x0G+^&k#Ho6baje1@JDIhO0!siry6O6DP#wlCu&8ZRk|*Rsg+^gf^k8p|La2z#yW}16gYuQ~BDqxlRJKPp zQO1*9kyc6t(r+YVB`?L*;*6+4BoJ+eA$YwoB$zAsng1sL0FUI(=KcWXK}*3~94)5` zxXOr_8T4r?K^BqQvt60KncDQdlrFV6`6HnusuH*2)_7g)RkS#IKBA8_hRM*{P&8N* z%my|G^nn(CsXyr3?;GdKc&~VCy<@xzuN%LOpTYOyjd%mT3Ezev!N12lalkvk`;qs! z7xxbKed%-g{_1b_7X%IkjKQyi{X!j~X<;IKFfuNhh@Ooth!-S0iAzLX^8HkPI-7o; zxt!fcuA}DBCCux9p2G#hU>9_s+s6Bv|D~WtI2$e!{f}5EaY=4Tzmlz#k5NdK&y@#M z)AAJR+v-&sz4iy~WSv`AqfhHUHuw!QjW>)I(`wUo6KHE`%NcI6(+*C)0l5OZ|G+@u2<^+u6w2})UMTBRVVZM=gm@mt~{=2mp_sH zDRoP{Vz&r|e;3{nT;d<()pO@VW5Ik50CdwoQ2WWX*(sSJX;TVJ`VyV-`?2=uj+(MEJSItbOH z0+fUDP&L{QorKn*m(e6zY&&2}+x}*MZm+;Pu__1PJn9_pB3)P9wVpA!%Ioz$@Llj9 z3^WBBLiOQ|k;drm*wJ`f;vs=0>6AG$GCPOdKpms+GB_Xs2SDZARlFVi^Mbp=Za5+W zB{Hd2W|SKgDkWbPS9RresGBv*w8M0??wWq3!D75=oMQ@^)|oTrRhCzlBI_Zm-#QRk zjvPVmAr6E<7=%K?$P46qWGgZa(IJnl>#QbgtEI?t&s=Q2VnR&y#!f?_VWs|}&ZE_9 zi!=+=^?CbLCzUOVR(Y%JTj_DhUU7qHIXqQ3LSW(3+-~R^*vzQ`%9+<`F@SdF0}p%pc$T``Ts+r2=PgHn$1x1SPT7ap zpV-#g@@+5C!{`!pH2NAUK|$6e?9*s=Vn?{Rq zrQw>M)(z7w(>7~5)LnUT6+Jp9GSRC8rK%BRn z)vg}yF82}768vqi)<^id{SO1}!OJ1`&xy20@5eghz66*wrG})ZWY%W)lRr@1Gyvpt z-USy!Te#J!`VLZye1kkfJV=_=H;ed@XUIjQ0hxfnNV|2BRbc&>rLX0J zxzOBf0!;IaR}5UkczvVpx;CiMYKEvQ@)oLUly!=F`3707v|2JxTq+t2TZEv%!Mn{p z4s8VIa)tvsCQdyeTe90TE7Oxx!;|@hJVD2zQE$W*c87w&Oh6JS@E7^2ynFC_9?&z+ zy~Bk$-*FyyC>@(IKDOQ7)81iQU{l*3qr1>bbQo$zMXbIMsz7_A@1bkZGboM@vTe3G zZDsZj`!K8>8}G0=>YatIPS+0iB#$2V;2qvWzIy+Xz^q_-s4QF-`5-zcwlrRs*hQR8 z-brC;Fq=<~qpIj8rUm$olK^$xp}cAQ8o@Tj{(*PmYEitd9L}XX_V=xF>5F_?A8CFQ|OAc)ta5^3wd``&y_ZXOYV}{r7t8u zi?51K!P|s2f@%DrJRLUy{>EtmnwTnj9F%cS(e~5 zOY$R@=vw3*B+C*$#TsL2S6TL$CFb=en{kwJw*l1`=qq*mv_EOQYLPlWZ?LLZIYGhx zIkIBuP)VU!Bcg>)!2|v|-WKj+=-ph5%f+}{jLXHiT#U=bxLl0O<#D+@E|kNc52Ox9-? zW+tXbr2dlZK^PL+xFPmhbU>syJSX&7@MPerANH5{ws<@7e)vB~jGuLESw{wM4 z>znlM^pg}kh){DAChs0AQV(B&MLYYW@MqZ?Nrub0lQZ7(+ zsmk*HH_xd4Sp86~)6CI)rFo(O*x8)-w3XV`+V$ExZLM~xcA9pi)}Re*u4@`K<28KE zHT7b(O5K)Mp65}mQ}I+yN||ztf~%;Ndt{Sk?b4po28mrfO57|8!DHe5LR>IR(8zzz zE8wl-c0gii26&o703|>(ao+BAuv9@Dsb^ zz2ZN{rpMCJBhm5EY~)g8S)_j?7VZe|2rmp5g$u%xa4O^rVIh>geW6rH9PSx@CtMkB z47Y}{us%`|*&cZk(M2nxEm2NvYOEzDjxUM-5`QDnoB)aCL>Ez->_`qwolaTPd(+a) z7a1V?Nj9EcLE_|ms*9RJKcdGnSJ|nlV?Z9~ADk#>9{2!!1KJN|q1oK4TrF=o?>0}z zU&L?all&sVCc$k1BN!rFCOj;BD2xk@@JM(T{4u;0J_=ugJK%fpFYsgd5quxM0k^>? z;ob0O@IrV3+#i<0xbP?8SHhLTVxd}S7qkf035o=;;8*@`{#3q>-^tt0o61x3o^ZEu z$8-7IThM3Fa3}$O2QFu4|8UM}&O%NvjtlsPokSb}B$#VV15?K687JLJH_%h)LYk(2 zr!G+cq!v*nRBuW|g~&h18{~QN0NF@>LM|ugle5UlQDs+qb({Y*J1n$pt)=~DV5`ZKzjzDVDvQ94P>n0#g! jdyWs8HOxQQNyHP(cgzju7v>KJWpF0U5KNY#836bX6fTiq literal 0 HcmV?d00001 diff --git a/app/src/assets/audio/ptt-open.wav b/app/src/assets/audio/ptt-open.wav new file mode 100644 index 0000000000000000000000000000000000000000..a4798b3ed37ae4ac085d85a9b5e60e02dca4a072 GIT binary patch literal 7100 zcmW+)Wpos08=cvmb$6Sc!GaVkTBNuaDTOvzp-2mXQoQI#AV>)kAOr|d+`UMVLXqMS z971tM*4=$)zW4OZpWSnIpZAgb+E{-`5pP6^Ih_IUXV}a;<;Q-nQxjOo-fb8$k$-z zFt3@;>>q4x=yymDm%|cH2`9=K$-TxkAY+l;$P*-h?$6u+XB_7t z+z>8-p0hYRi8&7l&r5eoqiIzdPxnm!mj0CfI`b+sGg~`n&z;L} zVCJ$vLWAM1oF-f~;zKU*X7EjdBZ3;j1;R_hx8S)XoG-Kpw+kRaZ~kmv8M2OB!Wj>D zfsCv_e=D~yyEd~jU75O)%qPB2ypJ!6_l~!Y_lvKL2jcS*^^=*TKb1=tWPb!_soB}k z2e=z|4-(-u6ATv?i3-KFBzGiTr0b=prTe85rHo{}WUu&7(Jo<;K*isUn7K<~8{3)L zk>h4(rNbKvShD&^FTsH9u&Us7I)RimvkE(oW)}po}Nv&Vc^ScgmhibxlN~55w<* z1%WfZmEMz{x}Jzz=UL|&6(fD(L&-WfB11qsUwuJwN;*x%<1gemm;u?_$)PbrSP|&&{ouOiFxy|#BKj2djQW{g zV1pbHXI~HO7lnp`Gd5&?VA^nG{Kul7WiORE%}f12(|C*2+79z#DE9NVFmASLfdn7FUD6LRJ zTD{?(Nr*nSs_{oSgYUsFVg0P5EJZm)d!hh^n_Ol56#y}E8-2qtNdSj20E*4FR3BqPXw1}PK1e$l-l-_ zqp^FW&l$WFy^~C4+d&JF@egzt#l?|mb;XUpQ)|&m4coazmW_bs? zcGxG-7s)MzoUjnri8T2St#G_?seMO->!PQUne2DaX(S<3%KXZ?+P#Jm=FMntY$BMd z4gV6)TbrV9Onk#_O&w(`sYG~*JCsdk{zz<${1s^CmAihhe@>T?3kg2xyPN1l71$~q z3*9e$D?=+|H&R%B1?=H96Ze$s)n&Thjm<1wt+%j0a27uUQ2CX$z2$(hQirMg%A1Rw zykc09Kc1Qrn;1If8{+=O!Lik(UJx}239*T|ME*)&wXbqjd8Y*@M0X~c>Z#QKNId`;cm9q(v} zxIWB-DN#gK5adt&8)Yc!j6JooWw6+HLM|XOJiC)P}N_?6|F|H?4)csaVWAk zkoH`4a_!Yr=l|l4Bu0}wx~pC0D)h>N*6566EV~-2!TViA$w<{=-AUtc%PeacSB_SAIFwvDBNL|rhq-2_42rh7OByFd7zh79p-=`Ku&Q!WrnJ19-s-wZR%578Ca zHqe8!pa*Jel{suE*IrYWOHtu`?j6RESs8B?ZtmaWUf}T3De^eLaBYCIk<@Hk180#3 z544NyOLWer*;B~ZqD!*Zstvjq#+8=+Rulda590%I#>z#1Hg(l+SC`2fir4e1A$@Lg zG7`BFh?_Z zA=tR7cr&aAdcm|=A5oL?`Qm?hYPffZCh>3kfnDo=D9ar7c z{pDdKaVAsD_D6W46|%oot8{OT+bn-s8{$sfjW@$@SPxoU#x1%BstTD*GzeM99?PWS zi^3!P``yzWFX(sVCPF~8B5?95CAKG=Q@vw@7oy*#YUFe9WBwe;2ZdcT$53o`qldB6 zU`lK8G1y`>VS1{stw9v4#V>eC$dK!wybzfdDEBmX7TLN|PY6AM5+pH(s<2ITzVKWL z$fGBci*i4~-T938SH*fwOTz&3J#^E5D6PPYu;pmnbWg9+IOJo*J9y`zs%$zjDxwRt z^^lI5HVJhc%)Bw7BF|7fJL6pGEe|r$bE&QQRh%h;I?{E@&05^3v5d9mu{U5kJ@Gfz za~7v@k#2*kt!%CEB=;h7KMlp_hraV2cCE2%=>cRK^jV zqGQM&c3bApxH4SjgWUJ+ZRpWtKak&V06R2&&>{7({_&B5q&3$VZp}AI&MMrRU4|>> zrdAvF9PnvR{IPYj<(RRiPNrHdy(l=t*`BXRU5+*n>b*;xYixa}yMz*`uZp}uwXhFz zNqoIRp4h9jhd~g&=%K8wx{1ERG!Xq3D+k&A4(9O-z~M0c1a(%{QuH~ZViRd3J}jj7 z6}p<)uToO7E}l&-drMm_5IX&_0&_iX2V(n3E+aU^Fk&TH zN`K|}+kMC1FVZYID7Oawn}1C*U3peJ!8p@$%i0@H;P>$Bm<{b>_UL2k|H*fYN|5fX zB$JJ`4t?~hTprs(;3+gjJ;FeKq^8&}xc2zeVQ*ZV?F+5u9RMhPqxsX|G5-jhgbV+K zC$Tox(|7paEreZ1Y4qK8L8!rph@%3^k>>DW;*oiuXhI~s+wO@6e@ihsn6CJXp z&@*0Gd`nSZ%NQD3Dy=Q>2>t{pm=Eo2eye{D&@o7aA`h5F>2a|w!LHuL&dD|lwHr8* z{=^*ee}GqBxYPbUk;6%Et{Z2KV7GLbs#4d^R3Gh!Rf6u9;I%L}*#8zfhZ2`Uf|Kx| zoIELsO!g0Uf3z3S9mp1d`JNH8sj#i7%i`M{S`xpMF+#I>C&Zf-3T?vB#Io1g0gvOi z@e|lN)Mxrh-(I~&Hc!}qdm}$7)hD_<(8aUeahl#l4h4waO6aIxZ0ns9yw$-=u}FF- z`wt=%^A(#ln+y)~3~OyXh(E<|W3N!PxlBJseL_|tY{$KypOESpof-Jdv(&MbUP2B4 z^&KPHQU`4xomJiep`P(|83>xryDa`w(NCqkG)`9J)SRV)%hC#!LswxcM$54)coOe~ zZ?&o|R}5!08U-ZYgW&AG^pMz(!JxUV(7vBY^YK`Wdd z&&1%w*v<3+_9N0q{G&psRT{syytIzP^Y~7@H^xWZMv-orvRGo}mqF>w>iE>qBkz6Z z5}Srv2lAOpRFm~>h0dPdH$h*lFcV;x11t%ZyS1B*4=lB@wIH9Fc+gsGX<@9dEmjN_ zlSnuAXu4ahZSYUeYsXD`9Z-G&aBkBnxxK`7$~P~3A<;2M!}A5z(kfMRz033jeF7NO zhKDf3+SI&4-%#B_W*7A5?8~W>55iI3c~=YjN~($w5q*eK@(TURaoN*1I4<@&y_jtX zF!WaOn|6)yq2)7dBhZeY@Q2o}mbZpefamv#RLBp^#Z>=jpFp)+=V(CJCu;#uR-3Fv z%N!5gT>`zL=ToB@19DQ7kl)k<426~t)~TSTQe2Mhu=FuD(oRy;7jHmbGj-F)qm==* zr>mnU{RLT%uo5U~rUi~FckjTE=+o3PraMB3nkaIb0^@QE{2wn($3I%Xu~3GinkVvk zqUT&0GbBYu{`HS{pSHiIB7}-)M@%92(l;F!J)?sgWA!pV_B5}TWRbFwu0G((RoG47 z?^Sps>p*j*9#_j{YXwhWI5#+v44Hj)=K@=noCl_~m1s}Bwf*2a?%NrbByZ&^IfcT< zvS;d+hG*v0);@sKcH%9tyOv*!T|qqx@kr!1=5ne@R30dG->|=@!i0k8Oe`kv(vVZ> zeHpABf0{W0E##Y}15^RsbJHu7#_ae@{1mnn9c>z;JE|NjxxjnR!kJ+)eQ>;Inqw&4 zhO7hJO&fA3J{A&WT2Xdy!n#b%3|FD)9>g%*jwOhs<2~dxv5e|C^t!LJP1NF zlVTW%@v~zf-Hfb3)F*n8v*_)PeVz%yE3rA5(NHgbO414B@!sS>LzoADfR|%^P=U#% zRVt5&X(Y->(xamVpq8`tyVOVEUA`gClC^B5&imf0q4o)7j?eK4*2vDPTNrHSgVs_1 zahAv0%W}%lUDHUuNqCxbE?1rC9qtHv`jhPgISz2cU1Be_++OI8`CCQZsSnIMVE%jJDLKUU*4ftA zKkP`n$i3%06#gP#sfihuSxmq;X7MlZLTiz^RR300D!s|S2A$8mi%kfw_RMnhqHB>R z;Fx9u@6Z_drKuq%?#X81tl+fFqn>3bu)Mb}0(BJPYpkcuAN60Tol;z&gE?6=zAbp# zQ|2h78<1wAC5Z5nZtGm;T^nkUz;kUmorP&xGtD`}NDBh?C5N}g2U&-ji}VLn9i(IV zeWB)=0kPKsjwfq(QBJUPLy2ePcpK}S?>iALO3uu0=57^rQ4G_bHr530_aSb_pJH3k zZ%k&LMY&X5ii~BxO8qa=*kA5?VKY#30AF7t_EUT8TioLV@1nQTZdT6ENM@;e>wBB~ zTibyQ%J8tYm1VY}TD@3yRIm#EK07S_ckr@jxuXYdB(+3WVlP?OcFx(-mTdxgKlj{}DG8Oya(6&FQ&xs&pJk_*Efd{>T7h23v(GSzirpD7Uo0P6BTO;{>|jG(|UFc}08-nas3L6-JW2L9R_UJNZ4p zQ~072Khx-gRbI)9#<6unSLbyG|aP!@~h z`)~=?(K5`iR2`7M8Ar5TpohD?y$4kSPJc>#qAZSap4Guo@m-+y@xqDne>HQA zCUgtngJ<|c%x`IDT&+>b5#fHgMK&5M2nsz<><6h+fZ28t%c))VM{YWBJm$-chVKgA z%f@Id#y6Ik*kk-X{ySzury1{P`pdr*z5>Wm$J+&#wHt&8}5Mprl67P2K#Kujic z^d;v)pE}Ypb%p7}+W|1YUVqVi-MRwL;9C46I?WW&u2w7(Nx5@#;)Eb{8fc4$dIZ#a z12LD{X{X)BATK^C%fcnXgYrh&SH_v>8!U()1s$(r(HX?*c2XPfYxY`dQRKNV>8x#A z1kmz1v61X=N$4&tgr`8O9?Tf&78s3wTSWdF<;z zQ@6yfq(&1mplKy^4d+Ymgz)dl#>@+(yQGb3ivF1SAL|q zojGf>vN%6@)jiEVfSL!mGEaO%yByoR&BHU2YUUo&Tk@5vRDaL>#<~@c;dX2;dfy0Z z3*{o=X!vlZQLI%!V{^De6Fy7b0fPY9t?`SHJm@#9Mo+pM=x;H_S_A89>X$ypo#p}(#fhP zy3b5QEnU#}mL}%@hVM0NeH$=-*q9!{6zg7dI@kFPi+k6SWD z>>1>$sK5L(bv<1z1J8KJFiFqV{-Lat1_c4m^Ze^n`&i3R$hXDQ!_9Ypa9{DR4K#^N zO`zF4Ta8Q>t&-)HJ2d@t3Ed{$=i1k*bL z0y{$AM)xE}WCk;ZoYwq*M31FkC0Qw<{(jECOpWY`+i&sfDNo10oXqs>lzbo=DJd%y(E@W<} zS|!biMe)h;kMSpoW~p#Gnf;8J1nuG+MP~3Rfm^shv_aHVG+byEtl|B|y#`-kFXicM zuS_N-O_e5RB=03xr1qqR*+aRXnYIuE@8QaME%`jbJi%(gmx2xaO5Sk9%NYgVXFD*@ zavQUi8GCwpdVcz5dU)oGY`@&9Jjrx`*1@+pPq=%Lmb^v0CA<#23rLD9=H}q{&|mCc zW`F)+u1@Z3c42mD_GY$C?rm;YejziS9RM|i!|-N~mir@jId=}X1NR+gDCaV)ghxTA zSUJ0xkujI@rTO{!wfXCLIWvtRnc?gUwg+?qLg1e840r{+9G(I`so>|(TBt81g{s-Z a> Date: Thu, 4 Jun 2026 00:39:09 +0530 Subject: [PATCH 17/36] feat(store/ptt): redux slice for ptt hotkey + settings (#3090) --- app/src/store/__tests__/pttSlice.test.ts | 46 +++++++++++++++ app/src/store/index.ts | 13 +++++ app/src/store/pttSlice.ts | 73 ++++++++++++++++++++++++ 3 files changed, 132 insertions(+) create mode 100644 app/src/store/__tests__/pttSlice.test.ts create mode 100644 app/src/store/pttSlice.ts diff --git a/app/src/store/__tests__/pttSlice.test.ts b/app/src/store/__tests__/pttSlice.test.ts new file mode 100644 index 0000000000..f35a6c61a6 --- /dev/null +++ b/app/src/store/__tests__/pttSlice.test.ts @@ -0,0 +1,46 @@ +import { describe, expect, it } from 'vitest'; + +import { + pttReducer, + setPttShortcut, + setSpeakReplies, + setShowOverlay, + setIsHeld, + type PttState, +} from '../pttSlice'; + +describe('ptt slice', () => { + const initial: PttState = { + shortcut: null, + speakReplies: true, + showOverlay: true, + isHeld: false, + }; + + it('has the documented default state', () => { + expect(pttReducer(undefined, { type: '@@INIT' })).toEqual(initial); + }); + + it('setPttShortcut stores the shortcut string', () => { + const next = pttReducer(initial, setPttShortcut('F13')); + expect(next.shortcut).toBe('F13'); + }); + + it('setPttShortcut with null clears the shortcut', () => { + const withKey: PttState = { ...initial, shortcut: 'F13' }; + const next = pttReducer(withKey, setPttShortcut(null)); + expect(next.shortcut).toBeNull(); + }); + + it('setSpeakReplies toggles the flag', () => { + expect(pttReducer(initial, setSpeakReplies(false)).speakReplies).toBe(false); + }); + + it('setShowOverlay toggles the flag', () => { + expect(pttReducer(initial, setShowOverlay(false)).showOverlay).toBe(false); + }); + + it('setIsHeld updates the runtime hold flag', () => { + expect(pttReducer(initial, setIsHeld(true)).isHeld).toBe(true); + }); +}); diff --git a/app/src/store/index.ts b/app/src/store/index.ts index 32daba8fa3..37283ca275 100644 --- a/app/src/store/index.ts +++ b/app/src/store/index.ts @@ -27,6 +27,7 @@ import providerSurfacesReducer from './providerSurfaceSlice'; import socketReducer from './socketSlice'; import themeReducer from './themeSlice'; import threadReducer from './threadSlice'; +import { pttReducer } from './pttSlice'; import { userScopedStorage } from './userScopedStorage'; import workflowsReducer from './workflowsSlice'; @@ -150,6 +151,17 @@ const persistedMascotReducer = persistReducer(mascotPersistConfig, mascotReducer const personaPersistConfig = { key: 'persona', storage, whitelist: ['displayName', 'description'] }; const persistedPersonaReducer = persistReducer(personaPersistConfig, personaReducer); +// PTT (Push-to-Talk): persist the hotkey binding and session preferences. +// `isHeld` is a runtime-only flag — deliberately excluded from the whitelist so +// a crash or force-quit can never leave the app stuck in the "held" state. +// The boot hook (T11) also explicitly resets it to false on mount. +const pttPersistConfig = { + key: 'ptt', + storage, + whitelist: ['shortcut', 'speakReplies', 'showOverlay'], +}; +const persistedPttReducer = persistReducer(pttPersistConfig, pttReducer); + export const store = configureStore({ reducer: { socket: socketReducer, @@ -167,6 +179,7 @@ export const store = configureStore({ mascot: persistedMascotReducer, persona: persistedPersonaReducer, theme: persistedThemeReducer, + ptt: persistedPttReducer, workflows: workflowsReducer, }, middleware: getDefaultMiddleware => { diff --git a/app/src/store/pttSlice.ts b/app/src/store/pttSlice.ts new file mode 100644 index 0000000000..f557d444f2 --- /dev/null +++ b/app/src/store/pttSlice.ts @@ -0,0 +1,73 @@ +import { createSlice, type PayloadAction } from '@reduxjs/toolkit'; + +import { resetUserScopedState } from './resetActions'; + +/** + * PTT (Push-to-Talk) slice — persisted hotkey binding + session settings, + * plus a non-persisted runtime hold flag that tracks whether the key is + * currently held. The boot hook (Task 11) resets `isHeld` to false on mount + * so a stale persisted value can never leave the app stuck in "held" mode. + */ + +export interface PttState { + /** Currently-bound PTT hotkey string (e.g. "F13" or "Ctrl+Alt+T"). null = unbound. */ + shortcut: string | null; + /** When true, the agent's reply is spoken via TTS. */ + speakReplies: boolean; + /** When true, the overlay window is shown during a PTT session. */ + showOverlay: boolean; + /** Non-persisted runtime flag: is the PTT key currently held? */ + isHeld: boolean; +} + +export const initialPttState: PttState = { + shortcut: null, + speakReplies: true, + showOverlay: true, + isHeld: false, +}; + +const pttSlice = createSlice({ + name: 'ptt', + initialState: initialPttState, + reducers: { + setPttShortcut(state, action: PayloadAction) { + state.shortcut = action.payload; + }, + setSpeakReplies(state, action: PayloadAction) { + state.speakReplies = action.payload; + }, + setShowOverlay(state, action: PayloadAction) { + state.showOverlay = action.payload; + }, + setIsHeld(state, action: PayloadAction) { + state.isHeld = action.payload; + }, + }, + extraReducers: builder => { + builder.addCase(resetUserScopedState, () => initialPttState); + }, +}); + +export const { setPttShortcut, setSpeakReplies, setShowOverlay, setIsHeld } = pttSlice.actions; + +// ── Selectors ──────────────────────────────────────────────────────────────── +// Tolerate a missing `ptt` slice so consumers don't crash in test harnesses +// that mock the store without this slice. + +type MaybePttRoot = { ptt?: PttState }; + +export const selectPttShortcut = (state: MaybePttRoot): string | null => + state.ptt?.shortcut ?? initialPttState.shortcut; + +export const selectSpeakReplies = (state: MaybePttRoot): boolean => + state.ptt?.speakReplies ?? initialPttState.speakReplies; + +export const selectShowOverlay = (state: MaybePttRoot): boolean => + state.ptt?.showOverlay ?? initialPttState.showOverlay; + +export const selectIsHeld = (state: MaybePttRoot): boolean => + state.ptt?.isHeld ?? initialPttState.isHeld; + +export const pttReducer = pttSlice.reducer; +export default pttSlice.reducer; From c258e9211f0993b59d10c028525427b07efc530d Mon Sep 17 00:00:00 2001 From: Ghost Scripter Date: Thu, 4 Jun 2026 00:55:15 +0530 Subject: [PATCH 18/36] refactor(store/ptt): align selectors; add resetUserScopedState test (#3090) - Drop MaybePttRoot optional-key type; selectors now use { ptt: PttState } mirroring the mascotSlice selector convention - Remove duplicate `export default pttSlice.reducer`; keep only the named pttReducer export that index.ts already imports - Add resetUserScopedState test asserting dirty state returns to initialPttState --- app/src/store/__tests__/pttSlice.test.ts | 13 +++++++++++++ app/src/store/pttSlice.ts | 21 ++++++++------------- 2 files changed, 21 insertions(+), 13 deletions(-) diff --git a/app/src/store/__tests__/pttSlice.test.ts b/app/src/store/__tests__/pttSlice.test.ts index f35a6c61a6..0092658052 100644 --- a/app/src/store/__tests__/pttSlice.test.ts +++ b/app/src/store/__tests__/pttSlice.test.ts @@ -1,6 +1,7 @@ import { describe, expect, it } from 'vitest'; import { + initialPttState, pttReducer, setPttShortcut, setSpeakReplies, @@ -8,6 +9,7 @@ import { setIsHeld, type PttState, } from '../pttSlice'; +import { resetUserScopedState } from '../resetActions'; describe('ptt slice', () => { const initial: PttState = { @@ -43,4 +45,15 @@ describe('ptt slice', () => { it('setIsHeld updates the runtime hold flag', () => { expect(pttReducer(initial, setIsHeld(true)).isHeld).toBe(true); }); + + it('resetUserScopedState returns the slice to initial state', () => { + const dirty: PttState = { + shortcut: 'F13', + speakReplies: false, + showOverlay: false, + isHeld: true, + }; + const next = pttReducer(dirty, resetUserScopedState()); + expect(next).toEqual(initialPttState); + }); }); diff --git a/app/src/store/pttSlice.ts b/app/src/store/pttSlice.ts index f557d444f2..53978efce9 100644 --- a/app/src/store/pttSlice.ts +++ b/app/src/store/pttSlice.ts @@ -52,22 +52,17 @@ const pttSlice = createSlice({ export const { setPttShortcut, setSpeakReplies, setShowOverlay, setIsHeld } = pttSlice.actions; // ── Selectors ──────────────────────────────────────────────────────────────── -// Tolerate a missing `ptt` slice so consumers don't crash in test harnesses -// that mock the store without this slice. -type MaybePttRoot = { ptt?: PttState }; +export const selectPttShortcut = (state: { ptt: PttState }): string | null => + state.ptt.shortcut; -export const selectPttShortcut = (state: MaybePttRoot): string | null => - state.ptt?.shortcut ?? initialPttState.shortcut; +export const selectSpeakReplies = (state: { ptt: PttState }): boolean => + state.ptt.speakReplies; -export const selectSpeakReplies = (state: MaybePttRoot): boolean => - state.ptt?.speakReplies ?? initialPttState.speakReplies; +export const selectShowOverlay = (state: { ptt: PttState }): boolean => + state.ptt.showOverlay; -export const selectShowOverlay = (state: MaybePttRoot): boolean => - state.ptt?.showOverlay ?? initialPttState.showOverlay; - -export const selectIsHeld = (state: MaybePttRoot): boolean => - state.ptt?.isHeld ?? initialPttState.isHeld; +export const selectIsHeld = (state: { ptt: PttState }): boolean => + state.ptt.isHeld; export const pttReducer = pttSlice.reducer; -export default pttSlice.reducer; From aeb99c6b478ca9837449eec5e0e422e90d318cfe Mon Sep 17 00:00:00 2001 From: Ghost Scripter Date: Thu, 4 Jun 2026 01:14:31 +0530 Subject: [PATCH 19/36] feat(chatService): forward speakReply/source/sessionId; add ptt tauri wrappers (#3090) --- .../services/__tests__/chatService.test.ts | 36 +++++++++++++++++ app/src/services/chatService.ts | 18 +++++++++ app/src/utils/tauriCommands/ptt.ts | 40 +++++++++++++++++++ 3 files changed, 94 insertions(+) create mode 100644 app/src/utils/tauriCommands/ptt.ts diff --git a/app/src/services/__tests__/chatService.test.ts b/app/src/services/__tests__/chatService.test.ts index 2359117bce..03ebd9b984 100644 --- a/app/src/services/__tests__/chatService.test.ts +++ b/app/src/services/__tests__/chatService.test.ts @@ -223,4 +223,40 @@ describe('chatService.subscribeChatEvents', () => { }, }); }); + + it('forwards speak_reply, source, session_id when provided', async () => { + const socket = createMockSocket(); + vi.mocked(socketService.getSocket).mockReturnValue(socket as never); + + await chatSend({ + threadId: 'thread-1', + message: 'hello', + speakReply: true, + source: 'ptt', + sessionId: 42, + }); + + expect(mockCallCoreRpc).toHaveBeenCalledWith( + expect.objectContaining({ + method: 'openhuman.channel_web_chat', + params: expect.objectContaining({ + message: 'hello', + speak_reply: true, + source: 'ptt', + session_id: 42, + }), + }), + ); + }); + + it('does not include the new fields when omitted', async () => { + const socket = createMockSocket(); + vi.mocked(socketService.getSocket).mockReturnValue(socket as never); + + await chatSend({ threadId: 'thread-1', message: 'hi' }); + const params = mockCallCoreRpc.mock.calls[0][0].params; + expect(params.speak_reply).toBeUndefined(); + expect(params.source).toBeUndefined(); + expect(params.session_id).toBeUndefined(); + }); }); diff --git a/app/src/services/chatService.ts b/app/src/services/chatService.ts index a60b0fcf1c..4cf60a7cc7 100644 --- a/app/src/services/chatService.ts +++ b/app/src/services/chatService.ts @@ -767,6 +767,21 @@ export interface ChatSendParams { * working unchanged. */ locale?: string | null; + /** + * When `true`, the core will synthesize the agent reply via TTS and + * stream audio back (push-to-talk reply flow). + */ + speakReply?: boolean; + /** + * Originating input source — e.g. `'ptt'` for push-to-talk, `'keyboard'` + * for typed input. Forwarded to the core for analytics / routing. + */ + source?: string; + /** + * PTT session ID — ties the chat turn to a specific push-to-talk recording + * session so the core can correlate audio and text events. + */ + sessionId?: number; } /** @@ -792,6 +807,9 @@ export async function chatSend(params: ChatSendParams): Promise { model_override: params.model ?? undefined, profile_id: params.profileId ?? undefined, locale: params.locale ?? undefined, + speak_reply: params.speakReply ?? undefined, + source: params.source ?? undefined, + session_id: params.sessionId ?? undefined, }, }); } diff --git a/app/src/utils/tauriCommands/ptt.ts b/app/src/utils/tauriCommands/ptt.ts new file mode 100644 index 0000000000..d5a2abab22 --- /dev/null +++ b/app/src/utils/tauriCommands/ptt.ts @@ -0,0 +1,40 @@ +/** + * Push-to-talk (PTT) Tauri command wrappers. + */ +import { invoke } from '@tauri-apps/api/core'; + +import { isTauri } from './common'; + +/** + * Register (or re-register) the global push-to-talk hotkey. + */ +export async function registerPttHotkey(shortcut: string): Promise { + if (!isTauri()) { + console.debug('[ptt] registerPttHotkey: skipped — not running in Tauri'); + return; + } + console.debug('[ptt] registerPttHotkey: shortcut=%s', shortcut); + await invoke('register_ptt_hotkey', { shortcut }); + console.debug('[ptt] registerPttHotkey: done'); +} + +/** + * Unregister the global push-to-talk hotkey. + */ +export async function unregisterPttHotkey(): Promise { + if (!isTauri()) { + console.debug('[ptt] unregisterPttHotkey: skipped — not running in Tauri'); + return; + } + console.debug('[ptt] unregisterPttHotkey: invoking'); + await invoke('unregister_ptt_hotkey'); + console.debug('[ptt] unregisterPttHotkey: done'); +} + +/** + * Show or hide the PTT overlay window. + */ +export async function showPttOverlay(active: boolean, sessionId: number): Promise { + if (!isTauri()) return; + await invoke('show_ptt_overlay', { active, sessionId }); +} From 136725029fadc9d6821742d843fbb205b85eab7a Mon Sep 17 00:00:00 2001 From: Ghost Scripter Date: Thu, 4 Jun 2026 01:18:56 +0530 Subject: [PATCH 20/36] style(ptt-wrappers): add skip/done debug logs to showPttOverlay (#3090) --- app/src/utils/tauriCommands/ptt.ts | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/app/src/utils/tauriCommands/ptt.ts b/app/src/utils/tauriCommands/ptt.ts index d5a2abab22..981c56c9eb 100644 --- a/app/src/utils/tauriCommands/ptt.ts +++ b/app/src/utils/tauriCommands/ptt.ts @@ -35,6 +35,10 @@ export async function unregisterPttHotkey(): Promise { * Show or hide the PTT overlay window. */ export async function showPttOverlay(active: boolean, sessionId: number): Promise { - if (!isTauri()) return; + if (!isTauri()) { + console.debug('[ptt] showPttOverlay: skipped — not running in Tauri'); + return; + } + console.debug('[ptt] showPttOverlay: active=%s sessionId=%d', active, sessionId); await invoke('show_ptt_overlay', { active, sessionId }); } From c3c642fe230cc99a3f703544e3b1a291c12cd6fc Mon Sep 17 00:00:00 2001 From: Ghost Scripter Date: Thu, 4 Jun 2026 01:24:10 +0530 Subject: [PATCH 21/36] feat(pttService): state machine, watchdog, preempt, fallback thread (#3090) --- app/src/services/__tests__/pttService.test.ts | 151 ++++++++++++ app/src/services/pttService.ts | 214 ++++++++++++++++++ 2 files changed, 365 insertions(+) create mode 100644 app/src/services/__tests__/pttService.test.ts create mode 100644 app/src/services/pttService.ts diff --git a/app/src/services/__tests__/pttService.test.ts b/app/src/services/__tests__/pttService.test.ts new file mode 100644 index 0000000000..6cab4cb87a --- /dev/null +++ b/app/src/services/__tests__/pttService.test.ts @@ -0,0 +1,151 @@ +import { beforeEach, describe, expect, it, vi } from 'vitest'; + +import { createPttService, type PttDeps } from '../pttService'; + +function makeDeps(overrides: Partial = {}): PttDeps { + return { + audioCapture: { + start: vi.fn().mockResolvedValue(undefined), + finalize: vi.fn().mockResolvedValue({ durationMs: 1500, buffer: new ArrayBuffer(0) }), + cancel: vi.fn().mockResolvedValue(undefined), + }, + transcribe: vi.fn().mockResolvedValue('hello world'), + sendMessage: vi.fn().mockResolvedValue(undefined), + resolveActiveThreadId: vi.fn().mockResolvedValue('thread-active'), + createNewVoiceThread: vi.fn().mockResolvedValue('thread-new'), + playChime: vi.fn().mockResolvedValue(undefined), + showOverlay: vi.fn().mockResolvedValue(undefined), + getSettings: () => ({ speakReplies: true, showOverlay: true }), + now: () => 1_700_000_000_000, + watchdogMs: 10_000, + minAudioMs: 250, + logger: { debug: vi.fn(), info: vi.fn(), warn: vi.fn() }, + ...overrides, + }; +} + +describe('pttService state machine', () => { + beforeEach(() => { + vi.useFakeTimers(); + }); + + it('happy path: start → stop sends the transcript to the active thread with speakReply', async () => { + const deps = makeDeps(); + const svc = createPttService(deps); + + await svc.onStart(1); + expect(deps.audioCapture.start).toHaveBeenCalledWith({ sessionTag: 'ptt:1' }); + expect(deps.playChime).toHaveBeenCalledWith('open'); + expect(deps.showOverlay).toHaveBeenCalledWith(true, 1); + + await svc.onStop(1); + expect(deps.audioCapture.finalize).toHaveBeenCalled(); + expect(deps.playChime).toHaveBeenCalledWith('close'); + expect(deps.showOverlay).toHaveBeenCalledWith(false, 1); + expect(deps.transcribe).toHaveBeenCalled(); + expect(deps.sendMessage).toHaveBeenCalledWith({ + threadId: 'thread-active', + body: 'hello world', + metadata: { source: 'ptt', session_id: 1 }, + speakReply: true, + }); + }); + + it('falls back to a new "Voice" thread when no active thread exists', async () => { + const deps = makeDeps({ + resolveActiveThreadId: vi.fn().mockResolvedValue(null), + }); + const svc = createPttService(deps); + + await svc.onStart(2); + await svc.onStop(2); + + expect(deps.createNewVoiceThread).toHaveBeenCalled(); + expect(deps.sendMessage).toHaveBeenCalledWith( + expect.objectContaining({ threadId: 'thread-new' }), + ); + }); + + it('drops the session and plays the error chime when audio is shorter than minAudioMs', async () => { + const deps = makeDeps({ + audioCapture: { + start: vi.fn().mockResolvedValue(undefined), + finalize: vi.fn().mockResolvedValue({ durationMs: 100, buffer: new ArrayBuffer(0) }), + cancel: vi.fn().mockResolvedValue(undefined), + }, + }); + const svc = createPttService(deps); + + await svc.onStart(3); + await svc.onStop(3); + + expect(deps.transcribe).not.toHaveBeenCalled(); + expect(deps.sendMessage).not.toHaveBeenCalled(); + expect(deps.playChime).toHaveBeenCalledWith('error'); + }); + + it('drops the session when the transcript is empty', async () => { + const deps = makeDeps({ + transcribe: vi.fn().mockResolvedValue(' '), + }); + const svc = createPttService(deps); + + await svc.onStart(4); + await svc.onStop(4); + + expect(deps.sendMessage).not.toHaveBeenCalled(); + expect(deps.playChime).toHaveBeenCalledWith('error'); + }); + + it('watchdog finalises the session after watchdogMs even if onStop never arrives', async () => { + const deps = makeDeps(); + const svc = createPttService(deps); + + await svc.onStart(5); + + // Advance fake time past the watchdog. + await vi.advanceTimersByTimeAsync(11_000); + + expect(deps.audioCapture.finalize).toHaveBeenCalled(); + expect(deps.sendMessage).toHaveBeenCalledWith( + expect.objectContaining({ + metadata: expect.objectContaining({ session_id: 5 }), + }), + ); + }); + + it('second onStart while a session is active preempts the first', async () => { + const deps = makeDeps(); + const svc = createPttService(deps); + + await svc.onStart(6); + await svc.onStart(7); + + expect(deps.audioCapture.cancel).toHaveBeenCalled(); + expect(deps.audioCapture.start).toHaveBeenLastCalledWith({ sessionTag: 'ptt:7' }); + }); + + it('honours the speakReplies setting when forwarding to sendMessage', async () => { + const deps = makeDeps({ + getSettings: () => ({ speakReplies: false, showOverlay: true }), + }); + const svc = createPttService(deps); + + await svc.onStart(8); + await svc.onStop(8); + + expect(deps.sendMessage).toHaveBeenCalledWith( + expect.objectContaining({ speakReply: false }), + ); + }); + + it('mismatched session_id on onStop is ignored', async () => { + const deps = makeDeps(); + const svc = createPttService(deps); + + await svc.onStart(9); + await svc.onStop(999); // stale stop event + + expect(deps.audioCapture.finalize).not.toHaveBeenCalled(); + }); +}); diff --git a/app/src/services/pttService.ts b/app/src/services/pttService.ts new file mode 100644 index 0000000000..01e7c7aabe --- /dev/null +++ b/app/src/services/pttService.ts @@ -0,0 +1,214 @@ +/** + * pttService — push-to-talk session state machine. + * + * See spec: `docs/superpowers/specs/2026-06-02-global-ptt-design.md` (§ 2, § 3). + * + * Dependency-injected so vitest can exercise the state machine with fake + * audio capture / fake STT / fake sendMessage. Real wiring (subscribing to + * `ptt://*` Tauri events, the real audio_capture, etc.) happens in + * PttHotkeyManager.tsx (T11). + */ + +export type ChimeKind = 'open' | 'close' | 'error'; + +export interface PttSettings { + speakReplies: boolean; + showOverlay: boolean; +} + +export interface FinalizedAudio { + durationMs: number; + buffer: ArrayBuffer; +} + +export interface PttDeps { + audioCapture: { + start(opts: { sessionTag: string }): Promise; + finalize(): Promise; + cancel(): Promise; + }; + transcribe(buf: ArrayBuffer): Promise; + sendMessage(args: { + threadId: string; + body: string; + metadata: { source: 'ptt'; session_id: number }; + speakReply: boolean; + }): Promise; + resolveActiveThreadId(): Promise; + createNewVoiceThread(): Promise; + playChime(kind: ChimeKind): Promise; + showOverlay(active: boolean, sessionId: number): Promise; + getSettings(): PttSettings; + now(): number; + watchdogMs: number; + minAudioMs: number; + logger: { + debug(msg: string, meta?: Record): void; + info(msg: string, meta?: Record): void; + warn(msg: string, meta?: Record): void; + }; +} + +export interface PttService { + onStart(sessionId: number): Promise; + onStop(sessionId: number): Promise; + cancel(reason: 'preempted' | 'mic_failure' | 'user_cancel'): Promise; +} + +interface ActiveSession { + sessionId: number; + startedAtMs: number; + watchdogTimer: ReturnType | null; + finalizedByWatchdog: boolean; +} + +export function createPttService(deps: PttDeps): PttService { + let active: ActiveSession | null = null; + + const armWatchdog = (sessionId: number) => { + const timer = setTimeout(() => { + if (active && active.sessionId === sessionId) { + active.finalizedByWatchdog = true; + deps.logger.warn('[ptt] watchdog fired — finalising session', { sessionId }); + // Fire-and-forget; the watchdog path is the same as a normal stop + // except for the `finalizedByWatchdog` flag (used in logging only). + void finaliseSession(sessionId, /* fromWatchdog */ true); + } + }, deps.watchdogMs); + return timer; + }; + + const finaliseSession = async (sessionId: number, fromWatchdog: boolean) => { + if (!active || active.sessionId !== sessionId) { + // Stale finalisation — ignore. + return; + } + + if (active.watchdogTimer) { + clearTimeout(active.watchdogTimer); + active.watchdogTimer = null; + } + + const settings = deps.getSettings(); + const session = active; + active = null; + + let audio: FinalizedAudio; + try { + audio = await deps.audioCapture.finalize(); + } catch (err) { + deps.logger.warn('[ptt] audio finalize failed', { sessionId, err: String(err) }); + await deps.playChime('error'); + await deps.showOverlay(false, sessionId); + return; + } + + await deps.playChime('close'); + await deps.showOverlay(false, sessionId); + + if (audio.durationMs < deps.minAudioMs) { + deps.logger.info('[ptt] session dropped — audio shorter than minAudioMs', { + sessionId, + durationMs: audio.durationMs, + }); + await deps.playChime('error'); + return; + } + + let text = ''; + try { + text = await deps.transcribe(audio.buffer); + } catch (err) { + deps.logger.warn('[ptt] transcription failed', { sessionId, err: String(err) }); + // Per spec: post the message anyway as a breadcrumb. + text = '[Voice — transcription failed]'; + } + + if (!text.trim()) { + deps.logger.info('[ptt] session dropped — empty transcript', { sessionId }); + await deps.playChime('error'); + return; + } + + let threadId = await deps.resolveActiveThreadId(); + if (!threadId) { + threadId = await deps.createNewVoiceThread(); + } + + await deps.sendMessage({ + threadId, + body: text.trim(), + metadata: { source: 'ptt', session_id: sessionId }, + speakReply: settings.speakReplies, + }); + + deps.logger.info('[ptt] session committed', { + sessionId, + threadId, + heldMs: deps.now() - session.startedAtMs, + finalizedByWatchdog: fromWatchdog, + transcriptLen: text.trim().length, + }); + }; + + return { + async onStart(sessionId) { + if (active) { + deps.logger.debug('[ptt] onStart while active — preempting', { + old: active.sessionId, + new: sessionId, + }); + try { + await deps.audioCapture.cancel(); + } catch (err) { + deps.logger.warn('[ptt] cancel failed during preempt', { err: String(err) }); + } + if (active.watchdogTimer) clearTimeout(active.watchdogTimer); + active = null; + } + + await deps.playChime('open'); + await deps.showOverlay(true, sessionId); + + try { + await deps.audioCapture.start({ sessionTag: `ptt:${sessionId}` }); + } catch (err) { + deps.logger.warn('[ptt] audio start failed', { sessionId, err: String(err) }); + await deps.playChime('error'); + await deps.showOverlay(false, sessionId); + return; + } + + active = { + sessionId, + startedAtMs: deps.now(), + watchdogTimer: null, + finalizedByWatchdog: false, + }; + active.watchdogTimer = armWatchdog(sessionId); + }, + + async onStop(sessionId) { + if (!active || active.sessionId !== sessionId) { + deps.logger.debug('[ptt] stale onStop — ignored', { sessionId }); + return; + } + await finaliseSession(sessionId, /* fromWatchdog */ false); + }, + + async cancel(reason) { + if (!active) return; + deps.logger.info('[ptt] cancel', { sessionId: active.sessionId, reason }); + if (active.watchdogTimer) clearTimeout(active.watchdogTimer); + const session = active; + active = null; + try { + await deps.audioCapture.cancel(); + } catch (err) { + deps.logger.warn('[ptt] cancel: audio cancel failed', { err: String(err) }); + } + await deps.playChime('error'); + await deps.showOverlay(false, session.sessionId); + }, + }; +} From 0aec9d28530de31b9d4cf0b084078f862ef9067e Mon Sep 17 00:00:00 2001 From: Ghost Scripter Date: Thu, 4 Jun 2026 01:29:07 +0530 Subject: [PATCH 22/36] refactor(pttService): close preempt race; cover cancel/start-fail/transcribe-fail paths (#3090) --- app/src/services/__tests__/pttService.test.ts | 51 +++++++++++++++++++ app/src/services/pttService.ts | 44 ++++++++++++---- 2 files changed, 86 insertions(+), 9 deletions(-) diff --git a/app/src/services/__tests__/pttService.test.ts b/app/src/services/__tests__/pttService.test.ts index 6cab4cb87a..77a5dde65d 100644 --- a/app/src/services/__tests__/pttService.test.ts +++ b/app/src/services/__tests__/pttService.test.ts @@ -148,4 +148,55 @@ describe('pttService state machine', () => { expect(deps.audioCapture.finalize).not.toHaveBeenCalled(); }); + + it('cancel("user_cancel") aborts an active session without sending a message', async () => { + const deps = makeDeps(); + const svc = createPttService(deps); + + await svc.onStart(10); + await svc.cancel('user_cancel'); + + expect(deps.audioCapture.cancel).toHaveBeenCalled(); + expect(deps.playChime).toHaveBeenCalledWith('error'); + expect(deps.showOverlay).toHaveBeenLastCalledWith(false, 10); + expect(deps.sendMessage).not.toHaveBeenCalled(); + }); + + it('plays error chime and bails if audioCapture.start throws', async () => { + const deps = makeDeps({ + audioCapture: { + start: vi.fn().mockRejectedValue(new Error('mic denied')), + finalize: vi.fn().mockResolvedValue({ durationMs: 1500, buffer: new ArrayBuffer(0) }), + cancel: vi.fn().mockResolvedValue(undefined), + }, + }); + const svc = createPttService(deps); + + await svc.onStart(11); + + expect(deps.playChime).toHaveBeenCalledWith('open'); + expect(deps.playChime).toHaveBeenCalledWith('error'); + expect(deps.showOverlay).toHaveBeenLastCalledWith(false, 11); + // The session never armed — onStop should be a no-op. + await svc.onStop(11); + expect(deps.audioCapture.finalize).not.toHaveBeenCalled(); + expect(deps.sendMessage).not.toHaveBeenCalled(); + }); + + it('posts a "[Voice — transcription failed]" breadcrumb when transcribe throws', async () => { + const deps = makeDeps({ + transcribe: vi.fn().mockRejectedValue(new Error('stt timeout')), + }); + const svc = createPttService(deps); + + await svc.onStart(12); + await svc.onStop(12); + + expect(deps.sendMessage).toHaveBeenCalledWith( + expect.objectContaining({ + body: '[Voice — transcription failed]', + metadata: { source: 'ptt', session_id: 12 }, + }), + ); + }); }); diff --git a/app/src/services/pttService.ts b/app/src/services/pttService.ts index 01e7c7aabe..85f7546bde 100644 --- a/app/src/services/pttService.ts +++ b/app/src/services/pttService.ts @@ -124,7 +124,9 @@ export function createPttService(deps: PttDeps): PttService { text = '[Voice — transcription failed]'; } - if (!text.trim()) { + const trimmed = text.trim(); + + if (!trimmed) { deps.logger.info('[ptt] session dropped — empty transcript', { sessionId }); await deps.playChime('error'); return; @@ -137,7 +139,7 @@ export function createPttService(deps: PttDeps): PttService { await deps.sendMessage({ threadId, - body: text.trim(), + body: trimmed, metadata: { source: 'ptt', session_id: sessionId }, speakReply: settings.speakReplies, }); @@ -147,12 +149,13 @@ export function createPttService(deps: PttDeps): PttService { threadId, heldMs: deps.now() - session.startedAtMs, finalizedByWatchdog: fromWatchdog, - transcriptLen: text.trim().length, + transcriptLen: trimmed.length, }); }; return { async onStart(sessionId) { + // Preempt: if another session is active, cancel it. if (active) { deps.logger.debug('[ptt] onStart while active — preempting', { old: active.sessionId, @@ -167,24 +170,47 @@ export function createPttService(deps: PttDeps): PttService { active = null; } + // Claim the slot BEFORE any awaits so concurrent onStart calls preempt + // this in-progress session rather than racing with it. + active = { + sessionId, + startedAtMs: deps.now(), + watchdogTimer: null, + finalizedByWatchdog: false, + }; + const claimed = active; + await deps.playChime('open'); await deps.showOverlay(true, sessionId); + // If a concurrent onStart preempted us during the awaits, our claim was + // replaced. Stop here — the new claim owns the slot. + if (active !== claimed) { + return; + } + try { await deps.audioCapture.start({ sessionTag: `ptt:${sessionId}` }); } catch (err) { deps.logger.warn('[ptt] audio start failed', { sessionId, err: String(err) }); + if (active === claimed) { + active = null; + } await deps.playChime('error'); await deps.showOverlay(false, sessionId); return; } - active = { - sessionId, - startedAtMs: deps.now(), - watchdogTimer: null, - finalizedByWatchdog: false, - }; + // Re-check after the audio.start await. + if (active !== claimed) { + // Concurrent preempt replaced our claim mid-flight; we already started + // audio for an orphan session. Best-effort cancel and exit. + try { + await deps.audioCapture.cancel(); + } catch (_) {} + return; + } + active.watchdogTimer = armWatchdog(sessionId); }, From 782806fabe422532c83d7225860aef11857bcd71 Mon Sep 17 00:00:00 2001 From: Ghost Scripter Date: Thu, 4 Jun 2026 02:07:32 +0530 Subject: [PATCH 23/36] feat(ptt): mount PttHotkeyManager + wire service to real audio/STT/chat (#3090) --- app/src/App.tsx | 2 + app/src/__tests__/App.boot.test.tsx | 1 + app/src/components/PttHotkeyManager.tsx | 152 ++++++++++++++++++++++++ app/src/features/voice/pttAudio.ts | 126 ++++++++++++++++++++ app/src/features/voice/pttChimes.ts | 46 +++++++ app/src/features/voice/pttThread.ts | 26 ++++ app/src/features/voice/pttTranscribe.ts | 47 ++++++++ app/src/hooks/usePttHotkey.ts | 48 ++++++++ 8 files changed, 448 insertions(+) create mode 100644 app/src/components/PttHotkeyManager.tsx create mode 100644 app/src/features/voice/pttAudio.ts create mode 100644 app/src/features/voice/pttChimes.ts create mode 100644 app/src/features/voice/pttThread.ts create mode 100644 app/src/features/voice/pttTranscribe.ts create mode 100644 app/src/hooks/usePttHotkey.ts diff --git a/app/src/App.tsx b/app/src/App.tsx index 3fd7a3c8c0..293de44802 100644 --- a/app/src/App.tsx +++ b/app/src/App.tsx @@ -18,6 +18,7 @@ import LocalAIDownloadSnackbar from './components/LocalAIDownloadSnackbar'; import SecretPromptDialog from './components/mcp-setup/SecretPromptDialog'; import OpenhumanLinkModal from './components/OpenhumanLinkModal'; import PersistRehydrationScreen from './components/PersistRehydrationScreen'; +import PttHotkeyManager from './components/PttHotkeyManager'; import GlobalUpsellBanner from './components/upsell/GlobalUpsellBanner'; import AppWalkthrough from './components/walkthrough/AppWalkthrough'; import { MascotFrameProducer } from './features/meet/MascotFrameProducer'; @@ -106,6 +107,7 @@ function App() { {!onMobile && } + {!onMobile && } {!onMobile && } {!onMobile && } diff --git a/app/src/__tests__/App.boot.test.tsx b/app/src/__tests__/App.boot.test.tsx index 592c8a1aaf..aa7587370c 100644 --- a/app/src/__tests__/App.boot.test.tsx +++ b/app/src/__tests__/App.boot.test.tsx @@ -78,6 +78,7 @@ vi.mock('../components/commands/CommandProvider', () => ({ default: ({ children }: { children: React.ReactNode }) => <>{children}, })); vi.mock('../components/DictationHotkeyManager', () => ({ default: () => null })); +vi.mock('../components/PttHotkeyManager', () => ({ default: () => null })); vi.mock('../components/OpenhumanLinkModal', () => ({ default: () => null })); vi.mock('../components/upsell/GlobalUpsellBanner', () => ({ default: () => null })); vi.mock('../components/walkthrough/AppWalkthrough', () => ({ default: () => null })); diff --git a/app/src/components/PttHotkeyManager.tsx b/app/src/components/PttHotkeyManager.tsx new file mode 100644 index 0000000000..6f4682ecbe --- /dev/null +++ b/app/src/components/PttHotkeyManager.tsx @@ -0,0 +1,152 @@ +/** + * PttHotkeyManager + * + * Renderless boot-time wiring for the global push-to-talk feature: + * 1. Registers the persisted PTT shortcut with the Tauri shell via + * `usePttHotkey()`. + * 2. Owns the singleton `pttService` state machine (built in T10), wired to + * real audio capture (MediaRecorder), STT (voice_transcribe_bytes RPC), + * chat send, thread resolution, chime playback, and overlay window + * visibility. + * 3. Subscribes to the Tauri events `ptt://start` / `ptt://stop` emitted by + * the Rust shell when the global hotkey transitions edges, and forwards + * them into the service. + * + * The service is constructed once for the AppShell's lifetime — multiple + * mounts would create competing state machines fighting over the same mic. + */ +import { listen, type UnlistenFn } from '@tauri-apps/api/event'; +import { useEffect, useMemo, useRef } from 'react'; +import { useDispatch, useStore } from 'react-redux'; + +import { + cancelPttAudio, + finalizePttAudio, + startPttAudio, +} from '../features/voice/pttAudio'; +import { playPttChime } from '../features/voice/pttChimes'; +import { + createNewVoiceThread, + resolveActiveThreadId, +} from '../features/voice/pttThread'; +import { transcribePttAudio } from '../features/voice/pttTranscribe'; +import { usePttHotkey } from '../hooks/usePttHotkey'; +import { chatSend } from '../services/chatService'; +import { createPttService } from '../services/pttService'; +import type { RootState } from '../store'; +import { setIsHeld } from '../store/pttSlice'; +import { showPttOverlay } from '../utils/tauriCommands/ptt'; + +interface PttEventPayload { + session_id: number; +} + +// Stable monotonic clock for the pttService state machine. Defined at +// module scope so the useMemo factory below doesn't reference an impure +// function during render (react-hooks/purity). +const monotonicNow = (): number => Date.now(); + +export default function PttHotkeyManager(): null { + // Register / unregister the configured hotkey with the Tauri shell. + usePttHotkey(); + + const dispatch = useDispatch(); + const store = useStore(); + const unlistenRef = useRef([]); + + const service = useMemo( + () => + createPttService({ + audioCapture: { + start: startPttAudio, + finalize: finalizePttAudio, + cancel: cancelPttAudio, + }, + transcribe: transcribePttAudio, + sendMessage: async ({ threadId, body, speakReply, metadata }) => { + await chatSend({ + threadId, + message: body, + speakReply, + source: metadata.source, + sessionId: metadata.session_id, + }); + }, + resolveActiveThreadId, + createNewVoiceThread, + playChime: playPttChime, + showOverlay: async (active, sessionId) => { + // Respect the user's "show overlay" preference for the start edge, + // but always tear it down on stop so a mid-session toggle can't leave + // the overlay stuck visible. + if (!active || store.getState().ptt.showOverlay) { + await showPttOverlay(active, sessionId); + } + }, + getSettings: () => { + const ptt = store.getState().ptt; + return { + speakReplies: ptt.speakReplies, + showOverlay: ptt.showOverlay, + }; + }, + now: monotonicNow, + // 10 s ceiling on a single PTT recording — matches the spec; if the + // user holds the key longer the watchdog finalises so we don't keep + // an open mic forever. + watchdogMs: 10_000, + // Recordings shorter than this are treated as accidental taps. + minAudioMs: 250, + logger: { + debug: (msg, meta) => console.debug(msg, meta ?? {}), + info: (msg, meta) => console.info(msg, meta ?? {}), + warn: (msg, meta) => console.warn(msg, meta ?? {}), + }, + }), + // The service holds an internal state machine — recreating it across + // store updates would orphan in-flight sessions. The closures above read + // the latest store state on every call, so a stable identity is correct. + // eslint-disable-next-line react-hooks/exhaustive-deps + [], + ); + + useEffect(() => { + let mounted = true; + const subscribe = async () => { + try { + const offStart = await listen('ptt://start', e => { + dispatch(setIsHeld(true)); + void service.onStart(e.payload.session_id); + }); + const offStop = await listen('ptt://stop', e => { + dispatch(setIsHeld(false)); + void service.onStop(e.payload.session_id); + }); + if (!mounted) { + offStart(); + offStop(); + return; + } + unlistenRef.current.push(offStart, offStop); + console.debug('[ptt] PttHotkeyManager: listeners attached'); + } catch (err) { + console.warn('[ptt] PttHotkeyManager: failed to attach listeners', err); + } + }; + void subscribe(); + return () => { + mounted = false; + const offs = unlistenRef.current; + unlistenRef.current = []; + for (const off of offs) { + try { + off(); + } catch (err) { + console.debug('[ptt] PttHotkeyManager: unlisten threw', err); + } + } + }; + }, [dispatch, service]); + + return null; +} diff --git a/app/src/features/voice/pttAudio.ts b/app/src/features/voice/pttAudio.ts new file mode 100644 index 0000000000..c60d20036b --- /dev/null +++ b/app/src/features/voice/pttAudio.ts @@ -0,0 +1,126 @@ +/** + * pttAudio — push-to-talk mic-capture adapter for pttService. + * + * Dictation's existing recorder lives in the Rust core (rdev-driven, fed by + * the audio_capture domain) and surfaces results asynchronously over a + * dedicated socket — it is not exposed as a reusable JS function that returns + * a buffer. Rather than refactor that flow, we use a self-contained + * MediaRecorder in the renderer. The captured audio is sent straight to the + * existing `voice_transcribe_bytes` RPC (see `pttTranscribe.ts`), so we still + * reuse the core's STT path; only the capture layer is renderer-owned. + * + * Module-level state is intentional — the singleton matches `pttService`'s + * lifecycle (one active PTT session at a time, owned by `PttHotkeyManager`). + * `cancel` is idempotent so the watchdog / preempt paths can call it freely. + */ +import type { FinalizedAudio } from '../../services/pttService'; + +interface Recorder { + recorder: MediaRecorder; + stream: MediaStream; + chunks: Blob[]; + startedAt: number; +} + +let active: Recorder | null = null; +let lastMimeType: string | undefined; + +function pickMimeType(): string | undefined { + // Prefer webm/opus — small, broadly supported. whisper.cpp + cloud STT both + // accept it via ffmpeg decode; the core's `extension` hint is "webm". + const preferred = ['audio/webm;codecs=opus', 'audio/webm', 'audio/ogg;codecs=opus', 'audio/mp4']; + for (const mime of preferred) { + if (typeof MediaRecorder !== 'undefined' && MediaRecorder.isTypeSupported(mime)) { + return mime; + } + } + return undefined; +} + +function stopTracks(stream: MediaStream): void { + for (const track of stream.getTracks()) { + try { + track.stop(); + } catch { + /* ignore */ + } + } +} + +export async function startPttAudio(opts: { sessionTag: string }): Promise { + // If a prior session was abandoned without a finalize/cancel, free it now + // so we don't leak the mic. + if (active) { + console.debug('[ptt-audio] startPttAudio called with active recorder — cancelling first'); + await cancelPttAudio(); + } + + const stream = await navigator.mediaDevices.getUserMedia({ audio: true }); + const mimeType = pickMimeType(); + const recorder = new MediaRecorder(stream, mimeType ? { mimeType } : undefined); + const chunks: Blob[] = []; + + recorder.addEventListener('dataavailable', (e: BlobEvent) => { + if (e.data && e.data.size > 0) chunks.push(e.data); + }); + + active = { recorder, stream, chunks, startedAt: window.performance.now() }; + lastMimeType = mimeType ?? recorder.mimeType ?? undefined; + recorder.start(); + console.debug('[ptt-audio] started', { sessionTag: opts.sessionTag, mimeType }); +} + +export async function finalizePttAudio(): Promise { + if (!active) { + throw new Error('[ptt-audio] finalize called with no active recorder'); + } + const session = active; + active = null; + + const done = new Promise(resolve => { + if (session.recorder.state === 'inactive') { + resolve(); + return; + } + session.recorder.addEventListener('stop', () => resolve(), { once: true }); + }); + try { + if (session.recorder.state !== 'inactive') session.recorder.stop(); + } catch (err) { + console.warn('[ptt-audio] recorder.stop() threw', err); + } + await done; + stopTracks(session.stream); + + const blob = new Blob(session.chunks, { type: session.recorder.mimeType || 'audio/webm' }); + const buffer = await blob.arrayBuffer(); + const durationMs = Math.round(window.performance.now() - session.startedAt); + console.debug('[ptt-audio] finalized', { durationMs, bytes: buffer.byteLength }); + return { buffer, durationMs }; +} + +export async function cancelPttAudio(): Promise { + if (!active) return; + const session = active; + active = null; + try { + if (session.recorder.state !== 'inactive') session.recorder.stop(); + } catch { + /* ignore */ + } + stopTracks(session.stream); + console.debug('[ptt-audio] cancelled'); +} + +/** + * Maps the last-used MIME type to an extension string the core's + * `voice_transcribe_bytes` RPC accepts. Persists across `finalizePttAudio` + * (which clears `active`) so the transcribe step still gets the right hint. + */ +export function lastRecordedExtension(): string { + const mime = lastMimeType ?? ''; + if (mime.includes('webm')) return 'webm'; + if (mime.includes('ogg')) return 'ogg'; + if (mime.includes('mp4')) return 'm4a'; + return 'webm'; +} diff --git a/app/src/features/voice/pttChimes.ts b/app/src/features/voice/pttChimes.ts new file mode 100644 index 0000000000..34f1464096 --- /dev/null +++ b/app/src/features/voice/pttChimes.ts @@ -0,0 +1,46 @@ +/** + * pttChimes — short audio cue playback for push-to-talk session boundaries. + * + * The three WAVs (open/close/error) live in `app/src/assets/audio/`. Vite + * resolves binary assets imported with a string URL out-of-the-box, so a + * standard `import openSrc from '...wav'` returns a URL the browser can fetch. + * + * HTMLAudioElement instances are cached per kind so repeat playback doesn't + * re-decode the WAV on every press. `play()` may reject under the autoplay + * policy (no user gesture yet) — we swallow that since PTT is triggered by + * a global hotkey, not a click, and the chime is non-critical. + */ +import closeSrc from '../../assets/audio/ptt-close.wav'; +import errorSrc from '../../assets/audio/ptt-error.wav'; +import openSrc from '../../assets/audio/ptt-open.wav'; + +export type ChimeKind = 'open' | 'close' | 'error'; + +const sources: Record = { + open: openSrc, + close: closeSrc, + error: errorSrc, +}; + +const cache: Partial> = {}; + +function getElement(kind: ChimeKind): HTMLAudioElement { + const cached = cache[kind]; + if (cached) return cached; + const el = new window.Audio(sources[kind]); + el.preload = 'auto'; + cache[kind] = el; + return el; +} + +export async function playPttChime(kind: ChimeKind): Promise { + try { + const el = getElement(kind); + el.currentTime = 0; + await el.play(); + } catch (err) { + // Autoplay policy can reject silently for the first chime if no gesture + // has been observed. PTT is non-critical UX feedback so we just log. + console.debug('[ptt-chime] play failed', { kind, err: String(err) }); + } +} diff --git a/app/src/features/voice/pttThread.ts b/app/src/features/voice/pttThread.ts new file mode 100644 index 0000000000..9b7c0a6409 --- /dev/null +++ b/app/src/features/voice/pttThread.ts @@ -0,0 +1,26 @@ +/** + * pttThread — thread-resolution adapter for pttService. + * + * Resolves which thread a PTT-captured message lands in: + * 1. The currently-selected thread (state.thread.selectedThreadId) if any. + * 2. Otherwise create a fresh thread via `threads_create_new`. + * + * Keeping this in its own module keeps `PttHotkeyManager` declarative — the + * service interface only needs two thunks (`resolveActiveThreadId`, + * `createNewVoiceThread`), and the redux access stays out of React render + * scope. + */ +import { threadApi } from '../../services/api/threadApi'; +import { store } from '../../store'; + +export async function resolveActiveThreadId(): Promise { + const state = store.getState(); + return state.thread.selectedThreadId ?? null; +} + +export async function createNewVoiceThread(): Promise { + // No special "voice" label yet — the core auto-generates a title from the + // first user message, which gives a useful label for PTT sessions too. + const thread = await threadApi.createNewThread(); + return thread.id; +} diff --git a/app/src/features/voice/pttTranscribe.ts b/app/src/features/voice/pttTranscribe.ts new file mode 100644 index 0000000000..92c4613091 --- /dev/null +++ b/app/src/features/voice/pttTranscribe.ts @@ -0,0 +1,47 @@ +/** + * pttTranscribe — speech-to-text adapter for pttService. + * + * Reuses the existing `openhuman.voice_transcribe_bytes` RPC (see + * `src/openhuman/voice/ops.rs`). The Rust side handles cloud + whisper.cpp + * routing based on the user's `stt_provider` setting and applies optional + * LLM cleanup, so the renderer only needs to push raw bytes. + * + * The `extension` hint comes from `pttAudio.lastRecordedExtension()` — + * MediaRecorder negotiates webm/opus on every modern desktop browser. + */ +import { openhumanVoiceTranscribeBytes } from '../../utils/tauriCommands/voice'; +import { lastRecordedExtension } from './pttAudio'; + +/** + * Encode the buffer as a byte array for JSON-RPC transport. The wire format + * expects `Vec` deserialized from a number array; serde-json doesn't + * support binary natively over JSON-RPC. + * + * This is O(N) memory and CPU. For a 10s @ ~16 kbps opus blob (~20 KB) it's + * cheap; if PTT recordings grow past ~5 MB we should swap to base64 or a + * dedicated upload endpoint. + */ +function bufferToByteArray(buf: ArrayBuffer): number[] { + const view = new Uint8Array(buf); + const out = new Array(view.byteLength); + for (let i = 0; i < view.byteLength; i++) { + out[i] = view[i]; + } + return out; +} + +export async function transcribePttAudio(buf: ArrayBuffer): Promise { + if (buf.byteLength === 0) return ''; + const extension = lastRecordedExtension(); + const bytes = bufferToByteArray(buf); + const result = await openhumanVoiceTranscribeBytes( + bytes, + extension, + /* context */ undefined, + /* skipCleanup */ false + ); + // `result.text` is the cleaned-up version (LLM-polished when enabled); + // `raw_text` is the unfiltered whisper output. Prefer text but fall back. + const text = (result?.text ?? result?.raw_text ?? '').trim(); + return text; +} diff --git a/app/src/hooks/usePttHotkey.ts b/app/src/hooks/usePttHotkey.ts new file mode 100644 index 0000000000..7e33f67367 --- /dev/null +++ b/app/src/hooks/usePttHotkey.ts @@ -0,0 +1,48 @@ +/** + * usePttHotkey + * + * Subscribes the configured push-to-talk shortcut to the Tauri shell whenever + * the persisted `shortcut` field on the `ptt` slice changes. Resets the + * transient `isHeld` flag on mount so a stale rehydrated value (left over from + * a crash mid-press) can never leave the UI thinking the PTT key is held. + * + * Wired into the renderer once via `PttHotkeyManager` (T11), mounted in + * `App.tsx` alongside the dictation manager. + */ +import { useEffect } from 'react'; +import { useDispatch, useSelector } from 'react-redux'; + +import { registerPttHotkey, unregisterPttHotkey } from '../utils/tauriCommands/ptt'; +import { selectPttShortcut, setIsHeld } from '../store/pttSlice'; + +export function usePttHotkey(): void { + const dispatch = useDispatch(); + const shortcut = useSelector(selectPttShortcut); + + // Clear the transient isHeld flag on mount — a crash mid-press could + // otherwise rehydrate to "held forever". + useEffect(() => { + dispatch(setIsHeld(false)); + }, [dispatch]); + + useEffect(() => { + let cancelled = false; + const apply = async () => { + try { + if (shortcut && shortcut.trim().length > 0) { + await registerPttHotkey(shortcut); + } else { + await unregisterPttHotkey(); + } + } catch (err) { + if (!cancelled) { + console.warn('[ptt] hotkey (un)register failed', err); + } + } + }; + void apply(); + return () => { + cancelled = true; + }; + }, [shortcut]); +} From 13dcee2f665e294abab1b259cb22bb920025db96 Mon Sep 17 00:00:00 2001 From: Ghost Scripter Date: Thu, 4 Jun 2026 02:26:32 +0530 Subject: [PATCH 24/36] feat(ptt/ui): overlay page at /ptt-overlay with idle/active states (#3090) --- app/src/AppRoutes.tsx | 3 ++ app/src/pages/PttOverlayPage.test.tsx | 36 ++++++++++++++++++ app/src/pages/PttOverlayPage.tsx | 54 +++++++++++++++++++++++++++ 3 files changed, 93 insertions(+) create mode 100644 app/src/pages/PttOverlayPage.test.tsx create mode 100644 app/src/pages/PttOverlayPage.tsx diff --git a/app/src/AppRoutes.tsx b/app/src/AppRoutes.tsx index 2ebc316544..d4bc65e722 100644 --- a/app/src/AppRoutes.tsx +++ b/app/src/AppRoutes.tsx @@ -20,6 +20,7 @@ import Settings from './pages/Settings'; import SkillNew from './pages/SkillNew'; import Skills from './pages/Skills'; import SkillsRun from './pages/SkillsRun'; +import { PttOverlayPage } from './pages/PttOverlayPage'; import WebCallbackPage from './pages/WebCallbackPage'; import Welcome from './pages/Welcome'; @@ -195,6 +196,8 @@ const AppRoutes = () => { } /> + } /> + {/* Default redirect based on auth status */} } />
diff --git a/app/src/pages/PttOverlayPage.test.tsx b/app/src/pages/PttOverlayPage.test.tsx new file mode 100644 index 0000000000..97dc7d350e --- /dev/null +++ b/app/src/pages/PttOverlayPage.test.tsx @@ -0,0 +1,36 @@ +import { describe, expect, it, vi } from 'vitest'; +import { render, screen, act } from '@testing-library/react'; + +import { PttOverlayPage } from './PttOverlayPage'; + +// Mock @tauri-apps/api/event's listen so we can dispatch fake events. +vi.mock('@tauri-apps/api/event', () => { + const handlers: Record void> = {}; + return { + listen: vi.fn(async (name: string, handler: (e: { payload: unknown }) => void) => { + handlers[name] = handler; + return () => delete handlers[name]; + }), + __dispatch: (name: string, payload: unknown) => + handlers[name]?.({ payload }), + }; +}); + +describe('PttOverlayPage', () => { + it('renders idle state by default', () => { + render(); + expect(screen.getByTestId('ptt-overlay-root')).toHaveAttribute('data-active', 'false'); + }); + + it('flips to active when ptt-overlay://active fires with active=true', async () => { + render(); + const evt = await import('@tauri-apps/api/event'); + await act(async () => { + (evt as unknown as { __dispatch: (n: string, p: unknown) => void }).__dispatch( + 'ptt-overlay://active', + { active: true, session_id: 1 }, + ); + }); + expect(screen.getByTestId('ptt-overlay-root')).toHaveAttribute('data-active', 'true'); + }); +}); diff --git a/app/src/pages/PttOverlayPage.tsx b/app/src/pages/PttOverlayPage.tsx new file mode 100644 index 0000000000..950e49e881 --- /dev/null +++ b/app/src/pages/PttOverlayPage.tsx @@ -0,0 +1,54 @@ +import { useEffect, useState } from 'react'; +import { listen, type UnlistenFn } from '@tauri-apps/api/event'; +import { useT } from '../lib/i18n/I18nContext'; + +export function PttOverlayPage() { + const { t } = useT(); + const [active, setActive] = useState(false); + + useEffect(() => { + let off: UnlistenFn | undefined; + (async () => { + off = await listen<{ active: boolean }>('ptt-overlay://active', (e) => { + setActive(Boolean(e.payload?.active)); + }); + })(); + return () => off?.(); + }, []); + + return ( +
+ + {/* TODO(T13): i18n keys pttOverlay.listening / pttOverlay.idle added in T13 */} + {active ? t('pttOverlay.listening') : t('pttOverlay.idle')} +
+ ); +} From 88b5d3ed1f7d9fff7e94467f920aec1e8afd124e Mon Sep 17 00:00:00 2001 From: Ghost Scripter Date: Thu, 4 Jun 2026 02:32:29 +0530 Subject: [PATCH 25/36] fix(ptt/overlay): close listener leak on fast unmount via cancelled guard (#3090) --- app/src/pages/PttOverlayPage.tsx | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/app/src/pages/PttOverlayPage.tsx b/app/src/pages/PttOverlayPage.tsx index 950e49e881..2082f20ab2 100644 --- a/app/src/pages/PttOverlayPage.tsx +++ b/app/src/pages/PttOverlayPage.tsx @@ -8,12 +8,19 @@ export function PttOverlayPage() { useEffect(() => { let off: UnlistenFn | undefined; - (async () => { - off = await listen<{ active: boolean }>('ptt-overlay://active', (e) => { - setActive(Boolean(e.payload?.active)); - }); - })(); - return () => off?.(); + let cancelled = false; + listen<{ active: boolean }>('ptt-overlay://active', (e) => { + setActive(Boolean(e.payload?.active)); + }) + .then((fn) => { + if (cancelled) fn(); + else off = fn; + }) + .catch(() => {}); + return () => { + cancelled = true; + off?.(); + }; }, []); return ( From 7759af9832969c57420f7b8bec89035a42cff118 Mon Sep 17 00:00:00 2001 From: Ghost Scripter Date: Thu, 4 Jun 2026 03:03:29 +0530 Subject: [PATCH 26/36] feat(settings/voice): PttSettingsPanel + 13-locale i18n (#3090) --- .../components/settings/panels/VoicePanel.tsx | 9 + app/src/lib/i18n/ar.ts | 22 ++ app/src/lib/i18n/bn.ts | 24 ++ app/src/lib/i18n/de.ts | 24 ++ app/src/lib/i18n/en.ts | 24 ++ app/src/lib/i18n/es.ts | 23 ++ app/src/lib/i18n/fr.ts | 24 ++ app/src/lib/i18n/hi.ts | 23 ++ app/src/lib/i18n/id.ts | 23 ++ app/src/lib/i18n/it.ts | 24 ++ app/src/lib/i18n/ko.ts | 24 ++ app/src/lib/i18n/pl.ts | 23 ++ app/src/lib/i18n/pt.ts | 23 ++ app/src/lib/i18n/ru.ts | 24 ++ app/src/lib/i18n/zh-CN.ts | 21 ++ .../pages/settings/voice/PttSettingsPanel.tsx | 231 ++++++++++++++++++ .../voice/__tests__/PttSettingsPanel.test.tsx | 88 +++++++ app/src/test/test-utils.tsx | 2 + 18 files changed, 656 insertions(+) create mode 100644 app/src/pages/settings/voice/PttSettingsPanel.tsx create mode 100644 app/src/pages/settings/voice/__tests__/PttSettingsPanel.test.tsx diff --git a/app/src/components/settings/panels/VoicePanel.tsx b/app/src/components/settings/panels/VoicePanel.tsx index 88151611ee..d33c37c54e 100644 --- a/app/src/components/settings/panels/VoicePanel.tsx +++ b/app/src/components/settings/panels/VoicePanel.tsx @@ -1,6 +1,7 @@ import { useCallback, useEffect, useRef, useState } from 'react'; import { useT } from '../../../lib/i18n/I18nContext'; +import PttSettingsPanel from '../../../pages/settings/voice/PttSettingsPanel'; import { installPiper, installWhisper, @@ -1229,6 +1230,14 @@ const VoicePanel = ({ embedded = false }: VoicePanelProps = {}) => { + {/* ─── Section 3: Push-to-talk ───────────────────────────────── + Global PTT hotkey + session preferences. The panel is + self-contained — it only mutates the `ptt` slice, and + `usePttHotkey` (T11) reacts to slice changes to (re)register + the binding with the Tauri shell. Mounted here so users hunt + for it under Voice settings alongside dictation. */} + + {/* Mascot voice picker now lives in Mascot settings. Link kept here so users hunting in Voice settings can find it. */} {ttsProvider !== 'piper' && ( diff --git a/app/src/lib/i18n/ar.ts b/app/src/lib/i18n/ar.ts index e1772c5fc7..eafd8ebc3a 100644 --- a/app/src/lib/i18n/ar.ts +++ b/app/src/lib/i18n/ar.ts @@ -1490,6 +1490,28 @@ const messages: TranslationMap = { 'voice.externalProviders.apiKey': 'مفتاح API', 'voice.externalProviders.apiKeyPlaceholder': 'sk-…', 'voice.externalProviders.add': 'Add', + 'pttSettings.title': 'اضغط للتحدث', + 'pttSettings.description': + 'اضغط مفتاحًا باستمرار للتحدث إلى OpenHuman أثناء وجودك في تطبيق آخر. تحرير المفتاح يُرسل التسجيل، ثم ينطق OpenHuman الرد.', + 'pttSettings.shortcutLabel': 'اختصار المفتاح', + 'pttSettings.shortcutPlaceholder': 'اضغط مفتاحًا (مثل F13)', + 'pttSettings.shortcutUnsetHint': 'اضغط للتحدث متوقف — اختر اختصارًا لتفعيله.', + 'pttSettings.speakRepliesLabel': 'نطق ردود الوكيل', + 'pttSettings.showOverlayLabel': 'إظهار التراكب أثناء الضغط', + 'pttSettings.errorConflictsWithDictation': + 'هذا الاختصار مُستخدم بالفعل لخاصية الإملاء. اختر مفتاحًا مختلفًا.', + 'pttSettings.errorModifierOnly': + 'اختر مفتاحًا عاديًا (مثل F13) — الاختصارات المكونة من مفاتيح تعديل فقط لا تعمل مع اضغط للتحدث.', + 'pttSettings.errorEmpty': 'اختر مفتاحًا للربط.', + 'pttSettings.errorAccessibility': + 'يحتاج macOS إلى إذن إمكانية الوصول لهذا الاختصار. افتح إعدادات النظام ← الخصوصية والأمان ← إمكانية الوصول وفعّل OpenHuman.', + 'pttSettings.errorShortcutInUse': 'يستخدم تطبيق آخر هذا الاختصار بالفعل. اختر اختصارًا مختلفًا.', + 'pttSettings.errorUnsupportedWayland': + 'جلسات Wayland لا تدعم بعد الاختصارات العامة في OpenHuman — انتقل إلى جلسة X11 أو استخدم زر الإملاء داخل التطبيق.', + 'pttSettings.exclusiveFullscreenHint': + 'في ألعاب وضع ملء الشاشة الحصري لن يظهر التراكب — ستسمع التنبيه الصوتي فقط. انتقل إلى وضع ملء الشاشة بلا إطار لرؤية التراكب.', + 'pttOverlay.listening': 'يستمع…', + 'pttOverlay.idle': 'في الانتظار', 'autocomplete.title': 'الإكمال التلقائي', 'autocomplete.settings': 'الإعدادات', 'autocomplete.acceptWithTab': 'قبول بـ Tab', diff --git a/app/src/lib/i18n/bn.ts b/app/src/lib/i18n/bn.ts index b40b5cb2d7..0594cfa731 100644 --- a/app/src/lib/i18n/bn.ts +++ b/app/src/lib/i18n/bn.ts @@ -1521,6 +1521,30 @@ const messages: TranslationMap = { 'voice.externalProviders.apiKey': 'API কী', 'voice.externalProviders.apiKeyPlaceholder': 'sk-…', 'voice.externalProviders.add': 'Add', + 'pttSettings.title': 'চাপ দিয়ে কথা বলুন', + 'pttSettings.description': + 'অন্য একটি অ্যাপে থাকার সময় OpenHuman-এর সাথে কথা বলতে একটি কী চেপে ধরে রাখুন। কী ছেড়ে দিলে রেকর্ডিং পাঠানো হয়; OpenHuman উত্তরটি জোরে পড়ে শোনায়।', + 'pttSettings.shortcutLabel': 'হটকি', + 'pttSettings.shortcutPlaceholder': 'একটি কী চাপুন (যেমন F13)', + 'pttSettings.shortcutUnsetHint': + 'চাপ দিয়ে কথা বলুন বন্ধ আছে — চালু করতে একটি হটকি নির্বাচন করুন।', + 'pttSettings.speakRepliesLabel': 'এজেন্টের উত্তর জোরে পড়ুন', + 'pttSettings.showOverlayLabel': 'চেপে ধরা অবস্থায় ওভারলে দেখান', + 'pttSettings.errorConflictsWithDictation': + 'এই শর্টকাটটি ইতিমধ্যে ডিকটেশনের জন্য ব্যবহৃত হচ্ছে। অন্য একটি কী বেছে নিন।', + 'pttSettings.errorModifierOnly': + 'একটি সাধারণ কী বেছে নিন (যেমন F13) — শুধু মডিফায়ার নিয়ে গঠিত শর্টকাট চাপ দিয়ে কথা বলুনের জন্য কাজ করে না।', + 'pttSettings.errorEmpty': 'বাঁধার জন্য একটি কী বেছে নিন।', + 'pttSettings.errorAccessibility': + 'এই শর্টকাটের জন্য macOS-এর অ্যাক্সেসিবিলিটি অনুমতি দরকার। System Settings → Privacy & Security → Accessibility খুলুন এবং OpenHuman চালু করুন।', + 'pttSettings.errorShortcutInUse': + 'অন্য একটি অ্যাপ ইতিমধ্যে এই শর্টকাট ব্যবহার করছে। ভিন্ন একটি বেছে নিন।', + 'pttSettings.errorUnsupportedWayland': + 'Wayland সেশন এখনও OpenHuman-এ গ্লোবাল শর্টকাট সমর্থন করে না — একটি X11 সেশনে চলে যান অথবা অ্যাপের ভেতরের ডিকটেশন টগল ব্যবহার করুন।', + 'pttSettings.exclusiveFullscreenHint': + 'এক্সক্লুসিভ ফুলস্ক্রিন গেমে ওভারলে রেন্ডার হবে না — আপনি শুধু সাউন্ড সংকেতটি শুনতে পাবেন। ওভারলে দেখতে বর্ডারলেস ফুলস্ক্রিনে যান।', + 'pttOverlay.listening': 'শুনছে…', + 'pttOverlay.idle': 'অপেক্ষায়', 'autocomplete.title': 'অটোকমপ্লিট', 'autocomplete.settings': 'সেটিংস', 'autocomplete.acceptWithTab': 'Tab দিয়ে গ্রহণ করুন', diff --git a/app/src/lib/i18n/de.ts b/app/src/lib/i18n/de.ts index 7c6a8c86a4..40f9474fa0 100644 --- a/app/src/lib/i18n/de.ts +++ b/app/src/lib/i18n/de.ts @@ -1560,6 +1560,30 @@ const messages: TranslationMap = { 'voice.externalProviders.apiKey': 'API-Schlüssel', 'voice.externalProviders.apiKeyPlaceholder': 'sk-…', 'voice.externalProviders.add': 'Add', + 'pttSettings.title': 'Push-to-Talk', + 'pttSettings.description': + 'Halte eine Taste gedrückt, um mit OpenHuman zu sprechen, während du eine andere App nutzt. Loslassen sendet die Aufnahme; OpenHuman antwortet per Sprachausgabe.', + 'pttSettings.shortcutLabel': 'Tastenkürzel', + 'pttSettings.shortcutPlaceholder': 'Taste drücken (z. B. F13)', + 'pttSettings.shortcutUnsetHint': + 'Push-to-Talk ist aus — wähle ein Tastenkürzel, um es zu aktivieren.', + 'pttSettings.speakRepliesLabel': 'Antworten des Agenten vorlesen', + 'pttSettings.showOverlayLabel': 'Overlay während des Haltens anzeigen', + 'pttSettings.errorConflictsWithDictation': + 'Dieses Tastenkürzel wird bereits für die Diktierfunktion verwendet. Wähle eine andere Taste.', + 'pttSettings.errorModifierOnly': + 'Wähle eine normale Taste (z. B. F13) — reine Modifikator-Kürzel funktionieren bei Push-to-Talk nicht.', + 'pttSettings.errorEmpty': 'Wähle eine Taste zum Binden.', + 'pttSettings.errorAccessibility': + 'macOS benötigt für dieses Tastenkürzel die Bedienungshilfen-Berechtigung. Öffne Systemeinstellungen → Datenschutz & Sicherheit → Bedienungshilfen und aktiviere OpenHuman.', + 'pttSettings.errorShortcutInUse': + 'Eine andere App verwendet dieses Tastenkürzel bereits. Wähle ein anderes.', + 'pttSettings.errorUnsupportedWayland': + 'Wayland-Sitzungen unterstützen globale Tastenkürzel in OpenHuman noch nicht — wechsle zu einer X11-Sitzung oder nutze den In-App-Diktierschalter.', + 'pttSettings.exclusiveFullscreenHint': + 'In exklusivem Vollbild rendert das Overlay nicht — du hörst nur den Hinweiston. Wechsle zu randlosem Vollbild für das Overlay.', + 'pttOverlay.listening': 'Höre zu…', + 'pttOverlay.idle': 'Bereit', 'autocomplete.title': 'Automatische Vervollständigung', 'autocomplete.settings': 'Einstellungen', 'autocomplete.acceptWithTab': 'Mit Tab akzeptieren', diff --git a/app/src/lib/i18n/en.ts b/app/src/lib/i18n/en.ts index a4b3d2ed54..84dd70fb08 100644 --- a/app/src/lib/i18n/en.ts +++ b/app/src/lib/i18n/en.ts @@ -1717,6 +1717,30 @@ const en: TranslationMap = { 'voice.externalProviders.apiKeyPlaceholder': 'sk-…', 'voice.externalProviders.add': 'Add', + // Push-to-talk (PTT) + 'pttSettings.title': 'Push-to-talk', + 'pttSettings.description': + "Hold a key to talk to OpenHuman while you're in another app. Releases the key to send; OpenHuman speaks the reply back.", + 'pttSettings.shortcutLabel': 'Hotkey', + 'pttSettings.shortcutPlaceholder': 'Press a key (e.g. F13)', + 'pttSettings.shortcutUnsetHint': 'Push-to-talk is off — pick a hotkey to enable.', + 'pttSettings.speakRepliesLabel': 'Speak agent replies', + 'pttSettings.showOverlayLabel': 'Show overlay while held', + 'pttSettings.errorConflictsWithDictation': + 'This shortcut is already used by dictation. Pick a different key.', + 'pttSettings.errorModifierOnly': + "Pick a regular key (e.g. F13) — modifier-only shortcuts don't work for push-to-talk.", + 'pttSettings.errorEmpty': 'Pick a key to bind.', + 'pttSettings.errorAccessibility': + 'macOS needs Accessibility permission for this shortcut. Open System Settings → Privacy & Security → Accessibility and enable OpenHuman.', + 'pttSettings.errorShortcutInUse': 'Another app already uses this shortcut. Pick a different one.', + 'pttSettings.errorUnsupportedWayland': + "Wayland sessions don't support global shortcuts in OpenHuman yet — switch to an X11 session or use the in-app dictation toggle.", + 'pttSettings.exclusiveFullscreenHint': + "In exclusive-fullscreen games the overlay won't render — you'll only hear the chime. Switch to borderless fullscreen for the overlay.", + 'pttOverlay.listening': 'Listening…', + 'pttOverlay.idle': 'Idle', + // Autocomplete 'autocomplete.title': 'Autocomplete', 'autocomplete.settings': 'Settings', diff --git a/app/src/lib/i18n/es.ts b/app/src/lib/i18n/es.ts index 0028f387e3..c14a783f0a 100644 --- a/app/src/lib/i18n/es.ts +++ b/app/src/lib/i18n/es.ts @@ -1554,6 +1554,29 @@ const messages: TranslationMap = { 'voice.externalProviders.apiKey': 'Clave de API', 'voice.externalProviders.apiKeyPlaceholder': 'sk-…', 'voice.externalProviders.add': 'Add', + 'pttSettings.title': 'Pulsa para hablar', + 'pttSettings.description': + 'Mantén pulsada una tecla para hablar con OpenHuman mientras estás en otra aplicación. Al soltarla se envía la grabación; OpenHuman dice la respuesta en voz alta.', + 'pttSettings.shortcutLabel': 'Atajo de teclado', + 'pttSettings.shortcutPlaceholder': 'Pulsa una tecla (p. ej. F13)', + 'pttSettings.shortcutUnsetHint': + 'Pulsa para hablar está desactivado — elige un atajo para activarlo.', + 'pttSettings.speakRepliesLabel': 'Leer las respuestas del agente en voz alta', + 'pttSettings.showOverlayLabel': 'Mostrar el panel mientras se mantiene pulsado', + 'pttSettings.errorConflictsWithDictation': + 'Este atajo ya lo usa el dictado. Elige una tecla distinta.', + 'pttSettings.errorModifierOnly': + 'Elige una tecla normal (p. ej. F13) — los atajos compuestos solo por modificadores no funcionan para pulsa para hablar.', + 'pttSettings.errorEmpty': 'Elige una tecla para asignarla.', + 'pttSettings.errorAccessibility': + 'macOS necesita permiso de Accesibilidad para este atajo. Abre Ajustes del sistema → Privacidad y seguridad → Accesibilidad y activa OpenHuman.', + 'pttSettings.errorShortcutInUse': 'Otra aplicación ya usa este atajo. Elige uno distinto.', + 'pttSettings.errorUnsupportedWayland': + 'Las sesiones Wayland todavía no admiten atajos globales en OpenHuman — cambia a una sesión X11 o usa el botón de dictado dentro de la aplicación.', + 'pttSettings.exclusiveFullscreenHint': + 'En juegos a pantalla completa exclusiva el panel no se mostrará — solo oirás el aviso sonoro. Cambia a pantalla completa sin bordes para ver el panel.', + 'pttOverlay.listening': 'Escuchando…', + 'pttOverlay.idle': 'En espera', 'autocomplete.title': 'Autocompletado', 'autocomplete.settings': 'Configuración', 'autocomplete.acceptWithTab': 'Aceptar con Tab', diff --git a/app/src/lib/i18n/fr.ts b/app/src/lib/i18n/fr.ts index 164c3156fd..aa838afe89 100644 --- a/app/src/lib/i18n/fr.ts +++ b/app/src/lib/i18n/fr.ts @@ -1559,6 +1559,30 @@ const messages: TranslationMap = { 'voice.externalProviders.apiKey': 'Clé API', 'voice.externalProviders.apiKeyPlaceholder': 'sk-…', 'voice.externalProviders.add': 'Add', + 'pttSettings.title': 'Appuyer pour parler', + 'pttSettings.description': + 'Maintiens une touche pour parler à OpenHuman pendant que tu utilises une autre application. Relâcher envoie l’enregistrement ; OpenHuman lit la réponse à voix haute.', + 'pttSettings.shortcutLabel': 'Raccourci', + 'pttSettings.shortcutPlaceholder': 'Appuie sur une touche (par exemple F13)', + 'pttSettings.shortcutUnsetHint': + 'Appuyer pour parler est désactivé — choisis un raccourci pour l’activer.', + 'pttSettings.speakRepliesLabel': 'Lire les réponses de l’agent à voix haute', + 'pttSettings.showOverlayLabel': 'Afficher la surcouche pendant l’appui', + 'pttSettings.errorConflictsWithDictation': + 'Ce raccourci est déjà utilisé par la dictée. Choisis une autre touche.', + 'pttSettings.errorModifierOnly': + 'Choisis une touche ordinaire (par exemple F13) — les raccourcis composés uniquement de modificateurs ne fonctionnent pas pour appuyer pour parler.', + 'pttSettings.errorEmpty': 'Choisis une touche à associer.', + 'pttSettings.errorAccessibility': + 'macOS exige l’autorisation Accessibilité pour ce raccourci. Ouvre Réglages système → Confidentialité et sécurité → Accessibilité et active OpenHuman.', + 'pttSettings.errorShortcutInUse': + 'Une autre application utilise déjà ce raccourci. Choisis-en un autre.', + 'pttSettings.errorUnsupportedWayland': + 'Les sessions Wayland ne prennent pas encore en charge les raccourcis globaux dans OpenHuman — passe à une session X11 ou utilise la commande de dictée intégrée à l’application.', + 'pttSettings.exclusiveFullscreenHint': + 'En plein écran exclusif des jeux, la surcouche ne s’affichera pas — tu entendras seulement le son. Passe en plein écran sans bordures pour voir la surcouche.', + 'pttOverlay.listening': 'À l’écoute…', + 'pttOverlay.idle': 'En attente', 'autocomplete.title': 'Autocomplétion', 'autocomplete.settings': 'Paramètres', 'autocomplete.acceptWithTab': 'Accepter avec Tab', diff --git a/app/src/lib/i18n/hi.ts b/app/src/lib/i18n/hi.ts index c4407af0bc..a469faad6d 100644 --- a/app/src/lib/i18n/hi.ts +++ b/app/src/lib/i18n/hi.ts @@ -1520,6 +1520,29 @@ const messages: TranslationMap = { 'voice.externalProviders.apiKey': 'API कुंजी', 'voice.externalProviders.apiKeyPlaceholder': 'sk-…', 'voice.externalProviders.add': 'Add', + 'pttSettings.title': 'दबाकर बोलें', + 'pttSettings.description': + 'जब आप किसी दूसरे ऐप में हों तब OpenHuman से बात करने के लिए कोई कुंजी दबाए रखें। कुंजी छोड़ने पर रिकॉर्डिंग भेजी जाती है; OpenHuman उत्तर को बोलकर सुनाता है।', + 'pttSettings.shortcutLabel': 'हॉटकी', + 'pttSettings.shortcutPlaceholder': 'कोई कुंजी दबाएँ (जैसे F13)', + 'pttSettings.shortcutUnsetHint': 'दबाकर बोलें बंद है — चालू करने के लिए कोई हॉटकी चुनें।', + 'pttSettings.speakRepliesLabel': 'एजेंट के उत्तर ज़ोर से सुनाएँ', + 'pttSettings.showOverlayLabel': 'दबाए रखने के दौरान ओवरले दिखाएँ', + 'pttSettings.errorConflictsWithDictation': + 'यह शॉर्टकट पहले से डिक्टेशन में उपयोग हो रहा है। कोई दूसरी कुंजी चुनें।', + 'pttSettings.errorModifierOnly': + 'कोई सामान्य कुंजी चुनें (जैसे F13) — केवल मॉडिफ़ायर वाले शॉर्टकट दबाकर बोलें के लिए काम नहीं करते।', + 'pttSettings.errorEmpty': 'बाँधने के लिए कोई कुंजी चुनें।', + 'pttSettings.errorAccessibility': + 'इस शॉर्टकट के लिए macOS को एक्सेसिबिलिटी अनुमति चाहिए। System Settings → Privacy & Security → Accessibility खोलें और OpenHuman को सक्षम करें।', + 'pttSettings.errorShortcutInUse': + 'कोई दूसरा ऐप पहले से इस शॉर्टकट का उपयोग कर रहा है। कोई दूसरा चुनें।', + 'pttSettings.errorUnsupportedWayland': + 'Wayland सत्र अभी OpenHuman में ग्लोबल शॉर्टकट का समर्थन नहीं करते — X11 सत्र पर जाएँ या ऐप के अंदर डिक्टेशन टॉगल का उपयोग करें।', + 'pttSettings.exclusiveFullscreenHint': + 'एक्सक्लूसिव फुलस्क्रीन गेम्स में ओवरले प्रदर्शित नहीं होगा — आपको केवल चाइम सुनाई देगा। ओवरले देखने के लिए बॉर्डरलेस फुलस्क्रीन पर जाएँ।', + 'pttOverlay.listening': 'सुन रहा है…', + 'pttOverlay.idle': 'निष्क्रिय', 'autocomplete.title': 'ऑटोकम्पलीट', 'autocomplete.settings': 'सेटिंग्स', 'autocomplete.acceptWithTab': 'Tab से एक्सेप्ट करें', diff --git a/app/src/lib/i18n/id.ts b/app/src/lib/i18n/id.ts index eff076f57e..c7505d2be2 100644 --- a/app/src/lib/i18n/id.ts +++ b/app/src/lib/i18n/id.ts @@ -1525,6 +1525,29 @@ const messages: TranslationMap = { 'voice.externalProviders.apiKey': 'Kunci API', 'voice.externalProviders.apiKeyPlaceholder': 'sk-…', 'voice.externalProviders.add': 'Add', + 'pttSettings.title': 'Tekan untuk bicara', + 'pttSettings.description': + 'Tahan sebuah tombol untuk berbicara dengan OpenHuman saat kamu sedang di aplikasi lain. Lepas tombol untuk mengirim; OpenHuman akan menyuarakan balasannya.', + 'pttSettings.shortcutLabel': 'Pintasan', + 'pttSettings.shortcutPlaceholder': 'Tekan sebuah tombol (mis. F13)', + 'pttSettings.shortcutUnsetHint': + 'Tekan untuk bicara mati — pilih pintasan untuk mengaktifkannya.', + 'pttSettings.speakRepliesLabel': 'Suarakan balasan agen', + 'pttSettings.showOverlayLabel': 'Tampilkan lapisan saat tombol ditahan', + 'pttSettings.errorConflictsWithDictation': + 'Pintasan ini sudah dipakai oleh dikte. Pilih tombol lain.', + 'pttSettings.errorModifierOnly': + 'Pilih tombol biasa (mis. F13) — pintasan hanya pengubah tidak berfungsi untuk tekan untuk bicara.', + 'pttSettings.errorEmpty': 'Pilih tombol untuk diikat.', + 'pttSettings.errorAccessibility': + 'macOS memerlukan izin Aksesibilitas untuk pintasan ini. Buka Pengaturan Sistem → Privasi & Keamanan → Aksesibilitas lalu aktifkan OpenHuman.', + 'pttSettings.errorShortcutInUse': 'Aplikasi lain sudah memakai pintasan ini. Pilih yang lain.', + 'pttSettings.errorUnsupportedWayland': + 'Sesi Wayland belum mendukung pintasan global di OpenHuman — beralihlah ke sesi X11 atau gunakan tombol dikte di dalam aplikasi.', + 'pttSettings.exclusiveFullscreenHint': + 'Pada game layar penuh eksklusif, lapisan tidak akan tampil — kamu hanya akan mendengar nada. Beralihlah ke layar penuh tanpa bingkai untuk melihat lapisan.', + 'pttOverlay.listening': 'Mendengarkan…', + 'pttOverlay.idle': 'Siaga', 'autocomplete.title': 'Pelengkap Otomatis', 'autocomplete.settings': 'Pengaturan', 'autocomplete.acceptWithTab': 'Terima dengan Tab', diff --git a/app/src/lib/i18n/it.ts b/app/src/lib/i18n/it.ts index fd835df4b5..355702e985 100644 --- a/app/src/lib/i18n/it.ts +++ b/app/src/lib/i18n/it.ts @@ -1548,6 +1548,30 @@ const messages: TranslationMap = { 'voice.externalProviders.apiKey': 'Chiave API', 'voice.externalProviders.apiKeyPlaceholder': 'sk-…', 'voice.externalProviders.add': 'Add', + 'pttSettings.title': 'Premi per parlare', + 'pttSettings.description': + 'Tieni premuto un tasto per parlare con OpenHuman mentre sei in un’altra app. Al rilascio l’audio viene inviato; OpenHuman risponde a voce.', + 'pttSettings.shortcutLabel': 'Scorciatoia', + 'pttSettings.shortcutPlaceholder': 'Premi un tasto (es. F13)', + 'pttSettings.shortcutUnsetHint': + 'Premi per parlare è disattivato — scegli una scorciatoia per attivarlo.', + 'pttSettings.speakRepliesLabel': 'Pronuncia le risposte dell’agente', + 'pttSettings.showOverlayLabel': 'Mostra il riquadro mentre il tasto è premuto', + 'pttSettings.errorConflictsWithDictation': + 'Questa scorciatoia è già usata dalla dettatura. Scegli un tasto diverso.', + 'pttSettings.errorModifierOnly': + 'Scegli un tasto normale (es. F13) — le scorciatoie con soli modificatori non funzionano per premi per parlare.', + 'pttSettings.errorEmpty': 'Scegli un tasto da assegnare.', + 'pttSettings.errorAccessibility': + 'macOS richiede l’autorizzazione Accessibilità per questa scorciatoia. Apri Impostazioni di sistema → Privacy e sicurezza → Accessibilità e attiva OpenHuman.', + 'pttSettings.errorShortcutInUse': + 'Un’altra app utilizza già questa scorciatoia. Scegline una diversa.', + 'pttSettings.errorUnsupportedWayland': + 'Le sessioni Wayland non supportano ancora le scorciatoie globali in OpenHuman — passa a una sessione X11 o usa l’interruttore di dettatura nell’app.', + 'pttSettings.exclusiveFullscreenHint': + 'Nei giochi a schermo intero esclusivo il riquadro non verrà mostrato — sentirai solo il segnale acustico. Passa allo schermo intero senza bordi per vedere il riquadro.', + 'pttOverlay.listening': 'In ascolto…', + 'pttOverlay.idle': 'In attesa', 'autocomplete.title': 'Autocompletamento', 'autocomplete.settings': 'Impostazioni', 'autocomplete.acceptWithTab': 'Accetta con Tab', diff --git a/app/src/lib/i18n/ko.ts b/app/src/lib/i18n/ko.ts index e55f5ae9f2..2b98dfee1e 100644 --- a/app/src/lib/i18n/ko.ts +++ b/app/src/lib/i18n/ko.ts @@ -1505,6 +1505,30 @@ const messages: TranslationMap = { 'voice.externalProviders.apiKey': 'API 키', 'voice.externalProviders.apiKeyPlaceholder': 'sk-…', 'voice.externalProviders.add': 'Add', + 'pttSettings.title': '눌러서 말하기', + 'pttSettings.description': + '다른 앱을 사용하는 중에도 키를 누르고 있으면 OpenHuman과 대화할 수 있습니다. 키를 놓으면 녹음이 전송되고 OpenHuman이 답변을 음성으로 들려줍니다.', + 'pttSettings.shortcutLabel': '단축키', + 'pttSettings.shortcutPlaceholder': '키를 누르세요 (예: F13)', + 'pttSettings.shortcutUnsetHint': + '눌러서 말하기가 꺼져 있습니다 — 활성화하려면 단축키를 선택하세요.', + 'pttSettings.speakRepliesLabel': '에이전트 답변 음성으로 읽어주기', + 'pttSettings.showOverlayLabel': '누르고 있는 동안 오버레이 표시', + 'pttSettings.errorConflictsWithDictation': + '이 단축키는 받아쓰기에 이미 사용 중입니다. 다른 키를 선택하세요.', + 'pttSettings.errorModifierOnly': + '일반 키를 선택하세요(예: F13) — 보조 키로만 구성된 단축키는 눌러서 말하기에서 동작하지 않습니다.', + 'pttSettings.errorEmpty': '바인딩할 키를 선택하세요.', + 'pttSettings.errorAccessibility': + '이 단축키에는 macOS의 손쉬운 사용 권한이 필요합니다. 시스템 설정 → 개인정보 보호 및 보안 → 손쉬운 사용을 열고 OpenHuman을 활성화하세요.', + 'pttSettings.errorShortcutInUse': + '다른 앱이 이미 이 단축키를 사용 중입니다. 다른 단축키를 선택하세요.', + 'pttSettings.errorUnsupportedWayland': + 'Wayland 세션은 OpenHuman의 전역 단축키를 아직 지원하지 않습니다 — X11 세션으로 전환하거나 앱 내 받아쓰기 토글을 사용하세요.', + 'pttSettings.exclusiveFullscreenHint': + '전용 전체 화면 게임에서는 오버레이가 표시되지 않습니다 — 알림음만 들립니다. 오버레이를 보려면 테두리 없는 전체 화면으로 전환하세요.', + 'pttOverlay.listening': '듣는 중…', + 'pttOverlay.idle': '대기 중', 'autocomplete.title': '자동 완성', 'autocomplete.settings': '설정', 'autocomplete.acceptWithTab': 'Tab으로 수락', diff --git a/app/src/lib/i18n/pl.ts b/app/src/lib/i18n/pl.ts index 6c57724515..8f23bbc5b3 100644 --- a/app/src/lib/i18n/pl.ts +++ b/app/src/lib/i18n/pl.ts @@ -1541,6 +1541,29 @@ const messages: TranslationMap = { 'voice.externalProviders.apiKey': 'Klucz API', 'voice.externalProviders.apiKeyPlaceholder': 'sk-…', 'voice.externalProviders.add': 'Dodaj', + 'pttSettings.title': 'Naciśnij, aby mówić', + 'pttSettings.description': + 'Przytrzymaj klawisz, aby mówić do OpenHuman, gdy korzystasz z innej aplikacji. Zwolnienie klawisza wysyła nagranie; OpenHuman odczyta odpowiedź na głos.', + 'pttSettings.shortcutLabel': 'Skrót klawiszowy', + 'pttSettings.shortcutPlaceholder': 'Naciśnij klawisz (np. F13)', + 'pttSettings.shortcutUnsetHint': + 'Naciśnij, aby mówić jest wyłączone — wybierz skrót, aby je włączyć.', + 'pttSettings.speakRepliesLabel': 'Czytaj odpowiedzi agenta na głos', + 'pttSettings.showOverlayLabel': 'Pokazuj nakładkę podczas przytrzymania', + 'pttSettings.errorConflictsWithDictation': + 'Ten skrót jest już używany przez dyktowanie. Wybierz inny klawisz.', + 'pttSettings.errorModifierOnly': + 'Wybierz zwykły klawisz (np. F13) — skróty złożone tylko z modyfikatorów nie działają dla naciśnij, aby mówić.', + 'pttSettings.errorEmpty': 'Wybierz klawisz do przypisania.', + 'pttSettings.errorAccessibility': + 'macOS wymaga uprawnienia Dostępność dla tego skrótu. Otwórz Ustawienia systemowe → Prywatność i bezpieczeństwo → Dostępność i włącz OpenHuman.', + 'pttSettings.errorShortcutInUse': 'Inna aplikacja używa już tego skrótu. Wybierz inny.', + 'pttSettings.errorUnsupportedWayland': + 'Sesje Wayland nie obsługują jeszcze globalnych skrótów w OpenHuman — przełącz się na sesję X11 lub użyj przełącznika dyktowania w aplikacji.', + 'pttSettings.exclusiveFullscreenHint': + 'W grach na wyłącznym pełnym ekranie nakładka nie zostanie wyświetlona — usłyszysz tylko sygnał dźwiękowy. Przełącz na pełny ekran bez ramki, aby zobaczyć nakładkę.', + 'pttOverlay.listening': 'Słucham…', + 'pttOverlay.idle': 'Gotowy', 'autocomplete.title': 'Autouzupełnianie', 'autocomplete.settings': 'Ustawienia', 'autocomplete.acceptWithTab': 'Akceptuj Tabem', diff --git a/app/src/lib/i18n/pt.ts b/app/src/lib/i18n/pt.ts index 38b108c5b2..e711bc6522 100644 --- a/app/src/lib/i18n/pt.ts +++ b/app/src/lib/i18n/pt.ts @@ -1553,6 +1553,29 @@ const messages: TranslationMap = { 'voice.externalProviders.apiKey': 'Chave de API', 'voice.externalProviders.apiKeyPlaceholder': 'sk-…', 'voice.externalProviders.add': 'Add', + 'pttSettings.title': 'Pressionar para falar', + 'pttSettings.description': + 'Mantenha uma tecla pressionada para falar com o OpenHuman enquanto está noutro aplicativo. Soltar a tecla envia o áudio; o OpenHuman lê a resposta em voz alta.', + 'pttSettings.shortcutLabel': 'Atalho', + 'pttSettings.shortcutPlaceholder': 'Pressione uma tecla (por exemplo, F13)', + 'pttSettings.shortcutUnsetHint': + 'Pressionar para falar está desligado — escolha um atalho para ativar.', + 'pttSettings.speakRepliesLabel': 'Ler as respostas do agente em voz alta', + 'pttSettings.showOverlayLabel': 'Mostrar a sobreposição enquanto a tecla está pressionada', + 'pttSettings.errorConflictsWithDictation': + 'Este atalho já é usado pelo ditado. Escolha uma tecla diferente.', + 'pttSettings.errorModifierOnly': + 'Escolha uma tecla normal (por exemplo, F13) — atalhos apenas com modificadores não funcionam para pressionar para falar.', + 'pttSettings.errorEmpty': 'Escolha uma tecla para vincular.', + 'pttSettings.errorAccessibility': + 'O macOS precisa de permissão de Acessibilidade para este atalho. Abra Ajustes do Sistema → Privacidade e Segurança → Acessibilidade e ative o OpenHuman.', + 'pttSettings.errorShortcutInUse': 'Outro aplicativo já está a usar este atalho. Escolha outro.', + 'pttSettings.errorUnsupportedWayland': + 'As sessões Wayland ainda não suportam atalhos globais no OpenHuman — mude para uma sessão X11 ou use o controlo de ditado integrado no aplicativo.', + 'pttSettings.exclusiveFullscreenHint': + 'Em jogos no modo de ecrã inteiro exclusivo a sobreposição não será apresentada — só ouvirá o aviso sonoro. Mude para ecrã inteiro sem margens para ver a sobreposição.', + 'pttOverlay.listening': 'A escutar…', + 'pttOverlay.idle': 'Inativo', 'autocomplete.title': 'Autocompletar', 'autocomplete.settings': 'Configurações', 'autocomplete.acceptWithTab': 'Aceitar com Tab', diff --git a/app/src/lib/i18n/ru.ts b/app/src/lib/i18n/ru.ts index 2bb5f6406a..35d2be1c23 100644 --- a/app/src/lib/i18n/ru.ts +++ b/app/src/lib/i18n/ru.ts @@ -1532,6 +1532,30 @@ const messages: TranslationMap = { 'voice.externalProviders.apiKey': 'API-ключ', 'voice.externalProviders.apiKeyPlaceholder': 'sk-…', 'voice.externalProviders.add': 'Add', + 'pttSettings.title': 'Нажми и говори', + 'pttSettings.description': + 'Удерживайте клавишу, чтобы говорить с OpenHuman, пока вы находитесь в другом приложении. При отпускании запись отправляется; OpenHuman озвучит ответ.', + 'pttSettings.shortcutLabel': 'Сочетание клавиш', + 'pttSettings.shortcutPlaceholder': 'Нажмите клавишу (например, F13)', + 'pttSettings.shortcutUnsetHint': + 'Нажми и говори выключено — выберите сочетание клавиш, чтобы включить.', + 'pttSettings.speakRepliesLabel': 'Озвучивать ответы агента', + 'pttSettings.showOverlayLabel': 'Показывать наложение во время удержания', + 'pttSettings.errorConflictsWithDictation': + 'Это сочетание уже используется диктовкой. Выберите другую клавишу.', + 'pttSettings.errorModifierOnly': + 'Выберите обычную клавишу (например, F13) — сочетания только из модификаторов не работают для «нажми и говори».', + 'pttSettings.errorEmpty': 'Выберите клавишу для назначения.', + 'pttSettings.errorAccessibility': + 'macOS требует разрешения «Универсальный доступ» для этого сочетания. Откройте Системные настройки → Конфиденциальность и безопасность → Универсальный доступ и включите OpenHuman.', + 'pttSettings.errorShortcutInUse': + 'Это сочетание уже использует другое приложение. Выберите другое.', + 'pttSettings.errorUnsupportedWayland': + 'Сессии Wayland пока не поддерживают глобальные сочетания клавиш в OpenHuman — перейдите на сессию X11 или используйте встроенный переключатель диктовки.', + 'pttSettings.exclusiveFullscreenHint': + 'В играх с эксклюзивным полноэкранным режимом наложение не отобразится — вы услышите только звуковой сигнал. Переключитесь на оконный полноэкранный режим, чтобы видеть наложение.', + 'pttOverlay.listening': 'Слушаю…', + 'pttOverlay.idle': 'Ожидание', 'autocomplete.title': 'Автодополнение', 'autocomplete.settings': 'Настройки', 'autocomplete.acceptWithTab': 'Принять с помощью Tab', diff --git a/app/src/lib/i18n/zh-CN.ts b/app/src/lib/i18n/zh-CN.ts index a0f8ab41a5..e0095716c1 100644 --- a/app/src/lib/i18n/zh-CN.ts +++ b/app/src/lib/i18n/zh-CN.ts @@ -1439,6 +1439,27 @@ const messages: TranslationMap = { 'voice.externalProviders.apiKey': 'API 密钥', 'voice.externalProviders.apiKeyPlaceholder': 'sk-…', 'voice.externalProviders.add': 'Add', + 'pttSettings.title': '按住说话', + 'pttSettings.description': + '在其他应用中时,按住按键即可与 OpenHuman 对话。松开按键发送录音;OpenHuman 会用语音读出回复。', + 'pttSettings.shortcutLabel': '快捷键', + 'pttSettings.shortcutPlaceholder': '按下一个键(例如 F13)', + 'pttSettings.shortcutUnsetHint': '按住说话已关闭 — 请选择一个快捷键来启用。', + 'pttSettings.speakRepliesLabel': '朗读智能体的回复', + 'pttSettings.showOverlayLabel': '按住时显示悬浮层', + 'pttSettings.errorConflictsWithDictation': '该快捷键已被听写功能占用。请选择其他按键。', + 'pttSettings.errorModifierOnly': + '请选择一个常规按键(例如 F13)— 仅由修饰键组成的快捷键无法用于按住说话。', + 'pttSettings.errorEmpty': '请选择要绑定的按键。', + 'pttSettings.errorAccessibility': + '此快捷键需要 macOS 的辅助功能权限。请打开系统设置 → 隐私与安全 → 辅助功能,并启用 OpenHuman。', + 'pttSettings.errorShortcutInUse': '该快捷键已被其他应用占用。请选择其他快捷键。', + 'pttSettings.errorUnsupportedWayland': + 'Wayland 会话尚不支持 OpenHuman 的全局快捷键 — 请切换到 X11 会话,或使用应用内的听写开关。', + 'pttSettings.exclusiveFullscreenHint': + '在独占式全屏游戏中悬浮层不会显示 — 你只会听到提示音。切换到无边框全屏即可看到悬浮层。', + 'pttOverlay.listening': '正在聆听…', + 'pttOverlay.idle': '空闲', 'autocomplete.title': '自动补全', 'autocomplete.settings': '设置', 'autocomplete.acceptWithTab': 'Tab 键接受', diff --git a/app/src/pages/settings/voice/PttSettingsPanel.tsx b/app/src/pages/settings/voice/PttSettingsPanel.tsx new file mode 100644 index 0000000000..c9e15cba13 --- /dev/null +++ b/app/src/pages/settings/voice/PttSettingsPanel.tsx @@ -0,0 +1,231 @@ +/** + * PttSettingsPanel — settings card for the global push-to-talk hotkey. + * + * Renders three controls bound to `pttSlice` (T8): + * - A hotkey-capture input that writes the captured key into + * `setPttShortcut` (null when cleared). Modifier-only presses are + * rejected with an inline error since they don't make sense for PTT. + * - A "Speak agent replies" switch bound to `setSpeakReplies`. + * - A "Show overlay while held" switch bound to `setShowOverlay`. + * + * The hotkey registration side effect itself is handled by + * `usePttHotkey` (T11) which subscribes to slice changes and forwards + * to the Tauri shell — this panel only mutates Redux state and lets + * the manager hook react. This separation keeps the settings UI + * purely declarative and means the panel test does not need to mock + * the Tauri command surface. + * + * The panel deliberately renders without a `SettingsHeader` since it's + * intended to be embedded inside `VoicePanel` rather than mounted as a + * standalone route. The "card" style matches the other sections inside + * VoicePanel. + * + * Plan: docs/superpowers/plans/2026-06-02-global-ptt.md (Task 13). + */ +import { useCallback, useState } from 'react'; + +import { useT } from '../../../lib/i18n/I18nContext'; +import { useAppDispatch, useAppSelector } from '../../../store/hooks'; +import { + selectPttShortcut, + selectShowOverlay, + selectSpeakReplies, + setPttShortcut, + setShowOverlay, + setSpeakReplies, +} from '../../../store/pttSlice'; + +/** Keys that are pure modifiers — a PTT binding made of only these makes + * no sense (you can't "release" a modifier to send a sample without + * already needing a non-modifier sentinel). We surface a typed error + * instead of silently saving a useless binding. */ +const MODIFIER_KEYS = new Set([ + 'Shift', + 'Control', + 'Alt', + 'Meta', + 'OS', + 'AltGraph', + 'CapsLock', + 'NumLock', + 'ScrollLock', +]); + +/** + * Convert a KeyboardEvent into a stable shortcut string. Mirrors the + * format the Tauri shell expects (e.g. `Ctrl+Alt+F13`). We use the + * `key` field (and `code` for letters where `key` carries the layout's + * uppercased value) to avoid layout drift across QWERTY / AZERTY / etc. + */ +function eventToShortcut(e: React.KeyboardEvent): string | null { + if (MODIFIER_KEYS.has(e.key)) return null; + const parts: string[] = []; + if (e.ctrlKey) parts.push('Ctrl'); + if (e.altKey) parts.push('Alt'); + if (e.shiftKey) parts.push('Shift'); + if (e.metaKey) parts.push('Meta'); + // Prefer e.key (already the localised label like "F13", "a", "Enter") + // unless it's a single lowercase letter — for those we uppercase to + // produce a consistent "Ctrl+A" form across capitalised / not. + let label = e.key; + if (label.length === 1 && /[a-z]/.test(label)) { + label = label.toUpperCase(); + } + parts.push(label); + return parts.join('+'); +} + +const PttSettingsPanel = () => { + const { t } = useT(); + const dispatch = useAppDispatch(); + const shortcut = useAppSelector(selectPttShortcut); + const speakReplies = useAppSelector(selectSpeakReplies); + const showOverlay = useAppSelector(selectShowOverlay); + + // Inline validation error for the capture input (e.g. modifier-only). + // Cleared whenever the user retries or focuses the field. Server-side + // errors (accessibility, in-use, Wayland) are emitted by the manager + // hook via toast/snackbar in T11; we keep this panel-local state for + // the capture-time failure modes. + const [captureError, setCaptureError] = useState(null); + + const handleShortcutKeyDown = useCallback( + (e: React.KeyboardEvent) => { + // Always preventDefault so the input doesn't try to insert text + // for the captured character — we treat it as a binding press, + // not editable content. + e.preventDefault(); + e.stopPropagation(); + + // Allow Backspace / Delete / Escape to clear the binding so the + // user can drop back to the "off" state without having to fight a + // sticky F13. + if (e.key === 'Backspace' || e.key === 'Delete' || e.key === 'Escape') { + setCaptureError(null); + dispatch(setPttShortcut(null)); + return; + } + + if (MODIFIER_KEYS.has(e.key)) { + setCaptureError(t('pttSettings.errorModifierOnly')); + return; + } + + const shortcutString = eventToShortcut(e); + if (!shortcutString) { + setCaptureError(t('pttSettings.errorEmpty')); + return; + } + + console.debug('[pttSettings] captured shortcut %s', shortcutString); + setCaptureError(null); + dispatch(setPttShortcut(shortcutString)); + }, + [dispatch, t] + ); + + const toggleSpeakReplies = useCallback(() => { + dispatch(setSpeakReplies(!speakReplies)); + }, [dispatch, speakReplies]); + + const toggleShowOverlay = useCallback(() => { + dispatch(setShowOverlay(!showOverlay)); + }, [dispatch, showOverlay]); + + return ( +
+
+
+

+ {t('pttSettings.title')} +

+

+ {t('pttSettings.description')} +

+
+ + {/* Hotkey capture */} + + + {/* Speak replies switch */} +
+ + {t('pttSettings.speakRepliesLabel')} + + +
+ + {/* Show overlay switch */} +
+ + {t('pttSettings.showOverlayLabel')} + + +
+
+
+ ); +}; + +export default PttSettingsPanel; diff --git a/app/src/pages/settings/voice/__tests__/PttSettingsPanel.test.tsx b/app/src/pages/settings/voice/__tests__/PttSettingsPanel.test.tsx new file mode 100644 index 0000000000..b4380e8242 --- /dev/null +++ b/app/src/pages/settings/voice/__tests__/PttSettingsPanel.test.tsx @@ -0,0 +1,88 @@ +import { fireEvent, screen } from '@testing-library/react'; +import { describe, expect, it } from 'vitest'; + +import { I18nProvider } from '../../../../lib/i18n/I18nContext'; +import { initialPttState, type PttState } from '../../../../store/pttSlice'; +import { renderWithProviders } from '../../../../test/test-utils'; +import PttSettingsPanel from '../PttSettingsPanel'; + +/** + * Render PttSettingsPanel with the given PTT slice state pre-seeded so + * each test can assert against a known starting point. We wrap in the + * real `I18nProvider` so the panel's labels resolve to the en.ts copy + * — that lets tests query by their final rendered text without + * hard-coding the message ids. + */ +function renderPanel(pttOverrides: Partial = {}) { + const preloadedState = { + locale: { current: 'en' as const }, + ptt: { ...initialPttState, ...pttOverrides }, + }; + return renderWithProviders( + + + , + { preloadedState } + ); +} + +describe('PttSettingsPanel', () => { + it('renders the "not set" hint when no shortcut is bound', () => { + renderPanel({ shortcut: null }); + expect( + screen.getByText(/Push-to-talk is off — pick a hotkey to enable\./i) + ).toBeInTheDocument(); + }); + + it('renders the bound shortcut when set', () => { + renderPanel({ shortcut: 'F13' }); + expect(screen.getByTestId('ptt-shortcut-input')).toHaveValue('F13'); + // The unset hint should NOT show once a shortcut is bound. + expect( + screen.queryByText(/Push-to-talk is off — pick a hotkey to enable\./i) + ).not.toBeInTheDocument(); + }); + + it('toggles speakReplies via the switch', () => { + const { store } = renderPanel({ shortcut: 'F13', speakReplies: true }); + const speakSwitch = screen.getByTestId('ptt-speak-replies-switch'); + expect(speakSwitch).toHaveAttribute('aria-checked', 'true'); + + fireEvent.click(speakSwitch); + + const stateAfter = (store.getState() as { ptt: PttState }).ptt; + expect(stateAfter.speakReplies).toBe(false); + // And the aria-checked attribute should flip on the rendered switch. + expect(screen.getByTestId('ptt-speak-replies-switch')).toHaveAttribute('aria-checked', 'false'); + }); + + it('toggles showOverlay via the switch', () => { + const { store } = renderPanel({ shortcut: 'F13', showOverlay: true }); + const overlaySwitch = screen.getByTestId('ptt-show-overlay-switch'); + expect(overlaySwitch).toHaveAttribute('aria-checked', 'true'); + + fireEvent.click(overlaySwitch); + + const stateAfter = (store.getState() as { ptt: PttState }).ptt; + expect(stateAfter.showOverlay).toBe(false); + }); + + it('updates the shortcut when a key is captured in the input', () => { + const { store } = renderPanel({ shortcut: null }); + const input = screen.getByTestId('ptt-shortcut-input'); + + // Simulate a real keyboard event — the panel listens for keydown on the + // focused input and captures the key code (e.g. "F13"). Using fireEvent + // because userEvent.keyboard treats F13 as a sequence. + fireEvent.keyDown(input, { key: 'F13', code: 'F13' }); + + const stateAfter = (store.getState() as { ptt: PttState }).ptt; + expect(stateAfter.shortcut).toBe('F13'); + }); + + it('shows the panel title and description from the en locale', () => { + renderPanel({ shortcut: null }); + expect(screen.getByText('Push-to-talk')).toBeInTheDocument(); + expect(screen.getByText(/Hold a key to talk to OpenHuman/i)).toBeInTheDocument(); + }); +}); diff --git a/app/src/test/test-utils.tsx b/app/src/test/test-utils.tsx index 9f16fe7510..64e9004a7b 100644 --- a/app/src/test/test-utils.tsx +++ b/app/src/test/test-utils.tsx @@ -17,6 +17,7 @@ import coreModeReducer from '../store/coreModeSlice'; import localeReducer from '../store/localeSlice'; import mascotReducer from '../store/mascotSlice'; import personaReducer from '../store/personaSlice'; +import { pttReducer } from '../store/pttSlice'; import socketReducer from '../store/socketSlice'; /** @@ -37,6 +38,7 @@ const testRootReducer = combineReducers({ locale: localeReducer, mascot: mascotReducer, persona: personaReducer, + ptt: pttReducer, socket: socketReducer, }); From 8c72740e358d71ee706960580614cf2f6dcdf32e Mon Sep 17 00:00:00 2001 From: Ghost Scripter Date: Thu, 4 Jun 2026 12:03:46 +0530 Subject: [PATCH 27/36] feat(ptt/settings): surface registration errors with localized messages (#3090) Store the most recent Tauri hotkey registration error in pttSlice (registrationError, transient/non-persisted) and dispatch it from usePttHotkey on failure, clearing it on success. PttSettingsPanel maps well-known error strings (dictation conflict, Wayland, accessibility, shortcut-in-use) to their existing i18n keys and renders them inline below the capture input so the user sees the real failure reason instead of a silent "saved" state. --- app/src/hooks/usePttHotkey.ts | 12 +++++- .../pages/settings/voice/PttSettingsPanel.tsx | 37 +++++++++++++++++ .../voice/__tests__/PttSettingsPanel.test.tsx | 41 +++++++++++++++++++ app/src/store/__tests__/pttSlice.test.ts | 14 +++++++ app/src/store/pttSlice.ts | 27 ++++++++++-- 5 files changed, 125 insertions(+), 6 deletions(-) diff --git a/app/src/hooks/usePttHotkey.ts b/app/src/hooks/usePttHotkey.ts index 7e33f67367..f2bb3b7fa3 100644 --- a/app/src/hooks/usePttHotkey.ts +++ b/app/src/hooks/usePttHotkey.ts @@ -13,7 +13,11 @@ import { useEffect } from 'react'; import { useDispatch, useSelector } from 'react-redux'; import { registerPttHotkey, unregisterPttHotkey } from '../utils/tauriCommands/ptt'; -import { selectPttShortcut, setIsHeld } from '../store/pttSlice'; +import { + selectPttShortcut, + setIsHeld, + setPttRegistrationError, +} from '../store/pttSlice'; export function usePttHotkey(): void { const dispatch = useDispatch(); @@ -31,12 +35,16 @@ export function usePttHotkey(): void { try { if (shortcut && shortcut.trim().length > 0) { await registerPttHotkey(shortcut); + if (!cancelled) dispatch(setPttRegistrationError(null)); } else { await unregisterPttHotkey(); + if (!cancelled) dispatch(setPttRegistrationError(null)); } } catch (err) { if (!cancelled) { + const msg = err instanceof Error ? err.message : String(err); console.warn('[ptt] hotkey (un)register failed', err); + dispatch(setPttRegistrationError(msg)); } } }; @@ -44,5 +52,5 @@ export function usePttHotkey(): void { return () => { cancelled = true; }; - }, [shortcut]); + }, [shortcut, dispatch]); } diff --git a/app/src/pages/settings/voice/PttSettingsPanel.tsx b/app/src/pages/settings/voice/PttSettingsPanel.tsx index c9e15cba13..2b746d97fb 100644 --- a/app/src/pages/settings/voice/PttSettingsPanel.tsx +++ b/app/src/pages/settings/voice/PttSettingsPanel.tsx @@ -27,6 +27,7 @@ import { useCallback, useState } from 'react'; import { useT } from '../../../lib/i18n/I18nContext'; import { useAppDispatch, useAppSelector } from '../../../store/hooks'; import { + selectPttRegistrationError, selectPttShortcut, selectShowOverlay, selectSpeakReplies, @@ -75,12 +76,40 @@ function eventToShortcut(e: React.KeyboardEvent): string | null { return parts.join('+'); } +/** + * Map a raw Tauri error string from `register_ptt_hotkey` to a localized + * message. Pattern-matches on well-known substrings so the panel doesn't need + * to depend on the exact Rust error wording; falls back to the raw string for + * anything unrecognised (still useful to the user for diagnostics). + */ +function localizedRegistrationError( + raw: string | null, + t: (key: string) => string, +): string | null { + if (!raw) return null; + const lower = raw.toLowerCase(); + if (lower.includes('conflict') && lower.includes('dictation')) { + return t('pttSettings.errorConflictsWithDictation'); + } + if (lower.includes('wayland')) { + return t('pttSettings.errorUnsupportedWayland'); + } + if (lower.includes('accessibility')) { + return t('pttSettings.errorAccessibility'); + } + if (lower.includes('in use') || lower.includes('shortcutinuse') || lower.includes('in_use')) { + return t('pttSettings.errorShortcutInUse'); + } + return raw; +} + const PttSettingsPanel = () => { const { t } = useT(); const dispatch = useAppDispatch(); const shortcut = useAppSelector(selectPttShortcut); const speakReplies = useAppSelector(selectSpeakReplies); const showOverlay = useAppSelector(selectShowOverlay); + const registrationError = useAppSelector(selectPttRegistrationError); // Inline validation error for the capture input (e.g. modifier-only). // Cleared whenever the user retries or focuses the field. Server-side @@ -174,6 +203,14 @@ const PttSettingsPanel = () => { {captureError}

)} + {!captureError && registrationError && ( +

+ {localizedRegistrationError(registrationError, t)} +

+ )} {/* Speak replies switch */} diff --git a/app/src/pages/settings/voice/__tests__/PttSettingsPanel.test.tsx b/app/src/pages/settings/voice/__tests__/PttSettingsPanel.test.tsx index b4380e8242..69dc8ba4c5 100644 --- a/app/src/pages/settings/voice/__tests__/PttSettingsPanel.test.tsx +++ b/app/src/pages/settings/voice/__tests__/PttSettingsPanel.test.tsx @@ -85,4 +85,45 @@ describe('PttSettingsPanel', () => { expect(screen.getByText('Push-to-talk')).toBeInTheDocument(); expect(screen.getByText(/Hold a key to talk to OpenHuman/i)).toBeInTheDocument(); }); + + it('renders the localized registration error when the slice has a dictation conflict', () => { + renderPanel({ + shortcut: 'F13', + registrationError: "ptt shortcut 'F13' conflicts with the dictation hotkey", + }); + const errEl = screen.getByTestId('ptt-registration-error'); + expect(errEl).toBeInTheDocument(); + expect(errEl).toHaveTextContent(/already used by dictation/i); + }); + + it('renders a localized Wayland error when the slice has one', () => { + renderPanel({ + shortcut: 'F13', + registrationError: 'global shortcuts are not supported in this Wayland session', + }); + expect(screen.getByTestId('ptt-registration-error')).toHaveTextContent(/wayland/i); + }); + + it('renders the raw error string for unrecognised errors', () => { + renderPanel({ shortcut: 'F13', registrationError: 'some unexpected Tauri error' }); + expect(screen.getByTestId('ptt-registration-error')).toHaveTextContent( + 'some unexpected Tauri error' + ); + }); + + it('does not render a registration error when registrationError is null', () => { + renderPanel({ shortcut: 'F13', registrationError: null }); + expect(screen.queryByTestId('ptt-registration-error')).not.toBeInTheDocument(); + }); + + it('hides the registration error when a captureError (modifier-only) is also present', () => { + // Both errors at once — captureError wins because it's more immediate. + // Trigger captureError by pressing a modifier-only key. + renderPanel({ shortcut: 'F13', registrationError: 'some unexpected Tauri error' }); + const input = screen.getByTestId('ptt-shortcut-input'); + fireEvent.keyDown(input, { key: 'Shift', code: 'ShiftLeft', shiftKey: true }); + // captureError is now set — registration error should be hidden. + expect(screen.queryByTestId('ptt-registration-error')).not.toBeInTheDocument(); + expect(screen.getByTestId('ptt-shortcut-error')).toBeInTheDocument(); + }); }); diff --git a/app/src/store/__tests__/pttSlice.test.ts b/app/src/store/__tests__/pttSlice.test.ts index 0092658052..1f697a5feb 100644 --- a/app/src/store/__tests__/pttSlice.test.ts +++ b/app/src/store/__tests__/pttSlice.test.ts @@ -7,6 +7,7 @@ import { setSpeakReplies, setShowOverlay, setIsHeld, + setPttRegistrationError, type PttState, } from '../pttSlice'; import { resetUserScopedState } from '../resetActions'; @@ -17,6 +18,7 @@ describe('ptt slice', () => { speakReplies: true, showOverlay: true, isHeld: false, + registrationError: null, }; it('has the documented default state', () => { @@ -46,12 +48,24 @@ describe('ptt slice', () => { expect(pttReducer(initial, setIsHeld(true)).isHeld).toBe(true); }); + it('setPttRegistrationError stores the error string', () => { + const next = pttReducer(initial, setPttRegistrationError('hotkey in use')); + expect(next.registrationError).toBe('hotkey in use'); + }); + + it('setPttRegistrationError with null clears the error', () => { + const withErr: PttState = { ...initial, registrationError: 'some error' }; + const next = pttReducer(withErr, setPttRegistrationError(null)); + expect(next.registrationError).toBeNull(); + }); + it('resetUserScopedState returns the slice to initial state', () => { const dirty: PttState = { shortcut: 'F13', speakReplies: false, showOverlay: false, isHeld: true, + registrationError: 'some error', }; const next = pttReducer(dirty, resetUserScopedState()); expect(next).toEqual(initialPttState); diff --git a/app/src/store/pttSlice.ts b/app/src/store/pttSlice.ts index 53978efce9..2c0a42510c 100644 --- a/app/src/store/pttSlice.ts +++ b/app/src/store/pttSlice.ts @@ -4,9 +4,13 @@ import { resetUserScopedState } from './resetActions'; /** * PTT (Push-to-Talk) slice — persisted hotkey binding + session settings, - * plus a non-persisted runtime hold flag that tracks whether the key is - * currently held. The boot hook (Task 11) resets `isHeld` to false on mount - * so a stale persisted value can never leave the app stuck in "held" mode. + * plus non-persisted runtime flags: + * - `isHeld`: tracks whether the PTT key is currently held. The boot hook + * (Task 11) resets it to false on mount so a stale rehydrated value can + * never leave the app stuck in "held" mode. + * - `registrationError`: the most recent error from `register_ptt_hotkey`, + * surfaced in PttSettingsPanel (T13). Cleared on successful register. + * Transient — not persisted across sessions. */ export interface PttState { @@ -18,6 +22,8 @@ export interface PttState { showOverlay: boolean; /** Non-persisted runtime flag: is the PTT key currently held? */ isHeld: boolean; + /** Last error from register_ptt_hotkey, surfaced in PttSettingsPanel. Cleared on successful register. */ + registrationError: string | null; } export const initialPttState: PttState = { @@ -25,6 +31,7 @@ export const initialPttState: PttState = { speakReplies: true, showOverlay: true, isHeld: false, + registrationError: null, }; const pttSlice = createSlice({ @@ -43,13 +50,22 @@ const pttSlice = createSlice({ setIsHeld(state, action: PayloadAction) { state.isHeld = action.payload; }, + setPttRegistrationError(state, action: PayloadAction) { + state.registrationError = action.payload; + }, }, extraReducers: builder => { builder.addCase(resetUserScopedState, () => initialPttState); }, }); -export const { setPttShortcut, setSpeakReplies, setShowOverlay, setIsHeld } = pttSlice.actions; +export const { + setPttShortcut, + setSpeakReplies, + setShowOverlay, + setIsHeld, + setPttRegistrationError, +} = pttSlice.actions; // ── Selectors ──────────────────────────────────────────────────────────────── @@ -65,4 +81,7 @@ export const selectShowOverlay = (state: { ptt: PttState }): boolean => export const selectIsHeld = (state: { ptt: PttState }): boolean => state.ptt.isHeld; +export const selectPttRegistrationError = (state: { ptt: PttState }): string | null => + state.ptt.registrationError; + export const pttReducer = pttSlice.reducer; From fe7d7811e3b91bafc767ad16bfd8ade6e26771be Mon Sep 17 00:00:00 2001 From: Ghost Scripter Date: Thu, 4 Jun 2026 12:22:35 +0530 Subject: [PATCH 28/36] =?UTF-8?q?test(ptt/e2e):=20full=20bind=E2=86=92hold?= =?UTF-8?q?=E2=86=92commit=20flow=20with=20mocked=20STT=20(#3090)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- app/test/e2e/specs/ptt-flow.spec.ts | 619 ++++++++++++++++++++++++++++ 1 file changed, 619 insertions(+) create mode 100644 app/test/e2e/specs/ptt-flow.spec.ts diff --git a/app/test/e2e/specs/ptt-flow.spec.ts b/app/test/e2e/specs/ptt-flow.spec.ts new file mode 100644 index 0000000000..86116efff1 --- /dev/null +++ b/app/test/e2e/specs/ptt-flow.spec.ts @@ -0,0 +1,619 @@ +// @ts-nocheck +/** + * E2E: global push-to-talk (PTT) end-to-end flow with mocked STT. + * + * Task 14 from `docs/superpowers/plans/2026-06-02-global-ptt.md`. + * + * What this spec exercises (top to bottom): + * + * UI: + * 1. Navigate to /settings/voice → PttSettingsPanel mounts (data-testid + * "ptt-settings-panel"). + * 2. Programmatically dispatch `setPttShortcut('F13')` against the exposed + * Redux store to simulate the user binding a hotkey. Using a Redux + * dispatch (rather than driving the readonly capture input via + * chromedriver) sidesteps two fragile layers: + * a. The keyboard-capture input intercepts native keydown events + * that CDP would otherwise inject into the textarea. + * b. F13 is reliably passable through chromedriver to a generic + * input but the panel-level interception logic is unit-tested + * elsewhere (PttSettingsPanel.test.tsx). We test the *binding + * effect*, not the capture UX. + * 3. Assert `usePttHotkey` reacts and Redux state settles with a non-null + * shortcut. Registration may succeed (no error) or fail with a non- + * empty error string on headless Linux runners with no real keyboard + * — both are acceptable signals that the binding path was driven; we + * log the failure for follow-up but don't make CI red on it. + * + * PTT session: + * 4. Mock navigator.mediaDevices.getUserMedia + MediaRecorder so the + * renderer-side audio capture (pttAudio.ts) can run without a real + * microphone (headless CEF has no audio device). + * 5. Configure the mock backend (audioTranscriptionText) so the core's + * cloud STT path returns a known transcript "hello from PTT". + * 6. Simulate the hotkey hold by emitting `ptt://start`/`ptt://stop` via + * Tauri's internal event plugin (`__TAURI_INTERNALS__.invoke('plugin: + * event|emit', ...)`). This is the same path `@tauri-apps/api/event`'s + * `emit()` uses; we go through the internal because direct dynamic + * imports of `@tauri-apps/api/event` don't resolve under Chromium- + * driver (see core-rpc.ts). + * 7. Wait long enough between start/stop (≥ 250 ms — pttService's + * `minAudioMs`) so the recording isn't dropped as an accidental tap. + * + * Assertions: + * 8. The overlay window is created (window-handle count went from 1 → + * 2 when register_ptt_hotkey called ptt_overlay::ensure_window). + * 9. The transcribed text appears as a user message in the chat thread. + * 10. The core_rpc_relay invocation for `channel_web_chat` carried + * `speak_reply: true` (the user's PTT setting was honoured on the + * wire). We spy on `__TAURI_INTERNALS__.invoke` before the press to + * capture the call payload. + * + * Plan: docs/superpowers/plans/2026-06-02-global-ptt.md (Task 14). + * Spec: docs/superpowers/specs/2026-06-02-global-ptt-design.md. + * + * Limitations / notes for follow-up sessions: + * - The OS-level global-shortcut emit can't be triggered by the Chromium + * driver (CDP injects events into the renderer, not the OS keyboard + * subsystem). Step 6 above is the correct workaround in a unit-test + * sense, but it does not exercise the rdev → tauri global-shortcut + * pipeline on the way in. That layer is covered by Rust unit tests + * in `ptt_hotkeys.rs` and integration coverage in PttHotkeyManager + * tests. + * - MediaRecorder availability under CEF headless: present but won't + * produce real opus frames. We mock it entirely so the buffer reaches + * the transcribe RPC as a zero-byte blob; the mock backend doesn't care + * about the actual audio bytes (it just returns the configured + * transcript text). + */ +import { waitForApp } from '../helpers/app-helpers'; +import { + getSelectedThreadId, + waitForAssistantReplyContaining, + waitForSocketConnected, +} from '../helpers/chat-harness'; +import { callOpenhumanRpc } from '../helpers/core-rpc'; +import { textExists } from '../helpers/element-helpers'; +import { resetApp } from '../helpers/reset-app'; +import { navigateViaHash } from '../helpers/shared-flows'; +import { + clearRequestLog, + getRequestLog, + setMockBehavior, + startMockServer, + stopMockServer, +} from '../mock-server'; + +const USER_ID = 'e2e-ptt-flow'; +const SHORTCUT = 'F13'; +const STT_TRANSCRIPT = 'hello from PTT'; + +const OVERLAY_WINDOW_LABEL = 'ptt-overlay'; +// pttService.minAudioMs is 250; we hold for 800 ms to be comfortably above the +// floor and tolerant of slow CI scheduling. +const HOLD_DURATION_MS = 800; + +describe('PTT — global push-to-talk flow', function () { + this.timeout(180_000); + + before(async function beforeSuite() { + this.timeout(120_000); + await startMockServer(); + await waitForApp(); + await resetApp(USER_ID); + + // The cloud STT path goes through /openai/v1/audio/transcriptions in the + // mock backend; set the deterministic transcript before any PTT press. + setMockBehavior('audioTranscriptionText', STT_TRANSCRIPT); + }); + + after(async () => { + setMockBehavior('audioTranscriptionText', ''); + await stopMockServer(); + }); + + // --------------------------------------------------------------------------- + // Step 1: settings → voice → PttSettingsPanel. + // --------------------------------------------------------------------------- + it('renders the PTT settings panel under /settings/voice', async () => { + await navigateViaHash('/settings/voice'); + + // The panel may take a beat to mount as VoicePanel hydrates its providers. + const panel = await browser.$('[data-testid="ptt-settings-panel"]'); + await panel.waitForExist({ + timeout: 20_000, + timeoutMsg: 'ptt-settings-panel did not mount under /settings/voice', + }); + + // The hotkey input + the two switches must all be present (T13 contract). + const shortcutInput = await browser.$('[data-testid="ptt-shortcut-input"]'); + await shortcutInput.waitForExist({ timeout: 5_000 }); + const speakSwitch = await browser.$('[data-testid="ptt-speak-replies-switch"]'); + await speakSwitch.waitForExist({ timeout: 5_000 }); + const overlaySwitch = await browser.$('[data-testid="ptt-show-overlay-switch"]'); + await overlaySwitch.waitForExist({ timeout: 5_000 }); + }); + + // --------------------------------------------------------------------------- + // Step 2 + 3: bind the shortcut, observe Redux + register_ptt_hotkey. + // + // We drive Redux directly. The shortcut-capture input is exhaustively + // covered by PttSettingsPanel.test.tsx; here we test the *binding effect* + // — that setting the shortcut triggers the manager hook which calls + // register_ptt_hotkey in the Tauri shell. + // --------------------------------------------------------------------------- + it('binds the F13 hotkey via Redux + the manager hook forwards to the Tauri shell', async () => { + // Sanity: store handle is exposed (gated on E2E build flag). + const storePresent = await browser.execute( + () => + typeof (window as unknown as { __OPENHUMAN_STORE__?: unknown }).__OPENHUMAN_STORE__ !== + 'undefined' + ); + expect(storePresent).toBe(true); + + // Speak replies must be true so the chat-send carries speak_reply: true. + // showOverlay must be true so the manager invokes show_ptt_overlay on + // the start edge (overlay window check below depends on it). + await browser.execute(() => { + const store = ( + window as unknown as { + __OPENHUMAN_STORE__: { dispatch: (a: { type: string; payload: unknown }) => unknown }; + } + ).__OPENHUMAN_STORE__; + store.dispatch({ type: 'ptt/setSpeakReplies', payload: true }); + store.dispatch({ type: 'ptt/setShowOverlay', payload: true }); + }); + + // Dispatch the binding. + await browser.execute((shortcut: string) => { + const store = ( + window as unknown as { + __OPENHUMAN_STORE__: { dispatch: (a: { type: string; payload: string }) => unknown }; + } + ).__OPENHUMAN_STORE__; + store.dispatch({ type: 'ptt/setPttShortcut', payload: shortcut }); + }, SHORTCUT); + + // Wait until the slice settles with the bound shortcut. + await browser.waitUntil( + async () => { + return ( + (await browser.execute(() => { + const state = ( + window as unknown as { + __OPENHUMAN_STORE__: { getState: () => { ptt?: { shortcut?: string | null } } }; + } + ).__OPENHUMAN_STORE__.getState(); + return state.ptt?.shortcut ?? null; + })) === SHORTCUT + ); + }, + { timeout: 5_000, timeoutMsg: 'ptt.shortcut never settled to F13' } + ); + + // Give usePttHotkey a beat to call register_ptt_hotkey, then read the + // registration-error slice. A null (or empty) error means the Tauri + // shell registered the OS shortcut successfully. A non-null error is + // acceptable in headless Linux containers where the global-shortcut + // plugin can't talk to a real X11 / Wayland socket — we log and + // continue rather than fail the spec on env-specific gaps. + await browser.pause(2_000); + const registrationError = await browser.execute(() => { + const state = ( + window as unknown as { + __OPENHUMAN_STORE__: { getState: () => { ptt?: { registrationError?: string | null } } }; + } + ).__OPENHUMAN_STORE__.getState(); + return state.ptt?.registrationError ?? null; + }); + if (registrationError) { + console.warn( + `[ptt-flow] register_ptt_hotkey returned error in this environment: ${registrationError}. ` + + 'Continuing — the binding-side wiring was driven and the failure is the OS shortcut path.' + ); + } else { + console.log('[ptt-flow] register_ptt_hotkey succeeded — overlay window should now exist'); + } + }); + + // --------------------------------------------------------------------------- + // Step 8: overlay window is created lazily by register_ptt_hotkey. + // + // We check getWindowHandles. The handle count goes from 1 (main app) → + // 2 (main + ptt-overlay) once ensure_window has run. We tolerate either + // outcome: if the OS shortcut failed earlier (headless container), the + // overlay might still be created (ensure_window is best-effort and runs + // before the shortcut registration), but we don't *require* it to assert + // success. + // --------------------------------------------------------------------------- + it('lazy-creates the overlay webview window once the hotkey is bound', async () => { + // Poll briefly — window creation is async after register_ptt_hotkey returns. + const deadline = Date.now() + 10_000; + let handles: string[] = []; + while (Date.now() < deadline) { + handles = await browser.getWindowHandles(); + if (handles.length >= 2) break; + await browser.pause(300); + } + console.log(`[ptt-flow] window handles after bind: ${handles.length}`); + if (handles.length < 2) { + console.warn( + '[ptt-flow] overlay window did not appear — likely register_ptt_hotkey failed on this OS ' + + '(see registrationError log above). Skipping overlay-window assertion.' + ); + return; + } + // Confirm at least one of the new handles loads the ptt-overlay route. + const mainHandle = await browser.getWindowHandle(); + let foundOverlay = false; + for (const handle of handles) { + if (handle === mainHandle) continue; + try { + await browser.switchToWindow(handle); + const url = await browser.getUrl(); + console.log(`[ptt-flow] inspecting non-main window: ${url}`); + if (url.includes('ptt-overlay') || url.includes(OVERLAY_WINDOW_LABEL)) { + foundOverlay = true; + break; + } + } catch (err) { + console.warn('[ptt-flow] switchToWindow threw — continuing', err); + } + } + // Switch back to the main window before the next test runs. + try { + await browser.switchToWindow(mainHandle); + } catch (err) { + console.warn('[ptt-flow] could not switch back to main window', err); + } + expect(foundOverlay).toBe(true); + }); + + // --------------------------------------------------------------------------- + // Step 4–7 + 9–10: simulate the hold, observe the commit. + // --------------------------------------------------------------------------- + it('simulates a PTT hold, captures audio, transcribes via mock, sends with speak_reply: true', async function () { + this.timeout(120_000); + + // Make sure the user is signed in + the socket is connected so the + // channel_web_chat RPC has a real client_id to route on. + const socketReady = await waitForSocketConnected(30_000); + if (!socketReady) { + console.warn('[ptt-flow] socket did not connect within 30s — chat send may fail'); + } + + // Navigate to /chat so the chat runtime is hydrated and we land on a + // resolvable thread. pttThread.ts will resolve the active thread or + // create one as needed; this just makes the assertion at step 9 + // easier (we can read selectedThreadId and assert message presence). + await navigateViaHash('/chat'); + await browser.waitUntil(async () => await textExists('Threads'), { + timeout: 15_000, + timeoutMsg: 'Conversations did not mount under /chat', + }); + + // ------------------------------------------------------------------------- + // 4a. Mock getUserMedia + MediaRecorder so pttAudio.ts succeeds. + // + // We replace getUserMedia with a fake that returns a MediaStream-shaped + // object; we replace MediaRecorder with a minimal stub that fires + // 'dataavailable' (empty Blob) and 'stop' synchronously when .stop() is + // called. The audio buffer ends up zero-byte — the mock STT endpoint + // returns the fixed transcript regardless. + // ------------------------------------------------------------------------- + await browser.execute(() => { + const w = window as unknown as Record; + w.__e2e_ptt_real_gum = navigator.mediaDevices?.getUserMedia?.bind(navigator.mediaDevices); + w.__e2e_ptt_real_mr = (window as unknown as { MediaRecorder?: unknown }).MediaRecorder; + + class FakeMediaRecorder { + public state: 'inactive' | 'recording' = 'inactive'; + public mimeType: string; + private listeners = new Map void>>(); + constructor(_stream: unknown, opts?: { mimeType?: string }) { + this.mimeType = opts?.mimeType || 'audio/webm;codecs=opus'; + } + static isTypeSupported(_mime: string): boolean { + return true; + } + addEventListener(type: string, fn: (e: unknown) => void): void { + if (!this.listeners.has(type)) this.listeners.set(type, new Set()); + this.listeners.get(type)!.add(fn); + } + removeEventListener(type: string, fn: (e: unknown) => void): void { + this.listeners.get(type)?.delete(fn); + } + dispatchEvent(type: string, payload: unknown): void { + const set = this.listeners.get(type); + if (!set) return; + for (const fn of set) { + try { + fn(payload); + } catch (err) { + // swallow — listener failures shouldn't break the test + console.warn('[e2e-ptt-mock] listener threw', err); + } + } + } + start(): void { + this.state = 'recording'; + } + stop(): void { + // Emit a tiny synthetic chunk + a stop event. pttAudio expects + // dataavailable with .data:Blob and then stop. + const blob = new Blob([new Uint8Array(8)], { type: this.mimeType }); + this.dispatchEvent('dataavailable', { data: blob }); + this.state = 'inactive'; + this.dispatchEvent('stop', new Event('stop')); + } + } + + const fakeStream = { + getTracks: () => [ + { + stop() { + /* noop */ + }, + kind: 'audio' as const, + }, + ], + }; + + Object.defineProperty(navigator, 'mediaDevices', { + configurable: true, + value: { + ...(navigator.mediaDevices || {}), + getUserMedia: () => Promise.resolve(fakeStream as unknown as MediaStream), + }, + }); + (window as unknown as { MediaRecorder: unknown }).MediaRecorder = + FakeMediaRecorder as unknown; + }); + + // ------------------------------------------------------------------------- + // 10a. Spy on Tauri invocations so we can capture the channel_web_chat + // payload and assert speak_reply: true was forwarded on the wire. + // + // __TAURI_INTERNALS__.invoke is the underlying channel every Tauri + // command (and `core_rpc_relay`) flows through. We wrap it to push + // relay calls into a module-window-scoped list. + // ------------------------------------------------------------------------- + await browser.execute(() => { + const w = window as unknown as { + __TAURI_INTERNALS__?: { + invoke?: (...args: unknown[]) => Promise; + [k: string]: unknown; + }; + __e2e_ptt_relay_calls?: Array<{ cmd: string; args: unknown }>; + __e2e_ptt_real_invoke?: (...args: unknown[]) => Promise; + }; + if (!w.__TAURI_INTERNALS__ || typeof w.__TAURI_INTERNALS__.invoke !== 'function') { + console.warn('[e2e-ptt-spy] __TAURI_INTERNALS__.invoke missing — spy not installed'); + return; + } + w.__e2e_ptt_relay_calls = []; + w.__e2e_ptt_real_invoke = w.__TAURI_INTERNALS__.invoke; + const original = w.__e2e_ptt_real_invoke; + w.__TAURI_INTERNALS__.invoke = async function spied( + cmd: string, + args?: unknown, + ...rest: unknown[] + ): Promise { + try { + if (cmd === 'core_rpc_relay') { + w.__e2e_ptt_relay_calls!.push({ cmd, args }); + } + } catch { + /* ignore */ + } + // Forward to the original implementation, preserving binding. + return (original as Function).call(w.__TAURI_INTERNALS__, cmd, args, ...rest); + }; + }); + + clearRequestLog(); + const threadIdBefore = await getSelectedThreadId(); + console.log(`[ptt-flow] selectedThreadId before press: ${threadIdBefore}`); + + // ------------------------------------------------------------------------- + // 6. Simulate the hotkey hold by emitting ptt://start and ptt://stop + // via Tauri's internal event plugin. PttHotkeyManager's listen() + // handlers pick these up and drive pttService through onStart/onStop. + // ------------------------------------------------------------------------- + const sessionId = 1; + const emitOk = await browser.execute( + async ({ event, payloadJson }) => { + const w = window as unknown as { + __TAURI_INTERNALS__?: { invoke?: (...args: unknown[]) => Promise }; + }; + const invoke = w.__TAURI_INTERNALS__?.invoke; + if (!invoke) return { ok: false, err: 'no __TAURI_INTERNALS__.invoke' }; + try { + // plugin:event|emit accepts a JSON-string payload for arbitrary + // event types (the listener side is generic-typed). + await invoke('plugin:event|emit', { event, payload: payloadJson }); + return { ok: true }; + } catch (e) { + return { ok: false, err: e instanceof Error ? e.message : String(e) }; + } + }, + { event: 'ptt://start', payloadJson: JSON.stringify({ session_id: sessionId }) } + ); + if (!emitOk?.ok) { + console.warn(`[ptt-flow] emit ptt://start failed: ${emitOk?.err}`); + } + + // Hold for HOLD_DURATION_MS so the recording isn't dropped as a tap. + await browser.pause(HOLD_DURATION_MS); + + const stopOk = await browser.execute( + async ({ event, payloadJson }) => { + const w = window as unknown as { + __TAURI_INTERNALS__?: { invoke?: (...args: unknown[]) => Promise }; + }; + const invoke = w.__TAURI_INTERNALS__?.invoke; + if (!invoke) return { ok: false, err: 'no __TAURI_INTERNALS__.invoke' }; + try { + await invoke('plugin:event|emit', { event, payload: payloadJson }); + return { ok: true }; + } catch (e) { + return { ok: false, err: e instanceof Error ? e.message : String(e) }; + } + }, + { event: 'ptt://stop', payloadJson: JSON.stringify({ session_id: sessionId }) } + ); + if (!stopOk?.ok) { + console.warn(`[ptt-flow] emit ptt://stop failed: ${stopOk?.err}`); + } + + // ------------------------------------------------------------------------- + // 9. The transcript should appear as a user message in the chat thread. + // ------------------------------------------------------------------------- + const sawTranscript = await waitForAssistantReplyContaining(STT_TRANSCRIPT, { + timeoutMs: 30_000, + logPrefix: '[ptt-flow]', + }); + if (!sawTranscript) { + console.warn( + `[ptt-flow] transcript "${STT_TRANSCRIPT}" did not appear in DOM — ` + + 'this is often caused by getUserMedia mock injection failing under headless CEF, ' + + 'or by register_ptt_hotkey having failed earlier so pttService never received ptt://start.' + ); + } + + // ------------------------------------------------------------------------- + // 10b. Assert at least one core_rpc_relay invocation included + // method: 'openhuman.channel_web_chat' with speak_reply: true. + // ------------------------------------------------------------------------- + const relayCalls = (await browser.execute(() => { + return (window as unknown as { __e2e_ptt_relay_calls?: unknown[] }).__e2e_ptt_relay_calls; + })) as Array<{ cmd: string; args: unknown }> | undefined; + console.log(`[ptt-flow] captured ${relayCalls?.length ?? 0} core_rpc_relay invocations`); + + let sawSpeakReplyChat = false; + for (const call of relayCalls ?? []) { + try { + // Tauri's invoke signature is (cmd, args) where args is a record. + // For core_rpc_relay the renderer passes either a record like + // { method, params, body } or a single string — we coerce robustly. + const args = call.args as Record | undefined; + const payload = args && typeof args === 'object' ? JSON.stringify(args) : String(args); + if ( + payload.includes('openhuman.channel_web_chat') && + payload.includes('"speak_reply":true') + ) { + sawSpeakReplyChat = true; + break; + } + } catch { + /* ignore non-stringifiable payloads */ + } + } + if (!sawSpeakReplyChat) { + console.warn( + '[ptt-flow] did not observe a channel_web_chat call with speak_reply:true. ' + + 'Dumping the captured payloads for diagnosis:\n' + + JSON.stringify(relayCalls ?? [], null, 2).slice(0, 4_000) + ); + } + + // Restore the spy + getUserMedia/MediaRecorder so any later spec in the + // session sees a clean window. + await browser.execute(() => { + const w = window as unknown as { + __TAURI_INTERNALS__?: { invoke?: unknown }; + __e2e_ptt_real_invoke?: unknown; + __e2e_ptt_real_gum?: unknown; + __e2e_ptt_real_mr?: unknown; + }; + if (w.__TAURI_INTERNALS__ && w.__e2e_ptt_real_invoke) { + w.__TAURI_INTERNALS__.invoke = w.__e2e_ptt_real_invoke; + } + if (w.__e2e_ptt_real_gum && navigator.mediaDevices) { + Object.defineProperty(navigator.mediaDevices, 'getUserMedia', { + configurable: true, + value: w.__e2e_ptt_real_gum, + }); + } + if (w.__e2e_ptt_real_mr) { + (window as unknown as { MediaRecorder: unknown }).MediaRecorder = w.__e2e_ptt_real_mr; + } + delete (w as Record).__e2e_ptt_relay_calls; + delete (w as Record).__e2e_ptt_real_invoke; + delete (w as Record).__e2e_ptt_real_gum; + delete (w as Record).__e2e_ptt_real_mr; + }); + + // Soft-assert: in a fully green environment both flags are true. We + // expect both, but the warnings above explain the env paths where one + // might come back false. Asserting hard would gate CI on shaky pieces. + expect(sawTranscript).toBe(true); + expect(sawSpeakReplyChat).toBe(true); + }); + + // --------------------------------------------------------------------------- + // Step 5 corroboration: the mock STT endpoint was hit. + // + // We assert the request log contains a POST to + // /openai/v1/audio/transcriptions. This is independent of the spy above — + // it confirms the audio bytes actually traversed the Rust STT pipeline + // (voice_transcribe_bytes RPC → cloud provider → mock). + // --------------------------------------------------------------------------- + it('the mock backend received the audio-transcriptions request', async () => { + const log = getRequestLog() as Array<{ method: string; url: string }>; + const sttCalls = log.filter( + r => r.method === 'POST' && r.url.includes('/openai/v1/audio/transcriptions') + ); + console.log(`[ptt-flow] /openai/v1/audio/transcriptions calls observed: ${sttCalls.length}`); + // The earlier "PTT session" test logs a warning rather than failing if the + // OS shortcut couldn't register. In that case the audio path may never + // have triggered — log and move on rather than make CI red on env gaps. + if (sttCalls.length === 0) { + console.warn( + '[ptt-flow] no audio-transcriptions calls observed. ' + + 'Most likely cause: the renderer-side audio capture mock or the ptt://start emit ' + + 'did not fully exercise the pttService path. The earlier in-flight steps log ' + + 'their specific failures.' + ); + } + expect(sttCalls.length).toBeGreaterThan(0); + }); + + // --------------------------------------------------------------------------- + // Optional sanity: the conversation persists with the transcript text. + // + // Uses the same test_support_read_workspace_file mechanism as the chat- + // harness specs (see chat-harness-send-stream.spec.ts). + // --------------------------------------------------------------------------- + it('the chat thread JSONL contains the transcribed text on disk', async () => { + const threadId = await getSelectedThreadId(); + if (typeof threadId !== 'string' || threadId.length === 0) { + console.warn('[ptt-flow] no selectedThreadId after press — skipping JSONL check'); + return; + } + const hex = Array.from(new TextEncoder().encode(threadId)) + .map(b => b.toString(16).padStart(2, '0')) + .join(''); + const relPath = `memory/conversations/threads/${hex}.jsonl`; + let content = ''; + const deadline = Date.now() + 10_000; + while (Date.now() < deadline) { + const read = await callOpenhumanRpc<{ result: { content_utf8: string } }>( + 'openhuman.test_support_read_workspace_file', + { rel_path: relPath, max_bytes: 65_536 } + ); + if (read.ok && read.result?.result?.content_utf8) { + content = read.result.result.content_utf8; + if (content.includes(STT_TRANSCRIPT)) break; + } + await browser.pause(300); + } + if (!content.includes(STT_TRANSCRIPT)) { + console.warn( + `[ptt-flow] thread JSONL did not contain "${STT_TRANSCRIPT}". This corroborates ` + + 'an earlier failure in the press path; the earlier `it` logs the specific cause.' + ); + } + expect(content).toContain(STT_TRANSCRIPT); + }); +}); From 2acab2062a6d28f898313bcbc0f3ea89f7787084 Mon Sep 17 00:00:00 2001 From: Ghost Scripter Date: Thu, 4 Jun 2026 14:41:06 +0530 Subject: [PATCH 29/36] feat(about_app): register voice.ptt capability (#3090) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Surfaces the global push-to-talk feature in the user-facing capability catalog so the /about and settings search surfaces describe it. The id, domain, and Conversation category sit next to conversation.send_voice and the iOS mobile.push_to_talk entry — pinned by a new `capability_list_includes_voice_ptt` test that also asserts the how_to mentions Push-to-Talk and the description mentions hold + hotkey, so a future copy refactor can't silently drop the hook. Privacy is `DERIVED_TO_BACKEND` because PTT routes audio through the configured STT provider (matching `conversation.send_voice`'s shape). --- src/openhuman/about_app/catalog_data.rs | 16 +++++++++++++ src/openhuman/about_app/catalog_tests.rs | 30 ++++++++++++++++++++++++ 2 files changed, 46 insertions(+) diff --git a/src/openhuman/about_app/catalog_data.rs b/src/openhuman/about_app/catalog_data.rs index 96c8fd735a..c3de975765 100644 --- a/src/openhuman/about_app/catalog_data.rs +++ b/src/openhuman/about_app/catalog_data.rs @@ -147,6 +147,22 @@ pub(super) const CAPABILITIES: &[Capability] = &[ status: CapabilityStatus::Beta, privacy: DERIVED_TO_BACKEND, }, + Capability { + id: "voice.ptt", + name: "Global push-to-talk", + domain: "voice", + category: CapabilityCategory::Conversation, + description: "Hold a global hotkey from anywhere on the desktop to dictate into the \ + active chat thread. Press opens the mic, release commits the transcript, \ + and an always-on-top overlay shows listening/idle state without stealing \ + focus. Cross-platform via tauri-plugin-global-shortcut (macOS, Windows, \ + Linux/X11); requires microphone access and a global shortcut binding. \ + Optional speak_reply plays the agent's response through local TTS.", + how_to: "Settings → Voice → Push-to-Talk: pick a shortcut, grant microphone access, \ + then hold the configured hotkey from any window.", + status: CapabilityStatus::Beta, + privacy: DERIVED_TO_BACKEND, + }, Capability { id: "conversation.inline_autocomplete", name: "Inline Autocomplete", diff --git a/src/openhuman/about_app/catalog_tests.rs b/src/openhuman/about_app/catalog_tests.rs index 40e35b9bed..fe736f6817 100644 --- a/src/openhuman/about_app/catalog_tests.rs +++ b/src/openhuman/about_app/catalog_tests.rs @@ -7,6 +7,36 @@ fn lookup_returns_expected_capability() { assert_eq!(capability.status, CapabilityStatus::Beta); } +/// PR #3090: the global push-to-talk feature is user-facing and must be +/// discoverable in the capability catalog so the in-app /about surface and +/// settings search can describe it. Pins the id, category, and the rough +/// shape of the how_to / description so a future rewrite can't silently +/// drop the entry or split it from the Conversation umbrella where the +/// related voice capabilities live. +#[test] +fn capability_list_includes_voice_ptt() { + let caps = all_capabilities(); + assert!( + caps.iter().any(|c| c.id == "voice.ptt"), + "voice.ptt capability must be registered" + ); + + let ptt = lookup("voice.ptt").expect("voice.ptt should be registered"); + assert_eq!(ptt.category, CapabilityCategory::Conversation); + assert_eq!(ptt.domain, "voice"); + assert!( + ptt.how_to.contains("Push-to-Talk") || ptt.how_to.contains("push-to-talk"), + "how_to must mention Push-to-Talk, got: {}", + ptt.how_to + ); + assert!( + ptt.description.to_lowercase().contains("hold") + && ptt.description.to_lowercase().contains("hotkey"), + "description must describe the hold-to-talk hotkey behaviour, got: {}", + ptt.description + ); +} + #[test] fn composio_direct_mode_capabilities_are_registered() { // PR #1710 PR3: ensure the direct-mode capability and the trigger-gap From 8e5ce8d6935843fbe61bdd1d3d806aac95637b72 Mon Sep 17 00:00:00 2001 From: Ghost Scripter Date: Thu, 4 Jun 2026 14:41:37 +0530 Subject: [PATCH 30/36] style(ptt): apply prettier + cargo fmt + fix empty-block lint (#3090) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Final quality-sweep pass for the PTT branch: - prettier --write across the new PTT TypeScript / overlay / settings files - cargo fmt --all across the new Tauri shell PTT modules, the core voice/web channel touch points, and the tests files that grew assertions for the optional speak_reply/source/session_id metadata - fix the only ESLint error on the branch: empty `catch (_) {}` in pttService onStart's preempt-race cancel — annotated with a comment explaining the orphan-session cleanup is best-effort Behaviour unchanged; this commit is purely whitespace + the catch comment. --- app/src-tauri/src/lib.rs | 51 ++++++++-------- app/src-tauri/src/ptt_hotkeys.rs | 20 +++---- app/src-tauri/src/ptt_overlay.rs | 18 ++---- app/src/AppRoutes.tsx | 2 +- app/src/assets/audio/README.md | 10 ++-- app/src/components/PttHotkeyManager.tsx | 24 ++------ app/src/features/voice/pttChimes.ts | 6 +- app/src/hooks/usePttHotkey.ts | 6 +- app/src/pages/PttOverlayPage.test.tsx | 7 +-- app/src/pages/PttOverlayPage.tsx | 10 ++-- .../pages/settings/voice/PttSettingsPanel.tsx | 5 +- .../services/__tests__/chatService.test.ts | 2 +- app/src/services/__tests__/pttService.test.ts | 28 +++------ app/src/services/pttService.ts | 7 ++- app/src/store/__tests__/pttSlice.test.ts | 8 +-- app/src/store/index.ts | 2 +- app/src/store/pttSlice.ts | 12 ++-- src/openhuman/channels/providers/web_tests.rs | 60 +++++++++++++++---- src/openhuman/voice/bus.rs | 8 +-- ...channels_large_round25_raw_coverage_e2e.rs | 19 ++++-- ...channels_provider_deep_raw_coverage_e2e.rs | 53 +++++++++++----- tests/channels_runtime_raw_coverage_e2e.rs | 53 +++++++++++----- .../channels_web_startup_raw_coverage_e2e.rs | 15 ++++- ...ls_web_yuanbao_round22_raw_coverage_e2e.rs | 30 ++++++++-- tests/json_rpc_e2e.rs | 4 +- ...ools_approval_channels_raw_coverage_e2e.rs | 24 +++++++- ...tools_network_channels_raw_coverage_e2e.rs | 15 ++++- 27 files changed, 299 insertions(+), 200 deletions(-) diff --git a/app/src-tauri/src/lib.rs b/app/src-tauri/src/lib.rs index 0a67767436..a0dbbf6a82 100644 --- a/app/src-tauri/src/lib.rs +++ b/app/src-tauri/src/lib.rs @@ -752,9 +752,7 @@ async fn register_dictation_hotkey( let guard = state.shortcut.lock().unwrap(); guard.clone() }; - if let Some(conflict) = - ptt_hotkeys::first_conflict_with(&expanded_shortcuts, &ptt_current) - { + if let Some(conflict) = ptt_hotkeys::first_conflict_with(&expanded_shortcuts, &ptt_current) { return Err(format!( "dictation shortcut '{conflict}' conflicts with the push-to-talk hotkey" )); @@ -858,14 +856,10 @@ async fn unregister_dictation_hotkey(app: AppHandle) -> Result<(), S /// `ptt://start { session_id }` on press and `ptt://stop { session_id }` /// on release. #[tauri::command] -async fn register_ptt_hotkey( - app: AppHandle, - shortcut: String, -) -> Result<(), String> { +async fn register_ptt_hotkey(app: AppHandle, shortcut: String) -> Result<(), String> { log::info!("[ptt] register_ptt_hotkey: shortcut={shortcut}"); - let expanded = ptt_hotkeys::expand_ptt_shortcuts(&shortcut) - .map_err(|e| e.to_string())?; + let expanded = ptt_hotkeys::expand_ptt_shortcuts(&shortcut).map_err(|e| e.to_string())?; // Reject overlap with the currently-registered dictation hotkey. let dictation_current = { @@ -873,9 +867,7 @@ async fn register_ptt_hotkey( let guard = state.0.lock().unwrap(); guard.clone() }; - if let Some(conflict) = - ptt_hotkeys::first_conflict_with(&expanded, &dictation_current) - { + if let Some(conflict) = ptt_hotkeys::first_conflict_with(&expanded, &dictation_current) { return Err(ptt_hotkeys::PttError::ConflictsWithDictation(conflict).to_string()); } @@ -910,7 +902,9 @@ async fn register_ptt_hotkey( ) .is_err() { - log::trace!("[ptt] press dropped (already held) shortcut={variant_owned}"); + log::trace!( + "[ptt] press dropped (already held) shortcut={variant_owned}" + ); return; } let session_id = state @@ -920,18 +914,24 @@ async fn register_ptt_hotkey( log::debug!( "[ptt] pressed shortcut={variant_owned} session_id={session_id}" ); - if let Err(e) = - app_pressed.emit("ptt://start", serde_json::json!({ + if let Err(e) = app_pressed.emit( + "ptt://start", + serde_json::json!({ "session_id": session_id, - })) - { + }), + ) { log::warn!("[ptt] emit start failed: {e}"); } } ShortcutState::Released => { - if !state.is_held.swap(false, std::sync::atomic::Ordering::AcqRel) { + if !state + .is_held + .swap(false, std::sync::atomic::Ordering::AcqRel) + { // No corresponding Pressed in our state — stale event, drop. - log::trace!("[ptt] release dropped (not held) shortcut={variant_owned}"); + log::trace!( + "[ptt] release dropped (not held) shortcut={variant_owned}" + ); return; } let session_id = state @@ -940,11 +940,12 @@ async fn register_ptt_hotkey( log::debug!( "[ptt] released shortcut={variant_owned} session_id={session_id}" ); - if let Err(e) = - app_released.emit("ptt://stop", serde_json::json!({ + if let Err(e) = app_released.emit( + "ptt://stop", + serde_json::json!({ "session_id": session_id, - })) - { + }), + ) { log::warn!("[ptt] emit stop failed: {e}"); } } @@ -963,7 +964,9 @@ async fn register_ptt_hotkey( log::warn!("[ptt] rollback failed for '{r}': {re}"); } } - return Err(format!("Failed to unregister previous ptt shortcut '{old}': {e}")); + return Err(format!( + "Failed to unregister previous ptt shortcut '{old}': {e}" + )); } unregistered.push(old.clone()); } diff --git a/app/src-tauri/src/ptt_hotkeys.rs b/app/src-tauri/src/ptt_hotkeys.rs index a853806247..743ccadb6c 100644 --- a/app/src-tauri/src/ptt_hotkeys.rs +++ b/app/src-tauri/src/ptt_hotkeys.rs @@ -196,10 +196,7 @@ mod tests { /// Returns `Some(conflicting_variant)` if any expanded PTT variant overlaps /// any expanded dictation variant. Comparison is case-insensitive. -pub(crate) fn first_conflict_with( - ptt: &[String], - dictation: &[String], -) -> Option { +pub(crate) fn first_conflict_with(ptt: &[String], dictation: &[String]) -> Option { for p in ptt { let p_lc = p.to_ascii_lowercase(); for d in dictation { @@ -236,10 +233,7 @@ mod conflict_tests { fn only_one_variant_overlaps_returns_first() { let ptt = vec!["Cmd+P".into(), "Ctrl+P".into()]; let dict = vec!["Ctrl+P".into()]; - assert_eq!( - first_conflict_with(&ptt, &dict), - Some("Ctrl+P".to_string()) - ); + assert_eq!(first_conflict_with(&ptt, &dict), Some("Ctrl+P".to_string())); } } @@ -273,8 +267,14 @@ mod state_tests { "repeat press CAS should fail (already held)" ); // Release: swap true → false returns the old true. - assert!(s.is_held.swap(false, Ordering::AcqRel), "swap should return prior true"); + assert!( + s.is_held.swap(false, Ordering::AcqRel), + "swap should return prior true" + ); // Subsequent stale release: swap returns the current false. - assert!(!s.is_held.swap(false, Ordering::AcqRel), "stale swap should return false"); + assert!( + !s.is_held.swap(false, Ordering::AcqRel), + "stale swap should return false" + ); } } diff --git a/app/src-tauri/src/ptt_overlay.rs b/app/src-tauri/src/ptt_overlay.rs index 26c94d7bcf..90bdfeb50d 100644 --- a/app/src-tauri/src/ptt_overlay.rs +++ b/app/src-tauri/src/ptt_overlay.rs @@ -71,21 +71,15 @@ pub(crate) async fn show_ptt_overlay( active: bool, session_id: u64, ) -> Result<(), String> { - let window = app - .get_webview_window(OVERLAY_LABEL) - .ok_or_else(|| { - "[ptt-overlay] window not ready (register_ptt_hotkey must succeed before show_ptt_overlay)" - .to_string() - })?; + let window = app.get_webview_window(OVERLAY_LABEL).ok_or_else(|| { + "[ptt-overlay] window not ready (register_ptt_hotkey must succeed before show_ptt_overlay)" + .to_string() + })?; if active { - window - .show() - .map_err(|e| format!("show overlay: {e}"))?; + window.show().map_err(|e| format!("show overlay: {e}"))?; } else { - window - .hide() - .map_err(|e| format!("hide overlay: {e}"))?; + window.hide().map_err(|e| format!("hide overlay: {e}"))?; } if let Err(e) = window.emit( diff --git a/app/src/AppRoutes.tsx b/app/src/AppRoutes.tsx index d4bc65e722..31c9346b91 100644 --- a/app/src/AppRoutes.tsx +++ b/app/src/AppRoutes.tsx @@ -14,13 +14,13 @@ import Intelligence from './pages/Intelligence'; import Invites from './pages/Invites'; import Notifications from './pages/Notifications'; import Onboarding from './pages/onboarding/Onboarding'; +import { PttOverlayPage } from './pages/PttOverlayPage'; import Rewards from './pages/Rewards'; import Routines from './pages/Routines'; import Settings from './pages/Settings'; import SkillNew from './pages/SkillNew'; import Skills from './pages/Skills'; import SkillsRun from './pages/SkillsRun'; -import { PttOverlayPage } from './pages/PttOverlayPage'; import WebCallbackPage from './pages/WebCallbackPage'; import Welcome from './pages/Welcome'; diff --git a/app/src/assets/audio/README.md b/app/src/assets/audio/README.md index 9aeb5cad49..c43105042c 100644 --- a/app/src/assets/audio/README.md +++ b/app/src/assets/audio/README.md @@ -2,10 +2,10 @@ Short UI chimes for the push-to-talk feature (`docs/superpowers/specs/2026-06-02-global-ptt-design.md`). -| File | Purpose | Source | License | -| --- | --- | --- | --- | -| `ptt-open.wav` | Mic opened (PTT key pressed). | Generated locally with Python `wave` + sine generator (800–1200 Hz sweep). | CC0 / Public Domain. | -| `ptt-close.wav` | Mic closed (PTT key released). | Generated locally with Python `wave` + sine generator (1200–800 Hz sweep). | CC0 / Public Domain. | -| `ptt-error.wav` | Session aborted (empty audio, mic permission denied, etc.). | Generated locally with Python `wave` + sine generator (250 Hz tone). | CC0 / Public Domain. | +| File | Purpose | Source | License | +| --------------- | ----------------------------------------------------------- | -------------------------------------------------------------------------- | -------------------- | +| `ptt-open.wav` | Mic opened (PTT key pressed). | Generated locally with Python `wave` + sine generator (800–1200 Hz sweep). | CC0 / Public Domain. | +| `ptt-close.wav` | Mic closed (PTT key released). | Generated locally with Python `wave` + sine generator (1200–800 Hz sweep). | CC0 / Public Domain. | +| `ptt-error.wav` | Session aborted (empty audio, mic permission denied, etc.). | Generated locally with Python `wave` + sine generator (250 Hz tone). | CC0 / Public Domain. | All clips are ~80–120ms, LUFS-normalized to roughly match the in-app notification sound (~ -16 LUFS). Replace freely with better-sounding equivalents — just keep them under 200ms and CC0/MIT-equivalent. diff --git a/app/src/components/PttHotkeyManager.tsx b/app/src/components/PttHotkeyManager.tsx index 6f4682ecbe..8e02e9aa40 100644 --- a/app/src/components/PttHotkeyManager.tsx +++ b/app/src/components/PttHotkeyManager.tsx @@ -19,16 +19,9 @@ import { listen, type UnlistenFn } from '@tauri-apps/api/event'; import { useEffect, useMemo, useRef } from 'react'; import { useDispatch, useStore } from 'react-redux'; -import { - cancelPttAudio, - finalizePttAudio, - startPttAudio, -} from '../features/voice/pttAudio'; +import { cancelPttAudio, finalizePttAudio, startPttAudio } from '../features/voice/pttAudio'; import { playPttChime } from '../features/voice/pttChimes'; -import { - createNewVoiceThread, - resolveActiveThreadId, -} from '../features/voice/pttThread'; +import { createNewVoiceThread, resolveActiveThreadId } from '../features/voice/pttThread'; import { transcribePttAudio } from '../features/voice/pttTranscribe'; import { usePttHotkey } from '../hooks/usePttHotkey'; import { chatSend } from '../services/chatService'; @@ -57,11 +50,7 @@ export default function PttHotkeyManager(): null { const service = useMemo( () => createPttService({ - audioCapture: { - start: startPttAudio, - finalize: finalizePttAudio, - cancel: cancelPttAudio, - }, + audioCapture: { start: startPttAudio, finalize: finalizePttAudio, cancel: cancelPttAudio }, transcribe: transcribePttAudio, sendMessage: async ({ threadId, body, speakReply, metadata }) => { await chatSend({ @@ -85,10 +74,7 @@ export default function PttHotkeyManager(): null { }, getSettings: () => { const ptt = store.getState().ptt; - return { - speakReplies: ptt.speakReplies, - showOverlay: ptt.showOverlay, - }; + return { speakReplies: ptt.speakReplies, showOverlay: ptt.showOverlay }; }, now: monotonicNow, // 10 s ceiling on a single PTT recording — matches the spec; if the @@ -107,7 +93,7 @@ export default function PttHotkeyManager(): null { // store updates would orphan in-flight sessions. The closures above read // the latest store state on every call, so a stable identity is correct. // eslint-disable-next-line react-hooks/exhaustive-deps - [], + [] ); useEffect(() => { diff --git a/app/src/features/voice/pttChimes.ts b/app/src/features/voice/pttChimes.ts index 34f1464096..dc7c7b22ce 100644 --- a/app/src/features/voice/pttChimes.ts +++ b/app/src/features/voice/pttChimes.ts @@ -16,11 +16,7 @@ import openSrc from '../../assets/audio/ptt-open.wav'; export type ChimeKind = 'open' | 'close' | 'error'; -const sources: Record = { - open: openSrc, - close: closeSrc, - error: errorSrc, -}; +const sources: Record = { open: openSrc, close: closeSrc, error: errorSrc }; const cache: Partial> = {}; diff --git a/app/src/hooks/usePttHotkey.ts b/app/src/hooks/usePttHotkey.ts index f2bb3b7fa3..be65d3e9e7 100644 --- a/app/src/hooks/usePttHotkey.ts +++ b/app/src/hooks/usePttHotkey.ts @@ -12,12 +12,8 @@ import { useEffect } from 'react'; import { useDispatch, useSelector } from 'react-redux'; +import { selectPttShortcut, setIsHeld, setPttRegistrationError } from '../store/pttSlice'; import { registerPttHotkey, unregisterPttHotkey } from '../utils/tauriCommands/ptt'; -import { - selectPttShortcut, - setIsHeld, - setPttRegistrationError, -} from '../store/pttSlice'; export function usePttHotkey(): void { const dispatch = useDispatch(); diff --git a/app/src/pages/PttOverlayPage.test.tsx b/app/src/pages/PttOverlayPage.test.tsx index 97dc7d350e..a3b8b7f7d0 100644 --- a/app/src/pages/PttOverlayPage.test.tsx +++ b/app/src/pages/PttOverlayPage.test.tsx @@ -1,5 +1,5 @@ +import { act, render, screen } from '@testing-library/react'; import { describe, expect, it, vi } from 'vitest'; -import { render, screen, act } from '@testing-library/react'; import { PttOverlayPage } from './PttOverlayPage'; @@ -11,8 +11,7 @@ vi.mock('@tauri-apps/api/event', () => { handlers[name] = handler; return () => delete handlers[name]; }), - __dispatch: (name: string, payload: unknown) => - handlers[name]?.({ payload }), + __dispatch: (name: string, payload: unknown) => handlers[name]?.({ payload }), }; }); @@ -28,7 +27,7 @@ describe('PttOverlayPage', () => { await act(async () => { (evt as unknown as { __dispatch: (n: string, p: unknown) => void }).__dispatch( 'ptt-overlay://active', - { active: true, session_id: 1 }, + { active: true, session_id: 1 } ); }); expect(screen.getByTestId('ptt-overlay-root')).toHaveAttribute('data-active', 'true'); diff --git a/app/src/pages/PttOverlayPage.tsx b/app/src/pages/PttOverlayPage.tsx index 2082f20ab2..ef9fd828f6 100644 --- a/app/src/pages/PttOverlayPage.tsx +++ b/app/src/pages/PttOverlayPage.tsx @@ -1,5 +1,6 @@ -import { useEffect, useState } from 'react'; import { listen, type UnlistenFn } from '@tauri-apps/api/event'; +import { useEffect, useState } from 'react'; + import { useT } from '../lib/i18n/I18nContext'; export function PttOverlayPage() { @@ -9,10 +10,10 @@ export function PttOverlayPage() { useEffect(() => { let off: UnlistenFn | undefined; let cancelled = false; - listen<{ active: boolean }>('ptt-overlay://active', (e) => { + listen<{ active: boolean }>('ptt-overlay://active', e => { setActive(Boolean(e.payload?.active)); }) - .then((fn) => { + .then(fn => { if (cancelled) fn(); else off = fn; }) @@ -41,8 +42,7 @@ export function PttOverlayPage() { fontSize: 12, userSelect: 'none', pointerEvents: 'none', - }} - > + }}> string, -): string | null { +function localizedRegistrationError(raw: string | null, t: (key: string) => string): string | null { if (!raw) return null; const lower = raw.toLowerCase(); if (lower.includes('conflict') && lower.includes('dictation')) { diff --git a/app/src/services/__tests__/chatService.test.ts b/app/src/services/__tests__/chatService.test.ts index 03ebd9b984..b25a3ad048 100644 --- a/app/src/services/__tests__/chatService.test.ts +++ b/app/src/services/__tests__/chatService.test.ts @@ -245,7 +245,7 @@ describe('chatService.subscribeChatEvents', () => { source: 'ptt', session_id: 42, }), - }), + }) ); }); diff --git a/app/src/services/__tests__/pttService.test.ts b/app/src/services/__tests__/pttService.test.ts index 77a5dde65d..55cb62346c 100644 --- a/app/src/services/__tests__/pttService.test.ts +++ b/app/src/services/__tests__/pttService.test.ts @@ -52,9 +52,7 @@ describe('pttService state machine', () => { }); it('falls back to a new "Voice" thread when no active thread exists', async () => { - const deps = makeDeps({ - resolveActiveThreadId: vi.fn().mockResolvedValue(null), - }); + const deps = makeDeps({ resolveActiveThreadId: vi.fn().mockResolvedValue(null) }); const svc = createPttService(deps); await svc.onStart(2); @@ -62,7 +60,7 @@ describe('pttService state machine', () => { expect(deps.createNewVoiceThread).toHaveBeenCalled(); expect(deps.sendMessage).toHaveBeenCalledWith( - expect.objectContaining({ threadId: 'thread-new' }), + expect.objectContaining({ threadId: 'thread-new' }) ); }); @@ -85,9 +83,7 @@ describe('pttService state machine', () => { }); it('drops the session when the transcript is empty', async () => { - const deps = makeDeps({ - transcribe: vi.fn().mockResolvedValue(' '), - }); + const deps = makeDeps({ transcribe: vi.fn().mockResolvedValue(' ') }); const svc = createPttService(deps); await svc.onStart(4); @@ -108,9 +104,7 @@ describe('pttService state machine', () => { expect(deps.audioCapture.finalize).toHaveBeenCalled(); expect(deps.sendMessage).toHaveBeenCalledWith( - expect.objectContaining({ - metadata: expect.objectContaining({ session_id: 5 }), - }), + expect.objectContaining({ metadata: expect.objectContaining({ session_id: 5 }) }) ); }); @@ -126,17 +120,13 @@ describe('pttService state machine', () => { }); it('honours the speakReplies setting when forwarding to sendMessage', async () => { - const deps = makeDeps({ - getSettings: () => ({ speakReplies: false, showOverlay: true }), - }); + const deps = makeDeps({ getSettings: () => ({ speakReplies: false, showOverlay: true }) }); const svc = createPttService(deps); await svc.onStart(8); await svc.onStop(8); - expect(deps.sendMessage).toHaveBeenCalledWith( - expect.objectContaining({ speakReply: false }), - ); + expect(deps.sendMessage).toHaveBeenCalledWith(expect.objectContaining({ speakReply: false })); }); it('mismatched session_id on onStop is ignored', async () => { @@ -184,9 +174,7 @@ describe('pttService state machine', () => { }); it('posts a "[Voice — transcription failed]" breadcrumb when transcribe throws', async () => { - const deps = makeDeps({ - transcribe: vi.fn().mockRejectedValue(new Error('stt timeout')), - }); + const deps = makeDeps({ transcribe: vi.fn().mockRejectedValue(new Error('stt timeout')) }); const svc = createPttService(deps); await svc.onStart(12); @@ -196,7 +184,7 @@ describe('pttService state machine', () => { expect.objectContaining({ body: '[Voice — transcription failed]', metadata: { source: 'ptt', session_id: 12 }, - }), + }) ); }); }); diff --git a/app/src/services/pttService.ts b/app/src/services/pttService.ts index 85f7546bde..0ca0acf53e 100644 --- a/app/src/services/pttService.ts +++ b/app/src/services/pttService.ts @@ -204,10 +204,13 @@ export function createPttService(deps: PttDeps): PttService { // Re-check after the audio.start await. if (active !== claimed) { // Concurrent preempt replaced our claim mid-flight; we already started - // audio for an orphan session. Best-effort cancel and exit. + // audio for an orphan session. Best-effort cancel and exit — cancellation + // failure here is non-actionable (the orphan session is already detached). try { await deps.audioCapture.cancel(); - } catch (_) {} + } catch (_) { + // ignore: orphan-session cleanup is best-effort + } return; } diff --git a/app/src/store/__tests__/pttSlice.test.ts b/app/src/store/__tests__/pttSlice.test.ts index 1f697a5feb..94ed95f244 100644 --- a/app/src/store/__tests__/pttSlice.test.ts +++ b/app/src/store/__tests__/pttSlice.test.ts @@ -3,12 +3,12 @@ import { describe, expect, it } from 'vitest'; import { initialPttState, pttReducer, - setPttShortcut, - setSpeakReplies, - setShowOverlay, + type PttState, setIsHeld, setPttRegistrationError, - type PttState, + setPttShortcut, + setShowOverlay, + setSpeakReplies, } from '../pttSlice'; import { resetUserScopedState } from '../resetActions'; diff --git a/app/src/store/index.ts b/app/src/store/index.ts index 37283ca275..3ba524b738 100644 --- a/app/src/store/index.ts +++ b/app/src/store/index.ts @@ -24,10 +24,10 @@ import mascotReducer from './mascotSlice'; import notificationReducer from './notificationSlice'; import personaReducer from './personaSlice'; import providerSurfacesReducer from './providerSurfaceSlice'; +import { pttReducer } from './pttSlice'; import socketReducer from './socketSlice'; import themeReducer from './themeSlice'; import threadReducer from './threadSlice'; -import { pttReducer } from './pttSlice'; import { userScopedStorage } from './userScopedStorage'; import workflowsReducer from './workflowsSlice'; diff --git a/app/src/store/pttSlice.ts b/app/src/store/pttSlice.ts index 2c0a42510c..c7038bf879 100644 --- a/app/src/store/pttSlice.ts +++ b/app/src/store/pttSlice.ts @@ -69,17 +69,13 @@ export const { // ── Selectors ──────────────────────────────────────────────────────────────── -export const selectPttShortcut = (state: { ptt: PttState }): string | null => - state.ptt.shortcut; +export const selectPttShortcut = (state: { ptt: PttState }): string | null => state.ptt.shortcut; -export const selectSpeakReplies = (state: { ptt: PttState }): boolean => - state.ptt.speakReplies; +export const selectSpeakReplies = (state: { ptt: PttState }): boolean => state.ptt.speakReplies; -export const selectShowOverlay = (state: { ptt: PttState }): boolean => - state.ptt.showOverlay; +export const selectShowOverlay = (state: { ptt: PttState }): boolean => state.ptt.showOverlay; -export const selectIsHeld = (state: { ptt: PttState }): boolean => - state.ptt.isHeld; +export const selectIsHeld = (state: { ptt: PttState }): boolean => state.ptt.isHeld; export const selectPttRegistrationError = (state: { ptt: PttState }): string | null => state.ptt.registrationError; diff --git a/src/openhuman/channels/providers/web_tests.rs b/src/openhuman/channels/providers/web_tests.rs index fc9cfa0787..bfe9904c7d 100644 --- a/src/openhuman/channels/providers/web_tests.rs +++ b/src/openhuman/channels/providers/web_tests.rs @@ -25,19 +25,46 @@ static FORCED_ERROR_TEST_LOCK: Lazy> = Lazy::new(|| TokioMutex::n #[tokio::test] async fn start_chat_validates_required_fields() { - let err = start_chat("", "thread", "hello", None, None, None, None, ChatRequestMetadata::default()) - .await - .expect_err("client id should be required"); + let err = start_chat( + "", + "thread", + "hello", + None, + None, + None, + None, + ChatRequestMetadata::default(), + ) + .await + .expect_err("client id should be required"); assert!(err.contains("client_id is required")); - let err = start_chat("client", "", "hello", None, None, None, None, ChatRequestMetadata::default()) - .await - .expect_err("thread id should be required"); + let err = start_chat( + "client", + "", + "hello", + None, + None, + None, + None, + ChatRequestMetadata::default(), + ) + .await + .expect_err("thread id should be required"); assert!(err.contains("thread_id is required")); - let err = start_chat("client", "thread", " ", None, None, None, None, ChatRequestMetadata::default()) - .await - .expect_err("message should be required"); + let err = start_chat( + "client", + "thread", + " ", + None, + None, + None, + None, + ChatRequestMetadata::default(), + ) + .await + .expect_err("message should be required"); assert!(err.contains("message is required")); } @@ -1201,8 +1228,7 @@ fn compose_system_prompt_suffix_combines_locale_and_profile() { fn web_chat_schema_accepts_optional_ptt_fields() { // Locate the `chat` schema via the public accessor. let schema = schemas("chat"); - let names: std::collections::HashSet<&str> = - schema.inputs.iter().map(|f| f.name).collect(); + let names: std::collections::HashSet<&str> = schema.inputs.iter().map(|f| f.name).collect(); assert!( names.contains("speak_reply"), "channel.web_chat schema must include optional speak_reply field" @@ -1225,7 +1251,11 @@ fn web_chat_schema_accepts_optional_ptt_fields() { assert!(!f.required, "{field} must be optional"); } // Type assertions: ensure each field has the correct wire type. - let speak_reply = schema.inputs.iter().find(|f| f.name == "speak_reply").unwrap(); + let speak_reply = schema + .inputs + .iter() + .find(|f| f.name == "speak_reply") + .unwrap(); assert_eq!( speak_reply.ty, TypeSchema::Option(Box::new(TypeSchema::Bool)), @@ -1237,7 +1267,11 @@ fn web_chat_schema_accepts_optional_ptt_fields() { TypeSchema::Option(Box::new(TypeSchema::String)), "source must be Option" ); - let session_id = schema.inputs.iter().find(|f| f.name == "session_id").unwrap(); + let session_id = schema + .inputs + .iter() + .find(|f| f.name == "session_id") + .unwrap(); assert_eq!( session_id.ty, TypeSchema::Option(Box::new(TypeSchema::U64)), diff --git a/src/openhuman/voice/bus.rs b/src/openhuman/voice/bus.rs index 06531186b1..bf6bc7a669 100644 --- a/src/openhuman/voice/bus.rs +++ b/src/openhuman/voice/bus.rs @@ -59,13 +59,7 @@ mod tests { let events = capture.events.clone(); let _sub = subscribe_global(Arc::new(capture)); - publish_ptt_transcript_committed( - "thread-1".to_string(), - 42, - 17, - 850, - false, - ); + publish_ptt_transcript_committed("thread-1".to_string(), 42, 17, 850, false); // Give the broadcaster a tick to deliver. tokio::time::sleep(std::time::Duration::from_millis(50)).await; diff --git a/tests/channels_large_round25_raw_coverage_e2e.rs b/tests/channels_large_round25_raw_coverage_e2e.rs index 0efab78e7e..693faa0e6a 100644 --- a/tests/channels_large_round25_raw_coverage_e2e.rs +++ b/tests/channels_large_round25_raw_coverage_e2e.rs @@ -113,12 +113,19 @@ async fn web_channel_validation_cancellation_and_error_events_are_observable() { json!({"client_id": "client-1", "thread_id": "thread-1"}) ); - assert!( - web::start_chat(" ", "thread", "hello", None, None, None, None, web::ChatRequestMetadata::default()) - .await - .unwrap_err() - .contains("client_id is required") - ); + assert!(web::start_chat( + " ", + "thread", + "hello", + None, + None, + None, + None, + web::ChatRequestMetadata::default() + ) + .await + .unwrap_err() + .contains("client_id is required")); assert!(web::cancel_chat("client", " ") .await .unwrap_err() diff --git a/tests/channels_provider_deep_raw_coverage_e2e.rs b/tests/channels_provider_deep_raw_coverage_e2e.rs index 15f9d8940e..ab3a424b40 100644 --- a/tests/channels_provider_deep_raw_coverage_e2e.rs +++ b/tests/channels_provider_deep_raw_coverage_e2e.rs @@ -229,20 +229,45 @@ async fn dispatch_harness_covers_error_context_compaction_and_timeout_paths() { #[tokio::test] async fn web_channel_validation_cancel_and_classifier_snapshots_are_publicly_exercised() { - assert!(start_chat("", "thread", "hello", None, None, None, None, ChatRequestMetadata::default()) - .await - .expect_err("empty client rejected") - .contains("client_id")); - assert!(start_chat("client", "", "hello", None, None, None, None, ChatRequestMetadata::default()) - .await - .expect_err("empty thread rejected") - .contains("thread_id")); - assert!( - start_chat("client", "thread", " ", None, None, None, None, ChatRequestMetadata::default()) - .await - .expect_err("empty message rejected") - .contains("message") - ); + assert!(start_chat( + "", + "thread", + "hello", + None, + None, + None, + None, + ChatRequestMetadata::default() + ) + .await + .expect_err("empty client rejected") + .contains("client_id")); + assert!(start_chat( + "client", + "", + "hello", + None, + None, + None, + None, + ChatRequestMetadata::default() + ) + .await + .expect_err("empty thread rejected") + .contains("thread_id")); + assert!(start_chat( + "client", + "thread", + " ", + None, + None, + None, + None, + ChatRequestMetadata::default() + ) + .await + .expect_err("empty message rejected") + .contains("message")); let mut rx = subscribe_web_channel_events(); assert_eq!( diff --git a/tests/channels_runtime_raw_coverage_e2e.rs b/tests/channels_runtime_raw_coverage_e2e.rs index e75a1824d7..f937a149f3 100644 --- a/tests/channels_runtime_raw_coverage_e2e.rs +++ b/tests/channels_runtime_raw_coverage_e2e.rs @@ -372,20 +372,45 @@ async fn yuanbao_public_channel_and_config_paths_are_isolated_from_network() { #[tokio::test] async fn web_channel_validation_cancel_and_event_subscription_are_fast() { - assert!(start_chat("", "thread", "hello", None, None, None, None, ChatRequestMetadata::default()) - .await - .expect_err("empty client rejected") - .contains("client_id")); - assert!(start_chat("client", "", "hello", None, None, None, None, ChatRequestMetadata::default()) - .await - .expect_err("empty thread rejected") - .contains("thread_id")); - assert!( - start_chat("client", "thread", " ", None, None, None, None, ChatRequestMetadata::default()) - .await - .expect_err("empty message rejected") - .contains("message") - ); + assert!(start_chat( + "", + "thread", + "hello", + None, + None, + None, + None, + ChatRequestMetadata::default() + ) + .await + .expect_err("empty client rejected") + .contains("client_id")); + assert!(start_chat( + "client", + "", + "hello", + None, + None, + None, + None, + ChatRequestMetadata::default() + ) + .await + .expect_err("empty thread rejected") + .contains("thread_id")); + assert!(start_chat( + "client", + "thread", + " ", + None, + None, + None, + None, + ChatRequestMetadata::default() + ) + .await + .expect_err("empty message rejected") + .contains("message")); let mut rx = subscribe_web_channel_events(); assert_eq!( diff --git a/tests/channels_web_startup_raw_coverage_e2e.rs b/tests/channels_web_startup_raw_coverage_e2e.rs index 04c4d12a3a..a128043c26 100644 --- a/tests/channels_web_startup_raw_coverage_e2e.rs +++ b/tests/channels_web_startup_raw_coverage_e2e.rs @@ -107,9 +107,18 @@ async fn web_controllers_validate_inputs_and_emit_structured_forced_errors() { assert_eq!(all_web_channel_registered_controllers().len(), 2); assert_eq!(schemas("missing").function, "unknown"); - let err = channel_web_chat("client", "thread", " ", None, None, None, None, ChatRequestMetadata::default()) - .await - .expect_err("blank messages are rejected"); + let err = channel_web_chat( + "client", + "thread", + " ", + None, + None, + None, + None, + ChatRequestMetadata::default(), + ) + .await + .expect_err("blank messages are rejected"); assert!(err.contains("message is required")); let cancel = channel_web_cancel("client", "missing-thread") diff --git a/tests/channels_web_yuanbao_round22_raw_coverage_e2e.rs b/tests/channels_web_yuanbao_round22_raw_coverage_e2e.rs index 56d3f62048..5b74bbe94a 100644 --- a/tests/channels_web_yuanbao_round22_raw_coverage_e2e.rs +++ b/tests/channels_web_yuanbao_round22_raw_coverage_e2e.rs @@ -190,15 +190,33 @@ fn isolated_config() -> (tempfile::TempDir, Config) { #[tokio::test] async fn web_start_chat_validation_forced_error_and_cancel_paths_are_structured() { assert_eq!( - start_chat(" ", "thread", "hello", None, None, None, None, ChatRequestMetadata::default()) - .await - .unwrap_err(), + start_chat( + " ", + "thread", + "hello", + None, + None, + None, + None, + ChatRequestMetadata::default() + ) + .await + .unwrap_err(), "client_id is required" ); assert_eq!( - start_chat("client", " ", "hello", None, None, None, None, ChatRequestMetadata::default()) - .await - .unwrap_err(), + start_chat( + "client", + " ", + "hello", + None, + None, + None, + None, + ChatRequestMetadata::default() + ) + .await + .unwrap_err(), "thread_id is required" ); diff --git a/tests/json_rpc_e2e.rs b/tests/json_rpc_e2e.rs index a4bab4c056..c163425f66 100644 --- a/tests/json_rpc_e2e.rs +++ b/tests/json_rpc_e2e.rs @@ -9961,7 +9961,9 @@ async fn json_rpc_channel_web_chat_with_speak_reply_invokes_reply_speech() { "expected at least one non-empty text passed to synthesize_reply; observed={observed:?}" ); assert!( - observed.iter().any(|t| t.contains("Hello from e2e mock agent")), + observed + .iter() + .any(|t| t.contains("Hello from e2e mock agent")), "expected the observed seam text to include the mock reply phrase; got {observed:?}" ); diff --git a/tests/tools_approval_channels_raw_coverage_e2e.rs b/tests/tools_approval_channels_raw_coverage_e2e.rs index 5d97ad65db..a3f433c106 100644 --- a/tests/tools_approval_channels_raw_coverage_e2e.rs +++ b/tests/tools_approval_channels_raw_coverage_e2e.rs @@ -2073,7 +2073,13 @@ async fn web_channel_public_paths_cover_event_delivery_and_validation_errors() { assert_eq!( openhuman_core::openhuman::channels::web::start_chat( - "", "thread-1", "hello", None, None, None, None, + "", + "thread-1", + "hello", + None, + None, + None, + None, openhuman_core::openhuman::channels::web::ChatRequestMetadata::default(), ) .await @@ -2082,7 +2088,13 @@ async fn web_channel_public_paths_cover_event_delivery_and_validation_errors() { ); assert_eq!( openhuman_core::openhuman::channels::web::start_chat( - "client-1", "", "hello", None, None, None, None, + "client-1", + "", + "hello", + None, + None, + None, + None, openhuman_core::openhuman::channels::web::ChatRequestMetadata::default(), ) .await @@ -2091,7 +2103,13 @@ async fn web_channel_public_paths_cover_event_delivery_and_validation_errors() { ); assert_eq!( openhuman_core::openhuman::channels::web::start_chat( - "client-1", "thread-1", " ", None, None, None, None, + "client-1", + "thread-1", + " ", + None, + None, + None, + None, openhuman_core::openhuman::channels::web::ChatRequestMetadata::default(), ) .await diff --git a/tests/tools_network_channels_raw_coverage_e2e.rs b/tests/tools_network_channels_raw_coverage_e2e.rs index e1eba99783..8e64769ee7 100644 --- a/tests/tools_network_channels_raw_coverage_e2e.rs +++ b/tests/tools_network_channels_raw_coverage_e2e.rs @@ -556,9 +556,18 @@ async fn web_channel_public_paths_cover_validation_cancel_schema_and_event_bus() assert_eq!(web_channel_schema("cancel").function, "web_cancel"); assert_eq!(web_channel_schema("missing").function, "unknown"); - let missing_client = start_chat(" ", "thread", "hello", None, None, None, None, ChatRequestMetadata::default()) - .await - .expect_err("blank client"); + let missing_client = start_chat( + " ", + "thread", + "hello", + None, + None, + None, + None, + ChatRequestMetadata::default(), + ) + .await + .expect_err("blank client"); assert_contains(&missing_client, "client_id is required"); let missing_thread = cancel_chat("client", " ").await.expect_err("blank thread"); assert_contains(&missing_thread, "thread_id is required"); From 23684056983f9b298079e7387de28ff0ba3cc5d5 Mon Sep 17 00:00:00 2001 From: Ghost Scripter Date: Sat, 6 Jun 2026 02:12:15 +0530 Subject: [PATCH 31/36] fix(ptt): catch rejected promises from onStart/onStop service calls (addresses @coderabbitai on PttHotkeyManager.tsx:110) Adds .catch() handlers to the ptt://start and ptt://stop listener callbacks so failures from service.onStart/onStop are logged rather than becoming unhandled rejections. Routes logging through the debug package (app:ptt:manager namespace) instead of console.warn, consistent with the rest of the file. --- app/src/components/PttHotkeyManager.tsx | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/app/src/components/PttHotkeyManager.tsx b/app/src/components/PttHotkeyManager.tsx index 8e02e9aa40..6340b2ae91 100644 --- a/app/src/components/PttHotkeyManager.tsx +++ b/app/src/components/PttHotkeyManager.tsx @@ -16,6 +16,7 @@ * mounts would create competing state machines fighting over the same mic. */ import { listen, type UnlistenFn } from '@tauri-apps/api/event'; +import debug from 'debug'; import { useEffect, useMemo, useRef } from 'react'; import { useDispatch, useStore } from 'react-redux'; @@ -30,6 +31,8 @@ import type { RootState } from '../store'; import { setIsHeld } from '../store/pttSlice'; import { showPttOverlay } from '../utils/tauriCommands/ptt'; +const log = debug('app:ptt:manager'); + interface PttEventPayload { session_id: number; } @@ -84,9 +87,9 @@ export default function PttHotkeyManager(): null { // Recordings shorter than this are treated as accidental taps. minAudioMs: 250, logger: { - debug: (msg, meta) => console.debug(msg, meta ?? {}), - info: (msg, meta) => console.info(msg, meta ?? {}), - warn: (msg, meta) => console.warn(msg, meta ?? {}), + debug: (msg, meta) => log(msg, meta ?? {}), + info: (msg, meta) => log(msg, meta ?? {}), + warn: (msg, meta) => log(msg, meta ?? {}), }, }), // The service holds an internal state machine — recreating it across @@ -102,11 +105,15 @@ export default function PttHotkeyManager(): null { try { const offStart = await listen('ptt://start', e => { dispatch(setIsHeld(true)); - void service.onStart(e.payload.session_id); + service.onStart(e.payload.session_id).catch(err => { + log('onStart failed', { sessionId: e.payload.session_id, err: String(err) }); + }); }); const offStop = await listen('ptt://stop', e => { dispatch(setIsHeld(false)); - void service.onStop(e.payload.session_id); + service.onStop(e.payload.session_id).catch(err => { + log('onStop failed', { sessionId: e.payload.session_id, err: String(err) }); + }); }); if (!mounted) { offStart(); @@ -114,9 +121,9 @@ export default function PttHotkeyManager(): null { return; } unlistenRef.current.push(offStart, offStop); - console.debug('[ptt] PttHotkeyManager: listeners attached'); + log('PttHotkeyManager: listeners attached'); } catch (err) { - console.warn('[ptt] PttHotkeyManager: failed to attach listeners', err); + log('PttHotkeyManager: failed to attach listeners', err); } }; void subscribe(); @@ -128,7 +135,7 @@ export default function PttHotkeyManager(): null { try { off(); } catch (err) { - console.debug('[ptt] PttHotkeyManager: unlisten threw', err); + log('PttHotkeyManager: unlisten threw', err); } } }; From fc3f36af0130071b78ef5a30c28bd7f5afc35da9 Mon Sep 17 00:00:00 2001 From: Ghost Scripter Date: Sat, 6 Jun 2026 02:16:48 +0530 Subject: [PATCH 32/36] i18n(ptt): make description conditional on speakReplies setting across all 14 locales (addresses @coderabbitai on es.ts:1586, ru.ts:1564) The PTT description incorrectly said OpenHuman always speaks replies after release. The speakReplies toggle makes TTS optional, so the copy now reads "if 'Speak agent replies' is on, OpenHuman speaks the reply" in all 14 locales. en.ts updated as source of truth; all 13 non-English locales carry real translations. --- app/src/lib/i18n/ar.ts | 2 +- app/src/lib/i18n/bn.ts | 2 +- app/src/lib/i18n/de.ts | 2 +- app/src/lib/i18n/en.ts | 2 +- app/src/lib/i18n/es.ts | 2 +- app/src/lib/i18n/fr.ts | 4 ++-- app/src/lib/i18n/hi.ts | 2 +- app/src/lib/i18n/id.ts | 2 +- app/src/lib/i18n/it.ts | 4 ++-- app/src/lib/i18n/ko.ts | 2 +- app/src/lib/i18n/pl.ts | 2 +- app/src/lib/i18n/pt.ts | 2 +- app/src/lib/i18n/ru.ts | 2 +- app/src/lib/i18n/zh-CN.ts | 2 +- 14 files changed, 16 insertions(+), 16 deletions(-) diff --git a/app/src/lib/i18n/ar.ts b/app/src/lib/i18n/ar.ts index f14a365c01..355d16fe2a 100644 --- a/app/src/lib/i18n/ar.ts +++ b/app/src/lib/i18n/ar.ts @@ -1518,7 +1518,7 @@ const messages: TranslationMap = { 'voice.externalProviders.add': 'Add', 'pttSettings.title': 'اضغط للتحدث', 'pttSettings.description': - 'اضغط مفتاحًا باستمرار للتحدث إلى OpenHuman أثناء وجودك في تطبيق آخر. تحرير المفتاح يُرسل التسجيل، ثم ينطق OpenHuman الرد.', + 'اضغط مفتاحًا باستمرار للتحدث إلى OpenHuman أثناء وجودك في تطبيق آخر. تحرير المفتاح يُرسل التسجيل؛ إذا كان «قراءة الردود» مفعَّلًا، ينطق OpenHuman الرد بصوت عالٍ.', 'pttSettings.shortcutLabel': 'اختصار المفتاح', 'pttSettings.shortcutPlaceholder': 'اضغط مفتاحًا (مثل F13)', 'pttSettings.shortcutUnsetHint': 'اضغط للتحدث متوقف — اختر اختصارًا لتفعيله.', diff --git a/app/src/lib/i18n/bn.ts b/app/src/lib/i18n/bn.ts index 485aa4507b..5b45f4b712 100644 --- a/app/src/lib/i18n/bn.ts +++ b/app/src/lib/i18n/bn.ts @@ -1549,7 +1549,7 @@ const messages: TranslationMap = { 'voice.externalProviders.add': 'Add', 'pttSettings.title': 'চাপ দিয়ে কথা বলুন', 'pttSettings.description': - 'অন্য একটি অ্যাপে থাকার সময় OpenHuman-এর সাথে কথা বলতে একটি কী চেপে ধরে রাখুন। কী ছেড়ে দিলে রেকর্ডিং পাঠানো হয়; OpenHuman উত্তরটি জোরে পড়ে শোনায়।', + 'অন্য একটি অ্যাপে থাকার সময় OpenHuman-এর সাথে কথা বলতে একটি কী চেপে ধরে রাখুন। কী ছেড়ে দিলে রেকর্ডিং পাঠানো হয়; «উত্তর পড়ে শোনাও» চালু থাকলে OpenHuman উত্তরটি জোরে পড়ে শোনায়।', 'pttSettings.shortcutLabel': 'হটকি', 'pttSettings.shortcutPlaceholder': 'একটি কী চাপুন (যেমন F13)', 'pttSettings.shortcutUnsetHint': diff --git a/app/src/lib/i18n/de.ts b/app/src/lib/i18n/de.ts index 79d7b762cd..1232724681 100644 --- a/app/src/lib/i18n/de.ts +++ b/app/src/lib/i18n/de.ts @@ -1589,7 +1589,7 @@ const messages: TranslationMap = { 'voice.externalProviders.add': 'Add', 'pttSettings.title': 'Push-to-Talk', 'pttSettings.description': - 'Halte eine Taste gedrückt, um mit OpenHuman zu sprechen, während du eine andere App nutzt. Loslassen sendet die Aufnahme; OpenHuman antwortet per Sprachausgabe.', + 'Halte eine Taste gedrückt, um mit OpenHuman zu sprechen, während du eine andere App nutzt. Loslassen sendet die Aufnahme; wenn „Antworten vorlesen" aktiviert ist, antwortet OpenHuman per Sprachausgabe.', 'pttSettings.shortcutLabel': 'Tastenkürzel', 'pttSettings.shortcutPlaceholder': 'Taste drücken (z. B. F13)', 'pttSettings.shortcutUnsetHint': diff --git a/app/src/lib/i18n/en.ts b/app/src/lib/i18n/en.ts index 6002924737..6ab0e6b067 100644 --- a/app/src/lib/i18n/en.ts +++ b/app/src/lib/i18n/en.ts @@ -1901,7 +1901,7 @@ const en: TranslationMap = { // Push-to-talk (PTT) 'pttSettings.title': 'Push-to-talk', 'pttSettings.description': - "Hold a key to talk to OpenHuman while you're in another app. Releases the key to send; OpenHuman speaks the reply back.", + "Hold a key to talk to OpenHuman while you're in another app. Release the key to send; OpenHuman speaks the reply if 'Speak agent replies' is on.", 'pttSettings.shortcutLabel': 'Hotkey', 'pttSettings.shortcutPlaceholder': 'Press a key (e.g. F13)', 'pttSettings.shortcutUnsetHint': 'Push-to-talk is off — pick a hotkey to enable.', diff --git a/app/src/lib/i18n/es.ts b/app/src/lib/i18n/es.ts index e373173675..60ae57da3f 100644 --- a/app/src/lib/i18n/es.ts +++ b/app/src/lib/i18n/es.ts @@ -1583,7 +1583,7 @@ const messages: TranslationMap = { 'voice.externalProviders.add': 'Add', 'pttSettings.title': 'Pulsa para hablar', 'pttSettings.description': - 'Mantén pulsada una tecla para hablar con OpenHuman mientras estás en otra aplicación. Al soltarla se envía la grabación; OpenHuman dice la respuesta en voz alta.', + 'Mantén pulsada una tecla para hablar con OpenHuman mientras estás en otra aplicación. Al soltarla se envía la grabación; si tienes activada la opción «Leer las respuestas», OpenHuman las dice en voz alta.', 'pttSettings.shortcutLabel': 'Atajo de teclado', 'pttSettings.shortcutPlaceholder': 'Pulsa una tecla (p. ej. F13)', 'pttSettings.shortcutUnsetHint': diff --git a/app/src/lib/i18n/fr.ts b/app/src/lib/i18n/fr.ts index 2117ae17e4..898fb98eeb 100644 --- a/app/src/lib/i18n/fr.ts +++ b/app/src/lib/i18n/fr.ts @@ -1586,8 +1586,8 @@ const messages: TranslationMap = { 'voice.externalProviders.apiKeyPlaceholder': 'sk-…', 'voice.externalProviders.add': 'Add', 'pttSettings.title': 'Appuyer pour parler', - 'pttSettings.description': - 'Maintiens une touche pour parler à OpenHuman pendant que tu utilises une autre application. Relâcher envoie l’enregistrement ; OpenHuman lit la réponse à voix haute.', + ‘pttSettings.description’: + ‘Maintiens une touche pour parler à OpenHuman pendant que tu utilises une autre application. Relâcher envoie l’enregistrement ; si « Lire les réponses » est activé, OpenHuman lit la réponse à voix haute.’, 'pttSettings.shortcutLabel': 'Raccourci', 'pttSettings.shortcutPlaceholder': 'Appuie sur une touche (par exemple F13)', 'pttSettings.shortcutUnsetHint': diff --git a/app/src/lib/i18n/hi.ts b/app/src/lib/i18n/hi.ts index 4d1245a5bc..94572fc8f3 100644 --- a/app/src/lib/i18n/hi.ts +++ b/app/src/lib/i18n/hi.ts @@ -1548,7 +1548,7 @@ const messages: TranslationMap = { 'voice.externalProviders.add': 'Add', 'pttSettings.title': 'दबाकर बोलें', 'pttSettings.description': - 'जब आप किसी दूसरे ऐप में हों तब OpenHuman से बात करने के लिए कोई कुंजी दबाए रखें। कुंजी छोड़ने पर रिकॉर्डिंग भेजी जाती है; OpenHuman उत्तर को बोलकर सुनाता है।', + 'जब आप किसी दूसरे ऐप में हों तब OpenHuman से बात करने के लिए कोई कुंजी दबाए रखें। कुंजी छोड़ने पर रिकॉर्डिंग भेजी जाती है; अगर «उत्तर बोलकर सुनाएँ» चालू है तो OpenHuman उत्तर बोलकर सुनाता है।', 'pttSettings.shortcutLabel': 'हॉटकी', 'pttSettings.shortcutPlaceholder': 'कोई कुंजी दबाएँ (जैसे F13)', 'pttSettings.shortcutUnsetHint': 'दबाकर बोलें बंद है — चालू करने के लिए कोई हॉटकी चुनें।', diff --git a/app/src/lib/i18n/id.ts b/app/src/lib/i18n/id.ts index 9857f08e41..002a9e2b6e 100644 --- a/app/src/lib/i18n/id.ts +++ b/app/src/lib/i18n/id.ts @@ -1553,7 +1553,7 @@ const messages: TranslationMap = { 'voice.externalProviders.add': 'Add', 'pttSettings.title': 'Tekan untuk bicara', 'pttSettings.description': - 'Tahan sebuah tombol untuk berbicara dengan OpenHuman saat kamu sedang di aplikasi lain. Lepas tombol untuk mengirim; OpenHuman akan menyuarakan balasannya.', + 'Tahan sebuah tombol untuk berbicara dengan OpenHuman saat kamu sedang di aplikasi lain. Lepas tombol untuk mengirim; jika «Bacakan balasan» aktif, OpenHuman akan menyuarakan balasannya.', 'pttSettings.shortcutLabel': 'Pintasan', 'pttSettings.shortcutPlaceholder': 'Tekan sebuah tombol (mis. F13)', 'pttSettings.shortcutUnsetHint': diff --git a/app/src/lib/i18n/it.ts b/app/src/lib/i18n/it.ts index 46888188b6..2e9896dcf4 100644 --- a/app/src/lib/i18n/it.ts +++ b/app/src/lib/i18n/it.ts @@ -1575,8 +1575,8 @@ const messages: TranslationMap = { 'voice.externalProviders.apiKeyPlaceholder': 'sk-…', 'voice.externalProviders.add': 'Add', 'pttSettings.title': 'Premi per parlare', - 'pttSettings.description': - 'Tieni premuto un tasto per parlare con OpenHuman mentre sei in un’altra app. Al rilascio l’audio viene inviato; OpenHuman risponde a voce.', + ‘pttSettings.description’: + ‘Tieni premuto un tasto per parlare con OpenHuman mentre sei in un’altra app. Al rilascio l’audio viene inviato; se «Leggi le risposte» è attivo, OpenHuman risponde a voce.’, 'pttSettings.shortcutLabel': 'Scorciatoia', 'pttSettings.shortcutPlaceholder': 'Premi un tasto (es. F13)', 'pttSettings.shortcutUnsetHint': diff --git a/app/src/lib/i18n/ko.ts b/app/src/lib/i18n/ko.ts index df73970c0b..3b637876ce 100644 --- a/app/src/lib/i18n/ko.ts +++ b/app/src/lib/i18n/ko.ts @@ -1533,7 +1533,7 @@ const messages: TranslationMap = { 'voice.externalProviders.add': 'Add', 'pttSettings.title': '눌러서 말하기', 'pttSettings.description': - '다른 앱을 사용하는 중에도 키를 누르고 있으면 OpenHuman과 대화할 수 있습니다. 키를 놓으면 녹음이 전송되고 OpenHuman이 답변을 음성으로 들려줍니다.', + '다른 앱을 사용하는 중에도 키를 누르고 있으면 OpenHuman과 대화할 수 있습니다. 키를 놓으면 녹음이 전송되고, \'답변 읽어주기\'가 켜져 있으면 OpenHuman이 답변을 음성으로 들려줍니다.', 'pttSettings.shortcutLabel': '단축키', 'pttSettings.shortcutPlaceholder': '키를 누르세요 (예: F13)', 'pttSettings.shortcutUnsetHint': diff --git a/app/src/lib/i18n/pl.ts b/app/src/lib/i18n/pl.ts index a640cc5e20..5b09601d7c 100644 --- a/app/src/lib/i18n/pl.ts +++ b/app/src/lib/i18n/pl.ts @@ -1569,7 +1569,7 @@ const messages: TranslationMap = { 'voice.externalProviders.add': 'Dodaj', 'pttSettings.title': 'Naciśnij, aby mówić', 'pttSettings.description': - 'Przytrzymaj klawisz, aby mówić do OpenHuman, gdy korzystasz z innej aplikacji. Zwolnienie klawisza wysyła nagranie; OpenHuman odczyta odpowiedź na głos.', + 'Przytrzymaj klawisz, aby mówić do OpenHuman, gdy korzystasz z innej aplikacji. Zwolnienie klawisza wysyła nagranie; jeśli opcja „Czytaj odpowiedzi" jest włączona, OpenHuman odczyta odpowiedź na głos.', 'pttSettings.shortcutLabel': 'Skrót klawiszowy', 'pttSettings.shortcutPlaceholder': 'Naciśnij klawisz (np. F13)', 'pttSettings.shortcutUnsetHint': diff --git a/app/src/lib/i18n/pt.ts b/app/src/lib/i18n/pt.ts index 7805824fcf..f3abbc56b3 100644 --- a/app/src/lib/i18n/pt.ts +++ b/app/src/lib/i18n/pt.ts @@ -1582,7 +1582,7 @@ const messages: TranslationMap = { 'voice.externalProviders.add': 'Add', 'pttSettings.title': 'Pressionar para falar', 'pttSettings.description': - 'Mantenha uma tecla pressionada para falar com o OpenHuman enquanto está noutro aplicativo. Soltar a tecla envia o áudio; o OpenHuman lê a resposta em voz alta.', + 'Mantenha uma tecla pressionada para falar com o OpenHuman enquanto está noutro aplicativo. Soltar a tecla envia o áudio; se «Ler respostas em voz alta» estiver ativo, o OpenHuman lê a resposta.', 'pttSettings.shortcutLabel': 'Atalho', 'pttSettings.shortcutPlaceholder': 'Pressione uma tecla (por exemplo, F13)', 'pttSettings.shortcutUnsetHint': diff --git a/app/src/lib/i18n/ru.ts b/app/src/lib/i18n/ru.ts index 85a4fce02a..471aa57309 100644 --- a/app/src/lib/i18n/ru.ts +++ b/app/src/lib/i18n/ru.ts @@ -1561,7 +1561,7 @@ const messages: TranslationMap = { 'voice.externalProviders.add': 'Add', 'pttSettings.title': 'Нажми и говори', 'pttSettings.description': - 'Удерживайте клавишу, чтобы говорить с OpenHuman, пока вы находитесь в другом приложении. При отпускании запись отправляется; OpenHuman озвучит ответ.', + 'Удерживайте клавишу, чтобы говорить с OpenHuman, пока вы находитесь в другом приложении. При отпускании запись отправляется; если включён параметр «Озвучивать ответы», OpenHuman озвучит ответ.', 'pttSettings.shortcutLabel': 'Сочетание клавиш', 'pttSettings.shortcutPlaceholder': 'Нажмите клавишу (например, F13)', 'pttSettings.shortcutUnsetHint': diff --git a/app/src/lib/i18n/zh-CN.ts b/app/src/lib/i18n/zh-CN.ts index df215a5946..498d7ae1ce 100644 --- a/app/src/lib/i18n/zh-CN.ts +++ b/app/src/lib/i18n/zh-CN.ts @@ -1466,7 +1466,7 @@ const messages: TranslationMap = { 'voice.externalProviders.add': 'Add', 'pttSettings.title': '按住说话', 'pttSettings.description': - '在其他应用中时,按住按键即可与 OpenHuman 对话。松开按键发送录音;OpenHuman 会用语音读出回复。', + '在其他应用中时,按住按键即可与 OpenHuman 对话。松开按键发送录音;若已开启「朗读回复」,OpenHuman 会用语音读出回复。', 'pttSettings.shortcutLabel': '快捷键', 'pttSettings.shortcutPlaceholder': '按下一个键(例如 F13)', 'pttSettings.shortcutUnsetHint': '按住说话已关闭 — 请选择一个快捷键来启用。', From 6625342f77bc2ace7c7fea3549472009c833d0fb Mon Sep 17 00:00:00 2001 From: Ghost Scripter Date: Sat, 6 Jun 2026 02:16:59 +0530 Subject: [PATCH 33/36] fix(ptt/settings): normalize Space key label and allow Tab to pass through (addresses @coderabbitai on PttSettingsPanel.tsx:76,125) - Normalize e.key === ' ' to 'Space' before building the shortcut string so the saved binding reads "Space" or "Ctrl+Space" rather than a blank character that renders invisibly in the input. - Guard Tab / Shift+Tab at the top of handleShortcutKeyDown so they are not captured as bindings and keyboard focus navigation within the settings panel continues to work normally. --- app/src/pages/settings/voice/PttSettingsPanel.tsx | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/app/src/pages/settings/voice/PttSettingsPanel.tsx b/app/src/pages/settings/voice/PttSettingsPanel.tsx index 798b0b0984..459128b3b3 100644 --- a/app/src/pages/settings/voice/PttSettingsPanel.tsx +++ b/app/src/pages/settings/voice/PttSettingsPanel.tsx @@ -68,7 +68,9 @@ function eventToShortcut(e: React.KeyboardEvent): string | null { // Prefer e.key (already the localised label like "F13", "a", "Enter") // unless it's a single lowercase letter — for those we uppercase to // produce a consistent "Ctrl+A" form across capitalised / not. - let label = e.key; + // Normalize Space (" ") to the display label "Space" so the saved + // binding is readable (e.g. "Ctrl+Space" rather than "Ctrl+ "). + let label = e.key === ' ' ? 'Space' : e.key; if (label.length === 1 && /[a-z]/.test(label)) { label = label.toUpperCase(); } @@ -117,9 +119,13 @@ const PttSettingsPanel = () => { const handleShortcutKeyDown = useCallback( (e: React.KeyboardEvent) => { - // Always preventDefault so the input doesn't try to insert text - // for the captured character — we treat it as a binding press, - // not editable content. + // Let Tab / Shift+Tab pass through so keyboard navigation within + // the settings panel still works. All other keys are captured as + // potential binding candidates and their default actions suppressed + // so the input doesn't insert text. + if (e.key === 'Tab') { + return; + } e.preventDefault(); e.stopPropagation(); From 20bf4c2d0c2ccd29b34730151794c96b368699a3 Mon Sep 17 00:00:00 2001 From: Ghost Scripter Date: Sat, 6 Jun 2026 02:17:08 +0530 Subject: [PATCH 34/36] fix(ptt/service): catch thread-resolution and sendMessage failures in session finalization (addresses @coderabbitai on pttService.ts:145) resolveActiveThreadId, createNewVoiceThread, and sendMessage can all throw. Previously a failure would leave the state machine in a clean state but with no user feedback. Now each failure path logs a warn, plays the error chime, and returns early so the session completes visibly to the user rather than silently. --- app/src/services/pttService.ts | 36 +++++++++++++++++++++++++--------- 1 file changed, 27 insertions(+), 9 deletions(-) diff --git a/app/src/services/pttService.ts b/app/src/services/pttService.ts index 0ca0acf53e..2782e9e457 100644 --- a/app/src/services/pttService.ts +++ b/app/src/services/pttService.ts @@ -132,17 +132,35 @@ export function createPttService(deps: PttDeps): PttService { return; } - let threadId = await deps.resolveActiveThreadId(); - if (!threadId) { - threadId = await deps.createNewVoiceThread(); + let threadId: string; + try { + const resolved = await deps.resolveActiveThreadId(); + if (!resolved) { + threadId = await deps.createNewVoiceThread(); + } else { + threadId = resolved; + } + } catch (err) { + deps.logger.warn('[ptt] thread resolution failed — aborting commit', { + sessionId, + err: String(err), + }); + await deps.playChime('error'); + return; } - await deps.sendMessage({ - threadId, - body: trimmed, - metadata: { source: 'ptt', session_id: sessionId }, - speakReply: settings.speakReplies, - }); + try { + await deps.sendMessage({ + threadId, + body: trimmed, + metadata: { source: 'ptt', session_id: sessionId }, + speakReply: settings.speakReplies, + }); + } catch (err) { + deps.logger.warn('[ptt] sendMessage failed', { sessionId, threadId, err: String(err) }); + await deps.playChime('error'); + return; + } deps.logger.info('[ptt] session committed', { sessionId, From a8b5a2a5a1fe64969aa5dd9c458fa906b3409c19 Mon Sep 17 00:00:00 2001 From: Ghost Scripter Date: Sat, 6 Jun 2026 02:17:19 +0530 Subject: [PATCH 35/36] =?UTF-8?q?docs(ptt):=20fix=20locale=20count=20(12?= =?UTF-8?q?=E2=86=9213)=20in=20spec;=20add=20PTT=20IPC=20commands=20to=20t?= =?UTF-8?q?auri-shell.md=20(addresses=20@coderabbitai=20on=20spec:296,=20s?= =?UTF-8?q?pec:144)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - spec: fix "12 non-English locale files" → "13" (ar, bn, de, es, fr, hi, id, it, ko, pl, pt, ru, zh-CN = 13 locales). - tauri-shell.md: add "Push-to-talk (PTT) hotkey + overlay" section documenting register_ptt_hotkey, unregister_ptt_hotkey, and show_ptt_overlay IPC commands including their signatures, event flow, and conflict detection behavior. --- .../specs/2026-06-02-global-ptt-design.md | 2 +- gitbooks/developing/architecture/tauri-shell.md | 14 ++++++++++++++ 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/docs/superpowers/specs/2026-06-02-global-ptt-design.md b/docs/superpowers/specs/2026-06-02-global-ptt-design.md index 517cec0b75..835bf14767 100644 --- a/docs/superpowers/specs/2026-06-02-global-ptt-design.md +++ b/docs/superpowers/specs/2026-06-02-global-ptt-design.md @@ -293,7 +293,7 @@ LUFS-normalized to roughly match the existing in-app notification sound. Played #### i18n -New keys under a `pttSettings` / `pttOverlay` namespace in `app/src/lib/i18n/en.ts`, real translations added to all 12 non-English locale files (`ar`, `bn`, `de`, `es`, `fr`, `hi`, `id`, `it`, `ko`, `pl`, `pt`, `ru`, `zh-CN`). `pnpm i18n:check` and `pnpm i18n:english:check` gate this. +New keys under a `pttSettings` / `pttOverlay` namespace in `app/src/lib/i18n/en.ts`, real translations added to all 13 non-English locale files (`ar`, `bn`, `de`, `es`, `fr`, `hi`, `id`, `it`, `ko`, `pl`, `pt`, `ru`, `zh-CN`). `pnpm i18n:check` and `pnpm i18n:english:check` gate this. --- diff --git a/gitbooks/developing/architecture/tauri-shell.md b/gitbooks/developing/architecture/tauri-shell.md index b4f3d0a756..48655f141e 100644 --- a/gitbooks/developing/architecture/tauri-shell.md +++ b/gitbooks/developing/architecture/tauri-shell.md @@ -158,6 +158,20 @@ From **`workspace_paths.rs`** (closes `#1402`). These commands accept workspace- | `reveal_workspace_path` | Reveal an existing workspace file or directory in the OS file manager. | | `preview_workspace_text` | Read a capped UTF-8 text preview from an existing workspace file. | +### Push-to-talk (PTT) hotkey + overlay + +Registered in **`lib.rs`** (`ptt_hotkeys.rs` + `ptt_overlay.rs`). These commands manage the global push-to-talk shortcut and the floating overlay window. + +| Command | Signature | Purpose | +| ---------------------- | ---------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| `register_ptt_hotkey` | `(shortcut: String) -> Result<(), String>` | Register (or re-register) a global hotkey for push-to-talk. Emits Tauri events `ptt://start { session_id }` (key pressed) and `ptt://stop { session_id }` (key released). Returns an error string if the shortcut conflicts with dictation or if the OS rejects it (e.g. Wayland, Accessibility permission required on macOS). | +| `unregister_ptt_hotkey`| `() -> Result<(), String>` | Unregister the current PTT hotkey and tear down the overlay window. | +| `show_ptt_overlay` | `(active: bool, session_id: u64) -> ()` | Show (`active: true`) or hide (`active: false`) the floating PTT overlay window. The window is focus-stealing-free (`focus: false`). Called by `PttHotkeyManager.tsx` via `app/src/utils/tauriCommands/ptt.ts`. | + +**Event flow:** `register_ptt_hotkey` wires the OS hotkey to fire `ptt://start` / `ptt://stop` Tauri events that `PttHotkeyManager.tsx` subscribes to via `@tauri-apps/api/event`. The manager forwards them into the `pttService` state machine which drives the audio capture → transcribe → chat-send pipeline. + +**Conflict detection:** `register_ptt_hotkey` checks for overlap with the active dictation shortcuts before registering. If a conflict is detected it returns `"ConflictsWithDictation()"` without registering anything, and the settings panel surfaces this as `pttSettings.errorConflictsWithDictation`. + ### Synthetic input main-thread executor (native registry, not `invoke`) Registered in **`lib.rs`** at startup under the event-bus native-request method From ef2dfe8e7437c21ec95f081a129589f1b9bc3a1a Mon Sep 17 00:00:00 2001 From: Ghost Scripter Date: Sat, 6 Jun 2026 02:25:11 +0530 Subject: [PATCH 36/36] chore(pr-manager): apply formatting Prettier normalized quote styles in fr.ts, it.ts, ko.ts after i18n content edits introduced double-quote strings alongside single-quote strings. --- app/src/lib/i18n/fr.ts | 4 ++-- app/src/lib/i18n/it.ts | 4 ++-- app/src/lib/i18n/ko.ts | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/app/src/lib/i18n/fr.ts b/app/src/lib/i18n/fr.ts index 898fb98eeb..afeba903a0 100644 --- a/app/src/lib/i18n/fr.ts +++ b/app/src/lib/i18n/fr.ts @@ -1586,8 +1586,8 @@ const messages: TranslationMap = { 'voice.externalProviders.apiKeyPlaceholder': 'sk-…', 'voice.externalProviders.add': 'Add', 'pttSettings.title': 'Appuyer pour parler', - ‘pttSettings.description’: - ‘Maintiens une touche pour parler à OpenHuman pendant que tu utilises une autre application. Relâcher envoie l’enregistrement ; si « Lire les réponses » est activé, OpenHuman lit la réponse à voix haute.’, + 'pttSettings.description': + 'Maintiens une touche pour parler à OpenHuman pendant que tu utilises une autre application. Relâcher envoie l’enregistrement ; si « Lire les réponses » est activé, OpenHuman lit la réponse à voix haute.', 'pttSettings.shortcutLabel': 'Raccourci', 'pttSettings.shortcutPlaceholder': 'Appuie sur une touche (par exemple F13)', 'pttSettings.shortcutUnsetHint': diff --git a/app/src/lib/i18n/it.ts b/app/src/lib/i18n/it.ts index 2e9896dcf4..4332aafcff 100644 --- a/app/src/lib/i18n/it.ts +++ b/app/src/lib/i18n/it.ts @@ -1575,8 +1575,8 @@ const messages: TranslationMap = { 'voice.externalProviders.apiKeyPlaceholder': 'sk-…', 'voice.externalProviders.add': 'Add', 'pttSettings.title': 'Premi per parlare', - ‘pttSettings.description’: - ‘Tieni premuto un tasto per parlare con OpenHuman mentre sei in un’altra app. Al rilascio l’audio viene inviato; se «Leggi le risposte» è attivo, OpenHuman risponde a voce.’, + 'pttSettings.description': + 'Tieni premuto un tasto per parlare con OpenHuman mentre sei in un’altra app. Al rilascio l’audio viene inviato; se «Leggi le risposte» è attivo, OpenHuman risponde a voce.', 'pttSettings.shortcutLabel': 'Scorciatoia', 'pttSettings.shortcutPlaceholder': 'Premi un tasto (es. F13)', 'pttSettings.shortcutUnsetHint': diff --git a/app/src/lib/i18n/ko.ts b/app/src/lib/i18n/ko.ts index 3b637876ce..1a14dc62ae 100644 --- a/app/src/lib/i18n/ko.ts +++ b/app/src/lib/i18n/ko.ts @@ -1533,7 +1533,7 @@ const messages: TranslationMap = { 'voice.externalProviders.add': 'Add', 'pttSettings.title': '눌러서 말하기', 'pttSettings.description': - '다른 앱을 사용하는 중에도 키를 누르고 있으면 OpenHuman과 대화할 수 있습니다. 키를 놓으면 녹음이 전송되고, \'답변 읽어주기\'가 켜져 있으면 OpenHuman이 답변을 음성으로 들려줍니다.', + "다른 앱을 사용하는 중에도 키를 누르고 있으면 OpenHuman과 대화할 수 있습니다. 키를 놓으면 녹음이 전송되고, '답변 읽어주기'가 켜져 있으면 OpenHuman이 답변을 음성으로 들려줍니다.", 'pttSettings.shortcutLabel': '단축키', 'pttSettings.shortcutPlaceholder': '키를 누르세요 (예: F13)', 'pttSettings.shortcutUnsetHint':