From 453e074b554dcfd5c93324a293734c98b07c993e Mon Sep 17 00:00:00 2001 From: Julius Marminge Date: Thu, 14 May 2026 00:23:15 +0200 Subject: [PATCH 1/5] Add background activity policy and host power monitoring - Track client leases, host power, and background work eligibility - Route provider snapshot refreshes through shared background policy - Add tests for the new policy and related server settings wiring Co-authored-by: codex --- .../src/background/BackgroundPolicy.test.ts | 191 ++++++ .../server/src/background/BackgroundPolicy.ts | 276 +++++++++ .../server/src/background/HostPowerMonitor.ts | 209 +++++++ .../src/provider/Drivers/ClaudeDriver.ts | 8 +- .../src/provider/Drivers/CodexDriver.ts | 9 +- .../src/provider/Drivers/CursorDriver.ts | 11 +- .../server/src/provider/Drivers/GrokDriver.ts | 6 +- .../src/provider/Drivers/OpenCodeDriver.ts | 9 +- .../ProviderInstanceRegistryLive.test.ts | 39 ++ .../provider/Layers/ProviderRegistry.test.ts | 39 ++ .../makeManagedServerProvider.test.ts | 82 ++- .../src/provider/makeManagedServerProvider.ts | 54 +- apps/server/src/server.test.ts | 31 + apps/server/src/server.ts | 10 +- apps/server/src/serverSettings.test.ts | 8 + apps/server/src/serverSettings.ts | 8 +- .../src/vcs/VcsStatusBroadcaster.test.ts | 92 +++ apps/server/src/vcs/VcsStatusBroadcaster.ts | 10 + apps/server/src/ws.ts | 37 +- .../components/settings/SettingsPanels.tsx | 568 +++++++++++++++++- .../settings/SourceControlSettings.tsx | 89 ++- apps/web/src/connection/runtime.ts | 18 +- apps/web/src/hooks/useSettings.ts | 1 - .../web/src/lib/backgroundActivityReporter.ts | 202 +++++++ packages/client-runtime/src/rpc/client.ts | 35 +- packages/contracts/src/background.ts | 101 ++++ packages/contracts/src/baseSchemas.ts | 2 + packages/contracts/src/index.ts | 1 + packages/contracts/src/rpc.ts | 36 ++ packages/contracts/src/settings.ts | 61 ++ packages/shared/package.json | 4 + .../shared/src/backgroundActivitySettings.ts | 248 ++++++++ 
packages/shared/src/serverSettings.test.ts | 78 +++ packages/shared/src/serverSettings.ts | 56 +- 34 files changed, 2572 insertions(+), 57 deletions(-) create mode 100644 apps/server/src/background/BackgroundPolicy.test.ts create mode 100644 apps/server/src/background/BackgroundPolicy.ts create mode 100644 apps/server/src/background/HostPowerMonitor.ts create mode 100644 apps/web/src/lib/backgroundActivityReporter.ts create mode 100644 packages/contracts/src/background.ts create mode 100644 packages/shared/src/backgroundActivitySettings.ts diff --git a/apps/server/src/background/BackgroundPolicy.test.ts b/apps/server/src/background/BackgroundPolicy.test.ts new file mode 100644 index 00000000000..0bfcfb2b282 --- /dev/null +++ b/apps/server/src/background/BackgroundPolicy.test.ts @@ -0,0 +1,191 @@ +import { assert, describe, it } from "@effect/vitest"; +import { + AuthSessionId, + RpcClientId, + type HostPowerSnapshot, + type ClientActivityReportInput, +} from "@t3tools/contracts"; +import * as DateTime from "effect/DateTime"; +import * as Effect from "effect/Effect"; +import * as Layer from "effect/Layer"; +import * as PubSub from "effect/PubSub"; +import * as Stream from "effect/Stream"; + +import { ServerSettingsService } from "../serverSettings.ts"; +import * as BackgroundPolicy from "./BackgroundPolicy.ts"; +import * as HostPowerMonitor from "./HostPowerMonitor.ts"; + +const TEST_NOW = DateTime.makeUnsafe("2026-05-13T00:00:00.000Z"); + +const nominalHostPower: HostPowerSnapshot = { + source: "unknown", + idle: "unknown", + idleSeconds: null, + locked: "unknown", + suspended: false, + onBattery: "unknown", + lowPowerMode: "unknown", + thermalState: "unknown", + stale: true, + updatedAt: TEST_NOW, +}; + +const constrainedHostPower: HostPowerSnapshot = { + ...nominalHostPower, + lowPowerMode: "true", + stale: false, +}; + +function makeReport(overrides: Partial = {}): ClientActivityReportInput { + return { + clientId: "client-1", + clientKind: "web", + visible: 
true, + focused: true, + recentlyInteracted: true, + scopes: [{ type: "vcs-status", cwd: "/repo" }], + ttlMs: 45_000, + observedAt: TEST_NOW, + ...overrides, + }; +} + +function makeLayer( + hostPower: HostPowerSnapshot, + settingsOverrides: Parameters[0] = {}, +) { + const hostLayer = Layer.effect( + HostPowerMonitor.HostPowerMonitor, + Effect.gen(function* () { + const changes = yield* PubSub.sliding(1); + let snapshot = hostPower; + return HostPowerMonitor.HostPowerMonitor.of({ + snapshot: Effect.sync(() => snapshot), + report: (next) => + Effect.sync(() => { + snapshot = next; + }).pipe(Effect.andThen(PubSub.publish(changes, next)), Effect.asVoid), + setDemandActive: () => Effect.void, + streamChanges: Stream.fromPubSub(changes), + }); + }), + ); + return BackgroundPolicy.layer.pipe( + Layer.provide(Layer.merge(hostLayer, ServerSettingsService.layerTest(settingsOverrides))), + ); +} + +describe("BackgroundPolicy", () => { + it.effect("records foreground scoped client demand", () => + Effect.gen(function* () { + const policy = yield* BackgroundPolicy.BackgroundPolicy; + yield* policy.reportClientActivity( + AuthSessionId.make("session-1"), + RpcClientId.make(1), + makeReport(), + ); + + const snapshot = yield* policy.snapshot; + assert.equal(snapshot.activeForegroundLeaseCount, 1); + assert.deepStrictEqual(snapshot.activeScopeKeys, ["vcs-status:/repo"]); + assert.equal(snapshot.shouldRunOpportunisticWork, true); + assert.equal(yield* policy.hasDemand({ type: "vcs-status", cwd: "/repo" }), true); + assert.equal(yield* policy.hasDemand({ type: "vcs-status", cwd: "/other" }), false); + assert.equal(yield* policy.shouldRunScopeWork({ type: "vcs-status", cwd: "/repo" }), true); + assert.equal(yield* policy.shouldRunScopeWork({ type: "vcs-status", cwd: "/other" }), false); + }).pipe(Effect.provide(makeLayer(nominalHostPower))), + ); + + it.effect("removes all leases for a disconnected websocket connection", () => + Effect.gen(function* () { + const policy = yield* 
BackgroundPolicy.BackgroundPolicy; + yield* policy.reportClientActivity( + AuthSessionId.make("session-1"), + RpcClientId.make(1), + makeReport(), + ); + yield* policy.removeRpcClient(RpcClientId.make(1)); + + const snapshot = yield* policy.snapshot; + assert.equal(snapshot.activeForegroundLeaseCount, 0); + assert.deepStrictEqual(snapshot.activeScopeKeys, []); + assert.equal(snapshot.shouldRunOpportunisticWork, false); + }).pipe(Effect.provide(makeLayer(nominalHostPower))), + ); + + it.effect("host low power mode disables opportunistic work without dropping scoped demand", () => + Effect.gen(function* () { + const policy = yield* BackgroundPolicy.BackgroundPolicy; + yield* policy.reportClientActivity( + AuthSessionId.make("session-1"), + RpcClientId.make(1), + makeReport(), + ); + + const snapshot = yield* policy.snapshot; + assert.equal(snapshot.activeForegroundLeaseCount, 1); + assert.deepStrictEqual(snapshot.activeScopeKeys, ["vcs-status:/repo"]); + assert.equal(snapshot.shouldRunOpportunisticWork, false); + assert.equal(yield* policy.hasDemand({ type: "vcs-status", cwd: "/repo" }), true); + assert.equal(yield* policy.shouldRunScopeWork({ type: "vcs-status", cwd: "/repo" }), false); + }).pipe(Effect.provide(makeLayer(constrainedHostPower))), + ); + + it.effect("keeps background demand visible while preventing scoped work", () => + Effect.gen(function* () { + const policy = yield* BackgroundPolicy.BackgroundPolicy; + yield* policy.reportClientActivity( + AuthSessionId.make("session-1"), + RpcClientId.make(1), + makeReport({ focused: false, visible: false }), + ); + + const snapshot = yield* policy.snapshot; + assert.equal(snapshot.activeForegroundLeaseCount, 0); + assert.deepStrictEqual(snapshot.activeScopeKeys, ["vcs-status:/repo"]); + assert.equal(yield* policy.hasDemand({ type: "vcs-status", cwd: "/repo" }), true); + assert.equal(yield* policy.shouldRunScopeWork({ type: "vcs-status", cwd: "/repo" }), false); + 
}).pipe(Effect.provide(makeLayer(nominalHostPower))), + ); + + it.effect( + "performance profile allows background scoped work while a scoped lease is active", + () => + Effect.gen(function* () { + const policy = yield* BackgroundPolicy.BackgroundPolicy; + yield* policy.reportClientActivity( + AuthSessionId.make("session-1"), + RpcClientId.make(1), + makeReport({ focused: false, visible: false }), + ); + + assert.equal(yield* policy.shouldRunScopeWork({ type: "vcs-status", cwd: "/repo" }), true); + }).pipe( + Effect.provide(makeLayer(nominalHostPower, { backgroundActivityProfile: "performance" })), + ), + ); + + it.effect("battery saver profile pauses scoped work on battery", () => + Effect.gen(function* () { + const policy = yield* BackgroundPolicy.BackgroundPolicy; + yield* policy.reportClientActivity( + AuthSessionId.make("session-1"), + RpcClientId.make(1), + makeReport(), + ); + + assert.equal(yield* policy.shouldRunScopeWork({ type: "vcs-status", cwd: "/repo" }), false); + }).pipe( + Effect.provide( + makeLayer( + { + ...nominalHostPower, + onBattery: "true", + stale: false, + }, + { backgroundActivityProfile: "battery-saver" }, + ), + ), + ), + ); +}); diff --git a/apps/server/src/background/BackgroundPolicy.ts b/apps/server/src/background/BackgroundPolicy.ts new file mode 100644 index 00000000000..db3ae31102c --- /dev/null +++ b/apps/server/src/background/BackgroundPolicy.ts @@ -0,0 +1,276 @@ +import { + type AuthSessionId, + type BackgroundPolicySnapshot, + type BackgroundScope, + type ClientActivityLease, + type ClientActivityReportInput, + type HostPowerSnapshot, + type RpcClientId, +} from "@t3tools/contracts"; +import { + getBackgroundActivityPresetSettings, + resolveServerBackgroundActivitySettings, + type ResolvedBackgroundActivitySettings, +} from "@t3tools/shared/backgroundActivitySettings"; +import * as DateTime from "effect/DateTime"; +import * as Context from "effect/Context"; +import * as Effect from "effect/Effect"; +import * as Layer from 
"effect/Layer"; +import * as PubSub from "effect/PubSub"; +import * as Ref from "effect/Ref"; +import * as Stream from "effect/Stream"; + +import { ServerSettingsService } from "../serverSettings.ts"; +import * as HostPowerMonitor from "./HostPowerMonitor.ts"; + +export interface BackgroundPolicyShape { + readonly reportClientActivity: ( + sessionId: AuthSessionId, + rpcClientId: RpcClientId, + input: ClientActivityReportInput, + ) => Effect.Effect; + readonly removeRpcClient: (rpcClientId: RpcClientId) => Effect.Effect; + readonly reportHostPowerState: (snapshot: HostPowerSnapshot) => Effect.Effect; + readonly snapshot: Effect.Effect; + readonly streamChanges: Stream.Stream; + readonly hasDemand: (scope: BackgroundScope) => Effect.Effect; + readonly shouldRunScopeWork: (scope: BackgroundScope) => Effect.Effect; + readonly shouldRunOpportunisticWork: Effect.Effect; +} + +export class BackgroundPolicy extends Context.Service()( + "t3/background/BackgroundPolicy", +) {} + +const DEFAULT_LEASE_TTL_MS = 45_000; +const MAX_LEASE_TTL_MS = 120_000; + +function scopeKey(scope: BackgroundScope): string { + switch (scope.type) { + case "server-config": + case "diagnostics": + return scope.type; + case "provider-status": + return scope.instanceId ? 
`${scope.type}:${scope.instanceId}` : scope.type; + case "vcs-status": + case "git-refs": + return `${scope.type}:${scope.cwd}`; + case "thread": + return `${scope.type}:${scope.threadId}`; + } +} + +function isLeaseActive(lease: ClientActivityLease, now: DateTime.Utc): boolean { + return DateTime.isGreaterThan(lease.expiresAt, now); +} + +function isForegroundLease(lease: ClientActivityLease, now: DateTime.Utc): boolean { + return isLeaseActive(lease, now) && lease.visible && lease.focused; +} + +function leaseHasScope(lease: ClientActivityLease, scope: BackgroundScope): boolean { + const key = scopeKey(scope); + return lease.scopes.some((leaseScope) => scopeKey(leaseScope) === key); +} + +function hasThermalPressure(hostPower: HostPowerSnapshot): boolean { + return hostPower.thermalState === "serious" || hostPower.thermalState === "critical"; +} + +function isHostConstrained( + hostPower: HostPowerSnapshot, + settings: ResolvedBackgroundActivitySettings, +): boolean { + if ( + (settings.pauseWhenHostLocked && hostPower.locked === "true") || + hasThermalPressure(hostPower) + ) { + return true; + } + if (settings.pauseWhenHostLowPower && hostPower.lowPowerMode === "true") return true; + return settings.pauseWhenOnBattery && hostPower.onBattery === "true"; +} + +function isClientConstrained( + lease: ClientActivityLease, + settings: ResolvedBackgroundActivitySettings, +): boolean { + if (settings.pauseWhenClientLowPower && lease.lowPowerMode === "true") return true; + return settings.pauseWhenOnBattery && lease.batteryState === "unplugged"; +} + +function leaseMayRunScopedWork( + lease: ClientActivityLease, + scope: BackgroundScope, + now: DateTime.Utc, + settings: ResolvedBackgroundActivitySettings, +): boolean { + const activeWithScope = isLeaseActive(lease, now) && leaseHasScope(lease, scope); + if (!activeWithScope || isClientConstrained(lease, settings)) { + return false; + } + if (settings.profile === "performance") { + return true; + } + return 
isForegroundLease(lease, now); +} + +function computeSnapshot(input: { + readonly hostPower: HostPowerSnapshot; + readonly leases: ReadonlyMap; + readonly now: DateTime.Utc; + readonly settings: ResolvedBackgroundActivitySettings; + readonly updatedAt: DateTime.Utc; +}): BackgroundPolicySnapshot { + const activeLeases = [...input.leases.values()].filter((lease) => + isLeaseActive(lease, input.now), + ); + const foregroundLeases = activeLeases.filter((lease) => isForegroundLease(lease, input.now)); + const activeScopeKeys = new Set(); + for (const lease of activeLeases) { + for (const scope of lease.scopes) { + activeScopeKeys.add(scopeKey(scope)); + } + } + + return { + hostPower: input.hostPower, + leases: activeLeases, + activeForegroundLeaseCount: foregroundLeases.length, + activeScopeKeys: [...activeScopeKeys].toSorted(), + shouldRunOpportunisticWork: + foregroundLeases.some((lease) => !isClientConstrained(lease, input.settings)) && + !isHostConstrained(input.hostPower, input.settings), + updatedAt: input.updatedAt, + }; +} + +export const make = Effect.fn("background.policy.make")(function* () { + const hostPowerMonitor = yield* HostPowerMonitor.HostPowerMonitor; + const serverSettings = yield* ServerSettingsService; + const leasesRef = yield* Ref.make(new Map()); + const changes = yield* PubSub.sliding(1); + + const backgroundActivitySettings = serverSettings.getSettings.pipe( + Effect.map(resolveServerBackgroundActivitySettings), + Effect.catch(() => Effect.succeed(getBackgroundActivityPresetSettings("balanced"))), + ); + + const snapshot = Effect.gen(function* () { + const [hostPower, leases, now, settings] = yield* Effect.all([ + hostPowerMonitor.snapshot, + Ref.get(leasesRef), + DateTime.now, + backgroundActivitySettings, + ]); + return computeSnapshot({ hostPower, leases, now, settings, updatedAt: now }); + }); + + const publishSnapshot = snapshot.pipe( + Effect.tap((next) => hostPowerMonitor.setDemandActive(next.activeForegroundLeaseCount > 0)), + 
Effect.flatMap((next) => PubSub.publish(changes, next)), + ); + + const reportClientActivity: BackgroundPolicyShape["reportClientActivity"] = ( + sessionId, + rpcClientId, + input, + ) => + Effect.gen(function* () { + const ttlMs = Math.min( + Math.max(input.ttlMs ?? DEFAULT_LEASE_TTL_MS, 1_000), + MAX_LEASE_TTL_MS, + ); + const now = yield* DateTime.now; + const expiresAt = DateTime.add(now, { milliseconds: ttlMs }); + const lease: ClientActivityLease = { + sessionId, + rpcClientId, + clientId: input.clientId, + clientKind: input.clientKind, + visible: input.visible, + focused: input.focused, + recentlyInteracted: input.recentlyInteracted, + ...(input.appState !== undefined ? { appState: input.appState } : {}), + ...(input.lowPowerMode !== undefined ? { lowPowerMode: input.lowPowerMode } : {}), + ...(input.batteryState !== undefined ? { batteryState: input.batteryState } : {}), + ...(input.networkType !== undefined ? { networkType: input.networkType } : {}), + scopes: input.scopes, + updatedAt: now, + expiresAt, + }; + yield* Ref.update(leasesRef, (leases) => { + const next = new Map(leases); + next.set(`${rpcClientId}:${input.clientId}`, lease); + return next; + }); + yield* publishSnapshot; + }); + + const removeRpcClient: BackgroundPolicyShape["removeRpcClient"] = (rpcClientId) => + Ref.update(leasesRef, (leases) => { + const next = new Map(leases); + for (const key of next.keys()) { + if (key.startsWith(`${rpcClientId}:`)) { + next.delete(key); + } + } + return next; + }).pipe(Effect.andThen(publishSnapshot), Effect.asVoid); + + const hasDemand: BackgroundPolicyShape["hasDemand"] = (scope) => + Effect.map(snapshot, (current) => current.activeScopeKeys.includes(scopeKey(scope))); + + const shouldRunScopeWork: BackgroundPolicyShape["shouldRunScopeWork"] = (scope) => + Effect.gen(function* () { + const [current, settings] = yield* Effect.all([snapshot, backgroundActivitySettings]); + if (isHostConstrained(current.hostPower, settings)) { + return false; + } + 
return current.leases.some((lease) => + leaseMayRunScopedWork(lease, scope, current.updatedAt, settings), + ); + }); + + const shouldRunOpportunisticWork = Effect.map( + snapshot, + (current) => current.shouldRunOpportunisticWork, + ); + + yield* Stream.runForEach(hostPowerMonitor.streamChanges, () => publishSnapshot).pipe( + Effect.forkScoped, + ); + + yield* Effect.forever( + Effect.sleep("15 seconds").pipe( + Effect.andThen( + Effect.gen(function* () { + const now = yield* DateTime.now; + yield* Ref.update(leasesRef, (leases) => { + const next = new Map(leases); + for (const [key, lease] of next) { + if (!isLeaseActive(lease, now)) { + next.delete(key); + } + } + return next; + }); + }), + ), + Effect.andThen(publishSnapshot), + ), + ).pipe(Effect.forkScoped); + + return BackgroundPolicy.of({ + reportClientActivity, + removeRpcClient, + reportHostPowerState: hostPowerMonitor.report, + snapshot, + streamChanges: Stream.fromPubSub(changes), + hasDemand, + shouldRunScopeWork, + shouldRunOpportunisticWork, + }); +}); + +export const layer = Layer.effect(BackgroundPolicy, make()); diff --git a/apps/server/src/background/HostPowerMonitor.ts b/apps/server/src/background/HostPowerMonitor.ts new file mode 100644 index 00000000000..b6d068531a3 --- /dev/null +++ b/apps/server/src/background/HostPowerMonitor.ts @@ -0,0 +1,209 @@ +import { + type BackgroundBooleanState, + type HostPowerSnapshot, + type HostPowerThermalState, +} from "@t3tools/contracts"; +import { + getBackgroundActivityPresetSettings, + resolveServerBackgroundActivitySettings, +} from "@t3tools/shared/backgroundActivitySettings"; +import * as Context from "effect/Context"; +import * as DateTime from "effect/DateTime"; +import * as Duration from "effect/Duration"; +import * as Effect from "effect/Effect"; +import * as Layer from "effect/Layer"; +import * as PubSub from "effect/PubSub"; +import * as Ref from "effect/Ref"; +import * as Stream from "effect/Stream"; + +import * as ProcessRunner from 
"../processRunner.ts"; +import { ServerSettingsService } from "../serverSettings.ts"; + +export interface HostPowerMonitorShape { + readonly snapshot: Effect.Effect; + readonly report: (snapshot: HostPowerSnapshot) => Effect.Effect; + readonly setDemandActive: (active: boolean) => Effect.Effect; + readonly streamChanges: Stream.Stream; +} + +export class HostPowerMonitor extends Context.Service()( + "t3/background/HostPowerMonitor", +) {} + +const COMMAND_TIMEOUT = Duration.seconds(3); + +export const makeUnknownSnapshot = ( + source: HostPowerSnapshot["source"], + updatedAt: HostPowerSnapshot["updatedAt"], +): HostPowerSnapshot => ({ + source, + idle: "unknown", + idleSeconds: null, + locked: "unknown", + suspended: false, + onBattery: "unknown", + lowPowerMode: "unknown", + thermalState: "unknown", + stale: true, + updatedAt, +}); + +function boolState(value: boolean | null): BackgroundBooleanState { + if (value === null) return "unknown"; + return value ? "true" : "false"; +} + +function parseIdleSeconds(ioregOutput: string): number | null { + const match = /"HIDIdleTime"\s*=\s*(\d+)/u.exec(ioregOutput); + if (!match) return null; + const nanoseconds = Number(match[1]); + return Number.isFinite(nanoseconds) ? Math.floor(nanoseconds / 1_000_000_000) : null; +} + +function parseOnBattery(pmsetBatteryOutput: string): boolean | null { + if (/Now drawing from 'Battery Power'/iu.test(pmsetBatteryOutput)) return true; + if (/Now drawing from 'AC Power'/iu.test(pmsetBatteryOutput)) return false; + return null; +} + +function parseLowPowerMode(pmsetOutput: string): boolean | null { + const match = /^\s*lowpowermode\s+([01])\s*$/imu.exec(pmsetOutput); + if (!match) return null; + return match[1] === "1"; +} + +function parseThermalState(_output: string): HostPowerThermalState { + // The stable shell adapter intentionally does not parse `pmset thermlog`; + // native adapters can provide this without depending on human-formatted text. 
+ return "unknown"; +} + +function runOptional( + runner: ProcessRunner.ProcessRunnerShape, + command: string, + args: ReadonlyArray, +) { + return runner + .run({ + command, + args, + timeout: COMMAND_TIMEOUT, + timeoutBehavior: "timedOutResult", + outputMode: "truncate", + maxOutputBytes: 32_000, + }) + .pipe(Effect.option); +} + +const readMacShellSnapshot = Effect.fn("background.hostPower.readMacShellSnapshot")(function* () { + const runner = yield* ProcessRunner.ProcessRunner; + const updatedAt = yield* DateTime.now; + const [idleOutput, batteryOutput, pmsetOutput] = yield* Effect.all( + [ + runOptional(runner, "ioreg", ["-c", "IOHIDSystem"]), + runOptional(runner, "pmset", ["-g", "batt"]), + runOptional(runner, "pmset", ["-g"]), + ], + { concurrency: "unbounded" }, + ); + + const idleSeconds = idleOutput._tag === "Some" ? parseIdleSeconds(idleOutput.value.stdout) : null; + const onBattery = + batteryOutput._tag === "Some" ? parseOnBattery(batteryOutput.value.stdout) : null; + const lowPowerMode = + pmsetOutput._tag === "Some" ? parseLowPowerMode(pmsetOutput.value.stdout) : null; + + return { + source: "node-macos-shell", + idle: boolState(idleSeconds === null ? 
null : idleSeconds >= 60), + idleSeconds, + locked: "unknown", + suspended: false, + onBattery: boolState(onBattery), + lowPowerMode: boolState(lowPowerMode), + thermalState: parseThermalState(""), + stale: false, + updatedAt, + } satisfies HostPowerSnapshot; +}); + +export const make = Effect.fn("background.hostPower.make")(function* ( + initialSource: HostPowerSnapshot["source"] = "unknown", +) { + const initial = makeUnknownSnapshot(initialSource, yield* DateTime.now); + const latestRef = yield* Ref.make(initial); + const demandActiveRef = yield* Ref.make(false); + const changes = yield* PubSub.sliding(1); + + const report: HostPowerMonitorShape["report"] = (snapshot) => + Ref.set(latestRef, snapshot).pipe( + Effect.andThen(PubSub.publish(changes, snapshot)), + Effect.asVoid, + ); + + return HostPowerMonitor.of({ + snapshot: Ref.get(latestRef), + report, + setDemandActive: (active) => Ref.set(demandActiveRef, active), + streamChanges: Stream.fromPubSub(changes), + }); +}); + +const unknownLayer = Layer.effect(HostPowerMonitor, make("unknown")); +const linuxLayer = Layer.effect(HostPowerMonitor, make("node-linux")); +const windowsLayer = Layer.effect(HostPowerMonitor, make("node-windows")); + +const macShellLayer = Layer.effect( + HostPowerMonitor, + Effect.gen(function* () { + const serverSettings = yield* ServerSettingsService; + const monitor = yield* make("node-macos-shell"); + const demandActiveRef = yield* Ref.make(true); + const setDemandActive: HostPowerMonitorShape["setDemandActive"] = (active) => + Ref.set(demandActiveRef, active); + const getPollInterval = Effect.gen(function* () { + const demandActive = yield* Ref.get(demandActiveRef); + const settings = yield* serverSettings.getSettings.pipe( + Effect.map(resolveServerBackgroundActivitySettings), + Effect.catch(() => Effect.succeed(getBackgroundActivityPresetSettings("balanced"))), + ); + return demandActive + ? 
settings.hostPowerMonitorActiveInterval + : settings.hostPowerMonitorIdleInterval; + }); + const adaptiveMonitor = HostPowerMonitor.of({ + snapshot: monitor.snapshot, + report: monitor.report, + setDemandActive, + streamChanges: monitor.streamChanges, + }); + yield* readMacShellSnapshot().pipe( + Effect.flatMap(adaptiveMonitor.report), + Effect.ignoreCause({ log: true }), + ); + yield* Effect.forever( + getPollInterval.pipe( + Effect.flatMap((interval) => Effect.sleep(Duration.max(interval, Duration.seconds(5)))), + Effect.andThen(readMacShellSnapshot()), + Effect.flatMap(adaptiveMonitor.report), + Effect.ignoreCause({ log: true }), + ), + ).pipe(Effect.forkScoped); + return adaptiveMonitor; + }), +).pipe(Layer.provide(ProcessRunner.layer)); + +export const layer = Layer.unwrap( + Effect.sync(() => { + switch (process.platform) { + case "darwin": + return macShellLayer; + case "linux": + return linuxLayer; + case "win32": + return windowsLayer; + default: + return unknownLayer; + } + }), +); diff --git a/apps/server/src/provider/Drivers/ClaudeDriver.ts b/apps/server/src/provider/Drivers/ClaudeDriver.ts index b126028f813..a5f180c62de 100644 --- a/apps/server/src/provider/Drivers/ClaudeDriver.ts +++ b/apps/server/src/provider/Drivers/ClaudeDriver.ts @@ -25,7 +25,9 @@ import { HttpClient } from "effect/unstable/http"; import { ChildProcessSpawner } from "effect/unstable/process"; import { makeClaudeTextGeneration } from "../../textGeneration/ClaudeTextGeneration.ts"; +import * as BackgroundPolicy from "../../background/BackgroundPolicy.ts"; import { ServerConfig } from "../../config.ts"; +import { ServerSettingsService } from "../../serverSettings.ts"; import { ProviderDriverError } from "../Errors.ts"; import { makeClaudeAdapter } from "../Layers/ClaudeAdapter.ts"; import { @@ -52,7 +54,6 @@ import { makeClaudeCapabilitiesCacheKey, makeClaudeContinuationGroupKey } from " const decodeClaudeSettings = Schema.decodeSync(ClaudeSettings); const DRIVER_KIND = 
ProviderDriverKind.make("claudeAgent"); -const SNAPSHOT_REFRESH_INTERVAL = Duration.minutes(5); const CAPABILITIES_PROBE_TTL = Duration.minutes(5); function isClaudeNativeCommandPath(commandPath: string): boolean { @@ -77,13 +78,15 @@ const UPDATE = makePackageManagedProviderMaintenanceResolver({ }); export type ClaudeDriverEnv = + | BackgroundPolicy.BackgroundPolicy | ChildProcessSpawner.ChildProcessSpawner | Crypto.Crypto | FileSystem.FileSystem | HttpClient.HttpClient | Path.Path | ProviderEventLoggers - | ServerConfig; + | ServerConfig + | ServerSettingsService; const withInstanceIdentity = (input: { @@ -176,7 +179,6 @@ export const ClaudeDriver: ProviderDriver = { Effect.provideService(HttpClient.HttpClient, httpClient), Effect.flatMap((enrichedSnapshot) => publishSnapshot(enrichedSnapshot)), ), - refreshInterval: SNAPSHOT_REFRESH_INTERVAL, }).pipe( Effect.mapError( (cause) => diff --git a/apps/server/src/provider/Drivers/CodexDriver.ts b/apps/server/src/provider/Drivers/CodexDriver.ts index 441edda479f..f36cfa3dddc 100644 --- a/apps/server/src/provider/Drivers/CodexDriver.ts +++ b/apps/server/src/provider/Drivers/CodexDriver.ts @@ -22,7 +22,6 @@ * @module provider/Drivers/CodexDriver */ import { CodexSettings, ProviderDriverKind, type ServerProvider } from "@t3tools/contracts"; -import * as Duration from "effect/Duration"; import * as Crypto from "effect/Crypto"; import * as Effect from "effect/Effect"; import * as FileSystem from "effect/FileSystem"; @@ -33,7 +32,9 @@ import { HttpClient } from "effect/unstable/http"; import { ChildProcessSpawner } from "effect/unstable/process"; import { makeCodexTextGeneration } from "../../textGeneration/CodexTextGeneration.ts"; +import * as BackgroundPolicy from "../../background/BackgroundPolicy.ts"; import { ServerConfig } from "../../config.ts"; +import { ServerSettingsService } from "../../serverSettings.ts"; import { ProviderDriverError } from "../Errors.ts"; import { makeCodexAdapter } from 
"../Layers/CodexAdapter.ts"; import { checkCodexProviderStatus, makePendingCodexProvider } from "../Layers/CodexProvider.ts"; @@ -55,7 +56,6 @@ import { const decodeCodexSettings = Schema.decodeSync(CodexSettings); const DRIVER_KIND = ProviderDriverKind.make("codex"); -const SNAPSHOT_REFRESH_INTERVAL = Duration.minutes(5); const UPDATE = makePackageManagedProviderMaintenanceResolver({ provider: DRIVER_KIND, npmPackageName: "@openai/codex", @@ -69,13 +69,15 @@ const UPDATE = makePackageManagedProviderMaintenanceResolver({ * registered driver and the runtime satisfies them once. */ export type CodexDriverEnv = + | BackgroundPolicy.BackgroundPolicy | ChildProcessSpawner.ChildProcessSpawner | Crypto.Crypto | FileSystem.FileSystem | HttpClient.HttpClient | Path.Path | ProviderEventLoggers - | ServerConfig; + | ServerConfig + | ServerSettingsService; /** * Stamp instance identity onto a `ServerProvider` snapshot produced by the @@ -176,7 +178,6 @@ export const CodexDriver: ProviderDriver = { Effect.provideService(HttpClient.HttpClient, httpClient), Effect.flatMap((enrichedSnapshot) => publishSnapshot(enrichedSnapshot)), ), - refreshInterval: SNAPSHOT_REFRESH_INTERVAL, }).pipe( Effect.mapError( (cause) => diff --git a/apps/server/src/provider/Drivers/CursorDriver.ts b/apps/server/src/provider/Drivers/CursorDriver.ts index ba532864c45..3f045be89d8 100644 --- a/apps/server/src/provider/Drivers/CursorDriver.ts +++ b/apps/server/src/provider/Drivers/CursorDriver.ts @@ -12,7 +12,6 @@ * @module provider/Drivers/CursorDriver */ import { CursorSettings, ProviderDriverKind, type ServerProvider } from "@t3tools/contracts"; -import * as Duration from "effect/Duration"; import * as Crypto from "effect/Crypto"; import * as Effect from "effect/Effect"; import * as FileSystem from "effect/FileSystem"; @@ -23,6 +22,8 @@ import { HttpClient } from "effect/unstable/http"; import { ChildProcessSpawner } from "effect/unstable/process"; import { ServerConfig } from "../../config.ts"; +import 
* as BackgroundPolicy from "../../background/BackgroundPolicy.ts"; +import { ServerSettingsService } from "../../serverSettings.ts"; import { makeCursorTextGeneration } from "../../textGeneration/CursorTextGeneration.ts"; import { ProviderDriverError } from "../Errors.ts"; import { makeCursorAdapter } from "../Layers/CursorAdapter.ts"; @@ -48,7 +49,6 @@ import { const decodeCursorSettings = Schema.decodeSync(CursorSettings); const DRIVER_KIND = ProviderDriverKind.make("cursor"); -const SNAPSHOT_REFRESH_INTERVAL = Duration.minutes(5); const UPDATE = makeStaticProviderMaintenanceResolver( makeProviderMaintenanceCapabilities({ provider: DRIVER_KIND, @@ -60,13 +60,15 @@ const UPDATE = makeStaticProviderMaintenanceResolver( ); export type CursorDriverEnv = + | BackgroundPolicy.BackgroundPolicy | ChildProcessSpawner.ChildProcessSpawner | Crypto.Crypto | FileSystem.FileSystem | HttpClient.HttpClient | Path.Path | ProviderEventLoggers - | ServerConfig; + | ServerConfig + | ServerSettingsService; const withInstanceIdentity = (input: { @@ -148,8 +150,7 @@ export const CursorDriver: ProviderDriver = { publishSnapshot, stampIdentity, httpClient, - }), - refreshInterval: SNAPSHOT_REFRESH_INTERVAL, + }).pipe(Effect.provideService(ChildProcessSpawner.ChildProcessSpawner, spawner)), }).pipe( Effect.mapError( (cause) => diff --git a/apps/server/src/provider/Drivers/GrokDriver.ts b/apps/server/src/provider/Drivers/GrokDriver.ts index ab01439ffd3..dc6f21b151d 100644 --- a/apps/server/src/provider/Drivers/GrokDriver.ts +++ b/apps/server/src/provider/Drivers/GrokDriver.ts @@ -9,7 +9,9 @@ import * as Stream from "effect/Stream"; import { HttpClient } from "effect/unstable/http"; import { ChildProcessSpawner } from "effect/unstable/process"; +import * as BackgroundPolicy from "../../background/BackgroundPolicy.ts"; import { ServerConfig } from "../../config.ts"; +import { ServerSettingsService } from "../../serverSettings.ts"; import { makeGrokTextGeneration } from 
"../../textGeneration/GrokTextGeneration.ts"; import { ProviderDriverError } from "../Errors.ts"; import { makeGrokAdapter } from "../Layers/GrokAdapter.ts"; @@ -44,13 +46,15 @@ const UPDATE = makeStaticProviderMaintenanceResolver( ); export type GrokDriverEnv = + | BackgroundPolicy.BackgroundPolicy | ChildProcessSpawner.ChildProcessSpawner | Crypto.Crypto | FileSystem.FileSystem | HttpClient.HttpClient | Path.Path | ProviderEventLoggers - | ServerConfig; + | ServerConfig + | ServerSettingsService; const withInstanceIdentity = (input: { diff --git a/apps/server/src/provider/Drivers/OpenCodeDriver.ts b/apps/server/src/provider/Drivers/OpenCodeDriver.ts index e7216f83366..c5a85c7c274 100644 --- a/apps/server/src/provider/Drivers/OpenCodeDriver.ts +++ b/apps/server/src/provider/Drivers/OpenCodeDriver.ts @@ -13,7 +13,6 @@ * @module provider/Drivers/OpenCodeDriver */ import { OpenCodeSettings, ProviderDriverKind, type ServerProvider } from "@t3tools/contracts"; -import * as Duration from "effect/Duration"; import * as Crypto from "effect/Crypto"; import * as Effect from "effect/Effect"; import * as FileSystem from "effect/FileSystem"; @@ -24,7 +23,9 @@ import { HttpClient } from "effect/unstable/http"; import { ChildProcessSpawner } from "effect/unstable/process"; import { makeOpenCodeTextGeneration } from "../../textGeneration/OpenCodeTextGeneration.ts"; +import * as BackgroundPolicy from "../../background/BackgroundPolicy.ts"; import { ServerConfig } from "../../config.ts"; +import { ServerSettingsService } from "../../serverSettings.ts"; import { ProviderDriverError } from "../Errors.ts"; import { makeOpenCodeAdapter } from "../Layers/OpenCodeAdapter.ts"; import { @@ -50,7 +51,6 @@ import { const decodeOpenCodeSettings = Schema.decodeSync(OpenCodeSettings); const DRIVER_KIND = ProviderDriverKind.make("opencode"); -const SNAPSHOT_REFRESH_INTERVAL = Duration.minutes(5); function isOpenCodeNativeCommandPath(commandPath: string): boolean { const normalized = 
normalizeCommandPath(commandPath); @@ -73,6 +73,7 @@ const UPDATE = makePackageManagedProviderMaintenanceResolver({ }); export type OpenCodeDriverEnv = + | BackgroundPolicy.BackgroundPolicy | ChildProcessSpawner.ChildProcessSpawner | Crypto.Crypto | FileSystem.FileSystem @@ -80,7 +81,8 @@ export type OpenCodeDriverEnv = | OpenCodeRuntime | Path.Path | ProviderEventLoggers - | ServerConfig; + | ServerConfig + | ServerSettingsService; const withInstanceIdentity = (input: { @@ -155,7 +157,6 @@ export const OpenCodeDriver: ProviderDriver Effect.provideService(HttpClient.HttpClient, httpClient), Effect.flatMap((enrichedSnapshot) => publishSnapshot(enrichedSnapshot)), ), - refreshInterval: SNAPSHOT_REFRESH_INTERVAL, }).pipe( Effect.mapError( (cause) => diff --git a/apps/server/src/provider/Layers/ProviderInstanceRegistryLive.test.ts b/apps/server/src/provider/Layers/ProviderInstanceRegistryLive.test.ts index f2c5892a2c6..c151a165412 100644 --- a/apps/server/src/provider/Layers/ProviderInstanceRegistryLive.test.ts +++ b/apps/server/src/provider/Layers/ProviderInstanceRegistryLive.test.ts @@ -34,11 +34,15 @@ import { type ProviderInstanceConfigMap, ProviderInstanceId, } from "@t3tools/contracts"; +import * as DateTime from "effect/DateTime"; import * as Effect from "effect/Effect"; import * as Layer from "effect/Layer"; +import * as Stream from "effect/Stream"; import { HttpClient, HttpClientResponse } from "effect/unstable/http"; +import * as BackgroundPolicy from "../../background/BackgroundPolicy.ts"; import { ServerConfig } from "../../config.ts"; +import { ServerSettingsService } from "../../serverSettings.ts"; import { ClaudeDriver } from "../Drivers/ClaudeDriver.ts"; import { CodexDriver } from "../Drivers/CodexDriver.ts"; import { CursorDriver } from "../Drivers/CursorDriver.ts"; @@ -55,6 +59,37 @@ const TestHttpClientLive = Layer.succeed( ), ); +const TEST_EPOCH = DateTime.makeUnsafe("1970-01-01T00:00:00.000Z"); + +const BackgroundPolicyAlwaysRunLayer = 
Layer.mock(BackgroundPolicy.BackgroundPolicy)({ + reportClientActivity: () => Effect.void, + removeRpcClient: () => Effect.void, + reportHostPowerState: () => Effect.void, + snapshot: Effect.succeed({ + hostPower: { + source: "unknown", + idle: "unknown", + idleSeconds: null, + locked: "unknown", + suspended: false, + onBattery: "unknown", + lowPowerMode: "unknown", + thermalState: "unknown", + stale: true, + updatedAt: TEST_EPOCH, + }, + leases: [], + activeForegroundLeaseCount: 0, + activeScopeKeys: [], + shouldRunOpportunisticWork: true, + updatedAt: TEST_EPOCH, + }), + streamChanges: Stream.empty, + hasDemand: () => Effect.succeed(true), + shouldRunScopeWork: () => Effect.succeed(true), + shouldRunOpportunisticWork: Effect.succeed(true), +}); + const makeCodexConfig = (overrides: Partial): CodexSettings => ({ enabled: false, binaryPath: "codex", @@ -107,6 +142,8 @@ describe("ProviderInstanceRegistryLive — multi-instance codex slice", () => { prefix: "provider-instance-registry-test", }).pipe( Layer.provideMerge(NodeServices.layer), + Layer.provideMerge(BackgroundPolicyAlwaysRunLayer), + Layer.provideMerge(ServerSettingsService.layerTest()), Layer.provideMerge(TestHttpClientLive), Layer.provideMerge(Layer.succeed(ProviderEventLoggers, NoOpProviderEventLoggers)), ); @@ -244,6 +281,8 @@ describe("ProviderInstanceRegistryLive — all drivers slice", () => { prefix: "provider-instance-registry-all-drivers-test", }).pipe( Layer.provideMerge(infraLayer), + Layer.provideMerge(BackgroundPolicyAlwaysRunLayer), + Layer.provideMerge(ServerSettingsService.layerTest()), Layer.provideMerge(TestHttpClientLive), Layer.provideMerge(Layer.succeed(ProviderEventLoggers, NoOpProviderEventLoggers)), ); diff --git a/apps/server/src/provider/Layers/ProviderRegistry.test.ts b/apps/server/src/provider/Layers/ProviderRegistry.test.ts index 5fe0f903686..f415ce3bfaf 100644 --- a/apps/server/src/provider/Layers/ProviderRegistry.test.ts +++ 
b/apps/server/src/provider/Layers/ProviderRegistry.test.ts @@ -1,5 +1,6 @@ import * as NodeServices from "@effect/platform-node/NodeServices"; import { describe, it, assert } from "@effect/vitest"; +import * as DateTime from "effect/DateTime"; import * as Effect from "effect/Effect"; import * as Exit from "effect/Exit"; import * as Fiber from "effect/Fiber"; @@ -32,6 +33,7 @@ import { applyServerSettingsPatch } from "@t3tools/shared/serverSettings"; import { checkCodexProviderStatus, type CodexAppServerProviderSnapshot } from "./CodexProvider.ts"; import { checkClaudeProviderStatus } from "./ClaudeProvider.ts"; +import * as BackgroundPolicy from "../../background/BackgroundPolicy.ts"; import { OpenCodeRuntimeLive } from "../opencodeRuntime.ts"; import { NoOpProviderEventLoggers, ProviderEventLoggers } from "./ProviderEventLoggers.ts"; import { ProviderInstanceRegistryHydrationLive } from "./ProviderInstanceRegistryHydration.ts"; @@ -64,6 +66,7 @@ process.env.T3CODE_CURSOR_ENABLED = "1"; // ── Test helpers ──────────────────────────────────────────────────── const encoder = new TextEncoder(); +const TEST_EPOCH = DateTime.makeUnsafe("1970-01-01T00:00:00.000Z"); const TestHttpClientLive = Layer.succeed( HttpClient.HttpClient, @@ -72,6 +75,35 @@ const TestHttpClientLive = Layer.succeed( ), ); +const BackgroundPolicyAlwaysRunLayer = Layer.mock(BackgroundPolicy.BackgroundPolicy)({ + reportClientActivity: () => Effect.void, + removeRpcClient: () => Effect.void, + reportHostPowerState: () => Effect.void, + snapshot: Effect.succeed({ + hostPower: { + source: "unknown", + idle: "unknown", + idleSeconds: null, + locked: "unknown", + suspended: false, + onBattery: "unknown", + lowPowerMode: "unknown", + thermalState: "unknown", + stale: true, + updatedAt: TEST_EPOCH, + }, + leases: [], + activeForegroundLeaseCount: 0, + activeScopeKeys: [], + shouldRunOpportunisticWork: true, + updatedAt: TEST_EPOCH, + }), + streamChanges: Stream.empty, + hasDemand: () => Effect.succeed(true), 
+ shouldRunScopeWork: () => Effect.succeed(true), + shouldRunOpportunisticWork: Effect.succeed(true), +}); + function selectDescriptor( id: string, label: string, @@ -806,6 +838,7 @@ it.layer(Layer.mergeAll(NodeServices.layer, ServerSettingsService.layerTest(), T prefix: "t3-provider-registry-merged-persist-", }), ), + Layer.provideMerge(BackgroundPolicyAlwaysRunLayer), Layer.provideMerge(NodeServices.layer), ), ).pipe(Scope.provide(scope)); @@ -900,6 +933,7 @@ it.layer(Layer.mergeAll(NodeServices.layer, ServerSettingsService.layerTest(), T prefix: "t3-provider-registry-refresh-failure-", }), ), + Layer.provideMerge(BackgroundPolicyAlwaysRunLayer), Layer.provideMerge(NodeServices.layer), ), ).pipe(Scope.provide(scope)); @@ -1004,6 +1038,7 @@ it.layer(Layer.mergeAll(NodeServices.layer, ServerSettingsService.layerTest(), T prefix: "t3-provider-registry-sync-failure-", }), ), + Layer.provideMerge(BackgroundPolicyAlwaysRunLayer), Layer.provideMerge(NodeServices.layer), ), ).pipe(Scope.provide(scope)); @@ -1101,6 +1136,7 @@ it.layer(Layer.mergeAll(NodeServices.layer, ServerSettingsService.layerTest(), T Layer.provideMerge(TestHttpClientLive), Layer.provideMerge(Layer.succeed(ProviderEventLoggers, NoOpProviderEventLoggers)), Layer.provideMerge(OpenCodeRuntimeLive), + Layer.provideMerge(BackgroundPolicyAlwaysRunLayer), // NO spawner mock — `ChildProcessSpawner` is supplied by the // outer `NodeServices.layer` on `it.layer(...)` and will // genuinely spawn a subprocess. 
The missing-binary ENOENT is @@ -1193,6 +1229,7 @@ it.layer(Layer.mergeAll(NodeServices.layer, ServerSettingsService.layerTest(), T }), ), Layer.provideMerge(NodeServices.layer), + Layer.provideMerge(BackgroundPolicyAlwaysRunLayer), ); const runtimeServices = yield* Layer.build(providerRegistryLayer).pipe( Scope.provide(scope), @@ -1301,6 +1338,7 @@ it.layer(Layer.mergeAll(NodeServices.layer, ServerSettingsService.layerTest(), T Layer.provideMerge(Layer.succeed(ProviderEventLoggers, NoOpProviderEventLoggers)), Layer.provideMerge(OpenCodeRuntimeLive), Layer.provideMerge(NodeServices.layer), + Layer.provideMerge(BackgroundPolicyAlwaysRunLayer), ); const runtimeServices = yield* Layer.build(providerRegistryLayer).pipe( Scope.provide(scope), @@ -1354,6 +1392,7 @@ it.layer(Layer.mergeAll(NodeServices.layer, ServerSettingsService.layerTest(), T Layer.provideMerge(TestHttpClientLive), Layer.provideMerge(Layer.succeed(ProviderEventLoggers, NoOpProviderEventLoggers)), Layer.provideMerge(OpenCodeRuntimeLive), + Layer.provideMerge(BackgroundPolicyAlwaysRunLayer), Layer.provideMerge( mockCommandSpawnerLayer((command, args) => { if (command === "agent") { diff --git a/apps/server/src/provider/makeManagedServerProvider.test.ts b/apps/server/src/provider/makeManagedServerProvider.test.ts index ba1f01fe2b9..b2830688cc6 100644 --- a/apps/server/src/provider/makeManagedServerProvider.test.ts +++ b/apps/server/src/provider/makeManagedServerProvider.test.ts @@ -1,16 +1,22 @@ import { describe, it, assert } from "@effect/vitest"; import { ProviderDriverKind, ProviderInstanceId, type ServerProvider } from "@t3tools/contracts"; import { createModelCapabilities } from "@t3tools/shared/model"; +import * as DateTime from "effect/DateTime"; import * as Deferred from "effect/Deferred"; import * as Effect from "effect/Effect"; import * as Fiber from "effect/Fiber"; +import * as Layer from "effect/Layer"; import * as PubSub from "effect/PubSub"; import * as Ref from "effect/Ref"; import * as 
Stream from "effect/Stream"; +import { TestClock } from "effect/testing"; +import * as BackgroundPolicy from "../background/BackgroundPolicy.ts"; +import { ServerSettingsService } from "../serverSettings.ts"; import { makeManagedServerProvider } from "./makeManagedServerProvider.ts"; const emptyCapabilities = createModelCapabilities({ optionDescriptors: [] }); +const TEST_EPOCH = DateTime.makeUnsafe("1970-01-01T00:00:00.000Z"); const fastModeCapabilities = createModelCapabilities({ optionDescriptors: [ { @@ -87,6 +93,43 @@ const refreshedSnapshotSecond: ServerProvider = { message: "Refreshed provider availability again.", }; +function makeBackgroundPolicyLayer(shouldRunScopeWork: boolean) { + return Layer.mock(BackgroundPolicy.BackgroundPolicy)({ + reportClientActivity: () => Effect.void, + removeRpcClient: () => Effect.void, + reportHostPowerState: () => Effect.void, + snapshot: Effect.succeed({ + hostPower: { + source: "unknown", + idle: "unknown", + idleSeconds: null, + locked: "unknown", + suspended: false, + onBattery: "unknown", + lowPowerMode: "unknown", + thermalState: "unknown", + stale: true, + updatedAt: TEST_EPOCH, + }, + leases: [], + activeForegroundLeaseCount: 0, + activeScopeKeys: [], + shouldRunOpportunisticWork: true, + updatedAt: TEST_EPOCH, + }), + streamChanges: Stream.empty, + hasDemand: () => Effect.succeed(shouldRunScopeWork), + shouldRunScopeWork: () => Effect.succeed(shouldRunScopeWork), + shouldRunOpportunisticWork: Effect.succeed(shouldRunScopeWork), + }); +} + +const BackgroundPolicyAlwaysRunLayer = makeBackgroundPolicyLayer(true); +const BackgroundPolicyNeverRunLayer = makeBackgroundPolicyLayer(false); +const ServerSettingsTestLayer = ServerSettingsService.layerTest(); +const AlwaysRunTestLayer = Layer.merge(BackgroundPolicyAlwaysRunLayer, ServerSettingsTestLayer); +const NeverRunTestLayer = Layer.merge(BackgroundPolicyNeverRunLayer, ServerSettingsTestLayer); + const enrichedSnapshotSecond: ServerProvider = { 
...refreshedSnapshotSecond, checkedAt: "2026-04-10T00:00:04.000Z", @@ -140,7 +183,38 @@ describe("makeManagedServerProvider", () => { assert.deepStrictEqual(latest, refreshedSnapshot); assert.strictEqual(yield* Ref.get(checkCalls), 1); }), - ), + ).pipe(Effect.provide(AlwaysRunTestLayer)), + ); + + it.effect("skips periodic provider refreshes without foreground provider-status demand", () => + Effect.scoped( + Effect.gen(function* () { + const checkCalls = yield* Ref.make(0); + const initialCheckDone = yield* Deferred.make(); + yield* makeManagedServerProvider({ + maintenanceCapabilities, + getSettings: Effect.succeed({ enabled: true }), + streamSettings: Stream.empty, + haveSettingsChanged: (previous, next) => previous.enabled !== next.enabled, + initialSnapshot: () => Effect.succeed(initialSnapshot), + checkProvider: Ref.updateAndGet(checkCalls, (count) => count + 1).pipe( + Effect.tap((count) => + count === 1 + ? Deferred.succeed(initialCheckDone, undefined).pipe(Effect.ignore) + : Effect.void, + ), + Effect.as(refreshedSnapshot), + ), + refreshInterval: "1 second", + }); + + yield* Deferred.await(initialCheckDone); + yield* TestClock.adjust("1 second"); + yield* Effect.yieldNow; + + assert.strictEqual(yield* Ref.get(checkCalls), 1); + }), + ).pipe(Effect.provide(Layer.mergeAll(NeverRunTestLayer, TestClock.layer()))), ); it.effect("reruns the provider check when streamed settings change", () => @@ -185,7 +259,7 @@ describe("makeManagedServerProvider", () => { assert.deepStrictEqual(latest, refreshedSnapshotSecond); assert.strictEqual(yield* Ref.get(checkCalls), 2); }), - ), + ).pipe(Effect.provide(AlwaysRunTestLayer)), ); it.effect("streams supplemental snapshot updates after the base provider check completes", () => @@ -223,7 +297,7 @@ describe("makeManagedServerProvider", () => { assert.deepStrictEqual(updates, [refreshedSnapshot, enrichedSnapshot]); assert.deepStrictEqual(latest, enrichedSnapshot); }), - ), + ).pipe(Effect.provide(AlwaysRunTestLayer)), ); 
it.effect("ignores stale enrichment callbacks after a newer refresh advances generation", () => @@ -284,6 +358,6 @@ describe("makeManagedServerProvider", () => { ]); assert.deepStrictEqual(latest, enrichedSnapshotSecond); }), - ), + ).pipe(Effect.provide(AlwaysRunTestLayer)), ); }); diff --git a/apps/server/src/provider/makeManagedServerProvider.ts b/apps/server/src/provider/makeManagedServerProvider.ts index 88547fb3afa..8bb1f351a88 100644 --- a/apps/server/src/provider/makeManagedServerProvider.ts +++ b/apps/server/src/provider/makeManagedServerProvider.ts @@ -1,4 +1,9 @@ -import type { ServerProvider } from "@t3tools/contracts"; +import { + DEFAULT_PROVIDER_HEALTH_REFRESH_INTERVAL, + type ServerProvider, + ServerSettingsError, +} from "@t3tools/contracts"; +import { resolveServerBackgroundActivitySettings } from "@t3tools/shared/backgroundActivitySettings"; import * as Duration from "effect/Duration"; import * as Effect from "effect/Effect"; import * as Equal from "effect/Equal"; @@ -9,8 +14,9 @@ import * as Scope from "effect/Scope"; import * as Stream from "effect/Stream"; import * as Semaphore from "effect/Semaphore"; +import * as BackgroundPolicy from "../background/BackgroundPolicy.ts"; +import { ServerSettingsService } from "../serverSettings.ts"; import type { ServerProviderShape } from "./Services/ServerProvider.ts"; -import { ServerSettingsError } from "@t3tools/contracts"; interface ProviderSnapshotState { readonly snapshot: ServerProvider; @@ -33,7 +39,13 @@ export const makeManagedServerProvider = Effect.fn("makeManagedServerProvider")( readonly publishSnapshot: (snapshot: ServerProvider) => Effect.Effect; }) => Effect.Effect; readonly refreshInterval?: Duration.Input; -}): Effect.fn.Return { +}): Effect.fn.Return< + ServerProviderShape, + ServerSettingsError, + Scope.Scope | BackgroundPolicy.BackgroundPolicy | ServerSettingsService +> { + const backgroundPolicy = yield* BackgroundPolicy.BackgroundPolicy; + const serverSettings = yield* 
ServerSettingsService; const refreshSemaphore = yield* Semaphore.make(1); const changesPubSub = yield* Effect.acquireRelease( PubSub.unbounded(), @@ -134,13 +146,45 @@ export const makeManagedServerProvider = Effect.fn("makeManagedServerProvider")( return yield* applySnapshot(nextSettings, { forceRefresh: true }); }); + const hasProviderStatusDemand = Effect.gen(function* () { + const state = yield* Ref.get(snapshotStateRef); + const instanceId = state.snapshot.instanceId; + const [genericDemand, instanceDemand] = yield* Effect.all([ + backgroundPolicy.shouldRunScopeWork({ type: "provider-status" }), + backgroundPolicy.shouldRunScopeWork({ type: "provider-status", instanceId }), + ]); + return genericDemand || instanceDemand; + }); + + const getRefreshInterval = input.refreshInterval + ? Effect.succeed(input.refreshInterval) + : serverSettings.getSettings.pipe( + Effect.map( + (settings) => + resolveServerBackgroundActivitySettings(settings).providerHealthRefreshInterval, + ), + Effect.orElseSucceed(() => DEFAULT_PROVIDER_HEALTH_REFRESH_INTERVAL), + ); + yield* Stream.runForEach(input.streamSettings, (nextSettings) => Effect.asVoid(applySnapshot(nextSettings)), ).pipe(Effect.forkScoped); yield* Effect.forever( - Effect.sleep(input.refreshInterval ?? "60 seconds").pipe( - Effect.flatMap(() => refreshSnapshot()), + getRefreshInterval.pipe( + Effect.flatMap((refreshInterval) => + Duration.toMillis(Duration.fromInputUnsafe(refreshInterval)) <= 0 + ? Effect.sleep("60 seconds") + : Effect.sleep(refreshInterval).pipe( + Effect.flatMap(() => + hasProviderStatusDemand.pipe( + Effect.flatMap((shouldRefresh) => + shouldRefresh ? 
refreshSnapshot().pipe(Effect.asVoid) : Effect.void, + ), + ), + ), + ), + ), Effect.ignoreCause({ log: true }), ), ).pipe(Effect.forkScoped); diff --git a/apps/server/src/server.test.ts b/apps/server/src/server.test.ts index 205833289ea..f96f4562e1c 100644 --- a/apps/server/src/server.test.ts +++ b/apps/server/src/server.test.ts @@ -69,6 +69,7 @@ import { vi } from "vite-plus/test"; const TEST_EPOCH = DateTime.makeUnsafe("1970-01-01T00:00:00.000Z"); +import * as BackgroundPolicy from "./background/BackgroundPolicy.ts"; import type { ServerConfigShape } from "./config.ts"; import { deriveServerPaths, ServerConfig } from "./config.ts"; import { makeRoutesLayer } from "./server.ts"; @@ -772,6 +773,36 @@ const buildAppUnderTest = (options?: { ...options?.layers?.serverRuntimeStartup, }), ), + Layer.provide( + Layer.mock(BackgroundPolicy.BackgroundPolicy)({ + reportClientActivity: () => Effect.void, + removeRpcClient: () => Effect.void, + reportHostPowerState: () => Effect.void, + snapshot: Effect.succeed({ + hostPower: { + source: "unknown", + idle: "unknown", + idleSeconds: null, + locked: "unknown", + suspended: false, + onBattery: "unknown", + lowPowerMode: "unknown", + thermalState: "unknown", + stale: true, + updatedAt: TEST_EPOCH, + }, + leases: [], + activeForegroundLeaseCount: 0, + activeScopeKeys: [], + shouldRunOpportunisticWork: false, + updatedAt: TEST_EPOCH, + }), + streamChanges: Stream.empty, + hasDemand: () => Effect.succeed(false), + shouldRunScopeWork: () => Effect.succeed(false), + shouldRunOpportunisticWork: Effect.succeed(false), + }), + ), Layer.provide( Layer.mock(ServerEnvironment)({ getEnvironmentId: Effect.succeed(testEnvironmentDescriptor.environmentId), diff --git a/apps/server/src/server.ts b/apps/server/src/server.ts index 1da0ea27a65..b21b30eff8f 100644 --- a/apps/server/src/server.ts +++ b/apps/server/src/server.ts @@ -5,6 +5,8 @@ import { FetchHttpClient, HttpRouter, HttpServer } from "effect/unstable/http"; import * as HttpApiBuilder 
from "effect/unstable/httpapi/HttpApiBuilder"; import { ServerConfig } from "./config.ts"; +import * as BackgroundPolicy from "./background/BackgroundPolicy.ts"; +import * as HostPowerMonitor from "./background/HostPowerMonitor.ts"; import { otlpTracesProxyRouteLayer, assetRouteLayer, @@ -110,6 +112,11 @@ const PtyAdapterLive = Layer.unwrap( }), ); +const BackgroundLayerLive = BackgroundPolicy.layer.pipe( + Layer.provide(HostPowerMonitor.layer), + Layer.provideMerge(ServerSettingsLive), +); + const RelayClientLive = Layer.unwrap( Effect.gen(function* () { const config = yield* ServerConfig; @@ -285,6 +292,7 @@ const ProviderRuntimeLayerLive = ProviderSessionReaperLive.pipe( const RuntimeCoreDependenciesLive = ReactorLayerLive.pipe( // Core Services + Layer.provideMerge(ServerSettingsLive), Layer.provideMerge(CheckpointingLayerLive), Layer.provideMerge(SourceControlProviderRegistryLayerLive), Layer.provideMerge(GitLayerLive), @@ -312,7 +320,6 @@ const RuntimeCoreDependenciesLive = ReactorLayerLive.pipe( // no longer transitively provides it. Exposing it at the runtime level // keeps a single Live for all opencode consumers. Layer.provideMerge(OpenCodeRuntimeLive), - Layer.provideMerge(ServerSettingsLive), Layer.provideMerge(WorkspaceLayerLive), Layer.provideMerge(ProjectFaviconResolverLayerLive), Layer.provideMerge(RepositoryIdentityResolverLive), @@ -329,6 +336,7 @@ const RuntimeCoreDependenciesLive = ReactorLayerLive.pipe( const RuntimeDependenciesLive = RuntimeCoreDependenciesLive.pipe( // Misc. 
+ Layer.provideMerge(BackgroundLayerLive), Layer.provideMerge(ProcessDiagnostics.layer), Layer.provideMerge(ProcessResourceMonitor.layer), Layer.provideMerge(TraceDiagnostics.layer), diff --git a/apps/server/src/serverSettings.test.ts b/apps/server/src/serverSettings.test.ts index d24f2ee2826..eb8cc3083c2 100644 --- a/apps/server/src/serverSettings.test.ts +++ b/apps/server/src/serverSettings.test.ts @@ -462,6 +462,14 @@ it.layer(NodeServices.layer)("server settings", (it) => { serverPassword: "secret-password", }, }, + backgroundActivity: { + schemaVersion: 1, + profile: "custom", + baseProfile: "balanced", + overrides: { + automaticGitFetchInterval: 10_000, + }, + }, automaticGitFetchInterval: 10_000, }); }).pipe(Effect.provide(makeServerSettingsLayer())), diff --git a/apps/server/src/serverSettings.ts b/apps/server/src/serverSettings.ts index 0e126604b4a..af9b5f0fa58 100644 --- a/apps/server/src/serverSettings.ts +++ b/apps/server/src/serverSettings.ts @@ -135,13 +135,17 @@ export class ServerSettingsService extends Context.Service< Layer.effect( ServerSettingsService, Effect.gen(function* () { - const { automaticGitFetchInterval, ...overridesForMerge } = overrides; + const { automaticGitFetchInterval, providerHealthRefreshInterval, ...overridesForMerge } = + overrides; const merged = deepMerge(DEFAULT_SERVER_SETTINGS, overridesForMerge); const initialSettings = yield* normalizeServerSettings({ ...merged, ...(automaticGitFetchInterval !== undefined ? { automaticGitFetchInterval: automaticGitFetchInterval as Duration.Duration } : {}), + ...(providerHealthRefreshInterval !== undefined + ? { providerHealthRefreshInterval: providerHealthRefreshInterval as Duration.Duration } + : {}), }); const currentSettingsRef = yield* Ref.make(initialSettings); @@ -216,7 +220,9 @@ function fallbackTextGenerationProvider(settings: ServerSettings): ServerSetting // Values under these keys are compared as a whole — never stripped field-by-field. 
const ATOMIC_SETTINGS_KEYS: ReadonlySet = new Set([ + "backgroundActivity", "automaticGitFetchInterval", + "providerHealthRefreshInterval", "textGenerationModelSelection", ]); diff --git a/apps/server/src/vcs/VcsStatusBroadcaster.test.ts b/apps/server/src/vcs/VcsStatusBroadcaster.test.ts index d78999f88c1..367fd677a54 100644 --- a/apps/server/src/vcs/VcsStatusBroadcaster.test.ts +++ b/apps/server/src/vcs/VcsStatusBroadcaster.test.ts @@ -1,6 +1,7 @@ import { assert, it, describe } from "@effect/vitest"; import * as NodeServices from "@effect/platform-node/NodeServices"; import * as Deferred from "effect/Deferred"; +import * as DateTime from "effect/DateTime"; import * as Duration from "effect/Duration"; import * as Effect from "effect/Effect"; import * as Exit from "effect/Exit"; @@ -12,6 +13,7 @@ import * as Scope from "effect/Scope"; import * as Stream from "effect/Stream"; import * as TestClock from "effect/testing/TestClock"; import type { + BackgroundScope, VcsStatusLocalResult, VcsStatusRemoteResult, VcsStatusResult, @@ -20,8 +22,11 @@ import type { import { GitManagerError } from "@t3tools/contracts"; import * as VcsStatusBroadcaster from "./VcsStatusBroadcaster.ts"; +import * as BackgroundPolicy from "../background/BackgroundPolicy.ts"; import * as GitWorkflowService from "../git/GitWorkflowService.ts"; +const TEST_EPOCH = DateTime.makeUnsafe("1970-01-01T00:00:00.000Z"); + const baseLocalStatus: VcsStatusLocalResult = { isRepo: true, sourceControlProvider: { @@ -71,6 +76,7 @@ function makeTestLayer(state: { }) { return VcsStatusBroadcaster.layer.pipe( Layer.provideMerge(NodeServices.layer), + Layer.provide(makeBackgroundPolicyLayer(() => true)), Layer.provide( Layer.mock(GitWorkflowService.GitWorkflowService)({ localStatus: () => @@ -97,6 +103,37 @@ function makeTestLayer(state: { ); } +function makeBackgroundPolicyLayer(shouldRunScopeWork: (scope: BackgroundScope) => boolean) { + return Layer.mock(BackgroundPolicy.BackgroundPolicy)({ + reportClientActivity: 
() => Effect.void, + removeRpcClient: () => Effect.void, + reportHostPowerState: () => Effect.void, + snapshot: Effect.succeed({ + hostPower: { + source: "unknown", + idle: "unknown", + idleSeconds: null, + locked: "unknown", + suspended: false, + onBattery: "unknown", + lowPowerMode: "unknown", + thermalState: "unknown", + stale: true, + updatedAt: TEST_EPOCH, + }, + leases: [], + activeForegroundLeaseCount: 0, + activeScopeKeys: [], + shouldRunOpportunisticWork: false, + updatedAt: TEST_EPOCH, + }), + streamChanges: Stream.empty, + hasDemand: () => Effect.succeed(true), + shouldRunScopeWork: (scope) => Effect.sync(() => shouldRunScopeWork(scope)), + shouldRunOpportunisticWork: Effect.succeed(true), + }); +} + describe("VcsStatusBroadcaster", () => { it.effect("reuses the cached VCS status across repeated reads", () => { const state = { @@ -176,6 +213,7 @@ describe("VcsStatusBroadcaster", () => { }; const testLayer = VcsStatusBroadcaster.layer.pipe( Layer.provideMerge(NodeServices.layer), + Layer.provide(makeBackgroundPolicyLayer(() => true)), Layer.provide( Layer.mock(GitWorkflowService.GitWorkflowService)({ localStatus: () => @@ -277,6 +315,7 @@ describe("VcsStatusBroadcaster", () => { }; const testLayer = VcsStatusBroadcaster.layer.pipe( Layer.provideMerge(NodeServices.layer), + Layer.provide(makeBackgroundPolicyLayer(() => true)), Layer.provide( Layer.mock(GitWorkflowService.GitWorkflowService)({ localStatus: (input) => @@ -434,6 +473,7 @@ describe("VcsStatusBroadcaster", () => { let firstRemoteAttemptDeferred: Deferred.Deferred | null = null; const testLayer = VcsStatusBroadcaster.layer.pipe( Layer.provideMerge(NodeServices.layer), + Layer.provide(makeBackgroundPolicyLayer(() => true)), Layer.provide( Layer.mock(GitWorkflowService.GitWorkflowService)({ localStatus: () => @@ -573,6 +613,57 @@ describe("VcsStatusBroadcaster", () => { ); }); + it.effect("does not start automatic remote refreshes without foreground client demand", () => { + const state = { + 
currentLocalStatus: baseLocalStatus, + currentRemoteStatus: baseRemoteStatus, + localStatusCalls: 0, + remoteStatusCalls: 0, + localInvalidationCalls: 0, + remoteInvalidationCalls: 0, + }; + const testLayer = VcsStatusBroadcaster.layer.pipe( + Layer.provideMerge(NodeServices.layer), + Layer.provide(makeBackgroundPolicyLayer(() => false)), + Layer.provide( + Layer.mock(GitWorkflowService.GitWorkflowService)({ + localStatus: () => + Effect.sync(() => { + state.localStatusCalls += 1; + return state.currentLocalStatus; + }), + remoteStatus: () => + Effect.sync(() => { + state.remoteStatusCalls += 1; + return state.currentRemoteStatus; + }), + invalidateLocalStatus: () => + Effect.sync(() => { + state.localInvalidationCalls += 1; + }), + invalidateRemoteStatus: () => + Effect.sync(() => { + state.remoteInvalidationCalls += 1; + }), + } satisfies Partial), + ), + ); + + return Effect.gen(function* () { + const broadcaster = yield* VcsStatusBroadcaster.VcsStatusBroadcaster; + const snapshot = yield* Stream.runHead( + broadcaster.streamStatus( + { cwd: "/repo" }, + { automaticRemoteRefreshInterval: Effect.succeed(Duration.seconds(1)) }, + ), + ); + + assert.isTrue(Option.isSome(snapshot)); + assert.equal(state.remoteStatusCalls, 0); + assert.equal(state.remoteInvalidationCalls, 0); + }).pipe(Effect.provide(testLayer)); + }); + it.effect("stops the remote poller after the last stream subscriber disconnects", () => { const state = { currentLocalStatus: baseLocalStatus, @@ -586,6 +677,7 @@ describe("VcsStatusBroadcaster", () => { let remoteStartedDeferred: Deferred.Deferred | null = null; const testLayer = VcsStatusBroadcaster.layer.pipe( Layer.provideMerge(NodeServices.layer), + Layer.provide(makeBackgroundPolicyLayer(() => true)), Layer.provide( Layer.mock(GitWorkflowService.GitWorkflowService)({ localStatus: () => diff --git a/apps/server/src/vcs/VcsStatusBroadcaster.ts b/apps/server/src/vcs/VcsStatusBroadcaster.ts index f0cacab2dcb..7039843565c 100644 --- 
a/apps/server/src/vcs/VcsStatusBroadcaster.ts +++ b/apps/server/src/vcs/VcsStatusBroadcaster.ts @@ -21,6 +21,7 @@ import type { } from "@t3tools/contracts"; import { mergeGitStatusParts } from "@t3tools/shared/git"; +import * as BackgroundPolicy from "../background/BackgroundPolicy.ts"; import * as GitWorkflowService from "../git/GitWorkflowService.ts"; const DEFAULT_VCS_STATUS_REFRESH_INTERVAL = Duration.seconds(30); @@ -98,6 +99,7 @@ export const layer = Layer.effect( VcsStatusBroadcaster, Effect.gen(function* () { const workflow = yield* GitWorkflowService.GitWorkflowService; + const backgroundPolicy = yield* BackgroundPolicy.BackgroundPolicy; const fs = yield* FileSystem.FileSystem; const changesPubSub = yield* Effect.acquireRelease( PubSub.unbounded(), @@ -317,6 +319,14 @@ export const layer = Layer.effect( return activeInterval; } + const shouldRun = yield* backgroundPolicy.shouldRunScopeWork({ + type: "vcs-status", + cwd, + }); + if (!shouldRun) { + return activeInterval; + } + const exit = yield* refreshRemoteStatus(cwd, { refreshUpstream: !Duration.isZero(configuredInterval), }).pipe(Effect.exit); diff --git a/apps/server/src/ws.ts b/apps/server/src/ws.ts index 1ad37e7c49b..5f7010228a9 100644 --- a/apps/server/src/ws.ts +++ b/apps/server/src/ws.ts @@ -42,6 +42,7 @@ import { type RelayClientInstallProgressEvent, OrchestrationReplayEventsError, FilesystemBrowseError, + RpcClientId, AssetAccessError, EnvironmentAuthorizationError, ThreadId, @@ -52,6 +53,7 @@ import { WS_METHODS, WsRpcGroup, } from "@t3tools/contracts"; +import { resolveServerBackgroundActivitySettings } from "@t3tools/shared/backgroundActivitySettings"; import { clamp } from "effect/Number"; import { HttpRouter, HttpServerRequest, HttpServerRespondable } from "effect/unstable/http"; import { RpcSerialization, RpcServer } from "effect/unstable/rpc"; @@ -88,6 +90,7 @@ import { ReviewService } from "./review/ReviewService.ts"; import { ProjectSetupScriptRunner } from 
"./project/Services/ProjectSetupScriptRunner.ts"; import { RepositoryIdentityResolver } from "./project/Services/RepositoryIdentityResolver.ts"; import { ServerEnvironment } from "./environment/Services/ServerEnvironment.ts"; +import * as BackgroundPolicy from "./background/BackgroundPolicy.ts"; import * as EnvironmentAuth from "./auth/EnvironmentAuth.ts"; import type { AuthenticatedSession } from "./auth/EnvironmentAuth.ts"; import * as ProcessDiagnostics from "./diagnostics/ProcessDiagnostics.ts"; @@ -277,10 +280,13 @@ const makeWsRpcLayer = (currentSession: AuthenticatedSession) => const projectSetupScriptRunner = yield* ProjectSetupScriptRunner; const repositoryIdentityResolver = yield* RepositoryIdentityResolver; const serverEnvironment = yield* ServerEnvironment; + const backgroundPolicy = yield* BackgroundPolicy.BackgroundPolicy; const serverAuth = yield* EnvironmentAuth.EnvironmentAuth; const sourceControlDiscovery = yield* SourceControlDiscoveryLayer.SourceControlDiscovery; const automaticGitFetchInterval = serverSettings.getSettings.pipe( - Effect.map((settings) => settings.automaticGitFetchInterval), + Effect.map( + (settings) => resolveServerBackgroundActivitySettings(settings).automaticGitFetchInterval, + ), Effect.catch((cause) => Effect.logWarning("Failed to read automatic Git fetch interval setting", { detail: cause.message, @@ -1116,6 +1122,26 @@ const makeWsRpcLayer = (currentSession: AuthenticatedSession) => observeRpcEffect(WS_METHODS.serverSignalProcess, processDiagnostics.signal(input), { "rpc.aggregate": "server", }), + [WS_METHODS.serverReportClientActivity]: (input, metadata) => + observeRpcEffect( + WS_METHODS.serverReportClientActivity, + backgroundPolicy.reportClientActivity( + currentSessionId, + RpcClientId.make(metadata.client.id), + input, + ), + { "rpc.aggregate": "server" }, + ), + [WS_METHODS.serverReportHostPowerState]: (input) => + observeRpcEffect( + WS_METHODS.serverReportHostPowerState, + 
backgroundPolicy.reportHostPowerState(input), + { "rpc.aggregate": "server" }, + ), + [WS_METHODS.serverGetBackgroundPolicy]: (_input) => + observeRpcEffect(WS_METHODS.serverGetBackgroundPolicy, backgroundPolicy.snapshot, { + "rpc.aggregate": "server", + }), [WS_METHODS.cloudGetRelayClientStatus]: (_input) => observeRpcEffect(WS_METHODS.cloudGetRelayClientStatus, relayClient.resolve, { "rpc.aggregate": "cloud", @@ -1634,6 +1660,15 @@ const makeWsRpcLayer = (currentSession: AuthenticatedSession) => }), { "rpc.aggregate": "auth" }, ), + [WS_METHODS.subscribeBackgroundPolicy]: (_input) => + observeRpcStream( + WS_METHODS.subscribeBackgroundPolicy, + Stream.concat( + Stream.unwrap(Effect.map(backgroundPolicy.snapshot, Stream.make)), + backgroundPolicy.streamChanges, + ), + { "rpc.aggregate": "server" }, + ), }); }), ); diff --git a/apps/web/src/components/settings/SettingsPanels.tsx b/apps/web/src/components/settings/SettingsPanels.tsx index 71311c10d5c..7127eb2b14a 100644 --- a/apps/web/src/components/settings/SettingsPanels.tsx +++ b/apps/web/src/components/settings/SettingsPanels.tsx @@ -1,9 +1,19 @@ -import { ArchiveIcon, ArchiveX, LoaderIcon, PlusIcon, RefreshCwIcon } from "lucide-react"; +import { + ArchiveIcon, + ArchiveX, + InfoIcon, + LoaderIcon, + PlusIcon, + RefreshCwIcon, + SettingsIcon, +} from "lucide-react"; import { Link } from "@tanstack/react-router"; import { useCallback, useMemo, useRef, useState } from "react"; import { useAtomValue } from "@effect/atom-react"; import { defaultInstanceIdForDriver, + type BackgroundActivityProfile, + type BackgroundActivitySettings, type DesktopUpdateChannel, PROVIDER_DISPLAY_NAMES, ProviderDriverKind, @@ -18,6 +28,11 @@ import { squashAtomCommandFailure, } from "@t3tools/client-runtime/state/runtime"; import { DEFAULT_UNIFIED_SETTINGS } from "@t3tools/contracts/settings"; +import { + getBackgroundActivityBaseProfile, + getBackgroundActivityPresetSettings, + resolveServerBackgroundActivitySettings, +} from 
"@t3tools/shared/backgroundActivitySettings"; import { createModelSelection } from "@t3tools/shared/model"; import * as Arr from "effect/Array"; import * as Duration from "effect/Duration"; @@ -58,7 +73,23 @@ import { useProjects } from "../../state/entities"; import { useArchivedThreadSnapshots } from "../../lib/archivedThreadsState"; import { formatRelativeTime, formatRelativeTimeLabel } from "../../timestampFormat"; import { Button } from "../ui/button"; +import { + Dialog, + DialogDescription, + DialogFooter, + DialogHeader, + DialogPanel, + DialogPopup, + DialogTitle, +} from "../ui/dialog"; import { DraftInput } from "../ui/draft-input"; +import { + NumberField, + NumberFieldDecrement, + NumberFieldGroup, + NumberFieldIncrement, + NumberFieldInput, +} from "../ui/number-field"; import { Select, SelectItem, SelectPopup, SelectTrigger, SelectValue } from "../ui/select"; import { Switch } from "../ui/switch"; import { stackedThreadToast, toastManager } from "../ui/toast"; @@ -108,7 +139,128 @@ const TIMESTAMP_FORMAT_LABELS = { "24-hour": "24-hour", } as const; +const BACKGROUND_ACTIVITY_PROFILE_LABELS: Record = { + balanced: "Balanced", + performance: "Performance", + "battery-saver": "Battery saver", +}; + +type BackgroundActivityProfileOption = BackgroundActivityProfile | "advanced"; +type BackgroundActivityOverridePatch = Partial<{ + [K in keyof BackgroundActivitySettings["overrides"]]: + | BackgroundActivitySettings["overrides"][K] + | undefined; +}>; + +const BACKGROUND_ACTIVITY_PROFILE_OPTION_LABELS: Record = { + ...BACKGROUND_ACTIVITY_PROFILE_LABELS, + advanced: "Advanced", +}; + +const BACKGROUND_ACTIVITY_PROFILE_DESCRIPTIONS: Record = { + balanced: + "Pauses background probes when clients are idle, the host is locked, or low power mode is active.", + performance: "Allows scoped background probes while any subscribed client remains connected.", + "battery-saver": "Also pauses background probes when the host or client is on battery.", +}; + +const 
ADVANCED_BACKGROUND_ACTIVITY_DESCRIPTION = + "Uses custom background intervals with the selected shared power policy."; + +const PROVIDER_HEALTH_INTERVAL_STEP_SECONDS = 30; const DEFAULT_DRIVER_KIND = ProviderDriverKind.make("codex"); +const BACKGROUND_ACTIVITY_BOOLEAN_OVERRIDES: ReadonlyArray<{ + readonly key: + | "pauseWhenHostLocked" + | "pauseWhenHostLowPower" + | "pauseWhenClientLowPower" + | "pauseWhenOnBattery"; + readonly label: string; +}> = [ + { key: "pauseWhenHostLocked", label: "Pause when host is locked" }, + { key: "pauseWhenHostLowPower", label: "Pause on host low power" }, + { key: "pauseWhenClientLowPower", label: "Pause on client low power" }, + { key: "pauseWhenOnBattery", label: "Pause on battery" }, +]; + +function durationToSeconds(duration: Duration.Duration): number { + return Math.round(Duration.toMillis(duration) / 1_000); +} + +function normalizeIntervalSeconds(value: number | null): number { + if (value === null || !Number.isFinite(value)) { + return 0; + } + return Math.max(0, Math.round(value)); +} + +function resolveBackgroundActivityProfileOption(settings: { + readonly backgroundActivity: BackgroundActivitySettings; +}): BackgroundActivityProfileOption { + return settings.backgroundActivity.profile === "custom" + ? 
"advanced" + : settings.backgroundActivity.profile; +} + +function resetBackgroundActivitySettings() { + return { + backgroundActivity: DEFAULT_UNIFIED_SETTINGS.backgroundActivity, + }; +} + +function backgroundActivityProfileSettings(profile: BackgroundActivityProfile) { + return { + backgroundActivity: { + schemaVersion: 1 as const, + profile, + overrides: {}, + }, + }; +} + +function backgroundActivityOverrideSettings( + current: BackgroundActivitySettings, + overrides: BackgroundActivityOverridePatch, +) { + const nextOverrides: BackgroundActivityOverridePatch = { + ...current.overrides, + ...overrides, + }; + for (const [key, value] of Object.entries(nextOverrides)) { + if (value === undefined) { + delete nextOverrides[key as keyof typeof nextOverrides]; + } + } + return { + backgroundActivity: { + schemaVersion: 1 as const, + profile: "custom" as const, + baseProfile: getBackgroundActivityBaseProfile(current), + overrides: nextOverrides as BackgroundActivitySettings["overrides"], + }, + }; +} + +function PolicyTooltip({ children }: { readonly children: string }) { + return ( + + + + + } + /> + + {children} + + + ); +} function withoutProviderInstanceKey( record: Readonly> | undefined, @@ -401,9 +553,8 @@ export function useSettingsRestore(onRestored?: () => void) { ...(settings.enableAssistantStreaming !== DEFAULT_UNIFIED_SETTINGS.enableAssistantStreaming ? ["Assistant output"] : []), - ...(Duration.toMillis(settings.automaticGitFetchInterval) !== - Duration.toMillis(DEFAULT_UNIFIED_SETTINGS.automaticGitFetchInterval) - ? ["Automatic Git fetch interval"] + ...(!Equal.equals(settings.backgroundActivity, DEFAULT_UNIFIED_SETTINGS.backgroundActivity) + ? ["Background activity"] : []), ...(settings.defaultThreadEnvMode !== DEFAULT_UNIFIED_SETTINGS.defaultThreadEnvMode ? 
["New thread mode"] @@ -433,7 +584,7 @@ export function useSettingsRestore(onRestored?: () => void) { settings.newWorktreesStartFromOrigin, settings.diffIgnoreWhitespace, settings.diffWordWrap, - settings.automaticGitFetchInterval, + settings.backgroundActivity, settings.enableAssistantStreaming, settings.sidebarThreadPreviewCount, settings.timestampFormat, @@ -459,7 +610,7 @@ export function useSettingsRestore(onRestored?: () => void) { sidebarThreadPreviewCount: DEFAULT_UNIFIED_SETTINGS.sidebarThreadPreviewCount, autoOpenPlanSidebar: DEFAULT_UNIFIED_SETTINGS.autoOpenPlanSidebar, enableAssistantStreaming: DEFAULT_UNIFIED_SETTINGS.enableAssistantStreaming, - automaticGitFetchInterval: DEFAULT_UNIFIED_SETTINGS.automaticGitFetchInterval, + backgroundActivity: DEFAULT_UNIFIED_SETTINGS.backgroundActivity, defaultThreadEnvMode: DEFAULT_UNIFIED_SETTINGS.defaultThreadEnvMode, newWorktreesStartFromOrigin: DEFAULT_UNIFIED_SETTINGS.newWorktreesStartFromOrigin, addProjectBaseDirectory: DEFAULT_UNIFIED_SETTINGS.addProjectBaseDirectory, @@ -476,10 +627,255 @@ export function useSettingsRestore(onRestored?: () => void) { }; } +function BackgroundActivityAdvancedDialog({ + open, + onOpenChange, +}: { + readonly open: boolean; + readonly onOpenChange: (open: boolean) => void; +}) { + const settings = useSettings(); + const updateSettings = useUpdateSettings(); + const resolvedBackgroundActivity = resolveServerBackgroundActivitySettings(settings); + const activeProfile = getBackgroundActivityBaseProfile(settings.backgroundActivity); + const automaticGitFetchIntervalSeconds = durationToSeconds( + resolvedBackgroundActivity.automaticGitFetchInterval, + ); + const providerHealthRefreshIntervalSeconds = durationToSeconds( + resolvedBackgroundActivity.providerHealthRefreshInterval, + ); + const hostPowerMonitorActiveIntervalSeconds = durationToSeconds( + resolvedBackgroundActivity.hostPowerMonitorActiveInterval, + ); + const hostPowerMonitorIdleIntervalSeconds = durationToSeconds( + 
resolvedBackgroundActivity.hostPowerMonitorIdleInterval, + ); + + return ( + + + + Background Activity + + Tune the shared power policy and the background intervals that feed it. + + + +
+
+
+
Shared policy
+

+ Controls whether background work may run after a subscribed interval fires. +

+
+ +
+ +
+
+
Git fetch interval
+

+ Refresh remote branch status in the background. +

+
+
+ + updateSettings( + backgroundActivityOverrideSettings(settings.backgroundActivity, { + automaticGitFetchInterval: Duration.seconds( + normalizeIntervalSeconds(value), + ), + }), + ) + } + > + + + + + + + seconds +
+
+ +
+
+
Provider health interval
+

+ Refresh provider availability, versions, auth state, and model metadata. +

+
+
+ + updateSettings( + backgroundActivityOverrideSettings(settings.backgroundActivity, { + providerHealthRefreshInterval: Duration.seconds( + normalizeIntervalSeconds(value), + ), + }), + ) + } + > + + + + + + + seconds +
+
+ +
+
+
Host power monitor
+

+ Poll host power state while clients are active. +

+
+
+ + updateSettings( + backgroundActivityOverrideSettings(settings.backgroundActivity, { + hostPowerMonitorActiveInterval: Duration.seconds( + normalizeIntervalSeconds(value), + ), + }), + ) + } + > + + + + + + + seconds +
+
+ +
+
+
Idle host monitor
+

+ Poll host power state when no foreground client is active. +

+
+
+ + updateSettings( + backgroundActivityOverrideSettings(settings.backgroundActivity, { + hostPowerMonitorIdleInterval: Duration.seconds( + normalizeIntervalSeconds(value), + ), + }), + ) + } + > + + + + + + + seconds +
+
+ +
+ {BACKGROUND_ACTIVITY_BOOLEAN_OVERRIDES.map(({ key, label }) => ( + + ))} +
+
+
+ + + + +
+
+ ); +} + export function GeneralSettingsPanel() { const { theme, setTheme } = useTheme(); const settings = useSettings(); const updateSettings = useUpdateSettings(); + const [backgroundActivityDialogOpen, setBackgroundActivityDialogOpen] = useState(false); const observability = useAtomValue(primaryServerObservabilityAtom); const serverProviders = useAtomValue(primaryServerProvidersAtom); const diagnosticsDescription = formatDiagnosticsDescription({ @@ -512,6 +908,21 @@ export function GeneralSettingsPanel() { settings.textGenerationModelSelection ?? null, DEFAULT_UNIFIED_SETTINGS.textGenerationModelSelection ?? null, ); + const resolvedBackgroundActivity = resolveServerBackgroundActivitySettings(settings); + const activeBackgroundActivityProfile = getBackgroundActivityBaseProfile( + settings.backgroundActivity, + ); + const backgroundActivityProfileOption = resolveBackgroundActivityProfileOption(settings); + const backgroundActivityDescription = + backgroundActivityProfileOption === "advanced" + ? `${ADVANCED_BACKGROUND_ACTIVITY_DESCRIPTION} Current shared policy: ${ + BACKGROUND_ACTIVITY_PROFILE_LABELS[activeBackgroundActivityProfile] + }.` + : BACKGROUND_ACTIVITY_PROFILE_DESCRIPTIONS[resolvedBackgroundActivity.profile]; + const canResetBackgroundActivity = !Equal.equals( + settings.backgroundActivity, + DEFAULT_UNIFIED_SETTINGS.backgroundActivity, + ); return ( @@ -668,6 +1079,88 @@ export function GeneralSettingsPanel() { } /> + + Background activity + + This shared policy gates background work such as Git refreshes and provider health + probes after their individual intervals elapse. + + + } + description={backgroundActivityDescription} + resetAction={ + canResetBackgroundActivity ? ( + updateSettings(resetBackgroundActivitySettings())} + /> + ) : null + } + control={ + <> + + {backgroundActivityProfileOption === "advanced" ? ( + + setBackgroundActivityDialogOpen(true)} + > + + + } + /> + Configure background activity + + ) : null} + + + } + /> + 0 ? 
serverProviders.reduce( @@ -1282,6 +1783,61 @@ export function ProviderSettingsPanel() { } > + + Health check interval + + This interval is configured here, then the shared Background activity policy decides + whether provider probes may run when the timer fires. Custom intervals appear as + Advanced in General settings. + + + } + description="Refresh provider availability, versions, auth state, and model metadata in the background. Set this to 0 seconds to rely on manual refreshes." + resetAction={ + providerHealthRefreshIntervalSeconds !== defaultProviderHealthRefreshIntervalSeconds ? ( + + updateSettings( + backgroundActivityOverrideSettings(settings.backgroundActivity, { + providerHealthRefreshInterval: undefined, + }), + ) + } + /> + ) : null + } + control={ +
+ + updateSettings( + backgroundActivityOverrideSettings(settings.backgroundActivity, { + providerHealthRefreshInterval: Duration.seconds( + normalizeIntervalSeconds(value), + ), + }), + ) + } + > + + + + + + + seconds +
+ } + /> + {rows.map((row) => { const driverOption = getDriverOption(row.driver); const liveProvider = serverProviders.find( diff --git a/apps/web/src/components/settings/SourceControlSettings.tsx b/apps/web/src/components/settings/SourceControlSettings.tsx index db1b2393626..8de7374d2a5 100644 --- a/apps/web/src/components/settings/SourceControlSettings.tsx +++ b/apps/web/src/components/settings/SourceControlSettings.tsx @@ -1,8 +1,9 @@ -import { ChevronDownIcon, GitPullRequestIcon, RefreshCwIcon } from "lucide-react"; +import { ChevronDownIcon, GitPullRequestIcon, InfoIcon, RefreshCwIcon } from "lucide-react"; import * as Duration from "effect/Duration"; import * as Option from "effect/Option"; import { useState, type ReactNode } from "react"; import type { + BackgroundActivitySettings, SourceControlProviderKind, SourceControlDiscoveryResult, SourceControlProviderAuth, @@ -10,7 +11,11 @@ import type { VcsDriverKind, VcsDiscoveryItem, } from "@t3tools/contracts"; -import { DEFAULT_UNIFIED_SETTINGS } from "@t3tools/contracts/settings"; +import { + getBackgroundActivityBaseProfile, + getBackgroundActivityPresetSettings, + resolveServerBackgroundActivitySettings, +} from "@t3tools/shared/backgroundActivitySettings"; import { useSettings, useUpdateSettings } from "../../hooks/useSettings"; import { cn } from "../../lib/utils"; @@ -69,6 +74,11 @@ const VCS_ICONS: Partial> = { const SOURCE_CONTROL_SKELETON_ROWS = ["primary", "secondary"] as const; const GIT_FETCH_INTERVAL_STEP_SECONDS = 5; +type BackgroundActivityOverridePatch = Partial<{ + [K in keyof BackgroundActivitySettings["overrides"]]: + | BackgroundActivitySettings["overrides"][K] + | undefined; +}>; function durationToSeconds(duration: Duration.Duration): number { return Math.round(Duration.toMillis(duration) / 1_000); @@ -81,6 +91,27 @@ function normalizeFetchIntervalSeconds(value: number | null): number { return Math.max(0, Math.round(value)); } +function BackgroundPolicyTooltip({ children }: { readonly 
children: string }) { + return ( + + + + + } + /> + + {children} + + + ); +} + function optionLabel(value: Option.Option): string | null { return Option.getOrNull(value); } @@ -291,14 +322,41 @@ function DiscoveryItemRow({ } function GitFetchIntervalSettings() { - const automaticGitFetchInterval = useSettings((settings) => settings.automaticGitFetchInterval); + const settings = useSettings(); const updateSettings = useUpdateSettings(); - const automaticGitFetchIntervalSeconds = durationToSeconds(automaticGitFetchInterval); + const resolvedBackgroundActivity = resolveServerBackgroundActivitySettings(settings); + const automaticGitFetchIntervalSeconds = durationToSeconds( + resolvedBackgroundActivity.automaticGitFetchInterval, + ); const defaultAutomaticGitFetchIntervalSeconds = durationToSeconds( - DEFAULT_UNIFIED_SETTINGS.automaticGitFetchInterval, + getBackgroundActivityPresetSettings( + getBackgroundActivityBaseProfile(settings.backgroundActivity), + ).automaticGitFetchInterval, ); const canResetFetchInterval = automaticGitFetchIntervalSeconds !== defaultAutomaticGitFetchIntervalSeconds; + const backgroundActivityOverrideSettings = ( + current: BackgroundActivitySettings, + overrides: BackgroundActivityOverridePatch, + ) => { + const nextOverrides: BackgroundActivityOverridePatch = { + ...current.overrides, + ...overrides, + }; + for (const [key, value] of Object.entries(nextOverrides)) { + if (value === undefined) { + delete nextOverrides[key as keyof typeof nextOverrides]; + } + } + return { + backgroundActivity: { + schemaVersion: 1 as const, + profile: "custom" as const, + baseProfile: getBackgroundActivityBaseProfile(current), + overrides: nextOverrides as BackgroundActivitySettings["overrides"], + }, + }; + }; return (
@@ -306,6 +364,11 @@ function GitFetchIntervalSettings() {
Fetch interval + + This interval is configured for Git only. The shared Background activity policy still + decides whether Git refreshes may run when the timer fires. Custom intervals appear as + Advanced in General settings. + - updateSettings({ - automaticGitFetchInterval: DEFAULT_UNIFIED_SETTINGS.automaticGitFetchInterval, - }) + updateSettings( + backgroundActivityOverrideSettings(settings.backgroundActivity, { + automaticGitFetchInterval: undefined, + }), + ) } /> ) : null} @@ -338,9 +403,11 @@ function GitFetchIntervalSettings() { size="sm" className="w-32" onValueChange={(value) => - updateSettings({ - automaticGitFetchInterval: Duration.seconds(normalizeFetchIntervalSeconds(value)), - }) + updateSettings( + backgroundActivityOverrideSettings(settings.backgroundActivity, { + automaticGitFetchInterval: Duration.seconds(normalizeFetchIntervalSeconds(value)), + }), + ) } > diff --git a/apps/web/src/connection/runtime.ts b/apps/web/src/connection/runtime.ts index 3b1eade0818..5be9b2bdf1e 100644 --- a/apps/web/src/connection/runtime.ts +++ b/apps/web/src/connection/runtime.ts @@ -3,14 +3,28 @@ import * as Layer from "effect/Layer"; import { Atom } from "effect/unstable/reactivity"; import { runtimeContextLayer } from "../lib/runtime"; +import { + backgroundActivityObserverLayer, + backgroundActivityReporterLayer, +} from "../lib/backgroundActivityReporter"; import { connectionPlatformLayer } from "./platform"; const providedConnectionPlatformLayer = connectionPlatformLayer.pipe( Layer.provide(runtimeContextLayer), ); -export const connectionLayer = clientConnectionLayer.pipe( - Layer.provideMerge(Layer.mergeAll(runtimeContextLayer, providedConnectionPlatformLayer)), +const providedClientConnectionLayer = clientConnectionLayer.pipe( + Layer.provideMerge( + Layer.mergeAll( + runtimeContextLayer, + providedConnectionPlatformLayer, + backgroundActivityObserverLayer, + ), + ), +); + +export const connectionLayer = backgroundActivityReporterLayer.pipe( + 
Layer.provideMerge(providedClientConnectionLayer), ); export const connectionAtomRuntime = Atom.runtime(connectionLayer); diff --git a/apps/web/src/hooks/useSettings.ts b/apps/web/src/hooks/useSettings.ts index 6759b227a13..a7cb8a358af 100644 --- a/apps/web/src/hooks/useSettings.ts +++ b/apps/web/src/hooks/useSettings.ts @@ -218,7 +218,6 @@ export function useUpdateSettings() { }); } } - if (Object.keys(clientPatch).length > 0) { persistClientSettings({ ...getClientSettingsSnapshot(), diff --git a/apps/web/src/lib/backgroundActivityReporter.ts b/apps/web/src/lib/backgroundActivityReporter.ts new file mode 100644 index 00000000000..cddd4715683 --- /dev/null +++ b/apps/web/src/lib/backgroundActivityReporter.ts @@ -0,0 +1,202 @@ +import { EnvironmentRegistry } from "@t3tools/client-runtime/connection"; +import { + EnvironmentRpcSubscriptionObserver, + request, + type EnvironmentRpcSubscriptionObservation, +} from "@t3tools/client-runtime/rpc"; +import { + type BackgroundScope, + type ClientActivityReportInput, + type EnvironmentId, + WS_METHODS, +} from "@t3tools/contracts"; +import * as DateTime from "effect/DateTime"; +import * as Effect from "effect/Effect"; +import * as Layer from "effect/Layer"; +import * as Queue from "effect/Queue"; +import * as Schedule from "effect/Schedule"; +import * as Stream from "effect/Stream"; +import * as SubscriptionRef from "effect/SubscriptionRef"; + +import { randomUUID } from "./utils"; + +const CLIENT_ID_STORAGE_KEY = "t3.backgroundActivity.clientId"; +const REPORT_INTERVAL_MS = 25_000; +const LEASE_TTL_MS = 45_000; +const BASELINE_SCOPES: ReadonlyArray = [{ type: "provider-status" }]; + +interface RetainedScope { + readonly environmentId: EnvironmentId; + readonly scope: BackgroundScope; + refCount: number; +} + +const retainedScopes = new Map(); +const retainedScopeListeners = new Set<() => void>(); + +function notifyRetainedScopesChanged(): void { + for (const listener of retainedScopeListeners) { + listener(); + } +} + 
+function stableScopeKey(environmentId: EnvironmentId, scope: BackgroundScope): string { + const prefix = `${environmentId}:`; + switch (scope.type) { + case "server-config": + case "diagnostics": + return `${prefix}${scope.type}`; + case "provider-status": + return scope.instanceId + ? `${prefix}${scope.type}:${scope.instanceId}` + : `${prefix}${scope.type}`; + case "vcs-status": + case "git-refs": + return `${prefix}${scope.type}:${scope.cwd}`; + case "thread": + return `${prefix}${scope.type}:${scope.threadId}`; + } +} + +function getClientId(): string { + try { + const existing = window.localStorage.getItem(CLIENT_ID_STORAGE_KEY); + if (existing) return existing; + const next = randomUUID(); + window.localStorage.setItem(CLIENT_ID_STORAGE_KEY, next); + return next; + } catch { + return "ephemeral-browser-client"; + } +} + +function resolveClientKind(): ClientActivityReportInput["clientKind"] { + return window.desktopBridge ? "desktop-renderer" : "web"; +} + +function createActivityReport(environmentId: EnvironmentId): ClientActivityReportInput { + return { + environmentId, + clientId: getClientId(), + clientKind: resolveClientKind(), + visible: document.visibilityState === "visible", + focused: document.hasFocus(), + recentlyInteracted: document.hasFocus(), + appState: document.visibilityState === "visible" ? "active" : "background", + scopes: [ + ...BASELINE_SCOPES, + ...[...retainedScopes.values()] + .filter((entry) => entry.environmentId === environmentId) + .map((entry) => entry.scope), + ], + ttlMs: LEASE_TTL_MS, + observedAt: DateTime.makeUnsafe(new Date().toISOString()), + }; +} + +function scopeForSubscription( + observation: EnvironmentRpcSubscriptionObservation, +): BackgroundScope | null { + if (observation.method !== WS_METHODS.subscribeVcsStatus) { + return null; + } + const input = observation.input as { readonly cwd?: unknown }; + return typeof input.cwd === "string" ? 
{ type: "vcs-status", cwd: input.cwd } : null; +} + +function retainBackgroundScope(environmentId: EnvironmentId, scope: BackgroundScope): () => void { + const key = stableScopeKey(environmentId, scope); + const existing = retainedScopes.get(key); + if (existing) { + existing.refCount += 1; + } else { + retainedScopes.set(key, { environmentId, scope, refCount: 1 }); + notifyRetainedScopesChanged(); + } + + return () => { + const current = retainedScopes.get(key); + if (!current) return; + current.refCount -= 1; + if (current.refCount <= 0) { + retainedScopes.delete(key); + notifyRetainedScopesChanged(); + } + }; +} + +export const backgroundActivityObserverLayer = Layer.succeed( + EnvironmentRpcSubscriptionObserver, + EnvironmentRpcSubscriptionObserver.of({ + observe: (observation) => { + const scope = scopeForSubscription(observation); + if (scope === null) { + return Effect.succeed(Effect.void); + } + return Effect.sync(() => + Effect.sync(retainBackgroundScope(observation.environmentId as EnvironmentId, scope)), + ); + }, + }), +); + +export const backgroundActivityReporterLayer = Layer.effectDiscard( + Effect.gen(function* () { + if (typeof window === "undefined" || typeof document === "undefined") { + return; + } + + const registry = yield* EnvironmentRegistry; + const reportRequests = yield* Queue.sliding(1); + const requestReport = () => Queue.offerUnsafe(reportRequests, undefined); + + const report = Effect.gen(function* () { + const entries = yield* SubscriptionRef.get(registry.entries); + yield* Effect.forEach( + entries.keys(), + (environmentId) => + registry + .run( + environmentId, + request(WS_METHODS.serverReportClientActivity, createActivityReport(environmentId)), + ) + .pipe(Effect.ignore), + { concurrency: "unbounded", discard: true }, + ); + }).pipe(Effect.withSpan("web.backgroundActivity.report")); + + yield* Effect.acquireRelease( + Effect.sync(() => { + retainedScopeListeners.add(requestReport); + document.addEventListener("visibilitychange", 
requestReport); + window.addEventListener("focus", requestReport); + window.addEventListener("blur", requestReport); + window.addEventListener("online", requestReport); + }), + () => + Effect.sync(() => { + retainedScopeListeners.delete(requestReport); + document.removeEventListener("visibilitychange", requestReport); + window.removeEventListener("focus", requestReport); + window.removeEventListener("blur", requestReport); + window.removeEventListener("online", requestReport); + }), + ); + + yield* SubscriptionRef.changes(registry.entries).pipe( + Stream.runForEach(() => Effect.sync(requestReport)), + Effect.forkScoped, + ); + yield* Stream.fromQueue(reportRequests).pipe( + Stream.debounce("250 millis"), + Stream.runForEach(() => report), + Effect.forkScoped, + ); + yield* report.pipe( + Effect.repeat(Schedule.spaced(`${REPORT_INTERVAL_MS} millis`)), + Effect.forkScoped, + ); + + requestReport(); + }), +); diff --git a/packages/client-runtime/src/rpc/client.ts b/packages/client-runtime/src/rpc/client.ts index 882d8f51b53..0af9e5023a0 100644 --- a/packages/client-runtime/src/rpc/client.ts +++ b/packages/client-runtime/src/rpc/client.ts @@ -62,6 +62,23 @@ export type EnvironmentStreamRpcTag = | EnvironmentStreamCommandRpcTag; export type EnvironmentUnaryRpcTag = Exclude; + +export interface EnvironmentRpcSubscriptionObservation { + readonly environmentId: string; + readonly method: EnvironmentSubscriptionRpcTag; + readonly input: unknown; +} + +export class EnvironmentRpcSubscriptionObserver extends Context.Reference<{ + readonly observe: ( + subscription: EnvironmentRpcSubscriptionObservation, + ) => Effect.Effect>; +}>("@t3tools/client-runtime/rpc/EnvironmentRpcSubscriptionObserver", { + defaultValue: () => ({ + observe: () => Effect.succeed(Effect.void), + }), +}) {} + const isRpcClientError = Schema.is(RpcClientError.RpcClientError); export type EnvironmentRpcInput = Parameters>[0]; @@ -163,9 +180,15 @@ export function subscribe( EnvironmentSupervisor > { return 
Stream.unwrap( - EnvironmentSupervisor.pipe( - Effect.map((supervisor) => - SubscriptionRef.changes(supervisor.session).pipe( + Effect.gen(function* () { + const supervisor = yield* EnvironmentSupervisor; + const observer = yield* EnvironmentRpcSubscriptionObserver; + const completeObservation = yield* observer.observe({ + environmentId: supervisor.target.environmentId, + method: tag, + input, + }); + return SubscriptionRef.changes(supervisor.session).pipe( Stream.switchMap( Option.match({ onNone: () => Stream.empty, @@ -227,9 +250,9 @@ export function subscribe( }, }), ), - ), - ), - ), + Stream.ensuring(completeObservation), + ); + }), ).pipe( Stream.withSpan("EnvironmentRpc.subscribe", { attributes: { "rpc.method": tag }, diff --git a/packages/contracts/src/background.ts b/packages/contracts/src/background.ts new file mode 100644 index 00000000000..afa25de0768 --- /dev/null +++ b/packages/contracts/src/background.ts @@ -0,0 +1,101 @@ +import * as Schema from "effect/Schema"; + +import { AuthSessionId, EnvironmentId, RpcClientId, ThreadId } from "./baseSchemas.ts"; +import { ProviderInstanceId } from "./providerInstance.ts"; + +export const BackgroundBooleanState = Schema.Literals(["true", "false", "unknown"]); +export type BackgroundBooleanState = typeof BackgroundBooleanState.Type; + +export const HostPowerThermalState = Schema.Literals([ + "unknown", + "nominal", + "fair", + "serious", + "critical", +]); +export type HostPowerThermalState = typeof HostPowerThermalState.Type; + +export const HostPowerSource = Schema.Literals([ + "unknown", + "node-macos-shell", + "node-macos-native", + "node-linux", + "node-windows", + "electron-main", +]); +export type HostPowerSource = typeof HostPowerSource.Type; + +export const HostPowerSnapshot = Schema.Struct({ + source: HostPowerSource, + idle: BackgroundBooleanState, + idleSeconds: Schema.NullOr(Schema.Number), + locked: BackgroundBooleanState, + suspended: Schema.Boolean, + onBattery: BackgroundBooleanState, + 
lowPowerMode: BackgroundBooleanState, + thermalState: HostPowerThermalState, + stale: Schema.Boolean, + updatedAt: Schema.DateTimeUtc, +}); +export type HostPowerSnapshot = typeof HostPowerSnapshot.Type; + +export const BackgroundScope = Schema.Union([ + Schema.Struct({ type: Schema.Literal("server-config") }), + Schema.Struct({ + type: Schema.Literal("provider-status"), + instanceId: Schema.optionalKey(ProviderInstanceId), + }), + Schema.Struct({ type: Schema.Literal("vcs-status"), cwd: Schema.String }), + Schema.Struct({ type: Schema.Literal("git-refs"), cwd: Schema.String }), + Schema.Struct({ type: Schema.Literal("diagnostics") }), + Schema.Struct({ type: Schema.Literal("thread"), threadId: ThreadId }), +]); +export type BackgroundScope = typeof BackgroundScope.Type; + +export const ClientKind = Schema.Literals(["web", "desktop-renderer", "mobile", "unknown"]); +export type ClientKind = typeof ClientKind.Type; + +export const ClientActivityReportInput = Schema.Struct({ + environmentId: Schema.optionalKey(EnvironmentId), + clientId: Schema.String, + clientKind: ClientKind, + visible: Schema.Boolean, + focused: Schema.Boolean, + recentlyInteracted: Schema.Boolean, + appState: Schema.optionalKey(Schema.Literals(["active", "inactive", "background", "unknown"])), + lowPowerMode: Schema.optionalKey(BackgroundBooleanState), + batteryState: Schema.optionalKey(Schema.Literals(["unknown", "unplugged", "charging", "full"])), + networkType: Schema.optionalKey(Schema.String), + scopes: Schema.Array(BackgroundScope), + ttlMs: Schema.optionalKey(Schema.Number), + observedAt: Schema.DateTimeUtc, +}); +export type ClientActivityReportInput = typeof ClientActivityReportInput.Type; + +export const ClientActivityLease = Schema.Struct({ + sessionId: AuthSessionId, + rpcClientId: RpcClientId, + clientId: Schema.String, + clientKind: ClientKind, + visible: Schema.Boolean, + focused: Schema.Boolean, + recentlyInteracted: Schema.Boolean, + appState: 
Schema.optionalKey(Schema.Literals(["active", "inactive", "background", "unknown"])), + lowPowerMode: Schema.optionalKey(BackgroundBooleanState), + batteryState: Schema.optionalKey(Schema.Literals(["unknown", "unplugged", "charging", "full"])), + networkType: Schema.optionalKey(Schema.String), + scopes: Schema.Array(BackgroundScope), + updatedAt: Schema.DateTimeUtc, + expiresAt: Schema.DateTimeUtc, +}); +export type ClientActivityLease = typeof ClientActivityLease.Type; + +export const BackgroundPolicySnapshot = Schema.Struct({ + hostPower: HostPowerSnapshot, + leases: Schema.Array(ClientActivityLease), + activeForegroundLeaseCount: Schema.Number, + activeScopeKeys: Schema.Array(Schema.String), + shouldRunOpportunisticWork: Schema.Boolean, + updatedAt: Schema.DateTimeUtc, +}); +export type BackgroundPolicySnapshot = typeof BackgroundPolicySnapshot.Type; diff --git a/packages/contracts/src/baseSchemas.ts b/packages/contracts/src/baseSchemas.ts index 614ea5131fb..a8fa565cef4 100644 --- a/packages/contracts/src/baseSchemas.ts +++ b/packages/contracts/src/baseSchemas.ts @@ -43,6 +43,8 @@ export const TurnId = makeEntityId("TurnId"); export type TurnId = typeof TurnId.Type; export const AuthSessionId = makeEntityId("AuthSessionId"); export type AuthSessionId = typeof AuthSessionId.Type; +export const RpcClientId = NonNegativeInt.pipe(Schema.brand("RpcClientId")); +export type RpcClientId = typeof RpcClientId.Type; export const ProviderItemId = makeEntityId("ProviderItemId"); export type ProviderItemId = typeof ProviderItemId.Type; diff --git a/packages/contracts/src/index.ts b/packages/contracts/src/index.ts index 43270efdec7..03ecb4fec2c 100644 --- a/packages/contracts/src/index.ts +++ b/packages/contracts/src/index.ts @@ -1,4 +1,5 @@ export * from "./baseSchemas.ts"; +export * from "./background.ts"; export * from "./auth.ts"; export * from "./environment.ts"; export * from "./environmentHttp.ts"; diff --git a/packages/contracts/src/rpc.ts 
b/packages/contracts/src/rpc.ts index 87c5a49c73b..b5865aad3e0 100644 --- a/packages/contracts/src/rpc.ts +++ b/packages/contracts/src/rpc.ts @@ -8,6 +8,11 @@ import { AuthAccessStreamEvent, EnvironmentAuthorizationError, } from "./auth.ts"; +import { + BackgroundPolicySnapshot, + ClientActivityReportInput, + HostPowerSnapshot, +} from "./background.ts"; import { FilesystemBrowseInput, FilesystemBrowseResult, @@ -211,6 +216,9 @@ export const WS_METHODS = { serverGetProcessDiagnostics: "server.getProcessDiagnostics", serverGetProcessResourceHistory: "server.getProcessResourceHistory", serverSignalProcess: "server.signalProcess", + serverReportClientActivity: "server.reportClientActivity", + serverReportHostPowerState: "server.reportHostPowerState", + serverGetBackgroundPolicy: "server.getBackgroundPolicy", // Cloud environment methods cloudGetRelayClientStatus: "cloud.getRelayClientStatus", @@ -230,6 +238,7 @@ export const WS_METHODS = { subscribeServerConfig: "subscribeServerConfig", subscribeServerLifecycle: "subscribeServerLifecycle", subscribeAuthAccess: "subscribeAuthAccess", + subscribeBackgroundPolicy: "subscribeBackgroundPolicy", } as const; export const WsServerUpsertKeybindingRpc = Rpc.make(WS_METHODS.serverUpsertKeybinding, { @@ -328,6 +337,22 @@ export const WsCloudInstallRelayClientRpc = Rpc.make(WS_METHODS.cloudInstallRela stream: true, }); +export const WsServerReportClientActivityRpc = Rpc.make(WS_METHODS.serverReportClientActivity, { + payload: ClientActivityReportInput, + error: EnvironmentAuthorizationError, +}); + +export const WsServerReportHostPowerStateRpc = Rpc.make(WS_METHODS.serverReportHostPowerState, { + payload: HostPowerSnapshot, + error: EnvironmentAuthorizationError, +}); + +export const WsServerGetBackgroundPolicyRpc = Rpc.make(WS_METHODS.serverGetBackgroundPolicy, { + payload: Schema.Struct({}), + success: BackgroundPolicySnapshot, + error: EnvironmentAuthorizationError, +}); + export const WsSourceControlLookupRepositoryRpc = 
Rpc.make( WS_METHODS.sourceControlLookupRepository, { @@ -678,6 +703,13 @@ export const WsSubscribeAuthAccessRpc = Rpc.make(WS_METHODS.subscribeAuthAccess, stream: true, }); +export const WsSubscribeBackgroundPolicyRpc = Rpc.make(WS_METHODS.subscribeBackgroundPolicy, { + payload: Schema.Struct({}), + success: BackgroundPolicySnapshot, + error: EnvironmentAuthorizationError, + stream: true, +}); + export const WsRpcGroup = RpcGroup.make( WsServerGetConfigRpc, WsServerRefreshProvidersRpc, @@ -691,6 +723,9 @@ export const WsRpcGroup = RpcGroup.make( WsServerGetProcessDiagnosticsRpc, WsServerGetProcessResourceHistoryRpc, WsServerSignalProcessRpc, + WsServerReportClientActivityRpc, + WsServerReportHostPowerStateRpc, + WsServerGetBackgroundPolicyRpc, WsCloudGetRelayClientStatusRpc, WsCloudInstallRelayClientRpc, WsSourceControlLookupRepositoryRpc, @@ -740,6 +775,7 @@ export const WsRpcGroup = RpcGroup.make( WsSubscribeServerConfigRpc, WsSubscribeServerLifecycleRpc, WsSubscribeAuthAccessRpc, + WsSubscribeBackgroundPolicyRpc, WsOrchestrationDispatchCommandRpc, WsOrchestrationGetTurnDiffRpc, WsOrchestrationGetFullThreadDiffRpc, diff --git a/packages/contracts/src/settings.ts b/packages/contracts/src/settings.ts index 0463a441759..69d93c157d3 100644 --- a/packages/contracts/src/settings.ts +++ b/packages/contracts/src/settings.ts @@ -362,14 +362,65 @@ export const ObservabilitySettings = Schema.Struct({ export type ObservabilitySettings = typeof ObservabilitySettings.Type; export const DEFAULT_AUTOMATIC_GIT_FETCH_INTERVAL = Duration.seconds(30); +export const DEFAULT_PROVIDER_HEALTH_REFRESH_INTERVAL = Duration.minutes(5); + +export const BackgroundActivityProfile = Schema.Literals([ + "balanced", + "performance", + "battery-saver", +]); +export type BackgroundActivityProfile = typeof BackgroundActivityProfile.Type; +export const DEFAULT_BACKGROUND_ACTIVITY_PROFILE: BackgroundActivityProfile = "balanced"; + +export const BackgroundActivityProfileSelection = Schema.Literals([ + 
"balanced", + "performance", + "battery-saver", + "custom", +]); +export type BackgroundActivityProfileSelection = typeof BackgroundActivityProfileSelection.Type; + +export const BackgroundActivityOverrides = Schema.Struct({ + automaticGitFetchInterval: Schema.optionalKey(Schema.DurationFromMillis), + providerHealthRefreshInterval: Schema.optionalKey(Schema.DurationFromMillis), + hostPowerMonitorActiveInterval: Schema.optionalKey(Schema.DurationFromMillis), + hostPowerMonitorIdleInterval: Schema.optionalKey(Schema.DurationFromMillis), + idleClientTtl: Schema.optionalKey(Schema.DurationFromMillis), + pauseWhenHostLocked: Schema.optionalKey(Schema.Boolean), + pauseWhenHostLowPower: Schema.optionalKey(Schema.Boolean), + pauseWhenClientLowPower: Schema.optionalKey(Schema.Boolean), + pauseWhenOnBattery: Schema.optionalKey(Schema.Boolean), +}); +export type BackgroundActivityOverrides = typeof BackgroundActivityOverrides.Type; + +export const BackgroundActivitySettings = Schema.Struct({ + schemaVersion: Schema.Literal(1).pipe(Schema.withDecodingDefault(Effect.succeed(1 as const))), + profile: BackgroundActivityProfileSelection.pipe( + Schema.withDecodingDefault(Effect.succeed(DEFAULT_BACKGROUND_ACTIVITY_PROFILE)), + ), + baseProfile: Schema.optionalKey(BackgroundActivityProfile), + overrides: BackgroundActivityOverrides.pipe(Schema.withDecodingDefault(Effect.succeed({}))), +}).pipe(Schema.withDecodingDefault(Effect.succeed({}))); +export type BackgroundActivitySettings = typeof BackgroundActivitySettings.Type; export const ServerSettings = Schema.Struct({ enableAssistantStreaming: Schema.Boolean.pipe(Schema.withDecodingDefault(Effect.succeed(false))), + backgroundActivity: BackgroundActivitySettings, + // Legacy flat fields retained for old settings files and old clients. New + // consumers should resolve `backgroundActivity` instead. 
automaticGitFetchInterval: Schema.DurationFromMillis.pipe( Schema.withDecodingDefault( Effect.succeed(Duration.toMillis(DEFAULT_AUTOMATIC_GIT_FETCH_INTERVAL)), ), ), + providerHealthRefreshInterval: Schema.DurationFromMillis.pipe( + Schema.withDecodingDefault( + Effect.succeed(Duration.toMillis(DEFAULT_PROVIDER_HEALTH_REFRESH_INTERVAL)), + ), + ), + backgroundActivityProfile: BackgroundActivityProfile.pipe( + Schema.withDecodingDefault(Effect.succeed(DEFAULT_BACKGROUND_ACTIVITY_PROFILE)), + ), defaultThreadEnvMode: ThreadEnvMode.pipe( Schema.withDecodingDefault(Effect.succeed("local" as const satisfies ThreadEnvMode)), ), @@ -482,7 +533,17 @@ const OpenCodeSettingsPatch = Schema.Struct({ export const ServerSettingsPatch = Schema.Struct({ // Server settings enableAssistantStreaming: Schema.optionalKey(Schema.Boolean), + backgroundActivity: Schema.optionalKey( + Schema.Struct({ + schemaVersion: Schema.optionalKey(Schema.Literal(1)), + profile: Schema.optionalKey(BackgroundActivityProfileSelection), + baseProfile: Schema.optionalKey(BackgroundActivityProfile), + overrides: Schema.optionalKey(BackgroundActivityOverrides), + }), + ), automaticGitFetchInterval: Schema.optionalKey(Schema.DurationFromMillis), + providerHealthRefreshInterval: Schema.optionalKey(Schema.DurationFromMillis), + backgroundActivityProfile: Schema.optionalKey(BackgroundActivityProfile), defaultThreadEnvMode: Schema.optionalKey(ThreadEnvMode), newWorktreesStartFromOrigin: Schema.optionalKey(Schema.Boolean), addProjectBaseDirectory: Schema.optionalKey(TrimmedString), diff --git a/packages/shared/package.json b/packages/shared/package.json index 23705178bef..7fed2955465 100644 --- a/packages/shared/package.json +++ b/packages/shared/package.json @@ -75,6 +75,10 @@ "types": "./src/serverSettings.ts", "import": "./src/serverSettings.ts" }, + "./backgroundActivitySettings": { + "types": "./src/backgroundActivitySettings.ts", + "import": "./src/backgroundActivitySettings.ts" + }, "./String": { "types": 
"./src/String.ts", "import": "./src/String.ts" diff --git a/packages/shared/src/backgroundActivitySettings.ts b/packages/shared/src/backgroundActivitySettings.ts new file mode 100644 index 00000000000..1fa941eb82f --- /dev/null +++ b/packages/shared/src/backgroundActivitySettings.ts @@ -0,0 +1,248 @@ +import { + type BackgroundActivityProfile, + type BackgroundActivitySettings, + DEFAULT_BACKGROUND_ACTIVITY_PROFILE, + DEFAULT_AUTOMATIC_GIT_FETCH_INTERVAL, + DEFAULT_PROVIDER_HEALTH_REFRESH_INTERVAL, + type ServerSettings, +} from "@t3tools/contracts"; +import * as Duration from "effect/Duration"; + +export interface ResolvedBackgroundActivitySettings { + readonly profile: BackgroundActivityProfile; + readonly automaticGitFetchInterval: Duration.Duration; + readonly providerHealthRefreshInterval: Duration.Duration; + readonly hostPowerMonitorActiveInterval: Duration.Duration; + readonly hostPowerMonitorIdleInterval: Duration.Duration; + readonly idleClientTtl: Duration.Duration; + readonly pauseWhenHostLocked: boolean; + readonly pauseWhenHostLowPower: boolean; + readonly pauseWhenClientLowPower: boolean; + readonly pauseWhenOnBattery: boolean; +} + +const PRESET_SETTINGS: Record = { + performance: { + profile: "performance", + automaticGitFetchInterval: Duration.seconds(15), + providerHealthRefreshInterval: Duration.minutes(1), + hostPowerMonitorActiveInterval: Duration.seconds(30), + hostPowerMonitorIdleInterval: Duration.minutes(2), + idleClientTtl: Duration.seconds(45), + pauseWhenHostLocked: true, + pauseWhenHostLowPower: false, + pauseWhenClientLowPower: false, + pauseWhenOnBattery: false, + }, + balanced: { + profile: "balanced", + automaticGitFetchInterval: DEFAULT_AUTOMATIC_GIT_FETCH_INTERVAL, + providerHealthRefreshInterval: DEFAULT_PROVIDER_HEALTH_REFRESH_INTERVAL, + hostPowerMonitorActiveInterval: Duration.seconds(30), + hostPowerMonitorIdleInterval: Duration.minutes(5), + idleClientTtl: Duration.seconds(45), + pauseWhenHostLocked: true, + 
pauseWhenHostLowPower: true, + pauseWhenClientLowPower: true, + pauseWhenOnBattery: false, + }, + "battery-saver": { + profile: "battery-saver", + automaticGitFetchInterval: Duration.seconds(0), + providerHealthRefreshInterval: Duration.minutes(15), + hostPowerMonitorActiveInterval: Duration.minutes(1), + hostPowerMonitorIdleInterval: Duration.minutes(10), + idleClientTtl: Duration.seconds(45), + pauseWhenHostLocked: true, + pauseWhenHostLowPower: true, + pauseWhenClientLowPower: true, + pauseWhenOnBattery: true, + }, +}; + +export function getBackgroundActivityPresetSettings( + profile: BackgroundActivityProfile, +): ResolvedBackgroundActivitySettings { + return PRESET_SETTINGS[profile]; +} + +export function getBackgroundActivityBaseProfile( + backgroundActivity: BackgroundActivitySettings, +): BackgroundActivityProfile { + if (backgroundActivity.profile === "custom") { + return backgroundActivity.baseProfile ?? DEFAULT_BACKGROUND_ACTIVITY_PROFILE; + } + return backgroundActivity.profile; +} + +export function resolveBackgroundActivitySettings( + backgroundActivity: BackgroundActivitySettings, +): ResolvedBackgroundActivitySettings { + const baseProfile = getBackgroundActivityBaseProfile(backgroundActivity); + const preset = PRESET_SETTINGS[baseProfile]; + const { overrides } = backgroundActivity; + return { + profile: baseProfile, + automaticGitFetchInterval: + overrides.automaticGitFetchInterval ?? preset.automaticGitFetchInterval, + providerHealthRefreshInterval: + overrides.providerHealthRefreshInterval ?? preset.providerHealthRefreshInterval, + hostPowerMonitorActiveInterval: + overrides.hostPowerMonitorActiveInterval ?? preset.hostPowerMonitorActiveInterval, + hostPowerMonitorIdleInterval: + overrides.hostPowerMonitorIdleInterval ?? preset.hostPowerMonitorIdleInterval, + idleClientTtl: overrides.idleClientTtl ?? preset.idleClientTtl, + pauseWhenHostLocked: overrides.pauseWhenHostLocked ?? 
preset.pauseWhenHostLocked, + pauseWhenHostLowPower: overrides.pauseWhenHostLowPower ?? preset.pauseWhenHostLowPower, + pauseWhenClientLowPower: overrides.pauseWhenClientLowPower ?? preset.pauseWhenClientLowPower, + pauseWhenOnBattery: overrides.pauseWhenOnBattery ?? preset.pauseWhenOnBattery, + }; +} + +function durationsEqual(a: Duration.Duration, b: Duration.Duration): boolean { + return Duration.toMillis(a) === Duration.toMillis(b); +} + +function resolvedSettingsEqual( + a: ResolvedBackgroundActivitySettings, + b: ResolvedBackgroundActivitySettings, +): boolean { + return ( + durationsEqual(a.automaticGitFetchInterval, b.automaticGitFetchInterval) && + durationsEqual(a.providerHealthRefreshInterval, b.providerHealthRefreshInterval) && + durationsEqual(a.hostPowerMonitorActiveInterval, b.hostPowerMonitorActiveInterval) && + durationsEqual(a.hostPowerMonitorIdleInterval, b.hostPowerMonitorIdleInterval) && + durationsEqual(a.idleClientTtl, b.idleClientTtl) && + a.pauseWhenHostLocked === b.pauseWhenHostLocked && + a.pauseWhenHostLowPower === b.pauseWhenHostLowPower && + a.pauseWhenClientLowPower === b.pauseWhenClientLowPower && + a.pauseWhenOnBattery === b.pauseWhenOnBattery + ); +} + +export function normalizeBackgroundActivitySettings( + backgroundActivity: BackgroundActivitySettings, +): BackgroundActivitySettings { + if (backgroundActivity.profile !== "custom") { + return { + schemaVersion: 1, + profile: backgroundActivity.profile, + overrides: {}, + }; + } + + const resolved = resolveBackgroundActivitySettings(backgroundActivity); + const profiles: ReadonlyArray = [ + getBackgroundActivityBaseProfile(backgroundActivity), + "balanced", + "performance", + "battery-saver", + ]; + for (const profile of profiles) { + if (resolvedSettingsEqual(resolved, PRESET_SETTINGS[profile])) { + return { + schemaVersion: 1, + profile, + overrides: {}, + }; + } + } + + const baseProfile = getBackgroundActivityBaseProfile(backgroundActivity); + const preset = 
PRESET_SETTINGS[baseProfile]; + const overrides: BackgroundActivitySettings["overrides"] = { + ...(!durationsEqual(resolved.automaticGitFetchInterval, preset.automaticGitFetchInterval) + ? { automaticGitFetchInterval: resolved.automaticGitFetchInterval } + : {}), + ...(!durationsEqual( + resolved.providerHealthRefreshInterval, + preset.providerHealthRefreshInterval, + ) + ? { providerHealthRefreshInterval: resolved.providerHealthRefreshInterval } + : {}), + ...(!durationsEqual( + resolved.hostPowerMonitorActiveInterval, + preset.hostPowerMonitorActiveInterval, + ) + ? { hostPowerMonitorActiveInterval: resolved.hostPowerMonitorActiveInterval } + : {}), + ...(!durationsEqual(resolved.hostPowerMonitorIdleInterval, preset.hostPowerMonitorIdleInterval) + ? { hostPowerMonitorIdleInterval: resolved.hostPowerMonitorIdleInterval } + : {}), + ...(!durationsEqual(resolved.idleClientTtl, preset.idleClientTtl) + ? { idleClientTtl: resolved.idleClientTtl } + : {}), + ...(resolved.pauseWhenHostLocked !== preset.pauseWhenHostLocked + ? { pauseWhenHostLocked: resolved.pauseWhenHostLocked } + : {}), + ...(resolved.pauseWhenHostLowPower !== preset.pauseWhenHostLowPower + ? { pauseWhenHostLowPower: resolved.pauseWhenHostLowPower } + : {}), + ...(resolved.pauseWhenClientLowPower !== preset.pauseWhenClientLowPower + ? { pauseWhenClientLowPower: resolved.pauseWhenClientLowPower } + : {}), + ...(resolved.pauseWhenOnBattery !== preset.pauseWhenOnBattery + ? 
{ pauseWhenOnBattery: resolved.pauseWhenOnBattery } + : {}), + }; + + return { + schemaVersion: 1, + profile: "custom", + baseProfile, + overrides, + }; +} + +export function resolveServerBackgroundActivitySettings( + settings: ServerSettings, +): ResolvedBackgroundActivitySettings { + const defaultBackgroundActivity: BackgroundActivitySettings = { + schemaVersion: 1, + profile: DEFAULT_BACKGROUND_ACTIVITY_PROFILE, + overrides: {}, + }; + const backgroundActivityIsDefault = + settings.backgroundActivity.profile === defaultBackgroundActivity.profile && + settings.backgroundActivity.baseProfile === undefined && + Object.keys(settings.backgroundActivity.overrides).length === 0; + const legacyProfile = settings.backgroundActivityProfile; + const hasLegacyOverrides = + legacyProfile !== DEFAULT_BACKGROUND_ACTIVITY_PROFILE || + Duration.toMillis(settings.automaticGitFetchInterval) !== + Duration.toMillis(DEFAULT_AUTOMATIC_GIT_FETCH_INTERVAL) || + Duration.toMillis(settings.providerHealthRefreshInterval) !== + Duration.toMillis(DEFAULT_PROVIDER_HEALTH_REFRESH_INTERVAL); + if (backgroundActivityIsDefault && hasLegacyOverrides) { + return resolveBackgroundActivitySettings({ + schemaVersion: 1, + profile: + Duration.toMillis(settings.automaticGitFetchInterval) === + Duration.toMillis( + getBackgroundActivityPresetSettings(legacyProfile).automaticGitFetchInterval, + ) && + Duration.toMillis(settings.providerHealthRefreshInterval) === + Duration.toMillis( + getBackgroundActivityPresetSettings(legacyProfile).providerHealthRefreshInterval, + ) + ? legacyProfile + : "custom", + baseProfile: legacyProfile, + overrides: { + ...(Duration.toMillis(settings.automaticGitFetchInterval) !== + Duration.toMillis( + getBackgroundActivityPresetSettings(legacyProfile).automaticGitFetchInterval, + ) + ? 
{ automaticGitFetchInterval: settings.automaticGitFetchInterval } + : {}), + ...(Duration.toMillis(settings.providerHealthRefreshInterval) !== + Duration.toMillis( + getBackgroundActivityPresetSettings(legacyProfile).providerHealthRefreshInterval, + ) + ? { providerHealthRefreshInterval: settings.providerHealthRefreshInterval } + : {}), + }, + }); + } + return resolveBackgroundActivitySettings(settings.backgroundActivity); +} diff --git a/packages/shared/src/serverSettings.test.ts b/packages/shared/src/serverSettings.test.ts index 5bec7d386b6..77d287b00b1 100644 --- a/packages/shared/src/serverSettings.test.ts +++ b/packages/shared/src/serverSettings.test.ts @@ -3,7 +3,9 @@ import { ProviderDriverKind, ProviderInstanceId, } from "@t3tools/contracts"; +import * as Duration from "effect/Duration"; import { describe, expect, it } from "vite-plus/test"; +import { resolveServerBackgroundActivitySettings } from "./backgroundActivitySettings.ts"; import { createModelSelection } from "./model.ts"; import { applyServerSettingsPatch, @@ -194,4 +196,80 @@ describe("serverSettings helpers", () => { config: { homePath: "~/.codex" }, }); }); + + it("stores background activity profiles as a versioned object and syncs legacy aliases", () => { + const next = applyServerSettingsPatch(DEFAULT_SERVER_SETTINGS, { + backgroundActivity: { + schemaVersion: 1, + profile: "battery-saver", + overrides: {}, + }, + }); + + expect(next.backgroundActivity).toEqual({ + schemaVersion: 1, + profile: "battery-saver", + overrides: {}, + }); + expect(next.backgroundActivityProfile).toBe("battery-saver"); + expect(Duration.toMillis(next.automaticGitFetchInterval)).toBe(0); + expect(Duration.toMillis(next.providerHealthRefreshInterval)).toBe( + Duration.toMillis(Duration.minutes(15)), + ); + }); + + it("turns legacy interval patches into custom background activity overrides", () => { + const next = applyServerSettingsPatch(DEFAULT_SERVER_SETTINGS, { + automaticGitFetchInterval: Duration.seconds(15), + 
}); + + expect(next.backgroundActivity).toEqual({ + schemaVersion: 1, + profile: "custom", + baseProfile: "balanced", + overrides: { + automaticGitFetchInterval: Duration.seconds(15), + }, + }); + expect(resolveServerBackgroundActivitySettings(next).profile).toBe("balanced"); + expect( + Duration.toMillis(resolveServerBackgroundActivitySettings(next).automaticGitFetchInterval), + ).toBe(15_000); + }); + + it("reconciles custom background activity back to a preset when overrides match the preset", () => { + const custom = applyServerSettingsPatch(DEFAULT_SERVER_SETTINGS, { + automaticGitFetchInterval: Duration.seconds(15), + }); + const next = applyServerSettingsPatch(custom, { + automaticGitFetchInterval: Duration.seconds(30), + }); + + expect(next.backgroundActivity).toEqual({ + schemaVersion: 1, + profile: "balanced", + overrides: {}, + }); + expect(next.backgroundActivityProfile).toBe("balanced"); + expect(Duration.toMillis(next.automaticGitFetchInterval)).toBe(30_000); + }); + + it("drops custom overrides that duplicate the base profile", () => { + const next = applyServerSettingsPatch(DEFAULT_SERVER_SETTINGS, { + backgroundActivity: { + schemaVersion: 1, + profile: "custom", + baseProfile: "balanced", + overrides: { + automaticGitFetchInterval: Duration.seconds(30), + }, + }, + }); + + expect(next.backgroundActivity).toEqual({ + schemaVersion: 1, + profile: "balanced", + overrides: {}, + }); + }); }); diff --git a/packages/shared/src/serverSettings.ts b/packages/shared/src/serverSettings.ts index 1bbf466f60b..c8cee52a68c 100644 --- a/packages/shared/src/serverSettings.ts +++ b/packages/shared/src/serverSettings.ts @@ -4,6 +4,11 @@ import * as Schema from "effect/Schema"; import { deepMerge } from "./Struct.ts"; import { fromLenientJson } from "./schemaJson.ts"; import { createModelSelection } from "./model.ts"; +import { + getBackgroundActivityBaseProfile, + normalizeBackgroundActivitySettings, + resolveBackgroundActivitySettings, +} from 
"./backgroundActivitySettings.ts"; const ServerSettingsJson = fromLenientJson(ServerSettings); const decodeServerSettingsJson = Schema.decodeUnknownOption(ServerSettingsJson); @@ -76,14 +81,61 @@ export function applyServerSettingsPatch( patch: ServerSettingsPatch, ): ServerSettings { const selectionPatch = patch.textGenerationModelSelection; - const { automaticGitFetchInterval, ...patchForMerge } = patch; + const { + automaticGitFetchInterval, + providerHealthRefreshInterval, + backgroundActivityProfile, + backgroundActivity, + ...patchForMerge + } = patch; + const backgroundActivityPatch = + backgroundActivityProfile !== undefined + ? { + schemaVersion: 1 as const, + profile: backgroundActivityProfile, + overrides: {}, + } + : automaticGitFetchInterval !== undefined || providerHealthRefreshInterval !== undefined + ? { + schemaVersion: 1 as const, + profile: "custom" as const, + baseProfile: getBackgroundActivityBaseProfile(current.backgroundActivity), + overrides: { + ...current.backgroundActivity.overrides, + ...(automaticGitFetchInterval !== undefined ? { automaticGitFetchInterval } : {}), + ...(providerHealthRefreshInterval !== undefined + ? { providerHealthRefreshInterval } + : {}), + }, + } + : undefined; const next = deepMerge(current, patchForMerge); - const nextWithReplacements = { + const nextWithReplacementsBase = { ...next, + ...(backgroundActivity !== undefined + ? { backgroundActivity: deepMerge(current.backgroundActivity, backgroundActivity) } + : {}), + ...(backgroundActivityPatch !== undefined + ? { backgroundActivity: backgroundActivityPatch } + : {}), ...(patch.providerInstances !== undefined ? { providerInstances: patch.providerInstances } : {}), ...(automaticGitFetchInterval !== undefined ? { automaticGitFetchInterval } : {}), + ...(providerHealthRefreshInterval !== undefined ? 
{ providerHealthRefreshInterval } : {}), + }; + const normalizedBackgroundActivity = normalizeBackgroundActivitySettings( + nextWithReplacementsBase.backgroundActivity, + ); + const resolvedBackgroundActivity = resolveBackgroundActivitySettings( + normalizedBackgroundActivity, + ); + const nextWithReplacements = { + ...nextWithReplacementsBase, + backgroundActivity: normalizedBackgroundActivity, + automaticGitFetchInterval: resolvedBackgroundActivity.automaticGitFetchInterval, + providerHealthRefreshInterval: resolvedBackgroundActivity.providerHealthRefreshInterval, + backgroundActivityProfile: resolvedBackgroundActivity.profile, }; if (!selectionPatch) { return nextWithReplacements; From 4ae9d9e9f76ea8467317c51ada39ea94f439c920 Mon Sep 17 00:00:00 2001 From: Julius Marminge Date: Wed, 17 Jun 2026 13:34:50 -0700 Subject: [PATCH 2/5] feat: add native resource telemetry Replace scheduled process shell probes with a persistent native collector, Electron power/process telemetry, Effect services, diagnostics UI, attribution, and release packaging. 
Co-authored-by: codex --- .github/workflows/ci.yml | 12 + .github/workflows/release.yml | 51 + .gitignore | 1 + .../src/app/DesktopAppIdentity.test.ts | 1 + apps/desktop/src/app/DesktopCloudAuth.test.ts | 1 + .../DesktopBackendConfiguration.test.ts | 37 +- .../backend/DesktopBackendConfiguration.ts | 47 + .../src/backend/DesktopBackendManager.test.ts | 6 + .../src/backend/DesktopBackendManager.ts | 8 + apps/desktop/src/electron/ElectronApp.ts | 2 + .../src/electron/ElectronPowerMonitor.ts | 89 ++ apps/desktop/src/main.ts | 4 + .../DesktopTelemetryPublisher.test.ts | 122 ++ .../telemetry/DesktopTelemetryPublisher.ts | 215 +++ .../src/window/DesktopApplicationMenu.test.ts | 1 + .../src/background/HostPowerMonitor.test.ts | 44 + .../server/src/background/HostPowerMonitor.ts | 174 +-- apps/server/src/cli/config.ts | 4 + apps/server/src/config.ts | 4 + .../diagnostics/ProcessDiagnostics.test.ts | 378 +++--- .../src/diagnostics/ProcessDiagnostics.ts | 550 ++------ .../ProcessResourceMonitor.test.ts | 323 ++--- .../src/diagnostics/ProcessResourceMonitor.ts | 315 +---- .../src/observability/Layers/Observability.ts | 10 + .../provider/Layers/EventNdjsonLogger.test.ts | 33 + .../src/provider/Layers/EventNdjsonLogger.ts | 29 +- .../provider/Layers/ProviderEventLoggers.ts | 4 + .../DesktopTelemetryReceiver.ts | 279 ++++ .../src/resourceTelemetry/Model.test.ts | 393 ++++++ apps/server/src/resourceTelemetry/Model.ts | 542 ++++++++ .../NativeTelemetryClient.ts | 656 +++++++++ .../resourceTelemetry/ResourceAttribution.ts | 73 + .../ResourceMonitorBinary.test.ts | 80 ++ .../ResourceMonitorBinary.ts | 193 +++ .../ResourceTelemetry.test.ts | 302 +++++ .../resourceTelemetry/ResourceTelemetry.ts | 372 ++++++ .../ResourceTelemetryStore.ts | 253 ++++ apps/server/src/server.test.ts | 16 + apps/server/src/server.ts | 35 +- apps/server/src/ws.ts | 31 + .../settings/DiagnosticsSettings.tsx | 11 +- .../settings/ResourceTelemetryDiagnostics.tsx | 1171 +++++++++++++++++ 
.../web/src/lib/backgroundActivityReporter.ts | 3 + apps/web/src/lib/resourceTelemetryState.ts | 51 + apps/web/src/localApi.ts | 3 + docs/architecture/overview.md | 4 + docs/architecture/resource-telemetry.md | 335 +++++ native/resource-monitor/Cargo.lock | 343 +++++ native/resource-monitor/Cargo.toml | 17 + native/resource-monitor/src/main.rs | 560 ++++++++ package.json | 2 + packages/client-runtime/src/rpc/client.ts | 123 +- packages/client-runtime/src/state/server.ts | 18 + packages/contracts/src/desktopBootstrap.ts | 4 +- packages/contracts/src/index.ts | 1 + packages/contracts/src/ipc.ts | 11 + packages/contracts/src/resourceTelemetry.ts | 365 +++++ packages/contracts/src/rpc.ts | 42 + packages/contracts/src/server.ts | 2 + packages/shared/src/observability.test.ts | 30 + packages/shared/src/observability.ts | 41 +- scripts/build-desktop-artifact.test.ts | 24 + scripts/build-desktop-artifact.ts | 106 ++ 63 files changed, 7635 insertions(+), 1322 deletions(-) create mode 100644 apps/desktop/src/electron/ElectronPowerMonitor.ts create mode 100644 apps/desktop/src/telemetry/DesktopTelemetryPublisher.test.ts create mode 100644 apps/desktop/src/telemetry/DesktopTelemetryPublisher.ts create mode 100644 apps/server/src/background/HostPowerMonitor.test.ts create mode 100644 apps/server/src/resourceTelemetry/DesktopTelemetryReceiver.ts create mode 100644 apps/server/src/resourceTelemetry/Model.test.ts create mode 100644 apps/server/src/resourceTelemetry/Model.ts create mode 100644 apps/server/src/resourceTelemetry/NativeTelemetryClient.ts create mode 100644 apps/server/src/resourceTelemetry/ResourceAttribution.ts create mode 100644 apps/server/src/resourceTelemetry/ResourceMonitorBinary.test.ts create mode 100644 apps/server/src/resourceTelemetry/ResourceMonitorBinary.ts create mode 100644 apps/server/src/resourceTelemetry/ResourceTelemetry.test.ts create mode 100644 apps/server/src/resourceTelemetry/ResourceTelemetry.ts create mode 100644 
apps/server/src/resourceTelemetry/ResourceTelemetryStore.ts create mode 100644 apps/web/src/components/settings/ResourceTelemetryDiagnostics.tsx create mode 100644 apps/web/src/lib/resourceTelemetryState.ts create mode 100644 docs/architecture/resource-telemetry.md create mode 100644 native/resource-monitor/Cargo.lock create mode 100644 native/resource-monitor/Cargo.toml create mode 100644 native/resource-monitor/src/main.rs create mode 100644 packages/contracts/src/resourceTelemetry.ts diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index eaf8fc367cc..1d620c04bb1 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -22,6 +22,9 @@ jobs: cache: true run-install: true + - name: Setup Rust + uses: dtolnay/rust-toolchain@stable + - name: Ensure Electron runtime is installed run: vp run --filter @t3tools/desktop ensure:electron @@ -31,6 +34,9 @@ jobs: - name: Typecheck run: vpr typecheck + - name: Check resource monitor formatting + run: cargo fmt --manifest-path native/resource-monitor/Cargo.toml -- --check + - name: Build desktop pipeline run: vp run build:desktop @@ -54,12 +60,18 @@ jobs: cache: true run-install: true + - name: Setup Rust + uses: dtolnay/rust-toolchain@stable + - name: Ensure Electron runtime is installed run: vp run --filter @t3tools/desktop ensure:electron - name: Test run: vp run test + - name: Test resource monitor + run: cargo test --locked --manifest-path native/resource-monitor/Cargo.toml + mobile_native_static_analysis: name: Mobile Native Static Analysis runs-on: blacksmith-12vcpu-macos-26 diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 2348417abc5..e51982b6aa8 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -272,21 +272,29 @@ jobs: platform: mac target: dmg arch: arm64 + rust_target: aarch64-apple-darwin + resource_key: darwin-arm64 - label: macOS x64 runner: blacksmith-12vcpu-macos-26 platform: mac target: dmg arch: x64 + rust_target: 
x86_64-apple-darwin + resource_key: darwin-x64 - label: Linux x64 runner: blacksmith-32vcpu-ubuntu-2404 platform: linux target: AppImage arch: x64 + rust_target: x86_64-unknown-linux-gnu + resource_key: linux-x64 - label: Windows x64 runner: blacksmith-32vcpu-windows-2025 platform: win target: nsis arch: x64 + rust_target: x86_64-pc-windows-msvc + resource_key: win32-x64 # - label: Windows arm64 # runner: windows-11-arm # platform: win @@ -306,6 +314,11 @@ jobs: cache: true run-install: true + - name: Setup Rust + uses: dtolnay/rust-toolchain@stable + with: + targets: ${{ matrix.rust_target }} + - name: Download relay client tracing config uses: actions/download-artifact@v8 with: @@ -518,6 +531,19 @@ jobs: # done # fi + - name: Collect resource monitor + shell: bash + run: | + set -euo pipefail + binary_name="t3-resource-monitor" + if [[ "${{ matrix.platform }}" == "win" ]]; then + binary_name="${binary_name}.exe" + fi + source_path="native/resource-monitor/target/${{ matrix.rust_target }}/release/${binary_name}" + target_dir="resource-monitor-publish/${{ matrix.resource_key }}" + mkdir -p "$target_dir" + cp "$source_path" "$target_dir/$binary_name" + - name: Upload build artifacts uses: actions/upload-artifact@v7 with: @@ -525,6 +551,13 @@ jobs: path: release-publish/* if-no-files-found: error + - name: Upload resource monitor + uses: actions/upload-artifact@v7 + with: + name: resource-monitor-${{ matrix.resource_key }} + path: resource-monitor-publish/${{ matrix.resource_key }}/* + if-no-files-found: error + publish_cli: name: Publish CLI to npm needs: [preflight, relay_public_config, build] @@ -579,6 +612,24 @@ jobs: - name: Build CLI package run: vp run --filter t3 build + - name: Download resource monitors + uses: actions/download-artifact@v8 + with: + pattern: resource-monitor-* + path: ${{ runner.temp }}/resource-monitors + + - name: Bundle resource monitors into CLI package + shell: bash + run: | + set -euo pipefail + for artifact_dir in 
"$RUNNER_TEMP"/resource-monitors/resource-monitor-*; do + resource_key="${artifact_dir##*/resource-monitor-}" + target_dir="apps/server/dist/resource-monitor/${resource_key}" + mkdir -p "$target_dir" + cp "$artifact_dir"/t3-resource-monitor* "$target_dir/" + chmod +x "$target_dir"/t3-resource-monitor 2>/dev/null || true + done + - name: Publish CLI package run: node apps/server/scripts/cli.ts publish --tag "${{ needs.preflight.outputs.cli_dist_tag }}" --app-version "${{ needs.preflight.outputs.version }}" --verbose diff --git a/.gitignore b/.gitignore index ef6067824f2..69544f4b86a 100644 --- a/.gitignore +++ b/.gitignore @@ -27,6 +27,7 @@ squashfs-root/ .gstack/ dist-electron/ .electron-runtime/ +native/**/target/ node_modules/ .alchemy/ *.log diff --git a/apps/desktop/src/app/DesktopAppIdentity.test.ts b/apps/desktop/src/app/DesktopAppIdentity.test.ts index eafdbf056dc..81812cf4854 100644 --- a/apps/desktop/src/app/DesktopAppIdentity.test.ts +++ b/apps/desktop/src/app/DesktopAppIdentity.test.ts @@ -54,6 +54,7 @@ const makeElectronAppLayer = (calls: ElectronAppCalls) => }), setAppUserModelId: () => Effect.void, requestSingleInstanceLock: Effect.succeed(true), + getAppMetrics: Effect.succeed([]), isDefaultProtocolClient: () => Effect.succeed(false), setAsDefaultProtocolClient: () => Effect.succeed(true), setDesktopName: () => Effect.void, diff --git a/apps/desktop/src/app/DesktopCloudAuth.test.ts b/apps/desktop/src/app/DesktopCloudAuth.test.ts index 002fd86b0a4..8d783910e59 100644 --- a/apps/desktop/src/app/DesktopCloudAuth.test.ts +++ b/apps/desktop/src/app/DesktopCloudAuth.test.ts @@ -54,6 +54,7 @@ function makeHarness(input: { readonly isDevelopment: boolean }): CloudAuthHarne setAboutPanelOptions: () => Effect.void, setAppUserModelId: () => Effect.void, requestSingleInstanceLock: Effect.succeed(true), + getAppMetrics: Effect.succeed([]), isDefaultProtocolClient: () => Effect.succeed(false), setAsDefaultProtocolClient: (protocol, path, args) => Effect.sync(() => 
{ diff --git a/apps/desktop/src/backend/DesktopBackendConfiguration.test.ts b/apps/desktop/src/backend/DesktopBackendConfiguration.test.ts index 96e56a87c9d..7e3580e44ac 100644 --- a/apps/desktop/src/backend/DesktopBackendConfiguration.test.ts +++ b/apps/desktop/src/backend/DesktopBackendConfiguration.test.ts @@ -41,6 +41,7 @@ function makeEnvironmentLayer( options?: { readonly isPackaged?: boolean; readonly devServerUrl?: string; + readonly resourcesPath?: string; }, ) { return DesktopEnvironment.layer({ @@ -51,7 +52,7 @@ function makeEnvironmentLayer( appVersion: "1.2.3", appPath: "/repo", isPackaged: options?.isPackaged ?? true, - resourcesPath: "/missing/resources", + resourcesPath: options?.resourcesPath ?? "/missing/resources", runningUnderArm64Translation: false, }).pipe( Layer.provide( @@ -192,4 +193,38 @@ describe("DesktopBackendConfiguration", () => { ); }).pipe(Effect.scoped, Effect.provide(NodeServices.layer)), ); + + it.effect("passes the packaged resource monitor path to the backend", () => + Effect.gen(function* () { + const fileSystem = yield* FileSystem.FileSystem; + const baseDir = yield* fileSystem.makeTempDirectoryScoped({ + prefix: "t3-desktop-backend-config-test-", + }); + const resourcesPath = `${baseDir}/resources`; + const monitorPath = `${resourcesPath}/resource-monitor/t3-resource-monitor`; + yield* fileSystem.makeDirectory(`${resourcesPath}/resource-monitor`, { + recursive: true, + }); + yield* fileSystem.writeFileString(monitorPath, "binary"); + yield* fileSystem.chmod(monitorPath, 0o755); + + yield* Effect.gen(function* () { + const configuration = yield* DesktopBackendConfiguration.DesktopBackendConfiguration; + const config = yield* configuration.resolve; + assert.equal(config.bootstrap.resourceMonitorPath, monitorPath); + assert.equal(config.bootstrap.desktopTelemetryFd, 4); + }).pipe( + Effect.provide( + DesktopBackendConfiguration.layer.pipe( + Layer.provideMerge(serverExposureLayer), + Layer.provideMerge( + 
makeEnvironmentLayer(baseDir, { + resourcesPath, + }), + ), + ), + ), + ); + }).pipe(Effect.scoped, Effect.provide(NodeServices.layer)), + ); }); diff --git a/apps/desktop/src/backend/DesktopBackendConfiguration.ts b/apps/desktop/src/backend/DesktopBackendConfiguration.ts index 5e4e034b5e7..f2f23194ce3 100644 --- a/apps/desktop/src/backend/DesktopBackendConfiguration.ts +++ b/apps/desktop/src/backend/DesktopBackendConfiguration.ts @@ -56,6 +56,42 @@ const { logWarning: logBackendConfigurationWarning } = DesktopObservability.make "desktop-backend-configuration", ); +function resourceMonitorBinaryName(platform: NodeJS.Platform): string { + return platform === "win32" ? "t3-resource-monitor.exe" : "t3-resource-monitor"; +} + +const resolveResourceMonitorPath = Effect.fn( + "desktop.backendConfiguration.resolveResourceMonitorPath", +)(function* () { + const environment = yield* DesktopEnvironment.DesktopEnvironment; + const fileSystem = yield* FileSystem.FileSystem; + const binaryName = resourceMonitorBinaryName(environment.platform); + const candidates = environment.isDevelopment + ? 
[ + environment.path.join( + environment.rootDir, + "native/resource-monitor/target/debug", + binaryName, + ), + environment.path.join( + environment.rootDir, + "native/resource-monitor/target/release", + binaryName, + ), + ] + : environment.resolveResourcePathCandidates( + environment.path.join("resource-monitor", binaryName), + ); + + for (const candidate of candidates) { + if (yield* fileSystem.exists(candidate).pipe(Effect.orElseSucceed(() => false))) { + return Option.some(candidate); + } + } + + return Option.none(); +}); + const readPersistedBackendObservabilitySettings: Effect.Effect< BackendObservabilitySettings, never, @@ -89,6 +125,7 @@ const resolveBackendStartConfig = Effect.fn("desktop.backendConfiguration.resolv function* (input: { readonly bootstrapToken: string; readonly observabilitySettings: BackendObservabilitySettings; + readonly resourceMonitorPath: Option.Option; }): Effect.fn.Return< DesktopBackendManager.DesktopBackendStartConfig, never, @@ -115,6 +152,11 @@ const resolveBackendStartConfig = Effect.fn("desktop.backendConfiguration.resolv desktopBootstrapToken: input.bootstrapToken, tailscaleServeEnabled: backendExposure.tailscaleServeEnabled, tailscaleServePort: backendExposure.tailscaleServePort, + desktopTelemetryFd: 4, + ...Option.match(input.resourceMonitorPath, { + onNone: () => ({}), + onSome: (resourceMonitorPath) => ({ resourceMonitorPath }), + }), ...Option.match(input.observabilitySettings.otlpTracesUrl, { onNone: () => ({}), onSome: (otlpTracesUrl) => ({ otlpTracesUrl }), @@ -156,9 +198,14 @@ export const layer = Layer.effect( Effect.provideService(FileSystem.FileSystem, fileSystem), Effect.provideService(DesktopEnvironment.DesktopEnvironment, environment), ); + const resourceMonitorPath = yield* resolveResourceMonitorPath().pipe( + Effect.provideService(FileSystem.FileSystem, fileSystem), + Effect.provideService(DesktopEnvironment.DesktopEnvironment, environment), + ); return yield* resolveBackendStartConfig({ bootstrapToken, 
observabilitySettings, + resourceMonitorPath, }).pipe( Effect.provideService(DesktopEnvironment.DesktopEnvironment, environment), Effect.provideService(DesktopServerExposure.DesktopServerExposure, serverExposure), diff --git a/apps/desktop/src/backend/DesktopBackendManager.test.ts b/apps/desktop/src/backend/DesktopBackendManager.test.ts index 6c5109c8714..5222439b0ca 100644 --- a/apps/desktop/src/backend/DesktopBackendManager.test.ts +++ b/apps/desktop/src/backend/DesktopBackendManager.test.ts @@ -23,6 +23,7 @@ import * as DesktopBackendManager from "./DesktopBackendManager.ts"; import * as DesktopBackendConfiguration from "./DesktopBackendConfiguration.ts"; import * as DesktopObservability from "../app/DesktopObservability.ts"; import * as DesktopState from "../app/DesktopState.ts"; +import * as DesktopTelemetryPublisher from "../telemetry/DesktopTelemetryPublisher.ts"; import * as DesktopWindow from "../window/DesktopWindow.ts"; const decodeDesktopBackendBootstrap = Schema.decodeEffect( @@ -120,6 +121,11 @@ function makeManagerLayer(input: { }), input.spawnerLayer, input.httpClientLayer ?? healthyHttpClientLayer, + Layer.succeed(DesktopTelemetryPublisher.DesktopTelemetryPublisher, { + latest: Effect.succeed(Option.none()), + changes: Stream.empty, + encoded: Stream.empty, + }), input.desktopState ? 
Layer.succeed(DesktopState.DesktopState, input.desktopState) : DesktopState.layer, diff --git a/apps/desktop/src/backend/DesktopBackendManager.ts b/apps/desktop/src/backend/DesktopBackendManager.ts index 07693a82707..cb7307e83c8 100644 --- a/apps/desktop/src/backend/DesktopBackendManager.ts +++ b/apps/desktop/src/backend/DesktopBackendManager.ts @@ -27,6 +27,7 @@ import { import * as DesktopBackendConfiguration from "./DesktopBackendConfiguration.ts"; import * as DesktopObservability from "../app/DesktopObservability.ts"; import * as DesktopState from "../app/DesktopState.ts"; +import * as DesktopTelemetryPublisher from "../telemetry/DesktopTelemetryPublisher.ts"; import * as DesktopWindow from "../window/DesktopWindow.ts"; const INITIAL_RESTART_DELAY = Duration.millis(500); @@ -88,6 +89,7 @@ class BackendProcessSpawnError extends Data.TaggedError("BackendProcessSpawnErro type BackendProcessError = BackendProcessBootstrapEncodeError | BackendProcessSpawnError; interface RunBackendProcessOptions extends DesktopBackendStartConfig { + readonly desktopTelemetryStream: Stream.Stream; readonly readinessTimeout?: Duration.Duration; readonly onStarted?: (pid: number) => Effect.Effect; readonly onReady?: () => Effect.Effect; @@ -252,6 +254,10 @@ const runBackendProcess = Effect.fn("runBackendProcess")(function* ( type: "input", stream: Stream.encodeText(Stream.make(`${bootstrapJson}\n`)), }, + fd4: { + type: "input", + stream: options.desktopTelemetryStream, + }, }, }, ); @@ -283,6 +289,7 @@ const makeDesktopBackendManager = Effect.fn("makeDesktopBackendManager")(functio const configuration = yield* DesktopBackendConfiguration.DesktopBackendConfiguration; const backendOutputLog = yield* DesktopObservability.DesktopBackendOutputLog; const desktopState = yield* DesktopState.DesktopState; + const desktopTelemetryPublisher = yield* DesktopTelemetryPublisher.DesktopTelemetryPublisher; const desktopWindow = yield* DesktopWindow.DesktopWindow; const spawner = yield* 
ChildProcessSpawner.ChildProcessSpawner; const httpClient = yield* HttpClient.HttpClient; @@ -436,6 +443,7 @@ const makeDesktopBackendManager = Effect.fn("makeDesktopBackendManager")(functio const program = runBackendProcess({ ...config.value, + desktopTelemetryStream: desktopTelemetryPublisher.encoded, onStarted: Effect.fn("desktop.backendManager.onStarted")(function* (pid) { yield* updateActiveRun(runId, (run) => ({ ...run, diff --git a/apps/desktop/src/electron/ElectronApp.ts b/apps/desktop/src/electron/ElectronApp.ts index 49b432fd5dd..8648b837335 100644 --- a/apps/desktop/src/electron/ElectronApp.ts +++ b/apps/desktop/src/electron/ElectronApp.ts @@ -30,6 +30,7 @@ export interface ElectronAppShape { ) => Effect.Effect; readonly setAppUserModelId: (id: string) => Effect.Effect; readonly requestSingleInstanceLock: Effect.Effect; + readonly getAppMetrics: Effect.Effect>; readonly isDefaultProtocolClient: (protocol: string) => Effect.Effect; readonly setAsDefaultProtocolClient: ( protocol: string, @@ -101,6 +102,7 @@ const make = ElectronApp.of({ Electron.app.setAppUserModelId(id); }), requestSingleInstanceLock: Effect.sync(() => Electron.app.requestSingleInstanceLock()), + getAppMetrics: Effect.sync(() => Electron.app.getAppMetrics()), isDefaultProtocolClient: (protocol) => Effect.sync(() => Electron.app.isDefaultProtocolClient(protocol)), setAsDefaultProtocolClient: (protocol, path, args) => diff --git a/apps/desktop/src/electron/ElectronPowerMonitor.ts b/apps/desktop/src/electron/ElectronPowerMonitor.ts new file mode 100644 index 00000000000..8578b407462 --- /dev/null +++ b/apps/desktop/src/electron/ElectronPowerMonitor.ts @@ -0,0 +1,89 @@ +import * as Context from "effect/Context"; +import * as Effect from "effect/Effect"; +import * as Layer from "effect/Layer"; +import * as Scope from "effect/Scope"; + +import * as Electron from "electron"; + +export type ElectronThermalState = ReturnType; +export type ElectronIdleState = ReturnType; + +export interface 
ElectronPowerMonitorShape { + readonly isOnBatteryPower: Effect.Effect; + readonly getSystemIdleTime: Effect.Effect; + readonly getSystemIdleState: (idleThresholdSeconds: number) => Effect.Effect; + readonly getCurrentThermalState: Effect.Effect; + readonly onSimpleEvent: ( + eventName: "lock-screen" | "unlock-screen" | "on-ac" | "on-battery" | "suspend" | "resume", + listener: () => void, + ) => Effect.Effect; + readonly onThermalStateChange: ( + listener: (state: ElectronThermalState) => void, + ) => Effect.Effect; + readonly onSpeedLimitChange: ( + listener: (limit: number) => void, + ) => Effect.Effect; +} + +export class ElectronPowerMonitor extends Context.Service< + ElectronPowerMonitor, + ElectronPowerMonitorShape +>()("@t3tools/desktop/electron/ElectronPowerMonitor") {} + +const onSimpleEvent: ElectronPowerMonitorShape["onSimpleEvent"] = (eventName, listener) => + Effect.acquireRelease( + Effect.sync(() => { + Electron.powerMonitor.on(eventName as any, listener as any); + }), + () => + Effect.sync(() => { + Electron.powerMonitor.removeListener(eventName as any, listener as any); + }), + ).pipe(Effect.asVoid); + +const onThermalStateChange: ElectronPowerMonitorShape["onThermalStateChange"] = (listener) => { + const wrapped = ( + event: Electron.Event, + ): void => { + listener(event.state); + }; + return Effect.acquireRelease( + Effect.sync(() => { + Electron.powerMonitor.on("thermal-state-change", wrapped); + }), + () => + Effect.sync(() => { + Electron.powerMonitor.removeListener("thermal-state-change", wrapped); + }), + ).pipe(Effect.asVoid); +}; + +const onSpeedLimitChange: ElectronPowerMonitorShape["onSpeedLimitChange"] = (listener) => { + const wrapped = ( + event: Electron.Event, + ): void => { + listener(event.limit); + }; + return Effect.acquireRelease( + Effect.sync(() => { + Electron.powerMonitor.on("speed-limit-change", wrapped); + }), + () => + Effect.sync(() => { + Electron.powerMonitor.removeListener("speed-limit-change", wrapped); + }), + 
).pipe(Effect.asVoid); +}; + +export const make = ElectronPowerMonitor.of({ + isOnBatteryPower: Effect.sync(() => Electron.powerMonitor.isOnBatteryPower()), + getSystemIdleTime: Effect.sync(() => Electron.powerMonitor.getSystemIdleTime()), + getSystemIdleState: (idleThresholdSeconds) => + Effect.sync(() => Electron.powerMonitor.getSystemIdleState(idleThresholdSeconds)), + getCurrentThermalState: Effect.sync(() => Electron.powerMonitor.getCurrentThermalState()), + onSimpleEvent, + onThermalStateChange, + onSpeedLimitChange, +}); + +export const layer = Layer.succeed(ElectronPowerMonitor, make); diff --git a/apps/desktop/src/main.ts b/apps/desktop/src/main.ts index 33eac8ea646..d3c82d2f7db 100644 --- a/apps/desktop/src/main.ts +++ b/apps/desktop/src/main.ts @@ -19,6 +19,7 @@ import * as DesktopIpc from "./ipc/DesktopIpc.ts"; import * as ElectronApp from "./electron/ElectronApp.ts"; import * as ElectronDialog from "./electron/ElectronDialog.ts"; import * as ElectronMenu from "./electron/ElectronMenu.ts"; +import * as ElectronPowerMonitor from "./electron/ElectronPowerMonitor.ts"; import * as ElectronProtocol from "./electron/ElectronProtocol.ts"; import * as DesktopSecretStorage from "./electron/ElectronSafeStorage.ts"; import * as ElectronShell from "./electron/ElectronShell.ts"; @@ -45,6 +46,7 @@ import * as DesktopShellEnvironment from "./shell/DesktopShellEnvironment.ts"; import * as DesktopSshEnvironment from "./ssh/DesktopSshEnvironment.ts"; import * as DesktopSshPasswordPrompts from "./ssh/DesktopSshPasswordPrompts.ts"; import * as DesktopState from "./app/DesktopState.ts"; +import * as DesktopTelemetryPublisher from "./telemetry/DesktopTelemetryPublisher.ts"; import * as DesktopUpdates from "./updates/DesktopUpdates.ts"; import * as PreviewBrowserSession from "./preview/BrowserSession.ts"; import * as PreviewManager from "./preview/Manager.ts"; @@ -104,6 +106,7 @@ const electronLayer = Layer.mergeAll( ElectronApp.layer, ElectronDialog.layer, 
ElectronMenu.layer, + ElectronPowerMonitor.layer, ElectronProtocol.layer, DesktopSecretStorage.layer, ElectronShell.layer, @@ -146,6 +149,7 @@ const desktopWindowLayer = DesktopWindow.layer.pipe( const desktopBackendLayer = DesktopBackendManager.layer.pipe( Layer.provideMerge(DesktopAppIdentity.layer), Layer.provideMerge(DesktopBackendConfiguration.layer), + Layer.provideMerge(DesktopTelemetryPublisher.layer), Layer.provideMerge(desktopWindowLayer), ); diff --git a/apps/desktop/src/telemetry/DesktopTelemetryPublisher.test.ts b/apps/desktop/src/telemetry/DesktopTelemetryPublisher.test.ts new file mode 100644 index 00000000000..7518fed42f6 --- /dev/null +++ b/apps/desktop/src/telemetry/DesktopTelemetryPublisher.test.ts @@ -0,0 +1,122 @@ +import { assert, describe, it } from "@effect/vitest"; +import * as Duration from "effect/Duration"; +import * as Effect from "effect/Effect"; +import * as Fiber from "effect/Fiber"; +import * as Layer from "effect/Layer"; +import * as Option from "effect/Option"; +import * as Ref from "effect/Ref"; +import * as Stream from "effect/Stream"; +import * as TestClock from "effect/testing/TestClock"; + +import type * as Electron from "electron"; + +import * as ElectronApp from "../electron/ElectronApp.ts"; +import * as ElectronPowerMonitor from "../electron/ElectronPowerMonitor.ts"; +import * as DesktopTelemetryPublisher from "./DesktopTelemetryPublisher.ts"; + +function makeElectronAppLayer(metrics: ReadonlyArray) { + return Layer.succeed(ElectronApp.ElectronApp, { + metadata: Effect.die("unexpected metadata read"), + name: Effect.succeed("T3 Code"), + whenReady: Effect.void, + quit: Effect.void, + exit: () => Effect.void, + relaunch: () => Effect.void, + setPath: () => Effect.void, + setName: () => Effect.void, + setAboutPanelOptions: () => Effect.void, + setAppUserModelId: () => Effect.void, + requestSingleInstanceLock: Effect.succeed(true), + getAppMetrics: Effect.succeed(metrics), + isDefaultProtocolClient: () => 
Effect.succeed(false), + setAsDefaultProtocolClient: () => Effect.succeed(true), + setDesktopName: () => Effect.void, + setDockIcon: () => Effect.void, + appendCommandLineSwitch: () => Effect.void, + on: () => Effect.void, + } satisfies ElectronApp.ElectronAppShape); +} + +describe("DesktopTelemetryPublisher", () => { + it.effect("publishes Electron metrics and event-driven power state over NDJSON", () => + Effect.gen(function* () { + const onBattery = yield* Ref.make(false); + const simpleListeners = new Map void>(); + let thermalListener: ((state: ElectronPowerMonitor.ElectronThermalState) => void) | null = + null; + let speedLimitListener: ((limit: number) => void) | null = null; + const metrics = [ + { + pid: 4_242, + type: "Browser", + creationTime: 1_000, + name: "electron", + cpu: { + percentCPUUsage: 12.5, + cumulativeCPUUsage: 3.25, + idleWakeupsPerSecond: 7, + }, + memory: { + workingSetSize: 2_048, + peakWorkingSetSize: 4_096, + }, + } as Electron.ProcessMetric, + ]; + const powerLayer = Layer.succeed( + ElectronPowerMonitor.ElectronPowerMonitor, + ElectronPowerMonitor.ElectronPowerMonitor.of({ + isOnBatteryPower: Ref.get(onBattery), + getSystemIdleTime: Effect.succeed(5), + getSystemIdleState: () => Effect.succeed("active"), + getCurrentThermalState: Effect.succeed("nominal"), + onSimpleEvent: (eventName, listener) => + Effect.sync(() => { + simpleListeners.set(eventName, listener); + }), + onThermalStateChange: (listener) => + Effect.sync(() => { + thermalListener = listener; + }), + onSpeedLimitChange: (listener) => + Effect.sync(() => { + speedLimitListener = listener; + }), + }), + ); + const layer = DesktopTelemetryPublisher.layer.pipe( + Layer.provide(Layer.mergeAll(makeElectronAppLayer(metrics), powerLayer)), + ); + + yield* Effect.gen(function* () { + const publisher = yield* DesktopTelemetryPublisher.DesktopTelemetryPublisher; + const encoded = yield* publisher.encoded.pipe(Stream.take(2), Stream.runCollect); + const decoder = new 
TextDecoder(); + const messages = Array.from(encoded, (bytes) => JSON.parse(decoder.decode(bytes).trim())); + + assert.equal(messages[0]?.type, "desktopTelemetryHello"); + assert.equal(messages[0]?.electronPid, process.pid); + assert.equal(messages[1]?.type, "desktopTelemetry"); + assert.equal(messages[1]?.electronProcesses[0]?.pid, 4_242); + assert.equal(messages[1]?.electronProcesses[0]?.cpuPercent, 12.5); + assert.equal(messages[1]?.electronProcesses[0]?.workingSetBytes, 2_048 * 1_024); + + const nextSnapshotFiber = yield* Stream.runHead(publisher.changes).pipe(Effect.forkChild); + yield* Effect.yieldNow; + yield* Ref.set(onBattery, true); + simpleListeners.get("lock-screen")?.(); + simpleListeners.get("suspend")?.(); + thermalListener?.("serious"); + speedLimitListener?.(65); + yield* Effect.yieldNow; + yield* TestClock.adjust(Duration.seconds(1)); + + const nextSnapshot = Option.getOrThrow(yield* Fiber.join(nextSnapshotFiber)); + assert.equal(nextSnapshot.power.locked, "true"); + assert.equal(nextSnapshot.power.suspended, true); + assert.equal(nextSnapshot.power.onBattery, "true"); + assert.equal(nextSnapshot.power.thermalState, "serious"); + assert.equal(Option.getOrNull(nextSnapshot.speedLimitPercent), 65); + }).pipe(Effect.provide(layer)); + }), + ); +}); diff --git a/apps/desktop/src/telemetry/DesktopTelemetryPublisher.ts b/apps/desktop/src/telemetry/DesktopTelemetryPublisher.ts new file mode 100644 index 00000000000..86b2538695e --- /dev/null +++ b/apps/desktop/src/telemetry/DesktopTelemetryPublisher.ts @@ -0,0 +1,215 @@ +import { + DesktopHostTelemetryMessage, + type DesktopHostTelemetrySnapshot, + type HostPowerSnapshot, +} from "@t3tools/contracts"; +import * as Context from "effect/Context"; +import * as DateTime from "effect/DateTime"; +import * as Duration from "effect/Duration"; +import * as Effect from "effect/Effect"; +import * as Layer from "effect/Layer"; +import * as Option from "effect/Option"; +import * as PubSub from "effect/PubSub"; 
+import * as Queue from "effect/Queue"; +import * as Ref from "effect/Ref"; +import * as Schema from "effect/Schema"; +import * as Stream from "effect/Stream"; + +import * as ElectronApp from "../electron/ElectronApp.ts"; +import * as ElectronPowerMonitor from "../electron/ElectronPowerMonitor.ts"; + /** Sampling cadence and idle threshold: the loop sleeps SAMPLE_INTERVAL between samples, and IDLE_THRESHOLD_SECONDS is the argument given to getSystemIdleState. */ +const SAMPLE_INTERVAL = Duration.seconds(1); +const IDLE_THRESHOLD_SECONDS = 60; +const encodeMessage = Schema.encodeSync(Schema.fromJsonString(DesktopHostTelemetryMessage)); +const textEncoder = new TextEncoder(); + /** Power changes queued by the monitor listeners; each one is folded into PowerState by updatePowerState. */ +type PowerEvent = + | { readonly type: "locked"; readonly value: boolean } + | { readonly type: "suspended"; readonly value: boolean } + | { readonly type: "onBattery"; readonly value: boolean } + | { readonly type: "thermal"; readonly value: HostPowerSnapshot["thermalState"] } + | { readonly type: "speedLimit"; readonly value: number }; + /** Event-driven power fields kept between samples; speedLimitPercent stays none until a speed limit event arrives. */ +interface PowerState { + readonly locked: HostPowerSnapshot["locked"]; + readonly suspended: boolean; + readonly onBattery: HostPowerSnapshot["onBattery"]; + readonly thermalState: HostPowerSnapshot["thermalState"]; + readonly speedLimitPercent: Option.Option; +} + /** Publisher surface: latest is the most recent snapshot (if any), changes follows newly published snapshots, encoded is newline-terminated JSON bytes that start with a hello message. */ +export interface DesktopTelemetryPublisherShape { + readonly latest: Effect.Effect>; + readonly changes: Stream.Stream; + readonly encoded: Stream.Stream; +} + /** Context service tag for the desktop telemetry publisher. */ +export class DesktopTelemetryPublisher extends Context.Service< + DesktopTelemetryPublisher, + DesktopTelemetryPublisherShape +>()("@t3tools/desktop/telemetry/DesktopTelemetryPublisher") {} + /** Converts a boolean into the true or false string state used by HostPowerSnapshot. */ +function booleanState(value: boolean): HostPowerSnapshot["onBattery"] { + return value ? 
"true" : "false"; +} + /** Maps the Electron idle state onto the snapshot idle flag; both idle and locked count as idle, unknown passes through. */ +function idleState(value: ElectronPowerMonitor.ElectronIdleState): HostPowerSnapshot["idle"] { + switch (value) { + case "active": + return "false"; + case "idle": + case "locked": + return "true"; + case "unknown": + return "unknown"; + } +} + /** Pure reducer: returns a copy of state with the single field named by the event replaced. */ +function updatePowerState(state: PowerState, event: PowerEvent): PowerState { + switch (event.type) { + case "locked": + return { ...state, locked: booleanState(event.value) }; + case "suspended": + return { ...state, suspended: event.value }; + case "onBattery": + return { ...state, onBattery: booleanState(event.value) }; + case "thermal": + return { ...state, thermalState: event.value }; + case "speedLimit": + return { ...state, speedLimitPercent: Option.some(event.value) }; + } +} + /** Builds the publisher: waits for the app to be ready, seeds the power state, registers the power listeners, and forks the event-folding and sampling loops into the surrounding scope. */ +export const make = Effect.fn("desktop.telemetryPublisher.make")(function* () { + const electronApp = yield* ElectronApp.ElectronApp; + const powerMonitor = yield* ElectronPowerMonitor.ElectronPowerMonitor; + yield* electronApp.whenReady; + /* locked starts as unknown and is only updated by lock-screen or unlock-screen events; a sample additionally reports true while the idle state is locked. */ + const initialPowerState: PowerState = { + locked: "unknown", + suspended: false, + onBattery: booleanState(yield* powerMonitor.isOnBatteryPower), + thermalState: yield* powerMonitor.getCurrentThermalState, + speedLimitPercent: Option.none(), + }; + const powerState = yield* Ref.make(initialPowerState); + const powerEvents = yield* Queue.unbounded(); + const latest = yield* Ref.make(Option.none()); + const changes = yield* PubSub.sliding(8); + const sequence = yield* Ref.make(0); + /* Synchronous callback handed to every listener; it enqueues the event with offerUnsafe so the forked loop can fold it into powerState. */ + const offer = (event: PowerEvent): void => { + Queue.offerUnsafe(powerEvents, event); + }; + yield* Effect.all( + [ + powerMonitor.onSimpleEvent("lock-screen", () => offer({ type: "locked", value: true })), + powerMonitor.onSimpleEvent("unlock-screen", () => offer({ type: "locked", value: false })), + powerMonitor.onSimpleEvent("suspend", () => offer({ type: "suspended", value: true })), + powerMonitor.onSimpleEvent("resume", () => offer({ type: "suspended", value: false })), 
powerMonitor.onSimpleEvent("on-battery", () => offer({ type: "onBattery", value: true })), + powerMonitor.onSimpleEvent("on-ac", () => offer({ type: "onBattery", value: false })), + powerMonitor.onThermalStateChange((value) => offer({ type: "thermal", value })), + powerMonitor.onSpeedLimitChange((value) => offer({ type: "speedLimit", value })), + ], + { concurrency: "unbounded" }, + ); + yield* Effect.forever( + Queue.take(powerEvents).pipe( + Effect.flatMap((event) => Ref.update(powerState, (state) => updatePowerState(state, event))), + ), + ).pipe(Effect.forkScoped); + /* One sample: reads the folded power state together with live idle time, idle state, battery status and app metrics, then stores the snapshot in latest and publishes it. Any failure is logged as a warning instead of propagating. NOTE(review): working set sizes are multiplied by 1024 to get bytes, which assumes Electron reports them in kilobytes; confirm against the ProcessMetric docs. */ + const sampleOnce = Effect.gen(function* () { + const sampledAt = yield* DateTime.now; + const sampledAtUnixMs = DateTime.toEpochMillis(sampledAt); + const [currentPower, idleSeconds, systemIdleState, onBattery, metrics] = yield* Effect.all( + [ + Ref.get(powerState), + powerMonitor.getSystemIdleTime, + powerMonitor.getSystemIdleState(IDLE_THRESHOLD_SECONDS), + powerMonitor.isOnBatteryPower, + electronApp.getAppMetrics, + ], + { concurrency: "unbounded" }, + ); + const nextSequence = yield* Ref.modify(sequence, (current) => [current + 1, current + 1]); + const locked = systemIdleState === "locked" ? "true" : currentPower.locked; + const snapshot: DesktopHostTelemetrySnapshot = { + version: 1, + type: "desktopTelemetry", + sequence: nextSequence, + sampledAtUnixMs, + power: { + source: "electron-main", + idle: idleState(systemIdleState), + idleSeconds, + locked, + suspended: currentPower.suspended, + onBattery: booleanState(onBattery), + lowPowerMode: "unknown", + thermalState: currentPower.thermalState, + stale: false, + updatedAt: sampledAt, + }, + speedLimitPercent: currentPower.speedLimitPercent, + electronProcesses: metrics.map((metric) => ({ + pid: metric.pid, + creationTimeMs: metric.creationTime, + type: metric.type, + ...(metric.name === undefined ? {} : { name: metric.name }), + ...(metric.serviceName === undefined ? 
{} : { serviceName: metric.serviceName }), + cpuPercent: metric.cpu.percentCPUUsage, + ...(metric.cpu.cumulativeCPUUsage === undefined + ? {} + : { cumulativeCpuSeconds: metric.cpu.cumulativeCPUUsage }), + idleWakeupsPerSecond: metric.cpu.idleWakeupsPerSecond, + workingSetBytes: Math.max(0, Math.round(metric.memory.workingSetSize * 1024)), + peakWorkingSetBytes: Math.max(0, Math.round(metric.memory.peakWorkingSetSize * 1024)), + })), + }; + + yield* Ref.set(latest, Option.some(snapshot)); + yield* PubSub.publish(changes, snapshot); + }).pipe( + Effect.catchCause((cause) => + Effect.logWarning("Failed to sample Electron telemetry", { + cause: String(cause), + }), + ), + ); + /* Sampling loop: sample, sleep SAMPLE_INTERVAL, repeat; forked into the surrounding scope. */ + yield* Effect.forever(sampleOnce.pipe(Effect.andThen(Effect.sleep(SAMPLE_INTERVAL)))).pipe( + Effect.forkScoped, + ); + /* Replays the latest snapshot when one exists, then follows the pubsub. NOTE(review): the subscription appears to start only after the replay, so a snapshot published in between may be skipped; confirm. */ + const snapshots = Stream.concat( + Stream.unwrap( + Ref.get(latest).pipe( + Effect.map( + Option.match({ + onNone: () => Stream.empty, + onSome: Stream.make, + }), + ), + ), + ), + Stream.fromPubSub(changes), + ); + const encoded = Stream.concat( + Stream.make({ + version: 1, + type: "desktopTelemetryHello", + electronPid: process.pid, + } as const), + snapshots, + ).pipe(Stream.map((message) => textEncoder.encode(`${encodeMessage(message)}\n`))); + + return DesktopTelemetryPublisher.of({ + latest: Ref.get(latest), + changes: Stream.fromPubSub(changes), + encoded, + }); +}); + /** Layer that builds the DesktopTelemetryPublisher service by running make. */ +export const layer = Layer.effect(DesktopTelemetryPublisher, make()); diff --git a/apps/desktop/src/window/DesktopApplicationMenu.test.ts b/apps/desktop/src/window/DesktopApplicationMenu.test.ts index 62d619fe18b..08afcd2fc13 100644 --- a/apps/desktop/src/window/DesktopApplicationMenu.test.ts +++ b/apps/desktop/src/window/DesktopApplicationMenu.test.ts @@ -40,6 +40,7 @@ const electronAppLayer = Layer.succeed(ElectronApp.ElectronApp, { setAboutPanelOptions: () => Effect.void, setAppUserModelId: () => Effect.void, requestSingleInstanceLock: Effect.succeed(true), + getAppMetrics: Effect.succeed([]), 
isDefaultProtocolClient: () => Effect.succeed(false), setAsDefaultProtocolClient: () => Effect.succeed(true), setDesktopName: () => Effect.void, diff --git a/apps/server/src/background/HostPowerMonitor.test.ts b/apps/server/src/background/HostPowerMonitor.test.ts new file mode 100644 index 00000000000..35fa72c0b10 --- /dev/null +++ b/apps/server/src/background/HostPowerMonitor.test.ts @@ -0,0 +1,44 @@ +import { describe, expect, it } from "@effect/vitest"; +import * as DateTime from "effect/DateTime"; +import * as Effect from "effect/Effect"; +import * as Fiber from "effect/Fiber"; +import * as Option from "effect/Option"; +import * as Stream from "effect/Stream"; + +import * as HostPowerMonitor from "./HostPowerMonitor.ts"; + +describe("HostPowerMonitor", () => { + it.effect("publishes semantic power changes without idle-time heartbeat churn", () => + Effect.gen(function* () { + const monitor = yield* HostPowerMonitor.make(); + const initial = { + source: "electron-main", + idle: "false", + idleSeconds: 0, + locked: "false", + suspended: false, + onBattery: "false", + lowPowerMode: "unknown", + thermalState: "nominal", + stale: false, + updatedAt: DateTime.makeUnsafe("2026-06-17T12:00:00.000Z"), + } as const; + yield* monitor.report(initial); + + const nextChange = yield* Stream.runHead(monitor.streamChanges).pipe(Effect.forkChild); + yield* Effect.yieldNow; + yield* monitor.report({ + ...initial, + idleSeconds: 1, + updatedAt: DateTime.makeUnsafe("2026-06-17T12:00:01.000Z"), + }); + yield* monitor.report({ + ...initial, + locked: "true", + updatedAt: DateTime.makeUnsafe("2026-06-17T12:00:02.000Z"), + }); + + expect(Option.getOrThrow(yield* Fiber.join(nextChange)).locked).toBe("true"); + }), + ); +}); diff --git a/apps/server/src/background/HostPowerMonitor.ts b/apps/server/src/background/HostPowerMonitor.ts index b6d068531a3..76a2c249e80 100644 --- a/apps/server/src/background/HostPowerMonitor.ts +++ b/apps/server/src/background/HostPowerMonitor.ts @@ -1,23 +1,13 
@@ -import { - type BackgroundBooleanState, - type HostPowerSnapshot, - type HostPowerThermalState, -} from "@t3tools/contracts"; -import { - getBackgroundActivityPresetSettings, - resolveServerBackgroundActivitySettings, -} from "@t3tools/shared/backgroundActivitySettings"; +import type { HostPowerSnapshot } from "@t3tools/contracts"; import * as Context from "effect/Context"; import * as DateTime from "effect/DateTime"; -import * as Duration from "effect/Duration"; import * as Effect from "effect/Effect"; import * as Layer from "effect/Layer"; import * as PubSub from "effect/PubSub"; import * as Ref from "effect/Ref"; import * as Stream from "effect/Stream"; -import * as ProcessRunner from "../processRunner.ts"; -import { ServerSettingsService } from "../serverSettings.ts"; +import * as ResourceTelemetry from "../resourceTelemetry/ResourceTelemetry.ts"; export interface HostPowerMonitorShape { readonly snapshot: Effect.Effect; @@ -30,8 +20,6 @@ export class HostPowerMonitor extends Context.Service, -) { - return runner - .run({ - command, - args, - timeout: COMMAND_TIMEOUT, - timeoutBehavior: "timedOutResult", - outputMode: "truncate", - maxOutputBytes: 32_000, - }) - .pipe(Effect.option); -} - -const readMacShellSnapshot = Effect.fn("background.hostPower.readMacShellSnapshot")(function* () { - const runner = yield* ProcessRunner.ProcessRunner; - const updatedAt = yield* DateTime.now; - const [idleOutput, batteryOutput, pmsetOutput] = yield* Effect.all( - [ - runOptional(runner, "ioreg", ["-c", "IOHIDSystem"]), - runOptional(runner, "pmset", ["-g", "batt"]), - runOptional(runner, "pmset", ["-g"]), - ], - { concurrency: "unbounded" }, +function samePowerState(left: HostPowerSnapshot, right: HostPowerSnapshot): boolean { + return ( + left.source === right.source && + left.idle === right.idle && + left.locked === right.locked && + left.suspended === right.suspended && + left.onBattery === right.onBattery && + left.lowPowerMode === right.lowPowerMode && + 
left.thermalState === right.thermalState && + left.stale === right.stale ); - - const idleSeconds = idleOutput._tag === "Some" ? parseIdleSeconds(idleOutput.value.stdout) : null; - const onBattery = - batteryOutput._tag === "Some" ? parseOnBattery(batteryOutput.value.stdout) : null; - const lowPowerMode = - pmsetOutput._tag === "Some" ? parseLowPowerMode(pmsetOutput.value.stdout) : null; - - return { - source: "node-macos-shell", - idle: boolState(idleSeconds === null ? null : idleSeconds >= 60), - idleSeconds, - locked: "unknown", - suspended: false, - onBattery: boolState(onBattery), - lowPowerMode: boolState(lowPowerMode), - thermalState: parseThermalState(""), - stale: false, - updatedAt, - } satisfies HostPowerSnapshot; -}); +} export const make = Effect.fn("background.hostPower.make")(function* ( initialSource: HostPowerSnapshot["source"] = "unknown", ) { const initial = makeUnknownSnapshot(initialSource, yield* DateTime.now); const latestRef = yield* Ref.make(initial); - const demandActiveRef = yield* Ref.make(false); const changes = yield* PubSub.sliding(1); const report: HostPowerMonitorShape["report"] = (snapshot) => - Ref.set(latestRef, snapshot).pipe( - Effect.andThen(PubSub.publish(changes, snapshot)), + Ref.modify(latestRef, (current) => [!samePowerState(current, snapshot), snapshot]).pipe( + Effect.flatMap((changed) => (changed ? 
PubSub.publish(changes, snapshot) : Effect.void)), Effect.asVoid, ); return HostPowerMonitor.of({ snapshot: Ref.get(latestRef), report, - setDemandActive: (active) => Ref.set(demandActiveRef, active), + setDemandActive: () => Effect.void, streamChanges: Stream.fromPubSub(changes), }); }); -const unknownLayer = Layer.effect(HostPowerMonitor, make("unknown")); -const linuxLayer = Layer.effect(HostPowerMonitor, make("node-linux")); -const windowsLayer = Layer.effect(HostPowerMonitor, make("node-windows")); - -const macShellLayer = Layer.effect( +export const layer = Layer.effect( HostPowerMonitor, Effect.gen(function* () { - const serverSettings = yield* ServerSettingsService; - const monitor = yield* make("node-macos-shell"); - const demandActiveRef = yield* Ref.make(true); - const setDemandActive: HostPowerMonitorShape["setDemandActive"] = (active) => - Ref.set(demandActiveRef, active); - const getPollInterval = Effect.gen(function* () { - const demandActive = yield* Ref.get(demandActiveRef); - const settings = yield* serverSettings.getSettings.pipe( - Effect.map(resolveServerBackgroundActivitySettings), - Effect.catch(() => Effect.succeed(getBackgroundActivityPresetSettings("balanced"))), - ); - return demandActive - ? 
settings.hostPowerMonitorActiveInterval - : settings.hostPowerMonitorIdleInterval; - }); - const adaptiveMonitor = HostPowerMonitor.of({ - snapshot: monitor.snapshot, - report: monitor.report, - setDemandActive, - streamChanges: monitor.streamChanges, - }); - yield* readMacShellSnapshot().pipe( - Effect.flatMap(adaptiveMonitor.report), - Effect.ignoreCause({ log: true }), + const telemetry = yield* ResourceTelemetry.ResourceTelemetry; + const initial = yield* telemetry.latest; + const monitor = yield* make(initial.power.source); + yield* monitor.report(initial.power); + yield* telemetry.changes.pipe( + Stream.map((snapshot) => snapshot.power), + Stream.runForEach(monitor.report), + Effect.forkScoped, ); - yield* Effect.forever( - getPollInterval.pipe( - Effect.flatMap((interval) => Effect.sleep(Duration.max(interval, Duration.seconds(5)))), - Effect.andThen(readMacShellSnapshot()), - Effect.flatMap(adaptiveMonitor.report), - Effect.ignoreCause({ log: true }), - ), - ).pipe(Effect.forkScoped); - return adaptiveMonitor; - }), -).pipe(Layer.provide(ProcessRunner.layer)); - -export const layer = Layer.unwrap( - Effect.sync(() => { - switch (process.platform) { - case "darwin": - return macShellLayer; - case "linux": - return linuxLayer; - case "win32": - return windowsLayer; - default: - return unknownLayer; - } + return monitor; }), ); diff --git a/apps/server/src/cli/config.ts b/apps/server/src/cli/config.ts index 7182854e18c..78c9f99a2cc 100644 --- a/apps/server/src/cli/config.ts +++ b/apps/server/src/cli/config.ts @@ -298,6 +298,8 @@ export const resolveServerConfig = ( () => mode === "desktop", ); const desktopBootstrapToken = bootstrap?.desktopBootstrapToken; + const desktopTelemetryFd = bootstrap?.desktopTelemetryFd; + const resourceMonitorPath = bootstrap?.resourceMonitorPath; const autoBootstrapProjectFromCwd = Option.getOrElse( resolveOptionPrecedence( Option.fromUndefinedOr(options?.forceAutoBootstrapProjectFromCwd), @@ -370,6 +372,8 @@ export const 
resolveServerConfig = ( noBrowser, startupPresentation, desktopBootstrapToken, + desktopTelemetryFd, + resourceMonitorPath, autoBootstrapProjectFromCwd, logWebSocketEvents, tailscaleServeEnabled, diff --git a/apps/server/src/config.ts b/apps/server/src/config.ts index b0a23cb273c..3269dd11568 100644 --- a/apps/server/src/config.ts +++ b/apps/server/src/config.ts @@ -69,6 +69,8 @@ export interface ServerConfigShape extends ServerDerivedPaths { readonly noBrowser: boolean; readonly startupPresentation: StartupPresentation; readonly desktopBootstrapToken: string | undefined; + readonly desktopTelemetryFd?: number | undefined; + readonly resourceMonitorPath?: string | undefined; readonly autoBootstrapProjectFromCwd: boolean; readonly logWebSocketEvents: boolean; readonly tailscaleServeEnabled: boolean; @@ -171,6 +173,8 @@ export class ServerConfig extends Context.Service Effect.void, - unref: Effect.succeed(Effect.void), - stdin: Sink.drain, - stdout: Stream.make(encoder.encode(result.stdout ?? "")), - stderr: Stream.make(encoder.encode(result.stderr ?? "")), - all: Stream.empty, - getInputFd: () => Sink.drain, - getOutputFd: () => Stream.empty, +function makeTelemetryLayer( + snapshot: ResourceMonitorSnapshotEvent, + desktopSnapshot?: DesktopHostTelemetrySnapshot, +) { + const nativeLayer = NativeTelemetryClient.layerTest({ + sampleNow: Effect.succeed(snapshot), + health: Effect.succeed({ + status: "healthy", + hello: Option.none(), + lastSampleAt: Option.some(DateTime.makeUnsafe(snapshot.sampledAtUnixMs)), + lastError: Option.none(), + restartCount: 0, + }), }); + const desktopLayer = desktopSnapshot + ? 
DesktopTelemetryReceiver.layerTest({ + latest: Effect.succeedSome(desktopSnapshot), + health: Effect.succeed({ + status: "healthy", + lastSampleAt: Option.some(DateTime.makeUnsafe(desktopSnapshot.sampledAtUnixMs)), + lastError: Option.none(), + }), + }) + : DesktopTelemetryReceiver.layerTest(); + return ResourceTelemetry.layer.pipe( + Layer.provide(Layer.mergeAll(nativeLayer, desktopLayer, ResourceAttribution.layer)), + ); } describe("ProcessDiagnostics", () => { - it.effect("parses POSIX ps rows with full commands", () => - Effect.sync(() => { - const rows = ProcessDiagnostics.parsePosixProcessRows( - [ - " 10 1 10 Ss 0.0 1024 01:02.03 /usr/bin/node server.js", - " 11 10 10 S+ 12.5 20480 00:04 codex app-server --config /tmp/one two", - ].join("\n"), - ); - - expect(rows).toEqual([ + it.effect("projects live process data from resource telemetry", () => + Effect.gen(function* () { + const snapshot = makeNativeSnapshot([ { - pid: 10, + pid: process.pid, ppid: 1, - pgid: 10, - status: "Ss", + startTimeMs: 1_000, + runTimeMs: 60_000, + name: "node", + command: "t3 server", + status: "Running", cpuPercent: 0, - rssBytes: 1024 * 1024, - elapsed: "01:02.03", - command: "/usr/bin/node server.js", + cpuTimeMs: 100, + residentBytes: 1_024, + virtualBytes: 2_048, + ioReadBytes: 100, + ioWriteBytes: 200, + ioSemantics: "storage", }, { - pid: 11, - ppid: 10, - pgid: 10, - status: "S+", - cpuPercent: 12.5, - rssBytes: 20480 * 1024, - elapsed: "00:04", - command: "codex app-server --config /tmp/one two", + pid: 4_242, + ppid: process.pid, + startTimeMs: 2_000, + runTimeMs: 4_000, + name: "agent", + command: "codex app-server", + status: "Running", + cpuPercent: 1.5, + cpuTimeMs: 60, + residentBytes: 2_048, + virtualBytes: 4_096, + ioReadBytes: 300, + ioWriteBytes: 400, + ioSemantics: "storage", }, ]); - }), - ); + const telemetryLayer = makeTelemetryLayer(snapshot); + const layer = ProcessDiagnostics.layer.pipe(Layer.provideMerge(telemetryLayer)); - it.effect("aggregates only 
descendants of the server process", () => - Effect.sync(() => { - const diagnostics = ProcessDiagnostics.aggregateProcessDiagnostics({ - serverPid: 100, - readAt: DateTime.makeUnsafe("2026-05-05T10:00:00.000Z"), - rows: [ - { - pid: 100, - ppid: 1, - pgid: 100, - status: "S", - cpuPercent: 0, - rssBytes: 1_000, - elapsed: "01:00", - command: "t3 server", - }, - { - pid: 101, - ppid: 100, - pgid: 100, - status: "S", - cpuPercent: 1.5, - rssBytes: 2_000, - elapsed: "00:20", - command: "codex app-server", - }, - { - pid: 102, - ppid: 101, - pgid: 100, - status: "R", - cpuPercent: 3.25, - rssBytes: 4_000, - elapsed: "00:05", - command: "git status", - }, - { - pid: 200, - ppid: 1, - pgid: 200, - status: "S", - cpuPercent: 99, - rssBytes: 8_000, - elapsed: "00:01", - command: "unrelated", - }, - { - pid: 201, - ppid: 100, - pgid: 100, - status: "R", - cpuPercent: 9, - rssBytes: 9_000, - elapsed: "00:00", - command: "ps -axo pid=,ppid=,pgid=,stat=,pcpu=,rss=,etime=,command=", - }, - ], - }); - - expect(diagnostics.serverPid).toBe(100); - expect(DateTime.formatIso(diagnostics.readAt)).toBe("2026-05-05T10:00:00.000Z"); - expect(diagnostics.processCount).toBe(2); - expect(diagnostics.totalRssBytes).toBe(6_000); - expect(diagnostics.totalCpuPercent).toBe(4.75); - expect(diagnostics.processes.map((process) => process.pid)).toEqual([101, 102]); - expect(diagnostics.processes.map((process) => process.depth)).toEqual([0, 1]); - expect(Option.getOrNull(diagnostics.processes[0]!.pgid)).toBe(100); - expect(diagnostics.processes[0]?.childPids).toEqual([102]); - }), - ); - - it.effect("preserves ascending sibling order for nested descendants", () => - Effect.sync(() => { - const diagnostics = ProcessDiagnostics.aggregateProcessDiagnostics({ - serverPid: 100, - readAt: DateTime.makeUnsafe("2026-05-05T10:00:00.000Z"), - rows: [ - { - pid: 101, - ppid: 100, - pgid: 100, - status: "S", - cpuPercent: 0, - rssBytes: 100, - elapsed: "00:10", - command: "agent", - }, - { - pid: 103, - ppid: 
101, - pgid: 100, - status: "S", - cpuPercent: 0, - rssBytes: 100, - elapsed: "00:10", - command: "child-b", - }, - { - pid: 102, - ppid: 101, - pgid: 100, - status: "S", - cpuPercent: 0, - rssBytes: 100, - elapsed: "00:10", - command: "child-a", - }, - ], - }); + const diagnostics = yield* Effect.gen(function* () { + const telemetry = yield* ResourceTelemetry.ResourceTelemetry; + const processDiagnostics = yield* ProcessDiagnostics.ProcessDiagnostics; + yield* telemetry.refresh; + return yield* processDiagnostics.read; + }).pipe(Effect.provide(layer)); - expect(diagnostics.processes.map((process) => process.pid)).toEqual([101, 102, 103]); + expect(diagnostics.processes.map((process) => process.pid)).toEqual([4242]); + expect(diagnostics.processes[0]?.startTimeMs).toBe(2_000); + expect(diagnostics.processes[0]?.cpuPercent).toBe(1.5); + expect(diagnostics.processes[0]?.rssBytes).toBe(2_048); }), ); - it.effect("queries processes through the ChildProcessSpawner service", () => + it.effect("rejects stale process identities before signaling", () => Effect.gen(function* () { - const commands: Array<{ readonly command: string; readonly args: ReadonlyArray }> = - []; - const spawnerLayer = Layer.succeed( - ChildProcessSpawner.ChildProcessSpawner, - ChildProcessSpawner.make((command) => { - const childProcess = command as unknown as { - readonly command: string; - readonly args: ReadonlyArray; - }; - commands.push({ command: childProcess.command, args: childProcess.args }); - return Effect.succeed( - mockHandle({ - stdout: [ - ` ${process.pid} 1 ${process.pid} Ss 0.0 1024 01:02.03 t3 server`, - ` 4242 ${process.pid} ${process.pid} S 1.5 2048 00:04 agent`, - ].join("\n"), - }), - ); - }), - ); - const layer = ProcessDiagnostics.layer.pipe(Layer.provide(spawnerLayer)); + const snapshot = makeNativeSnapshot([]); + const telemetryLayer = makeTelemetryLayer(snapshot); + const layer = ProcessDiagnostics.layer.pipe(Layer.provide(telemetryLayer)); - const diagnostics = yield* 
Effect.service(ProcessDiagnostics.ProcessDiagnostics).pipe( - Effect.flatMap((pd) => pd.read), + const result = yield* Effect.service(ProcessDiagnostics.ProcessDiagnostics).pipe( + Effect.flatMap((processDiagnostics) => + processDiagnostics.signal({ + pid: 4_242, + startTimeMs: 2_000, + signal: "SIGINT", + }), + ), Effect.provide(layer), ); - expect(diagnostics.processes.map((process) => process.pid)).toEqual([4242]); - expect(commands).toEqual([ - { - command: "ps", - args: ["-axo", "pid=,ppid=,pgid=,stat=,pcpu=,rss=,etime=,command="], - }, - ]); + expect(result).toEqual({ + pid: 4242, + signal: "SIGINT", + signaled: false, + message: Option.some("Process 4242 no longer matches the selected process identity."), + }); }), ); - it.effect("does not allow signaling the diagnostics query process", () => + it.effect("rejects Electron processes as signal targets", () => Effect.gen(function* () { - const spawnerLayer = Layer.succeed( - ChildProcessSpawner.ChildProcessSpawner, - ChildProcessSpawner.make(() => - Effect.succeed( - mockHandle({ - stdout: [ - ` ${process.pid} 1 ${process.pid} Ss 0.0 1024 01:02.03 t3 server`, - ` 4242 ${process.pid} ${process.pid} R 1.5 2048 00:00 ps -axo pid=,ppid=,pgid=,stat=,pcpu=,rss=,etime=,command=`, - ].join("\n"), - }), - ), - ), + const sampledAtUnixMs = DateTime.toEpochMillis( + DateTime.makeUnsafe("2026-05-05T10:00:00.000Z"), ); - const layer = ProcessDiagnostics.layer.pipe(Layer.provide(spawnerLayer)); + const snapshot = makeNativeSnapshot([ + { + pid: 4_242, + ppid: 1, + startTimeMs: 2_000, + runTimeMs: 4_000, + name: "electron", + command: "electron", + status: "Running", + cpuPercent: 1.5, + cpuTimeMs: 60, + residentBytes: 2_048, + virtualBytes: 4_096, + ioReadBytes: 300, + ioWriteBytes: 400, + ioSemantics: "storage", + }, + ]); + const sampledAt = DateTime.makeUnsafe(sampledAtUnixMs); + const telemetryLayer = makeTelemetryLayer(snapshot, { + version: 1, + type: "desktopTelemetry", + sequence: 1, + sampledAtUnixMs, + power: { + 
source: "electron-main", + idle: "false", + idleSeconds: 0, + locked: "false", + suspended: false, + onBattery: "false", + lowPowerMode: "unknown", + thermalState: "nominal", + stale: false, + updatedAt: sampledAt, + }, + speedLimitPercent: Option.none(), + electronProcesses: [ + { + pid: 4_242, + creationTimeMs: 2_000, + type: "Browser", + name: "electron", + cpuPercent: 1.5, + idleWakeupsPerSecond: 0, + workingSetBytes: 2_048, + peakWorkingSetBytes: 2_048, + }, + ], + }); + const layer = ProcessDiagnostics.layer.pipe(Layer.provide(telemetryLayer)); const result = yield* Effect.service(ProcessDiagnostics.ProcessDiagnostics).pipe( - Effect.flatMap((pd) => pd.signal({ pid: 4242, signal: "SIGINT" })), + Effect.flatMap((processDiagnostics) => + processDiagnostics.signal({ + pid: 4_242, + startTimeMs: 2_000, + signal: "SIGKILL", + }), + ), Effect.provide(layer), ); expect(result).toEqual({ - pid: 4242, - signal: "SIGINT", + pid: 4_242, + signal: "SIGKILL", signaled: false, - message: Option.some("Process 4242 is not a live descendant of the T3 server."), + message: Option.some("Process 4242 is not a signalable T3 backend descendant."), }); }), ); diff --git a/apps/server/src/diagnostics/ProcessDiagnostics.ts b/apps/server/src/diagnostics/ProcessDiagnostics.ts index f5f746134f2..70997379478 100644 --- a/apps/server/src/diagnostics/ProcessDiagnostics.ts +++ b/apps/server/src/diagnostics/ProcessDiagnostics.ts @@ -1,40 +1,23 @@ import type { + ResourceTelemetryProcessCategory, ServerProcessDiagnosticsEntry, ServerProcessDiagnosticsResult, ServerProcessSignal, ServerSignalProcessResult, } from "@t3tools/contracts"; -import { HostProcessPlatform } from "@t3tools/shared/hostProcess"; import * as Context from "effect/Context"; -import * as DateTime from "effect/DateTime"; -import * as Duration from "effect/Duration"; import * as Effect from "effect/Effect"; import * as Layer from "effect/Layer"; import * as Option from "effect/Option"; import * as Schema from "effect/Schema"; 
-import { ChildProcess, ChildProcessSpawner } from "effect/unstable/process"; -import { collectUint8StreamText } from "../stream/collectUint8StreamText.ts"; - -export interface ProcessRow { - readonly pid: number; - readonly ppid: number; - readonly pgid: number | null; - readonly status: string; - readonly cpuPercent: number; - readonly rssBytes: number; - readonly elapsed: string; - readonly command: string; -} - -const PROCESS_QUERY_TIMEOUT_MS = 1_000; -const POSIX_PROCESS_QUERY_COMMAND = "pid=,ppid=,pgid=,stat=,pcpu=,rss=,etime=,command="; -const PROCESS_QUERY_MAX_OUTPUT_BYTES = 2 * 1024 * 1024; +import * as ResourceTelemetry from "../resourceTelemetry/ResourceTelemetry.ts"; export interface ProcessDiagnosticsShape { readonly read: Effect.Effect; readonly signal: (input: { readonly pid: number; + readonly startTimeMs: number; readonly signal: ServerProcessSignal; }) => Effect.Effect; } @@ -44,420 +27,161 @@ export class ProcessDiagnostics extends Context.Service< ProcessDiagnosticsShape >()("t3/diagnostics/ProcessDiagnostics") {} -class ProcessDiagnosticsError extends Schema.TaggedErrorClass()( - "ProcessDiagnosticsError", +export class ProcessIdentityChanged extends Schema.TaggedErrorClass()( + "ProcessIdentityChanged", { - message: Schema.String, - cause: Schema.optional(Schema.Defect()), + pid: Schema.Number, + startTimeMs: Schema.Number, }, -) {} -const isProcessDiagnosticsError = Schema.is(ProcessDiagnosticsError); - -function toProcessDiagnosticsError(message: string, cause?: unknown): ProcessDiagnosticsError { - return new ProcessDiagnosticsError({ - message, - ...(cause === undefined ? {} : { cause }), - }); -} - -function parsePositiveInt(value: string): number | null { - const parsed = Number.parseInt(value, 10); - return Number.isInteger(parsed) && parsed > 0 ? parsed : null; -} - -function parseNonNegativeInt(value: string): number | null { - const parsed = Number.parseInt(value, 10); - return Number.isInteger(parsed) && parsed >= 0 ? 
parsed : null; -} - -function parseNumber(value: string): number | null { - const parsed = Number.parseFloat(value); - return Number.isFinite(parsed) ? parsed : null; -} - -export function parsePosixProcessRows(output: string): ReadonlyArray { - const rows: ProcessRow[] = []; - const rowPattern = - /^\s*(\d+)\s+(\d+)\s+(-?\d+)\s+(\S+)\s+([+-]?(?:\d+\.?\d*|\.\d+))\s+(\d+)\s+(\S+)\s+(.+)$/; - - for (const line of output.split(/\r?\n/)) { - if (line.trim().length === 0) continue; - - const match = rowPattern.exec(line); - if (!match) continue; - - const pidText = match[1]; - const ppidText = match[2]; - const pgidText = match[3]; - const status = match[4]; - const cpuText = match[5]; - const rssText = match[6]; - const elapsed = match[7]; - const command = match[8]; - if ( - pidText === undefined || - ppidText === undefined || - pgidText === undefined || - status === undefined || - cpuText === undefined || - rssText === undefined || - elapsed === undefined || - command === undefined - ) { - continue; - } - - const pid = parsePositiveInt(pidText); - const ppid = parseNonNegativeInt(ppidText); - const pgid = Number.parseInt(pgidText, 10); - const cpuPercent = parseNumber(cpuText); - const rssKiB = parseNonNegativeInt(rssText); - if ( - pid === null || - ppid === null || - !Number.isInteger(pgid) || - cpuPercent === null || - rssKiB === null || - !status || - !elapsed || - !command - ) { - continue; - } - - rows.push({ - pid, - ppid, - pgid, - status, - cpuPercent, - rssBytes: rssKiB * 1024, - elapsed, - command, - }); +) { + override get message(): string { + return `Process ${this.pid} no longer matches start time ${this.startTimeMs}.`; } - - return rows; -} - -function normalizeWindowsProcessRow(value: unknown): ProcessRow | null { - if (typeof value !== "object" || value === null) return null; - const record = value as Record; - const pid = typeof record.ProcessId === "number" ? record.ProcessId : null; - const ppid = typeof record.ParentProcessId === "number" ? 
record.ParentProcessId : null; - const commandLine = - typeof record.CommandLine === "string" && record.CommandLine.trim().length > 0 - ? record.CommandLine - : typeof record.Name === "string" - ? record.Name - : null; - const workingSet = - typeof record.WorkingSetSize === "number" && Number.isFinite(record.WorkingSetSize) - ? Math.max(0, Math.round(record.WorkingSetSize)) - : 0; - const cpuPercent = - typeof record.PercentProcessorTime === "number" && Number.isFinite(record.PercentProcessorTime) - ? Math.max(0, record.PercentProcessorTime) - : 0; - - if (!pid || pid <= 0 || ppid === null || ppid < 0 || !commandLine) return null; - return { - pid, - ppid, - pgid: null, - status: typeof record.Status === "string" && record.Status.length > 0 ? record.Status : "Live", - cpuPercent, - rssBytes: workingSet, - elapsed: "", - command: commandLine, - }; } -function parseWindowsProcessRows(output: string): ReadonlyArray { - if (output.trim().length === 0) return []; - try { - const parsed = JSON.parse(output) as unknown; - const records = Array.isArray(parsed) ? parsed : [parsed]; - return records.flatMap((record) => { - const row = normalizeWindowsProcessRow(record); - return row ? [row] : []; - }); - } catch { - return []; +export class ProcessSignalFailed extends Schema.TaggedErrorClass()( + "ProcessSignalFailed", + { + pid: Schema.Number, + signal: Schema.String, + cause: Schema.Defect(), + }, +) { + override get message(): string { + return `Failed to signal process ${this.pid} with ${this.signal}.`; } } -export function buildDescendantEntries( - rows: ReadonlyArray, - serverPid: number, -): ReadonlyArray { - const childrenByParent = new Map(); - for (const row of rows) { - const children = childrenByParent.get(row.ppid) ?? []; - children.push(row); - childrenByParent.set(row.ppid, children); - } - - const entries: ServerProcessDiagnosticsEntry[] = []; - const visited = new Set(); - const stack = [...(childrenByParent.get(serverPid) ?? 
[])] - .toSorted((left, right) => left.pid - right.pid) - .map((row) => ({ row, depth: 0 })); - - while (stack.length > 0) { - const item = stack.shift(); - if (!item || visited.has(item.row.pid)) continue; - visited.add(item.row.pid); - - const children = [...(childrenByParent.get(item.row.pid) ?? [])].toSorted( - (left, right) => left.pid - right.pid, - ); - entries.push({ - pid: item.row.pid, - ppid: item.row.ppid, - pgid: Option.fromNullishOr(item.row.pgid), - status: item.row.status, - cpuPercent: item.row.cpuPercent, - rssBytes: item.row.rssBytes, - elapsed: item.row.elapsed || "n/a", - command: item.row.command, - depth: item.depth, - childPids: children.map((child) => child.pid), - }); - - stack.unshift(...children.map((row) => ({ row, depth: item.depth + 1 }))); - } +export type ProcessDiagnosticsError = ProcessIdentityChanged | ProcessSignalFailed; - return entries; +function formatElapsed(runTimeMs: number): string { + const totalSeconds = Math.max(0, Math.floor(runTimeMs / 1_000)); + const hours = Math.floor(totalSeconds / 3_600); + const minutes = Math.floor((totalSeconds % 3_600) / 60); + const seconds = totalSeconds % 60; + return hours > 0 + ? 
`${hours}:${String(minutes).padStart(2, "0")}:${String(seconds).padStart(2, "0")}` + : `${minutes}:${String(seconds).padStart(2, "0")}`; } -export function isDiagnosticsQueryProcess(row: ProcessRow, serverPid: number): boolean { - if (row.ppid !== serverPid) return false; - - const command = row.command.trim(); +function canSignalCategory(category: ResourceTelemetryProcessCategory): boolean { return ( - /(?:^|[/\\])ps\s+-axo\s+pid=,ppid=,pgid=,stat=,pcpu=,rss=,etime=,command=/.test(command) || - (/\bpowershell(?:\.exe)?\b/i.test(command) && - /\bGet-CimInstance\s+Win32_Process\b/i.test(command)) - ); -} - -function makeResult(input: { - readonly serverPid: number; - readonly rows: ReadonlyArray; - readonly readAt: DateTime.Utc; - readonly error?: string; -}): ServerProcessDiagnosticsResult { - const readAt = input.readAt; - const rows = input.rows.filter((row) => !isDiagnosticsQueryProcess(row, input.serverPid)); - const processes = buildDescendantEntries(rows, input.serverPid); - const totalRssBytes = processes.reduce((total, process) => total + process.rssBytes, 0); - const totalCpuPercent = processes.reduce((total, process) => total + process.cpuPercent, 0); - - return { - serverPid: input.serverPid, - readAt, - processCount: processes.length, - totalRssBytes, - totalCpuPercent, - processes, - error: input.error ? Option.some({ message: input.error }) : Option.none(), - }; -} - -interface ProcessOutput { - readonly exitCode: number; - readonly stdout: string; - readonly stderr: string; -} - -const runProcess = Effect.fn("runProcess")( - function* (input: { - readonly command: string; - readonly args: ReadonlyArray; - readonly errorMessage: string; - }) { - const spawner = yield* ChildProcessSpawner.ChildProcessSpawner; - // `ps` and `powershell.exe` are real executables; spawning through cmd.exe - // shell mode would re-tokenize the PowerShell `-Command` payload (which - // contains pipes) before PowerShell ever sees it. 
- const child = yield* spawner.spawn( - ChildProcess.make(input.command, input.args, { - cwd: process.cwd(), - }), - ); - const [stdout, stderr, exitCode] = yield* Effect.all( - [ - collectUint8StreamText({ - stream: child.stdout, - maxBytes: PROCESS_QUERY_MAX_OUTPUT_BYTES, - truncatedMarker: "\n\n[truncated]", - }), - collectUint8StreamText({ - stream: child.stderr, - maxBytes: PROCESS_QUERY_MAX_OUTPUT_BYTES, - truncatedMarker: "\n\n[truncated]", - }), - child.exitCode, - ], - { concurrency: "unbounded" }, - ); - - return { - exitCode, - stdout: stdout.text, - stderr: stderr.text, - } satisfies ProcessOutput; - }, - (effect, input) => - effect.pipe( - Effect.scoped, - Effect.timeoutOption(Duration.millis(PROCESS_QUERY_TIMEOUT_MS)), - Effect.flatMap((result) => - Option.match(result, { - onNone: () => Effect.fail(toProcessDiagnosticsError(`${input.errorMessage} timed out.`)), - onSome: Effect.succeed, - }), - ), - Effect.mapError((cause) => - isProcessDiagnosticsError(cause) - ? cause - : toProcessDiagnosticsError(input.errorMessage, cause), - ), - ), -); - -function readPosixProcessRows(): Effect.Effect< - ReadonlyArray, - ProcessDiagnosticsError, - ChildProcessSpawner.ChildProcessSpawner -> { - return runProcess({ - command: "ps", - args: ["-axo", POSIX_PROCESS_QUERY_COMMAND], - errorMessage: "Failed to query process diagnostics.", - }).pipe( - Effect.flatMap((result) => - result.exitCode !== 0 - ? 
Effect.fail(toProcessDiagnosticsError(result.stderr.trim() || "ps failed.")) - : Effect.succeed(parsePosixProcessRows(result.stdout)), - ), - ); -} - -function readWindowsProcessRows(): Effect.Effect< - ReadonlyArray, - ProcessDiagnosticsError, - ChildProcessSpawner.ChildProcessSpawner -> { - const command = [ - "$processes = Get-CimInstance Win32_Process | ForEach-Object {", - '$perf = Get-CimInstance Win32_PerfFormattedData_PerfProc_Process -Filter "IDProcess = $($_.ProcessId)" -ErrorAction SilentlyContinue;', - "[pscustomobject]@{ ProcessId = $_.ProcessId; ParentProcessId = $_.ParentProcessId; Name = $_.Name; CommandLine = $_.CommandLine; Status = $_.Status; WorkingSetSize = $_.WorkingSetSize; PercentProcessorTime = if ($perf) { $perf.PercentProcessorTime } else { 0 } }", - "};", - "$processes | ConvertTo-Json -Compress -Depth 3", - ].join(" "); - - return runProcess({ - command: "powershell.exe", - args: ["-NoProfile", "-NonInteractive", "-Command", command], - errorMessage: "Failed to query process diagnostics.", - }).pipe( - Effect.flatMap((result) => - result.exitCode !== 0 - ? Effect.fail( - toProcessDiagnosticsError(result.stderr.trim() || "PowerShell process query failed."), - ) - : Effect.succeed(parseWindowsProcessRows(result.stdout)), - ), - ); -} - -export const readProcessRows = Effect.gen(function* () { - const platform = yield* HostProcessPlatform; - return yield* platform === "win32" ? 
readWindowsProcessRows() : readPosixProcessRows(); -}); - -export function aggregateProcessDiagnostics(input: { - readonly serverPid: number; - readonly rows: ReadonlyArray; - readonly readAt: DateTime.Utc; -}): ServerProcessDiagnosticsResult { - return makeResult(input); -} - -function assertDescendantPid( - pid: number, -): Effect.Effect { - if (pid === process.pid) { - return Effect.fail(toProcessDiagnosticsError("Refusing to signal the T3 server process.")); - } - - return readProcessRows.pipe( - Effect.flatMap((rows) => { - const filteredRows = rows.filter((row) => !isDiagnosticsQueryProcess(row, process.pid)); - const descendant = buildDescendantEntries(filteredRows, process.pid).some( - (entry) => entry.pid === pid, - ); - return descendant - ? Effect.void - : Effect.fail( - toProcessDiagnosticsError(`Process ${pid} is not a live descendant of the T3 server.`), - ); - }), + category === "server-child" || category === "provider-root" || category === "terminal-root" ); } export const make = Effect.fn("makeProcessDiagnostics")(function* () { - const spawner = yield* ChildProcessSpawner.ChildProcessSpawner; - - const read: ProcessDiagnosticsShape["read"] = Effect.gen(function* () { - const readAt = yield* DateTime.now; - const rows = yield* readProcessRows.pipe( - Effect.provideService(ChildProcessSpawner.ChildProcessSpawner, spawner), - ); - return makeResult({ serverPid: process.pid, rows, readAt }); - }).pipe( - Effect.catch((error: ProcessDiagnosticsError) => - DateTime.now.pipe( - Effect.map((readAt) => - makeResult({ serverPid: process.pid, rows: [], readAt, error: error.message }), - ), - ), - ), + const telemetry = yield* ResourceTelemetry.ResourceTelemetry; + const read: ProcessDiagnosticsShape["read"] = telemetry.latest.pipe( + Effect.map((snapshot) => { + const processes = snapshot.processes + .filter((entry) => entry.identity.pid !== process.pid) + .map( + (entry): ServerProcessDiagnosticsEntry => ({ + pid: entry.identity.pid, + startTimeMs: 
entry.identity.startTimeMs, + ppid: entry.ppid, + pgid: Option.none(), + status: entry.status || "Unknown", + cpuPercent: entry.cpuPercent, + rssBytes: entry.residentBytes, + elapsed: formatElapsed(entry.runTimeMs), + command: entry.command || entry.name || "unknown", + depth: Math.max(0, entry.depth - 1), + childPids: entry.childPids, + }), + ); + return { + serverPid: process.pid, + readAt: snapshot.readAt, + processCount: processes.length, + totalRssBytes: processes.reduce((total, entry) => total + entry.rssBytes, 0), + totalCpuPercent: processes.reduce((total, entry) => total + entry.cpuPercent, 0), + processes, + error: Option.map(snapshot.health.native.lastError, (message) => ({ message })), + }; + }), ); const signal: ProcessDiagnosticsShape["signal"] = Effect.fn("ProcessDiagnostics.signal")( function* (input) { - return yield* assertDescendantPid(input.pid).pipe( - Effect.provideService(ChildProcessSpawner.ChildProcessSpawner, spawner), - Effect.flatMap(() => - Effect.try({ - try: () => { - process.kill(input.pid, input.signal); - return { - pid: input.pid, - signal: input.signal, - signaled: true, - message: Option.none(), - }; - }, - catch: (cause) => - toProcessDiagnosticsError( - `Failed to signal process ${input.pid} with ${input.signal}.`, - cause, - ), - }), - ), - Effect.catch((error: ProcessDiagnosticsError) => - Effect.succeed({ - pid: input.pid, - signal: input.signal, - signaled: false, - message: Option.some(error.message), - }), - ), + if (input.pid === process.pid) { + return { + pid: input.pid, + signal: input.signal, + signaled: false, + message: Option.some("Refusing to signal the T3 server process."), + }; + } + const current = yield* telemetry.latest; + const selected = current.processes.find( + (entry) => + entry.identity.pid === input.pid && entry.identity.startTimeMs === input.startTimeMs, ); + if (!selected) { + return { + pid: input.pid, + signal: input.signal, + signaled: false, + message: Option.some( + `Process ${input.pid} no 
longer matches the selected process identity.`, + ), + }; + } + if (!canSignalCategory(selected.category)) { + return { + pid: input.pid, + signal: input.signal, + signaled: false, + message: Option.some(`Process ${input.pid} is not a signalable T3 backend descendant.`), + }; + } + return yield* telemetry + .validateProcessIdentity({ + pid: input.pid, + startTimeMs: input.startTimeMs, + }) + .pipe( + Effect.flatMap((valid) => + valid + ? Effect.void + : Effect.fail( + new ProcessIdentityChanged({ + pid: input.pid, + startTimeMs: input.startTimeMs, + }), + ), + ), + Effect.flatMap(() => + Effect.try({ + try: () => { + process.kill(input.pid, input.signal); + return { + pid: input.pid, + signal: input.signal, + signaled: true, + message: Option.none(), + }; + }, + catch: (cause) => + new ProcessSignalFailed({ + pid: input.pid, + signal: input.signal, + cause, + }), + }), + ), + Effect.catch((error) => + Effect.succeed({ + pid: input.pid, + signal: input.signal, + signaled: false, + message: Option.some( + error instanceof Error ? 
error.message : "Failed to signal process.", + ), + }), + ), + ); }, ); diff --git a/apps/server/src/diagnostics/ProcessResourceMonitor.test.ts b/apps/server/src/diagnostics/ProcessResourceMonitor.test.ts index 11d12c012db..0b71785fbf6 100644 --- a/apps/server/src/diagnostics/ProcessResourceMonitor.test.ts +++ b/apps/server/src/diagnostics/ProcessResourceMonitor.test.ts @@ -1,231 +1,132 @@ import { describe, expect, it } from "@effect/vitest"; +import type { ResourceTelemetryHistory } from "@t3tools/contracts"; import * as DateTime from "effect/DateTime"; import * as Effect from "effect/Effect"; +import * as Layer from "effect/Layer"; import * as Option from "effect/Option"; +import * as Stream from "effect/Stream"; -import { - aggregateProcessResourceHistory, - collectMonitoredSamples, -} from "./ProcessResourceMonitor.ts"; +import * as ResourceTelemetry from "../resourceTelemetry/ResourceTelemetry.ts"; +import * as ProcessResourceMonitor from "./ProcessResourceMonitor.ts"; describe("ProcessResourceMonitor", () => { - it.effect("samples the server root process and descendants", () => - Effect.sync(() => { - const sampledAt = DateTime.makeUnsafe("2026-05-05T10:00:00.000Z"); - const samples = collectMonitoredSamples({ - serverPid: 100, - sampledAt, - sampledAtMs: DateTime.toEpochMillis(sampledAt), - rows: [ - { - pid: 100, - ppid: 1, - pgid: 100, - status: "S", - cpuPercent: 2, - rssBytes: 1_000, - elapsed: "01:00", - command: "t3 server", - }, - { - pid: 101, - ppid: 100, - pgid: 100, - status: "S", - cpuPercent: 10, - rssBytes: 2_000, - elapsed: "00:20", - command: "codex app-server", - }, - { - pid: 102, - ppid: 101, - pgid: 100, - status: "R", - cpuPercent: 50, - rssBytes: 3_000, - elapsed: "00:05", - command: "rg needle", - }, + it.effect("projects resource telemetry history into the legacy diagnostics contract", () => + Effect.gen(function* () { + const readAt = DateTime.makeUnsafe("2026-05-05T10:00:00.000Z"); + const history: ResourceTelemetryHistory = { + 
readAt, + windowMs: 60_000, + bucketMs: 10_000, + sampleIntervalMs: 1_000, + retainedSampleCount: 2, + buckets: [ { - pid: 200, - ppid: 1, - pgid: 200, - status: "R", - cpuPercent: 99, - rssBytes: 9_000, - elapsed: "00:05", - command: "unrelated", + startedAt: DateTime.makeUnsafe("2026-05-05T09:59:50.000Z"), + endedAt: readAt, + avgCpuPercent: 15, + maxCpuPercent: 25, + maxRssBytes: 4_096, + ioReadBytes: 1_024, + ioWriteBytes: 2_048, + maxProcessCount: 2, }, ], - }); - - expect(samples.map((sample) => sample.pid)).toEqual([100, 101, 102]); - expect(samples.map((sample) => sample.depth)).toEqual([0, 1, 2]); - expect(samples[0]?.isServerRoot).toBe(true); - expect(samples[1]?.isServerRoot).toBe(false); - }), - ); - - it.effect("rolls samples up by process and CPU time", () => - Effect.sync(() => { - const firstAt = DateTime.makeUnsafe("2026-05-05T10:00:00.000Z"); - const secondAt = DateTime.makeUnsafe("2026-05-05T10:00:05.000Z"); - const samples = [ - ...collectMonitoredSamples({ - serverPid: 100, - sampledAt: firstAt, - sampledAtMs: DateTime.toEpochMillis(firstAt), - rows: [ - { - pid: 100, - ppid: 1, - pgid: 100, - status: "S", - cpuPercent: 10, - rssBytes: 1_000, - elapsed: "01:00", - command: "t3 server", - }, - ], - }), - ...collectMonitoredSamples({ - serverPid: 100, - sampledAt: secondAt, - sampledAtMs: DateTime.toEpochMillis(secondAt), - rows: [ - { - pid: 100, - ppid: 1, - pgid: 100, - status: "S", - cpuPercent: 30, - rssBytes: 2_000, - elapsed: "01:05", - command: "t3 server", - }, - ], - }), - ]; - - const result = aggregateProcessResourceHistory({ - samples, - readAt: secondAt, - readAtMs: DateTime.toEpochMillis(secondAt), - windowMs: 60_000, - bucketMs: 10_000, - lastError: null, - }); - - expect(Option.isNone(result.error)).toBe(true); - expect(result.topProcesses).toHaveLength(1); - expect(result.topProcesses[0]?.avgCpuPercent).toBe(20); - expect(result.topProcesses[0]?.maxCpuPercent).toBe(30); - expect(result.topProcesses[0]?.cpuSecondsApprox).toBe(2); 
- expect(result.totalCpuSecondsApprox).toBe(2); - expect(result.buckets.some((bucket) => bucket.maxCpuPercent === 30)).toBe(true); - }), - ); - - it.effect("keeps a process grouped when elapsed time drifts between samples", () => - Effect.sync(() => { - const firstAt = DateTime.makeUnsafe("2026-05-05T10:00:00.400Z"); - const secondAt = DateTime.makeUnsafe("2026-05-05T10:00:05.900Z"); - const samples = [ - ...collectMonitoredSamples({ - serverPid: 100, - sampledAt: firstAt, - sampledAtMs: DateTime.toEpochMillis(firstAt), - rows: [ - { - pid: 100, - ppid: 1, - pgid: 100, - status: "S", - cpuPercent: 1, - rssBytes: 1_000, - elapsed: "01:00", - command: "t3 server", - }, - ], - }), - ...collectMonitoredSamples({ - serverPid: 100, - sampledAt: secondAt, - sampledAtMs: DateTime.toEpochMillis(secondAt), - rows: [ - { - pid: 100, - ppid: 1, - pgid: 100, - status: "S", - cpuPercent: 2, - rssBytes: 2_000, - elapsed: "01:06", - command: "t3 server", - }, - ], - }), - ]; - - const result = aggregateProcessResourceHistory({ - samples, - readAt: secondAt, - readAtMs: DateTime.toEpochMillis(secondAt), - windowMs: 60_000, - bucketMs: 10_000, - lastError: null, - }); - - expect(result.topProcesses).toHaveLength(1); - expect(result.topProcesses[0]?.isServerRoot).toBe(true); - expect(result.topProcesses[0]?.sampleCount).toBe(2); - expect(result.topProcesses[0]?.maxRssBytes).toBe(2_000); - }), - ); - - it.effect("returns all process summaries in the selected window", () => - Effect.sync(() => { - const sampledAt = DateTime.makeUnsafe("2026-05-05T10:00:00.000Z"); - const samples = collectMonitoredSamples({ - serverPid: 100, - sampledAt, - sampledAtMs: DateTime.toEpochMillis(sampledAt), - rows: [ + topProcesses: [ { - pid: 100, + identity: { pid: process.pid, startTimeMs: 100 }, ppid: 1, - pgid: 100, - status: "S", - cpuPercent: 1, - rssBytes: 1_000, - elapsed: "01:00", + depth: 0, + name: "node", command: "t3 server", + category: "server", + firstSeenAt: 
DateTime.makeUnsafe("2026-05-05T09:59:55.000Z"), + lastSeenAt: readAt, + currentCpuPercent: 5, + avgCpuPercent: 4, + maxCpuPercent: 8, + cpuTimeMs: 1_500, + currentRssBytes: 2_048, + peakRssBytes: 4_096, + ioReadBytes: 1_024, + ioWriteBytes: 2_048, + ioSemantics: "storage", + sampleCount: 2, }, - ...Array.from({ length: 35 }, (_, index) => ({ - pid: 200 + index, - ppid: index === 0 ? 100 : 199 + index, - pgid: 100, - status: "S", - cpuPercent: 35 - index, - rssBytes: 2_000 + index, - elapsed: "00:10", - command: `worker ${index}`, - })), ], - }); + health: { + native: { + status: "degraded", + lastSampleAt: Option.some(readAt), + lastError: Option.some("collector stalled"), + }, + desktop: { + status: "healthy", + lastSampleAt: Option.some(readAt), + lastError: Option.none(), + }, + sidecarVersion: Option.some("0.1.0"), + sidecarPid: Option.some(9_000), + restartCount: 1, + collectionDurationMicros: 250, + scannedProcessCount: 80, + retainedProcessCount: 2, + inaccessibleProcessCount: 0, + }, + }; + const telemetry: ResourceTelemetry.ResourceTelemetryShape = { + latest: Effect.die("unused"), + changes: Stream.empty, + readHistory: () => Effect.succeed(history), + refresh: Effect.die("unused"), + validateProcessIdentity: () => Effect.die("unused"), + retry: Effect.die("unused"), + }; + const layer = ProcessResourceMonitor.layer.pipe( + Layer.provide( + Layer.succeed( + ResourceTelemetry.ResourceTelemetry, + ResourceTelemetry.ResourceTelemetry.of(telemetry), + ), + ), + ); - const result = aggregateProcessResourceHistory({ - samples, - readAt: sampledAt, - readAtMs: DateTime.toEpochMillis(sampledAt), - windowMs: 60_000, - bucketMs: 10_000, - lastError: null, - }); + const result = yield* Effect.service(ProcessResourceMonitor.ProcessResourceMonitor).pipe( + Effect.flatMap((monitor) => + monitor.readHistory({ + windowMs: 60_000, + bucketMs: 10_000, + }), + ), + Effect.provide(layer), + ); - expect(result.topProcesses).toHaveLength(36); - 
expect(result.topProcesses.some((process) => process.command === "worker 34")).toBe(true); + expect(result.totalCpuSecondsApprox).toBe(1.5); + expect(result.topProcesses).toEqual([ + { + processKey: `${process.pid}:100`, + pid: process.pid, + ppid: 1, + command: "t3 server", + depth: 0, + isServerRoot: true, + firstSeenAt: DateTime.makeUnsafe("2026-05-05T09:59:55.000Z"), + lastSeenAt: readAt, + currentCpuPercent: 5, + avgCpuPercent: 4, + maxCpuPercent: 8, + cpuSecondsApprox: 1.5, + currentRssBytes: 2_048, + maxRssBytes: 4_096, + sampleCount: 2, + }, + ]); + expect(result.buckets[0]).toMatchObject({ + avgCpuPercent: 15, + maxCpuPercent: 25, + maxRssBytes: 4_096, + maxProcessCount: 2, + }); + expect(result.error).toEqual(Option.some({ message: "collector stalled" })); }), ); }); diff --git a/apps/server/src/diagnostics/ProcessResourceMonitor.ts b/apps/server/src/diagnostics/ProcessResourceMonitor.ts index efeeb66256d..455db721b20 100644 --- a/apps/server/src/diagnostics/ProcessResourceMonitor.ts +++ b/apps/server/src/diagnostics/ProcessResourceMonitor.ts @@ -1,45 +1,13 @@ import type { - ServerProcessResourceHistoryBucket, ServerProcessResourceHistoryInput, ServerProcessResourceHistoryResult, - ServerProcessResourceHistorySummary, } from "@t3tools/contracts"; import * as Context from "effect/Context"; -import * as DateTime from "effect/DateTime"; import * as Effect from "effect/Effect"; import * as Layer from "effect/Layer"; import * as Option from "effect/Option"; -import * as Ref from "effect/Ref"; -import { ChildProcessSpawner } from "effect/unstable/process"; -import { - buildDescendantEntries, - isDiagnosticsQueryProcess, - type ProcessRow, - readProcessRows, -} from "./ProcessDiagnostics.ts"; - -const SAMPLE_INTERVAL_MS = 5_000; -const RETENTION_MS = 60 * 60_000; -const MAX_RETAINED_SAMPLES = 20_000; - -export interface ProcessResourceSample { - readonly sampledAt: DateTime.Utc; - readonly sampledAtMs: number; - readonly processKey: string; - readonly pid: 
number; - readonly ppid: number; - readonly command: string; - readonly cpuPercent: number; - readonly rssBytes: number; - readonly depth: number; - readonly isServerRoot: boolean; -} - -interface MonitorState { - readonly samples: ReadonlyArray; - readonly lastError: string | null; -} +import * as ResourceTelemetry from "../resourceTelemetry/ResourceTelemetry.ts"; export interface ProcessResourceMonitorShape { readonly readHistory: ( @@ -52,246 +20,51 @@ export class ProcessResourceMonitor extends Context.Service< ProcessResourceMonitorShape >()("t3/diagnostics/ProcessResourceMonitor") {} -function dateTimeFromMillis(ms: number): DateTime.Utc { - return DateTime.makeUnsafe(ms); -} - -function sampleKey(row: Pick): string { - return `${row.pid}:${row.command}`; -} - -function findServerRootRow(rows: ReadonlyArray, serverPid: number): ProcessRow | null { - return rows.find((row) => row.pid === serverPid) ?? null; -} - -export function collectMonitoredSamples(input: { - readonly rows: ReadonlyArray; - readonly serverPid: number; - readonly sampledAt: DateTime.Utc; - readonly sampledAtMs: number; -}): ReadonlyArray { - const rows = input.rows.filter((row) => !isDiagnosticsQueryProcess(row, input.serverPid)); - const root = findServerRootRow(rows, input.serverPid); - const descendants = buildDescendantEntries(rows, input.serverPid); - const samples: ProcessResourceSample[] = []; - - if (root) { - samples.push({ - sampledAt: input.sampledAt, - sampledAtMs: input.sampledAtMs, - processKey: sampleKey(root), - pid: root.pid, - ppid: root.ppid, - command: root.command, - cpuPercent: root.cpuPercent, - rssBytes: root.rssBytes, - depth: 0, - isServerRoot: true, - }); - } - - for (const process of descendants) { - samples.push({ - sampledAt: input.sampledAt, - sampledAtMs: input.sampledAtMs, - processKey: sampleKey(process), - pid: process.pid, - ppid: process.ppid, - command: process.command, - cpuPercent: process.cpuPercent, - rssBytes: process.rssBytes, - depth: 
process.depth + 1, - isServerRoot: false, - }); - } - - return samples; -} - -function trimSamples( - samples: ReadonlyArray, - nowMs: number, -): ReadonlyArray { - const minSampledAtMs = nowMs - RETENTION_MS; - const retained = samples.filter((sample) => sample.sampledAtMs >= minSampledAtMs); - return retained.length <= MAX_RETAINED_SAMPLES - ? retained - : retained.slice(retained.length - MAX_RETAINED_SAMPLES); -} - -function summarizeProcesses( - samples: ReadonlyArray, -): ReadonlyArray { - const groups = new Map(); - for (const sample of samples) { - const processSamples = groups.get(sample.processKey) ?? []; - processSamples.push(sample); - groups.set(sample.processKey, processSamples); - } - - return [...groups.entries()] - .map(([processKey, processSamples]) => { - const sorted = processSamples.toSorted((left, right) => left.sampledAtMs - right.sampledAtMs); - const first = sorted[0]!; - const latest = sorted[sorted.length - 1]!; - const cpuPercentTotal = sorted.reduce((total, sample) => total + sample.cpuPercent, 0); - const maxCpuPercent = Math.max(...sorted.map((sample) => sample.cpuPercent)); - const maxRssBytes = Math.max(...sorted.map((sample) => sample.rssBytes)); - const cpuSecondsApprox = sorted.reduce( - (total, sample) => total + (sample.cpuPercent / 100) * (SAMPLE_INTERVAL_MS / 1_000), - 0, - ); - - return { - processKey, - pid: latest.pid, - ppid: latest.ppid, - command: latest.command, - depth: latest.depth, - isServerRoot: latest.isServerRoot, - firstSeenAt: first.sampledAt, - lastSeenAt: latest.sampledAt, - currentCpuPercent: latest.cpuPercent, - avgCpuPercent: cpuPercentTotal / sorted.length, - maxCpuPercent, - cpuSecondsApprox, - currentRssBytes: latest.rssBytes, - maxRssBytes, - sampleCount: sorted.length, - } satisfies ServerProcessResourceHistorySummary; - }) - .toSorted((left, right) => right.cpuSecondsApprox - left.cpuSecondsApprox); -} - -function buildBuckets(input: { - readonly samples: ReadonlyArray; - readonly nowMs: number; - 
readonly windowMs: number; - readonly bucketMs: number; -}): ReadonlyArray { - const bucketMs = Math.max(1_000, input.bucketMs); - const windowStartMs = input.nowMs - input.windowMs; - const buckets: ServerProcessResourceHistoryBucket[] = []; - - for (let startedAtMs = windowStartMs; startedAtMs < input.nowMs; startedAtMs += bucketMs) { - const endedAtMs = Math.min(input.nowMs, startedAtMs + bucketMs); - const bucketSamples = input.samples.filter( - (sample) => - sample.sampledAtMs >= startedAtMs && - (endedAtMs === input.nowMs - ? sample.sampledAtMs <= endedAtMs - : sample.sampledAtMs < endedAtMs), - ); - const samplesByRead = new Map(); - for (const sample of bucketSamples) { - const samplesAtTime = samplesByRead.get(sample.sampledAtMs) ?? []; - samplesAtTime.push(sample); - samplesByRead.set(sample.sampledAtMs, samplesAtTime); - } - - const readTotals = [...samplesByRead.values()].map((samplesAtTime) => ({ - cpuPercent: samplesAtTime.reduce((total, sample) => total + sample.cpuPercent, 0), - rssBytes: samplesAtTime.reduce((total, sample) => total + sample.rssBytes, 0), - processCount: samplesAtTime.length, - })); - const avgCpuPercent = - readTotals.length === 0 - ? 0 - : readTotals.reduce((total, read) => total + read.cpuPercent, 0) / readTotals.length; - - buckets.push({ - startedAt: dateTimeFromMillis(startedAtMs), - endedAt: dateTimeFromMillis(endedAtMs), - avgCpuPercent, - maxCpuPercent: readTotals.length ? Math.max(...readTotals.map((read) => read.cpuPercent)) : 0, - maxRssBytes: readTotals.length ? Math.max(...readTotals.map((read) => read.rssBytes)) : 0, - maxProcessCount: readTotals.length - ? 
Math.max(...readTotals.map((read) => read.processCount)) - : 0, - }); - } - - return buckets; -} - -export function aggregateProcessResourceHistory(input: { - readonly samples: ReadonlyArray; - readonly readAt: DateTime.Utc; - readonly readAtMs: number; - readonly windowMs: number; - readonly bucketMs: number; - readonly lastError: string | null; -}): ServerProcessResourceHistoryResult { - const windowMs = Math.max(1_000, input.windowMs); - const bucketMs = Math.max(1_000, input.bucketMs); - const minSampledAtMs = input.readAtMs - windowMs; - const samples = input.samples.filter((sample) => sample.sampledAtMs >= minSampledAtMs); - const topProcesses = summarizeProcesses(samples); - const totalCpuSecondsApprox = samples.reduce( - (total, sample) => total + (sample.cpuPercent / 100) * (SAMPLE_INTERVAL_MS / 1_000), - 0, - ); - - return { - readAt: input.readAt, - windowMs, - bucketMs, - sampleIntervalMs: SAMPLE_INTERVAL_MS, - retainedSampleCount: input.samples.length, - totalCpuSecondsApprox, - buckets: buildBuckets({ samples, nowMs: input.readAtMs, windowMs, bucketMs }), - topProcesses, - error: input.lastError ? 
Option.some({ message: input.lastError }) : Option.none(), - }; -} - export const make = Effect.fn("makeProcessResourceMonitor")(function* () { - const spawner = yield* ChildProcessSpawner.ChildProcessSpawner; - const state = yield* Ref.make({ samples: [], lastError: null }); - - const sampleOnce = Effect.gen(function* () { - const sampledAt = yield* DateTime.now; - const sampledAtMs = DateTime.toEpochMillis(sampledAt); - const rows = yield* readProcessRows.pipe( - Effect.provideService(ChildProcessSpawner.ChildProcessSpawner, spawner), - ); - const samples = collectMonitoredSamples({ - rows, - serverPid: process.pid, - sampledAt, - sampledAtMs, - }); - yield* Ref.update(state, (current) => ({ - samples: trimSamples([...current.samples, ...samples], sampledAtMs), - lastError: null, - })); - }).pipe( - Effect.catch((error: unknown) => - Ref.update(state, (current) => ({ - ...current, - lastError: error instanceof Error ? error.message : "Failed to sample process resources.", - })), - ), - ); - - yield* Effect.forever(sampleOnce.pipe(Effect.andThen(Effect.sleep(SAMPLE_INTERVAL_MS)))).pipe( - Effect.forkScoped, - ); - + const telemetry = yield* ResourceTelemetry.ResourceTelemetry; const readHistory: ProcessResourceMonitorShape["readHistory"] = (input) => - Effect.gen(function* () { - const readAt = yield* DateTime.now; - const readAtMs = DateTime.toEpochMillis(readAt); - const current = yield* Ref.get(state); - return aggregateProcessResourceHistory({ - samples: current.samples, - readAt, - readAtMs, - windowMs: input.windowMs, - bucketMs: input.bucketMs, - lastError: current.lastError, - }); - }); + telemetry.readHistory(input).pipe( + Effect.map((history) => { + const topProcesses = history.topProcesses.map((entry) => ({ + processKey: `${entry.identity.pid}:${entry.identity.startTimeMs}`, + pid: entry.identity.pid, + ppid: entry.ppid, + command: entry.command || entry.name || "unknown", + depth: entry.depth, + isServerRoot: entry.category === "server", + 
firstSeenAt: entry.firstSeenAt, + lastSeenAt: entry.lastSeenAt, + currentCpuPercent: entry.currentCpuPercent, + avgCpuPercent: entry.avgCpuPercent, + maxCpuPercent: entry.maxCpuPercent, + cpuSecondsApprox: entry.cpuTimeMs / 1_000, + currentRssBytes: entry.currentRssBytes, + maxRssBytes: entry.peakRssBytes, + sampleCount: entry.sampleCount, + })); + return { + readAt: history.readAt, + windowMs: history.windowMs, + bucketMs: history.bucketMs, + sampleIntervalMs: history.sampleIntervalMs, + retainedSampleCount: history.retainedSampleCount, + totalCpuSecondsApprox: topProcesses.reduce( + (total, entry) => total + entry.cpuSecondsApprox, + 0, + ), + buckets: history.buckets.map((bucket) => ({ + startedAt: bucket.startedAt, + endedAt: bucket.endedAt, + avgCpuPercent: bucket.avgCpuPercent, + maxCpuPercent: bucket.maxCpuPercent, + maxRssBytes: bucket.maxRssBytes, + maxProcessCount: bucket.maxProcessCount, + })), + topProcesses, + error: history.health.native.lastError.pipe(Option.map((message) => ({ message }))), + }; + }), + ); return ProcessResourceMonitor.of({ readHistory }); }); diff --git a/apps/server/src/observability/Layers/Observability.ts b/apps/server/src/observability/Layers/Observability.ts index 95263866d80..f1eea07cac5 100644 --- a/apps/server/src/observability/Layers/Observability.ts +++ b/apps/server/src/observability/Layers/Observability.ts @@ -7,6 +7,7 @@ import * as Tracer from "effect/Tracer"; import { OtlpMetrics, OtlpSerialization, OtlpTracer } from "effect/unstable/observability"; import { ServerConfig } from "../../config.ts"; +import * as ResourceAttribution from "../../resourceTelemetry/ResourceAttribution.ts"; import { ServerLoggerLive } from "../../serverLogger.ts"; import { BrowserTraceCollector } from "../Services/BrowserTraceCollector.ts"; @@ -15,6 +16,7 @@ const otlpSerializationLayer = OtlpSerialization.layerJson; export const ObservabilityLive = Layer.unwrap( Effect.gen(function* () { const config = yield* ServerConfig; + const 
attribution = yield* ResourceAttribution.ResourceAttribution; const traceReferencesLayer = Layer.mergeAll( Layer.succeed(Tracer.MinimumTraceLevel, config.traceMinLevel), @@ -29,6 +31,14 @@ export const ObservabilityLive = Layer.unwrap( maxBytes: config.traceMaxBytes, maxFiles: config.traceMaxFiles, batchWindowMs: config.traceBatchWindowMs, + onFlush: (stats) => + attribution.record({ + component: "server-trace", + operation: "append", + logicalWriteBytes: stats.logicalWriteBytes, + count: stats.count, + durationMs: stats.durationMs, + }), }); const delegate = config.otlpTracesUrl === undefined diff --git a/apps/server/src/provider/Layers/EventNdjsonLogger.test.ts b/apps/server/src/provider/Layers/EventNdjsonLogger.test.ts index 0b1f99d3c11..ab2fc2ca81d 100644 --- a/apps/server/src/provider/Layers/EventNdjsonLogger.test.ts +++ b/apps/server/src/provider/Layers/EventNdjsonLogger.test.ts @@ -7,6 +7,7 @@ import { ThreadId } from "@t3tools/contracts"; import { assert, describe, it } from "@effect/vitest"; import * as Effect from "effect/Effect"; +import * as ResourceAttribution from "../../resourceTelemetry/ResourceAttribution.ts"; import { makeEventNdjsonLogger } from "./EventNdjsonLogger.ts"; function parseLogLine(line: string) { @@ -207,4 +208,36 @@ describe("EventNdjsonLogger", () => { } }), ); + + it.effect("reports logical provider log writes to resource attribution", () => + Effect.gen(function* () { + const tempDir = fs.mkdtempSync(path.join(os.tmpdir(), "t3-provider-log-")); + const basePath = path.join(tempDir, "provider-native.ndjson"); + + try { + const attribution = yield* ResourceAttribution.make(); + const logger = yield* makeEventNdjsonLogger(basePath, { + stream: "native", + batchWindowMs: 0, + attribution, + }); + assert.notEqual(logger, undefined); + if (!logger) { + return; + } + + yield* logger.write({ id: "attributed-event" }, ThreadId.make("thread-attribution")); + yield* logger.close(); + + const snapshot = yield* attribution.snapshot; + 
assert.equal(snapshot.entries.length, 1); + assert.equal(snapshot.entries[0]?.component, "provider-event-log"); + assert.equal(snapshot.entries[0]?.operation, "native.append"); + assert.equal(snapshot.entries[0]?.count, 1); + assert.isAbove(snapshot.entries[0]?.logicalWriteBytes ?? 0, 0); + } finally { + fs.rmSync(tempDir, { recursive: true, force: true }); + } + }), + ); }); diff --git a/apps/server/src/provider/Layers/EventNdjsonLogger.ts b/apps/server/src/provider/Layers/EventNdjsonLogger.ts index 04377ad520c..8e736672a7b 100644 --- a/apps/server/src/provider/Layers/EventNdjsonLogger.ts +++ b/apps/server/src/provider/Layers/EventNdjsonLogger.ts @@ -11,6 +11,7 @@ import path from "node:path"; import type { ThreadId } from "@t3tools/contracts"; import { RotatingFileSink } from "@t3tools/shared/logging"; +import * as Clock from "effect/Clock"; import * as Effect from "effect/Effect"; import * as Exit from "effect/Exit"; import * as Logger from "effect/Logger"; @@ -19,6 +20,7 @@ import * as Scope from "effect/Scope"; import * as SynchronizedRef from "effect/SynchronizedRef"; import { toSafeThreadAttachmentSegment } from "../../attachmentStore.ts"; +import type { ResourceAttributionShape } from "../../resourceTelemetry/ResourceAttribution.ts"; const DEFAULT_MAX_BYTES = 10 * 1024 * 1024; const DEFAULT_MAX_FILES = 10; @@ -26,6 +28,7 @@ const DEFAULT_BATCH_WINDOW_MS = 200; const GLOBAL_THREAD_SEGMENT = "_global"; const LOG_SCOPE = "provider-observability"; const encodeUnknownJsonString = Schema.encodeUnknownEffect(Schema.UnknownFromJsonString); +const textEncoder = new TextEncoder(); export type EventNdjsonStream = "native" | "canonical" | "orchestration"; @@ -40,6 +43,7 @@ export interface EventNdjsonLoggerOptions { readonly maxBytes?: number; readonly maxFiles?: number; readonly batchWindowMs?: number; + readonly attribution?: ResourceAttributionShape; } interface ThreadWriter { @@ -104,6 +108,8 @@ const makeThreadWriter = Effect.fn("makeThreadWriter")(function* 
(input: { readonly maxFiles: number; readonly batchWindowMs: number; readonly streamLabel: string; + readonly stream: EventNdjsonStream; + readonly attribution?: ResourceAttributionShape; }): Effect.fn.Return { const sinkResult = yield* Effect.sync(() => { try { @@ -135,12 +141,19 @@ const makeThreadWriter = Effect.fn("makeThreadWriter")(function* (input: { const batchedLogger = yield* Logger.batched(lineLogger, { window: input.batchWindowMs, flush: Effect.fn("makeThreadWriter.flush")(function* (messages) { + const startedAt = yield* Clock.currentTimeMillis; const flushResult = yield* Effect.sync(() => { try { + let logicalWriteBytes = 0; for (const message of messages) { sink.write(message); + logicalWriteBytes += textEncoder.encode(message).byteLength; } - return { ok: true as const }; + return { + ok: true as const, + logicalWriteBytes, + count: messages.length, + }; } catch (error) { return { ok: false as const, error }; } @@ -151,6 +164,18 @@ const makeThreadWriter = Effect.fn("makeThreadWriter")(function* (input: { filePath: input.filePath, error: flushResult.error, }); + return; + } + + if (input.attribution && flushResult.count > 0) { + const completedAt = yield* Clock.currentTimeMillis; + yield* input.attribution.record({ + component: "provider-event-log", + operation: `${input.stream}.append`, + logicalWriteBytes: flushResult.logicalWriteBytes, + count: flushResult.count, + durationMs: Math.max(0, completedAt - startedAt), + }); } }), }).pipe(Effect.provideService(Scope.Scope, scope)); @@ -216,6 +241,8 @@ export const makeEventNdjsonLogger = Effect.fn("makeEventNdjsonLogger")(function maxFiles, batchWindowMs, streamLabel, + stream: options.stream, + ...(options.attribution ? 
{ attribution: options.attribution } : {}), }).pipe( Effect.map((writer) => { if (!writer) { diff --git a/apps/server/src/provider/Layers/ProviderEventLoggers.ts b/apps/server/src/provider/Layers/ProviderEventLoggers.ts index 711aa6e76b6..b020983e13c 100644 --- a/apps/server/src/provider/Layers/ProviderEventLoggers.ts +++ b/apps/server/src/provider/Layers/ProviderEventLoggers.ts @@ -32,6 +32,7 @@ import * as Effect from "effect/Effect"; import * as Layer from "effect/Layer"; import { ServerConfig } from "../../config.ts"; +import * as ResourceAttribution from "../../resourceTelemetry/ResourceAttribution.ts"; import { type EventNdjsonLogger, makeEventNdjsonLogger } from "./EventNdjsonLogger.ts"; export interface ProviderEventLoggersShape { @@ -71,11 +72,14 @@ export const ProviderEventLoggersLive = Layer.effect( ProviderEventLoggers, Effect.gen(function* () { const { providerEventLogPath } = yield* ServerConfig; + const attribution = yield* ResourceAttribution.ResourceAttribution; const native = yield* makeEventNdjsonLogger(providerEventLogPath, { stream: "native", + attribution, }); const canonical = yield* makeEventNdjsonLogger(providerEventLogPath, { stream: "canonical", + attribution, }); return { native, diff --git a/apps/server/src/resourceTelemetry/DesktopTelemetryReceiver.ts b/apps/server/src/resourceTelemetry/DesktopTelemetryReceiver.ts new file mode 100644 index 00000000000..56f1bad2aeb --- /dev/null +++ b/apps/server/src/resourceTelemetry/DesktopTelemetryReceiver.ts @@ -0,0 +1,279 @@ +// @effect-diagnostics nodeBuiltinImport:off +import * as NodeFileSystem from "node:fs"; + +import * as NodeStream from "@effect/platform-node/NodeStream"; +import { + DesktopHostTelemetryMessage, + type DesktopHostTelemetryMessage as DesktopHostTelemetryMessageValue, + type DesktopHostTelemetrySnapshot, + type ResourceTelemetrySourceStatus, +} from "@t3tools/contracts"; +import * as Context from "effect/Context"; +import * as DateTime from "effect/DateTime"; +import * as 
Effect from "effect/Effect"; +import * as Layer from "effect/Layer"; +import * as Option from "effect/Option"; +import * as PubSub from "effect/PubSub"; +import * as Ref from "effect/Ref"; +import * as Schema from "effect/Schema"; +import * as Stream from "effect/Stream"; +import * as Ndjson from "effect/unstable/encoding/Ndjson"; + +import { ServerConfig } from "../config.ts"; + +export class DesktopTelemetryDescriptorUnavailable extends Schema.TaggedErrorClass()( + "DesktopTelemetryDescriptorUnavailable", + { + mode: Schema.String, + }, +) { + override get message(): string { + return `Desktop telemetry descriptor is unavailable in '${this.mode}' mode.`; + } +} + +export class DesktopTelemetryProtocolMismatch extends Schema.TaggedErrorClass()( + "DesktopTelemetryProtocolMismatch", + { + expectedVersion: Schema.Number, + receivedVersion: Schema.Number, + }, +) { + override get message(): string { + return `Desktop telemetry protocol ${this.receivedVersion} is incompatible with expected protocol ${this.expectedVersion}.`; + } +} + +export class DesktopTelemetryDecodeFailed extends Schema.TaggedErrorClass()( + "DesktopTelemetryDecodeFailed", + { + cause: Schema.Defect(), + }, +) { + override get message(): string { + return "Failed to decode desktop telemetry."; + } +} + +export class DesktopTelemetryStreamFailed extends Schema.TaggedErrorClass()( + "DesktopTelemetryStreamFailed", + { + fd: Schema.Number, + cause: Schema.Defect(), + }, +) { + override get message(): string { + return `Desktop telemetry stream on fd ${this.fd} failed.`; + } +} + +export class DesktopTelemetryStreamClosed extends Schema.TaggedErrorClass()( + "DesktopTelemetryStreamClosed", + { + fd: Schema.Number, + }, +) { + override get message(): string { + return `Desktop telemetry stream on fd ${this.fd} closed.`; + } +} + +export type DesktopTelemetryReceiverError = + | DesktopTelemetryDescriptorUnavailable + | DesktopTelemetryProtocolMismatch + | DesktopTelemetryDecodeFailed + | 
DesktopTelemetryStreamFailed + | DesktopTelemetryStreamClosed; + +export interface DesktopTelemetryReceiverHealth { + readonly status: ResourceTelemetrySourceStatus; + readonly lastSampleAt: Option.Option; + readonly lastError: Option.Option; +} + +export interface DesktopTelemetryReceiverShape { + readonly latest: Effect.Effect>; + readonly changes: Stream.Stream; + readonly health: Effect.Effect; + readonly healthChanges: Stream.Stream; +} + +export class DesktopTelemetryReceiver extends Context.Service< + DesktopTelemetryReceiver, + DesktopTelemetryReceiverShape +>()("t3/resourceTelemetry/DesktopTelemetryReceiver") {} + +const decodeMessage = Schema.decodeUnknownEffect(DesktopHostTelemetryMessage); +const isDescriptorUnavailable = Schema.is(DesktopTelemetryDescriptorUnavailable); +const isProtocolMismatch = Schema.is(DesktopTelemetryProtocolMismatch); +const isDecodeFailed = Schema.is(DesktopTelemetryDecodeFailed); +const isStreamFailed = Schema.is(DesktopTelemetryStreamFailed); + +function normalizeReceiverError(error: unknown): DesktopTelemetryReceiverError { + if ( + isDescriptorUnavailable(error) || + isProtocolMismatch(error) || + isDecodeFailed(error) || + isStreamFailed(error) + ) { + return error; + } + return new DesktopTelemetryDecodeFailed({ cause: error }); +} + +function messageVersion(value: unknown): number | undefined { + if (typeof value !== "object" || value === null) return undefined; + const version = Reflect.get(value, "version"); + return typeof version === "number" ? version : undefined; +} + +export const make = Effect.fn("resourceTelemetry.desktopTelemetryReceiver.make")(function* () { + const config = yield* ServerConfig; + const latest = yield* Ref.make(Option.none()); + const changes = yield* PubSub.sliding(8); + const healthChanges = yield* PubSub.sliding(4); + const health = yield* Ref.make({ + status: config.desktopTelemetryFd === undefined ? 
"unavailable" : "starting", + lastSampleAt: Option.none(), + lastError: + config.desktopTelemetryFd === undefined + ? Option.some( + new DesktopTelemetryDescriptorUnavailable({ + mode: config.mode, + }).message, + ) + : Option.none(), + }); + const updateHealth = ( + update: (current: DesktopTelemetryReceiverHealth) => DesktopTelemetryReceiverHealth, + ) => + Ref.modify(health, (current) => { + const next = update(current); + return [next, next]; + }).pipe( + Effect.flatMap((next) => PubSub.publish(healthChanges, next)), + Effect.asVoid, + ); + + if (config.desktopTelemetryFd !== undefined) { + const fd = config.desktopTelemetryFd; + const readable = yield* Effect.acquireRelease( + Effect.try({ + try: () => + NodeFileSystem.createReadStream("", { + fd, + autoClose: true, + }), + catch: (cause) => new DesktopTelemetryStreamFailed({ fd, cause }), + }), + (stream) => + Effect.sync(() => { + stream.destroy(); + }), + ); + + const messages: Stream.Stream = + NodeStream.fromReadable({ + evaluate: () => readable, + closeOnDone: true, + onError: (cause) => new DesktopTelemetryStreamFailed({ fd, cause }), + }).pipe( + Stream.pipeThroughChannel(Ndjson.decode({ ignoreEmptyLines: true })), + Stream.mapEffect( + ( + value, + ): Effect.Effect< + DesktopHostTelemetryMessageValue, + DesktopTelemetryProtocolMismatch | DesktopTelemetryDecodeFailed + > => { + const version = messageVersion(value); + if (version !== undefined && version !== 1) { + return Effect.fail( + new DesktopTelemetryProtocolMismatch({ + expectedVersion: 1, + receivedVersion: version, + }), + ); + } + return decodeMessage(value).pipe( + Effect.mapError((cause) => new DesktopTelemetryDecodeFailed({ cause })), + ); + }, + ), + Stream.mapError(normalizeReceiverError), + ); + + yield* messages.pipe( + Stream.runForEach((message) => { + if (message.type === "desktopTelemetryHello") { + return updateHealth( + (current): DesktopTelemetryReceiverHealth => ({ + ...current, + status: "healthy", + lastError: Option.none(), 
+ }), + ); + } + + const sampledAt = DateTime.makeUnsafe(message.sampledAtUnixMs); + return Ref.set(latest, Option.some(message)).pipe( + Effect.andThen( + Ref.set(health, { + status: "healthy", + lastSampleAt: Option.some(sampledAt), + lastError: Option.none(), + }), + ), + Effect.andThen(PubSub.publish(changes, message)), + Effect.asVoid, + ); + }), + Effect.andThen( + updateHealth( + (current): DesktopTelemetryReceiverHealth => ({ + ...current, + status: "stopped", + lastError: Option.some(new DesktopTelemetryStreamClosed({ fd }).message), + }), + ), + ), + Effect.catch((error) => + updateHealth( + (current): DesktopTelemetryReceiverHealth => ({ + ...current, + status: "degraded", + lastError: Option.some(error.message), + }), + ), + ), + Effect.forkScoped, + ); + } + + return DesktopTelemetryReceiver.of({ + latest: Ref.get(latest), + changes: Stream.fromPubSub(changes), + health: Ref.get(health), + healthChanges: Stream.fromPubSub(healthChanges), + }); +}); + +export const layer = Layer.effect(DesktopTelemetryReceiver, make()); + +export const layerTest = ( + overrides: Partial = {}, +): Layer.Layer => + Layer.succeed( + DesktopTelemetryReceiver, + DesktopTelemetryReceiver.of({ + latest: Effect.succeedNone, + changes: Stream.empty, + health: Effect.succeed({ + status: "unavailable", + lastSampleAt: Option.none(), + lastError: Option.some("Desktop telemetry test implementation is unavailable."), + }), + healthChanges: Stream.empty, + ...overrides, + }), + ); diff --git a/apps/server/src/resourceTelemetry/Model.test.ts b/apps/server/src/resourceTelemetry/Model.test.ts new file mode 100644 index 00000000000..175f268b658 --- /dev/null +++ b/apps/server/src/resourceTelemetry/Model.test.ts @@ -0,0 +1,393 @@ +import { + type DesktopElectronProcessMetric, + type DesktopHostTelemetrySnapshot, + type ResourceMonitorProcessSample, + type ResourceMonitorSnapshotEvent, +} from "@t3tools/contracts"; +import { describe, expect, it } from "@effect/vitest"; +import * as 
DateTime from "effect/DateTime"; +import * as Option from "effect/Option"; + +import { emptyTelemetryCounters, mergeProcesses, type MergeProcessesResult } from "./Model.ts"; + +const SERVER_PID = 100; +const BASE_TIME_MS = DateTime.toEpochMillis(DateTime.makeUnsafe("2026-06-17T12:00:00.000Z")); + +function processSample( + input: Partial & + Pick, +): ResourceMonitorProcessSample { + return { + runTimeMs: 1_000, + name: `process-${input.pid}`, + command: `process-${input.pid}`, + status: "Running", + cpuPercent: 0, + cpuTimeMs: 0, + residentBytes: 1_024, + virtualBytes: 2_048, + ioReadBytes: 0, + ioWriteBytes: 0, + ioSemantics: "storage", + ...input, + }; +} + +function nativeSnapshot( + sampledAtUnixMs: number, + processes: ReadonlyArray, + sequence = 1, +): ResourceMonitorSnapshotEvent { + return { + version: 1, + type: "snapshot", + sequence, + sampledAtUnixMs, + collectionDurationMicros: 250, + scannedProcessCount: processes.length, + retainedProcessCount: processes.length, + inaccessibleProcessCount: 0, + processes: [...processes], + }; +} + +function electronMetric( + input: Partial & + Pick, +): DesktopElectronProcessMetric { + return { + cpuPercent: 0, + idleWakeupsPerSecond: 0, + workingSetBytes: 1_024, + peakWorkingSetBytes: 2_048, + ...input, + }; +} + +function desktopSnapshot( + sampledAtUnixMs: number, + electronProcesses: ReadonlyArray, +): DesktopHostTelemetrySnapshot { + const sampledAt = DateTime.makeUnsafe(sampledAtUnixMs); + return { + version: 1, + type: "desktopTelemetry", + sequence: 1, + sampledAtUnixMs, + power: { + source: "electron-main", + idle: "false", + idleSeconds: 0, + locked: "false", + suspended: false, + onBattery: "false", + lowPowerMode: "unknown", + thermalState: "nominal", + stale: false, + updatedAt: sampledAt, + }, + speedLimitPercent: Option.none(), + electronProcesses: [...electronProcesses], + }; +} + +function merge(input: { + readonly native: ResourceMonitorSnapshotEvent; + readonly desktop?: 
DesktopHostTelemetrySnapshot; + readonly previous?: MergeProcessesResult; + readonly sidecarPid?: number; +}): MergeProcessesResult { + return mergeProcesses({ + serverPid: SERVER_PID, + sidecarPid: Option.fromUndefinedOr(input.sidecarPid), + fallbackSampledAtMs: input.native.sampledAtUnixMs, + nativeSnapshot: Option.some(input.native), + desktopSnapshot: Option.fromUndefinedOr(input.desktop), + previous: input.previous?.previous ?? new Map(), + counters: input.previous?.counters ?? emptyTelemetryCounters(), + updatePrevious: true, + }); +} + +describe("resource telemetry process model", () => { + it("builds complete descendant depths and isolates monitor overhead", () => { + const result = merge({ + sidecarPid: 900, + native: nativeSnapshot(BASE_TIME_MS, [ + processSample({ pid: SERVER_PID, ppid: 1, startTimeMs: 1_000 }), + processSample({ pid: 200, ppid: SERVER_PID, startTimeMs: 2_000 }), + processSample({ pid: 201, ppid: 200, startTimeMs: 3_000 }), + processSample({ pid: 202, ppid: 201, startTimeMs: 4_000 }), + processSample({ pid: 900, ppid: SERVER_PID, startTimeMs: 5_000 }), + ]), + }); + + expect(result.processes.map((process) => [process.identity.pid, process.depth])).toEqual([ + [100, 0], + [200, 1], + [201, 2], + [202, 3], + [900, 1], + ]); + expect(result.processes.find((process) => process.identity.pid === 900)?.category).toBe( + "resource-monitor", + ); + expect(result.groups.backend.processCount).toBe(4); + expect(result.groups.monitor.processCount).toBe(1); + expect(result.groups.monitor.processStarts).toBe(1); + expect(result.groups.allT3.processStarts).toBe(5); + }); + + it("deduplicates Electron metrics and classifies Electron descendants", () => { + const electronStart = 10_000; + const result = merge({ + native: nativeSnapshot(BASE_TIME_MS, [ + processSample({ pid: SERVER_PID, ppid: 1, startTimeMs: 1_000 }), + processSample({ pid: 300, ppid: 1, startTimeMs: electronStart }), + processSample({ pid: 301, ppid: 300, startTimeMs: electronStart + 1 
}), + ]), + desktop: desktopSnapshot(BASE_TIME_MS, [ + electronMetric({ + pid: 300, + creationTimeMs: electronStart + 500, + type: "Browser", + name: "electron", + }), + electronMetric({ + pid: 301, + creationTimeMs: electronStart + 500, + type: "Utility", + name: "network-service", + }), + ]), + }); + + expect(result.processes.filter((process) => process.identity.pid === 300)).toHaveLength(1); + expect(result.processes.find((process) => process.identity.pid === 300)?.category).toBe( + "electron-main", + ); + expect(result.processes.find((process) => process.identity.pid === 301)?.category).toBe( + "electron-utility", + ); + expect(result.processes.find((process) => process.identity.pid === 301)?.depth).toBe(1); + expect(result.groups.electron.processCount).toBe(2); + }); + + it("ignores stale Electron metrics after PID reuse", () => { + const result = merge({ + native: nativeSnapshot(BASE_TIME_MS, [ + processSample({ pid: SERVER_PID, ppid: 1, startTimeMs: 1_000 }), + processSample({ pid: 300, ppid: SERVER_PID, startTimeMs: 50_000 }), + ]), + desktop: desktopSnapshot(BASE_TIME_MS, [ + electronMetric({ + pid: 300, + creationTimeMs: 10_000, + type: "Browser", + }), + ]), + }); + + expect(result.processes.find((process) => process.identity.pid === 300)?.category).toBe( + "server-child", + ); + expect(result.groups.electron.processCount).toBe(0); + }); + + it("derives rates from cumulative counters and preserves I/O semantics", () => { + const first = merge({ + native: nativeSnapshot(BASE_TIME_MS, [ + processSample({ + pid: SERVER_PID, + ppid: 1, + startTimeMs: 1_000, + cpuTimeMs: 1_000, + ioReadBytes: 10_000, + ioWriteBytes: 20_000, + ioSemantics: "all-io", + }), + ]), + }); + const second = merge({ + previous: first, + native: nativeSnapshot( + BASE_TIME_MS + 1_000, + [ + processSample({ + pid: SERVER_PID, + ppid: 1, + startTimeMs: 1_000, + cpuTimeMs: 1_250, + ioReadBytes: 12_000, + ioWriteBytes: 23_000, + ioSemantics: "all-io", + }), + ], + 2, + ), + }); + const 
server = second.processes[0]!; + + expect(server.cpuPercent).toBe(25); + expect(server.ioReadBytesPerSecond).toBe(2_000); + expect(server.ioWriteBytesPerSecond).toBe(3_000); + expect(server.ioSemantics).toBe("all-io"); + expect(second.groups.backend.cpuTimeMs).toBe(250); + expect(second.groups.backend.ioReadBytes).toBe(2_000); + expect(second.groups.backend.ioWriteBytes).toBe(3_000); + }); + + it("preserves native rates while applying a desktop-only update", () => { + const first = merge({ + native: nativeSnapshot(BASE_TIME_MS, [ + processSample({ + pid: SERVER_PID, + ppid: 1, + startTimeMs: 1_000, + cpuTimeMs: 1_000, + ioReadBytes: 10_000, + ioWriteBytes: 20_000, + }), + ]), + }); + const second = merge({ + previous: first, + native: nativeSnapshot( + BASE_TIME_MS + 1_000, + [ + processSample({ + pid: SERVER_PID, + ppid: 1, + startTimeMs: 1_000, + cpuTimeMs: 1_250, + ioReadBytes: 12_000, + ioWriteBytes: 23_000, + }), + ], + 2, + ), + }); + const desktopOnly = mergeProcesses({ + serverPid: SERVER_PID, + sidecarPid: Option.none(), + fallbackSampledAtMs: BASE_TIME_MS + 1_000, + nativeSnapshot: Option.some( + nativeSnapshot( + BASE_TIME_MS + 1_000, + [ + processSample({ + pid: SERVER_PID, + ppid: 1, + startTimeMs: 1_000, + cpuTimeMs: 1_250, + ioReadBytes: 12_000, + ioWriteBytes: 23_000, + }), + ], + 2, + ), + ), + desktopSnapshot: Option.some(desktopSnapshot(BASE_TIME_MS + 1_500, [])), + previous: second.previous, + counters: second.counters, + updatePrevious: false, + }); + + expect(desktopOnly.processes[0]?.cpuPercent).toBe(25); + expect(desktopOnly.processes[0]?.ioReadBytesPerSecond).toBe(2_000); + expect(desktopOnly.processes[0]?.ioWriteBytesPerSecond).toBe(3_000); + }); + + it("resets deltas when counters decrease or the sampling gap is unsafe", () => { + const first = merge({ + native: nativeSnapshot(BASE_TIME_MS, [ + processSample({ + pid: SERVER_PID, + ppid: 1, + startTimeMs: 1_000, + cpuTimeMs: 1_000, + ioReadBytes: 10_000, + ioWriteBytes: 20_000, + }), + ]), 
+ }); + const decreased = merge({ + previous: first, + native: nativeSnapshot( + BASE_TIME_MS + 1_000, + [ + processSample({ + pid: SERVER_PID, + ppid: 1, + startTimeMs: 1_000, + cpuTimeMs: 100, + ioReadBytes: 100, + ioWriteBytes: 200, + }), + ], + 2, + ), + }); + const delayed = merge({ + previous: decreased, + native: nativeSnapshot( + BASE_TIME_MS + 20_000, + [ + processSample({ + pid: SERVER_PID, + ppid: 1, + startTimeMs: 1_000, + cpuTimeMs: 10_000, + ioReadBytes: 100_000, + ioWriteBytes: 200_000, + }), + ], + 3, + ), + }); + + expect(decreased.processes[0]?.cpuPercent).toBe(0); + expect(decreased.processes[0]?.ioReadBytesPerSecond).toBe(0); + expect(decreased.processes[0]?.ioWriteBytesPerSecond).toBe(0); + expect(delayed.processes[0]?.cpuPercent).toBe(0); + expect(delayed.processes[0]?.ioReadBytesPerSecond).toBe(0); + expect(delayed.processes[0]?.ioWriteBytesPerSecond).toBe(0); + expect(delayed.groups.backend.cpuTimeMs).toBe(0); + expect(delayed.groups.backend.ioReadBytes).toBe(0); + expect(delayed.groups.backend.ioWriteBytes).toBe(0); + }); + + it("treats reused PIDs as an exit plus a new process", () => { + const first = merge({ + native: nativeSnapshot(BASE_TIME_MS, [ + processSample({ pid: SERVER_PID, ppid: 1, startTimeMs: 1_000 }), + processSample({ pid: 200, ppid: SERVER_PID, startTimeMs: 2_000 }), + ]), + }); + const second = merge({ + previous: first, + native: nativeSnapshot( + BASE_TIME_MS + 1_000, + [ + processSample({ pid: SERVER_PID, ppid: 1, startTimeMs: 1_000 }), + processSample({ + pid: 200, + ppid: SERVER_PID, + startTimeMs: 9_000, + cpuTimeMs: 999, + ioReadBytes: 999, + ioWriteBytes: 999, + }), + ], + 2, + ), + }); + const reused = second.processes.find((process) => process.identity.pid === 200)!; + + expect(reused.identity.startTimeMs).toBe(9_000); + expect(reused.cpuPercent).toBe(0); + expect(reused.ioReadBytesPerSecond).toBe(0); + expect(second.groups.backend.processStarts).toBe(3); + expect(second.groups.backend.processExits).toBe(1); + 
}); +}); diff --git a/apps/server/src/resourceTelemetry/Model.ts b/apps/server/src/resourceTelemetry/Model.ts new file mode 100644 index 00000000000..6b50ef92f4a --- /dev/null +++ b/apps/server/src/resourceTelemetry/Model.ts @@ -0,0 +1,542 @@ +import type { + DesktopElectronProcessMetric, + DesktopHostTelemetrySnapshot, + ResourceMonitorProcessSample, + ResourceMonitorSnapshotEvent, + ResourceTelemetryAggregate, + ResourceTelemetryProcess, + ResourceTelemetryProcessCategory, +} from "@t3tools/contracts"; +import * as DateTime from "effect/DateTime"; +import * as Option from "effect/Option"; + +const MAX_DELTA_INTERVAL_MS = 10_000; +const ELECTRON_IDENTITY_TOLERANCE_MS = 2_000; + +export interface ProcessState { + readonly process: ResourceTelemetryProcess; + readonly sampledAtMs: number; +} + +export interface GroupCounters { + readonly cpuTimeMs: number; + readonly ioReadBytes: number; + readonly ioWriteBytes: number; + readonly processStarts: number; + readonly processExits: number; +} + +export interface TelemetryCounters { + readonly backend: GroupCounters; + readonly electron: GroupCounters; + readonly monitor: GroupCounters; + readonly allT3: GroupCounters; +} + +export interface ProcessDelta { + readonly identityKey: string; + readonly category: ResourceTelemetryProcessCategory; + readonly cpuTimeMs: number; + readonly ioReadBytes: number; + readonly ioWriteBytes: number; +} + +export interface MergeProcessesInput { + readonly serverPid: number; + readonly sidecarPid: Option.Option; + readonly fallbackSampledAtMs: number; + readonly nativeSnapshot: Option.Option; + readonly desktopSnapshot: Option.Option; + readonly previous: ReadonlyMap; + readonly counters: TelemetryCounters; + readonly updatePrevious: boolean; +} + +export interface MergeProcessesResult { + readonly sampledAtMs: number; + readonly processes: ReadonlyArray; + readonly previous: ReadonlyMap; + readonly counters: TelemetryCounters; + readonly groups: { + readonly backend: 
ResourceTelemetryAggregate; + readonly electron: ResourceTelemetryAggregate; + readonly monitor: ResourceTelemetryAggregate; + readonly allT3: ResourceTelemetryAggregate; + }; + readonly deltas: ReadonlyArray; +} + +export const emptyGroupCounters = (): GroupCounters => ({ + cpuTimeMs: 0, + ioReadBytes: 0, + ioWriteBytes: 0, + processStarts: 0, + processExits: 0, +}); + +export const emptyTelemetryCounters = (): TelemetryCounters => ({ + backend: emptyGroupCounters(), + electron: emptyGroupCounters(), + monitor: emptyGroupCounters(), + allT3: emptyGroupCounters(), +}); + +export function processIdentityKey(pid: number, startTimeMs: number): string { + return `${pid}:${startTimeMs}`; +} + +function finiteNonNegative(value: number): number { + return Number.isFinite(value) ? Math.max(0, value) : 0; +} + +function categoryGroup( + category: ResourceTelemetryProcessCategory, +): "backend" | "electron" | "monitor" { + if (category === "resource-monitor") return "monitor"; + if (category.startsWith("electron-")) return "electron"; + return "backend"; +} + +function electronCategory(metric: DesktopElectronProcessMetric): ResourceTelemetryProcessCategory { + switch (metric.type) { + case "Browser": + return "electron-main"; + case "Tab": + return "electron-renderer"; + case "GPU": + return "electron-gpu"; + default: + return "electron-utility"; + } +} + +function matchElectronMetric( + process: ResourceMonitorProcessSample, + metricsByPid: ReadonlyMap, +): DesktopElectronProcessMetric | undefined { + const metric = metricsByPid.get(process.pid); + if (!metric) return undefined; + return Math.abs(metric.creationTimeMs - process.startTimeMs) <= ELECTRON_IDENTITY_TOLERANCE_MS + ? 
metric + : undefined; +} + +function syntheticNativeSample( + metric: DesktopElectronProcessMetric, + sampledAtMs: number, +): ResourceMonitorProcessSample { + return { + pid: metric.pid, + ppid: 0, + startTimeMs: metric.creationTimeMs, + runTimeMs: Math.max(0, sampledAtMs - metric.creationTimeMs), + name: metric.name ?? metric.serviceName ?? metric.type, + command: metric.name ?? metric.serviceName ?? metric.type, + status: "Running", + cpuPercent: metric.cpuPercent, + cpuTimeMs: Math.max(0, Math.round((metric.cumulativeCpuSeconds ?? 0) * 1_000)), + residentBytes: metric.workingSetBytes, + virtualBytes: 0, + ioReadBytes: 0, + ioWriteBytes: 0, + ioSemantics: "storage", + }; +} + +function processDepths( + processes: ReadonlyArray, + roots: ReadonlySet, +): ReadonlyMap { + const childrenByParent = new Map(); + for (const process of processes) { + const children = childrenByParent.get(process.ppid) ?? []; + children.push(process.pid); + childrenByParent.set(process.ppid, children); + } + + const depths = new Map(); + const queue = [...roots].map((pid) => ({ pid, depth: 0 })); + while (queue.length > 0) { + const current = queue.shift(); + if (!current || depths.has(current.pid)) continue; + depths.set(current.pid, current.depth); + for (const childPid of childrenByParent.get(current.pid) ?? 
[]) { + queue.push({ pid: childPid, depth: current.depth + 1 }); + } + } + return depths; +} + +function isElectronDescendant( + pid: number, + processesByPid: ReadonlyMap, + electronPids: ReadonlySet, +): boolean { + const visited = new Set(); + let currentPid = pid; + while (!visited.has(currentPid)) { + visited.add(currentPid); + if (electronPids.has(currentPid)) return true; + const current = processesByPid.get(currentPid); + if (!current || current.ppid <= 0 || current.ppid === currentPid) return false; + currentPid = current.ppid; + } + return false; +} + +function hasElectronAncestor( + process: ResourceMonitorProcessSample, + processesByPid: ReadonlyMap, + electronPids: ReadonlySet, +): boolean { + const visited = new Set(); + let currentPid = process.ppid; + while (currentPid > 0 && !visited.has(currentPid)) { + visited.add(currentPid); + if (electronPids.has(currentPid)) return true; + const current = processesByPid.get(currentPid); + if (!current || current.ppid === currentPid) return false; + currentPid = current.ppid; + } + return false; +} + +function orderProcessTree( + processes: ReadonlyArray, + rootPids: ReadonlyArray, +): ReadonlyArray { + const processesByPid = new Map(processes.map((process) => [process.identity.pid, process])); + const childrenByParent = new Map(); + for (const process of processes) { + const children = childrenByParent.get(process.ppid) ?? []; + children.push(process); + childrenByParent.set(process.ppid, children); + } + for (const children of childrenByParent.values()) { + children.sort((left, right) => left.identity.pid - right.identity.pid); + } + + const ordered: ResourceTelemetryProcess[] = []; + const visited = new Set(); + const visit = (process: ResourceTelemetryProcess): void => { + if (visited.has(process.identity.pid)) return; + visited.add(process.identity.pid); + ordered.push(process); + for (const child of childrenByParent.get(process.identity.pid) ?? 
[]) { + visit(child); + } + }; + + for (const rootPid of rootPids) { + const root = processesByPid.get(rootPid); + if (root) visit(root); + } + for (const process of processes.toSorted( + (left, right) => left.depth - right.depth || left.identity.pid - right.identity.pid, + )) { + visit(process); + } + return ordered; +} + +function delta(input: { + readonly current: number; + readonly previous: number; + readonly elapsedMs: number; +}): number { + if ( + input.elapsedMs <= 0 || + input.elapsedMs > MAX_DELTA_INTERVAL_MS || + input.current < input.previous + ) { + return 0; + } + return input.current - input.previous; +} + +function incrementCounters(counters: GroupCounters, update: Partial): GroupCounters { + return { + cpuTimeMs: counters.cpuTimeMs + (update.cpuTimeMs ?? 0), + ioReadBytes: counters.ioReadBytes + (update.ioReadBytes ?? 0), + ioWriteBytes: counters.ioWriteBytes + (update.ioWriteBytes ?? 0), + processStarts: counters.processStarts + (update.processStarts ?? 0), + processExits: counters.processExits + (update.processExits ?? 
0), + }; +} + +function applyLifecycleCounters(input: { + readonly counters: TelemetryCounters; + readonly deltas: ReadonlyArray; + readonly current: ReadonlyMap; + readonly previous: ReadonlyMap; +}): TelemetryCounters { + let backend = input.counters.backend; + let electron = input.counters.electron; + let monitor = input.counters.monitor; + let allT3 = input.counters.allT3; + for (const processDelta of input.deltas) { + const group = categoryGroup(processDelta.category); + switch (group) { + case "backend": + backend = incrementCounters(backend, processDelta); + break; + case "electron": + electron = incrementCounters(electron, processDelta); + break; + case "monitor": + monitor = incrementCounters(monitor, processDelta); + break; + } + allT3 = incrementCounters(allT3, processDelta); + } + + for (const [identityKey, current] of input.current) { + if (input.previous.has(identityKey)) continue; + const group = categoryGroup(current.process.category); + switch (group) { + case "backend": + backend = incrementCounters(backend, { processStarts: 1 }); + break; + case "electron": + electron = incrementCounters(electron, { processStarts: 1 }); + break; + case "monitor": + monitor = incrementCounters(monitor, { processStarts: 1 }); + break; + } + allT3 = incrementCounters(allT3, { processStarts: 1 }); + } + + for (const [identityKey, previous] of input.previous) { + if (input.current.has(identityKey)) continue; + const group = categoryGroup(previous.process.category); + switch (group) { + case "backend": + backend = incrementCounters(backend, { processExits: 1 }); + break; + case "electron": + electron = incrementCounters(electron, { processExits: 1 }); + break; + case "monitor": + monitor = incrementCounters(monitor, { processExits: 1 }); + break; + } + allT3 = incrementCounters(allT3, { processExits: 1 }); + } + + return { backend, electron, monitor, allT3 }; +} + +function aggregate( + processes: ReadonlyArray, + counters: GroupCounters, +): ResourceTelemetryAggregate 
{ + return { + processCount: processes.length, + currentCpuPercent: processes.reduce((total, process) => total + process.cpuPercent, 0), + cpuTimeMs: counters.cpuTimeMs, + currentRssBytes: processes.reduce((total, process) => total + process.residentBytes, 0), + peakRssBytes: processes.reduce((total, process) => total + process.peakResidentBytes, 0), + ioReadBytes: counters.ioReadBytes, + ioWriteBytes: counters.ioWriteBytes, + ioReadBytesPerSecond: processes.reduce( + (total, process) => total + process.ioReadBytesPerSecond, + 0, + ), + ioWriteBytesPerSecond: processes.reduce( + (total, process) => total + process.ioWriteBytesPerSecond, + 0, + ), + processStarts: counters.processStarts, + processExits: counters.processExits, + }; +} + +export function mergeProcesses(input: MergeProcessesInput): MergeProcessesResult { + const nativeProcesses = Option.match(input.nativeSnapshot, { + onNone: () => [] as ReadonlyArray, + onSome: (snapshot) => snapshot.processes, + }); + const electronMetrics = Option.match(input.desktopSnapshot, { + onNone: () => [] as ReadonlyArray, + onSome: (snapshot) => snapshot.electronProcesses, + }); + const sampledAtMs = Option.match(input.nativeSnapshot, { + onNone: () => + Option.match(input.desktopSnapshot, { + onNone: () => input.fallbackSampledAtMs, + onSome: (snapshot) => snapshot.sampledAtUnixMs, + }), + onSome: (snapshot) => snapshot.sampledAtUnixMs, + }); + const nativeByPid = new Map(nativeProcesses.map((process) => [process.pid, process])); + const metricsByPid = new Map(); + for (const metric of electronMetrics) { + const nativeProcess = nativeByPid.get(metric.pid); + if (!nativeProcess) { + nativeByPid.set(metric.pid, syntheticNativeSample(metric, sampledAtMs)); + metricsByPid.set(metric.pid, metric); + continue; + } + if ( + Math.abs(metric.creationTimeMs - nativeProcess.startTimeMs) <= ELECTRON_IDENTITY_TOLERANCE_MS + ) { + metricsByPid.set(metric.pid, metric); + } + } + const processes = [...nativeByPid.values()]; + const 
processesByPid = new Map(processes.map((process) => [process.pid, process])); + const electronPids = new Set(metricsByPid.keys()); + const electronRootPids = [...electronPids] + .filter((pid) => { + const process = processesByPid.get(pid); + return process === undefined + ? true + : !hasElectronAncestor(process, processesByPid, electronPids); + }) + .toSorted((left, right) => left - right); + const rootPids = [input.serverPid, ...electronRootPids]; + const roots = new Set(rootPids); + const depths = processDepths(processes, roots); + const childrenByParent = new Map(); + for (const process of processes) { + const children = childrenByParent.get(process.ppid) ?? []; + children.push(process.pid); + childrenByParent.set(process.ppid, children); + } + + const nextPrevious = new Map(); + const processDeltas: ProcessDelta[] = []; + const normalized = processes.map((process): ResourceTelemetryProcess => { + const identityKey = processIdentityKey(process.pid, process.startTimeMs); + const previous = input.previous.get(identityKey); + const elapsedMs = previous ? sampledAtMs - previous.sampledAtMs : 0; + const cpuTimeDelta = previous + ? delta({ + current: process.cpuTimeMs, + previous: previous.process.cpuTimeMs, + elapsedMs, + }) + : 0; + const ioReadDelta = previous + ? delta({ + current: process.ioReadBytes, + previous: previous.process.ioReadBytes, + elapsedMs, + }) + : 0; + const ioWriteDelta = previous + ? delta({ + current: process.ioWriteBytes, + previous: previous.process.ioWriteBytes, + elapsedMs, + }) + : 0; + const electronMetric = matchElectronMetric(process, metricsByPid); + const category: ResourceTelemetryProcessCategory = + process.pid === input.serverPid + ? "server" + : Option.contains(input.sidecarPid, process.pid) + ? "resource-monitor" + : electronMetric + ? electronCategory(electronMetric) + : isElectronDescendant(process.pid, processesByPid, electronPids) + ? 
"electron-utility" + : "server-child"; + const firstSeenAt = previous?.process.firstSeenAt ?? DateTime.makeUnsafe(sampledAtMs); + const preservePreviousRates = !input.updatePrevious && previous !== undefined; + const cpuPercent = preservePreviousRates + ? previous.process.cpuPercent + : previous && elapsedMs > 0 && elapsedMs <= MAX_DELTA_INTERVAL_MS + ? (cpuTimeDelta / elapsedMs) * 100 + : finiteNonNegative(process.cpuPercent); + const normalizedProcess: ResourceTelemetryProcess = { + identity: { + pid: process.pid, + startTimeMs: process.startTimeMs, + }, + ppid: process.ppid, + childPids: [...(childrenByParent.get(process.pid) ?? [])].toSorted( + (left, right) => left - right, + ), + depth: depths.get(process.pid) ?? 0, + name: process.name, + command: process.command, + status: process.status, + category, + ...(electronMetric ? { electronType: electronMetric.type } : {}), + ...(electronMetric?.serviceName ? { electronServiceName: electronMetric.serviceName } : {}), + cpuPercent: finiteNonNegative(cpuPercent), + cpuTimeMs: process.cpuTimeMs, + residentBytes: process.residentBytes, + peakResidentBytes: Math.max( + process.residentBytes, + electronMetric?.peakWorkingSetBytes ?? 0, + previous?.process.peakResidentBytes ?? 0, + ), + virtualBytes: process.virtualBytes, + ioReadBytes: process.ioReadBytes, + ioWriteBytes: process.ioWriteBytes, + ioReadBytesPerSecond: preservePreviousRates + ? previous.process.ioReadBytesPerSecond + : elapsedMs > 0 + ? finiteNonNegative((ioReadDelta * 1_000) / elapsedMs) + : 0, + ioWriteBytesPerSecond: preservePreviousRates + ? previous.process.ioWriteBytesPerSecond + : elapsedMs > 0 + ? finiteNonNegative((ioWriteDelta * 1_000) / elapsedMs) + : 0, + ioSemantics: process.ioSemantics, + ...(electronMetric ? 
{ idleWakeupsPerSecond: electronMetric.idleWakeupsPerSecond } : {}), + runTimeMs: process.runTimeMs, + firstSeenAt, + lastSeenAt: DateTime.makeUnsafe(sampledAtMs), + }; + nextPrevious.set(identityKey, { + process: normalizedProcess, + sampledAtMs, + }); + processDeltas.push({ + identityKey, + category, + cpuTimeMs: cpuTimeDelta, + ioReadBytes: ioReadDelta, + ioWriteBytes: ioWriteDelta, + }); + return normalizedProcess; + }); + const ordered = orderProcessTree(normalized, rootPids); + + const counters = input.updatePrevious + ? applyLifecycleCounters({ + counters: input.counters, + deltas: processDeltas, + current: nextPrevious, + previous: input.previous, + }) + : input.counters; + const backendProcesses = ordered.filter( + (process) => categoryGroup(process.category) === "backend", + ); + const electronProcesses = ordered.filter( + (process) => categoryGroup(process.category) === "electron", + ); + const monitorProcesses = ordered.filter( + (process) => categoryGroup(process.category) === "monitor", + ); + + return { + sampledAtMs, + processes: ordered, + previous: input.updatePrevious ? 
nextPrevious : input.previous, + counters, + groups: { + backend: aggregate(backendProcesses, counters.backend), + electron: aggregate(electronProcesses, counters.electron), + monitor: aggregate(monitorProcesses, counters.monitor), + allT3: aggregate(ordered, counters.allT3), + }, + deltas: processDeltas, + }; +} diff --git a/apps/server/src/resourceTelemetry/NativeTelemetryClient.ts b/apps/server/src/resourceTelemetry/NativeTelemetryClient.ts new file mode 100644 index 00000000000..df942089c50 --- /dev/null +++ b/apps/server/src/resourceTelemetry/NativeTelemetryClient.ts @@ -0,0 +1,656 @@ +import type { + ResourceMonitorCapabilities, + ResourceMonitorCommand, + ResourceMonitorEvent, + ResourceMonitorExternalProcess, + ResourceMonitorHelloEvent, + ResourceMonitorSnapshotEvent, + ResourceTelemetrySourceStatus, +} from "@t3tools/contracts"; +import { + RESOURCE_MONITOR_PROTOCOL_VERSION, + ResourceMonitorCommand as ResourceMonitorCommandSchema, + ResourceMonitorEvent as ResourceMonitorEventSchema, +} from "@t3tools/contracts"; +import * as Cause from "effect/Cause"; +import * as Context from "effect/Context"; +import * as Crypto from "effect/Crypto"; +import * as DateTime from "effect/DateTime"; +import * as Deferred from "effect/Deferred"; +import * as Duration from "effect/Duration"; +import * as Effect from "effect/Effect"; +import * as Fiber from "effect/Fiber"; +import * as Layer from "effect/Layer"; +import * as Option from "effect/Option"; +import * as PubSub from "effect/PubSub"; +import * as Queue from "effect/Queue"; +import * as Ref from "effect/Ref"; +import * as Result from "effect/Result"; +import * as Schema from "effect/Schema"; +import * as Semaphore from "effect/Semaphore"; +import * as Stream from "effect/Stream"; +import * as Ndjson from "effect/unstable/encoding/Ndjson"; +import { ChildProcess, ChildProcessSpawner } from "effect/unstable/process"; + +import * as ResourceMonitorBinary from "./ResourceMonitorBinary.ts"; +import { ServerConfig } from 
"../config.ts"; + +const SAMPLE_INTERVAL_MS = 1_000; +const HANDSHAKE_TIMEOUT = Duration.seconds(5); +const SAMPLE_REQUEST_TIMEOUT = Duration.seconds(5); +const INITIAL_RESTART_DELAY = Duration.millis(500); +const MAX_RESTART_DELAY = Duration.seconds(10); +const FAILURE_WINDOW_MS = 60_000; +const MAX_FAILURES_PER_WINDOW = 5; + +export class NativeTelemetrySpawnFailed extends Schema.TaggedErrorClass()( + "NativeTelemetrySpawnFailed", + { + path: Schema.String, + cause: Schema.Defect(), + }, +) { + override get message(): string { + return `Failed to start resource monitor '${this.path}'.`; + } +} + +export class NativeTelemetryHandshakeTimedOut extends Schema.TaggedErrorClass()( + "NativeTelemetryHandshakeTimedOut", + { + timeoutMs: Schema.Number, + }, +) { + override get message(): string { + return `Resource monitor handshake timed out after ${this.timeoutMs}ms.`; + } +} + +export class NativeTelemetryProtocolMismatch extends Schema.TaggedErrorClass()( + "NativeTelemetryProtocolMismatch", + { + expectedVersion: Schema.Number, + receivedVersion: Schema.Number, + }, +) { + override get message(): string { + return `Resource monitor protocol ${this.receivedVersion} is incompatible with expected protocol ${this.expectedVersion}.`; + } +} + +export class NativeTelemetryDecodeFailed extends Schema.TaggedErrorClass()( + "NativeTelemetryDecodeFailed", + { + cause: Schema.Defect(), + }, +) { + override get message(): string { + return "Failed to decode resource monitor output."; + } +} + +export class NativeTelemetryCommandFailed extends Schema.TaggedErrorClass()( + "NativeTelemetryCommandFailed", + { + operation: Schema.String, + cause: Schema.Defect(), + }, +) { + override get message(): string { + return `Resource monitor command '${this.operation}' failed.`; + } +} + +export class NativeTelemetryExited extends Schema.TaggedErrorClass()( + "NativeTelemetryExited", + { + exitCode: Schema.Number, + }, +) { + override get message(): string { + return `Resource monitor 
exited with code ${this.exitCode}.`; + } +} + +export class NativeTelemetryUnavailable extends Schema.TaggedErrorClass()( + "NativeTelemetryUnavailable", + { + reason: Schema.String, + }, +) { + override get message(): string { + return `Resource monitor is unavailable: ${this.reason}`; + } +} + +export type NativeTelemetryClientError = + | NativeTelemetrySpawnFailed + | NativeTelemetryHandshakeTimedOut + | NativeTelemetryProtocolMismatch + | NativeTelemetryDecodeFailed + | NativeTelemetryCommandFailed + | NativeTelemetryExited + | NativeTelemetryUnavailable; + +export interface NativeTelemetryClientHealth { + readonly status: ResourceTelemetrySourceStatus; + readonly hello: Option.Option; + readonly lastSampleAt: Option.Option; + readonly lastError: Option.Option; + readonly restartCount: number; +} + +export interface NativeTelemetryClientShape { + readonly capabilities: Effect.Effect; + readonly snapshots: Stream.Stream; + readonly setExternalProcesses: ( + processes: ReadonlyArray, + ) => Effect.Effect; + readonly sampleNow: Effect.Effect; + readonly retry: Effect.Effect; + readonly health: Effect.Effect; + readonly healthChanges: Stream.Stream; +} + +export class NativeTelemetryClient extends Context.Service< + NativeTelemetryClient, + NativeTelemetryClientShape +>()("t3/resourceTelemetry/NativeTelemetryClient") {} + +interface ClientState { + readonly status: ResourceTelemetrySourceStatus; + readonly handle: Option.Option; + readonly hello: Option.Option; + readonly lastSampleAt: Option.Option; + readonly lastError: Option.Option; + readonly restartCount: number; +} + +const initialState: ClientState = { + status: "starting", + handle: Option.none(), + hello: Option.none(), + lastSampleAt: Option.none(), + lastError: Option.none(), + restartCount: 0, +}; + +function toHealth(state: ClientState): NativeTelemetryClientHealth { + return { + status: state.status, + hello: state.hello, + lastSampleAt: state.lastSampleAt, + lastError: state.lastError, + 
restartCount: state.restartCount, + }; +} + +const decodeMonitorEvent: ( + value: unknown, +) => Effect.Effect = Schema.decodeUnknownEffect( + ResourceMonitorEventSchema, +); +const encodeMonitorCommand = Schema.encodeEffect( + Schema.fromJsonString(ResourceMonitorCommandSchema), +); +const isProtocolMismatch = Schema.is(NativeTelemetryProtocolMismatch); +const isDecodeFailed = Schema.is(NativeTelemetryDecodeFailed); +const isCommandFailed = Schema.is(NativeTelemetryCommandFailed); + +function eventVersion(value: unknown): number | undefined { + if (typeof value !== "object" || value === null) return undefined; + const version = Reflect.get(value, "version"); + return typeof version === "number" ? version : undefined; +} + +function restartDelay(attempt: number): Duration.Duration { + return Duration.min(Duration.times(INITIAL_RESTART_DELAY, 2 ** attempt), MAX_RESTART_DELAY); +} + +function errorMessage(error: NativeTelemetryClientError): string { + return error.message; +} + +export const make = Effect.fn("resourceTelemetry.nativeTelemetryClient.make")(function* () { + const binary = yield* ResourceMonitorBinary.ResourceMonitorBinary; + const spawner = yield* ChildProcessSpawner.ChildProcessSpawner; + const crypto = yield* Crypto.Crypto; + const config = yield* ServerConfig; + const state = yield* Ref.make(initialState); + const externalProcesses = yield* Ref.make>([]); + const pendingSamples = yield* Ref.make( + new Map>(), + ); + const snapshots = yield* PubSub.sliding(8); + const healthChanges = yield* PubSub.sliding(4); + const retryQueue = yield* Queue.sliding(1); + const commandMutex = yield* Semaphore.make(1); + const publishHealth = Ref.get(state).pipe( + Effect.map(toHealth), + Effect.flatMap((health) => PubSub.publish(healthChanges, health)), + Effect.asVoid, + ); + + const failPending = (error: NativeTelemetryClientError) => + Ref.getAndSet(pendingSamples, new Map()).pipe( + Effect.flatMap((pending) => + Effect.forEach(pending.values(), (deferred) => 
Deferred.fail(deferred, error), { + discard: true, + }), + ), + ); + + const writeCommand = ( + handle: ChildProcessSpawner.ChildProcessHandle, + command: ResourceMonitorCommand, + ): Effect.Effect => + commandMutex.withPermits(1)( + encodeMonitorCommand(command).pipe( + Effect.map((encoded) => `${encoded}\n`), + Effect.mapError( + (cause) => + new NativeTelemetryCommandFailed({ + operation: command.type, + cause, + }), + ), + Effect.flatMap((encoded) => + Stream.run(Stream.encodeText(Stream.make(encoded)), handle.stdin), + ), + Effect.mapError( + (cause) => + new NativeTelemetryCommandFailed({ + operation: command.type, + cause, + }), + ), + ), + ); + + const processEvent = ( + event: ResourceMonitorEvent, + helloDeferred: Deferred.Deferred, + ): Effect.Effect => { + switch (event.type) { + case "hello": + return Ref.update(state, (current) => ({ + ...current, + status: "healthy" as const, + hello: Option.some(event), + lastError: Option.none(), + })).pipe( + Effect.andThen(publishHealth), + Effect.andThen(Deferred.succeed(helloDeferred, event)), + Effect.asVoid, + ); + case "snapshot": + return Effect.gen(function* () { + const sampledAt = DateTime.makeUnsafe(event.sampledAtUnixMs); + yield* Ref.update(state, (current) => ({ + ...current, + status: "healthy" as const, + lastSampleAt: Option.some(sampledAt), + lastError: Option.none(), + })); + yield* PubSub.publish(snapshots, event); + if (event.requestId) { + const deferred = yield* Ref.modify(pendingSamples, (pending) => { + const next = new Map(pending); + const current = next.get(event.requestId!); + next.delete(event.requestId!); + return [Option.fromUndefinedOr(current), next]; + }); + if (Option.isSome(deferred)) { + yield* Deferred.succeed(deferred.value, event); + } + } + }); + case "error": + return Ref.update(state, (current) => ({ + ...current, + status: "degraded" as const, + lastError: Option.some(event.message), + })).pipe( + Effect.andThen(publishHealth), + Effect.andThen( + event.recoverable + ? 
Effect.void + : Effect.fail( + new NativeTelemetryCommandFailed({ + operation: event.code, + cause: event.message, + }), + ), + ), + ); + } + }; + + const runAttempt: Effect.Effect = Effect.scoped( + Effect.gen(function* () { + const executablePath = yield* binary.resolve.pipe( + Effect.mapError( + (error) => + new NativeTelemetryUnavailable({ + reason: error.message, + }), + ), + ); + const command = ChildProcess.make(executablePath, [], { + cwd: config.cwd, + stdin: { + stream: "pipe", + endOnDone: false, + }, + stdout: "pipe", + stderr: "pipe", + killSignal: "SIGTERM", + forceKillAfter: Duration.seconds(2), + }); + const handle = yield* Effect.acquireRelease( + spawner + .spawn(command) + .pipe( + Effect.mapError( + (cause) => new NativeTelemetrySpawnFailed({ path: executablePath, cause }), + ), + ), + (child) => child.kill().pipe(Effect.ignore), + ); + yield* Ref.update(state, (current) => ({ + ...current, + status: "starting" as const, + handle: Option.some(handle), + hello: Option.none(), + })); + yield* publishHealth; + + const helloDeferred = yield* Deferred.make(); + const eventFiber = yield* handle.stdout.pipe( + Stream.pipeThroughChannel(Ndjson.decode({ ignoreEmptyLines: true })), + Stream.mapEffect( + ( + value, + ): Effect.Effect< + ResourceMonitorEvent, + NativeTelemetryProtocolMismatch | NativeTelemetryDecodeFailed + > => { + const version = eventVersion(value); + if (version !== undefined && version !== RESOURCE_MONITOR_PROTOCOL_VERSION) { + return Effect.fail( + new NativeTelemetryProtocolMismatch({ + expectedVersion: RESOURCE_MONITOR_PROTOCOL_VERSION, + receivedVersion: version, + }), + ); + } + return decodeMonitorEvent(value).pipe( + Effect.mapError((cause) => new NativeTelemetryDecodeFailed({ cause })), + ); + }, + ), + Stream.runForEach((event) => processEvent(event, helloDeferred)), + Effect.mapError((cause) => + isProtocolMismatch(cause) || isDecodeFailed(cause) || isCommandFailed(cause) + ? 
cause + : new NativeTelemetryDecodeFailed({ cause }), + ), + Effect.forkScoped, + ); + yield* handle.stderr.pipe(Stream.runDrain, Effect.ignore, Effect.forkScoped); + + const hello = yield* Deferred.await(helloDeferred).pipe( + Effect.timeoutOption(HANDSHAKE_TIMEOUT), + Effect.flatMap( + Option.match({ + onNone: () => + Effect.fail( + new NativeTelemetryHandshakeTimedOut({ + timeoutMs: Duration.toMillis(HANDSHAKE_TIMEOUT), + }), + ), + onSome: Effect.succeed, + }), + ), + ); + yield* writeCommand(handle, { + version: RESOURCE_MONITOR_PROTOCOL_VERSION, + type: "configure", + rootPid: process.pid, + sampleIntervalMs: SAMPLE_INTERVAL_MS, + externalProcesses: [...(yield* Ref.get(externalProcesses))], + }); + yield* Ref.update(state, (current) => ({ + ...current, + status: "healthy" as const, + hello: Option.some(hello), + })); + + const exitEffect = handle.exitCode.pipe( + Effect.mapError( + (cause) => + new NativeTelemetryCommandFailed({ + operation: "waitForExit", + cause, + }), + ), + Effect.flatMap((exitCode) => + Effect.fail(new NativeTelemetryExited({ exitCode: Number(exitCode) })), + ), + ); + const decoderEffect = Fiber.join(eventFiber).pipe( + Effect.andThen(Effect.fail(new NativeTelemetryExited({ exitCode: -1 }))), + ); + return yield* Effect.raceFirst(exitEffect, decoderEffect); + }), + ).pipe( + Effect.ensuring( + Ref.update(state, (current) => ({ + ...current, + handle: Option.none(), + })), + ), + ); + + yield* Effect.gen(function* () { + let failures: ReadonlyArray = []; + let restartAttempt = 0; + + while (true) { + const result = yield* Effect.result(runAttempt); + if (Result.isSuccess(result)) { + return; + } + + const error = result.failure; + const now = DateTime.toEpochMillis(yield* DateTime.now); + failures = [...failures.filter((failedAt) => now - failedAt <= FAILURE_WINDOW_MS), now]; + const exhausted = failures.length >= MAX_FAILURES_PER_WINDOW; + yield* Ref.update(state, (current) => ({ + ...current, + status: exhausted ? 
("unavailable" as const) : ("degraded" as const), + hello: Option.none(), + lastError: Option.some(errorMessage(error)), + restartCount: current.restartCount + 1, + })); + yield* publishHealth; + yield* failPending(error); + + if (exhausted) { + yield* Queue.take(retryQueue); + failures = []; + restartAttempt = 0; + yield* Ref.update(state, (current) => ({ + ...current, + status: "starting" as const, + hello: Option.none(), + lastError: Option.none(), + })); + yield* publishHealth; + continue; + } + + const manuallyRetried = yield* Effect.raceFirst( + Effect.sleep(restartDelay(restartAttempt)).pipe(Effect.as(false)), + Queue.take(retryQueue).pipe(Effect.as(true)), + ); + restartAttempt = manuallyRetried ? 0 : restartAttempt + 1; + } + }).pipe( + Effect.catchCause((cause) => + Cause.hasInterruptsOnly(cause) + ? Effect.void + : Ref.update(state, (current) => ({ + ...current, + status: "unavailable" as const, + hello: Option.none(), + lastError: Option.some(Cause.pretty(cause)), + })).pipe( + Effect.andThen(publishHealth), + Effect.andThen( + Effect.logWarning("Resource monitor supervisor failed", { + cause: Cause.pretty(cause), + }), + ), + ), + ), + Effect.forkScoped, + ); + + const setExternalProcesses: NativeTelemetryClientShape["setExternalProcesses"] = (processes) => + Effect.gen(function* () { + yield* Ref.set(externalProcesses, [...processes]); + const current = yield* Ref.get(state); + if (Option.isNone(current.handle)) return; + yield* writeCommand(current.handle.value, { + version: RESOURCE_MONITOR_PROTOCOL_VERSION, + type: "setExternalProcesses", + processes: [...processes], + }); + }); + + const sampleNow: NativeTelemetryClientShape["sampleNow"] = Effect.gen(function* () { + const current = yield* Ref.get(state); + if (Option.isNone(current.handle)) { + return yield* new NativeTelemetryUnavailable({ + reason: Option.getOrElse(current.lastError, () => "sidecar is not running"), + }); + } + + const requestId = yield* crypto.randomUUIDv4.pipe( + 
Effect.mapError( + (cause) => + new NativeTelemetryCommandFailed({ + operation: "createRequestId", + cause, + }), + ), + ); + const deferred = yield* Deferred.make< + ResourceMonitorSnapshotEvent, + NativeTelemetryClientError + >(); + yield* Ref.update(pendingSamples, (pending) => { + const next = new Map(pending); + next.set(requestId, deferred); + return next; + }); + return yield* writeCommand(current.handle.value, { + version: RESOURCE_MONITOR_PROTOCOL_VERSION, + type: "sampleNow", + requestId, + }).pipe( + Effect.andThen( + Deferred.await(deferred).pipe( + Effect.timeoutOption(SAMPLE_REQUEST_TIMEOUT), + Effect.flatMap( + Option.match({ + onNone: () => + Effect.fail( + new NativeTelemetryCommandFailed({ + operation: "sampleNow", + cause: "sample request timed out", + }), + ), + onSome: Effect.succeed, + }), + ), + ), + ), + Effect.ensuring( + Ref.update(pendingSamples, (pending) => { + const next = new Map(pending); + next.delete(requestId); + return next; + }), + ), + ); + }); + + const health = Ref.get(state).pipe(Effect.map(toHealth)); + + return NativeTelemetryClient.of({ + capabilities: Ref.get(state).pipe( + Effect.flatMap((current) => + Option.match(current.hello, { + onNone: () => + Effect.fail( + new NativeTelemetryUnavailable({ + reason: Option.getOrElse(current.lastError, () => "handshake is incomplete"), + }), + ), + onSome: (hello) => Effect.succeed(hello.capabilities), + }), + ), + ), + snapshots: Stream.fromPubSub(snapshots), + setExternalProcesses, + sampleNow, + retry: Ref.get(state).pipe( + Effect.flatMap((current) => + current.status === "healthy" || current.status === "starting" + ? 
Effect.succeed(false) + : Queue.offer(retryQueue, undefined).pipe(Effect.as(true)), + ), + ), + health, + healthChanges: Stream.fromPubSub(healthChanges), + }); +}); + +export const layer = Layer.effect(NativeTelemetryClient, make()); + +export const layerTest = ( + overrides: Partial = {}, +): Layer.Layer => + Layer.succeed( + NativeTelemetryClient, + NativeTelemetryClient.of({ + capabilities: Effect.succeed({ + cumulativeCpuTime: true, + currentCpuPercent: true, + residentMemory: true, + virtualMemory: true, + ioBytes: true, + processStartTime: true, + processTree: true, + }), + snapshots: Stream.empty, + setExternalProcesses: () => Effect.void, + sampleNow: Effect.fail( + new NativeTelemetryUnavailable({ + reason: "No resource monitor sample was configured for this test.", + }), + ), + retry: Effect.succeed(false), + health: Effect.succeed({ + status: "unavailable", + hello: Option.none(), + lastSampleAt: Option.none(), + lastError: Option.some("Resource monitor test implementation is unavailable."), + restartCount: 0, + }), + healthChanges: Stream.empty, + ...overrides, + }), + ); diff --git a/apps/server/src/resourceTelemetry/ResourceAttribution.ts b/apps/server/src/resourceTelemetry/ResourceAttribution.ts new file mode 100644 index 00000000000..1d916cd0f7e --- /dev/null +++ b/apps/server/src/resourceTelemetry/ResourceAttribution.ts @@ -0,0 +1,73 @@ +import type { ResourceAttributionEntry, ResourceAttributionSnapshot } from "@t3tools/contracts"; +import * as Context from "effect/Context"; +import * as DateTime from "effect/DateTime"; +import * as Effect from "effect/Effect"; +import * as Layer from "effect/Layer"; +import * as Ref from "effect/Ref"; + +export interface ResourceAttributionRecord { + readonly component: string; + readonly operation: string; + readonly logicalReadBytes?: number; + readonly logicalWriteBytes?: number; + readonly count?: number; + readonly durationMs?: number; +} + +export interface ResourceAttributionShape { + readonly record: 
(input: ResourceAttributionRecord) => Effect.Effect; + readonly snapshot: Effect.Effect; +} + +export class ResourceAttribution extends Context.Service< + ResourceAttribution, + ResourceAttributionShape +>()("t3/resourceTelemetry/ResourceAttribution") {} + +function key(input: Pick): string { + return `${input.component}\u0000${input.operation}`; +} + +function nonNegativeInteger(value: number | undefined, fallback: number): number { + if (value === undefined) return fallback; + if (!Number.isFinite(value)) return 0; + return Math.max(0, Math.round(value)); +} + +export const make = Effect.fn("resourceTelemetry.resourceAttribution.make")(function* () { + const entries = yield* Ref.make(new Map()); + + const record: ResourceAttributionShape["record"] = (input) => + Ref.update(entries, (current) => { + const next = new Map(current); + const entryKey = key(input); + const existing = next.get(entryKey); + next.set(entryKey, { + component: input.component, + operation: input.operation, + logicalReadBytes: + (existing?.logicalReadBytes ?? 0) + nonNegativeInteger(input.logicalReadBytes, 0), + logicalWriteBytes: + (existing?.logicalWriteBytes ?? 0) + nonNegativeInteger(input.logicalWriteBytes, 0), + count: (existing?.count ?? 0) + nonNegativeInteger(input.count, 1), + durationMs: (existing?.durationMs ?? 
0) + nonNegativeInteger(input.durationMs, 0), + }); + return next; + }); + + return ResourceAttribution.of({ + record, + snapshot: Effect.gen(function* () { + const readAt = yield* DateTime.now; + const current = yield* Ref.get(entries); + return { + readAt, + entries: [...current.values()].toSorted( + (left, right) => right.logicalWriteBytes - left.logicalWriteBytes, + ), + }; + }), + }); +}); + +export const layer = Layer.effect(ResourceAttribution, make()); diff --git a/apps/server/src/resourceTelemetry/ResourceMonitorBinary.test.ts b/apps/server/src/resourceTelemetry/ResourceMonitorBinary.test.ts new file mode 100644 index 00000000000..1fe536e7828 --- /dev/null +++ b/apps/server/src/resourceTelemetry/ResourceMonitorBinary.test.ts @@ -0,0 +1,80 @@ +import * as NodeServices from "@effect/platform-node/NodeServices"; +import { + HostProcessArchitecture, + HostProcessEnvironment, + HostProcessPlatform, +} from "@t3tools/shared/hostProcess"; +import { assert, describe, it } from "@effect/vitest"; +import * as Effect from "effect/Effect"; +import * as FileSystem from "effect/FileSystem"; + +import { ServerConfig } from "../config.ts"; +import * as ResourceMonitorBinary from "./ResourceMonitorBinary.ts"; + +describe("ResourceMonitorBinary", () => { + it.effect("resolves an executable override", () => + Effect.gen(function* () { + const fileSystem = yield* FileSystem.FileSystem; + const baseDir = yield* fileSystem.makeTempDirectoryScoped({ + prefix: "t3-resource-monitor-binary-", + }); + const binaryPath = `${baseDir}/t3-resource-monitor`; + yield* fileSystem.writeFileString(binaryPath, "binary"); + yield* fileSystem.chmod(binaryPath, 0o755); + + const service = yield* ResourceMonitorBinary.make().pipe( + Effect.provide(ServerConfig.layerTest(process.cwd(), baseDir)), + Effect.provideService(HostProcessPlatform, "linux"), + Effect.provideService(HostProcessArchitecture, "x64"), + Effect.provideService(HostProcessEnvironment, { + T3CODE_RESOURCE_MONITOR_PATH: 
binaryPath, + }), + ); + + assert.equal(yield* service.resolve, binaryPath); + }).pipe(Effect.scoped, Effect.provide(NodeServices.layer)), + ); + + it.effect("rejects a non-executable POSIX override", () => + Effect.gen(function* () { + const fileSystem = yield* FileSystem.FileSystem; + const baseDir = yield* fileSystem.makeTempDirectoryScoped({ + prefix: "t3-resource-monitor-binary-", + }); + const binaryPath = `${baseDir}/t3-resource-monitor`; + yield* fileSystem.writeFileString(binaryPath, "binary"); + yield* fileSystem.chmod(binaryPath, 0o644); + + const service = yield* ResourceMonitorBinary.make().pipe( + Effect.provide(ServerConfig.layerTest(process.cwd(), baseDir)), + Effect.provideService(HostProcessPlatform, "linux"), + Effect.provideService(HostProcessArchitecture, "x64"), + Effect.provideService(HostProcessEnvironment, { + T3CODE_RESOURCE_MONITOR_PATH: binaryPath, + }), + ); + const error = yield* Effect.flip(service.resolve); + + assert.instanceOf(error, ResourceMonitorBinary.ResourceMonitorBinaryNotExecutable); + assert.equal(error.path, binaryPath); + }).pipe(Effect.scoped, Effect.provide(NodeServices.layer)), + ); + + it.effect("rejects unsupported platform and architecture pairs", () => + Effect.gen(function* () { + const fileSystem = yield* FileSystem.FileSystem; + const baseDir = yield* fileSystem.makeTempDirectoryScoped({ + prefix: "t3-resource-monitor-binary-", + }); + const service = yield* ResourceMonitorBinary.make().pipe( + Effect.provide(ServerConfig.layerTest(process.cwd(), baseDir)), + Effect.provideService(HostProcessPlatform, "freebsd"), + Effect.provideService(HostProcessArchitecture, "ia32"), + Effect.provideService(HostProcessEnvironment, {}), + ); + const error = yield* Effect.flip(service.resolve); + + assert.instanceOf(error, ResourceMonitorBinary.ResourceMonitorBinaryUnsupported); + }).pipe(Effect.scoped, Effect.provide(NodeServices.layer)), + ); +}); diff --git a/apps/server/src/resourceTelemetry/ResourceMonitorBinary.ts 
b/apps/server/src/resourceTelemetry/ResourceMonitorBinary.ts new file mode 100644 index 00000000000..6e3cec09d82 --- /dev/null +++ b/apps/server/src/resourceTelemetry/ResourceMonitorBinary.ts @@ -0,0 +1,193 @@ +import { + HostProcessArchitecture, + HostProcessEnvironment, + HostProcessPlatform, +} from "@t3tools/shared/hostProcess"; +import * as Context from "effect/Context"; +import * as Effect from "effect/Effect"; +import * as FileSystem from "effect/FileSystem"; +import * as Layer from "effect/Layer"; +import * as Option from "effect/Option"; +import * as Path from "effect/Path"; +import * as Schema from "effect/Schema"; + +import { ServerConfig } from "../config.ts"; + +export class ResourceMonitorBinaryUnsupported extends Schema.TaggedErrorClass()( + "ResourceMonitorBinaryUnsupported", + { + platform: Schema.String, + architecture: Schema.String, + }, +) { + override get message(): string { + return `Resource monitoring is unsupported on ${this.platform}/${this.architecture}.`; + } +} + +export class ResourceMonitorBinaryNotFound extends Schema.TaggedErrorClass()( + "ResourceMonitorBinaryNotFound", + { + platform: Schema.String, + architecture: Schema.String, + candidates: Schema.Array(Schema.String), + }, +) { + override get message(): string { + return `Resource monitor binary was not found for ${this.platform}/${this.architecture}.`; + } +} + +export class ResourceMonitorBinaryNotExecutable extends Schema.TaggedErrorClass()( + "ResourceMonitorBinaryNotExecutable", + { + path: Schema.String, + mode: Schema.Number, + }, +) { + override get message(): string { + return `Resource monitor binary at '${this.path}' is not executable.`; + } +} + +export type ResourceMonitorBinaryError = + | ResourceMonitorBinaryUnsupported + | ResourceMonitorBinaryNotFound + | ResourceMonitorBinaryNotExecutable; + +export interface ResourceMonitorBinaryShape { + readonly resolve: Effect.Effect; +} + +export class ResourceMonitorBinary extends Context.Service< + 
ResourceMonitorBinary, + ResourceMonitorBinaryShape +>()("t3/resourceTelemetry/ResourceMonitorBinary") {} + +function binaryName(platform: NodeJS.Platform): string { + return platform === "win32" ? "t3-resource-monitor.exe" : "t3-resource-monitor"; +} + +export function resourceMonitorPlatformKey( + platform: NodeJS.Platform, + architecture: NodeJS.Architecture, +): string | undefined { + if ( + (platform !== "darwin" && platform !== "linux" && platform !== "win32") || + (architecture !== "arm64" && architecture !== "x64") + ) { + return undefined; + } + return `${platform}-${architecture}`; +} + +function resourceMonitorRustTarget( + platform: NodeJS.Platform, + architecture: NodeJS.Architecture, +): string | undefined { + if (platform === "darwin") { + return architecture === "arm64" + ? "aarch64-apple-darwin" + : architecture === "x64" + ? "x86_64-apple-darwin" + : undefined; + } + if (platform === "linux") { + return architecture === "arm64" + ? "aarch64-unknown-linux-gnu" + : architecture === "x64" + ? "x86_64-unknown-linux-gnu" + : undefined; + } + if (platform === "win32") { + return architecture === "arm64" + ? "aarch64-pc-windows-msvc" + : architecture === "x64" + ? 
"x86_64-pc-windows-msvc" + : undefined; + } + return undefined; +} + +export const make = Effect.fn("resourceTelemetry.resourceMonitorBinary.make")(function* () { + const config = yield* ServerConfig; + const fileSystem = yield* FileSystem.FileSystem; + const path = yield* Path.Path; + const platform = yield* HostProcessPlatform; + const architecture = yield* HostProcessArchitecture; + const environment = yield* HostProcessEnvironment; + const executableName = binaryName(platform); + const platformKey = resourceMonitorPlatformKey(platform, architecture); + const rustTarget = resourceMonitorRustTarget(platform, architecture); + if (platformKey === undefined || rustTarget === undefined) { + return ResourceMonitorBinary.of({ + resolve: Effect.fail( + new ResourceMonitorBinaryUnsupported({ + platform, + architecture, + }), + ), + }); + } + + const candidates = [ + environment.T3CODE_RESOURCE_MONITOR_PATH, + config.resourceMonitorPath, + path.resolve(import.meta.dirname, "resource-monitor", platformKey, executableName), + path.resolve(import.meta.dirname, "resource-monitor", executableName), + path.resolve(import.meta.dirname, "../resource-monitor", executableName), + path.resolve( + import.meta.dirname, + "../../../../native/resource-monitor/target", + rustTarget, + "release", + executableName, + ), + path.resolve( + import.meta.dirname, + "../../../native/resource-monitor/target", + rustTarget, + "release", + executableName, + ), + path.resolve( + import.meta.dirname, + "../../../../native/resource-monitor/target/release", + executableName, + ), + path.resolve( + import.meta.dirname, + "../../../../native/resource-monitor/target/debug", + executableName, + ), + ].filter((candidate): candidate is string => Boolean(candidate)); + + const resolve: ResourceMonitorBinaryShape["resolve"] = Effect.gen(function* () { + for (const candidate of candidates) { + const exists = yield* fileSystem.exists(candidate).pipe(Effect.orElseSucceed(() => false)); + if (!exists) continue; + 
+ if (platform !== "win32") { + const stat = yield* fileSystem.stat(candidate).pipe(Effect.option); + if (Option.isSome(stat) && (stat.value.mode & 0o111) === 0) { + return yield* new ResourceMonitorBinaryNotExecutable({ + path: candidate, + mode: stat.value.mode, + }); + } + } + + return candidate; + } + + return yield* new ResourceMonitorBinaryNotFound({ + platform, + architecture, + candidates, + }); + }); + + return ResourceMonitorBinary.of({ resolve }); +}); + +export const layer = Layer.effect(ResourceMonitorBinary, make()); diff --git a/apps/server/src/resourceTelemetry/ResourceTelemetry.test.ts b/apps/server/src/resourceTelemetry/ResourceTelemetry.test.ts new file mode 100644 index 00000000000..cbf41dbd544 --- /dev/null +++ b/apps/server/src/resourceTelemetry/ResourceTelemetry.test.ts @@ -0,0 +1,302 @@ +import type { + DesktopHostTelemetrySnapshot, + ResourceMonitorProcessSample, + ResourceMonitorSnapshotEvent, +} from "@t3tools/contracts"; +import { describe, expect, it } from "@effect/vitest"; +import * as DateTime from "effect/DateTime"; +import * as Duration from "effect/Duration"; +import * as Effect from "effect/Effect"; +import * as Fiber from "effect/Fiber"; +import * as Layer from "effect/Layer"; +import * as Option from "effect/Option"; +import * as PubSub from "effect/PubSub"; +import * as Ref from "effect/Ref"; +import * as Stream from "effect/Stream"; +import * as TestClock from "effect/testing/TestClock"; + +import * as DesktopTelemetryReceiver from "./DesktopTelemetryReceiver.ts"; +import * as NativeTelemetryClient from "./NativeTelemetryClient.ts"; +import * as ResourceAttribution from "./ResourceAttribution.ts"; +import * as ResourceTelemetry from "./ResourceTelemetry.ts"; + +function processSample( + input: Partial & + Pick, +): ResourceMonitorProcessSample { + return { + runTimeMs: 1_000, + name: `process-${input.pid}`, + command: `process-${input.pid}`, + status: "Running", + cpuPercent: 0, + cpuTimeMs: 0, + residentBytes: 1_024, + 
virtualBytes: 2_048, + ioReadBytes: 0, + ioWriteBytes: 0, + ioSemantics: "storage", + ...input, + }; +} + +function nativeSnapshot(input: { + readonly sequence: number; + readonly sampledAtUnixMs: number; + readonly childCpuTimeMs: number; + readonly childWriteBytes: number; +}): ResourceMonitorSnapshotEvent { + const processes = [ + processSample({ + pid: process.pid, + ppid: 1, + startTimeMs: 100, + cpuTimeMs: input.sequence * 10, + }), + processSample({ + pid: 4_242, + ppid: process.pid, + startTimeMs: 200, + name: "codex", + command: "codex app-server", + cpuTimeMs: input.childCpuTimeMs, + ioWriteBytes: input.childWriteBytes, + }), + processSample({ + pid: 5_000, + ppid: 1, + startTimeMs: 300, + name: "electron", + command: "electron", + cpuTimeMs: input.sequence * 20, + }), + processSample({ + pid: 9_000, + ppid: process.pid, + startTimeMs: 400, + name: "t3-resource-monitor", + command: "t3-resource-monitor", + cpuTimeMs: input.sequence * 5, + }), + ]; + return { + version: 1, + type: "snapshot", + sequence: input.sequence, + sampledAtUnixMs: input.sampledAtUnixMs, + collectionDurationMicros: 300, + scannedProcessCount: 80, + retainedProcessCount: processes.length, + inaccessibleProcessCount: 1, + processes, + }; +} + +function desktopSnapshot(sampledAtUnixMs: number): DesktopHostTelemetrySnapshot { + const sampledAt = DateTime.makeUnsafe(sampledAtUnixMs); + return { + version: 1, + type: "desktopTelemetry", + sequence: 1, + sampledAtUnixMs, + power: { + source: "electron-main", + idle: "false", + idleSeconds: 2, + locked: "false", + suspended: false, + onBattery: "true", + lowPowerMode: "unknown", + thermalState: "fair", + stale: false, + updatedAt: sampledAt, + }, + speedLimitPercent: Option.some(90), + electronProcesses: [ + { + pid: 5_000, + creationTimeMs: 300, + type: "Browser", + name: "electron", + cpuPercent: 2, + cumulativeCpuSeconds: 0.02, + idleWakeupsPerSecond: 3, + workingSetBytes: 4_096, + peakWorkingSetBytes: 8_192, + }, + ], + }; +} + 
+describe("ResourceTelemetry", () => { + it.effect("combines native, Electron, attribution, retry, and history data", () => + Effect.gen(function* () { + const startedAt = DateTime.toEpochMillis(yield* DateTime.now); + const samples = [ + nativeSnapshot({ + sequence: 1, + sampledAtUnixMs: startedAt, + childCpuTimeMs: 100, + childWriteBytes: 1_000, + }), + nativeSnapshot({ + sequence: 2, + sampledAtUnixMs: startedAt + 1_000, + childCpuTimeMs: 350, + childWriteBytes: 5_000, + }), + nativeSnapshot({ + sequence: 1, + sampledAtUnixMs: startedAt + 2_000, + childCpuTimeMs: 500, + childWriteBytes: 7_000, + }), + ] as const; + const sampleIndex = yield* Ref.make(0); + const externalProcesses = yield* Ref.make< + ReadonlyArray<{ readonly pid: number; readonly startTimeMs?: number }> + >([]); + const retryCount = yield* Ref.make(0); + const nativeHealth = yield* Ref.make({ + status: "healthy", + hello: Option.some({ + version: 1, + type: "hello", + sidecarVersion: "0.1.0", + sidecarPid: 9_000, + platform: "test", + arch: "test", + capabilities: { + cumulativeCpuTime: true, + currentCpuPercent: true, + residentMemory: true, + virtualMemory: true, + ioBytes: true, + processStartTime: true, + processTree: true, + }, + }), + lastSampleAt: Option.some(DateTime.makeUnsafe(startedAt)), + lastError: Option.none(), + restartCount: 2, + }); + const nativeHealthChanges = + yield* PubSub.sliding(4); + const nativeLayer = NativeTelemetryClient.layerTest({ + setExternalProcesses: (processes) => Ref.set(externalProcesses, processes), + sampleNow: Ref.modify(sampleIndex, (index) => [ + samples[Math.min(index, samples.length - 1)]!, + index + 1, + ]), + retry: Ref.updateAndGet(retryCount, (count) => count + 1).pipe(Effect.as(true)), + health: Ref.get(nativeHealth), + healthChanges: Stream.fromPubSub(nativeHealthChanges), + }); + const desktopLayer = DesktopTelemetryReceiver.layerTest({ + latest: Effect.succeedSome(desktopSnapshot(startedAt)), + health: Effect.succeed({ + status: "healthy", + 
lastSampleAt: Option.some(DateTime.makeUnsafe(startedAt)), + lastError: Option.none(), + }), + }); + const attributionLayer = ResourceAttribution.layer; + const dependencies = Layer.mergeAll(nativeLayer, desktopLayer, attributionLayer); + const telemetryLayer = ResourceTelemetry.layer.pipe(Layer.provide(dependencies)); + const layer = Layer.mergeAll(dependencies, telemetryLayer); + + yield* Effect.gen(function* () { + const telemetry = yield* ResourceTelemetry.ResourceTelemetry; + const attribution = yield* ResourceAttribution.ResourceAttribution; + + expect(yield* Ref.get(externalProcesses)).toEqual([{ pid: 5_000, startTimeMs: 300 }]); + + yield* attribution.record({ + component: "provider-event-log", + operation: "append", + logicalWriteBytes: 512, + count: 2, + durationMs: 4, + }); + const first = yield* telemetry.refresh; + expect(first.groups.backend.processCount).toBe(2); + expect(first.groups.electron.processCount).toBe(1); + expect(first.groups.monitor.processCount).toBe(1); + expect(first.power.onBattery).toBe("true"); + expect(Option.getOrNull(first.speedLimitPercent)).toBe(90); + expect(first.attribution.entries).toEqual([ + { + component: "provider-event-log", + operation: "append", + logicalReadBytes: 0, + logicalWriteBytes: 512, + count: 2, + durationMs: 4, + }, + ]); + + yield* TestClock.adjust(Duration.seconds(1)); + const second = yield* telemetry.refresh; + const codex = second.processes.find((entry) => entry.identity.pid === 4_242); + expect(codex?.cpuPercent).toBe(25); + expect(codex?.ioWriteBytesPerSecond).toBe(4_000); + expect(second.groups.backend.ioWriteBytes).toBe(4_000); + expect(second.health.collectionDurationMicros).toBe(300); + expect(second.health.scannedProcessCount).toBe(80); + expect(second.health.inaccessibleProcessCount).toBe(1); + + const history = yield* telemetry.readHistory({ + windowMs: 60_000, + bucketMs: 10_000, + }); + expect(history.retainedSampleCount).toBeGreaterThan(0); + expect( + history.topProcesses.find((entry) => 
entry.identity.pid === 4_242)?.sampleCount, + ).toBe(2); + expect(history.topProcesses.find((entry) => entry.identity.pid === 4_242)?.cpuTimeMs).toBe( + 250, + ); + expect( + history.topProcesses.find((entry) => entry.identity.pid === 4_242)?.ioWriteBytes, + ).toBe(4_000); + expect(history.buckets.reduce((total, bucket) => total + bucket.ioWriteBytes, 0)).toBe( + 4_000, + ); + + const retry = yield* telemetry.retry; + expect(retry.accepted).toBe(true); + expect(yield* Ref.get(retryCount)).toBe(1); + + yield* Ref.update(nativeHealth, (current) => ({ + ...current, + hello: Option.map(current.hello, (hello) => ({ + ...hello, + sidecarPid: 9_001, + })), + restartCount: 3, + })); + yield* TestClock.adjust(Duration.seconds(1)); + const restarted = yield* telemetry.refresh; + expect(DateTime.toEpochMillis(restarted.readAt)).toBe(startedAt + 2_000); + expect(Option.getOrNull(restarted.health.sidecarPid)).toBe(9_001); + + const healthUpdateFiber = yield* Stream.runHead(telemetry.changes).pipe(Effect.forkChild); + yield* Effect.yieldNow; + yield* Ref.update(nativeHealth, (current) => ({ + ...current, + status: "degraded" as const, + lastError: Option.some("collector exited"), + })); + yield* PubSub.publish(nativeHealthChanges, yield* Ref.get(nativeHealth)); + const healthUpdate = Option.getOrThrow(yield* Fiber.join(healthUpdateFiber)); + expect(healthUpdate.health.native.status).toBe("degraded"); + expect(Option.getOrNull(healthUpdate.health.native.lastError)).toBe("collector exited"); + const degradedHistory = yield* telemetry.readHistory({ + windowMs: 60_000, + bucketMs: 10_000, + }); + expect(degradedHistory.health.native.status).toBe("degraded"); + }).pipe(Effect.provide(layer)); + }), + ); +}); diff --git a/apps/server/src/resourceTelemetry/ResourceTelemetry.ts b/apps/server/src/resourceTelemetry/ResourceTelemetry.ts new file mode 100644 index 00000000000..a74f8d664c9 --- /dev/null +++ b/apps/server/src/resourceTelemetry/ResourceTelemetry.ts @@ -0,0 +1,372 @@ +import 
type { + DesktopHostTelemetrySnapshot, + HostPowerSnapshot, + ResourceMonitorSnapshotEvent, + ResourceTelemetryHealth, + ResourceTelemetryHistory, + ResourceTelemetryHistoryInput, + ResourceTelemetryProcessIdentity, + ResourceTelemetryRetryResult, + ResourceTelemetrySnapshot, +} from "@t3tools/contracts"; +import * as Context from "effect/Context"; +import * as DateTime from "effect/DateTime"; +import * as Effect from "effect/Effect"; +import * as Layer from "effect/Layer"; +import * as Option from "effect/Option"; +import * as PubSub from "effect/PubSub"; +import * as Ref from "effect/Ref"; +import * as Schema from "effect/Schema"; +import * as Semaphore from "effect/Semaphore"; +import * as Stream from "effect/Stream"; + +import * as DesktopTelemetryReceiver from "./DesktopTelemetryReceiver.ts"; +import { + emptyTelemetryCounters, + mergeProcesses, + type ProcessState, + type TelemetryCounters, +} from "./Model.ts"; +import * as NativeTelemetryClient from "./NativeTelemetryClient.ts"; +import * as ResourceAttribution from "./ResourceAttribution.ts"; +import * as ResourceTelemetryStore from "./ResourceTelemetryStore.ts"; + +const SAMPLE_INTERVAL_MS = 1_000; + +export class ResourceTelemetryRefreshFailed extends Schema.TaggedErrorClass()( + "ResourceTelemetryRefreshFailed", + { + operation: Schema.String, + cause: Schema.Defect(), + }, +) { + override get message(): string { + return `Resource telemetry operation '${this.operation}' failed.`; + } +} + +export interface ResourceTelemetryShape { + readonly latest: Effect.Effect; + readonly changes: Stream.Stream; + readonly readHistory: ( + input: ResourceTelemetryHistoryInput, + ) => Effect.Effect; + readonly refresh: Effect.Effect; + readonly validateProcessIdentity: ( + identity: ResourceTelemetryProcessIdentity, + ) => Effect.Effect; + readonly retry: Effect.Effect; +} + +export class ResourceTelemetry extends Context.Service()( + "t3/resourceTelemetry/ResourceTelemetry", +) {} + +interface TelemetryState { + 
readonly nativeSnapshot: Option.Option; + readonly desktopSnapshot: Option.Option; + readonly previous: ReadonlyMap; + readonly counters: TelemetryCounters; + readonly latest: ResourceTelemetrySnapshot; + readonly lastNativeSequence: number; + readonly lastNativeRestartCount: number; +} + +function unknownPower(updatedAt: DateTime.Utc): HostPowerSnapshot { + return { + source: "unknown", + idle: "unknown", + idleSeconds: null, + locked: "unknown", + suspended: false, + onBattery: "unknown", + lowPowerMode: "unknown", + thermalState: "unknown", + stale: true, + updatedAt, + }; +} + +function buildHealth(input: { + readonly native: NativeTelemetryClient.NativeTelemetryClientHealth; + readonly desktop: DesktopTelemetryReceiver.DesktopTelemetryReceiverHealth; + readonly nativeSnapshot: Option.Option; +}): ResourceTelemetryHealth { + return { + native: { + status: input.native.status, + lastSampleAt: input.native.lastSampleAt, + lastError: input.native.lastError, + }, + desktop: { + status: input.desktop.status, + lastSampleAt: input.desktop.lastSampleAt, + lastError: input.desktop.lastError, + }, + sidecarVersion: Option.map(input.native.hello, (hello) => hello.sidecarVersion), + sidecarPid: Option.map(input.native.hello, (hello) => hello.sidecarPid), + restartCount: input.native.restartCount, + collectionDurationMicros: Option.match(input.nativeSnapshot, { + onNone: () => 0, + onSome: (snapshot) => snapshot.collectionDurationMicros, + }), + scannedProcessCount: Option.match(input.nativeSnapshot, { + onNone: () => 0, + onSome: (snapshot) => snapshot.scannedProcessCount, + }), + retainedProcessCount: Option.match(input.nativeSnapshot, { + onNone: () => 0, + onSome: (snapshot) => snapshot.retainedProcessCount, + }), + inaccessibleProcessCount: Option.match(input.nativeSnapshot, { + onNone: () => 0, + onSome: (snapshot) => snapshot.inaccessibleProcessCount, + }), + }; +} + +export const make = Effect.fn("resourceTelemetry.resourceTelemetry.make")(function* () { + const 
nativeClient = yield* NativeTelemetryClient.NativeTelemetryClient; + const desktopReceiver = yield* DesktopTelemetryReceiver.DesktopTelemetryReceiver; + const attribution = yield* ResourceAttribution.ResourceAttribution; + const mutex = yield* Semaphore.make(1); + const changes = yield* PubSub.sliding(8); + const initialReadAt = yield* DateTime.now; + const initialDesktop = yield* desktopReceiver.latest; + if (Option.isSome(initialDesktop)) { + yield* nativeClient + .setExternalProcesses( + initialDesktop.value.electronProcesses.map((process) => ({ + pid: process.pid, + startTimeMs: process.creationTimeMs, + })), + ) + .pipe(Effect.ignore); + } + const [initialNativeHealth, initialDesktopHealth, initialAttribution] = yield* Effect.all([ + nativeClient.health, + desktopReceiver.health, + attribution.snapshot, + ]); + const initialMerge = mergeProcesses({ + serverPid: process.pid, + sidecarPid: Option.map(initialNativeHealth.hello, (hello) => hello.sidecarPid), + fallbackSampledAtMs: DateTime.toEpochMillis(initialReadAt), + nativeSnapshot: Option.none(), + desktopSnapshot: initialDesktop, + previous: new Map(), + counters: emptyTelemetryCounters(), + updatePrevious: false, + }); + const initialSnapshot: ResourceTelemetrySnapshot = { + readAt: initialReadAt, + sampleIntervalMs: SAMPLE_INTERVAL_MS, + processes: initialMerge.processes, + groups: initialMerge.groups, + power: Option.match(initialDesktop, { + onNone: () => unknownPower(initialReadAt), + onSome: (desktop) => desktop.power, + }), + speedLimitPercent: Option.flatMap(initialDesktop, (desktop) => desktop.speedLimitPercent), + attribution: initialAttribution, + health: buildHealth({ + native: initialNativeHealth, + desktop: initialDesktopHealth, + nativeSnapshot: Option.none(), + }), + }; + const state = yield* Ref.make({ + nativeSnapshot: Option.none(), + desktopSnapshot: initialDesktop, + previous: new Map(), + counters: emptyTelemetryCounters(), + latest: initialSnapshot, + lastNativeSequence: 0, + 
lastNativeRestartCount: initialNativeHealth.restartCount, + }); + const store = yield* ResourceTelemetryStore.make(initialSnapshot); + const refreshHealth = mutex.withPermits(1)( + Effect.gen(function* () { + const current = yield* Ref.get(state); + const [nativeHealth, desktopHealth] = yield* Effect.all([ + nativeClient.health, + desktopReceiver.health, + ]); + const snapshot: ResourceTelemetrySnapshot = { + ...current.latest, + health: buildHealth({ + native: nativeHealth, + desktop: desktopHealth, + nativeSnapshot: current.nativeSnapshot, + }), + }; + yield* Ref.set(state, { + ...current, + latest: snapshot, + }); + yield* store.updateLatest(snapshot); + yield* PubSub.publish(changes, snapshot); + }), + ); + + const rebuild = (input: { + readonly nativeSnapshot?: ResourceMonitorSnapshotEvent; + readonly desktopSnapshot?: DesktopHostTelemetrySnapshot; + readonly updatePrevious: boolean; + }): Effect.Effect => + mutex.withPermits(1)( + Effect.gen(function* () { + const current = yield* Ref.get(state); + const nativeHealth = yield* nativeClient.health; + const nativeGenerationChanged = + nativeHealth.restartCount !== current.lastNativeRestartCount; + if ( + input.nativeSnapshot && + !nativeGenerationChanged && + input.nativeSnapshot.sequence <= current.lastNativeSequence + ) { + return current.latest; + } + const nativeSnapshot = input.nativeSnapshot + ? Option.some(input.nativeSnapshot) + : current.nativeSnapshot; + const desktopSnapshot = input.desktopSnapshot + ? 
Option.some(input.desktopSnapshot) + : current.desktopSnapshot; + const [desktopHealth, attributionSnapshot] = yield* Effect.all([ + desktopReceiver.health, + attribution.snapshot, + ]); + const merged = mergeProcesses({ + serverPid: process.pid, + sidecarPid: Option.map(nativeHealth.hello, (hello) => hello.sidecarPid), + fallbackSampledAtMs: DateTime.toEpochMillis(current.latest.readAt), + nativeSnapshot, + desktopSnapshot, + previous: current.previous, + counters: current.counters, + updatePrevious: input.updatePrevious, + }); + const readAt = DateTime.makeUnsafe(merged.sampledAtMs); + const snapshot: ResourceTelemetrySnapshot = { + readAt, + sampleIntervalMs: SAMPLE_INTERVAL_MS, + processes: merged.processes, + groups: merged.groups, + power: Option.match(desktopSnapshot, { + onNone: () => unknownPower(readAt), + onSome: (desktop) => desktop.power, + }), + speedLimitPercent: Option.match(desktopSnapshot, { + onNone: () => Option.none(), + onSome: (desktop) => desktop.speedLimitPercent, + }), + attribution: attributionSnapshot, + health: buildHealth({ + native: nativeHealth, + desktop: desktopHealth, + nativeSnapshot, + }), + }; + yield* Ref.set(state, { + nativeSnapshot, + desktopSnapshot, + previous: merged.previous, + counters: merged.counters, + latest: snapshot, + lastNativeSequence: input.nativeSnapshot?.sequence ?? current.lastNativeSequence, + lastNativeRestartCount: input.nativeSnapshot + ? 
nativeHealth.restartCount + : current.lastNativeRestartCount, + }); + if (input.updatePrevious) { + yield* store.record(snapshot, merged.deltas); + } + yield* PubSub.publish(changes, snapshot); + return snapshot; + }), + ); + + const ingestNative = (snapshot: ResourceMonitorSnapshotEvent) => + rebuild({ nativeSnapshot: snapshot, updatePrevious: true }); + const ingestDesktop = (snapshot: DesktopHostTelemetrySnapshot) => + Effect.gen(function* () { + yield* nativeClient + .setExternalProcesses( + snapshot.electronProcesses.map((process) => ({ + pid: process.pid, + startTimeMs: process.creationTimeMs, + })), + ) + .pipe(Effect.ignore); + return yield* rebuild({ desktopSnapshot: snapshot, updatePrevious: false }); + }); + + yield* nativeClient.snapshots.pipe( + Stream.runForEach((snapshot) => ingestNative(snapshot)), + Effect.catch((error) => + Effect.logWarning("Native resource telemetry stream stopped", { + cause: error.message, + }), + ), + Effect.forkScoped, + ); + yield* desktopReceiver.changes.pipe( + Stream.runForEach((snapshot) => ingestDesktop(snapshot)), + Effect.forkScoped, + ); + yield* nativeClient.healthChanges.pipe( + Stream.runForEach(() => refreshHealth), + Effect.forkScoped, + ); + yield* desktopReceiver.healthChanges.pipe( + Stream.runForEach(() => refreshHealth), + Effect.forkScoped, + ); + + const refresh: ResourceTelemetryShape["refresh"] = nativeClient.sampleNow.pipe( + Effect.flatMap(ingestNative), + Effect.mapError( + (cause) => + new ResourceTelemetryRefreshFailed({ + operation: "refresh", + cause, + }), + ), + ); + + const validateProcessIdentity: ResourceTelemetryShape["validateProcessIdentity"] = (identity) => + nativeClient.sampleNow.pipe( + Effect.map((snapshot) => + snapshot.processes.some( + (process) => process.pid === identity.pid && process.startTimeMs === identity.startTimeMs, + ), + ), + Effect.mapError( + (cause) => + new ResourceTelemetryRefreshFailed({ + operation: "validateProcessIdentity", + cause, + }), + ), + ); + + return 
ResourceTelemetry.of({ + latest: Ref.get(state).pipe(Effect.map((current) => current.latest)), + changes: Stream.fromPubSub(changes), + readHistory: store.readHistory, + refresh, + validateProcessIdentity, + retry: nativeClient.retry.pipe( + Effect.zip(Ref.get(state)), + Effect.map( + ([accepted, current]): ResourceTelemetryRetryResult => ({ + accepted, + snapshot: current.latest, + }), + ), + ), + }); +}); + +export const layer = Layer.effect(ResourceTelemetry, make()); diff --git a/apps/server/src/resourceTelemetry/ResourceTelemetryStore.ts b/apps/server/src/resourceTelemetry/ResourceTelemetryStore.ts new file mode 100644 index 00000000000..cb016db1a39 --- /dev/null +++ b/apps/server/src/resourceTelemetry/ResourceTelemetryStore.ts @@ -0,0 +1,253 @@ +import type { + ResourceTelemetryHistory, + ResourceTelemetryHistoryBucket, + ResourceTelemetryHistoryInput, + ResourceTelemetryProcess, + ResourceTelemetryProcessSummary, + ResourceTelemetrySnapshot, +} from "@t3tools/contracts"; +import * as Context from "effect/Context"; +import * as DateTime from "effect/DateTime"; +import * as Effect from "effect/Effect"; +import * as Layer from "effect/Layer"; +import * as Ref from "effect/Ref"; + +import type { ProcessDelta } from "./Model.ts"; +import { processIdentityKey } from "./Model.ts"; + +const RETENTION_MS = 60 * 60_000; +const MAX_AGGREGATE_SAMPLES = 3_600; +const MAX_PROCESS_SAMPLES = 20_000; + +interface AggregateSample { + readonly sampledAtMs: number; + readonly cpuPercent: number; + readonly rssBytes: number; + readonly processCount: number; + readonly ioReadBytes: number; + readonly ioWriteBytes: number; +} + +interface ProcessSample { + readonly sampledAtMs: number; + readonly process: ResourceTelemetryProcess; + readonly cpuTimeMs: number; + readonly ioReadBytes: number; + readonly ioWriteBytes: number; +} + +interface StoreState { + readonly aggregateSamples: ReadonlyArray; + readonly processSamples: ReadonlyArray; + readonly latest: 
ResourceTelemetrySnapshot; +} + +export interface ResourceTelemetryStoreShape { + readonly updateLatest: (snapshot: ResourceTelemetrySnapshot) => Effect.Effect; + readonly record: ( + snapshot: ResourceTelemetrySnapshot, + deltas: ReadonlyArray, + ) => Effect.Effect; + readonly readHistory: ( + input: ResourceTelemetryHistoryInput, + ) => Effect.Effect; +} + +export class ResourceTelemetryStore extends Context.Service< + ResourceTelemetryStore, + ResourceTelemetryStoreShape +>()("t3/resourceTelemetry/ResourceTelemetryStore") {} + +function trimByTime( + values: ReadonlyArray, + nowMs: number, + max: number, +): ReadonlyArray { + const retained = values.filter((value) => value.sampledAtMs >= nowMs - RETENTION_MS); + return retained.length <= max ? retained : retained.slice(retained.length - max); +} + +function summarizeProcesses( + samples: ReadonlyArray, +): ReadonlyArray { + const groups = new Map(); + for (const sample of samples) { + const identityKey = processIdentityKey( + sample.process.identity.pid, + sample.process.identity.startTimeMs, + ); + const current = groups.get(identityKey) ?? 
[]; + current.push(sample); + groups.set(identityKey, current); + } + + return [...groups.values()] + .map((processSamples): ResourceTelemetryProcessSummary => { + const sorted = processSamples.toSorted((left, right) => left.sampledAtMs - right.sampledAtMs); + const first = sorted[0]!; + const latest = sorted[sorted.length - 1]!; + const cpuTotal = sorted.reduce((total, sample) => total + sample.process.cpuPercent, 0); + return { + identity: latest.process.identity, + ppid: latest.process.ppid, + depth: latest.process.depth, + name: latest.process.name, + command: latest.process.command, + category: latest.process.category, + firstSeenAt: first.process.firstSeenAt, + lastSeenAt: latest.process.lastSeenAt, + currentCpuPercent: latest.process.cpuPercent, + avgCpuPercent: cpuTotal / sorted.length, + maxCpuPercent: Math.max(...sorted.map((sample) => sample.process.cpuPercent)), + cpuTimeMs: sorted.reduce((total, sample) => total + sample.cpuTimeMs, 0), + currentRssBytes: latest.process.residentBytes, + peakRssBytes: Math.max(...sorted.map((sample) => sample.process.peakResidentBytes)), + ioReadBytes: sorted.reduce((total, sample) => total + sample.ioReadBytes, 0), + ioWriteBytes: sorted.reduce((total, sample) => total + sample.ioWriteBytes, 0), + ioSemantics: latest.process.ioSemantics, + sampleCount: sorted.length, + }; + }) + .toSorted( + (left, right) => right.cpuTimeMs - left.cpuTimeMs || right.peakRssBytes - left.peakRssBytes, + ); +} + +function buildBuckets(input: { + readonly samples: ReadonlyArray; + readonly nowMs: number; + readonly windowMs: number; + readonly bucketMs: number; +}): ReadonlyArray { + const windowStartMs = input.nowMs - input.windowMs; + const buckets: ResourceTelemetryHistoryBucket[] = []; + for (let startedAtMs = windowStartMs; startedAtMs < input.nowMs; startedAtMs += input.bucketMs) { + const endedAtMs = Math.min(input.nowMs, startedAtMs + input.bucketMs); + const samples = input.samples.filter( + (sample) => + sample.sampledAtMs >= 
startedAtMs && + (endedAtMs === input.nowMs + ? sample.sampledAtMs <= endedAtMs + : sample.sampledAtMs < endedAtMs), + ); + const cpuTotal = samples.reduce((total, sample) => total + sample.cpuPercent, 0); + buckets.push({ + startedAt: DateTime.makeUnsafe(startedAtMs), + endedAt: DateTime.makeUnsafe(endedAtMs), + avgCpuPercent: samples.length === 0 ? 0 : cpuTotal / samples.length, + maxCpuPercent: + samples.length === 0 ? 0 : Math.max(...samples.map((sample) => sample.cpuPercent)), + maxRssBytes: samples.length === 0 ? 0 : Math.max(...samples.map((sample) => sample.rssBytes)), + ioReadBytes: samples.reduce((total, sample) => total + sample.ioReadBytes, 0), + ioWriteBytes: samples.reduce((total, sample) => total + sample.ioWriteBytes, 0), + maxProcessCount: + samples.length === 0 ? 0 : Math.max(...samples.map((sample) => sample.processCount)), + }); + } + return buckets; +} + +export const make = Effect.fn("resourceTelemetry.resourceTelemetryStore.make")(function* ( + initial: ResourceTelemetrySnapshot, +) { + const state = yield* Ref.make({ + aggregateSamples: [], + processSamples: [], + latest: initial, + }); + + const record: ResourceTelemetryStoreShape["record"] = (snapshot, deltas) => + Ref.update(state, (current) => { + const sampledAtMs = DateTime.toEpochMillis(snapshot.readAt); + const deltasByIdentity = new Map( + deltas.map((processDelta) => [processDelta.identityKey, processDelta]), + ); + const aggregateDelta = deltas.reduce( + (total, process) => ({ + ioReadBytes: total.ioReadBytes + process.ioReadBytes, + ioWriteBytes: total.ioWriteBytes + process.ioWriteBytes, + }), + { ioReadBytes: 0, ioWriteBytes: 0 }, + ); + return { + latest: snapshot, + aggregateSamples: trimByTime( + [ + ...current.aggregateSamples, + { + sampledAtMs, + cpuPercent: snapshot.groups.allT3.currentCpuPercent, + rssBytes: snapshot.groups.allT3.currentRssBytes, + processCount: snapshot.groups.allT3.processCount, + ioReadBytes: aggregateDelta.ioReadBytes, + ioWriteBytes: 
aggregateDelta.ioWriteBytes, + }, + ], + sampledAtMs, + MAX_AGGREGATE_SAMPLES, + ), + processSamples: trimByTime( + [ + ...current.processSamples, + ...snapshot.processes.map((process) => { + const processDelta = deltasByIdentity.get( + processIdentityKey(process.identity.pid, process.identity.startTimeMs), + ); + return { + sampledAtMs, + process, + cpuTimeMs: processDelta?.cpuTimeMs ?? 0, + ioReadBytes: processDelta?.ioReadBytes ?? 0, + ioWriteBytes: processDelta?.ioWriteBytes ?? 0, + }; + }), + ], + sampledAtMs, + MAX_PROCESS_SAMPLES, + ), + }; + }); + + const updateLatest: ResourceTelemetryStoreShape["updateLatest"] = (snapshot) => + Ref.update(state, (current) => ({ + ...current, + latest: snapshot, + })); + + const readHistory: ResourceTelemetryStoreShape["readHistory"] = (input) => + Effect.gen(function* () { + const readAt = yield* DateTime.now; + const readAtMs = DateTime.toEpochMillis(readAt); + const windowMs = Math.max(1_000, Math.min(RETENTION_MS, input.windowMs)); + const bucketMs = Math.max(1_000, Math.min(windowMs, input.bucketMs)); + const current = yield* Ref.get(state); + const minSampledAtMs = readAtMs - windowMs; + const aggregateSamples = current.aggregateSamples.filter( + (sample) => sample.sampledAtMs >= minSampledAtMs, + ); + const processSamples = current.processSamples.filter( + (sample) => sample.sampledAtMs >= minSampledAtMs, + ); + + return { + readAt, + windowMs, + bucketMs, + sampleIntervalMs: current.latest.sampleIntervalMs, + retainedSampleCount: current.aggregateSamples.length + current.processSamples.length, + buckets: buildBuckets({ + samples: aggregateSamples, + nowMs: readAtMs, + windowMs, + bucketMs, + }), + topProcesses: summarizeProcesses(processSamples), + health: current.latest.health, + }; + }); + + return ResourceTelemetryStore.of({ updateLatest, record, readHistory }); +}); + +export const layer = (initial: ResourceTelemetrySnapshot) => + Layer.effect(ResourceTelemetryStore, make(initial)); diff --git 
a/apps/server/src/server.test.ts b/apps/server/src/server.test.ts index f96f4562e1c..f084b89a2f8 100644 --- a/apps/server/src/server.test.ts +++ b/apps/server/src/server.test.ts @@ -141,6 +141,10 @@ import * as CloudCliTokenManager from "./cloud/CliTokenManager.ts"; import * as ProcessDiagnostics from "./diagnostics/ProcessDiagnostics.ts"; import * as ProcessResourceMonitor from "./diagnostics/ProcessResourceMonitor.ts"; import * as TraceDiagnostics from "./diagnostics/TraceDiagnostics.ts"; +import * as DesktopTelemetryReceiver from "./resourceTelemetry/DesktopTelemetryReceiver.ts"; +import * as NativeTelemetryClient from "./resourceTelemetry/NativeTelemetryClient.ts"; +import * as ResourceAttribution from "./resourceTelemetry/ResourceAttribution.ts"; +import * as ResourceTelemetry from "./resourceTelemetry/ResourceTelemetry.ts"; import * as Data from "effect/Data"; const defaultProjectId = ProjectId.make("project-default"); @@ -368,6 +372,8 @@ const buildAppUnderTest = (options?: { cloudManagedEndpointRuntime?: Partial; relayClient?: Partial; cloudCliTokenManager?: Partial; + nativeTelemetryClient?: Partial; + desktopTelemetryReceiver?: Partial; }; }) => Effect.gen(function* () { @@ -540,6 +546,15 @@ const buildAppUnderTest = (options?: { ...options.layers.vcsStatusBroadcaster, }) : VcsStatusBroadcaster.layer.pipe(Layer.provide(gitWorkflowLayer)); + const resourceTelemetryLayer = ResourceTelemetry.layer.pipe( + Layer.provide( + Layer.mergeAll( + NativeTelemetryClient.layerTest(options?.layers?.nativeTelemetryClient), + DesktopTelemetryReceiver.layerTest(options?.layers?.desktopTelemetryReceiver), + ResourceAttribution.layer, + ), + ), + ); const servedRoutesLayer = HttpRouter.serve(makeRoutesLayer, { disableListenLog: true, @@ -751,6 +766,7 @@ const buildAppUnderTest = (options?: { ); const appLayer = servedRoutesLayer.pipe( + Layer.provide(resourceTelemetryLayer), Layer.provide( Layer.mock(BrowserTraceCollector)({ record: () => Effect.void, diff --git 
a/apps/server/src/server.ts b/apps/server/src/server.ts index b21b30eff8f..d4f7d1cf2fa 100644 --- a/apps/server/src/server.ts +++ b/apps/server/src/server.ts @@ -83,6 +83,11 @@ import * as CloudCliState from "./cloud/CliState.ts"; import * as ProcessDiagnostics from "./diagnostics/ProcessDiagnostics.ts"; import * as ProcessResourceMonitor from "./diagnostics/ProcessResourceMonitor.ts"; import * as TraceDiagnostics from "./diagnostics/TraceDiagnostics.ts"; +import * as DesktopTelemetryReceiver from "./resourceTelemetry/DesktopTelemetryReceiver.ts"; +import * as NativeTelemetryClient from "./resourceTelemetry/NativeTelemetryClient.ts"; +import * as ResourceAttribution from "./resourceTelemetry/ResourceAttribution.ts"; +import * as ResourceMonitorBinary from "./resourceTelemetry/ResourceMonitorBinary.ts"; +import * as ResourceTelemetry from "./resourceTelemetry/ResourceTelemetry.ts"; import { OrchestrationLayerLive } from "./orchestration/runtimeLayer.ts"; import { clearPersistedServerRuntimeState, @@ -99,6 +104,10 @@ import { disableTailscaleServe, ensureTailscaleServe } from "@t3tools/tailscale" // already closes the websocket gracefully. Do not add an artificial drain before // those finalizers get a chance to run. 
const HTTP_PREEMPTIVE_SHUTDOWN_GRACE_MS = 0; +const ResourceAttributionLayerLive = ResourceAttribution.layer; +const ApplicationObservabilityLive = ObservabilityLive.pipe( + Layer.provideMerge(ResourceAttributionLayerLive), +); const PtyAdapterLive = Layer.unwrap( Effect.gen(function* () { @@ -112,11 +121,30 @@ const PtyAdapterLive = Layer.unwrap( }), ); +const NativeTelemetryLayerLive = NativeTelemetryClient.layer.pipe( + Layer.provide(ResourceMonitorBinary.layer), +); + +const ResourceTelemetryLayerLive = ResourceTelemetry.layer.pipe( + Layer.provideMerge(NativeTelemetryLayerLive), + Layer.provideMerge(DesktopTelemetryReceiver.layer), +); + +const HostPowerMonitorLayerLive = HostPowerMonitor.layer.pipe( + Layer.provide(ResourceTelemetryLayerLive), +); + const BackgroundLayerLive = BackgroundPolicy.layer.pipe( - Layer.provide(HostPowerMonitor.layer), + Layer.provide(HostPowerMonitorLayerLive), Layer.provideMerge(ServerSettingsLive), ); +const ResourceDiagnosticsLayerLive = Layer.mergeAll( + ResourceTelemetryLayerLive, + ProcessDiagnostics.layer.pipe(Layer.provide(ResourceTelemetryLayerLive)), + ProcessResourceMonitor.layer.pipe(Layer.provide(ResourceTelemetryLayerLive)), +); + const RelayClientLive = Layer.unwrap( Effect.gen(function* () { const config = yield* ServerConfig; @@ -337,8 +365,7 @@ const RuntimeCoreDependenciesLive = ReactorLayerLive.pipe( const RuntimeDependenciesLive = RuntimeCoreDependenciesLive.pipe( // Misc. 
Layer.provideMerge(BackgroundLayerLive), - Layer.provideMerge(ProcessDiagnostics.layer), - Layer.provideMerge(ProcessResourceMonitor.layer), + Layer.provideMerge(ResourceDiagnosticsLayerLive), Layer.provideMerge(TraceDiagnostics.layer), Layer.provideMerge(AnalyticsServiceLayerLive), Layer.provideMerge(ExternalLauncher.layer), @@ -488,7 +515,7 @@ export const makeServerLayer = Layer.unwrap( Layer.provideMerge(RuntimeServicesLive), Layer.provideMerge(serverRelayBrokerTracingLayer), Layer.provideMerge(HttpServerLive), - Layer.provide(ObservabilityLive), + Layer.provide(ApplicationObservabilityLive), Layer.provideMerge(FetchHttpClient.layer), Layer.provideMerge(VcsProcess.layer), Layer.provideMerge(PlatformServicesLive), diff --git a/apps/server/src/ws.ts b/apps/server/src/ws.ts index 5f7010228a9..57822a2e739 100644 --- a/apps/server/src/ws.ts +++ b/apps/server/src/ws.ts @@ -95,6 +95,7 @@ import * as EnvironmentAuth from "./auth/EnvironmentAuth.ts"; import type { AuthenticatedSession } from "./auth/EnvironmentAuth.ts"; import * as ProcessDiagnostics from "./diagnostics/ProcessDiagnostics.ts"; import * as ProcessResourceMonitor from "./diagnostics/ProcessResourceMonitor.ts"; +import * as ResourceTelemetry from "./resourceTelemetry/ResourceTelemetry.ts"; import * as TraceDiagnostics from "./diagnostics/TraceDiagnostics.ts"; import * as SourceControlDiscoveryLayer from "./sourceControl/SourceControlDiscovery.ts"; import { SourceControlRepositoryService } from "./sourceControl/SourceControlRepositoryService.ts"; @@ -159,6 +160,9 @@ const RPC_REQUIRED_SCOPE = new Map([ [WS_METHODS.serverGetTraceDiagnostics, AuthOrchestrationReadScope], [WS_METHODS.serverGetProcessDiagnostics, AuthOrchestrationReadScope], [WS_METHODS.serverGetProcessResourceHistory, AuthOrchestrationReadScope], + [WS_METHODS.serverGetResourceTelemetry, AuthOrchestrationReadScope], + [WS_METHODS.serverGetResourceTelemetryHistory, AuthOrchestrationReadScope], + [WS_METHODS.serverRetryResourceTelemetry, 
AuthOrchestrationOperateScope], [WS_METHODS.serverSignalProcess, AuthOrchestrationOperateScope], [WS_METHODS.cloudGetRelayClientStatus, AuthRelayWriteScope], [WS_METHODS.cloudInstallRelayClient, AuthRelayWriteScope], @@ -173,6 +177,7 @@ const RPC_REQUIRED_SCOPE = new Map([ [WS_METHODS.filesystemBrowse, AuthOrchestrationReadScope], [WS_METHODS.assetsCreateUrl, AuthOrchestrationReadScope], [WS_METHODS.subscribeVcsStatus, AuthOrchestrationReadScope], + [WS_METHODS.subscribeResourceTelemetry, AuthOrchestrationReadScope], [WS_METHODS.vcsRefreshStatus, AuthOrchestrationReadScope], [WS_METHODS.vcsPull, AuthOrchestrationOperateScope], [WS_METHODS.gitRunStackedAction, AuthOrchestrationOperateScope], @@ -298,6 +303,7 @@ const makeWsRpcLayer = (currentSession: AuthenticatedSession) => const sessions = yield* SessionStore.SessionStore; const processDiagnostics = yield* ProcessDiagnostics.ProcessDiagnostics; const processResourceMonitor = yield* ProcessResourceMonitor.ProcessResourceMonitor; + const resourceTelemetry = yield* ResourceTelemetry.ResourceTelemetry; const relayClient = yield* RelayClient.RelayClient; const authorizationError = (requiredScope: AuthEnvironmentScope) => new EnvironmentAuthorizationError({ @@ -1118,6 +1124,22 @@ const makeWsRpcLayer = (currentSession: AuthenticatedSession) => "rpc.aggregate": "server", }, ), + [WS_METHODS.serverGetResourceTelemetry]: (_input) => + observeRpcEffect(WS_METHODS.serverGetResourceTelemetry, resourceTelemetry.latest, { + "rpc.aggregate": "server", + }), + [WS_METHODS.serverGetResourceTelemetryHistory]: (input) => + observeRpcEffect( + WS_METHODS.serverGetResourceTelemetryHistory, + resourceTelemetry.readHistory(input), + { + "rpc.aggregate": "server", + }, + ), + [WS_METHODS.serverRetryResourceTelemetry]: (_input) => + observeRpcEffect(WS_METHODS.serverRetryResourceTelemetry, resourceTelemetry.retry, { + "rpc.aggregate": "server", + }), [WS_METHODS.serverSignalProcess]: (input) => 
observeRpcEffect(WS_METHODS.serverSignalProcess, processDiagnostics.signal(input), { "rpc.aggregate": "server", @@ -1669,6 +1691,15 @@ const makeWsRpcLayer = (currentSession: AuthenticatedSession) => ), { "rpc.aggregate": "server" }, ), + [WS_METHODS.subscribeResourceTelemetry]: (_input) => + observeRpcStream( + WS_METHODS.subscribeResourceTelemetry, + Stream.concat( + Stream.unwrap(Effect.map(resourceTelemetry.latest, Stream.make)), + resourceTelemetry.changes, + ), + { "rpc.aggregate": "server" }, + ), }); }), ); diff --git a/apps/web/src/components/settings/DiagnosticsSettings.tsx b/apps/web/src/components/settings/DiagnosticsSettings.tsx index 6df3367c642..419994c553d 100644 --- a/apps/web/src/components/settings/DiagnosticsSettings.tsx +++ b/apps/web/src/components/settings/DiagnosticsSettings.tsx @@ -36,6 +36,7 @@ import { Button } from "../ui/button"; import { ScrollArea } from "../ui/scroll-area"; import { Tooltip, TooltipPopup, TooltipTrigger } from "../ui/tooltip"; import { toastManager } from "../ui/toast"; +import { ResourceTelemetryDiagnostics } from "./ResourceTelemetryDiagnostics"; import { SettingsPageContainer, SettingsSection, useRelativeTimeTick } from "./settingsLayout"; import { useAtomCommand } from "../../state/use-atom-command"; @@ -906,12 +907,16 @@ export function DiagnosticsSettingsPanel() { if (environmentId === null) { return; } + const process = processData?.processes.find((entry) => entry.pid === pid); + if (process === undefined) { + return; + } setSignalingPid(pid); void (async () => { const result = await signalServerProcess({ environmentId, - input: { pid, signal }, + input: { pid, startTimeMs: process.startTimeMs, signal }, }); setSignalingPid(null); if (result._tag === "Failure") { @@ -948,7 +953,7 @@ export function DiagnosticsSettingsPanel() { refreshProcesses(); })(); }, - [environmentId, refreshProcesses, signalServerProcess], + [environmentId, processData?.processes, refreshProcesses, signalServerProcess], ); const 
processDiagnosticsError = processData ? Option.getOrNull(processData.error) : null; @@ -960,6 +965,8 @@ export function DiagnosticsSettingsPanel() { return ( + + = 1_024 && unitIndex < units.length - 1); + return `${next.toFixed(next >= 100 ? 0 : next >= 10 ? 1 : 2)} ${units[unitIndex]}`; +} + +function formatRate(value: number): string { + return `${formatBytes(value)}/s`; +} + +function formatCpuTime(valueMs: number): string { + const seconds = valueMs / 1_000; + if (seconds < 60) return `${seconds.toFixed(seconds >= 10 ? 1 : 2)}s`; + const minutes = seconds / 60; + if (minutes < 60) return `${minutes.toFixed(minutes >= 10 ? 1 : 2)}m`; + return `${(minutes / 60).toFixed(2)}h`; +} + +function formatDurationMicros(value: number): string { + if (value < 1_000) return `${Math.round(value)} µs`; + if (value < 1_000_000) return `${(value / 1_000).toFixed(2)} ms`; + return `${(value / 1_000_000).toFixed(2)} s`; +} + +function processIdentityKey(process: ResourceTelemetryProcess): string { + return `${process.identity.pid}:${process.identity.startTimeMs}`; +} + +function processSummaryIdentityKey(process: ResourceTelemetryProcessSummary): string { + return `${process.identity.pid}:${process.identity.startTimeMs}`; +} + +function formatProcessName(process: Pick): string { + if (process.name.trim()) return process.name; + const firstToken = process.command.trim().split(/\s+/)[0] ?? process.command; + const normalized = firstToken.replace(/^['"]|['"]$/g, ""); + return normalized.split(/[\\/]/).findLast((segment) => segment.length > 0) ?? 
normalized; +} + +function categoryLabel(category: ResourceTelemetryProcessCategory): string { + switch (category) { + case "server": + return "Server"; + case "server-child": + return "Backend child"; + case "provider-root": + return "Provider"; + case "terminal-root": + return "Terminal"; + case "electron-main": + return "Electron main"; + case "electron-renderer": + return "Renderer"; + case "electron-gpu": + return "GPU"; + case "electron-utility": + return "Electron utility"; + case "resource-monitor": + return "Monitor"; + case "unknown-t3": + return "T3 process"; + } +} + +function categoryDotClass(category: ResourceTelemetryProcessCategory): string { + if (category === "resource-monitor") return "bg-amber-500"; + if (category.startsWith("electron-")) return "bg-sky-500"; + if (category === "server") return "bg-violet-500"; + return "bg-emerald-500"; +} + +function ioSemanticsLabel(semantics: ResourceTelemetryIoSemantics): string { + switch (semantics) { + case "storage": + return "Storage bytes"; + case "logical": + return "Logical bytes"; + case "all-io": + return "All I/O bytes"; + case "unavailable": + return "Unavailable"; + } +} + +function booleanStateLabel( + value: BackgroundBooleanState, + labels: { readonly true: string; readonly false: string }, +): string { + if (value === "true") return labels.true; + if (value === "false") return labels.false; + return "Unknown"; +} + +function sourceStatusTone(status: ResourceTelemetrySourceStatus): "default" | "warning" | "danger" { + if (status === "healthy") return "default"; + if (status === "starting" || status === "degraded") return "warning"; + return "danger"; +} + +function SourceStatusBadge({ + label, + status, +}: { + label: string; + status: ResourceTelemetrySourceStatus; +}) { + const tone = sourceStatusTone(status); + return ( + + + {label} {status} + + ); +} + +function LastSampleLabel({ sampledAt }: { sampledAt: DateTime.Utc | null }) { + useRelativeTimeTick(); + if (!sampledAt) { + return 
Waiting for sample; + } + const relative = formatRelativeTime(DateTime.formatIso(sampledAt)); + return ( + + Updated {relative.value} + {relative.suffix ? ` ${relative.suffix}` : ""} + + ); +} + +function IconStat({ + icon, + label, + value, + detail, + tone = "default", +}: { + icon: ReactNode; + label: string; + value: string; + detail?: string | undefined; + tone?: "default" | "warning" | "danger"; +}) { + return ( +
+
+ {icon} + {label} +
+
+ {value} +
+ {detail ? ( +
{detail}
+ ) : null} +
+ ); +} + +function AggregateCard({ + label, + accentClass, + aggregate, +}: { + label: string; + accentClass: string; + aggregate: ResourceTelemetryAggregate; +}) { + return ( +
+ +
+
+ {label} +
+
+ {aggregate.processCount} proc +
+
+
+ + + + +
+
+ ); +} + +function MetricPair({ label, value }: { label: string; value: string }) { + return ( +
+
+ {label} +
+
+ {value} +
+
+ ); +} + +function HealthSource({ label, health }: { label: string; health: ResourceTelemetrySourceHealth }) { + return ( +
+
+
{label}
+
+ {Option.match(health.lastError, { + onNone: () => "No reported errors", + onSome: (error) => error, + })} +
+
+ +
+ ); +} + +function DetailRow({ + label, + value, + valueClassName, +}: { + label: string; + value: ReactNode; + valueClassName?: string | undefined; +}) { + return ( +
+ {label} + + {value} + +
+ ); +} + +function HistoryWindowSelector({ + selectedWindowMs, + onSelect, +}: { + selectedWindowMs: number; + onSelect: (windowMs: number) => void; +}) { + return ( +
+ {HISTORY_WINDOWS.map((option) => ( + + ))} +
+ ); +} + +function ResourceHistoryChart({ + buckets, +}: { + buckets: ReadonlyArray; +}) { + const maxCpu = Math.max(1, ...buckets.map((bucket) => bucket.maxCpuPercent)); + const maxIo = Math.max(1, ...buckets.map((bucket) => bucket.ioReadBytes + bucket.ioWriteBytes)); + + return ( +
+
+ + CPU average + + + I/O reads + + + I/O writes + +
+
+ {buckets.map((bucket) => { + const cpuHeight = Math.max(2, (bucket.avgCpuPercent / maxCpu) * 100); + const readHeight = Math.max(1, (bucket.ioReadBytes / maxIo) * 100); + const writeHeight = Math.max(1, (bucket.ioWriteBytes / maxIo) * 100); + return ( + + + + + +
+ } + /> + +
CPU avg {bucket.avgCpuPercent.toFixed(1)}%
+
CPU peak {bucket.maxCpuPercent.toFixed(1)}%
+
Read {formatBytes(bucket.ioReadBytes)}
+
Write {formatBytes(bucket.ioWriteBytes)}
+
+ + ); + })} +
+
+ ); +} + +function ProcessTreeName({ + process, + collapsed, + onToggle, +}: { + process: ResourceTelemetryProcess; + collapsed: boolean; + onToggle: (process: ResourceTelemetryProcess) => void; +}) { + const name = formatProcessName(process); + const hasChildren = process.childPids.length > 0; + const ChevronIcon = collapsed ? ChevronRightIcon : ChevronDownIcon; + return ( +
+ {hasChildren ? ( + + ) : ( + + )} + + + {name}} + /> + + {process.command || process.name} + + +
+ ); +} + +function canSignalProcess(process: ResourceTelemetryProcess): boolean { + return ( + process.category === "server-child" || + process.category === "provider-root" || + process.category === "terminal-root" + ); +} + +function ProcessActions({ + process, + signalingKey, + onSignal, +}: { + process: ResourceTelemetryProcess; + signalingKey: string | null; + onSignal: (process: ResourceTelemetryProcess, signal: ServerProcessSignal) => void; +}) { + if (!canSignalProcess(process)) { + return ; + } + const isSignaling = signalingKey === processIdentityKey(process); + return ( +
+ + +
+ ); +} + +function ProcessTable({ + processes, + signalingKey, + onSignal, +}: { + processes: ReadonlyArray; + signalingKey: string | null; + onSignal: (process: ResourceTelemetryProcess, signal: ServerProcessSignal) => void; +}) { + const [collapsed, setCollapsed] = useState>(() => new Set()); + const visible = useMemo(() => { + const result: ResourceTelemetryProcess[] = []; + let hiddenDepth: number | null = null; + for (const process of processes) { + if (hiddenDepth !== null) { + if (process.depth > hiddenDepth) continue; + hiddenDepth = null; + } + result.push(process); + if (collapsed.has(processIdentityKey(process))) { + hiddenDepth = process.depth; + } + } + return result; + }, [collapsed, processes]); + const toggle = useCallback((process: ResourceTelemetryProcess) => { + const identityKey = processIdentityKey(process); + setCollapsed((current) => { + const next = new Set(current); + if (next.has(identityKey)) { + next.delete(identityKey); + } else { + next.add(identityKey); + } + return next; + }); + }, []); + + return ( + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + {visible.length === 0 ? ( + + + + ) : null} + {visible.map((process) => ( + + + + + + + + + + + + + + ))} + +
ProcessCategoryCPUCPU TimeMemoryRead/sWrite/sRead TotalWrite TotalPIDKill
+ Waiting for the native process monitor. +
+ + + {categoryLabel(process.category)} + + {process.cpuPercent.toFixed(1)}% + + {formatCpuTime(process.cpuTimeMs)} + + {formatBytes(process.residentBytes)} + + {formatRate(process.ioReadBytesPerSecond)} + + {formatRate(process.ioWriteBytesPerSecond)} + + {formatBytes(process.ioReadBytes)} + + + {formatBytes(process.ioWriteBytes)}} /> + {ioSemanticsLabel(process.ioSemantics)} + + + {process.identity.pid} + + +
+
+ ); +} + +function HistoryProcessTable({ + processes, +}: { + processes: ReadonlyArray; +}) { + return ( + + + + + + + + + + + + + + + + + + + + + + + + + + + + {processes.length === 0 ? ( + + + + ) : null} + {processes.map((process) => ( + + + + + + + + + + + + ))} + +
ProcessCategoryCPU TimePeak CPUPeak MemReadWriteSamplesPID
+ No retained process samples in this window. +
+ + + {process.name || process.command} + + } + /> + + {process.command || process.name} + + + + {categoryLabel(process.category)} + + {formatCpuTime(process.cpuTimeMs)} + + {process.maxCpuPercent.toFixed(1)}% + + {formatBytes(process.peakRssBytes)} + + {formatBytes(process.ioReadBytes)} + + {formatBytes(process.ioWriteBytes)} + + {process.sampleCount} + + {process.identity.pid} +
+
+ ); +} + +function AttributionTable({ entries }: { entries: ReadonlyArray }) { + return ( +
+ + + + + + + + + + + + + + + + + + + + + {entries.length === 0 ? ( + + + + ) : null} + {entries.map((entry) => ( + + + + + + + + + ))} + +
ComponentOperationLogical ReadLogical WriteCountTime
+ No instrumented application I/O has been recorded yet. +
+ {entry.component} + {entry.operation} + {formatBytes(entry.logicalReadBytes)} + + {formatBytes(entry.logicalWriteBytes)} + {entry.count} + {(entry.durationMs / 1_000).toFixed(2)}s +
+
+ ); +} + +export function ResourceTelemetryDiagnostics() { + const [windowMs, setWindowMs] = useState(15 * 60_000); + const selectedWindow = + HISTORY_WINDOWS.find((option) => option.windowMs === windowMs) ?? HISTORY_WINDOWS[1]; + const telemetry = useResourceTelemetry(); + const retryTelemetry = telemetry.retry; + const history = useResourceTelemetryHistory({ + windowMs: selectedWindow.windowMs, + bucketMs: selectedWindow.bucketMs, + }); + const primaryEnvironment = usePrimaryEnvironment(); + const signalServerProcess = useAtomCommand(serverEnvironment.signalProcess, { + reportFailure: false, + }); + const [signalingKey, setSignalingKey] = useState(null); + const [isRetrying, setIsRetrying] = useState(false); + const snapshot = telemetry.data; + const allT3 = snapshot?.groups.allT3; + + const signalProcess = useCallback( + (process: ResourceTelemetryProcess, signal: ServerProcessSignal) => { + if ( + signal === "SIGKILL" && + !window.confirm( + `Send SIGKILL to process ${process.identity.pid}? This cannot be handled by the process.`, + ) + ) { + return; + } + const identityKey = processIdentityKey(process); + const environmentId = primaryEnvironment?.environmentId; + if (environmentId === undefined) { + return; + } + setSignalingKey(identityKey); + void signalServerProcess({ + environmentId, + input: { + pid: process.identity.pid, + startTimeMs: process.identity.startTimeMs, + signal, + }, + }) + .then((result) => { + if (result._tag === "Failure") { + if (isAtomCommandInterrupted(result)) return; + throw squashAtomCommandFailure(result); + } + if (result.value.signaled) return; + toastManager.add({ + type: "error", + title: `Could not send ${signal}`, + description: Option.getOrElse( + result.value.message, + () => `Failed to send ${signal} to process ${process.identity.pid}.`, + ), + }); + }) + .catch((error: unknown) => { + toastManager.add({ + type: "error", + title: `Could not send ${signal}`, + description: error instanceof Error ? 
error.message : `Failed to send ${signal}.`, + }); + }) + .finally(() => { + setSignalingKey((current) => (current === identityKey ? null : current)); + }); + }, + [primaryEnvironment?.environmentId, signalServerProcess], + ); + + const retryCollector = useCallback(() => { + setIsRetrying(true); + void retryTelemetry() + .catch((error: unknown) => { + toastManager.add({ + type: "error", + title: "Could not restart resource monitor", + description: + error instanceof Error ? error.message : "The resource monitor retry failed.", + }); + }) + .finally(() => { + setIsRetrying(false); + }); + }, [retryTelemetry]); + + const speedLimit = snapshot ? Option.getOrNull(snapshot.speedLimitPercent) : null; + const collectorNeedsRetry = + snapshot?.health.native.status === "degraded" || + snapshot?.health.native.status === "unavailable" || + snapshot?.health.native.status === "stopped"; + + return ( + <> + + {snapshot ? ( + + ) : null} + + + + + + } + /> + Refresh telemetry snapshot + +
+ } + > +
+ } + label="Current CPU" + value={allT3 ? `${allT3.currentCpuPercent.toFixed(1)}%` : "..."} + detail={allT3 ? `${formatCpuTime(allT3.cpuTimeMs)} observed CPU time` : undefined} + /> + } + label="Resident Memory" + value={allT3 ? formatBytes(allT3.currentRssBytes) : "..."} + detail={allT3 ? `${formatBytes(allT3.peakRssBytes)} process peaks` : undefined} + /> + } + label="I/O Reads" + value={allT3 ? formatRate(allT3.ioReadBytesPerSecond) : "..."} + detail={allT3 ? `${formatBytes(allT3.ioReadBytes)} observed` : undefined} + /> + } + label="I/O Writes" + value={allT3 ? formatRate(allT3.ioWriteBytesPerSecond) : "..."} + detail={allT3 ? `${formatBytes(allT3.ioWriteBytes)} observed` : undefined} + tone={ + allT3 && allT3.ioWriteBytesPerSecond >= 10 * 1_024 * 1_024 + ? "danger" + : allT3 && allT3.ioWriteBytesPerSecond >= 1_024 * 1_024 + ? "warning" + : "default" + } + /> + } + label="Processes" + value={allT3 ? String(allT3.processCount) : "..."} + detail={ + allT3 ? `${allT3.processStarts} starts · ${allT3.processExits} exits` : undefined + } + /> + } + label="CPU Speed Limit" + value={ + snapshot ? (speedLimit === null ? "Unknown" : `${speedLimit.toFixed(0)}%`) : "..." + } + detail={snapshot ? `${snapshot.power.thermalState} thermal state` : undefined} + tone={speedLimit !== null && speedLimit < 80 ? "warning" : "default"} + /> +
+ {telemetry.error ? ( +
+ + {telemetry.error} +
+ ) : null} + {snapshot ? ( +
+ + + +
+ ) : null} + + + + + Retry monitor + + ) : null + } + > +
+
+
+ Host state +
+ + + + + +
+
+
+ Collection health +
+ {snapshot ? ( + <> + + + + + 0 + ? "text-amber-600 dark:text-amber-300" + : undefined + } + /> + "Unavailable", + onSome: (version) => + `${version}${Option.match(snapshot.health.sidecarPid, { + onNone: () => "", + onSome: (pid) => ` · PID ${pid}`, + })}`, + })} + /> + + + ) : ( +
+ Waiting for collector health. +
+ )} +
+
+
+ + + + +
+ } + > + {history.error ? ( +
+ + {history.error} +
+ ) : null} + + + + + + Identity: PID + start time + + ) : null + } + > + + + + Logical bytes by operation + } + > +
+ Native counters identify which process is reading or writing. These application-level + counters identify known T3 operations so process spikes can be correlated with specific + persistence and logging paths. +
+ +
+ + ); +} diff --git a/apps/web/src/lib/backgroundActivityReporter.ts b/apps/web/src/lib/backgroundActivityReporter.ts index cddd4715683..95592238093 100644 --- a/apps/web/src/lib/backgroundActivityReporter.ts +++ b/apps/web/src/lib/backgroundActivityReporter.ts @@ -97,6 +97,9 @@ function createActivityReport(environmentId: EnvironmentId): ClientActivityRepor function scopeForSubscription( observation: EnvironmentRpcSubscriptionObservation, ): BackgroundScope | null { + if (observation.method === WS_METHODS.subscribeResourceTelemetry) { + return { type: "diagnostics" }; + } if (observation.method !== WS_METHODS.subscribeVcsStatus) { return null; } diff --git a/apps/web/src/lib/resourceTelemetryState.ts b/apps/web/src/lib/resourceTelemetryState.ts new file mode 100644 index 00000000000..47ca79898df --- /dev/null +++ b/apps/web/src/lib/resourceTelemetryState.ts @@ -0,0 +1,51 @@ +import type { ResourceTelemetryHistoryInput, ResourceTelemetrySnapshot } from "@t3tools/contracts"; +import * as Cause from "effect/Cause"; +import { useCallback } from "react"; + +import { usePrimaryEnvironment } from "../state/environments"; +import { useEnvironmentQuery } from "../state/query"; +import { serverEnvironment } from "../state/server"; +import { useAtomCommand } from "../state/use-atom-command"; + +export interface ResourceTelemetryState { + readonly data: ResourceTelemetrySnapshot | null; + readonly error: string | null; + readonly isPending: boolean; + readonly refresh: () => void; + readonly retry: () => Promise; +} + +export function useResourceTelemetry(): ResourceTelemetryState { + const primaryEnvironment = usePrimaryEnvironment(); + const environmentId = primaryEnvironment?.environmentId ?? null; + const query = useEnvironmentQuery( + environmentId === null + ? 
null + : serverEnvironment.resourceTelemetry({ environmentId, input: {} }), + ); + const retryCommand = useAtomCommand(serverEnvironment.retryResourceTelemetry, { + reportFailure: false, + }); + const retry = useCallback(async () => { + if (environmentId === null) { + throw new Error("No environment is selected."); + } + const result = await retryCommand({ environmentId, input: {} }); + if (result._tag === "Failure") { + throw Cause.squash(result.cause); + } + return result.value.snapshot; + }, [environmentId, retryCommand]); + + return { ...query, retry }; +} + +export function useResourceTelemetryHistory(input: ResourceTelemetryHistoryInput) { + const primaryEnvironment = usePrimaryEnvironment(); + const environmentId = primaryEnvironment?.environmentId ?? null; + return useEnvironmentQuery( + environmentId === null + ? null + : serverEnvironment.resourceTelemetryHistory({ environmentId, input }), + ); +} diff --git a/apps/web/src/localApi.ts b/apps/web/src/localApi.ts index 2fbf183f91b..41f50193b4f 100644 --- a/apps/web/src/localApi.ts +++ b/apps/web/src/localApi.ts @@ -75,6 +75,9 @@ function createBrowserLocalApi(): LocalApi { getTraceDiagnostics: () => Promise.reject(unavailableLocalBackendError()), getProcessDiagnostics: () => Promise.reject(unavailableLocalBackendError()), getProcessResourceHistory: () => Promise.reject(unavailableLocalBackendError()), + getResourceTelemetry: () => Promise.reject(unavailableLocalBackendError()), + getResourceTelemetryHistory: () => Promise.reject(unavailableLocalBackendError()), + retryResourceTelemetry: () => Promise.reject(unavailableLocalBackendError()), signalProcess: () => Promise.reject(unavailableLocalBackendError()), }, }; diff --git a/docs/architecture/overview.md b/docs/architecture/overview.md index ce5a0afe92a..236af7b2c07 100644 --- a/docs/architecture/overview.md +++ b/docs/architecture/overview.md @@ -37,6 +37,10 @@ T3 Code runs as a **Node.js WebSocket server** that wraps `codex app-server` (JS - **Runtime 
signals**: The server emits lightweight typed receipts when important async milestones finish, such as checkpoint capture, diff finalization, or a turn becoming fully quiescent. Tests and orchestration code wait on these signals instead of polling internal state. +Related design: + +- [Resource telemetry architecture](./resource-telemetry.md) + ## Event Lifecycle ### Startup and client connect diff --git a/docs/architecture/resource-telemetry.md b/docs/architecture/resource-telemetry.md new file mode 100644 index 00000000000..8decbc3c516 --- /dev/null +++ b/docs/architecture/resource-telemetry.md @@ -0,0 +1,335 @@ +# Resource telemetry architecture + +Status: implemented + +## Purpose + +Resource telemetry replaces recurring `ps`, PowerShell, `ioreg`, and `pmset` +subprocess probes with two persistent, direct data sources: + +1. a standalone Rust resource-monitor executable that reads process counters + through operating-system APIs via `sysinfo`; +2. Electron main-process APIs for Electron process metrics and host power state. + +The server merges both sources, computes rates from cumulative counters, keeps +bounded in-memory history, exposes typed RPCs, and drives the diagnostics page. +Telemetry history is not persisted to disk. + +## Why a standalone executable + +The monitor is intentionally not a Node native addon. + +- No N-API, `ffi-rs`, or dynamic-library ABI is loaded into the server process. +- A monitor crash cannot corrupt the Node runtime. +- The server can supervise, restart, version-check, and measure the monitor as a + normal child process. +- The same protocol works for the desktop app and the published CLI. +- Packaging is a single platform executable instead of an addon toolchain plus + Node/Electron ABI matrix. + +The cost is one persistent child process and NDJSON serialization. That is a +better failure boundary than repeatedly spawning shell utilities or loading +native code into Node. 
+ +## Runtime topology + +### Desktop + +```text +Electron main + ├─ powerMonitor + ├─ app.getAppMetrics() + └─ inherited fd 4, NDJSON + │ + ▼ +Node server ── stdin/stdout NDJSON ── Rust resource monitor + │ + ├─ ResourceTelemetry Effect service + ├─ bounded in-memory history + ├─ background power policy projection + └─ WebSocket RPC/subscription ── diagnostics UI +``` + +### Web, headless, and remote server + +Electron telemetry is unavailable. The native monitor still runs beside the +server and tracks the server process tree. Power fields degrade to `unknown` +instead of invoking platform shell commands. + +## Native monitor + +The executable lives in `native/resource-monitor`. + +It receives schema-compatible commands on stdin and emits one JSON object per +line on stdout: + +- `configure` +- `setExternalProcesses` +- `sampleNow` +- `shutdown` +- `hello` +- `snapshot` +- `error` + +The protocol version is defined by +`RESOURCE_MONITOR_PROTOCOL_VERSION` in +`packages/contracts/src/resourceTelemetry.ts`. + +### Collection + +The monitor keeps one `sysinfo::System` instance and refreshes it on a one-second +interval. It collects: + +- PID and parent PID; +- process start time and run time; +- process name and command line; +- current and cumulative CPU usage; +- resident and virtual memory; +- cumulative process I/O counters. + +On Linux, task/thread enumeration is disabled. Command lines are loaded only +when first needed. This avoids the expensive default behavior of walking every +`/proc/<pid>/task/` directory on each refresh. 
+ +### Process-tree selection + +Each sample scans the accessible process table, builds the PID/PPID graph, and +retains: + +- the server process; +- every descendant of the server, including provider-spawned grandchildren such + as shells, `node`, `tsgo`, language servers, and other tools; +- Electron processes supplied as explicit external roots; +- descendants of those Electron roots; +- the resource monitor itself, because it is a server child. + +Process identity is `(pid, startTimeMs)`, not PID alone. Electron and native +start times are matched with a two-second tolerance because native start times +can have coarser platform resolution. + +The process list is emitted in depth-first tree order so renderer collapse and +expansion preserves complete subtrees. + +### Sampling limits + +This is counter sampling, not syscall tracing. + +- A process that starts and exits entirely between samples may not be observed. +- Cumulative CPU and I/O counters still provide accurate deltas for processes + that survive across samples. +- Exact file paths, individual write syscalls, ETW events, eBPF events, and + Endpoint Security events are outside this implementation. + +Those deeper tracing systems can be added later as opt-in diagnostic modes +without changing the public `ResourceTelemetry` model. + +## I/O semantics + +The monitor preserves platform semantics instead of presenting all counters as +equivalent: + +- Unix-like platforms report storage I/O counters exposed by `sysinfo`. +- Windows reports all process I/O bytes, not only disk bytes. +- Operating-system caches can prevent logical application reads or writes from + appearing as physical storage bytes. + +The UI therefore labels these values as I/O reads and writes and exposes the +per-process `ioSemantics` value. + +Group totals are observed deltas since telemetry startup. Per-process total +columns are the operating system's cumulative counters for that process. 
+ +## Electron telemetry + +Electron main owns `DesktopTelemetryPublisher`. + +It samples once per second from: + +- `app.getAppMetrics()`; +- `powerMonitor.isOnBatteryPower()`; +- `powerMonitor.getSystemIdleTime()`; +- `powerMonitor.getSystemIdleState()`; +- `powerMonitor.getCurrentThermalState()`. + +It also listens for: + +- lock and unlock; +- suspend and resume; +- AC and battery transitions; +- thermal-state changes; +- CPU speed-limit changes. + +Electron does not expose a cross-platform low-power-mode getter, so that field +remains `unknown`. + +The desktop backend is spawned with: + +- fd 3 for the existing bootstrap payload; +- fd 4 for desktop telemetry NDJSON. + +This is a private Electron-main-to-server pipe. It does not use the renderer +WebSocket and is recreated for every backend restart. + +## Server Effect services + +The implementation is under `apps/server/src/resourceTelemetry`. + +### `ResourceMonitorBinary` + +Resolves an executable from: + +1. `T3CODE_RESOURCE_MONITOR_PATH`; +2. desktop bootstrap configuration; +3. bundled CLI resources; +4. local Cargo build outputs. + +Unsupported platforms, missing binaries, and non-executable binaries use +schema-backed tagged errors with descriptive messages. + +### `NativeTelemetryClient` + +Owns the resource-monitor process and protocol. + +- validates the hello/version handshake; +- sends configuration and external process roots; +- exposes automatic snapshots and `sampleNow`; +- serializes commands; +- supervises process exit and protocol failure; +- restarts with bounded exponential backoff; +- opens a circuit after repeated failures; +- supports explicit retry; +- publishes health changes immediately. + +Snapshot sequence numbers are scoped to a monitor generation. Server ingestion +uses the monitor restart count as the generation key, so sequence reset after a +restart cannot freeze telemetry. 
+ +### `DesktopTelemetryReceiver` + +Reads fd 4, decodes schema-validated messages, stores the latest Electron +snapshot, and publishes desktop health. Decode errors, protocol mismatch, +stream failure, and normal stream closure are represented explicitly. + +### `ResourceTelemetry` + +Merges native and Electron data and owns public telemetry semantics. + +- calculates CPU and I/O rates from cumulative native counters; +- preserves the last native rates during desktop-only updates; +- classifies backend, Electron, and monitor processes; +- computes process depth and child relationships; +- tracks starts, exits, CPU time, and observed I/O; +- projects power data; +- publishes live snapshots; +- validates `(pid, startTimeMs)` before process signaling; +- updates history health even when no further native sample arrives. + +Electron and monitor processes are visible but are not valid targets for the +existing process-signal RPC. + +### `ResourceTelemetryStore` + +Keeps aggregate and process samples in memory for at most one hour, subject to +hard sample-count bounds. Aggregate history retains up to 3,600 samples. +Detailed process history retains up to 20,000 process samples, so high process +fan-out can shorten detailed per-process coverage while aggregate coverage +remains available. + +### `ResourceAttribution` + +Tracks known logical application I/O separately from OS counters. Current +integration points record successful writes for: + +- provider native and canonical event logs; +- the local server trace sink. + +Entries contain component, operation, logical bytes, count, and elapsed time. +Future persistence paths should call `ResourceAttribution.record` rather than +adding diagnostics-specific counters. + +## Background policy integration + +`HostPowerMonitor` now projects power state from `ResourceTelemetry`; it does not +spawn macOS shell probes. + +The monitor updates its latest timestamp on every Electron sample but only +publishes semantic state changes. 
Increasing idle seconds alone does not cause a +background-policy broadcast every second. + +## Public API and UI + +The WebSocket RPC surface provides: + +- current snapshot; +- bounded history; +- explicit monitor retry; +- a live snapshot subscription. + +The diagnostics page displays: + +- aggregate CPU, memory, I/O, and process counts; +- backend, Electron, and monitor overhead groups; +- power and thermal state; +- collector health and restart information; +- CPU and I/O history; +- a collapsible live process tree; +- safe process signaling for backend descendants; +- instrumented logical application I/O. + +Legacy process diagnostics RPCs are projected from the same service so they no +longer start recurring process-table commands. + +## Packaging + +Desktop artifact builds compile the Rust target, stage it as +`resources/resource-monitor/t3-resource-monitor[.exe]`, and pass its path to the +backend bootstrap. + +CLI release jobs upload each active platform monitor artifact and copy it into: + +```text +apps/server/dist/resource-monitor/<platform>-<arch>/ +``` + +The published server package already includes `dist`, so those executables ship +with the CLI. Missing platform artifacts degrade native telemetry to +`unavailable`; the server continues running. + +## Resource and failure behavior + +Steady state uses: + +- one native process; +- one native sample per second; +- one Electron sample per second in desktop mode; +- no telemetry database; +- no recurring shell probes; +- bounded PubSub queues and bounded history. + +The diagnostics page exposes the monitor's own process resource usage and +collection duration so the observer's cost is measurable. 
+ +Failures are isolated: + +- native failure does not stop the server; +- Electron telemetry loss does not stop native telemetry; +- schema/version errors are visible in health; +- repeated native failures stop automatic restart churn until explicit retry; +- server and desktop shutdown close their respective streams and child process + scopes. + +## Future integration points + +High-value follow-up work can use the existing service boundaries: + +- opt-in file-path attribution through platform-specific tracing; +- process lifecycle events to reduce the chance of missing very short-lived + children; +- additional `ResourceAttribution` instrumentation for databases, checkpoints, + caches, and file synchronization; +- exported diagnostic bundles; +- adaptive sample intervals based on diagnostics visibility and active work. + +These additions should preserve the current rules: direct platform APIs, +schema-validated boundaries, explicit metric semantics, bounded retention, and +no mandatory telemetry persistence. diff --git a/native/resource-monitor/Cargo.lock b/native/resource-monitor/Cargo.lock new file mode 100644 index 00000000000..cdc5f952288 --- /dev/null +++ b/native/resource-monitor/Cargo.lock @@ -0,0 +1,343 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. 
+version = 4 + +[[package]] +name = "bitflags" +version = "2.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b4388bee8683e3d04af747c73422af53102d2bd24d9eadb6cbc100baef4b43f8" + +[[package]] +name = "dispatch2" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e0e367e4e7da84520dedcac1901e4da967309406d1e51017ae1abfb97adbd38" +dependencies = [ + "bitflags", + "objc2", +] + +[[package]] +name = "itoa" +version = "1.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f42a60cbdf9a97f5d2305f08a87dc4e09308d1276d28c869c684d7777685682" + +[[package]] +name = "libc" +version = "0.2.186" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68ab91017fe16c622486840e4c83c9a37afeff978bd239b5293d61ece587de66" + +[[package]] +name = "memchr" +version = "2.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "88904434abc2901f197fe8cc55f0445e7ded921dba5911dad2e2b39b48e663c4" + +[[package]] +name = "ntapi" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c3b335231dfd352ffb0f8017f3b6027a4917f7df785ea2143d8af2adc66980ae" +dependencies = [ + "winapi", +] + +[[package]] +name = "objc2" +version = "0.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3a12a8ed07aefc768292f076dc3ac8c48f3781c8f2d5851dd3d98950e8c5a89f" +dependencies = [ + "objc2-encode", +] + +[[package]] +name = "objc2-core-foundation" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2a180dd8642fa45cdb7dd721cd4c11b1cadd4929ce112ebd8b9f5803cc79d536" +dependencies = [ + "bitflags", + "dispatch2", + "objc2", +] + +[[package]] +name = "objc2-encode" +version = "4.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ef25abbcd74fb2609453eb695bd2f860d389e457f67dc17cafc8b8cbc89d0c33" + +[[package]] 
+name = "objc2-foundation" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3e0adef53c21f888deb4fa59fc59f7eb17404926ee8a6f59f5df0fd7f9f3272" +dependencies = [ + "bitflags", + "objc2", +] + +[[package]] +name = "objc2-io-kit" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "33fafba39597d6dc1fb709123dfa8289d39406734be322956a69f0931c73bb15" +dependencies = [ + "libc", + "objc2-core-foundation", +] + +[[package]] +name = "objc2-open-directory" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bb82bed227edf5201dfedf072bba4015a33d3d4a98519837295a90f0a23f676d" +dependencies = [ + "objc2", + "objc2-core-foundation", + "objc2-foundation", +] + +[[package]] +name = "proc-macro2" +version = "1.0.106" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8fd00f0bb2e90d81d1044c2b32617f68fcb9fa3bb7640c23e9c748e53fb30934" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.45" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41f2619966050689382d2b44f664f4bc593e129785a36d6ee376ddf37259b924" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "serde" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e" +dependencies = [ + "serde_core", + "serde_derive", +] + +[[package]] +name = "serde_core" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" +dependencies = [ + "proc-macro2", + 
"quote", + "syn", +] + +[[package]] +name = "serde_json" +version = "1.0.150" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e8014e44b4736ed0538adeecded0fce2a272f22dc9578a7eb6b2d9993c74cfb9" +dependencies = [ + "itoa", + "memchr", + "serde", + "serde_core", + "zmij", +] + +[[package]] +name = "syn" +version = "2.0.118" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1b9ae57f904213ebb649ce6895b8a66c66f0203b9319718f69a5612a065b1422" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "sysinfo" +version = "0.39.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "21d0d938c10fcda3e897e28aaddf4ab462375d411f4378cd63b1c945f69aba96" +dependencies = [ + "libc", + "memchr", + "ntapi", + "objc2-core-foundation", + "objc2-io-kit", + "objc2-open-directory", + "windows", +] + +[[package]] +name = "t3-resource-monitor" +version = "0.1.0" +dependencies = [ + "serde", + "serde_json", + "sysinfo", +] + +[[package]] +name = "unicode-ident" +version = "1.0.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75" + +[[package]] +name = "winapi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" + +[[package]] +name = "windows" +version = 
"0.62.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "527fadee13e0c05939a6a05d5bd6eec6cd2e3dbd648b9f8e447c6518133d8580" +dependencies = [ + "windows-collections", + "windows-core", + "windows-future", + "windows-numerics", +] + +[[package]] +name = "windows-collections" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "23b2d95af1a8a14a3c7367e1ed4fc9c20e0a26e79551b1454d72583c97cc6610" +dependencies = [ + "windows-core", +] + +[[package]] +name = "windows-core" +version = "0.62.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8e83a14d34d0623b51dce9581199302a221863196a1dde71a7663a4c2be9deb" +dependencies = [ + "windows-implement", + "windows-interface", + "windows-link", + "windows-result", + "windows-strings", +] + +[[package]] +name = "windows-future" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e1d6f90251fe18a279739e78025bd6ddc52a7e22f921070ccdc67dde84c605cb" +dependencies = [ + "windows-core", + "windows-link", + "windows-threading", +] + +[[package]] +name = "windows-implement" +version = "0.60.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "053e2e040ab57b9dc951b72c264860db7eb3b0200ba345b4e4c3b14f67855ddf" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "windows-interface" +version = "0.59.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f316c4a2570ba26bbec722032c4099d8c8bc095efccdc15688708623367e358" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "windows-link" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" + +[[package]] +name = "windows-numerics" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"6e2e40844ac143cdb44aead537bbf727de9b044e107a0f1220392177d15b0f26" +dependencies = [ + "windows-core", + "windows-link", +] + +[[package]] +name = "windows-result" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7781fa89eaf60850ac3d2da7af8e5242a5ea78d1a11c49bf2910bb5a73853eb5" +dependencies = [ + "windows-link", +] + +[[package]] +name = "windows-strings" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7837d08f69c77cf6b07689544538e017c1bfcf57e34b4c0ff58e6c2cd3b37091" +dependencies = [ + "windows-link", +] + +[[package]] +name = "windows-threading" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3949bd5b99cafdf1c7ca86b43ca564028dfe27d66958f2470940f73d86d75b37" +dependencies = [ + "windows-link", +] + +[[package]] +name = "zmij" +version = "1.0.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8848ee67ecc8aedbaf3e4122217aff892639231befc6a1b58d29fff4c2cabaa" diff --git a/native/resource-monitor/Cargo.toml b/native/resource-monitor/Cargo.toml new file mode 100644 index 00000000000..30cf2ad7892 --- /dev/null +++ b/native/resource-monitor/Cargo.toml @@ -0,0 +1,17 @@ +[package] +name = "t3-resource-monitor" +version = "0.1.0" +edition = "2024" +license = "MIT" +publish = false + +[dependencies] +serde = { version = "1.0.228", features = ["derive"] } +serde_json = "1.0.150" +sysinfo = "0.39.3" + +[profile.release] +codegen-units = 1 +lto = "thin" +panic = "abort" +strip = true diff --git a/native/resource-monitor/src/main.rs b/native/resource-monitor/src/main.rs new file mode 100644 index 00000000000..5f747847442 --- /dev/null +++ b/native/resource-monitor/src/main.rs @@ -0,0 +1,560 @@ +use serde::{Deserialize, Serialize}; +use std::collections::{HashMap, HashSet, VecDeque}; +use std::io::{self, BufRead, BufWriter, Write}; +use std::sync::mpsc::{self, Receiver, RecvTimeoutError}; +use 
std::thread; +use std::time::{Duration, Instant, SystemTime, UNIX_EPOCH}; +use sysinfo::{Pid, ProcessRefreshKind, ProcessesToUpdate, System, UpdateKind}; + +const PROTOCOL_VERSION: u32 = 1; +const MIN_SAMPLE_INTERVAL_MS: u64 = 250; +const MAX_SAMPLE_INTERVAL_MS: u64 = 60_000; +const EXTERNAL_PROCESS_START_TOLERANCE_MS: u64 = 2_000; + +#[derive(Debug, Clone, Deserialize)] +#[serde(rename_all = "camelCase")] +struct ExternalProcess { + pid: u32, + #[serde(default)] + start_time_ms: Option, +} + +#[derive(Debug, Deserialize)] +#[serde( + tag = "type", + rename_all = "camelCase", + rename_all_fields = "camelCase" +)] +enum Command { + Configure { + version: u32, + root_pid: u32, + sample_interval_ms: u64, + #[serde(default)] + external_processes: Vec, + }, + SetExternalProcesses { + version: u32, + processes: Vec, + }, + SampleNow { + version: u32, + request_id: String, + }, + Shutdown { + version: u32, + }, +} + +impl Command { + fn version(&self) -> u32 { + match self { + Self::Configure { version, .. } + | Self::SetExternalProcesses { version, .. } + | Self::SampleNow { version, .. 
} + | Self::Shutdown { version } => *version, + } + } +} + +enum Input { + Command(Command), + Invalid(String), +} + +#[derive(Debug, Serialize)] +#[serde(rename_all = "camelCase")] +struct Capabilities { + cumulative_cpu_time: bool, + current_cpu_percent: bool, + resident_memory: bool, + virtual_memory: bool, + io_bytes: bool, + process_start_time: bool, + process_tree: bool, +} + +#[derive(Debug, Serialize)] +#[serde(rename_all = "camelCase")] +struct HelloEvent { + version: u32, + #[serde(rename = "type")] + event_type: &'static str, + sidecar_version: &'static str, + sidecar_pid: u32, + platform: &'static str, + arch: &'static str, + capabilities: Capabilities, +} + +#[derive(Debug, Clone, Copy, Serialize)] +#[serde(rename_all = "kebab-case")] +enum IoSemantics { + Storage, + AllIo, +} + +#[derive(Debug, Serialize)] +#[serde(rename_all = "camelCase")] +struct ProcessSample { + pid: u32, + ppid: u32, + start_time_ms: u64, + run_time_ms: u64, + name: String, + command: String, + status: String, + cpu_percent: f32, + cpu_time_ms: u64, + resident_bytes: u64, + virtual_bytes: u64, + io_read_bytes: u64, + io_write_bytes: u64, + io_semantics: IoSemantics, +} + +#[derive(Debug, Serialize)] +#[serde(rename_all = "camelCase")] +struct SnapshotEvent { + version: u32, + #[serde(rename = "type")] + event_type: &'static str, + sequence: u64, + sampled_at_unix_ms: u64, + collection_duration_micros: u64, + scanned_process_count: usize, + retained_process_count: usize, + inaccessible_process_count: usize, + #[serde(skip_serializing_if = "Option::is_none")] + request_id: Option, + processes: Vec, +} + +#[derive(Debug, Serialize)] +#[serde(rename_all = "camelCase")] +struct ErrorEvent { + version: u32, + #[serde(rename = "type")] + event_type: &'static str, + code: &'static str, + message: String, + recoverable: bool, +} + +#[derive(Debug, Clone)] +struct CollectorConfig { + root_pid: u32, + sample_interval: Duration, + external_processes: HashMap>, +} + +struct Collector { + 
system: System, + sequence: u64, +} + +impl Collector { + fn new() -> Self { + Self { + system: System::new(), + sequence: 0, + } + } + + fn sample(&mut self, config: &CollectorConfig, request_id: Option) -> SnapshotEvent { + let collection_started = Instant::now(); + self.system.refresh_processes_specifics( + ProcessesToUpdate::All, + true, + process_refresh_kind(), + ); + + let rows = self + .system + .processes() + .iter() + .map(|(pid, process)| { + let pid = pid.as_u32(); + let ppid = process.parent().map(Pid::as_u32).unwrap_or(0); + (pid, ppid, process.start_time().saturating_mul(1_000)) + }) + .collect::>(); + let mut roots = config + .external_processes + .iter() + .filter_map(|(pid, expected_start_time_ms)| { + let (_, _, actual_start_time_ms) = rows + .iter() + .find(|(candidate_pid, _, _)| candidate_pid == pid)?; + matches_external_identity(*actual_start_time_ms, *expected_start_time_ms) + .then_some(*pid) + }) + .collect::>(); + roots.insert(config.root_pid); + let tracked = select_tracked_pids(&rows, &roots); + let mut processes = tracked + .into_iter() + .filter_map(|pid| { + let process = self.system.process(Pid::from_u32(pid))?; + let disk_usage = process.disk_usage(); + let command = if process.cmd().is_empty() { + process.name().to_string_lossy().into_owned() + } else { + process + .cmd() + .iter() + .map(|part| part.to_string_lossy()) + .collect::>() + .join(" ") + }; + + Some(ProcessSample { + pid, + ppid: process.parent().map(Pid::as_u32).unwrap_or(0), + start_time_ms: process.start_time().saturating_mul(1_000), + run_time_ms: process.run_time().saturating_mul(1_000), + name: process.name().to_string_lossy().into_owned(), + command, + status: format!("{:?}", process.status()), + cpu_percent: process.cpu_usage(), + cpu_time_ms: process.accumulated_cpu_time(), + resident_bytes: process.memory(), + virtual_bytes: process.virtual_memory(), + io_read_bytes: disk_usage.total_read_bytes, + io_write_bytes: disk_usage.total_written_bytes, + 
/// Returns every pid reachable from `roots` by following parent -> child links in `rows`.
///
/// Each row is `(pid, ppid, start_time_ms)`; the start time is not consulted here.
/// A root that does not appear as a pid in `rows` is ignored and contributes nothing.
fn select_tracked_pids(rows: &[(u32, u32, u64)], roots: &HashSet<u32>) -> HashSet<u32> {
    // Build the set of known pids and the parent -> children index in a single pass.
    let mut present: HashSet<u32> = HashSet::with_capacity(rows.len());
    let mut offspring: HashMap<u32, Vec<u32>> = HashMap::new();
    for &(pid, ppid, _) in rows {
        present.insert(pid);
        offspring.entry(ppid).or_default().push(pid);
    }

    let mut pending: Vec<u32> = roots
        .iter()
        .copied()
        .filter(|root| present.contains(root))
        .collect();

    let mut tracked = HashSet::new();
    while let Some(current) = pending.pop() {
        // `insert` returns false for a pid that was already visited, so a pid is expanded
        // at most once even if the parent links form a cycle.
        if tracked.insert(current) {
            if let Some(kids) = offspring.get(&current) {
                pending.extend_from_slice(kids);
            }
        }
    }

    tracked
}
Duration::from_millis(sample_interval_ms.clamp(MIN_SAMPLE_INTERVAL_MS, MAX_SAMPLE_INTERVAL_MS)) +} + +fn spawn_input_reader() -> Receiver { + let (sender, receiver) = mpsc::channel(); + thread::spawn(move || { + let stdin = io::stdin(); + for line in stdin.lock().lines() { + let line = match line { + Ok(line) => line, + Err(error) => { + let _ = sender.send(Input::Invalid(format!( + "failed reading command stream: {error}" + ))); + return; + } + }; + if line.trim().is_empty() { + continue; + } + match serde_json::from_str::(&line) { + Ok(command) => { + if sender.send(Input::Command(command)).is_err() { + return; + } + } + Err(error) => { + if sender + .send(Input::Invalid(format!("invalid command: {error}"))) + .is_err() + { + return; + } + } + } + } + }); + receiver +} + +fn write_event(writer: &mut impl Write, event: &T) -> io::Result<()> { + serde_json::to_writer(&mut *writer, event)?; + writer.write_all(b"\n")?; + writer.flush() +} + +fn write_error( + writer: &mut impl Write, + code: &'static str, + message: impl Into, + recoverable: bool, +) -> io::Result<()> { + write_event( + writer, + &ErrorEvent { + version: PROTOCOL_VERSION, + event_type: "error", + code, + message: message.into(), + recoverable, + }, + ) +} + +fn main() -> io::Result<()> { + let mut writer = BufWriter::new(io::stdout().lock()); + write_event( + &mut writer, + &HelloEvent { + version: PROTOCOL_VERSION, + event_type: "hello", + sidecar_version: env!("CARGO_PKG_VERSION"), + sidecar_pid: std::process::id(), + platform: std::env::consts::OS, + arch: std::env::consts::ARCH, + capabilities: Capabilities { + cumulative_cpu_time: true, + current_cpu_percent: true, + resident_memory: true, + virtual_memory: true, + io_bytes: true, + process_start_time: true, + process_tree: true, + }, + }, + )?; + + let receiver = spawn_input_reader(); + let mut collector = Collector::new(); + let mut config: Option = None; + let mut next_sample_at: Option = None; + + loop { + let timeout = next_sample_at + 
.map(|deadline| deadline.saturating_duration_since(Instant::now())) + .unwrap_or(Duration::from_secs(60)); + + match receiver.recv_timeout(timeout) { + Ok(Input::Invalid(message)) => { + write_error(&mut writer, "invalid-command", message, true)?; + } + Ok(Input::Command(command)) => { + if command.version() != PROTOCOL_VERSION { + write_error( + &mut writer, + "protocol-mismatch", + format!( + "unsupported protocol version {}; expected {PROTOCOL_VERSION}", + command.version() + ), + false, + )?; + continue; + } + + match command { + Command::Configure { + root_pid, + sample_interval_ms, + external_processes, + .. + } => { + let sample_interval = clamp_sample_interval(sample_interval_ms); + config = Some(CollectorConfig { + root_pid, + sample_interval, + external_processes: external_processes + .into_iter() + .map(|process| (process.pid, process.start_time_ms)) + .collect(), + }); + next_sample_at = Some(Instant::now()); + } + Command::SetExternalProcesses { processes, .. } => { + if let Some(current) = config.as_mut() { + current.external_processes = processes + .into_iter() + .map(|process| (process.pid, process.start_time_ms)) + .collect(); + } else { + write_error( + &mut writer, + "not-configured", + "configure must be sent before external processes", + true, + )?; + } + } + Command::SampleNow { request_id, .. } => { + if let Some(current) = config.as_ref() { + let event = collector.sample(current, Some(request_id)); + write_event(&mut writer, &event)?; + next_sample_at = Some(Instant::now() + current.sample_interval); + } else { + write_error( + &mut writer, + "not-configured", + "configure must be sent before sampling", + true, + )?; + } + } + Command::Shutdown { .. 
} => return Ok(()), + } + } + Err(RecvTimeoutError::Timeout) => { + if let Some(current) = config.as_ref() { + let event = collector.sample(current, None); + write_event(&mut writer, &event)?; + next_sample_at = Some(Instant::now() + current.sample_interval); + } + } + Err(RecvTimeoutError::Disconnected) => return Ok(()), + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn selects_roots_and_all_descendants() { + let rows = vec![ + (10, 1, 1_000), + (11, 10, 1_100), + (12, 11, 1_200), + (20, 1, 2_000), + (21, 20, 2_100), + (30, 99, 3_000), + ]; + let tracked = select_tracked_pids(&rows, &HashSet::from([10, 20])); + + assert_eq!(tracked, HashSet::from([10, 11, 12, 20, 21])); + } + + #[test] + fn ignores_missing_roots() { + let rows = vec![(10, 1, 1_000), (11, 10, 1_100)]; + let tracked = select_tracked_pids(&rows, &HashSet::from([99])); + + assert!(tracked.is_empty()); + } + + #[test] + fn validates_external_process_start_identity() { + assert!(matches_external_identity(10_000, None)); + assert!(matches_external_identity(10_000, Some(11_999))); + assert!(!matches_external_identity(10_000, Some(12_001))); + } + + #[test] + fn decodes_protocol_commands() { + let configure = serde_json::from_str::( + r#"{"version":1,"type":"configure","rootPid":42,"sampleIntervalMs":1000,"externalProcesses":[{"pid":7}]}"#, + ) + .expect("configure command"); + + match configure { + Command::Configure { + root_pid, + sample_interval_ms, + external_processes, + .. 
+ } => { + assert_eq!(root_pid, 42); + assert_eq!(sample_interval_ms, 1_000); + assert_eq!(external_processes[0].pid, 7); + assert_eq!(external_processes[0].start_time_ms, None); + } + _ => panic!("unexpected command"), + } + } + + #[test] + fn clamps_sample_interval() { + assert_eq!(clamp_sample_interval(1), Duration::from_millis(250)); + assert_eq!( + clamp_sample_interval(100_000), + Duration::from_millis(60_000) + ); + } + + #[test] + fn refreshes_commands_without_enumerating_linux_tasks() { + let refresh_kind = process_refresh_kind(); + + assert_eq!(refresh_kind.cmd(), UpdateKind::OnlyIfNotSet); + assert!(!refresh_kind.tasks()); + assert!(refresh_kind.cpu()); + assert!(refresh_kind.memory()); + assert!(refresh_kind.disk_usage()); + } +} diff --git a/package.json b/package.json index f97275e60bb..90ecaec834b 100644 --- a/package.json +++ b/package.json @@ -16,11 +16,13 @@ "build": "vp run --filter './apps/*' --filter './packages/*' --filter './oxlint-plugin-t3code' --filter './scripts' build", "build:marketing": "vp run --filter @t3tools/marketing build", "build:desktop": "vp run --filter @t3tools/desktop --filter t3 build", + "build:resource-monitor": "cargo build --locked --release --manifest-path native/resource-monitor/Cargo.toml", "typecheck": "vp run -r --concurrency-limit 2 typecheck", "tc": "vp run -r --concurrency-limit 2 typecheck", "lint": "vp lint --report-unused-disable-directives", "lint:mobile": "node scripts/mobile-native-static-check.ts", "test": "vp run -r test", + "test:resource-monitor": "cargo test --locked --manifest-path native/resource-monitor/Cargo.toml", "test:desktop-smoke": "vp run --filter @t3tools/desktop smoke-test", "fmt": "vp fmt", "fmt:check": "vp fmt --check", diff --git a/packages/client-runtime/src/rpc/client.ts b/packages/client-runtime/src/rpc/client.ts index 0af9e5023a0..75596f80bef 100644 --- a/packages/client-runtime/src/rpc/client.ts +++ b/packages/client-runtime/src/rpc/client.ts @@ -49,6 +49,7 @@ export type 
EnvironmentSubscriptionRpcTag = | typeof WS_METHODS.subscribeTerminalMetadata | typeof WS_METHODS.subscribePreviewEvents | typeof WS_METHODS.subscribeDiscoveredLocalServers + | typeof WS_METHODS.subscribeResourceTelemetry | typeof WS_METHODS.previewAutomationConnect | typeof WS_METHODS.subscribeVcsStatus | typeof WS_METHODS.terminalAttach; @@ -189,69 +190,69 @@ export function subscribe( input, }); return SubscriptionRef.changes(supervisor.session).pipe( - Stream.switchMap( - Option.match({ - onNone: () => Stream.empty, - onSome: (session) => { - const method = session.client[tag] as ( - input: EnvironmentRpcInput, - ) => Stream.Stream< - EnvironmentRpcStreamValue, - EnvironmentRpcStreamFailure - >; - const subscribeToSession = (): Stream.Stream< - EnvironmentRpcStreamValue, - EnvironmentRpcStreamFailure - > => - Stream.suspend(() => - method(input).pipe( - Stream.catchCause((cause) => { - const hasOnlyExpectedFailures = - cause.reasons.length > 0 && - cause.reasons.every((reason) => reason._tag === "Fail"); - const isTransportFailure = - hasOnlyExpectedFailures && - cause.reasons.every( - (reason) => reason._tag === "Fail" && isRpcClientError(reason.error), - ); - if (isTransportFailure) { - return Stream.fromEffect( - Effect.logWarning( - "Durable RPC subscription lost its transport; waiting for the next session.", - { - cause: Cause.pretty(cause), - method: tag, - environmentId: supervisor.target.environmentId, - }, - ), - ).pipe(Stream.drain); + Stream.switchMap( + Option.match({ + onNone: () => Stream.empty, + onSome: (session) => { + const method = session.client[tag] as ( + input: EnvironmentRpcInput, + ) => Stream.Stream< + EnvironmentRpcStreamValue, + EnvironmentRpcStreamFailure + >; + const subscribeToSession = (): Stream.Stream< + EnvironmentRpcStreamValue, + EnvironmentRpcStreamFailure + > => + Stream.suspend(() => + method(input).pipe( + Stream.catchCause((cause) => { + const hasOnlyExpectedFailures = + cause.reasons.length > 0 && + 
cause.reasons.every((reason) => reason._tag === "Fail"); + const isTransportFailure = + hasOnlyExpectedFailures && + cause.reasons.every( + (reason) => reason._tag === "Fail" && isRpcClientError(reason.error), + ); + if (isTransportFailure) { + return Stream.fromEffect( + Effect.logWarning( + "Durable RPC subscription lost its transport; waiting for the next session.", + { + cause: Cause.pretty(cause), + method: tag, + environmentId: supervisor.target.environmentId, + }, + ), + ).pipe(Stream.drain); + } + if (hasOnlyExpectedFailures && options?.onExpectedFailure !== undefined) { + const handled = Stream.fromEffect(options.onExpectedFailure(cause)).pipe( + Stream.drain, + ); + if (options.retryExpectedFailureAfter === undefined) { + return handled; } - if (hasOnlyExpectedFailures && options?.onExpectedFailure !== undefined) { - const handled = Stream.fromEffect(options.onExpectedFailure(cause)).pipe( - Stream.drain, - ); - if (options.retryExpectedFailureAfter === undefined) { - return handled; - } - return handled.pipe( - Stream.concat( - Stream.fromEffect( - Effect.sleep(options.retryExpectedFailureAfter), - ).pipe(Stream.drain), + return handled.pipe( + Stream.concat( + Stream.fromEffect(Effect.sleep(options.retryExpectedFailureAfter)).pipe( + Stream.drain, ), - Stream.concat(subscribeToSession()), - ); - } - return Stream.failCause(cause); - }), - ), - ); - return subscribeToSession(); - }, - }), - ), - Stream.ensuring(completeObservation), - ); + ), + Stream.concat(subscribeToSession()), + ); + } + return Stream.failCause(cause); + }), + ), + ); + return subscribeToSession(); + }, + }), + ), + Stream.ensuring(completeObservation), + ); }), ).pipe( Stream.withSpan("EnvironmentRpc.subscribe", { diff --git a/packages/client-runtime/src/state/server.ts b/packages/client-runtime/src/state/server.ts index 23bb7bff2a9..1b33ba95ced 100644 --- a/packages/client-runtime/src/state/server.ts +++ b/packages/client-runtime/src/state/server.ts @@ -133,6 +133,16 @@ export 
function createServerEnvironmentAtoms( label: "environment-data:server:process-resource-history", tag: WS_METHODS.serverGetProcessResourceHistory, }), + resourceTelemetry: createEnvironmentRpcSubscriptionAtomFamily(runtime, { + label: "environment-data:server:resource-telemetry", + tag: WS_METHODS.subscribeResourceTelemetry, + idleTtlMs: 0, + }), + resourceTelemetryHistory: createEnvironmentRpcQueryAtomFamily(runtime, { + label: "environment-data:server:resource-telemetry-history", + tag: WS_METHODS.serverGetResourceTelemetryHistory, + staleTimeMs: 5_000, + }), configProjection, welcome: createEnvironmentRpcSubscriptionAtomFamily(runtime, { label: "environment-data:server:welcome", @@ -178,5 +188,13 @@ export function createServerEnvironmentAtoms( label: "environment-data:server:signal-process", tag: WS_METHODS.serverSignalProcess, }), + retryResourceTelemetry: createEnvironmentRpcCommand(runtime, { + label: "environment-data:server:retry-resource-telemetry", + tag: WS_METHODS.serverRetryResourceTelemetry, + concurrency: { + mode: "singleFlight", + key: ({ environmentId }) => environmentId, + }, + }), }; } diff --git a/packages/contracts/src/desktopBootstrap.ts b/packages/contracts/src/desktopBootstrap.ts index c23dbbb3960..2f0a1c8e4aa 100644 --- a/packages/contracts/src/desktopBootstrap.ts +++ b/packages/contracts/src/desktopBootstrap.ts @@ -1,6 +1,6 @@ import * as Schema from "effect/Schema"; -import { PortSchema } from "./baseSchemas.ts"; +import { PortSchema, PositiveInt, TrimmedNonEmptyString } from "./baseSchemas.ts"; export const DesktopBackendBootstrap = Schema.Struct({ mode: Schema.Literal("desktop"), @@ -13,6 +13,8 @@ export const DesktopBackendBootstrap = Schema.Struct({ tailscaleServePort: PortSchema, otlpTracesUrl: Schema.optional(Schema.String), otlpMetricsUrl: Schema.optional(Schema.String), + desktopTelemetryFd: Schema.optionalKey(PositiveInt), + resourceMonitorPath: Schema.optionalKey(TrimmedNonEmptyString), }); export type DesktopBackendBootstrap 
= typeof DesktopBackendBootstrap.Type; diff --git a/packages/contracts/src/index.ts b/packages/contracts/src/index.ts index 03ecb4fec2c..a79341de2f1 100644 --- a/packages/contracts/src/index.ts +++ b/packages/contracts/src/index.ts @@ -26,4 +26,5 @@ export * from "./assets.ts"; export * from "./review.ts"; export * from "./preview.ts"; export * from "./previewAutomation.ts"; +export * from "./resourceTelemetry.ts"; export * from "./rpc.ts"; diff --git a/packages/contracts/src/ipc.ts b/packages/contracts/src/ipc.ts index 03c06d2f81a..b2a6140dbf9 100644 --- a/packages/contracts/src/ipc.ts +++ b/packages/contracts/src/ipc.ts @@ -32,6 +32,12 @@ import type { ProjectWriteFileResult, } from "./project.ts"; import type { ProviderInstanceId } from "./providerInstance.ts"; +import type { + ResourceTelemetryHistory, + ResourceTelemetryHistoryInput, + ResourceTelemetryRetryResult, + ResourceTelemetrySnapshot, +} from "./resourceTelemetry.ts"; import type { ServerConfig, ServerProcessDiagnosticsResult, @@ -1067,6 +1073,11 @@ export interface LocalApi { getProcessResourceHistory: ( input: ServerProcessResourceHistoryInput, ) => Promise; + getResourceTelemetry: () => Promise; + getResourceTelemetryHistory: ( + input: ResourceTelemetryHistoryInput, + ) => Promise; + retryResourceTelemetry: () => Promise; signalProcess: (input: ServerSignalProcessInput) => Promise; }; } diff --git a/packages/contracts/src/resourceTelemetry.ts b/packages/contracts/src/resourceTelemetry.ts new file mode 100644 index 00000000000..59fa0ce94ad --- /dev/null +++ b/packages/contracts/src/resourceTelemetry.ts @@ -0,0 +1,365 @@ +import * as Schema from "effect/Schema"; + +import { NonNegativeInt, PositiveInt, TrimmedNonEmptyString } from "./baseSchemas.ts"; +import { HostPowerSnapshot } from "./background.ts"; + +export const RESOURCE_MONITOR_PROTOCOL_VERSION = 1 as const; + +export const ResourceTelemetryIoSemantics = Schema.Literals([ + "storage", + "logical", + "all-io", + "unavailable", +]); +export 
type ResourceTelemetryIoSemantics = typeof ResourceTelemetryIoSemantics.Type; + +export const ResourceTelemetryProcessCategory = Schema.Literals([ + "server", + "server-child", + "provider-root", + "terminal-root", + "electron-main", + "electron-renderer", + "electron-gpu", + "electron-utility", + "resource-monitor", + "unknown-t3", +]); +export type ResourceTelemetryProcessCategory = typeof ResourceTelemetryProcessCategory.Type; + +export const ResourceTelemetrySourceStatus = Schema.Literals([ + "starting", + "healthy", + "degraded", + "unavailable", + "stopped", +]); +export type ResourceTelemetrySourceStatus = typeof ResourceTelemetrySourceStatus.Type; + +export const ResourceTelemetryProcessIdentity = Schema.Struct({ + pid: PositiveInt, + startTimeMs: NonNegativeInt, +}); +export type ResourceTelemetryProcessIdentity = typeof ResourceTelemetryProcessIdentity.Type; + +export const ResourceMonitorExternalProcess = Schema.Struct({ + pid: PositiveInt, + startTimeMs: Schema.optionalKey(NonNegativeInt), +}); +export type ResourceMonitorExternalProcess = typeof ResourceMonitorExternalProcess.Type; + +export const ResourceMonitorCapabilities = Schema.Struct({ + cumulativeCpuTime: Schema.Boolean, + currentCpuPercent: Schema.Boolean, + residentMemory: Schema.Boolean, + virtualMemory: Schema.Boolean, + ioBytes: Schema.Boolean, + processStartTime: Schema.Boolean, + processTree: Schema.Boolean, +}); +export type ResourceMonitorCapabilities = typeof ResourceMonitorCapabilities.Type; + +export const ResourceMonitorProcessSample = Schema.Struct({ + pid: PositiveInt, + ppid: NonNegativeInt, + startTimeMs: NonNegativeInt, + runTimeMs: NonNegativeInt, + name: Schema.String, + command: Schema.String, + status: Schema.String, + cpuPercent: Schema.Number, + cpuTimeMs: NonNegativeInt, + residentBytes: NonNegativeInt, + virtualBytes: NonNegativeInt, + ioReadBytes: NonNegativeInt, + ioWriteBytes: NonNegativeInt, + ioSemantics: Schema.Literals(["storage", "all-io"]), +}); +export type 
ResourceMonitorProcessSample = typeof ResourceMonitorProcessSample.Type; + +export const ResourceMonitorConfigureCommand = Schema.Struct({ + version: Schema.Literal(RESOURCE_MONITOR_PROTOCOL_VERSION), + type: Schema.Literal("configure"), + rootPid: PositiveInt, + sampleIntervalMs: NonNegativeInt, + externalProcesses: Schema.Array(ResourceMonitorExternalProcess), +}); +export type ResourceMonitorConfigureCommand = typeof ResourceMonitorConfigureCommand.Type; + +export const ResourceMonitorSetExternalProcessesCommand = Schema.Struct({ + version: Schema.Literal(RESOURCE_MONITOR_PROTOCOL_VERSION), + type: Schema.Literal("setExternalProcesses"), + processes: Schema.Array(ResourceMonitorExternalProcess), +}); +export type ResourceMonitorSetExternalProcessesCommand = + typeof ResourceMonitorSetExternalProcessesCommand.Type; + +export const ResourceMonitorSampleNowCommand = Schema.Struct({ + version: Schema.Literal(RESOURCE_MONITOR_PROTOCOL_VERSION), + type: Schema.Literal("sampleNow"), + requestId: TrimmedNonEmptyString, +}); +export type ResourceMonitorSampleNowCommand = typeof ResourceMonitorSampleNowCommand.Type; + +export const ResourceMonitorShutdownCommand = Schema.Struct({ + version: Schema.Literal(RESOURCE_MONITOR_PROTOCOL_VERSION), + type: Schema.Literal("shutdown"), +}); +export type ResourceMonitorShutdownCommand = typeof ResourceMonitorShutdownCommand.Type; + +export const ResourceMonitorCommand = Schema.Union([ + ResourceMonitorConfigureCommand, + ResourceMonitorSetExternalProcessesCommand, + ResourceMonitorSampleNowCommand, + ResourceMonitorShutdownCommand, +]); +export type ResourceMonitorCommand = typeof ResourceMonitorCommand.Type; + +export const ResourceMonitorHelloEvent = Schema.Struct({ + version: Schema.Literal(RESOURCE_MONITOR_PROTOCOL_VERSION), + type: Schema.Literal("hello"), + sidecarVersion: TrimmedNonEmptyString, + sidecarPid: PositiveInt, + platform: TrimmedNonEmptyString, + arch: TrimmedNonEmptyString, + capabilities: 
ResourceMonitorCapabilities, +}); +export type ResourceMonitorHelloEvent = typeof ResourceMonitorHelloEvent.Type; + +export const ResourceMonitorSnapshotEvent = Schema.Struct({ + version: Schema.Literal(RESOURCE_MONITOR_PROTOCOL_VERSION), + type: Schema.Literal("snapshot"), + sequence: NonNegativeInt, + sampledAtUnixMs: NonNegativeInt, + collectionDurationMicros: NonNegativeInt, + scannedProcessCount: NonNegativeInt, + retainedProcessCount: NonNegativeInt, + inaccessibleProcessCount: NonNegativeInt, + requestId: Schema.optionalKey(TrimmedNonEmptyString), + processes: Schema.Array(ResourceMonitorProcessSample), +}); +export type ResourceMonitorSnapshotEvent = typeof ResourceMonitorSnapshotEvent.Type; + +export const ResourceMonitorErrorEvent = Schema.Struct({ + version: Schema.Literal(RESOURCE_MONITOR_PROTOCOL_VERSION), + type: Schema.Literal("error"), + code: TrimmedNonEmptyString, + message: TrimmedNonEmptyString, + recoverable: Schema.Boolean, +}); +export type ResourceMonitorErrorEvent = typeof ResourceMonitorErrorEvent.Type; + +export const ResourceMonitorEvent = Schema.Union([ + ResourceMonitorHelloEvent, + ResourceMonitorSnapshotEvent, + ResourceMonitorErrorEvent, +]); +export type ResourceMonitorEvent = typeof ResourceMonitorEvent.Type; + +export const DesktopElectronProcessType = Schema.Literals([ + "Browser", + "Tab", + "Utility", + "Zygote", + "Sandbox helper", + "GPU", + "Pepper Plugin", + "Pepper Plugin Broker", + "Unknown", +]); +export type DesktopElectronProcessType = typeof DesktopElectronProcessType.Type; + +export const DesktopElectronProcessMetric = Schema.Struct({ + pid: PositiveInt, + creationTimeMs: NonNegativeInt, + type: DesktopElectronProcessType, + name: Schema.optionalKey(Schema.String), + serviceName: Schema.optionalKey(Schema.String), + cpuPercent: Schema.Number, + cumulativeCpuSeconds: Schema.optionalKey(Schema.Number), + idleWakeupsPerSecond: Schema.Number, + workingSetBytes: NonNegativeInt, + peakWorkingSetBytes: NonNegativeInt, +}); 
+export type DesktopElectronProcessMetric = typeof DesktopElectronProcessMetric.Type; + +export const DesktopHostTelemetrySnapshot = Schema.Struct({ + version: Schema.Literal(1), + type: Schema.Literal("desktopTelemetry"), + sequence: NonNegativeInt, + sampledAtUnixMs: NonNegativeInt, + power: HostPowerSnapshot, + speedLimitPercent: Schema.Option(Schema.Number), + electronProcesses: Schema.Array(DesktopElectronProcessMetric), +}); +export type DesktopHostTelemetrySnapshot = typeof DesktopHostTelemetrySnapshot.Type; + +export const DesktopHostTelemetryHello = Schema.Struct({ + version: Schema.Literal(1), + type: Schema.Literal("desktopTelemetryHello"), + electronPid: PositiveInt, +}); +export type DesktopHostTelemetryHello = typeof DesktopHostTelemetryHello.Type; + +export const DesktopHostTelemetryMessage = Schema.Union([ + DesktopHostTelemetryHello, + DesktopHostTelemetrySnapshot, +]); +export type DesktopHostTelemetryMessage = typeof DesktopHostTelemetryMessage.Type; + +export const ResourceTelemetryProcess = Schema.Struct({ + identity: ResourceTelemetryProcessIdentity, + ppid: NonNegativeInt, + childPids: Schema.Array(PositiveInt), + depth: NonNegativeInt, + name: Schema.String, + command: Schema.String, + status: Schema.String, + category: ResourceTelemetryProcessCategory, + electronType: Schema.optionalKey(DesktopElectronProcessType), + electronServiceName: Schema.optionalKey(Schema.String), + cpuPercent: Schema.Number, + cpuTimeMs: NonNegativeInt, + residentBytes: NonNegativeInt, + peakResidentBytes: NonNegativeInt, + virtualBytes: NonNegativeInt, + ioReadBytes: NonNegativeInt, + ioWriteBytes: NonNegativeInt, + ioReadBytesPerSecond: Schema.Number, + ioWriteBytesPerSecond: Schema.Number, + ioSemantics: ResourceTelemetryIoSemantics, + idleWakeupsPerSecond: Schema.optionalKey(Schema.Number), + runTimeMs: NonNegativeInt, + firstSeenAt: Schema.DateTimeUtc, + lastSeenAt: Schema.DateTimeUtc, +}); +export type ResourceTelemetryProcess = typeof 
ResourceTelemetryProcess.Type; + +export const ResourceTelemetryAggregate = Schema.Struct({ + processCount: NonNegativeInt, + currentCpuPercent: Schema.Number, + cpuTimeMs: NonNegativeInt, + currentRssBytes: NonNegativeInt, + peakRssBytes: NonNegativeInt, + ioReadBytes: NonNegativeInt, + ioWriteBytes: NonNegativeInt, + ioReadBytesPerSecond: Schema.Number, + ioWriteBytesPerSecond: Schema.Number, + processStarts: NonNegativeInt, + processExits: NonNegativeInt, +}); +export type ResourceTelemetryAggregate = typeof ResourceTelemetryAggregate.Type; + +export const ResourceTelemetryGroups = Schema.Struct({ + backend: ResourceTelemetryAggregate, + electron: ResourceTelemetryAggregate, + monitor: ResourceTelemetryAggregate, + allT3: ResourceTelemetryAggregate, +}); +export type ResourceTelemetryGroups = typeof ResourceTelemetryGroups.Type; + +export const ResourceTelemetrySourceHealth = Schema.Struct({ + status: ResourceTelemetrySourceStatus, + lastSampleAt: Schema.Option(Schema.DateTimeUtc), + lastError: Schema.Option(TrimmedNonEmptyString), +}); +export type ResourceTelemetrySourceHealth = typeof ResourceTelemetrySourceHealth.Type; + +export const ResourceTelemetryHealth = Schema.Struct({ + native: ResourceTelemetrySourceHealth, + desktop: ResourceTelemetrySourceHealth, + sidecarVersion: Schema.Option(TrimmedNonEmptyString), + sidecarPid: Schema.Option(PositiveInt), + restartCount: NonNegativeInt, + collectionDurationMicros: NonNegativeInt, + scannedProcessCount: NonNegativeInt, + retainedProcessCount: NonNegativeInt, + inaccessibleProcessCount: NonNegativeInt, +}); +export type ResourceTelemetryHealth = typeof ResourceTelemetryHealth.Type; + +export const ResourceAttributionEntry = Schema.Struct({ + component: TrimmedNonEmptyString, + operation: TrimmedNonEmptyString, + logicalReadBytes: NonNegativeInt, + logicalWriteBytes: NonNegativeInt, + count: NonNegativeInt, + durationMs: NonNegativeInt, +}); +export type ResourceAttributionEntry = typeof 
ResourceAttributionEntry.Type; + +export const ResourceAttributionSnapshot = Schema.Struct({ + readAt: Schema.DateTimeUtc, + entries: Schema.Array(ResourceAttributionEntry), +}); +export type ResourceAttributionSnapshot = typeof ResourceAttributionSnapshot.Type; + +export const ResourceTelemetrySnapshot = Schema.Struct({ + readAt: Schema.DateTimeUtc, + sampleIntervalMs: NonNegativeInt, + processes: Schema.Array(ResourceTelemetryProcess), + groups: ResourceTelemetryGroups, + power: HostPowerSnapshot, + speedLimitPercent: Schema.Option(Schema.Number), + attribution: ResourceAttributionSnapshot, + health: ResourceTelemetryHealth, +}); +export type ResourceTelemetrySnapshot = typeof ResourceTelemetrySnapshot.Type; + +export const ResourceTelemetryHistoryInput = Schema.Struct({ + windowMs: NonNegativeInt, + bucketMs: NonNegativeInt, +}); +export type ResourceTelemetryHistoryInput = typeof ResourceTelemetryHistoryInput.Type; + +export const ResourceTelemetryHistoryBucket = Schema.Struct({ + startedAt: Schema.DateTimeUtc, + endedAt: Schema.DateTimeUtc, + avgCpuPercent: Schema.Number, + maxCpuPercent: Schema.Number, + maxRssBytes: NonNegativeInt, + ioReadBytes: NonNegativeInt, + ioWriteBytes: NonNegativeInt, + maxProcessCount: NonNegativeInt, +}); +export type ResourceTelemetryHistoryBucket = typeof ResourceTelemetryHistoryBucket.Type; + +export const ResourceTelemetryProcessSummary = Schema.Struct({ + identity: ResourceTelemetryProcessIdentity, + ppid: NonNegativeInt, + depth: NonNegativeInt, + name: Schema.String, + command: Schema.String, + category: ResourceTelemetryProcessCategory, + firstSeenAt: Schema.DateTimeUtc, + lastSeenAt: Schema.DateTimeUtc, + currentCpuPercent: Schema.Number, + avgCpuPercent: Schema.Number, + maxCpuPercent: Schema.Number, + cpuTimeMs: NonNegativeInt, + currentRssBytes: NonNegativeInt, + peakRssBytes: NonNegativeInt, + ioReadBytes: NonNegativeInt, + ioWriteBytes: NonNegativeInt, + ioSemantics: ResourceTelemetryIoSemantics, + sampleCount: 
NonNegativeInt, +}); +export type ResourceTelemetryProcessSummary = typeof ResourceTelemetryProcessSummary.Type; + +export const ResourceTelemetryHistory = Schema.Struct({ + readAt: Schema.DateTimeUtc, + windowMs: NonNegativeInt, + bucketMs: NonNegativeInt, + sampleIntervalMs: NonNegativeInt, + retainedSampleCount: NonNegativeInt, + buckets: Schema.Array(ResourceTelemetryHistoryBucket), + topProcesses: Schema.Array(ResourceTelemetryProcessSummary), + health: ResourceTelemetryHealth, +}); +export type ResourceTelemetryHistory = typeof ResourceTelemetryHistory.Type; + +export const ResourceTelemetryRetryResult = Schema.Struct({ + accepted: Schema.Boolean, + snapshot: ResourceTelemetrySnapshot, +}); +export type ResourceTelemetryRetryResult = typeof ResourceTelemetryRetryResult.Type; diff --git a/packages/contracts/src/rpc.ts b/packages/contracts/src/rpc.ts index b5865aad3e0..11b8a6519d2 100644 --- a/packages/contracts/src/rpc.ts +++ b/packages/contracts/src/rpc.ts @@ -134,6 +134,12 @@ import { ServerUpsertKeybindingInput, ServerUpsertKeybindingResult, } from "./server.ts"; +import { + ResourceTelemetryHistory, + ResourceTelemetryHistoryInput, + ResourceTelemetryRetryResult, + ResourceTelemetrySnapshot, +} from "./resourceTelemetry.ts"; import { ServerSettings, ServerSettingsError, ServerSettingsPatch } from "./settings.ts"; import { SourceControlCloneRepositoryInput, @@ -215,6 +221,9 @@ export const WS_METHODS = { serverGetTraceDiagnostics: "server.getTraceDiagnostics", serverGetProcessDiagnostics: "server.getProcessDiagnostics", serverGetProcessResourceHistory: "server.getProcessResourceHistory", + serverGetResourceTelemetry: "server.getResourceTelemetry", + serverGetResourceTelemetryHistory: "server.getResourceTelemetryHistory", + serverRetryResourceTelemetry: "server.retryResourceTelemetry", serverSignalProcess: "server.signalProcess", serverReportClientActivity: "server.reportClientActivity", serverReportHostPowerState: "server.reportHostPowerState", @@ -239,6 
+248,7 @@ export const WS_METHODS = { subscribeServerLifecycle: "subscribeServerLifecycle", subscribeAuthAccess: "subscribeAuthAccess", subscribeBackgroundPolicy: "subscribeBackgroundPolicy", + subscribeResourceTelemetry: "subscribeResourceTelemetry", } as const; export const WsServerUpsertKeybindingRpc = Rpc.make(WS_METHODS.serverUpsertKeybinding, { @@ -318,6 +328,27 @@ export const WsServerGetProcessResourceHistoryRpc = Rpc.make( }, ); +export const WsServerGetResourceTelemetryRpc = Rpc.make(WS_METHODS.serverGetResourceTelemetry, { + payload: Schema.Struct({}), + success: ResourceTelemetrySnapshot, + error: EnvironmentAuthorizationError, +}); + +export const WsServerGetResourceTelemetryHistoryRpc = Rpc.make( + WS_METHODS.serverGetResourceTelemetryHistory, + { + payload: ResourceTelemetryHistoryInput, + success: ResourceTelemetryHistory, + error: EnvironmentAuthorizationError, + }, +); + +export const WsServerRetryResourceTelemetryRpc = Rpc.make(WS_METHODS.serverRetryResourceTelemetry, { + payload: Schema.Struct({}), + success: ResourceTelemetryRetryResult, + error: EnvironmentAuthorizationError, +}); + export const WsServerSignalProcessRpc = Rpc.make(WS_METHODS.serverSignalProcess, { payload: ServerSignalProcessInput, success: ServerSignalProcessResult, @@ -710,6 +741,13 @@ export const WsSubscribeBackgroundPolicyRpc = Rpc.make(WS_METHODS.subscribeBackg stream: true, }); +export const WsSubscribeResourceTelemetryRpc = Rpc.make(WS_METHODS.subscribeResourceTelemetry, { + payload: Schema.Struct({}), + success: ResourceTelemetrySnapshot, + error: EnvironmentAuthorizationError, + stream: true, +}); + export const WsRpcGroup = RpcGroup.make( WsServerGetConfigRpc, WsServerRefreshProvidersRpc, @@ -722,6 +760,9 @@ export const WsRpcGroup = RpcGroup.make( WsServerGetTraceDiagnosticsRpc, WsServerGetProcessDiagnosticsRpc, WsServerGetProcessResourceHistoryRpc, + WsServerGetResourceTelemetryRpc, + WsServerGetResourceTelemetryHistoryRpc, + WsServerRetryResourceTelemetryRpc, 
WsServerSignalProcessRpc, WsServerReportClientActivityRpc, WsServerReportHostPowerStateRpc, @@ -776,6 +817,7 @@ export const WsRpcGroup = RpcGroup.make( WsSubscribeServerLifecycleRpc, WsSubscribeAuthAccessRpc, WsSubscribeBackgroundPolicyRpc, + WsSubscribeResourceTelemetryRpc, WsOrchestrationDispatchCommandRpc, WsOrchestrationGetTurnDiffRpc, WsOrchestrationGetFullThreadDiffRpc, diff --git a/packages/contracts/src/server.ts b/packages/contracts/src/server.ts index 1aa280ad63b..a2afa4e61aa 100644 --- a/packages/contracts/src/server.ts +++ b/packages/contracts/src/server.ts @@ -302,6 +302,7 @@ export type ServerProcessSignal = typeof ServerProcessSignal.Type; export const ServerProcessDiagnosticsEntry = Schema.Struct({ pid: PositiveInt, + startTimeMs: NonNegativeInt, ppid: NonNegativeInt, pgid: Schema.Option(Schema.Int), status: TrimmedNonEmptyString, @@ -383,6 +384,7 @@ export type ServerProcessResourceHistoryResult = typeof ServerProcessResourceHis export const ServerSignalProcessInput = Schema.Struct({ pid: PositiveInt, + startTimeMs: NonNegativeInt, signal: ServerProcessSignal, }); export type ServerSignalProcessInput = typeof ServerSignalProcessInput.Type; diff --git a/packages/shared/src/observability.test.ts b/packages/shared/src/observability.test.ts index 57537b63e19..c98217851ce 100644 --- a/packages/shared/src/observability.test.ts +++ b/packages/shared/src/observability.test.ts @@ -8,6 +8,7 @@ import * as Layer from "effect/Layer"; import * as Logger from "effect/Logger"; import * as Order from "effect/Order"; import * as Path from "effect/Path"; +import * as Ref from "effect/Ref"; import * as References from "effect/References"; import * as Schema from "effect/Schema"; import * as Tracer from "effect/Tracer"; @@ -18,6 +19,7 @@ import { makeLocalFileTracer, makeTraceSink, type TraceRecord, + type TraceSinkFlushStats, } from "./observability.ts"; describe("causeErrorTag", () => { @@ -167,6 +169,34 @@ describe("observability", () => { ), ); + 
it.effect("reports successful logical trace writes", () => + Effect.scoped( + Effect.gen(function* () { + const fileSystem = yield* FileSystem.FileSystem; + const path = yield* Path.Path; + const tempDir = yield* fileSystem.makeTempDirectoryScoped({ prefix: "t3-trace-sink-" }); + const tracePath = path.join(tempDir, "shared.trace.ndjson"); + const reported = yield* Ref.make>([]); + + const sink = yield* makeTraceSink({ + filePath: tracePath, + maxBytes: 1024, + maxFiles: 2, + batchWindowMs: 10_000, + onFlush: (stats) => Ref.update(reported, (current) => [...current, stats]), + }); + + sink.push(makeRecord("attributed")); + yield* sink.flush; + + const stats = yield* Ref.get(reported); + assert.equal(stats.length, 1); + assert.equal(stats[0]?.count, 1); + assert.isAbove(stats[0]?.logicalWriteBytes ?? 0, 0); + }), + ), + ); + it.effect("rotates the trace file when the configured max size is exceeded", () => Effect.scoped( Effect.gen(function* () { diff --git a/packages/shared/src/observability.ts b/packages/shared/src/observability.ts index 68d4985db95..9e0d6352a0c 100644 --- a/packages/shared/src/observability.ts +++ b/packages/shared/src/observability.ts @@ -9,6 +9,7 @@ import { OtlpResource, OtlpTracer } from "effect/unstable/observability"; import { RotatingFileSink } from "./logging.ts"; const FLUSH_BUFFER_THRESHOLD = 32; +const textEncoder = new TextEncoder(); export type TraceAttributes = Readonly>; @@ -94,6 +95,13 @@ export interface TraceSinkOptions { readonly maxBytes: number; readonly maxFiles: number; readonly batchWindowMs: number; + readonly onFlush?: (stats: TraceSinkFlushStats) => Effect.Effect; +} + +export interface TraceSinkFlushStats { + readonly logicalWriteBytes: number; + readonly count: number; + readonly durationMs: number; } export interface TraceSink { @@ -263,23 +271,50 @@ export const makeTraceSink = Effect.fn("makeTraceSink")(function* (options: Trac }); let buffer: Array = []; + let pendingFlushStats: TraceSinkFlushStats = { + 
logicalWriteBytes: 0, + count: 0, + durationMs: 0, + }; const flushUnsafe = () => { if (buffer.length === 0) { return; } - const chunk = buffer.join(""); + const records = buffer; + const chunk = records.join(""); buffer = []; + const startedAt = performance.now(); try { sink.write(chunk); + pendingFlushStats = { + logicalWriteBytes: + pendingFlushStats.logicalWriteBytes + textEncoder.encode(chunk).byteLength, + count: pendingFlushStats.count + records.length, + durationMs: pendingFlushStats.durationMs + Math.max(0, performance.now() - startedAt), + }; } catch { - buffer.unshift(chunk); + buffer.unshift(...records); } }; - const flush = Effect.sync(flushUnsafe).pipe(Effect.withTracerEnabled(false)); + const flush = Effect.sync(() => { + flushUnsafe(); + const stats = pendingFlushStats; + pendingFlushStats = { + logicalWriteBytes: 0, + count: 0, + durationMs: 0, + }; + return stats; + }).pipe( + Effect.flatMap((stats) => + stats.count > 0 && options.onFlush ? options.onFlush(stats).pipe(Effect.ignore) : Effect.void, + ), + Effect.withTracerEnabled(false), + ); yield* Effect.addFinalizer(() => flush.pipe(Effect.ignore)); yield* Effect.forkScoped( diff --git a/scripts/build-desktop-artifact.test.ts b/scripts/build-desktop-artifact.test.ts index 8135f7e259d..e26e2461108 100644 --- a/scripts/build-desktop-artifact.test.ts +++ b/scripts/build-desktop-artifact.test.ts @@ -9,12 +9,15 @@ import { createStageWorkspaceConfig, createStagePnpmConfig, DESKTOP_ASAR_UNPACK, + DESKTOP_EXTRA_RESOURCES, resolveDesktopRuntimeDependencies, resolveFffNativeDependencies, resolveBuildOptions, resolveDesktopBuildIconAssets, resolveDesktopProductName, resolveDesktopUpdateChannel, + resolveResourceMonitorRustTargets, + resourceMonitorExecutableName, resolveGitHubPublishConfig, resolveMockUpdateServerPort, resolveMockUpdateServerUrl, @@ -175,6 +178,27 @@ it.layer(NodeServices.layer)("build-desktop-artifact", (it) => { assert.deepStrictEqual(DESKTOP_ASAR_UNPACK, 
["node_modules/@ff-labs/fff-bin-*/**/*"]); }); + it("stages the resource monitor as an external executable resource", () => { + assert.deepStrictEqual(DESKTOP_EXTRA_RESOURCES, [ + { + from: "apps/desktop/prod-resources/resource-monitor", + to: "resource-monitor", + }, + ]); + assert.deepStrictEqual(resolveResourceMonitorRustTargets("mac", "universal"), [ + "aarch64-apple-darwin", + "x86_64-apple-darwin", + ]); + assert.deepStrictEqual(resolveResourceMonitorRustTargets("linux", "x64"), [ + "x86_64-unknown-linux-gnu", + ]); + assert.deepStrictEqual(resolveResourceMonitorRustTargets("win", "arm64"), [ + "aarch64-pc-windows-msvc", + ]); + assert.equal(resourceMonitorExecutableName("mac"), "t3-resource-monitor"); + assert.equal(resourceMonitorExecutableName("win"), "t3-resource-monitor.exe"); + }); + it("promotes target fff binaries to direct staged dependencies", () => { assert.deepStrictEqual(resolveFffNativeDependencies("mac", "arm64", "0.9.4"), { "@ff-labs/fff-bin-darwin-arm64": "0.9.4", diff --git a/scripts/build-desktop-artifact.ts b/scripts/build-desktop-artifact.ts index 6b519b1d4e3..e4e040f7b6b 100644 --- a/scripts/build-desktop-artifact.ts +++ b/scripts/build-desktop-artifact.ts @@ -72,6 +72,26 @@ interface PlatformConfig { readonly archChoices: ReadonlyArray; } +export function resolveResourceMonitorRustTargets( + platform: typeof BuildPlatform.Type, + arch: typeof BuildArch.Type, +): ReadonlyArray { + if (platform === "mac") { + if (arch === "universal") { + return ["aarch64-apple-darwin", "x86_64-apple-darwin"]; + } + return [arch === "arm64" ? "aarch64-apple-darwin" : "x86_64-apple-darwin"]; + } + if (platform === "linux") { + return [arch === "arm64" ? "aarch64-unknown-linux-gnu" : "x86_64-unknown-linux-gnu"]; + } + return [arch === "arm64" ? "aarch64-pc-windows-msvc" : "x86_64-pc-windows-msvc"]; +} + +export function resourceMonitorExecutableName(platform: typeof BuildPlatform.Type): string { + return platform === "win" ? 
"t3-resource-monitor.exe" : "t3-resource-monitor"; +} + const PLATFORM_CONFIG: Record = { mac: { cliFlag: "--mac", @@ -292,6 +312,12 @@ interface StagePackageJson { export const STAGE_INSTALL_ARGS = ["install", "--prod"] as const; export const DESKTOP_ASAR_UNPACK = ["node_modules/@ff-labs/fff-bin-*/**/*"] as const; +export const DESKTOP_EXTRA_RESOURCES = [ + { + from: "apps/desktop/prod-resources/resource-monitor", + to: "resource-monitor", + }, +] as const; export function resolveFffNativeDependencies( platform: typeof BuildPlatform.Type, @@ -497,6 +523,78 @@ const runCommand = Effect.fn("runCommand")(function* ( } }); +const stageResourceMonitor = Effect.fn("stageResourceMonitor")(function* (input: { + readonly repoRoot: string; + readonly stageResourcesDir: string; + readonly platform: typeof BuildPlatform.Type; + readonly arch: typeof BuildArch.Type; + readonly verbose: boolean; +}) { + const fs = yield* FileSystem.FileSystem; + const path = yield* Path.Path; + const manifestPath = path.join(input.repoRoot, "native/resource-monitor/Cargo.toml"); + const executableName = resourceMonitorExecutableName(input.platform); + const rustTargets = resolveResourceMonitorRustTargets(input.platform, input.arch); + const builtBinaries: string[] = []; + + for (const rustTarget of rustTargets) { + const spawnCommand = yield* resolveSpawnCommand("cargo", [ + "build", + "--locked", + "--release", + "--manifest-path", + manifestPath, + "--target", + rustTarget, + ]); + yield* runCommand( + ChildProcess.make(spawnCommand.command, spawnCommand.args, { + cwd: input.repoRoot, + shell: spawnCommand.shell, + }), + { + label: `cargo build resource monitor (${rustTarget})`, + verbose: input.verbose, + }, + ); + + const binaryPath = path.join( + input.repoRoot, + "native/resource-monitor/target", + rustTarget, + "release", + executableName, + ); + if (!(yield* fs.exists(binaryPath))) { + return yield* new BuildScriptError({ + message: `Resource monitor build did not produce 
${binaryPath}.`, + }); + } + builtBinaries.push(binaryPath); + } + + const destinationDirectory = path.join(input.stageResourcesDir, "resource-monitor"); + const destinationPath = path.join(destinationDirectory, executableName); + yield* fs.remove(destinationDirectory, { recursive: true, force: true }).pipe(Effect.ignore); + yield* fs.makeDirectory(destinationDirectory, { recursive: true }); + + if (builtBinaries.length === 1) { + yield* fs.copyFile(builtBinaries[0]!, destinationPath); + } else { + yield* runCommand( + ChildProcess.make("lipo", ["-create", ...builtBinaries, "-output", destinationPath]), + { + label: "lipo resource monitor universal binary", + verbose: input.verbose, + }, + ); + } + + if (input.platform !== "win") { + yield* fs.chmod(destinationPath, 0o755); + } +}); + function generateMacIconSet( sourcePng: string, targetIcns: string, @@ -755,6 +853,7 @@ const createBuildConfig = Effect.fn("createBuildConfig")(function* ( directories: { buildResources: "apps/desktop/resources", }, + extraResources: DESKTOP_EXTRA_RESOURCES, }; const updateChannel = resolveDesktopUpdateChannel(version); const publishConfig = yield* resolveGitHubPublishConfig(updateChannel); @@ -941,6 +1040,13 @@ const buildDesktopArtifact = Effect.fn("buildDesktopArtifact")(function* ( yield* fs.copy(distDirs.desktopDist, path.join(stageAppDir, "apps/desktop/dist-electron")); yield* fs.copy(distDirs.desktopResources, stageResourcesDir); yield* fs.copy(distDirs.serverDist, path.join(stageAppDir, "apps/server/dist")); + yield* stageResourceMonitor({ + repoRoot, + stageResourcesDir, + platform: options.platform, + arch: options.arch, + verbose: options.verbose, + }); yield* assertPlatformBuildResources( options.platform, From 4638de470a804b3740ae4187248f0932f41d181b Mon Sep 17 00:00:00 2001 From: Julius Marminge Date: Wed, 17 Jun 2026 15:50:39 -0700 Subject: [PATCH 3/5] Route desktop telemetry control and stale power handling - Add fd5 control channel for desktop telemetry demand - Stop 
gating background work on stale host power - Update telemetry sampling and receiver tests Co-authored-by: codex --- .../DesktopBackendConfiguration.test.ts | 1 + .../backend/DesktopBackendConfiguration.ts | 1 + .../src/backend/DesktopBackendManager.test.ts | 50 ++- .../src/backend/DesktopBackendManager.ts | 62 +++- .../DesktopTelemetryPublisher.test.ts | 104 +++++- .../telemetry/DesktopTelemetryPublisher.ts | 59 +++- .../src/background/BackgroundPolicy.test.ts | 28 +- .../server/src/background/BackgroundPolicy.ts | 8 +- .../src/background/HostPowerMonitor.test.ts | 48 +++ .../server/src/background/HostPowerMonitor.ts | 22 +- apps/server/src/cli/config.test.ts | 7 + apps/server/src/cli/config.ts | 2 + apps/server/src/config.ts | 2 + .../diagnostics/ProcessDiagnostics.test.ts | 4 +- .../DesktopTelemetryReceiver.ts | 147 ++++++++- .../src/resourceTelemetry/Model.test.ts | 44 ++- apps/server/src/resourceTelemetry/Model.ts | 47 ++- .../NativeTelemetryClient.test.ts | 47 +++ .../NativeTelemetryClient.ts | 298 +++++++++++++++++- .../ResourceTelemetry.test.ts | 57 +++- .../resourceTelemetry/ResourceTelemetry.ts | 155 ++++++--- .../ResourceTelemetryHistory.test.ts | 167 ++++++++++ .../ResourceTelemetryHistory.ts | 214 +++++++++++++ .../ResourceTelemetryStore.ts | 253 --------------- apps/server/src/server.ts | 5 +- docs/architecture/resource-telemetry.md | 96 ++++-- native/resource-monitor/src/main.rs | 234 +++++++++++++- packages/contracts/src/desktopBootstrap.ts | 1 + packages/contracts/src/resourceTelemetry.ts | 49 ++- 29 files changed, 1782 insertions(+), 430 deletions(-) create mode 100644 apps/server/src/resourceTelemetry/NativeTelemetryClient.test.ts create mode 100644 apps/server/src/resourceTelemetry/ResourceTelemetryHistory.test.ts create mode 100644 apps/server/src/resourceTelemetry/ResourceTelemetryHistory.ts delete mode 100644 apps/server/src/resourceTelemetry/ResourceTelemetryStore.ts diff --git a/apps/desktop/src/backend/DesktopBackendConfiguration.test.ts 
b/apps/desktop/src/backend/DesktopBackendConfiguration.test.ts index 7e3580e44ac..255f98513fb 100644 --- a/apps/desktop/src/backend/DesktopBackendConfiguration.test.ts +++ b/apps/desktop/src/backend/DesktopBackendConfiguration.test.ts @@ -213,6 +213,7 @@ describe("DesktopBackendConfiguration", () => { const config = yield* configuration.resolve; assert.equal(config.bootstrap.resourceMonitorPath, monitorPath); assert.equal(config.bootstrap.desktopTelemetryFd, 4); + assert.equal(config.bootstrap.desktopTelemetryControlFd, 5); }).pipe( Effect.provide( DesktopBackendConfiguration.layer.pipe( diff --git a/apps/desktop/src/backend/DesktopBackendConfiguration.ts b/apps/desktop/src/backend/DesktopBackendConfiguration.ts index f2f23194ce3..a6f59d3652a 100644 --- a/apps/desktop/src/backend/DesktopBackendConfiguration.ts +++ b/apps/desktop/src/backend/DesktopBackendConfiguration.ts @@ -153,6 +153,7 @@ const resolveBackendStartConfig = Effect.fn("desktop.backendConfiguration.resolv tailscaleServeEnabled: backendExposure.tailscaleServeEnabled, tailscaleServePort: backendExposure.tailscaleServePort, desktopTelemetryFd: 4, + desktopTelemetryControlFd: 5, ...Option.match(input.resourceMonitorPath, { onNone: () => ({}), onSome: (resourceMonitorPath) => ({ resourceMonitorPath }), diff --git a/apps/desktop/src/backend/DesktopBackendManager.test.ts b/apps/desktop/src/backend/DesktopBackendManager.test.ts index 5222439b0ca..86e642e80b3 100644 --- a/apps/desktop/src/backend/DesktopBackendManager.test.ts +++ b/apps/desktop/src/backend/DesktopBackendManager.test.ts @@ -1,6 +1,7 @@ import { DesktopBackendBootstrap, type DesktopBackendBootstrap as DesktopBackendBootstrapValue, + DesktopTelemetryControlMessage, } from "@t3tools/contracts"; import { assert, describe, it } from "@effect/vitest"; import * as Deferred from "effect/Deferred"; @@ -29,6 +30,9 @@ import * as DesktopWindow from "../window/DesktopWindow.ts"; const decodeDesktopBackendBootstrap = Schema.decodeEffect( 
Schema.fromJsonString(DesktopBackendBootstrap), ); +const encodeDesktopTelemetryControl = Schema.encodeSync( + Schema.fromJsonString(DesktopTelemetryControlMessage), +); const baseConfig: DesktopBackendManager.DesktopBackendStartConfig = { executablePath: "/electron", @@ -44,6 +48,8 @@ const baseConfig: DesktopBackendManager.DesktopBackendStartConfig = { desktopBootstrapToken: "token", tailscaleServeEnabled: false, tailscaleServePort: 443, + desktopTelemetryFd: 4, + desktopTelemetryControlFd: 5, }, httpBaseUrl: new URL("http://127.0.0.1:3773"), captureOutput: true, @@ -60,6 +66,7 @@ function makeProcess(options?: { readonly stderr?: Stream.Stream; readonly exitCode?: Effect.Effect; readonly kill?: ChildProcessSpawner.ChildProcessHandle["kill"]; + readonly getOutputFd?: ChildProcessSpawner.ChildProcessHandle["getOutputFd"]; }): ChildProcessSpawner.ChildProcessHandle { return ChildProcessSpawner.makeHandle({ pid: ChildProcessSpawner.ProcessId(123), @@ -71,7 +78,7 @@ function makeProcess(options?: { kill: options?.kill ?? (() => Effect.void), stdin: Sink.drain, getInputFd: () => Sink.drain, - getOutputFd: () => Stream.empty, + getOutputFd: options?.getOutputFd ?? (() => Stream.empty), unref: Effect.succeed(Effect.void), }); } @@ -108,6 +115,7 @@ function makeManagerLayer(input: { readonly backendOutputLog?: Partial; readonly desktopState?: DesktopState.DesktopStateShape; readonly desktopWindow?: Partial; + readonly desktopTelemetryPublisher?: Partial; readonly config?: DesktopBackendManager.DesktopBackendStartConfig; }) { return DesktopBackendManager.layer.pipe( @@ -125,6 +133,8 @@ function makeManagerLayer(input: { latest: Effect.succeed(Option.none()), changes: Stream.empty, encoded: Stream.empty, + handleControl: () => Effect.void, + ...input.desktopTelemetryPublisher, }), input.desktopState ? 
Layer.succeed(DesktopState.DesktopState, input.desktopState) @@ -214,6 +224,8 @@ describe("DesktopBackendManager", () => { assert.equal(spawnedCommand.options.stderr, "pipe"); assert.equal(spawnedCommand.options.killSignal, "SIGTERM"); assert.isDefined(spawnedCommand.options.forceKillAfter); + assert.equal(spawnedCommand.options.additionalFds?.fd4?.type, "input"); + assert.equal(spawnedCommand.options.additionalFds?.fd5?.type, "output"); assert.equal( Duration.toMillis(Duration.fromInputUnsafe(spawnedCommand.options.forceKillAfter)), 2_000, @@ -224,6 +236,42 @@ describe("DesktopBackendManager", () => { }), ); + it.effect("routes desktop telemetry control messages from fd5 to the publisher", () => + Effect.gen(function* () { + const handled = yield* Deferred.make(); + const controlMessage = encodeDesktopTelemetryControl({ + version: 1, + type: "setDiagnosticsDemand", + enabled: true, + }); + const spawnerLayer = Layer.succeed( + ChildProcessSpawner.ChildProcessSpawner, + ChildProcessSpawner.make(() => + Effect.succeed( + makeProcess({ + getOutputFd: (fd) => + fd === 5 ? 
Stream.encodeText(Stream.make(`${controlMessage}\n`)) : Stream.empty, + exitCode: Deferred.await(handled).pipe(Effect.as(ChildProcessSpawner.ExitCode(0))), + }), + ), + ), + ); + const managerLayer = makeManagerLayer({ + spawnerLayer, + desktopTelemetryPublisher: { + handleControl: (message) => + Deferred.succeed(handled, message.enabled).pipe(Effect.asVoid), + }, + }); + + yield* Effect.gen(function* () { + const manager = yield* DesktopBackendManager.DesktopBackendManager; + yield* manager.start; + assert.isTrue(yield* Deferred.await(handled)); + }).pipe(Effect.provide(managerLayer)); + }), + ); + it.effect("retries HTTP readiness before reporting the backend ready", () => Effect.gen(function* () { const requestUrls: Array = []; diff --git a/apps/desktop/src/backend/DesktopBackendManager.ts b/apps/desktop/src/backend/DesktopBackendManager.ts index cb7307e83c8..5e763f1da10 100644 --- a/apps/desktop/src/backend/DesktopBackendManager.ts +++ b/apps/desktop/src/backend/DesktopBackendManager.ts @@ -16,12 +16,15 @@ import * as Schema from "effect/Schema"; import * as Semaphore from "effect/Semaphore"; import * as Scope from "effect/Scope"; import * as Stream from "effect/Stream"; +import * as Ndjson from "effect/unstable/encoding/Ndjson"; import { HttpClient } from "effect/unstable/http"; import { ChildProcess, ChildProcessSpawner } from "effect/unstable/process"; import { DesktopBackendBootstrap, type DesktopBackendBootstrap as DesktopBackendBootstrapValue, + DesktopTelemetryControlMessage, + type DesktopTelemetryControlMessage as DesktopTelemetryControlMessageValue, } from "@t3tools/contracts"; import * as DesktopBackendConfiguration from "./DesktopBackendConfiguration.ts"; @@ -90,6 +93,9 @@ type BackendProcessError = BackendProcessBootstrapEncodeError | BackendProcessSp interface RunBackendProcessOptions extends DesktopBackendStartConfig { readonly desktopTelemetryStream: Stream.Stream; + readonly onDesktopTelemetryControl?: ( + message: 
DesktopTelemetryControlMessageValue, + ) => Effect.Effect; readonly readinessTimeout?: Duration.Duration; readonly onStarted?: (pid: number) => Effect.Effect; readonly onReady?: () => Effect.Effect; @@ -226,6 +232,7 @@ function drainBackendOutput( } const encodeBootstrapJson = Schema.encodeEffect(Schema.fromJsonString(DesktopBackendBootstrap)); +const decodeDesktopTelemetryControl = Schema.decodeUnknownEffect(DesktopTelemetryControlMessage); const runBackendProcess = Effect.fn("runBackendProcess")(function* ( options: RunBackendProcessOptions, @@ -235,6 +242,23 @@ const runBackendProcess = Effect.fn("runBackendProcess")(function* ( Effect.mapError((cause) => new BackendProcessBootstrapEncodeError({ cause })), ); const onOutput = options.onOutput ?? (() => Effect.void); + const additionalFds: Record<`fd${number}`, ChildProcess.AdditionalFdConfig> = { + fd3: { + type: "input", + stream: Stream.encodeText(Stream.make(`${bootstrapJson}\n`)), + }, + }; + if (options.bootstrap.desktopTelemetryFd !== undefined) { + additionalFds[`fd${options.bootstrap.desktopTelemetryFd}`] = { + type: "input", + stream: options.desktopTelemetryStream, + }; + } + if (options.bootstrap.desktopTelemetryControlFd !== undefined) { + additionalFds[`fd${options.bootstrap.desktopTelemetryControlFd}`] = { + type: "output", + }; + } const command = ChildProcess.make( options.executablePath, [options.entryPath, "--bootstrap-fd", "3"], @@ -249,16 +273,7 @@ const runBackendProcess = Effect.fn("runBackendProcess")(function* ( stderr: options.captureOutput ? 
"pipe" : "inherit", killSignal: "SIGTERM", forceKillAfter: DEFAULT_BACKEND_TERMINATE_GRACE, - additionalFds: { - fd3: { - type: "input", - stream: Stream.encodeText(Stream.make(`${bootstrapJson}\n`)), - }, - fd4: { - type: "input", - stream: options.desktopTelemetryStream, - }, - }, + additionalFds, }, ); @@ -267,6 +282,32 @@ const runBackendProcess = Effect.fn("runBackendProcess")(function* ( .pipe(Effect.mapError((cause) => new BackendProcessSpawnError({ cause }))); yield* options.onStarted?.(handle.pid) ?? Effect.void; + if ( + options.bootstrap.desktopTelemetryControlFd !== undefined && + options.onDesktopTelemetryControl !== undefined + ) { + const controlFd = options.bootstrap.desktopTelemetryControlFd; + const handleControl = options.onDesktopTelemetryControl; + yield* handle.getOutputFd(controlFd).pipe( + Stream.pipeThroughChannel(Ndjson.decode({ ignoreEmptyLines: true })), + Stream.mapEffect((message) => decodeDesktopTelemetryControl(message)), + Stream.runForEach(handleControl), + Effect.catchCause((cause) => + logBackendManagerWarning("desktop telemetry control stream stopped", { + fd: controlFd, + cause: Cause.pretty(cause), + }), + ), + Effect.ensuring( + handleControl({ + version: 1, + type: "setDiagnosticsDemand", + enabled: false, + }), + ), + Effect.forkScoped, + ); + } if (options.captureOutput) { yield* drainBackendOutput("stdout", handle.stdout, onOutput).pipe(Effect.forkScoped); yield* drainBackendOutput("stderr", handle.stderr, onOutput).pipe(Effect.forkScoped); @@ -444,6 +485,7 @@ const makeDesktopBackendManager = Effect.fn("makeDesktopBackendManager")(functio const program = runBackendProcess({ ...config.value, desktopTelemetryStream: desktopTelemetryPublisher.encoded, + onDesktopTelemetryControl: desktopTelemetryPublisher.handleControl, onStarted: Effect.fn("desktop.backendManager.onStarted")(function* (pid) { yield* updateActiveRun(runId, (run) => ({ ...run, diff --git a/apps/desktop/src/telemetry/DesktopTelemetryPublisher.test.ts 
b/apps/desktop/src/telemetry/DesktopTelemetryPublisher.test.ts index 7518fed42f6..1fe8934c1b5 100644 --- a/apps/desktop/src/telemetry/DesktopTelemetryPublisher.test.ts +++ b/apps/desktop/src/telemetry/DesktopTelemetryPublisher.test.ts @@ -14,7 +14,10 @@ import * as ElectronApp from "../electron/ElectronApp.ts"; import * as ElectronPowerMonitor from "../electron/ElectronPowerMonitor.ts"; import * as DesktopTelemetryPublisher from "./DesktopTelemetryPublisher.ts"; -function makeElectronAppLayer(metrics: ReadonlyArray) { +function makeElectronAppLayer( + metrics: ReadonlyArray, + onMetricsRead: () => void = () => undefined, +) { return Layer.succeed(ElectronApp.ElectronApp, { metadata: Effect.die("unexpected metadata read"), name: Effect.succeed("T3 Code"), @@ -27,7 +30,10 @@ function makeElectronAppLayer(metrics: ReadonlyArray) { setAboutPanelOptions: () => Effect.void, setAppUserModelId: () => Effect.void, requestSingleInstanceLock: Effect.succeed(true), - getAppMetrics: Effect.succeed(metrics), + getAppMetrics: Effect.sync(() => { + onMetricsRead(); + return metrics; + }), isDefaultProtocolClient: () => Effect.succeed(false), setAsDefaultProtocolClient: () => Effect.succeed(true), setDesktopName: () => Effect.void, @@ -41,6 +47,7 @@ describe("DesktopTelemetryPublisher", () => { it.effect("publishes Electron metrics and event-driven power state over NDJSON", () => Effect.gen(function* () { const onBattery = yield* Ref.make(false); + let metricsReadCount = 0; const simpleListeners = new Map void>(); let thermalListener: ((state: ElectronPowerMonitor.ElectronThermalState) => void) | null = null; @@ -84,7 +91,14 @@ describe("DesktopTelemetryPublisher", () => { }), ); const layer = DesktopTelemetryPublisher.layer.pipe( - Layer.provide(Layer.mergeAll(makeElectronAppLayer(metrics), powerLayer)), + Layer.provide( + Layer.mergeAll( + makeElectronAppLayer(metrics, () => { + metricsReadCount += 1; + }), + powerLayer, + ), + ), ); yield* Effect.gen(function* () { @@ -96,26 
+110,86 @@ describe("DesktopTelemetryPublisher", () => { assert.equal(messages[0]?.type, "desktopTelemetryHello"); assert.equal(messages[0]?.electronPid, process.pid); assert.equal(messages[1]?.type, "desktopTelemetry"); - assert.equal(messages[1]?.electronProcesses[0]?.pid, 4_242); - assert.equal(messages[1]?.electronProcesses[0]?.cpuPercent, 12.5); - assert.equal(messages[1]?.electronProcesses[0]?.workingSetBytes, 2_048 * 1_024); + assert.deepEqual(messages[1]?.electronProcesses, []); + assert.equal(messages[1]?.electronPid, process.pid); + assert.equal(metricsReadCount, 0); const nextSnapshotFiber = yield* Stream.runHead(publisher.changes).pipe(Effect.forkChild); yield* Effect.yieldNow; + yield* publisher.handleControl({ + version: 1, + type: "setDiagnosticsDemand", + enabled: true, + }); + const demandedSnapshot = Option.getOrThrow(yield* Fiber.join(nextSnapshotFiber)); + assert.equal(demandedSnapshot.electronProcesses[0]?.pid, 4_242); + assert.equal(demandedSnapshot.electronProcesses[0]?.cpuPercent, 12.5); + assert.equal(demandedSnapshot.electronProcesses[0]?.workingSetBytes, 2_048 * 1_024); + assert.equal(metricsReadCount, 1); + + const batterySnapshotFiber = yield* Stream.runHead(publisher.changes).pipe( + Effect.forkChild, + ); + yield* Effect.yieldNow; yield* Ref.set(onBattery, true); - simpleListeners.get("lock-screen")?.(); - simpleListeners.get("suspend")?.(); + simpleListeners.get("on-battery")?.(); + const batterySnapshot = Option.getOrThrow(yield* Fiber.join(batterySnapshotFiber)); + assert.equal(batterySnapshot.power.onBattery, "true"); + + const metricsAfterBatteryEvent = metricsReadCount; + yield* TestClock.adjust(Duration.millis(4_999)); + assert.equal(metricsReadCount, metricsAfterBatteryEvent); + yield* TestClock.adjust(Duration.millis(1)); + assert.equal(metricsReadCount, metricsAfterBatteryEvent + 1); + + const constrainedSnapshotFiber = yield* Stream.runHead(publisher.changes).pipe( + Effect.forkChild, + ); + yield* Effect.yieldNow; 
thermalListener?.("serious"); + const constrainedSnapshot = Option.getOrThrow(yield* Fiber.join(constrainedSnapshotFiber)); + assert.equal(constrainedSnapshot.power.thermalState, "serious"); + + const metricsAfterThermalEvent = metricsReadCount; + yield* TestClock.adjust(Duration.millis(14_999)); + assert.equal(metricsReadCount, metricsAfterThermalEvent); + yield* TestClock.adjust(Duration.millis(1)); + assert.equal(metricsReadCount, metricsAfterThermalEvent + 1); + + const speedLimitSnapshotFiber = yield* Stream.runHead(publisher.changes).pipe( + Effect.forkChild, + ); + yield* Effect.yieldNow; speedLimitListener?.(65); + const speedLimitSnapshot = Option.getOrThrow(yield* Fiber.join(speedLimitSnapshotFiber)); + assert.equal(Option.getOrNull(speedLimitSnapshot.speedLimitPercent), 65); + + const stoppedSnapshotFiber = yield* Stream.runHead(publisher.changes).pipe( + Effect.forkChild, + ); yield* Effect.yieldNow; - yield* TestClock.adjust(Duration.seconds(1)); + yield* publisher.handleControl({ + version: 1, + type: "setDiagnosticsDemand", + enabled: false, + }); + const stoppedSnapshot = Option.getOrThrow(yield* Fiber.join(stoppedSnapshotFiber)); + assert.deepEqual(stoppedSnapshot.electronProcesses, []); + const backgroundSequence = stoppedSnapshot.sequence; + const metricsAfterStopping = metricsReadCount; - const nextSnapshot = Option.getOrThrow(yield* Fiber.join(nextSnapshotFiber)); - assert.equal(nextSnapshot.power.locked, "true"); - assert.equal(nextSnapshot.power.suspended, true); - assert.equal(nextSnapshot.power.onBattery, "true"); - assert.equal(nextSnapshot.power.thermalState, "serious"); - assert.equal(Option.getOrNull(nextSnapshot.speedLimitPercent), 65); + yield* TestClock.adjust(Duration.seconds(29)); + assert.equal( + (yield* publisher.latest).pipe(Option.getOrThrow).sequence, + backgroundSequence, + ); + assert.equal(metricsReadCount, metricsAfterStopping); + yield* TestClock.adjust(Duration.seconds(1)); + assert.equal( + (yield* 
publisher.latest).pipe(Option.getOrThrow).sequence, + backgroundSequence + 1, + ); + assert.equal(metricsReadCount, metricsAfterStopping); }).pipe(Effect.provide(layer)); }), ); diff --git a/apps/desktop/src/telemetry/DesktopTelemetryPublisher.ts b/apps/desktop/src/telemetry/DesktopTelemetryPublisher.ts index 86b2538695e..8aa765a23b7 100644 --- a/apps/desktop/src/telemetry/DesktopTelemetryPublisher.ts +++ b/apps/desktop/src/telemetry/DesktopTelemetryPublisher.ts @@ -1,6 +1,7 @@ import { DesktopHostTelemetryMessage, type DesktopHostTelemetrySnapshot, + type DesktopTelemetryControlMessage, type HostPowerSnapshot, } from "@t3tools/contracts"; import * as Context from "effect/Context"; @@ -18,7 +19,10 @@ import * as Stream from "effect/Stream"; import * as ElectronApp from "../electron/ElectronApp.ts"; import * as ElectronPowerMonitor from "../electron/ElectronPowerMonitor.ts"; -const SAMPLE_INTERVAL = Duration.seconds(1); +const LIVE_SAMPLE_INTERVAL = Duration.seconds(1); +const BATTERY_SAMPLE_INTERVAL = Duration.seconds(5); +const CONSTRAINED_SAMPLE_INTERVAL = Duration.seconds(15); +const BACKGROUND_HEARTBEAT_INTERVAL = Duration.seconds(30); const IDLE_THRESHOLD_SECONDS = 60; const encodeMessage = Schema.encodeSync(Schema.fromJsonString(DesktopHostTelemetryMessage)); const textEncoder = new TextEncoder(); @@ -42,6 +46,7 @@ export interface DesktopTelemetryPublisherShape { readonly latest: Effect.Effect>; readonly changes: Stream.Stream; readonly encoded: Stream.Stream; + readonly handleControl: (message: DesktopTelemetryControlMessage) => Effect.Effect; } export class DesktopTelemetryPublisher extends Context.Service< @@ -80,6 +85,20 @@ function updatePowerState(state: PowerState, event: PowerEvent): PowerState { } } +function sampleInterval(power: PowerState, diagnosticsDemand: boolean): Duration.Duration { + if (!diagnosticsDemand) return BACKGROUND_HEARTBEAT_INTERVAL; + if ( + power.suspended || + power.locked === "true" || + power.thermalState === "serious" || + 
power.thermalState === "critical" + ) { + return CONSTRAINED_SAMPLE_INTERVAL; + } + if (power.onBattery === "true") return BATTERY_SAMPLE_INTERVAL; + return LIVE_SAMPLE_INTERVAL; +} + export const make = Effect.fn("desktop.telemetryPublisher.make")(function* () { const electronApp = yield* ElectronApp.ElectronApp; const powerMonitor = yield* ElectronPowerMonitor.ElectronPowerMonitor; @@ -94,6 +113,8 @@ export const make = Effect.fn("desktop.telemetryPublisher.make")(function* () { }; const powerState = yield* Ref.make(initialPowerState); const powerEvents = yield* Queue.unbounded(); + const sampleTriggers = yield* Queue.sliding(1); + const diagnosticsDemand = yield* Ref.make(false); const latest = yield* Ref.make(Option.none()); const changes = yield* PubSub.sliding(8); const sequence = yield* Ref.make(0); @@ -117,19 +138,21 @@ export const make = Effect.fn("desktop.telemetryPublisher.make")(function* () { yield* Effect.forever( Queue.take(powerEvents).pipe( Effect.flatMap((event) => Ref.update(powerState, (state) => updatePowerState(state, event))), + Effect.andThen(Queue.offer(sampleTriggers, undefined)), ), ).pipe(Effect.forkScoped); const sampleOnce = Effect.gen(function* () { const sampledAt = yield* DateTime.now; const sampledAtUnixMs = DateTime.toEpochMillis(sampledAt); + const demand = yield* Ref.get(diagnosticsDemand); const [currentPower, idleSeconds, systemIdleState, onBattery, metrics] = yield* Effect.all( [ Ref.get(powerState), powerMonitor.getSystemIdleTime, powerMonitor.getSystemIdleState(IDLE_THRESHOLD_SECONDS), powerMonitor.isOnBatteryPower, - electronApp.getAppMetrics, + demand ? 
electronApp.getAppMetrics : Effect.succeed([]), ], { concurrency: "unbounded" }, ); @@ -140,6 +163,7 @@ export const make = Effect.fn("desktop.telemetryPublisher.make")(function* () { type: "desktopTelemetry", sequence: nextSequence, sampledAtUnixMs, + electronPid: process.pid, power: { source: "electron-main", idle: idleState(systemIdleState), @@ -179,9 +203,33 @@ export const make = Effect.fn("desktop.telemetryPublisher.make")(function* () { ), ); - yield* Effect.forever(sampleOnce.pipe(Effect.andThen(Effect.sleep(SAMPLE_INTERVAL)))).pipe( - Effect.forkScoped, - ); + yield* Effect.gen(function* () { + yield* sampleOnce; + while (true) { + const [currentPower, demand] = yield* Effect.all([ + Ref.get(powerState), + Ref.get(diagnosticsDemand), + ]); + yield* Effect.raceFirst( + Queue.take(sampleTriggers), + Effect.sleep(sampleInterval(currentPower, demand)), + ); + yield* sampleOnce; + } + }).pipe(Effect.forkScoped); + + const handleControl: DesktopTelemetryPublisherShape["handleControl"] = (message) => { + switch (message.type) { + case "setDiagnosticsDemand": + return Ref.getAndSet(diagnosticsDemand, message.enabled).pipe( + Effect.flatMap((previous) => + previous === message.enabled + ? 
Effect.void + : Queue.offer(sampleTriggers, undefined).pipe(Effect.asVoid), + ), + ); + } + }; const snapshots = Stream.concat( Stream.unwrap( @@ -209,6 +257,7 @@ export const make = Effect.fn("desktop.telemetryPublisher.make")(function* () { latest: Ref.get(latest), changes: Stream.fromPubSub(changes), encoded, + handleControl, }); }); diff --git a/apps/server/src/background/BackgroundPolicy.test.ts b/apps/server/src/background/BackgroundPolicy.test.ts index 0bfcfb2b282..084bb069fc7 100644 --- a/apps/server/src/background/BackgroundPolicy.test.ts +++ b/apps/server/src/background/BackgroundPolicy.test.ts @@ -65,7 +65,6 @@ function makeLayer( Effect.sync(() => { snapshot = next; }).pipe(Effect.andThen(PubSub.publish(changes, next)), Effect.asVoid), - setDemandActive: () => Effect.void, streamChanges: Stream.fromPubSub(changes), }); }), @@ -188,4 +187,31 @@ describe("BackgroundPolicy", () => { ), ), ); + + it.effect("does not gate work on stale host power values", () => + Effect.gen(function* () { + const policy = yield* BackgroundPolicy.BackgroundPolicy; + yield* policy.reportClientActivity( + AuthSessionId.make("session-1"), + RpcClientId.make(1), + makeReport(), + ); + + assert.equal(yield* policy.shouldRunScopeWork({ type: "vcs-status", cwd: "/repo" }), true); + }).pipe( + Effect.provide( + makeLayer( + { + ...nominalHostPower, + locked: "true", + onBattery: "true", + lowPowerMode: "true", + thermalState: "critical", + stale: true, + }, + { backgroundActivityProfile: "battery-saver" }, + ), + ), + ), + ); }); diff --git a/apps/server/src/background/BackgroundPolicy.ts b/apps/server/src/background/BackgroundPolicy.ts index db3ae31102c..1397aed4a87 100644 --- a/apps/server/src/background/BackgroundPolicy.ts +++ b/apps/server/src/background/BackgroundPolicy.ts @@ -81,6 +81,7 @@ function isHostConstrained( hostPower: HostPowerSnapshot, settings: ResolvedBackgroundActivitySettings, ): boolean { + if (hostPower.stale) return false; if ( (settings.pauseWhenHostLocked && 
hostPower.locked === "true") || hasThermalPressure(hostPower) @@ -153,7 +154,7 @@ export const make = Effect.fn("background.policy.make")(function* () { const backgroundActivitySettings = serverSettings.getSettings.pipe( Effect.map(resolveServerBackgroundActivitySettings), - Effect.catch(() => Effect.succeed(getBackgroundActivityPresetSettings("balanced"))), + Effect.orElseSucceed(() => getBackgroundActivityPresetSettings("balanced")), ); const snapshot = Effect.gen(function* () { @@ -166,10 +167,7 @@ export const make = Effect.fn("background.policy.make")(function* () { return computeSnapshot({ hostPower, leases, now, settings, updatedAt: now }); }); - const publishSnapshot = snapshot.pipe( - Effect.tap((next) => hostPowerMonitor.setDemandActive(next.activeForegroundLeaseCount > 0)), - Effect.flatMap((next) => PubSub.publish(changes, next)), - ); + const publishSnapshot = snapshot.pipe(Effect.flatMap((next) => PubSub.publish(changes, next))); const reportClientActivity: BackgroundPolicyShape["reportClientActivity"] = ( sessionId, diff --git a/apps/server/src/background/HostPowerMonitor.test.ts b/apps/server/src/background/HostPowerMonitor.test.ts index 35fa72c0b10..445cd618c83 100644 --- a/apps/server/src/background/HostPowerMonitor.test.ts +++ b/apps/server/src/background/HostPowerMonitor.test.ts @@ -1,10 +1,15 @@ +import type { DesktopHostTelemetrySnapshot } from "@t3tools/contracts"; import { describe, expect, it } from "@effect/vitest"; import * as DateTime from "effect/DateTime"; import * as Effect from "effect/Effect"; import * as Fiber from "effect/Fiber"; +import * as Layer from "effect/Layer"; import * as Option from "effect/Option"; +import * as PubSub from "effect/PubSub"; +import * as Ref from "effect/Ref"; import * as Stream from "effect/Stream"; +import * as DesktopTelemetryReceiver from "../resourceTelemetry/DesktopTelemetryReceiver.ts"; import * as HostPowerMonitor from "./HostPowerMonitor.ts"; describe("HostPowerMonitor", () => { @@ -41,4 +46,47 
@@ describe("HostPowerMonitor", () => { expect(Option.getOrThrow(yield* Fiber.join(nextChange)).locked).toBe("true"); }), ); + + it.effect("consumes desktop power directly without retaining diagnostics telemetry", () => + Effect.gen(function* () { + const sampledAt = DateTime.makeUnsafe("2026-06-17T12:00:00.000Z"); + const desktopChanges = yield* PubSub.sliding(1); + const diagnosticsDemandWrites = yield* Ref.make(0); + const receiverLayer = DesktopTelemetryReceiver.layerTest({ + changes: Stream.fromPubSub(desktopChanges), + setDiagnosticsDemand: () => Ref.update(diagnosticsDemandWrites, (count) => count + 1), + }); + const layer = HostPowerMonitor.layer.pipe(Layer.provide(receiverLayer)); + + yield* Effect.gen(function* () { + const monitor = yield* HostPowerMonitor.HostPowerMonitor; + const nextPower = yield* Stream.runHead(monitor.streamChanges).pipe(Effect.forkChild); + yield* Effect.yieldNow; + yield* PubSub.publish(desktopChanges, { + version: 1, + type: "desktopTelemetry", + sequence: 1, + sampledAtUnixMs: DateTime.toEpochMillis(sampledAt), + electronPid: 100, + power: { + source: "electron-main", + idle: "false", + idleSeconds: 0, + locked: "false", + suspended: false, + onBattery: "true", + lowPowerMode: "unknown", + thermalState: "nominal", + stale: false, + updatedAt: sampledAt, + }, + speedLimitPercent: Option.none(), + electronProcesses: [], + }); + + expect(Option.getOrThrow(yield* Fiber.join(nextPower)).onBattery).toBe("true"); + expect(yield* Ref.get(diagnosticsDemandWrites)).toBe(0); + }).pipe(Effect.provide(layer)); + }), + ); }); diff --git a/apps/server/src/background/HostPowerMonitor.ts b/apps/server/src/background/HostPowerMonitor.ts index 76a2c249e80..53efafcc835 100644 --- a/apps/server/src/background/HostPowerMonitor.ts +++ b/apps/server/src/background/HostPowerMonitor.ts @@ -3,16 +3,16 @@ import * as Context from "effect/Context"; import * as DateTime from "effect/DateTime"; import * as Effect from "effect/Effect"; import * as Layer from 
"effect/Layer"; +import * as Option from "effect/Option"; import * as PubSub from "effect/PubSub"; import * as Ref from "effect/Ref"; import * as Stream from "effect/Stream"; -import * as ResourceTelemetry from "../resourceTelemetry/ResourceTelemetry.ts"; +import * as DesktopTelemetryReceiver from "../resourceTelemetry/DesktopTelemetryReceiver.ts"; export interface HostPowerMonitorShape { readonly snapshot: Effect.Effect; readonly report: (snapshot: HostPowerSnapshot) => Effect.Effect; - readonly setDemandActive: (active: boolean) => Effect.Effect; readonly streamChanges: Stream.Stream; } @@ -65,7 +65,6 @@ export const make = Effect.fn("background.hostPower.make")(function* ( return HostPowerMonitor.of({ snapshot: Ref.get(latestRef), report, - setDemandActive: () => Effect.void, streamChanges: Stream.fromPubSub(changes), }); }); @@ -73,11 +72,18 @@ export const make = Effect.fn("background.hostPower.make")(function* ( export const layer = Layer.effect( HostPowerMonitor, Effect.gen(function* () { - const telemetry = yield* ResourceTelemetry.ResourceTelemetry; - const initial = yield* telemetry.latest; - const monitor = yield* make(initial.power.source); - yield* monitor.report(initial.power); - yield* telemetry.changes.pipe( + const desktopTelemetry = yield* DesktopTelemetryReceiver.DesktopTelemetryReceiver; + const initial = yield* desktopTelemetry.latest; + const monitor = yield* make( + Option.match(initial, { + onNone: () => "unknown" as const, + onSome: (snapshot) => snapshot.power.source, + }), + ); + if (Option.isSome(initial)) { + yield* monitor.report(initial.value.power); + } + yield* desktopTelemetry.changes.pipe( Stream.map((snapshot) => snapshot.power), Stream.runForEach(monitor.report), Effect.forkScoped, diff --git a/apps/server/src/cli/config.test.ts b/apps/server/src/cli/config.test.ts index d4d9d378557..f6464d13394 100644 --- a/apps/server/src/cli/config.test.ts +++ b/apps/server/src/cli/config.test.ts @@ -268,6 +268,8 @@ 
it.layer(NodeServices.layer)("cli config resolution", (it) => { t3Home: baseDir, noBrowser: true, desktopBootstrapToken: "desktop-token", + desktopTelemetryFd: 4, + desktopTelemetryControlFd: 5, tailscaleServeEnabled: false, tailscaleServePort: 443, otlpTracesUrl: "http://localhost:4318/v1/traces", @@ -323,12 +325,17 @@ it.layer(NodeServices.layer)("cli config resolution", (it) => { noBrowser: true, startupPresentation: "browser", desktopBootstrapToken: "desktop-token", + desktopTelemetryFd: 4, + desktopTelemetryControlFd: 5, + resourceMonitorPath: undefined, autoBootstrapProjectFromCwd: false, logWebSocketEvents: false, tailscaleServeEnabled: false, tailscaleServePort: 443, }); assert.equal(join(baseDir, "userdata"), resolved.stateDir); + assert.equal(resolved.desktopTelemetryFd, 4); + assert.equal(resolved.desktopTelemetryControlFd, 5); }), ); diff --git a/apps/server/src/cli/config.ts b/apps/server/src/cli/config.ts index 78c9f99a2cc..9b2d9044360 100644 --- a/apps/server/src/cli/config.ts +++ b/apps/server/src/cli/config.ts @@ -299,6 +299,7 @@ export const resolveServerConfig = ( ); const desktopBootstrapToken = bootstrap?.desktopBootstrapToken; const desktopTelemetryFd = bootstrap?.desktopTelemetryFd; + const desktopTelemetryControlFd = bootstrap?.desktopTelemetryControlFd; const resourceMonitorPath = bootstrap?.resourceMonitorPath; const autoBootstrapProjectFromCwd = Option.getOrElse( resolveOptionPrecedence( @@ -373,6 +374,7 @@ export const resolveServerConfig = ( startupPresentation, desktopBootstrapToken, desktopTelemetryFd, + desktopTelemetryControlFd, resourceMonitorPath, autoBootstrapProjectFromCwd, logWebSocketEvents, diff --git a/apps/server/src/config.ts b/apps/server/src/config.ts index 3269dd11568..67b34b7b09e 100644 --- a/apps/server/src/config.ts +++ b/apps/server/src/config.ts @@ -70,6 +70,7 @@ export interface ServerConfigShape extends ServerDerivedPaths { readonly startupPresentation: StartupPresentation; readonly desktopBootstrapToken: string 
| undefined; readonly desktopTelemetryFd?: number | undefined; + readonly desktopTelemetryControlFd?: number | undefined; readonly resourceMonitorPath?: string | undefined; readonly autoBootstrapProjectFromCwd: boolean; readonly logWebSocketEvents: boolean; @@ -174,6 +175,7 @@ export class ServerConfig extends Context.Service { type: "desktopTelemetry", sequence: 1, sampledAtUnixMs, + electronPid: 4_242, power: { source: "electron-main", idle: "false", diff --git a/apps/server/src/resourceTelemetry/DesktopTelemetryReceiver.ts b/apps/server/src/resourceTelemetry/DesktopTelemetryReceiver.ts index 56f1bad2aeb..96a5bf47a4e 100644 --- a/apps/server/src/resourceTelemetry/DesktopTelemetryReceiver.ts +++ b/apps/server/src/resourceTelemetry/DesktopTelemetryReceiver.ts @@ -6,21 +6,27 @@ import { DesktopHostTelemetryMessage, type DesktopHostTelemetryMessage as DesktopHostTelemetryMessageValue, type DesktopHostTelemetrySnapshot, + DesktopTelemetryControlMessage, type ResourceTelemetrySourceStatus, } from "@t3tools/contracts"; import * as Context from "effect/Context"; import * as DateTime from "effect/DateTime"; +import * as Duration from "effect/Duration"; import * as Effect from "effect/Effect"; import * as Layer from "effect/Layer"; import * as Option from "effect/Option"; import * as PubSub from "effect/PubSub"; import * as Ref from "effect/Ref"; import * as Schema from "effect/Schema"; +import * as Semaphore from "effect/Semaphore"; import * as Stream from "effect/Stream"; import * as Ndjson from "effect/unstable/encoding/Ndjson"; import { ServerConfig } from "../config.ts"; +const STALE_AFTER_MS = 90_000; +const STALE_CHECK_INTERVAL = Duration.seconds(30); + export class DesktopTelemetryDescriptorUnavailable extends Schema.TaggedErrorClass()( "DesktopTelemetryDescriptorUnavailable", { @@ -78,6 +84,18 @@ export class DesktopTelemetryStreamClosed extends Schema.TaggedErrorClass()( + "DesktopTelemetryStale", + { + fd: Schema.Number, + staleAfterMs: Schema.Number, + }, +) { 
+ override get message(): string { + return `Desktop telemetry on fd ${this.fd} has not updated for ${this.staleAfterMs}ms.`; + } +} + export type DesktopTelemetryReceiverError = | DesktopTelemetryDescriptorUnavailable | DesktopTelemetryProtocolMismatch @@ -85,6 +103,19 @@ export type DesktopTelemetryReceiverError = | DesktopTelemetryStreamFailed | DesktopTelemetryStreamClosed; +export class DesktopTelemetryControlFailed extends Schema.TaggedErrorClass()( + "DesktopTelemetryControlFailed", + { + fd: Schema.Number, + operation: Schema.String, + cause: Schema.Defect(), + }, +) { + override get message(): string { + return `Desktop telemetry control '${this.operation}' failed on fd ${this.fd}.`; + } +} + export interface DesktopTelemetryReceiverHealth { readonly status: ResourceTelemetrySourceStatus; readonly lastSampleAt: Option.Option; @@ -96,6 +127,9 @@ export interface DesktopTelemetryReceiverShape { readonly changes: Stream.Stream; readonly health: Effect.Effect; readonly healthChanges: Stream.Stream; + readonly setDiagnosticsDemand: ( + enabled: boolean, + ) => Effect.Effect; } export class DesktopTelemetryReceiver extends Context.Service< @@ -104,6 +138,9 @@ export class DesktopTelemetryReceiver extends Context.Service< >()("t3/resourceTelemetry/DesktopTelemetryReceiver") {} const decodeMessage = Schema.decodeUnknownEffect(DesktopHostTelemetryMessage); +const encodeControlMessage = Schema.encodeEffect( + Schema.fromJsonString(DesktopTelemetryControlMessage), +); const isDescriptorUnavailable = Schema.is(DesktopTelemetryDescriptorUnavailable); const isProtocolMismatch = Schema.is(DesktopTelemetryProtocolMismatch); const isDecodeFailed = Schema.is(DesktopTelemetryDecodeFailed); @@ -132,6 +169,7 @@ export const make = Effect.fn("resourceTelemetry.desktopTelemetryReceiver.make") const latest = yield* Ref.make(Option.none()); const changes = yield* PubSub.sliding(8); const healthChanges = yield* PubSub.sliding(4); + const controlMutex = yield* Semaphore.make(1); 
const health = yield* Ref.make({ status: config.desktopTelemetryFd === undefined ? "unavailable" : "starting", lastSampleAt: Option.none(), @@ -154,6 +192,75 @@ export const make = Effect.fn("resourceTelemetry.desktopTelemetryReceiver.make") Effect.flatMap((next) => PubSub.publish(healthChanges, next)), Effect.asVoid, ); + const updateSampleHealth = (sampledAt: DateTime.Utc) => + Ref.modify(health, (current) => { + const next: DesktopTelemetryReceiverHealth = { + status: "healthy", + lastSampleAt: Option.some(sampledAt), + lastError: Option.none(), + }; + return [ + current.status !== "healthy" || Option.isSome(current.lastError) + ? Option.some(next) + : Option.none(), + next, + ] as const; + }).pipe( + Effect.flatMap( + Option.match({ + onNone: () => Effect.void, + onSome: (next) => PubSub.publish(healthChanges, next), + }), + ), + Effect.asVoid, + ); + + const setDiagnosticsDemand: DesktopTelemetryReceiverShape["setDiagnosticsDemand"] = (enabled) => + controlMutex.withPermits(1)( + Effect.gen(function* () { + const fd = config.desktopTelemetryControlFd; + if (fd === undefined) return; + const encoded = yield* encodeControlMessage({ + version: 1, + type: "setDiagnosticsDemand", + enabled, + }).pipe( + Effect.mapError( + (cause) => + new DesktopTelemetryControlFailed({ + fd, + operation: "encode", + cause, + }), + ), + ); + yield* Effect.try({ + try: () => { + const payload = Buffer.from(`${encoded}\n`); + let offset = 0; + while (offset < payload.byteLength) { + const written = NodeFileSystem.writeSync(fd, payload, offset); + if (written <= 0) throw new Error("desktop telemetry control pipe accepted no bytes"); + offset += written; + } + }, + catch: (cause) => + new DesktopTelemetryControlFailed({ + fd, + operation: "write", + cause, + }), + }).pipe( + Effect.tapError((error) => + updateHealth((current) => ({ + ...current, + status: "degraded", + lastError: Option.some(error.message), + })), + ), + ); + }), + ); if (config.desktopTelemetryFd !== undefined) { 
const fd = config.desktopTelemetryFd; @@ -217,13 +324,7 @@ export const make = Effect.fn("resourceTelemetry.desktopTelemetryReceiver.make") const sampledAt = DateTime.makeUnsafe(message.sampledAtUnixMs); return Ref.set(latest, Option.some(message)).pipe( - Effect.andThen( - Ref.set(health, { - status: "healthy", - lastSampleAt: Option.some(sampledAt), - lastError: Option.none(), - }), - ), + Effect.andThen(updateSampleHealth(sampledAt)), Effect.andThen(PubSub.publish(changes, message)), Effect.asVoid, ); @@ -248,6 +349,36 @@ export const make = Effect.fn("resourceTelemetry.desktopTelemetryReceiver.make") ), Effect.forkScoped, ); + + yield* Effect.forever( + Effect.sleep(STALE_CHECK_INTERVAL).pipe( + Effect.andThen( + Effect.gen(function* () { + const current = yield* Ref.get(latest); + if (Option.isNone(current) || current.value.power.stale) return; + const now = yield* DateTime.now; + if (DateTime.toEpochMillis(now) - current.value.sampledAtUnixMs < STALE_AFTER_MS) + return; + const staleSnapshot: DesktopHostTelemetrySnapshot = { + ...current.value, + power: { ...current.value.power, stale: true }, + }; + yield* Ref.set(latest, Option.some(staleSnapshot)); + yield* updateHealth((currentHealth) => ({ + ...currentHealth, + status: currentHealth.status === "stopped" ? "stopped" : "degraded", + lastError: + currentHealth.status === "stopped" + ? 
currentHealth.lastError + : Option.some( + new DesktopTelemetryStale({ fd, staleAfterMs: STALE_AFTER_MS }).message, + ), + })); + yield* PubSub.publish(changes, staleSnapshot); + }), + ), + ), + ).pipe(Effect.forkScoped); } return DesktopTelemetryReceiver.of({ @@ -255,6 +386,7 @@ export const make = Effect.fn("resourceTelemetry.desktopTelemetryReceiver.make") changes: Stream.fromPubSub(changes), health: Ref.get(health), healthChanges: Stream.fromPubSub(healthChanges), + setDiagnosticsDemand, }); }); @@ -274,6 +406,7 @@ export const layerTest = ( lastError: Option.some("Desktop telemetry test implementation is unavailable."), }), healthChanges: Stream.empty, + setDiagnosticsDemand: () => Effect.void, ...overrides, }), ); diff --git a/apps/server/src/resourceTelemetry/Model.test.ts b/apps/server/src/resourceTelemetry/Model.test.ts index 175f268b658..0d93894c4ca 100644 --- a/apps/server/src/resourceTelemetry/Model.test.ts +++ b/apps/server/src/resourceTelemetry/Model.test.ts @@ -39,7 +39,7 @@ function nativeSnapshot( sequence = 1, ): ResourceMonitorSnapshotEvent { return { - version: 1, + version: 2, type: "snapshot", sequence, sampledAtUnixMs, @@ -74,6 +74,7 @@ function desktopSnapshot( type: "desktopTelemetry", sequence: 1, sampledAtUnixMs, + electronPid: electronProcesses[0]?.pid ?? 
10_000, power: { source: "electron-main", idle: "false", @@ -237,6 +238,45 @@ describe("resource telemetry process model", () => { expect(second.groups.backend.ioWriteBytes).toBe(3_000); }); + it("derives deltas at the constrained 15-second sampling cadence", () => { + const first = merge({ + native: nativeSnapshot(BASE_TIME_MS, [ + processSample({ + pid: SERVER_PID, + ppid: 1, + startTimeMs: 1_000, + cpuTimeMs: 1_000, + ioReadBytes: 10_000, + ioWriteBytes: 20_000, + }), + ]), + }); + const second = merge({ + previous: first, + native: nativeSnapshot( + BASE_TIME_MS + 15_000, + [ + processSample({ + pid: SERVER_PID, + ppid: 1, + startTimeMs: 1_000, + cpuTimeMs: 2_500, + ioReadBytes: 25_000, + ioWriteBytes: 50_000, + }), + ], + 2, + ), + }); + + expect(second.processes[0]?.cpuPercent).toBe(10); + expect(second.processes[0]?.ioReadBytesPerSecond).toBe(1_000); + expect(second.processes[0]?.ioWriteBytesPerSecond).toBe(2_000); + expect(second.groups.backend.cpuTimeMs).toBe(1_500); + expect(second.groups.backend.ioReadBytes).toBe(15_000); + expect(second.groups.backend.ioWriteBytes).toBe(30_000); + }); + it("preserves native rates while applying a desktop-only update", () => { const first = merge({ native: nativeSnapshot(BASE_TIME_MS, [ @@ -331,7 +371,7 @@ describe("resource telemetry process model", () => { const delayed = merge({ previous: decreased, native: nativeSnapshot( - BASE_TIME_MS + 20_000, + BASE_TIME_MS + 90_000, [ processSample({ pid: SERVER_PID, diff --git a/apps/server/src/resourceTelemetry/Model.ts b/apps/server/src/resourceTelemetry/Model.ts index 6b50ef92f4a..7bffe62d782 100644 --- a/apps/server/src/resourceTelemetry/Model.ts +++ b/apps/server/src/resourceTelemetry/Model.ts @@ -10,7 +10,7 @@ import type { import * as DateTime from "effect/DateTime"; import * as Option from "effect/Option"; -const MAX_DELTA_INTERVAL_MS = 10_000; +const MAX_DELTA_INTERVAL_MS = 30_000; const ELECTRON_IDENTITY_TOLERANCE_MS = 2_000; export interface ProcessState { @@ -47,6 
+47,7 @@ export interface MergeProcessesInput { readonly fallbackSampledAtMs: number; readonly nativeSnapshot: Option.Option; readonly desktopSnapshot: Option.Option; + readonly electronRootPids?: ReadonlySet; readonly previous: ReadonlyMap; readonly counters: TelemetryCounters; readonly updatePrevious: boolean; @@ -110,6 +111,15 @@ function electronCategory(metric: DesktopElectronProcessMetric): ResourceTelemet } } +function inferredElectronCategory( + process: ResourceMonitorProcessSample, +): ResourceTelemetryProcessCategory { + const command = process.command.toLowerCase(); + if (command.includes("--type=renderer")) return "electron-renderer"; + if (command.includes("--type=gpu-process")) return "electron-gpu"; + return "electron-utility"; +} + function matchElectronMetric( process: ResourceMonitorProcessSample, metricsByPid: ReadonlyMap, @@ -386,15 +396,20 @@ export function mergeProcesses(input: MergeProcessesInput): MergeProcessesResult } const processes = [...nativeByPid.values()]; const processesByPid = new Map(processes.map((process) => [process.pid, process])); - const electronPids = new Set(metricsByPid.keys()); - const electronRootPids = [...electronPids] - .filter((pid) => { - const process = processesByPid.get(pid); - return process === undefined - ? true - : !hasElectronAncestor(process, processesByPid, electronPids); - }) - .toSorted((left, right) => left - right); + const explicitElectronRootPids = input.electronRootPids ?? new Set(); + const electronPids = new Set([...metricsByPid.keys(), ...explicitElectronRootPids]); + const electronRootPids = [ + ...explicitElectronRootPids, + ...[...electronPids] + .filter((pid) => { + if (explicitElectronRootPids.has(pid)) return false; + const process = processesByPid.get(pid); + return process === undefined + ? 
true + : !hasElectronAncestor(process, processesByPid, electronPids); + }) + .toSorted((left, right) => left - right), + ].filter((pid, index, values) => values.indexOf(pid) === index); const rootPids = [input.serverPid, ...electronRootPids]; const roots = new Set(rootPids); const depths = processDepths(processes, roots); @@ -438,11 +453,13 @@ export function mergeProcesses(input: MergeProcessesInput): MergeProcessesResult ? "server" : Option.contains(input.sidecarPid, process.pid) ? "resource-monitor" - : electronMetric - ? electronCategory(electronMetric) - : isElectronDescendant(process.pid, processesByPid, electronPids) - ? "electron-utility" - : "server-child"; + : explicitElectronRootPids.has(process.pid) + ? "electron-main" + : electronMetric + ? electronCategory(electronMetric) + : isElectronDescendant(process.pid, processesByPid, electronPids) + ? inferredElectronCategory(process) + : "server-child"; const firstSeenAt = previous?.process.firstSeenAt ?? DateTime.makeUnsafe(sampledAtMs); const preservePreviousRates = !input.updatePrevious && previous !== undefined; const cpuPercent = preservePreviousRates diff --git a/apps/server/src/resourceTelemetry/NativeTelemetryClient.test.ts b/apps/server/src/resourceTelemetry/NativeTelemetryClient.test.ts new file mode 100644 index 00000000000..30bbfa3a223 --- /dev/null +++ b/apps/server/src/resourceTelemetry/NativeTelemetryClient.test.ts @@ -0,0 +1,47 @@ +import type { HostPowerSnapshot } from "@t3tools/contracts"; +import { describe, expect, it } from "@effect/vitest"; +import * as DateTime from "effect/DateTime"; + +import { resolveNativeSampleIntervalMs } from "./NativeTelemetryClient.ts"; + +const basePower: HostPowerSnapshot = { + source: "electron-main", + idle: "false", + idleSeconds: 0, + locked: "false", + suspended: false, + onBattery: "false", + lowPowerMode: "false", + thermalState: "nominal", + stale: false, + updatedAt: DateTime.makeUnsafe("2026-06-17T12:00:00.000Z"), +}; + 
+describe("resolveNativeSampleIntervalMs", () => { + it("pauses while suspended and backs off under host constraints", () => { + expect(resolveNativeSampleIntervalMs({ ...basePower, suspended: true }, 1)).toBe(0); + expect(resolveNativeSampleIntervalMs({ ...basePower, locked: "true" }, 1)).toBe(15_000); + expect(resolveNativeSampleIntervalMs({ ...basePower, lowPowerMode: "true" }, 1)).toBe(15_000); + expect(resolveNativeSampleIntervalMs({ ...basePower, thermalState: "critical" }, 1)).toBe( + 15_000, + ); + expect(resolveNativeSampleIntervalMs({ ...basePower, onBattery: "true" }, 1)).toBe(5_000); + }); + + it("keeps unknown background telemetry cheap but serves live diagnostics at 1Hz", () => { + const unknown: HostPowerSnapshot = { + ...basePower, + source: "unknown", + stale: true, + }; + expect(resolveNativeSampleIntervalMs(unknown, 0)).toBe(5_000); + expect(resolveNativeSampleIntervalMs(unknown, 1)).toBe(1_000); + expect( + resolveNativeSampleIntervalMs( + { ...basePower, stale: true, locked: "true", suspended: true }, + 0, + ), + ).toBe(5_000); + expect(resolveNativeSampleIntervalMs(basePower, 0)).toBe(1_000); + }); +}); diff --git a/apps/server/src/resourceTelemetry/NativeTelemetryClient.ts b/apps/server/src/resourceTelemetry/NativeTelemetryClient.ts index df942089c50..ac4c135d60d 100644 --- a/apps/server/src/resourceTelemetry/NativeTelemetryClient.ts +++ b/apps/server/src/resourceTelemetry/NativeTelemetryClient.ts @@ -1,4 +1,5 @@ import type { + HostPowerSnapshot, ResourceMonitorCapabilities, ResourceMonitorCommand, ResourceMonitorEvent, @@ -36,8 +37,12 @@ import * as ResourceMonitorBinary from "./ResourceMonitorBinary.ts"; import { ServerConfig } from "../config.ts"; const SAMPLE_INTERVAL_MS = 1_000; +const UNKNOWN_BACKGROUND_SAMPLE_INTERVAL_MS = 5_000; +const BATTERY_SAMPLE_INTERVAL_MS = 5_000; +const CONSTRAINED_SAMPLE_INTERVAL_MS = 15_000; const HANDSHAKE_TIMEOUT = Duration.seconds(5); const SAMPLE_REQUEST_TIMEOUT = Duration.seconds(5); +const 
HISTORY_REQUEST_TIMEOUT = Duration.seconds(15); const INITIAL_RESTART_DELAY = Duration.millis(500); const MAX_RESTART_DELAY = Duration.seconds(10); const FAILURE_WINDOW_MS = 60_000; @@ -138,14 +143,21 @@ export interface NativeTelemetryClientHealth { readonly lastSampleAt: Option.Option; readonly lastError: Option.Option; readonly restartCount: number; + readonly sampleIntervalMs: number; } export interface NativeTelemetryClientShape { readonly capabilities: Effect.Effect; readonly snapshots: Stream.Stream; + readonly readHistory: ( + windowMs: number, + ) => Effect.Effect, NativeTelemetryClientError>; readonly setExternalProcesses: ( processes: ReadonlyArray, ) => Effect.Effect; + readonly setHostPowerState: ( + snapshot: HostPowerSnapshot, + ) => Effect.Effect; readonly sampleNow: Effect.Effect; readonly retry: Effect.Effect; readonly health: Effect.Effect; @@ -166,6 +178,20 @@ interface ClientState { readonly restartCount: number; } +interface CollectionControl { + readonly hostPower: HostPowerSnapshot; + readonly liveSubscriberCount: number; + readonly sampleIntervalMs: number; +} + +interface PendingHistoryRequest { + readonly deferred: Deferred.Deferred< + ReadonlyArray, + NativeTelemetryClientError + >; + readonly snapshots: ReadonlyArray; +} + const initialState: ClientState = { status: "starting", handle: Option.none(), @@ -175,16 +201,40 @@ const initialState: ClientState = { restartCount: 0, }; -function toHealth(state: ClientState): NativeTelemetryClientHealth { +function toHealth(state: ClientState, sampleIntervalMs: number): NativeTelemetryClientHealth { return { status: state.status, hello: state.hello, lastSampleAt: state.lastSampleAt, lastError: state.lastError, restartCount: state.restartCount, + sampleIntervalMs, }; } +function isThermallyConstrained(snapshot: HostPowerSnapshot): boolean { + return snapshot.thermalState === "serious" || snapshot.thermalState === "critical"; +} + +export function resolveNativeSampleIntervalMs( + snapshot: 
HostPowerSnapshot, + liveSubscriberCount: number, +): number { + if (snapshot.stale || snapshot.source === "unknown") { + return liveSubscriberCount > 0 ? SAMPLE_INTERVAL_MS : UNKNOWN_BACKGROUND_SAMPLE_INTERVAL_MS; + } + if (snapshot.suspended) return 0; + if ( + snapshot.locked === "true" || + snapshot.lowPowerMode === "true" || + isThermallyConstrained(snapshot) + ) { + return CONSTRAINED_SAMPLE_INTERVAL_MS; + } + if (snapshot.onBattery === "true") return BATTERY_SAMPLE_INTERVAL_MS; + return SAMPLE_INTERVAL_MS; +} + const decodeMonitorEvent: ( value: unknown, ) => Effect.Effect = Schema.decodeUnknownEffect( @@ -216,29 +266,54 @@ export const make = Effect.fn("resourceTelemetry.nativeTelemetryClient.make")(fu const spawner = yield* ChildProcessSpawner.ChildProcessSpawner; const crypto = yield* Crypto.Crypto; const config = yield* ServerConfig; + const initializedAt = yield* DateTime.now; const state = yield* Ref.make(initialState); + const collectionControl = yield* Ref.make({ + hostPower: { + source: "unknown", + idle: "unknown", + idleSeconds: null, + locked: "unknown", + suspended: false, + onBattery: "unknown", + lowPowerMode: "unknown", + thermalState: "unknown", + stale: true, + updatedAt: initializedAt, + }, + liveSubscriberCount: 0, + sampleIntervalMs: UNKNOWN_BACKGROUND_SAMPLE_INTERVAL_MS, + }); const externalProcesses = yield* Ref.make>([]); const pendingSamples = yield* Ref.make( new Map>(), ); + const pendingHistories = yield* Ref.make(new Map()); const snapshots = yield* PubSub.sliding(8); const healthChanges = yield* PubSub.sliding(4); const retryQueue = yield* Queue.sliding(1); const commandMutex = yield* Semaphore.make(1); - const publishHealth = Ref.get(state).pipe( - Effect.map(toHealth), + const currentHealth = Effect.all([Ref.get(state), Ref.get(collectionControl)]).pipe( + Effect.map(([current, control]) => toHealth(current, control.sampleIntervalMs)), + ); + const publishHealth = currentHealth.pipe( Effect.flatMap((health) => 
PubSub.publish(healthChanges, health)), Effect.asVoid, ); const failPending = (error: NativeTelemetryClientError) => - Ref.getAndSet(pendingSamples, new Map()).pipe( - Effect.flatMap((pending) => - Effect.forEach(pending.values(), (deferred) => Deferred.fail(deferred, error), { - discard: true, - }), - ), - ); + Effect.gen(function* () { + const samples = yield* Ref.getAndSet(pendingSamples, new Map()); + const histories = yield* Ref.getAndSet(pendingHistories, new Map()); + yield* Effect.forEach(samples.values(), (deferred) => Deferred.fail(deferred, error), { + discard: true, + }); + yield* Effect.forEach( + histories.values(), + (request) => Deferred.fail(request.deferred, error), + { discard: true }, + ); + }); const writeCommand = ( handle: ChildProcessSpawner.ChildProcessHandle, @@ -275,7 +350,7 @@ export const make = Effect.fn("resourceTelemetry.nativeTelemetryClient.make")(fu case "hello": return Ref.update(state, (current) => ({ ...current, - status: "healthy" as const, + status: "starting" as const, hello: Option.some(event), lastError: Option.none(), })).pipe( @@ -305,6 +380,33 @@ export const make = Effect.fn("resourceTelemetry.nativeTelemetryClient.make")(fu } } }); + case "historyChunk": + return Effect.gen(function* () { + const latestSnapshot = event.snapshots.at(-1); + if (latestSnapshot) { + yield* Ref.update(state, (current) => ({ + ...current, + status: "healthy" as const, + lastSampleAt: Option.some(DateTime.makeUnsafe(latestSnapshot.sampledAtUnixMs)), + lastError: Option.none(), + })); + } + const completed = yield* Ref.modify(pendingHistories, (pending) => { + const request = pending.get(event.requestId); + if (!request) return [Option.none(), pending] as const; + const snapshots = [...request.snapshots, ...event.snapshots]; + const next = new Map(pending); + if (event.done) { + next.delete(event.requestId); + return [Option.some({ deferred: request.deferred, snapshots }), next] as const; + } + next.set(event.requestId, { deferred: 
request.deferred, snapshots }); + return [Option.none(), next] as const; + }); + if (Option.isSome(completed)) { + yield* Deferred.succeed(completed.value.deferred, completed.value.snapshots); + } + }); case "error": return Ref.update(state, (current) => ({ ...current, @@ -413,18 +515,48 @@ export const make = Effect.fn("resourceTelemetry.nativeTelemetryClient.make")(fu }), ), ); + const control = yield* Ref.get(collectionControl); yield* writeCommand(handle, { version: RESOURCE_MONITOR_PROTOCOL_VERSION, type: "configure", rootPid: process.pid, - sampleIntervalMs: SAMPLE_INTERVAL_MS, + sampleIntervalMs: control.sampleIntervalMs, externalProcesses: [...(yield* Ref.get(externalProcesses))], }); + if (control.liveSubscriberCount > 0) { + yield* writeCommand(handle, { + version: RESOURCE_MONITOR_PROTOCOL_VERSION, + type: "setStreaming", + enabled: true, + }); + } yield* Ref.update(state, (current) => ({ ...current, status: "healthy" as const, hello: Option.some(hello), })); + yield* publishHealth; + + yield* writeCommand(handle, { + version: RESOURCE_MONITOR_PROTOCOL_VERSION, + type: "setExternalProcesses", + processes: [...(yield* Ref.get(externalProcesses))], + }); + const latestControl = yield* Ref.get(collectionControl); + if (latestControl.sampleIntervalMs !== control.sampleIntervalMs) { + yield* writeCommand(handle, { + version: RESOURCE_MONITOR_PROTOCOL_VERSION, + type: "setSampleInterval", + sampleIntervalMs: latestControl.sampleIntervalMs, + }); + } + if (latestControl.liveSubscriberCount > 0 !== control.liveSubscriberCount > 0) { + yield* writeCommand(handle, { + version: RESOURCE_MONITOR_PROTOCOL_VERSION, + type: "setStreaming", + enabled: latestControl.liveSubscriberCount > 0, + }); + } const exitEffect = handle.exitCode.pipe( Effect.mapError( @@ -517,11 +649,76 @@ export const make = Effect.fn("resourceTelemetry.nativeTelemetryClient.make")(fu Effect.forkScoped, ); + const applyCollectionControl = Effect.fn( + 
"resourceTelemetry.nativeTelemetryClient.applyCollectionControl", + )(function* (previous: CollectionControl, next: CollectionControl) { + const current = yield* Ref.get(state); + if (Option.isSome(current.handle) && current.status === "healthy") { + if (previous.sampleIntervalMs !== next.sampleIntervalMs) { + yield* writeCommand(current.handle.value, { + version: RESOURCE_MONITOR_PROTOCOL_VERSION, + type: "setSampleInterval", + sampleIntervalMs: next.sampleIntervalMs, + }); + } + const wasStreaming = previous.liveSubscriberCount > 0; + const isStreaming = next.liveSubscriberCount > 0; + if (wasStreaming !== isStreaming) { + yield* writeCommand(current.handle.value, { + version: RESOURCE_MONITOR_PROTOCOL_VERSION, + type: "setStreaming", + enabled: isStreaming, + }); + } + } + if (previous.sampleIntervalMs !== next.sampleIntervalMs) { + yield* publishHealth; + } + }); + + const setHostPowerState: NativeTelemetryClientShape["setHostPowerState"] = (hostPower) => + Effect.gen(function* () { + const [previous, next] = yield* Ref.modify(collectionControl, (current) => { + const updated: CollectionControl = { + ...current, + hostPower, + sampleIntervalMs: resolveNativeSampleIntervalMs(hostPower, current.liveSubscriberCount), + }; + return [[current, updated] as const, updated]; + }); + yield* applyCollectionControl(previous, next); + }); + + const changeLiveSubscriberCount = Effect.fn( + "resourceTelemetry.nativeTelemetryClient.changeLiveSubscriberCount", + )(function* (delta: 1 | -1) { + const [previous, next] = yield* Ref.modify(collectionControl, (current) => { + const liveSubscriberCount = Math.max(0, current.liveSubscriberCount + delta); + const updated: CollectionControl = { + ...current, + liveSubscriberCount, + sampleIntervalMs: resolveNativeSampleIntervalMs(current.hostPower, liveSubscriberCount), + }; + return [[current, updated] as const, updated]; + }); + yield* applyCollectionControl(previous, next); + }); + + const liveSnapshots = Stream.unwrap( + 
Effect.gen(function* () { + const subscription = yield* PubSub.subscribe(snapshots); + yield* Effect.acquireRelease(changeLiveSubscriberCount(1), () => + changeLiveSubscriberCount(-1).pipe(Effect.ignore), + ); + return Stream.fromSubscription(subscription); + }), + ); + const setExternalProcesses: NativeTelemetryClientShape["setExternalProcesses"] = (processes) => Effect.gen(function* () { yield* Ref.set(externalProcesses, [...processes]); const current = yield* Ref.get(state); - if (Option.isNone(current.handle)) return; + if (Option.isNone(current.handle) || current.status !== "healthy") return; yield* writeCommand(current.handle.value, { version: RESOURCE_MONITOR_PROTOCOL_VERSION, type: "setExternalProcesses", @@ -529,9 +726,68 @@ export const make = Effect.fn("resourceTelemetry.nativeTelemetryClient.make")(fu }); }); + const readHistory: NativeTelemetryClientShape["readHistory"] = (windowMs) => + Effect.gen(function* () { + const current = yield* Ref.get(state); + if (Option.isNone(current.handle) || current.status !== "healthy") { + return yield* new NativeTelemetryUnavailable({ + reason: Option.getOrElse(current.lastError, () => "sidecar is not running"), + }); + } + const requestId = yield* crypto.randomUUIDv4.pipe( + Effect.mapError( + (cause) => + new NativeTelemetryCommandFailed({ + operation: "createHistoryRequestId", + cause, + }), + ), + ); + const deferred = yield* Deferred.make< + ReadonlyArray, + NativeTelemetryClientError + >(); + yield* Ref.update(pendingHistories, (pending) => { + const next = new Map(pending); + next.set(requestId, { deferred, snapshots: [] }); + return next; + }); + return yield* writeCommand(current.handle.value, { + version: RESOURCE_MONITOR_PROTOCOL_VERSION, + type: "readHistory", + requestId, + windowMs: Math.max(0, Math.round(windowMs)), + }).pipe( + Effect.andThen( + Deferred.await(deferred).pipe( + Effect.timeoutOption(HISTORY_REQUEST_TIMEOUT), + Effect.flatMap( + Option.match({ + onNone: () => + Effect.fail( + new 
NativeTelemetryCommandFailed({ + operation: "readHistory", + cause: "history request timed out", + }), + ), + onSome: Effect.succeed, + }), + ), + ), + ), + Effect.ensuring( + Ref.update(pendingHistories, (pending) => { + const next = new Map(pending); + next.delete(requestId); + return next; + }), + ), + ); + }); + const sampleNow: NativeTelemetryClientShape["sampleNow"] = Effect.gen(function* () { const current = yield* Ref.get(state); - if (Option.isNone(current.handle)) { + if (Option.isNone(current.handle) || current.status !== "healthy") { return yield* new NativeTelemetryUnavailable({ reason: Option.getOrElse(current.lastError, () => "sidecar is not running"), }); @@ -587,7 +843,7 @@ export const make = Effect.fn("resourceTelemetry.nativeTelemetryClient.make")(fu ); }); - const health = Ref.get(state).pipe(Effect.map(toHealth)); + const health = currentHealth; return NativeTelemetryClient.of({ capabilities: Ref.get(state).pipe( @@ -603,8 +859,10 @@ export const make = Effect.fn("resourceTelemetry.nativeTelemetryClient.make")(fu }), ), ), - snapshots: Stream.fromPubSub(snapshots), + snapshots: liveSnapshots, + readHistory, setExternalProcesses, + setHostPowerState, sampleNow, retry: Ref.get(state).pipe( Effect.flatMap((current) => @@ -636,7 +894,14 @@ export const layerTest = ( processTree: true, }), snapshots: Stream.empty, + readHistory: () => + Effect.fail( + new NativeTelemetryUnavailable({ + reason: "No resource monitor history was configured for this test.", + }), + ), setExternalProcesses: () => Effect.void, + setHostPowerState: () => Effect.void, sampleNow: Effect.fail( new NativeTelemetryUnavailable({ reason: "No resource monitor sample was configured for this test.", @@ -649,6 +914,7 @@ export const layerTest = ( lastSampleAt: Option.none(), lastError: Option.some("Resource monitor test implementation is unavailable."), restartCount: 0, + sampleIntervalMs: UNKNOWN_BACKGROUND_SAMPLE_INTERVAL_MS, }), healthChanges: Stream.empty, ...overrides, diff 
--git a/apps/server/src/resourceTelemetry/ResourceTelemetry.test.ts b/apps/server/src/resourceTelemetry/ResourceTelemetry.test.ts index cbf41dbd544..f01f4f73df3 100644 --- a/apps/server/src/resourceTelemetry/ResourceTelemetry.test.ts +++ b/apps/server/src/resourceTelemetry/ResourceTelemetry.test.ts @@ -7,7 +7,6 @@ import { describe, expect, it } from "@effect/vitest"; import * as DateTime from "effect/DateTime"; import * as Duration from "effect/Duration"; import * as Effect from "effect/Effect"; -import * as Fiber from "effect/Fiber"; import * as Layer from "effect/Layer"; import * as Option from "effect/Option"; import * as PubSub from "effect/PubSub"; @@ -80,7 +79,7 @@ function nativeSnapshot(input: { }), ]; return { - version: 1, + version: 2, type: "snapshot", sequence: input.sequence, sampledAtUnixMs: input.sampledAtUnixMs, @@ -99,6 +98,7 @@ function desktopSnapshot(sampledAtUnixMs: number): DesktopHostTelemetrySnapshot type: "desktopTelemetry", sequence: 1, sampledAtUnixMs, + electronPid: 5_000, power: { source: "electron-main", idle: "false", @@ -129,6 +129,48 @@ function desktopSnapshot(sampledAtUnixMs: number): DesktopHostTelemetrySnapshot } describe("ResourceTelemetry", () => { + it.effect("enables live native and Electron collection only while changes are retained", () => + Effect.gen(function* () { + const sampledAtUnixMs = DateTime.toEpochMillis(yield* DateTime.now); + const sample = nativeSnapshot({ + sequence: 1, + sampledAtUnixMs, + childCpuTimeMs: 100, + childWriteBytes: 1_000, + }); + const demandChanges = yield* Ref.make>([]); + const nativeLayer = NativeTelemetryClient.layerTest({ + sampleNow: Effect.succeed(sample), + health: Effect.succeed({ + status: "healthy", + hello: Option.none(), + lastSampleAt: Option.none(), + lastError: Option.none(), + restartCount: 0, + sampleIntervalMs: 1_000, + }), + }); + const desktopLayer = DesktopTelemetryReceiver.layerTest({ + latest: Effect.succeedSome(desktopSnapshot(sampledAtUnixMs)), + 
setDiagnosticsDemand: (enabled) => + Ref.update(demandChanges, (changes) => [...changes, enabled]), + }); + const telemetryLayer = ResourceTelemetry.layer.pipe( + Layer.provide(Layer.mergeAll(nativeLayer, desktopLayer, ResourceAttribution.layer)), + ); + + const live = yield* Stream.runHead( + Effect.gen(function* () { + const telemetry = yield* ResourceTelemetry.ResourceTelemetry; + return telemetry.changes; + }).pipe(Stream.unwrap), + ).pipe(Effect.provide(telemetryLayer)); + + expect(Option.isSome(live)).toBe(true); + expect(yield* Ref.get(demandChanges)).toEqual([true, false]); + }), + ); + it.effect("combines native, Electron, attribution, retry, and history data", () => Effect.gen(function* () { const startedAt = DateTime.toEpochMillis(yield* DateTime.now); @@ -160,7 +202,7 @@ describe("ResourceTelemetry", () => { const nativeHealth = yield* Ref.make({ status: "healthy", hello: Option.some({ - version: 1, + version: 2, type: "hello", sidecarVersion: "0.1.0", sidecarPid: 9_000, @@ -179,11 +221,13 @@ describe("ResourceTelemetry", () => { lastSampleAt: Option.some(DateTime.makeUnsafe(startedAt)), lastError: Option.none(), restartCount: 2, + sampleIntervalMs: 1_000, }); const nativeHealthChanges = yield* PubSub.sliding(4); const nativeLayer = NativeTelemetryClient.layerTest({ setExternalProcesses: (processes) => Ref.set(externalProcesses, processes), + readHistory: () => Effect.succeed(samples.slice(0, 2)), sampleNow: Ref.modify(sampleIndex, (index) => [ samples[Math.min(index, samples.length - 1)]!, index + 1, @@ -209,7 +253,7 @@ describe("ResourceTelemetry", () => { const telemetry = yield* ResourceTelemetry.ResourceTelemetry; const attribution = yield* ResourceAttribution.ResourceAttribution; - expect(yield* Ref.get(externalProcesses)).toEqual([{ pid: 5_000, startTimeMs: 300 }]); + expect(yield* Ref.get(externalProcesses)).toEqual([{ pid: 5_000 }]); yield* attribution.record({ component: "provider-event-log", @@ -280,15 +324,14 @@ describe("ResourceTelemetry", 
() => { expect(DateTime.toEpochMillis(restarted.readAt)).toBe(startedAt + 2_000); expect(Option.getOrNull(restarted.health.sidecarPid)).toBe(9_001); - const healthUpdateFiber = yield* Stream.runHead(telemetry.changes).pipe(Effect.forkChild); - yield* Effect.yieldNow; yield* Ref.update(nativeHealth, (current) => ({ ...current, status: "degraded" as const, lastError: Option.some("collector exited"), })); yield* PubSub.publish(nativeHealthChanges, yield* Ref.get(nativeHealth)); - const healthUpdate = Option.getOrThrow(yield* Fiber.join(healthUpdateFiber)); + yield* Effect.yieldNow; + const healthUpdate = yield* telemetry.latest; expect(healthUpdate.health.native.status).toBe("degraded"); expect(Option.getOrNull(healthUpdate.health.native.lastError)).toBe("collector exited"); const degradedHistory = yield* telemetry.readHistory({ diff --git a/apps/server/src/resourceTelemetry/ResourceTelemetry.ts b/apps/server/src/resourceTelemetry/ResourceTelemetry.ts index a74f8d664c9..f4a0f9770c0 100644 --- a/apps/server/src/resourceTelemetry/ResourceTelemetry.ts +++ b/apps/server/src/resourceTelemetry/ResourceTelemetry.ts @@ -12,11 +12,14 @@ import type { import * as Context from "effect/Context"; import * as DateTime from "effect/DateTime"; import * as Effect from "effect/Effect"; +import * as Exit from "effect/Exit"; import * as Layer from "effect/Layer"; import * as Option from "effect/Option"; import * as PubSub from "effect/PubSub"; import * as Ref from "effect/Ref"; +import * as Result from "effect/Result"; import * as Schema from "effect/Schema"; +import * as Scope from "effect/Scope"; import * as Semaphore from "effect/Semaphore"; import * as Stream from "effect/Stream"; @@ -29,9 +32,10 @@ import { } from "./Model.ts"; import * as NativeTelemetryClient from "./NativeTelemetryClient.ts"; import * as ResourceAttribution from "./ResourceAttribution.ts"; -import * as ResourceTelemetryStore from "./ResourceTelemetryStore.ts"; - -const SAMPLE_INTERVAL_MS = 1_000; +import { + 
buildResourceTelemetryHistory, + normalizeResourceTelemetryHistoryInput, +} from "./ResourceTelemetryHistory.ts"; export class ResourceTelemetryRefreshFailed extends Schema.TaggedErrorClass()( "ResourceTelemetryRefreshFailed", @@ -72,6 +76,11 @@ interface TelemetryState { readonly lastNativeRestartCount: number; } +interface LiveTelemetryState { + readonly retainCount: number; + readonly scope: Option.Option; +} + function unknownPower(updatedAt: DateTime.Utc): HostPowerSnapshot { return { source: "unknown", @@ -135,13 +144,9 @@ export const make = Effect.fn("resourceTelemetry.resourceTelemetry.make")(functi const initialDesktop = yield* desktopReceiver.latest; if (Option.isSome(initialDesktop)) { yield* nativeClient - .setExternalProcesses( - initialDesktop.value.electronProcesses.map((process) => ({ - pid: process.pid, - startTimeMs: process.creationTimeMs, - })), - ) + .setExternalProcesses([{ pid: initialDesktop.value.electronPid }]) .pipe(Effect.ignore); + yield* nativeClient.setHostPowerState(initialDesktop.value.power).pipe(Effect.ignore); } const [initialNativeHealth, initialDesktopHealth, initialAttribution] = yield* Effect.all([ nativeClient.health, @@ -160,7 +165,7 @@ export const make = Effect.fn("resourceTelemetry.resourceTelemetry.make")(functi }); const initialSnapshot: ResourceTelemetrySnapshot = { readAt: initialReadAt, - sampleIntervalMs: SAMPLE_INTERVAL_MS, + sampleIntervalMs: initialNativeHealth.sampleIntervalMs, processes: initialMerge.processes, groups: initialMerge.groups, power: Option.match(initialDesktop, { @@ -184,7 +189,11 @@ export const make = Effect.fn("resourceTelemetry.resourceTelemetry.make")(functi lastNativeSequence: 0, lastNativeRestartCount: initialNativeHealth.restartCount, }); - const store = yield* ResourceTelemetryStore.make(initialSnapshot); + const liveState = yield* Ref.make({ + retainCount: 0, + scope: Option.none(), + }); + const liveMutex = yield* Semaphore.make(1); const refreshHealth = mutex.withPermits(1)( 
Effect.gen(function* () { const current = yield* Ref.get(state); @@ -204,8 +213,9 @@ export const make = Effect.fn("resourceTelemetry.resourceTelemetry.make")(functi ...current, latest: snapshot, }); - yield* store.updateLatest(snapshot); - yield* PubSub.publish(changes, snapshot); + if ((yield* Ref.get(liveState)).retainCount > 0) { + yield* PubSub.publish(changes, snapshot); + } }), ); @@ -213,6 +223,7 @@ export const make = Effect.fn("resourceTelemetry.resourceTelemetry.make")(functi readonly nativeSnapshot?: ResourceMonitorSnapshotEvent; readonly desktopSnapshot?: DesktopHostTelemetrySnapshot; readonly updatePrevious: boolean; + readonly publish?: boolean; }): Effect.Effect => mutex.withPermits(1)( Effect.gen(function* () { @@ -243,6 +254,10 @@ export const make = Effect.fn("resourceTelemetry.resourceTelemetry.make")(functi fallbackSampledAtMs: DateTime.toEpochMillis(current.latest.readAt), nativeSnapshot, desktopSnapshot, + electronRootPids: Option.match(desktopSnapshot, { + onNone: () => new Set(), + onSome: (desktop) => new Set([desktop.electronPid]), + }), previous: current.previous, counters: current.counters, updatePrevious: input.updatePrevious, @@ -250,7 +265,7 @@ export const make = Effect.fn("resourceTelemetry.resourceTelemetry.make")(functi const readAt = DateTime.makeUnsafe(merged.sampledAtMs); const snapshot: ResourceTelemetrySnapshot = { readAt, - sampleIntervalMs: SAMPLE_INTERVAL_MS, + sampleIntervalMs: nativeHealth.sampleIntervalMs, processes: merged.processes, groups: merged.groups, power: Option.match(desktopSnapshot, { @@ -279,10 +294,9 @@ export const make = Effect.fn("resourceTelemetry.resourceTelemetry.make")(functi ? 
nativeHealth.restartCount : current.lastNativeRestartCount, }); - if (input.updatePrevious) { - yield* store.record(snapshot, merged.deltas); + if (input.publish !== false) { + yield* PubSub.publish(changes, snapshot); } - yield* PubSub.publish(changes, snapshot); return snapshot; }), ); @@ -291,30 +305,97 @@ export const make = Effect.fn("resourceTelemetry.resourceTelemetry.make")(functi rebuild({ nativeSnapshot: snapshot, updatePrevious: true }); const ingestDesktop = (snapshot: DesktopHostTelemetrySnapshot) => Effect.gen(function* () { - yield* nativeClient - .setExternalProcesses( - snapshot.electronProcesses.map((process) => ({ - pid: process.pid, - startTimeMs: process.creationTimeMs, - })), - ) - .pipe(Effect.ignore); - return yield* rebuild({ desktopSnapshot: snapshot, updatePrevious: false }); + yield* nativeClient.setExternalProcesses([{ pid: snapshot.electronPid }]).pipe(Effect.ignore); + yield* nativeClient.setHostPowerState(snapshot.power).pipe(Effect.ignore); + const live = (yield* Ref.get(liveState)).retainCount > 0; + return yield* rebuild({ desktopSnapshot: snapshot, updatePrevious: false, publish: live }); }); - yield* nativeClient.snapshots.pipe( - Stream.runForEach((snapshot) => ingestNative(snapshot)), - Effect.catch((error) => - Effect.logWarning("Native resource telemetry stream stopped", { - cause: error.message, - }), - ), - Effect.forkScoped, - ); yield* desktopReceiver.changes.pipe( Stream.runForEach((snapshot) => ingestDesktop(snapshot)), Effect.forkScoped, ); + + const acquireLive = liveMutex.withPermits(1)( + Effect.gen(function* () { + const current = yield* Ref.get(liveState); + if (current.retainCount > 0) { + yield* Ref.set(liveState, { ...current, retainCount: current.retainCount + 1 }); + return; + } + + const scope = yield* Scope.make(); + yield* Ref.set(liveState, { retainCount: 1, scope: Option.some(scope) }); + yield* desktopReceiver.setDiagnosticsDemand(true).pipe(Effect.ignore); + yield* nativeClient.snapshots.pipe( + 
Stream.runForEach(ingestNative), + Effect.catch((error) => + Effect.logWarning("Native resource telemetry stream stopped", { + cause: error.message, + }), + ), + Effect.forkIn(scope), + ); + yield* nativeClient.sampleNow.pipe(Effect.flatMap(ingestNative), Effect.ignore); + }), + ); + + const releaseLive = liveMutex.withPermits(1)( + Effect.gen(function* () { + const current = yield* Ref.get(liveState); + if (current.retainCount <= 1) { + yield* Ref.set(liveState, { retainCount: 0, scope: Option.none() }); + if (Option.isSome(current.scope)) { + yield* Scope.close(current.scope.value, Exit.void).pipe(Effect.ignore); + } + yield* desktopReceiver.setDiagnosticsDemand(false).pipe(Effect.ignore); + return; + } + yield* Ref.set(liveState, { ...current, retainCount: current.retainCount - 1 }); + }), + ); + + const liveChanges = Stream.unwrap( + Effect.gen(function* () { + const subscription = yield* PubSub.subscribe(changes); + yield* Effect.acquireRelease(acquireLive, () => releaseLive); + return Stream.fromSubscription(subscription); + }), + ); + + const readHistory: ResourceTelemetryShape["readHistory"] = (input) => + Effect.gen(function* () { + const readAt = yield* DateTime.now; + const normalizedInput = normalizeResourceTelemetryHistoryInput(input); + const historyResult = yield* Effect.result( + nativeClient.readHistory(normalizedInput.windowMs), + ); + if (Result.isFailure(historyResult)) { + yield* Effect.logWarning("Failed to read native resource telemetry history", { + cause: historyResult.failure.message, + }); + } + const [nativeHealth, desktopHealth] = yield* Effect.all([ + nativeClient.health, + desktopReceiver.health, + ]); + const current = yield* Ref.get(state); + return buildResourceTelemetryHistory({ + readAt, + windowMs: normalizedInput.windowMs, + bucketMs: normalizedInput.bucketMs, + sampleIntervalMs: nativeHealth.sampleIntervalMs, + serverPid: process.pid, + sidecarPid: Option.map(nativeHealth.hello, (hello) => hello.sidecarPid), + desktopSnapshot: 
current.desktopSnapshot, + snapshots: Result.isSuccess(historyResult) ? historyResult.success : [], + health: buildHealth({ + native: nativeHealth, + desktop: desktopHealth, + nativeSnapshot: current.nativeSnapshot, + }), + }); + }); yield* nativeClient.healthChanges.pipe( Stream.runForEach(() => refreshHealth), Effect.forkScoped, @@ -353,8 +434,8 @@ export const make = Effect.fn("resourceTelemetry.resourceTelemetry.make")(functi return ResourceTelemetry.of({ latest: Ref.get(state).pipe(Effect.map((current) => current.latest)), - changes: Stream.fromPubSub(changes), - readHistory: store.readHistory, + changes: liveChanges, + readHistory, refresh, validateProcessIdentity, retry: nativeClient.retry.pipe( diff --git a/apps/server/src/resourceTelemetry/ResourceTelemetryHistory.test.ts b/apps/server/src/resourceTelemetry/ResourceTelemetryHistory.test.ts new file mode 100644 index 00000000000..82ebab1b44c --- /dev/null +++ b/apps/server/src/resourceTelemetry/ResourceTelemetryHistory.test.ts @@ -0,0 +1,167 @@ +import type { + DesktopHostTelemetrySnapshot, + ResourceMonitorProcessSample, + ResourceMonitorSnapshotEvent, + ResourceTelemetryHealth, +} from "@t3tools/contracts"; +import { describe, expect, it } from "@effect/vitest"; +import * as DateTime from "effect/DateTime"; +import * as Option from "effect/Option"; + +import { + buildResourceTelemetryHistory, + normalizeResourceTelemetryHistoryInput, +} from "./ResourceTelemetryHistory.ts"; + +const SERVER_PID = 100; +const ELECTRON_PID = 200; +const CHILD_PID = 300; +const STARTED_AT_MS = DateTime.toEpochMillis(DateTime.makeUnsafe("2026-06-17T12:00:00.000Z")); + +function processSample( + input: Partial & + Pick, +): ResourceMonitorProcessSample { + return { + runTimeMs: 1_000, + name: `process-${input.pid}`, + command: `process-${input.pid}`, + status: "Running", + cpuPercent: 0, + cpuTimeMs: 0, + residentBytes: 1_024, + virtualBytes: 2_048, + ioReadBytes: 0, + ioWriteBytes: 0, + ioSemantics: "storage", + ...input, + }; 
+} + +function snapshot( + sequence: number, + sampledAtUnixMs: number, + childCpuTimeMs: number, + childWriteBytes: number, +): ResourceMonitorSnapshotEvent { + const processes = [ + processSample({ pid: SERVER_PID, ppid: 1, startTimeMs: 10 }), + processSample({ + pid: ELECTRON_PID, + ppid: 1, + startTimeMs: 20, + name: "electron", + command: "electron", + }), + processSample({ + pid: CHILD_PID, + ppid: SERVER_PID, + startTimeMs: 30, + name: "codex", + command: "codex app-server", + cpuTimeMs: childCpuTimeMs, + ioWriteBytes: childWriteBytes, + }), + ]; + return { + version: 2, + type: "snapshot", + sequence, + sampledAtUnixMs, + collectionDurationMicros: 100, + scannedProcessCount: processes.length, + retainedProcessCount: processes.length, + inaccessibleProcessCount: 0, + processes, + }; +} + +const health: ResourceTelemetryHealth = { + native: { + status: "healthy", + lastSampleAt: Option.none(), + lastError: Option.none(), + }, + desktop: { + status: "healthy", + lastSampleAt: Option.none(), + lastError: Option.none(), + }, + sidecarVersion: Option.some("0.1.0"), + sidecarPid: Option.some(400), + restartCount: 0, + collectionDurationMicros: 100, + scannedProcessCount: 3, + retainedProcessCount: 3, + inaccessibleProcessCount: 0, +}; + +function desktopSnapshot(): DesktopHostTelemetrySnapshot { + const sampledAt = DateTime.makeUnsafe(STARTED_AT_MS + 1_000); + return { + version: 1, + type: "desktopTelemetry", + sequence: 1, + sampledAtUnixMs: STARTED_AT_MS + 1_000, + electronPid: ELECTRON_PID, + power: { + source: "electron-main", + idle: "false", + idleSeconds: 0, + locked: "false", + suspended: false, + onBattery: "false", + lowPowerMode: "unknown", + thermalState: "nominal", + stale: false, + updatedAt: sampledAt, + }, + speedLimitPercent: Option.none(), + electronProcesses: [ + { + pid: ELECTRON_PID, + creationTimeMs: 20, + type: "Browser", + cpuPercent: 999, + idleWakeupsPerSecond: 999, + workingSetBytes: 999_999, + peakWorkingSetBytes: 999_999, + }, + ], + 
}; +} + +describe("buildResourceTelemetryHistory", () => { + it("normalizes query bounds before requesting native history", () => { + expect(normalizeResourceTelemetryHistoryInput({ windowMs: 0, bucketMs: 0 })).toEqual({ + windowMs: 1_000, + bucketMs: 1_000, + }); + }); + + it("replays native snapshots on demand without applying current Electron metrics", () => { + const history = buildResourceTelemetryHistory({ + readAt: DateTime.makeUnsafe(STARTED_AT_MS + 2_000), + windowMs: 10_000, + bucketMs: 10_000, + sampleIntervalMs: 1_000, + serverPid: SERVER_PID, + sidecarPid: Option.some(400), + desktopSnapshot: Option.some(desktopSnapshot()), + snapshots: [ + snapshot(1, STARTED_AT_MS, 100, 1_000), + snapshot(2, STARTED_AT_MS + 1_000, 350, 5_000), + ], + health, + }); + + const child = history.topProcesses.find((process) => process.identity.pid === CHILD_PID); + const electron = history.topProcesses.find((process) => process.identity.pid === ELECTRON_PID); + expect(child?.sampleCount).toBe(2); + expect(child?.cpuTimeMs).toBe(250); + expect(child?.ioWriteBytes).toBe(4_000); + expect(electron?.category).toBe("electron-main"); + expect(electron?.currentRssBytes).toBe(1_024); + expect(history.buckets.reduce((total, bucket) => total + bucket.ioWriteBytes, 0)).toBe(4_000); + }); +}); diff --git a/apps/server/src/resourceTelemetry/ResourceTelemetryHistory.ts b/apps/server/src/resourceTelemetry/ResourceTelemetryHistory.ts new file mode 100644 index 00000000000..6a6690bea0d --- /dev/null +++ b/apps/server/src/resourceTelemetry/ResourceTelemetryHistory.ts @@ -0,0 +1,214 @@ +import type { + DesktopHostTelemetrySnapshot, + ResourceMonitorSnapshotEvent, + ResourceTelemetryHealth, + ResourceTelemetryHistory, + ResourceTelemetryHistoryBucket, + ResourceTelemetryProcess, + ResourceTelemetryProcessSummary, +} from "@t3tools/contracts"; +import * as DateTime from "effect/DateTime"; +import * as Option from "effect/Option"; + +import { + emptyTelemetryCounters, + mergeProcesses, + 
processIdentityKey, + type ProcessState, + type TelemetryCounters, +} from "./Model.ts"; + +const MAX_HISTORY_WINDOW_MS = 60 * 60_000; + +export function normalizeResourceTelemetryHistoryInput(input: { + readonly windowMs: number; + readonly bucketMs: number; +}): { readonly windowMs: number; readonly bucketMs: number } { + const windowMs = Math.max(1_000, Math.min(MAX_HISTORY_WINDOW_MS, input.windowMs)); + return { + windowMs, + bucketMs: Math.max(1_000, Math.min(windowMs, input.bucketMs)), + }; +} + +interface AggregateSample { + readonly sampledAtMs: number; + readonly cpuPercent: number; + readonly rssBytes: number; + readonly processCount: number; + readonly ioReadBytes: number; + readonly ioWriteBytes: number; +} + +interface ProcessSample { + readonly sampledAtMs: number; + readonly process: ResourceTelemetryProcess; + readonly cpuTimeMs: number; + readonly ioReadBytes: number; + readonly ioWriteBytes: number; +} + +export interface BuildResourceTelemetryHistoryInput { + readonly readAt: DateTime.Utc; + readonly windowMs: number; + readonly bucketMs: number; + readonly sampleIntervalMs: number; + readonly serverPid: number; + readonly sidecarPid: Option.Option; + readonly desktopSnapshot: Option.Option; + readonly snapshots: ReadonlyArray; + readonly health: ResourceTelemetryHealth; +} + +function summarizeProcesses( + samples: ReadonlyArray, +): ReadonlyArray { + const groups = new Map(); + for (const sample of samples) { + const identityKey = processIdentityKey( + sample.process.identity.pid, + sample.process.identity.startTimeMs, + ); + const current = groups.get(identityKey) ?? 
[]; + current.push(sample); + groups.set(identityKey, current); + } + + return [...groups.values()] + .map((processSamples): ResourceTelemetryProcessSummary => { + const sorted = processSamples.toSorted((left, right) => left.sampledAtMs - right.sampledAtMs); + const first = sorted[0]!; + const latest = sorted[sorted.length - 1]!; + const cpuTotal = sorted.reduce((total, sample) => total + sample.process.cpuPercent, 0); + return { + identity: latest.process.identity, + ppid: latest.process.ppid, + depth: latest.process.depth, + name: latest.process.name, + command: latest.process.command, + category: latest.process.category, + firstSeenAt: first.process.firstSeenAt, + lastSeenAt: latest.process.lastSeenAt, + currentCpuPercent: latest.process.cpuPercent, + avgCpuPercent: cpuTotal / sorted.length, + maxCpuPercent: Math.max(...sorted.map((sample) => sample.process.cpuPercent)), + cpuTimeMs: sorted.reduce((total, sample) => total + sample.cpuTimeMs, 0), + currentRssBytes: latest.process.residentBytes, + peakRssBytes: Math.max(...sorted.map((sample) => sample.process.peakResidentBytes)), + ioReadBytes: sorted.reduce((total, sample) => total + sample.ioReadBytes, 0), + ioWriteBytes: sorted.reduce((total, sample) => total + sample.ioWriteBytes, 0), + ioSemantics: latest.process.ioSemantics, + sampleCount: sorted.length, + }; + }) + .toSorted( + (left, right) => right.cpuTimeMs - left.cpuTimeMs || right.peakRssBytes - left.peakRssBytes, + ); +} + +function buildBuckets(input: { + readonly samples: ReadonlyArray; + readonly nowMs: number; + readonly windowMs: number; + readonly bucketMs: number; +}): ReadonlyArray { + const windowStartMs = input.nowMs - input.windowMs; + const buckets: ResourceTelemetryHistoryBucket[] = []; + for (let startedAtMs = windowStartMs; startedAtMs < input.nowMs; startedAtMs += input.bucketMs) { + const endedAtMs = Math.min(input.nowMs, startedAtMs + input.bucketMs); + const samples = input.samples.filter( + (sample) => + sample.sampledAtMs >= 
startedAtMs && + (endedAtMs === input.nowMs + ? sample.sampledAtMs <= endedAtMs + : sample.sampledAtMs < endedAtMs), + ); + const cpuTotal = samples.reduce((total, sample) => total + sample.cpuPercent, 0); + buckets.push({ + startedAt: DateTime.makeUnsafe(startedAtMs), + endedAt: DateTime.makeUnsafe(endedAtMs), + avgCpuPercent: samples.length === 0 ? 0 : cpuTotal / samples.length, + maxCpuPercent: + samples.length === 0 ? 0 : Math.max(...samples.map((sample) => sample.cpuPercent)), + maxRssBytes: samples.length === 0 ? 0 : Math.max(...samples.map((sample) => sample.rssBytes)), + ioReadBytes: samples.reduce((total, sample) => total + sample.ioReadBytes, 0), + ioWriteBytes: samples.reduce((total, sample) => total + sample.ioWriteBytes, 0), + maxProcessCount: + samples.length === 0 ? 0 : Math.max(...samples.map((sample) => sample.processCount)), + }); + } + return buckets; +} + +export function buildResourceTelemetryHistory( + input: BuildResourceTelemetryHistoryInput, +): ResourceTelemetryHistory { + const readAtMs = DateTime.toEpochMillis(input.readAt); + const { windowMs, bucketMs } = normalizeResourceTelemetryHistoryInput(input); + const windowStartMs = readAtMs - windowMs; + const snapshots = input.snapshots + .filter((snapshot) => snapshot.sampledAtUnixMs >= windowStartMs) + .toSorted((left, right) => left.sampledAtUnixMs - right.sampledAtUnixMs); + const electronRootPids = Option.match(input.desktopSnapshot, { + onNone: () => new Set(), + onSome: (snapshot) => new Set([snapshot.electronPid]), + }); + const desktopIdentity = Option.map(input.desktopSnapshot, (snapshot) => ({ + ...snapshot, + electronProcesses: [], + })); + const aggregateSamples: AggregateSample[] = []; + const processSamples: ProcessSample[] = []; + let previous: ReadonlyMap = new Map(); + let counters: TelemetryCounters = emptyTelemetryCounters(); + + for (const snapshot of snapshots) { + const merged = mergeProcesses({ + serverPid: input.serverPid, + sidecarPid: input.sidecarPid, + 
fallbackSampledAtMs: snapshot.sampledAtUnixMs, + nativeSnapshot: Option.some(snapshot), + desktopSnapshot: desktopIdentity, + electronRootPids, + previous, + counters, + updatePrevious: true, + }); + previous = merged.previous; + counters = merged.counters; + const deltasByIdentity = new Map( + merged.deltas.map((processDelta) => [processDelta.identityKey, processDelta]), + ); + aggregateSamples.push({ + sampledAtMs: snapshot.sampledAtUnixMs, + cpuPercent: merged.groups.allT3.currentCpuPercent, + rssBytes: merged.groups.allT3.currentRssBytes, + processCount: merged.groups.allT3.processCount, + ioReadBytes: merged.deltas.reduce((total, process) => total + process.ioReadBytes, 0), + ioWriteBytes: merged.deltas.reduce((total, process) => total + process.ioWriteBytes, 0), + }); + for (const process of merged.processes) { + const processDelta = deltasByIdentity.get( + processIdentityKey(process.identity.pid, process.identity.startTimeMs), + ); + processSamples.push({ + sampledAtMs: snapshot.sampledAtUnixMs, + process, + cpuTimeMs: processDelta?.cpuTimeMs ?? 0, + ioReadBytes: processDelta?.ioReadBytes ?? 0, + ioWriteBytes: processDelta?.ioWriteBytes ?? 
0, + }); + } + } + + return { + readAt: input.readAt, + windowMs, + bucketMs, + sampleIntervalMs: input.sampleIntervalMs, + retainedSampleCount: aggregateSamples.length + processSamples.length, + buckets: buildBuckets({ samples: aggregateSamples, nowMs: readAtMs, windowMs, bucketMs }), + topProcesses: summarizeProcesses(processSamples), + health: input.health, + }; +} diff --git a/apps/server/src/resourceTelemetry/ResourceTelemetryStore.ts b/apps/server/src/resourceTelemetry/ResourceTelemetryStore.ts deleted file mode 100644 index cb016db1a39..00000000000 --- a/apps/server/src/resourceTelemetry/ResourceTelemetryStore.ts +++ /dev/null @@ -1,253 +0,0 @@ -import type { - ResourceTelemetryHistory, - ResourceTelemetryHistoryBucket, - ResourceTelemetryHistoryInput, - ResourceTelemetryProcess, - ResourceTelemetryProcessSummary, - ResourceTelemetrySnapshot, -} from "@t3tools/contracts"; -import * as Context from "effect/Context"; -import * as DateTime from "effect/DateTime"; -import * as Effect from "effect/Effect"; -import * as Layer from "effect/Layer"; -import * as Ref from "effect/Ref"; - -import type { ProcessDelta } from "./Model.ts"; -import { processIdentityKey } from "./Model.ts"; - -const RETENTION_MS = 60 * 60_000; -const MAX_AGGREGATE_SAMPLES = 3_600; -const MAX_PROCESS_SAMPLES = 20_000; - -interface AggregateSample { - readonly sampledAtMs: number; - readonly cpuPercent: number; - readonly rssBytes: number; - readonly processCount: number; - readonly ioReadBytes: number; - readonly ioWriteBytes: number; -} - -interface ProcessSample { - readonly sampledAtMs: number; - readonly process: ResourceTelemetryProcess; - readonly cpuTimeMs: number; - readonly ioReadBytes: number; - readonly ioWriteBytes: number; -} - -interface StoreState { - readonly aggregateSamples: ReadonlyArray; - readonly processSamples: ReadonlyArray; - readonly latest: ResourceTelemetrySnapshot; -} - -export interface ResourceTelemetryStoreShape { - readonly updateLatest: (snapshot: 
ResourceTelemetrySnapshot) => Effect.Effect; - readonly record: ( - snapshot: ResourceTelemetrySnapshot, - deltas: ReadonlyArray, - ) => Effect.Effect; - readonly readHistory: ( - input: ResourceTelemetryHistoryInput, - ) => Effect.Effect; -} - -export class ResourceTelemetryStore extends Context.Service< - ResourceTelemetryStore, - ResourceTelemetryStoreShape ->()("t3/resourceTelemetry/ResourceTelemetryStore") {} - -function trimByTime
( - values: ReadonlyArray, - nowMs: number, - max: number, -): ReadonlyArray { - const retained = values.filter((value) => value.sampledAtMs >= nowMs - RETENTION_MS); - return retained.length <= max ? retained : retained.slice(retained.length - max); -} - -function summarizeProcesses( - samples: ReadonlyArray, -): ReadonlyArray { - const groups = new Map(); - for (const sample of samples) { - const identityKey = processIdentityKey( - sample.process.identity.pid, - sample.process.identity.startTimeMs, - ); - const current = groups.get(identityKey) ?? []; - current.push(sample); - groups.set(identityKey, current); - } - - return [...groups.values()] - .map((processSamples): ResourceTelemetryProcessSummary => { - const sorted = processSamples.toSorted((left, right) => left.sampledAtMs - right.sampledAtMs); - const first = sorted[0]!; - const latest = sorted[sorted.length - 1]!; - const cpuTotal = sorted.reduce((total, sample) => total + sample.process.cpuPercent, 0); - return { - identity: latest.process.identity, - ppid: latest.process.ppid, - depth: latest.process.depth, - name: latest.process.name, - command: latest.process.command, - category: latest.process.category, - firstSeenAt: first.process.firstSeenAt, - lastSeenAt: latest.process.lastSeenAt, - currentCpuPercent: latest.process.cpuPercent, - avgCpuPercent: cpuTotal / sorted.length, - maxCpuPercent: Math.max(...sorted.map((sample) => sample.process.cpuPercent)), - cpuTimeMs: sorted.reduce((total, sample) => total + sample.cpuTimeMs, 0), - currentRssBytes: latest.process.residentBytes, - peakRssBytes: Math.max(...sorted.map((sample) => sample.process.peakResidentBytes)), - ioReadBytes: sorted.reduce((total, sample) => total + sample.ioReadBytes, 0), - ioWriteBytes: sorted.reduce((total, sample) => total + sample.ioWriteBytes, 0), - ioSemantics: latest.process.ioSemantics, - sampleCount: sorted.length, - }; - }) - .toSorted( - (left, right) => right.cpuTimeMs - left.cpuTimeMs || right.peakRssBytes - 
left.peakRssBytes, - ); -} - -function buildBuckets(input: { - readonly samples: ReadonlyArray; - readonly nowMs: number; - readonly windowMs: number; - readonly bucketMs: number; -}): ReadonlyArray { - const windowStartMs = input.nowMs - input.windowMs; - const buckets: ResourceTelemetryHistoryBucket[] = []; - for (let startedAtMs = windowStartMs; startedAtMs < input.nowMs; startedAtMs += input.bucketMs) { - const endedAtMs = Math.min(input.nowMs, startedAtMs + input.bucketMs); - const samples = input.samples.filter( - (sample) => - sample.sampledAtMs >= startedAtMs && - (endedAtMs === input.nowMs - ? sample.sampledAtMs <= endedAtMs - : sample.sampledAtMs < endedAtMs), - ); - const cpuTotal = samples.reduce((total, sample) => total + sample.cpuPercent, 0); - buckets.push({ - startedAt: DateTime.makeUnsafe(startedAtMs), - endedAt: DateTime.makeUnsafe(endedAtMs), - avgCpuPercent: samples.length === 0 ? 0 : cpuTotal / samples.length, - maxCpuPercent: - samples.length === 0 ? 0 : Math.max(...samples.map((sample) => sample.cpuPercent)), - maxRssBytes: samples.length === 0 ? 0 : Math.max(...samples.map((sample) => sample.rssBytes)), - ioReadBytes: samples.reduce((total, sample) => total + sample.ioReadBytes, 0), - ioWriteBytes: samples.reduce((total, sample) => total + sample.ioWriteBytes, 0), - maxProcessCount: - samples.length === 0 ? 
0 : Math.max(...samples.map((sample) => sample.processCount)), - }); - } - return buckets; -} - -export const make = Effect.fn("resourceTelemetry.resourceTelemetryStore.make")(function* ( - initial: ResourceTelemetrySnapshot, -) { - const state = yield* Ref.make({ - aggregateSamples: [], - processSamples: [], - latest: initial, - }); - - const record: ResourceTelemetryStoreShape["record"] = (snapshot, deltas) => - Ref.update(state, (current) => { - const sampledAtMs = DateTime.toEpochMillis(snapshot.readAt); - const deltasByIdentity = new Map( - deltas.map((processDelta) => [processDelta.identityKey, processDelta]), - ); - const aggregateDelta = deltas.reduce( - (total, process) => ({ - ioReadBytes: total.ioReadBytes + process.ioReadBytes, - ioWriteBytes: total.ioWriteBytes + process.ioWriteBytes, - }), - { ioReadBytes: 0, ioWriteBytes: 0 }, - ); - return { - latest: snapshot, - aggregateSamples: trimByTime( - [ - ...current.aggregateSamples, - { - sampledAtMs, - cpuPercent: snapshot.groups.allT3.currentCpuPercent, - rssBytes: snapshot.groups.allT3.currentRssBytes, - processCount: snapshot.groups.allT3.processCount, - ioReadBytes: aggregateDelta.ioReadBytes, - ioWriteBytes: aggregateDelta.ioWriteBytes, - }, - ], - sampledAtMs, - MAX_AGGREGATE_SAMPLES, - ), - processSamples: trimByTime( - [ - ...current.processSamples, - ...snapshot.processes.map((process) => { - const processDelta = deltasByIdentity.get( - processIdentityKey(process.identity.pid, process.identity.startTimeMs), - ); - return { - sampledAtMs, - process, - cpuTimeMs: processDelta?.cpuTimeMs ?? 0, - ioReadBytes: processDelta?.ioReadBytes ?? 0, - ioWriteBytes: processDelta?.ioWriteBytes ?? 
0, - }; - }), - ], - sampledAtMs, - MAX_PROCESS_SAMPLES, - ), - }; - }); - - const updateLatest: ResourceTelemetryStoreShape["updateLatest"] = (snapshot) => - Ref.update(state, (current) => ({ - ...current, - latest: snapshot, - })); - - const readHistory: ResourceTelemetryStoreShape["readHistory"] = (input) => - Effect.gen(function* () { - const readAt = yield* DateTime.now; - const readAtMs = DateTime.toEpochMillis(readAt); - const windowMs = Math.max(1_000, Math.min(RETENTION_MS, input.windowMs)); - const bucketMs = Math.max(1_000, Math.min(windowMs, input.bucketMs)); - const current = yield* Ref.get(state); - const minSampledAtMs = readAtMs - windowMs; - const aggregateSamples = current.aggregateSamples.filter( - (sample) => sample.sampledAtMs >= minSampledAtMs, - ); - const processSamples = current.processSamples.filter( - (sample) => sample.sampledAtMs >= minSampledAtMs, - ); - - return { - readAt, - windowMs, - bucketMs, - sampleIntervalMs: current.latest.sampleIntervalMs, - retainedSampleCount: current.aggregateSamples.length + current.processSamples.length, - buckets: buildBuckets({ - samples: aggregateSamples, - nowMs: readAtMs, - windowMs, - bucketMs, - }), - topProcesses: summarizeProcesses(processSamples), - health: current.latest.health, - }; - }); - - return ResourceTelemetryStore.of({ updateLatest, record, readHistory }); -}); - -export const layer = (initial: ResourceTelemetrySnapshot) => - Layer.effect(ResourceTelemetryStore, make(initial)); diff --git a/apps/server/src/server.ts b/apps/server/src/server.ts index d4f7d1cf2fa..39f5433334b 100644 --- a/apps/server/src/server.ts +++ b/apps/server/src/server.ts @@ -124,14 +124,15 @@ const PtyAdapterLive = Layer.unwrap( const NativeTelemetryLayerLive = NativeTelemetryClient.layer.pipe( Layer.provide(ResourceMonitorBinary.layer), ); +const DesktopTelemetryReceiverLayerLive = DesktopTelemetryReceiver.layer; const ResourceTelemetryLayerLive = ResourceTelemetry.layer.pipe( 
Layer.provideMerge(NativeTelemetryLayerLive), - Layer.provideMerge(DesktopTelemetryReceiver.layer), + Layer.provideMerge(DesktopTelemetryReceiverLayerLive), ); const HostPowerMonitorLayerLive = HostPowerMonitor.layer.pipe( - Layer.provide(ResourceTelemetryLayerLive), + Layer.provide(DesktopTelemetryReceiverLayerLive), ); const BackgroundLayerLive = BackgroundPolicy.layer.pipe( diff --git a/docs/architecture/resource-telemetry.md b/docs/architecture/resource-telemetry.md index 8decbc3c516..a72483472cc 100644 --- a/docs/architecture/resource-telemetry.md +++ b/docs/architecture/resource-telemetry.md @@ -11,9 +11,9 @@ subprocess probes with two persistent, direct data sources: through operating-system APIs via `sysinfo`; 2. Electron main-process APIs for Electron process metrics and host power state. -The server merges both sources, computes rates from cumulative counters, keeps -bounded in-memory history, exposes typed RPCs, and drives the diagnostics page. -Telemetry history is not persisted to disk. +The native monitor owns bounded in-memory history. The server only merges and +summarizes that history when diagnostics requests it. Telemetry history is not +persisted to disk or continuously copied into Node. ## Why a standalone executable @@ -38,14 +38,13 @@ native code into Node. 
```text Electron main ├─ powerMonitor - ├─ app.getAppMetrics() - └─ inherited fd 4, NDJSON - │ - ▼ + ├─ app.getAppMetrics() while diagnostics is open + ├─ inherited fd 4, telemetry NDJSON ─────────────┐ + └─ inherited fd 5, demand-control NDJSON ◀──────┤ + ▼ Node server ── stdin/stdout NDJSON ── Rust resource monitor │ ├─ ResourceTelemetry Effect service - ├─ bounded in-memory history ├─ background power policy projection └─ WebSocket RPC/subscription ── diagnostics UI ``` @@ -65,10 +64,14 @@ line on stdout: - `configure` - `setExternalProcesses` +- `setSampleInterval` +- `setStreaming` - `sampleNow` +- `readHistory` - `shutdown` - `hello` - `snapshot` +- `historyChunk` - `error` The protocol version is defined by @@ -77,8 +80,8 @@ The protocol version is defined by ### Collection -The monitor keeps one `sysinfo::System` instance and refreshes it on a one-second -interval. It collects: +The monitor keeps one `sysinfo::System` instance and refreshes it at the +power-adaptive interval selected by the server. It collects: - PID and parent PID; - process start time and run time; @@ -110,6 +113,26 @@ can have coarser platform resolution. The process list is emitted in depth-first tree order so renderer collapse and expansion preserves complete subtrees. +### Native history and streaming + +Every native sample is appended to a one-hour in-memory ring bounded to 3,600 +snapshots and 20,000 retained process rows. History stays in the sidecar until a +`readHistory` request and is returned in bounded chunks. The first bound reached +wins, so high process counts shorten the effective history window. + +Periodic snapshot streaming is disabled by default. The server enables it only +while at least one diagnostics subscription is retained. `sampleNow` remains +available for explicit refreshes and identity validation. 
+ +The server adjusts native sampling without restarting the sidecar: + +- suspended: paused; +- locked, low-power, or serious/critical thermal state: 15 seconds; +- battery: 5 seconds; +- normal AC: 1 second; +- unknown or stale power: 5 seconds in the background and 1 second while live + diagnostics is open. + ### Sampling limits This is counter sampling, not syscall tracing. @@ -143,14 +166,19 @@ columns are the operating system's cumulative counters for that process. Electron main owns `DesktopTelemetryPublisher`. -It samples once per second from: +Power events trigger an immediate snapshot. A low-rate 30-second heartbeat keeps +the server-side power state fresh while diagnostics is closed. During that +heartbeat Electron reads: -- `app.getAppMetrics()`; - `powerMonitor.isOnBatteryPower()`; - `powerMonitor.getSystemIdleTime()`; - `powerMonitor.getSystemIdleState()`; - `powerMonitor.getCurrentThermalState()`. +`app.getAppMetrics()` is only called while diagnostics demand is active. Its +live cadence is 1 second on AC, 5 seconds on battery, and 15 seconds while +locked, suspended, or thermally constrained. + It also listens for: - lock and unlock; @@ -165,10 +193,11 @@ remains `unknown`. The desktop backend is spawned with: - fd 3 for the existing bootstrap payload; -- fd 4 for desktop telemetry NDJSON. +- fd 4 for Electron-to-server telemetry NDJSON; +- fd 5 for server-to-Electron diagnostics-demand NDJSON. -This is a private Electron-main-to-server pipe. It does not use the renderer -WebSocket and is recreated for every backend restart. +These are private Electron-main/server pipes. They do not use the renderer +WebSocket and are recreated for every backend restart. ## Server Effect services @@ -192,7 +221,10 @@ Owns the resource-monitor process and protocol. 
- validates the hello/version handshake; - sends configuration and external process roots; -- exposes automatic snapshots and `sampleNow`; +- adapts the native interval from host power state; +- enables streaming only for scoped live subscribers; +- reads chunked native history on demand; +- exposes `sampleNow`; - serializes commands; - supervises process exit and protocol failure; - restarts with bounded exponential backoff; @@ -207,8 +239,10 @@ restart cannot freeze telemetry. ### `DesktopTelemetryReceiver` Reads fd 4, decodes schema-validated messages, stores the latest Electron -snapshot, and publishes desktop health. Decode errors, protocol mismatch, -stream failure, and normal stream closure are represented explicitly. +snapshot, and publishes desktop health. It writes diagnostics demand to fd 5 +and marks the source stale after 90 seconds without a heartbeat. Decode errors, +protocol mismatch, control-write failure, stream failure, stale input, and +normal stream closure are represented explicitly. ### `ResourceTelemetry` @@ -220,20 +254,21 @@ Merges native and Electron data and owns public telemetry semantics. - computes process depth and child relationships; - tracks starts, exits, CPU time, and observed I/O; - projects power data; -- publishes live snapshots; +- acquires native streaming and Electron process metrics only for scoped live + subscribers; +- queries and replays native history only when requested; - validates `(pid, startTimeMs)` before process signaling; - updates history health even when no further native sample arrives. Electron and monitor processes are visible but are not valid targets for the existing process-signal RPC. -### `ResourceTelemetryStore` +### History projection -Keeps aggregate and process samples in memory for at most one hour, subject to -hard sample-count bounds. Aggregate history retains up to 3,600 samples. 
-Detailed process history retains up to 20,000 process samples, so high process -fan-out can shorten detailed per-process coverage while aggregate coverage -remains available. +`ResourceTelemetryHistory` is a pure on-demand projection. It replays raw native +snapshots to derive rates, lifecycle counters, buckets, and process summaries. +Current Electron process metrics are intentionally excluded from historical +replay so they cannot overwrite older native CPU or memory samples. ### `ResourceAttribution` @@ -249,8 +284,8 @@ adding diagnostics-specific counters. ## Background policy integration -`HostPowerMonitor` now projects power state from `ResourceTelemetry`; it does not -spawn macOS shell probes. +`HostPowerMonitor` consumes `DesktopTelemetryReceiver` directly; observing host +power does not retain live resource diagnostics or invoke shell probes. The monitor updates its latest timestamp on every Electron sample but only publishes semantic state changes. Increasing idle seconds alone does not cause a @@ -300,11 +335,12 @@ with the CLI. Missing platform artifacts degrade native telemetry to Steady state uses: - one native process; -- one native sample per second; -- one Electron sample per second in desktop mode; +- power-adaptive native counter sampling with no periodic Node snapshot stream; +- event-driven Electron power updates plus a 30-second heartbeat; +- no `app.getAppMetrics()` calls while diagnostics is closed; - no telemetry database; - no recurring shell probes; -- bounded PubSub queues and bounded history. +- bounded PubSub queues and native ring history. The diagnostics page exposes the monitor's own process resource usage and collection duration so the observer's cost is measurable. 
diff --git a/native/resource-monitor/src/main.rs b/native/resource-monitor/src/main.rs index 5f747847442..c093cfdaa5a 100644 --- a/native/resource-monitor/src/main.rs +++ b/native/resource-monitor/src/main.rs @@ -6,10 +6,14 @@ use std::thread; use std::time::{Duration, Instant, SystemTime, UNIX_EPOCH}; use sysinfo::{Pid, ProcessRefreshKind, ProcessesToUpdate, System, UpdateKind}; -const PROTOCOL_VERSION: u32 = 1; +const PROTOCOL_VERSION: u32 = 2; const MIN_SAMPLE_INTERVAL_MS: u64 = 250; const MAX_SAMPLE_INTERVAL_MS: u64 = 60_000; const EXTERNAL_PROCESS_START_TOLERANCE_MS: u64 = 2_000; +const HISTORY_RETENTION_MS: u64 = 60 * 60_000; +const MAX_HISTORY_SNAPSHOTS: usize = 3_600; +const MAX_HISTORY_PROCESS_SAMPLES: usize = 20_000; +const HISTORY_CHUNK_SNAPSHOTS: usize = 32; #[derive(Debug, Clone, Deserialize)] #[serde(rename_all = "camelCase")] @@ -37,10 +41,23 @@ enum Command { version: u32, processes: Vec, }, + SetSampleInterval { + version: u32, + sample_interval_ms: u64, + }, + SetStreaming { + version: u32, + enabled: bool, + }, SampleNow { version: u32, request_id: String, }, + ReadHistory { + version: u32, + request_id: String, + window_ms: u64, + }, Shutdown { version: u32, }, @@ -51,7 +68,10 @@ impl Command { match self { Self::Configure { version, .. } | Self::SetExternalProcesses { version, .. } + | Self::SetSampleInterval { version, .. } + | Self::SetStreaming { version, .. } | Self::SampleNow { version, .. } + | Self::ReadHistory { version, .. 
} | Self::Shutdown { version } => *version, } } @@ -94,7 +114,7 @@ enum IoSemantics { AllIo, } -#[derive(Debug, Serialize)] +#[derive(Debug, Clone, Serialize)] #[serde(rename_all = "camelCase")] struct ProcessSample { pid: u32, @@ -113,7 +133,7 @@ struct ProcessSample { io_semantics: IoSemantics, } -#[derive(Debug, Serialize)] +#[derive(Debug, Clone, Serialize)] #[serde(rename_all = "camelCase")] struct SnapshotEvent { version: u32, @@ -130,6 +150,17 @@ struct SnapshotEvent { processes: Vec, } +#[derive(Debug, Serialize)] +#[serde(rename_all = "camelCase")] +struct HistoryChunkEvent<'a> { + version: u32, + #[serde(rename = "type")] + event_type: &'static str, + request_id: &'a str, + done: bool, + snapshots: &'a [SnapshotEvent], +} + #[derive(Debug, Serialize)] #[serde(rename_all = "camelCase")] struct ErrorEvent { @@ -144,10 +175,51 @@ struct ErrorEvent { #[derive(Debug, Clone)] struct CollectorConfig { root_pid: u32, - sample_interval: Duration, + sample_interval: Option, external_processes: HashMap>, } +#[derive(Default)] +struct HistoryRecorder { + snapshots: VecDeque, + process_sample_count: usize, +} + +impl HistoryRecorder { + fn record(&mut self, snapshot: &SnapshotEvent) { + let mut retained = snapshot.clone(); + retained.request_id = None; + self.process_sample_count = self + .process_sample_count + .saturating_add(retained.processes.len()); + self.snapshots.push_back(retained); + self.trim(snapshot.sampled_at_unix_ms); + } + + fn trim(&mut self, now_ms: u64) { + while self.snapshots.front().is_some_and(|snapshot| { + snapshot.sampled_at_unix_ms < now_ms.saturating_sub(HISTORY_RETENTION_MS) + || self.snapshots.len() > MAX_HISTORY_SNAPSHOTS + || self.process_sample_count > MAX_HISTORY_PROCESS_SAMPLES + }) { + if let Some(removed) = self.snapshots.pop_front() { + self.process_sample_count = self + .process_sample_count + .saturating_sub(removed.processes.len()); + } + } + } + + fn read(&self, window_ms: u64, now_ms: u64) -> Vec { + let started_at_ms = 
now_ms.saturating_sub(window_ms.min(HISTORY_RETENTION_MS)); + self.snapshots + .iter() + .filter(|snapshot| snapshot.sampled_at_unix_ms >= started_at_ms) + .cloned() + .collect() + } +} + struct Collector { system: System, sequence: u64, @@ -303,8 +375,12 @@ fn unix_time_ms() -> u64 { .as_millis() as u64 } -fn clamp_sample_interval(sample_interval_ms: u64) -> Duration { - Duration::from_millis(sample_interval_ms.clamp(MIN_SAMPLE_INTERVAL_MS, MAX_SAMPLE_INTERVAL_MS)) +fn clamp_sample_interval(sample_interval_ms: u64) -> Option { + (sample_interval_ms > 0).then(|| { + Duration::from_millis( + sample_interval_ms.clamp(MIN_SAMPLE_INTERVAL_MS, MAX_SAMPLE_INTERVAL_MS), + ) + }) } fn spawn_input_reader() -> Receiver { @@ -368,6 +444,40 @@ fn write_error( ) } +fn write_history( + writer: &mut impl Write, + request_id: &str, + snapshots: &[SnapshotEvent], +) -> io::Result<()> { + if snapshots.is_empty() { + return write_event( + writer, + &HistoryChunkEvent { + version: PROTOCOL_VERSION, + event_type: "historyChunk", + request_id, + done: true, + snapshots, + }, + ); + } + + let chunk_count = snapshots.len().div_ceil(HISTORY_CHUNK_SNAPSHOTS); + for (index, chunk) in snapshots.chunks(HISTORY_CHUNK_SNAPSHOTS).enumerate() { + write_event( + writer, + &HistoryChunkEvent { + version: PROTOCOL_VERSION, + event_type: "historyChunk", + request_id, + done: index + 1 == chunk_count, + snapshots: chunk, + }, + )?; + } + Ok(()) +} + fn main() -> io::Result<()> { let mut writer = BufWriter::new(io::stdout().lock()); write_event( @@ -393,8 +503,10 @@ fn main() -> io::Result<()> { let receiver = spawn_input_reader(); let mut collector = Collector::new(); + let mut history = HistoryRecorder::default(); let mut config: Option = None; let mut next_sample_at: Option = None; + let mut streaming_enabled = false; loop { let timeout = next_sample_at @@ -435,7 +547,7 @@ fn main() -> io::Result<()> { .map(|process| (process.pid, process.start_time_ms)) .collect(), }); - next_sample_at = 
Some(Instant::now()); + next_sample_at = sample_interval.map(|_| Instant::now()); } Command::SetExternalProcesses { processes, .. } => { if let Some(current) = config.as_mut() { @@ -452,11 +564,34 @@ fn main() -> io::Result<()> { )?; } } + Command::SetSampleInterval { + sample_interval_ms, .. + } => { + if let Some(current) = config.as_mut() { + current.sample_interval = clamp_sample_interval(sample_interval_ms); + next_sample_at = current + .sample_interval + .map(|interval| Instant::now() + interval); + } else { + write_error( + &mut writer, + "not-configured", + "configure must be sent before changing the sample interval", + true, + )?; + } + } + Command::SetStreaming { enabled, .. } => { + streaming_enabled = enabled; + } Command::SampleNow { request_id, .. } => { if let Some(current) = config.as_ref() { let event = collector.sample(current, Some(request_id)); + history.record(&event); write_event(&mut writer, &event)?; - next_sample_at = Some(Instant::now() + current.sample_interval); + next_sample_at = current + .sample_interval + .map(|interval| Instant::now() + interval); } else { write_error( &mut writer, @@ -466,14 +601,38 @@ fn main() -> io::Result<()> { )?; } } + Command::ReadHistory { + request_id, + window_ms, + .. + } => { + if config.is_some() { + let snapshots = history.read(window_ms, unix_time_ms()); + write_history(&mut writer, &request_id, &snapshots)?; + } else { + write_error( + &mut writer, + "not-configured", + "configure must be sent before reading history", + true, + )?; + } + } Command::Shutdown { .. 
} => return Ok(()), } } Err(RecvTimeoutError::Timeout) => { if let Some(current) = config.as_ref() { - let event = collector.sample(current, None); - write_event(&mut writer, &event)?; - next_sample_at = Some(Instant::now() + current.sample_interval); + if let Some(interval) = current.sample_interval { + let event = collector.sample(current, None); + history.record(&event); + if streaming_enabled { + write_event(&mut writer, &event)?; + } + next_sample_at = Some(Instant::now() + interval); + } else { + next_sample_at = None; + } } } Err(RecvTimeoutError::Disconnected) => return Ok(()), @@ -518,7 +677,7 @@ mod tests { #[test] fn decodes_protocol_commands() { let configure = serde_json::from_str::( - r#"{"version":1,"type":"configure","rootPid":42,"sampleIntervalMs":1000,"externalProcesses":[{"pid":7}]}"#, + r#"{"version":2,"type":"configure","rootPid":42,"sampleIntervalMs":1000,"externalProcesses":[{"pid":7}]}"#, ) .expect("configure command"); @@ -536,14 +695,61 @@ mod tests { } _ => panic!("unexpected command"), } + + let read_history = serde_json::from_str::( + r#"{"version":2,"type":"readHistory","requestId":"history-1","windowMs":60000}"#, + ) + .expect("read history command"); + assert!(matches!( + read_history, + Command::ReadHistory { + request_id, + window_ms: 60_000, + .. 
+ } if request_id == "history-1" + )); } #[test] fn clamps_sample_interval() { - assert_eq!(clamp_sample_interval(1), Duration::from_millis(250)); + assert_eq!(clamp_sample_interval(0), None); + assert_eq!(clamp_sample_interval(1), Some(Duration::from_millis(250))); assert_eq!( clamp_sample_interval(100_000), - Duration::from_millis(60_000) + Some(Duration::from_millis(60_000)) + ); + } + + #[test] + fn retains_bounded_history_without_request_ids() { + let mut history = HistoryRecorder::default(); + for sequence in 0..=MAX_HISTORY_SNAPSHOTS { + history.record(&SnapshotEvent { + version: PROTOCOL_VERSION, + event_type: "snapshot", + sequence: sequence as u64, + sampled_at_unix_ms: sequence as u64 * 1_000, + collection_duration_micros: 1, + scanned_process_count: 0, + retained_process_count: 0, + inaccessible_process_count: 0, + request_id: Some("request".to_owned()), + processes: Vec::new(), + }); + } + + assert_eq!(history.snapshots.len(), MAX_HISTORY_SNAPSHOTS); + assert!( + history + .snapshots + .iter() + .all(|snapshot| snapshot.request_id.is_none()) + ); + assert_eq!( + history + .read(10_000, MAX_HISTORY_SNAPSHOTS as u64 * 1_000) + .len(), + 11 ); } diff --git a/packages/contracts/src/desktopBootstrap.ts b/packages/contracts/src/desktopBootstrap.ts index 2f0a1c8e4aa..61826e64998 100644 --- a/packages/contracts/src/desktopBootstrap.ts +++ b/packages/contracts/src/desktopBootstrap.ts @@ -14,6 +14,7 @@ export const DesktopBackendBootstrap = Schema.Struct({ otlpTracesUrl: Schema.optional(Schema.String), otlpMetricsUrl: Schema.optional(Schema.String), desktopTelemetryFd: Schema.optionalKey(PositiveInt), + desktopTelemetryControlFd: Schema.optionalKey(PositiveInt), resourceMonitorPath: Schema.optionalKey(TrimmedNonEmptyString), }); diff --git a/packages/contracts/src/resourceTelemetry.ts b/packages/contracts/src/resourceTelemetry.ts index 59fa0ce94ad..5827fd854c9 100644 --- a/packages/contracts/src/resourceTelemetry.ts +++ 
b/packages/contracts/src/resourceTelemetry.ts @@ -3,7 +3,7 @@ import * as Schema from "effect/Schema"; import { NonNegativeInt, PositiveInt, TrimmedNonEmptyString } from "./baseSchemas.ts"; import { HostPowerSnapshot } from "./background.ts"; -export const RESOURCE_MONITOR_PROTOCOL_VERSION = 1 as const; +export const RESOURCE_MONITOR_PROTOCOL_VERSION = 2 as const; export const ResourceTelemetryIoSemantics = Schema.Literals([ "storage", @@ -101,6 +101,29 @@ export const ResourceMonitorSampleNowCommand = Schema.Struct({ }); export type ResourceMonitorSampleNowCommand = typeof ResourceMonitorSampleNowCommand.Type; +export const ResourceMonitorSetSampleIntervalCommand = Schema.Struct({ + version: Schema.Literal(RESOURCE_MONITOR_PROTOCOL_VERSION), + type: Schema.Literal("setSampleInterval"), + sampleIntervalMs: NonNegativeInt, +}); +export type ResourceMonitorSetSampleIntervalCommand = + typeof ResourceMonitorSetSampleIntervalCommand.Type; + +export const ResourceMonitorSetStreamingCommand = Schema.Struct({ + version: Schema.Literal(RESOURCE_MONITOR_PROTOCOL_VERSION), + type: Schema.Literal("setStreaming"), + enabled: Schema.Boolean, +}); +export type ResourceMonitorSetStreamingCommand = typeof ResourceMonitorSetStreamingCommand.Type; + +export const ResourceMonitorReadHistoryCommand = Schema.Struct({ + version: Schema.Literal(RESOURCE_MONITOR_PROTOCOL_VERSION), + type: Schema.Literal("readHistory"), + requestId: TrimmedNonEmptyString, + windowMs: NonNegativeInt, +}); +export type ResourceMonitorReadHistoryCommand = typeof ResourceMonitorReadHistoryCommand.Type; + export const ResourceMonitorShutdownCommand = Schema.Struct({ version: Schema.Literal(RESOURCE_MONITOR_PROTOCOL_VERSION), type: Schema.Literal("shutdown"), @@ -110,7 +133,10 @@ export type ResourceMonitorShutdownCommand = typeof ResourceMonitorShutdownComma export const ResourceMonitorCommand = Schema.Union([ ResourceMonitorConfigureCommand, ResourceMonitorSetExternalProcessesCommand, + 
ResourceMonitorSetSampleIntervalCommand, + ResourceMonitorSetStreamingCommand, ResourceMonitorSampleNowCommand, + ResourceMonitorReadHistoryCommand, ResourceMonitorShutdownCommand, ]); export type ResourceMonitorCommand = typeof ResourceMonitorCommand.Type; @@ -140,6 +166,15 @@ export const ResourceMonitorSnapshotEvent = Schema.Struct({ }); export type ResourceMonitorSnapshotEvent = typeof ResourceMonitorSnapshotEvent.Type; +export const ResourceMonitorHistoryChunkEvent = Schema.Struct({ + version: Schema.Literal(RESOURCE_MONITOR_PROTOCOL_VERSION), + type: Schema.Literal("historyChunk"), + requestId: TrimmedNonEmptyString, + done: Schema.Boolean, + snapshots: Schema.Array(ResourceMonitorSnapshotEvent), +}); +export type ResourceMonitorHistoryChunkEvent = typeof ResourceMonitorHistoryChunkEvent.Type; + export const ResourceMonitorErrorEvent = Schema.Struct({ version: Schema.Literal(RESOURCE_MONITOR_PROTOCOL_VERSION), type: Schema.Literal("error"), @@ -152,6 +187,7 @@ export type ResourceMonitorErrorEvent = typeof ResourceMonitorErrorEvent.Type; export const ResourceMonitorEvent = Schema.Union([ ResourceMonitorHelloEvent, ResourceMonitorSnapshotEvent, + ResourceMonitorHistoryChunkEvent, ResourceMonitorErrorEvent, ]); export type ResourceMonitorEvent = typeof ResourceMonitorEvent.Type; @@ -188,6 +224,7 @@ export const DesktopHostTelemetrySnapshot = Schema.Struct({ type: Schema.Literal("desktopTelemetry"), sequence: NonNegativeInt, sampledAtUnixMs: NonNegativeInt, + electronPid: PositiveInt, power: HostPowerSnapshot, speedLimitPercent: Schema.Option(Schema.Number), electronProcesses: Schema.Array(DesktopElectronProcessMetric), @@ -207,6 +244,16 @@ export const DesktopHostTelemetryMessage = Schema.Union([ ]); export type DesktopHostTelemetryMessage = typeof DesktopHostTelemetryMessage.Type; +export const DesktopTelemetrySetDiagnosticsDemand = Schema.Struct({ + version: Schema.Literal(1), + type: Schema.Literal("setDiagnosticsDemand"), + enabled: Schema.Boolean, +}); 
+export type DesktopTelemetrySetDiagnosticsDemand = typeof DesktopTelemetrySetDiagnosticsDemand.Type; + +export const DesktopTelemetryControlMessage = Schema.Union([DesktopTelemetrySetDiagnosticsDemand]); +export type DesktopTelemetryControlMessage = typeof DesktopTelemetryControlMessage.Type; + export const ResourceTelemetryProcess = Schema.Struct({ identity: ResourceTelemetryProcessIdentity, ppid: NonNegativeInt, From 4a875dbe519b7891388272f0c81e0498619cc4f4 Mon Sep 17 00:00:00 2001 From: Julius Marminge Date: Wed, 17 Jun 2026 16:20:24 -0700 Subject: [PATCH 4/5] Buffer backend output before persisting failure logs - Capture child process output in memory during normal runs - Persist buffered output only on failures and bound retained data - Increase trace/event batching windows to reduce flush churn Co-authored-by: codex --- .../src/app/DesktopObservability.test.ts | 149 ++++++++++++--- apps/desktop/src/app/DesktopObservability.ts | 178 +++++++++++++++--- .../src/backend/DesktopBackendManager.test.ts | 81 +++++++- .../src/backend/DesktopBackendManager.ts | 45 +++-- apps/server/src/cli/config.test.ts | 2 +- apps/server/src/cli/config.ts | 2 +- .../src/provider/Layers/EventNdjsonLogger.ts | 42 ++++- packages/shared/src/observability.ts | 2 +- 8 files changed, 421 insertions(+), 80 deletions(-) diff --git a/apps/desktop/src/app/DesktopObservability.test.ts b/apps/desktop/src/app/DesktopObservability.test.ts index a78de48d5e1..73b386671ce 100644 --- a/apps/desktop/src/app/DesktopObservability.test.ts +++ b/apps/desktop/src/app/DesktopObservability.test.ts @@ -49,14 +49,14 @@ const environmentInput = (baseDir: string) => runningUnderArm64Translation: false, }) satisfies DesktopEnvironment.MakeDesktopEnvironmentInput; -const makeEnvironmentLayer = (baseDir: string) => +const makeEnvironmentLayer = (baseDir: string, isDevelopment = true) => DesktopEnvironment.layer(environmentInput(baseDir)).pipe( Layer.provide( Layer.mergeAll( NodeServices.layer, 
DesktopConfig.layerTest({ T3CODE_HOME: baseDir, - VITE_DEV_SERVER_URL: "http://127.0.0.1:5733", + VITE_DEV_SERVER_URL: isDevelopment ? "http://127.0.0.1:5733" : undefined, }), ), ), @@ -112,41 +112,56 @@ describe("DesktopObservability", () => { ), ); - it.effect("persists backend child output as structured JSON records in development", () => + it.effect("buffers backend child output and persists it only when a failure is reported", () => Effect.gen(function* () { const fileSystem = yield* FileSystem.FileSystem; const baseDir = yield* fileSystem.makeTempDirectoryScoped({ prefix: "t3-desktop-backend-output-log-test-", }); - const environmentLayer = makeEnvironmentLayer(baseDir); + const environmentLayer = makeEnvironmentLayer(baseDir, false); const logPath = yield* Effect.gen(function* () { const environment = yield* DesktopEnvironment.DesktopEnvironment; return environment.path.join(environment.logDir, "server-child.log"); }).pipe(Effect.provide(environmentLayer)); + const tracePath = yield* Effect.gen(function* () { + const environment = yield* DesktopEnvironment.DesktopEnvironment; + return environment.path.join(environment.logDir, "desktop.trace.ndjson"); + }).pipe(Effect.provide(environmentLayer)); - yield* Effect.gen(function* () { - const outputLog = yield* DesktopObservability.DesktopBackendOutputLog; - yield* outputLog.writeSessionBoundary({ - phase: "START", - details: "pid=123 port=3773 cwd=/repo", - }); - yield* outputLog.writeOutputChunk("stdout", new TextEncoder().encode("hello server\n")); - }).pipe( - Effect.annotateLogs({ runId: "test-run" }), - Effect.provide(DesktopObservability.layer.pipe(Layer.provideMerge(environmentLayer))), + yield* Effect.scoped( + Effect.gen(function* () { + const outputLog = yield* DesktopObservability.DesktopBackendOutputLog; + yield* outputLog.beginSession({ + details: "pid=123 port=3773 cwd=/repo", + }); + yield* outputLog.writeOutputChunk("stdout", new TextEncoder().encode("hello server\n")); + assert.isFalse(yield* 
fileSystem.exists(logPath)); + yield* outputLog.persistFailure({ details: "code=1" }); + yield* outputLog.beginSession({ details: "pid=456" }); + yield* outputLog.writeOutputChunk( + "stderr", + new TextEncoder().encode("normal shutdown\n"), + ); + yield* outputLog.discardSession; + }).pipe( + Effect.annotateLogs({ runId: "test-run" }), + Effect.provide(DesktopObservability.layer.pipe(Layer.provideMerge(environmentLayer))), + ), ); const log = yield* fileSystem.readFileString(logPath); const lines = log.trimEnd().split("\n"); - const boundary = yield* decodeDesktopBackendChildLogRecord(lines[0] ?? ""); + const start = yield* decodeDesktopBackendChildLogRecord(lines[0] ?? ""); const output = yield* decodeDesktopBackendChildLogRecord(lines[1] ?? ""); + const end = yield* decodeDesktopBackendChildLogRecord(lines[2] ?? ""); - assert.equal(boundary.message, "backend child process session start"); - assert.equal(boundary.level, "INFO"); - assert.equal(boundary.annotations.component, "desktop-backend-child"); - assert.equal(boundary.annotations.runId, "test-run"); - assert.equal(boundary.annotations.phase, "START"); - assert.equal(boundary.annotations.details, "pid=123 port=3773 cwd=/repo"); + assert.equal(lines.length, 3); + assert.equal(start.message, "backend child process failure output start"); + assert.equal(start.level, "ERROR"); + assert.equal(start.annotations.component, "desktop-backend-child"); + assert.equal(start.annotations.runId, "test-run"); + assert.equal(start.annotations.phase, "START"); + assert.equal(start.annotations.details, "pid=123 port=3773 cwd=/repo"); assert.equal(output.message, "backend child process output"); assert.equal(output.level, "INFO"); @@ -154,6 +169,98 @@ describe("DesktopObservability", () => { assert.equal(output.annotations.runId, "test-run"); assert.equal(output.annotations.stream, "stdout"); assert.equal(output.annotations.text, "hello server\n"); + + assert.equal(end.message, "backend child process failure output end"); + 
assert.equal(end.level, "ERROR"); + assert.equal(end.annotations.phase, "END"); + assert.equal(end.annotations.details, "code=1"); + + const traceRecords = (yield* fileSystem.readFileString(tracePath)) + .trim() + .split("\n") + .filter((line) => line.length > 0) + .map((line) => decodeTraceRecordLine(line)); + assert.isFalse( + traceRecords.some( + (record) => record.name === "desktop.observability.backendOutput.writeOutputChunk", + ), + ); + }).pipe( + Effect.scoped, + Effect.provide(Layer.mergeAll(NodeServices.layer, NodeHttpClient.layerUndici)), + ), + ); + + it.effect("retains only the last mebibyte of backend child output", () => + Effect.gen(function* () { + const fileSystem = yield* FileSystem.FileSystem; + const baseDir = yield* fileSystem.makeTempDirectoryScoped({ + prefix: "t3-desktop-backend-output-bound-test-", + }); + const environmentLayer = makeEnvironmentLayer(baseDir, false); + const logPath = yield* Effect.gen(function* () { + const environment = yield* DesktopEnvironment.DesktopEnvironment; + return environment.path.join(environment.logDir, "server-child.log"); + }).pipe(Effect.provide(environmentLayer)); + const maxBufferedBytes = 1024 * 1024; + const discardedPrefixBytes = 128; + const output = new Uint8Array(maxBufferedBytes + discardedPrefixBytes); + output.fill("x".charCodeAt(0)); + output.fill("y".charCodeAt(0), 0, discardedPrefixBytes); + + yield* Effect.scoped( + Effect.gen(function* () { + const outputLog = yield* DesktopObservability.DesktopBackendOutputLog; + yield* outputLog.beginSession({ details: "pid=123" }); + yield* outputLog.writeOutputChunk("stderr", output); + yield* outputLog.persistFailure({ details: "code=1" }); + }).pipe( + Effect.provide(DesktopObservability.layer.pipe(Layer.provideMerge(environmentLayer))), + ), + ); + + const lines = (yield* fileSystem.readFileString(logPath)).trimEnd().split("\n"); + const record = yield* decodeDesktopBackendChildLogRecord(lines[1] ?? 
""); + const text = record.annotations.text; + assert.equal(typeof text, "string"); + if (typeof text !== "string") { + return; + } + assert.equal(new TextEncoder().encode(text).byteLength, maxBufferedBytes); + assert.isFalse(text.includes("y")); + }).pipe( + Effect.scoped, + Effect.provide(Layer.mergeAll(NodeServices.layer, NodeHttpClient.layerUndici)), + ), + ); + + it.effect("bounds the number of retained backend child output chunks", () => + Effect.gen(function* () { + const fileSystem = yield* FileSystem.FileSystem; + const baseDir = yield* fileSystem.makeTempDirectoryScoped({ + prefix: "t3-desktop-backend-output-chunks-test-", + }); + const environmentLayer = makeEnvironmentLayer(baseDir, false); + const logPath = yield* Effect.gen(function* () { + const environment = yield* DesktopEnvironment.DesktopEnvironment; + return environment.path.join(environment.logDir, "server-child.log"); + }).pipe(Effect.provide(environmentLayer)); + + yield* Effect.scoped( + Effect.gen(function* () { + const outputLog = yield* DesktopObservability.DesktopBackendOutputLog; + yield* outputLog.beginSession({ details: "pid=123" }); + for (let index = 0; index < 300; index += 1) { + yield* outputLog.writeOutputChunk("stderr", Uint8Array.of(index % 128)); + } + yield* outputLog.persistFailure({ details: "code=1" }); + }).pipe( + Effect.provide(DesktopObservability.layer.pipe(Layer.provideMerge(environmentLayer))), + ), + ); + + const lines = (yield* fileSystem.readFileString(logPath)).trimEnd().split("\n"); + assert.equal(lines.length, 258); }).pipe( Effect.scoped, Effect.provide(Layer.mergeAll(NodeServices.layer, NodeHttpClient.layerUndici)), diff --git a/apps/desktop/src/app/DesktopObservability.ts b/apps/desktop/src/app/DesktopObservability.ts index 2349fe52dc3..0468e810883 100644 --- a/apps/desktop/src/app/DesktopObservability.ts +++ b/apps/desktop/src/app/DesktopObservability.ts @@ -21,8 +21,10 @@ import * as DesktopEnvironment from "./DesktopEnvironment.ts"; const 
DESKTOP_LOG_FILE_MAX_BYTES = 10 * 1024 * 1024; const DESKTOP_LOG_FILE_MAX_FILES = 10; +const DESKTOP_BACKEND_OUTPUT_BUFFER_MAX_BYTES = 1024 * 1024; +const DESKTOP_BACKEND_OUTPUT_BUFFER_MAX_CHUNKS = 256; const DESKTOP_BACKEND_CHILD_LOG_FIBER_ID = "#backend-child"; -const DESKTOP_TRACE_BATCH_WINDOW_MS = 200; +const DESKTOP_TRACE_BATCH_WINDOW_MS = 1_000; export interface RotatingLogFileWriter { readonly writeBytes: (chunk: Uint8Array) => Effect.Effect; @@ -30,14 +32,13 @@ export interface RotatingLogFileWriter { } export interface DesktopBackendOutputLogShape { - readonly writeSessionBoundary: (input: { - readonly phase: "START" | "END"; - readonly details: string; - }) => Effect.Effect; + readonly beginSession: (input: { readonly details: string }) => Effect.Effect; readonly writeOutputChunk: ( streamName: "stdout" | "stderr", chunk: Uint8Array, ) => Effect.Effect; + readonly persistFailure: (input: { readonly details: string }) => Effect.Effect; + readonly discardSession: Effect.Effect; } export class DesktopBackendOutputLog extends Context.Service< @@ -113,10 +114,84 @@ const encodeDesktopBackendChildLogRecord = Schema.encodeEffect( ); const DesktopBackendOutputLogNoop: DesktopBackendOutputLogShape = { - writeSessionBoundary: () => Effect.void, + beginSession: () => Effect.void, writeOutputChunk: () => Effect.void, + persistFailure: () => Effect.void, + discardSession: Effect.void, }; +interface BufferedBackendOutputChunk { + readonly streamName: "stdout" | "stderr"; + readonly chunk: Uint8Array; +} + +interface BackendOutputSession { + readonly runId: string; + readonly startDetails: string; + readonly chunks: ReadonlyArray; + readonly byteLength: number; +} + +function appendBoundedOutputChunk( + session: BackendOutputSession, + streamName: "stdout" | "stderr", + chunk: Uint8Array, +): BackendOutputSession { + if (chunk.byteLength === 0) { + return session; + } + + const retainedChunk = + chunk.byteLength > DESKTOP_BACKEND_OUTPUT_BUFFER_MAX_BYTES + ? 
chunk.slice(chunk.byteLength - DESKTOP_BACKEND_OUTPUT_BUFFER_MAX_BYTES) + : chunk.slice(); + const chunks = [ + ...session.chunks, + { + streamName, + chunk: retainedChunk, + }, + ]; + let byteLength = session.byteLength + retainedChunk.byteLength; + let overflow = Math.max(0, byteLength - DESKTOP_BACKEND_OUTPUT_BUFFER_MAX_BYTES); + let firstRetainedIndex = 0; + + while (overflow > 0) { + const first = chunks[firstRetainedIndex]; + if (!first) { + break; + } + if (first.chunk.byteLength <= overflow) { + overflow -= first.chunk.byteLength; + byteLength -= first.chunk.byteLength; + firstRetainedIndex += 1; + continue; + } + + chunks[firstRetainedIndex] = { + ...first, + chunk: first.chunk.slice(overflow), + }; + byteLength -= overflow; + overflow = 0; + } + + const excessChunks = Math.max( + 0, + chunks.length - firstRetainedIndex - DESKTOP_BACKEND_OUTPUT_BUFFER_MAX_CHUNKS, + ); + for (let index = firstRetainedIndex; index < firstRetainedIndex + excessChunks; index += 1) { + byteLength -= chunks[index]?.chunk.byteLength ?? 
0; + } + firstRetainedIndex += excessChunks; + + return { + ...session, + chunks: chunks.slice(firstRetainedIndex), + byteLength, + }; +} + const currentDesktopRunId = Effect.gen(function* () { const annotations = yield* References.CurrentLogAnnotations; const runId = annotations.runId; @@ -301,45 +376,86 @@ const backendOutputLogLayer = Layer.effect( const writer = yield* makeRotatingLogFileWriter({ filePath: environment.path.join(environment.logDir, "server-child.log"), }).pipe(Effect.option); + const sessionRef = yield* Ref.make(Option.none()); return Option.match(writer, { onNone: () => DesktopBackendOutputLogNoop, - onSome: (logFile) => - ({ - writeSessionBoundary: Effect.fn( - "desktop.observability.backendOutput.writeSessionBoundary", - )(function* ({ phase, details }) { + onSome: (logFile) => { + return { + beginSession: Effect.fn("desktop.observability.backendOutput.beginSession")(function* ({ + details, + }) { const runId = yield* currentDesktopRunId; - yield* writeBackendChildLogRecord(logFile, { - message: `backend child process session ${phase.toLowerCase()}`, - level: "INFO", - annotations: { - component: "desktop-backend-child", + yield* Ref.set( + sessionRef, + Option.some({ runId, - phase, - details: sanitizeLogValue(details), - }, - }); + startDetails: sanitizeLogValue(details), + chunks: [], + byteLength: 0, + }), + ); + }), + writeOutputChunk: Effect.fnUntraced(function* (streamName, chunk) { + if (environment.isDevelopment) { + yield* writeDevelopmentConsoleOutput(streamName, chunk); + } + yield* Ref.update( + sessionRef, + Option.map((session) => appendBoundedOutputChunk(session, streamName, chunk)), + ); }), - writeOutputChunk: Effect.fn("desktop.observability.backendOutput.writeOutputChunk")( - function* (streamName, chunk) { - if (environment.isDevelopment) { - yield* writeDevelopmentConsoleOutput(streamName, chunk); + persistFailure: Effect.fn("desktop.observability.backendOutput.persistFailure")( + function* ({ details }) { + const 
session = yield* Ref.modify(sessionRef, (current) => [ + current, + Option.map(current, (value) => ({ + ...value, + chunks: [], + byteLength: 0, + })), + ]); + if (Option.isNone(session)) { + return; + } + + yield* writeBackendChildLogRecord(logFile, { + message: "backend child process failure output start", + level: "ERROR", + annotations: { + component: "desktop-backend-child", + runId: session.value.runId, + phase: "START", + details: session.value.startDetails, + }, + }); + for (const output of session.value.chunks) { + yield* writeBackendChildLogRecord(logFile, { + message: "backend child process output", + level: output.streamName === "stderr" ? "ERROR" : "INFO", + annotations: { + component: "desktop-backend-child", + runId: session.value.runId, + stream: output.streamName, + text: textDecoder.decode(output.chunk), + }, + }); } - const runId = yield* currentDesktopRunId; yield* writeBackendChildLogRecord(logFile, { - message: "backend child process output", - level: streamName === "stderr" ? "ERROR" : "INFO", + message: "backend child process failure output end", + level: "ERROR", annotations: { component: "desktop-backend-child", - runId, - stream: streamName, - text: textDecoder.decode(chunk), + runId: session.value.runId, + phase: "END", + details: sanitizeLogValue(details), }, }); }, ), - }) satisfies DesktopBackendOutputLogShape, + discardSession: Ref.set(sessionRef, Option.none()), + } satisfies DesktopBackendOutputLogShape; + }, }); }), ); diff --git a/apps/desktop/src/backend/DesktopBackendManager.test.ts b/apps/desktop/src/backend/DesktopBackendManager.test.ts index 86e642e80b3..5c29b4f12e7 100644 --- a/apps/desktop/src/backend/DesktopBackendManager.test.ts +++ b/apps/desktop/src/backend/DesktopBackendManager.test.ts @@ -140,8 +140,10 @@ function makeManagerLayer(input: { ? 
Layer.succeed(DesktopState.DesktopState, input.desktopState) : DesktopState.layer, Layer.succeed(DesktopObservability.DesktopBackendOutputLog, { - writeSessionBoundary: () => Effect.void, + beginSession: () => Effect.void, writeOutputChunk: () => Effect.void, + persistFailure: () => Effect.void, + discardSession: Effect.void, ...input.backendOutputLog, } satisfies DesktopObservability.DesktopBackendOutputLogShape), Layer.succeed(DesktopWindow.DesktopWindow, { @@ -200,8 +202,7 @@ describe("DesktopBackendManager", () => { }).pipe(Effect.andThen(Deferred.succeed(ready, void 0))), }, backendOutputLog: { - writeSessionBoundary: ({ phase }) => - phase === "END" ? Queue.offer(exited, void 0).pipe(Effect.asVoid) : Effect.void, + persistFailure: () => Queue.offer(exited, void 0).pipe(Effect.asVoid), }, }); @@ -272,6 +273,52 @@ describe("DesktopBackendManager", () => { }), ); + it.effect("drains trailing child output before persisting an unexpected exit", () => + Effect.gen(function* () { + const persistedOutput = yield* Deferred.make>(); + const outputDrainStarted = yield* Deferred.make(); + const outputChunks = yield* Ref.make>([]); + const spawnerLayer = Layer.succeed( + ChildProcessSpawner.ChildProcessSpawner, + ChildProcessSpawner.make(() => + Effect.succeed( + makeProcess({ + stdout: Stream.fromEffect( + Deferred.succeed(outputDrainStarted, void 0).pipe( + Effect.andThen(Effect.sleep(Duration.millis(50))), + Effect.as(new TextEncoder().encode("trailing output\n")), + ), + ), + exitCode: Effect.succeed(ChildProcessSpawner.ExitCode(1)), + }), + ), + ), + ); + const managerLayer = makeManagerLayer({ + spawnerLayer, + httpClientLayer: httpClientLayer(() => Effect.never), + backendOutputLog: { + writeOutputChunk: (_streamName, chunk) => + Ref.update(outputChunks, (current) => [...current, new TextDecoder().decode(chunk)]), + persistFailure: () => + Ref.get(outputChunks).pipe( + Effect.flatMap((chunks) => Deferred.succeed(persistedOutput, chunks)), + Effect.asVoid, + ), + }, 
+ }); + + yield* Effect.gen(function* () { + const manager = yield* DesktopBackendManager.DesktopBackendManager; + yield* manager.start; + yield* Deferred.await(outputDrainStarted); + yield* TestClock.adjust(Duration.millis(50)); + + assert.deepEqual(yield* Deferred.await(persistedOutput), ["trailing output\n"]); + }).pipe(Effect.provide(Layer.merge(TestClock.layer(), managerLayer))); + }), + ); + it.effect("retries HTTP readiness before reporting the backend ready", () => Effect.gen(function* () { const requestUrls: Array = []; @@ -309,8 +356,7 @@ describe("DesktopBackendManager", () => { }).pipe(Effect.andThen(Deferred.succeed(ready, void 0))), }, backendOutputLog: { - writeSessionBoundary: ({ phase }) => - phase === "END" ? Queue.offer(exited, void 0).pipe(Effect.asVoid) : Effect.void, + persistFailure: () => Queue.offer(exited, void 0).pipe(Effect.asVoid), }, }); @@ -343,6 +389,8 @@ describe("DesktopBackendManager", () => { const ready = yield* Deferred.make(); const backendReady = yield* Ref.make(false); const quitting = yield* Ref.make(false); + let persistedFailureCount = 0; + let discardedSessionCount = 0; const spawnerLayer = Layer.succeed( ChildProcessSpawner.ChildProcessSpawner, @@ -374,6 +422,15 @@ describe("DesktopBackendManager", () => { desktopWindow: { handleBackendReady: Deferred.succeed(ready, void 0).pipe(Effect.asVoid), }, + backendOutputLog: { + persistFailure: () => + Effect.sync(() => { + persistedFailureCount += 1; + }), + discardSession: Effect.sync(() => { + discardedSessionCount += 1; + }), + }, }); yield* Effect.gen(function* () { @@ -393,6 +450,8 @@ describe("DesktopBackendManager", () => { yield* manager.stop(); assert.equal(startCount, 1); assert.equal(closedCount, 1); + assert.equal(persistedFailureCount, 0); + assert.equal(discardedSessionCount, 1); const stoppedSnapshot = yield* manager.snapshot; assert.isFalse(yield* Ref.get(backendReady)); @@ -406,6 +465,7 @@ describe("DesktopBackendManager", () => { it.effect("restarts an 
unexpectedly exited backend with the Effect clock", () => Effect.gen(function* () { const starts = yield* Queue.unbounded(); + const failures = yield* Queue.unbounded(); let startCount = 0; const spawnerLayer = Layer.succeed( @@ -425,6 +485,9 @@ describe("DesktopBackendManager", () => { const managerLayer = makeManagerLayer({ spawnerLayer, httpClientLayer: httpClientLayer(() => Effect.never), + backendOutputLog: { + persistFailure: ({ details }) => Queue.offer(failures, details).pipe(Effect.asVoid), + }, }); yield* Effect.gen(function* () { @@ -432,6 +495,7 @@ describe("DesktopBackendManager", () => { yield* manager.start; assert.equal(yield* Queue.take(starts), 1); + assert.equal(yield* Queue.take(failures), "pid=123 code=1"); yield* TestClock.adjust(Duration.millis(499)); assert.equal(yield* Queue.size(starts), 0); @@ -488,6 +552,13 @@ describe("DesktopBackendManager", () => { yield* manager.start; assert.equal(yield* Queue.take(starts), 1); + let restartScheduled = false; + while (!restartScheduled) { + restartScheduled = (yield* manager.snapshot).restartScheduled; + if (!restartScheduled) { + yield* Effect.yieldNow; + } + } yield* manager.start; assert.equal(yield* Queue.take(starts), 2); diff --git a/apps/desktop/src/backend/DesktopBackendManager.ts b/apps/desktop/src/backend/DesktopBackendManager.ts index 5e763f1da10..94a4f419421 100644 --- a/apps/desktop/src/backend/DesktopBackendManager.ts +++ b/apps/desktop/src/backend/DesktopBackendManager.ts @@ -39,6 +39,7 @@ const DEFAULT_BACKEND_READINESS_TIMEOUT = Duration.minutes(1); const DEFAULT_BACKEND_READINESS_INTERVAL = Duration.millis(100); const DEFAULT_BACKEND_READINESS_REQUEST_TIMEOUT = Duration.seconds(1); const DEFAULT_BACKEND_TERMINATE_GRACE = Duration.seconds(2); +const DEFAULT_BACKEND_OUTPUT_DRAIN_TIMEOUT = Duration.millis(250); const BACKEND_READINESS_PATH = "/.well-known/t3/environment"; type BackendProcessLayerServices = ChildProcessSpawner.ChildProcessSpawner | HttpClient.HttpClient; @@ -280,6 
+281,7 @@ const runBackendProcess = Effect.fn("runBackendProcess")(function* ( const handle = yield* spawner .spawn(command) .pipe(Effect.mapError((cause) => new BackendProcessSpawnError({ cause }))); + const outputFibers: Array> = []; yield* options.onStarted?.(handle.pid) ?? Effect.void; if ( @@ -309,8 +311,10 @@ const runBackendProcess = Effect.fn("runBackendProcess")(function* ( ); } if (options.captureOutput) { - yield* drainBackendOutput("stdout", handle.stdout, onOutput).pipe(Effect.forkScoped); - yield* drainBackendOutput("stderr", handle.stderr, onOutput).pipe(Effect.forkScoped); + outputFibers.push( + yield* drainBackendOutput("stdout", handle.stdout, onOutput).pipe(Effect.forkScoped), + yield* drainBackendOutput("stderr", handle.stderr, onOutput).pipe(Effect.forkScoped), + ); } yield* waitForHttpReady( options.httpBaseUrl, @@ -321,7 +325,12 @@ const runBackendProcess = Effect.fn("runBackendProcess")(function* ( Effect.forkScoped, ); - return describeProcessExit(yield* Effect.result(handle.exitCode)); + const exit = describeProcessExit(yield* Effect.result(handle.exitCode)); + yield* Effect.forEach(outputFibers, Fiber.await, { + concurrency: "unbounded", + discard: true, + }).pipe(Effect.timeout(DEFAULT_BACKEND_OUTPUT_DRAIN_TIMEOUT), Effect.ignore); + return exit; }); const makeDesktopBackendManager = Effect.fn("makeDesktopBackendManager")(function* () { @@ -467,10 +476,13 @@ const makeDesktopBackendManager = Effect.fn("makeDesktopBackendManager")(functio if (isCurrentRun) { if (Option.isSome(pid)) { - yield* backendOutputLog.writeSessionBoundary({ - phase: "END", - details: `pid=${pid.value} ${reason}`, - }); + if (nextState.desiredRunning) { + yield* backendOutputLog.persistFailure({ + details: `pid=${pid.value} ${reason}`, + }); + } else { + yield* backendOutputLog.discardSession; + } } yield* Ref.set(desktopState.backendReady, false); } @@ -491,8 +503,7 @@ const makeDesktopBackendManager = Effect.fn("makeDesktopBackendManager")(functio ...run, pid: 
Option.some(pid), })); - yield* backendOutputLog.writeSessionBoundary({ - phase: "START", + yield* backendOutputLog.beginSession({ details: `pid=${pid} port=${config.value.bootstrap.port} cwd=${config.value.cwd}`, }); }), @@ -525,10 +536,16 @@ const makeDesktopBackendManager = Effect.fn("makeDesktopBackendManager")(functio ), ); }), - onReadinessFailure: (error) => - logBackendManagerWarning("backend readiness check failed during bootstrap", { - error: error.message, - }), + onReadinessFailure: Effect.fn("desktop.backendManager.onReadinessFailure")( + function* (error) { + yield* logBackendManagerWarning("backend readiness check failed during bootstrap", { + error: error.message, + }); + yield* backendOutputLog.persistFailure({ + details: error.message, + }); + }, + ), onOutput: (streamName, chunk) => backendOutputLog.writeOutputChunk(streamName, chunk), }).pipe( Effect.provideService(ChildProcessSpawner.ChildProcessSpawner, spawner), @@ -639,7 +656,7 @@ const makeDesktopBackendManager = Effect.fn("makeDesktopBackendManager")(functio }); yield* Option.match(active, { onNone: () => Effect.void, - onSome: (run) => closeRun(run, options), + onSome: (run) => closeRun(run, options).pipe(Effect.andThen(backendOutputLog.discardSession)), }); }); diff --git a/apps/server/src/cli/config.test.ts b/apps/server/src/cli/config.test.ts index f6464d13394..9d6622ef6d7 100644 --- a/apps/server/src/cli/config.test.ts +++ b/apps/server/src/cli/config.test.ts @@ -38,7 +38,7 @@ it.layer(NodeServices.layer)("cli config resolution", (it) => { const defaultObservabilityConfig = { traceMinLevel: "Info", traceTimingEnabled: true, - traceBatchWindowMs: 200, + traceBatchWindowMs: 1_000, traceMaxBytes: 10 * 1024 * 1024, traceMaxFiles: 10, otlpTracesUrl: undefined, diff --git a/apps/server/src/cli/config.ts b/apps/server/src/cli/config.ts index 9b2d9044360..58f827541c9 100644 --- a/apps/server/src/cli/config.ts +++ b/apps/server/src/cli/config.ts @@ -91,7 +91,7 @@ const EnvServerConfig = 
Config.all({ ), traceMaxBytes: Config.int("T3CODE_TRACE_MAX_BYTES").pipe(Config.withDefault(10 * 1024 * 1024)), traceMaxFiles: Config.int("T3CODE_TRACE_MAX_FILES").pipe(Config.withDefault(10)), - traceBatchWindowMs: Config.int("T3CODE_TRACE_BATCH_WINDOW_MS").pipe(Config.withDefault(200)), + traceBatchWindowMs: Config.int("T3CODE_TRACE_BATCH_WINDOW_MS").pipe(Config.withDefault(1_000)), otlpTracesUrl: Config.string("T3CODE_OTLP_TRACES_URL").pipe( Config.option, Config.map(Option.getOrUndefined), diff --git a/apps/server/src/provider/Layers/EventNdjsonLogger.ts b/apps/server/src/provider/Layers/EventNdjsonLogger.ts index 8e736672a7b..194c5746ef7 100644 --- a/apps/server/src/provider/Layers/EventNdjsonLogger.ts +++ b/apps/server/src/provider/Layers/EventNdjsonLogger.ts @@ -90,6 +90,41 @@ function resolveStreamLabel(stream: EventNdjsonStream): string { } } +function writeBatchedMessages( + sink: RotatingFileSink, + messages: ReadonlyArray, + maxBytes: number, +): number { + let pendingMessages: Array = []; + let pendingBytes = 0; + let logicalWriteBytes = 0; + + const flush = () => { + if (pendingMessages.length === 0) { + return; + } + sink.write(pendingMessages.join("")); + pendingMessages = []; + pendingBytes = 0; + }; + + for (const message of messages) { + const messageBytes = textEncoder.encode(message).byteLength; + logicalWriteBytes += messageBytes; + if (pendingBytes > 0 && pendingBytes + messageBytes > maxBytes) { + flush(); + } + pendingMessages.push(message); + pendingBytes += messageBytes; + if (pendingBytes >= maxBytes) { + flush(); + } + } + flush(); + + return logicalWriteBytes; +} + const toLogMessage = Effect.fn("toLogMessage")(function* ( event: unknown, ): Effect.fn.Return { @@ -144,14 +179,9 @@ const makeThreadWriter = Effect.fn("makeThreadWriter")(function* (input: { const startedAt = yield* Clock.currentTimeMillis; const flushResult = yield* Effect.sync(() => { try { - let logicalWriteBytes = 0; - for (const message of messages) { - 
sink.write(message); - logicalWriteBytes += textEncoder.encode(message).byteLength; - } return { ok: true as const, - logicalWriteBytes, + logicalWriteBytes: writeBatchedMessages(sink, messages, input.maxBytes), count: messages.length, }; } catch (error) { diff --git a/packages/shared/src/observability.ts b/packages/shared/src/observability.ts index 9e0d6352a0c..48ed58bcd2b 100644 --- a/packages/shared/src/observability.ts +++ b/packages/shared/src/observability.ts @@ -8,7 +8,7 @@ import { OtlpResource, OtlpTracer } from "effect/unstable/observability"; import { RotatingFileSink } from "./logging.ts"; -const FLUSH_BUFFER_THRESHOLD = 32; +const FLUSH_BUFFER_THRESHOLD = 256; const textEncoder = new TextEncoder(); export type TraceAttributes = Readonly>; From 7addf7c6d6f0fa33d59d84c023859ba8da18ac93 Mon Sep 17 00:00:00 2001 From: Julius Marminge Date: Thu, 18 Jun 2026 22:00:25 -0700 Subject: [PATCH 5/5] Remove legacy local backend and telemetry RPC - drop browser-only local backend stubs in favor of desktop bridge - route resource telemetry through the subscription path --- apps/server/src/server.test.ts | 17 +++ apps/server/src/ws.ts | 5 - apps/web/src/env.ts | 6 +- apps/web/src/localApi.test.ts | 19 +-- apps/web/src/localApi.ts | 29 +--- apps/web/src/vite-env.d.ts | 3 +- packages/contracts/src/ipc.ts | 264 +-------------------------------- packages/contracts/src/rpc.ts | 8 - 8 files changed, 26 insertions(+), 325 deletions(-) diff --git a/apps/server/src/server.test.ts b/apps/server/src/server.test.ts index f084b89a2f8..5adc814c7fb 100644 --- a/apps/server/src/server.test.ts +++ b/apps/server/src/server.test.ts @@ -4272,6 +4272,23 @@ it.layer(NodeServices.layer)("server router seam", (it) => { }).pipe(Effect.provide(NodeHttpServer.layerTest)), ); + it.effect("routes websocket resource telemetry through the subscription", () => + Effect.gen(function* () { + yield* buildAppUnderTest(); + + const wsUrl = yield* getWsServerUrl("/ws"); + const snapshot = yield* 
Effect.scoped( + withWsRpcClient(wsUrl, (client) => + client[WS_METHODS.subscribeResourceTelemetry]({}).pipe(Stream.runHead), + ), + ); + + assertTrue(Option.isSome(snapshot)); + assert.equal(snapshot.value.processes.length, 0); + assert.equal(snapshot.value.groups.backend.processCount, 0); + }).pipe(Effect.provide(NodeHttpServer.layerTest)), + ); + it.effect("routes websocket rpc subscribeServerConfig emits provider status updates", () => Effect.gen(function* () { const nextProviders = [ diff --git a/apps/server/src/ws.ts b/apps/server/src/ws.ts index 57822a2e739..db2475e8d3d 100644 --- a/apps/server/src/ws.ts +++ b/apps/server/src/ws.ts @@ -160,7 +160,6 @@ const RPC_REQUIRED_SCOPE = new Map([ [WS_METHODS.serverGetTraceDiagnostics, AuthOrchestrationReadScope], [WS_METHODS.serverGetProcessDiagnostics, AuthOrchestrationReadScope], [WS_METHODS.serverGetProcessResourceHistory, AuthOrchestrationReadScope], - [WS_METHODS.serverGetResourceTelemetry, AuthOrchestrationReadScope], [WS_METHODS.serverGetResourceTelemetryHistory, AuthOrchestrationReadScope], [WS_METHODS.serverRetryResourceTelemetry, AuthOrchestrationOperateScope], [WS_METHODS.serverSignalProcess, AuthOrchestrationOperateScope], @@ -1124,10 +1123,6 @@ const makeWsRpcLayer = (currentSession: AuthenticatedSession) => "rpc.aggregate": "server", }, ), - [WS_METHODS.serverGetResourceTelemetry]: (_input) => - observeRpcEffect(WS_METHODS.serverGetResourceTelemetry, resourceTelemetry.latest, { - "rpc.aggregate": "server", - }), [WS_METHODS.serverGetResourceTelemetryHistory]: (input) => observeRpcEffect( WS_METHODS.serverGetResourceTelemetryHistory, diff --git a/apps/web/src/env.ts b/apps/web/src/env.ts index fb2e493cada..2e08dd33698 100644 --- a/apps/web/src/env.ts +++ b/apps/web/src/env.ts @@ -1,8 +1,6 @@ /** * True when running inside the Electron preload bridge, false in a regular browser. 
- * The preload script sets window.nativeApi via contextBridge before any web-app + * The preload script sets window.desktopBridge via contextBridge before any web-app * code executes, so this is reliable at module load time. */ -export const isElectron = - typeof window !== "undefined" && - (window.desktopBridge !== undefined || window.nativeApi !== undefined); +export const isElectron = typeof window !== "undefined" && window.desktopBridge !== undefined; diff --git a/apps/web/src/localApi.test.ts b/apps/web/src/localApi.test.ts index 3379f5ed989..260256c1250 100644 --- a/apps/web/src/localApi.test.ts +++ b/apps/web/src/localApi.test.ts @@ -49,7 +49,6 @@ beforeEach(() => { }); } Reflect.deleteProperty(testWindow(), "desktopBridge"); - Reflect.deleteProperty(testWindow(), "nativeApi"); Object.defineProperty(testWindow(), "localStorage", { configurable: true, value: createLocalStorageStub(), @@ -61,16 +60,12 @@ afterEach(() => { }); describe("LocalApi", () => { - it("keeps backend operations unavailable in the browser facade", async () => { + it("keeps backend operations out of the local host facade", async () => { const { createLocalApi } = await import("./localApi"); const api = createLocalApi(); - await expect(api.server.getConfig()).rejects.toThrow( - "Local backend API is unavailable before a backend is paired.", - ); - await expect(api.shell.openInEditor("/tmp", "cursor")).rejects.toThrow( - "Local backend API is unavailable before a backend is paired.", - ); + expect(api).not.toHaveProperty("server"); + expect(api.shell).not.toHaveProperty("openInEditor"); }); it("uses the browser context-menu fallback without a desktop bridge", async () => { @@ -120,12 +115,4 @@ describe("LocalApi", () => { await api.persistence.setClientSettings(settings); await expect(api.persistence.getClientSettings()).resolves.toEqual(settings); }); - - it("prefers the native LocalApi when one is injected", async () => { - const nativeApi = { dialogs: {} }; - testWindow().nativeApi = 
nativeApi as never; - const { readLocalApi } = await import("./localApi"); - - expect(readLocalApi()).toBe(nativeApi); - }); }); diff --git a/apps/web/src/localApi.ts b/apps/web/src/localApi.ts index 41f50193b4f..b42702c7a4a 100644 --- a/apps/web/src/localApi.ts +++ b/apps/web/src/localApi.ts @@ -6,10 +6,6 @@ import { readBrowserClientSettings, writeBrowserClientSettings } from "./clientP let cachedApi: LocalApi | undefined; -function unavailableLocalBackendError(): Error { - return new Error("Local backend API is unavailable before a backend is paired."); -} - function createBrowserLocalApi(): LocalApi { return { dialogs: { @@ -25,7 +21,6 @@ function createBrowserLocalApi(): LocalApi { }, }, shell: { - openInEditor: () => Promise.reject(unavailableLocalBackendError()), openExternal: async (url) => { if (window.desktopBridge) { const opened = await window.desktopBridge.openExternal(url); @@ -63,23 +58,6 @@ function createBrowserLocalApi(): LocalApi { writeBrowserClientSettings(settings); }, }, - server: { - getConfig: () => Promise.reject(unavailableLocalBackendError()), - refreshProviders: () => Promise.reject(unavailableLocalBackendError()), - updateProvider: () => Promise.reject(unavailableLocalBackendError()), - upsertKeybinding: () => Promise.reject(unavailableLocalBackendError()), - removeKeybinding: () => Promise.reject(unavailableLocalBackendError()), - getSettings: () => Promise.reject(unavailableLocalBackendError()), - updateSettings: () => Promise.reject(unavailableLocalBackendError()), - discoverSourceControl: () => Promise.reject(unavailableLocalBackendError()), - getTraceDiagnostics: () => Promise.reject(unavailableLocalBackendError()), - getProcessDiagnostics: () => Promise.reject(unavailableLocalBackendError()), - getProcessResourceHistory: () => Promise.reject(unavailableLocalBackendError()), - getResourceTelemetry: () => Promise.reject(unavailableLocalBackendError()), - getResourceTelemetryHistory: () => 
Promise.reject(unavailableLocalBackendError()), - retryResourceTelemetry: () => Promise.reject(unavailableLocalBackendError()), - signalProcess: () => Promise.reject(unavailableLocalBackendError()), - }, }; } @@ -91,12 +69,7 @@ export function readLocalApi(): LocalApi | undefined { if (typeof window === "undefined") return undefined; if (cachedApi) return cachedApi; - if (window.nativeApi) { - cachedApi = window.nativeApi; - return cachedApi; - } - - cachedApi = createBrowserLocalApi(); + cachedApi = createLocalApi(); return cachedApi; } diff --git a/apps/web/src/vite-env.d.ts b/apps/web/src/vite-env.d.ts index d8a6d71b49a..d4fae7378f8 100644 --- a/apps/web/src/vite-env.d.ts +++ b/apps/web/src/vite-env.d.ts @@ -1,6 +1,6 @@ /// -import type { DesktopBridge, LocalApi } from "@t3tools/contracts"; +import type { DesktopBridge } from "@t3tools/contracts"; interface ImportMetaEnv { readonly VITE_HTTP_URL: string; @@ -21,7 +21,6 @@ interface ImportMeta { declare global { interface Window { - nativeApi?: LocalApi; desktopBridge?: DesktopBridge; } } diff --git a/packages/contracts/src/ipc.ts b/packages/contracts/src/ipc.ts index b2a6140dbf9..9f505c7d9db 100644 --- a/packages/contracts/src/ipc.ts +++ b/packages/contracts/src/ipc.ts @@ -1,121 +1,19 @@ -import type { - VcsCreateRefInput, - VcsCreateRefResult, - VcsCreateWorktreeInput, - VcsCreateWorktreeResult, - VcsInitInput, - VcsListRefsInput, - VcsListRefsResult, - VcsPullInput, - VcsPullResult, - VcsRemoveWorktreeInput, - VcsSwitchRefInput, - VcsSwitchRefResult, - GitPreparePullRequestThreadInput, - GitPreparePullRequestThreadResult, - GitPullRequestRefInput, - GitResolvePullRequestResult, - VcsStatusInput, - VcsStatusResult, -} from "./git.ts"; -import type { ReviewDiffPreviewInput, ReviewDiffPreviewResult } from "./review.ts"; -import type { FilesystemBrowseInput, FilesystemBrowseResult } from "./filesystem.ts"; -import type { AssetCreateUrlInput, AssetCreateUrlResult } from "./assets.ts"; -import type { - 
ProjectListEntriesInput, - ProjectListEntriesResult, - ProjectReadFileInput, - ProjectReadFileResult, - ProjectSearchEntriesInput, - ProjectSearchEntriesResult, - ProjectWriteFileInput, - ProjectWriteFileResult, -} from "./project.ts"; -import type { ProviderInstanceId } from "./providerInstance.ts"; -import type { - ResourceTelemetryHistory, - ResourceTelemetryHistoryInput, - ResourceTelemetryRetryResult, - ResourceTelemetrySnapshot, -} from "./resourceTelemetry.ts"; -import type { - ServerConfig, - ServerProcessDiagnosticsResult, - ServerProcessResourceHistoryInput, - ServerProcessResourceHistoryResult, - ServerProviderUpdateInput, - ServerProviderUpdatedPayload, - ServerRemoveKeybindingResult, - ServerSignalProcessInput, - ServerSignalProcessResult, - ServerTraceDiagnosticsResult, - ServerUpsertKeybindingResult, -} from "./server.ts"; -import type { - TerminalAttachInput, - TerminalAttachStreamEvent, - TerminalClearInput, - TerminalCloseInput, - TerminalMetadataStreamEvent, - TerminalOpenInput, - TerminalResizeInput, - TerminalRestartInput, - TerminalSessionSnapshot, - TerminalWriteInput, -} from "./terminal.ts"; -import type { ServerRemoveKeybindingInput, ServerUpsertKeybindingInput } from "./server.ts"; import * as Schema from "effect/Schema"; -import type { - DiscoveredLocalServerList, - PreviewCloseInput, - PreviewEvent, - PreviewListInput, - PreviewListResult, - PreviewNavigateInput, - PreviewOpenInput, - PreviewRefreshInput, - PreviewReportStatusInput, - PreviewSessionSnapshot, -} from "./preview.ts"; import { PreviewAutomationClickInput, PreviewAutomationEvaluateInput, - PreviewAutomationOwner, PreviewAutomationPressInput, - PreviewAutomationRequest, - PreviewAutomationResponse, PreviewAutomationScrollInput, PreviewAutomationSnapshot, PreviewAutomationStatus, PreviewAutomationTypeInput, PreviewAutomationWaitForInput, } from "./previewAutomation.ts"; -import type { - ClientOrchestrationCommand, - OrchestrationGetFullThreadDiffInput, - 
OrchestrationGetFullThreadDiffResult, - OrchestrationGetTurnDiffInput, - OrchestrationGetTurnDiffResult, - OrchestrationShellSnapshot, - OrchestrationShellStreamItem, - OrchestrationSubscribeThreadInput, - OrchestrationThreadStreamItem, -} from "./orchestration.ts"; import { EnvironmentId } from "./baseSchemas.ts"; import { AuthAccessTokenResult, AuthSessionState, AuthWebSocketTicketResult } from "./auth.ts"; import { AdvertisedEndpoint } from "./remoteAccess.ts"; -import { EditorId } from "./editor.ts"; import { ExecutionEnvironmentDescriptor } from "./environment.ts"; -import type { ClientSettings, ServerSettings, ServerSettingsPatch } from "./settings.ts"; -import type { - SourceControlCloneRepositoryInput, - SourceControlCloneRepositoryResult, - SourceControlDiscoveryResult, - SourceControlPublishRepositoryInput, - SourceControlPublishRepositoryResult, - SourceControlRepositoryInfo, - SourceControlRepositoryLookupInput, -} from "./sourceControl.ts"; +import type { ClientSettings } from "./settings.ts"; export interface ContextMenuItem { id: T; @@ -1027,7 +925,7 @@ export interface DesktopPreviewBridge { * APIs bound to the local app shell, not to any particular backend environment. * * These capabilities describe the desktop/browser host that the user is - * currently running: dialogs, editor/external-link opening, context menus, and + * currently running: dialogs, external-link opening, context menus, and * app-level settings/config access. 
They must not be used as a proxy for * "whatever environment the user is targeting", because in a multi-environment * world the local shell and a selected backend environment are distinct @@ -1039,7 +937,6 @@ export interface LocalApi { confirm: (message: string) => Promise; }; shell: { - openInEditor: (cwd: string, editor: EditorId) => Promise; openExternal: (url: string) => Promise; }; contextMenu: { @@ -1052,161 +949,4 @@ export interface LocalApi { getClientSettings: () => Promise; setClientSettings: (settings: ClientSettings) => Promise; }; - server: { - getConfig: () => Promise; - /** - * Refresh provider snapshots. When `input.instanceId` is supplied only that - * configured instance is probed; otherwise every configured instance is - * refreshed (legacy untargeted refresh). - */ - refreshProviders: (input?: { - readonly instanceId?: ProviderInstanceId; - }) => Promise; - updateProvider: (input: ServerProviderUpdateInput) => Promise; - upsertKeybinding: (input: ServerUpsertKeybindingInput) => Promise; - removeKeybinding: (input: ServerRemoveKeybindingInput) => Promise; - getSettings: () => Promise; - updateSettings: (patch: ServerSettingsPatch) => Promise; - discoverSourceControl: () => Promise; - getTraceDiagnostics: () => Promise; - getProcessDiagnostics: () => Promise; - getProcessResourceHistory: ( - input: ServerProcessResourceHistoryInput, - ) => Promise; - getResourceTelemetry: () => Promise; - getResourceTelemetryHistory: ( - input: ResourceTelemetryHistoryInput, - ) => Promise; - retryResourceTelemetry: () => Promise; - signalProcess: (input: ServerSignalProcessInput) => Promise; - }; -} - -/** - * APIs bound to a specific backend environment connection. - * - * These operations must always be routed with explicit environment context. - * They represent remote stateful capabilities such as orchestration, terminal, - * project, VCS, and provider operations. 
In multi-environment mode, each environment gets - * its own instance of this surface, and callers should resolve it by - * `environmentId` rather than reaching through the local desktop bridge. - */ -export interface EnvironmentApi { - terminal: { - open: (input: typeof TerminalOpenInput.Encoded) => Promise; - attach: ( - input: typeof TerminalAttachInput.Encoded, - callback: (event: TerminalAttachStreamEvent) => void, - options?: { - onResubscribe?: () => void; - }, - ) => () => void; - write: (input: typeof TerminalWriteInput.Encoded) => Promise; - resize: (input: typeof TerminalResizeInput.Encoded) => Promise; - clear: (input: typeof TerminalClearInput.Encoded) => Promise; - restart: (input: typeof TerminalRestartInput.Encoded) => Promise; - close: (input: typeof TerminalCloseInput.Encoded) => Promise; - onMetadata: ( - callback: (event: TerminalMetadataStreamEvent) => void, - options?: { - onResubscribe?: () => void; - }, - ) => () => void; - }; - projects: { - listEntries: (input: ProjectListEntriesInput) => Promise; - readFile: (input: ProjectReadFileInput) => Promise; - searchEntries: (input: ProjectSearchEntriesInput) => Promise; - writeFile: (input: ProjectWriteFileInput) => Promise; - }; - filesystem: { - browse: (input: FilesystemBrowseInput) => Promise; - }; - assets: { - createUrl: (input: AssetCreateUrlInput) => Promise; - }; - sourceControl: { - lookupRepository: ( - input: SourceControlRepositoryLookupInput, - ) => Promise; - cloneRepository: ( - input: SourceControlCloneRepositoryInput, - ) => Promise; - publishRepository: ( - input: SourceControlPublishRepositoryInput, - ) => Promise; - }; - vcs: { - listRefs: (input: VcsListRefsInput) => Promise; - createWorktree: (input: VcsCreateWorktreeInput) => Promise; - removeWorktree: (input: VcsRemoveWorktreeInput) => Promise; - createRef: (input: VcsCreateRefInput) => Promise; - switchRef: (input: VcsSwitchRefInput) => Promise; - init: (input: VcsInitInput) => Promise; - pull: (input: VcsPullInput) => 
Promise; - refreshStatus: (input: VcsStatusInput) => Promise; - onStatus: ( - input: VcsStatusInput, - callback: (status: VcsStatusResult) => void, - options?: { - onResubscribe?: () => void; - }, - ) => () => void; - }; - git: { - resolvePullRequest: (input: GitPullRequestRefInput) => Promise; - preparePullRequestThread: ( - input: GitPreparePullRequestThreadInput, - ) => Promise; - }; - review: { - getDiffPreview: (input: ReviewDiffPreviewInput) => Promise; - }; - orchestration: { - dispatchCommand: (command: ClientOrchestrationCommand) => Promise<{ sequence: number }>; - getTurnDiff: (input: OrchestrationGetTurnDiffInput) => Promise; - getFullThreadDiff: ( - input: OrchestrationGetFullThreadDiffInput, - ) => Promise; - getArchivedShellSnapshot: () => Promise; - subscribeShell: ( - callback: (event: OrchestrationShellStreamItem) => void, - options?: { - onResubscribe?: () => void; - }, - ) => () => void; - subscribeThread: ( - input: OrchestrationSubscribeThreadInput, - callback: (event: OrchestrationThreadStreamItem) => void, - options?: { - onResubscribe?: () => void; - }, - ) => () => void; - }; - preview: { - open: (input: typeof PreviewOpenInput.Encoded) => Promise; - navigate: (input: typeof PreviewNavigateInput.Encoded) => Promise; - refresh: (input: typeof PreviewRefreshInput.Encoded) => Promise; - close: (input: typeof PreviewCloseInput.Encoded) => Promise; - list: (input: typeof PreviewListInput.Encoded) => Promise; - reportStatus: (input: typeof PreviewReportStatusInput.Encoded) => Promise; - automation: { - connect: ( - input: { clientId: string }, - callback: (request: PreviewAutomationRequest) => void, - options?: { onResubscribe?: () => void }, - ) => () => void; - respond: (response: PreviewAutomationResponse) => Promise; - reportOwner: (owner: PreviewAutomationOwner) => Promise; - clearOwner: (input: { clientId: string }) => Promise; - }; - onEvent: ( - callback: (event: PreviewEvent) => void, - options?: { onResubscribe?: () => void }, - ) => () 
=> void; - subscribePorts: ( - callback: (servers: DiscoveredLocalServerList) => void, - options?: { onResubscribe?: () => void }, - ) => () => void; - }; } diff --git a/packages/contracts/src/rpc.ts b/packages/contracts/src/rpc.ts index 11b8a6519d2..f4aa570904f 100644 --- a/packages/contracts/src/rpc.ts +++ b/packages/contracts/src/rpc.ts @@ -221,7 +221,6 @@ export const WS_METHODS = { serverGetTraceDiagnostics: "server.getTraceDiagnostics", serverGetProcessDiagnostics: "server.getProcessDiagnostics", serverGetProcessResourceHistory: "server.getProcessResourceHistory", - serverGetResourceTelemetry: "server.getResourceTelemetry", serverGetResourceTelemetryHistory: "server.getResourceTelemetryHistory", serverRetryResourceTelemetry: "server.retryResourceTelemetry", serverSignalProcess: "server.signalProcess", @@ -328,12 +327,6 @@ export const WsServerGetProcessResourceHistoryRpc = Rpc.make( }, ); -export const WsServerGetResourceTelemetryRpc = Rpc.make(WS_METHODS.serverGetResourceTelemetry, { - payload: Schema.Struct({}), - success: ResourceTelemetrySnapshot, - error: EnvironmentAuthorizationError, -}); - export const WsServerGetResourceTelemetryHistoryRpc = Rpc.make( WS_METHODS.serverGetResourceTelemetryHistory, { @@ -760,7 +753,6 @@ export const WsRpcGroup = RpcGroup.make( WsServerGetTraceDiagnosticsRpc, WsServerGetProcessDiagnosticsRpc, WsServerGetProcessResourceHistoryRpc, - WsServerGetResourceTelemetryRpc, WsServerGetResourceTelemetryHistoryRpc, WsServerRetryResourceTelemetryRpc, WsServerSignalProcessRpc,