diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index eaf8fc367cc..1d620c04bb1 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -22,6 +22,9 @@ jobs: cache: true run-install: true + - name: Setup Rust + uses: dtolnay/rust-toolchain@stable + - name: Ensure Electron runtime is installed run: vp run --filter @t3tools/desktop ensure:electron @@ -31,6 +34,9 @@ jobs: - name: Typecheck run: vpr typecheck + - name: Check resource monitor formatting + run: cargo fmt --manifest-path native/resource-monitor/Cargo.toml -- --check + - name: Build desktop pipeline run: vp run build:desktop @@ -54,12 +60,18 @@ jobs: cache: true run-install: true + - name: Setup Rust + uses: dtolnay/rust-toolchain@stable + - name: Ensure Electron runtime is installed run: vp run --filter @t3tools/desktop ensure:electron - name: Test run: vp run test + - name: Test resource monitor + run: cargo test --locked --manifest-path native/resource-monitor/Cargo.toml + mobile_native_static_analysis: name: Mobile Native Static Analysis runs-on: blacksmith-12vcpu-macos-26 diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 2348417abc5..e51982b6aa8 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -272,21 +272,29 @@ jobs: platform: mac target: dmg arch: arm64 + rust_target: aarch64-apple-darwin + resource_key: darwin-arm64 - label: macOS x64 runner: blacksmith-12vcpu-macos-26 platform: mac target: dmg arch: x64 + rust_target: x86_64-apple-darwin + resource_key: darwin-x64 - label: Linux x64 runner: blacksmith-32vcpu-ubuntu-2404 platform: linux target: AppImage arch: x64 + rust_target: x86_64-unknown-linux-gnu + resource_key: linux-x64 - label: Windows x64 runner: blacksmith-32vcpu-windows-2025 platform: win target: nsis arch: x64 + rust_target: x86_64-pc-windows-msvc + resource_key: win32-x64 # - label: Windows arm64 # runner: windows-11-arm # platform: win @@ -306,6 +314,11 @@ jobs: cache: true run-install: 
true + - name: Setup Rust + uses: dtolnay/rust-toolchain@stable + with: + targets: ${{ matrix.rust_target }} + - name: Download relay client tracing config uses: actions/download-artifact@v8 with: @@ -518,6 +531,19 @@ jobs: # done # fi + - name: Collect resource monitor + shell: bash + run: | + set -euo pipefail + binary_name="t3-resource-monitor" + if [[ "${{ matrix.platform }}" == "win" ]]; then + binary_name="${binary_name}.exe" + fi + source_path="native/resource-monitor/target/${{ matrix.rust_target }}/release/${binary_name}" + target_dir="resource-monitor-publish/${{ matrix.resource_key }}" + mkdir -p "$target_dir" + cp "$source_path" "$target_dir/$binary_name" + - name: Upload build artifacts uses: actions/upload-artifact@v7 with: @@ -525,6 +551,13 @@ jobs: path: release-publish/* if-no-files-found: error + - name: Upload resource monitor + uses: actions/upload-artifact@v7 + with: + name: resource-monitor-${{ matrix.resource_key }} + path: resource-monitor-publish/${{ matrix.resource_key }}/* + if-no-files-found: error + publish_cli: name: Publish CLI to npm needs: [preflight, relay_public_config, build] @@ -579,6 +612,24 @@ jobs: - name: Build CLI package run: vp run --filter t3 build + - name: Download resource monitors + uses: actions/download-artifact@v8 + with: + pattern: resource-monitor-* + path: ${{ runner.temp }}/resource-monitors + + - name: Bundle resource monitors into CLI package + shell: bash + run: | + set -euo pipefail + for artifact_dir in "$RUNNER_TEMP"/resource-monitors/resource-monitor-*; do + resource_key="${artifact_dir##*/resource-monitor-}" + target_dir="apps/server/dist/resource-monitor/${resource_key}" + mkdir -p "$target_dir" + cp "$artifact_dir"/t3-resource-monitor* "$target_dir/" + chmod +x "$target_dir"/t3-resource-monitor 2>/dev/null || true + done + - name: Publish CLI package run: node apps/server/scripts/cli.ts publish --tag "${{ needs.preflight.outputs.cli_dist_tag }}" --app-version "${{ needs.preflight.outputs.version }}" 
--verbose diff --git a/.gitignore b/.gitignore index ef6067824f2..69544f4b86a 100644 --- a/.gitignore +++ b/.gitignore @@ -27,6 +27,7 @@ squashfs-root/ .gstack/ dist-electron/ .electron-runtime/ +native/**/target/ node_modules/ .alchemy/ *.log diff --git a/apps/desktop/src/app/DesktopAppIdentity.test.ts b/apps/desktop/src/app/DesktopAppIdentity.test.ts index eafdbf056dc..81812cf4854 100644 --- a/apps/desktop/src/app/DesktopAppIdentity.test.ts +++ b/apps/desktop/src/app/DesktopAppIdentity.test.ts @@ -54,6 +54,7 @@ const makeElectronAppLayer = (calls: ElectronAppCalls) => }), setAppUserModelId: () => Effect.void, requestSingleInstanceLock: Effect.succeed(true), + getAppMetrics: Effect.succeed([]), isDefaultProtocolClient: () => Effect.succeed(false), setAsDefaultProtocolClient: () => Effect.succeed(true), setDesktopName: () => Effect.void, diff --git a/apps/desktop/src/app/DesktopCloudAuth.test.ts b/apps/desktop/src/app/DesktopCloudAuth.test.ts index 002fd86b0a4..8d783910e59 100644 --- a/apps/desktop/src/app/DesktopCloudAuth.test.ts +++ b/apps/desktop/src/app/DesktopCloudAuth.test.ts @@ -54,6 +54,7 @@ function makeHarness(input: { readonly isDevelopment: boolean }): CloudAuthHarne setAboutPanelOptions: () => Effect.void, setAppUserModelId: () => Effect.void, requestSingleInstanceLock: Effect.succeed(true), + getAppMetrics: Effect.succeed([]), isDefaultProtocolClient: () => Effect.succeed(false), setAsDefaultProtocolClient: (protocol, path, args) => Effect.sync(() => { diff --git a/apps/desktop/src/app/DesktopObservability.test.ts b/apps/desktop/src/app/DesktopObservability.test.ts index a78de48d5e1..73b386671ce 100644 --- a/apps/desktop/src/app/DesktopObservability.test.ts +++ b/apps/desktop/src/app/DesktopObservability.test.ts @@ -49,14 +49,14 @@ const environmentInput = (baseDir: string) => runningUnderArm64Translation: false, }) satisfies DesktopEnvironment.MakeDesktopEnvironmentInput; -const makeEnvironmentLayer = (baseDir: string) => +const makeEnvironmentLayer 
= (baseDir: string, isDevelopment = true) => DesktopEnvironment.layer(environmentInput(baseDir)).pipe( Layer.provide( Layer.mergeAll( NodeServices.layer, DesktopConfig.layerTest({ T3CODE_HOME: baseDir, - VITE_DEV_SERVER_URL: "http://127.0.0.1:5733", + VITE_DEV_SERVER_URL: isDevelopment ? "http://127.0.0.1:5733" : undefined, }), ), ), @@ -112,41 +112,56 @@ describe("DesktopObservability", () => { ), ); - it.effect("persists backend child output as structured JSON records in development", () => + it.effect("buffers backend child output and persists it only when a failure is reported", () => Effect.gen(function* () { const fileSystem = yield* FileSystem.FileSystem; const baseDir = yield* fileSystem.makeTempDirectoryScoped({ prefix: "t3-desktop-backend-output-log-test-", }); - const environmentLayer = makeEnvironmentLayer(baseDir); + const environmentLayer = makeEnvironmentLayer(baseDir, false); const logPath = yield* Effect.gen(function* () { const environment = yield* DesktopEnvironment.DesktopEnvironment; return environment.path.join(environment.logDir, "server-child.log"); }).pipe(Effect.provide(environmentLayer)); + const tracePath = yield* Effect.gen(function* () { + const environment = yield* DesktopEnvironment.DesktopEnvironment; + return environment.path.join(environment.logDir, "desktop.trace.ndjson"); + }).pipe(Effect.provide(environmentLayer)); - yield* Effect.gen(function* () { - const outputLog = yield* DesktopObservability.DesktopBackendOutputLog; - yield* outputLog.writeSessionBoundary({ - phase: "START", - details: "pid=123 port=3773 cwd=/repo", - }); - yield* outputLog.writeOutputChunk("stdout", new TextEncoder().encode("hello server\n")); - }).pipe( - Effect.annotateLogs({ runId: "test-run" }), - Effect.provide(DesktopObservability.layer.pipe(Layer.provideMerge(environmentLayer))), + yield* Effect.scoped( + Effect.gen(function* () { + const outputLog = yield* DesktopObservability.DesktopBackendOutputLog; + yield* outputLog.beginSession({ + details: 
"pid=123 port=3773 cwd=/repo", + }); + yield* outputLog.writeOutputChunk("stdout", new TextEncoder().encode("hello server\n")); + assert.isFalse(yield* fileSystem.exists(logPath)); + yield* outputLog.persistFailure({ details: "code=1" }); + yield* outputLog.beginSession({ details: "pid=456" }); + yield* outputLog.writeOutputChunk( + "stderr", + new TextEncoder().encode("normal shutdown\n"), + ); + yield* outputLog.discardSession; + }).pipe( + Effect.annotateLogs({ runId: "test-run" }), + Effect.provide(DesktopObservability.layer.pipe(Layer.provideMerge(environmentLayer))), + ), ); const log = yield* fileSystem.readFileString(logPath); const lines = log.trimEnd().split("\n"); - const boundary = yield* decodeDesktopBackendChildLogRecord(lines[0] ?? ""); + const start = yield* decodeDesktopBackendChildLogRecord(lines[0] ?? ""); const output = yield* decodeDesktopBackendChildLogRecord(lines[1] ?? ""); + const end = yield* decodeDesktopBackendChildLogRecord(lines[2] ?? ""); - assert.equal(boundary.message, "backend child process session start"); - assert.equal(boundary.level, "INFO"); - assert.equal(boundary.annotations.component, "desktop-backend-child"); - assert.equal(boundary.annotations.runId, "test-run"); - assert.equal(boundary.annotations.phase, "START"); - assert.equal(boundary.annotations.details, "pid=123 port=3773 cwd=/repo"); + assert.equal(lines.length, 3); + assert.equal(start.message, "backend child process failure output start"); + assert.equal(start.level, "ERROR"); + assert.equal(start.annotations.component, "desktop-backend-child"); + assert.equal(start.annotations.runId, "test-run"); + assert.equal(start.annotations.phase, "START"); + assert.equal(start.annotations.details, "pid=123 port=3773 cwd=/repo"); assert.equal(output.message, "backend child process output"); assert.equal(output.level, "INFO"); @@ -154,6 +169,98 @@ describe("DesktopObservability", () => { assert.equal(output.annotations.runId, "test-run"); 
assert.equal(output.annotations.stream, "stdout"); assert.equal(output.annotations.text, "hello server\n"); + + assert.equal(end.message, "backend child process failure output end"); + assert.equal(end.level, "ERROR"); + assert.equal(end.annotations.phase, "END"); + assert.equal(end.annotations.details, "code=1"); + + const traceRecords = (yield* fileSystem.readFileString(tracePath)) + .trim() + .split("\n") + .filter((line) => line.length > 0) + .map((line) => decodeTraceRecordLine(line)); + assert.isFalse( + traceRecords.some( + (record) => record.name === "desktop.observability.backendOutput.writeOutputChunk", + ), + ); + }).pipe( + Effect.scoped, + Effect.provide(Layer.mergeAll(NodeServices.layer, NodeHttpClient.layerUndici)), + ), + ); + + it.effect("retains only the last mebibyte of backend child output", () => + Effect.gen(function* () { + const fileSystem = yield* FileSystem.FileSystem; + const baseDir = yield* fileSystem.makeTempDirectoryScoped({ + prefix: "t3-desktop-backend-output-bound-test-", + }); + const environmentLayer = makeEnvironmentLayer(baseDir, false); + const logPath = yield* Effect.gen(function* () { + const environment = yield* DesktopEnvironment.DesktopEnvironment; + return environment.path.join(environment.logDir, "server-child.log"); + }).pipe(Effect.provide(environmentLayer)); + const maxBufferedBytes = 1024 * 1024; + const discardedPrefixBytes = 128; + const output = new Uint8Array(maxBufferedBytes + discardedPrefixBytes); + output.fill("x".charCodeAt(0)); + output.fill("y".charCodeAt(0), 0, discardedPrefixBytes); + + yield* Effect.scoped( + Effect.gen(function* () { + const outputLog = yield* DesktopObservability.DesktopBackendOutputLog; + yield* outputLog.beginSession({ details: "pid=123" }); + yield* outputLog.writeOutputChunk("stderr", output); + yield* outputLog.persistFailure({ details: "code=1" }); + }).pipe( + Effect.provide(DesktopObservability.layer.pipe(Layer.provideMerge(environmentLayer))), + ), + ); + + const lines = 
(yield* fileSystem.readFileString(logPath)).trimEnd().split("\n"); + const record = yield* decodeDesktopBackendChildLogRecord(lines[1] ?? ""); + const text = record.annotations.text; + assert.equal(typeof text, "string"); + if (typeof text !== "string") { + return; + } + assert.equal(new TextEncoder().encode(text).byteLength, maxBufferedBytes); + assert.isFalse(text.includes("y")); + }).pipe( + Effect.scoped, + Effect.provide(Layer.mergeAll(NodeServices.layer, NodeHttpClient.layerUndici)), + ), + ); + + it.effect("bounds the number of retained backend child output chunks", () => + Effect.gen(function* () { + const fileSystem = yield* FileSystem.FileSystem; + const baseDir = yield* fileSystem.makeTempDirectoryScoped({ + prefix: "t3-desktop-backend-output-chunks-test-", + }); + const environmentLayer = makeEnvironmentLayer(baseDir, false); + const logPath = yield* Effect.gen(function* () { + const environment = yield* DesktopEnvironment.DesktopEnvironment; + return environment.path.join(environment.logDir, "server-child.log"); + }).pipe(Effect.provide(environmentLayer)); + + yield* Effect.scoped( + Effect.gen(function* () { + const outputLog = yield* DesktopObservability.DesktopBackendOutputLog; + yield* outputLog.beginSession({ details: "pid=123" }); + for (let index = 0; index < 300; index += 1) { + yield* outputLog.writeOutputChunk("stderr", Uint8Array.of(index % 128)); + } + yield* outputLog.persistFailure({ details: "code=1" }); + }).pipe( + Effect.provide(DesktopObservability.layer.pipe(Layer.provideMerge(environmentLayer))), + ), + ); + + const lines = (yield* fileSystem.readFileString(logPath)).trimEnd().split("\n"); + assert.equal(lines.length, 258); }).pipe( Effect.scoped, Effect.provide(Layer.mergeAll(NodeServices.layer, NodeHttpClient.layerUndici)), diff --git a/apps/desktop/src/app/DesktopObservability.ts b/apps/desktop/src/app/DesktopObservability.ts index 2349fe52dc3..0468e810883 100644 --- a/apps/desktop/src/app/DesktopObservability.ts +++ 
b/apps/desktop/src/app/DesktopObservability.ts @@ -21,8 +21,10 @@ import * as DesktopEnvironment from "./DesktopEnvironment.ts"; const DESKTOP_LOG_FILE_MAX_BYTES = 10 * 1024 * 1024; const DESKTOP_LOG_FILE_MAX_FILES = 10; +const DESKTOP_BACKEND_OUTPUT_BUFFER_MAX_BYTES = 1024 * 1024; +const DESKTOP_BACKEND_OUTPUT_BUFFER_MAX_CHUNKS = 256; const DESKTOP_BACKEND_CHILD_LOG_FIBER_ID = "#backend-child"; -const DESKTOP_TRACE_BATCH_WINDOW_MS = 200; +const DESKTOP_TRACE_BATCH_WINDOW_MS = 1_000; export interface RotatingLogFileWriter { readonly writeBytes: (chunk: Uint8Array) => Effect.Effect; @@ -30,14 +32,13 @@ export interface RotatingLogFileWriter { } export interface DesktopBackendOutputLogShape { - readonly writeSessionBoundary: (input: { - readonly phase: "START" | "END"; - readonly details: string; - }) => Effect.Effect; + readonly beginSession: (input: { readonly details: string }) => Effect.Effect; readonly writeOutputChunk: ( streamName: "stdout" | "stderr", chunk: Uint8Array, ) => Effect.Effect; + readonly persistFailure: (input: { readonly details: string }) => Effect.Effect; + readonly discardSession: Effect.Effect; } export class DesktopBackendOutputLog extends Context.Service< @@ -113,10 +114,84 @@ const encodeDesktopBackendChildLogRecord = Schema.encodeEffect( ); const DesktopBackendOutputLogNoop: DesktopBackendOutputLogShape = { - writeSessionBoundary: () => Effect.void, + beginSession: () => Effect.void, writeOutputChunk: () => Effect.void, + persistFailure: () => Effect.void, + discardSession: Effect.void, }; +interface BufferedBackendOutputChunk { + readonly streamName: "stdout" | "stderr"; + readonly chunk: Uint8Array; +} + +interface BackendOutputSession { + readonly runId: string; + readonly startDetails: string; + readonly chunks: ReadonlyArray; + readonly byteLength: number; +} + +function appendBoundedOutputChunk( + session: BackendOutputSession, + streamName: "stdout" | "stderr", + chunk: Uint8Array, +): BackendOutputSession { + if (chunk.byteLength 
=== 0) { + return session; + } + + const retainedChunk = + chunk.byteLength > DESKTOP_BACKEND_OUTPUT_BUFFER_MAX_BYTES + ? chunk.slice(chunk.byteLength - DESKTOP_BACKEND_OUTPUT_BUFFER_MAX_BYTES) + : chunk.slice(); + const chunks = [ + ...session.chunks, + { + streamName, + chunk: retainedChunk, + }, + ]; + let byteLength = session.byteLength + retainedChunk.byteLength; + let overflow = Math.max(0, byteLength - DESKTOP_BACKEND_OUTPUT_BUFFER_MAX_BYTES); + let firstRetainedIndex = 0; + + while (overflow > 0) { + const first = chunks[firstRetainedIndex]; + if (!first) { + break; + } + if (first.chunk.byteLength <= overflow) { + overflow -= first.chunk.byteLength; + byteLength -= first.chunk.byteLength; + firstRetainedIndex += 1; + continue; + } + + chunks[firstRetainedIndex] = { + ...first, + chunk: first.chunk.slice(overflow), + }; + byteLength -= overflow; + overflow = 0; + } + + const excessChunks = Math.max( + 0, + chunks.length - firstRetainedIndex - DESKTOP_BACKEND_OUTPUT_BUFFER_MAX_CHUNKS, + ); + for (let index = firstRetainedIndex; index < firstRetainedIndex + excessChunks; index += 1) { + byteLength -= chunks[index]?.chunk.byteLength ?? 
0; + } + firstRetainedIndex += excessChunks; + + return { + ...session, + chunks: chunks.slice(firstRetainedIndex), + byteLength, + }; +} + const currentDesktopRunId = Effect.gen(function* () { const annotations = yield* References.CurrentLogAnnotations; const runId = annotations.runId; @@ -301,45 +376,86 @@ const backendOutputLogLayer = Layer.effect( const writer = yield* makeRotatingLogFileWriter({ filePath: environment.path.join(environment.logDir, "server-child.log"), }).pipe(Effect.option); + const sessionRef = yield* Ref.make(Option.none()); return Option.match(writer, { onNone: () => DesktopBackendOutputLogNoop, - onSome: (logFile) => - ({ - writeSessionBoundary: Effect.fn( - "desktop.observability.backendOutput.writeSessionBoundary", - )(function* ({ phase, details }) { + onSome: (logFile) => { + return { + beginSession: Effect.fn("desktop.observability.backendOutput.beginSession")(function* ({ + details, + }) { const runId = yield* currentDesktopRunId; - yield* writeBackendChildLogRecord(logFile, { - message: `backend child process session ${phase.toLowerCase()}`, - level: "INFO", - annotations: { - component: "desktop-backend-child", + yield* Ref.set( + sessionRef, + Option.some({ runId, - phase, - details: sanitizeLogValue(details), - }, - }); + startDetails: sanitizeLogValue(details), + chunks: [], + byteLength: 0, + }), + ); + }), + writeOutputChunk: Effect.fnUntraced(function* (streamName, chunk) { + if (environment.isDevelopment) { + yield* writeDevelopmentConsoleOutput(streamName, chunk); + } + yield* Ref.update( + sessionRef, + Option.map((session) => appendBoundedOutputChunk(session, streamName, chunk)), + ); }), - writeOutputChunk: Effect.fn("desktop.observability.backendOutput.writeOutputChunk")( - function* (streamName, chunk) { - if (environment.isDevelopment) { - yield* writeDevelopmentConsoleOutput(streamName, chunk); + persistFailure: Effect.fn("desktop.observability.backendOutput.persistFailure")( + function* ({ details }) { + const 
session = yield* Ref.modify(sessionRef, (current) => [ + current, + Option.map(current, (value) => ({ + ...value, + chunks: [], + byteLength: 0, + })), + ]); + if (Option.isNone(session)) { + return; + } + + yield* writeBackendChildLogRecord(logFile, { + message: "backend child process failure output start", + level: "ERROR", + annotations: { + component: "desktop-backend-child", + runId: session.value.runId, + phase: "START", + details: session.value.startDetails, + }, + }); + for (const output of session.value.chunks) { + yield* writeBackendChildLogRecord(logFile, { + message: "backend child process output", + level: output.streamName === "stderr" ? "ERROR" : "INFO", + annotations: { + component: "desktop-backend-child", + runId: session.value.runId, + stream: output.streamName, + text: textDecoder.decode(output.chunk), + }, + }); } - const runId = yield* currentDesktopRunId; yield* writeBackendChildLogRecord(logFile, { - message: "backend child process output", - level: streamName === "stderr" ? 
"ERROR" : "INFO", + message: "backend child process failure output end", + level: "ERROR", annotations: { component: "desktop-backend-child", - runId, - stream: streamName, - text: textDecoder.decode(chunk), + runId: session.value.runId, + phase: "END", + details: sanitizeLogValue(details), }, }); }, ), - }) satisfies DesktopBackendOutputLogShape, + discardSession: Ref.set(sessionRef, Option.none()), + } satisfies DesktopBackendOutputLogShape; + }, }); }), ); diff --git a/apps/desktop/src/backend/DesktopBackendConfiguration.test.ts b/apps/desktop/src/backend/DesktopBackendConfiguration.test.ts index 96e56a87c9d..255f98513fb 100644 --- a/apps/desktop/src/backend/DesktopBackendConfiguration.test.ts +++ b/apps/desktop/src/backend/DesktopBackendConfiguration.test.ts @@ -41,6 +41,7 @@ function makeEnvironmentLayer( options?: { readonly isPackaged?: boolean; readonly devServerUrl?: string; + readonly resourcesPath?: string; }, ) { return DesktopEnvironment.layer({ @@ -51,7 +52,7 @@ function makeEnvironmentLayer( appVersion: "1.2.3", appPath: "/repo", isPackaged: options?.isPackaged ?? true, - resourcesPath: "/missing/resources", + resourcesPath: options?.resourcesPath ?? 
"/missing/resources", runningUnderArm64Translation: false, }).pipe( Layer.provide( @@ -192,4 +193,39 @@ describe("DesktopBackendConfiguration", () => { ); }).pipe(Effect.scoped, Effect.provide(NodeServices.layer)), ); + + it.effect("passes the packaged resource monitor path to the backend", () => + Effect.gen(function* () { + const fileSystem = yield* FileSystem.FileSystem; + const baseDir = yield* fileSystem.makeTempDirectoryScoped({ + prefix: "t3-desktop-backend-config-test-", + }); + const resourcesPath = `${baseDir}/resources`; + const monitorPath = `${resourcesPath}/resource-monitor/t3-resource-monitor`; + yield* fileSystem.makeDirectory(`${resourcesPath}/resource-monitor`, { + recursive: true, + }); + yield* fileSystem.writeFileString(monitorPath, "binary"); + yield* fileSystem.chmod(monitorPath, 0o755); + + yield* Effect.gen(function* () { + const configuration = yield* DesktopBackendConfiguration.DesktopBackendConfiguration; + const config = yield* configuration.resolve; + assert.equal(config.bootstrap.resourceMonitorPath, monitorPath); + assert.equal(config.bootstrap.desktopTelemetryFd, 4); + assert.equal(config.bootstrap.desktopTelemetryControlFd, 5); + }).pipe( + Effect.provide( + DesktopBackendConfiguration.layer.pipe( + Layer.provideMerge(serverExposureLayer), + Layer.provideMerge( + makeEnvironmentLayer(baseDir, { + resourcesPath, + }), + ), + ), + ), + ); + }).pipe(Effect.scoped, Effect.provide(NodeServices.layer)), + ); }); diff --git a/apps/desktop/src/backend/DesktopBackendConfiguration.ts b/apps/desktop/src/backend/DesktopBackendConfiguration.ts index 5e4e034b5e7..a6f59d3652a 100644 --- a/apps/desktop/src/backend/DesktopBackendConfiguration.ts +++ b/apps/desktop/src/backend/DesktopBackendConfiguration.ts @@ -56,6 +56,42 @@ const { logWarning: logBackendConfigurationWarning } = DesktopObservability.make "desktop-backend-configuration", ); +function resourceMonitorBinaryName(platform: NodeJS.Platform): string { + return platform === "win32" ? 
"t3-resource-monitor.exe" : "t3-resource-monitor"; +} + +const resolveResourceMonitorPath = Effect.fn( + "desktop.backendConfiguration.resolveResourceMonitorPath", +)(function* () { + const environment = yield* DesktopEnvironment.DesktopEnvironment; + const fileSystem = yield* FileSystem.FileSystem; + const binaryName = resourceMonitorBinaryName(environment.platform); + const candidates = environment.isDevelopment + ? [ + environment.path.join( + environment.rootDir, + "native/resource-monitor/target/debug", + binaryName, + ), + environment.path.join( + environment.rootDir, + "native/resource-monitor/target/release", + binaryName, + ), + ] + : environment.resolveResourcePathCandidates( + environment.path.join("resource-monitor", binaryName), + ); + + for (const candidate of candidates) { + if (yield* fileSystem.exists(candidate).pipe(Effect.orElseSucceed(() => false))) { + return Option.some(candidate); + } + } + + return Option.none(); +}); + const readPersistedBackendObservabilitySettings: Effect.Effect< BackendObservabilitySettings, never, @@ -89,6 +125,7 @@ const resolveBackendStartConfig = Effect.fn("desktop.backendConfiguration.resolv function* (input: { readonly bootstrapToken: string; readonly observabilitySettings: BackendObservabilitySettings; + readonly resourceMonitorPath: Option.Option; }): Effect.fn.Return< DesktopBackendManager.DesktopBackendStartConfig, never, @@ -115,6 +152,12 @@ const resolveBackendStartConfig = Effect.fn("desktop.backendConfiguration.resolv desktopBootstrapToken: input.bootstrapToken, tailscaleServeEnabled: backendExposure.tailscaleServeEnabled, tailscaleServePort: backendExposure.tailscaleServePort, + desktopTelemetryFd: 4, + desktopTelemetryControlFd: 5, + ...Option.match(input.resourceMonitorPath, { + onNone: () => ({}), + onSome: (resourceMonitorPath) => ({ resourceMonitorPath }), + }), ...Option.match(input.observabilitySettings.otlpTracesUrl, { onNone: () => ({}), onSome: (otlpTracesUrl) => ({ otlpTracesUrl }), @@ -156,9 
+199,14 @@ export const layer = Layer.effect( Effect.provideService(FileSystem.FileSystem, fileSystem), Effect.provideService(DesktopEnvironment.DesktopEnvironment, environment), ); + const resourceMonitorPath = yield* resolveResourceMonitorPath().pipe( + Effect.provideService(FileSystem.FileSystem, fileSystem), + Effect.provideService(DesktopEnvironment.DesktopEnvironment, environment), + ); return yield* resolveBackendStartConfig({ bootstrapToken, observabilitySettings, + resourceMonitorPath, }).pipe( Effect.provideService(DesktopEnvironment.DesktopEnvironment, environment), Effect.provideService(DesktopServerExposure.DesktopServerExposure, serverExposure), diff --git a/apps/desktop/src/backend/DesktopBackendManager.test.ts b/apps/desktop/src/backend/DesktopBackendManager.test.ts index 6c5109c8714..5c29b4f12e7 100644 --- a/apps/desktop/src/backend/DesktopBackendManager.test.ts +++ b/apps/desktop/src/backend/DesktopBackendManager.test.ts @@ -1,6 +1,7 @@ import { DesktopBackendBootstrap, type DesktopBackendBootstrap as DesktopBackendBootstrapValue, + DesktopTelemetryControlMessage, } from "@t3tools/contracts"; import { assert, describe, it } from "@effect/vitest"; import * as Deferred from "effect/Deferred"; @@ -23,11 +24,15 @@ import * as DesktopBackendManager from "./DesktopBackendManager.ts"; import * as DesktopBackendConfiguration from "./DesktopBackendConfiguration.ts"; import * as DesktopObservability from "../app/DesktopObservability.ts"; import * as DesktopState from "../app/DesktopState.ts"; +import * as DesktopTelemetryPublisher from "../telemetry/DesktopTelemetryPublisher.ts"; import * as DesktopWindow from "../window/DesktopWindow.ts"; const decodeDesktopBackendBootstrap = Schema.decodeEffect( Schema.fromJsonString(DesktopBackendBootstrap), ); +const encodeDesktopTelemetryControl = Schema.encodeSync( + Schema.fromJsonString(DesktopTelemetryControlMessage), +); const baseConfig: DesktopBackendManager.DesktopBackendStartConfig = { executablePath: 
"/electron", @@ -43,6 +48,8 @@ const baseConfig: DesktopBackendManager.DesktopBackendStartConfig = { desktopBootstrapToken: "token", tailscaleServeEnabled: false, tailscaleServePort: 443, + desktopTelemetryFd: 4, + desktopTelemetryControlFd: 5, }, httpBaseUrl: new URL("http://127.0.0.1:3773"), captureOutput: true, @@ -59,6 +66,7 @@ function makeProcess(options?: { readonly stderr?: Stream.Stream; readonly exitCode?: Effect.Effect; readonly kill?: ChildProcessSpawner.ChildProcessHandle["kill"]; + readonly getOutputFd?: ChildProcessSpawner.ChildProcessHandle["getOutputFd"]; }): ChildProcessSpawner.ChildProcessHandle { return ChildProcessSpawner.makeHandle({ pid: ChildProcessSpawner.ProcessId(123), @@ -70,7 +78,7 @@ function makeProcess(options?: { kill: options?.kill ?? (() => Effect.void), stdin: Sink.drain, getInputFd: () => Sink.drain, - getOutputFd: () => Stream.empty, + getOutputFd: options?.getOutputFd ?? (() => Stream.empty), unref: Effect.succeed(Effect.void), }); } @@ -107,6 +115,7 @@ function makeManagerLayer(input: { readonly backendOutputLog?: Partial; readonly desktopState?: DesktopState.DesktopStateShape; readonly desktopWindow?: Partial; + readonly desktopTelemetryPublisher?: Partial; readonly config?: DesktopBackendManager.DesktopBackendStartConfig; }) { return DesktopBackendManager.layer.pipe( @@ -120,12 +129,21 @@ function makeManagerLayer(input: { }), input.spawnerLayer, input.httpClientLayer ?? healthyHttpClientLayer, + Layer.succeed(DesktopTelemetryPublisher.DesktopTelemetryPublisher, { + latest: Effect.succeed(Option.none()), + changes: Stream.empty, + encoded: Stream.empty, + handleControl: () => Effect.void, + ...input.desktopTelemetryPublisher, + }), input.desktopState ? 
Layer.succeed(DesktopState.DesktopState, input.desktopState) : DesktopState.layer, Layer.succeed(DesktopObservability.DesktopBackendOutputLog, { - writeSessionBoundary: () => Effect.void, + beginSession: () => Effect.void, writeOutputChunk: () => Effect.void, + persistFailure: () => Effect.void, + discardSession: Effect.void, ...input.backendOutputLog, } satisfies DesktopObservability.DesktopBackendOutputLogShape), Layer.succeed(DesktopWindow.DesktopWindow, { @@ -184,8 +202,7 @@ describe("DesktopBackendManager", () => { }).pipe(Effect.andThen(Deferred.succeed(ready, void 0))), }, backendOutputLog: { - writeSessionBoundary: ({ phase }) => - phase === "END" ? Queue.offer(exited, void 0).pipe(Effect.asVoid) : Effect.void, + persistFailure: () => Queue.offer(exited, void 0).pipe(Effect.asVoid), }, }); @@ -208,6 +225,8 @@ describe("DesktopBackendManager", () => { assert.equal(spawnedCommand.options.stderr, "pipe"); assert.equal(spawnedCommand.options.killSignal, "SIGTERM"); assert.isDefined(spawnedCommand.options.forceKillAfter); + assert.equal(spawnedCommand.options.additionalFds?.fd4?.type, "input"); + assert.equal(spawnedCommand.options.additionalFds?.fd5?.type, "output"); assert.equal( Duration.toMillis(Duration.fromInputUnsafe(spawnedCommand.options.forceKillAfter)), 2_000, @@ -218,6 +237,88 @@ describe("DesktopBackendManager", () => { }), ); + it.effect("routes desktop telemetry control messages from fd5 to the publisher", () => + Effect.gen(function* () { + const handled = yield* Deferred.make(); + const controlMessage = encodeDesktopTelemetryControl({ + version: 1, + type: "setDiagnosticsDemand", + enabled: true, + }); + const spawnerLayer = Layer.succeed( + ChildProcessSpawner.ChildProcessSpawner, + ChildProcessSpawner.make(() => + Effect.succeed( + makeProcess({ + getOutputFd: (fd) => + fd === 5 ? 
Stream.encodeText(Stream.make(`${controlMessage}\n`)) : Stream.empty, + exitCode: Deferred.await(handled).pipe(Effect.as(ChildProcessSpawner.ExitCode(0))), + }), + ), + ), + ); + const managerLayer = makeManagerLayer({ + spawnerLayer, + desktopTelemetryPublisher: { + handleControl: (message) => + Deferred.succeed(handled, message.enabled).pipe(Effect.asVoid), + }, + }); + + yield* Effect.gen(function* () { + const manager = yield* DesktopBackendManager.DesktopBackendManager; + yield* manager.start; + assert.isTrue(yield* Deferred.await(handled)); + }).pipe(Effect.provide(managerLayer)); + }), + ); + + it.effect("drains trailing child output before persisting an unexpected exit", () => + Effect.gen(function* () { + const persistedOutput = yield* Deferred.make>(); + const outputDrainStarted = yield* Deferred.make(); + const outputChunks = yield* Ref.make>([]); + const spawnerLayer = Layer.succeed( + ChildProcessSpawner.ChildProcessSpawner, + ChildProcessSpawner.make(() => + Effect.succeed( + makeProcess({ + stdout: Stream.fromEffect( + Deferred.succeed(outputDrainStarted, void 0).pipe( + Effect.andThen(Effect.sleep(Duration.millis(50))), + Effect.as(new TextEncoder().encode("trailing output\n")), + ), + ), + exitCode: Effect.succeed(ChildProcessSpawner.ExitCode(1)), + }), + ), + ), + ); + const managerLayer = makeManagerLayer({ + spawnerLayer, + httpClientLayer: httpClientLayer(() => Effect.never), + backendOutputLog: { + writeOutputChunk: (_streamName, chunk) => + Ref.update(outputChunks, (current) => [...current, new TextDecoder().decode(chunk)]), + persistFailure: () => + Ref.get(outputChunks).pipe( + Effect.flatMap((chunks) => Deferred.succeed(persistedOutput, chunks)), + Effect.asVoid, + ), + }, + }); + + yield* Effect.gen(function* () { + const manager = yield* DesktopBackendManager.DesktopBackendManager; + yield* manager.start; + yield* Deferred.await(outputDrainStarted); + yield* TestClock.adjust(Duration.millis(50)); + + assert.deepEqual(yield* 
Deferred.await(persistedOutput), ["trailing output\n"]); + }).pipe(Effect.provide(Layer.merge(TestClock.layer(), managerLayer))); + }), + ); + it.effect("retries HTTP readiness before reporting the backend ready", () => Effect.gen(function* () { const requestUrls: Array = []; @@ -255,8 +356,7 @@ describe("DesktopBackendManager", () => { }).pipe(Effect.andThen(Deferred.succeed(ready, void 0))), }, backendOutputLog: { - writeSessionBoundary: ({ phase }) => - phase === "END" ? Queue.offer(exited, void 0).pipe(Effect.asVoid) : Effect.void, + persistFailure: () => Queue.offer(exited, void 0).pipe(Effect.asVoid), }, }); @@ -289,6 +389,8 @@ describe("DesktopBackendManager", () => { const ready = yield* Deferred.make(); const backendReady = yield* Ref.make(false); const quitting = yield* Ref.make(false); + let persistedFailureCount = 0; + let discardedSessionCount = 0; const spawnerLayer = Layer.succeed( ChildProcessSpawner.ChildProcessSpawner, @@ -320,6 +422,15 @@ describe("DesktopBackendManager", () => { desktopWindow: { handleBackendReady: Deferred.succeed(ready, void 0).pipe(Effect.asVoid), }, + backendOutputLog: { + persistFailure: () => + Effect.sync(() => { + persistedFailureCount += 1; + }), + discardSession: Effect.sync(() => { + discardedSessionCount += 1; + }), + }, }); yield* Effect.gen(function* () { @@ -339,6 +450,8 @@ describe("DesktopBackendManager", () => { yield* manager.stop(); assert.equal(startCount, 1); assert.equal(closedCount, 1); + assert.equal(persistedFailureCount, 0); + assert.equal(discardedSessionCount, 1); const stoppedSnapshot = yield* manager.snapshot; assert.isFalse(yield* Ref.get(backendReady)); @@ -352,6 +465,7 @@ describe("DesktopBackendManager", () => { it.effect("restarts an unexpectedly exited backend with the Effect clock", () => Effect.gen(function* () { const starts = yield* Queue.unbounded(); + const failures = yield* Queue.unbounded(); let startCount = 0; const spawnerLayer = Layer.succeed( @@ -371,6 +485,9 @@ 
describe("DesktopBackendManager", () => { const managerLayer = makeManagerLayer({ spawnerLayer, httpClientLayer: httpClientLayer(() => Effect.never), + backendOutputLog: { + persistFailure: ({ details }) => Queue.offer(failures, details).pipe(Effect.asVoid), + }, }); yield* Effect.gen(function* () { @@ -378,6 +495,7 @@ describe("DesktopBackendManager", () => { yield* manager.start; assert.equal(yield* Queue.take(starts), 1); + assert.equal(yield* Queue.take(failures), "pid=123 code=1"); yield* TestClock.adjust(Duration.millis(499)); assert.equal(yield* Queue.size(starts), 0); @@ -434,6 +552,13 @@ describe("DesktopBackendManager", () => { yield* manager.start; assert.equal(yield* Queue.take(starts), 1); + let restartScheduled = false; + while (!restartScheduled) { + restartScheduled = (yield* manager.snapshot).restartScheduled; + if (!restartScheduled) { + yield* Effect.yieldNow; + } + } yield* manager.start; assert.equal(yield* Queue.take(starts), 2); diff --git a/apps/desktop/src/backend/DesktopBackendManager.ts b/apps/desktop/src/backend/DesktopBackendManager.ts index 07693a82707..94a4f419421 100644 --- a/apps/desktop/src/backend/DesktopBackendManager.ts +++ b/apps/desktop/src/backend/DesktopBackendManager.ts @@ -16,17 +16,21 @@ import * as Schema from "effect/Schema"; import * as Semaphore from "effect/Semaphore"; import * as Scope from "effect/Scope"; import * as Stream from "effect/Stream"; +import * as Ndjson from "effect/unstable/encoding/Ndjson"; import { HttpClient } from "effect/unstable/http"; import { ChildProcess, ChildProcessSpawner } from "effect/unstable/process"; import { DesktopBackendBootstrap, type DesktopBackendBootstrap as DesktopBackendBootstrapValue, + DesktopTelemetryControlMessage, + type DesktopTelemetryControlMessage as DesktopTelemetryControlMessageValue, } from "@t3tools/contracts"; import * as DesktopBackendConfiguration from "./DesktopBackendConfiguration.ts"; import * as DesktopObservability from "../app/DesktopObservability.ts"; 
import * as DesktopState from "../app/DesktopState.ts"; +import * as DesktopTelemetryPublisher from "../telemetry/DesktopTelemetryPublisher.ts"; import * as DesktopWindow from "../window/DesktopWindow.ts"; const INITIAL_RESTART_DELAY = Duration.millis(500); @@ -35,6 +39,7 @@ const DEFAULT_BACKEND_READINESS_TIMEOUT = Duration.minutes(1); const DEFAULT_BACKEND_READINESS_INTERVAL = Duration.millis(100); const DEFAULT_BACKEND_READINESS_REQUEST_TIMEOUT = Duration.seconds(1); const DEFAULT_BACKEND_TERMINATE_GRACE = Duration.seconds(2); +const DEFAULT_BACKEND_OUTPUT_DRAIN_TIMEOUT = Duration.millis(250); const BACKEND_READINESS_PATH = "/.well-known/t3/environment"; type BackendProcessLayerServices = ChildProcessSpawner.ChildProcessSpawner | HttpClient.HttpClient; @@ -88,6 +93,10 @@ class BackendProcessSpawnError extends Data.TaggedError("BackendProcessSpawnErro type BackendProcessError = BackendProcessBootstrapEncodeError | BackendProcessSpawnError; interface RunBackendProcessOptions extends DesktopBackendStartConfig { + readonly desktopTelemetryStream: Stream.Stream; + readonly onDesktopTelemetryControl?: ( + message: DesktopTelemetryControlMessageValue, + ) => Effect.Effect; readonly readinessTimeout?: Duration.Duration; readonly onStarted?: (pid: number) => Effect.Effect; readonly onReady?: () => Effect.Effect; @@ -224,6 +233,7 @@ function drainBackendOutput( } const encodeBootstrapJson = Schema.encodeEffect(Schema.fromJsonString(DesktopBackendBootstrap)); +const decodeDesktopTelemetryControl = Schema.decodeUnknownEffect(DesktopTelemetryControlMessage); const runBackendProcess = Effect.fn("runBackendProcess")(function* ( options: RunBackendProcessOptions, @@ -233,6 +243,23 @@ const runBackendProcess = Effect.fn("runBackendProcess")(function* ( Effect.mapError((cause) => new BackendProcessBootstrapEncodeError({ cause })), ); const onOutput = options.onOutput ?? 
(() => Effect.void); + const additionalFds: Record<`fd${number}`, ChildProcess.AdditionalFdConfig> = { + fd3: { + type: "input", + stream: Stream.encodeText(Stream.make(`${bootstrapJson}\n`)), + }, + }; + if (options.bootstrap.desktopTelemetryFd !== undefined) { + additionalFds[`fd${options.bootstrap.desktopTelemetryFd}`] = { + type: "input", + stream: options.desktopTelemetryStream, + }; + } + if (options.bootstrap.desktopTelemetryControlFd !== undefined) { + additionalFds[`fd${options.bootstrap.desktopTelemetryControlFd}`] = { + type: "output", + }; + } const command = ChildProcess.make( options.executablePath, [options.entryPath, "--bootstrap-fd", "3"], @@ -247,23 +274,47 @@ const runBackendProcess = Effect.fn("runBackendProcess")(function* ( stderr: options.captureOutput ? "pipe" : "inherit", killSignal: "SIGTERM", forceKillAfter: DEFAULT_BACKEND_TERMINATE_GRACE, - additionalFds: { - fd3: { - type: "input", - stream: Stream.encodeText(Stream.make(`${bootstrapJson}\n`)), - }, - }, + additionalFds, }, ); const handle = yield* spawner .spawn(command) .pipe(Effect.mapError((cause) => new BackendProcessSpawnError({ cause }))); + const outputFibers: Array> = []; yield* options.onStarted?.(handle.pid) ?? 
Effect.void; + if ( + options.bootstrap.desktopTelemetryControlFd !== undefined && + options.onDesktopTelemetryControl !== undefined + ) { + const controlFd = options.bootstrap.desktopTelemetryControlFd; + const handleControl = options.onDesktopTelemetryControl; + yield* handle.getOutputFd(controlFd).pipe( + Stream.pipeThroughChannel(Ndjson.decode({ ignoreEmptyLines: true })), + Stream.mapEffect((message) => decodeDesktopTelemetryControl(message)), + Stream.runForEach(handleControl), + Effect.catchCause((cause) => + logBackendManagerWarning("desktop telemetry control stream stopped", { + fd: controlFd, + cause: Cause.pretty(cause), + }), + ), + Effect.ensuring( + handleControl({ + version: 1, + type: "setDiagnosticsDemand", + enabled: false, + }), + ), + Effect.forkScoped, + ); + } if (options.captureOutput) { - yield* drainBackendOutput("stdout", handle.stdout, onOutput).pipe(Effect.forkScoped); - yield* drainBackendOutput("stderr", handle.stderr, onOutput).pipe(Effect.forkScoped); + outputFibers.push( + yield* drainBackendOutput("stdout", handle.stdout, onOutput).pipe(Effect.forkScoped), + yield* drainBackendOutput("stderr", handle.stderr, onOutput).pipe(Effect.forkScoped), + ); } yield* waitForHttpReady( options.httpBaseUrl, @@ -274,7 +325,12 @@ const runBackendProcess = Effect.fn("runBackendProcess")(function* ( Effect.forkScoped, ); - return describeProcessExit(yield* Effect.result(handle.exitCode)); + const exit = describeProcessExit(yield* Effect.result(handle.exitCode)); + yield* Effect.forEach(outputFibers, Fiber.await, { + concurrency: "unbounded", + discard: true, + }).pipe(Effect.timeout(DEFAULT_BACKEND_OUTPUT_DRAIN_TIMEOUT), Effect.ignore); + return exit; }); const makeDesktopBackendManager = Effect.fn("makeDesktopBackendManager")(function* () { @@ -283,6 +339,7 @@ const makeDesktopBackendManager = Effect.fn("makeDesktopBackendManager")(functio const configuration = yield* DesktopBackendConfiguration.DesktopBackendConfiguration; const backendOutputLog 
= yield* DesktopObservability.DesktopBackendOutputLog; const desktopState = yield* DesktopState.DesktopState; + const desktopTelemetryPublisher = yield* DesktopTelemetryPublisher.DesktopTelemetryPublisher; const desktopWindow = yield* DesktopWindow.DesktopWindow; const spawner = yield* ChildProcessSpawner.ChildProcessSpawner; const httpClient = yield* HttpClient.HttpClient; @@ -419,10 +476,13 @@ const makeDesktopBackendManager = Effect.fn("makeDesktopBackendManager")(functio if (isCurrentRun) { if (Option.isSome(pid)) { - yield* backendOutputLog.writeSessionBoundary({ - phase: "END", - details: `pid=${pid.value} ${reason}`, - }); + if (nextState.desiredRunning) { + yield* backendOutputLog.persistFailure({ + details: `pid=${pid.value} ${reason}`, + }); + } else { + yield* backendOutputLog.discardSession; + } } yield* Ref.set(desktopState.backendReady, false); } @@ -436,13 +496,14 @@ const makeDesktopBackendManager = Effect.fn("makeDesktopBackendManager")(functio const program = runBackendProcess({ ...config.value, + desktopTelemetryStream: desktopTelemetryPublisher.encoded, + onDesktopTelemetryControl: desktopTelemetryPublisher.handleControl, onStarted: Effect.fn("desktop.backendManager.onStarted")(function* (pid) { yield* updateActiveRun(runId, (run) => ({ ...run, pid: Option.some(pid), })); - yield* backendOutputLog.writeSessionBoundary({ - phase: "START", + yield* backendOutputLog.beginSession({ details: `pid=${pid} port=${config.value.bootstrap.port} cwd=${config.value.cwd}`, }); }), @@ -475,10 +536,16 @@ const makeDesktopBackendManager = Effect.fn("makeDesktopBackendManager")(functio ), ); }), - onReadinessFailure: (error) => - logBackendManagerWarning("backend readiness check failed during bootstrap", { - error: error.message, - }), + onReadinessFailure: Effect.fn("desktop.backendManager.onReadinessFailure")( + function* (error) { + yield* logBackendManagerWarning("backend readiness check failed during bootstrap", { + error: error.message, + }); + yield* 
backendOutputLog.persistFailure({ + details: error.message, + }); + }, + ), onOutput: (streamName, chunk) => backendOutputLog.writeOutputChunk(streamName, chunk), }).pipe( Effect.provideService(ChildProcessSpawner.ChildProcessSpawner, spawner), @@ -589,7 +656,7 @@ const makeDesktopBackendManager = Effect.fn("makeDesktopBackendManager")(functio }); yield* Option.match(active, { onNone: () => Effect.void, - onSome: (run) => closeRun(run, options), + onSome: (run) => closeRun(run, options).pipe(Effect.andThen(backendOutputLog.discardSession)), }); }); diff --git a/apps/desktop/src/electron/ElectronApp.ts b/apps/desktop/src/electron/ElectronApp.ts index 49b432fd5dd..8648b837335 100644 --- a/apps/desktop/src/electron/ElectronApp.ts +++ b/apps/desktop/src/electron/ElectronApp.ts @@ -30,6 +30,7 @@ export interface ElectronAppShape { ) => Effect.Effect; readonly setAppUserModelId: (id: string) => Effect.Effect; readonly requestSingleInstanceLock: Effect.Effect; + readonly getAppMetrics: Effect.Effect>; readonly isDefaultProtocolClient: (protocol: string) => Effect.Effect; readonly setAsDefaultProtocolClient: ( protocol: string, @@ -101,6 +102,7 @@ const make = ElectronApp.of({ Electron.app.setAppUserModelId(id); }), requestSingleInstanceLock: Effect.sync(() => Electron.app.requestSingleInstanceLock()), + getAppMetrics: Effect.sync(() => Electron.app.getAppMetrics()), isDefaultProtocolClient: (protocol) => Effect.sync(() => Electron.app.isDefaultProtocolClient(protocol)), setAsDefaultProtocolClient: (protocol, path, args) => diff --git a/apps/desktop/src/electron/ElectronPowerMonitor.ts b/apps/desktop/src/electron/ElectronPowerMonitor.ts new file mode 100644 index 00000000000..8578b407462 --- /dev/null +++ b/apps/desktop/src/electron/ElectronPowerMonitor.ts @@ -0,0 +1,89 @@ +import * as Context from "effect/Context"; +import * as Effect from "effect/Effect"; +import * as Layer from "effect/Layer"; +import * as Scope from "effect/Scope"; + +import * as Electron from 
"electron"; + +export type ElectronThermalState = ReturnType; +export type ElectronIdleState = ReturnType; + +export interface ElectronPowerMonitorShape { + readonly isOnBatteryPower: Effect.Effect; + readonly getSystemIdleTime: Effect.Effect; + readonly getSystemIdleState: (idleThresholdSeconds: number) => Effect.Effect; + readonly getCurrentThermalState: Effect.Effect; + readonly onSimpleEvent: ( + eventName: "lock-screen" | "unlock-screen" | "on-ac" | "on-battery" | "suspend" | "resume", + listener: () => void, + ) => Effect.Effect; + readonly onThermalStateChange: ( + listener: (state: ElectronThermalState) => void, + ) => Effect.Effect; + readonly onSpeedLimitChange: ( + listener: (limit: number) => void, + ) => Effect.Effect; +} + +export class ElectronPowerMonitor extends Context.Service< + ElectronPowerMonitor, + ElectronPowerMonitorShape +>()("@t3tools/desktop/electron/ElectronPowerMonitor") {} + +const onSimpleEvent: ElectronPowerMonitorShape["onSimpleEvent"] = (eventName, listener) => + Effect.acquireRelease( + Effect.sync(() => { + Electron.powerMonitor.on(eventName as any, listener as any); + }), + () => + Effect.sync(() => { + Electron.powerMonitor.removeListener(eventName as any, listener as any); + }), + ).pipe(Effect.asVoid); + +const onThermalStateChange: ElectronPowerMonitorShape["onThermalStateChange"] = (listener) => { + const wrapped = ( + event: Electron.Event, + ): void => { + listener(event.state); + }; + return Effect.acquireRelease( + Effect.sync(() => { + Electron.powerMonitor.on("thermal-state-change", wrapped); + }), + () => + Effect.sync(() => { + Electron.powerMonitor.removeListener("thermal-state-change", wrapped); + }), + ).pipe(Effect.asVoid); +}; + +const onSpeedLimitChange: ElectronPowerMonitorShape["onSpeedLimitChange"] = (listener) => { + const wrapped = ( + event: Electron.Event, + ): void => { + listener(event.limit); + }; + return Effect.acquireRelease( + Effect.sync(() => { + Electron.powerMonitor.on("speed-limit-change", 
wrapped); + }), + () => + Effect.sync(() => { + Electron.powerMonitor.removeListener("speed-limit-change", wrapped); + }), + ).pipe(Effect.asVoid); +}; + +export const make = ElectronPowerMonitor.of({ + isOnBatteryPower: Effect.sync(() => Electron.powerMonitor.isOnBatteryPower()), + getSystemIdleTime: Effect.sync(() => Electron.powerMonitor.getSystemIdleTime()), + getSystemIdleState: (idleThresholdSeconds) => + Effect.sync(() => Electron.powerMonitor.getSystemIdleState(idleThresholdSeconds)), + getCurrentThermalState: Effect.sync(() => Electron.powerMonitor.getCurrentThermalState()), + onSimpleEvent, + onThermalStateChange, + onSpeedLimitChange, +}); + +export const layer = Layer.succeed(ElectronPowerMonitor, make); diff --git a/apps/desktop/src/main.ts b/apps/desktop/src/main.ts index 33eac8ea646..d3c82d2f7db 100644 --- a/apps/desktop/src/main.ts +++ b/apps/desktop/src/main.ts @@ -19,6 +19,7 @@ import * as DesktopIpc from "./ipc/DesktopIpc.ts"; import * as ElectronApp from "./electron/ElectronApp.ts"; import * as ElectronDialog from "./electron/ElectronDialog.ts"; import * as ElectronMenu from "./electron/ElectronMenu.ts"; +import * as ElectronPowerMonitor from "./electron/ElectronPowerMonitor.ts"; import * as ElectronProtocol from "./electron/ElectronProtocol.ts"; import * as DesktopSecretStorage from "./electron/ElectronSafeStorage.ts"; import * as ElectronShell from "./electron/ElectronShell.ts"; @@ -45,6 +46,7 @@ import * as DesktopShellEnvironment from "./shell/DesktopShellEnvironment.ts"; import * as DesktopSshEnvironment from "./ssh/DesktopSshEnvironment.ts"; import * as DesktopSshPasswordPrompts from "./ssh/DesktopSshPasswordPrompts.ts"; import * as DesktopState from "./app/DesktopState.ts"; +import * as DesktopTelemetryPublisher from "./telemetry/DesktopTelemetryPublisher.ts"; import * as DesktopUpdates from "./updates/DesktopUpdates.ts"; import * as PreviewBrowserSession from "./preview/BrowserSession.ts"; import * as PreviewManager from 
"./preview/Manager.ts"; @@ -104,6 +106,7 @@ const electronLayer = Layer.mergeAll( ElectronApp.layer, ElectronDialog.layer, ElectronMenu.layer, + ElectronPowerMonitor.layer, ElectronProtocol.layer, DesktopSecretStorage.layer, ElectronShell.layer, @@ -146,6 +149,7 @@ const desktopWindowLayer = DesktopWindow.layer.pipe( const desktopBackendLayer = DesktopBackendManager.layer.pipe( Layer.provideMerge(DesktopAppIdentity.layer), Layer.provideMerge(DesktopBackendConfiguration.layer), + Layer.provideMerge(DesktopTelemetryPublisher.layer), Layer.provideMerge(desktopWindowLayer), ); diff --git a/apps/desktop/src/telemetry/DesktopTelemetryPublisher.test.ts b/apps/desktop/src/telemetry/DesktopTelemetryPublisher.test.ts new file mode 100644 index 00000000000..1fe8934c1b5 --- /dev/null +++ b/apps/desktop/src/telemetry/DesktopTelemetryPublisher.test.ts @@ -0,0 +1,196 @@ +import { assert, describe, it } from "@effect/vitest"; +import * as Duration from "effect/Duration"; +import * as Effect from "effect/Effect"; +import * as Fiber from "effect/Fiber"; +import * as Layer from "effect/Layer"; +import * as Option from "effect/Option"; +import * as Ref from "effect/Ref"; +import * as Stream from "effect/Stream"; +import * as TestClock from "effect/testing/TestClock"; + +import type * as Electron from "electron"; + +import * as ElectronApp from "../electron/ElectronApp.ts"; +import * as ElectronPowerMonitor from "../electron/ElectronPowerMonitor.ts"; +import * as DesktopTelemetryPublisher from "./DesktopTelemetryPublisher.ts"; + +function makeElectronAppLayer( + metrics: ReadonlyArray, + onMetricsRead: () => void = () => undefined, +) { + return Layer.succeed(ElectronApp.ElectronApp, { + metadata: Effect.die("unexpected metadata read"), + name: Effect.succeed("T3 Code"), + whenReady: Effect.void, + quit: Effect.void, + exit: () => Effect.void, + relaunch: () => Effect.void, + setPath: () => Effect.void, + setName: () => Effect.void, + setAboutPanelOptions: () => Effect.void, + 
setAppUserModelId: () => Effect.void, + requestSingleInstanceLock: Effect.succeed(true), + getAppMetrics: Effect.sync(() => { + onMetricsRead(); + return metrics; + }), + isDefaultProtocolClient: () => Effect.succeed(false), + setAsDefaultProtocolClient: () => Effect.succeed(true), + setDesktopName: () => Effect.void, + setDockIcon: () => Effect.void, + appendCommandLineSwitch: () => Effect.void, + on: () => Effect.void, + } satisfies ElectronApp.ElectronAppShape); +} + +describe("DesktopTelemetryPublisher", () => { + it.effect("publishes Electron metrics and event-driven power state over NDJSON", () => + Effect.gen(function* () { + const onBattery = yield* Ref.make(false); + let metricsReadCount = 0; + const simpleListeners = new Map void>(); + let thermalListener: ((state: ElectronPowerMonitor.ElectronThermalState) => void) | null = + null; + let speedLimitListener: ((limit: number) => void) | null = null; + const metrics = [ + { + pid: 4_242, + type: "Browser", + creationTime: 1_000, + name: "electron", + cpu: { + percentCPUUsage: 12.5, + cumulativeCPUUsage: 3.25, + idleWakeupsPerSecond: 7, + }, + memory: { + workingSetSize: 2_048, + peakWorkingSetSize: 4_096, + }, + } as Electron.ProcessMetric, + ]; + const powerLayer = Layer.succeed( + ElectronPowerMonitor.ElectronPowerMonitor, + ElectronPowerMonitor.ElectronPowerMonitor.of({ + isOnBatteryPower: Ref.get(onBattery), + getSystemIdleTime: Effect.succeed(5), + getSystemIdleState: () => Effect.succeed("active"), + getCurrentThermalState: Effect.succeed("nominal"), + onSimpleEvent: (eventName, listener) => + Effect.sync(() => { + simpleListeners.set(eventName, listener); + }), + onThermalStateChange: (listener) => + Effect.sync(() => { + thermalListener = listener; + }), + onSpeedLimitChange: (listener) => + Effect.sync(() => { + speedLimitListener = listener; + }), + }), + ); + const layer = DesktopTelemetryPublisher.layer.pipe( + Layer.provide( + Layer.mergeAll( + makeElectronAppLayer(metrics, () => { + 
metricsReadCount += 1; + }), + powerLayer, + ), + ), + ); + + yield* Effect.gen(function* () { + const publisher = yield* DesktopTelemetryPublisher.DesktopTelemetryPublisher; + const encoded = yield* publisher.encoded.pipe(Stream.take(2), Stream.runCollect); + const decoder = new TextDecoder(); + const messages = Array.from(encoded, (bytes) => JSON.parse(decoder.decode(bytes).trim())); + + assert.equal(messages[0]?.type, "desktopTelemetryHello"); + assert.equal(messages[0]?.electronPid, process.pid); + assert.equal(messages[1]?.type, "desktopTelemetry"); + assert.deepEqual(messages[1]?.electronProcesses, []); + assert.equal(messages[1]?.electronPid, process.pid); + assert.equal(metricsReadCount, 0); + + const nextSnapshotFiber = yield* Stream.runHead(publisher.changes).pipe(Effect.forkChild); + yield* Effect.yieldNow; + yield* publisher.handleControl({ + version: 1, + type: "setDiagnosticsDemand", + enabled: true, + }); + const demandedSnapshot = Option.getOrThrow(yield* Fiber.join(nextSnapshotFiber)); + assert.equal(demandedSnapshot.electronProcesses[0]?.pid, 4_242); + assert.equal(demandedSnapshot.electronProcesses[0]?.cpuPercent, 12.5); + assert.equal(demandedSnapshot.electronProcesses[0]?.workingSetBytes, 2_048 * 1_024); + assert.equal(metricsReadCount, 1); + + const batterySnapshotFiber = yield* Stream.runHead(publisher.changes).pipe( + Effect.forkChild, + ); + yield* Effect.yieldNow; + yield* Ref.set(onBattery, true); + simpleListeners.get("on-battery")?.(); + const batterySnapshot = Option.getOrThrow(yield* Fiber.join(batterySnapshotFiber)); + assert.equal(batterySnapshot.power.onBattery, "true"); + + const metricsAfterBatteryEvent = metricsReadCount; + yield* TestClock.adjust(Duration.millis(4_999)); + assert.equal(metricsReadCount, metricsAfterBatteryEvent); + yield* TestClock.adjust(Duration.millis(1)); + assert.equal(metricsReadCount, metricsAfterBatteryEvent + 1); + + const constrainedSnapshotFiber = yield* Stream.runHead(publisher.changes).pipe( + 
Effect.forkChild, + ); + yield* Effect.yieldNow; + thermalListener?.("serious"); + const constrainedSnapshot = Option.getOrThrow(yield* Fiber.join(constrainedSnapshotFiber)); + assert.equal(constrainedSnapshot.power.thermalState, "serious"); + + const metricsAfterThermalEvent = metricsReadCount; + yield* TestClock.adjust(Duration.millis(14_999)); + assert.equal(metricsReadCount, metricsAfterThermalEvent); + yield* TestClock.adjust(Duration.millis(1)); + assert.equal(metricsReadCount, metricsAfterThermalEvent + 1); + + const speedLimitSnapshotFiber = yield* Stream.runHead(publisher.changes).pipe( + Effect.forkChild, + ); + yield* Effect.yieldNow; + speedLimitListener?.(65); + const speedLimitSnapshot = Option.getOrThrow(yield* Fiber.join(speedLimitSnapshotFiber)); + assert.equal(Option.getOrNull(speedLimitSnapshot.speedLimitPercent), 65); + + const stoppedSnapshotFiber = yield* Stream.runHead(publisher.changes).pipe( + Effect.forkChild, + ); + yield* Effect.yieldNow; + yield* publisher.handleControl({ + version: 1, + type: "setDiagnosticsDemand", + enabled: false, + }); + const stoppedSnapshot = Option.getOrThrow(yield* Fiber.join(stoppedSnapshotFiber)); + assert.deepEqual(stoppedSnapshot.electronProcesses, []); + const backgroundSequence = stoppedSnapshot.sequence; + const metricsAfterStopping = metricsReadCount; + + yield* TestClock.adjust(Duration.seconds(29)); + assert.equal( + (yield* publisher.latest).pipe(Option.getOrThrow).sequence, + backgroundSequence, + ); + assert.equal(metricsReadCount, metricsAfterStopping); + yield* TestClock.adjust(Duration.seconds(1)); + assert.equal( + (yield* publisher.latest).pipe(Option.getOrThrow).sequence, + backgroundSequence + 1, + ); + assert.equal(metricsReadCount, metricsAfterStopping); + }).pipe(Effect.provide(layer)); + }), + ); +}); diff --git a/apps/desktop/src/telemetry/DesktopTelemetryPublisher.ts b/apps/desktop/src/telemetry/DesktopTelemetryPublisher.ts new file mode 100644 index 00000000000..8aa765a23b7 --- 
/dev/null +++ b/apps/desktop/src/telemetry/DesktopTelemetryPublisher.ts @@ -0,0 +1,264 @@ +import { + DesktopHostTelemetryMessage, + type DesktopHostTelemetrySnapshot, + type DesktopTelemetryControlMessage, + type HostPowerSnapshot, +} from "@t3tools/contracts"; +import * as Context from "effect/Context"; +import * as DateTime from "effect/DateTime"; +import * as Duration from "effect/Duration"; +import * as Effect from "effect/Effect"; +import * as Layer from "effect/Layer"; +import * as Option from "effect/Option"; +import * as PubSub from "effect/PubSub"; +import * as Queue from "effect/Queue"; +import * as Ref from "effect/Ref"; +import * as Schema from "effect/Schema"; +import * as Stream from "effect/Stream"; + +import * as ElectronApp from "../electron/ElectronApp.ts"; +import * as ElectronPowerMonitor from "../electron/ElectronPowerMonitor.ts"; + +const LIVE_SAMPLE_INTERVAL = Duration.seconds(1); +const BATTERY_SAMPLE_INTERVAL = Duration.seconds(5); +const CONSTRAINED_SAMPLE_INTERVAL = Duration.seconds(15); +const BACKGROUND_HEARTBEAT_INTERVAL = Duration.seconds(30); +const IDLE_THRESHOLD_SECONDS = 60; +const encodeMessage = Schema.encodeSync(Schema.fromJsonString(DesktopHostTelemetryMessage)); +const textEncoder = new TextEncoder(); + +type PowerEvent = + | { readonly type: "locked"; readonly value: boolean } + | { readonly type: "suspended"; readonly value: boolean } + | { readonly type: "onBattery"; readonly value: boolean } + | { readonly type: "thermal"; readonly value: HostPowerSnapshot["thermalState"] } + | { readonly type: "speedLimit"; readonly value: number }; + +interface PowerState { + readonly locked: HostPowerSnapshot["locked"]; + readonly suspended: boolean; + readonly onBattery: HostPowerSnapshot["onBattery"]; + readonly thermalState: HostPowerSnapshot["thermalState"]; + readonly speedLimitPercent: Option.Option; +} + +export interface DesktopTelemetryPublisherShape { + readonly latest: Effect.Effect>; + readonly changes: Stream.Stream; + 
readonly encoded: Stream.Stream; + readonly handleControl: (message: DesktopTelemetryControlMessage) => Effect.Effect; +} + +export class DesktopTelemetryPublisher extends Context.Service< + DesktopTelemetryPublisher, + DesktopTelemetryPublisherShape +>()("@t3tools/desktop/telemetry/DesktopTelemetryPublisher") {} + +function booleanState(value: boolean): HostPowerSnapshot["onBattery"] { + return value ? "true" : "false"; +} + +function idleState(value: ElectronPowerMonitor.ElectronIdleState): HostPowerSnapshot["idle"] { + switch (value) { + case "active": + return "false"; + case "idle": + case "locked": + return "true"; + case "unknown": + return "unknown"; + } +} + +function updatePowerState(state: PowerState, event: PowerEvent): PowerState { + switch (event.type) { + case "locked": + return { ...state, locked: booleanState(event.value) }; + case "suspended": + return { ...state, suspended: event.value }; + case "onBattery": + return { ...state, onBattery: booleanState(event.value) }; + case "thermal": + return { ...state, thermalState: event.value }; + case "speedLimit": + return { ...state, speedLimitPercent: Option.some(event.value) }; + } +} + +function sampleInterval(power: PowerState, diagnosticsDemand: boolean): Duration.Duration { + if (!diagnosticsDemand) return BACKGROUND_HEARTBEAT_INTERVAL; + if ( + power.suspended || + power.locked === "true" || + power.thermalState === "serious" || + power.thermalState === "critical" + ) { + return CONSTRAINED_SAMPLE_INTERVAL; + } + if (power.onBattery === "true") return BATTERY_SAMPLE_INTERVAL; + return LIVE_SAMPLE_INTERVAL; +} + +export const make = Effect.fn("desktop.telemetryPublisher.make")(function* () { + const electronApp = yield* ElectronApp.ElectronApp; + const powerMonitor = yield* ElectronPowerMonitor.ElectronPowerMonitor; + yield* electronApp.whenReady; + + const initialPowerState: PowerState = { + locked: "unknown", + suspended: false, + onBattery: booleanState(yield* powerMonitor.isOnBatteryPower), + 
thermalState: yield* powerMonitor.getCurrentThermalState, + speedLimitPercent: Option.none(), + }; + const powerState = yield* Ref.make(initialPowerState); + const powerEvents = yield* Queue.unbounded(); + const sampleTriggers = yield* Queue.sliding(1); + const diagnosticsDemand = yield* Ref.make(false); + const latest = yield* Ref.make(Option.none()); + const changes = yield* PubSub.sliding(8); + const sequence = yield* Ref.make(0); + + const offer = (event: PowerEvent): void => { + Queue.offerUnsafe(powerEvents, event); + }; + yield* Effect.all( + [ + powerMonitor.onSimpleEvent("lock-screen", () => offer({ type: "locked", value: true })), + powerMonitor.onSimpleEvent("unlock-screen", () => offer({ type: "locked", value: false })), + powerMonitor.onSimpleEvent("suspend", () => offer({ type: "suspended", value: true })), + powerMonitor.onSimpleEvent("resume", () => offer({ type: "suspended", value: false })), + powerMonitor.onSimpleEvent("on-battery", () => offer({ type: "onBattery", value: true })), + powerMonitor.onSimpleEvent("on-ac", () => offer({ type: "onBattery", value: false })), + powerMonitor.onThermalStateChange((value) => offer({ type: "thermal", value })), + powerMonitor.onSpeedLimitChange((value) => offer({ type: "speedLimit", value })), + ], + { concurrency: "unbounded" }, + ); + yield* Effect.forever( + Queue.take(powerEvents).pipe( + Effect.flatMap((event) => Ref.update(powerState, (state) => updatePowerState(state, event))), + Effect.andThen(Queue.offer(sampleTriggers, undefined)), + ), + ).pipe(Effect.forkScoped); + + const sampleOnce = Effect.gen(function* () { + const sampledAt = yield* DateTime.now; + const sampledAtUnixMs = DateTime.toEpochMillis(sampledAt); + const demand = yield* Ref.get(diagnosticsDemand); + const [currentPower, idleSeconds, systemIdleState, onBattery, metrics] = yield* Effect.all( + [ + Ref.get(powerState), + powerMonitor.getSystemIdleTime, + powerMonitor.getSystemIdleState(IDLE_THRESHOLD_SECONDS), + 
powerMonitor.isOnBatteryPower, + demand ? electronApp.getAppMetrics : Effect.succeed([]), + ], + { concurrency: "unbounded" }, + ); + const nextSequence = yield* Ref.modify(sequence, (current) => [current + 1, current + 1]); + const locked = systemIdleState === "locked" ? "true" : currentPower.locked; + const snapshot: DesktopHostTelemetrySnapshot = { + version: 1, + type: "desktopTelemetry", + sequence: nextSequence, + sampledAtUnixMs, + electronPid: process.pid, + power: { + source: "electron-main", + idle: idleState(systemIdleState), + idleSeconds, + locked, + suspended: currentPower.suspended, + onBattery: booleanState(onBattery), + lowPowerMode: "unknown", + thermalState: currentPower.thermalState, + stale: false, + updatedAt: sampledAt, + }, + speedLimitPercent: currentPower.speedLimitPercent, + electronProcesses: metrics.map((metric) => ({ + pid: metric.pid, + creationTimeMs: metric.creationTime, + type: metric.type, + ...(metric.name === undefined ? {} : { name: metric.name }), + ...(metric.serviceName === undefined ? {} : { serviceName: metric.serviceName }), + cpuPercent: metric.cpu.percentCPUUsage, + ...(metric.cpu.cumulativeCPUUsage === undefined + ? 
{} + : { cumulativeCpuSeconds: metric.cpu.cumulativeCPUUsage }), + idleWakeupsPerSecond: metric.cpu.idleWakeupsPerSecond, + workingSetBytes: Math.max(0, Math.round(metric.memory.workingSetSize * 1024)), + peakWorkingSetBytes: Math.max(0, Math.round(metric.memory.peakWorkingSetSize * 1024)), + })), + }; + + yield* Ref.set(latest, Option.some(snapshot)); + yield* PubSub.publish(changes, snapshot); + }).pipe( + Effect.catchCause((cause) => + Effect.logWarning("Failed to sample Electron telemetry", { + cause: String(cause), + }), + ), + ); + + yield* Effect.gen(function* () { + yield* sampleOnce; + while (true) { + const [currentPower, demand] = yield* Effect.all([ + Ref.get(powerState), + Ref.get(diagnosticsDemand), + ]); + yield* Effect.raceFirst( + Queue.take(sampleTriggers), + Effect.sleep(sampleInterval(currentPower, demand)), + ); + yield* sampleOnce; + } + }).pipe(Effect.forkScoped); + + const handleControl: DesktopTelemetryPublisherShape["handleControl"] = (message) => { + switch (message.type) { + case "setDiagnosticsDemand": + return Ref.getAndSet(diagnosticsDemand, message.enabled).pipe( + Effect.flatMap((previous) => + previous === message.enabled + ? 
Effect.void + : Queue.offer(sampleTriggers, undefined).pipe(Effect.asVoid), + ), + ); + } + }; + + const snapshots = Stream.concat( + Stream.unwrap( + Ref.get(latest).pipe( + Effect.map( + Option.match({ + onNone: () => Stream.empty, + onSome: Stream.make, + }), + ), + ), + ), + Stream.fromPubSub(changes), + ); + const encoded = Stream.concat( + Stream.make({ + version: 1, + type: "desktopTelemetryHello", + electronPid: process.pid, + } as const), + snapshots, + ).pipe(Stream.map((message) => textEncoder.encode(`${encodeMessage(message)}\n`))); + + return DesktopTelemetryPublisher.of({ + latest: Ref.get(latest), + changes: Stream.fromPubSub(changes), + encoded, + handleControl, + }); +}); + +export const layer = Layer.effect(DesktopTelemetryPublisher, make()); diff --git a/apps/desktop/src/window/DesktopApplicationMenu.test.ts b/apps/desktop/src/window/DesktopApplicationMenu.test.ts index 62d619fe18b..08afcd2fc13 100644 --- a/apps/desktop/src/window/DesktopApplicationMenu.test.ts +++ b/apps/desktop/src/window/DesktopApplicationMenu.test.ts @@ -40,6 +40,7 @@ const electronAppLayer = Layer.succeed(ElectronApp.ElectronApp, { setAboutPanelOptions: () => Effect.void, setAppUserModelId: () => Effect.void, requestSingleInstanceLock: Effect.succeed(true), + getAppMetrics: Effect.succeed([]), isDefaultProtocolClient: () => Effect.succeed(false), setAsDefaultProtocolClient: () => Effect.succeed(true), setDesktopName: () => Effect.void, diff --git a/apps/server/src/background/BackgroundPolicy.test.ts b/apps/server/src/background/BackgroundPolicy.test.ts new file mode 100644 index 00000000000..084bb069fc7 --- /dev/null +++ b/apps/server/src/background/BackgroundPolicy.test.ts @@ -0,0 +1,217 @@ +import { assert, describe, it } from "@effect/vitest"; +import { + AuthSessionId, + RpcClientId, + type HostPowerSnapshot, + type ClientActivityReportInput, +} from "@t3tools/contracts"; +import * as DateTime from "effect/DateTime"; +import * as Effect from "effect/Effect"; +import * as 
Layer from "effect/Layer"; +import * as PubSub from "effect/PubSub"; +import * as Stream from "effect/Stream"; + +import { ServerSettingsService } from "../serverSettings.ts"; +import * as BackgroundPolicy from "./BackgroundPolicy.ts"; +import * as HostPowerMonitor from "./HostPowerMonitor.ts"; + +const TEST_NOW = DateTime.makeUnsafe("2026-05-13T00:00:00.000Z"); + +const nominalHostPower: HostPowerSnapshot = { + source: "unknown", + idle: "unknown", + idleSeconds: null, + locked: "unknown", + suspended: false, + onBattery: "unknown", + lowPowerMode: "unknown", + thermalState: "unknown", + stale: true, + updatedAt: TEST_NOW, +}; + +const constrainedHostPower: HostPowerSnapshot = { + ...nominalHostPower, + lowPowerMode: "true", + stale: false, +}; + +function makeReport(overrides: Partial = {}): ClientActivityReportInput { + return { + clientId: "client-1", + clientKind: "web", + visible: true, + focused: true, + recentlyInteracted: true, + scopes: [{ type: "vcs-status", cwd: "/repo" }], + ttlMs: 45_000, + observedAt: TEST_NOW, + ...overrides, + }; +} + +function makeLayer( + hostPower: HostPowerSnapshot, + settingsOverrides: Parameters[0] = {}, +) { + const hostLayer = Layer.effect( + HostPowerMonitor.HostPowerMonitor, + Effect.gen(function* () { + const changes = yield* PubSub.sliding(1); + let snapshot = hostPower; + return HostPowerMonitor.HostPowerMonitor.of({ + snapshot: Effect.sync(() => snapshot), + report: (next) => + Effect.sync(() => { + snapshot = next; + }).pipe(Effect.andThen(PubSub.publish(changes, next)), Effect.asVoid), + streamChanges: Stream.fromPubSub(changes), + }); + }), + ); + return BackgroundPolicy.layer.pipe( + Layer.provide(Layer.merge(hostLayer, ServerSettingsService.layerTest(settingsOverrides))), + ); +} + +describe("BackgroundPolicy", () => { + it.effect("records foreground scoped client demand", () => + Effect.gen(function* () { + const policy = yield* BackgroundPolicy.BackgroundPolicy; + yield* policy.reportClientActivity( + 
AuthSessionId.make("session-1"), + RpcClientId.make(1), + makeReport(), + ); + + const snapshot = yield* policy.snapshot; + assert.equal(snapshot.activeForegroundLeaseCount, 1); + assert.deepStrictEqual(snapshot.activeScopeKeys, ["vcs-status:/repo"]); + assert.equal(snapshot.shouldRunOpportunisticWork, true); + assert.equal(yield* policy.hasDemand({ type: "vcs-status", cwd: "/repo" }), true); + assert.equal(yield* policy.hasDemand({ type: "vcs-status", cwd: "/other" }), false); + assert.equal(yield* policy.shouldRunScopeWork({ type: "vcs-status", cwd: "/repo" }), true); + assert.equal(yield* policy.shouldRunScopeWork({ type: "vcs-status", cwd: "/other" }), false); + }).pipe(Effect.provide(makeLayer(nominalHostPower))), + ); + + it.effect("removes all leases for a disconnected websocket connection", () => + Effect.gen(function* () { + const policy = yield* BackgroundPolicy.BackgroundPolicy; + yield* policy.reportClientActivity( + AuthSessionId.make("session-1"), + RpcClientId.make(1), + makeReport(), + ); + yield* policy.removeRpcClient(RpcClientId.make(1)); + + const snapshot = yield* policy.snapshot; + assert.equal(snapshot.activeForegroundLeaseCount, 0); + assert.deepStrictEqual(snapshot.activeScopeKeys, []); + assert.equal(snapshot.shouldRunOpportunisticWork, false); + }).pipe(Effect.provide(makeLayer(nominalHostPower))), + ); + + it.effect("host low power mode disables opportunistic work without dropping scoped demand", () => + Effect.gen(function* () { + const policy = yield* BackgroundPolicy.BackgroundPolicy; + yield* policy.reportClientActivity( + AuthSessionId.make("session-1"), + RpcClientId.make(1), + makeReport(), + ); + + const snapshot = yield* policy.snapshot; + assert.equal(snapshot.activeForegroundLeaseCount, 1); + assert.deepStrictEqual(snapshot.activeScopeKeys, ["vcs-status:/repo"]); + assert.equal(snapshot.shouldRunOpportunisticWork, false); + assert.equal(yield* policy.hasDemand({ type: "vcs-status", cwd: "/repo" }), true); + assert.equal(yield* 
policy.shouldRunScopeWork({ type: "vcs-status", cwd: "/repo" }), false); + }).pipe(Effect.provide(makeLayer(constrainedHostPower))), + ); + + it.effect("keeps background demand visible while preventing scoped work", () => + Effect.gen(function* () { + const policy = yield* BackgroundPolicy.BackgroundPolicy; + yield* policy.reportClientActivity( + AuthSessionId.make("session-1"), + RpcClientId.make(1), + makeReport({ focused: false, visible: false }), + ); + + const snapshot = yield* policy.snapshot; + assert.equal(snapshot.activeForegroundLeaseCount, 0); + assert.deepStrictEqual(snapshot.activeScopeKeys, ["vcs-status:/repo"]); + assert.equal(yield* policy.hasDemand({ type: "vcs-status", cwd: "/repo" }), true); + assert.equal(yield* policy.shouldRunScopeWork({ type: "vcs-status", cwd: "/repo" }), false); + }).pipe(Effect.provide(makeLayer(nominalHostPower))), + ); + + it.effect( + "performance profile allows background scoped work while a scoped lease is active", + () => + Effect.gen(function* () { + const policy = yield* BackgroundPolicy.BackgroundPolicy; + yield* policy.reportClientActivity( + AuthSessionId.make("session-1"), + RpcClientId.make(1), + makeReport({ focused: false, visible: false }), + ); + + assert.equal(yield* policy.shouldRunScopeWork({ type: "vcs-status", cwd: "/repo" }), true); + }).pipe( + Effect.provide(makeLayer(nominalHostPower, { backgroundActivityProfile: "performance" })), + ), + ); + + it.effect("battery saver profile pauses scoped work on battery", () => + Effect.gen(function* () { + const policy = yield* BackgroundPolicy.BackgroundPolicy; + yield* policy.reportClientActivity( + AuthSessionId.make("session-1"), + RpcClientId.make(1), + makeReport(), + ); + + assert.equal(yield* policy.shouldRunScopeWork({ type: "vcs-status", cwd: "/repo" }), false); + }).pipe( + Effect.provide( + makeLayer( + { + ...nominalHostPower, + onBattery: "true", + stale: false, + }, + { backgroundActivityProfile: "battery-saver" }, + ), + ), + ), + ); + + 
it.effect("does not gate work on stale host power values", () => + Effect.gen(function* () { + const policy = yield* BackgroundPolicy.BackgroundPolicy; + yield* policy.reportClientActivity( + AuthSessionId.make("session-1"), + RpcClientId.make(1), + makeReport(), + ); + + assert.equal(yield* policy.shouldRunScopeWork({ type: "vcs-status", cwd: "/repo" }), true); + }).pipe( + Effect.provide( + makeLayer( + { + ...nominalHostPower, + locked: "true", + onBattery: "true", + lowPowerMode: "true", + thermalState: "critical", + stale: true, + }, + { backgroundActivityProfile: "battery-saver" }, + ), + ), + ), + ); +}); diff --git a/apps/server/src/background/BackgroundPolicy.ts b/apps/server/src/background/BackgroundPolicy.ts new file mode 100644 index 00000000000..1397aed4a87 --- /dev/null +++ b/apps/server/src/background/BackgroundPolicy.ts @@ -0,0 +1,274 @@ +import { + type AuthSessionId, + type BackgroundPolicySnapshot, + type BackgroundScope, + type ClientActivityLease, + type ClientActivityReportInput, + type HostPowerSnapshot, + type RpcClientId, +} from "@t3tools/contracts"; +import { + getBackgroundActivityPresetSettings, + resolveServerBackgroundActivitySettings, + type ResolvedBackgroundActivitySettings, +} from "@t3tools/shared/backgroundActivitySettings"; +import * as DateTime from "effect/DateTime"; +import * as Context from "effect/Context"; +import * as Effect from "effect/Effect"; +import * as Layer from "effect/Layer"; +import * as PubSub from "effect/PubSub"; +import * as Ref from "effect/Ref"; +import * as Stream from "effect/Stream"; + +import { ServerSettingsService } from "../serverSettings.ts"; +import * as HostPowerMonitor from "./HostPowerMonitor.ts"; + +export interface BackgroundPolicyShape { + readonly reportClientActivity: ( + sessionId: AuthSessionId, + rpcClientId: RpcClientId, + input: ClientActivityReportInput, + ) => Effect.Effect; + readonly removeRpcClient: (rpcClientId: RpcClientId) => Effect.Effect; + readonly reportHostPowerState: 
(snapshot: HostPowerSnapshot) => Effect.Effect; + readonly snapshot: Effect.Effect; + readonly streamChanges: Stream.Stream; + readonly hasDemand: (scope: BackgroundScope) => Effect.Effect; + readonly shouldRunScopeWork: (scope: BackgroundScope) => Effect.Effect; + readonly shouldRunOpportunisticWork: Effect.Effect; +} + +export class BackgroundPolicy extends Context.Service()( + "t3/background/BackgroundPolicy", +) {} + +const DEFAULT_LEASE_TTL_MS = 45_000; +const MAX_LEASE_TTL_MS = 120_000; + +function scopeKey(scope: BackgroundScope): string { + switch (scope.type) { + case "server-config": + case "diagnostics": + return scope.type; + case "provider-status": + return scope.instanceId ? `${scope.type}:${scope.instanceId}` : scope.type; + case "vcs-status": + case "git-refs": + return `${scope.type}:${scope.cwd}`; + case "thread": + return `${scope.type}:${scope.threadId}`; + } +} + +function isLeaseActive(lease: ClientActivityLease, now: DateTime.Utc): boolean { + return DateTime.isGreaterThan(lease.expiresAt, now); +} + +function isForegroundLease(lease: ClientActivityLease, now: DateTime.Utc): boolean { + return isLeaseActive(lease, now) && lease.visible && lease.focused; +} + +function leaseHasScope(lease: ClientActivityLease, scope: BackgroundScope): boolean { + const key = scopeKey(scope); + return lease.scopes.some((leaseScope) => scopeKey(leaseScope) === key); +} + +function hasThermalPressure(hostPower: HostPowerSnapshot): boolean { + return hostPower.thermalState === "serious" || hostPower.thermalState === "critical"; +} + +function isHostConstrained( + hostPower: HostPowerSnapshot, + settings: ResolvedBackgroundActivitySettings, +): boolean { + if (hostPower.stale) return false; + if ( + (settings.pauseWhenHostLocked && hostPower.locked === "true") || + hasThermalPressure(hostPower) + ) { + return true; + } + if (settings.pauseWhenHostLowPower && hostPower.lowPowerMode === "true") return true; + return settings.pauseWhenOnBattery && hostPower.onBattery 
=== "true"; +} + +function isClientConstrained( + lease: ClientActivityLease, + settings: ResolvedBackgroundActivitySettings, +): boolean { + if (settings.pauseWhenClientLowPower && lease.lowPowerMode === "true") return true; + return settings.pauseWhenOnBattery && lease.batteryState === "unplugged"; +} + +function leaseMayRunScopedWork( + lease: ClientActivityLease, + scope: BackgroundScope, + now: DateTime.Utc, + settings: ResolvedBackgroundActivitySettings, +): boolean { + const activeWithScope = isLeaseActive(lease, now) && leaseHasScope(lease, scope); + if (!activeWithScope || isClientConstrained(lease, settings)) { + return false; + } + if (settings.profile === "performance") { + return true; + } + return isForegroundLease(lease, now); +} + +function computeSnapshot(input: { + readonly hostPower: HostPowerSnapshot; + readonly leases: ReadonlyMap; + readonly now: DateTime.Utc; + readonly settings: ResolvedBackgroundActivitySettings; + readonly updatedAt: DateTime.Utc; +}): BackgroundPolicySnapshot { + const activeLeases = [...input.leases.values()].filter((lease) => + isLeaseActive(lease, input.now), + ); + const foregroundLeases = activeLeases.filter((lease) => isForegroundLease(lease, input.now)); + const activeScopeKeys = new Set(); + for (const lease of activeLeases) { + for (const scope of lease.scopes) { + activeScopeKeys.add(scopeKey(scope)); + } + } + + return { + hostPower: input.hostPower, + leases: activeLeases, + activeForegroundLeaseCount: foregroundLeases.length, + activeScopeKeys: [...activeScopeKeys].toSorted(), + shouldRunOpportunisticWork: + foregroundLeases.some((lease) => !isClientConstrained(lease, input.settings)) && + !isHostConstrained(input.hostPower, input.settings), + updatedAt: input.updatedAt, + }; +} + +export const make = Effect.fn("background.policy.make")(function* () { + const hostPowerMonitor = yield* HostPowerMonitor.HostPowerMonitor; + const serverSettings = yield* ServerSettingsService; + const leasesRef = yield* 
Ref.make(new Map()); + const changes = yield* PubSub.sliding(1); + + const backgroundActivitySettings = serverSettings.getSettings.pipe( + Effect.map(resolveServerBackgroundActivitySettings), + Effect.orElseSucceed(() => getBackgroundActivityPresetSettings("balanced")), + ); + + const snapshot = Effect.gen(function* () { + const [hostPower, leases, now, settings] = yield* Effect.all([ + hostPowerMonitor.snapshot, + Ref.get(leasesRef), + DateTime.now, + backgroundActivitySettings, + ]); + return computeSnapshot({ hostPower, leases, now, settings, updatedAt: now }); + }); + + const publishSnapshot = snapshot.pipe(Effect.flatMap((next) => PubSub.publish(changes, next))); + + const reportClientActivity: BackgroundPolicyShape["reportClientActivity"] = ( + sessionId, + rpcClientId, + input, + ) => + Effect.gen(function* () { + const ttlMs = Math.min( + Math.max(input.ttlMs ?? DEFAULT_LEASE_TTL_MS, 1_000), + MAX_LEASE_TTL_MS, + ); + const now = yield* DateTime.now; + const expiresAt = DateTime.add(now, { milliseconds: ttlMs }); + const lease: ClientActivityLease = { + sessionId, + rpcClientId, + clientId: input.clientId, + clientKind: input.clientKind, + visible: input.visible, + focused: input.focused, + recentlyInteracted: input.recentlyInteracted, + ...(input.appState !== undefined ? { appState: input.appState } : {}), + ...(input.lowPowerMode !== undefined ? { lowPowerMode: input.lowPowerMode } : {}), + ...(input.batteryState !== undefined ? { batteryState: input.batteryState } : {}), + ...(input.networkType !== undefined ? 
{ networkType: input.networkType } : {}), + scopes: input.scopes, + updatedAt: now, + expiresAt, + }; + yield* Ref.update(leasesRef, (leases) => { + const next = new Map(leases); + next.set(`${rpcClientId}:${input.clientId}`, lease); + return next; + }); + yield* publishSnapshot; + }); + + const removeRpcClient: BackgroundPolicyShape["removeRpcClient"] = (rpcClientId) => + Ref.update(leasesRef, (leases) => { + const next = new Map(leases); + for (const key of next.keys()) { + if (key.startsWith(`${rpcClientId}:`)) { + next.delete(key); + } + } + return next; + }).pipe(Effect.andThen(publishSnapshot), Effect.asVoid); + + const hasDemand: BackgroundPolicyShape["hasDemand"] = (scope) => + Effect.map(snapshot, (current) => current.activeScopeKeys.includes(scopeKey(scope))); + + const shouldRunScopeWork: BackgroundPolicyShape["shouldRunScopeWork"] = (scope) => + Effect.gen(function* () { + const [current, settings] = yield* Effect.all([snapshot, backgroundActivitySettings]); + if (isHostConstrained(current.hostPower, settings)) { + return false; + } + return current.leases.some((lease) => + leaseMayRunScopedWork(lease, scope, current.updatedAt, settings), + ); + }); + + const shouldRunOpportunisticWork = Effect.map( + snapshot, + (current) => current.shouldRunOpportunisticWork, + ); + + yield* Stream.runForEach(hostPowerMonitor.streamChanges, () => publishSnapshot).pipe( + Effect.forkScoped, + ); + + yield* Effect.forever( + Effect.sleep("15 seconds").pipe( + Effect.andThen( + Effect.gen(function* () { + const now = yield* DateTime.now; + yield* Ref.update(leasesRef, (leases) => { + const next = new Map(leases); + for (const [key, lease] of next) { + if (!isLeaseActive(lease, now)) { + next.delete(key); + } + } + return next; + }); + }), + ), + Effect.andThen(publishSnapshot), + ), + ).pipe(Effect.forkScoped); + + return BackgroundPolicy.of({ + reportClientActivity, + removeRpcClient, + reportHostPowerState: hostPowerMonitor.report, + snapshot, + streamChanges: 
Stream.fromPubSub(changes), + hasDemand, + shouldRunScopeWork, + shouldRunOpportunisticWork, + }); +}); + +export const layer = Layer.effect(BackgroundPolicy, make()); diff --git a/apps/server/src/background/HostPowerMonitor.test.ts b/apps/server/src/background/HostPowerMonitor.test.ts new file mode 100644 index 00000000000..445cd618c83 --- /dev/null +++ b/apps/server/src/background/HostPowerMonitor.test.ts @@ -0,0 +1,92 @@ +import type { DesktopHostTelemetrySnapshot } from "@t3tools/contracts"; +import { describe, expect, it } from "@effect/vitest"; +import * as DateTime from "effect/DateTime"; +import * as Effect from "effect/Effect"; +import * as Fiber from "effect/Fiber"; +import * as Layer from "effect/Layer"; +import * as Option from "effect/Option"; +import * as PubSub from "effect/PubSub"; +import * as Ref from "effect/Ref"; +import * as Stream from "effect/Stream"; + +import * as DesktopTelemetryReceiver from "../resourceTelemetry/DesktopTelemetryReceiver.ts"; +import * as HostPowerMonitor from "./HostPowerMonitor.ts"; + +describe("HostPowerMonitor", () => { + it.effect("publishes semantic power changes without idle-time heartbeat churn", () => + Effect.gen(function* () { + const monitor = yield* HostPowerMonitor.make(); + const initial = { + source: "electron-main", + idle: "false", + idleSeconds: 0, + locked: "false", + suspended: false, + onBattery: "false", + lowPowerMode: "unknown", + thermalState: "nominal", + stale: false, + updatedAt: DateTime.makeUnsafe("2026-06-17T12:00:00.000Z"), + } as const; + yield* monitor.report(initial); + + const nextChange = yield* Stream.runHead(monitor.streamChanges).pipe(Effect.forkChild); + yield* Effect.yieldNow; + yield* monitor.report({ + ...initial, + idleSeconds: 1, + updatedAt: DateTime.makeUnsafe("2026-06-17T12:00:01.000Z"), + }); + yield* monitor.report({ + ...initial, + locked: "true", + updatedAt: DateTime.makeUnsafe("2026-06-17T12:00:02.000Z"), + }); + + expect(Option.getOrThrow(yield* 
Fiber.join(nextChange)).locked).toBe("true"); + }), + ); + + it.effect("consumes desktop power directly without retaining diagnostics telemetry", () => + Effect.gen(function* () { + const sampledAt = DateTime.makeUnsafe("2026-06-17T12:00:00.000Z"); + const desktopChanges = yield* PubSub.sliding(1); + const diagnosticsDemandWrites = yield* Ref.make(0); + const receiverLayer = DesktopTelemetryReceiver.layerTest({ + changes: Stream.fromPubSub(desktopChanges), + setDiagnosticsDemand: () => Ref.update(diagnosticsDemandWrites, (count) => count + 1), + }); + const layer = HostPowerMonitor.layer.pipe(Layer.provide(receiverLayer)); + + yield* Effect.gen(function* () { + const monitor = yield* HostPowerMonitor.HostPowerMonitor; + const nextPower = yield* Stream.runHead(monitor.streamChanges).pipe(Effect.forkChild); + yield* Effect.yieldNow; + yield* PubSub.publish(desktopChanges, { + version: 1, + type: "desktopTelemetry", + sequence: 1, + sampledAtUnixMs: DateTime.toEpochMillis(sampledAt), + electronPid: 100, + power: { + source: "electron-main", + idle: "false", + idleSeconds: 0, + locked: "false", + suspended: false, + onBattery: "true", + lowPowerMode: "unknown", + thermalState: "nominal", + stale: false, + updatedAt: sampledAt, + }, + speedLimitPercent: Option.none(), + electronProcesses: [], + }); + + expect(Option.getOrThrow(yield* Fiber.join(nextPower)).onBattery).toBe("true"); + expect(yield* Ref.get(diagnosticsDemandWrites)).toBe(0); + }).pipe(Effect.provide(layer)); + }), + ); +}); diff --git a/apps/server/src/background/HostPowerMonitor.ts b/apps/server/src/background/HostPowerMonitor.ts new file mode 100644 index 00000000000..53efafcc835 --- /dev/null +++ b/apps/server/src/background/HostPowerMonitor.ts @@ -0,0 +1,93 @@ +import type { HostPowerSnapshot } from "@t3tools/contracts"; +import * as Context from "effect/Context"; +import * as DateTime from "effect/DateTime"; +import * as Effect from "effect/Effect"; +import * as Layer from "effect/Layer"; +import * as 
Option from "effect/Option"; +import * as PubSub from "effect/PubSub"; +import * as Ref from "effect/Ref"; +import * as Stream from "effect/Stream"; + +import * as DesktopTelemetryReceiver from "../resourceTelemetry/DesktopTelemetryReceiver.ts"; + +export interface HostPowerMonitorShape { + readonly snapshot: Effect.Effect; + readonly report: (snapshot: HostPowerSnapshot) => Effect.Effect; + readonly streamChanges: Stream.Stream; +} + +export class HostPowerMonitor extends Context.Service()( + "t3/background/HostPowerMonitor", +) {} + +export const makeUnknownSnapshot = ( + source: HostPowerSnapshot["source"], + updatedAt: HostPowerSnapshot["updatedAt"], +): HostPowerSnapshot => ({ + source, + idle: "unknown", + idleSeconds: null, + locked: "unknown", + suspended: false, + onBattery: "unknown", + lowPowerMode: "unknown", + thermalState: "unknown", + stale: true, + updatedAt, +}); + +function samePowerState(left: HostPowerSnapshot, right: HostPowerSnapshot): boolean { + return ( + left.source === right.source && + left.idle === right.idle && + left.locked === right.locked && + left.suspended === right.suspended && + left.onBattery === right.onBattery && + left.lowPowerMode === right.lowPowerMode && + left.thermalState === right.thermalState && + left.stale === right.stale + ); +} + +export const make = Effect.fn("background.hostPower.make")(function* ( + initialSource: HostPowerSnapshot["source"] = "unknown", +) { + const initial = makeUnknownSnapshot(initialSource, yield* DateTime.now); + const latestRef = yield* Ref.make(initial); + const changes = yield* PubSub.sliding(1); + + const report: HostPowerMonitorShape["report"] = (snapshot) => + Ref.modify(latestRef, (current) => [!samePowerState(current, snapshot), snapshot]).pipe( + Effect.flatMap((changed) => (changed ? 
PubSub.publish(changes, snapshot) : Effect.void)), + Effect.asVoid, + ); + + return HostPowerMonitor.of({ + snapshot: Ref.get(latestRef), + report, + streamChanges: Stream.fromPubSub(changes), + }); +}); + +export const layer = Layer.effect( + HostPowerMonitor, + Effect.gen(function* () { + const desktopTelemetry = yield* DesktopTelemetryReceiver.DesktopTelemetryReceiver; + const initial = yield* desktopTelemetry.latest; + const monitor = yield* make( + Option.match(initial, { + onNone: () => "unknown" as const, + onSome: (snapshot) => snapshot.power.source, + }), + ); + if (Option.isSome(initial)) { + yield* monitor.report(initial.value.power); + } + yield* desktopTelemetry.changes.pipe( + Stream.map((snapshot) => snapshot.power), + Stream.runForEach(monitor.report), + Effect.forkScoped, + ); + return monitor; + }), +); diff --git a/apps/server/src/cli/config.test.ts b/apps/server/src/cli/config.test.ts index d4d9d378557..9d6622ef6d7 100644 --- a/apps/server/src/cli/config.test.ts +++ b/apps/server/src/cli/config.test.ts @@ -38,7 +38,7 @@ it.layer(NodeServices.layer)("cli config resolution", (it) => { const defaultObservabilityConfig = { traceMinLevel: "Info", traceTimingEnabled: true, - traceBatchWindowMs: 200, + traceBatchWindowMs: 1_000, traceMaxBytes: 10 * 1024 * 1024, traceMaxFiles: 10, otlpTracesUrl: undefined, @@ -268,6 +268,8 @@ it.layer(NodeServices.layer)("cli config resolution", (it) => { t3Home: baseDir, noBrowser: true, desktopBootstrapToken: "desktop-token", + desktopTelemetryFd: 4, + desktopTelemetryControlFd: 5, tailscaleServeEnabled: false, tailscaleServePort: 443, otlpTracesUrl: "http://localhost:4318/v1/traces", @@ -323,12 +325,17 @@ it.layer(NodeServices.layer)("cli config resolution", (it) => { noBrowser: true, startupPresentation: "browser", desktopBootstrapToken: "desktop-token", + desktopTelemetryFd: 4, + desktopTelemetryControlFd: 5, + resourceMonitorPath: undefined, autoBootstrapProjectFromCwd: false, logWebSocketEvents: false, 
tailscaleServeEnabled: false, tailscaleServePort: 443, }); assert.equal(join(baseDir, "userdata"), resolved.stateDir); + assert.equal(resolved.desktopTelemetryFd, 4); + assert.equal(resolved.desktopTelemetryControlFd, 5); }), ); diff --git a/apps/server/src/cli/config.ts b/apps/server/src/cli/config.ts index 7182854e18c..58f827541c9 100644 --- a/apps/server/src/cli/config.ts +++ b/apps/server/src/cli/config.ts @@ -91,7 +91,7 @@ const EnvServerConfig = Config.all({ ), traceMaxBytes: Config.int("T3CODE_TRACE_MAX_BYTES").pipe(Config.withDefault(10 * 1024 * 1024)), traceMaxFiles: Config.int("T3CODE_TRACE_MAX_FILES").pipe(Config.withDefault(10)), - traceBatchWindowMs: Config.int("T3CODE_TRACE_BATCH_WINDOW_MS").pipe(Config.withDefault(200)), + traceBatchWindowMs: Config.int("T3CODE_TRACE_BATCH_WINDOW_MS").pipe(Config.withDefault(1_000)), otlpTracesUrl: Config.string("T3CODE_OTLP_TRACES_URL").pipe( Config.option, Config.map(Option.getOrUndefined), @@ -298,6 +298,9 @@ export const resolveServerConfig = ( () => mode === "desktop", ); const desktopBootstrapToken = bootstrap?.desktopBootstrapToken; + const desktopTelemetryFd = bootstrap?.desktopTelemetryFd; + const desktopTelemetryControlFd = bootstrap?.desktopTelemetryControlFd; + const resourceMonitorPath = bootstrap?.resourceMonitorPath; const autoBootstrapProjectFromCwd = Option.getOrElse( resolveOptionPrecedence( Option.fromUndefinedOr(options?.forceAutoBootstrapProjectFromCwd), @@ -370,6 +373,9 @@ export const resolveServerConfig = ( noBrowser, startupPresentation, desktopBootstrapToken, + desktopTelemetryFd, + desktopTelemetryControlFd, + resourceMonitorPath, autoBootstrapProjectFromCwd, logWebSocketEvents, tailscaleServeEnabled, diff --git a/apps/server/src/config.ts b/apps/server/src/config.ts index b0a23cb273c..67b34b7b09e 100644 --- a/apps/server/src/config.ts +++ b/apps/server/src/config.ts @@ -69,6 +69,9 @@ export interface ServerConfigShape extends ServerDerivedPaths { readonly noBrowser: boolean; readonly 
startupPresentation: StartupPresentation; readonly desktopBootstrapToken: string | undefined; + readonly desktopTelemetryFd?: number | undefined; + readonly desktopTelemetryControlFd?: number | undefined; + readonly resourceMonitorPath?: string | undefined; readonly autoBootstrapProjectFromCwd: boolean; readonly logWebSocketEvents: boolean; readonly tailscaleServeEnabled: boolean; @@ -171,6 +174,9 @@ export class ServerConfig extends Context.Service Effect.void, - unref: Effect.succeed(Effect.void), - stdin: Sink.drain, - stdout: Stream.make(encoder.encode(result.stdout ?? "")), - stderr: Stream.make(encoder.encode(result.stderr ?? "")), - all: Stream.empty, - getInputFd: () => Sink.drain, - getOutputFd: () => Stream.empty, +function makeTelemetryLayer( + snapshot: ResourceMonitorSnapshotEvent, + desktopSnapshot?: DesktopHostTelemetrySnapshot, +) { + const nativeLayer = NativeTelemetryClient.layerTest({ + sampleNow: Effect.succeed(snapshot), + health: Effect.succeed({ + status: "healthy", + hello: Option.none(), + lastSampleAt: Option.some(DateTime.makeUnsafe(snapshot.sampledAtUnixMs)), + lastError: Option.none(), + restartCount: 0, + sampleIntervalMs: 1_000, + }), }); + const desktopLayer = desktopSnapshot + ? 
DesktopTelemetryReceiver.layerTest({ + latest: Effect.succeedSome(desktopSnapshot), + health: Effect.succeed({ + status: "healthy", + lastSampleAt: Option.some(DateTime.makeUnsafe(desktopSnapshot.sampledAtUnixMs)), + lastError: Option.none(), + }), + }) + : DesktopTelemetryReceiver.layerTest(); + return ResourceTelemetry.layer.pipe( + Layer.provide(Layer.mergeAll(nativeLayer, desktopLayer, ResourceAttribution.layer)), + ); } describe("ProcessDiagnostics", () => { - it.effect("parses POSIX ps rows with full commands", () => - Effect.sync(() => { - const rows = ProcessDiagnostics.parsePosixProcessRows( - [ - " 10 1 10 Ss 0.0 1024 01:02.03 /usr/bin/node server.js", - " 11 10 10 S+ 12.5 20480 00:04 codex app-server --config /tmp/one two", - ].join("\n"), - ); - - expect(rows).toEqual([ + it.effect("projects live process data from resource telemetry", () => + Effect.gen(function* () { + const snapshot = makeNativeSnapshot([ { - pid: 10, + pid: process.pid, ppid: 1, - pgid: 10, - status: "Ss", + startTimeMs: 1_000, + runTimeMs: 60_000, + name: "node", + command: "t3 server", + status: "Running", cpuPercent: 0, - rssBytes: 1024 * 1024, - elapsed: "01:02.03", - command: "/usr/bin/node server.js", + cpuTimeMs: 100, + residentBytes: 1_024, + virtualBytes: 2_048, + ioReadBytes: 100, + ioWriteBytes: 200, + ioSemantics: "storage", }, { - pid: 11, - ppid: 10, - pgid: 10, - status: "S+", - cpuPercent: 12.5, - rssBytes: 20480 * 1024, - elapsed: "00:04", - command: "codex app-server --config /tmp/one two", + pid: 4_242, + ppid: process.pid, + startTimeMs: 2_000, + runTimeMs: 4_000, + name: "agent", + command: "codex app-server", + status: "Running", + cpuPercent: 1.5, + cpuTimeMs: 60, + residentBytes: 2_048, + virtualBytes: 4_096, + ioReadBytes: 300, + ioWriteBytes: 400, + ioSemantics: "storage", }, ]); - }), - ); + const telemetryLayer = makeTelemetryLayer(snapshot); + const layer = ProcessDiagnostics.layer.pipe(Layer.provideMerge(telemetryLayer)); - it.effect("aggregates only 
descendants of the server process", () => - Effect.sync(() => { - const diagnostics = ProcessDiagnostics.aggregateProcessDiagnostics({ - serverPid: 100, - readAt: DateTime.makeUnsafe("2026-05-05T10:00:00.000Z"), - rows: [ - { - pid: 100, - ppid: 1, - pgid: 100, - status: "S", - cpuPercent: 0, - rssBytes: 1_000, - elapsed: "01:00", - command: "t3 server", - }, - { - pid: 101, - ppid: 100, - pgid: 100, - status: "S", - cpuPercent: 1.5, - rssBytes: 2_000, - elapsed: "00:20", - command: "codex app-server", - }, - { - pid: 102, - ppid: 101, - pgid: 100, - status: "R", - cpuPercent: 3.25, - rssBytes: 4_000, - elapsed: "00:05", - command: "git status", - }, - { - pid: 200, - ppid: 1, - pgid: 200, - status: "S", - cpuPercent: 99, - rssBytes: 8_000, - elapsed: "00:01", - command: "unrelated", - }, - { - pid: 201, - ppid: 100, - pgid: 100, - status: "R", - cpuPercent: 9, - rssBytes: 9_000, - elapsed: "00:00", - command: "ps -axo pid=,ppid=,pgid=,stat=,pcpu=,rss=,etime=,command=", - }, - ], - }); - - expect(diagnostics.serverPid).toBe(100); - expect(DateTime.formatIso(diagnostics.readAt)).toBe("2026-05-05T10:00:00.000Z"); - expect(diagnostics.processCount).toBe(2); - expect(diagnostics.totalRssBytes).toBe(6_000); - expect(diagnostics.totalCpuPercent).toBe(4.75); - expect(diagnostics.processes.map((process) => process.pid)).toEqual([101, 102]); - expect(diagnostics.processes.map((process) => process.depth)).toEqual([0, 1]); - expect(Option.getOrNull(diagnostics.processes[0]!.pgid)).toBe(100); - expect(diagnostics.processes[0]?.childPids).toEqual([102]); - }), - ); - - it.effect("preserves ascending sibling order for nested descendants", () => - Effect.sync(() => { - const diagnostics = ProcessDiagnostics.aggregateProcessDiagnostics({ - serverPid: 100, - readAt: DateTime.makeUnsafe("2026-05-05T10:00:00.000Z"), - rows: [ - { - pid: 101, - ppid: 100, - pgid: 100, - status: "S", - cpuPercent: 0, - rssBytes: 100, - elapsed: "00:10", - command: "agent", - }, - { - pid: 103, - ppid: 
101, - pgid: 100, - status: "S", - cpuPercent: 0, - rssBytes: 100, - elapsed: "00:10", - command: "child-b", - }, - { - pid: 102, - ppid: 101, - pgid: 100, - status: "S", - cpuPercent: 0, - rssBytes: 100, - elapsed: "00:10", - command: "child-a", - }, - ], - }); + const diagnostics = yield* Effect.gen(function* () { + const telemetry = yield* ResourceTelemetry.ResourceTelemetry; + const processDiagnostics = yield* ProcessDiagnostics.ProcessDiagnostics; + yield* telemetry.refresh; + return yield* processDiagnostics.read; + }).pipe(Effect.provide(layer)); - expect(diagnostics.processes.map((process) => process.pid)).toEqual([101, 102, 103]); + expect(diagnostics.processes.map((process) => process.pid)).toEqual([4242]); + expect(diagnostics.processes[0]?.startTimeMs).toBe(2_000); + expect(diagnostics.processes[0]?.cpuPercent).toBe(1.5); + expect(diagnostics.processes[0]?.rssBytes).toBe(2_048); }), ); - it.effect("queries processes through the ChildProcessSpawner service", () => + it.effect("rejects stale process identities before signaling", () => Effect.gen(function* () { - const commands: Array<{ readonly command: string; readonly args: ReadonlyArray }> = - []; - const spawnerLayer = Layer.succeed( - ChildProcessSpawner.ChildProcessSpawner, - ChildProcessSpawner.make((command) => { - const childProcess = command as unknown as { - readonly command: string; - readonly args: ReadonlyArray; - }; - commands.push({ command: childProcess.command, args: childProcess.args }); - return Effect.succeed( - mockHandle({ - stdout: [ - ` ${process.pid} 1 ${process.pid} Ss 0.0 1024 01:02.03 t3 server`, - ` 4242 ${process.pid} ${process.pid} S 1.5 2048 00:04 agent`, - ].join("\n"), - }), - ); - }), - ); - const layer = ProcessDiagnostics.layer.pipe(Layer.provide(spawnerLayer)); + const snapshot = makeNativeSnapshot([]); + const telemetryLayer = makeTelemetryLayer(snapshot); + const layer = ProcessDiagnostics.layer.pipe(Layer.provide(telemetryLayer)); - const diagnostics = yield* 
Effect.service(ProcessDiagnostics.ProcessDiagnostics).pipe( - Effect.flatMap((pd) => pd.read), + const result = yield* Effect.service(ProcessDiagnostics.ProcessDiagnostics).pipe( + Effect.flatMap((processDiagnostics) => + processDiagnostics.signal({ + pid: 4_242, + startTimeMs: 2_000, + signal: "SIGINT", + }), + ), Effect.provide(layer), ); - expect(diagnostics.processes.map((process) => process.pid)).toEqual([4242]); - expect(commands).toEqual([ - { - command: "ps", - args: ["-axo", "pid=,ppid=,pgid=,stat=,pcpu=,rss=,etime=,command="], - }, - ]); + expect(result).toEqual({ + pid: 4242, + signal: "SIGINT", + signaled: false, + message: Option.some("Process 4242 no longer matches the selected process identity."), + }); }), ); - it.effect("does not allow signaling the diagnostics query process", () => + it.effect("rejects Electron processes as signal targets", () => Effect.gen(function* () { - const spawnerLayer = Layer.succeed( - ChildProcessSpawner.ChildProcessSpawner, - ChildProcessSpawner.make(() => - Effect.succeed( - mockHandle({ - stdout: [ - ` ${process.pid} 1 ${process.pid} Ss 0.0 1024 01:02.03 t3 server`, - ` 4242 ${process.pid} ${process.pid} R 1.5 2048 00:00 ps -axo pid=,ppid=,pgid=,stat=,pcpu=,rss=,etime=,command=`, - ].join("\n"), - }), - ), - ), + const sampledAtUnixMs = DateTime.toEpochMillis( + DateTime.makeUnsafe("2026-05-05T10:00:00.000Z"), ); - const layer = ProcessDiagnostics.layer.pipe(Layer.provide(spawnerLayer)); + const snapshot = makeNativeSnapshot([ + { + pid: 4_242, + ppid: 1, + startTimeMs: 2_000, + runTimeMs: 4_000, + name: "electron", + command: "electron", + status: "Running", + cpuPercent: 1.5, + cpuTimeMs: 60, + residentBytes: 2_048, + virtualBytes: 4_096, + ioReadBytes: 300, + ioWriteBytes: 400, + ioSemantics: "storage", + }, + ]); + const sampledAt = DateTime.makeUnsafe(sampledAtUnixMs); + const telemetryLayer = makeTelemetryLayer(snapshot, { + version: 1, + type: "desktopTelemetry", + sequence: 1, + sampledAtUnixMs, + electronPid: 
4_242, + power: { + source: "electron-main", + idle: "false", + idleSeconds: 0, + locked: "false", + suspended: false, + onBattery: "false", + lowPowerMode: "unknown", + thermalState: "nominal", + stale: false, + updatedAt: sampledAt, + }, + speedLimitPercent: Option.none(), + electronProcesses: [ + { + pid: 4_242, + creationTimeMs: 2_000, + type: "Browser", + name: "electron", + cpuPercent: 1.5, + idleWakeupsPerSecond: 0, + workingSetBytes: 2_048, + peakWorkingSetBytes: 2_048, + }, + ], + }); + const layer = ProcessDiagnostics.layer.pipe(Layer.provide(telemetryLayer)); const result = yield* Effect.service(ProcessDiagnostics.ProcessDiagnostics).pipe( - Effect.flatMap((pd) => pd.signal({ pid: 4242, signal: "SIGINT" })), + Effect.flatMap((processDiagnostics) => + processDiagnostics.signal({ + pid: 4_242, + startTimeMs: 2_000, + signal: "SIGKILL", + }), + ), Effect.provide(layer), ); expect(result).toEqual({ - pid: 4242, - signal: "SIGINT", + pid: 4_242, + signal: "SIGKILL", signaled: false, - message: Option.some("Process 4242 is not a live descendant of the T3 server."), + message: Option.some("Process 4242 is not a signalable T3 backend descendant."), }); }), ); diff --git a/apps/server/src/diagnostics/ProcessDiagnostics.ts b/apps/server/src/diagnostics/ProcessDiagnostics.ts index f5f746134f2..70997379478 100644 --- a/apps/server/src/diagnostics/ProcessDiagnostics.ts +++ b/apps/server/src/diagnostics/ProcessDiagnostics.ts @@ -1,40 +1,23 @@ import type { + ResourceTelemetryProcessCategory, ServerProcessDiagnosticsEntry, ServerProcessDiagnosticsResult, ServerProcessSignal, ServerSignalProcessResult, } from "@t3tools/contracts"; -import { HostProcessPlatform } from "@t3tools/shared/hostProcess"; import * as Context from "effect/Context"; -import * as DateTime from "effect/DateTime"; -import * as Duration from "effect/Duration"; import * as Effect from "effect/Effect"; import * as Layer from "effect/Layer"; import * as Option from "effect/Option"; import * as Schema 
from "effect/Schema"; -import { ChildProcess, ChildProcessSpawner } from "effect/unstable/process"; -import { collectUint8StreamText } from "../stream/collectUint8StreamText.ts"; - -export interface ProcessRow { - readonly pid: number; - readonly ppid: number; - readonly pgid: number | null; - readonly status: string; - readonly cpuPercent: number; - readonly rssBytes: number; - readonly elapsed: string; - readonly command: string; -} - -const PROCESS_QUERY_TIMEOUT_MS = 1_000; -const POSIX_PROCESS_QUERY_COMMAND = "pid=,ppid=,pgid=,stat=,pcpu=,rss=,etime=,command="; -const PROCESS_QUERY_MAX_OUTPUT_BYTES = 2 * 1024 * 1024; +import * as ResourceTelemetry from "../resourceTelemetry/ResourceTelemetry.ts"; export interface ProcessDiagnosticsShape { readonly read: Effect.Effect; readonly signal: (input: { readonly pid: number; + readonly startTimeMs: number; readonly signal: ServerProcessSignal; }) => Effect.Effect; } @@ -44,420 +27,161 @@ export class ProcessDiagnostics extends Context.Service< ProcessDiagnosticsShape >()("t3/diagnostics/ProcessDiagnostics") {} -class ProcessDiagnosticsError extends Schema.TaggedErrorClass()( - "ProcessDiagnosticsError", +export class ProcessIdentityChanged extends Schema.TaggedErrorClass()( + "ProcessIdentityChanged", { - message: Schema.String, - cause: Schema.optional(Schema.Defect()), + pid: Schema.Number, + startTimeMs: Schema.Number, }, -) {} -const isProcessDiagnosticsError = Schema.is(ProcessDiagnosticsError); - -function toProcessDiagnosticsError(message: string, cause?: unknown): ProcessDiagnosticsError { - return new ProcessDiagnosticsError({ - message, - ...(cause === undefined ? {} : { cause }), - }); -} - -function parsePositiveInt(value: string): number | null { - const parsed = Number.parseInt(value, 10); - return Number.isInteger(parsed) && parsed > 0 ? 
parsed : null; -} - -function parseNonNegativeInt(value: string): number | null { - const parsed = Number.parseInt(value, 10); - return Number.isInteger(parsed) && parsed >= 0 ? parsed : null; -} - -function parseNumber(value: string): number | null { - const parsed = Number.parseFloat(value); - return Number.isFinite(parsed) ? parsed : null; -} - -export function parsePosixProcessRows(output: string): ReadonlyArray { - const rows: ProcessRow[] = []; - const rowPattern = - /^\s*(\d+)\s+(\d+)\s+(-?\d+)\s+(\S+)\s+([+-]?(?:\d+\.?\d*|\.\d+))\s+(\d+)\s+(\S+)\s+(.+)$/; - - for (const line of output.split(/\r?\n/)) { - if (line.trim().length === 0) continue; - - const match = rowPattern.exec(line); - if (!match) continue; - - const pidText = match[1]; - const ppidText = match[2]; - const pgidText = match[3]; - const status = match[4]; - const cpuText = match[5]; - const rssText = match[6]; - const elapsed = match[7]; - const command = match[8]; - if ( - pidText === undefined || - ppidText === undefined || - pgidText === undefined || - status === undefined || - cpuText === undefined || - rssText === undefined || - elapsed === undefined || - command === undefined - ) { - continue; - } - - const pid = parsePositiveInt(pidText); - const ppid = parseNonNegativeInt(ppidText); - const pgid = Number.parseInt(pgidText, 10); - const cpuPercent = parseNumber(cpuText); - const rssKiB = parseNonNegativeInt(rssText); - if ( - pid === null || - ppid === null || - !Number.isInteger(pgid) || - cpuPercent === null || - rssKiB === null || - !status || - !elapsed || - !command - ) { - continue; - } - - rows.push({ - pid, - ppid, - pgid, - status, - cpuPercent, - rssBytes: rssKiB * 1024, - elapsed, - command, - }); +) { + override get message(): string { + return `Process ${this.pid} no longer matches start time ${this.startTimeMs}.`; } - - return rows; -} - -function normalizeWindowsProcessRow(value: unknown): ProcessRow | null { - if (typeof value !== "object" || value === null) return 
null; - const record = value as Record; - const pid = typeof record.ProcessId === "number" ? record.ProcessId : null; - const ppid = typeof record.ParentProcessId === "number" ? record.ParentProcessId : null; - const commandLine = - typeof record.CommandLine === "string" && record.CommandLine.trim().length > 0 - ? record.CommandLine - : typeof record.Name === "string" - ? record.Name - : null; - const workingSet = - typeof record.WorkingSetSize === "number" && Number.isFinite(record.WorkingSetSize) - ? Math.max(0, Math.round(record.WorkingSetSize)) - : 0; - const cpuPercent = - typeof record.PercentProcessorTime === "number" && Number.isFinite(record.PercentProcessorTime) - ? Math.max(0, record.PercentProcessorTime) - : 0; - - if (!pid || pid <= 0 || ppid === null || ppid < 0 || !commandLine) return null; - return { - pid, - ppid, - pgid: null, - status: typeof record.Status === "string" && record.Status.length > 0 ? record.Status : "Live", - cpuPercent, - rssBytes: workingSet, - elapsed: "", - command: commandLine, - }; } -function parseWindowsProcessRows(output: string): ReadonlyArray { - if (output.trim().length === 0) return []; - try { - const parsed = JSON.parse(output) as unknown; - const records = Array.isArray(parsed) ? parsed : [parsed]; - return records.flatMap((record) => { - const row = normalizeWindowsProcessRow(record); - return row ? [row] : []; - }); - } catch { - return []; +export class ProcessSignalFailed extends Schema.TaggedErrorClass()( + "ProcessSignalFailed", + { + pid: Schema.Number, + signal: Schema.String, + cause: Schema.Defect(), + }, +) { + override get message(): string { + return `Failed to signal process ${this.pid} with ${this.signal}.`; } } -export function buildDescendantEntries( - rows: ReadonlyArray, - serverPid: number, -): ReadonlyArray { - const childrenByParent = new Map(); - for (const row of rows) { - const children = childrenByParent.get(row.ppid) ?? 
[]; - children.push(row); - childrenByParent.set(row.ppid, children); - } - - const entries: ServerProcessDiagnosticsEntry[] = []; - const visited = new Set(); - const stack = [...(childrenByParent.get(serverPid) ?? [])] - .toSorted((left, right) => left.pid - right.pid) - .map((row) => ({ row, depth: 0 })); - - while (stack.length > 0) { - const item = stack.shift(); - if (!item || visited.has(item.row.pid)) continue; - visited.add(item.row.pid); - - const children = [...(childrenByParent.get(item.row.pid) ?? [])].toSorted( - (left, right) => left.pid - right.pid, - ); - entries.push({ - pid: item.row.pid, - ppid: item.row.ppid, - pgid: Option.fromNullishOr(item.row.pgid), - status: item.row.status, - cpuPercent: item.row.cpuPercent, - rssBytes: item.row.rssBytes, - elapsed: item.row.elapsed || "n/a", - command: item.row.command, - depth: item.depth, - childPids: children.map((child) => child.pid), - }); - - stack.unshift(...children.map((row) => ({ row, depth: item.depth + 1 }))); - } +export type ProcessDiagnosticsError = ProcessIdentityChanged | ProcessSignalFailed; - return entries; +function formatElapsed(runTimeMs: number): string { + const totalSeconds = Math.max(0, Math.floor(runTimeMs / 1_000)); + const hours = Math.floor(totalSeconds / 3_600); + const minutes = Math.floor((totalSeconds % 3_600) / 60); + const seconds = totalSeconds % 60; + return hours > 0 + ? 
`${hours}:${String(minutes).padStart(2, "0")}:${String(seconds).padStart(2, "0")}` + : `${minutes}:${String(seconds).padStart(2, "0")}`; } -export function isDiagnosticsQueryProcess(row: ProcessRow, serverPid: number): boolean { - if (row.ppid !== serverPid) return false; - - const command = row.command.trim(); +function canSignalCategory(category: ResourceTelemetryProcessCategory): boolean { return ( - /(?:^|[/\\])ps\s+-axo\s+pid=,ppid=,pgid=,stat=,pcpu=,rss=,etime=,command=/.test(command) || - (/\bpowershell(?:\.exe)?\b/i.test(command) && - /\bGet-CimInstance\s+Win32_Process\b/i.test(command)) - ); -} - -function makeResult(input: { - readonly serverPid: number; - readonly rows: ReadonlyArray; - readonly readAt: DateTime.Utc; - readonly error?: string; -}): ServerProcessDiagnosticsResult { - const readAt = input.readAt; - const rows = input.rows.filter((row) => !isDiagnosticsQueryProcess(row, input.serverPid)); - const processes = buildDescendantEntries(rows, input.serverPid); - const totalRssBytes = processes.reduce((total, process) => total + process.rssBytes, 0); - const totalCpuPercent = processes.reduce((total, process) => total + process.cpuPercent, 0); - - return { - serverPid: input.serverPid, - readAt, - processCount: processes.length, - totalRssBytes, - totalCpuPercent, - processes, - error: input.error ? Option.some({ message: input.error }) : Option.none(), - }; -} - -interface ProcessOutput { - readonly exitCode: number; - readonly stdout: string; - readonly stderr: string; -} - -const runProcess = Effect.fn("runProcess")( - function* (input: { - readonly command: string; - readonly args: ReadonlyArray; - readonly errorMessage: string; - }) { - const spawner = yield* ChildProcessSpawner.ChildProcessSpawner; - // `ps` and `powershell.exe` are real executables; spawning through cmd.exe - // shell mode would re-tokenize the PowerShell `-Command` payload (which - // contains pipes) before PowerShell ever sees it. 
- const child = yield* spawner.spawn( - ChildProcess.make(input.command, input.args, { - cwd: process.cwd(), - }), - ); - const [stdout, stderr, exitCode] = yield* Effect.all( - [ - collectUint8StreamText({ - stream: child.stdout, - maxBytes: PROCESS_QUERY_MAX_OUTPUT_BYTES, - truncatedMarker: "\n\n[truncated]", - }), - collectUint8StreamText({ - stream: child.stderr, - maxBytes: PROCESS_QUERY_MAX_OUTPUT_BYTES, - truncatedMarker: "\n\n[truncated]", - }), - child.exitCode, - ], - { concurrency: "unbounded" }, - ); - - return { - exitCode, - stdout: stdout.text, - stderr: stderr.text, - } satisfies ProcessOutput; - }, - (effect, input) => - effect.pipe( - Effect.scoped, - Effect.timeoutOption(Duration.millis(PROCESS_QUERY_TIMEOUT_MS)), - Effect.flatMap((result) => - Option.match(result, { - onNone: () => Effect.fail(toProcessDiagnosticsError(`${input.errorMessage} timed out.`)), - onSome: Effect.succeed, - }), - ), - Effect.mapError((cause) => - isProcessDiagnosticsError(cause) - ? cause - : toProcessDiagnosticsError(input.errorMessage, cause), - ), - ), -); - -function readPosixProcessRows(): Effect.Effect< - ReadonlyArray, - ProcessDiagnosticsError, - ChildProcessSpawner.ChildProcessSpawner -> { - return runProcess({ - command: "ps", - args: ["-axo", POSIX_PROCESS_QUERY_COMMAND], - errorMessage: "Failed to query process diagnostics.", - }).pipe( - Effect.flatMap((result) => - result.exitCode !== 0 - ? 
Effect.fail(toProcessDiagnosticsError(result.stderr.trim() || "ps failed.")) - : Effect.succeed(parsePosixProcessRows(result.stdout)), - ), - ); -} - -function readWindowsProcessRows(): Effect.Effect< - ReadonlyArray, - ProcessDiagnosticsError, - ChildProcessSpawner.ChildProcessSpawner -> { - const command = [ - "$processes = Get-CimInstance Win32_Process | ForEach-Object {", - '$perf = Get-CimInstance Win32_PerfFormattedData_PerfProc_Process -Filter "IDProcess = $($_.ProcessId)" -ErrorAction SilentlyContinue;', - "[pscustomobject]@{ ProcessId = $_.ProcessId; ParentProcessId = $_.ParentProcessId; Name = $_.Name; CommandLine = $_.CommandLine; Status = $_.Status; WorkingSetSize = $_.WorkingSetSize; PercentProcessorTime = if ($perf) { $perf.PercentProcessorTime } else { 0 } }", - "};", - "$processes | ConvertTo-Json -Compress -Depth 3", - ].join(" "); - - return runProcess({ - command: "powershell.exe", - args: ["-NoProfile", "-NonInteractive", "-Command", command], - errorMessage: "Failed to query process diagnostics.", - }).pipe( - Effect.flatMap((result) => - result.exitCode !== 0 - ? Effect.fail( - toProcessDiagnosticsError(result.stderr.trim() || "PowerShell process query failed."), - ) - : Effect.succeed(parseWindowsProcessRows(result.stdout)), - ), - ); -} - -export const readProcessRows = Effect.gen(function* () { - const platform = yield* HostProcessPlatform; - return yield* platform === "win32" ? 
readWindowsProcessRows() : readPosixProcessRows(); -}); - -export function aggregateProcessDiagnostics(input: { - readonly serverPid: number; - readonly rows: ReadonlyArray; - readonly readAt: DateTime.Utc; -}): ServerProcessDiagnosticsResult { - return makeResult(input); -} - -function assertDescendantPid( - pid: number, -): Effect.Effect { - if (pid === process.pid) { - return Effect.fail(toProcessDiagnosticsError("Refusing to signal the T3 server process.")); - } - - return readProcessRows.pipe( - Effect.flatMap((rows) => { - const filteredRows = rows.filter((row) => !isDiagnosticsQueryProcess(row, process.pid)); - const descendant = buildDescendantEntries(filteredRows, process.pid).some( - (entry) => entry.pid === pid, - ); - return descendant - ? Effect.void - : Effect.fail( - toProcessDiagnosticsError(`Process ${pid} is not a live descendant of the T3 server.`), - ); - }), + category === "server-child" || category === "provider-root" || category === "terminal-root" ); } export const make = Effect.fn("makeProcessDiagnostics")(function* () { - const spawner = yield* ChildProcessSpawner.ChildProcessSpawner; - - const read: ProcessDiagnosticsShape["read"] = Effect.gen(function* () { - const readAt = yield* DateTime.now; - const rows = yield* readProcessRows.pipe( - Effect.provideService(ChildProcessSpawner.ChildProcessSpawner, spawner), - ); - return makeResult({ serverPid: process.pid, rows, readAt }); - }).pipe( - Effect.catch((error: ProcessDiagnosticsError) => - DateTime.now.pipe( - Effect.map((readAt) => - makeResult({ serverPid: process.pid, rows: [], readAt, error: error.message }), - ), - ), - ), + const telemetry = yield* ResourceTelemetry.ResourceTelemetry; + const read: ProcessDiagnosticsShape["read"] = telemetry.latest.pipe( + Effect.map((snapshot) => { + const processes = snapshot.processes + .filter((entry) => entry.identity.pid !== process.pid) + .map( + (entry): ServerProcessDiagnosticsEntry => ({ + pid: entry.identity.pid, + startTimeMs: 
entry.identity.startTimeMs, + ppid: entry.ppid, + pgid: Option.none(), + status: entry.status || "Unknown", + cpuPercent: entry.cpuPercent, + rssBytes: entry.residentBytes, + elapsed: formatElapsed(entry.runTimeMs), + command: entry.command || entry.name || "unknown", + depth: Math.max(0, entry.depth - 1), + childPids: entry.childPids, + }), + ); + return { + serverPid: process.pid, + readAt: snapshot.readAt, + processCount: processes.length, + totalRssBytes: processes.reduce((total, entry) => total + entry.rssBytes, 0), + totalCpuPercent: processes.reduce((total, entry) => total + entry.cpuPercent, 0), + processes, + error: Option.map(snapshot.health.native.lastError, (message) => ({ message })), + }; + }), ); const signal: ProcessDiagnosticsShape["signal"] = Effect.fn("ProcessDiagnostics.signal")( function* (input) { - return yield* assertDescendantPid(input.pid).pipe( - Effect.provideService(ChildProcessSpawner.ChildProcessSpawner, spawner), - Effect.flatMap(() => - Effect.try({ - try: () => { - process.kill(input.pid, input.signal); - return { - pid: input.pid, - signal: input.signal, - signaled: true, - message: Option.none(), - }; - }, - catch: (cause) => - toProcessDiagnosticsError( - `Failed to signal process ${input.pid} with ${input.signal}.`, - cause, - ), - }), - ), - Effect.catch((error: ProcessDiagnosticsError) => - Effect.succeed({ - pid: input.pid, - signal: input.signal, - signaled: false, - message: Option.some(error.message), - }), - ), + if (input.pid === process.pid) { + return { + pid: input.pid, + signal: input.signal, + signaled: false, + message: Option.some("Refusing to signal the T3 server process."), + }; + } + const current = yield* telemetry.latest; + const selected = current.processes.find( + (entry) => + entry.identity.pid === input.pid && entry.identity.startTimeMs === input.startTimeMs, ); + if (!selected) { + return { + pid: input.pid, + signal: input.signal, + signaled: false, + message: Option.some( + `Process ${input.pid} no 
longer matches the selected process identity.`, + ), + }; + } + if (!canSignalCategory(selected.category)) { + return { + pid: input.pid, + signal: input.signal, + signaled: false, + message: Option.some(`Process ${input.pid} is not a signalable T3 backend descendant.`), + }; + } + return yield* telemetry + .validateProcessIdentity({ + pid: input.pid, + startTimeMs: input.startTimeMs, + }) + .pipe( + Effect.flatMap((valid) => + valid + ? Effect.void + : Effect.fail( + new ProcessIdentityChanged({ + pid: input.pid, + startTimeMs: input.startTimeMs, + }), + ), + ), + Effect.flatMap(() => + Effect.try({ + try: () => { + process.kill(input.pid, input.signal); + return { + pid: input.pid, + signal: input.signal, + signaled: true, + message: Option.none(), + }; + }, + catch: (cause) => + new ProcessSignalFailed({ + pid: input.pid, + signal: input.signal, + cause, + }), + }), + ), + Effect.catch((error) => + Effect.succeed({ + pid: input.pid, + signal: input.signal, + signaled: false, + message: Option.some( + error instanceof Error ? 
error.message : "Failed to signal process.", + ), + }), + ), + ); }, ); diff --git a/apps/server/src/diagnostics/ProcessResourceMonitor.test.ts b/apps/server/src/diagnostics/ProcessResourceMonitor.test.ts index 11d12c012db..0b71785fbf6 100644 --- a/apps/server/src/diagnostics/ProcessResourceMonitor.test.ts +++ b/apps/server/src/diagnostics/ProcessResourceMonitor.test.ts @@ -1,231 +1,132 @@ import { describe, expect, it } from "@effect/vitest"; +import type { ResourceTelemetryHistory } from "@t3tools/contracts"; import * as DateTime from "effect/DateTime"; import * as Effect from "effect/Effect"; +import * as Layer from "effect/Layer"; import * as Option from "effect/Option"; +import * as Stream from "effect/Stream"; -import { - aggregateProcessResourceHistory, - collectMonitoredSamples, -} from "./ProcessResourceMonitor.ts"; +import * as ResourceTelemetry from "../resourceTelemetry/ResourceTelemetry.ts"; +import * as ProcessResourceMonitor from "./ProcessResourceMonitor.ts"; describe("ProcessResourceMonitor", () => { - it.effect("samples the server root process and descendants", () => - Effect.sync(() => { - const sampledAt = DateTime.makeUnsafe("2026-05-05T10:00:00.000Z"); - const samples = collectMonitoredSamples({ - serverPid: 100, - sampledAt, - sampledAtMs: DateTime.toEpochMillis(sampledAt), - rows: [ - { - pid: 100, - ppid: 1, - pgid: 100, - status: "S", - cpuPercent: 2, - rssBytes: 1_000, - elapsed: "01:00", - command: "t3 server", - }, - { - pid: 101, - ppid: 100, - pgid: 100, - status: "S", - cpuPercent: 10, - rssBytes: 2_000, - elapsed: "00:20", - command: "codex app-server", - }, - { - pid: 102, - ppid: 101, - pgid: 100, - status: "R", - cpuPercent: 50, - rssBytes: 3_000, - elapsed: "00:05", - command: "rg needle", - }, + it.effect("projects resource telemetry history into the legacy diagnostics contract", () => + Effect.gen(function* () { + const readAt = DateTime.makeUnsafe("2026-05-05T10:00:00.000Z"); + const history: ResourceTelemetryHistory = { + 
readAt, + windowMs: 60_000, + bucketMs: 10_000, + sampleIntervalMs: 1_000, + retainedSampleCount: 2, + buckets: [ { - pid: 200, - ppid: 1, - pgid: 200, - status: "R", - cpuPercent: 99, - rssBytes: 9_000, - elapsed: "00:05", - command: "unrelated", + startedAt: DateTime.makeUnsafe("2026-05-05T09:59:50.000Z"), + endedAt: readAt, + avgCpuPercent: 15, + maxCpuPercent: 25, + maxRssBytes: 4_096, + ioReadBytes: 1_024, + ioWriteBytes: 2_048, + maxProcessCount: 2, }, ], - }); - - expect(samples.map((sample) => sample.pid)).toEqual([100, 101, 102]); - expect(samples.map((sample) => sample.depth)).toEqual([0, 1, 2]); - expect(samples[0]?.isServerRoot).toBe(true); - expect(samples[1]?.isServerRoot).toBe(false); - }), - ); - - it.effect("rolls samples up by process and CPU time", () => - Effect.sync(() => { - const firstAt = DateTime.makeUnsafe("2026-05-05T10:00:00.000Z"); - const secondAt = DateTime.makeUnsafe("2026-05-05T10:00:05.000Z"); - const samples = [ - ...collectMonitoredSamples({ - serverPid: 100, - sampledAt: firstAt, - sampledAtMs: DateTime.toEpochMillis(firstAt), - rows: [ - { - pid: 100, - ppid: 1, - pgid: 100, - status: "S", - cpuPercent: 10, - rssBytes: 1_000, - elapsed: "01:00", - command: "t3 server", - }, - ], - }), - ...collectMonitoredSamples({ - serverPid: 100, - sampledAt: secondAt, - sampledAtMs: DateTime.toEpochMillis(secondAt), - rows: [ - { - pid: 100, - ppid: 1, - pgid: 100, - status: "S", - cpuPercent: 30, - rssBytes: 2_000, - elapsed: "01:05", - command: "t3 server", - }, - ], - }), - ]; - - const result = aggregateProcessResourceHistory({ - samples, - readAt: secondAt, - readAtMs: DateTime.toEpochMillis(secondAt), - windowMs: 60_000, - bucketMs: 10_000, - lastError: null, - }); - - expect(Option.isNone(result.error)).toBe(true); - expect(result.topProcesses).toHaveLength(1); - expect(result.topProcesses[0]?.avgCpuPercent).toBe(20); - expect(result.topProcesses[0]?.maxCpuPercent).toBe(30); - expect(result.topProcesses[0]?.cpuSecondsApprox).toBe(2); 
- expect(result.totalCpuSecondsApprox).toBe(2); - expect(result.buckets.some((bucket) => bucket.maxCpuPercent === 30)).toBe(true); - }), - ); - - it.effect("keeps a process grouped when elapsed time drifts between samples", () => - Effect.sync(() => { - const firstAt = DateTime.makeUnsafe("2026-05-05T10:00:00.400Z"); - const secondAt = DateTime.makeUnsafe("2026-05-05T10:00:05.900Z"); - const samples = [ - ...collectMonitoredSamples({ - serverPid: 100, - sampledAt: firstAt, - sampledAtMs: DateTime.toEpochMillis(firstAt), - rows: [ - { - pid: 100, - ppid: 1, - pgid: 100, - status: "S", - cpuPercent: 1, - rssBytes: 1_000, - elapsed: "01:00", - command: "t3 server", - }, - ], - }), - ...collectMonitoredSamples({ - serverPid: 100, - sampledAt: secondAt, - sampledAtMs: DateTime.toEpochMillis(secondAt), - rows: [ - { - pid: 100, - ppid: 1, - pgid: 100, - status: "S", - cpuPercent: 2, - rssBytes: 2_000, - elapsed: "01:06", - command: "t3 server", - }, - ], - }), - ]; - - const result = aggregateProcessResourceHistory({ - samples, - readAt: secondAt, - readAtMs: DateTime.toEpochMillis(secondAt), - windowMs: 60_000, - bucketMs: 10_000, - lastError: null, - }); - - expect(result.topProcesses).toHaveLength(1); - expect(result.topProcesses[0]?.isServerRoot).toBe(true); - expect(result.topProcesses[0]?.sampleCount).toBe(2); - expect(result.topProcesses[0]?.maxRssBytes).toBe(2_000); - }), - ); - - it.effect("returns all process summaries in the selected window", () => - Effect.sync(() => { - const sampledAt = DateTime.makeUnsafe("2026-05-05T10:00:00.000Z"); - const samples = collectMonitoredSamples({ - serverPid: 100, - sampledAt, - sampledAtMs: DateTime.toEpochMillis(sampledAt), - rows: [ + topProcesses: [ { - pid: 100, + identity: { pid: process.pid, startTimeMs: 100 }, ppid: 1, - pgid: 100, - status: "S", - cpuPercent: 1, - rssBytes: 1_000, - elapsed: "01:00", + depth: 0, + name: "node", command: "t3 server", + category: "server", + firstSeenAt: 
DateTime.makeUnsafe("2026-05-05T09:59:55.000Z"), + lastSeenAt: readAt, + currentCpuPercent: 5, + avgCpuPercent: 4, + maxCpuPercent: 8, + cpuTimeMs: 1_500, + currentRssBytes: 2_048, + peakRssBytes: 4_096, + ioReadBytes: 1_024, + ioWriteBytes: 2_048, + ioSemantics: "storage", + sampleCount: 2, }, - ...Array.from({ length: 35 }, (_, index) => ({ - pid: 200 + index, - ppid: index === 0 ? 100 : 199 + index, - pgid: 100, - status: "S", - cpuPercent: 35 - index, - rssBytes: 2_000 + index, - elapsed: "00:10", - command: `worker ${index}`, - })), ], - }); + health: { + native: { + status: "degraded", + lastSampleAt: Option.some(readAt), + lastError: Option.some("collector stalled"), + }, + desktop: { + status: "healthy", + lastSampleAt: Option.some(readAt), + lastError: Option.none(), + }, + sidecarVersion: Option.some("0.1.0"), + sidecarPid: Option.some(9_000), + restartCount: 1, + collectionDurationMicros: 250, + scannedProcessCount: 80, + retainedProcessCount: 2, + inaccessibleProcessCount: 0, + }, + }; + const telemetry: ResourceTelemetry.ResourceTelemetryShape = { + latest: Effect.die("unused"), + changes: Stream.empty, + readHistory: () => Effect.succeed(history), + refresh: Effect.die("unused"), + validateProcessIdentity: () => Effect.die("unused"), + retry: Effect.die("unused"), + }; + const layer = ProcessResourceMonitor.layer.pipe( + Layer.provide( + Layer.succeed( + ResourceTelemetry.ResourceTelemetry, + ResourceTelemetry.ResourceTelemetry.of(telemetry), + ), + ), + ); - const result = aggregateProcessResourceHistory({ - samples, - readAt: sampledAt, - readAtMs: DateTime.toEpochMillis(sampledAt), - windowMs: 60_000, - bucketMs: 10_000, - lastError: null, - }); + const result = yield* Effect.service(ProcessResourceMonitor.ProcessResourceMonitor).pipe( + Effect.flatMap((monitor) => + monitor.readHistory({ + windowMs: 60_000, + bucketMs: 10_000, + }), + ), + Effect.provide(layer), + ); - expect(result.topProcesses).toHaveLength(36); - 
expect(result.topProcesses.some((process) => process.command === "worker 34")).toBe(true); + expect(result.totalCpuSecondsApprox).toBe(1.5); + expect(result.topProcesses).toEqual([ + { + processKey: `${process.pid}:100`, + pid: process.pid, + ppid: 1, + command: "t3 server", + depth: 0, + isServerRoot: true, + firstSeenAt: DateTime.makeUnsafe("2026-05-05T09:59:55.000Z"), + lastSeenAt: readAt, + currentCpuPercent: 5, + avgCpuPercent: 4, + maxCpuPercent: 8, + cpuSecondsApprox: 1.5, + currentRssBytes: 2_048, + maxRssBytes: 4_096, + sampleCount: 2, + }, + ]); + expect(result.buckets[0]).toMatchObject({ + avgCpuPercent: 15, + maxCpuPercent: 25, + maxRssBytes: 4_096, + maxProcessCount: 2, + }); + expect(result.error).toEqual(Option.some({ message: "collector stalled" })); }), ); }); diff --git a/apps/server/src/diagnostics/ProcessResourceMonitor.ts b/apps/server/src/diagnostics/ProcessResourceMonitor.ts index efeeb66256d..455db721b20 100644 --- a/apps/server/src/diagnostics/ProcessResourceMonitor.ts +++ b/apps/server/src/diagnostics/ProcessResourceMonitor.ts @@ -1,45 +1,13 @@ import type { - ServerProcessResourceHistoryBucket, ServerProcessResourceHistoryInput, ServerProcessResourceHistoryResult, - ServerProcessResourceHistorySummary, } from "@t3tools/contracts"; import * as Context from "effect/Context"; -import * as DateTime from "effect/DateTime"; import * as Effect from "effect/Effect"; import * as Layer from "effect/Layer"; import * as Option from "effect/Option"; -import * as Ref from "effect/Ref"; -import { ChildProcessSpawner } from "effect/unstable/process"; -import { - buildDescendantEntries, - isDiagnosticsQueryProcess, - type ProcessRow, - readProcessRows, -} from "./ProcessDiagnostics.ts"; - -const SAMPLE_INTERVAL_MS = 5_000; -const RETENTION_MS = 60 * 60_000; -const MAX_RETAINED_SAMPLES = 20_000; - -export interface ProcessResourceSample { - readonly sampledAt: DateTime.Utc; - readonly sampledAtMs: number; - readonly processKey: string; - readonly pid: 
number; - readonly ppid: number; - readonly command: string; - readonly cpuPercent: number; - readonly rssBytes: number; - readonly depth: number; - readonly isServerRoot: boolean; -} - -interface MonitorState { - readonly samples: ReadonlyArray; - readonly lastError: string | null; -} +import * as ResourceTelemetry from "../resourceTelemetry/ResourceTelemetry.ts"; export interface ProcessResourceMonitorShape { readonly readHistory: ( @@ -52,246 +20,51 @@ export class ProcessResourceMonitor extends Context.Service< ProcessResourceMonitorShape >()("t3/diagnostics/ProcessResourceMonitor") {} -function dateTimeFromMillis(ms: number): DateTime.Utc { - return DateTime.makeUnsafe(ms); -} - -function sampleKey(row: Pick): string { - return `${row.pid}:${row.command}`; -} - -function findServerRootRow(rows: ReadonlyArray, serverPid: number): ProcessRow | null { - return rows.find((row) => row.pid === serverPid) ?? null; -} - -export function collectMonitoredSamples(input: { - readonly rows: ReadonlyArray; - readonly serverPid: number; - readonly sampledAt: DateTime.Utc; - readonly sampledAtMs: number; -}): ReadonlyArray { - const rows = input.rows.filter((row) => !isDiagnosticsQueryProcess(row, input.serverPid)); - const root = findServerRootRow(rows, input.serverPid); - const descendants = buildDescendantEntries(rows, input.serverPid); - const samples: ProcessResourceSample[] = []; - - if (root) { - samples.push({ - sampledAt: input.sampledAt, - sampledAtMs: input.sampledAtMs, - processKey: sampleKey(root), - pid: root.pid, - ppid: root.ppid, - command: root.command, - cpuPercent: root.cpuPercent, - rssBytes: root.rssBytes, - depth: 0, - isServerRoot: true, - }); - } - - for (const process of descendants) { - samples.push({ - sampledAt: input.sampledAt, - sampledAtMs: input.sampledAtMs, - processKey: sampleKey(process), - pid: process.pid, - ppid: process.ppid, - command: process.command, - cpuPercent: process.cpuPercent, - rssBytes: process.rssBytes, - depth: 
process.depth + 1, - isServerRoot: false, - }); - } - - return samples; -} - -function trimSamples( - samples: ReadonlyArray, - nowMs: number, -): ReadonlyArray { - const minSampledAtMs = nowMs - RETENTION_MS; - const retained = samples.filter((sample) => sample.sampledAtMs >= minSampledAtMs); - return retained.length <= MAX_RETAINED_SAMPLES - ? retained - : retained.slice(retained.length - MAX_RETAINED_SAMPLES); -} - -function summarizeProcesses( - samples: ReadonlyArray, -): ReadonlyArray { - const groups = new Map(); - for (const sample of samples) { - const processSamples = groups.get(sample.processKey) ?? []; - processSamples.push(sample); - groups.set(sample.processKey, processSamples); - } - - return [...groups.entries()] - .map(([processKey, processSamples]) => { - const sorted = processSamples.toSorted((left, right) => left.sampledAtMs - right.sampledAtMs); - const first = sorted[0]!; - const latest = sorted[sorted.length - 1]!; - const cpuPercentTotal = sorted.reduce((total, sample) => total + sample.cpuPercent, 0); - const maxCpuPercent = Math.max(...sorted.map((sample) => sample.cpuPercent)); - const maxRssBytes = Math.max(...sorted.map((sample) => sample.rssBytes)); - const cpuSecondsApprox = sorted.reduce( - (total, sample) => total + (sample.cpuPercent / 100) * (SAMPLE_INTERVAL_MS / 1_000), - 0, - ); - - return { - processKey, - pid: latest.pid, - ppid: latest.ppid, - command: latest.command, - depth: latest.depth, - isServerRoot: latest.isServerRoot, - firstSeenAt: first.sampledAt, - lastSeenAt: latest.sampledAt, - currentCpuPercent: latest.cpuPercent, - avgCpuPercent: cpuPercentTotal / sorted.length, - maxCpuPercent, - cpuSecondsApprox, - currentRssBytes: latest.rssBytes, - maxRssBytes, - sampleCount: sorted.length, - } satisfies ServerProcessResourceHistorySummary; - }) - .toSorted((left, right) => right.cpuSecondsApprox - left.cpuSecondsApprox); -} - -function buildBuckets(input: { - readonly samples: ReadonlyArray; - readonly nowMs: number; - 
readonly windowMs: number; - readonly bucketMs: number; -}): ReadonlyArray { - const bucketMs = Math.max(1_000, input.bucketMs); - const windowStartMs = input.nowMs - input.windowMs; - const buckets: ServerProcessResourceHistoryBucket[] = []; - - for (let startedAtMs = windowStartMs; startedAtMs < input.nowMs; startedAtMs += bucketMs) { - const endedAtMs = Math.min(input.nowMs, startedAtMs + bucketMs); - const bucketSamples = input.samples.filter( - (sample) => - sample.sampledAtMs >= startedAtMs && - (endedAtMs === input.nowMs - ? sample.sampledAtMs <= endedAtMs - : sample.sampledAtMs < endedAtMs), - ); - const samplesByRead = new Map(); - for (const sample of bucketSamples) { - const samplesAtTime = samplesByRead.get(sample.sampledAtMs) ?? []; - samplesAtTime.push(sample); - samplesByRead.set(sample.sampledAtMs, samplesAtTime); - } - - const readTotals = [...samplesByRead.values()].map((samplesAtTime) => ({ - cpuPercent: samplesAtTime.reduce((total, sample) => total + sample.cpuPercent, 0), - rssBytes: samplesAtTime.reduce((total, sample) => total + sample.rssBytes, 0), - processCount: samplesAtTime.length, - })); - const avgCpuPercent = - readTotals.length === 0 - ? 0 - : readTotals.reduce((total, read) => total + read.cpuPercent, 0) / readTotals.length; - - buckets.push({ - startedAt: dateTimeFromMillis(startedAtMs), - endedAt: dateTimeFromMillis(endedAtMs), - avgCpuPercent, - maxCpuPercent: readTotals.length ? Math.max(...readTotals.map((read) => read.cpuPercent)) : 0, - maxRssBytes: readTotals.length ? Math.max(...readTotals.map((read) => read.rssBytes)) : 0, - maxProcessCount: readTotals.length - ? 
Math.max(...readTotals.map((read) => read.processCount)) - : 0, - }); - } - - return buckets; -} - -export function aggregateProcessResourceHistory(input: { - readonly samples: ReadonlyArray; - readonly readAt: DateTime.Utc; - readonly readAtMs: number; - readonly windowMs: number; - readonly bucketMs: number; - readonly lastError: string | null; -}): ServerProcessResourceHistoryResult { - const windowMs = Math.max(1_000, input.windowMs); - const bucketMs = Math.max(1_000, input.bucketMs); - const minSampledAtMs = input.readAtMs - windowMs; - const samples = input.samples.filter((sample) => sample.sampledAtMs >= minSampledAtMs); - const topProcesses = summarizeProcesses(samples); - const totalCpuSecondsApprox = samples.reduce( - (total, sample) => total + (sample.cpuPercent / 100) * (SAMPLE_INTERVAL_MS / 1_000), - 0, - ); - - return { - readAt: input.readAt, - windowMs, - bucketMs, - sampleIntervalMs: SAMPLE_INTERVAL_MS, - retainedSampleCount: input.samples.length, - totalCpuSecondsApprox, - buckets: buildBuckets({ samples, nowMs: input.readAtMs, windowMs, bucketMs }), - topProcesses, - error: input.lastError ? 
Option.some({ message: input.lastError }) : Option.none(), - }; -} - export const make = Effect.fn("makeProcessResourceMonitor")(function* () { - const spawner = yield* ChildProcessSpawner.ChildProcessSpawner; - const state = yield* Ref.make({ samples: [], lastError: null }); - - const sampleOnce = Effect.gen(function* () { - const sampledAt = yield* DateTime.now; - const sampledAtMs = DateTime.toEpochMillis(sampledAt); - const rows = yield* readProcessRows.pipe( - Effect.provideService(ChildProcessSpawner.ChildProcessSpawner, spawner), - ); - const samples = collectMonitoredSamples({ - rows, - serverPid: process.pid, - sampledAt, - sampledAtMs, - }); - yield* Ref.update(state, (current) => ({ - samples: trimSamples([...current.samples, ...samples], sampledAtMs), - lastError: null, - })); - }).pipe( - Effect.catch((error: unknown) => - Ref.update(state, (current) => ({ - ...current, - lastError: error instanceof Error ? error.message : "Failed to sample process resources.", - })), - ), - ); - - yield* Effect.forever(sampleOnce.pipe(Effect.andThen(Effect.sleep(SAMPLE_INTERVAL_MS)))).pipe( - Effect.forkScoped, - ); - + const telemetry = yield* ResourceTelemetry.ResourceTelemetry; const readHistory: ProcessResourceMonitorShape["readHistory"] = (input) => - Effect.gen(function* () { - const readAt = yield* DateTime.now; - const readAtMs = DateTime.toEpochMillis(readAt); - const current = yield* Ref.get(state); - return aggregateProcessResourceHistory({ - samples: current.samples, - readAt, - readAtMs, - windowMs: input.windowMs, - bucketMs: input.bucketMs, - lastError: current.lastError, - }); - }); + telemetry.readHistory(input).pipe( + Effect.map((history) => { + const topProcesses = history.topProcesses.map((entry) => ({ + processKey: `${entry.identity.pid}:${entry.identity.startTimeMs}`, + pid: entry.identity.pid, + ppid: entry.ppid, + command: entry.command || entry.name || "unknown", + depth: entry.depth, + isServerRoot: entry.category === "server", + 
firstSeenAt: entry.firstSeenAt, + lastSeenAt: entry.lastSeenAt, + currentCpuPercent: entry.currentCpuPercent, + avgCpuPercent: entry.avgCpuPercent, + maxCpuPercent: entry.maxCpuPercent, + cpuSecondsApprox: entry.cpuTimeMs / 1_000, + currentRssBytes: entry.currentRssBytes, + maxRssBytes: entry.peakRssBytes, + sampleCount: entry.sampleCount, + })); + return { + readAt: history.readAt, + windowMs: history.windowMs, + bucketMs: history.bucketMs, + sampleIntervalMs: history.sampleIntervalMs, + retainedSampleCount: history.retainedSampleCount, + totalCpuSecondsApprox: topProcesses.reduce( + (total, entry) => total + entry.cpuSecondsApprox, + 0, + ), + buckets: history.buckets.map((bucket) => ({ + startedAt: bucket.startedAt, + endedAt: bucket.endedAt, + avgCpuPercent: bucket.avgCpuPercent, + maxCpuPercent: bucket.maxCpuPercent, + maxRssBytes: bucket.maxRssBytes, + maxProcessCount: bucket.maxProcessCount, + })), + topProcesses, + error: history.health.native.lastError.pipe(Option.map((message) => ({ message }))), + }; + }), + ); return ProcessResourceMonitor.of({ readHistory }); }); diff --git a/apps/server/src/observability/Layers/Observability.ts b/apps/server/src/observability/Layers/Observability.ts index 95263866d80..f1eea07cac5 100644 --- a/apps/server/src/observability/Layers/Observability.ts +++ b/apps/server/src/observability/Layers/Observability.ts @@ -7,6 +7,7 @@ import * as Tracer from "effect/Tracer"; import { OtlpMetrics, OtlpSerialization, OtlpTracer } from "effect/unstable/observability"; import { ServerConfig } from "../../config.ts"; +import * as ResourceAttribution from "../../resourceTelemetry/ResourceAttribution.ts"; import { ServerLoggerLive } from "../../serverLogger.ts"; import { BrowserTraceCollector } from "../Services/BrowserTraceCollector.ts"; @@ -15,6 +16,7 @@ const otlpSerializationLayer = OtlpSerialization.layerJson; export const ObservabilityLive = Layer.unwrap( Effect.gen(function* () { const config = yield* ServerConfig; + const 
attribution = yield* ResourceAttribution.ResourceAttribution; const traceReferencesLayer = Layer.mergeAll( Layer.succeed(Tracer.MinimumTraceLevel, config.traceMinLevel), @@ -29,6 +31,14 @@ export const ObservabilityLive = Layer.unwrap( maxBytes: config.traceMaxBytes, maxFiles: config.traceMaxFiles, batchWindowMs: config.traceBatchWindowMs, + onFlush: (stats) => + attribution.record({ + component: "server-trace", + operation: "append", + logicalWriteBytes: stats.logicalWriteBytes, + count: stats.count, + durationMs: stats.durationMs, + }), }); const delegate = config.otlpTracesUrl === undefined diff --git a/apps/server/src/provider/Drivers/ClaudeDriver.ts b/apps/server/src/provider/Drivers/ClaudeDriver.ts index b126028f813..a5f180c62de 100644 --- a/apps/server/src/provider/Drivers/ClaudeDriver.ts +++ b/apps/server/src/provider/Drivers/ClaudeDriver.ts @@ -25,7 +25,9 @@ import { HttpClient } from "effect/unstable/http"; import { ChildProcessSpawner } from "effect/unstable/process"; import { makeClaudeTextGeneration } from "../../textGeneration/ClaudeTextGeneration.ts"; +import * as BackgroundPolicy from "../../background/BackgroundPolicy.ts"; import { ServerConfig } from "../../config.ts"; +import { ServerSettingsService } from "../../serverSettings.ts"; import { ProviderDriverError } from "../Errors.ts"; import { makeClaudeAdapter } from "../Layers/ClaudeAdapter.ts"; import { @@ -52,7 +54,6 @@ import { makeClaudeCapabilitiesCacheKey, makeClaudeContinuationGroupKey } from " const decodeClaudeSettings = Schema.decodeSync(ClaudeSettings); const DRIVER_KIND = ProviderDriverKind.make("claudeAgent"); -const SNAPSHOT_REFRESH_INTERVAL = Duration.minutes(5); const CAPABILITIES_PROBE_TTL = Duration.minutes(5); function isClaudeNativeCommandPath(commandPath: string): boolean { @@ -77,13 +78,15 @@ const UPDATE = makePackageManagedProviderMaintenanceResolver({ }); export type ClaudeDriverEnv = + | BackgroundPolicy.BackgroundPolicy | ChildProcessSpawner.ChildProcessSpawner | 
Crypto.Crypto | FileSystem.FileSystem | HttpClient.HttpClient | Path.Path | ProviderEventLoggers - | ServerConfig; + | ServerConfig + | ServerSettingsService; const withInstanceIdentity = (input: { @@ -176,7 +179,6 @@ export const ClaudeDriver: ProviderDriver = { Effect.provideService(HttpClient.HttpClient, httpClient), Effect.flatMap((enrichedSnapshot) => publishSnapshot(enrichedSnapshot)), ), - refreshInterval: SNAPSHOT_REFRESH_INTERVAL, }).pipe( Effect.mapError( (cause) => diff --git a/apps/server/src/provider/Drivers/CodexDriver.ts b/apps/server/src/provider/Drivers/CodexDriver.ts index 441edda479f..f36cfa3dddc 100644 --- a/apps/server/src/provider/Drivers/CodexDriver.ts +++ b/apps/server/src/provider/Drivers/CodexDriver.ts @@ -22,7 +22,6 @@ * @module provider/Drivers/CodexDriver */ import { CodexSettings, ProviderDriverKind, type ServerProvider } from "@t3tools/contracts"; -import * as Duration from "effect/Duration"; import * as Crypto from "effect/Crypto"; import * as Effect from "effect/Effect"; import * as FileSystem from "effect/FileSystem"; @@ -33,7 +32,9 @@ import { HttpClient } from "effect/unstable/http"; import { ChildProcessSpawner } from "effect/unstable/process"; import { makeCodexTextGeneration } from "../../textGeneration/CodexTextGeneration.ts"; +import * as BackgroundPolicy from "../../background/BackgroundPolicy.ts"; import { ServerConfig } from "../../config.ts"; +import { ServerSettingsService } from "../../serverSettings.ts"; import { ProviderDriverError } from "../Errors.ts"; import { makeCodexAdapter } from "../Layers/CodexAdapter.ts"; import { checkCodexProviderStatus, makePendingCodexProvider } from "../Layers/CodexProvider.ts"; @@ -55,7 +56,6 @@ import { const decodeCodexSettings = Schema.decodeSync(CodexSettings); const DRIVER_KIND = ProviderDriverKind.make("codex"); -const SNAPSHOT_REFRESH_INTERVAL = Duration.minutes(5); const UPDATE = makePackageManagedProviderMaintenanceResolver({ provider: DRIVER_KIND, npmPackageName: 
"@openai/codex", @@ -69,13 +69,15 @@ const UPDATE = makePackageManagedProviderMaintenanceResolver({ * registered driver and the runtime satisfies them once. */ export type CodexDriverEnv = + | BackgroundPolicy.BackgroundPolicy | ChildProcessSpawner.ChildProcessSpawner | Crypto.Crypto | FileSystem.FileSystem | HttpClient.HttpClient | Path.Path | ProviderEventLoggers - | ServerConfig; + | ServerConfig + | ServerSettingsService; /** * Stamp instance identity onto a `ServerProvider` snapshot produced by the @@ -176,7 +178,6 @@ export const CodexDriver: ProviderDriver = { Effect.provideService(HttpClient.HttpClient, httpClient), Effect.flatMap((enrichedSnapshot) => publishSnapshot(enrichedSnapshot)), ), - refreshInterval: SNAPSHOT_REFRESH_INTERVAL, }).pipe( Effect.mapError( (cause) => diff --git a/apps/server/src/provider/Drivers/CursorDriver.ts b/apps/server/src/provider/Drivers/CursorDriver.ts index ba532864c45..3f045be89d8 100644 --- a/apps/server/src/provider/Drivers/CursorDriver.ts +++ b/apps/server/src/provider/Drivers/CursorDriver.ts @@ -12,7 +12,6 @@ * @module provider/Drivers/CursorDriver */ import { CursorSettings, ProviderDriverKind, type ServerProvider } from "@t3tools/contracts"; -import * as Duration from "effect/Duration"; import * as Crypto from "effect/Crypto"; import * as Effect from "effect/Effect"; import * as FileSystem from "effect/FileSystem"; @@ -23,6 +22,8 @@ import { HttpClient } from "effect/unstable/http"; import { ChildProcessSpawner } from "effect/unstable/process"; import { ServerConfig } from "../../config.ts"; +import * as BackgroundPolicy from "../../background/BackgroundPolicy.ts"; +import { ServerSettingsService } from "../../serverSettings.ts"; import { makeCursorTextGeneration } from "../../textGeneration/CursorTextGeneration.ts"; import { ProviderDriverError } from "../Errors.ts"; import { makeCursorAdapter } from "../Layers/CursorAdapter.ts"; @@ -48,7 +49,6 @@ import { const decodeCursorSettings = 
Schema.decodeSync(CursorSettings); const DRIVER_KIND = ProviderDriverKind.make("cursor"); -const SNAPSHOT_REFRESH_INTERVAL = Duration.minutes(5); const UPDATE = makeStaticProviderMaintenanceResolver( makeProviderMaintenanceCapabilities({ provider: DRIVER_KIND, @@ -60,13 +60,15 @@ const UPDATE = makeStaticProviderMaintenanceResolver( ); export type CursorDriverEnv = + | BackgroundPolicy.BackgroundPolicy | ChildProcessSpawner.ChildProcessSpawner | Crypto.Crypto | FileSystem.FileSystem | HttpClient.HttpClient | Path.Path | ProviderEventLoggers - | ServerConfig; + | ServerConfig + | ServerSettingsService; const withInstanceIdentity = (input: { @@ -148,8 +150,7 @@ export const CursorDriver: ProviderDriver = { publishSnapshot, stampIdentity, httpClient, - }), - refreshInterval: SNAPSHOT_REFRESH_INTERVAL, + }).pipe(Effect.provideService(ChildProcessSpawner.ChildProcessSpawner, spawner)), }).pipe( Effect.mapError( (cause) => diff --git a/apps/server/src/provider/Drivers/GrokDriver.ts b/apps/server/src/provider/Drivers/GrokDriver.ts index ab01439ffd3..dc6f21b151d 100644 --- a/apps/server/src/provider/Drivers/GrokDriver.ts +++ b/apps/server/src/provider/Drivers/GrokDriver.ts @@ -9,7 +9,9 @@ import * as Stream from "effect/Stream"; import { HttpClient } from "effect/unstable/http"; import { ChildProcessSpawner } from "effect/unstable/process"; +import * as BackgroundPolicy from "../../background/BackgroundPolicy.ts"; import { ServerConfig } from "../../config.ts"; +import { ServerSettingsService } from "../../serverSettings.ts"; import { makeGrokTextGeneration } from "../../textGeneration/GrokTextGeneration.ts"; import { ProviderDriverError } from "../Errors.ts"; import { makeGrokAdapter } from "../Layers/GrokAdapter.ts"; @@ -44,13 +46,15 @@ const UPDATE = makeStaticProviderMaintenanceResolver( ); export type GrokDriverEnv = + | BackgroundPolicy.BackgroundPolicy | ChildProcessSpawner.ChildProcessSpawner | Crypto.Crypto | FileSystem.FileSystem | HttpClient.HttpClient | 
Path.Path | ProviderEventLoggers - | ServerConfig; + | ServerConfig + | ServerSettingsService; const withInstanceIdentity = (input: { diff --git a/apps/server/src/provider/Drivers/OpenCodeDriver.ts b/apps/server/src/provider/Drivers/OpenCodeDriver.ts index e7216f83366..c5a85c7c274 100644 --- a/apps/server/src/provider/Drivers/OpenCodeDriver.ts +++ b/apps/server/src/provider/Drivers/OpenCodeDriver.ts @@ -13,7 +13,6 @@ * @module provider/Drivers/OpenCodeDriver */ import { OpenCodeSettings, ProviderDriverKind, type ServerProvider } from "@t3tools/contracts"; -import * as Duration from "effect/Duration"; import * as Crypto from "effect/Crypto"; import * as Effect from "effect/Effect"; import * as FileSystem from "effect/FileSystem"; @@ -24,7 +23,9 @@ import { HttpClient } from "effect/unstable/http"; import { ChildProcessSpawner } from "effect/unstable/process"; import { makeOpenCodeTextGeneration } from "../../textGeneration/OpenCodeTextGeneration.ts"; +import * as BackgroundPolicy from "../../background/BackgroundPolicy.ts"; import { ServerConfig } from "../../config.ts"; +import { ServerSettingsService } from "../../serverSettings.ts"; import { ProviderDriverError } from "../Errors.ts"; import { makeOpenCodeAdapter } from "../Layers/OpenCodeAdapter.ts"; import { @@ -50,7 +51,6 @@ import { const decodeOpenCodeSettings = Schema.decodeSync(OpenCodeSettings); const DRIVER_KIND = ProviderDriverKind.make("opencode"); -const SNAPSHOT_REFRESH_INTERVAL = Duration.minutes(5); function isOpenCodeNativeCommandPath(commandPath: string): boolean { const normalized = normalizeCommandPath(commandPath); @@ -73,6 +73,7 @@ const UPDATE = makePackageManagedProviderMaintenanceResolver({ }); export type OpenCodeDriverEnv = + | BackgroundPolicy.BackgroundPolicy | ChildProcessSpawner.ChildProcessSpawner | Crypto.Crypto | FileSystem.FileSystem @@ -80,7 +81,8 @@ export type OpenCodeDriverEnv = | OpenCodeRuntime | Path.Path | ProviderEventLoggers - | ServerConfig; + | ServerConfig + | 
ServerSettingsService; const withInstanceIdentity = (input: { @@ -155,7 +157,6 @@ export const OpenCodeDriver: ProviderDriver Effect.provideService(HttpClient.HttpClient, httpClient), Effect.flatMap((enrichedSnapshot) => publishSnapshot(enrichedSnapshot)), ), - refreshInterval: SNAPSHOT_REFRESH_INTERVAL, }).pipe( Effect.mapError( (cause) => diff --git a/apps/server/src/provider/Layers/EventNdjsonLogger.test.ts b/apps/server/src/provider/Layers/EventNdjsonLogger.test.ts index 0b1f99d3c11..ab2fc2ca81d 100644 --- a/apps/server/src/provider/Layers/EventNdjsonLogger.test.ts +++ b/apps/server/src/provider/Layers/EventNdjsonLogger.test.ts @@ -7,6 +7,7 @@ import { ThreadId } from "@t3tools/contracts"; import { assert, describe, it } from "@effect/vitest"; import * as Effect from "effect/Effect"; +import * as ResourceAttribution from "../../resourceTelemetry/ResourceAttribution.ts"; import { makeEventNdjsonLogger } from "./EventNdjsonLogger.ts"; function parseLogLine(line: string) { @@ -207,4 +208,36 @@ describe("EventNdjsonLogger", () => { } }), ); + + it.effect("reports logical provider log writes to resource attribution", () => + Effect.gen(function* () { + const tempDir = fs.mkdtempSync(path.join(os.tmpdir(), "t3-provider-log-")); + const basePath = path.join(tempDir, "provider-native.ndjson"); + + try { + const attribution = yield* ResourceAttribution.make(); + const logger = yield* makeEventNdjsonLogger(basePath, { + stream: "native", + batchWindowMs: 0, + attribution, + }); + assert.notEqual(logger, undefined); + if (!logger) { + return; + } + + yield* logger.write({ id: "attributed-event" }, ThreadId.make("thread-attribution")); + yield* logger.close(); + + const snapshot = yield* attribution.snapshot; + assert.equal(snapshot.entries.length, 1); + assert.equal(snapshot.entries[0]?.component, "provider-event-log"); + assert.equal(snapshot.entries[0]?.operation, "native.append"); + assert.equal(snapshot.entries[0]?.count, 1); + 
assert.isAbove(snapshot.entries[0]?.logicalWriteBytes ?? 0, 0); + } finally { + fs.rmSync(tempDir, { recursive: true, force: true }); + } + }), + ); }); diff --git a/apps/server/src/provider/Layers/EventNdjsonLogger.ts b/apps/server/src/provider/Layers/EventNdjsonLogger.ts index 04377ad520c..194c5746ef7 100644 --- a/apps/server/src/provider/Layers/EventNdjsonLogger.ts +++ b/apps/server/src/provider/Layers/EventNdjsonLogger.ts @@ -11,6 +11,7 @@ import path from "node:path"; import type { ThreadId } from "@t3tools/contracts"; import { RotatingFileSink } from "@t3tools/shared/logging"; +import * as Clock from "effect/Clock"; import * as Effect from "effect/Effect"; import * as Exit from "effect/Exit"; import * as Logger from "effect/Logger"; @@ -19,6 +20,7 @@ import * as Scope from "effect/Scope"; import * as SynchronizedRef from "effect/SynchronizedRef"; import { toSafeThreadAttachmentSegment } from "../../attachmentStore.ts"; +import type { ResourceAttributionShape } from "../../resourceTelemetry/ResourceAttribution.ts"; const DEFAULT_MAX_BYTES = 10 * 1024 * 1024; const DEFAULT_MAX_FILES = 10; @@ -26,6 +28,7 @@ const DEFAULT_BATCH_WINDOW_MS = 200; const GLOBAL_THREAD_SEGMENT = "_global"; const LOG_SCOPE = "provider-observability"; const encodeUnknownJsonString = Schema.encodeUnknownEffect(Schema.UnknownFromJsonString); +const textEncoder = new TextEncoder(); export type EventNdjsonStream = "native" | "canonical" | "orchestration"; @@ -40,6 +43,7 @@ export interface EventNdjsonLoggerOptions { readonly maxBytes?: number; readonly maxFiles?: number; readonly batchWindowMs?: number; + readonly attribution?: ResourceAttributionShape; } interface ThreadWriter { @@ -86,6 +90,41 @@ function resolveStreamLabel(stream: EventNdjsonStream): string { } } +function writeBatchedMessages( + sink: RotatingFileSink, + messages: ReadonlyArray, + maxBytes: number, +): number { + let pendingMessages: Array = []; + let pendingBytes = 0; + let logicalWriteBytes = 0; + + const flush = () 
=> { + if (pendingMessages.length === 0) { + return; + } + sink.write(pendingMessages.join("")); + pendingMessages = []; + pendingBytes = 0; + }; + + for (const message of messages) { + const messageBytes = textEncoder.encode(message).byteLength; + logicalWriteBytes += messageBytes; + if (pendingBytes > 0 && pendingBytes + messageBytes > maxBytes) { + flush(); + } + pendingMessages.push(message); + pendingBytes += messageBytes; + if (pendingBytes >= maxBytes) { + flush(); + } + } + flush(); + + return logicalWriteBytes; +} + const toLogMessage = Effect.fn("toLogMessage")(function* ( event: unknown, ): Effect.fn.Return { @@ -104,6 +143,8 @@ const makeThreadWriter = Effect.fn("makeThreadWriter")(function* (input: { readonly maxFiles: number; readonly batchWindowMs: number; readonly streamLabel: string; + readonly stream: EventNdjsonStream; + readonly attribution?: ResourceAttributionShape; }): Effect.fn.Return { const sinkResult = yield* Effect.sync(() => { try { @@ -135,12 +176,14 @@ const makeThreadWriter = Effect.fn("makeThreadWriter")(function* (input: { const batchedLogger = yield* Logger.batched(lineLogger, { window: input.batchWindowMs, flush: Effect.fn("makeThreadWriter.flush")(function* (messages) { + const startedAt = yield* Clock.currentTimeMillis; const flushResult = yield* Effect.sync(() => { try { - for (const message of messages) { - sink.write(message); - } - return { ok: true as const }; + return { + ok: true as const, + logicalWriteBytes: writeBatchedMessages(sink, messages, input.maxBytes), + count: messages.length, + }; } catch (error) { return { ok: false as const, error }; } @@ -151,6 +194,18 @@ const makeThreadWriter = Effect.fn("makeThreadWriter")(function* (input: { filePath: input.filePath, error: flushResult.error, }); + return; + } + + if (input.attribution && flushResult.count > 0) { + const completedAt = yield* Clock.currentTimeMillis; + yield* input.attribution.record({ + component: "provider-event-log", + operation: 
`${input.stream}.append`, + logicalWriteBytes: flushResult.logicalWriteBytes, + count: flushResult.count, + durationMs: Math.max(0, completedAt - startedAt), + }); } }), }).pipe(Effect.provideService(Scope.Scope, scope)); @@ -216,6 +271,8 @@ export const makeEventNdjsonLogger = Effect.fn("makeEventNdjsonLogger")(function maxFiles, batchWindowMs, streamLabel, + stream: options.stream, + ...(options.attribution ? { attribution: options.attribution } : {}), }).pipe( Effect.map((writer) => { if (!writer) { diff --git a/apps/server/src/provider/Layers/ProviderEventLoggers.ts b/apps/server/src/provider/Layers/ProviderEventLoggers.ts index 711aa6e76b6..b020983e13c 100644 --- a/apps/server/src/provider/Layers/ProviderEventLoggers.ts +++ b/apps/server/src/provider/Layers/ProviderEventLoggers.ts @@ -32,6 +32,7 @@ import * as Effect from "effect/Effect"; import * as Layer from "effect/Layer"; import { ServerConfig } from "../../config.ts"; +import * as ResourceAttribution from "../../resourceTelemetry/ResourceAttribution.ts"; import { type EventNdjsonLogger, makeEventNdjsonLogger } from "./EventNdjsonLogger.ts"; export interface ProviderEventLoggersShape { @@ -71,11 +72,14 @@ export const ProviderEventLoggersLive = Layer.effect( ProviderEventLoggers, Effect.gen(function* () { const { providerEventLogPath } = yield* ServerConfig; + const attribution = yield* ResourceAttribution.ResourceAttribution; const native = yield* makeEventNdjsonLogger(providerEventLogPath, { stream: "native", + attribution, }); const canonical = yield* makeEventNdjsonLogger(providerEventLogPath, { stream: "canonical", + attribution, }); return { native, diff --git a/apps/server/src/provider/Layers/ProviderInstanceRegistryLive.test.ts b/apps/server/src/provider/Layers/ProviderInstanceRegistryLive.test.ts index f2c5892a2c6..c151a165412 100644 --- a/apps/server/src/provider/Layers/ProviderInstanceRegistryLive.test.ts +++ b/apps/server/src/provider/Layers/ProviderInstanceRegistryLive.test.ts @@ -34,11 
+34,15 @@ import { type ProviderInstanceConfigMap, ProviderInstanceId, } from "@t3tools/contracts"; +import * as DateTime from "effect/DateTime"; import * as Effect from "effect/Effect"; import * as Layer from "effect/Layer"; +import * as Stream from "effect/Stream"; import { HttpClient, HttpClientResponse } from "effect/unstable/http"; +import * as BackgroundPolicy from "../../background/BackgroundPolicy.ts"; import { ServerConfig } from "../../config.ts"; +import { ServerSettingsService } from "../../serverSettings.ts"; import { ClaudeDriver } from "../Drivers/ClaudeDriver.ts"; import { CodexDriver } from "../Drivers/CodexDriver.ts"; import { CursorDriver } from "../Drivers/CursorDriver.ts"; @@ -55,6 +59,37 @@ const TestHttpClientLive = Layer.succeed( ), ); +const TEST_EPOCH = DateTime.makeUnsafe("1970-01-01T00:00:00.000Z"); + +const BackgroundPolicyAlwaysRunLayer = Layer.mock(BackgroundPolicy.BackgroundPolicy)({ + reportClientActivity: () => Effect.void, + removeRpcClient: () => Effect.void, + reportHostPowerState: () => Effect.void, + snapshot: Effect.succeed({ + hostPower: { + source: "unknown", + idle: "unknown", + idleSeconds: null, + locked: "unknown", + suspended: false, + onBattery: "unknown", + lowPowerMode: "unknown", + thermalState: "unknown", + stale: true, + updatedAt: TEST_EPOCH, + }, + leases: [], + activeForegroundLeaseCount: 0, + activeScopeKeys: [], + shouldRunOpportunisticWork: true, + updatedAt: TEST_EPOCH, + }), + streamChanges: Stream.empty, + hasDemand: () => Effect.succeed(true), + shouldRunScopeWork: () => Effect.succeed(true), + shouldRunOpportunisticWork: Effect.succeed(true), +}); + const makeCodexConfig = (overrides: Partial): CodexSettings => ({ enabled: false, binaryPath: "codex", @@ -107,6 +142,8 @@ describe("ProviderInstanceRegistryLive — multi-instance codex slice", () => { prefix: "provider-instance-registry-test", }).pipe( Layer.provideMerge(NodeServices.layer), + Layer.provideMerge(BackgroundPolicyAlwaysRunLayer), + 
Layer.provideMerge(ServerSettingsService.layerTest()), Layer.provideMerge(TestHttpClientLive), Layer.provideMerge(Layer.succeed(ProviderEventLoggers, NoOpProviderEventLoggers)), ); @@ -244,6 +281,8 @@ describe("ProviderInstanceRegistryLive — all drivers slice", () => { prefix: "provider-instance-registry-all-drivers-test", }).pipe( Layer.provideMerge(infraLayer), + Layer.provideMerge(BackgroundPolicyAlwaysRunLayer), + Layer.provideMerge(ServerSettingsService.layerTest()), Layer.provideMerge(TestHttpClientLive), Layer.provideMerge(Layer.succeed(ProviderEventLoggers, NoOpProviderEventLoggers)), ); diff --git a/apps/server/src/provider/Layers/ProviderRegistry.test.ts b/apps/server/src/provider/Layers/ProviderRegistry.test.ts index 5fe0f903686..f415ce3bfaf 100644 --- a/apps/server/src/provider/Layers/ProviderRegistry.test.ts +++ b/apps/server/src/provider/Layers/ProviderRegistry.test.ts @@ -1,5 +1,6 @@ import * as NodeServices from "@effect/platform-node/NodeServices"; import { describe, it, assert } from "@effect/vitest"; +import * as DateTime from "effect/DateTime"; import * as Effect from "effect/Effect"; import * as Exit from "effect/Exit"; import * as Fiber from "effect/Fiber"; @@ -32,6 +33,7 @@ import { applyServerSettingsPatch } from "@t3tools/shared/serverSettings"; import { checkCodexProviderStatus, type CodexAppServerProviderSnapshot } from "./CodexProvider.ts"; import { checkClaudeProviderStatus } from "./ClaudeProvider.ts"; +import * as BackgroundPolicy from "../../background/BackgroundPolicy.ts"; import { OpenCodeRuntimeLive } from "../opencodeRuntime.ts"; import { NoOpProviderEventLoggers, ProviderEventLoggers } from "./ProviderEventLoggers.ts"; import { ProviderInstanceRegistryHydrationLive } from "./ProviderInstanceRegistryHydration.ts"; @@ -64,6 +66,7 @@ process.env.T3CODE_CURSOR_ENABLED = "1"; // ── Test helpers ──────────────────────────────────────────────────── const encoder = new TextEncoder(); +const TEST_EPOCH = 
DateTime.makeUnsafe("1970-01-01T00:00:00.000Z"); const TestHttpClientLive = Layer.succeed( HttpClient.HttpClient, @@ -72,6 +75,35 @@ const TestHttpClientLive = Layer.succeed( ), ); +const BackgroundPolicyAlwaysRunLayer = Layer.mock(BackgroundPolicy.BackgroundPolicy)({ + reportClientActivity: () => Effect.void, + removeRpcClient: () => Effect.void, + reportHostPowerState: () => Effect.void, + snapshot: Effect.succeed({ + hostPower: { + source: "unknown", + idle: "unknown", + idleSeconds: null, + locked: "unknown", + suspended: false, + onBattery: "unknown", + lowPowerMode: "unknown", + thermalState: "unknown", + stale: true, + updatedAt: TEST_EPOCH, + }, + leases: [], + activeForegroundLeaseCount: 0, + activeScopeKeys: [], + shouldRunOpportunisticWork: true, + updatedAt: TEST_EPOCH, + }), + streamChanges: Stream.empty, + hasDemand: () => Effect.succeed(true), + shouldRunScopeWork: () => Effect.succeed(true), + shouldRunOpportunisticWork: Effect.succeed(true), +}); + function selectDescriptor( id: string, label: string, @@ -806,6 +838,7 @@ it.layer(Layer.mergeAll(NodeServices.layer, ServerSettingsService.layerTest(), T prefix: "t3-provider-registry-merged-persist-", }), ), + Layer.provideMerge(BackgroundPolicyAlwaysRunLayer), Layer.provideMerge(NodeServices.layer), ), ).pipe(Scope.provide(scope)); @@ -900,6 +933,7 @@ it.layer(Layer.mergeAll(NodeServices.layer, ServerSettingsService.layerTest(), T prefix: "t3-provider-registry-refresh-failure-", }), ), + Layer.provideMerge(BackgroundPolicyAlwaysRunLayer), Layer.provideMerge(NodeServices.layer), ), ).pipe(Scope.provide(scope)); @@ -1004,6 +1038,7 @@ it.layer(Layer.mergeAll(NodeServices.layer, ServerSettingsService.layerTest(), T prefix: "t3-provider-registry-sync-failure-", }), ), + Layer.provideMerge(BackgroundPolicyAlwaysRunLayer), Layer.provideMerge(NodeServices.layer), ), ).pipe(Scope.provide(scope)); @@ -1101,6 +1136,7 @@ it.layer(Layer.mergeAll(NodeServices.layer, ServerSettingsService.layerTest(), T 
Layer.provideMerge(TestHttpClientLive), Layer.provideMerge(Layer.succeed(ProviderEventLoggers, NoOpProviderEventLoggers)), Layer.provideMerge(OpenCodeRuntimeLive), + Layer.provideMerge(BackgroundPolicyAlwaysRunLayer), // NO spawner mock — `ChildProcessSpawner` is supplied by the // outer `NodeServices.layer` on `it.layer(...)` and will // genuinely spawn a subprocess. The missing-binary ENOENT is @@ -1193,6 +1229,7 @@ it.layer(Layer.mergeAll(NodeServices.layer, ServerSettingsService.layerTest(), T }), ), Layer.provideMerge(NodeServices.layer), + Layer.provideMerge(BackgroundPolicyAlwaysRunLayer), ); const runtimeServices = yield* Layer.build(providerRegistryLayer).pipe( Scope.provide(scope), @@ -1301,6 +1338,7 @@ it.layer(Layer.mergeAll(NodeServices.layer, ServerSettingsService.layerTest(), T Layer.provideMerge(Layer.succeed(ProviderEventLoggers, NoOpProviderEventLoggers)), Layer.provideMerge(OpenCodeRuntimeLive), Layer.provideMerge(NodeServices.layer), + Layer.provideMerge(BackgroundPolicyAlwaysRunLayer), ); const runtimeServices = yield* Layer.build(providerRegistryLayer).pipe( Scope.provide(scope), @@ -1354,6 +1392,7 @@ it.layer(Layer.mergeAll(NodeServices.layer, ServerSettingsService.layerTest(), T Layer.provideMerge(TestHttpClientLive), Layer.provideMerge(Layer.succeed(ProviderEventLoggers, NoOpProviderEventLoggers)), Layer.provideMerge(OpenCodeRuntimeLive), + Layer.provideMerge(BackgroundPolicyAlwaysRunLayer), Layer.provideMerge( mockCommandSpawnerLayer((command, args) => { if (command === "agent") { diff --git a/apps/server/src/provider/makeManagedServerProvider.test.ts b/apps/server/src/provider/makeManagedServerProvider.test.ts index ba1f01fe2b9..b2830688cc6 100644 --- a/apps/server/src/provider/makeManagedServerProvider.test.ts +++ b/apps/server/src/provider/makeManagedServerProvider.test.ts @@ -1,16 +1,22 @@ import { describe, it, assert } from "@effect/vitest"; import { ProviderDriverKind, ProviderInstanceId, type ServerProvider } from 
"@t3tools/contracts"; import { createModelCapabilities } from "@t3tools/shared/model"; +import * as DateTime from "effect/DateTime"; import * as Deferred from "effect/Deferred"; import * as Effect from "effect/Effect"; import * as Fiber from "effect/Fiber"; +import * as Layer from "effect/Layer"; import * as PubSub from "effect/PubSub"; import * as Ref from "effect/Ref"; import * as Stream from "effect/Stream"; +import { TestClock } from "effect/testing"; +import * as BackgroundPolicy from "../background/BackgroundPolicy.ts"; +import { ServerSettingsService } from "../serverSettings.ts"; import { makeManagedServerProvider } from "./makeManagedServerProvider.ts"; const emptyCapabilities = createModelCapabilities({ optionDescriptors: [] }); +const TEST_EPOCH = DateTime.makeUnsafe("1970-01-01T00:00:00.000Z"); const fastModeCapabilities = createModelCapabilities({ optionDescriptors: [ { @@ -87,6 +93,43 @@ const refreshedSnapshotSecond: ServerProvider = { message: "Refreshed provider availability again.", }; +function makeBackgroundPolicyLayer(shouldRunScopeWork: boolean) { + return Layer.mock(BackgroundPolicy.BackgroundPolicy)({ + reportClientActivity: () => Effect.void, + removeRpcClient: () => Effect.void, + reportHostPowerState: () => Effect.void, + snapshot: Effect.succeed({ + hostPower: { + source: "unknown", + idle: "unknown", + idleSeconds: null, + locked: "unknown", + suspended: false, + onBattery: "unknown", + lowPowerMode: "unknown", + thermalState: "unknown", + stale: true, + updatedAt: TEST_EPOCH, + }, + leases: [], + activeForegroundLeaseCount: 0, + activeScopeKeys: [], + shouldRunOpportunisticWork: true, + updatedAt: TEST_EPOCH, + }), + streamChanges: Stream.empty, + hasDemand: () => Effect.succeed(shouldRunScopeWork), + shouldRunScopeWork: () => Effect.succeed(shouldRunScopeWork), + shouldRunOpportunisticWork: Effect.succeed(shouldRunScopeWork), + }); +} + +const BackgroundPolicyAlwaysRunLayer = makeBackgroundPolicyLayer(true); +const 
BackgroundPolicyNeverRunLayer = makeBackgroundPolicyLayer(false); +const ServerSettingsTestLayer = ServerSettingsService.layerTest(); +const AlwaysRunTestLayer = Layer.merge(BackgroundPolicyAlwaysRunLayer, ServerSettingsTestLayer); +const NeverRunTestLayer = Layer.merge(BackgroundPolicyNeverRunLayer, ServerSettingsTestLayer); + const enrichedSnapshotSecond: ServerProvider = { ...refreshedSnapshotSecond, checkedAt: "2026-04-10T00:00:04.000Z", @@ -140,7 +183,38 @@ describe("makeManagedServerProvider", () => { assert.deepStrictEqual(latest, refreshedSnapshot); assert.strictEqual(yield* Ref.get(checkCalls), 1); }), - ), + ).pipe(Effect.provide(AlwaysRunTestLayer)), + ); + + it.effect("skips periodic provider refreshes without foreground provider-status demand", () => + Effect.scoped( + Effect.gen(function* () { + const checkCalls = yield* Ref.make(0); + const initialCheckDone = yield* Deferred.make(); + yield* makeManagedServerProvider({ + maintenanceCapabilities, + getSettings: Effect.succeed({ enabled: true }), + streamSettings: Stream.empty, + haveSettingsChanged: (previous, next) => previous.enabled !== next.enabled, + initialSnapshot: () => Effect.succeed(initialSnapshot), + checkProvider: Ref.updateAndGet(checkCalls, (count) => count + 1).pipe( + Effect.tap((count) => + count === 1 + ? 
Deferred.succeed(initialCheckDone, undefined).pipe(Effect.ignore) + : Effect.void, + ), + Effect.as(refreshedSnapshot), + ), + refreshInterval: "1 second", + }); + + yield* Deferred.await(initialCheckDone); + yield* TestClock.adjust("1 second"); + yield* Effect.yieldNow; + + assert.strictEqual(yield* Ref.get(checkCalls), 1); + }), + ).pipe(Effect.provide(Layer.mergeAll(NeverRunTestLayer, TestClock.layer()))), ); it.effect("reruns the provider check when streamed settings change", () => @@ -185,7 +259,7 @@ describe("makeManagedServerProvider", () => { assert.deepStrictEqual(latest, refreshedSnapshotSecond); assert.strictEqual(yield* Ref.get(checkCalls), 2); }), - ), + ).pipe(Effect.provide(AlwaysRunTestLayer)), ); it.effect("streams supplemental snapshot updates after the base provider check completes", () => @@ -223,7 +297,7 @@ describe("makeManagedServerProvider", () => { assert.deepStrictEqual(updates, [refreshedSnapshot, enrichedSnapshot]); assert.deepStrictEqual(latest, enrichedSnapshot); }), - ), + ).pipe(Effect.provide(AlwaysRunTestLayer)), ); it.effect("ignores stale enrichment callbacks after a newer refresh advances generation", () => @@ -284,6 +358,6 @@ describe("makeManagedServerProvider", () => { ]); assert.deepStrictEqual(latest, enrichedSnapshotSecond); }), - ), + ).pipe(Effect.provide(AlwaysRunTestLayer)), ); }); diff --git a/apps/server/src/provider/makeManagedServerProvider.ts b/apps/server/src/provider/makeManagedServerProvider.ts index 88547fb3afa..8bb1f351a88 100644 --- a/apps/server/src/provider/makeManagedServerProvider.ts +++ b/apps/server/src/provider/makeManagedServerProvider.ts @@ -1,4 +1,9 @@ -import type { ServerProvider } from "@t3tools/contracts"; +import { + DEFAULT_PROVIDER_HEALTH_REFRESH_INTERVAL, + type ServerProvider, + ServerSettingsError, +} from "@t3tools/contracts"; +import { resolveServerBackgroundActivitySettings } from "@t3tools/shared/backgroundActivitySettings"; import * as Duration from "effect/Duration"; import * as 
Effect from "effect/Effect"; import * as Equal from "effect/Equal"; @@ -9,8 +14,9 @@ import * as Scope from "effect/Scope"; import * as Stream from "effect/Stream"; import * as Semaphore from "effect/Semaphore"; +import * as BackgroundPolicy from "../background/BackgroundPolicy.ts"; +import { ServerSettingsService } from "../serverSettings.ts"; import type { ServerProviderShape } from "./Services/ServerProvider.ts"; -import { ServerSettingsError } from "@t3tools/contracts"; interface ProviderSnapshotState { readonly snapshot: ServerProvider; @@ -33,7 +39,13 @@ export const makeManagedServerProvider = Effect.fn("makeManagedServerProvider")( readonly publishSnapshot: (snapshot: ServerProvider) => Effect.Effect; }) => Effect.Effect; readonly refreshInterval?: Duration.Input; -}): Effect.fn.Return { +}): Effect.fn.Return< + ServerProviderShape, + ServerSettingsError, + Scope.Scope | BackgroundPolicy.BackgroundPolicy | ServerSettingsService +> { + const backgroundPolicy = yield* BackgroundPolicy.BackgroundPolicy; + const serverSettings = yield* ServerSettingsService; const refreshSemaphore = yield* Semaphore.make(1); const changesPubSub = yield* Effect.acquireRelease( PubSub.unbounded(), @@ -134,13 +146,45 @@ export const makeManagedServerProvider = Effect.fn("makeManagedServerProvider")( return yield* applySnapshot(nextSettings, { forceRefresh: true }); }); + const hasProviderStatusDemand = Effect.gen(function* () { + const state = yield* Ref.get(snapshotStateRef); + const instanceId = state.snapshot.instanceId; + const [genericDemand, instanceDemand] = yield* Effect.all([ + backgroundPolicy.shouldRunScopeWork({ type: "provider-status" }), + backgroundPolicy.shouldRunScopeWork({ type: "provider-status", instanceId }), + ]); + return genericDemand || instanceDemand; + }); + + const getRefreshInterval = input.refreshInterval + ? 
Effect.succeed(input.refreshInterval) + : serverSettings.getSettings.pipe( + Effect.map( + (settings) => + resolveServerBackgroundActivitySettings(settings).providerHealthRefreshInterval, + ), + Effect.orElseSucceed(() => DEFAULT_PROVIDER_HEALTH_REFRESH_INTERVAL), + ); + yield* Stream.runForEach(input.streamSettings, (nextSettings) => Effect.asVoid(applySnapshot(nextSettings)), ).pipe(Effect.forkScoped); yield* Effect.forever( - Effect.sleep(input.refreshInterval ?? "60 seconds").pipe( - Effect.flatMap(() => refreshSnapshot()), + getRefreshInterval.pipe( + Effect.flatMap((refreshInterval) => + Duration.toMillis(Duration.fromInputUnsafe(refreshInterval)) <= 0 + ? Effect.sleep("60 seconds") + : Effect.sleep(refreshInterval).pipe( + Effect.flatMap(() => + hasProviderStatusDemand.pipe( + Effect.flatMap((shouldRefresh) => + shouldRefresh ? refreshSnapshot().pipe(Effect.asVoid) : Effect.void, + ), + ), + ), + ), + ), Effect.ignoreCause({ log: true }), ), ).pipe(Effect.forkScoped); diff --git a/apps/server/src/resourceTelemetry/DesktopTelemetryReceiver.ts b/apps/server/src/resourceTelemetry/DesktopTelemetryReceiver.ts new file mode 100644 index 00000000000..96a5bf47a4e --- /dev/null +++ b/apps/server/src/resourceTelemetry/DesktopTelemetryReceiver.ts @@ -0,0 +1,412 @@ +// @effect-diagnostics nodeBuiltinImport:off +import * as NodeFileSystem from "node:fs"; + +import * as NodeStream from "@effect/platform-node/NodeStream"; +import { + DesktopHostTelemetryMessage, + type DesktopHostTelemetryMessage as DesktopHostTelemetryMessageValue, + type DesktopHostTelemetrySnapshot, + DesktopTelemetryControlMessage, + type ResourceTelemetrySourceStatus, +} from "@t3tools/contracts"; +import * as Context from "effect/Context"; +import * as DateTime from "effect/DateTime"; +import * as Duration from "effect/Duration"; +import * as Effect from "effect/Effect"; +import * as Layer from "effect/Layer"; +import * as Option from "effect/Option"; +import * as PubSub from "effect/PubSub"; +import 
* as Ref from "effect/Ref"; +import * as Schema from "effect/Schema"; +import * as Semaphore from "effect/Semaphore"; +import * as Stream from "effect/Stream"; +import * as Ndjson from "effect/unstable/encoding/Ndjson"; + +import { ServerConfig } from "../config.ts"; + +const STALE_AFTER_MS = 90_000; +const STALE_CHECK_INTERVAL = Duration.seconds(30); + +export class DesktopTelemetryDescriptorUnavailable extends Schema.TaggedErrorClass()( + "DesktopTelemetryDescriptorUnavailable", + { + mode: Schema.String, + }, +) { + override get message(): string { + return `Desktop telemetry descriptor is unavailable in '${this.mode}' mode.`; + } +} + +export class DesktopTelemetryProtocolMismatch extends Schema.TaggedErrorClass()( + "DesktopTelemetryProtocolMismatch", + { + expectedVersion: Schema.Number, + receivedVersion: Schema.Number, + }, +) { + override get message(): string { + return `Desktop telemetry protocol ${this.receivedVersion} is incompatible with expected protocol ${this.expectedVersion}.`; + } +} + +export class DesktopTelemetryDecodeFailed extends Schema.TaggedErrorClass()( + "DesktopTelemetryDecodeFailed", + { + cause: Schema.Defect(), + }, +) { + override get message(): string { + return "Failed to decode desktop telemetry."; + } +} + +export class DesktopTelemetryStreamFailed extends Schema.TaggedErrorClass()( + "DesktopTelemetryStreamFailed", + { + fd: Schema.Number, + cause: Schema.Defect(), + }, +) { + override get message(): string { + return `Desktop telemetry stream on fd ${this.fd} failed.`; + } +} + +export class DesktopTelemetryStreamClosed extends Schema.TaggedErrorClass()( + "DesktopTelemetryStreamClosed", + { + fd: Schema.Number, + }, +) { + override get message(): string { + return `Desktop telemetry stream on fd ${this.fd} closed.`; + } +} + +export class DesktopTelemetryStale extends Schema.TaggedErrorClass()( + "DesktopTelemetryStale", + { + fd: Schema.Number, + staleAfterMs: Schema.Number, + }, +) { + override get message(): string { + 
return `Desktop telemetry on fd ${this.fd} has not updated for ${this.staleAfterMs}ms.`; + } +} + +export type DesktopTelemetryReceiverError = + | DesktopTelemetryDescriptorUnavailable + | DesktopTelemetryProtocolMismatch + | DesktopTelemetryDecodeFailed + | DesktopTelemetryStreamFailed + | DesktopTelemetryStreamClosed; + +export class DesktopTelemetryControlFailed extends Schema.TaggedErrorClass()( + "DesktopTelemetryControlFailed", + { + fd: Schema.Number, + operation: Schema.String, + cause: Schema.Defect(), + }, +) { + override get message(): string { + return `Desktop telemetry control '${this.operation}' failed on fd ${this.fd}.`; + } +} + +export interface DesktopTelemetryReceiverHealth { + readonly status: ResourceTelemetrySourceStatus; + readonly lastSampleAt: Option.Option; + readonly lastError: Option.Option; +} + +export interface DesktopTelemetryReceiverShape { + readonly latest: Effect.Effect>; + readonly changes: Stream.Stream; + readonly health: Effect.Effect; + readonly healthChanges: Stream.Stream; + readonly setDiagnosticsDemand: ( + enabled: boolean, + ) => Effect.Effect; +} + +export class DesktopTelemetryReceiver extends Context.Service< + DesktopTelemetryReceiver, + DesktopTelemetryReceiverShape +>()("t3/resourceTelemetry/DesktopTelemetryReceiver") {} + +const decodeMessage = Schema.decodeUnknownEffect(DesktopHostTelemetryMessage); +const encodeControlMessage = Schema.encodeEffect( + Schema.fromJsonString(DesktopTelemetryControlMessage), +); +const isDescriptorUnavailable = Schema.is(DesktopTelemetryDescriptorUnavailable); +const isProtocolMismatch = Schema.is(DesktopTelemetryProtocolMismatch); +const isDecodeFailed = Schema.is(DesktopTelemetryDecodeFailed); +const isStreamFailed = Schema.is(DesktopTelemetryStreamFailed); + +function normalizeReceiverError(error: unknown): DesktopTelemetryReceiverError { + if ( + isDescriptorUnavailable(error) || + isProtocolMismatch(error) || + isDecodeFailed(error) || + isStreamFailed(error) + ) { + return 
error; + } + return new DesktopTelemetryDecodeFailed({ cause: error }); +} + +function messageVersion(value: unknown): number | undefined { + if (typeof value !== "object" || value === null) return undefined; + const version = Reflect.get(value, "version"); + return typeof version === "number" ? version : undefined; +} + +export const make = Effect.fn("resourceTelemetry.desktopTelemetryReceiver.make")(function* () { + const config = yield* ServerConfig; + const latest = yield* Ref.make(Option.none()); + const changes = yield* PubSub.sliding(8); + const healthChanges = yield* PubSub.sliding(4); + const controlMutex = yield* Semaphore.make(1); + const health = yield* Ref.make({ + status: config.desktopTelemetryFd === undefined ? "unavailable" : "starting", + lastSampleAt: Option.none(), + lastError: + config.desktopTelemetryFd === undefined + ? Option.some( + new DesktopTelemetryDescriptorUnavailable({ + mode: config.mode, + }).message, + ) + : Option.none(), + }); + const updateHealth = ( + update: (current: DesktopTelemetryReceiverHealth) => DesktopTelemetryReceiverHealth, + ) => + Ref.modify(health, (current) => { + const next = update(current); + return [next, next]; + }).pipe( + Effect.flatMap((next) => PubSub.publish(healthChanges, next)), + Effect.asVoid, + ); + const updateSampleHealth = (sampledAt: DateTime.Utc) => + Ref.modify(health, (current) => { + const next: DesktopTelemetryReceiverHealth = { + status: "healthy", + lastSampleAt: Option.some(sampledAt), + lastError: Option.none(), + }; + return [ + current.status !== "healthy" || Option.isSome(current.lastError) + ? 
Option.some(next) + : Option.none(), + next, + ] as const; + }).pipe( + Effect.flatMap( + Option.match({ + onNone: () => Effect.void, + onSome: (next) => PubSub.publish(healthChanges, next), + }), + ), + Effect.asVoid, + ); + + const setDiagnosticsDemand: DesktopTelemetryReceiverShape["setDiagnosticsDemand"] = (enabled) => + controlMutex.withPermits(1)( + Effect.gen(function* () { + const fd = config.desktopTelemetryControlFd; + if (fd === undefined) return; + const encoded = yield* encodeControlMessage({ + version: 1, + type: "setDiagnosticsDemand", + enabled, + }).pipe( + Effect.mapError( + (cause) => + new DesktopTelemetryControlFailed({ + fd, + operation: "encode", + cause, + }), + ), + ); + yield* Effect.try({ + try: () => { + const payload = Buffer.from(`${encoded}\n`); + let offset = 0; + while (offset < payload.byteLength) { + const written = NodeFileSystem.writeSync(fd, payload, offset); + if (written <= 0) throw new Error("desktop telemetry control pipe accepted no bytes"); + offset += written; + } + }, + catch: (cause) => + new DesktopTelemetryControlFailed({ + fd, + operation: "write", + cause, + }), + }).pipe( + Effect.tapError((error) => + updateHealth((current) => ({ + ...current, + status: "degraded", + lastError: Option.some(error.message), + })), + ), + ); + }), + ); + + if (config.desktopTelemetryFd !== undefined) { + const fd = config.desktopTelemetryFd; + const readable = yield* Effect.acquireRelease( + Effect.try({ + try: () => + NodeFileSystem.createReadStream("", { + fd, + autoClose: true, + }), + catch: (cause) => new DesktopTelemetryStreamFailed({ fd, cause }), + }), + (stream) => + Effect.sync(() => { + stream.destroy(); + }), + ); + + const messages: Stream.Stream = + NodeStream.fromReadable({ + evaluate: () => readable, + closeOnDone: true, + onError: (cause) => new DesktopTelemetryStreamFailed({ fd, cause }), + }).pipe( + Stream.pipeThroughChannel(Ndjson.decode({ ignoreEmptyLines: true })), + Stream.mapEffect( + ( + value, + ): 
Effect.Effect< + DesktopHostTelemetryMessageValue, + DesktopTelemetryProtocolMismatch | DesktopTelemetryDecodeFailed + > => { + const version = messageVersion(value); + if (version !== undefined && version !== 1) { + return Effect.fail( + new DesktopTelemetryProtocolMismatch({ + expectedVersion: 1, + receivedVersion: version, + }), + ); + } + return decodeMessage(value).pipe( + Effect.mapError((cause) => new DesktopTelemetryDecodeFailed({ cause })), + ); + }, + ), + Stream.mapError(normalizeReceiverError), + ); + + yield* messages.pipe( + Stream.runForEach((message) => { + if (message.type === "desktopTelemetryHello") { + return updateHealth( + (current): DesktopTelemetryReceiverHealth => ({ + ...current, + status: "healthy", + lastError: Option.none(), + }), + ); + } + + const sampledAt = DateTime.makeUnsafe(message.sampledAtUnixMs); + return Ref.set(latest, Option.some(message)).pipe( + Effect.andThen(updateSampleHealth(sampledAt)), + Effect.andThen(PubSub.publish(changes, message)), + Effect.asVoid, + ); + }), + Effect.andThen( + updateHealth( + (current): DesktopTelemetryReceiverHealth => ({ + ...current, + status: "stopped", + lastError: Option.some(new DesktopTelemetryStreamClosed({ fd }).message), + }), + ), + ), + Effect.catch((error) => + updateHealth( + (current): DesktopTelemetryReceiverHealth => ({ + ...current, + status: "degraded", + lastError: Option.some(error.message), + }), + ), + ), + Effect.forkScoped, + ); + + yield* Effect.forever( + Effect.sleep(STALE_CHECK_INTERVAL).pipe( + Effect.andThen( + Effect.gen(function* () { + const current = yield* Ref.get(latest); + if (Option.isNone(current) || current.value.power.stale) return; + const now = yield* DateTime.now; + if (DateTime.toEpochMillis(now) - current.value.sampledAtUnixMs < STALE_AFTER_MS) + return; + const staleSnapshot: DesktopHostTelemetrySnapshot = { + ...current.value, + power: { ...current.value.power, stale: true }, + }; + yield* Ref.set(latest, Option.some(staleSnapshot)); + yield* 
updateHealth((currentHealth) => ({ + ...currentHealth, + status: currentHealth.status === "stopped" ? "stopped" : "degraded", + lastError: + currentHealth.status === "stopped" + ? currentHealth.lastError + : Option.some( + new DesktopTelemetryStale({ fd, staleAfterMs: STALE_AFTER_MS }).message, + ), + })); + yield* PubSub.publish(changes, staleSnapshot); + }), + ), + ), + ).pipe(Effect.forkScoped); + } + + return DesktopTelemetryReceiver.of({ + latest: Ref.get(latest), + changes: Stream.fromPubSub(changes), + health: Ref.get(health), + healthChanges: Stream.fromPubSub(healthChanges), + setDiagnosticsDemand, + }); +}); + +export const layer = Layer.effect(DesktopTelemetryReceiver, make()); + +export const layerTest = ( + overrides: Partial = {}, +): Layer.Layer => + Layer.succeed( + DesktopTelemetryReceiver, + DesktopTelemetryReceiver.of({ + latest: Effect.succeedNone, + changes: Stream.empty, + health: Effect.succeed({ + status: "unavailable", + lastSampleAt: Option.none(), + lastError: Option.some("Desktop telemetry test implementation is unavailable."), + }), + healthChanges: Stream.empty, + setDiagnosticsDemand: () => Effect.void, + ...overrides, + }), + ); diff --git a/apps/server/src/resourceTelemetry/Model.test.ts b/apps/server/src/resourceTelemetry/Model.test.ts new file mode 100644 index 00000000000..0d93894c4ca --- /dev/null +++ b/apps/server/src/resourceTelemetry/Model.test.ts @@ -0,0 +1,433 @@ +import { + type DesktopElectronProcessMetric, + type DesktopHostTelemetrySnapshot, + type ResourceMonitorProcessSample, + type ResourceMonitorSnapshotEvent, +} from "@t3tools/contracts"; +import { describe, expect, it } from "@effect/vitest"; +import * as DateTime from "effect/DateTime"; +import * as Option from "effect/Option"; + +import { emptyTelemetryCounters, mergeProcesses, type MergeProcessesResult } from "./Model.ts"; + +const SERVER_PID = 100; +const BASE_TIME_MS = DateTime.toEpochMillis(DateTime.makeUnsafe("2026-06-17T12:00:00.000Z")); + +function 
processSample( + input: Partial & + Pick, +): ResourceMonitorProcessSample { + return { + runTimeMs: 1_000, + name: `process-${input.pid}`, + command: `process-${input.pid}`, + status: "Running", + cpuPercent: 0, + cpuTimeMs: 0, + residentBytes: 1_024, + virtualBytes: 2_048, + ioReadBytes: 0, + ioWriteBytes: 0, + ioSemantics: "storage", + ...input, + }; +} + +function nativeSnapshot( + sampledAtUnixMs: number, + processes: ReadonlyArray, + sequence = 1, +): ResourceMonitorSnapshotEvent { + return { + version: 2, + type: "snapshot", + sequence, + sampledAtUnixMs, + collectionDurationMicros: 250, + scannedProcessCount: processes.length, + retainedProcessCount: processes.length, + inaccessibleProcessCount: 0, + processes: [...processes], + }; +} + +function electronMetric( + input: Partial & + Pick, +): DesktopElectronProcessMetric { + return { + cpuPercent: 0, + idleWakeupsPerSecond: 0, + workingSetBytes: 1_024, + peakWorkingSetBytes: 2_048, + ...input, + }; +} + +function desktopSnapshot( + sampledAtUnixMs: number, + electronProcesses: ReadonlyArray, +): DesktopHostTelemetrySnapshot { + const sampledAt = DateTime.makeUnsafe(sampledAtUnixMs); + return { + version: 1, + type: "desktopTelemetry", + sequence: 1, + sampledAtUnixMs, + electronPid: electronProcesses[0]?.pid ?? 
10_000, + power: { + source: "electron-main", + idle: "false", + idleSeconds: 0, + locked: "false", + suspended: false, + onBattery: "false", + lowPowerMode: "unknown", + thermalState: "nominal", + stale: false, + updatedAt: sampledAt, + }, + speedLimitPercent: Option.none(), + electronProcesses: [...electronProcesses], + }; +} + +function merge(input: { + readonly native: ResourceMonitorSnapshotEvent; + readonly desktop?: DesktopHostTelemetrySnapshot; + readonly previous?: MergeProcessesResult; + readonly sidecarPid?: number; +}): MergeProcessesResult { + return mergeProcesses({ + serverPid: SERVER_PID, + sidecarPid: Option.fromUndefinedOr(input.sidecarPid), + fallbackSampledAtMs: input.native.sampledAtUnixMs, + nativeSnapshot: Option.some(input.native), + desktopSnapshot: Option.fromUndefinedOr(input.desktop), + previous: input.previous?.previous ?? new Map(), + counters: input.previous?.counters ?? emptyTelemetryCounters(), + updatePrevious: true, + }); +} + +describe("resource telemetry process model", () => { + it("builds complete descendant depths and isolates monitor overhead", () => { + const result = merge({ + sidecarPid: 900, + native: nativeSnapshot(BASE_TIME_MS, [ + processSample({ pid: SERVER_PID, ppid: 1, startTimeMs: 1_000 }), + processSample({ pid: 200, ppid: SERVER_PID, startTimeMs: 2_000 }), + processSample({ pid: 201, ppid: 200, startTimeMs: 3_000 }), + processSample({ pid: 202, ppid: 201, startTimeMs: 4_000 }), + processSample({ pid: 900, ppid: SERVER_PID, startTimeMs: 5_000 }), + ]), + }); + + expect(result.processes.map((process) => [process.identity.pid, process.depth])).toEqual([ + [100, 0], + [200, 1], + [201, 2], + [202, 3], + [900, 1], + ]); + expect(result.processes.find((process) => process.identity.pid === 900)?.category).toBe( + "resource-monitor", + ); + expect(result.groups.backend.processCount).toBe(4); + expect(result.groups.monitor.processCount).toBe(1); + expect(result.groups.monitor.processStarts).toBe(1); + 
expect(result.groups.allT3.processStarts).toBe(5); + }); + + it("deduplicates Electron metrics and classifies Electron descendants", () => { + const electronStart = 10_000; + const result = merge({ + native: nativeSnapshot(BASE_TIME_MS, [ + processSample({ pid: SERVER_PID, ppid: 1, startTimeMs: 1_000 }), + processSample({ pid: 300, ppid: 1, startTimeMs: electronStart }), + processSample({ pid: 301, ppid: 300, startTimeMs: electronStart + 1 }), + ]), + desktop: desktopSnapshot(BASE_TIME_MS, [ + electronMetric({ + pid: 300, + creationTimeMs: electronStart + 500, + type: "Browser", + name: "electron", + }), + electronMetric({ + pid: 301, + creationTimeMs: electronStart + 500, + type: "Utility", + name: "network-service", + }), + ]), + }); + + expect(result.processes.filter((process) => process.identity.pid === 300)).toHaveLength(1); + expect(result.processes.find((process) => process.identity.pid === 300)?.category).toBe( + "electron-main", + ); + expect(result.processes.find((process) => process.identity.pid === 301)?.category).toBe( + "electron-utility", + ); + expect(result.processes.find((process) => process.identity.pid === 301)?.depth).toBe(1); + expect(result.groups.electron.processCount).toBe(2); + }); + + it("ignores stale Electron metrics after PID reuse", () => { + const result = merge({ + native: nativeSnapshot(BASE_TIME_MS, [ + processSample({ pid: SERVER_PID, ppid: 1, startTimeMs: 1_000 }), + processSample({ pid: 300, ppid: SERVER_PID, startTimeMs: 50_000 }), + ]), + desktop: desktopSnapshot(BASE_TIME_MS, [ + electronMetric({ + pid: 300, + creationTimeMs: 10_000, + type: "Browser", + }), + ]), + }); + + expect(result.processes.find((process) => process.identity.pid === 300)?.category).toBe( + "server-child", + ); + expect(result.groups.electron.processCount).toBe(0); + }); + + it("derives rates from cumulative counters and preserves I/O semantics", () => { + const first = merge({ + native: nativeSnapshot(BASE_TIME_MS, [ + processSample({ + pid: 
SERVER_PID, + ppid: 1, + startTimeMs: 1_000, + cpuTimeMs: 1_000, + ioReadBytes: 10_000, + ioWriteBytes: 20_000, + ioSemantics: "all-io", + }), + ]), + }); + const second = merge({ + previous: first, + native: nativeSnapshot( + BASE_TIME_MS + 1_000, + [ + processSample({ + pid: SERVER_PID, + ppid: 1, + startTimeMs: 1_000, + cpuTimeMs: 1_250, + ioReadBytes: 12_000, + ioWriteBytes: 23_000, + ioSemantics: "all-io", + }), + ], + 2, + ), + }); + const server = second.processes[0]!; + + expect(server.cpuPercent).toBe(25); + expect(server.ioReadBytesPerSecond).toBe(2_000); + expect(server.ioWriteBytesPerSecond).toBe(3_000); + expect(server.ioSemantics).toBe("all-io"); + expect(second.groups.backend.cpuTimeMs).toBe(250); + expect(second.groups.backend.ioReadBytes).toBe(2_000); + expect(second.groups.backend.ioWriteBytes).toBe(3_000); + }); + + it("derives deltas at the constrained 15-second sampling cadence", () => { + const first = merge({ + native: nativeSnapshot(BASE_TIME_MS, [ + processSample({ + pid: SERVER_PID, + ppid: 1, + startTimeMs: 1_000, + cpuTimeMs: 1_000, + ioReadBytes: 10_000, + ioWriteBytes: 20_000, + }), + ]), + }); + const second = merge({ + previous: first, + native: nativeSnapshot( + BASE_TIME_MS + 15_000, + [ + processSample({ + pid: SERVER_PID, + ppid: 1, + startTimeMs: 1_000, + cpuTimeMs: 2_500, + ioReadBytes: 25_000, + ioWriteBytes: 50_000, + }), + ], + 2, + ), + }); + + expect(second.processes[0]?.cpuPercent).toBe(10); + expect(second.processes[0]?.ioReadBytesPerSecond).toBe(1_000); + expect(second.processes[0]?.ioWriteBytesPerSecond).toBe(2_000); + expect(second.groups.backend.cpuTimeMs).toBe(1_500); + expect(second.groups.backend.ioReadBytes).toBe(15_000); + expect(second.groups.backend.ioWriteBytes).toBe(30_000); + }); + + it("preserves native rates while applying a desktop-only update", () => { + const first = merge({ + native: nativeSnapshot(BASE_TIME_MS, [ + processSample({ + pid: SERVER_PID, + ppid: 1, + startTimeMs: 1_000, + cpuTimeMs: 
1_000, + ioReadBytes: 10_000, + ioWriteBytes: 20_000, + }), + ]), + }); + const second = merge({ + previous: first, + native: nativeSnapshot( + BASE_TIME_MS + 1_000, + [ + processSample({ + pid: SERVER_PID, + ppid: 1, + startTimeMs: 1_000, + cpuTimeMs: 1_250, + ioReadBytes: 12_000, + ioWriteBytes: 23_000, + }), + ], + 2, + ), + }); + const desktopOnly = mergeProcesses({ + serverPid: SERVER_PID, + sidecarPid: Option.none(), + fallbackSampledAtMs: BASE_TIME_MS + 1_000, + nativeSnapshot: Option.some( + nativeSnapshot( + BASE_TIME_MS + 1_000, + [ + processSample({ + pid: SERVER_PID, + ppid: 1, + startTimeMs: 1_000, + cpuTimeMs: 1_250, + ioReadBytes: 12_000, + ioWriteBytes: 23_000, + }), + ], + 2, + ), + ), + desktopSnapshot: Option.some(desktopSnapshot(BASE_TIME_MS + 1_500, [])), + previous: second.previous, + counters: second.counters, + updatePrevious: false, + }); + + expect(desktopOnly.processes[0]?.cpuPercent).toBe(25); + expect(desktopOnly.processes[0]?.ioReadBytesPerSecond).toBe(2_000); + expect(desktopOnly.processes[0]?.ioWriteBytesPerSecond).toBe(3_000); + }); + + it("resets deltas when counters decrease or the sampling gap is unsafe", () => { + const first = merge({ + native: nativeSnapshot(BASE_TIME_MS, [ + processSample({ + pid: SERVER_PID, + ppid: 1, + startTimeMs: 1_000, + cpuTimeMs: 1_000, + ioReadBytes: 10_000, + ioWriteBytes: 20_000, + }), + ]), + }); + const decreased = merge({ + previous: first, + native: nativeSnapshot( + BASE_TIME_MS + 1_000, + [ + processSample({ + pid: SERVER_PID, + ppid: 1, + startTimeMs: 1_000, + cpuTimeMs: 100, + ioReadBytes: 100, + ioWriteBytes: 200, + }), + ], + 2, + ), + }); + const delayed = merge({ + previous: decreased, + native: nativeSnapshot( + BASE_TIME_MS + 90_000, + [ + processSample({ + pid: SERVER_PID, + ppid: 1, + startTimeMs: 1_000, + cpuTimeMs: 10_000, + ioReadBytes: 100_000, + ioWriteBytes: 200_000, + }), + ], + 3, + ), + }); + + expect(decreased.processes[0]?.cpuPercent).toBe(0); + 
expect(decreased.processes[0]?.ioReadBytesPerSecond).toBe(0); + expect(decreased.processes[0]?.ioWriteBytesPerSecond).toBe(0); + expect(delayed.processes[0]?.cpuPercent).toBe(0); + expect(delayed.processes[0]?.ioReadBytesPerSecond).toBe(0); + expect(delayed.processes[0]?.ioWriteBytesPerSecond).toBe(0); + expect(delayed.groups.backend.cpuTimeMs).toBe(0); + expect(delayed.groups.backend.ioReadBytes).toBe(0); + expect(delayed.groups.backend.ioWriteBytes).toBe(0); + }); + + it("treats reused PIDs as an exit plus a new process", () => { + const first = merge({ + native: nativeSnapshot(BASE_TIME_MS, [ + processSample({ pid: SERVER_PID, ppid: 1, startTimeMs: 1_000 }), + processSample({ pid: 200, ppid: SERVER_PID, startTimeMs: 2_000 }), + ]), + }); + const second = merge({ + previous: first, + native: nativeSnapshot( + BASE_TIME_MS + 1_000, + [ + processSample({ pid: SERVER_PID, ppid: 1, startTimeMs: 1_000 }), + processSample({ + pid: 200, + ppid: SERVER_PID, + startTimeMs: 9_000, + cpuTimeMs: 999, + ioReadBytes: 999, + ioWriteBytes: 999, + }), + ], + 2, + ), + }); + const reused = second.processes.find((process) => process.identity.pid === 200)!; + + expect(reused.identity.startTimeMs).toBe(9_000); + expect(reused.cpuPercent).toBe(0); + expect(reused.ioReadBytesPerSecond).toBe(0); + expect(second.groups.backend.processStarts).toBe(3); + expect(second.groups.backend.processExits).toBe(1); + }); +}); diff --git a/apps/server/src/resourceTelemetry/Model.ts b/apps/server/src/resourceTelemetry/Model.ts new file mode 100644 index 00000000000..7bffe62d782 --- /dev/null +++ b/apps/server/src/resourceTelemetry/Model.ts @@ -0,0 +1,559 @@ +import type { + DesktopElectronProcessMetric, + DesktopHostTelemetrySnapshot, + ResourceMonitorProcessSample, + ResourceMonitorSnapshotEvent, + ResourceTelemetryAggregate, + ResourceTelemetryProcess, + ResourceTelemetryProcessCategory, +} from "@t3tools/contracts"; +import * as DateTime from "effect/DateTime"; +import * as Option from 
"effect/Option"; + +const MAX_DELTA_INTERVAL_MS = 30_000; +const ELECTRON_IDENTITY_TOLERANCE_MS = 2_000; + +export interface ProcessState { + readonly process: ResourceTelemetryProcess; + readonly sampledAtMs: number; +} + +export interface GroupCounters { + readonly cpuTimeMs: number; + readonly ioReadBytes: number; + readonly ioWriteBytes: number; + readonly processStarts: number; + readonly processExits: number; +} + +export interface TelemetryCounters { + readonly backend: GroupCounters; + readonly electron: GroupCounters; + readonly monitor: GroupCounters; + readonly allT3: GroupCounters; +} + +export interface ProcessDelta { + readonly identityKey: string; + readonly category: ResourceTelemetryProcessCategory; + readonly cpuTimeMs: number; + readonly ioReadBytes: number; + readonly ioWriteBytes: number; +} + +export interface MergeProcessesInput { + readonly serverPid: number; + readonly sidecarPid: Option.Option; + readonly fallbackSampledAtMs: number; + readonly nativeSnapshot: Option.Option; + readonly desktopSnapshot: Option.Option; + readonly electronRootPids?: ReadonlySet; + readonly previous: ReadonlyMap; + readonly counters: TelemetryCounters; + readonly updatePrevious: boolean; +} + +export interface MergeProcessesResult { + readonly sampledAtMs: number; + readonly processes: ReadonlyArray; + readonly previous: ReadonlyMap; + readonly counters: TelemetryCounters; + readonly groups: { + readonly backend: ResourceTelemetryAggregate; + readonly electron: ResourceTelemetryAggregate; + readonly monitor: ResourceTelemetryAggregate; + readonly allT3: ResourceTelemetryAggregate; + }; + readonly deltas: ReadonlyArray; +} + +export const emptyGroupCounters = (): GroupCounters => ({ + cpuTimeMs: 0, + ioReadBytes: 0, + ioWriteBytes: 0, + processStarts: 0, + processExits: 0, +}); + +export const emptyTelemetryCounters = (): TelemetryCounters => ({ + backend: emptyGroupCounters(), + electron: emptyGroupCounters(), + monitor: emptyGroupCounters(), + allT3: 
emptyGroupCounters(), +}); + +export function processIdentityKey(pid: number, startTimeMs: number): string { + return `${pid}:${startTimeMs}`; +} + +function finiteNonNegative(value: number): number { + return Number.isFinite(value) ? Math.max(0, value) : 0; +} + +function categoryGroup( + category: ResourceTelemetryProcessCategory, +): "backend" | "electron" | "monitor" { + if (category === "resource-monitor") return "monitor"; + if (category.startsWith("electron-")) return "electron"; + return "backend"; +} + +function electronCategory(metric: DesktopElectronProcessMetric): ResourceTelemetryProcessCategory { + switch (metric.type) { + case "Browser": + return "electron-main"; + case "Tab": + return "electron-renderer"; + case "GPU": + return "electron-gpu"; + default: + return "electron-utility"; + } +} + +function inferredElectronCategory( + process: ResourceMonitorProcessSample, +): ResourceTelemetryProcessCategory { + const command = process.command.toLowerCase(); + if (command.includes("--type=renderer")) return "electron-renderer"; + if (command.includes("--type=gpu-process")) return "electron-gpu"; + return "electron-utility"; +} + +function matchElectronMetric( + process: ResourceMonitorProcessSample, + metricsByPid: ReadonlyMap, +): DesktopElectronProcessMetric | undefined { + const metric = metricsByPid.get(process.pid); + if (!metric) return undefined; + return Math.abs(metric.creationTimeMs - process.startTimeMs) <= ELECTRON_IDENTITY_TOLERANCE_MS + ? metric + : undefined; +} + +function syntheticNativeSample( + metric: DesktopElectronProcessMetric, + sampledAtMs: number, +): ResourceMonitorProcessSample { + return { + pid: metric.pid, + ppid: 0, + startTimeMs: metric.creationTimeMs, + runTimeMs: Math.max(0, sampledAtMs - metric.creationTimeMs), + name: metric.name ?? metric.serviceName ?? metric.type, + command: metric.name ?? metric.serviceName ?? 
metric.type, + status: "Running", + cpuPercent: metric.cpuPercent, + cpuTimeMs: Math.max(0, Math.round((metric.cumulativeCpuSeconds ?? 0) * 1_000)), + residentBytes: metric.workingSetBytes, + virtualBytes: 0, + ioReadBytes: 0, + ioWriteBytes: 0, + ioSemantics: "storage", + }; +} + +function processDepths( + processes: ReadonlyArray, + roots: ReadonlySet, +): ReadonlyMap { + const childrenByParent = new Map(); + for (const process of processes) { + const children = childrenByParent.get(process.ppid) ?? []; + children.push(process.pid); + childrenByParent.set(process.ppid, children); + } + + const depths = new Map(); + const queue = [...roots].map((pid) => ({ pid, depth: 0 })); + while (queue.length > 0) { + const current = queue.shift(); + if (!current || depths.has(current.pid)) continue; + depths.set(current.pid, current.depth); + for (const childPid of childrenByParent.get(current.pid) ?? []) { + queue.push({ pid: childPid, depth: current.depth + 1 }); + } + } + return depths; +} + +function isElectronDescendant( + pid: number, + processesByPid: ReadonlyMap, + electronPids: ReadonlySet, +): boolean { + const visited = new Set(); + let currentPid = pid; + while (!visited.has(currentPid)) { + visited.add(currentPid); + if (electronPids.has(currentPid)) return true; + const current = processesByPid.get(currentPid); + if (!current || current.ppid <= 0 || current.ppid === currentPid) return false; + currentPid = current.ppid; + } + return false; +} + +function hasElectronAncestor( + process: ResourceMonitorProcessSample, + processesByPid: ReadonlyMap, + electronPids: ReadonlySet, +): boolean { + const visited = new Set(); + let currentPid = process.ppid; + while (currentPid > 0 && !visited.has(currentPid)) { + visited.add(currentPid); + if (electronPids.has(currentPid)) return true; + const current = processesByPid.get(currentPid); + if (!current || current.ppid === currentPid) return false; + currentPid = current.ppid; + } + return false; +} + +function 
orderProcessTree( + processes: ReadonlyArray, + rootPids: ReadonlyArray, +): ReadonlyArray { + const processesByPid = new Map(processes.map((process) => [process.identity.pid, process])); + const childrenByParent = new Map(); + for (const process of processes) { + const children = childrenByParent.get(process.ppid) ?? []; + children.push(process); + childrenByParent.set(process.ppid, children); + } + for (const children of childrenByParent.values()) { + children.sort((left, right) => left.identity.pid - right.identity.pid); + } + + const ordered: ResourceTelemetryProcess[] = []; + const visited = new Set(); + const visit = (process: ResourceTelemetryProcess): void => { + if (visited.has(process.identity.pid)) return; + visited.add(process.identity.pid); + ordered.push(process); + for (const child of childrenByParent.get(process.identity.pid) ?? []) { + visit(child); + } + }; + + for (const rootPid of rootPids) { + const root = processesByPid.get(rootPid); + if (root) visit(root); + } + for (const process of processes.toSorted( + (left, right) => left.depth - right.depth || left.identity.pid - right.identity.pid, + )) { + visit(process); + } + return ordered; +} + +function delta(input: { + readonly current: number; + readonly previous: number; + readonly elapsedMs: number; +}): number { + if ( + input.elapsedMs <= 0 || + input.elapsedMs > MAX_DELTA_INTERVAL_MS || + input.current < input.previous + ) { + return 0; + } + return input.current - input.previous; +} + +function incrementCounters(counters: GroupCounters, update: Partial): GroupCounters { + return { + cpuTimeMs: counters.cpuTimeMs + (update.cpuTimeMs ?? 0), + ioReadBytes: counters.ioReadBytes + (update.ioReadBytes ?? 0), + ioWriteBytes: counters.ioWriteBytes + (update.ioWriteBytes ?? 0), + processStarts: counters.processStarts + (update.processStarts ?? 0), + processExits: counters.processExits + (update.processExits ?? 
0), + }; +} + +function applyLifecycleCounters(input: { + readonly counters: TelemetryCounters; + readonly deltas: ReadonlyArray; + readonly current: ReadonlyMap; + readonly previous: ReadonlyMap; +}): TelemetryCounters { + let backend = input.counters.backend; + let electron = input.counters.electron; + let monitor = input.counters.monitor; + let allT3 = input.counters.allT3; + for (const processDelta of input.deltas) { + const group = categoryGroup(processDelta.category); + switch (group) { + case "backend": + backend = incrementCounters(backend, processDelta); + break; + case "electron": + electron = incrementCounters(electron, processDelta); + break; + case "monitor": + monitor = incrementCounters(monitor, processDelta); + break; + } + allT3 = incrementCounters(allT3, processDelta); + } + + for (const [identityKey, current] of input.current) { + if (input.previous.has(identityKey)) continue; + const group = categoryGroup(current.process.category); + switch (group) { + case "backend": + backend = incrementCounters(backend, { processStarts: 1 }); + break; + case "electron": + electron = incrementCounters(electron, { processStarts: 1 }); + break; + case "monitor": + monitor = incrementCounters(monitor, { processStarts: 1 }); + break; + } + allT3 = incrementCounters(allT3, { processStarts: 1 }); + } + + for (const [identityKey, previous] of input.previous) { + if (input.current.has(identityKey)) continue; + const group = categoryGroup(previous.process.category); + switch (group) { + case "backend": + backend = incrementCounters(backend, { processExits: 1 }); + break; + case "electron": + electron = incrementCounters(electron, { processExits: 1 }); + break; + case "monitor": + monitor = incrementCounters(monitor, { processExits: 1 }); + break; + } + allT3 = incrementCounters(allT3, { processExits: 1 }); + } + + return { backend, electron, monitor, allT3 }; +} + +function aggregate( + processes: ReadonlyArray, + counters: GroupCounters, +): ResourceTelemetryAggregate 
{ + return { + processCount: processes.length, + currentCpuPercent: processes.reduce((total, process) => total + process.cpuPercent, 0), + cpuTimeMs: counters.cpuTimeMs, + currentRssBytes: processes.reduce((total, process) => total + process.residentBytes, 0), + peakRssBytes: processes.reduce((total, process) => total + process.peakResidentBytes, 0), + ioReadBytes: counters.ioReadBytes, + ioWriteBytes: counters.ioWriteBytes, + ioReadBytesPerSecond: processes.reduce( + (total, process) => total + process.ioReadBytesPerSecond, + 0, + ), + ioWriteBytesPerSecond: processes.reduce( + (total, process) => total + process.ioWriteBytesPerSecond, + 0, + ), + processStarts: counters.processStarts, + processExits: counters.processExits, + }; +} + +export function mergeProcesses(input: MergeProcessesInput): MergeProcessesResult { + const nativeProcesses = Option.match(input.nativeSnapshot, { + onNone: () => [] as ReadonlyArray, + onSome: (snapshot) => snapshot.processes, + }); + const electronMetrics = Option.match(input.desktopSnapshot, { + onNone: () => [] as ReadonlyArray, + onSome: (snapshot) => snapshot.electronProcesses, + }); + const sampledAtMs = Option.match(input.nativeSnapshot, { + onNone: () => + Option.match(input.desktopSnapshot, { + onNone: () => input.fallbackSampledAtMs, + onSome: (snapshot) => snapshot.sampledAtUnixMs, + }), + onSome: (snapshot) => snapshot.sampledAtUnixMs, + }); + const nativeByPid = new Map(nativeProcesses.map((process) => [process.pid, process])); + const metricsByPid = new Map(); + for (const metric of electronMetrics) { + const nativeProcess = nativeByPid.get(metric.pid); + if (!nativeProcess) { + nativeByPid.set(metric.pid, syntheticNativeSample(metric, sampledAtMs)); + metricsByPid.set(metric.pid, metric); + continue; + } + if ( + Math.abs(metric.creationTimeMs - nativeProcess.startTimeMs) <= ELECTRON_IDENTITY_TOLERANCE_MS + ) { + metricsByPid.set(metric.pid, metric); + } + } + const processes = [...nativeByPid.values()]; + const 
processesByPid = new Map(processes.map((process) => [process.pid, process])); + const explicitElectronRootPids = input.electronRootPids ?? new Set(); + const electronPids = new Set([...metricsByPid.keys(), ...explicitElectronRootPids]); + const electronRootPids = [ + ...explicitElectronRootPids, + ...[...electronPids] + .filter((pid) => { + if (explicitElectronRootPids.has(pid)) return false; + const process = processesByPid.get(pid); + return process === undefined + ? true + : !hasElectronAncestor(process, processesByPid, electronPids); + }) + .toSorted((left, right) => left - right), + ].filter((pid, index, values) => values.indexOf(pid) === index); + const rootPids = [input.serverPid, ...electronRootPids]; + const roots = new Set(rootPids); + const depths = processDepths(processes, roots); + const childrenByParent = new Map(); + for (const process of processes) { + const children = childrenByParent.get(process.ppid) ?? []; + children.push(process.pid); + childrenByParent.set(process.ppid, children); + } + + const nextPrevious = new Map(); + const processDeltas: ProcessDelta[] = []; + const normalized = processes.map((process): ResourceTelemetryProcess => { + const identityKey = processIdentityKey(process.pid, process.startTimeMs); + const previous = input.previous.get(identityKey); + const elapsedMs = previous ? sampledAtMs - previous.sampledAtMs : 0; + const cpuTimeDelta = previous + ? delta({ + current: process.cpuTimeMs, + previous: previous.process.cpuTimeMs, + elapsedMs, + }) + : 0; + const ioReadDelta = previous + ? delta({ + current: process.ioReadBytes, + previous: previous.process.ioReadBytes, + elapsedMs, + }) + : 0; + const ioWriteDelta = previous + ? delta({ + current: process.ioWriteBytes, + previous: previous.process.ioWriteBytes, + elapsedMs, + }) + : 0; + const electronMetric = matchElectronMetric(process, metricsByPid); + const category: ResourceTelemetryProcessCategory = + process.pid === input.serverPid + ? 
"server" + : Option.contains(input.sidecarPid, process.pid) + ? "resource-monitor" + : explicitElectronRootPids.has(process.pid) + ? "electron-main" + : electronMetric + ? electronCategory(electronMetric) + : isElectronDescendant(process.pid, processesByPid, electronPids) + ? inferredElectronCategory(process) + : "server-child"; + const firstSeenAt = previous?.process.firstSeenAt ?? DateTime.makeUnsafe(sampledAtMs); + const preservePreviousRates = !input.updatePrevious && previous !== undefined; + const cpuPercent = preservePreviousRates + ? previous.process.cpuPercent + : previous && elapsedMs > 0 && elapsedMs <= MAX_DELTA_INTERVAL_MS + ? (cpuTimeDelta / elapsedMs) * 100 + : finiteNonNegative(process.cpuPercent); + const normalizedProcess: ResourceTelemetryProcess = { + identity: { + pid: process.pid, + startTimeMs: process.startTimeMs, + }, + ppid: process.ppid, + childPids: [...(childrenByParent.get(process.pid) ?? [])].toSorted( + (left, right) => left - right, + ), + depth: depths.get(process.pid) ?? 0, + name: process.name, + command: process.command, + status: process.status, + category, + ...(electronMetric ? { electronType: electronMetric.type } : {}), + ...(electronMetric?.serviceName ? { electronServiceName: electronMetric.serviceName } : {}), + cpuPercent: finiteNonNegative(cpuPercent), + cpuTimeMs: process.cpuTimeMs, + residentBytes: process.residentBytes, + peakResidentBytes: Math.max( + process.residentBytes, + electronMetric?.peakWorkingSetBytes ?? 0, + previous?.process.peakResidentBytes ?? 0, + ), + virtualBytes: process.virtualBytes, + ioReadBytes: process.ioReadBytes, + ioWriteBytes: process.ioWriteBytes, + ioReadBytesPerSecond: preservePreviousRates + ? previous.process.ioReadBytesPerSecond + : elapsedMs > 0 + ? finiteNonNegative((ioReadDelta * 1_000) / elapsedMs) + : 0, + ioWriteBytesPerSecond: preservePreviousRates + ? previous.process.ioWriteBytesPerSecond + : elapsedMs > 0 + ? 
finiteNonNegative((ioWriteDelta * 1_000) / elapsedMs) + : 0, + ioSemantics: process.ioSemantics, + ...(electronMetric ? { idleWakeupsPerSecond: electronMetric.idleWakeupsPerSecond } : {}), + runTimeMs: process.runTimeMs, + firstSeenAt, + lastSeenAt: DateTime.makeUnsafe(sampledAtMs), + }; + nextPrevious.set(identityKey, { + process: normalizedProcess, + sampledAtMs, + }); + processDeltas.push({ + identityKey, + category, + cpuTimeMs: cpuTimeDelta, + ioReadBytes: ioReadDelta, + ioWriteBytes: ioWriteDelta, + }); + return normalizedProcess; + }); + const ordered = orderProcessTree(normalized, rootPids); + + const counters = input.updatePrevious + ? applyLifecycleCounters({ + counters: input.counters, + deltas: processDeltas, + current: nextPrevious, + previous: input.previous, + }) + : input.counters; + const backendProcesses = ordered.filter( + (process) => categoryGroup(process.category) === "backend", + ); + const electronProcesses = ordered.filter( + (process) => categoryGroup(process.category) === "electron", + ); + const monitorProcesses = ordered.filter( + (process) => categoryGroup(process.category) === "monitor", + ); + + return { + sampledAtMs, + processes: ordered, + previous: input.updatePrevious ? 
nextPrevious : input.previous, + counters, + groups: { + backend: aggregate(backendProcesses, counters.backend), + electron: aggregate(electronProcesses, counters.electron), + monitor: aggregate(monitorProcesses, counters.monitor), + allT3: aggregate(ordered, counters.allT3), + }, + deltas: processDeltas, + }; +} diff --git a/apps/server/src/resourceTelemetry/NativeTelemetryClient.test.ts b/apps/server/src/resourceTelemetry/NativeTelemetryClient.test.ts new file mode 100644 index 00000000000..30bbfa3a223 --- /dev/null +++ b/apps/server/src/resourceTelemetry/NativeTelemetryClient.test.ts @@ -0,0 +1,47 @@ +import type { HostPowerSnapshot } from "@t3tools/contracts"; +import { describe, expect, it } from "@effect/vitest"; +import * as DateTime from "effect/DateTime"; + +import { resolveNativeSampleIntervalMs } from "./NativeTelemetryClient.ts"; + +const basePower: HostPowerSnapshot = { + source: "electron-main", + idle: "false", + idleSeconds: 0, + locked: "false", + suspended: false, + onBattery: "false", + lowPowerMode: "false", + thermalState: "nominal", + stale: false, + updatedAt: DateTime.makeUnsafe("2026-06-17T12:00:00.000Z"), +}; + +describe("resolveNativeSampleIntervalMs", () => { + it("pauses while suspended and backs off under host constraints", () => { + expect(resolveNativeSampleIntervalMs({ ...basePower, suspended: true }, 1)).toBe(0); + expect(resolveNativeSampleIntervalMs({ ...basePower, locked: "true" }, 1)).toBe(15_000); + expect(resolveNativeSampleIntervalMs({ ...basePower, lowPowerMode: "true" }, 1)).toBe(15_000); + expect(resolveNativeSampleIntervalMs({ ...basePower, thermalState: "critical" }, 1)).toBe( + 15_000, + ); + expect(resolveNativeSampleIntervalMs({ ...basePower, onBattery: "true" }, 1)).toBe(5_000); + }); + + it("keeps unknown background telemetry cheap but serves live diagnostics at 1Hz", () => { + const unknown: HostPowerSnapshot = { + ...basePower, + source: "unknown", + stale: true, + }; + 
expect(resolveNativeSampleIntervalMs(unknown, 0)).toBe(5_000); + expect(resolveNativeSampleIntervalMs(unknown, 1)).toBe(1_000); + expect( + resolveNativeSampleIntervalMs( + { ...basePower, stale: true, locked: "true", suspended: true }, + 0, + ), + ).toBe(5_000); + expect(resolveNativeSampleIntervalMs(basePower, 0)).toBe(1_000); + }); +}); diff --git a/apps/server/src/resourceTelemetry/NativeTelemetryClient.ts b/apps/server/src/resourceTelemetry/NativeTelemetryClient.ts new file mode 100644 index 00000000000..ac4c135d60d --- /dev/null +++ b/apps/server/src/resourceTelemetry/NativeTelemetryClient.ts @@ -0,0 +1,922 @@ +import type { + HostPowerSnapshot, + ResourceMonitorCapabilities, + ResourceMonitorCommand, + ResourceMonitorEvent, + ResourceMonitorExternalProcess, + ResourceMonitorHelloEvent, + ResourceMonitorSnapshotEvent, + ResourceTelemetrySourceStatus, +} from "@t3tools/contracts"; +import { + RESOURCE_MONITOR_PROTOCOL_VERSION, + ResourceMonitorCommand as ResourceMonitorCommandSchema, + ResourceMonitorEvent as ResourceMonitorEventSchema, +} from "@t3tools/contracts"; +import * as Cause from "effect/Cause"; +import * as Context from "effect/Context"; +import * as Crypto from "effect/Crypto"; +import * as DateTime from "effect/DateTime"; +import * as Deferred from "effect/Deferred"; +import * as Duration from "effect/Duration"; +import * as Effect from "effect/Effect"; +import * as Fiber from "effect/Fiber"; +import * as Layer from "effect/Layer"; +import * as Option from "effect/Option"; +import * as PubSub from "effect/PubSub"; +import * as Queue from "effect/Queue"; +import * as Ref from "effect/Ref"; +import * as Result from "effect/Result"; +import * as Schema from "effect/Schema"; +import * as Semaphore from "effect/Semaphore"; +import * as Stream from "effect/Stream"; +import * as Ndjson from "effect/unstable/encoding/Ndjson"; +import { ChildProcess, ChildProcessSpawner } from "effect/unstable/process"; + +import * as ResourceMonitorBinary from 
"./ResourceMonitorBinary.ts"; +import { ServerConfig } from "../config.ts"; + +const SAMPLE_INTERVAL_MS = 1_000; +const UNKNOWN_BACKGROUND_SAMPLE_INTERVAL_MS = 5_000; +const BATTERY_SAMPLE_INTERVAL_MS = 5_000; +const CONSTRAINED_SAMPLE_INTERVAL_MS = 15_000; +const HANDSHAKE_TIMEOUT = Duration.seconds(5); +const SAMPLE_REQUEST_TIMEOUT = Duration.seconds(5); +const HISTORY_REQUEST_TIMEOUT = Duration.seconds(15); +const INITIAL_RESTART_DELAY = Duration.millis(500); +const MAX_RESTART_DELAY = Duration.seconds(10); +const FAILURE_WINDOW_MS = 60_000; +const MAX_FAILURES_PER_WINDOW = 5; + +export class NativeTelemetrySpawnFailed extends Schema.TaggedErrorClass()( + "NativeTelemetrySpawnFailed", + { + path: Schema.String, + cause: Schema.Defect(), + }, +) { + override get message(): string { + return `Failed to start resource monitor '${this.path}'.`; + } +} + +export class NativeTelemetryHandshakeTimedOut extends Schema.TaggedErrorClass()( + "NativeTelemetryHandshakeTimedOut", + { + timeoutMs: Schema.Number, + }, +) { + override get message(): string { + return `Resource monitor handshake timed out after ${this.timeoutMs}ms.`; + } +} + +export class NativeTelemetryProtocolMismatch extends Schema.TaggedErrorClass()( + "NativeTelemetryProtocolMismatch", + { + expectedVersion: Schema.Number, + receivedVersion: Schema.Number, + }, +) { + override get message(): string { + return `Resource monitor protocol ${this.receivedVersion} is incompatible with expected protocol ${this.expectedVersion}.`; + } +} + +export class NativeTelemetryDecodeFailed extends Schema.TaggedErrorClass()( + "NativeTelemetryDecodeFailed", + { + cause: Schema.Defect(), + }, +) { + override get message(): string { + return "Failed to decode resource monitor output."; + } +} + +export class NativeTelemetryCommandFailed extends Schema.TaggedErrorClass()( + "NativeTelemetryCommandFailed", + { + operation: Schema.String, + cause: Schema.Defect(), + }, +) { + override get message(): string { + return `Resource 
monitor command '${this.operation}' failed.`; + } +} + +export class NativeTelemetryExited extends Schema.TaggedErrorClass()( + "NativeTelemetryExited", + { + exitCode: Schema.Number, + }, +) { + override get message(): string { + return `Resource monitor exited with code ${this.exitCode}.`; + } +} + +export class NativeTelemetryUnavailable extends Schema.TaggedErrorClass()( + "NativeTelemetryUnavailable", + { + reason: Schema.String, + }, +) { + override get message(): string { + return `Resource monitor is unavailable: ${this.reason}`; + } +} + +export type NativeTelemetryClientError = + | NativeTelemetrySpawnFailed + | NativeTelemetryHandshakeTimedOut + | NativeTelemetryProtocolMismatch + | NativeTelemetryDecodeFailed + | NativeTelemetryCommandFailed + | NativeTelemetryExited + | NativeTelemetryUnavailable; + +export interface NativeTelemetryClientHealth { + readonly status: ResourceTelemetrySourceStatus; + readonly hello: Option.Option; + readonly lastSampleAt: Option.Option; + readonly lastError: Option.Option; + readonly restartCount: number; + readonly sampleIntervalMs: number; +} + +export interface NativeTelemetryClientShape { + readonly capabilities: Effect.Effect; + readonly snapshots: Stream.Stream; + readonly readHistory: ( + windowMs: number, + ) => Effect.Effect, NativeTelemetryClientError>; + readonly setExternalProcesses: ( + processes: ReadonlyArray, + ) => Effect.Effect; + readonly setHostPowerState: ( + snapshot: HostPowerSnapshot, + ) => Effect.Effect; + readonly sampleNow: Effect.Effect; + readonly retry: Effect.Effect; + readonly health: Effect.Effect; + readonly healthChanges: Stream.Stream; +} + +export class NativeTelemetryClient extends Context.Service< + NativeTelemetryClient, + NativeTelemetryClientShape +>()("t3/resourceTelemetry/NativeTelemetryClient") {} + +interface ClientState { + readonly status: ResourceTelemetrySourceStatus; + readonly handle: Option.Option; + readonly hello: Option.Option; + readonly lastSampleAt: 
Option.Option; + readonly lastError: Option.Option; + readonly restartCount: number; +} + +interface CollectionControl { + readonly hostPower: HostPowerSnapshot; + readonly liveSubscriberCount: number; + readonly sampleIntervalMs: number; +} + +interface PendingHistoryRequest { + readonly deferred: Deferred.Deferred< + ReadonlyArray, + NativeTelemetryClientError + >; + readonly snapshots: ReadonlyArray; +} + +const initialState: ClientState = { + status: "starting", + handle: Option.none(), + hello: Option.none(), + lastSampleAt: Option.none(), + lastError: Option.none(), + restartCount: 0, +}; + +function toHealth(state: ClientState, sampleIntervalMs: number): NativeTelemetryClientHealth { + return { + status: state.status, + hello: state.hello, + lastSampleAt: state.lastSampleAt, + lastError: state.lastError, + restartCount: state.restartCount, + sampleIntervalMs, + }; +} + +function isThermallyConstrained(snapshot: HostPowerSnapshot): boolean { + return snapshot.thermalState === "serious" || snapshot.thermalState === "critical"; +} + +export function resolveNativeSampleIntervalMs( + snapshot: HostPowerSnapshot, + liveSubscriberCount: number, +): number { + if (snapshot.stale || snapshot.source === "unknown") { + return liveSubscriberCount > 0 ? 
SAMPLE_INTERVAL_MS : UNKNOWN_BACKGROUND_SAMPLE_INTERVAL_MS; + } + if (snapshot.suspended) return 0; + if ( + snapshot.locked === "true" || + snapshot.lowPowerMode === "true" || + isThermallyConstrained(snapshot) + ) { + return CONSTRAINED_SAMPLE_INTERVAL_MS; + } + if (snapshot.onBattery === "true") return BATTERY_SAMPLE_INTERVAL_MS; + return SAMPLE_INTERVAL_MS; +} + +const decodeMonitorEvent: ( + value: unknown, +) => Effect.Effect = Schema.decodeUnknownEffect( + ResourceMonitorEventSchema, +); +const encodeMonitorCommand = Schema.encodeEffect( + Schema.fromJsonString(ResourceMonitorCommandSchema), +); +const isProtocolMismatch = Schema.is(NativeTelemetryProtocolMismatch); +const isDecodeFailed = Schema.is(NativeTelemetryDecodeFailed); +const isCommandFailed = Schema.is(NativeTelemetryCommandFailed); + +function eventVersion(value: unknown): number | undefined { + if (typeof value !== "object" || value === null) return undefined; + const version = Reflect.get(value, "version"); + return typeof version === "number" ? 
version : undefined; +} + +function restartDelay(attempt: number): Duration.Duration { + return Duration.min(Duration.times(INITIAL_RESTART_DELAY, 2 ** attempt), MAX_RESTART_DELAY); +} + +function errorMessage(error: NativeTelemetryClientError): string { + return error.message; +} + +export const make = Effect.fn("resourceTelemetry.nativeTelemetryClient.make")(function* () { + const binary = yield* ResourceMonitorBinary.ResourceMonitorBinary; + const spawner = yield* ChildProcessSpawner.ChildProcessSpawner; + const crypto = yield* Crypto.Crypto; + const config = yield* ServerConfig; + const initializedAt = yield* DateTime.now; + const state = yield* Ref.make(initialState); + const collectionControl = yield* Ref.make({ + hostPower: { + source: "unknown", + idle: "unknown", + idleSeconds: null, + locked: "unknown", + suspended: false, + onBattery: "unknown", + lowPowerMode: "unknown", + thermalState: "unknown", + stale: true, + updatedAt: initializedAt, + }, + liveSubscriberCount: 0, + sampleIntervalMs: UNKNOWN_BACKGROUND_SAMPLE_INTERVAL_MS, + }); + const externalProcesses = yield* Ref.make>([]); + const pendingSamples = yield* Ref.make( + new Map>(), + ); + const pendingHistories = yield* Ref.make(new Map()); + const snapshots = yield* PubSub.sliding(8); + const healthChanges = yield* PubSub.sliding(4); + const retryQueue = yield* Queue.sliding(1); + const commandMutex = yield* Semaphore.make(1); + const currentHealth = Effect.all([Ref.get(state), Ref.get(collectionControl)]).pipe( + Effect.map(([current, control]) => toHealth(current, control.sampleIntervalMs)), + ); + const publishHealth = currentHealth.pipe( + Effect.flatMap((health) => PubSub.publish(healthChanges, health)), + Effect.asVoid, + ); + + const failPending = (error: NativeTelemetryClientError) => + Effect.gen(function* () { + const samples = yield* Ref.getAndSet(pendingSamples, new Map()); + const histories = yield* Ref.getAndSet(pendingHistories, new Map()); + yield* Effect.forEach(samples.values(), 
(deferred) => Deferred.fail(deferred, error), { + discard: true, + }); + yield* Effect.forEach( + histories.values(), + (request) => Deferred.fail(request.deferred, error), + { discard: true }, + ); + }); + + const writeCommand = ( + handle: ChildProcessSpawner.ChildProcessHandle, + command: ResourceMonitorCommand, + ): Effect.Effect => + commandMutex.withPermits(1)( + encodeMonitorCommand(command).pipe( + Effect.map((encoded) => `${encoded}\n`), + Effect.mapError( + (cause) => + new NativeTelemetryCommandFailed({ + operation: command.type, + cause, + }), + ), + Effect.flatMap((encoded) => + Stream.run(Stream.encodeText(Stream.make(encoded)), handle.stdin), + ), + Effect.mapError( + (cause) => + new NativeTelemetryCommandFailed({ + operation: command.type, + cause, + }), + ), + ), + ); + + const processEvent = ( + event: ResourceMonitorEvent, + helloDeferred: Deferred.Deferred, + ): Effect.Effect => { + switch (event.type) { + case "hello": + return Ref.update(state, (current) => ({ + ...current, + status: "starting" as const, + hello: Option.some(event), + lastError: Option.none(), + })).pipe( + Effect.andThen(publishHealth), + Effect.andThen(Deferred.succeed(helloDeferred, event)), + Effect.asVoid, + ); + case "snapshot": + return Effect.gen(function* () { + const sampledAt = DateTime.makeUnsafe(event.sampledAtUnixMs); + yield* Ref.update(state, (current) => ({ + ...current, + status: "healthy" as const, + lastSampleAt: Option.some(sampledAt), + lastError: Option.none(), + })); + yield* PubSub.publish(snapshots, event); + if (event.requestId) { + const deferred = yield* Ref.modify(pendingSamples, (pending) => { + const next = new Map(pending); + const current = next.get(event.requestId!); + next.delete(event.requestId!); + return [Option.fromUndefinedOr(current), next]; + }); + if (Option.isSome(deferred)) { + yield* Deferred.succeed(deferred.value, event); + } + } + }); + case "historyChunk": + return Effect.gen(function* () { + const latestSnapshot = 
event.snapshots.at(-1); + if (latestSnapshot) { + yield* Ref.update(state, (current) => ({ + ...current, + status: "healthy" as const, + lastSampleAt: Option.some(DateTime.makeUnsafe(latestSnapshot.sampledAtUnixMs)), + lastError: Option.none(), + })); + } + const completed = yield* Ref.modify(pendingHistories, (pending) => { + const request = pending.get(event.requestId); + if (!request) return [Option.none(), pending] as const; + const snapshots = [...request.snapshots, ...event.snapshots]; + const next = new Map(pending); + if (event.done) { + next.delete(event.requestId); + return [Option.some({ deferred: request.deferred, snapshots }), next] as const; + } + next.set(event.requestId, { deferred: request.deferred, snapshots }); + return [Option.none(), next] as const; + }); + if (Option.isSome(completed)) { + yield* Deferred.succeed(completed.value.deferred, completed.value.snapshots); + } + }); + case "error": + return Ref.update(state, (current) => ({ + ...current, + status: "degraded" as const, + lastError: Option.some(event.message), + })).pipe( + Effect.andThen(publishHealth), + Effect.andThen( + event.recoverable + ? 
Effect.void + : Effect.fail( + new NativeTelemetryCommandFailed({ + operation: event.code, + cause: event.message, + }), + ), + ), + ); + } + }; + + const runAttempt: Effect.Effect = Effect.scoped( + Effect.gen(function* () { + const executablePath = yield* binary.resolve.pipe( + Effect.mapError( + (error) => + new NativeTelemetryUnavailable({ + reason: error.message, + }), + ), + ); + const command = ChildProcess.make(executablePath, [], { + cwd: config.cwd, + stdin: { + stream: "pipe", + endOnDone: false, + }, + stdout: "pipe", + stderr: "pipe", + killSignal: "SIGTERM", + forceKillAfter: Duration.seconds(2), + }); + const handle = yield* Effect.acquireRelease( + spawner + .spawn(command) + .pipe( + Effect.mapError( + (cause) => new NativeTelemetrySpawnFailed({ path: executablePath, cause }), + ), + ), + (child) => child.kill().pipe(Effect.ignore), + ); + yield* Ref.update(state, (current) => ({ + ...current, + status: "starting" as const, + handle: Option.some(handle), + hello: Option.none(), + })); + yield* publishHealth; + + const helloDeferred = yield* Deferred.make(); + const eventFiber = yield* handle.stdout.pipe( + Stream.pipeThroughChannel(Ndjson.decode({ ignoreEmptyLines: true })), + Stream.mapEffect( + ( + value, + ): Effect.Effect< + ResourceMonitorEvent, + NativeTelemetryProtocolMismatch | NativeTelemetryDecodeFailed + > => { + const version = eventVersion(value); + if (version !== undefined && version !== RESOURCE_MONITOR_PROTOCOL_VERSION) { + return Effect.fail( + new NativeTelemetryProtocolMismatch({ + expectedVersion: RESOURCE_MONITOR_PROTOCOL_VERSION, + receivedVersion: version, + }), + ); + } + return decodeMonitorEvent(value).pipe( + Effect.mapError((cause) => new NativeTelemetryDecodeFailed({ cause })), + ); + }, + ), + Stream.runForEach((event) => processEvent(event, helloDeferred)), + Effect.mapError((cause) => + isProtocolMismatch(cause) || isDecodeFailed(cause) || isCommandFailed(cause) + ? 
cause + : new NativeTelemetryDecodeFailed({ cause }), + ), + Effect.forkScoped, + ); + yield* handle.stderr.pipe(Stream.runDrain, Effect.ignore, Effect.forkScoped); + + const hello = yield* Deferred.await(helloDeferred).pipe( + Effect.timeoutOption(HANDSHAKE_TIMEOUT), + Effect.flatMap( + Option.match({ + onNone: () => + Effect.fail( + new NativeTelemetryHandshakeTimedOut({ + timeoutMs: Duration.toMillis(HANDSHAKE_TIMEOUT), + }), + ), + onSome: Effect.succeed, + }), + ), + ); + const control = yield* Ref.get(collectionControl); + yield* writeCommand(handle, { + version: RESOURCE_MONITOR_PROTOCOL_VERSION, + type: "configure", + rootPid: process.pid, + sampleIntervalMs: control.sampleIntervalMs, + externalProcesses: [...(yield* Ref.get(externalProcesses))], + }); + if (control.liveSubscriberCount > 0) { + yield* writeCommand(handle, { + version: RESOURCE_MONITOR_PROTOCOL_VERSION, + type: "setStreaming", + enabled: true, + }); + } + yield* Ref.update(state, (current) => ({ + ...current, + status: "healthy" as const, + hello: Option.some(hello), + })); + yield* publishHealth; + + yield* writeCommand(handle, { + version: RESOURCE_MONITOR_PROTOCOL_VERSION, + type: "setExternalProcesses", + processes: [...(yield* Ref.get(externalProcesses))], + }); + const latestControl = yield* Ref.get(collectionControl); + if (latestControl.sampleIntervalMs !== control.sampleIntervalMs) { + yield* writeCommand(handle, { + version: RESOURCE_MONITOR_PROTOCOL_VERSION, + type: "setSampleInterval", + sampleIntervalMs: latestControl.sampleIntervalMs, + }); + } + if (latestControl.liveSubscriberCount > 0 !== control.liveSubscriberCount > 0) { + yield* writeCommand(handle, { + version: RESOURCE_MONITOR_PROTOCOL_VERSION, + type: "setStreaming", + enabled: latestControl.liveSubscriberCount > 0, + }); + } + + const exitEffect = handle.exitCode.pipe( + Effect.mapError( + (cause) => + new NativeTelemetryCommandFailed({ + operation: "waitForExit", + cause, + }), + ), + Effect.flatMap((exitCode) => + 
Effect.fail(new NativeTelemetryExited({ exitCode: Number(exitCode) })), + ), + ); + const decoderEffect = Fiber.join(eventFiber).pipe( + Effect.andThen(Effect.fail(new NativeTelemetryExited({ exitCode: -1 }))), + ); + return yield* Effect.raceFirst(exitEffect, decoderEffect); + }), + ).pipe( + Effect.ensuring( + Ref.update(state, (current) => ({ + ...current, + handle: Option.none(), + })), + ), + ); + + yield* Effect.gen(function* () { + let failures: ReadonlyArray = []; + let restartAttempt = 0; + + while (true) { + const result = yield* Effect.result(runAttempt); + if (Result.isSuccess(result)) { + return; + } + + const error = result.failure; + const now = DateTime.toEpochMillis(yield* DateTime.now); + failures = [...failures.filter((failedAt) => now - failedAt <= FAILURE_WINDOW_MS), now]; + const exhausted = failures.length >= MAX_FAILURES_PER_WINDOW; + yield* Ref.update(state, (current) => ({ + ...current, + status: exhausted ? ("unavailable" as const) : ("degraded" as const), + hello: Option.none(), + lastError: Option.some(errorMessage(error)), + restartCount: current.restartCount + 1, + })); + yield* publishHealth; + yield* failPending(error); + + if (exhausted) { + yield* Queue.take(retryQueue); + failures = []; + restartAttempt = 0; + yield* Ref.update(state, (current) => ({ + ...current, + status: "starting" as const, + hello: Option.none(), + lastError: Option.none(), + })); + yield* publishHealth; + continue; + } + + const manuallyRetried = yield* Effect.raceFirst( + Effect.sleep(restartDelay(restartAttempt)).pipe(Effect.as(false)), + Queue.take(retryQueue).pipe(Effect.as(true)), + ); + restartAttempt = manuallyRetried ? 0 : restartAttempt + 1; + } + }).pipe( + Effect.catchCause((cause) => + Cause.hasInterruptsOnly(cause) + ? 
Effect.void + : Ref.update(state, (current) => ({ + ...current, + status: "unavailable" as const, + hello: Option.none(), + lastError: Option.some(Cause.pretty(cause)), + })).pipe( + Effect.andThen(publishHealth), + Effect.andThen( + Effect.logWarning("Resource monitor supervisor failed", { + cause: Cause.pretty(cause), + }), + ), + ), + ), + Effect.forkScoped, + ); + + const applyCollectionControl = Effect.fn( + "resourceTelemetry.nativeTelemetryClient.applyCollectionControl", + )(function* (previous: CollectionControl, next: CollectionControl) { + const current = yield* Ref.get(state); + if (Option.isSome(current.handle) && current.status === "healthy") { + if (previous.sampleIntervalMs !== next.sampleIntervalMs) { + yield* writeCommand(current.handle.value, { + version: RESOURCE_MONITOR_PROTOCOL_VERSION, + type: "setSampleInterval", + sampleIntervalMs: next.sampleIntervalMs, + }); + } + const wasStreaming = previous.liveSubscriberCount > 0; + const isStreaming = next.liveSubscriberCount > 0; + if (wasStreaming !== isStreaming) { + yield* writeCommand(current.handle.value, { + version: RESOURCE_MONITOR_PROTOCOL_VERSION, + type: "setStreaming", + enabled: isStreaming, + }); + } + } + if (previous.sampleIntervalMs !== next.sampleIntervalMs) { + yield* publishHealth; + } + }); + + const setHostPowerState: NativeTelemetryClientShape["setHostPowerState"] = (hostPower) => + Effect.gen(function* () { + const [previous, next] = yield* Ref.modify(collectionControl, (current) => { + const updated: CollectionControl = { + ...current, + hostPower, + sampleIntervalMs: resolveNativeSampleIntervalMs(hostPower, current.liveSubscriberCount), + }; + return [[current, updated] as const, updated]; + }); + yield* applyCollectionControl(previous, next); + }); + + const changeLiveSubscriberCount = Effect.fn( + "resourceTelemetry.nativeTelemetryClient.changeLiveSubscriberCount", + )(function* (delta: 1 | -1) { + const [previous, next] = yield* Ref.modify(collectionControl, (current) => 
{ + const liveSubscriberCount = Math.max(0, current.liveSubscriberCount + delta); + const updated: CollectionControl = { + ...current, + liveSubscriberCount, + sampleIntervalMs: resolveNativeSampleIntervalMs(current.hostPower, liveSubscriberCount), + }; + return [[current, updated] as const, updated]; + }); + yield* applyCollectionControl(previous, next); + }); + + const liveSnapshots = Stream.unwrap( + Effect.gen(function* () { + const subscription = yield* PubSub.subscribe(snapshots); + yield* Effect.acquireRelease(changeLiveSubscriberCount(1), () => + changeLiveSubscriberCount(-1).pipe(Effect.ignore), + ); + return Stream.fromSubscription(subscription); + }), + ); + + const setExternalProcesses: NativeTelemetryClientShape["setExternalProcesses"] = (processes) => + Effect.gen(function* () { + yield* Ref.set(externalProcesses, [...processes]); + const current = yield* Ref.get(state); + if (Option.isNone(current.handle) || current.status !== "healthy") return; + yield* writeCommand(current.handle.value, { + version: RESOURCE_MONITOR_PROTOCOL_VERSION, + type: "setExternalProcesses", + processes: [...processes], + }); + }); + + const readHistory: NativeTelemetryClientShape["readHistory"] = (windowMs) => + Effect.gen(function* () { + const current = yield* Ref.get(state); + if (Option.isNone(current.handle) || current.status !== "healthy") { + return yield* new NativeTelemetryUnavailable({ + reason: Option.getOrElse(current.lastError, () => "sidecar is not running"), + }); + } + const requestId = yield* crypto.randomUUIDv4.pipe( + Effect.mapError( + (cause) => + new NativeTelemetryCommandFailed({ + operation: "createHistoryRequestId", + cause, + }), + ), + ); + const deferred = yield* Deferred.make< + ReadonlyArray, + NativeTelemetryClientError + >(); + yield* Ref.update(pendingHistories, (pending) => { + const next = new Map(pending); + next.set(requestId, { deferred, snapshots: [] }); + return next; + }); + return yield* writeCommand(current.handle.value, { + 
version: RESOURCE_MONITOR_PROTOCOL_VERSION, + type: "readHistory", + requestId, + windowMs: Math.max(0, Math.round(windowMs)), + }).pipe( + Effect.andThen( + Deferred.await(deferred).pipe( + Effect.timeoutOption(HISTORY_REQUEST_TIMEOUT), + Effect.flatMap( + Option.match({ + onNone: () => + Effect.fail( + new NativeTelemetryCommandFailed({ + operation: "readHistory", + cause: "history request timed out", + }), + ), + onSome: Effect.succeed, + }), + ), + ), + ), + Effect.ensuring( + Ref.update(pendingHistories, (pending) => { + const next = new Map(pending); + next.delete(requestId); + return next; + }), + ), + ); + }); + + const sampleNow: NativeTelemetryClientShape["sampleNow"] = Effect.gen(function* () { + const current = yield* Ref.get(state); + if (Option.isNone(current.handle) || current.status !== "healthy") { + return yield* new NativeTelemetryUnavailable({ + reason: Option.getOrElse(current.lastError, () => "sidecar is not running"), + }); + } + + const requestId = yield* crypto.randomUUIDv4.pipe( + Effect.mapError( + (cause) => + new NativeTelemetryCommandFailed({ + operation: "createRequestId", + cause, + }), + ), + ); + const deferred = yield* Deferred.make< + ResourceMonitorSnapshotEvent, + NativeTelemetryClientError + >(); + yield* Ref.update(pendingSamples, (pending) => { + const next = new Map(pending); + next.set(requestId, deferred); + return next; + }); + return yield* writeCommand(current.handle.value, { + version: RESOURCE_MONITOR_PROTOCOL_VERSION, + type: "sampleNow", + requestId, + }).pipe( + Effect.andThen( + Deferred.await(deferred).pipe( + Effect.timeoutOption(SAMPLE_REQUEST_TIMEOUT), + Effect.flatMap( + Option.match({ + onNone: () => + Effect.fail( + new NativeTelemetryCommandFailed({ + operation: "sampleNow", + cause: "sample request timed out", + }), + ), + onSome: Effect.succeed, + }), + ), + ), + ), + Effect.ensuring( + Ref.update(pendingSamples, (pending) => { + const next = new Map(pending); + next.delete(requestId); + return next; + 
}), + ), + ); + }); + + const health = currentHealth; + + return NativeTelemetryClient.of({ + capabilities: Ref.get(state).pipe( + Effect.flatMap((current) => + Option.match(current.hello, { + onNone: () => + Effect.fail( + new NativeTelemetryUnavailable({ + reason: Option.getOrElse(current.lastError, () => "handshake is incomplete"), + }), + ), + onSome: (hello) => Effect.succeed(hello.capabilities), + }), + ), + ), + snapshots: liveSnapshots, + readHistory, + setExternalProcesses, + setHostPowerState, + sampleNow, + retry: Ref.get(state).pipe( + Effect.flatMap((current) => + current.status === "healthy" || current.status === "starting" + ? Effect.succeed(false) + : Queue.offer(retryQueue, undefined).pipe(Effect.as(true)), + ), + ), + health, + healthChanges: Stream.fromPubSub(healthChanges), + }); +}); + +export const layer = Layer.effect(NativeTelemetryClient, make()); + +export const layerTest = ( + overrides: Partial = {}, +): Layer.Layer => + Layer.succeed( + NativeTelemetryClient, + NativeTelemetryClient.of({ + capabilities: Effect.succeed({ + cumulativeCpuTime: true, + currentCpuPercent: true, + residentMemory: true, + virtualMemory: true, + ioBytes: true, + processStartTime: true, + processTree: true, + }), + snapshots: Stream.empty, + readHistory: () => + Effect.fail( + new NativeTelemetryUnavailable({ + reason: "No resource monitor history was configured for this test.", + }), + ), + setExternalProcesses: () => Effect.void, + setHostPowerState: () => Effect.void, + sampleNow: Effect.fail( + new NativeTelemetryUnavailable({ + reason: "No resource monitor sample was configured for this test.", + }), + ), + retry: Effect.succeed(false), + health: Effect.succeed({ + status: "unavailable", + hello: Option.none(), + lastSampleAt: Option.none(), + lastError: Option.some("Resource monitor test implementation is unavailable."), + restartCount: 0, + sampleIntervalMs: UNKNOWN_BACKGROUND_SAMPLE_INTERVAL_MS, + }), + healthChanges: Stream.empty, + ...overrides, + }), 
+ ); diff --git a/apps/server/src/resourceTelemetry/ResourceAttribution.ts b/apps/server/src/resourceTelemetry/ResourceAttribution.ts new file mode 100644 index 00000000000..1d916cd0f7e --- /dev/null +++ b/apps/server/src/resourceTelemetry/ResourceAttribution.ts @@ -0,0 +1,73 @@ +import type { ResourceAttributionEntry, ResourceAttributionSnapshot } from "@t3tools/contracts"; +import * as Context from "effect/Context"; +import * as DateTime from "effect/DateTime"; +import * as Effect from "effect/Effect"; +import * as Layer from "effect/Layer"; +import * as Ref from "effect/Ref"; + +export interface ResourceAttributionRecord { + readonly component: string; + readonly operation: string; + readonly logicalReadBytes?: number; + readonly logicalWriteBytes?: number; + readonly count?: number; + readonly durationMs?: number; +} + +export interface ResourceAttributionShape { + readonly record: (input: ResourceAttributionRecord) => Effect.Effect; + readonly snapshot: Effect.Effect; +} + +export class ResourceAttribution extends Context.Service< + ResourceAttribution, + ResourceAttributionShape +>()("t3/resourceTelemetry/ResourceAttribution") {} + +function key(input: Pick): string { + return `${input.component}\u0000${input.operation}`; +} + +function nonNegativeInteger(value: number | undefined, fallback: number): number { + if (value === undefined) return fallback; + if (!Number.isFinite(value)) return 0; + return Math.max(0, Math.round(value)); +} + +export const make = Effect.fn("resourceTelemetry.resourceAttribution.make")(function* () { + const entries = yield* Ref.make(new Map()); + + const record: ResourceAttributionShape["record"] = (input) => + Ref.update(entries, (current) => { + const next = new Map(current); + const entryKey = key(input); + const existing = next.get(entryKey); + next.set(entryKey, { + component: input.component, + operation: input.operation, + logicalReadBytes: + (existing?.logicalReadBytes ?? 
0) + nonNegativeInteger(input.logicalReadBytes, 0), + logicalWriteBytes: + (existing?.logicalWriteBytes ?? 0) + nonNegativeInteger(input.logicalWriteBytes, 0), + count: (existing?.count ?? 0) + nonNegativeInteger(input.count, 1), + durationMs: (existing?.durationMs ?? 0) + nonNegativeInteger(input.durationMs, 0), + }); + return next; + }); + + return ResourceAttribution.of({ + record, + snapshot: Effect.gen(function* () { + const readAt = yield* DateTime.now; + const current = yield* Ref.get(entries); + return { + readAt, + entries: [...current.values()].toSorted( + (left, right) => right.logicalWriteBytes - left.logicalWriteBytes, + ), + }; + }), + }); +}); + +export const layer = Layer.effect(ResourceAttribution, make()); diff --git a/apps/server/src/resourceTelemetry/ResourceMonitorBinary.test.ts b/apps/server/src/resourceTelemetry/ResourceMonitorBinary.test.ts new file mode 100644 index 00000000000..1fe536e7828 --- /dev/null +++ b/apps/server/src/resourceTelemetry/ResourceMonitorBinary.test.ts @@ -0,0 +1,80 @@ +import * as NodeServices from "@effect/platform-node/NodeServices"; +import { + HostProcessArchitecture, + HostProcessEnvironment, + HostProcessPlatform, +} from "@t3tools/shared/hostProcess"; +import { assert, describe, it } from "@effect/vitest"; +import * as Effect from "effect/Effect"; +import * as FileSystem from "effect/FileSystem"; + +import { ServerConfig } from "../config.ts"; +import * as ResourceMonitorBinary from "./ResourceMonitorBinary.ts"; + +describe("ResourceMonitorBinary", () => { + it.effect("resolves an executable override", () => + Effect.gen(function* () { + const fileSystem = yield* FileSystem.FileSystem; + const baseDir = yield* fileSystem.makeTempDirectoryScoped({ + prefix: "t3-resource-monitor-binary-", + }); + const binaryPath = `${baseDir}/t3-resource-monitor`; + yield* fileSystem.writeFileString(binaryPath, "binary"); + yield* fileSystem.chmod(binaryPath, 0o755); + + const service = yield* ResourceMonitorBinary.make().pipe( + 
Effect.provide(ServerConfig.layerTest(process.cwd(), baseDir)), + Effect.provideService(HostProcessPlatform, "linux"), + Effect.provideService(HostProcessArchitecture, "x64"), + Effect.provideService(HostProcessEnvironment, { + T3CODE_RESOURCE_MONITOR_PATH: binaryPath, + }), + ); + + assert.equal(yield* service.resolve, binaryPath); + }).pipe(Effect.scoped, Effect.provide(NodeServices.layer)), + ); + + it.effect("rejects a non-executable POSIX override", () => + Effect.gen(function* () { + const fileSystem = yield* FileSystem.FileSystem; + const baseDir = yield* fileSystem.makeTempDirectoryScoped({ + prefix: "t3-resource-monitor-binary-", + }); + const binaryPath = `${baseDir}/t3-resource-monitor`; + yield* fileSystem.writeFileString(binaryPath, "binary"); + yield* fileSystem.chmod(binaryPath, 0o644); + + const service = yield* ResourceMonitorBinary.make().pipe( + Effect.provide(ServerConfig.layerTest(process.cwd(), baseDir)), + Effect.provideService(HostProcessPlatform, "linux"), + Effect.provideService(HostProcessArchitecture, "x64"), + Effect.provideService(HostProcessEnvironment, { + T3CODE_RESOURCE_MONITOR_PATH: binaryPath, + }), + ); + const error = yield* Effect.flip(service.resolve); + + assert.instanceOf(error, ResourceMonitorBinary.ResourceMonitorBinaryNotExecutable); + assert.equal(error.path, binaryPath); + }).pipe(Effect.scoped, Effect.provide(NodeServices.layer)), + ); + + it.effect("rejects unsupported platform and architecture pairs", () => + Effect.gen(function* () { + const fileSystem = yield* FileSystem.FileSystem; + const baseDir = yield* fileSystem.makeTempDirectoryScoped({ + prefix: "t3-resource-monitor-binary-", + }); + const service = yield* ResourceMonitorBinary.make().pipe( + Effect.provide(ServerConfig.layerTest(process.cwd(), baseDir)), + Effect.provideService(HostProcessPlatform, "freebsd"), + Effect.provideService(HostProcessArchitecture, "ia32"), + Effect.provideService(HostProcessEnvironment, {}), + ); + const error = yield* 
Effect.flip(service.resolve); + + assert.instanceOf(error, ResourceMonitorBinary.ResourceMonitorBinaryUnsupported); + }).pipe(Effect.scoped, Effect.provide(NodeServices.layer)), + ); +}); diff --git a/apps/server/src/resourceTelemetry/ResourceMonitorBinary.ts b/apps/server/src/resourceTelemetry/ResourceMonitorBinary.ts new file mode 100644 index 00000000000..6e3cec09d82 --- /dev/null +++ b/apps/server/src/resourceTelemetry/ResourceMonitorBinary.ts @@ -0,0 +1,193 @@ +import { + HostProcessArchitecture, + HostProcessEnvironment, + HostProcessPlatform, +} from "@t3tools/shared/hostProcess"; +import * as Context from "effect/Context"; +import * as Effect from "effect/Effect"; +import * as FileSystem from "effect/FileSystem"; +import * as Layer from "effect/Layer"; +import * as Option from "effect/Option"; +import * as Path from "effect/Path"; +import * as Schema from "effect/Schema"; + +import { ServerConfig } from "../config.ts"; + +export class ResourceMonitorBinaryUnsupported extends Schema.TaggedErrorClass()( + "ResourceMonitorBinaryUnsupported", + { + platform: Schema.String, + architecture: Schema.String, + }, +) { + override get message(): string { + return `Resource monitoring is unsupported on ${this.platform}/${this.architecture}.`; + } +} + +export class ResourceMonitorBinaryNotFound extends Schema.TaggedErrorClass()( + "ResourceMonitorBinaryNotFound", + { + platform: Schema.String, + architecture: Schema.String, + candidates: Schema.Array(Schema.String), + }, +) { + override get message(): string { + return `Resource monitor binary was not found for ${this.platform}/${this.architecture}.`; + } +} + +export class ResourceMonitorBinaryNotExecutable extends Schema.TaggedErrorClass()( + "ResourceMonitorBinaryNotExecutable", + { + path: Schema.String, + mode: Schema.Number, + }, +) { + override get message(): string { + return `Resource monitor binary at '${this.path}' is not executable.`; + } +} + +export type ResourceMonitorBinaryError = + | 
ResourceMonitorBinaryUnsupported + | ResourceMonitorBinaryNotFound + | ResourceMonitorBinaryNotExecutable; + +export interface ResourceMonitorBinaryShape { + readonly resolve: Effect.Effect; +} + +export class ResourceMonitorBinary extends Context.Service< + ResourceMonitorBinary, + ResourceMonitorBinaryShape +>()("t3/resourceTelemetry/ResourceMonitorBinary") {} + +function binaryName(platform: NodeJS.Platform): string { + return platform === "win32" ? "t3-resource-monitor.exe" : "t3-resource-monitor"; +} + +export function resourceMonitorPlatformKey( + platform: NodeJS.Platform, + architecture: NodeJS.Architecture, +): string | undefined { + if ( + (platform !== "darwin" && platform !== "linux" && platform !== "win32") || + (architecture !== "arm64" && architecture !== "x64") + ) { + return undefined; + } + return `${platform}-${architecture}`; +} + +function resourceMonitorRustTarget( + platform: NodeJS.Platform, + architecture: NodeJS.Architecture, +): string | undefined { + if (platform === "darwin") { + return architecture === "arm64" + ? "aarch64-apple-darwin" + : architecture === "x64" + ? "x86_64-apple-darwin" + : undefined; + } + if (platform === "linux") { + return architecture === "arm64" + ? "aarch64-unknown-linux-gnu" + : architecture === "x64" + ? "x86_64-unknown-linux-gnu" + : undefined; + } + if (platform === "win32") { + return architecture === "arm64" + ? "aarch64-pc-windows-msvc" + : architecture === "x64" + ? 
"x86_64-pc-windows-msvc" + : undefined; + } + return undefined; +} + +export const make = Effect.fn("resourceTelemetry.resourceMonitorBinary.make")(function* () { + const config = yield* ServerConfig; + const fileSystem = yield* FileSystem.FileSystem; + const path = yield* Path.Path; + const platform = yield* HostProcessPlatform; + const architecture = yield* HostProcessArchitecture; + const environment = yield* HostProcessEnvironment; + const executableName = binaryName(platform); + const platformKey = resourceMonitorPlatformKey(platform, architecture); + const rustTarget = resourceMonitorRustTarget(platform, architecture); + if (platformKey === undefined || rustTarget === undefined) { + return ResourceMonitorBinary.of({ + resolve: Effect.fail( + new ResourceMonitorBinaryUnsupported({ + platform, + architecture, + }), + ), + }); + } + + const candidates = [ + environment.T3CODE_RESOURCE_MONITOR_PATH, + config.resourceMonitorPath, + path.resolve(import.meta.dirname, "resource-monitor", platformKey, executableName), + path.resolve(import.meta.dirname, "resource-monitor", executableName), + path.resolve(import.meta.dirname, "../resource-monitor", executableName), + path.resolve( + import.meta.dirname, + "../../../../native/resource-monitor/target", + rustTarget, + "release", + executableName, + ), + path.resolve( + import.meta.dirname, + "../../../native/resource-monitor/target", + rustTarget, + "release", + executableName, + ), + path.resolve( + import.meta.dirname, + "../../../../native/resource-monitor/target/release", + executableName, + ), + path.resolve( + import.meta.dirname, + "../../../../native/resource-monitor/target/debug", + executableName, + ), + ].filter((candidate): candidate is string => Boolean(candidate)); + + const resolve: ResourceMonitorBinaryShape["resolve"] = Effect.gen(function* () { + for (const candidate of candidates) { + const exists = yield* fileSystem.exists(candidate).pipe(Effect.orElseSucceed(() => false)); + if (!exists) continue; + 
+ if (platform !== "win32") { + const stat = yield* fileSystem.stat(candidate).pipe(Effect.option); + if (Option.isSome(stat) && (stat.value.mode & 0o111) === 0) { + return yield* new ResourceMonitorBinaryNotExecutable({ + path: candidate, + mode: stat.value.mode, + }); + } + } + + return candidate; + } + + return yield* new ResourceMonitorBinaryNotFound({ + platform, + architecture, + candidates, + }); + }); + + return ResourceMonitorBinary.of({ resolve }); +}); + +export const layer = Layer.effect(ResourceMonitorBinary, make()); diff --git a/apps/server/src/resourceTelemetry/ResourceTelemetry.test.ts b/apps/server/src/resourceTelemetry/ResourceTelemetry.test.ts new file mode 100644 index 00000000000..f01f4f73df3 --- /dev/null +++ b/apps/server/src/resourceTelemetry/ResourceTelemetry.test.ts @@ -0,0 +1,345 @@ +import type { + DesktopHostTelemetrySnapshot, + ResourceMonitorProcessSample, + ResourceMonitorSnapshotEvent, +} from "@t3tools/contracts"; +import { describe, expect, it } from "@effect/vitest"; +import * as DateTime from "effect/DateTime"; +import * as Duration from "effect/Duration"; +import * as Effect from "effect/Effect"; +import * as Layer from "effect/Layer"; +import * as Option from "effect/Option"; +import * as PubSub from "effect/PubSub"; +import * as Ref from "effect/Ref"; +import * as Stream from "effect/Stream"; +import * as TestClock from "effect/testing/TestClock"; + +import * as DesktopTelemetryReceiver from "./DesktopTelemetryReceiver.ts"; +import * as NativeTelemetryClient from "./NativeTelemetryClient.ts"; +import * as ResourceAttribution from "./ResourceAttribution.ts"; +import * as ResourceTelemetry from "./ResourceTelemetry.ts"; + +function processSample( + input: Partial & + Pick, +): ResourceMonitorProcessSample { + return { + runTimeMs: 1_000, + name: `process-${input.pid}`, + command: `process-${input.pid}`, + status: "Running", + cpuPercent: 0, + cpuTimeMs: 0, + residentBytes: 1_024, + virtualBytes: 2_048, + ioReadBytes: 0, + 
ioWriteBytes: 0, + ioSemantics: "storage", + ...input, + }; +} + +function nativeSnapshot(input: { + readonly sequence: number; + readonly sampledAtUnixMs: number; + readonly childCpuTimeMs: number; + readonly childWriteBytes: number; +}): ResourceMonitorSnapshotEvent { + const processes = [ + processSample({ + pid: process.pid, + ppid: 1, + startTimeMs: 100, + cpuTimeMs: input.sequence * 10, + }), + processSample({ + pid: 4_242, + ppid: process.pid, + startTimeMs: 200, + name: "codex", + command: "codex app-server", + cpuTimeMs: input.childCpuTimeMs, + ioWriteBytes: input.childWriteBytes, + }), + processSample({ + pid: 5_000, + ppid: 1, + startTimeMs: 300, + name: "electron", + command: "electron", + cpuTimeMs: input.sequence * 20, + }), + processSample({ + pid: 9_000, + ppid: process.pid, + startTimeMs: 400, + name: "t3-resource-monitor", + command: "t3-resource-monitor", + cpuTimeMs: input.sequence * 5, + }), + ]; + return { + version: 2, + type: "snapshot", + sequence: input.sequence, + sampledAtUnixMs: input.sampledAtUnixMs, + collectionDurationMicros: 300, + scannedProcessCount: 80, + retainedProcessCount: processes.length, + inaccessibleProcessCount: 1, + processes, + }; +} + +function desktopSnapshot(sampledAtUnixMs: number): DesktopHostTelemetrySnapshot { + const sampledAt = DateTime.makeUnsafe(sampledAtUnixMs); + return { + version: 1, + type: "desktopTelemetry", + sequence: 1, + sampledAtUnixMs, + electronPid: 5_000, + power: { + source: "electron-main", + idle: "false", + idleSeconds: 2, + locked: "false", + suspended: false, + onBattery: "true", + lowPowerMode: "unknown", + thermalState: "fair", + stale: false, + updatedAt: sampledAt, + }, + speedLimitPercent: Option.some(90), + electronProcesses: [ + { + pid: 5_000, + creationTimeMs: 300, + type: "Browser", + name: "electron", + cpuPercent: 2, + cumulativeCpuSeconds: 0.02, + idleWakeupsPerSecond: 3, + workingSetBytes: 4_096, + peakWorkingSetBytes: 8_192, + }, + ], + }; +} + 
+describe("ResourceTelemetry", () => { + it.effect("enables live native and Electron collection only while changes are retained", () => + Effect.gen(function* () { + const sampledAtUnixMs = DateTime.toEpochMillis(yield* DateTime.now); + const sample = nativeSnapshot({ + sequence: 1, + sampledAtUnixMs, + childCpuTimeMs: 100, + childWriteBytes: 1_000, + }); + const demandChanges = yield* Ref.make>([]); + const nativeLayer = NativeTelemetryClient.layerTest({ + sampleNow: Effect.succeed(sample), + health: Effect.succeed({ + status: "healthy", + hello: Option.none(), + lastSampleAt: Option.none(), + lastError: Option.none(), + restartCount: 0, + sampleIntervalMs: 1_000, + }), + }); + const desktopLayer = DesktopTelemetryReceiver.layerTest({ + latest: Effect.succeedSome(desktopSnapshot(sampledAtUnixMs)), + setDiagnosticsDemand: (enabled) => + Ref.update(demandChanges, (changes) => [...changes, enabled]), + }); + const telemetryLayer = ResourceTelemetry.layer.pipe( + Layer.provide(Layer.mergeAll(nativeLayer, desktopLayer, ResourceAttribution.layer)), + ); + + const live = yield* Stream.runHead( + Effect.gen(function* () { + const telemetry = yield* ResourceTelemetry.ResourceTelemetry; + return telemetry.changes; + }).pipe(Stream.unwrap), + ).pipe(Effect.provide(telemetryLayer)); + + expect(Option.isSome(live)).toBe(true); + expect(yield* Ref.get(demandChanges)).toEqual([true, false]); + }), + ); + + it.effect("combines native, Electron, attribution, retry, and history data", () => + Effect.gen(function* () { + const startedAt = DateTime.toEpochMillis(yield* DateTime.now); + const samples = [ + nativeSnapshot({ + sequence: 1, + sampledAtUnixMs: startedAt, + childCpuTimeMs: 100, + childWriteBytes: 1_000, + }), + nativeSnapshot({ + sequence: 2, + sampledAtUnixMs: startedAt + 1_000, + childCpuTimeMs: 350, + childWriteBytes: 5_000, + }), + nativeSnapshot({ + sequence: 1, + sampledAtUnixMs: startedAt + 2_000, + childCpuTimeMs: 500, + childWriteBytes: 7_000, + }), + ] as const; 
+ const sampleIndex = yield* Ref.make(0); + const externalProcesses = yield* Ref.make< + ReadonlyArray<{ readonly pid: number; readonly startTimeMs?: number }> + >([]); + const retryCount = yield* Ref.make(0); + const nativeHealth = yield* Ref.make({ + status: "healthy", + hello: Option.some({ + version: 2, + type: "hello", + sidecarVersion: "0.1.0", + sidecarPid: 9_000, + platform: "test", + arch: "test", + capabilities: { + cumulativeCpuTime: true, + currentCpuPercent: true, + residentMemory: true, + virtualMemory: true, + ioBytes: true, + processStartTime: true, + processTree: true, + }, + }), + lastSampleAt: Option.some(DateTime.makeUnsafe(startedAt)), + lastError: Option.none(), + restartCount: 2, + sampleIntervalMs: 1_000, + }); + const nativeHealthChanges = + yield* PubSub.sliding(4); + const nativeLayer = NativeTelemetryClient.layerTest({ + setExternalProcesses: (processes) => Ref.set(externalProcesses, processes), + readHistory: () => Effect.succeed(samples.slice(0, 2)), + sampleNow: Ref.modify(sampleIndex, (index) => [ + samples[Math.min(index, samples.length - 1)]!, + index + 1, + ]), + retry: Ref.updateAndGet(retryCount, (count) => count + 1).pipe(Effect.as(true)), + health: Ref.get(nativeHealth), + healthChanges: Stream.fromPubSub(nativeHealthChanges), + }); + const desktopLayer = DesktopTelemetryReceiver.layerTest({ + latest: Effect.succeedSome(desktopSnapshot(startedAt)), + health: Effect.succeed({ + status: "healthy", + lastSampleAt: Option.some(DateTime.makeUnsafe(startedAt)), + lastError: Option.none(), + }), + }); + const attributionLayer = ResourceAttribution.layer; + const dependencies = Layer.mergeAll(nativeLayer, desktopLayer, attributionLayer); + const telemetryLayer = ResourceTelemetry.layer.pipe(Layer.provide(dependencies)); + const layer = Layer.mergeAll(dependencies, telemetryLayer); + + yield* Effect.gen(function* () { + const telemetry = yield* ResourceTelemetry.ResourceTelemetry; + const attribution = yield* 
ResourceAttribution.ResourceAttribution; + + expect(yield* Ref.get(externalProcesses)).toEqual([{ pid: 5_000 }]); + + yield* attribution.record({ + component: "provider-event-log", + operation: "append", + logicalWriteBytes: 512, + count: 2, + durationMs: 4, + }); + const first = yield* telemetry.refresh; + expect(first.groups.backend.processCount).toBe(2); + expect(first.groups.electron.processCount).toBe(1); + expect(first.groups.monitor.processCount).toBe(1); + expect(first.power.onBattery).toBe("true"); + expect(Option.getOrNull(first.speedLimitPercent)).toBe(90); + expect(first.attribution.entries).toEqual([ + { + component: "provider-event-log", + operation: "append", + logicalReadBytes: 0, + logicalWriteBytes: 512, + count: 2, + durationMs: 4, + }, + ]); + + yield* TestClock.adjust(Duration.seconds(1)); + const second = yield* telemetry.refresh; + const codex = second.processes.find((entry) => entry.identity.pid === 4_242); + expect(codex?.cpuPercent).toBe(25); + expect(codex?.ioWriteBytesPerSecond).toBe(4_000); + expect(second.groups.backend.ioWriteBytes).toBe(4_000); + expect(second.health.collectionDurationMicros).toBe(300); + expect(second.health.scannedProcessCount).toBe(80); + expect(second.health.inaccessibleProcessCount).toBe(1); + + const history = yield* telemetry.readHistory({ + windowMs: 60_000, + bucketMs: 10_000, + }); + expect(history.retainedSampleCount).toBeGreaterThan(0); + expect( + history.topProcesses.find((entry) => entry.identity.pid === 4_242)?.sampleCount, + ).toBe(2); + expect(history.topProcesses.find((entry) => entry.identity.pid === 4_242)?.cpuTimeMs).toBe( + 250, + ); + expect( + history.topProcesses.find((entry) => entry.identity.pid === 4_242)?.ioWriteBytes, + ).toBe(4_000); + expect(history.buckets.reduce((total, bucket) => total + bucket.ioWriteBytes, 0)).toBe( + 4_000, + ); + + const retry = yield* telemetry.retry; + expect(retry.accepted).toBe(true); + expect(yield* Ref.get(retryCount)).toBe(1); + + yield* 
Ref.update(nativeHealth, (current) => ({ + ...current, + hello: Option.map(current.hello, (hello) => ({ + ...hello, + sidecarPid: 9_001, + })), + restartCount: 3, + })); + yield* TestClock.adjust(Duration.seconds(1)); + const restarted = yield* telemetry.refresh; + expect(DateTime.toEpochMillis(restarted.readAt)).toBe(startedAt + 2_000); + expect(Option.getOrNull(restarted.health.sidecarPid)).toBe(9_001); + + yield* Ref.update(nativeHealth, (current) => ({ + ...current, + status: "degraded" as const, + lastError: Option.some("collector exited"), + })); + yield* PubSub.publish(nativeHealthChanges, yield* Ref.get(nativeHealth)); + yield* Effect.yieldNow; + const healthUpdate = yield* telemetry.latest; + expect(healthUpdate.health.native.status).toBe("degraded"); + expect(Option.getOrNull(healthUpdate.health.native.lastError)).toBe("collector exited"); + const degradedHistory = yield* telemetry.readHistory({ + windowMs: 60_000, + bucketMs: 10_000, + }); + expect(degradedHistory.health.native.status).toBe("degraded"); + }).pipe(Effect.provide(layer)); + }), + ); +}); diff --git a/apps/server/src/resourceTelemetry/ResourceTelemetry.ts b/apps/server/src/resourceTelemetry/ResourceTelemetry.ts new file mode 100644 index 00000000000..f4a0f9770c0 --- /dev/null +++ b/apps/server/src/resourceTelemetry/ResourceTelemetry.ts @@ -0,0 +1,453 @@ +import type { + DesktopHostTelemetrySnapshot, + HostPowerSnapshot, + ResourceMonitorSnapshotEvent, + ResourceTelemetryHealth, + ResourceTelemetryHistory, + ResourceTelemetryHistoryInput, + ResourceTelemetryProcessIdentity, + ResourceTelemetryRetryResult, + ResourceTelemetrySnapshot, +} from "@t3tools/contracts"; +import * as Context from "effect/Context"; +import * as DateTime from "effect/DateTime"; +import * as Effect from "effect/Effect"; +import * as Exit from "effect/Exit"; +import * as Layer from "effect/Layer"; +import * as Option from "effect/Option"; +import * as PubSub from "effect/PubSub"; +import * as Ref from "effect/Ref"; 
+import * as Result from "effect/Result"; +import * as Schema from "effect/Schema"; +import * as Scope from "effect/Scope"; +import * as Semaphore from "effect/Semaphore"; +import * as Stream from "effect/Stream"; + +import * as DesktopTelemetryReceiver from "./DesktopTelemetryReceiver.ts"; +import { + emptyTelemetryCounters, + mergeProcesses, + type ProcessState, + type TelemetryCounters, +} from "./Model.ts"; +import * as NativeTelemetryClient from "./NativeTelemetryClient.ts"; +import * as ResourceAttribution from "./ResourceAttribution.ts"; +import { + buildResourceTelemetryHistory, + normalizeResourceTelemetryHistoryInput, +} from "./ResourceTelemetryHistory.ts"; + +export class ResourceTelemetryRefreshFailed extends Schema.TaggedErrorClass()( + "ResourceTelemetryRefreshFailed", + { + operation: Schema.String, + cause: Schema.Defect(), + }, +) { + override get message(): string { + return `Resource telemetry operation '${this.operation}' failed.`; + } +} + +export interface ResourceTelemetryShape { + readonly latest: Effect.Effect; + readonly changes: Stream.Stream; + readonly readHistory: ( + input: ResourceTelemetryHistoryInput, + ) => Effect.Effect; + readonly refresh: Effect.Effect; + readonly validateProcessIdentity: ( + identity: ResourceTelemetryProcessIdentity, + ) => Effect.Effect; + readonly retry: Effect.Effect; +} + +export class ResourceTelemetry extends Context.Service()( + "t3/resourceTelemetry/ResourceTelemetry", +) {} + +interface TelemetryState { + readonly nativeSnapshot: Option.Option; + readonly desktopSnapshot: Option.Option; + readonly previous: ReadonlyMap; + readonly counters: TelemetryCounters; + readonly latest: ResourceTelemetrySnapshot; + readonly lastNativeSequence: number; + readonly lastNativeRestartCount: number; +} + +interface LiveTelemetryState { + readonly retainCount: number; + readonly scope: Option.Option; +} + +function unknownPower(updatedAt: DateTime.Utc): HostPowerSnapshot { + return { + source: "unknown", + idle: 
"unknown", + idleSeconds: null, + locked: "unknown", + suspended: false, + onBattery: "unknown", + lowPowerMode: "unknown", + thermalState: "unknown", + stale: true, + updatedAt, + }; +} + +function buildHealth(input: { + readonly native: NativeTelemetryClient.NativeTelemetryClientHealth; + readonly desktop: DesktopTelemetryReceiver.DesktopTelemetryReceiverHealth; + readonly nativeSnapshot: Option.Option; +}): ResourceTelemetryHealth { + return { + native: { + status: input.native.status, + lastSampleAt: input.native.lastSampleAt, + lastError: input.native.lastError, + }, + desktop: { + status: input.desktop.status, + lastSampleAt: input.desktop.lastSampleAt, + lastError: input.desktop.lastError, + }, + sidecarVersion: Option.map(input.native.hello, (hello) => hello.sidecarVersion), + sidecarPid: Option.map(input.native.hello, (hello) => hello.sidecarPid), + restartCount: input.native.restartCount, + collectionDurationMicros: Option.match(input.nativeSnapshot, { + onNone: () => 0, + onSome: (snapshot) => snapshot.collectionDurationMicros, + }), + scannedProcessCount: Option.match(input.nativeSnapshot, { + onNone: () => 0, + onSome: (snapshot) => snapshot.scannedProcessCount, + }), + retainedProcessCount: Option.match(input.nativeSnapshot, { + onNone: () => 0, + onSome: (snapshot) => snapshot.retainedProcessCount, + }), + inaccessibleProcessCount: Option.match(input.nativeSnapshot, { + onNone: () => 0, + onSome: (snapshot) => snapshot.inaccessibleProcessCount, + }), + }; +} + +export const make = Effect.fn("resourceTelemetry.resourceTelemetry.make")(function* () { + const nativeClient = yield* NativeTelemetryClient.NativeTelemetryClient; + const desktopReceiver = yield* DesktopTelemetryReceiver.DesktopTelemetryReceiver; + const attribution = yield* ResourceAttribution.ResourceAttribution; + const mutex = yield* Semaphore.make(1); + const changes = yield* PubSub.sliding(8); + const initialReadAt = yield* DateTime.now; + const initialDesktop = yield* 
desktopReceiver.latest; + if (Option.isSome(initialDesktop)) { + yield* nativeClient + .setExternalProcesses([{ pid: initialDesktop.value.electronPid }]) + .pipe(Effect.ignore); + yield* nativeClient.setHostPowerState(initialDesktop.value.power).pipe(Effect.ignore); + } + const [initialNativeHealth, initialDesktopHealth, initialAttribution] = yield* Effect.all([ + nativeClient.health, + desktopReceiver.health, + attribution.snapshot, + ]); + const initialMerge = mergeProcesses({ + serverPid: process.pid, + sidecarPid: Option.map(initialNativeHealth.hello, (hello) => hello.sidecarPid), + fallbackSampledAtMs: DateTime.toEpochMillis(initialReadAt), + nativeSnapshot: Option.none(), + desktopSnapshot: initialDesktop, + previous: new Map(), + counters: emptyTelemetryCounters(), + updatePrevious: false, + }); + const initialSnapshot: ResourceTelemetrySnapshot = { + readAt: initialReadAt, + sampleIntervalMs: initialNativeHealth.sampleIntervalMs, + processes: initialMerge.processes, + groups: initialMerge.groups, + power: Option.match(initialDesktop, { + onNone: () => unknownPower(initialReadAt), + onSome: (desktop) => desktop.power, + }), + speedLimitPercent: Option.flatMap(initialDesktop, (desktop) => desktop.speedLimitPercent), + attribution: initialAttribution, + health: buildHealth({ + native: initialNativeHealth, + desktop: initialDesktopHealth, + nativeSnapshot: Option.none(), + }), + }; + const state = yield* Ref.make({ + nativeSnapshot: Option.none(), + desktopSnapshot: initialDesktop, + previous: new Map(), + counters: emptyTelemetryCounters(), + latest: initialSnapshot, + lastNativeSequence: 0, + lastNativeRestartCount: initialNativeHealth.restartCount, + }); + const liveState = yield* Ref.make({ + retainCount: 0, + scope: Option.none(), + }); + const liveMutex = yield* Semaphore.make(1); + const refreshHealth = mutex.withPermits(1)( + Effect.gen(function* () { + const current = yield* Ref.get(state); + const [nativeHealth, desktopHealth] = yield* Effect.all([ + 
nativeClient.health, + desktopReceiver.health, + ]); + const snapshot: ResourceTelemetrySnapshot = { + ...current.latest, + health: buildHealth({ + native: nativeHealth, + desktop: desktopHealth, + nativeSnapshot: current.nativeSnapshot, + }), + }; + yield* Ref.set(state, { + ...current, + latest: snapshot, + }); + if ((yield* Ref.get(liveState)).retainCount > 0) { + yield* PubSub.publish(changes, snapshot); + } + }), + ); + + const rebuild = (input: { + readonly nativeSnapshot?: ResourceMonitorSnapshotEvent; + readonly desktopSnapshot?: DesktopHostTelemetrySnapshot; + readonly updatePrevious: boolean; + readonly publish?: boolean; + }): Effect.Effect => + mutex.withPermits(1)( + Effect.gen(function* () { + const current = yield* Ref.get(state); + const nativeHealth = yield* nativeClient.health; + const nativeGenerationChanged = + nativeHealth.restartCount !== current.lastNativeRestartCount; + if ( + input.nativeSnapshot && + !nativeGenerationChanged && + input.nativeSnapshot.sequence <= current.lastNativeSequence + ) { + return current.latest; + } + const nativeSnapshot = input.nativeSnapshot + ? Option.some(input.nativeSnapshot) + : current.nativeSnapshot; + const desktopSnapshot = input.desktopSnapshot + ? 
Option.some(input.desktopSnapshot) + : current.desktopSnapshot; + const [desktopHealth, attributionSnapshot] = yield* Effect.all([ + desktopReceiver.health, + attribution.snapshot, + ]); + const merged = mergeProcesses({ + serverPid: process.pid, + sidecarPid: Option.map(nativeHealth.hello, (hello) => hello.sidecarPid), + fallbackSampledAtMs: DateTime.toEpochMillis(current.latest.readAt), + nativeSnapshot, + desktopSnapshot, + electronRootPids: Option.match(desktopSnapshot, { + onNone: () => new Set(), + onSome: (desktop) => new Set([desktop.electronPid]), + }), + previous: current.previous, + counters: current.counters, + updatePrevious: input.updatePrevious, + }); + const readAt = DateTime.makeUnsafe(merged.sampledAtMs); + const snapshot: ResourceTelemetrySnapshot = { + readAt, + sampleIntervalMs: nativeHealth.sampleIntervalMs, + processes: merged.processes, + groups: merged.groups, + power: Option.match(desktopSnapshot, { + onNone: () => unknownPower(readAt), + onSome: (desktop) => desktop.power, + }), + speedLimitPercent: Option.match(desktopSnapshot, { + onNone: () => Option.none(), + onSome: (desktop) => desktop.speedLimitPercent, + }), + attribution: attributionSnapshot, + health: buildHealth({ + native: nativeHealth, + desktop: desktopHealth, + nativeSnapshot, + }), + }; + yield* Ref.set(state, { + nativeSnapshot, + desktopSnapshot, + previous: merged.previous, + counters: merged.counters, + latest: snapshot, + lastNativeSequence: input.nativeSnapshot?.sequence ?? current.lastNativeSequence, + lastNativeRestartCount: input.nativeSnapshot + ? 
nativeHealth.restartCount + : current.lastNativeRestartCount, + }); + if (input.publish !== false) { + yield* PubSub.publish(changes, snapshot); + } + return snapshot; + }), + ); + + const ingestNative = (snapshot: ResourceMonitorSnapshotEvent) => + rebuild({ nativeSnapshot: snapshot, updatePrevious: true }); + const ingestDesktop = (snapshot: DesktopHostTelemetrySnapshot) => + Effect.gen(function* () { + yield* nativeClient.setExternalProcesses([{ pid: snapshot.electronPid }]).pipe(Effect.ignore); + yield* nativeClient.setHostPowerState(snapshot.power).pipe(Effect.ignore); + const live = (yield* Ref.get(liveState)).retainCount > 0; + return yield* rebuild({ desktopSnapshot: snapshot, updatePrevious: false, publish: live }); + }); + + yield* desktopReceiver.changes.pipe( + Stream.runForEach((snapshot) => ingestDesktop(snapshot)), + Effect.forkScoped, + ); + + const acquireLive = liveMutex.withPermits(1)( + Effect.gen(function* () { + const current = yield* Ref.get(liveState); + if (current.retainCount > 0) { + yield* Ref.set(liveState, { ...current, retainCount: current.retainCount + 1 }); + return; + } + + const scope = yield* Scope.make(); + yield* Ref.set(liveState, { retainCount: 1, scope: Option.some(scope) }); + yield* desktopReceiver.setDiagnosticsDemand(true).pipe(Effect.ignore); + yield* nativeClient.snapshots.pipe( + Stream.runForEach(ingestNative), + Effect.catch((error) => + Effect.logWarning("Native resource telemetry stream stopped", { + cause: error.message, + }), + ), + Effect.forkIn(scope), + ); + yield* nativeClient.sampleNow.pipe(Effect.flatMap(ingestNative), Effect.ignore); + }), + ); + + const releaseLive = liveMutex.withPermits(1)( + Effect.gen(function* () { + const current = yield* Ref.get(liveState); + if (current.retainCount <= 1) { + yield* Ref.set(liveState, { retainCount: 0, scope: Option.none() }); + if (Option.isSome(current.scope)) { + yield* Scope.close(current.scope.value, Exit.void).pipe(Effect.ignore); + } + yield* 
desktopReceiver.setDiagnosticsDemand(false).pipe(Effect.ignore); + return; + } + yield* Ref.set(liveState, { ...current, retainCount: current.retainCount - 1 }); + }), + ); + + const liveChanges = Stream.unwrap( + Effect.gen(function* () { + const subscription = yield* PubSub.subscribe(changes); + yield* Effect.acquireRelease(acquireLive, () => releaseLive); + return Stream.fromSubscription(subscription); + }), + ); + + const readHistory: ResourceTelemetryShape["readHistory"] = (input) => + Effect.gen(function* () { + const readAt = yield* DateTime.now; + const normalizedInput = normalizeResourceTelemetryHistoryInput(input); + const historyResult = yield* Effect.result( + nativeClient.readHistory(normalizedInput.windowMs), + ); + if (Result.isFailure(historyResult)) { + yield* Effect.logWarning("Failed to read native resource telemetry history", { + cause: historyResult.failure.message, + }); + } + const [nativeHealth, desktopHealth] = yield* Effect.all([ + nativeClient.health, + desktopReceiver.health, + ]); + const current = yield* Ref.get(state); + return buildResourceTelemetryHistory({ + readAt, + windowMs: normalizedInput.windowMs, + bucketMs: normalizedInput.bucketMs, + sampleIntervalMs: nativeHealth.sampleIntervalMs, + serverPid: process.pid, + sidecarPid: Option.map(nativeHealth.hello, (hello) => hello.sidecarPid), + desktopSnapshot: current.desktopSnapshot, + snapshots: Result.isSuccess(historyResult) ? 
historyResult.success : [], + health: buildHealth({ + native: nativeHealth, + desktop: desktopHealth, + nativeSnapshot: current.nativeSnapshot, + }), + }); + }); + yield* nativeClient.healthChanges.pipe( + Stream.runForEach(() => refreshHealth), + Effect.forkScoped, + ); + yield* desktopReceiver.healthChanges.pipe( + Stream.runForEach(() => refreshHealth), + Effect.forkScoped, + ); + + const refresh: ResourceTelemetryShape["refresh"] = nativeClient.sampleNow.pipe( + Effect.flatMap(ingestNative), + Effect.mapError( + (cause) => + new ResourceTelemetryRefreshFailed({ + operation: "refresh", + cause, + }), + ), + ); + + const validateProcessIdentity: ResourceTelemetryShape["validateProcessIdentity"] = (identity) => + nativeClient.sampleNow.pipe( + Effect.map((snapshot) => + snapshot.processes.some( + (process) => process.pid === identity.pid && process.startTimeMs === identity.startTimeMs, + ), + ), + Effect.mapError( + (cause) => + new ResourceTelemetryRefreshFailed({ + operation: "validateProcessIdentity", + cause, + }), + ), + ); + + return ResourceTelemetry.of({ + latest: Ref.get(state).pipe(Effect.map((current) => current.latest)), + changes: liveChanges, + readHistory, + refresh, + validateProcessIdentity, + retry: nativeClient.retry.pipe( + Effect.zip(Ref.get(state)), + Effect.map( + ([accepted, current]): ResourceTelemetryRetryResult => ({ + accepted, + snapshot: current.latest, + }), + ), + ), + }); +}); + +export const layer = Layer.effect(ResourceTelemetry, make()); diff --git a/apps/server/src/resourceTelemetry/ResourceTelemetryHistory.test.ts b/apps/server/src/resourceTelemetry/ResourceTelemetryHistory.test.ts new file mode 100644 index 00000000000..82ebab1b44c --- /dev/null +++ b/apps/server/src/resourceTelemetry/ResourceTelemetryHistory.test.ts @@ -0,0 +1,167 @@ +import type { + DesktopHostTelemetrySnapshot, + ResourceMonitorProcessSample, + ResourceMonitorSnapshotEvent, + ResourceTelemetryHealth, +} from "@t3tools/contracts"; +import { describe, 
expect, it } from "@effect/vitest"; +import * as DateTime from "effect/DateTime"; +import * as Option from "effect/Option"; + +import { + buildResourceTelemetryHistory, + normalizeResourceTelemetryHistoryInput, +} from "./ResourceTelemetryHistory.ts"; + +const SERVER_PID = 100; +const ELECTRON_PID = 200; +const CHILD_PID = 300; +const STARTED_AT_MS = DateTime.toEpochMillis(DateTime.makeUnsafe("2026-06-17T12:00:00.000Z")); + +function processSample( + input: Partial & + Pick, +): ResourceMonitorProcessSample { + return { + runTimeMs: 1_000, + name: `process-${input.pid}`, + command: `process-${input.pid}`, + status: "Running", + cpuPercent: 0, + cpuTimeMs: 0, + residentBytes: 1_024, + virtualBytes: 2_048, + ioReadBytes: 0, + ioWriteBytes: 0, + ioSemantics: "storage", + ...input, + }; +} + +function snapshot( + sequence: number, + sampledAtUnixMs: number, + childCpuTimeMs: number, + childWriteBytes: number, +): ResourceMonitorSnapshotEvent { + const processes = [ + processSample({ pid: SERVER_PID, ppid: 1, startTimeMs: 10 }), + processSample({ + pid: ELECTRON_PID, + ppid: 1, + startTimeMs: 20, + name: "electron", + command: "electron", + }), + processSample({ + pid: CHILD_PID, + ppid: SERVER_PID, + startTimeMs: 30, + name: "codex", + command: "codex app-server", + cpuTimeMs: childCpuTimeMs, + ioWriteBytes: childWriteBytes, + }), + ]; + return { + version: 2, + type: "snapshot", + sequence, + sampledAtUnixMs, + collectionDurationMicros: 100, + scannedProcessCount: processes.length, + retainedProcessCount: processes.length, + inaccessibleProcessCount: 0, + processes, + }; +} + +const health: ResourceTelemetryHealth = { + native: { + status: "healthy", + lastSampleAt: Option.none(), + lastError: Option.none(), + }, + desktop: { + status: "healthy", + lastSampleAt: Option.none(), + lastError: Option.none(), + }, + sidecarVersion: Option.some("0.1.0"), + sidecarPid: Option.some(400), + restartCount: 0, + collectionDurationMicros: 100, + scannedProcessCount: 3, + 
retainedProcessCount: 3, + inaccessibleProcessCount: 0, +}; + +function desktopSnapshot(): DesktopHostTelemetrySnapshot { + const sampledAt = DateTime.makeUnsafe(STARTED_AT_MS + 1_000); + return { + version: 1, + type: "desktopTelemetry", + sequence: 1, + sampledAtUnixMs: STARTED_AT_MS + 1_000, + electronPid: ELECTRON_PID, + power: { + source: "electron-main", + idle: "false", + idleSeconds: 0, + locked: "false", + suspended: false, + onBattery: "false", + lowPowerMode: "unknown", + thermalState: "nominal", + stale: false, + updatedAt: sampledAt, + }, + speedLimitPercent: Option.none(), + electronProcesses: [ + { + pid: ELECTRON_PID, + creationTimeMs: 20, + type: "Browser", + cpuPercent: 999, + idleWakeupsPerSecond: 999, + workingSetBytes: 999_999, + peakWorkingSetBytes: 999_999, + }, + ], + }; +} + +describe("buildResourceTelemetryHistory", () => { + it("normalizes query bounds before requesting native history", () => { + expect(normalizeResourceTelemetryHistoryInput({ windowMs: 0, bucketMs: 0 })).toEqual({ + windowMs: 1_000, + bucketMs: 1_000, + }); + }); + + it("replays native snapshots on demand without applying current Electron metrics", () => { + const history = buildResourceTelemetryHistory({ + readAt: DateTime.makeUnsafe(STARTED_AT_MS + 2_000), + windowMs: 10_000, + bucketMs: 10_000, + sampleIntervalMs: 1_000, + serverPid: SERVER_PID, + sidecarPid: Option.some(400), + desktopSnapshot: Option.some(desktopSnapshot()), + snapshots: [ + snapshot(1, STARTED_AT_MS, 100, 1_000), + snapshot(2, STARTED_AT_MS + 1_000, 350, 5_000), + ], + health, + }); + + const child = history.topProcesses.find((process) => process.identity.pid === CHILD_PID); + const electron = history.topProcesses.find((process) => process.identity.pid === ELECTRON_PID); + expect(child?.sampleCount).toBe(2); + expect(child?.cpuTimeMs).toBe(250); + expect(child?.ioWriteBytes).toBe(4_000); + expect(electron?.category).toBe("electron-main"); + expect(electron?.currentRssBytes).toBe(1_024); + 
expect(history.buckets.reduce((total, bucket) => total + bucket.ioWriteBytes, 0)).toBe(4_000); + }); +}); diff --git a/apps/server/src/resourceTelemetry/ResourceTelemetryHistory.ts b/apps/server/src/resourceTelemetry/ResourceTelemetryHistory.ts new file mode 100644 index 00000000000..6a6690bea0d --- /dev/null +++ b/apps/server/src/resourceTelemetry/ResourceTelemetryHistory.ts @@ -0,0 +1,214 @@ +import type { + DesktopHostTelemetrySnapshot, + ResourceMonitorSnapshotEvent, + ResourceTelemetryHealth, + ResourceTelemetryHistory, + ResourceTelemetryHistoryBucket, + ResourceTelemetryProcess, + ResourceTelemetryProcessSummary, +} from "@t3tools/contracts"; +import * as DateTime from "effect/DateTime"; +import * as Option from "effect/Option"; + +import { + emptyTelemetryCounters, + mergeProcesses, + processIdentityKey, + type ProcessState, + type TelemetryCounters, +} from "./Model.ts"; + +const MAX_HISTORY_WINDOW_MS = 60 * 60_000; + +export function normalizeResourceTelemetryHistoryInput(input: { + readonly windowMs: number; + readonly bucketMs: number; +}): { readonly windowMs: number; readonly bucketMs: number } { + const windowMs = Math.max(1_000, Math.min(MAX_HISTORY_WINDOW_MS, input.windowMs)); + return { + windowMs, + bucketMs: Math.max(1_000, Math.min(windowMs, input.bucketMs)), + }; +} + +interface AggregateSample { + readonly sampledAtMs: number; + readonly cpuPercent: number; + readonly rssBytes: number; + readonly processCount: number; + readonly ioReadBytes: number; + readonly ioWriteBytes: number; +} + +interface ProcessSample { + readonly sampledAtMs: number; + readonly process: ResourceTelemetryProcess; + readonly cpuTimeMs: number; + readonly ioReadBytes: number; + readonly ioWriteBytes: number; +} + +export interface BuildResourceTelemetryHistoryInput { + readonly readAt: DateTime.Utc; + readonly windowMs: number; + readonly bucketMs: number; + readonly sampleIntervalMs: number; + readonly serverPid: number; + readonly sidecarPid: Option.Option; + 
readonly desktopSnapshot: Option.Option; + readonly snapshots: ReadonlyArray; + readonly health: ResourceTelemetryHealth; +} + +function summarizeProcesses( + samples: ReadonlyArray, +): ReadonlyArray { + const groups = new Map(); + for (const sample of samples) { + const identityKey = processIdentityKey( + sample.process.identity.pid, + sample.process.identity.startTimeMs, + ); + const current = groups.get(identityKey) ?? []; + current.push(sample); + groups.set(identityKey, current); + } + + return [...groups.values()] + .map((processSamples): ResourceTelemetryProcessSummary => { + const sorted = processSamples.toSorted((left, right) => left.sampledAtMs - right.sampledAtMs); + const first = sorted[0]!; + const latest = sorted[sorted.length - 1]!; + const cpuTotal = sorted.reduce((total, sample) => total + sample.process.cpuPercent, 0); + return { + identity: latest.process.identity, + ppid: latest.process.ppid, + depth: latest.process.depth, + name: latest.process.name, + command: latest.process.command, + category: latest.process.category, + firstSeenAt: first.process.firstSeenAt, + lastSeenAt: latest.process.lastSeenAt, + currentCpuPercent: latest.process.cpuPercent, + avgCpuPercent: cpuTotal / sorted.length, + maxCpuPercent: Math.max(...sorted.map((sample) => sample.process.cpuPercent)), + cpuTimeMs: sorted.reduce((total, sample) => total + sample.cpuTimeMs, 0), + currentRssBytes: latest.process.residentBytes, + peakRssBytes: Math.max(...sorted.map((sample) => sample.process.peakResidentBytes)), + ioReadBytes: sorted.reduce((total, sample) => total + sample.ioReadBytes, 0), + ioWriteBytes: sorted.reduce((total, sample) => total + sample.ioWriteBytes, 0), + ioSemantics: latest.process.ioSemantics, + sampleCount: sorted.length, + }; + }) + .toSorted( + (left, right) => right.cpuTimeMs - left.cpuTimeMs || right.peakRssBytes - left.peakRssBytes, + ); +} + +function buildBuckets(input: { + readonly samples: ReadonlyArray; + readonly nowMs: number; + readonly 
windowMs: number; + readonly bucketMs: number; +}): ReadonlyArray { + const windowStartMs = input.nowMs - input.windowMs; + const buckets: ResourceTelemetryHistoryBucket[] = []; + for (let startedAtMs = windowStartMs; startedAtMs < input.nowMs; startedAtMs += input.bucketMs) { + const endedAtMs = Math.min(input.nowMs, startedAtMs + input.bucketMs); + const samples = input.samples.filter( + (sample) => + sample.sampledAtMs >= startedAtMs && + (endedAtMs === input.nowMs + ? sample.sampledAtMs <= endedAtMs + : sample.sampledAtMs < endedAtMs), + ); + const cpuTotal = samples.reduce((total, sample) => total + sample.cpuPercent, 0); + buckets.push({ + startedAt: DateTime.makeUnsafe(startedAtMs), + endedAt: DateTime.makeUnsafe(endedAtMs), + avgCpuPercent: samples.length === 0 ? 0 : cpuTotal / samples.length, + maxCpuPercent: + samples.length === 0 ? 0 : Math.max(...samples.map((sample) => sample.cpuPercent)), + maxRssBytes: samples.length === 0 ? 0 : Math.max(...samples.map((sample) => sample.rssBytes)), + ioReadBytes: samples.reduce((total, sample) => total + sample.ioReadBytes, 0), + ioWriteBytes: samples.reduce((total, sample) => total + sample.ioWriteBytes, 0), + maxProcessCount: + samples.length === 0 ? 
0 : Math.max(...samples.map((sample) => sample.processCount)), + }); + } + return buckets; +} + +export function buildResourceTelemetryHistory( + input: BuildResourceTelemetryHistoryInput, +): ResourceTelemetryHistory { + const readAtMs = DateTime.toEpochMillis(input.readAt); + const { windowMs, bucketMs } = normalizeResourceTelemetryHistoryInput(input); + const windowStartMs = readAtMs - windowMs; + const snapshots = input.snapshots + .filter((snapshot) => snapshot.sampledAtUnixMs >= windowStartMs) + .toSorted((left, right) => left.sampledAtUnixMs - right.sampledAtUnixMs); + const electronRootPids = Option.match(input.desktopSnapshot, { + onNone: () => new Set(), + onSome: (snapshot) => new Set([snapshot.electronPid]), + }); + const desktopIdentity = Option.map(input.desktopSnapshot, (snapshot) => ({ + ...snapshot, + electronProcesses: [], + })); + const aggregateSamples: AggregateSample[] = []; + const processSamples: ProcessSample[] = []; + let previous: ReadonlyMap = new Map(); + let counters: TelemetryCounters = emptyTelemetryCounters(); + + for (const snapshot of snapshots) { + const merged = mergeProcesses({ + serverPid: input.serverPid, + sidecarPid: input.sidecarPid, + fallbackSampledAtMs: snapshot.sampledAtUnixMs, + nativeSnapshot: Option.some(snapshot), + desktopSnapshot: desktopIdentity, + electronRootPids, + previous, + counters, + updatePrevious: true, + }); + previous = merged.previous; + counters = merged.counters; + const deltasByIdentity = new Map( + merged.deltas.map((processDelta) => [processDelta.identityKey, processDelta]), + ); + aggregateSamples.push({ + sampledAtMs: snapshot.sampledAtUnixMs, + cpuPercent: merged.groups.allT3.currentCpuPercent, + rssBytes: merged.groups.allT3.currentRssBytes, + processCount: merged.groups.allT3.processCount, + ioReadBytes: merged.deltas.reduce((total, process) => total + process.ioReadBytes, 0), + ioWriteBytes: merged.deltas.reduce((total, process) => total + process.ioWriteBytes, 0), + }); + for (const 
process of merged.processes) { + const processDelta = deltasByIdentity.get( + processIdentityKey(process.identity.pid, process.identity.startTimeMs), + ); + processSamples.push({ + sampledAtMs: snapshot.sampledAtUnixMs, + process, + cpuTimeMs: processDelta?.cpuTimeMs ?? 0, + ioReadBytes: processDelta?.ioReadBytes ?? 0, + ioWriteBytes: processDelta?.ioWriteBytes ?? 0, + }); + } + } + + return { + readAt: input.readAt, + windowMs, + bucketMs, + sampleIntervalMs: input.sampleIntervalMs, + retainedSampleCount: aggregateSamples.length + processSamples.length, + buckets: buildBuckets({ samples: aggregateSamples, nowMs: readAtMs, windowMs, bucketMs }), + topProcesses: summarizeProcesses(processSamples), + health: input.health, + }; +} diff --git a/apps/server/src/server.test.ts b/apps/server/src/server.test.ts index 205833289ea..5adc814c7fb 100644 --- a/apps/server/src/server.test.ts +++ b/apps/server/src/server.test.ts @@ -69,6 +69,7 @@ import { vi } from "vite-plus/test"; const TEST_EPOCH = DateTime.makeUnsafe("1970-01-01T00:00:00.000Z"); +import * as BackgroundPolicy from "./background/BackgroundPolicy.ts"; import type { ServerConfigShape } from "./config.ts"; import { deriveServerPaths, ServerConfig } from "./config.ts"; import { makeRoutesLayer } from "./server.ts"; @@ -140,6 +141,10 @@ import * as CloudCliTokenManager from "./cloud/CliTokenManager.ts"; import * as ProcessDiagnostics from "./diagnostics/ProcessDiagnostics.ts"; import * as ProcessResourceMonitor from "./diagnostics/ProcessResourceMonitor.ts"; import * as TraceDiagnostics from "./diagnostics/TraceDiagnostics.ts"; +import * as DesktopTelemetryReceiver from "./resourceTelemetry/DesktopTelemetryReceiver.ts"; +import * as NativeTelemetryClient from "./resourceTelemetry/NativeTelemetryClient.ts"; +import * as ResourceAttribution from "./resourceTelemetry/ResourceAttribution.ts"; +import * as ResourceTelemetry from "./resourceTelemetry/ResourceTelemetry.ts"; import * as Data from "effect/Data"; const 
defaultProjectId = ProjectId.make("project-default"); @@ -367,6 +372,8 @@ const buildAppUnderTest = (options?: { cloudManagedEndpointRuntime?: Partial; relayClient?: Partial; cloudCliTokenManager?: Partial; + nativeTelemetryClient?: Partial; + desktopTelemetryReceiver?: Partial; }; }) => Effect.gen(function* () { @@ -539,6 +546,15 @@ const buildAppUnderTest = (options?: { ...options.layers.vcsStatusBroadcaster, }) : VcsStatusBroadcaster.layer.pipe(Layer.provide(gitWorkflowLayer)); + const resourceTelemetryLayer = ResourceTelemetry.layer.pipe( + Layer.provide( + Layer.mergeAll( + NativeTelemetryClient.layerTest(options?.layers?.nativeTelemetryClient), + DesktopTelemetryReceiver.layerTest(options?.layers?.desktopTelemetryReceiver), + ResourceAttribution.layer, + ), + ), + ); const servedRoutesLayer = HttpRouter.serve(makeRoutesLayer, { disableListenLog: true, @@ -750,6 +766,7 @@ const buildAppUnderTest = (options?: { ); const appLayer = servedRoutesLayer.pipe( + Layer.provide(resourceTelemetryLayer), Layer.provide( Layer.mock(BrowserTraceCollector)({ record: () => Effect.void, @@ -772,6 +789,36 @@ const buildAppUnderTest = (options?: { ...options?.layers?.serverRuntimeStartup, }), ), + Layer.provide( + Layer.mock(BackgroundPolicy.BackgroundPolicy)({ + reportClientActivity: () => Effect.void, + removeRpcClient: () => Effect.void, + reportHostPowerState: () => Effect.void, + snapshot: Effect.succeed({ + hostPower: { + source: "unknown", + idle: "unknown", + idleSeconds: null, + locked: "unknown", + suspended: false, + onBattery: "unknown", + lowPowerMode: "unknown", + thermalState: "unknown", + stale: true, + updatedAt: TEST_EPOCH, + }, + leases: [], + activeForegroundLeaseCount: 0, + activeScopeKeys: [], + shouldRunOpportunisticWork: false, + updatedAt: TEST_EPOCH, + }), + streamChanges: Stream.empty, + hasDemand: () => Effect.succeed(false), + shouldRunScopeWork: () => Effect.succeed(false), + shouldRunOpportunisticWork: Effect.succeed(false), + }), + ), 
Layer.provide( Layer.mock(ServerEnvironment)({ getEnvironmentId: Effect.succeed(testEnvironmentDescriptor.environmentId), @@ -4225,6 +4272,23 @@ it.layer(NodeServices.layer)("server router seam", (it) => { }).pipe(Effect.provide(NodeHttpServer.layerTest)), ); + it.effect("routes websocket resource telemetry through the subscription", () => + Effect.gen(function* () { + yield* buildAppUnderTest(); + + const wsUrl = yield* getWsServerUrl("/ws"); + const snapshot = yield* Effect.scoped( + withWsRpcClient(wsUrl, (client) => + client[WS_METHODS.subscribeResourceTelemetry]({}).pipe(Stream.runHead), + ), + ); + + assertTrue(Option.isSome(snapshot)); + assert.equal(snapshot.value.processes.length, 0); + assert.equal(snapshot.value.groups.backend.processCount, 0); + }).pipe(Effect.provide(NodeHttpServer.layerTest)), + ); + it.effect("routes websocket rpc subscribeServerConfig emits provider status updates", () => Effect.gen(function* () { const nextProviders = [ diff --git a/apps/server/src/server.ts b/apps/server/src/server.ts index 1da0ea27a65..39f5433334b 100644 --- a/apps/server/src/server.ts +++ b/apps/server/src/server.ts @@ -5,6 +5,8 @@ import { FetchHttpClient, HttpRouter, HttpServer } from "effect/unstable/http"; import * as HttpApiBuilder from "effect/unstable/httpapi/HttpApiBuilder"; import { ServerConfig } from "./config.ts"; +import * as BackgroundPolicy from "./background/BackgroundPolicy.ts"; +import * as HostPowerMonitor from "./background/HostPowerMonitor.ts"; import { otlpTracesProxyRouteLayer, assetRouteLayer, @@ -81,6 +83,11 @@ import * as CloudCliState from "./cloud/CliState.ts"; import * as ProcessDiagnostics from "./diagnostics/ProcessDiagnostics.ts"; import * as ProcessResourceMonitor from "./diagnostics/ProcessResourceMonitor.ts"; import * as TraceDiagnostics from "./diagnostics/TraceDiagnostics.ts"; +import * as DesktopTelemetryReceiver from "./resourceTelemetry/DesktopTelemetryReceiver.ts"; +import * as NativeTelemetryClient from 
"./resourceTelemetry/NativeTelemetryClient.ts"; +import * as ResourceAttribution from "./resourceTelemetry/ResourceAttribution.ts"; +import * as ResourceMonitorBinary from "./resourceTelemetry/ResourceMonitorBinary.ts"; +import * as ResourceTelemetry from "./resourceTelemetry/ResourceTelemetry.ts"; import { OrchestrationLayerLive } from "./orchestration/runtimeLayer.ts"; import { clearPersistedServerRuntimeState, @@ -97,6 +104,10 @@ import { disableTailscaleServe, ensureTailscaleServe } from "@t3tools/tailscale" // already closes the websocket gracefully. Do not add an artificial drain before // those finalizers get a chance to run. const HTTP_PREEMPTIVE_SHUTDOWN_GRACE_MS = 0; +const ResourceAttributionLayerLive = ResourceAttribution.layer; +const ApplicationObservabilityLive = ObservabilityLive.pipe( + Layer.provideMerge(ResourceAttributionLayerLive), +); const PtyAdapterLive = Layer.unwrap( Effect.gen(function* () { @@ -110,6 +121,31 @@ const PtyAdapterLive = Layer.unwrap( }), ); +const NativeTelemetryLayerLive = NativeTelemetryClient.layer.pipe( + Layer.provide(ResourceMonitorBinary.layer), +); +const DesktopTelemetryReceiverLayerLive = DesktopTelemetryReceiver.layer; + +const ResourceTelemetryLayerLive = ResourceTelemetry.layer.pipe( + Layer.provideMerge(NativeTelemetryLayerLive), + Layer.provideMerge(DesktopTelemetryReceiverLayerLive), +); + +const HostPowerMonitorLayerLive = HostPowerMonitor.layer.pipe( + Layer.provide(DesktopTelemetryReceiverLayerLive), +); + +const BackgroundLayerLive = BackgroundPolicy.layer.pipe( + Layer.provide(HostPowerMonitorLayerLive), + Layer.provideMerge(ServerSettingsLive), +); + +const ResourceDiagnosticsLayerLive = Layer.mergeAll( + ResourceTelemetryLayerLive, + ProcessDiagnostics.layer.pipe(Layer.provide(ResourceTelemetryLayerLive)), + ProcessResourceMonitor.layer.pipe(Layer.provide(ResourceTelemetryLayerLive)), +); + const RelayClientLive = Layer.unwrap( Effect.gen(function* () { const config = yield* ServerConfig; @@ -285,6 
+321,7 @@ const ProviderRuntimeLayerLive = ProviderSessionReaperLive.pipe( const RuntimeCoreDependenciesLive = ReactorLayerLive.pipe( // Core Services + Layer.provideMerge(ServerSettingsLive), Layer.provideMerge(CheckpointingLayerLive), Layer.provideMerge(SourceControlProviderRegistryLayerLive), Layer.provideMerge(GitLayerLive), @@ -312,7 +349,6 @@ const RuntimeCoreDependenciesLive = ReactorLayerLive.pipe( // no longer transitively provides it. Exposing it at the runtime level // keeps a single Live for all opencode consumers. Layer.provideMerge(OpenCodeRuntimeLive), - Layer.provideMerge(ServerSettingsLive), Layer.provideMerge(WorkspaceLayerLive), Layer.provideMerge(ProjectFaviconResolverLayerLive), Layer.provideMerge(RepositoryIdentityResolverLive), @@ -329,8 +365,8 @@ const RuntimeCoreDependenciesLive = ReactorLayerLive.pipe( const RuntimeDependenciesLive = RuntimeCoreDependenciesLive.pipe( // Misc. - Layer.provideMerge(ProcessDiagnostics.layer), - Layer.provideMerge(ProcessResourceMonitor.layer), + Layer.provideMerge(BackgroundLayerLive), + Layer.provideMerge(ResourceDiagnosticsLayerLive), Layer.provideMerge(TraceDiagnostics.layer), Layer.provideMerge(AnalyticsServiceLayerLive), Layer.provideMerge(ExternalLauncher.layer), @@ -480,7 +516,7 @@ export const makeServerLayer = Layer.unwrap( Layer.provideMerge(RuntimeServicesLive), Layer.provideMerge(serverRelayBrokerTracingLayer), Layer.provideMerge(HttpServerLive), - Layer.provide(ObservabilityLive), + Layer.provide(ApplicationObservabilityLive), Layer.provideMerge(FetchHttpClient.layer), Layer.provideMerge(VcsProcess.layer), Layer.provideMerge(PlatformServicesLive), diff --git a/apps/server/src/serverSettings.test.ts b/apps/server/src/serverSettings.test.ts index d24f2ee2826..eb8cc3083c2 100644 --- a/apps/server/src/serverSettings.test.ts +++ b/apps/server/src/serverSettings.test.ts @@ -462,6 +462,14 @@ it.layer(NodeServices.layer)("server settings", (it) => { serverPassword: "secret-password", }, }, + 
backgroundActivity: { + schemaVersion: 1, + profile: "custom", + baseProfile: "balanced", + overrides: { + automaticGitFetchInterval: 10_000, + }, + }, automaticGitFetchInterval: 10_000, }); }).pipe(Effect.provide(makeServerSettingsLayer())), diff --git a/apps/server/src/serverSettings.ts b/apps/server/src/serverSettings.ts index 0e126604b4a..af9b5f0fa58 100644 --- a/apps/server/src/serverSettings.ts +++ b/apps/server/src/serverSettings.ts @@ -135,13 +135,17 @@ export class ServerSettingsService extends Context.Service< Layer.effect( ServerSettingsService, Effect.gen(function* () { - const { automaticGitFetchInterval, ...overridesForMerge } = overrides; + const { automaticGitFetchInterval, providerHealthRefreshInterval, ...overridesForMerge } = + overrides; const merged = deepMerge(DEFAULT_SERVER_SETTINGS, overridesForMerge); const initialSettings = yield* normalizeServerSettings({ ...merged, ...(automaticGitFetchInterval !== undefined ? { automaticGitFetchInterval: automaticGitFetchInterval as Duration.Duration } : {}), + ...(providerHealthRefreshInterval !== undefined + ? { providerHealthRefreshInterval: providerHealthRefreshInterval as Duration.Duration } + : {}), }); const currentSettingsRef = yield* Ref.make(initialSettings); @@ -216,7 +220,9 @@ function fallbackTextGenerationProvider(settings: ServerSettings): ServerSetting // Values under these keys are compared as a whole — never stripped field-by-field. 
const ATOMIC_SETTINGS_KEYS: ReadonlySet = new Set([ + "backgroundActivity", "automaticGitFetchInterval", + "providerHealthRefreshInterval", "textGenerationModelSelection", ]); diff --git a/apps/server/src/vcs/VcsStatusBroadcaster.test.ts b/apps/server/src/vcs/VcsStatusBroadcaster.test.ts index d78999f88c1..367fd677a54 100644 --- a/apps/server/src/vcs/VcsStatusBroadcaster.test.ts +++ b/apps/server/src/vcs/VcsStatusBroadcaster.test.ts @@ -1,6 +1,7 @@ import { assert, it, describe } from "@effect/vitest"; import * as NodeServices from "@effect/platform-node/NodeServices"; import * as Deferred from "effect/Deferred"; +import * as DateTime from "effect/DateTime"; import * as Duration from "effect/Duration"; import * as Effect from "effect/Effect"; import * as Exit from "effect/Exit"; @@ -12,6 +13,7 @@ import * as Scope from "effect/Scope"; import * as Stream from "effect/Stream"; import * as TestClock from "effect/testing/TestClock"; import type { + BackgroundScope, VcsStatusLocalResult, VcsStatusRemoteResult, VcsStatusResult, @@ -20,8 +22,11 @@ import type { import { GitManagerError } from "@t3tools/contracts"; import * as VcsStatusBroadcaster from "./VcsStatusBroadcaster.ts"; +import * as BackgroundPolicy from "../background/BackgroundPolicy.ts"; import * as GitWorkflowService from "../git/GitWorkflowService.ts"; +const TEST_EPOCH = DateTime.makeUnsafe("1970-01-01T00:00:00.000Z"); + const baseLocalStatus: VcsStatusLocalResult = { isRepo: true, sourceControlProvider: { @@ -71,6 +76,7 @@ function makeTestLayer(state: { }) { return VcsStatusBroadcaster.layer.pipe( Layer.provideMerge(NodeServices.layer), + Layer.provide(makeBackgroundPolicyLayer(() => true)), Layer.provide( Layer.mock(GitWorkflowService.GitWorkflowService)({ localStatus: () => @@ -97,6 +103,37 @@ function makeTestLayer(state: { ); } +function makeBackgroundPolicyLayer(shouldRunScopeWork: (scope: BackgroundScope) => boolean) { + return Layer.mock(BackgroundPolicy.BackgroundPolicy)({ + reportClientActivity: 
() => Effect.void, + removeRpcClient: () => Effect.void, + reportHostPowerState: () => Effect.void, + snapshot: Effect.succeed({ + hostPower: { + source: "unknown", + idle: "unknown", + idleSeconds: null, + locked: "unknown", + suspended: false, + onBattery: "unknown", + lowPowerMode: "unknown", + thermalState: "unknown", + stale: true, + updatedAt: TEST_EPOCH, + }, + leases: [], + activeForegroundLeaseCount: 0, + activeScopeKeys: [], + shouldRunOpportunisticWork: false, + updatedAt: TEST_EPOCH, + }), + streamChanges: Stream.empty, + hasDemand: () => Effect.succeed(true), + shouldRunScopeWork: (scope) => Effect.sync(() => shouldRunScopeWork(scope)), + shouldRunOpportunisticWork: Effect.succeed(true), + }); +} + describe("VcsStatusBroadcaster", () => { it.effect("reuses the cached VCS status across repeated reads", () => { const state = { @@ -176,6 +213,7 @@ describe("VcsStatusBroadcaster", () => { }; const testLayer = VcsStatusBroadcaster.layer.pipe( Layer.provideMerge(NodeServices.layer), + Layer.provide(makeBackgroundPolicyLayer(() => true)), Layer.provide( Layer.mock(GitWorkflowService.GitWorkflowService)({ localStatus: () => @@ -277,6 +315,7 @@ describe("VcsStatusBroadcaster", () => { }; const testLayer = VcsStatusBroadcaster.layer.pipe( Layer.provideMerge(NodeServices.layer), + Layer.provide(makeBackgroundPolicyLayer(() => true)), Layer.provide( Layer.mock(GitWorkflowService.GitWorkflowService)({ localStatus: (input) => @@ -434,6 +473,7 @@ describe("VcsStatusBroadcaster", () => { let firstRemoteAttemptDeferred: Deferred.Deferred | null = null; const testLayer = VcsStatusBroadcaster.layer.pipe( Layer.provideMerge(NodeServices.layer), + Layer.provide(makeBackgroundPolicyLayer(() => true)), Layer.provide( Layer.mock(GitWorkflowService.GitWorkflowService)({ localStatus: () => @@ -573,6 +613,57 @@ describe("VcsStatusBroadcaster", () => { ); }); + it.effect("does not start automatic remote refreshes without foreground client demand", () => { + const state = { + 
currentLocalStatus: baseLocalStatus, + currentRemoteStatus: baseRemoteStatus, + localStatusCalls: 0, + remoteStatusCalls: 0, + localInvalidationCalls: 0, + remoteInvalidationCalls: 0, + }; + const testLayer = VcsStatusBroadcaster.layer.pipe( + Layer.provideMerge(NodeServices.layer), + Layer.provide(makeBackgroundPolicyLayer(() => false)), + Layer.provide( + Layer.mock(GitWorkflowService.GitWorkflowService)({ + localStatus: () => + Effect.sync(() => { + state.localStatusCalls += 1; + return state.currentLocalStatus; + }), + remoteStatus: () => + Effect.sync(() => { + state.remoteStatusCalls += 1; + return state.currentRemoteStatus; + }), + invalidateLocalStatus: () => + Effect.sync(() => { + state.localInvalidationCalls += 1; + }), + invalidateRemoteStatus: () => + Effect.sync(() => { + state.remoteInvalidationCalls += 1; + }), + } satisfies Partial), + ), + ); + + return Effect.gen(function* () { + const broadcaster = yield* VcsStatusBroadcaster.VcsStatusBroadcaster; + const snapshot = yield* Stream.runHead( + broadcaster.streamStatus( + { cwd: "/repo" }, + { automaticRemoteRefreshInterval: Effect.succeed(Duration.seconds(1)) }, + ), + ); + + assert.isTrue(Option.isSome(snapshot)); + assert.equal(state.remoteStatusCalls, 0); + assert.equal(state.remoteInvalidationCalls, 0); + }).pipe(Effect.provide(testLayer)); + }); + it.effect("stops the remote poller after the last stream subscriber disconnects", () => { const state = { currentLocalStatus: baseLocalStatus, @@ -586,6 +677,7 @@ describe("VcsStatusBroadcaster", () => { let remoteStartedDeferred: Deferred.Deferred | null = null; const testLayer = VcsStatusBroadcaster.layer.pipe( Layer.provideMerge(NodeServices.layer), + Layer.provide(makeBackgroundPolicyLayer(() => true)), Layer.provide( Layer.mock(GitWorkflowService.GitWorkflowService)({ localStatus: () => diff --git a/apps/server/src/vcs/VcsStatusBroadcaster.ts b/apps/server/src/vcs/VcsStatusBroadcaster.ts index f0cacab2dcb..7039843565c 100644 --- 
a/apps/server/src/vcs/VcsStatusBroadcaster.ts +++ b/apps/server/src/vcs/VcsStatusBroadcaster.ts @@ -21,6 +21,7 @@ import type { } from "@t3tools/contracts"; import { mergeGitStatusParts } from "@t3tools/shared/git"; +import * as BackgroundPolicy from "../background/BackgroundPolicy.ts"; import * as GitWorkflowService from "../git/GitWorkflowService.ts"; const DEFAULT_VCS_STATUS_REFRESH_INTERVAL = Duration.seconds(30); @@ -98,6 +99,7 @@ export const layer = Layer.effect( VcsStatusBroadcaster, Effect.gen(function* () { const workflow = yield* GitWorkflowService.GitWorkflowService; + const backgroundPolicy = yield* BackgroundPolicy.BackgroundPolicy; const fs = yield* FileSystem.FileSystem; const changesPubSub = yield* Effect.acquireRelease( PubSub.unbounded(), @@ -317,6 +319,14 @@ export const layer = Layer.effect( return activeInterval; } + const shouldRun = yield* backgroundPolicy.shouldRunScopeWork({ + type: "vcs-status", + cwd, + }); + if (!shouldRun) { + return activeInterval; + } + const exit = yield* refreshRemoteStatus(cwd, { refreshUpstream: !Duration.isZero(configuredInterval), }).pipe(Effect.exit); diff --git a/apps/server/src/ws.ts b/apps/server/src/ws.ts index 1ad37e7c49b..db2475e8d3d 100644 --- a/apps/server/src/ws.ts +++ b/apps/server/src/ws.ts @@ -42,6 +42,7 @@ import { type RelayClientInstallProgressEvent, OrchestrationReplayEventsError, FilesystemBrowseError, + RpcClientId, AssetAccessError, EnvironmentAuthorizationError, ThreadId, @@ -52,6 +53,7 @@ import { WS_METHODS, WsRpcGroup, } from "@t3tools/contracts"; +import { resolveServerBackgroundActivitySettings } from "@t3tools/shared/backgroundActivitySettings"; import { clamp } from "effect/Number"; import { HttpRouter, HttpServerRequest, HttpServerRespondable } from "effect/unstable/http"; import { RpcSerialization, RpcServer } from "effect/unstable/rpc"; @@ -88,10 +90,12 @@ import { ReviewService } from "./review/ReviewService.ts"; import { ProjectSetupScriptRunner } from 
"./project/Services/ProjectSetupScriptRunner.ts"; import { RepositoryIdentityResolver } from "./project/Services/RepositoryIdentityResolver.ts"; import { ServerEnvironment } from "./environment/Services/ServerEnvironment.ts"; +import * as BackgroundPolicy from "./background/BackgroundPolicy.ts"; import * as EnvironmentAuth from "./auth/EnvironmentAuth.ts"; import type { AuthenticatedSession } from "./auth/EnvironmentAuth.ts"; import * as ProcessDiagnostics from "./diagnostics/ProcessDiagnostics.ts"; import * as ProcessResourceMonitor from "./diagnostics/ProcessResourceMonitor.ts"; +import * as ResourceTelemetry from "./resourceTelemetry/ResourceTelemetry.ts"; import * as TraceDiagnostics from "./diagnostics/TraceDiagnostics.ts"; import * as SourceControlDiscoveryLayer from "./sourceControl/SourceControlDiscovery.ts"; import { SourceControlRepositoryService } from "./sourceControl/SourceControlRepositoryService.ts"; @@ -156,6 +160,8 @@ const RPC_REQUIRED_SCOPE = new Map([ [WS_METHODS.serverGetTraceDiagnostics, AuthOrchestrationReadScope], [WS_METHODS.serverGetProcessDiagnostics, AuthOrchestrationReadScope], [WS_METHODS.serverGetProcessResourceHistory, AuthOrchestrationReadScope], + [WS_METHODS.serverGetResourceTelemetryHistory, AuthOrchestrationReadScope], + [WS_METHODS.serverRetryResourceTelemetry, AuthOrchestrationOperateScope], [WS_METHODS.serverSignalProcess, AuthOrchestrationOperateScope], [WS_METHODS.cloudGetRelayClientStatus, AuthRelayWriteScope], [WS_METHODS.cloudInstallRelayClient, AuthRelayWriteScope], @@ -170,6 +176,7 @@ const RPC_REQUIRED_SCOPE = new Map([ [WS_METHODS.filesystemBrowse, AuthOrchestrationReadScope], [WS_METHODS.assetsCreateUrl, AuthOrchestrationReadScope], [WS_METHODS.subscribeVcsStatus, AuthOrchestrationReadScope], + [WS_METHODS.subscribeResourceTelemetry, AuthOrchestrationReadScope], [WS_METHODS.vcsRefreshStatus, AuthOrchestrationReadScope], [WS_METHODS.vcsPull, AuthOrchestrationOperateScope], [WS_METHODS.gitRunStackedAction, 
AuthOrchestrationOperateScope], @@ -277,10 +284,13 @@ const makeWsRpcLayer = (currentSession: AuthenticatedSession) => const projectSetupScriptRunner = yield* ProjectSetupScriptRunner; const repositoryIdentityResolver = yield* RepositoryIdentityResolver; const serverEnvironment = yield* ServerEnvironment; + const backgroundPolicy = yield* BackgroundPolicy.BackgroundPolicy; const serverAuth = yield* EnvironmentAuth.EnvironmentAuth; const sourceControlDiscovery = yield* SourceControlDiscoveryLayer.SourceControlDiscovery; const automaticGitFetchInterval = serverSettings.getSettings.pipe( - Effect.map((settings) => settings.automaticGitFetchInterval), + Effect.map( + (settings) => resolveServerBackgroundActivitySettings(settings).automaticGitFetchInterval, + ), Effect.catch((cause) => Effect.logWarning("Failed to read automatic Git fetch interval setting", { detail: cause.message, @@ -292,6 +302,7 @@ const makeWsRpcLayer = (currentSession: AuthenticatedSession) => const sessions = yield* SessionStore.SessionStore; const processDiagnostics = yield* ProcessDiagnostics.ProcessDiagnostics; const processResourceMonitor = yield* ProcessResourceMonitor.ProcessResourceMonitor; + const resourceTelemetry = yield* ResourceTelemetry.ResourceTelemetry; const relayClient = yield* RelayClient.RelayClient; const authorizationError = (requiredScope: AuthEnvironmentScope) => new EnvironmentAuthorizationError({ @@ -1112,10 +1123,42 @@ const makeWsRpcLayer = (currentSession: AuthenticatedSession) => "rpc.aggregate": "server", }, ), + [WS_METHODS.serverGetResourceTelemetryHistory]: (input) => + observeRpcEffect( + WS_METHODS.serverGetResourceTelemetryHistory, + resourceTelemetry.readHistory(input), + { + "rpc.aggregate": "server", + }, + ), + [WS_METHODS.serverRetryResourceTelemetry]: (_input) => + observeRpcEffect(WS_METHODS.serverRetryResourceTelemetry, resourceTelemetry.retry, { + "rpc.aggregate": "server", + }), [WS_METHODS.serverSignalProcess]: (input) => 
observeRpcEffect(WS_METHODS.serverSignalProcess, processDiagnostics.signal(input), { "rpc.aggregate": "server", }), + [WS_METHODS.serverReportClientActivity]: (input, metadata) => + observeRpcEffect( + WS_METHODS.serverReportClientActivity, + backgroundPolicy.reportClientActivity( + currentSessionId, + RpcClientId.make(metadata.client.id), + input, + ), + { "rpc.aggregate": "server" }, + ), + [WS_METHODS.serverReportHostPowerState]: (input) => + observeRpcEffect( + WS_METHODS.serverReportHostPowerState, + backgroundPolicy.reportHostPowerState(input), + { "rpc.aggregate": "server" }, + ), + [WS_METHODS.serverGetBackgroundPolicy]: (_input) => + observeRpcEffect(WS_METHODS.serverGetBackgroundPolicy, backgroundPolicy.snapshot, { + "rpc.aggregate": "server", + }), [WS_METHODS.cloudGetRelayClientStatus]: (_input) => observeRpcEffect(WS_METHODS.cloudGetRelayClientStatus, relayClient.resolve, { "rpc.aggregate": "cloud", @@ -1634,6 +1677,24 @@ const makeWsRpcLayer = (currentSession: AuthenticatedSession) => }), { "rpc.aggregate": "auth" }, ), + [WS_METHODS.subscribeBackgroundPolicy]: (_input) => + observeRpcStream( + WS_METHODS.subscribeBackgroundPolicy, + Stream.concat( + Stream.unwrap(Effect.map(backgroundPolicy.snapshot, Stream.make)), + backgroundPolicy.streamChanges, + ), + { "rpc.aggregate": "server" }, + ), + [WS_METHODS.subscribeResourceTelemetry]: (_input) => + observeRpcStream( + WS_METHODS.subscribeResourceTelemetry, + Stream.concat( + Stream.unwrap(Effect.map(resourceTelemetry.latest, Stream.make)), + resourceTelemetry.changes, + ), + { "rpc.aggregate": "server" }, + ), }); }), ); diff --git a/apps/web/src/components/settings/DiagnosticsSettings.tsx b/apps/web/src/components/settings/DiagnosticsSettings.tsx index 6df3367c642..419994c553d 100644 --- a/apps/web/src/components/settings/DiagnosticsSettings.tsx +++ b/apps/web/src/components/settings/DiagnosticsSettings.tsx @@ -36,6 +36,7 @@ import { Button } from "../ui/button"; import { ScrollArea } from 
"../ui/scroll-area"; import { Tooltip, TooltipPopup, TooltipTrigger } from "../ui/tooltip"; import { toastManager } from "../ui/toast"; +import { ResourceTelemetryDiagnostics } from "./ResourceTelemetryDiagnostics"; import { SettingsPageContainer, SettingsSection, useRelativeTimeTick } from "./settingsLayout"; import { useAtomCommand } from "../../state/use-atom-command"; @@ -906,12 +907,16 @@ export function DiagnosticsSettingsPanel() { if (environmentId === null) { return; } + const process = processData?.processes.find((entry) => entry.pid === pid); + if (process === undefined) { + return; + } setSignalingPid(pid); void (async () => { const result = await signalServerProcess({ environmentId, - input: { pid, signal }, + input: { pid, startTimeMs: process.startTimeMs, signal }, }); setSignalingPid(null); if (result._tag === "Failure") { @@ -948,7 +953,7 @@ export function DiagnosticsSettingsPanel() { refreshProcesses(); })(); }, - [environmentId, refreshProcesses, signalServerProcess], + [environmentId, processData?.processes, refreshProcesses, signalServerProcess], ); const processDiagnosticsError = processData ? Option.getOrNull(processData.error) : null; @@ -960,6 +965,8 @@ export function DiagnosticsSettingsPanel() { return ( + + = 1_024 && unitIndex < units.length - 1); + return `${next.toFixed(next >= 100 ? 0 : next >= 10 ? 1 : 2)} ${units[unitIndex]}`; +} + +function formatRate(value: number): string { + return `${formatBytes(value)}/s`; +} + +function formatCpuTime(valueMs: number): string { + const seconds = valueMs / 1_000; + if (seconds < 60) return `${seconds.toFixed(seconds >= 10 ? 1 : 2)}s`; + const minutes = seconds / 60; + if (minutes < 60) return `${minutes.toFixed(minutes >= 10 ? 
1 : 2)}m`; + return `${(minutes / 60).toFixed(2)}h`; +} + +function formatDurationMicros(value: number): string { + if (value < 1_000) return `${Math.round(value)} µs`; + if (value < 1_000_000) return `${(value / 1_000).toFixed(2)} ms`; + return `${(value / 1_000_000).toFixed(2)} s`; +} + +function processIdentityKey(process: ResourceTelemetryProcess): string { + return `${process.identity.pid}:${process.identity.startTimeMs}`; +} + +function processSummaryIdentityKey(process: ResourceTelemetryProcessSummary): string { + return `${process.identity.pid}:${process.identity.startTimeMs}`; +} + +function formatProcessName(process: Pick): string { + if (process.name.trim()) return process.name; + const firstToken = process.command.trim().split(/\s+/)[0] ?? process.command; + const normalized = firstToken.replace(/^['"]|['"]$/g, ""); + return normalized.split(/[\\/]/).findLast((segment) => segment.length > 0) ?? normalized; +} + +function categoryLabel(category: ResourceTelemetryProcessCategory): string { + switch (category) { + case "server": + return "Server"; + case "server-child": + return "Backend child"; + case "provider-root": + return "Provider"; + case "terminal-root": + return "Terminal"; + case "electron-main": + return "Electron main"; + case "electron-renderer": + return "Renderer"; + case "electron-gpu": + return "GPU"; + case "electron-utility": + return "Electron utility"; + case "resource-monitor": + return "Monitor"; + case "unknown-t3": + return "T3 process"; + } +} + +function categoryDotClass(category: ResourceTelemetryProcessCategory): string { + if (category === "resource-monitor") return "bg-amber-500"; + if (category.startsWith("electron-")) return "bg-sky-500"; + if (category === "server") return "bg-violet-500"; + return "bg-emerald-500"; +} + +function ioSemanticsLabel(semantics: ResourceTelemetryIoSemantics): string { + switch (semantics) { + case "storage": + return "Storage bytes"; + case "logical": + return "Logical bytes"; + case 
"all-io": + return "All I/O bytes"; + case "unavailable": + return "Unavailable"; + } +} + +function booleanStateLabel( + value: BackgroundBooleanState, + labels: { readonly true: string; readonly false: string }, +): string { + if (value === "true") return labels.true; + if (value === "false") return labels.false; + return "Unknown"; +} + +function sourceStatusTone(status: ResourceTelemetrySourceStatus): "default" | "warning" | "danger" { + if (status === "healthy") return "default"; + if (status === "starting" || status === "degraded") return "warning"; + return "danger"; +} + +function SourceStatusBadge({ + label, + status, +}: { + label: string; + status: ResourceTelemetrySourceStatus; +}) { + const tone = sourceStatusTone(status); + return ( + + + {label} {status} + + ); +} + +function LastSampleLabel({ sampledAt }: { sampledAt: DateTime.Utc | null }) { + useRelativeTimeTick(); + if (!sampledAt) { + return Waiting for sample; + } + const relative = formatRelativeTime(DateTime.formatIso(sampledAt)); + return ( + + Updated {relative.value} + {relative.suffix ? ` ${relative.suffix}` : ""} + + ); +} + +function IconStat({ + icon, + label, + value, + detail, + tone = "default", +}: { + icon: ReactNode; + label: string; + value: string; + detail?: string | undefined; + tone?: "default" | "warning" | "danger"; +}) { + return ( +
+
+ {icon} + {label} +
+
+ {value} +
+ {detail ? ( +
{detail}
+ ) : null} +
+ ); +} + +function AggregateCard({ + label, + accentClass, + aggregate, +}: { + label: string; + accentClass: string; + aggregate: ResourceTelemetryAggregate; +}) { + return ( +
+ +
+
+ {label} +
+
+ {aggregate.processCount} proc +
+
+
+ + + + +
+
+ ); +} + +function MetricPair({ label, value }: { label: string; value: string }) { + return ( +
+
+ {label} +
+
+ {value} +
+
+ ); +} + +function HealthSource({ label, health }: { label: string; health: ResourceTelemetrySourceHealth }) { + return ( +
+
+
{label}
+
+ {Option.match(health.lastError, { + onNone: () => "No reported errors", + onSome: (error) => error, + })} +
+
+ +
+ ); +} + +function DetailRow({ + label, + value, + valueClassName, +}: { + label: string; + value: ReactNode; + valueClassName?: string | undefined; +}) { + return ( +
+ {label} + + {value} + +
+ ); +} + +function HistoryWindowSelector({ + selectedWindowMs, + onSelect, +}: { + selectedWindowMs: number; + onSelect: (windowMs: number) => void; +}) { + return ( +
+ {HISTORY_WINDOWS.map((option) => ( + + ))} +
+ ); +} + +function ResourceHistoryChart({ + buckets, +}: { + buckets: ReadonlyArray; +}) { + const maxCpu = Math.max(1, ...buckets.map((bucket) => bucket.maxCpuPercent)); + const maxIo = Math.max(1, ...buckets.map((bucket) => bucket.ioReadBytes + bucket.ioWriteBytes)); + + return ( +
+
+ + CPU average + + + I/O reads + + + I/O writes + +
+
+ {buckets.map((bucket) => { + const cpuHeight = Math.max(2, (bucket.avgCpuPercent / maxCpu) * 100); + const readHeight = Math.max(1, (bucket.ioReadBytes / maxIo) * 100); + const writeHeight = Math.max(1, (bucket.ioWriteBytes / maxIo) * 100); + return ( + + + + + +
+ } + /> + +
CPU avg {bucket.avgCpuPercent.toFixed(1)}%
+
CPU peak {bucket.maxCpuPercent.toFixed(1)}%
+
Read {formatBytes(bucket.ioReadBytes)}
+
Write {formatBytes(bucket.ioWriteBytes)}
+
+ + ); + })} +
+ + ); +} + +function ProcessTreeName({ + process, + collapsed, + onToggle, +}: { + process: ResourceTelemetryProcess; + collapsed: boolean; + onToggle: (process: ResourceTelemetryProcess) => void; +}) { + const name = formatProcessName(process); + const hasChildren = process.childPids.length > 0; + const ChevronIcon = collapsed ? ChevronRightIcon : ChevronDownIcon; + return ( +
+ {hasChildren ? ( + + ) : ( + + )} + + + {name}} + /> + + {process.command || process.name} + + +
+ ); +} + +function canSignalProcess(process: ResourceTelemetryProcess): boolean { + return ( + process.category === "server-child" || + process.category === "provider-root" || + process.category === "terminal-root" + ); +} + +function ProcessActions({ + process, + signalingKey, + onSignal, +}: { + process: ResourceTelemetryProcess; + signalingKey: string | null; + onSignal: (process: ResourceTelemetryProcess, signal: ServerProcessSignal) => void; +}) { + if (!canSignalProcess(process)) { + return ; + } + const isSignaling = signalingKey === processIdentityKey(process); + return ( +
+ + +
+ ); +} + +function ProcessTable({ + processes, + signalingKey, + onSignal, +}: { + processes: ReadonlyArray; + signalingKey: string | null; + onSignal: (process: ResourceTelemetryProcess, signal: ServerProcessSignal) => void; +}) { + const [collapsed, setCollapsed] = useState>(() => new Set()); + const visible = useMemo(() => { + const result: ResourceTelemetryProcess[] = []; + let hiddenDepth: number | null = null; + for (const process of processes) { + if (hiddenDepth !== null) { + if (process.depth > hiddenDepth) continue; + hiddenDepth = null; + } + result.push(process); + if (collapsed.has(processIdentityKey(process))) { + hiddenDepth = process.depth; + } + } + return result; + }, [collapsed, processes]); + const toggle = useCallback((process: ResourceTelemetryProcess) => { + const identityKey = processIdentityKey(process); + setCollapsed((current) => { + const next = new Set(current); + if (next.has(identityKey)) { + next.delete(identityKey); + } else { + next.add(identityKey); + } + return next; + }); + }, []); + + return ( + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + {visible.length === 0 ? ( + + + + ) : null} + {visible.map((process) => ( + + + + + + + + + + + + + + ))} + +
ProcessCategoryCPUCPU TimeMemoryRead/sWrite/sRead TotalWrite TotalPIDKill
+ Waiting for the native process monitor. +
+ + + {categoryLabel(process.category)} + + {process.cpuPercent.toFixed(1)}% + + {formatCpuTime(process.cpuTimeMs)} + + {formatBytes(process.residentBytes)} + + {formatRate(process.ioReadBytesPerSecond)} + + {formatRate(process.ioWriteBytesPerSecond)} + + {formatBytes(process.ioReadBytes)} + + + {formatBytes(process.ioWriteBytes)}} /> + {ioSemanticsLabel(process.ioSemantics)} + + + {process.identity.pid} + + +
+
+ ); +} + +function HistoryProcessTable({ + processes, +}: { + processes: ReadonlyArray; +}) { + return ( + + + + + + + + + + + + + + + + + + + + + + + + + + + + {processes.length === 0 ? ( + + + + ) : null} + {processes.map((process) => ( + + + + + + + + + + + + ))} + +
ProcessCategoryCPU TimePeak CPUPeak MemReadWriteSamplesPID
+ No retained process samples in this window. +
+ + + {process.name || process.command} + + } + /> + + {process.command || process.name} + + + + {categoryLabel(process.category)} + + {formatCpuTime(process.cpuTimeMs)} + + {process.maxCpuPercent.toFixed(1)}% + + {formatBytes(process.peakRssBytes)} + + {formatBytes(process.ioReadBytes)} + + {formatBytes(process.ioWriteBytes)} + + {process.sampleCount} + + {process.identity.pid} +
+
+ ); +} + +function AttributionTable({ entries }: { entries: ReadonlyArray }) { + return ( +
+ + + + + + + + + + + + + + + + + + + + + {entries.length === 0 ? ( + + + + ) : null} + {entries.map((entry) => ( + + + + + + + + + ))} + +
ComponentOperationLogical ReadLogical WriteCountTime
+ No instrumented application I/O has been recorded yet. +
+ {entry.component} + {entry.operation} + {formatBytes(entry.logicalReadBytes)} + + {formatBytes(entry.logicalWriteBytes)} + {entry.count} + {(entry.durationMs / 1_000).toFixed(2)}s +
+
+ ); +} + +export function ResourceTelemetryDiagnostics() { + const [windowMs, setWindowMs] = useState(15 * 60_000); + const selectedWindow = + HISTORY_WINDOWS.find((option) => option.windowMs === windowMs) ?? HISTORY_WINDOWS[1]; + const telemetry = useResourceTelemetry(); + const retryTelemetry = telemetry.retry; + const history = useResourceTelemetryHistory({ + windowMs: selectedWindow.windowMs, + bucketMs: selectedWindow.bucketMs, + }); + const primaryEnvironment = usePrimaryEnvironment(); + const signalServerProcess = useAtomCommand(serverEnvironment.signalProcess, { + reportFailure: false, + }); + const [signalingKey, setSignalingKey] = useState(null); + const [isRetrying, setIsRetrying] = useState(false); + const snapshot = telemetry.data; + const allT3 = snapshot?.groups.allT3; + + const signalProcess = useCallback( + (process: ResourceTelemetryProcess, signal: ServerProcessSignal) => { + if ( + signal === "SIGKILL" && + !window.confirm( + `Send SIGKILL to process ${process.identity.pid}? This cannot be handled by the process.`, + ) + ) { + return; + } + const identityKey = processIdentityKey(process); + const environmentId = primaryEnvironment?.environmentId; + if (environmentId === undefined) { + return; + } + setSignalingKey(identityKey); + void signalServerProcess({ + environmentId, + input: { + pid: process.identity.pid, + startTimeMs: process.identity.startTimeMs, + signal, + }, + }) + .then((result) => { + if (result._tag === "Failure") { + if (isAtomCommandInterrupted(result)) return; + throw squashAtomCommandFailure(result); + } + if (result.value.signaled) return; + toastManager.add({ + type: "error", + title: `Could not send ${signal}`, + description: Option.getOrElse( + result.value.message, + () => `Failed to send ${signal} to process ${process.identity.pid}.`, + ), + }); + }) + .catch((error: unknown) => { + toastManager.add({ + type: "error", + title: `Could not send ${signal}`, + description: error instanceof Error ? 
error.message : `Failed to send ${signal}.`, + }); + }) + .finally(() => { + setSignalingKey((current) => (current === identityKey ? null : current)); + }); + }, + [primaryEnvironment?.environmentId, signalServerProcess], + ); + + const retryCollector = useCallback(() => { + setIsRetrying(true); + void retryTelemetry() + .catch((error: unknown) => { + toastManager.add({ + type: "error", + title: "Could not restart resource monitor", + description: + error instanceof Error ? error.message : "The resource monitor retry failed.", + }); + }) + .finally(() => { + setIsRetrying(false); + }); + }, [retryTelemetry]); + + const speedLimit = snapshot ? Option.getOrNull(snapshot.speedLimitPercent) : null; + const collectorNeedsRetry = + snapshot?.health.native.status === "degraded" || + snapshot?.health.native.status === "unavailable" || + snapshot?.health.native.status === "stopped"; + + return ( + <> + + {snapshot ? ( + + ) : null} + + + + + + } + /> + Refresh telemetry snapshot + + + } + > +
+ } + label="Current CPU" + value={allT3 ? `${allT3.currentCpuPercent.toFixed(1)}%` : "..."} + detail={allT3 ? `${formatCpuTime(allT3.cpuTimeMs)} observed CPU time` : undefined} + /> + } + label="Resident Memory" + value={allT3 ? formatBytes(allT3.currentRssBytes) : "..."} + detail={allT3 ? `${formatBytes(allT3.peakRssBytes)} process peaks` : undefined} + /> + } + label="I/O Reads" + value={allT3 ? formatRate(allT3.ioReadBytesPerSecond) : "..."} + detail={allT3 ? `${formatBytes(allT3.ioReadBytes)} observed` : undefined} + /> + } + label="I/O Writes" + value={allT3 ? formatRate(allT3.ioWriteBytesPerSecond) : "..."} + detail={allT3 ? `${formatBytes(allT3.ioWriteBytes)} observed` : undefined} + tone={ + allT3 && allT3.ioWriteBytesPerSecond >= 10 * 1_024 * 1_024 + ? "danger" + : allT3 && allT3.ioWriteBytesPerSecond >= 1_024 * 1_024 + ? "warning" + : "default" + } + /> + } + label="Processes" + value={allT3 ? String(allT3.processCount) : "..."} + detail={ + allT3 ? `${allT3.processStarts} starts · ${allT3.processExits} exits` : undefined + } + /> + } + label="CPU Speed Limit" + value={ + snapshot ? (speedLimit === null ? "Unknown" : `${speedLimit.toFixed(0)}%`) : "..." + } + detail={snapshot ? `${snapshot.power.thermalState} thermal state` : undefined} + tone={speedLimit !== null && speedLimit < 80 ? "warning" : "default"} + /> +
+ {telemetry.error ? ( +
+ + {telemetry.error} +
+ ) : null} + {snapshot ? ( +
+ + + +
+ ) : null} +
+ + + + Retry monitor + + ) : null + } + > +
+
+
+ Host state +
+ + + + + +
+
+
+ Collection health +
+ {snapshot ? ( + <> + + + + + 0 + ? "text-amber-600 dark:text-amber-300" + : undefined + } + /> + "Unavailable", + onSome: (version) => + `${version}${Option.match(snapshot.health.sidecarPid, { + onNone: () => "", + onSome: (pid) => ` · PID ${pid}`, + })}`, + })} + /> + + + ) : ( +
+ Waiting for collector health. +
+ )} +
+
+
+ + + + + + } + > + {history.error ? ( +
+ + {history.error} +
+ ) : null} + + +
+ + + Identity: PID + start time +
+ ) : null + } + > + +
+ + Logical bytes by operation + } + > +
+ Native counters identify which process is reading or writing. These application-level + counters identify known T3 operations so process spikes can be correlated with specific + persistence and logging paths. +
+ +
+ + ); +} diff --git a/apps/web/src/components/settings/SettingsPanels.tsx b/apps/web/src/components/settings/SettingsPanels.tsx index 71311c10d5c..7127eb2b14a 100644 --- a/apps/web/src/components/settings/SettingsPanels.tsx +++ b/apps/web/src/components/settings/SettingsPanels.tsx @@ -1,9 +1,19 @@ -import { ArchiveIcon, ArchiveX, LoaderIcon, PlusIcon, RefreshCwIcon } from "lucide-react"; +import { + ArchiveIcon, + ArchiveX, + InfoIcon, + LoaderIcon, + PlusIcon, + RefreshCwIcon, + SettingsIcon, +} from "lucide-react"; import { Link } from "@tanstack/react-router"; import { useCallback, useMemo, useRef, useState } from "react"; import { useAtomValue } from "@effect/atom-react"; import { defaultInstanceIdForDriver, + type BackgroundActivityProfile, + type BackgroundActivitySettings, type DesktopUpdateChannel, PROVIDER_DISPLAY_NAMES, ProviderDriverKind, @@ -18,6 +28,11 @@ import { squashAtomCommandFailure, } from "@t3tools/client-runtime/state/runtime"; import { DEFAULT_UNIFIED_SETTINGS } from "@t3tools/contracts/settings"; +import { + getBackgroundActivityBaseProfile, + getBackgroundActivityPresetSettings, + resolveServerBackgroundActivitySettings, +} from "@t3tools/shared/backgroundActivitySettings"; import { createModelSelection } from "@t3tools/shared/model"; import * as Arr from "effect/Array"; import * as Duration from "effect/Duration"; @@ -58,7 +73,23 @@ import { useProjects } from "../../state/entities"; import { useArchivedThreadSnapshots } from "../../lib/archivedThreadsState"; import { formatRelativeTime, formatRelativeTimeLabel } from "../../timestampFormat"; import { Button } from "../ui/button"; +import { + Dialog, + DialogDescription, + DialogFooter, + DialogHeader, + DialogPanel, + DialogPopup, + DialogTitle, +} from "../ui/dialog"; import { DraftInput } from "../ui/draft-input"; +import { + NumberField, + NumberFieldDecrement, + NumberFieldGroup, + NumberFieldIncrement, + NumberFieldInput, +} from "../ui/number-field"; import { Select, SelectItem, 
SelectPopup, SelectTrigger, SelectValue } from "../ui/select"; import { Switch } from "../ui/switch"; import { stackedThreadToast, toastManager } from "../ui/toast"; @@ -108,7 +139,128 @@ const TIMESTAMP_FORMAT_LABELS = { "24-hour": "24-hour", } as const; +const BACKGROUND_ACTIVITY_PROFILE_LABELS: Record = { + balanced: "Balanced", + performance: "Performance", + "battery-saver": "Battery saver", +}; + +type BackgroundActivityProfileOption = BackgroundActivityProfile | "advanced"; +type BackgroundActivityOverridePatch = Partial<{ + [K in keyof BackgroundActivitySettings["overrides"]]: + | BackgroundActivitySettings["overrides"][K] + | undefined; +}>; + +const BACKGROUND_ACTIVITY_PROFILE_OPTION_LABELS: Record = { + ...BACKGROUND_ACTIVITY_PROFILE_LABELS, + advanced: "Advanced", +}; + +const BACKGROUND_ACTIVITY_PROFILE_DESCRIPTIONS: Record = { + balanced: + "Pauses background probes when clients are idle, the host is locked, or low power mode is active.", + performance: "Allows scoped background probes while any subscribed client remains connected.", + "battery-saver": "Also pauses background probes when the host or client is on battery.", +}; + +const ADVANCED_BACKGROUND_ACTIVITY_DESCRIPTION = + "Uses custom background intervals with the selected shared power policy."; + +const PROVIDER_HEALTH_INTERVAL_STEP_SECONDS = 30; const DEFAULT_DRIVER_KIND = ProviderDriverKind.make("codex"); +const BACKGROUND_ACTIVITY_BOOLEAN_OVERRIDES: ReadonlyArray<{ + readonly key: + | "pauseWhenHostLocked" + | "pauseWhenHostLowPower" + | "pauseWhenClientLowPower" + | "pauseWhenOnBattery"; + readonly label: string; +}> = [ + { key: "pauseWhenHostLocked", label: "Pause when host is locked" }, + { key: "pauseWhenHostLowPower", label: "Pause on host low power" }, + { key: "pauseWhenClientLowPower", label: "Pause on client low power" }, + { key: "pauseWhenOnBattery", label: "Pause on battery" }, +]; + +function durationToSeconds(duration: Duration.Duration): number { + return 
Math.round(Duration.toMillis(duration) / 1_000); +} + +function normalizeIntervalSeconds(value: number | null): number { + if (value === null || !Number.isFinite(value)) { + return 0; + } + return Math.max(0, Math.round(value)); +} + +function resolveBackgroundActivityProfileOption(settings: { + readonly backgroundActivity: BackgroundActivitySettings; +}): BackgroundActivityProfileOption { + return settings.backgroundActivity.profile === "custom" + ? "advanced" + : settings.backgroundActivity.profile; +} + +function resetBackgroundActivitySettings() { + return { + backgroundActivity: DEFAULT_UNIFIED_SETTINGS.backgroundActivity, + }; +} + +function backgroundActivityProfileSettings(profile: BackgroundActivityProfile) { + return { + backgroundActivity: { + schemaVersion: 1 as const, + profile, + overrides: {}, + }, + }; +} + +function backgroundActivityOverrideSettings( + current: BackgroundActivitySettings, + overrides: BackgroundActivityOverridePatch, +) { + const nextOverrides: BackgroundActivityOverridePatch = { + ...current.overrides, + ...overrides, + }; + for (const [key, value] of Object.entries(nextOverrides)) { + if (value === undefined) { + delete nextOverrides[key as keyof typeof nextOverrides]; + } + } + return { + backgroundActivity: { + schemaVersion: 1 as const, + profile: "custom" as const, + baseProfile: getBackgroundActivityBaseProfile(current), + overrides: nextOverrides as BackgroundActivitySettings["overrides"], + }, + }; +} + +function PolicyTooltip({ children }: { readonly children: string }) { + return ( + + + + + } + /> + + {children} + + + ); +} function withoutProviderInstanceKey( record: Readonly> | undefined, @@ -401,9 +553,8 @@ export function useSettingsRestore(onRestored?: () => void) { ...(settings.enableAssistantStreaming !== DEFAULT_UNIFIED_SETTINGS.enableAssistantStreaming ? 
["Assistant output"] : []), - ...(Duration.toMillis(settings.automaticGitFetchInterval) !== - Duration.toMillis(DEFAULT_UNIFIED_SETTINGS.automaticGitFetchInterval) - ? ["Automatic Git fetch interval"] + ...(!Equal.equals(settings.backgroundActivity, DEFAULT_UNIFIED_SETTINGS.backgroundActivity) + ? ["Background activity"] : []), ...(settings.defaultThreadEnvMode !== DEFAULT_UNIFIED_SETTINGS.defaultThreadEnvMode ? ["New thread mode"] @@ -433,7 +584,7 @@ export function useSettingsRestore(onRestored?: () => void) { settings.newWorktreesStartFromOrigin, settings.diffIgnoreWhitespace, settings.diffWordWrap, - settings.automaticGitFetchInterval, + settings.backgroundActivity, settings.enableAssistantStreaming, settings.sidebarThreadPreviewCount, settings.timestampFormat, @@ -459,7 +610,7 @@ export function useSettingsRestore(onRestored?: () => void) { sidebarThreadPreviewCount: DEFAULT_UNIFIED_SETTINGS.sidebarThreadPreviewCount, autoOpenPlanSidebar: DEFAULT_UNIFIED_SETTINGS.autoOpenPlanSidebar, enableAssistantStreaming: DEFAULT_UNIFIED_SETTINGS.enableAssistantStreaming, - automaticGitFetchInterval: DEFAULT_UNIFIED_SETTINGS.automaticGitFetchInterval, + backgroundActivity: DEFAULT_UNIFIED_SETTINGS.backgroundActivity, defaultThreadEnvMode: DEFAULT_UNIFIED_SETTINGS.defaultThreadEnvMode, newWorktreesStartFromOrigin: DEFAULT_UNIFIED_SETTINGS.newWorktreesStartFromOrigin, addProjectBaseDirectory: DEFAULT_UNIFIED_SETTINGS.addProjectBaseDirectory, @@ -476,10 +627,255 @@ export function useSettingsRestore(onRestored?: () => void) { }; } +function BackgroundActivityAdvancedDialog({ + open, + onOpenChange, +}: { + readonly open: boolean; + readonly onOpenChange: (open: boolean) => void; +}) { + const settings = useSettings(); + const updateSettings = useUpdateSettings(); + const resolvedBackgroundActivity = resolveServerBackgroundActivitySettings(settings); + const activeProfile = getBackgroundActivityBaseProfile(settings.backgroundActivity); + const automaticGitFetchIntervalSeconds 
= durationToSeconds( + resolvedBackgroundActivity.automaticGitFetchInterval, + ); + const providerHealthRefreshIntervalSeconds = durationToSeconds( + resolvedBackgroundActivity.providerHealthRefreshInterval, + ); + const hostPowerMonitorActiveIntervalSeconds = durationToSeconds( + resolvedBackgroundActivity.hostPowerMonitorActiveInterval, + ); + const hostPowerMonitorIdleIntervalSeconds = durationToSeconds( + resolvedBackgroundActivity.hostPowerMonitorIdleInterval, + ); + + return ( + + + + Background Activity + + Tune the shared power policy and the background intervals that feed it. + + + +
+
+
+
Shared policy
+

+ Controls whether background work may run after a subscribed interval fires. +

+
+ +
+ +
+
+
Git fetch interval
+

+ Refresh remote branch status in the background. +

+
+
+ + updateSettings( + backgroundActivityOverrideSettings(settings.backgroundActivity, { + automaticGitFetchInterval: Duration.seconds( + normalizeIntervalSeconds(value), + ), + }), + ) + } + > + + + + + + + seconds +
+
+ +
+
+
Provider health interval
+

+ Refresh provider availability, versions, auth state, and model metadata. +

+
+
+ + updateSettings( + backgroundActivityOverrideSettings(settings.backgroundActivity, { + providerHealthRefreshInterval: Duration.seconds( + normalizeIntervalSeconds(value), + ), + }), + ) + } + > + + + + + + + seconds +
+
+ +
+
+
Host power monitor
+

+ Poll host power state while clients are active. +

+
+
+ + updateSettings( + backgroundActivityOverrideSettings(settings.backgroundActivity, { + hostPowerMonitorActiveInterval: Duration.seconds( + normalizeIntervalSeconds(value), + ), + }), + ) + } + > + + + + + + + seconds +
+
+ +
+
+
Idle host monitor
+

+ Poll host power state when no foreground client is active. +

+
+
+ + updateSettings( + backgroundActivityOverrideSettings(settings.backgroundActivity, { + hostPowerMonitorIdleInterval: Duration.seconds( + normalizeIntervalSeconds(value), + ), + }), + ) + } + > + + + + + + + seconds +
+
+ +
+ {BACKGROUND_ACTIVITY_BOOLEAN_OVERRIDES.map(({ key, label }) => ( + + ))} +
+
+
+ + + + +
+
+ ); +} + export function GeneralSettingsPanel() { const { theme, setTheme } = useTheme(); const settings = useSettings(); const updateSettings = useUpdateSettings(); + const [backgroundActivityDialogOpen, setBackgroundActivityDialogOpen] = useState(false); const observability = useAtomValue(primaryServerObservabilityAtom); const serverProviders = useAtomValue(primaryServerProvidersAtom); const diagnosticsDescription = formatDiagnosticsDescription({ @@ -512,6 +908,21 @@ export function GeneralSettingsPanel() { settings.textGenerationModelSelection ?? null, DEFAULT_UNIFIED_SETTINGS.textGenerationModelSelection ?? null, ); + const resolvedBackgroundActivity = resolveServerBackgroundActivitySettings(settings); + const activeBackgroundActivityProfile = getBackgroundActivityBaseProfile( + settings.backgroundActivity, + ); + const backgroundActivityProfileOption = resolveBackgroundActivityProfileOption(settings); + const backgroundActivityDescription = + backgroundActivityProfileOption === "advanced" + ? `${ADVANCED_BACKGROUND_ACTIVITY_DESCRIPTION} Current shared policy: ${ + BACKGROUND_ACTIVITY_PROFILE_LABELS[activeBackgroundActivityProfile] + }.` + : BACKGROUND_ACTIVITY_PROFILE_DESCRIPTIONS[resolvedBackgroundActivity.profile]; + const canResetBackgroundActivity = !Equal.equals( + settings.backgroundActivity, + DEFAULT_UNIFIED_SETTINGS.backgroundActivity, + ); return ( @@ -668,6 +1079,88 @@ export function GeneralSettingsPanel() { } /> + + Background activity + + This shared policy gates background work such as Git refreshes and provider health + probes after their individual intervals elapse. + + + } + description={backgroundActivityDescription} + resetAction={ + canResetBackgroundActivity ? ( + updateSettings(resetBackgroundActivitySettings())} + /> + ) : null + } + control={ + <> + + {backgroundActivityProfileOption === "advanced" ? ( + + setBackgroundActivityDialogOpen(true)} + > + + + } + /> + Configure background activity + + ) : null} + + + } + /> + 0 ? 
serverProviders.reduce( @@ -1282,6 +1783,61 @@ export function ProviderSettingsPanel() { } > + + Health check interval + + This interval is configured here, then the shared Background activity policy decides + whether provider probes may run when the timer fires. Custom intervals appear as + Advanced in General settings. + + + } + description="Refresh provider availability, versions, auth state, and model metadata in the background. Set this to 0 seconds to rely on manual refreshes." + resetAction={ + providerHealthRefreshIntervalSeconds !== defaultProviderHealthRefreshIntervalSeconds ? ( + + updateSettings( + backgroundActivityOverrideSettings(settings.backgroundActivity, { + providerHealthRefreshInterval: undefined, + }), + ) + } + /> + ) : null + } + control={ +
+ + updateSettings( + backgroundActivityOverrideSettings(settings.backgroundActivity, { + providerHealthRefreshInterval: Duration.seconds( + normalizeIntervalSeconds(value), + ), + }), + ) + } + > + + + + + + + seconds +
+ } + /> + {rows.map((row) => { const driverOption = getDriverOption(row.driver); const liveProvider = serverProviders.find( diff --git a/apps/web/src/components/settings/SourceControlSettings.tsx b/apps/web/src/components/settings/SourceControlSettings.tsx index db1b2393626..8de7374d2a5 100644 --- a/apps/web/src/components/settings/SourceControlSettings.tsx +++ b/apps/web/src/components/settings/SourceControlSettings.tsx @@ -1,8 +1,9 @@ -import { ChevronDownIcon, GitPullRequestIcon, RefreshCwIcon } from "lucide-react"; +import { ChevronDownIcon, GitPullRequestIcon, InfoIcon, RefreshCwIcon } from "lucide-react"; import * as Duration from "effect/Duration"; import * as Option from "effect/Option"; import { useState, type ReactNode } from "react"; import type { + BackgroundActivitySettings, SourceControlProviderKind, SourceControlDiscoveryResult, SourceControlProviderAuth, @@ -10,7 +11,11 @@ import type { VcsDriverKind, VcsDiscoveryItem, } from "@t3tools/contracts"; -import { DEFAULT_UNIFIED_SETTINGS } from "@t3tools/contracts/settings"; +import { + getBackgroundActivityBaseProfile, + getBackgroundActivityPresetSettings, + resolveServerBackgroundActivitySettings, +} from "@t3tools/shared/backgroundActivitySettings"; import { useSettings, useUpdateSettings } from "../../hooks/useSettings"; import { cn } from "../../lib/utils"; @@ -69,6 +74,11 @@ const VCS_ICONS: Partial> = { const SOURCE_CONTROL_SKELETON_ROWS = ["primary", "secondary"] as const; const GIT_FETCH_INTERVAL_STEP_SECONDS = 5; +type BackgroundActivityOverridePatch = Partial<{ + [K in keyof BackgroundActivitySettings["overrides"]]: + | BackgroundActivitySettings["overrides"][K] + | undefined; +}>; function durationToSeconds(duration: Duration.Duration): number { return Math.round(Duration.toMillis(duration) / 1_000); @@ -81,6 +91,27 @@ function normalizeFetchIntervalSeconds(value: number | null): number { return Math.max(0, Math.round(value)); } +function BackgroundPolicyTooltip({ children }: { readonly 
children: string }) { + return ( + + + + + } + /> + + {children} + + + ); +} + function optionLabel(value: Option.Option): string | null { return Option.getOrNull(value); } @@ -291,14 +322,41 @@ function DiscoveryItemRow({ } function GitFetchIntervalSettings() { - const automaticGitFetchInterval = useSettings((settings) => settings.automaticGitFetchInterval); + const settings = useSettings(); const updateSettings = useUpdateSettings(); - const automaticGitFetchIntervalSeconds = durationToSeconds(automaticGitFetchInterval); + const resolvedBackgroundActivity = resolveServerBackgroundActivitySettings(settings); + const automaticGitFetchIntervalSeconds = durationToSeconds( + resolvedBackgroundActivity.automaticGitFetchInterval, + ); const defaultAutomaticGitFetchIntervalSeconds = durationToSeconds( - DEFAULT_UNIFIED_SETTINGS.automaticGitFetchInterval, + getBackgroundActivityPresetSettings( + getBackgroundActivityBaseProfile(settings.backgroundActivity), + ).automaticGitFetchInterval, ); const canResetFetchInterval = automaticGitFetchIntervalSeconds !== defaultAutomaticGitFetchIntervalSeconds; + const backgroundActivityOverrideSettings = ( + current: BackgroundActivitySettings, + overrides: BackgroundActivityOverridePatch, + ) => { + const nextOverrides: BackgroundActivityOverridePatch = { + ...current.overrides, + ...overrides, + }; + for (const [key, value] of Object.entries(nextOverrides)) { + if (value === undefined) { + delete nextOverrides[key as keyof typeof nextOverrides]; + } + } + return { + backgroundActivity: { + schemaVersion: 1 as const, + profile: "custom" as const, + baseProfile: getBackgroundActivityBaseProfile(current), + overrides: nextOverrides as BackgroundActivitySettings["overrides"], + }, + }; + }; return (
@@ -306,6 +364,11 @@ function GitFetchIntervalSettings() {
Fetch interval + + This interval is configured for Git only. The shared Background activity policy still + decides whether Git refreshes may run when the timer fires. Custom intervals appear as + Advanced in General settings. + - updateSettings({ - automaticGitFetchInterval: DEFAULT_UNIFIED_SETTINGS.automaticGitFetchInterval, - }) + updateSettings( + backgroundActivityOverrideSettings(settings.backgroundActivity, { + automaticGitFetchInterval: undefined, + }), + ) } /> ) : null} @@ -338,9 +403,11 @@ function GitFetchIntervalSettings() { size="sm" className="w-32" onValueChange={(value) => - updateSettings({ - automaticGitFetchInterval: Duration.seconds(normalizeFetchIntervalSeconds(value)), - }) + updateSettings( + backgroundActivityOverrideSettings(settings.backgroundActivity, { + automaticGitFetchInterval: Duration.seconds(normalizeFetchIntervalSeconds(value)), + }), + ) } > diff --git a/apps/web/src/connection/runtime.ts b/apps/web/src/connection/runtime.ts index 3b1eade0818..5be9b2bdf1e 100644 --- a/apps/web/src/connection/runtime.ts +++ b/apps/web/src/connection/runtime.ts @@ -3,14 +3,28 @@ import * as Layer from "effect/Layer"; import { Atom } from "effect/unstable/reactivity"; import { runtimeContextLayer } from "../lib/runtime"; +import { + backgroundActivityObserverLayer, + backgroundActivityReporterLayer, +} from "../lib/backgroundActivityReporter"; import { connectionPlatformLayer } from "./platform"; const providedConnectionPlatformLayer = connectionPlatformLayer.pipe( Layer.provide(runtimeContextLayer), ); -export const connectionLayer = clientConnectionLayer.pipe( - Layer.provideMerge(Layer.mergeAll(runtimeContextLayer, providedConnectionPlatformLayer)), +const providedClientConnectionLayer = clientConnectionLayer.pipe( + Layer.provideMerge( + Layer.mergeAll( + runtimeContextLayer, + providedConnectionPlatformLayer, + backgroundActivityObserverLayer, + ), + ), +); + +export const connectionLayer = backgroundActivityReporterLayer.pipe( + 
Layer.provideMerge(providedClientConnectionLayer), ); export const connectionAtomRuntime = Atom.runtime(connectionLayer); diff --git a/apps/web/src/env.ts b/apps/web/src/env.ts index fb2e493cada..2e08dd33698 100644 --- a/apps/web/src/env.ts +++ b/apps/web/src/env.ts @@ -1,8 +1,6 @@ /** * True when running inside the Electron preload bridge, false in a regular browser. - * The preload script sets window.nativeApi via contextBridge before any web-app + * The preload script sets window.desktopBridge via contextBridge before any web-app * code executes, so this is reliable at module load time. */ -export const isElectron = - typeof window !== "undefined" && - (window.desktopBridge !== undefined || window.nativeApi !== undefined); +export const isElectron = typeof window !== "undefined" && window.desktopBridge !== undefined; diff --git a/apps/web/src/hooks/useSettings.ts b/apps/web/src/hooks/useSettings.ts index 6759b227a13..a7cb8a358af 100644 --- a/apps/web/src/hooks/useSettings.ts +++ b/apps/web/src/hooks/useSettings.ts @@ -218,7 +218,6 @@ export function useUpdateSettings() { }); } } - if (Object.keys(clientPatch).length > 0) { persistClientSettings({ ...getClientSettingsSnapshot(), diff --git a/apps/web/src/lib/backgroundActivityReporter.ts b/apps/web/src/lib/backgroundActivityReporter.ts new file mode 100644 index 00000000000..95592238093 --- /dev/null +++ b/apps/web/src/lib/backgroundActivityReporter.ts @@ -0,0 +1,205 @@ +import { EnvironmentRegistry } from "@t3tools/client-runtime/connection"; +import { + EnvironmentRpcSubscriptionObserver, + request, + type EnvironmentRpcSubscriptionObservation, +} from "@t3tools/client-runtime/rpc"; +import { + type BackgroundScope, + type ClientActivityReportInput, + type EnvironmentId, + WS_METHODS, +} from "@t3tools/contracts"; +import * as DateTime from "effect/DateTime"; +import * as Effect from "effect/Effect"; +import * as Layer from "effect/Layer"; +import * as Queue from "effect/Queue"; +import * as Schedule from 
"effect/Schedule";
+import * as Stream from "effect/Stream";
+import * as SubscriptionRef from "effect/SubscriptionRef";
+
+import { randomUUID } from "./utils";
+
+// localStorage key under which getClientId persists the generated client id.
+const CLIENT_ID_STORAGE_KEY = "t3.backgroundActivity.clientId";
+// Spacing, in milliseconds, of the periodic activity report.
+const REPORT_INTERVAL_MS = 25_000;
+// Sent as `ttlMs` with every activity report.
+// NOTE(review): presumably the lease lifetime the server applies — confirm.
+const LEASE_TTL_MS = 45_000;
+// Scopes placed at the front of every activity report's `scopes` list.
+const BASELINE_SCOPES: ReadonlyArray = [{ type: "provider-status" }];
+
+// A retained scope together with the number of outstanding retentions of it.
+interface RetainedScope {
+  readonly environmentId: EnvironmentId;
+  readonly scope: BackgroundScope;
+  refCount: number;
+}
+
+// Retained scopes, keyed by stableScopeKey(environmentId, scope).
+const retainedScopes = new Map();
+// Callbacks run by notifyRetainedScopesChanged.
+const retainedScopeListeners = new Set<() => void>();
+
+/** Invokes every callback currently registered in retainedScopeListeners. */
+function notifyRetainedScopesChanged(): void {
+  for (const listener of retainedScopeListeners) {
+    listener();
+  }
+}
+
+/**
+ * Builds the string key that identifies a scope within one environment.
+ *
+ * The key is `<environmentId>:<scope.type>`, followed by `:<qualifier>` when
+ * the scope carries one: `instanceId` for "provider-status" (only when it is
+ * truthy), `cwd` for "vcs-status" and "git-refs", and `threadId` for
+ * "thread". "server-config" and "diagnostics" never get a qualifier.
+ */
+function stableScopeKey(environmentId: EnvironmentId, scope: BackgroundScope): string {
+  const prefix = `${environmentId}:`;
+  switch (scope.type) {
+    case "server-config":
+    case "diagnostics":
+      return `${prefix}${scope.type}`;
+    case "provider-status":
+      return scope.instanceId
+        ? `${prefix}${scope.type}:${scope.instanceId}`
+        : `${prefix}${scope.type}`;
+    case "vcs-status":
+    case "git-refs":
+      return `${prefix}${scope.type}:${scope.cwd}`;
+    case "thread":
+      return `${prefix}${scope.type}:${scope.threadId}`;
+  }
+}
+
+/**
+ * Returns the client id used in activity reports.
+ *
+ * Reuses the non-empty value stored in localStorage under
+ * CLIENT_ID_STORAGE_KEY; otherwise generates one with randomUUID and stores
+ * it. If anything in that path throws, the fixed id
+ * "ephemeral-browser-client" is returned instead.
+ *
+ * NOTE(review): every client that hits the fallback reports the same id —
+ * confirm the server copes with several clients sharing one clientId.
+ */
+function getClientId(): string {
+  try {
+    const existing = window.localStorage.getItem(CLIENT_ID_STORAGE_KEY);
+    if (existing) return existing;
+    const next = randomUUID();
+    window.localStorage.setItem(CLIENT_ID_STORAGE_KEY, next);
+    return next;
+  } catch {
+    return "ephemeral-browser-client";
+  }
+}
+
+/** Picks the reported client kind from whether `window.desktopBridge` is set. */
+function resolveClientKind(): ClientActivityReportInput["clientKind"] {
+  return window.desktopBridge ? 
"desktop-renderer" : "web";
+}
+
+/**
+ * Assembles the activity report for one environment from the current
+ * document state.
+ *
+ * `visible` and `appState` both derive from `document.visibilityState`;
+ * `focused` and `recentlyInteracted` both mirror `document.hasFocus()`, so
+ * no separate interaction tracking happens here. `scopes` is the baseline
+ * scopes followed by every retained scope whose environment matches
+ * `environmentId`, in the iteration order of `retainedScopes`.
+ */
+function createActivityReport(environmentId: EnvironmentId): ClientActivityReportInput {
+  const isVisible = document.visibilityState === "visible";
+  const hasFocus = document.hasFocus();
+
+  const scopes = [...BASELINE_SCOPES];
+  for (const retained of retainedScopes.values()) {
+    if (retained.environmentId === environmentId) {
+      scopes.push(retained.scope);
+    }
+  }
+
+  return {
+    environmentId,
+    clientId: getClientId(),
+    clientKind: resolveClientKind(),
+    visible: isVisible,
+    focused: hasFocus,
+    recentlyInteracted: hasFocus,
+    appState: isVisible ? "active" : "background",
+    scopes,
+    ttlMs: LEASE_TTL_MS,
+    observedAt: DateTime.makeUnsafe(new Date().toISOString()),
+  };
+}
+
+/**
+ * Maps an observed RPC subscription to the background scope to retain for it.
+ *
+ * A resource-telemetry subscription maps to the "diagnostics" scope, a
+ * VCS-status subscription is inspected for a string `cwd` on its input, and
+ * any other method yields `null`.
+ */
+function scopeForSubscription(
+  observation: EnvironmentRpcSubscriptionObservation,
+): BackgroundScope | null {
+  if (observation.method === WS_METHODS.subscribeResourceTelemetry) {
+    return { type: "diagnostics" };
+  }
+  if (observation.method !== WS_METHODS.subscribeVcsStatus) {
+    return null;
+  }
+  const input = observation.input as { readonly cwd?: unknown };
+  return typeof input.cwd === "string" ? 
{ type: "vcs-status", cwd: input.cwd } : null;
+}
+
+/**
+ * Registers one retention of `scope` for `environmentId` and returns the
+ * function that releases it.
+ *
+ * Retentions are reference-counted per stableScopeKey: the first retention
+ * inserts the entry and notifies listeners, later ones only bump the count.
+ * The entry is removed, and listeners are notified again, when the last
+ * retention is released.
+ *
+ * The returned release function is idempotent: only its first call
+ * decrements the count. Callers wrap it in an effect value that can be run
+ * more than once, and without the guard a repeated release would drop a
+ * scope that another subscriber still holds.
+ *
+ * @param environmentId Environment the scope belongs to.
+ * @param scope Background scope to retain.
+ * @returns A function that releases this retention; extra calls are no-ops.
+ */
+function retainBackgroundScope(environmentId: EnvironmentId, scope: BackgroundScope): () => void {
+  const key = stableScopeKey(environmentId, scope);
+  const existing = retainedScopes.get(key);
+  if (existing) {
+    existing.refCount += 1;
+  } else {
+    retainedScopes.set(key, { environmentId, scope, refCount: 1 });
+    notifyRetainedScopesChanged();
+  }
+
+  // Guards against double release so each retention is given back at most once.
+  let released = false;
+  return () => {
+    if (released) return;
+    released = true;
+    const current = retainedScopes.get(key);
+    if (!current) return;
+    current.refCount -= 1;
+    if (current.refCount <= 0) {
+      retainedScopes.delete(key);
+      notifyRetainedScopesChanged();
+    }
+  };
+}
+
+/**
+ * Subscription observer that retains a background scope per observed
+ * subscription.
+ *
+ * `observe` yields the effect to run on release: a no-op when the
+ * subscription maps to no scope, otherwise an effect that calls the release
+ * function obtained from retainBackgroundScope.
+ */
+export const backgroundActivityObserverLayer = Layer.succeed(
+  EnvironmentRpcSubscriptionObserver,
+  EnvironmentRpcSubscriptionObserver.of({
+    observe: (observation) => {
+      const scope = scopeForSubscription(observation);
+      if (scope === null) {
+        return Effect.succeed(Effect.void);
+      }
+      return Effect.sync(() =>
+        Effect.sync(retainBackgroundScope(observation.environmentId as EnvironmentId, scope)),
+      );
+    },
+  }),
+);
+
+export const backgroundActivityReporterLayer = Layer.effectDiscard(
+  Effect.gen(function* () {
+    if (typeof window === "undefined" || typeof document === "undefined") {
+      return;
+    }
+
+    const registry = yield* EnvironmentRegistry;
+    const reportRequests = yield* Queue.sliding(1);
+    const requestReport = () => Queue.offerUnsafe(reportRequests, undefined);
+
+    const report = Effect.gen(function* () {
+      const entries = yield* SubscriptionRef.get(registry.entries);
+      yield* Effect.forEach(
+        entries.keys(),
+        (environmentId) =>
+          registry
+            .run(
+              environmentId,
+              request(WS_METHODS.serverReportClientActivity, createActivityReport(environmentId)),
+            )
+            .pipe(Effect.ignore),
+        { concurrency: "unbounded", discard: true },
+      );
+    }).pipe(Effect.withSpan("web.backgroundActivity.report"));
+
+    yield* Effect.acquireRelease(
+      Effect.sync(() => {
+        retainedScopeListeners.add(requestReport);
+        document.addEventListener("visibilitychange", 
/**
 * Shape returned by {@link useResourceTelemetry}: the fields spread from the
 * environment query plus an explicit `retry` action.
 */
export interface ResourceTelemetryState {
  readonly data: ResourceTelemetrySnapshot | null;
  readonly error: string | null;
  readonly isPending: boolean;
  readonly refresh: () => void;
  /**
   * Runs the retry command for the primary environment and resolves with the
   * `snapshot` carried by its result. Rejects when no environment is selected
   * or when the command fails.
   */
  // NOTE(review): the type argument is restored from the imported snapshot type;
  // confirm it matches the result of `serverEnvironment.retryResourceTelemetry`.
  readonly retry: () => Promise<ResourceTelemetrySnapshot>;
}

/**
 * Reads resource telemetry for the primary environment and exposes a `retry`
 * action alongside the query state.
 *
 * When no primary environment exists, `null` is passed to `useEnvironmentQuery`
 * and `retry` rejects with "No environment is selected.".
 */
export function useResourceTelemetry(): ResourceTelemetryState {
  const primaryEnvironment = usePrimaryEnvironment();
  const environmentId = primaryEnvironment?.environmentId ?? null;
  const query = useEnvironmentQuery(
    environmentId === null
      ? null
      : serverEnvironment.resourceTelemetry({ environmentId, input: {} }),
  );
  // Failures are surfaced to the caller of `retry` by throwing below, so the
  // command itself is configured with `reportFailure: false`.
  const retryCommand = useAtomCommand(serverEnvironment.retryResourceTelemetry, {
    reportFailure: false,
  });
  const retry = useCallback(async () => {
    if (environmentId === null) {
      throw new Error("No environment is selected.");
    }
    const result = await retryCommand({ environmentId, input: {} });
    if (result._tag === "Failure") {
      // Collapse the failure cause into a single throwable value.
      throw Cause.squash(result.cause);
    }
    return result.value.snapshot;
  }, [environmentId, retryCommand]);

  return { ...query, retry };
}

/**
 * Reads resource telemetry history for the primary environment.
 *
 * @param input - History request forwarded unchanged to
 *   `serverEnvironment.resourceTelemetryHistory`.
 * @returns Whatever `useEnvironmentQuery` returns; `null` is passed to it while
 *   no primary environment exists.
 */
export function useResourceTelemetryHistory(input: ResourceTelemetryHistoryInput) {
  const primaryEnvironment = usePrimaryEnvironment();
  const environmentId = primaryEnvironment?.environmentId ?? null;
  return useEnvironmentQuery(
    environmentId === null
      ? null
      : serverEnvironment.resourceTelemetryHistory({ environmentId, input }),
  );
}
backend is paired.", - ); + expect(api).not.toHaveProperty("server"); + expect(api.shell).not.toHaveProperty("openInEditor"); }); it("uses the browser context-menu fallback without a desktop bridge", async () => { @@ -120,12 +115,4 @@ describe("LocalApi", () => { await api.persistence.setClientSettings(settings); await expect(api.persistence.getClientSettings()).resolves.toEqual(settings); }); - - it("prefers the native LocalApi when one is injected", async () => { - const nativeApi = { dialogs: {} }; - testWindow().nativeApi = nativeApi as never; - const { readLocalApi } = await import("./localApi"); - - expect(readLocalApi()).toBe(nativeApi); - }); }); diff --git a/apps/web/src/localApi.ts b/apps/web/src/localApi.ts index 2fbf183f91b..b42702c7a4a 100644 --- a/apps/web/src/localApi.ts +++ b/apps/web/src/localApi.ts @@ -6,10 +6,6 @@ import { readBrowserClientSettings, writeBrowserClientSettings } from "./clientP let cachedApi: LocalApi | undefined; -function unavailableLocalBackendError(): Error { - return new Error("Local backend API is unavailable before a backend is paired."); -} - function createBrowserLocalApi(): LocalApi { return { dialogs: { @@ -25,7 +21,6 @@ function createBrowserLocalApi(): LocalApi { }, }, shell: { - openInEditor: () => Promise.reject(unavailableLocalBackendError()), openExternal: async (url) => { if (window.desktopBridge) { const opened = await window.desktopBridge.openExternal(url); @@ -63,20 +58,6 @@ function createBrowserLocalApi(): LocalApi { writeBrowserClientSettings(settings); }, }, - server: { - getConfig: () => Promise.reject(unavailableLocalBackendError()), - refreshProviders: () => Promise.reject(unavailableLocalBackendError()), - updateProvider: () => Promise.reject(unavailableLocalBackendError()), - upsertKeybinding: () => Promise.reject(unavailableLocalBackendError()), - removeKeybinding: () => Promise.reject(unavailableLocalBackendError()), - getSettings: () => Promise.reject(unavailableLocalBackendError()), - 
updateSettings: () => Promise.reject(unavailableLocalBackendError()), - discoverSourceControl: () => Promise.reject(unavailableLocalBackendError()), - getTraceDiagnostics: () => Promise.reject(unavailableLocalBackendError()), - getProcessDiagnostics: () => Promise.reject(unavailableLocalBackendError()), - getProcessResourceHistory: () => Promise.reject(unavailableLocalBackendError()), - signalProcess: () => Promise.reject(unavailableLocalBackendError()), - }, }; } @@ -88,12 +69,7 @@ export function readLocalApi(): LocalApi | undefined { if (typeof window === "undefined") return undefined; if (cachedApi) return cachedApi; - if (window.nativeApi) { - cachedApi = window.nativeApi; - return cachedApi; - } - - cachedApi = createBrowserLocalApi(); + cachedApi = createLocalApi(); return cachedApi; } diff --git a/apps/web/src/vite-env.d.ts b/apps/web/src/vite-env.d.ts index d8a6d71b49a..d4fae7378f8 100644 --- a/apps/web/src/vite-env.d.ts +++ b/apps/web/src/vite-env.d.ts @@ -1,6 +1,6 @@ /// -import type { DesktopBridge, LocalApi } from "@t3tools/contracts"; +import type { DesktopBridge } from "@t3tools/contracts"; interface ImportMetaEnv { readonly VITE_HTTP_URL: string; @@ -21,7 +21,6 @@ interface ImportMeta { declare global { interface Window { - nativeApi?: LocalApi; desktopBridge?: DesktopBridge; } } diff --git a/docs/architecture/overview.md b/docs/architecture/overview.md index ce5a0afe92a..236af7b2c07 100644 --- a/docs/architecture/overview.md +++ b/docs/architecture/overview.md @@ -37,6 +37,10 @@ T3 Code runs as a **Node.js WebSocket server** that wraps `codex app-server` (JS - **Runtime signals**: The server emits lightweight typed receipts when important async milestones finish, such as checkpoint capture, diff finalization, or a turn becoming fully quiescent. Tests and orchestration code wait on these signals instead of polling internal state. 
+Related design: + +- [Resource telemetry architecture](./resource-telemetry.md) + ## Event Lifecycle ### Startup and client connect diff --git a/docs/architecture/resource-telemetry.md b/docs/architecture/resource-telemetry.md new file mode 100644 index 00000000000..a72483472cc --- /dev/null +++ b/docs/architecture/resource-telemetry.md @@ -0,0 +1,371 @@ +# Resource telemetry architecture + +Status: implemented + +## Purpose + +Resource telemetry replaces recurring `ps`, PowerShell, `ioreg`, and `pmset` +subprocess probes with two persistent, direct data sources: + +1. a standalone Rust resource-monitor executable that reads process counters + through operating-system APIs via `sysinfo`; +2. Electron main-process APIs for Electron process metrics and host power state. + +The native monitor owns bounded in-memory history. The server only merges and +summarizes that history when diagnostics requests it. Telemetry history is not +persisted to disk or continuously copied into Node. + +## Why a standalone executable + +The monitor is intentionally not a Node native addon. + +- No N-API, `ffi-rs`, or dynamic-library ABI is loaded into the server process. +- A monitor crash cannot corrupt the Node runtime. +- The server can supervise, restart, version-check, and measure the monitor as a + normal child process. +- The same protocol works for the desktop app and the published CLI. +- Packaging is a single platform executable instead of an addon toolchain plus + Node/Electron ABI matrix. + +The cost is one persistent child process and NDJSON serialization. That is a +better failure boundary than repeatedly spawning shell utilities or loading +native code into Node. 
+ +## Runtime topology + +### Desktop + +```text +Electron main + ├─ powerMonitor + ├─ app.getAppMetrics() while diagnostics is open + ├─ inherited fd 4, telemetry NDJSON ─────────────┐ + └─ inherited fd 5, demand-control NDJSON ◀──────┤ + ▼ +Node server ── stdin/stdout NDJSON ── Rust resource monitor + │ + ├─ ResourceTelemetry Effect service + ├─ background power policy projection + └─ WebSocket RPC/subscription ── diagnostics UI +``` + +### Web, headless, and remote server + +Electron telemetry is unavailable. The native monitor still runs beside the +server and tracks the server process tree. Power fields degrade to `unknown` +instead of invoking platform shell commands. + +## Native monitor + +The executable lives in `native/resource-monitor`. + +It receives schema-compatible commands on stdin and emits one JSON object per +line on stdout: + +- `configure` +- `setExternalProcesses` +- `setSampleInterval` +- `setStreaming` +- `sampleNow` +- `readHistory` +- `shutdown` +- `hello` +- `snapshot` +- `historyChunk` +- `error` + +The protocol version is defined by +`RESOURCE_MONITOR_PROTOCOL_VERSION` in +`packages/contracts/src/resourceTelemetry.ts`. + +### Collection + +The monitor keeps one `sysinfo::System` instance and refreshes it at the +power-adaptive interval selected by the server. It collects: + +- PID and parent PID; +- process start time and run time; +- process name and command line; +- current and cumulative CPU usage; +- resident and virtual memory; +- cumulative process I/O counters. + +On Linux, task/thread enumeration is disabled. Command lines are loaded only +when first needed. This avoids the expensive default behavior of walking every +`/proc/<pid>/task/` directory on each refresh. 
+ +### Process-tree selection + +Each sample scans the accessible process table, builds the PID/PPID graph, and +retains: + +- the server process; +- every descendant of the server, including provider-spawned grandchildren such + as shells, `node`, `tsgo`, language servers, and other tools; +- Electron processes supplied as explicit external roots; +- descendants of those Electron roots; +- the resource monitor itself, because it is a server child. + +Process identity is `(pid, startTimeMs)`, not PID alone. Electron and native +start times are matched with a two-second tolerance because native start times +can have coarser platform resolution. + +The process list is emitted in depth-first tree order so renderer collapse and +expansion preserves complete subtrees. + +### Native history and streaming + +Every native sample is appended to a one-hour in-memory ring bounded to 3,600 +snapshots and 20,000 retained process rows. History stays in the sidecar until a +`readHistory` request and is returned in bounded chunks. The first bound reached +wins, so high process counts shorten the effective history window. + +Periodic snapshot streaming is disabled by default. The server enables it only +while at least one diagnostics subscription is retained. `sampleNow` remains +available for explicit refreshes and identity validation. + +The server adjusts native sampling without restarting the sidecar: + +- suspended: paused; +- locked, low-power, or serious/critical thermal state: 15 seconds; +- battery: 5 seconds; +- normal AC: 1 second; +- unknown or stale power: 5 seconds in the background and 1 second while live + diagnostics is open. + +### Sampling limits + +This is counter sampling, not syscall tracing. + +- A process that starts and exits entirely between samples may not be observed. +- Cumulative CPU and I/O counters still provide accurate deltas for processes + that survive across samples. 
+- Exact file paths, individual write syscalls, ETW events, eBPF events, and + Endpoint Security events are outside this implementation. + +Those deeper tracing systems can be added later as opt-in diagnostic modes +without changing the public `ResourceTelemetry` model. + +## I/O semantics + +The monitor preserves platform semantics instead of presenting all counters as +equivalent: + +- Unix-like platforms report storage I/O counters exposed by `sysinfo`. +- Windows reports all process I/O bytes, not only disk bytes. +- Operating-system caches can prevent logical application reads or writes from + appearing as physical storage bytes. + +The UI therefore labels these values as I/O reads and writes and exposes the +per-process `ioSemantics` value. + +Group totals are observed deltas since telemetry startup. Per-process total +columns are the operating system's cumulative counters for that process. + +## Electron telemetry + +Electron main owns `DesktopTelemetryPublisher`. + +Power events trigger an immediate snapshot. A low-rate 30-second heartbeat keeps +the server-side power state fresh while diagnostics is closed. During that +heartbeat Electron reads: + +- `powerMonitor.isOnBatteryPower()`; +- `powerMonitor.getSystemIdleTime()`; +- `powerMonitor.getSystemIdleState()`; +- `powerMonitor.getCurrentThermalState()`. + +`app.getAppMetrics()` is only called while diagnostics demand is active. Its +live cadence is 1 second on AC, 5 seconds on battery, and 15 seconds while +locked, suspended, or thermally constrained. + +It also listens for: + +- lock and unlock; +- suspend and resume; +- AC and battery transitions; +- thermal-state changes; +- CPU speed-limit changes. + +Electron does not expose a cross-platform low-power-mode getter, so that field +remains `unknown`. + +The desktop backend is spawned with: + +- fd 3 for the existing bootstrap payload; +- fd 4 for Electron-to-server telemetry NDJSON; +- fd 5 for server-to-Electron diagnostics-demand NDJSON. 
+ +These are private Electron-main/server pipes. They do not use the renderer +WebSocket and are recreated for every backend restart. + +## Server Effect services + +The implementation is under `apps/server/src/resourceTelemetry`. + +### `ResourceMonitorBinary` + +Resolves an executable from: + +1. `T3CODE_RESOURCE_MONITOR_PATH`; +2. desktop bootstrap configuration; +3. bundled CLI resources; +4. local Cargo build outputs. + +Unsupported platforms, missing binaries, and non-executable binaries use +schema-backed tagged errors with descriptive messages. + +### `NativeTelemetryClient` + +Owns the resource-monitor process and protocol. + +- validates the hello/version handshake; +- sends configuration and external process roots; +- adapts the native interval from host power state; +- enables streaming only for scoped live subscribers; +- reads chunked native history on demand; +- exposes `sampleNow`; +- serializes commands; +- supervises process exit and protocol failure; +- restarts with bounded exponential backoff; +- opens a circuit after repeated failures; +- supports explicit retry; +- publishes health changes immediately. + +Snapshot sequence numbers are scoped to a monitor generation. Server ingestion +uses the monitor restart count as the generation key, so sequence reset after a +restart cannot freeze telemetry. + +### `DesktopTelemetryReceiver` + +Reads fd 4, decodes schema-validated messages, stores the latest Electron +snapshot, and publishes desktop health. It writes diagnostics demand to fd 5 +and marks the source stale after 90 seconds without a heartbeat. Decode errors, +protocol mismatch, control-write failure, stream failure, stale input, and +normal stream closure are represented explicitly. + +### `ResourceTelemetry` + +Merges native and Electron data and owns public telemetry semantics. 
+ +- calculates CPU and I/O rates from cumulative native counters; +- preserves the last native rates during desktop-only updates; +- classifies backend, Electron, and monitor processes; +- computes process depth and child relationships; +- tracks starts, exits, CPU time, and observed I/O; +- projects power data; +- acquires native streaming and Electron process metrics only for scoped live + subscribers; +- queries and replays native history only when requested; +- validates `(pid, startTimeMs)` before process signaling; +- updates history health even when no further native sample arrives. + +Electron and monitor processes are visible but are not valid targets for the +existing process-signal RPC. + +### History projection + +`ResourceTelemetryHistory` is a pure on-demand projection. It replays raw native +snapshots to derive rates, lifecycle counters, buckets, and process summaries. +Current Electron process metrics are intentionally excluded from historical +replay so they cannot overwrite older native CPU or memory samples. + +### `ResourceAttribution` + +Tracks known logical application I/O separately from OS counters. Current +integration points record successful writes for: + +- provider native and canonical event logs; +- the local server trace sink. + +Entries contain component, operation, logical bytes, count, and elapsed time. +Future persistence paths should call `ResourceAttribution.record` rather than +adding diagnostics-specific counters. + +## Background policy integration + +`HostPowerMonitor` consumes `DesktopTelemetryReceiver` directly; observing host +power does not retain live resource diagnostics or invoke shell probes. + +The monitor updates its latest timestamp on every Electron sample but only +publishes semantic state changes. Increasing idle seconds alone does not cause a +background-policy broadcast every second. 
+ +## Public API and UI + +The WebSocket RPC surface provides: + +- current snapshot; +- bounded history; +- explicit monitor retry; +- a live snapshot subscription. + +The diagnostics page displays: + +- aggregate CPU, memory, I/O, and process counts; +- backend, Electron, and monitor overhead groups; +- power and thermal state; +- collector health and restart information; +- CPU and I/O history; +- a collapsible live process tree; +- safe process signaling for backend descendants; +- instrumented logical application I/O. + +Legacy process diagnostics RPCs are projected from the same service so they no +longer start recurring process-table commands. + +## Packaging + +Desktop artifact builds compile the Rust target, stage it as +`resources/resource-monitor/t3-resource-monitor[.exe]`, and pass its path to the +backend bootstrap. + +CLI release jobs upload each active platform monitor artifact and copy it into: + +```text +apps/server/dist/resource-monitor/<platform>-<arch>/ +``` + +The published server package already includes `dist`, so those executables ship +with the CLI. Missing platform artifacts degrade native telemetry to +`unavailable`; the server continues running. + +## Resource and failure behavior + +Steady state uses: + +- one native process; +- power-adaptive native counter sampling with no periodic Node snapshot stream; +- event-driven Electron power updates plus a 30-second heartbeat; +- no `app.getAppMetrics()` calls while diagnostics is closed; +- no telemetry database; +- no recurring shell probes; +- bounded PubSub queues and native ring history. + +The diagnostics page exposes the monitor's own process resource usage and +collection duration so the observer's cost is measurable. 
+ +Failures are isolated: + +- native failure does not stop the server; +- Electron telemetry loss does not stop native telemetry; +- schema/version errors are visible in health; +- repeated native failures stop automatic restart churn until explicit retry; +- server and desktop shutdown close their respective streams and child process + scopes. + +## Future integration points + +High-value follow-up work can use the existing service boundaries: + +- opt-in file-path attribution through platform-specific tracing; +- process lifecycle events to reduce the chance of missing very short-lived + children; +- additional `ResourceAttribution` instrumentation for databases, checkpoints, + caches, and file synchronization; +- exported diagnostic bundles; +- adaptive sample intervals based on diagnostics visibility and active work. + +These additions should preserve the current rules: direct platform APIs, +schema-validated boundaries, explicit metric semantics, bounded retention, and +no mandatory telemetry persistence. diff --git a/native/resource-monitor/Cargo.lock b/native/resource-monitor/Cargo.lock new file mode 100644 index 00000000000..cdc5f952288 --- /dev/null +++ b/native/resource-monitor/Cargo.lock @@ -0,0 +1,343 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. 
+version = 4 + +[[package]] +name = "bitflags" +version = "2.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b4388bee8683e3d04af747c73422af53102d2bd24d9eadb6cbc100baef4b43f8" + +[[package]] +name = "dispatch2" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e0e367e4e7da84520dedcac1901e4da967309406d1e51017ae1abfb97adbd38" +dependencies = [ + "bitflags", + "objc2", +] + +[[package]] +name = "itoa" +version = "1.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f42a60cbdf9a97f5d2305f08a87dc4e09308d1276d28c869c684d7777685682" + +[[package]] +name = "libc" +version = "0.2.186" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68ab91017fe16c622486840e4c83c9a37afeff978bd239b5293d61ece587de66" + +[[package]] +name = "memchr" +version = "2.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "88904434abc2901f197fe8cc55f0445e7ded921dba5911dad2e2b39b48e663c4" + +[[package]] +name = "ntapi" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c3b335231dfd352ffb0f8017f3b6027a4917f7df785ea2143d8af2adc66980ae" +dependencies = [ + "winapi", +] + +[[package]] +name = "objc2" +version = "0.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3a12a8ed07aefc768292f076dc3ac8c48f3781c8f2d5851dd3d98950e8c5a89f" +dependencies = [ + "objc2-encode", +] + +[[package]] +name = "objc2-core-foundation" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2a180dd8642fa45cdb7dd721cd4c11b1cadd4929ce112ebd8b9f5803cc79d536" +dependencies = [ + "bitflags", + "dispatch2", + "objc2", +] + +[[package]] +name = "objc2-encode" +version = "4.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ef25abbcd74fb2609453eb695bd2f860d389e457f67dc17cafc8b8cbc89d0c33" + +[[package]] 
+name = "objc2-foundation" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3e0adef53c21f888deb4fa59fc59f7eb17404926ee8a6f59f5df0fd7f9f3272" +dependencies = [ + "bitflags", + "objc2", +] + +[[package]] +name = "objc2-io-kit" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "33fafba39597d6dc1fb709123dfa8289d39406734be322956a69f0931c73bb15" +dependencies = [ + "libc", + "objc2-core-foundation", +] + +[[package]] +name = "objc2-open-directory" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bb82bed227edf5201dfedf072bba4015a33d3d4a98519837295a90f0a23f676d" +dependencies = [ + "objc2", + "objc2-core-foundation", + "objc2-foundation", +] + +[[package]] +name = "proc-macro2" +version = "1.0.106" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8fd00f0bb2e90d81d1044c2b32617f68fcb9fa3bb7640c23e9c748e53fb30934" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.45" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41f2619966050689382d2b44f664f4bc593e129785a36d6ee376ddf37259b924" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "serde" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e" +dependencies = [ + "serde_core", + "serde_derive", +] + +[[package]] +name = "serde_core" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" +dependencies = [ + "proc-macro2", + 
"quote", + "syn", +] + +[[package]] +name = "serde_json" +version = "1.0.150" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e8014e44b4736ed0538adeecded0fce2a272f22dc9578a7eb6b2d9993c74cfb9" +dependencies = [ + "itoa", + "memchr", + "serde", + "serde_core", + "zmij", +] + +[[package]] +name = "syn" +version = "2.0.118" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1b9ae57f904213ebb649ce6895b8a66c66f0203b9319718f69a5612a065b1422" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "sysinfo" +version = "0.39.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "21d0d938c10fcda3e897e28aaddf4ab462375d411f4378cd63b1c945f69aba96" +dependencies = [ + "libc", + "memchr", + "ntapi", + "objc2-core-foundation", + "objc2-io-kit", + "objc2-open-directory", + "windows", +] + +[[package]] +name = "t3-resource-monitor" +version = "0.1.0" +dependencies = [ + "serde", + "serde_json", + "sysinfo", +] + +[[package]] +name = "unicode-ident" +version = "1.0.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75" + +[[package]] +name = "winapi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" + +[[package]] +name = "windows" +version = 
"0.62.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "527fadee13e0c05939a6a05d5bd6eec6cd2e3dbd648b9f8e447c6518133d8580" +dependencies = [ + "windows-collections", + "windows-core", + "windows-future", + "windows-numerics", +] + +[[package]] +name = "windows-collections" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "23b2d95af1a8a14a3c7367e1ed4fc9c20e0a26e79551b1454d72583c97cc6610" +dependencies = [ + "windows-core", +] + +[[package]] +name = "windows-core" +version = "0.62.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8e83a14d34d0623b51dce9581199302a221863196a1dde71a7663a4c2be9deb" +dependencies = [ + "windows-implement", + "windows-interface", + "windows-link", + "windows-result", + "windows-strings", +] + +[[package]] +name = "windows-future" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e1d6f90251fe18a279739e78025bd6ddc52a7e22f921070ccdc67dde84c605cb" +dependencies = [ + "windows-core", + "windows-link", + "windows-threading", +] + +[[package]] +name = "windows-implement" +version = "0.60.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "053e2e040ab57b9dc951b72c264860db7eb3b0200ba345b4e4c3b14f67855ddf" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "windows-interface" +version = "0.59.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f316c4a2570ba26bbec722032c4099d8c8bc095efccdc15688708623367e358" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "windows-link" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" + +[[package]] +name = "windows-numerics" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"6e2e40844ac143cdb44aead537bbf727de9b044e107a0f1220392177d15b0f26" +dependencies = [ + "windows-core", + "windows-link", +] + +[[package]] +name = "windows-result" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7781fa89eaf60850ac3d2da7af8e5242a5ea78d1a11c49bf2910bb5a73853eb5" +dependencies = [ + "windows-link", +] + +[[package]] +name = "windows-strings" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7837d08f69c77cf6b07689544538e017c1bfcf57e34b4c0ff58e6c2cd3b37091" +dependencies = [ + "windows-link", +] + +[[package]] +name = "windows-threading" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3949bd5b99cafdf1c7ca86b43ca564028dfe27d66958f2470940f73d86d75b37" +dependencies = [ + "windows-link", +] + +[[package]] +name = "zmij" +version = "1.0.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8848ee67ecc8aedbaf3e4122217aff892639231befc6a1b58d29fff4c2cabaa" diff --git a/native/resource-monitor/Cargo.toml b/native/resource-monitor/Cargo.toml new file mode 100644 index 00000000000..30cf2ad7892 --- /dev/null +++ b/native/resource-monitor/Cargo.toml @@ -0,0 +1,17 @@ +[package] +name = "t3-resource-monitor" +version = "0.1.0" +edition = "2024" +license = "MIT" +publish = false + +[dependencies] +serde = { version = "1.0.228", features = ["derive"] } +serde_json = "1.0.150" +sysinfo = "0.39.3" + +[profile.release] +codegen-units = 1 +lto = "thin" +panic = "abort" +strip = true diff --git a/native/resource-monitor/src/main.rs b/native/resource-monitor/src/main.rs new file mode 100644 index 00000000000..c093cfdaa5a --- /dev/null +++ b/native/resource-monitor/src/main.rs @@ -0,0 +1,766 @@ +use serde::{Deserialize, Serialize}; +use std::collections::{HashMap, HashSet, VecDeque}; +use std::io::{self, BufRead, BufWriter, Write}; +use std::sync::mpsc::{self, Receiver, RecvTimeoutError}; +use 
std::thread; +use std::time::{Duration, Instant, SystemTime, UNIX_EPOCH}; +use sysinfo::{Pid, ProcessRefreshKind, ProcessesToUpdate, System, UpdateKind}; + +const PROTOCOL_VERSION: u32 = 2; +const MIN_SAMPLE_INTERVAL_MS: u64 = 250; +const MAX_SAMPLE_INTERVAL_MS: u64 = 60_000; +const EXTERNAL_PROCESS_START_TOLERANCE_MS: u64 = 2_000; +const HISTORY_RETENTION_MS: u64 = 60 * 60_000; +const MAX_HISTORY_SNAPSHOTS: usize = 3_600; +const MAX_HISTORY_PROCESS_SAMPLES: usize = 20_000; +const HISTORY_CHUNK_SNAPSHOTS: usize = 32; + +#[derive(Debug, Clone, Deserialize)] +#[serde(rename_all = "camelCase")] +struct ExternalProcess { + pid: u32, + #[serde(default)] + start_time_ms: Option, +} + +#[derive(Debug, Deserialize)] +#[serde( + tag = "type", + rename_all = "camelCase", + rename_all_fields = "camelCase" +)] +enum Command { + Configure { + version: u32, + root_pid: u32, + sample_interval_ms: u64, + #[serde(default)] + external_processes: Vec, + }, + SetExternalProcesses { + version: u32, + processes: Vec, + }, + SetSampleInterval { + version: u32, + sample_interval_ms: u64, + }, + SetStreaming { + version: u32, + enabled: bool, + }, + SampleNow { + version: u32, + request_id: String, + }, + ReadHistory { + version: u32, + request_id: String, + window_ms: u64, + }, + Shutdown { + version: u32, + }, +} + +impl Command { + fn version(&self) -> u32 { + match self { + Self::Configure { version, .. } + | Self::SetExternalProcesses { version, .. } + | Self::SetSampleInterval { version, .. } + | Self::SetStreaming { version, .. } + | Self::SampleNow { version, .. } + | Self::ReadHistory { version, .. 
} + | Self::Shutdown { version } => *version, + } + } +} + +enum Input { + Command(Command), + Invalid(String), +} + +#[derive(Debug, Serialize)] +#[serde(rename_all = "camelCase")] +struct Capabilities { + cumulative_cpu_time: bool, + current_cpu_percent: bool, + resident_memory: bool, + virtual_memory: bool, + io_bytes: bool, + process_start_time: bool, + process_tree: bool, +} + +#[derive(Debug, Serialize)] +#[serde(rename_all = "camelCase")] +struct HelloEvent { + version: u32, + #[serde(rename = "type")] + event_type: &'static str, + sidecar_version: &'static str, + sidecar_pid: u32, + platform: &'static str, + arch: &'static str, + capabilities: Capabilities, +} + +#[derive(Debug, Clone, Copy, Serialize)] +#[serde(rename_all = "kebab-case")] +enum IoSemantics { + Storage, + AllIo, +} + +#[derive(Debug, Clone, Serialize)] +#[serde(rename_all = "camelCase")] +struct ProcessSample { + pid: u32, + ppid: u32, + start_time_ms: u64, + run_time_ms: u64, + name: String, + command: String, + status: String, + cpu_percent: f32, + cpu_time_ms: u64, + resident_bytes: u64, + virtual_bytes: u64, + io_read_bytes: u64, + io_write_bytes: u64, + io_semantics: IoSemantics, +} + +#[derive(Debug, Clone, Serialize)] +#[serde(rename_all = "camelCase")] +struct SnapshotEvent { + version: u32, + #[serde(rename = "type")] + event_type: &'static str, + sequence: u64, + sampled_at_unix_ms: u64, + collection_duration_micros: u64, + scanned_process_count: usize, + retained_process_count: usize, + inaccessible_process_count: usize, + #[serde(skip_serializing_if = "Option::is_none")] + request_id: Option, + processes: Vec, +} + +#[derive(Debug, Serialize)] +#[serde(rename_all = "camelCase")] +struct HistoryChunkEvent<'a> { + version: u32, + #[serde(rename = "type")] + event_type: &'static str, + request_id: &'a str, + done: bool, + snapshots: &'a [SnapshotEvent], +} + +#[derive(Debug, Serialize)] +#[serde(rename_all = "camelCase")] +struct ErrorEvent { + version: u32, + #[serde(rename = 
"type")] + event_type: &'static str, + code: &'static str, + message: String, + recoverable: bool, +} + +#[derive(Debug, Clone)] +struct CollectorConfig { + root_pid: u32, + sample_interval: Option, + external_processes: HashMap>, +} + +#[derive(Default)] +struct HistoryRecorder { + snapshots: VecDeque, + process_sample_count: usize, +} + +impl HistoryRecorder { + fn record(&mut self, snapshot: &SnapshotEvent) { + let mut retained = snapshot.clone(); + retained.request_id = None; + self.process_sample_count = self + .process_sample_count + .saturating_add(retained.processes.len()); + self.snapshots.push_back(retained); + self.trim(snapshot.sampled_at_unix_ms); + } + + fn trim(&mut self, now_ms: u64) { + while self.snapshots.front().is_some_and(|snapshot| { + snapshot.sampled_at_unix_ms < now_ms.saturating_sub(HISTORY_RETENTION_MS) + || self.snapshots.len() > MAX_HISTORY_SNAPSHOTS + || self.process_sample_count > MAX_HISTORY_PROCESS_SAMPLES + }) { + if let Some(removed) = self.snapshots.pop_front() { + self.process_sample_count = self + .process_sample_count + .saturating_sub(removed.processes.len()); + } + } + } + + fn read(&self, window_ms: u64, now_ms: u64) -> Vec { + let started_at_ms = now_ms.saturating_sub(window_ms.min(HISTORY_RETENTION_MS)); + self.snapshots + .iter() + .filter(|snapshot| snapshot.sampled_at_unix_ms >= started_at_ms) + .cloned() + .collect() + } +} + +struct Collector { + system: System, + sequence: u64, +} + +impl Collector { + fn new() -> Self { + Self { + system: System::new(), + sequence: 0, + } + } + + fn sample(&mut self, config: &CollectorConfig, request_id: Option) -> SnapshotEvent { + let collection_started = Instant::now(); + self.system.refresh_processes_specifics( + ProcessesToUpdate::All, + true, + process_refresh_kind(), + ); + + let rows = self + .system + .processes() + .iter() + .map(|(pid, process)| { + let pid = pid.as_u32(); + let ppid = process.parent().map(Pid::as_u32).unwrap_or(0); + (pid, ppid, 
process.start_time().saturating_mul(1_000)) + }) + .collect::>(); + let mut roots = config + .external_processes + .iter() + .filter_map(|(pid, expected_start_time_ms)| { + let (_, _, actual_start_time_ms) = rows + .iter() + .find(|(candidate_pid, _, _)| candidate_pid == pid)?; + matches_external_identity(*actual_start_time_ms, *expected_start_time_ms) + .then_some(*pid) + }) + .collect::>(); + roots.insert(config.root_pid); + let tracked = select_tracked_pids(&rows, &roots); + let mut processes = tracked + .into_iter() + .filter_map(|pid| { + let process = self.system.process(Pid::from_u32(pid))?; + let disk_usage = process.disk_usage(); + let command = if process.cmd().is_empty() { + process.name().to_string_lossy().into_owned() + } else { + process + .cmd() + .iter() + .map(|part| part.to_string_lossy()) + .collect::>() + .join(" ") + }; + + Some(ProcessSample { + pid, + ppid: process.parent().map(Pid::as_u32).unwrap_or(0), + start_time_ms: process.start_time().saturating_mul(1_000), + run_time_ms: process.run_time().saturating_mul(1_000), + name: process.name().to_string_lossy().into_owned(), + command, + status: format!("{:?}", process.status()), + cpu_percent: process.cpu_usage(), + cpu_time_ms: process.accumulated_cpu_time(), + resident_bytes: process.memory(), + virtual_bytes: process.virtual_memory(), + io_read_bytes: disk_usage.total_read_bytes, + io_write_bytes: disk_usage.total_written_bytes, + io_semantics: io_semantics(), + }) + }) + .collect::>(); + processes.sort_by_key(|process| process.pid); + self.sequence = self.sequence.saturating_add(1); + + SnapshotEvent { + version: PROTOCOL_VERSION, + event_type: "snapshot", + sequence: self.sequence, + sampled_at_unix_ms: unix_time_ms(), + collection_duration_micros: collection_started.elapsed().as_micros() as u64, + scanned_process_count: self.system.processes().len(), + retained_process_count: processes.len(), + inaccessible_process_count: 0, + request_id, + processes, + } + } +} + +fn 
process_refresh_kind() -> ProcessRefreshKind { + ProcessRefreshKind::nothing() + .with_memory() + .with_cpu() + .with_disk_usage() + .with_cmd(UpdateKind::OnlyIfNotSet) + .without_tasks() +} + +fn matches_external_identity( + actual_start_time_ms: u64, + expected_start_time_ms: Option, +) -> bool { + expected_start_time_ms.is_none_or(|expected| { + actual_start_time_ms.abs_diff(expected) <= EXTERNAL_PROCESS_START_TOLERANCE_MS + }) +} + +fn select_tracked_pids(rows: &[(u32, u32, u64)], roots: &HashSet) -> HashSet { + let mut children_by_parent = HashMap::>::new(); + for (pid, ppid, _) in rows { + children_by_parent.entry(*ppid).or_default().push(*pid); + } + + let known_pids = rows.iter().map(|(pid, _, _)| *pid).collect::>(); + let mut tracked = HashSet::new(); + let mut queue = roots + .iter() + .copied() + .filter(|pid| known_pids.contains(pid)) + .collect::>(); + + while let Some(pid) = queue.pop_front() { + if !tracked.insert(pid) { + continue; + } + if let Some(children) = children_by_parent.get(&pid) { + queue.extend(children.iter().copied()); + } + } + + tracked +} + +fn io_semantics() -> IoSemantics { + if cfg!(target_os = "windows") { + IoSemantics::AllIo + } else { + IoSemantics::Storage + } +} + +fn unix_time_ms() -> u64 { + SystemTime::now() + .duration_since(UNIX_EPOCH) + .unwrap_or_default() + .as_millis() as u64 +} + +fn clamp_sample_interval(sample_interval_ms: u64) -> Option { + (sample_interval_ms > 0).then(|| { + Duration::from_millis( + sample_interval_ms.clamp(MIN_SAMPLE_INTERVAL_MS, MAX_SAMPLE_INTERVAL_MS), + ) + }) +} + +fn spawn_input_reader() -> Receiver { + let (sender, receiver) = mpsc::channel(); + thread::spawn(move || { + let stdin = io::stdin(); + for line in stdin.lock().lines() { + let line = match line { + Ok(line) => line, + Err(error) => { + let _ = sender.send(Input::Invalid(format!( + "failed reading command stream: {error}" + ))); + return; + } + }; + if line.trim().is_empty() { + continue; + } + match 
serde_json::from_str::(&line) { + Ok(command) => { + if sender.send(Input::Command(command)).is_err() { + return; + } + } + Err(error) => { + if sender + .send(Input::Invalid(format!("invalid command: {error}"))) + .is_err() + { + return; + } + } + } + } + }); + receiver +} + +fn write_event(writer: &mut impl Write, event: &T) -> io::Result<()> { + serde_json::to_writer(&mut *writer, event)?; + writer.write_all(b"\n")?; + writer.flush() +} + +fn write_error( + writer: &mut impl Write, + code: &'static str, + message: impl Into, + recoverable: bool, +) -> io::Result<()> { + write_event( + writer, + &ErrorEvent { + version: PROTOCOL_VERSION, + event_type: "error", + code, + message: message.into(), + recoverable, + }, + ) +} + +fn write_history( + writer: &mut impl Write, + request_id: &str, + snapshots: &[SnapshotEvent], +) -> io::Result<()> { + if snapshots.is_empty() { + return write_event( + writer, + &HistoryChunkEvent { + version: PROTOCOL_VERSION, + event_type: "historyChunk", + request_id, + done: true, + snapshots, + }, + ); + } + + let chunk_count = snapshots.len().div_ceil(HISTORY_CHUNK_SNAPSHOTS); + for (index, chunk) in snapshots.chunks(HISTORY_CHUNK_SNAPSHOTS).enumerate() { + write_event( + writer, + &HistoryChunkEvent { + version: PROTOCOL_VERSION, + event_type: "historyChunk", + request_id, + done: index + 1 == chunk_count, + snapshots: chunk, + }, + )?; + } + Ok(()) +} + +fn main() -> io::Result<()> { + let mut writer = BufWriter::new(io::stdout().lock()); + write_event( + &mut writer, + &HelloEvent { + version: PROTOCOL_VERSION, + event_type: "hello", + sidecar_version: env!("CARGO_PKG_VERSION"), + sidecar_pid: std::process::id(), + platform: std::env::consts::OS, + arch: std::env::consts::ARCH, + capabilities: Capabilities { + cumulative_cpu_time: true, + current_cpu_percent: true, + resident_memory: true, + virtual_memory: true, + io_bytes: true, + process_start_time: true, + process_tree: true, + }, + }, + )?; + + let receiver = 
spawn_input_reader(); + let mut collector = Collector::new(); + let mut history = HistoryRecorder::default(); + let mut config: Option = None; + let mut next_sample_at: Option = None; + let mut streaming_enabled = false; + + loop { + let timeout = next_sample_at + .map(|deadline| deadline.saturating_duration_since(Instant::now())) + .unwrap_or(Duration::from_secs(60)); + + match receiver.recv_timeout(timeout) { + Ok(Input::Invalid(message)) => { + write_error(&mut writer, "invalid-command", message, true)?; + } + Ok(Input::Command(command)) => { + if command.version() != PROTOCOL_VERSION { + write_error( + &mut writer, + "protocol-mismatch", + format!( + "unsupported protocol version {}; expected {PROTOCOL_VERSION}", + command.version() + ), + false, + )?; + continue; + } + + match command { + Command::Configure { + root_pid, + sample_interval_ms, + external_processes, + .. + } => { + let sample_interval = clamp_sample_interval(sample_interval_ms); + config = Some(CollectorConfig { + root_pid, + sample_interval, + external_processes: external_processes + .into_iter() + .map(|process| (process.pid, process.start_time_ms)) + .collect(), + }); + next_sample_at = sample_interval.map(|_| Instant::now()); + } + Command::SetExternalProcesses { processes, .. } => { + if let Some(current) = config.as_mut() { + current.external_processes = processes + .into_iter() + .map(|process| (process.pid, process.start_time_ms)) + .collect(); + } else { + write_error( + &mut writer, + "not-configured", + "configure must be sent before external processes", + true, + )?; + } + } + Command::SetSampleInterval { + sample_interval_ms, .. 
+ } => { + if let Some(current) = config.as_mut() { + current.sample_interval = clamp_sample_interval(sample_interval_ms); + next_sample_at = current + .sample_interval + .map(|interval| Instant::now() + interval); + } else { + write_error( + &mut writer, + "not-configured", + "configure must be sent before changing the sample interval", + true, + )?; + } + } + Command::SetStreaming { enabled, .. } => { + streaming_enabled = enabled; + } + Command::SampleNow { request_id, .. } => { + if let Some(current) = config.as_ref() { + let event = collector.sample(current, Some(request_id)); + history.record(&event); + write_event(&mut writer, &event)?; + next_sample_at = current + .sample_interval + .map(|interval| Instant::now() + interval); + } else { + write_error( + &mut writer, + "not-configured", + "configure must be sent before sampling", + true, + )?; + } + } + Command::ReadHistory { + request_id, + window_ms, + .. + } => { + if config.is_some() { + let snapshots = history.read(window_ms, unix_time_ms()); + write_history(&mut writer, &request_id, &snapshots)?; + } else { + write_error( + &mut writer, + "not-configured", + "configure must be sent before reading history", + true, + )?; + } + } + Command::Shutdown { .. 
} => return Ok(()), + } + } + Err(RecvTimeoutError::Timeout) => { + if let Some(current) = config.as_ref() { + if let Some(interval) = current.sample_interval { + let event = collector.sample(current, None); + history.record(&event); + if streaming_enabled { + write_event(&mut writer, &event)?; + } + next_sample_at = Some(Instant::now() + interval); + } else { + next_sample_at = None; + } + } + } + Err(RecvTimeoutError::Disconnected) => return Ok(()), + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn selects_roots_and_all_descendants() { + let rows = vec![ + (10, 1, 1_000), + (11, 10, 1_100), + (12, 11, 1_200), + (20, 1, 2_000), + (21, 20, 2_100), + (30, 99, 3_000), + ]; + let tracked = select_tracked_pids(&rows, &HashSet::from([10, 20])); + + assert_eq!(tracked, HashSet::from([10, 11, 12, 20, 21])); + } + + #[test] + fn ignores_missing_roots() { + let rows = vec![(10, 1, 1_000), (11, 10, 1_100)]; + let tracked = select_tracked_pids(&rows, &HashSet::from([99])); + + assert!(tracked.is_empty()); + } + + #[test] + fn validates_external_process_start_identity() { + assert!(matches_external_identity(10_000, None)); + assert!(matches_external_identity(10_000, Some(11_999))); + assert!(!matches_external_identity(10_000, Some(12_001))); + } + + #[test] + fn decodes_protocol_commands() { + let configure = serde_json::from_str::( + r#"{"version":2,"type":"configure","rootPid":42,"sampleIntervalMs":1000,"externalProcesses":[{"pid":7}]}"#, + ) + .expect("configure command"); + + match configure { + Command::Configure { + root_pid, + sample_interval_ms, + external_processes, + .. 
+ } => { + assert_eq!(root_pid, 42); + assert_eq!(sample_interval_ms, 1_000); + assert_eq!(external_processes[0].pid, 7); + assert_eq!(external_processes[0].start_time_ms, None); + } + _ => panic!("unexpected command"), + } + + let read_history = serde_json::from_str::( + r#"{"version":2,"type":"readHistory","requestId":"history-1","windowMs":60000}"#, + ) + .expect("read history command"); + assert!(matches!( + read_history, + Command::ReadHistory { + request_id, + window_ms: 60_000, + .. + } if request_id == "history-1" + )); + } + + #[test] + fn clamps_sample_interval() { + assert_eq!(clamp_sample_interval(0), None); + assert_eq!(clamp_sample_interval(1), Some(Duration::from_millis(250))); + assert_eq!( + clamp_sample_interval(100_000), + Some(Duration::from_millis(60_000)) + ); + } + + #[test] + fn retains_bounded_history_without_request_ids() { + let mut history = HistoryRecorder::default(); + for sequence in 0..=MAX_HISTORY_SNAPSHOTS { + history.record(&SnapshotEvent { + version: PROTOCOL_VERSION, + event_type: "snapshot", + sequence: sequence as u64, + sampled_at_unix_ms: sequence as u64 * 1_000, + collection_duration_micros: 1, + scanned_process_count: 0, + retained_process_count: 0, + inaccessible_process_count: 0, + request_id: Some("request".to_owned()), + processes: Vec::new(), + }); + } + + assert_eq!(history.snapshots.len(), MAX_HISTORY_SNAPSHOTS); + assert!( + history + .snapshots + .iter() + .all(|snapshot| snapshot.request_id.is_none()) + ); + assert_eq!( + history + .read(10_000, MAX_HISTORY_SNAPSHOTS as u64 * 1_000) + .len(), + 11 + ); + } + + #[test] + fn refreshes_commands_without_enumerating_linux_tasks() { + let refresh_kind = process_refresh_kind(); + + assert_eq!(refresh_kind.cmd(), UpdateKind::OnlyIfNotSet); + assert!(!refresh_kind.tasks()); + assert!(refresh_kind.cpu()); + assert!(refresh_kind.memory()); + assert!(refresh_kind.disk_usage()); + } +} diff --git a/package.json b/package.json index f97275e60bb..90ecaec834b 100644 --- 
a/package.json +++ b/package.json @@ -16,11 +16,13 @@ "build": "vp run --filter './apps/*' --filter './packages/*' --filter './oxlint-plugin-t3code' --filter './scripts' build", "build:marketing": "vp run --filter @t3tools/marketing build", "build:desktop": "vp run --filter @t3tools/desktop --filter t3 build", + "build:resource-monitor": "cargo build --locked --release --manifest-path native/resource-monitor/Cargo.toml", "typecheck": "vp run -r --concurrency-limit 2 typecheck", "tc": "vp run -r --concurrency-limit 2 typecheck", "lint": "vp lint --report-unused-disable-directives", "lint:mobile": "node scripts/mobile-native-static-check.ts", "test": "vp run -r test", + "test:resource-monitor": "cargo test --locked --manifest-path native/resource-monitor/Cargo.toml", "test:desktop-smoke": "vp run --filter @t3tools/desktop smoke-test", "fmt": "vp fmt", "fmt:check": "vp fmt --check", diff --git a/packages/client-runtime/src/rpc/client.ts b/packages/client-runtime/src/rpc/client.ts index 882d8f51b53..75596f80bef 100644 --- a/packages/client-runtime/src/rpc/client.ts +++ b/packages/client-runtime/src/rpc/client.ts @@ -49,6 +49,7 @@ export type EnvironmentSubscriptionRpcTag = | typeof WS_METHODS.subscribeTerminalMetadata | typeof WS_METHODS.subscribePreviewEvents | typeof WS_METHODS.subscribeDiscoveredLocalServers + | typeof WS_METHODS.subscribeResourceTelemetry | typeof WS_METHODS.previewAutomationConnect | typeof WS_METHODS.subscribeVcsStatus | typeof WS_METHODS.terminalAttach; @@ -62,6 +63,23 @@ export type EnvironmentStreamRpcTag = | EnvironmentStreamCommandRpcTag; export type EnvironmentUnaryRpcTag = Exclude; + +export interface EnvironmentRpcSubscriptionObservation { + readonly environmentId: string; + readonly method: EnvironmentSubscriptionRpcTag; + readonly input: unknown; +} + +export class EnvironmentRpcSubscriptionObserver extends Context.Reference<{ + readonly observe: ( + subscription: EnvironmentRpcSubscriptionObservation, + ) => Effect.Effect>; 
+}>("@t3tools/client-runtime/rpc/EnvironmentRpcSubscriptionObserver", { + defaultValue: () => ({ + observe: () => Effect.succeed(Effect.void), + }), +}) {} + const isRpcClientError = Schema.is(RpcClientError.RpcClientError); export type EnvironmentRpcInput = Parameters>[0]; @@ -163,73 +181,79 @@ export function subscribe( EnvironmentSupervisor > { return Stream.unwrap( - EnvironmentSupervisor.pipe( - Effect.map((supervisor) => - SubscriptionRef.changes(supervisor.session).pipe( - Stream.switchMap( - Option.match({ - onNone: () => Stream.empty, - onSome: (session) => { - const method = session.client[tag] as ( - input: EnvironmentRpcInput, - ) => Stream.Stream< - EnvironmentRpcStreamValue, - EnvironmentRpcStreamFailure - >; - const subscribeToSession = (): Stream.Stream< - EnvironmentRpcStreamValue, - EnvironmentRpcStreamFailure - > => - Stream.suspend(() => - method(input).pipe( - Stream.catchCause((cause) => { - const hasOnlyExpectedFailures = - cause.reasons.length > 0 && - cause.reasons.every((reason) => reason._tag === "Fail"); - const isTransportFailure = - hasOnlyExpectedFailures && - cause.reasons.every( - (reason) => reason._tag === "Fail" && isRpcClientError(reason.error), - ); - if (isTransportFailure) { - return Stream.fromEffect( - Effect.logWarning( - "Durable RPC subscription lost its transport; waiting for the next session.", - { - cause: Cause.pretty(cause), - method: tag, - environmentId: supervisor.target.environmentId, - }, - ), - ).pipe(Stream.drain); + Effect.gen(function* () { + const supervisor = yield* EnvironmentSupervisor; + const observer = yield* EnvironmentRpcSubscriptionObserver; + const completeObservation = yield* observer.observe({ + environmentId: supervisor.target.environmentId, + method: tag, + input, + }); + return SubscriptionRef.changes(supervisor.session).pipe( + Stream.switchMap( + Option.match({ + onNone: () => Stream.empty, + onSome: (session) => { + const method = session.client[tag] as ( + input: EnvironmentRpcInput, + ) 
=> Stream.Stream< + EnvironmentRpcStreamValue, + EnvironmentRpcStreamFailure + >; + const subscribeToSession = (): Stream.Stream< + EnvironmentRpcStreamValue, + EnvironmentRpcStreamFailure + > => + Stream.suspend(() => + method(input).pipe( + Stream.catchCause((cause) => { + const hasOnlyExpectedFailures = + cause.reasons.length > 0 && + cause.reasons.every((reason) => reason._tag === "Fail"); + const isTransportFailure = + hasOnlyExpectedFailures && + cause.reasons.every( + (reason) => reason._tag === "Fail" && isRpcClientError(reason.error), + ); + if (isTransportFailure) { + return Stream.fromEffect( + Effect.logWarning( + "Durable RPC subscription lost its transport; waiting for the next session.", + { + cause: Cause.pretty(cause), + method: tag, + environmentId: supervisor.target.environmentId, + }, + ), + ).pipe(Stream.drain); + } + if (hasOnlyExpectedFailures && options?.onExpectedFailure !== undefined) { + const handled = Stream.fromEffect(options.onExpectedFailure(cause)).pipe( + Stream.drain, + ); + if (options.retryExpectedFailureAfter === undefined) { + return handled; } - if (hasOnlyExpectedFailures && options?.onExpectedFailure !== undefined) { - const handled = Stream.fromEffect(options.onExpectedFailure(cause)).pipe( - Stream.drain, - ); - if (options.retryExpectedFailureAfter === undefined) { - return handled; - } - return handled.pipe( - Stream.concat( - Stream.fromEffect( - Effect.sleep(options.retryExpectedFailureAfter), - ).pipe(Stream.drain), + return handled.pipe( + Stream.concat( + Stream.fromEffect(Effect.sleep(options.retryExpectedFailureAfter)).pipe( + Stream.drain, ), - Stream.concat(subscribeToSession()), - ); - } - return Stream.failCause(cause); - }), - ), - ); - return subscribeToSession(); - }, - }), - ), + ), + Stream.concat(subscribeToSession()), + ); + } + return Stream.failCause(cause); + }), + ), + ); + return subscribeToSession(); + }, + }), ), - ), - ), + Stream.ensuring(completeObservation), + ); + }), ).pipe( 
Stream.withSpan("EnvironmentRpc.subscribe", { attributes: { "rpc.method": tag }, diff --git a/packages/client-runtime/src/state/server.ts b/packages/client-runtime/src/state/server.ts index 23bb7bff2a9..1b33ba95ced 100644 --- a/packages/client-runtime/src/state/server.ts +++ b/packages/client-runtime/src/state/server.ts @@ -133,6 +133,16 @@ export function createServerEnvironmentAtoms( label: "environment-data:server:process-resource-history", tag: WS_METHODS.serverGetProcessResourceHistory, }), + resourceTelemetry: createEnvironmentRpcSubscriptionAtomFamily(runtime, { + label: "environment-data:server:resource-telemetry", + tag: WS_METHODS.subscribeResourceTelemetry, + idleTtlMs: 0, + }), + resourceTelemetryHistory: createEnvironmentRpcQueryAtomFamily(runtime, { + label: "environment-data:server:resource-telemetry-history", + tag: WS_METHODS.serverGetResourceTelemetryHistory, + staleTimeMs: 5_000, + }), configProjection, welcome: createEnvironmentRpcSubscriptionAtomFamily(runtime, { label: "environment-data:server:welcome", @@ -178,5 +188,13 @@ export function createServerEnvironmentAtoms( label: "environment-data:server:signal-process", tag: WS_METHODS.serverSignalProcess, }), + retryResourceTelemetry: createEnvironmentRpcCommand(runtime, { + label: "environment-data:server:retry-resource-telemetry", + tag: WS_METHODS.serverRetryResourceTelemetry, + concurrency: { + mode: "singleFlight", + key: ({ environmentId }) => environmentId, + }, + }), }; } diff --git a/packages/contracts/src/background.ts b/packages/contracts/src/background.ts new file mode 100644 index 00000000000..afa25de0768 --- /dev/null +++ b/packages/contracts/src/background.ts @@ -0,0 +1,101 @@ +import * as Schema from "effect/Schema"; + +import { AuthSessionId, EnvironmentId, RpcClientId, ThreadId } from "./baseSchemas.ts"; +import { ProviderInstanceId } from "./providerInstance.ts"; + +export const BackgroundBooleanState = Schema.Literals(["true", "false", "unknown"]); +export type 
BackgroundBooleanState = typeof BackgroundBooleanState.Type; + +export const HostPowerThermalState = Schema.Literals([ + "unknown", + "nominal", + "fair", + "serious", + "critical", +]); +export type HostPowerThermalState = typeof HostPowerThermalState.Type; + +export const HostPowerSource = Schema.Literals([ + "unknown", + "node-macos-shell", + "node-macos-native", + "node-linux", + "node-windows", + "electron-main", +]); +export type HostPowerSource = typeof HostPowerSource.Type; + +export const HostPowerSnapshot = Schema.Struct({ + source: HostPowerSource, + idle: BackgroundBooleanState, + idleSeconds: Schema.NullOr(Schema.Number), + locked: BackgroundBooleanState, + suspended: Schema.Boolean, + onBattery: BackgroundBooleanState, + lowPowerMode: BackgroundBooleanState, + thermalState: HostPowerThermalState, + stale: Schema.Boolean, + updatedAt: Schema.DateTimeUtc, +}); +export type HostPowerSnapshot = typeof HostPowerSnapshot.Type; + +export const BackgroundScope = Schema.Union([ + Schema.Struct({ type: Schema.Literal("server-config") }), + Schema.Struct({ + type: Schema.Literal("provider-status"), + instanceId: Schema.optionalKey(ProviderInstanceId), + }), + Schema.Struct({ type: Schema.Literal("vcs-status"), cwd: Schema.String }), + Schema.Struct({ type: Schema.Literal("git-refs"), cwd: Schema.String }), + Schema.Struct({ type: Schema.Literal("diagnostics") }), + Schema.Struct({ type: Schema.Literal("thread"), threadId: ThreadId }), +]); +export type BackgroundScope = typeof BackgroundScope.Type; + +export const ClientKind = Schema.Literals(["web", "desktop-renderer", "mobile", "unknown"]); +export type ClientKind = typeof ClientKind.Type; + +export const ClientActivityReportInput = Schema.Struct({ + environmentId: Schema.optionalKey(EnvironmentId), + clientId: Schema.String, + clientKind: ClientKind, + visible: Schema.Boolean, + focused: Schema.Boolean, + recentlyInteracted: Schema.Boolean, + appState: Schema.optionalKey(Schema.Literals(["active", "inactive", 
"background", "unknown"])), + lowPowerMode: Schema.optionalKey(BackgroundBooleanState), + batteryState: Schema.optionalKey(Schema.Literals(["unknown", "unplugged", "charging", "full"])), + networkType: Schema.optionalKey(Schema.String), + scopes: Schema.Array(BackgroundScope), + ttlMs: Schema.optionalKey(Schema.Number), + observedAt: Schema.DateTimeUtc, +}); +export type ClientActivityReportInput = typeof ClientActivityReportInput.Type; + +export const ClientActivityLease = Schema.Struct({ + sessionId: AuthSessionId, + rpcClientId: RpcClientId, + clientId: Schema.String, + clientKind: ClientKind, + visible: Schema.Boolean, + focused: Schema.Boolean, + recentlyInteracted: Schema.Boolean, + appState: Schema.optionalKey(Schema.Literals(["active", "inactive", "background", "unknown"])), + lowPowerMode: Schema.optionalKey(BackgroundBooleanState), + batteryState: Schema.optionalKey(Schema.Literals(["unknown", "unplugged", "charging", "full"])), + networkType: Schema.optionalKey(Schema.String), + scopes: Schema.Array(BackgroundScope), + updatedAt: Schema.DateTimeUtc, + expiresAt: Schema.DateTimeUtc, +}); +export type ClientActivityLease = typeof ClientActivityLease.Type; + +export const BackgroundPolicySnapshot = Schema.Struct({ + hostPower: HostPowerSnapshot, + leases: Schema.Array(ClientActivityLease), + activeForegroundLeaseCount: Schema.Number, + activeScopeKeys: Schema.Array(Schema.String), + shouldRunOpportunisticWork: Schema.Boolean, + updatedAt: Schema.DateTimeUtc, +}); +export type BackgroundPolicySnapshot = typeof BackgroundPolicySnapshot.Type; diff --git a/packages/contracts/src/baseSchemas.ts b/packages/contracts/src/baseSchemas.ts index 614ea5131fb..a8fa565cef4 100644 --- a/packages/contracts/src/baseSchemas.ts +++ b/packages/contracts/src/baseSchemas.ts @@ -43,6 +43,8 @@ export const TurnId = makeEntityId("TurnId"); export type TurnId = typeof TurnId.Type; export const AuthSessionId = makeEntityId("AuthSessionId"); export type AuthSessionId = typeof 
AuthSessionId.Type; +export const RpcClientId = NonNegativeInt.pipe(Schema.brand("RpcClientId")); +export type RpcClientId = typeof RpcClientId.Type; export const ProviderItemId = makeEntityId("ProviderItemId"); export type ProviderItemId = typeof ProviderItemId.Type; diff --git a/packages/contracts/src/desktopBootstrap.ts b/packages/contracts/src/desktopBootstrap.ts index c23dbbb3960..61826e64998 100644 --- a/packages/contracts/src/desktopBootstrap.ts +++ b/packages/contracts/src/desktopBootstrap.ts @@ -1,6 +1,6 @@ import * as Schema from "effect/Schema"; -import { PortSchema } from "./baseSchemas.ts"; +import { PortSchema, PositiveInt, TrimmedNonEmptyString } from "./baseSchemas.ts"; export const DesktopBackendBootstrap = Schema.Struct({ mode: Schema.Literal("desktop"), @@ -13,6 +13,9 @@ export const DesktopBackendBootstrap = Schema.Struct({ tailscaleServePort: PortSchema, otlpTracesUrl: Schema.optional(Schema.String), otlpMetricsUrl: Schema.optional(Schema.String), + desktopTelemetryFd: Schema.optionalKey(PositiveInt), + desktopTelemetryControlFd: Schema.optionalKey(PositiveInt), + resourceMonitorPath: Schema.optionalKey(TrimmedNonEmptyString), }); export type DesktopBackendBootstrap = typeof DesktopBackendBootstrap.Type; diff --git a/packages/contracts/src/index.ts b/packages/contracts/src/index.ts index 43270efdec7..a79341de2f1 100644 --- a/packages/contracts/src/index.ts +++ b/packages/contracts/src/index.ts @@ -1,4 +1,5 @@ export * from "./baseSchemas.ts"; +export * from "./background.ts"; export * from "./auth.ts"; export * from "./environment.ts"; export * from "./environmentHttp.ts"; @@ -25,4 +26,5 @@ export * from "./assets.ts"; export * from "./review.ts"; export * from "./preview.ts"; export * from "./previewAutomation.ts"; +export * from "./resourceTelemetry.ts"; export * from "./rpc.ts"; diff --git a/packages/contracts/src/ipc.ts b/packages/contracts/src/ipc.ts index 03c06d2f81a..9f505c7d9db 100644 --- a/packages/contracts/src/ipc.ts +++ 
b/packages/contracts/src/ipc.ts @@ -1,115 +1,19 @@ -import type { - VcsCreateRefInput, - VcsCreateRefResult, - VcsCreateWorktreeInput, - VcsCreateWorktreeResult, - VcsInitInput, - VcsListRefsInput, - VcsListRefsResult, - VcsPullInput, - VcsPullResult, - VcsRemoveWorktreeInput, - VcsSwitchRefInput, - VcsSwitchRefResult, - GitPreparePullRequestThreadInput, - GitPreparePullRequestThreadResult, - GitPullRequestRefInput, - GitResolvePullRequestResult, - VcsStatusInput, - VcsStatusResult, -} from "./git.ts"; -import type { ReviewDiffPreviewInput, ReviewDiffPreviewResult } from "./review.ts"; -import type { FilesystemBrowseInput, FilesystemBrowseResult } from "./filesystem.ts"; -import type { AssetCreateUrlInput, AssetCreateUrlResult } from "./assets.ts"; -import type { - ProjectListEntriesInput, - ProjectListEntriesResult, - ProjectReadFileInput, - ProjectReadFileResult, - ProjectSearchEntriesInput, - ProjectSearchEntriesResult, - ProjectWriteFileInput, - ProjectWriteFileResult, -} from "./project.ts"; -import type { ProviderInstanceId } from "./providerInstance.ts"; -import type { - ServerConfig, - ServerProcessDiagnosticsResult, - ServerProcessResourceHistoryInput, - ServerProcessResourceHistoryResult, - ServerProviderUpdateInput, - ServerProviderUpdatedPayload, - ServerRemoveKeybindingResult, - ServerSignalProcessInput, - ServerSignalProcessResult, - ServerTraceDiagnosticsResult, - ServerUpsertKeybindingResult, -} from "./server.ts"; -import type { - TerminalAttachInput, - TerminalAttachStreamEvent, - TerminalClearInput, - TerminalCloseInput, - TerminalMetadataStreamEvent, - TerminalOpenInput, - TerminalResizeInput, - TerminalRestartInput, - TerminalSessionSnapshot, - TerminalWriteInput, -} from "./terminal.ts"; -import type { ServerRemoveKeybindingInput, ServerUpsertKeybindingInput } from "./server.ts"; import * as Schema from "effect/Schema"; -import type { - DiscoveredLocalServerList, - PreviewCloseInput, - PreviewEvent, - PreviewListInput, - PreviewListResult, - 
PreviewNavigateInput, - PreviewOpenInput, - PreviewRefreshInput, - PreviewReportStatusInput, - PreviewSessionSnapshot, -} from "./preview.ts"; import { PreviewAutomationClickInput, PreviewAutomationEvaluateInput, - PreviewAutomationOwner, PreviewAutomationPressInput, - PreviewAutomationRequest, - PreviewAutomationResponse, PreviewAutomationScrollInput, PreviewAutomationSnapshot, PreviewAutomationStatus, PreviewAutomationTypeInput, PreviewAutomationWaitForInput, } from "./previewAutomation.ts"; -import type { - ClientOrchestrationCommand, - OrchestrationGetFullThreadDiffInput, - OrchestrationGetFullThreadDiffResult, - OrchestrationGetTurnDiffInput, - OrchestrationGetTurnDiffResult, - OrchestrationShellSnapshot, - OrchestrationShellStreamItem, - OrchestrationSubscribeThreadInput, - OrchestrationThreadStreamItem, -} from "./orchestration.ts"; import { EnvironmentId } from "./baseSchemas.ts"; import { AuthAccessTokenResult, AuthSessionState, AuthWebSocketTicketResult } from "./auth.ts"; import { AdvertisedEndpoint } from "./remoteAccess.ts"; -import { EditorId } from "./editor.ts"; import { ExecutionEnvironmentDescriptor } from "./environment.ts"; -import type { ClientSettings, ServerSettings, ServerSettingsPatch } from "./settings.ts"; -import type { - SourceControlCloneRepositoryInput, - SourceControlCloneRepositoryResult, - SourceControlDiscoveryResult, - SourceControlPublishRepositoryInput, - SourceControlPublishRepositoryResult, - SourceControlRepositoryInfo, - SourceControlRepositoryLookupInput, -} from "./sourceControl.ts"; +import type { ClientSettings } from "./settings.ts"; export interface ContextMenuItem { id: T; @@ -1021,7 +925,7 @@ export interface DesktopPreviewBridge { * APIs bound to the local app shell, not to any particular backend environment. 
* * These capabilities describe the desktop/browser host that the user is - * currently running: dialogs, editor/external-link opening, context menus, and + * currently running: dialogs, external-link opening, context menus, and * app-level settings/config access. They must not be used as a proxy for * "whatever environment the user is targeting", because in a multi-environment * world the local shell and a selected backend environment are distinct @@ -1033,7 +937,6 @@ export interface LocalApi { confirm: (message: string) => Promise; }; shell: { - openInEditor: (cwd: string, editor: EditorId) => Promise; openExternal: (url: string) => Promise; }; contextMenu: { @@ -1046,156 +949,4 @@ export interface LocalApi { getClientSettings: () => Promise; setClientSettings: (settings: ClientSettings) => Promise; }; - server: { - getConfig: () => Promise; - /** - * Refresh provider snapshots. When `input.instanceId` is supplied only that - * configured instance is probed; otherwise every configured instance is - * refreshed (legacy untargeted refresh). - */ - refreshProviders: (input?: { - readonly instanceId?: ProviderInstanceId; - }) => Promise; - updateProvider: (input: ServerProviderUpdateInput) => Promise; - upsertKeybinding: (input: ServerUpsertKeybindingInput) => Promise; - removeKeybinding: (input: ServerRemoveKeybindingInput) => Promise; - getSettings: () => Promise; - updateSettings: (patch: ServerSettingsPatch) => Promise; - discoverSourceControl: () => Promise; - getTraceDiagnostics: () => Promise; - getProcessDiagnostics: () => Promise; - getProcessResourceHistory: ( - input: ServerProcessResourceHistoryInput, - ) => Promise; - signalProcess: (input: ServerSignalProcessInput) => Promise; - }; -} - -/** - * APIs bound to a specific backend environment connection. - * - * These operations must always be routed with explicit environment context. 
- * They represent remote stateful capabilities such as orchestration, terminal, - * project, VCS, and provider operations. In multi-environment mode, each environment gets - * its own instance of this surface, and callers should resolve it by - * `environmentId` rather than reaching through the local desktop bridge. - */ -export interface EnvironmentApi { - terminal: { - open: (input: typeof TerminalOpenInput.Encoded) => Promise; - attach: ( - input: typeof TerminalAttachInput.Encoded, - callback: (event: TerminalAttachStreamEvent) => void, - options?: { - onResubscribe?: () => void; - }, - ) => () => void; - write: (input: typeof TerminalWriteInput.Encoded) => Promise; - resize: (input: typeof TerminalResizeInput.Encoded) => Promise; - clear: (input: typeof TerminalClearInput.Encoded) => Promise; - restart: (input: typeof TerminalRestartInput.Encoded) => Promise; - close: (input: typeof TerminalCloseInput.Encoded) => Promise; - onMetadata: ( - callback: (event: TerminalMetadataStreamEvent) => void, - options?: { - onResubscribe?: () => void; - }, - ) => () => void; - }; - projects: { - listEntries: (input: ProjectListEntriesInput) => Promise; - readFile: (input: ProjectReadFileInput) => Promise; - searchEntries: (input: ProjectSearchEntriesInput) => Promise; - writeFile: (input: ProjectWriteFileInput) => Promise; - }; - filesystem: { - browse: (input: FilesystemBrowseInput) => Promise; - }; - assets: { - createUrl: (input: AssetCreateUrlInput) => Promise; - }; - sourceControl: { - lookupRepository: ( - input: SourceControlRepositoryLookupInput, - ) => Promise; - cloneRepository: ( - input: SourceControlCloneRepositoryInput, - ) => Promise; - publishRepository: ( - input: SourceControlPublishRepositoryInput, - ) => Promise; - }; - vcs: { - listRefs: (input: VcsListRefsInput) => Promise; - createWorktree: (input: VcsCreateWorktreeInput) => Promise; - removeWorktree: (input: VcsRemoveWorktreeInput) => Promise; - createRef: (input: VcsCreateRefInput) => Promise; - 
switchRef: (input: VcsSwitchRefInput) => Promise; - init: (input: VcsInitInput) => Promise; - pull: (input: VcsPullInput) => Promise; - refreshStatus: (input: VcsStatusInput) => Promise; - onStatus: ( - input: VcsStatusInput, - callback: (status: VcsStatusResult) => void, - options?: { - onResubscribe?: () => void; - }, - ) => () => void; - }; - git: { - resolvePullRequest: (input: GitPullRequestRefInput) => Promise; - preparePullRequestThread: ( - input: GitPreparePullRequestThreadInput, - ) => Promise; - }; - review: { - getDiffPreview: (input: ReviewDiffPreviewInput) => Promise; - }; - orchestration: { - dispatchCommand: (command: ClientOrchestrationCommand) => Promise<{ sequence: number }>; - getTurnDiff: (input: OrchestrationGetTurnDiffInput) => Promise; - getFullThreadDiff: ( - input: OrchestrationGetFullThreadDiffInput, - ) => Promise; - getArchivedShellSnapshot: () => Promise; - subscribeShell: ( - callback: (event: OrchestrationShellStreamItem) => void, - options?: { - onResubscribe?: () => void; - }, - ) => () => void; - subscribeThread: ( - input: OrchestrationSubscribeThreadInput, - callback: (event: OrchestrationThreadStreamItem) => void, - options?: { - onResubscribe?: () => void; - }, - ) => () => void; - }; - preview: { - open: (input: typeof PreviewOpenInput.Encoded) => Promise; - navigate: (input: typeof PreviewNavigateInput.Encoded) => Promise; - refresh: (input: typeof PreviewRefreshInput.Encoded) => Promise; - close: (input: typeof PreviewCloseInput.Encoded) => Promise; - list: (input: typeof PreviewListInput.Encoded) => Promise; - reportStatus: (input: typeof PreviewReportStatusInput.Encoded) => Promise; - automation: { - connect: ( - input: { clientId: string }, - callback: (request: PreviewAutomationRequest) => void, - options?: { onResubscribe?: () => void }, - ) => () => void; - respond: (response: PreviewAutomationResponse) => Promise; - reportOwner: (owner: PreviewAutomationOwner) => Promise; - clearOwner: (input: { clientId: string }) 
=> Promise; - }; - onEvent: ( - callback: (event: PreviewEvent) => void, - options?: { onResubscribe?: () => void }, - ) => () => void; - subscribePorts: ( - callback: (servers: DiscoveredLocalServerList) => void, - options?: { onResubscribe?: () => void }, - ) => () => void; - }; } diff --git a/packages/contracts/src/resourceTelemetry.ts b/packages/contracts/src/resourceTelemetry.ts new file mode 100644 index 00000000000..5827fd854c9 --- /dev/null +++ b/packages/contracts/src/resourceTelemetry.ts @@ -0,0 +1,412 @@ +import * as Schema from "effect/Schema"; + +import { NonNegativeInt, PositiveInt, TrimmedNonEmptyString } from "./baseSchemas.ts"; +import { HostPowerSnapshot } from "./background.ts"; + +export const RESOURCE_MONITOR_PROTOCOL_VERSION = 2 as const; + +export const ResourceTelemetryIoSemantics = Schema.Literals([ + "storage", + "logical", + "all-io", + "unavailable", +]); +export type ResourceTelemetryIoSemantics = typeof ResourceTelemetryIoSemantics.Type; + +export const ResourceTelemetryProcessCategory = Schema.Literals([ + "server", + "server-child", + "provider-root", + "terminal-root", + "electron-main", + "electron-renderer", + "electron-gpu", + "electron-utility", + "resource-monitor", + "unknown-t3", +]); +export type ResourceTelemetryProcessCategory = typeof ResourceTelemetryProcessCategory.Type; + +export const ResourceTelemetrySourceStatus = Schema.Literals([ + "starting", + "healthy", + "degraded", + "unavailable", + "stopped", +]); +export type ResourceTelemetrySourceStatus = typeof ResourceTelemetrySourceStatus.Type; + +export const ResourceTelemetryProcessIdentity = Schema.Struct({ + pid: PositiveInt, + startTimeMs: NonNegativeInt, +}); +export type ResourceTelemetryProcessIdentity = typeof ResourceTelemetryProcessIdentity.Type; + +export const ResourceMonitorExternalProcess = Schema.Struct({ + pid: PositiveInt, + startTimeMs: Schema.optionalKey(NonNegativeInt), +}); +export type ResourceMonitorExternalProcess = typeof 
ResourceMonitorExternalProcess.Type; + +export const ResourceMonitorCapabilities = Schema.Struct({ + cumulativeCpuTime: Schema.Boolean, + currentCpuPercent: Schema.Boolean, + residentMemory: Schema.Boolean, + virtualMemory: Schema.Boolean, + ioBytes: Schema.Boolean, + processStartTime: Schema.Boolean, + processTree: Schema.Boolean, +}); +export type ResourceMonitorCapabilities = typeof ResourceMonitorCapabilities.Type; + +export const ResourceMonitorProcessSample = Schema.Struct({ + pid: PositiveInt, + ppid: NonNegativeInt, + startTimeMs: NonNegativeInt, + runTimeMs: NonNegativeInt, + name: Schema.String, + command: Schema.String, + status: Schema.String, + cpuPercent: Schema.Number, + cpuTimeMs: NonNegativeInt, + residentBytes: NonNegativeInt, + virtualBytes: NonNegativeInt, + ioReadBytes: NonNegativeInt, + ioWriteBytes: NonNegativeInt, + ioSemantics: Schema.Literals(["storage", "all-io"]), +}); +export type ResourceMonitorProcessSample = typeof ResourceMonitorProcessSample.Type; + +export const ResourceMonitorConfigureCommand = Schema.Struct({ + version: Schema.Literal(RESOURCE_MONITOR_PROTOCOL_VERSION), + type: Schema.Literal("configure"), + rootPid: PositiveInt, + sampleIntervalMs: NonNegativeInt, + externalProcesses: Schema.Array(ResourceMonitorExternalProcess), +}); +export type ResourceMonitorConfigureCommand = typeof ResourceMonitorConfigureCommand.Type; + +export const ResourceMonitorSetExternalProcessesCommand = Schema.Struct({ + version: Schema.Literal(RESOURCE_MONITOR_PROTOCOL_VERSION), + type: Schema.Literal("setExternalProcesses"), + processes: Schema.Array(ResourceMonitorExternalProcess), +}); +export type ResourceMonitorSetExternalProcessesCommand = + typeof ResourceMonitorSetExternalProcessesCommand.Type; + +export const ResourceMonitorSampleNowCommand = Schema.Struct({ + version: Schema.Literal(RESOURCE_MONITOR_PROTOCOL_VERSION), + type: Schema.Literal("sampleNow"), + requestId: TrimmedNonEmptyString, +}); +export type 
ResourceMonitorSampleNowCommand = typeof ResourceMonitorSampleNowCommand.Type; + +export const ResourceMonitorSetSampleIntervalCommand = Schema.Struct({ + version: Schema.Literal(RESOURCE_MONITOR_PROTOCOL_VERSION), + type: Schema.Literal("setSampleInterval"), + sampleIntervalMs: NonNegativeInt, +}); +export type ResourceMonitorSetSampleIntervalCommand = + typeof ResourceMonitorSetSampleIntervalCommand.Type; + +export const ResourceMonitorSetStreamingCommand = Schema.Struct({ + version: Schema.Literal(RESOURCE_MONITOR_PROTOCOL_VERSION), + type: Schema.Literal("setStreaming"), + enabled: Schema.Boolean, +}); +export type ResourceMonitorSetStreamingCommand = typeof ResourceMonitorSetStreamingCommand.Type; + +export const ResourceMonitorReadHistoryCommand = Schema.Struct({ + version: Schema.Literal(RESOURCE_MONITOR_PROTOCOL_VERSION), + type: Schema.Literal("readHistory"), + requestId: TrimmedNonEmptyString, + windowMs: NonNegativeInt, +}); +export type ResourceMonitorReadHistoryCommand = typeof ResourceMonitorReadHistoryCommand.Type; + +export const ResourceMonitorShutdownCommand = Schema.Struct({ + version: Schema.Literal(RESOURCE_MONITOR_PROTOCOL_VERSION), + type: Schema.Literal("shutdown"), +}); +export type ResourceMonitorShutdownCommand = typeof ResourceMonitorShutdownCommand.Type; + +export const ResourceMonitorCommand = Schema.Union([ + ResourceMonitorConfigureCommand, + ResourceMonitorSetExternalProcessesCommand, + ResourceMonitorSetSampleIntervalCommand, + ResourceMonitorSetStreamingCommand, + ResourceMonitorSampleNowCommand, + ResourceMonitorReadHistoryCommand, + ResourceMonitorShutdownCommand, +]); +export type ResourceMonitorCommand = typeof ResourceMonitorCommand.Type; + +export const ResourceMonitorHelloEvent = Schema.Struct({ + version: Schema.Literal(RESOURCE_MONITOR_PROTOCOL_VERSION), + type: Schema.Literal("hello"), + sidecarVersion: TrimmedNonEmptyString, + sidecarPid: PositiveInt, + platform: TrimmedNonEmptyString, + arch: TrimmedNonEmptyString, + 
capabilities: ResourceMonitorCapabilities, +}); +export type ResourceMonitorHelloEvent = typeof ResourceMonitorHelloEvent.Type; + +export const ResourceMonitorSnapshotEvent = Schema.Struct({ + version: Schema.Literal(RESOURCE_MONITOR_PROTOCOL_VERSION), + type: Schema.Literal("snapshot"), + sequence: NonNegativeInt, + sampledAtUnixMs: NonNegativeInt, + collectionDurationMicros: NonNegativeInt, + scannedProcessCount: NonNegativeInt, + retainedProcessCount: NonNegativeInt, + inaccessibleProcessCount: NonNegativeInt, + requestId: Schema.optionalKey(TrimmedNonEmptyString), + processes: Schema.Array(ResourceMonitorProcessSample), +}); +export type ResourceMonitorSnapshotEvent = typeof ResourceMonitorSnapshotEvent.Type; + +export const ResourceMonitorHistoryChunkEvent = Schema.Struct({ + version: Schema.Literal(RESOURCE_MONITOR_PROTOCOL_VERSION), + type: Schema.Literal("historyChunk"), + requestId: TrimmedNonEmptyString, + done: Schema.Boolean, + snapshots: Schema.Array(ResourceMonitorSnapshotEvent), +}); +export type ResourceMonitorHistoryChunkEvent = typeof ResourceMonitorHistoryChunkEvent.Type; + +export const ResourceMonitorErrorEvent = Schema.Struct({ + version: Schema.Literal(RESOURCE_MONITOR_PROTOCOL_VERSION), + type: Schema.Literal("error"), + code: TrimmedNonEmptyString, + message: TrimmedNonEmptyString, + recoverable: Schema.Boolean, +}); +export type ResourceMonitorErrorEvent = typeof ResourceMonitorErrorEvent.Type; + +export const ResourceMonitorEvent = Schema.Union([ + ResourceMonitorHelloEvent, + ResourceMonitorSnapshotEvent, + ResourceMonitorHistoryChunkEvent, + ResourceMonitorErrorEvent, +]); +export type ResourceMonitorEvent = typeof ResourceMonitorEvent.Type; + +export const DesktopElectronProcessType = Schema.Literals([ + "Browser", + "Tab", + "Utility", + "Zygote", + "Sandbox helper", + "GPU", + "Pepper Plugin", + "Pepper Plugin Broker", + "Unknown", +]); +export type DesktopElectronProcessType = typeof DesktopElectronProcessType.Type; + +export const 
DesktopElectronProcessMetric = Schema.Struct({ + pid: PositiveInt, + creationTimeMs: NonNegativeInt, + type: DesktopElectronProcessType, + name: Schema.optionalKey(Schema.String), + serviceName: Schema.optionalKey(Schema.String), + cpuPercent: Schema.Number, + cumulativeCpuSeconds: Schema.optionalKey(Schema.Number), + idleWakeupsPerSecond: Schema.Number, + workingSetBytes: NonNegativeInt, + peakWorkingSetBytes: NonNegativeInt, +}); +export type DesktopElectronProcessMetric = typeof DesktopElectronProcessMetric.Type; + +export const DesktopHostTelemetrySnapshot = Schema.Struct({ + version: Schema.Literal(1), + type: Schema.Literal("desktopTelemetry"), + sequence: NonNegativeInt, + sampledAtUnixMs: NonNegativeInt, + electronPid: PositiveInt, + power: HostPowerSnapshot, + speedLimitPercent: Schema.Option(Schema.Number), + electronProcesses: Schema.Array(DesktopElectronProcessMetric), +}); +export type DesktopHostTelemetrySnapshot = typeof DesktopHostTelemetrySnapshot.Type; + +export const DesktopHostTelemetryHello = Schema.Struct({ + version: Schema.Literal(1), + type: Schema.Literal("desktopTelemetryHello"), + electronPid: PositiveInt, +}); +export type DesktopHostTelemetryHello = typeof DesktopHostTelemetryHello.Type; + +export const DesktopHostTelemetryMessage = Schema.Union([ + DesktopHostTelemetryHello, + DesktopHostTelemetrySnapshot, +]); +export type DesktopHostTelemetryMessage = typeof DesktopHostTelemetryMessage.Type; + +export const DesktopTelemetrySetDiagnosticsDemand = Schema.Struct({ + version: Schema.Literal(1), + type: Schema.Literal("setDiagnosticsDemand"), + enabled: Schema.Boolean, +}); +export type DesktopTelemetrySetDiagnosticsDemand = typeof DesktopTelemetrySetDiagnosticsDemand.Type; + +export const DesktopTelemetryControlMessage = Schema.Union([DesktopTelemetrySetDiagnosticsDemand]); +export type DesktopTelemetryControlMessage = typeof DesktopTelemetryControlMessage.Type; + +export const ResourceTelemetryProcess = Schema.Struct({ + identity: 
ResourceTelemetryProcessIdentity, + ppid: NonNegativeInt, + childPids: Schema.Array(PositiveInt), + depth: NonNegativeInt, + name: Schema.String, + command: Schema.String, + status: Schema.String, + category: ResourceTelemetryProcessCategory, + electronType: Schema.optionalKey(DesktopElectronProcessType), + electronServiceName: Schema.optionalKey(Schema.String), + cpuPercent: Schema.Number, + cpuTimeMs: NonNegativeInt, + residentBytes: NonNegativeInt, + peakResidentBytes: NonNegativeInt, + virtualBytes: NonNegativeInt, + ioReadBytes: NonNegativeInt, + ioWriteBytes: NonNegativeInt, + ioReadBytesPerSecond: Schema.Number, + ioWriteBytesPerSecond: Schema.Number, + ioSemantics: ResourceTelemetryIoSemantics, + idleWakeupsPerSecond: Schema.optionalKey(Schema.Number), + runTimeMs: NonNegativeInt, + firstSeenAt: Schema.DateTimeUtc, + lastSeenAt: Schema.DateTimeUtc, +}); +export type ResourceTelemetryProcess = typeof ResourceTelemetryProcess.Type; + +export const ResourceTelemetryAggregate = Schema.Struct({ + processCount: NonNegativeInt, + currentCpuPercent: Schema.Number, + cpuTimeMs: NonNegativeInt, + currentRssBytes: NonNegativeInt, + peakRssBytes: NonNegativeInt, + ioReadBytes: NonNegativeInt, + ioWriteBytes: NonNegativeInt, + ioReadBytesPerSecond: Schema.Number, + ioWriteBytesPerSecond: Schema.Number, + processStarts: NonNegativeInt, + processExits: NonNegativeInt, +}); +export type ResourceTelemetryAggregate = typeof ResourceTelemetryAggregate.Type; + +export const ResourceTelemetryGroups = Schema.Struct({ + backend: ResourceTelemetryAggregate, + electron: ResourceTelemetryAggregate, + monitor: ResourceTelemetryAggregate, + allT3: ResourceTelemetryAggregate, +}); +export type ResourceTelemetryGroups = typeof ResourceTelemetryGroups.Type; + +export const ResourceTelemetrySourceHealth = Schema.Struct({ + status: ResourceTelemetrySourceStatus, + lastSampleAt: Schema.Option(Schema.DateTimeUtc), + lastError: Schema.Option(TrimmedNonEmptyString), +}); +export type 
ResourceTelemetrySourceHealth = typeof ResourceTelemetrySourceHealth.Type; + +export const ResourceTelemetryHealth = Schema.Struct({ + native: ResourceTelemetrySourceHealth, + desktop: ResourceTelemetrySourceHealth, + sidecarVersion: Schema.Option(TrimmedNonEmptyString), + sidecarPid: Schema.Option(PositiveInt), + restartCount: NonNegativeInt, + collectionDurationMicros: NonNegativeInt, + scannedProcessCount: NonNegativeInt, + retainedProcessCount: NonNegativeInt, + inaccessibleProcessCount: NonNegativeInt, +}); +export type ResourceTelemetryHealth = typeof ResourceTelemetryHealth.Type; + +export const ResourceAttributionEntry = Schema.Struct({ + component: TrimmedNonEmptyString, + operation: TrimmedNonEmptyString, + logicalReadBytes: NonNegativeInt, + logicalWriteBytes: NonNegativeInt, + count: NonNegativeInt, + durationMs: NonNegativeInt, +}); +export type ResourceAttributionEntry = typeof ResourceAttributionEntry.Type; + +export const ResourceAttributionSnapshot = Schema.Struct({ + readAt: Schema.DateTimeUtc, + entries: Schema.Array(ResourceAttributionEntry), +}); +export type ResourceAttributionSnapshot = typeof ResourceAttributionSnapshot.Type; + +export const ResourceTelemetrySnapshot = Schema.Struct({ + readAt: Schema.DateTimeUtc, + sampleIntervalMs: NonNegativeInt, + processes: Schema.Array(ResourceTelemetryProcess), + groups: ResourceTelemetryGroups, + power: HostPowerSnapshot, + speedLimitPercent: Schema.Option(Schema.Number), + attribution: ResourceAttributionSnapshot, + health: ResourceTelemetryHealth, +}); +export type ResourceTelemetrySnapshot = typeof ResourceTelemetrySnapshot.Type; + +export const ResourceTelemetryHistoryInput = Schema.Struct({ + windowMs: NonNegativeInt, + bucketMs: NonNegativeInt, +}); +export type ResourceTelemetryHistoryInput = typeof ResourceTelemetryHistoryInput.Type; + +export const ResourceTelemetryHistoryBucket = Schema.Struct({ + startedAt: Schema.DateTimeUtc, + endedAt: Schema.DateTimeUtc, + avgCpuPercent: Schema.Number, 
+ maxCpuPercent: Schema.Number, + maxRssBytes: NonNegativeInt, + ioReadBytes: NonNegativeInt, + ioWriteBytes: NonNegativeInt, + maxProcessCount: NonNegativeInt, +}); +export type ResourceTelemetryHistoryBucket = typeof ResourceTelemetryHistoryBucket.Type; + +export const ResourceTelemetryProcessSummary = Schema.Struct({ + identity: ResourceTelemetryProcessIdentity, + ppid: NonNegativeInt, + depth: NonNegativeInt, + name: Schema.String, + command: Schema.String, + category: ResourceTelemetryProcessCategory, + firstSeenAt: Schema.DateTimeUtc, + lastSeenAt: Schema.DateTimeUtc, + currentCpuPercent: Schema.Number, + avgCpuPercent: Schema.Number, + maxCpuPercent: Schema.Number, + cpuTimeMs: NonNegativeInt, + currentRssBytes: NonNegativeInt, + peakRssBytes: NonNegativeInt, + ioReadBytes: NonNegativeInt, + ioWriteBytes: NonNegativeInt, + ioSemantics: ResourceTelemetryIoSemantics, + sampleCount: NonNegativeInt, +}); +export type ResourceTelemetryProcessSummary = typeof ResourceTelemetryProcessSummary.Type; + +export const ResourceTelemetryHistory = Schema.Struct({ + readAt: Schema.DateTimeUtc, + windowMs: NonNegativeInt, + bucketMs: NonNegativeInt, + sampleIntervalMs: NonNegativeInt, + retainedSampleCount: NonNegativeInt, + buckets: Schema.Array(ResourceTelemetryHistoryBucket), + topProcesses: Schema.Array(ResourceTelemetryProcessSummary), + health: ResourceTelemetryHealth, +}); +export type ResourceTelemetryHistory = typeof ResourceTelemetryHistory.Type; + +export const ResourceTelemetryRetryResult = Schema.Struct({ + accepted: Schema.Boolean, + snapshot: ResourceTelemetrySnapshot, +}); +export type ResourceTelemetryRetryResult = typeof ResourceTelemetryRetryResult.Type; diff --git a/packages/contracts/src/rpc.ts b/packages/contracts/src/rpc.ts index 87c5a49c73b..f4aa570904f 100644 --- a/packages/contracts/src/rpc.ts +++ b/packages/contracts/src/rpc.ts @@ -8,6 +8,11 @@ import { AuthAccessStreamEvent, EnvironmentAuthorizationError, } from "./auth.ts"; +import { + 
BackgroundPolicySnapshot, + ClientActivityReportInput, + HostPowerSnapshot, +} from "./background.ts"; import { FilesystemBrowseInput, FilesystemBrowseResult, @@ -129,6 +134,12 @@ import { ServerUpsertKeybindingInput, ServerUpsertKeybindingResult, } from "./server.ts"; +import { + ResourceTelemetryHistory, + ResourceTelemetryHistoryInput, + ResourceTelemetryRetryResult, + ResourceTelemetrySnapshot, +} from "./resourceTelemetry.ts"; import { ServerSettings, ServerSettingsError, ServerSettingsPatch } from "./settings.ts"; import { SourceControlCloneRepositoryInput, @@ -210,7 +221,12 @@ export const WS_METHODS = { serverGetTraceDiagnostics: "server.getTraceDiagnostics", serverGetProcessDiagnostics: "server.getProcessDiagnostics", serverGetProcessResourceHistory: "server.getProcessResourceHistory", + serverGetResourceTelemetryHistory: "server.getResourceTelemetryHistory", + serverRetryResourceTelemetry: "server.retryResourceTelemetry", serverSignalProcess: "server.signalProcess", + serverReportClientActivity: "server.reportClientActivity", + serverReportHostPowerState: "server.reportHostPowerState", + serverGetBackgroundPolicy: "server.getBackgroundPolicy", // Cloud environment methods cloudGetRelayClientStatus: "cloud.getRelayClientStatus", @@ -230,6 +246,8 @@ export const WS_METHODS = { subscribeServerConfig: "subscribeServerConfig", subscribeServerLifecycle: "subscribeServerLifecycle", subscribeAuthAccess: "subscribeAuthAccess", + subscribeBackgroundPolicy: "subscribeBackgroundPolicy", + subscribeResourceTelemetry: "subscribeResourceTelemetry", } as const; export const WsServerUpsertKeybindingRpc = Rpc.make(WS_METHODS.serverUpsertKeybinding, { @@ -309,6 +327,21 @@ export const WsServerGetProcessResourceHistoryRpc = Rpc.make( }, ); +export const WsServerGetResourceTelemetryHistoryRpc = Rpc.make( + WS_METHODS.serverGetResourceTelemetryHistory, + { + payload: ResourceTelemetryHistoryInput, + success: ResourceTelemetryHistory, + error: EnvironmentAuthorizationError, + 
}, +); + +export const WsServerRetryResourceTelemetryRpc = Rpc.make(WS_METHODS.serverRetryResourceTelemetry, { + payload: Schema.Struct({}), + success: ResourceTelemetryRetryResult, + error: EnvironmentAuthorizationError, +}); + export const WsServerSignalProcessRpc = Rpc.make(WS_METHODS.serverSignalProcess, { payload: ServerSignalProcessInput, success: ServerSignalProcessResult, @@ -328,6 +361,22 @@ export const WsCloudInstallRelayClientRpc = Rpc.make(WS_METHODS.cloudInstallRela stream: true, }); +export const WsServerReportClientActivityRpc = Rpc.make(WS_METHODS.serverReportClientActivity, { + payload: ClientActivityReportInput, + error: EnvironmentAuthorizationError, +}); + +export const WsServerReportHostPowerStateRpc = Rpc.make(WS_METHODS.serverReportHostPowerState, { + payload: HostPowerSnapshot, + error: EnvironmentAuthorizationError, +}); + +export const WsServerGetBackgroundPolicyRpc = Rpc.make(WS_METHODS.serverGetBackgroundPolicy, { + payload: Schema.Struct({}), + success: BackgroundPolicySnapshot, + error: EnvironmentAuthorizationError, +}); + export const WsSourceControlLookupRepositoryRpc = Rpc.make( WS_METHODS.sourceControlLookupRepository, { @@ -678,6 +727,20 @@ export const WsSubscribeAuthAccessRpc = Rpc.make(WS_METHODS.subscribeAuthAccess, stream: true, }); +export const WsSubscribeBackgroundPolicyRpc = Rpc.make(WS_METHODS.subscribeBackgroundPolicy, { + payload: Schema.Struct({}), + success: BackgroundPolicySnapshot, + error: EnvironmentAuthorizationError, + stream: true, +}); + +export const WsSubscribeResourceTelemetryRpc = Rpc.make(WS_METHODS.subscribeResourceTelemetry, { + payload: Schema.Struct({}), + success: ResourceTelemetrySnapshot, + error: EnvironmentAuthorizationError, + stream: true, +}); + export const WsRpcGroup = RpcGroup.make( WsServerGetConfigRpc, WsServerRefreshProvidersRpc, @@ -690,7 +753,12 @@ export const WsRpcGroup = RpcGroup.make( WsServerGetTraceDiagnosticsRpc, WsServerGetProcessDiagnosticsRpc, 
WsServerGetProcessResourceHistoryRpc, + WsServerGetResourceTelemetryHistoryRpc, + WsServerRetryResourceTelemetryRpc, WsServerSignalProcessRpc, + WsServerReportClientActivityRpc, + WsServerReportHostPowerStateRpc, + WsServerGetBackgroundPolicyRpc, WsCloudGetRelayClientStatusRpc, WsCloudInstallRelayClientRpc, WsSourceControlLookupRepositoryRpc, @@ -740,6 +808,8 @@ export const WsRpcGroup = RpcGroup.make( WsSubscribeServerConfigRpc, WsSubscribeServerLifecycleRpc, WsSubscribeAuthAccessRpc, + WsSubscribeBackgroundPolicyRpc, + WsSubscribeResourceTelemetryRpc, WsOrchestrationDispatchCommandRpc, WsOrchestrationGetTurnDiffRpc, WsOrchestrationGetFullThreadDiffRpc, diff --git a/packages/contracts/src/server.ts b/packages/contracts/src/server.ts index 1aa280ad63b..a2afa4e61aa 100644 --- a/packages/contracts/src/server.ts +++ b/packages/contracts/src/server.ts @@ -302,6 +302,7 @@ export type ServerProcessSignal = typeof ServerProcessSignal.Type; export const ServerProcessDiagnosticsEntry = Schema.Struct({ pid: PositiveInt, + startTimeMs: NonNegativeInt, ppid: NonNegativeInt, pgid: Schema.Option(Schema.Int), status: TrimmedNonEmptyString, @@ -383,6 +384,7 @@ export type ServerProcessResourceHistoryResult = typeof ServerProcessResourceHis export const ServerSignalProcessInput = Schema.Struct({ pid: PositiveInt, + startTimeMs: NonNegativeInt, signal: ServerProcessSignal, }); export type ServerSignalProcessInput = typeof ServerSignalProcessInput.Type; diff --git a/packages/contracts/src/settings.ts b/packages/contracts/src/settings.ts index 0463a441759..69d93c157d3 100644 --- a/packages/contracts/src/settings.ts +++ b/packages/contracts/src/settings.ts @@ -362,14 +362,65 @@ export const ObservabilitySettings = Schema.Struct({ export type ObservabilitySettings = typeof ObservabilitySettings.Type; export const DEFAULT_AUTOMATIC_GIT_FETCH_INTERVAL = Duration.seconds(30); +export const DEFAULT_PROVIDER_HEALTH_REFRESH_INTERVAL = Duration.minutes(5); + +export const 
BackgroundActivityProfile = Schema.Literals([ + "balanced", + "performance", + "battery-saver", +]); +export type BackgroundActivityProfile = typeof BackgroundActivityProfile.Type; +export const DEFAULT_BACKGROUND_ACTIVITY_PROFILE: BackgroundActivityProfile = "balanced"; + +export const BackgroundActivityProfileSelection = Schema.Literals([ + "balanced", + "performance", + "battery-saver", + "custom", +]); +export type BackgroundActivityProfileSelection = typeof BackgroundActivityProfileSelection.Type; + +export const BackgroundActivityOverrides = Schema.Struct({ + automaticGitFetchInterval: Schema.optionalKey(Schema.DurationFromMillis), + providerHealthRefreshInterval: Schema.optionalKey(Schema.DurationFromMillis), + hostPowerMonitorActiveInterval: Schema.optionalKey(Schema.DurationFromMillis), + hostPowerMonitorIdleInterval: Schema.optionalKey(Schema.DurationFromMillis), + idleClientTtl: Schema.optionalKey(Schema.DurationFromMillis), + pauseWhenHostLocked: Schema.optionalKey(Schema.Boolean), + pauseWhenHostLowPower: Schema.optionalKey(Schema.Boolean), + pauseWhenClientLowPower: Schema.optionalKey(Schema.Boolean), + pauseWhenOnBattery: Schema.optionalKey(Schema.Boolean), +}); +export type BackgroundActivityOverrides = typeof BackgroundActivityOverrides.Type; + +export const BackgroundActivitySettings = Schema.Struct({ + schemaVersion: Schema.Literal(1).pipe(Schema.withDecodingDefault(Effect.succeed(1 as const))), + profile: BackgroundActivityProfileSelection.pipe( + Schema.withDecodingDefault(Effect.succeed(DEFAULT_BACKGROUND_ACTIVITY_PROFILE)), + ), + baseProfile: Schema.optionalKey(BackgroundActivityProfile), + overrides: BackgroundActivityOverrides.pipe(Schema.withDecodingDefault(Effect.succeed({}))), +}).pipe(Schema.withDecodingDefault(Effect.succeed({}))); +export type BackgroundActivitySettings = typeof BackgroundActivitySettings.Type; export const ServerSettings = Schema.Struct({ enableAssistantStreaming: 
Schema.Boolean.pipe(Schema.withDecodingDefault(Effect.succeed(false))), + backgroundActivity: BackgroundActivitySettings, + // Legacy flat fields retained for old settings files and old clients. New + // consumers should resolve `backgroundActivity` instead. automaticGitFetchInterval: Schema.DurationFromMillis.pipe( Schema.withDecodingDefault( Effect.succeed(Duration.toMillis(DEFAULT_AUTOMATIC_GIT_FETCH_INTERVAL)), ), ), + providerHealthRefreshInterval: Schema.DurationFromMillis.pipe( + Schema.withDecodingDefault( + Effect.succeed(Duration.toMillis(DEFAULT_PROVIDER_HEALTH_REFRESH_INTERVAL)), + ), + ), + backgroundActivityProfile: BackgroundActivityProfile.pipe( + Schema.withDecodingDefault(Effect.succeed(DEFAULT_BACKGROUND_ACTIVITY_PROFILE)), + ), defaultThreadEnvMode: ThreadEnvMode.pipe( Schema.withDecodingDefault(Effect.succeed("local" as const satisfies ThreadEnvMode)), ), @@ -482,7 +533,17 @@ const OpenCodeSettingsPatch = Schema.Struct({ export const ServerSettingsPatch = Schema.Struct({ // Server settings enableAssistantStreaming: Schema.optionalKey(Schema.Boolean), + backgroundActivity: Schema.optionalKey( + Schema.Struct({ + schemaVersion: Schema.optionalKey(Schema.Literal(1)), + profile: Schema.optionalKey(BackgroundActivityProfileSelection), + baseProfile: Schema.optionalKey(BackgroundActivityProfile), + overrides: Schema.optionalKey(BackgroundActivityOverrides), + }), + ), automaticGitFetchInterval: Schema.optionalKey(Schema.DurationFromMillis), + providerHealthRefreshInterval: Schema.optionalKey(Schema.DurationFromMillis), + backgroundActivityProfile: Schema.optionalKey(BackgroundActivityProfile), defaultThreadEnvMode: Schema.optionalKey(ThreadEnvMode), newWorktreesStartFromOrigin: Schema.optionalKey(Schema.Boolean), addProjectBaseDirectory: Schema.optionalKey(TrimmedString), diff --git a/packages/shared/package.json b/packages/shared/package.json index 23705178bef..7fed2955465 100644 --- a/packages/shared/package.json +++ b/packages/shared/package.json 
@@ -75,6 +75,10 @@ "types": "./src/serverSettings.ts", "import": "./src/serverSettings.ts" }, + "./backgroundActivitySettings": { + "types": "./src/backgroundActivitySettings.ts", + "import": "./src/backgroundActivitySettings.ts" + }, "./String": { "types": "./src/String.ts", "import": "./src/String.ts" diff --git a/packages/shared/src/backgroundActivitySettings.ts b/packages/shared/src/backgroundActivitySettings.ts new file mode 100644 index 00000000000..1fa941eb82f --- /dev/null +++ b/packages/shared/src/backgroundActivitySettings.ts @@ -0,0 +1,248 @@ +import { + type BackgroundActivityProfile, + type BackgroundActivitySettings, + DEFAULT_BACKGROUND_ACTIVITY_PROFILE, + DEFAULT_AUTOMATIC_GIT_FETCH_INTERVAL, + DEFAULT_PROVIDER_HEALTH_REFRESH_INTERVAL, + type ServerSettings, +} from "@t3tools/contracts"; +import * as Duration from "effect/Duration"; + +export interface ResolvedBackgroundActivitySettings { + readonly profile: BackgroundActivityProfile; + readonly automaticGitFetchInterval: Duration.Duration; + readonly providerHealthRefreshInterval: Duration.Duration; + readonly hostPowerMonitorActiveInterval: Duration.Duration; + readonly hostPowerMonitorIdleInterval: Duration.Duration; + readonly idleClientTtl: Duration.Duration; + readonly pauseWhenHostLocked: boolean; + readonly pauseWhenHostLowPower: boolean; + readonly pauseWhenClientLowPower: boolean; + readonly pauseWhenOnBattery: boolean; +} + +const PRESET_SETTINGS: Record = { + performance: { + profile: "performance", + automaticGitFetchInterval: Duration.seconds(15), + providerHealthRefreshInterval: Duration.minutes(1), + hostPowerMonitorActiveInterval: Duration.seconds(30), + hostPowerMonitorIdleInterval: Duration.minutes(2), + idleClientTtl: Duration.seconds(45), + pauseWhenHostLocked: true, + pauseWhenHostLowPower: false, + pauseWhenClientLowPower: false, + pauseWhenOnBattery: false, + }, + balanced: { + profile: "balanced", + automaticGitFetchInterval: DEFAULT_AUTOMATIC_GIT_FETCH_INTERVAL, + 
providerHealthRefreshInterval: DEFAULT_PROVIDER_HEALTH_REFRESH_INTERVAL, + hostPowerMonitorActiveInterval: Duration.seconds(30), + hostPowerMonitorIdleInterval: Duration.minutes(5), + idleClientTtl: Duration.seconds(45), + pauseWhenHostLocked: true, + pauseWhenHostLowPower: true, + pauseWhenClientLowPower: true, + pauseWhenOnBattery: false, + }, + "battery-saver": { + profile: "battery-saver", + automaticGitFetchInterval: Duration.seconds(0), + providerHealthRefreshInterval: Duration.minutes(15), + hostPowerMonitorActiveInterval: Duration.minutes(1), + hostPowerMonitorIdleInterval: Duration.minutes(10), + idleClientTtl: Duration.seconds(45), + pauseWhenHostLocked: true, + pauseWhenHostLowPower: true, + pauseWhenClientLowPower: true, + pauseWhenOnBattery: true, + }, +}; + +export function getBackgroundActivityPresetSettings( + profile: BackgroundActivityProfile, +): ResolvedBackgroundActivitySettings { + return PRESET_SETTINGS[profile]; +} + +export function getBackgroundActivityBaseProfile( + backgroundActivity: BackgroundActivitySettings, +): BackgroundActivityProfile { + if (backgroundActivity.profile === "custom") { + return backgroundActivity.baseProfile ?? DEFAULT_BACKGROUND_ACTIVITY_PROFILE; + } + return backgroundActivity.profile; +} + +export function resolveBackgroundActivitySettings( + backgroundActivity: BackgroundActivitySettings, +): ResolvedBackgroundActivitySettings { + const baseProfile = getBackgroundActivityBaseProfile(backgroundActivity); + const preset = PRESET_SETTINGS[baseProfile]; + const { overrides } = backgroundActivity; + return { + profile: baseProfile, + automaticGitFetchInterval: + overrides.automaticGitFetchInterval ?? preset.automaticGitFetchInterval, + providerHealthRefreshInterval: + overrides.providerHealthRefreshInterval ?? preset.providerHealthRefreshInterval, + hostPowerMonitorActiveInterval: + overrides.hostPowerMonitorActiveInterval ?? 
preset.hostPowerMonitorActiveInterval, + hostPowerMonitorIdleInterval: + overrides.hostPowerMonitorIdleInterval ?? preset.hostPowerMonitorIdleInterval, + idleClientTtl: overrides.idleClientTtl ?? preset.idleClientTtl, + pauseWhenHostLocked: overrides.pauseWhenHostLocked ?? preset.pauseWhenHostLocked, + pauseWhenHostLowPower: overrides.pauseWhenHostLowPower ?? preset.pauseWhenHostLowPower, + pauseWhenClientLowPower: overrides.pauseWhenClientLowPower ?? preset.pauseWhenClientLowPower, + pauseWhenOnBattery: overrides.pauseWhenOnBattery ?? preset.pauseWhenOnBattery, + }; +} + +function durationsEqual(a: Duration.Duration, b: Duration.Duration): boolean { + return Duration.toMillis(a) === Duration.toMillis(b); +} + +function resolvedSettingsEqual( + a: ResolvedBackgroundActivitySettings, + b: ResolvedBackgroundActivitySettings, +): boolean { + return ( + durationsEqual(a.automaticGitFetchInterval, b.automaticGitFetchInterval) && + durationsEqual(a.providerHealthRefreshInterval, b.providerHealthRefreshInterval) && + durationsEqual(a.hostPowerMonitorActiveInterval, b.hostPowerMonitorActiveInterval) && + durationsEqual(a.hostPowerMonitorIdleInterval, b.hostPowerMonitorIdleInterval) && + durationsEqual(a.idleClientTtl, b.idleClientTtl) && + a.pauseWhenHostLocked === b.pauseWhenHostLocked && + a.pauseWhenHostLowPower === b.pauseWhenHostLowPower && + a.pauseWhenClientLowPower === b.pauseWhenClientLowPower && + a.pauseWhenOnBattery === b.pauseWhenOnBattery + ); +} + +export function normalizeBackgroundActivitySettings( + backgroundActivity: BackgroundActivitySettings, +): BackgroundActivitySettings { + if (backgroundActivity.profile !== "custom") { + return { + schemaVersion: 1, + profile: backgroundActivity.profile, + overrides: {}, + }; + } + + const resolved = resolveBackgroundActivitySettings(backgroundActivity); + const profiles: ReadonlyArray = [ + getBackgroundActivityBaseProfile(backgroundActivity), + "balanced", + "performance", + "battery-saver", + ]; + for (const 
profile of profiles) { + if (resolvedSettingsEqual(resolved, PRESET_SETTINGS[profile])) { + return { + schemaVersion: 1, + profile, + overrides: {}, + }; + } + } + + const baseProfile = getBackgroundActivityBaseProfile(backgroundActivity); + const preset = PRESET_SETTINGS[baseProfile]; + const overrides: BackgroundActivitySettings["overrides"] = { + ...(!durationsEqual(resolved.automaticGitFetchInterval, preset.automaticGitFetchInterval) + ? { automaticGitFetchInterval: resolved.automaticGitFetchInterval } + : {}), + ...(!durationsEqual( + resolved.providerHealthRefreshInterval, + preset.providerHealthRefreshInterval, + ) + ? { providerHealthRefreshInterval: resolved.providerHealthRefreshInterval } + : {}), + ...(!durationsEqual( + resolved.hostPowerMonitorActiveInterval, + preset.hostPowerMonitorActiveInterval, + ) + ? { hostPowerMonitorActiveInterval: resolved.hostPowerMonitorActiveInterval } + : {}), + ...(!durationsEqual(resolved.hostPowerMonitorIdleInterval, preset.hostPowerMonitorIdleInterval) + ? { hostPowerMonitorIdleInterval: resolved.hostPowerMonitorIdleInterval } + : {}), + ...(!durationsEqual(resolved.idleClientTtl, preset.idleClientTtl) + ? { idleClientTtl: resolved.idleClientTtl } + : {}), + ...(resolved.pauseWhenHostLocked !== preset.pauseWhenHostLocked + ? { pauseWhenHostLocked: resolved.pauseWhenHostLocked } + : {}), + ...(resolved.pauseWhenHostLowPower !== preset.pauseWhenHostLowPower + ? { pauseWhenHostLowPower: resolved.pauseWhenHostLowPower } + : {}), + ...(resolved.pauseWhenClientLowPower !== preset.pauseWhenClientLowPower + ? { pauseWhenClientLowPower: resolved.pauseWhenClientLowPower } + : {}), + ...(resolved.pauseWhenOnBattery !== preset.pauseWhenOnBattery + ? 
{ pauseWhenOnBattery: resolved.pauseWhenOnBattery } + : {}), + }; + + return { + schemaVersion: 1, + profile: "custom", + baseProfile, + overrides, + }; +} + +export function resolveServerBackgroundActivitySettings( + settings: ServerSettings, +): ResolvedBackgroundActivitySettings { + const defaultBackgroundActivity: BackgroundActivitySettings = { + schemaVersion: 1, + profile: DEFAULT_BACKGROUND_ACTIVITY_PROFILE, + overrides: {}, + }; + const backgroundActivityIsDefault = + settings.backgroundActivity.profile === defaultBackgroundActivity.profile && + settings.backgroundActivity.baseProfile === undefined && + Object.keys(settings.backgroundActivity.overrides).length === 0; + const legacyProfile = settings.backgroundActivityProfile; + const hasLegacyOverrides = + legacyProfile !== DEFAULT_BACKGROUND_ACTIVITY_PROFILE || + Duration.toMillis(settings.automaticGitFetchInterval) !== + Duration.toMillis(DEFAULT_AUTOMATIC_GIT_FETCH_INTERVAL) || + Duration.toMillis(settings.providerHealthRefreshInterval) !== + Duration.toMillis(DEFAULT_PROVIDER_HEALTH_REFRESH_INTERVAL); + if (backgroundActivityIsDefault && hasLegacyOverrides) { + return resolveBackgroundActivitySettings({ + schemaVersion: 1, + profile: + Duration.toMillis(settings.automaticGitFetchInterval) === + Duration.toMillis( + getBackgroundActivityPresetSettings(legacyProfile).automaticGitFetchInterval, + ) && + Duration.toMillis(settings.providerHealthRefreshInterval) === + Duration.toMillis( + getBackgroundActivityPresetSettings(legacyProfile).providerHealthRefreshInterval, + ) + ? legacyProfile + : "custom", + baseProfile: legacyProfile, + overrides: { + ...(Duration.toMillis(settings.automaticGitFetchInterval) !== + Duration.toMillis( + getBackgroundActivityPresetSettings(legacyProfile).automaticGitFetchInterval, + ) + ? 
{ automaticGitFetchInterval: settings.automaticGitFetchInterval } + : {}), + ...(Duration.toMillis(settings.providerHealthRefreshInterval) !== + Duration.toMillis( + getBackgroundActivityPresetSettings(legacyProfile).providerHealthRefreshInterval, + ) + ? { providerHealthRefreshInterval: settings.providerHealthRefreshInterval } + : {}), + }, + }); + } + return resolveBackgroundActivitySettings(settings.backgroundActivity); +} diff --git a/packages/shared/src/observability.test.ts b/packages/shared/src/observability.test.ts index 57537b63e19..c98217851ce 100644 --- a/packages/shared/src/observability.test.ts +++ b/packages/shared/src/observability.test.ts @@ -8,6 +8,7 @@ import * as Layer from "effect/Layer"; import * as Logger from "effect/Logger"; import * as Order from "effect/Order"; import * as Path from "effect/Path"; +import * as Ref from "effect/Ref"; import * as References from "effect/References"; import * as Schema from "effect/Schema"; import * as Tracer from "effect/Tracer"; @@ -18,6 +19,7 @@ import { makeLocalFileTracer, makeTraceSink, type TraceRecord, + type TraceSinkFlushStats, } from "./observability.ts"; describe("causeErrorTag", () => { @@ -167,6 +169,34 @@ describe("observability", () => { ), ); + it.effect("reports successful logical trace writes", () => + Effect.scoped( + Effect.gen(function* () { + const fileSystem = yield* FileSystem.FileSystem; + const path = yield* Path.Path; + const tempDir = yield* fileSystem.makeTempDirectoryScoped({ prefix: "t3-trace-sink-" }); + const tracePath = path.join(tempDir, "shared.trace.ndjson"); + const reported = yield* Ref.make>([]); + + const sink = yield* makeTraceSink({ + filePath: tracePath, + maxBytes: 1024, + maxFiles: 2, + batchWindowMs: 10_000, + onFlush: (stats) => Ref.update(reported, (current) => [...current, stats]), + }); + + sink.push(makeRecord("attributed")); + yield* sink.flush; + + const stats = yield* Ref.get(reported); + assert.equal(stats.length, 1); + assert.equal(stats[0]?.count, 1); 
+ assert.isAbove(stats[0]?.logicalWriteBytes ?? 0, 0); + }), + ), + ); + it.effect("rotates the trace file when the configured max size is exceeded", () => Effect.scoped( Effect.gen(function* () { diff --git a/packages/shared/src/observability.ts b/packages/shared/src/observability.ts index 68d4985db95..48ed58bcd2b 100644 --- a/packages/shared/src/observability.ts +++ b/packages/shared/src/observability.ts @@ -8,7 +8,8 @@ import { OtlpResource, OtlpTracer } from "effect/unstable/observability"; import { RotatingFileSink } from "./logging.ts"; -const FLUSH_BUFFER_THRESHOLD = 32; +const FLUSH_BUFFER_THRESHOLD = 256; +const textEncoder = new TextEncoder(); export type TraceAttributes = Readonly>; @@ -94,6 +95,13 @@ export interface TraceSinkOptions { readonly maxBytes: number; readonly maxFiles: number; readonly batchWindowMs: number; + readonly onFlush?: (stats: TraceSinkFlushStats) => Effect.Effect; +} + +export interface TraceSinkFlushStats { + readonly logicalWriteBytes: number; + readonly count: number; + readonly durationMs: number; } export interface TraceSink { @@ -263,23 +271,50 @@ export const makeTraceSink = Effect.fn("makeTraceSink")(function* (options: Trac }); let buffer: Array = []; + let pendingFlushStats: TraceSinkFlushStats = { + logicalWriteBytes: 0, + count: 0, + durationMs: 0, + }; const flushUnsafe = () => { if (buffer.length === 0) { return; } - const chunk = buffer.join(""); + const records = buffer; + const chunk = records.join(""); buffer = []; + const startedAt = performance.now(); try { sink.write(chunk); + pendingFlushStats = { + logicalWriteBytes: + pendingFlushStats.logicalWriteBytes + textEncoder.encode(chunk).byteLength, + count: pendingFlushStats.count + records.length, + durationMs: pendingFlushStats.durationMs + Math.max(0, performance.now() - startedAt), + }; } catch { - buffer.unshift(chunk); + buffer.unshift(...records); } }; - const flush = Effect.sync(flushUnsafe).pipe(Effect.withTracerEnabled(false)); + const flush = 
Effect.sync(() => { + flushUnsafe(); + const stats = pendingFlushStats; + pendingFlushStats = { + logicalWriteBytes: 0, + count: 0, + durationMs: 0, + }; + return stats; + }).pipe( + Effect.flatMap((stats) => + stats.count > 0 && options.onFlush ? options.onFlush(stats).pipe(Effect.ignore) : Effect.void, + ), + Effect.withTracerEnabled(false), + ); yield* Effect.addFinalizer(() => flush.pipe(Effect.ignore)); yield* Effect.forkScoped( diff --git a/packages/shared/src/serverSettings.test.ts b/packages/shared/src/serverSettings.test.ts index 5bec7d386b6..77d287b00b1 100644 --- a/packages/shared/src/serverSettings.test.ts +++ b/packages/shared/src/serverSettings.test.ts @@ -3,7 +3,9 @@ import { ProviderDriverKind, ProviderInstanceId, } from "@t3tools/contracts"; +import * as Duration from "effect/Duration"; import { describe, expect, it } from "vite-plus/test"; +import { resolveServerBackgroundActivitySettings } from "./backgroundActivitySettings.ts"; import { createModelSelection } from "./model.ts"; import { applyServerSettingsPatch, @@ -194,4 +196,80 @@ describe("serverSettings helpers", () => { config: { homePath: "~/.codex" }, }); }); + + it("stores background activity profiles as a versioned object and syncs legacy aliases", () => { + const next = applyServerSettingsPatch(DEFAULT_SERVER_SETTINGS, { + backgroundActivity: { + schemaVersion: 1, + profile: "battery-saver", + overrides: {}, + }, + }); + + expect(next.backgroundActivity).toEqual({ + schemaVersion: 1, + profile: "battery-saver", + overrides: {}, + }); + expect(next.backgroundActivityProfile).toBe("battery-saver"); + expect(Duration.toMillis(next.automaticGitFetchInterval)).toBe(0); + expect(Duration.toMillis(next.providerHealthRefreshInterval)).toBe( + Duration.toMillis(Duration.minutes(15)), + ); + }); + + it("turns legacy interval patches into custom background activity overrides", () => { + const next = applyServerSettingsPatch(DEFAULT_SERVER_SETTINGS, { + automaticGitFetchInterval: 
Duration.seconds(15), + }); + + expect(next.backgroundActivity).toEqual({ + schemaVersion: 1, + profile: "custom", + baseProfile: "balanced", + overrides: { + automaticGitFetchInterval: Duration.seconds(15), + }, + }); + expect(resolveServerBackgroundActivitySettings(next).profile).toBe("balanced"); + expect( + Duration.toMillis(resolveServerBackgroundActivitySettings(next).automaticGitFetchInterval), + ).toBe(15_000); + }); + + it("reconciles custom background activity back to a preset when overrides match the preset", () => { + const custom = applyServerSettingsPatch(DEFAULT_SERVER_SETTINGS, { + automaticGitFetchInterval: Duration.seconds(15), + }); + const next = applyServerSettingsPatch(custom, { + automaticGitFetchInterval: Duration.seconds(30), + }); + + expect(next.backgroundActivity).toEqual({ + schemaVersion: 1, + profile: "balanced", + overrides: {}, + }); + expect(next.backgroundActivityProfile).toBe("balanced"); + expect(Duration.toMillis(next.automaticGitFetchInterval)).toBe(30_000); + }); + + it("drops custom overrides that duplicate the base profile", () => { + const next = applyServerSettingsPatch(DEFAULT_SERVER_SETTINGS, { + backgroundActivity: { + schemaVersion: 1, + profile: "custom", + baseProfile: "balanced", + overrides: { + automaticGitFetchInterval: Duration.seconds(30), + }, + }, + }); + + expect(next.backgroundActivity).toEqual({ + schemaVersion: 1, + profile: "balanced", + overrides: {}, + }); + }); }); diff --git a/packages/shared/src/serverSettings.ts b/packages/shared/src/serverSettings.ts index 1bbf466f60b..c8cee52a68c 100644 --- a/packages/shared/src/serverSettings.ts +++ b/packages/shared/src/serverSettings.ts @@ -4,6 +4,11 @@ import * as Schema from "effect/Schema"; import { deepMerge } from "./Struct.ts"; import { fromLenientJson } from "./schemaJson.ts"; import { createModelSelection } from "./model.ts"; +import { + getBackgroundActivityBaseProfile, + normalizeBackgroundActivitySettings, + resolveBackgroundActivitySettings, +} 
from "./backgroundActivitySettings.ts"; const ServerSettingsJson = fromLenientJson(ServerSettings); const decodeServerSettingsJson = Schema.decodeUnknownOption(ServerSettingsJson); @@ -76,14 +81,61 @@ export function applyServerSettingsPatch( patch: ServerSettingsPatch, ): ServerSettings { const selectionPatch = patch.textGenerationModelSelection; - const { automaticGitFetchInterval, ...patchForMerge } = patch; + const { + automaticGitFetchInterval, + providerHealthRefreshInterval, + backgroundActivityProfile, + backgroundActivity, + ...patchForMerge + } = patch; + const backgroundActivityPatch = + backgroundActivityProfile !== undefined + ? { + schemaVersion: 1 as const, + profile: backgroundActivityProfile, + overrides: {}, + } + : automaticGitFetchInterval !== undefined || providerHealthRefreshInterval !== undefined + ? { + schemaVersion: 1 as const, + profile: "custom" as const, + baseProfile: getBackgroundActivityBaseProfile(current.backgroundActivity), + overrides: { + ...current.backgroundActivity.overrides, + ...(automaticGitFetchInterval !== undefined ? { automaticGitFetchInterval } : {}), + ...(providerHealthRefreshInterval !== undefined + ? { providerHealthRefreshInterval } + : {}), + }, + } + : undefined; const next = deepMerge(current, patchForMerge); - const nextWithReplacements = { + const nextWithReplacementsBase = { ...next, + ...(backgroundActivity !== undefined + ? { backgroundActivity: deepMerge(current.backgroundActivity, backgroundActivity) } + : {}), + ...(backgroundActivityPatch !== undefined + ? { backgroundActivity: backgroundActivityPatch } + : {}), ...(patch.providerInstances !== undefined ? { providerInstances: patch.providerInstances } : {}), ...(automaticGitFetchInterval !== undefined ? { automaticGitFetchInterval } : {}), + ...(providerHealthRefreshInterval !== undefined ? 
{ providerHealthRefreshInterval } : {}), + }; + const normalizedBackgroundActivity = normalizeBackgroundActivitySettings( + nextWithReplacementsBase.backgroundActivity, + ); + const resolvedBackgroundActivity = resolveBackgroundActivitySettings( + normalizedBackgroundActivity, + ); + const nextWithReplacements = { + ...nextWithReplacementsBase, + backgroundActivity: normalizedBackgroundActivity, + automaticGitFetchInterval: resolvedBackgroundActivity.automaticGitFetchInterval, + providerHealthRefreshInterval: resolvedBackgroundActivity.providerHealthRefreshInterval, + backgroundActivityProfile: resolvedBackgroundActivity.profile, }; if (!selectionPatch) { return nextWithReplacements; diff --git a/scripts/build-desktop-artifact.test.ts b/scripts/build-desktop-artifact.test.ts index 8135f7e259d..e26e2461108 100644 --- a/scripts/build-desktop-artifact.test.ts +++ b/scripts/build-desktop-artifact.test.ts @@ -9,12 +9,15 @@ import { createStageWorkspaceConfig, createStagePnpmConfig, DESKTOP_ASAR_UNPACK, + DESKTOP_EXTRA_RESOURCES, resolveDesktopRuntimeDependencies, resolveFffNativeDependencies, resolveBuildOptions, resolveDesktopBuildIconAssets, resolveDesktopProductName, resolveDesktopUpdateChannel, + resolveResourceMonitorRustTargets, + resourceMonitorExecutableName, resolveGitHubPublishConfig, resolveMockUpdateServerPort, resolveMockUpdateServerUrl, @@ -175,6 +178,27 @@ it.layer(NodeServices.layer)("build-desktop-artifact", (it) => { assert.deepStrictEqual(DESKTOP_ASAR_UNPACK, ["node_modules/@ff-labs/fff-bin-*/**/*"]); }); + it("stages the resource monitor as an external executable resource", () => { + assert.deepStrictEqual(DESKTOP_EXTRA_RESOURCES, [ + { + from: "apps/desktop/prod-resources/resource-monitor", + to: "resource-monitor", + }, + ]); + assert.deepStrictEqual(resolveResourceMonitorRustTargets("mac", "universal"), [ + "aarch64-apple-darwin", + "x86_64-apple-darwin", + ]); + assert.deepStrictEqual(resolveResourceMonitorRustTargets("linux", "x64"), [ + 
"x86_64-unknown-linux-gnu", + ]); + assert.deepStrictEqual(resolveResourceMonitorRustTargets("win", "arm64"), [ + "aarch64-pc-windows-msvc", + ]); + assert.equal(resourceMonitorExecutableName("mac"), "t3-resource-monitor"); + assert.equal(resourceMonitorExecutableName("win"), "t3-resource-monitor.exe"); + }); + it("promotes target fff binaries to direct staged dependencies", () => { assert.deepStrictEqual(resolveFffNativeDependencies("mac", "arm64", "0.9.4"), { "@ff-labs/fff-bin-darwin-arm64": "0.9.4", diff --git a/scripts/build-desktop-artifact.ts b/scripts/build-desktop-artifact.ts index 6b519b1d4e3..e4e040f7b6b 100644 --- a/scripts/build-desktop-artifact.ts +++ b/scripts/build-desktop-artifact.ts @@ -72,6 +72,26 @@ interface PlatformConfig { readonly archChoices: ReadonlyArray; } +export function resolveResourceMonitorRustTargets( + platform: typeof BuildPlatform.Type, + arch: typeof BuildArch.Type, +): ReadonlyArray { + if (platform === "mac") { + if (arch === "universal") { + return ["aarch64-apple-darwin", "x86_64-apple-darwin"]; + } + return [arch === "arm64" ? "aarch64-apple-darwin" : "x86_64-apple-darwin"]; + } + if (platform === "linux") { + return [arch === "arm64" ? "aarch64-unknown-linux-gnu" : "x86_64-unknown-linux-gnu"]; + } + return [arch === "arm64" ? "aarch64-pc-windows-msvc" : "x86_64-pc-windows-msvc"]; +} + +export function resourceMonitorExecutableName(platform: typeof BuildPlatform.Type): string { + return platform === "win" ? 
"t3-resource-monitor.exe" : "t3-resource-monitor"; +} + const PLATFORM_CONFIG: Record = { mac: { cliFlag: "--mac", @@ -292,6 +312,12 @@ interface StagePackageJson { export const STAGE_INSTALL_ARGS = ["install", "--prod"] as const; export const DESKTOP_ASAR_UNPACK = ["node_modules/@ff-labs/fff-bin-*/**/*"] as const; +export const DESKTOP_EXTRA_RESOURCES = [ + { + from: "apps/desktop/prod-resources/resource-monitor", + to: "resource-monitor", + }, +] as const; export function resolveFffNativeDependencies( platform: typeof BuildPlatform.Type, @@ -497,6 +523,78 @@ const runCommand = Effect.fn("runCommand")(function* ( } }); +const stageResourceMonitor = Effect.fn("stageResourceMonitor")(function* (input: { + readonly repoRoot: string; + readonly stageResourcesDir: string; + readonly platform: typeof BuildPlatform.Type; + readonly arch: typeof BuildArch.Type; + readonly verbose: boolean; +}) { + const fs = yield* FileSystem.FileSystem; + const path = yield* Path.Path; + const manifestPath = path.join(input.repoRoot, "native/resource-monitor/Cargo.toml"); + const executableName = resourceMonitorExecutableName(input.platform); + const rustTargets = resolveResourceMonitorRustTargets(input.platform, input.arch); + const builtBinaries: string[] = []; + + for (const rustTarget of rustTargets) { + const spawnCommand = yield* resolveSpawnCommand("cargo", [ + "build", + "--locked", + "--release", + "--manifest-path", + manifestPath, + "--target", + rustTarget, + ]); + yield* runCommand( + ChildProcess.make(spawnCommand.command, spawnCommand.args, { + cwd: input.repoRoot, + shell: spawnCommand.shell, + }), + { + label: `cargo build resource monitor (${rustTarget})`, + verbose: input.verbose, + }, + ); + + const binaryPath = path.join( + input.repoRoot, + "native/resource-monitor/target", + rustTarget, + "release", + executableName, + ); + if (!(yield* fs.exists(binaryPath))) { + return yield* new BuildScriptError({ + message: `Resource monitor build did not produce 
${binaryPath}.`, + }); + } + builtBinaries.push(binaryPath); + } + + const destinationDirectory = path.join(input.stageResourcesDir, "resource-monitor"); + const destinationPath = path.join(destinationDirectory, executableName); + yield* fs.remove(destinationDirectory, { recursive: true, force: true }).pipe(Effect.ignore); + yield* fs.makeDirectory(destinationDirectory, { recursive: true }); + + if (builtBinaries.length === 1) { + yield* fs.copyFile(builtBinaries[0]!, destinationPath); + } else { + yield* runCommand( + ChildProcess.make("lipo", ["-create", ...builtBinaries, "-output", destinationPath]), + { + label: "lipo resource monitor universal binary", + verbose: input.verbose, + }, + ); + } + + if (input.platform !== "win") { + yield* fs.chmod(destinationPath, 0o755); + } +}); + function generateMacIconSet( sourcePng: string, targetIcns: string, @@ -755,6 +853,7 @@ const createBuildConfig = Effect.fn("createBuildConfig")(function* ( directories: { buildResources: "apps/desktop/resources", }, + extraResources: DESKTOP_EXTRA_RESOURCES, }; const updateChannel = resolveDesktopUpdateChannel(version); const publishConfig = yield* resolveGitHubPublishConfig(updateChannel); @@ -941,6 +1040,13 @@ const buildDesktopArtifact = Effect.fn("buildDesktopArtifact")(function* ( yield* fs.copy(distDirs.desktopDist, path.join(stageAppDir, "apps/desktop/dist-electron")); yield* fs.copy(distDirs.desktopResources, stageResourcesDir); yield* fs.copy(distDirs.serverDist, path.join(stageAppDir, "apps/server/dist")); + yield* stageResourceMonitor({ + repoRoot, + stageResourcesDir, + platform: options.platform, + arch: options.arch, + verbose: options.verbose, + }); yield* assertPlatformBuildResources( options.platform,