From 16adfdc53dd0c8eefdb5ac1ab18fe4dc0f1c2519 Mon Sep 17 00:00:00 2001 From: Drew Newberry Date: Wed, 8 Apr 2026 22:06:11 -0700 Subject: [PATCH 01/20] fix(ci): fix release-vm-kernel workflow failures Add packages:read permission so Linux container jobs can pull the CI image from GHCR, and reorder brew tap before brew install so the libkrunfw and krunvm formulae are discoverable on macOS. --- .github/workflows/release-vm-kernel.yml | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/.github/workflows/release-vm-kernel.yml b/.github/workflows/release-vm-kernel.yml index d461cd16..20e2c7fa 100644 --- a/.github/workflows/release-vm-kernel.yml +++ b/.github/workflows/release-vm-kernel.yml @@ -14,6 +14,7 @@ on: permissions: contents: write + packages: read # Serialize with release-vm-dev.yml — both update the vm-dev release. concurrency: @@ -114,12 +115,9 @@ jobs: - name: Install dependencies run: | set -euo pipefail - brew install rust lld dtc xz - # libkrunfw from Homebrew (used as a fallback/reference by build scripts) - brew install libkrunfw - # krunvm is needed to build the Linux kernel inside a Fedora VM + # Tap slp/krun first so libkrunfw and krunvm formulae are available brew tap slp/krun - brew install krunvm + brew install rust lld dtc xz libkrunfw krunvm - name: Build custom libkrunfw (kernel) run: crates/openshell-vm/runtime/build-custom-libkrunfw.sh From 44f67acadffc718d475e35d7b2e4ad050cf02a1a Mon Sep 17 00:00:00 2001 From: Drew Newberry Date: Wed, 8 Apr 2026 22:25:12 -0700 Subject: [PATCH 02/20] refactor(vm): build kernel once on Linux, reuse kernel.c on macOS Eliminate the krunvm/Fedora VM dependency from the macOS CI job by building the aarch64 Linux kernel only on the Linux ARM64 runner and exporting kernel.c as a CI artifact. The macOS job downloads kernel.c and compiles it into libkrunfw.dylib with Apple's cc, cutting macOS CI from ~45 min to ~5 min. 
Also fixes: - sudo not found in CI containers (use conditional SUDO) - Hardcoded ARCH=arm64 in build-libkrun.sh (now auto-detects) - Missing packages:read permission for GHCR pulls - brew tap ordering for Homebrew formula resolution Removes build-custom-libkrunfw.sh (no longer needed). --- .github/workflows/release-vm-kernel.yml | 43 +- architecture/custom-vm-runtime.md | 40 +- crates/openshell-vm/pins.env | 2 +- crates/openshell-vm/runtime/README.md | 62 +-- .../runtime/build-custom-libkrunfw.sh | 401 ------------------ tasks/scripts/vm/build-libkrun-macos.sh | 173 ++++---- tasks/scripts/vm/build-libkrun.sh | 52 ++- tasks/scripts/vm/vm-setup.sh | 17 +- 8 files changed, 240 insertions(+), 550 deletions(-) delete mode 100755 crates/openshell-vm/runtime/build-custom-libkrunfw.sh diff --git a/.github/workflows/release-vm-kernel.yml b/.github/workflows/release-vm-kernel.yml index 20e2c7fa..01992cdf 100644 --- a/.github/workflows/release-vm-kernel.yml +++ b/.github/workflows/release-vm-kernel.yml @@ -5,6 +5,11 @@ name: Release VM Kernel # "vm-dev" GitHub Release and consumed by release-vm-dev.yml when building the # openshell-vm binary. # +# The Linux kernel is compiled once on aarch64 Linux. The resulting kernel.c +# (a C source file containing the kernel as a byte array) is shared with the +# macOS job, which only needs to compile it into a .dylib — no krunvm, no +# Fedora VM, no kernel rebuild. This cuts macOS CI from ~45 min to ~5 min. +# # This workflow runs on-demand (or when kernel config / pins change). It is # intentionally decoupled from the per-commit VM binary build because the # kernel rarely changes and takes 15-45 minutes to compile. 
@@ -27,7 +32,7 @@ defaults: jobs: # --------------------------------------------------------------------------- - # Linux ARM64 — native kernel + libkrun build + # Linux ARM64 — native kernel + libkrun build (also exports kernel.c) # --------------------------------------------------------------------------- build-runtime-linux-arm64: name: Build Runtime (Linux ARM64) @@ -57,13 +62,25 @@ jobs: --build-dir target/libkrun-build \ --output artifacts/vm-runtime-linux-aarch64.tar.zst - - name: Upload artifact + - name: Upload runtime artifact uses: actions/upload-artifact@v4 with: name: vm-runtime-linux-arm64 path: artifacts/vm-runtime-linux-aarch64.tar.zst retention-days: 5 + # Export kernel.c + ABI_VERSION for the macOS job. kernel.c contains + # the aarch64 Linux kernel as a byte array — it is OS-agnostic and can + # be compiled into a .dylib by Apple's cc without rebuilding the kernel. + - name: Upload kernel.c for macOS build + uses: actions/upload-artifact@v4 + with: + name: kernel-c-arm64 + path: | + target/libkrun-build/kernel.c + target/libkrun-build/ABI_VERSION + retention-days: 1 + # --------------------------------------------------------------------------- # Linux AMD64 — native kernel + libkrun build # --------------------------------------------------------------------------- @@ -103,31 +120,31 @@ jobs: retention-days: 5 # --------------------------------------------------------------------------- - # macOS ARM64 — kernel built via krunvm, libkrun built natively + # macOS ARM64 — uses pre-built kernel.c from Linux ARM64 job # --------------------------------------------------------------------------- build-runtime-macos-arm64: name: Build Runtime (macOS ARM64) + needs: [build-runtime-linux-arm64] runs-on: macos-latest-xlarge - timeout-minutes: 90 + timeout-minutes: 30 steps: - uses: actions/checkout@v4 - name: Install dependencies run: | set -euo pipefail - # Tap slp/krun first so libkrunfw and krunvm formulae are available - brew tap slp/krun - brew 
install rust lld dtc xz libkrunfw krunvm + brew install rust lld dtc xz - - name: Build custom libkrunfw (kernel) - run: crates/openshell-vm/runtime/build-custom-libkrunfw.sh + - name: Download pre-built kernel.c + uses: actions/download-artifact@v4 + with: + name: kernel-c-arm64 + path: target/kernel-artifact - - name: Build portable libkrun - run: tasks/scripts/vm/build-libkrun-macos.sh + - name: Build libkrunfw + libkrun from pre-built kernel + run: tasks/scripts/vm/build-libkrun-macos.sh --kernel-dir target/kernel-artifact - name: Package runtime tarball - env: - CUSTOM_PROVENANCE_DIR: target/custom-runtime run: | tasks/scripts/vm/package-vm-runtime.sh \ --platform darwin-aarch64 \ diff --git a/architecture/custom-vm-runtime.md b/architecture/custom-vm-runtime.md index c2e9b57b..abb0aa6f 100644 --- a/architecture/custom-vm-runtime.md +++ b/architecture/custom-vm-runtime.md @@ -116,23 +116,29 @@ and makes it straightforward to correlate VM behavior with a specific runtime ar ```mermaid graph LR subgraph Source["crates/openshell-vm/runtime/"] - BUILD["build-custom-libkrunfw.sh\nClones libkrunfw, applies config, builds"] KCONF["kernel/openshell.kconfig\nKernel config fragment"] README["README.md\nOperator documentation"] end - subgraph Output["target/custom-runtime/"] - LIB["libkrunfw.dylib\nCustom library"] - META["provenance.json\nBuild metadata"] - FRAG["openshell.kconfig\nConfig fragment used"] - FULL["kernel.config\nFull kernel .config"] + subgraph Linux["Linux CI (build-libkrun.sh)"] + BUILD_L["Build kernel + libkrunfw.so + libkrun.so"] + KERNELC["kernel.c\nKernel as C byte array"] end - KCONF --> BUILD - BUILD --> LIB - BUILD --> META - BUILD --> FRAG - BUILD --> FULL + subgraph macOS["macOS CI (build-libkrun-macos.sh)"] + BUILD_M["Compile kernel.c -> libkrunfw.dylib\nBuild libkrun.dylib"] + end + + subgraph Output["target/libkrun-build/"] + LIB_SO["libkrunfw.so + libkrun.so\n(Linux)"] + LIB_DY["libkrunfw.dylib + libkrun.dylib\n(macOS)"] + end + + KCONF --> 
BUILD_L + BUILD_L --> LIB_SO + BUILD_L --> KERNELC + KERNELC --> BUILD_M + BUILD_M --> LIB_DY ``` ## Kernel Config Fragment @@ -222,16 +228,18 @@ supported platforms. Runs on-demand or when the kernel config / pinned versions | Platform | Runner | Build Method | |----------|--------|-------------| -| Linux ARM64 | `build-arm64` (self-hosted) | Native `build-libkrun.sh` | +| Linux ARM64 | `build-arm64` (self-hosted) | Native `build-libkrun.sh` (also exports kernel.c) | | Linux x86_64 | `build-amd64` (self-hosted) | Native `build-libkrun.sh` | -| macOS ARM64 | `macos-latest-xlarge` (GitHub-hosted) | `build-custom-libkrunfw.sh` (krunvm) + `build-libkrun-macos.sh` | +| macOS ARM64 | `macos-latest-xlarge` (GitHub-hosted) | `build-libkrun-macos.sh --kernel-dir` (uses pre-built kernel.c from ARM64) | Artifacts: `vm-runtime-{platform}.tar.zst` containing libkrun, libkrunfw, gvproxy, and provenance metadata. -The macOS kernel build requires a real macOS ARM64 runner because it uses `krunvm` to -compile the Linux kernel inside a Fedora VM (Hypervisor.framework). The kernel inside -libkrunfw is always Linux regardless of host platform. +The aarch64 Linux kernel is compiled once on the Linux ARM64 runner. The resulting +`kernel.c` (a C source file containing the kernel as a byte array) is passed to the +macOS job, which compiles it into `libkrunfw.dylib` with Apple's `cc`. This eliminates +the need for krunvm/Fedora VM and cuts macOS CI from ~45 min to ~5 min. The kernel +inside libkrunfw is always Linux regardless of host platform. ### VM Binary (`release-vm-dev.yml`) diff --git a/crates/openshell-vm/pins.env b/crates/openshell-vm/pins.env index 3c34a4af..b3d80229 100644 --- a/crates/openshell-vm/pins.env +++ b/crates/openshell-vm/pins.env @@ -4,7 +4,7 @@ # Pinned dependency versions for openshell-vm builds. # # This file is sourced by build-rootfs.sh and -# build-custom-libkrunfw.sh. It centralises version pins and content-addressed +# build-libkrun.sh. 
It centralises version pins and content-addressed # digests so that builds are reproducible and auditable. # # Environment variables override these defaults — CI and local dev workflows diff --git a/crates/openshell-vm/runtime/README.md b/crates/openshell-vm/runtime/README.md index c30308e3..aec2dce9 100644 --- a/crates/openshell-vm/runtime/README.md +++ b/crates/openshell-vm/runtime/README.md @@ -2,7 +2,7 @@ > Status: Experimental and work in progress (WIP). VM support is under active development and may change. -This directory contains the build infrastructure for a custom `libkrunfw` runtime +This directory contains the kernel config fragment for a custom `libkrunfw` runtime that enables bridge CNI and netfilter support in the OpenShell gateway VM. ## Why @@ -21,51 +21,61 @@ that enables these networking and sandboxing features. ``` runtime/ - build-custom-libkrunfw.sh # Build script for custom libkrunfw kernel/ openshell.kconfig # Kernel config fragment (networking + sandboxing) ``` -## Building +## Build Pipeline -### Prerequisites +The kernel is compiled on Linux CI runners. macOS reuses the pre-built `kernel.c` +artifact from the Linux ARM64 build — no krunvm or Fedora VM needed. 
-- Rust toolchain -- make, git, curl -- On macOS: Xcode command line tools and cross-compilation tools for aarch64 +``` +Linux ARM64: builds aarch64 kernel -> .so + exports kernel.c (parallel) +Linux AMD64: builds x86_64 kernel -> .so (parallel) +macOS ARM64: reuses aarch64 kernel.c -> .dylib (depends on ARM64) +``` + +### Build Scripts + +| Script | Platform | What it does | +|--------|----------|-------------| +| `tasks/scripts/vm/build-libkrun.sh` | Linux | Builds libkrunfw + libkrun from source, exports kernel.c | +| `tasks/scripts/vm/build-libkrun-macos.sh` | macOS | Compiles pre-built kernel.c into .dylib, builds libkrun | +| `tasks/scripts/vm/package-vm-runtime.sh` | Any | Packages runtime tarball (libs + gvproxy + provenance) | -### Quick Build +### Quick Build (Linux) ```bash -# Build custom libkrunfw (clones libkrunfw repo, applies config, builds) -./crates/openshell-vm/runtime/build-custom-libkrunfw.sh +# Build both libkrunfw and libkrun from source +tasks/scripts/vm/build-libkrun.sh # Or build the full runtime from source via mise: FROM_SOURCE=1 mise run vm:setup ``` -### Output +### Quick Build (macOS) -Build artifacts are placed in `target/custom-runtime/`: +On macOS, you need a pre-built `kernel.c` from a Linux ARM64 build: -``` -target/custom-runtime/ - libkrunfw.dylib # The custom library - libkrunfw..dylib # Version-suffixed copy - provenance.json # Build metadata (commit, hash, timestamp) - openshell.kconfig # The config fragment used - kernel.config # Full kernel .config (for debugging) +```bash +# Download pre-built runtime (recommended, ~30s): +mise run vm:setup + +# Or if you have kernel.c from a Linux build: +tasks/scripts/vm/build-libkrun-macos.sh --kernel-dir target/libkrun-build ``` -### Using the Custom Runtime +### Output -```bash -# Point the bundle script at the custom build and rebuild: -export OPENSHELL_VM_RUNTIME_SOURCE_DIR=target/custom-runtime -mise run vm:build +Build artifacts are placed in `target/libkrun-build/`: -# Then boot 
the VM as usual: -mise run vm +``` +target/libkrun-build/ + libkrun.so / libkrun.dylib # The VMM library + libkrunfw.so* / libkrunfw.dylib # Kernel firmware library + kernel.c # Linux kernel as C byte array (Linux only) + ABI_VERSION # ABI version number (Linux only) ``` ## Networking diff --git a/crates/openshell-vm/runtime/build-custom-libkrunfw.sh b/crates/openshell-vm/runtime/build-custom-libkrunfw.sh deleted file mode 100755 index 5d50c915..00000000 --- a/crates/openshell-vm/runtime/build-custom-libkrunfw.sh +++ /dev/null @@ -1,401 +0,0 @@ -#!/usr/bin/env bash -# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# SPDX-License-Identifier: Apache-2.0 - -# Build a custom libkrunfw with bridge/netfilter kernel support. -# -# This script clones libkrunfw, applies the OpenShell kernel config -# fragment (bridge CNI, iptables, conntrack), builds the library, and -# stages the artifact with provenance metadata. -# -# Prerequisites: -# - Rust toolchain (cargo) -# - make, git, curl -# - Cross-compilation toolchain for aarch64 (if building on x86_64) -# - On macOS: Xcode command line tools -# -# Usage: -# ./build-custom-libkrunfw.sh [--output-dir DIR] [--libkrunfw-ref REF] -# -# Environment: -# LIBKRUNFW_REF - git ref to check out (default: main) -# LIBKRUNFW_REPO - git repo URL (default: github.com/containers/libkrunfw) -# OPENSHELL_RUNTIME_OUTPUT_DIR - output directory for built artifacts - -set -euo pipefail - -SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" -PROJECT_ROOT="$(cd "${SCRIPT_DIR}/../../.." && pwd)" -KERNEL_CONFIG_FRAGMENT="${SCRIPT_DIR}/kernel/openshell.kconfig" - -# Source pinned dependency versions (digests, checksums, commit SHAs). -# Environment variables override pins — see pins.env for details. 
-PINS_FILE="${SCRIPT_DIR}/../pins.env" -if [ -f "$PINS_FILE" ]; then - # shellcheck source=../pins.env - source "$PINS_FILE" -fi - -# Defaults (LIBKRUNFW_REF is commit-pinned in pins.env; falls back to main -# only if pins.env is missing and no env var is set). -LIBKRUNFW_REPO="${LIBKRUNFW_REPO:-https://github.com/containers/libkrunfw.git}" -LIBKRUNFW_REF="${LIBKRUNFW_REF:-main}" -OUTPUT_DIR="${OPENSHELL_RUNTIME_OUTPUT_DIR:-${PROJECT_ROOT}/target/custom-runtime}" -BUILD_DIR="${PROJECT_ROOT}/target/libkrunfw-build" - -# Parse arguments -while [[ $# -gt 0 ]]; do - case "$1" in - --output-dir) - OUTPUT_DIR="$2"; shift 2 ;; - --libkrunfw-ref) - LIBKRUNFW_REF="$2"; shift 2 ;; - --help|-h) - echo "Usage: $0 [--output-dir DIR] [--libkrunfw-ref REF]" - echo "" - echo "Build a custom libkrunfw with bridge/netfilter kernel support." - echo "" - echo "Options:" - echo " --output-dir DIR Output directory for built artifacts" - echo " --libkrunfw-ref REF Git ref to check out (default: main)" - echo "" - echo "Environment:" - echo " LIBKRUNFW_REPO Git repo URL" - echo " LIBKRUNFW_REF Git ref (branch/tag/commit)" - echo " OPENSHELL_RUNTIME_OUTPUT_DIR Output directory" - exit 0 - ;; - *) - echo "Unknown argument: $1" >&2; exit 1 ;; - esac -done - -echo "==> Building custom libkrunfw" -echo " Repo: ${LIBKRUNFW_REPO}" -echo " Ref: ${LIBKRUNFW_REF}" -echo " Config fragment: ${KERNEL_CONFIG_FRAGMENT}" -echo " Output: ${OUTPUT_DIR}" -echo "" - -# ── Clone / update libkrunfw ──────────────────────────────────────────── - -if [ -d "${BUILD_DIR}/libkrunfw/.git" ]; then - echo "==> Updating existing libkrunfw checkout..." - git -C "${BUILD_DIR}/libkrunfw" fetch origin - git -C "${BUILD_DIR}/libkrunfw" checkout "${LIBKRUNFW_REF}" - git -C "${BUILD_DIR}/libkrunfw" pull --ff-only 2>/dev/null || true -else - echo "==> Cloning libkrunfw..." 
- mkdir -p "${BUILD_DIR}" - git clone "${LIBKRUNFW_REPO}" "${BUILD_DIR}/libkrunfw" - git -C "${BUILD_DIR}/libkrunfw" checkout "${LIBKRUNFW_REF}" -fi - -LIBKRUNFW_DIR="${BUILD_DIR}/libkrunfw" -LIBKRUNFW_COMMIT=$(git -C "${LIBKRUNFW_DIR}" rev-parse HEAD) -LIBKRUNFW_SHORT=$(git -C "${LIBKRUNFW_DIR}" rev-parse --short HEAD) - -echo " Commit: ${LIBKRUNFW_COMMIT}" - -# ── Detect the kernel version libkrunfw targets ──────────────────────── - -# libkrunfw's Makefile typically sets KERNEL_VERSION or has it in a -# config file. Try to detect it. -KERNEL_VERSION="" -if [ -f "${LIBKRUNFW_DIR}/Makefile" ]; then - KERNEL_VERSION=$(grep -oE 'KERNEL_VERSION\s*=\s*linux-[^\s]+' "${LIBKRUNFW_DIR}/Makefile" 2>/dev/null | head -1 | sed 's/.*= *//' || true) -fi -if [ -z "$KERNEL_VERSION" ] && [ -f "${LIBKRUNFW_DIR}/kernel_version" ]; then - KERNEL_VERSION=$(cat "${LIBKRUNFW_DIR}/kernel_version") -fi -echo " Kernel version: ${KERNEL_VERSION:-unknown}" - -# ── Apply kernel config fragment ──────────────────────────────────────── - -echo "==> Applying OpenShell kernel config fragment..." - -# libkrunfw builds the kernel with a config generated from its own -# sources. The config merge happens after `make olddefconfig` runs -# on the base config. We use the kernel's scripts/kconfig/merge_config.sh -# when available, otherwise do a simple append+olddefconfig. - -MERGE_HOOK="${LIBKRUNFW_DIR}/openshell-kconfig-hook.sh" -cat > "${MERGE_HOOK}" << 'HOOKEOF' -#!/usr/bin/env bash -# Hook called by the libkrunfw build after extracting the kernel source. -# Merges the OpenShell kernel config fragment into .config. -set -euo pipefail - -KERNEL_DIR="$1" -FRAGMENT="$2" - -if [ ! -d "$KERNEL_DIR" ]; then - echo "ERROR: kernel source dir not found: $KERNEL_DIR" >&2 - exit 1 -fi - -if [ ! 
-f "$FRAGMENT" ]; then - echo "ERROR: config fragment not found: $FRAGMENT" >&2 - exit 1 -fi - -cd "$KERNEL_DIR" - -if [ -f scripts/kconfig/merge_config.sh ]; then - echo " Using kernel merge_config.sh" - KCONFIG_CONFIG=.config ./scripts/kconfig/merge_config.sh -m .config "$FRAGMENT" -else - echo " Appending fragment and running olddefconfig" - cat "$FRAGMENT" >> .config -fi - -# Detect the kernel ARCH value from the host (or krunvm guest) architecture. -case "$(uname -m)" in - aarch64) KARCH="arm64" ;; - x86_64) KARCH="x86_64" ;; - *) KARCH="$(uname -m)" ;; -esac -echo " Kernel ARCH: ${KARCH}" -make ARCH="${KARCH}" olddefconfig - -# Verify critical configs are set -REQUIRED=( - CONFIG_BRIDGE - CONFIG_BRIDGE_NETFILTER - CONFIG_NETFILTER - CONFIG_NF_CONNTRACK - CONFIG_NF_NAT - CONFIG_IP_NF_IPTABLES - CONFIG_IP_NF_FILTER - CONFIG_IP_NF_NAT - CONFIG_NF_TABLES - CONFIG_NFT_NUMGEN - CONFIG_NFT_FIB_IPV4 - CONFIG_NFT_FIB_IPV6 - CONFIG_NFT_CT - CONFIG_NFT_NAT - CONFIG_NFT_MASQ - CONFIG_VETH - CONFIG_NET_NS -) - -MISSING=() -for cfg in "${REQUIRED[@]}"; do - if ! grep -q "^${cfg}=[ym]" .config; then - MISSING+=("$cfg") - fi -done - -if [ ${#MISSING[@]} -gt 0 ]; then - echo "ERROR: Required kernel configs not set after merge:" >&2 - printf " %s\n" "${MISSING[@]}" >&2 - exit 1 -fi - -echo " All required kernel configs verified." -HOOKEOF -chmod +x "${MERGE_HOOK}" - -# ── Build libkrunfw ──────────────────────────────────────────────────── - -echo "==> Building libkrunfw (this may take 10-30 minutes)..." 
- -cd "${LIBKRUNFW_DIR}" - -# Detect macOS vs Linux and pick the right library extension / target -if [ "$(uname -s)" = "Darwin" ]; then - LIB_EXT="dylib" -else - LIB_EXT="so" -fi - -# Detect the kernel source directory name from the Makefile -KERNEL_DIR_NAME=$(grep -oE 'KERNEL_VERSION\s*=\s*linux-[^\s]+' Makefile | head -1 | sed 's/KERNEL_VERSION *= *//') -if [ -z "$KERNEL_DIR_NAME" ]; then - echo "ERROR: Could not detect KERNEL_VERSION from Makefile" >&2 - exit 1 -fi -echo " Kernel source dir: ${KERNEL_DIR_NAME}" - -if [ "$(uname -s)" = "Darwin" ]; then - # On macOS, use krunvm to build the kernel inside a lightweight Linux VM. - # This matches the upstream libkrunfw build approach and avoids all the - # issues with Docker emulation and APFS filesystem limitations. - # - # Prerequisites: brew tap slp/krun && brew install krunvm - - if ! command -v krunvm &>/dev/null; then - echo "ERROR: krunvm is required to build the kernel on macOS" >&2 - echo " Install with: brew tap slp/krun && brew install krunvm" >&2 - exit 1 - fi - - echo "==> Building kernel inside krunvm (macOS detected)..." - - VM_NAME="libkrunfw-openshell" - - # Clean up any leftover VM from a previous failed run - krunvm delete "${VM_NAME}" 2>/dev/null || true - - # Copy the config fragment into the libkrunfw tree so the VM can see it. - # The merge hook (MERGE_HOOK) is already written there by the cat above. - cp -f "${KERNEL_CONFIG_FRAGMENT}" "${LIBKRUNFW_DIR}/openshell.kconfig" - - echo " Creating VM..." - # krunvm may print "The volume has been configured" on first use of a - # volume path and exit non-zero. Retry once if that happens. - if ! krunvm create fedora \ - --name "${VM_NAME}" \ - --cpus 4 \ - --mem 4096 \ - -v "${LIBKRUNFW_DIR}:/work" \ - -w /work; then - echo " Retrying VM creation..." - krunvm create fedora \ - --name "${VM_NAME}" \ - --cpus 4 \ - --mem 4096 \ - -v "${LIBKRUNFW_DIR}:/work" \ - -w /work - fi - - echo " Installing build dependencies..." 
- krunvm start "${VM_NAME}" /usr/bin/dnf -- install -y \ - 'dnf-command(builddep)' python3-pyelftools - - krunvm start "${VM_NAME}" /usr/bin/dnf -- builddep -y kernel - - # Step 1: prepare kernel sources (download, extract, patch, base config) - echo " Preparing kernel sources..." - krunvm start "${VM_NAME}" /usr/bin/make -- "${KERNEL_DIR_NAME}" - - # Step 2: merge the OpenShell config fragment - echo " Merging OpenShell kernel config fragment..." - krunvm start "${VM_NAME}" /usr/bin/bash -- \ - /work/openshell-kconfig-hook.sh "/work/${KERNEL_DIR_NAME}" /work/openshell.kconfig - - # Step 3: build the kernel and generate the C bundle - echo " Building kernel (this is the slow part)..." - krunvm start "${VM_NAME}" /usr/bin/make -- -j4 - - echo " Cleaning up VM..." - krunvm delete "${VM_NAME}" - - # Clean up temp files from the libkrunfw tree - rm -f "${LIBKRUNFW_DIR}/openshell.kconfig" - - if [ ! -f "${LIBKRUNFW_DIR}/kernel.c" ]; then - echo "ERROR: kernel.c was not produced — build failed" >&2 - exit 1 - fi - - # Compile the shared library on the host (uses host cc for a .dylib) - echo "==> Compiling libkrunfw.dylib on host..." - ABI_VERSION=$(grep -oE 'ABI_VERSION\s*=\s*[0-9]+' Makefile | head -1 | sed 's/[^0-9]//g') - cc -fPIC -DABI_VERSION="${ABI_VERSION}" -shared -o "libkrunfw.${ABI_VERSION}.dylib" kernel.c -else - # On Linux, we can do everything natively in three steps: - - # Step 1: prepare kernel sources - echo " Preparing kernel sources..." - make "${KERNEL_DIR_NAME}" - - # Step 2: merge config fragment - echo "==> Merging OpenShell kernel config fragment..." - bash "${MERGE_HOOK}" "${LIBKRUNFW_DIR}/${KERNEL_DIR_NAME}" "${KERNEL_CONFIG_FRAGMENT}" - - # Step 3: build the kernel and shared library - make -j"$(nproc)" "$(grep -oE 'KRUNFW_BINARY_Linux\s*=\s*\S+' Makefile | head -1 | sed 's/[^=]*= *//')" || \ - make -j"$(nproc)" libkrunfw.so -fi - -# ── Stage output artifacts ────────────────────────────────────────────── - -echo "==> Staging artifacts..." 
-mkdir -p "${OUTPUT_DIR}" - -# Find the built library — check versioned names (e.g. libkrunfw.5.dylib) first -BUILT_LIB="" -for candidate in \ - "${LIBKRUNFW_DIR}"/libkrunfw*.${LIB_EXT} \ - "${LIBKRUNFW_DIR}/libkrunfw.${LIB_EXT}" \ - "${LIBKRUNFW_DIR}/target/release/libkrunfw.${LIB_EXT}" \ - "${LIBKRUNFW_DIR}/build/libkrunfw.${LIB_EXT}"; do - if [ -f "$candidate" ]; then - BUILT_LIB="$candidate" - break - fi -done - -if [ -z "$BUILT_LIB" ]; then - echo "ERROR: Could not find built libkrunfw.${LIB_EXT}" >&2 - echo " Searched in ${LIBKRUNFW_DIR}/ for libkrunfw*.${LIB_EXT}" - exit 1 -fi - -echo " Found library: ${BUILT_LIB}" - -# Compute SHA-256 (shasum on macOS, sha256sum on Linux) -if command -v sha256sum &>/dev/null; then - ARTIFACT_HASH=$(sha256sum "${BUILT_LIB}" | cut -d' ' -f1) -else - ARTIFACT_HASH=$(shasum -a 256 "${BUILT_LIB}" | cut -d' ' -f1) -fi -ARTIFACT_HASH_SHORT="${ARTIFACT_HASH:0:12}" - -# Copy the library — always stage as libkrunfw.dylib / libkrunfw.so -# (the base name the runtime loader expects) plus the original name -cp "${BUILT_LIB}" "${OUTPUT_DIR}/libkrunfw.${LIB_EXT}" -BUILT_BASENAME="$(basename "${BUILT_LIB}")" -if [ "${BUILT_BASENAME}" != "libkrunfw.${LIB_EXT}" ]; then - cp "${BUILT_LIB}" "${OUTPUT_DIR}/${BUILT_BASENAME}" -fi - -# Copy the kernel config that was actually used (for reproducibility) -KERNEL_SRC_DIR="" -for candidate in \ - "${LIBKRUNFW_DIR}/linux-"* \ - "${LIBKRUNFW_DIR}/build/linux-"* \ - "${LIBKRUNFW_DIR}/kernel/linux-"*; do - if [ -d "$candidate" ] && [ -f "${candidate}/.config" ]; then - KERNEL_SRC_DIR="$candidate" - break - fi -done - -if [ -n "$KERNEL_SRC_DIR" ] && [ -f "${KERNEL_SRC_DIR}/.config" ]; then - cp "${KERNEL_SRC_DIR}/.config" "${OUTPUT_DIR}/kernel.config" -fi - -# Copy our fragment for reference -cp "${KERNEL_CONFIG_FRAGMENT}" "${OUTPUT_DIR}/openshell.kconfig" - -# ── Write provenance metadata ────────────────────────────────────────── - -cat > "${OUTPUT_DIR}/provenance.json" << EOF -{ - "artifact": 
"libkrunfw-custom", - "version": "0.1.0-openshell", - "build_timestamp": "$(date -u +%Y-%m-%dT%H:%M:%SZ)", - "libkrunfw_repo": "${LIBKRUNFW_REPO}", - "libkrunfw_ref": "${LIBKRUNFW_REF}", - "libkrunfw_commit": "${LIBKRUNFW_COMMIT}", - "kernel_version": "${KERNEL_VERSION:-unknown}", - "kernel_config_fragment": "openshell.kconfig", - "artifact_sha256": "${ARTIFACT_HASH}", - "host_os": "$(uname -s)", - "host_arch": "$(uname -m)", - "builder": "build-custom-libkrunfw.sh" -} -EOF - -echo "" -echo "==> Build complete" -echo " Library: ${OUTPUT_DIR}/libkrunfw.${LIB_EXT}" -echo " SHA256: ${ARTIFACT_HASH_SHORT}..." -echo " Provenance: ${OUTPUT_DIR}/provenance.json" -echo " Commit: ${LIBKRUNFW_SHORT}" -echo "" -echo "To use this runtime:" -echo " export OPENSHELL_VM_RUNTIME_SOURCE_DIR=${OUTPUT_DIR}" -echo " mise run vm:build" diff --git a/tasks/scripts/vm/build-libkrun-macos.sh b/tasks/scripts/vm/build-libkrun-macos.sh index e203c872..556ae155 100755 --- a/tasks/scripts/vm/build-libkrun-macos.sh +++ b/tasks/scripts/vm/build-libkrun-macos.sh @@ -2,23 +2,31 @@ # SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 -# Build libkrun from source on macOS with portable rpath. +# Build libkrun + libkrunfw from source on macOS with portable rpath. # -# This script builds libkrun WITHOUT GPU support (no virglrenderer/libepoxy/MoltenVK -# dependencies), making the resulting binary fully portable and self-contained. +# This script compiles a pre-built kernel.c (from the Linux ARM64 CI job) into +# libkrunfw.dylib, then builds libkrun.dylib (the VMM) WITHOUT GPU support. +# The resulting binaries are fully portable and self-contained. # -# For openshell-vm, we run headless k3s clusters, so GPU passthrough is not needed. +# The kernel.c file contains the aarch64 Linux kernel as a byte array. It is +# architecture-specific but OS-agnostic — any C compiler can turn it into a +# shared library. 
This eliminates the need to build the kernel on macOS via +# krunvm, cutting the macOS CI job from ~45 minutes to ~5 minutes. # # Prerequisites: # - macOS ARM64 (Apple Silicon) # - Xcode Command Line Tools -# - Homebrew: brew install rust lld dtc xz libkrunfw +# - Homebrew: brew install rust lld dtc xz +# - Pre-built kernel.c and ABI_VERSION from the Linux ARM64 build # # Usage: -# ./build-libkrun-macos.sh +# ./build-libkrun-macos.sh --kernel-dir +# +# --kernel-dir Directory containing kernel.c and ABI_VERSION (from Linux build) # # Output: -# target/libkrun-build/libkrun.dylib - portable dylib with @loader_path rpath +# target/libkrun-build/libkrun.dylib - portable dylib with @loader_path rpath +# target/libkrun-build/libkrunfw.dylib - portable dylib compiled from kernel.c set -euo pipefail @@ -26,7 +34,26 @@ ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../../.." && pwd)" BUILD_DIR="${ROOT}/target/libkrun-build" OUTPUT_DIR="${BUILD_DIR}" BREW_PREFIX="$(brew --prefix 2>/dev/null || echo /opt/homebrew)" -CUSTOM_RUNTIME="${ROOT}/target/custom-runtime" +KERNEL_DIR="" + +# Parse arguments +while [[ $# -gt 0 ]]; do + case "$1" in + --kernel-dir) + KERNEL_DIR="$2"; shift 2 ;; + --help|-h) + echo "Usage: $0 --kernel-dir " + echo "" + echo "Build libkrun + libkrunfw on macOS using a pre-built kernel.c." + echo "" + echo "Options:" + echo " --kernel-dir DIR Directory containing kernel.c and ABI_VERSION" + exit 0 + ;; + *) + echo "Unknown argument: $1" >&2; exit 1 ;; + esac +done if [ "$(uname -s)" != "Darwin" ]; then echo "Error: This script only runs on macOS" >&2 @@ -38,9 +65,30 @@ if [ "$(uname -m)" != "arm64" ]; then exit 1 fi +if [ -z "$KERNEL_DIR" ]; then + echo "Error: --kernel-dir is required" >&2 + echo " This should point to the directory containing kernel.c and ABI_VERSION" >&2 + echo " from the Linux ARM64 build job." >&2 + exit 1 +fi + +if [ ! -f "${KERNEL_DIR}/kernel.c" ]; then + echo "Error: kernel.c not found in ${KERNEL_DIR}" >&2 + exit 1 +fi + +if [ ! 
-f "${KERNEL_DIR}/ABI_VERSION" ]; then + echo "Error: ABI_VERSION not found in ${KERNEL_DIR}" >&2 + exit 1 +fi + +ABI_VERSION="$(cat "${KERNEL_DIR}/ABI_VERSION")" + ARCH="$(uname -m)" -echo "==> Building libkrun for macOS ${ARCH} (no GPU support)" +echo "==> Building libkrun + libkrunfw for macOS ${ARCH} (no GPU support)" echo " Build directory: ${BUILD_DIR}" +echo " Kernel source: ${KERNEL_DIR}/kernel.c ($(du -sh "${KERNEL_DIR}/kernel.c" | cut -f1))" +echo " ABI version: ${ABI_VERSION}" echo "" # ── Check dependencies ────────────────────────────────────────────────── @@ -65,13 +113,6 @@ check_deps() { MISSING="$MISSING dtc" fi - # Check for libkrunfw - if [ ! -f "${BREW_PREFIX}/lib/libkrunfw.dylib" ] && \ - [ ! -f "${BREW_PREFIX}/lib/libkrunfw.5.dylib" ] && \ - [ ! -f "${CUSTOM_RUNTIME}/libkrunfw.dylib" ]; then - MISSING="$MISSING libkrunfw" - fi - if [ -n "$MISSING" ]; then echo "Error: Missing dependencies:$MISSING" >&2 echo "" >&2 @@ -89,6 +130,23 @@ check_deps mkdir -p "$BUILD_DIR" cd "$BUILD_DIR" +# ── Compile libkrunfw.dylib from pre-built kernel.c ──────────────────── + +echo "" +echo "==> Compiling libkrunfw.dylib from pre-built kernel.c..." + +# kernel.c is a C source file containing the Linux kernel as a byte array. +# It was generated by the Linux ARM64 build job. We just need to compile it +# into a shared library with the correct ABI version. +cc -fPIC -DABI_VERSION="${ABI_VERSION}" -shared \ + -o "${OUTPUT_DIR}/libkrunfw.${ABI_VERSION}.dylib" \ + "${KERNEL_DIR}/kernel.c" + +# Create the unversioned symlink that libkrun links against +cp "${OUTPUT_DIR}/libkrunfw.${ABI_VERSION}.dylib" "${OUTPUT_DIR}/libkrunfw.dylib" + +echo " Built: libkrunfw.${ABI_VERSION}.dylib ($(du -sh "${OUTPUT_DIR}/libkrunfw.${ABI_VERSION}.dylib" | cut -f1))" + # ── Clone libkrun ─────────────────────────────────────────────────────── LIBKRUN_REF="${LIBKRUN_REF:-e5922f6}" @@ -117,21 +175,11 @@ cd libkrun echo "" echo "==> Building libkrun with NET=1 BLK=1 (no GPU)..." 
-# Find libkrunfw - prefer custom build with bridge support -if [ -f "${CUSTOM_RUNTIME}/provenance.json" ] && [ -f "${CUSTOM_RUNTIME}/libkrunfw.dylib" ]; then - LIBKRUNFW_DIR="${CUSTOM_RUNTIME}" - echo " Using custom libkrunfw from ${LIBKRUNFW_DIR}" -else - LIBKRUNFW_DIR="${BREW_PREFIX}/lib" - echo " Using Homebrew libkrunfw from ${LIBKRUNFW_DIR}" -fi +# Point the linker at our freshly-compiled libkrunfw +export LIBRARY_PATH="${OUTPUT_DIR}:${BREW_PREFIX}/lib:${LIBRARY_PATH:-}" +export DYLD_LIBRARY_PATH="${OUTPUT_DIR}:${BREW_PREFIX}/lib:${DYLD_LIBRARY_PATH:-}" -# Set library search paths for build -export LIBRARY_PATH="${LIBKRUNFW_DIR}:${BREW_PREFIX}/lib:${LIBRARY_PATH:-}" -export DYLD_LIBRARY_PATH="${LIBKRUNFW_DIR}:${BREW_PREFIX}/lib:${DYLD_LIBRARY_PATH:-}" - -# Set up LLVM/clang for bindgen (required by krun_display/krun_input if they get compiled) -# Note: DYLD_LIBRARY_PATH is needed at runtime for the build scripts that use libclang +# Set up LLVM/clang for bindgen LLVM_PREFIX="${BREW_PREFIX}/opt/llvm" if [ -d "$LLVM_PREFIX" ]; then export LIBCLANG_PATH="${LLVM_PREFIX}/lib" @@ -139,14 +187,14 @@ if [ -d "$LLVM_PREFIX" ]; then fi # Build with BLK and NET features only (no GPU) -# This avoids the virglrenderer → libepoxy → MoltenVK dependency chain +# This avoids the virglrenderer -> libepoxy -> MoltenVK dependency chain make clean 2>/dev/null || true make BLK=1 NET=1 -j"$(sysctl -n hw.ncpu)" # ── Rewrite dylib paths for portability ───────────────────────────────── echo "" -echo "==> Making dylib portable with @loader_path..." +echo "==> Making dylibs portable with @loader_path..." DYLIB="target/release/libkrun.dylib" if [ ! 
-f "$DYLIB" ]; then @@ -166,17 +214,24 @@ otool -L "$DYLIB" | grep -v "^/" | sed 's/^/ /' install_name_tool -id "@loader_path/libkrun.dylib" "$DYLIB" # Rewrite libkrunfw path to @loader_path (will be bundled alongside) -# Find what libkrunfw path is currently referenced -# Note: grep may not find anything (libkrunfw is loaded via dlopen), so we use || true KRUNFW_PATH=$(otool -L "$DYLIB" | grep libkrunfw | awk '{print $1}' || true) if [ -n "$KRUNFW_PATH" ]; then install_name_tool -change "$KRUNFW_PATH" "@loader_path/libkrunfw.dylib" "$DYLIB" - echo " Rewrote: $KRUNFW_PATH → @loader_path/libkrunfw.dylib" + echo " Rewrote: $KRUNFW_PATH -> @loader_path/libkrunfw.dylib" fi # Re-codesign after modifications (required on macOS) codesign -f -s - "$DYLIB" +# Make libkrunfw portable too +install_name_tool -id "@loader_path/libkrunfw.dylib" "${OUTPUT_DIR}/libkrunfw.dylib" +codesign -f -s - "${OUTPUT_DIR}/libkrunfw.dylib" + +if [ -f "${OUTPUT_DIR}/libkrunfw.${ABI_VERSION}.dylib" ]; then + install_name_tool -id "@loader_path/libkrunfw.${ABI_VERSION}.dylib" "${OUTPUT_DIR}/libkrunfw.${ABI_VERSION}.dylib" + codesign -f -s - "${OUTPUT_DIR}/libkrunfw.${ABI_VERSION}.dylib" +fi + # Show final dependencies echo "" echo " Final dependencies:" @@ -189,51 +244,9 @@ if otool -L "$DYLIB" | grep -q "/opt/homebrew"; then otool -L "$DYLIB" | grep "/opt/homebrew" | sed 's/^/ /' else echo "" - echo " ✓ No hardcoded Homebrew paths" + echo " No hardcoded Homebrew paths" fi -# ── Copy libkrunfw to output ──────────────────────────────────────────── - -echo "" -echo "==> Bundling libkrunfw..." 
- -# Find and copy libkrunfw -KRUNFW_SRC="" -for candidate in \ - "${CUSTOM_RUNTIME}/libkrunfw.dylib" \ - "${CUSTOM_RUNTIME}/libkrunfw.5.dylib" \ - "${BREW_PREFIX}/lib/libkrunfw.dylib" \ - "${BREW_PREFIX}/lib/libkrunfw.5.dylib"; do - if [ -f "$candidate" ]; then - # Resolve symlinks - if [ -L "$candidate" ]; then - KRUNFW_SRC=$(readlink -f "$candidate" 2>/dev/null || readlink "$candidate") - if [[ "$KRUNFW_SRC" != /* ]]; then - KRUNFW_SRC="$(dirname "$candidate")/${KRUNFW_SRC}" - fi - else - KRUNFW_SRC="$candidate" - fi - break - fi -done - -if [ -z "$KRUNFW_SRC" ]; then - echo "Error: Could not find libkrunfw.dylib" >&2 - exit 1 -fi - -cp "$KRUNFW_SRC" "${OUTPUT_DIR}/libkrunfw.dylib" -echo " Copied: $KRUNFW_SRC" - -# Make libkrunfw portable too -install_name_tool -id "@loader_path/libkrunfw.dylib" "${OUTPUT_DIR}/libkrunfw.dylib" -codesign -f -s - "${OUTPUT_DIR}/libkrunfw.dylib" - -# Check libkrunfw dependencies -echo " libkrunfw dependencies:" -otool -L "${OUTPUT_DIR}/libkrunfw.dylib" | grep -v "^/" | sed 's/^/ /' - # ── Summary ───────────────────────────────────────────────────────────── cd "$BUILD_DIR" @@ -252,10 +265,10 @@ ALL_GOOD=true for lib in "${OUTPUT_DIR}"/*.dylib; do if otool -L "$lib" | grep -q "/opt/homebrew"; then - echo " ✗ $(basename "$lib") has hardcoded paths" + echo " x $(basename "$lib") has hardcoded paths" ALL_GOOD=false else - echo " ✓ $(basename "$lib") is portable" + echo " ok $(basename "$lib") is portable" fi done diff --git a/tasks/scripts/vm/build-libkrun.sh b/tasks/scripts/vm/build-libkrun.sh index 2c01c65d..5873a4a4 100755 --- a/tasks/scripts/vm/build-libkrun.sh +++ b/tasks/scripts/vm/build-libkrun.sh @@ -7,6 +7,10 @@ # This script builds libkrun (VMM) and libkrunfw (kernel firmware) from source # with OpenShell's custom kernel configuration for bridge/netfilter support. # +# In addition to the platform's native .so artifacts, this script exports +# kernel.c and ABI_VERSION metadata so that other platforms (e.g. 
macOS) can +# compile their own libkrunfw wrapper without rebuilding the kernel. +# # Prerequisites: # - Linux (aarch64 or x86_64) # - Build tools: make, git, gcc, flex, bison, bc @@ -36,14 +40,27 @@ if [ "$(uname -s)" != "Linux" ]; then exit 1 fi -ARCH="$(uname -m)" -echo "==> Building libkrun for Linux ${ARCH}" +HOST_ARCH="$(uname -m)" +echo "==> Building libkrun for Linux ${HOST_ARCH}" echo " Build directory: ${BUILD_DIR}" echo " Kernel config: ${KERNEL_CONFIG}" echo "" +# Map host arch to kernel ARCH value +case "$HOST_ARCH" in + aarch64) KARCH="arm64"; KERNEL_IMAGE_PATH="arch/arm64/boot/Image" ;; + x86_64) KARCH="x86"; KERNEL_IMAGE_PATH="arch/x86/boot/bzImage" ;; + *) echo "Error: Unsupported architecture: ${HOST_ARCH}" >&2; exit 1 ;; +esac + # ── Install dependencies ──────────────────────────────────────────────── +# Use sudo only when not already running as root (e.g. inside CI containers). +SUDO="" +if [ "$(id -u)" -ne 0 ]; then + SUDO="sudo" +fi + install_deps() { echo "==> Checking/installing build dependencies..." @@ -58,8 +75,8 @@ install_deps() { done if [ -n "$MISSING" ]; then echo " Installing:$MISSING" - sudo apt-get update - sudo apt-get install -y $MISSING + $SUDO apt-get update + $SUDO apt-get install -y $MISSING else echo " All dependencies installed" fi @@ -68,7 +85,7 @@ install_deps() { # Fedora/RHEL DEPS="make git python3 python3-pyelftools gcc flex bison elfutils-libelf-devel openssl-devel bc glibc-static curl clang-devel" echo " Installing dependencies via dnf..." - sudo dnf install -y $DEPS + $SUDO dnf install -y $DEPS else echo "Warning: Unknown package manager. Please install manually:" >&2 @@ -123,7 +140,7 @@ echo " Building kernel and libkrunfw (this may take 15-20 minutes)..." 
# Phase 1: Run the Makefile's $(KERNEL_SOURCES) target, which: # - downloads and extracts the kernel tarball (if needed) # - applies patches -# - copies config-libkrunfw_aarch64 to $(KERNEL_SOURCES)/.config +# - copies config-libkrunfw_{arch} to $(KERNEL_SOURCES)/.config # - runs olddefconfig # # Phase 2: Merge our fragment on top of the .config produced by Phase 1 @@ -158,14 +175,14 @@ if [ -f openshell.kconfig ]; then # merge_config.sh must be called with ARCH set so it finds the right Kconfig # entry points. -m means "merge into existing .config" (vs starting fresh). - ARCH=arm64 KCONFIG_CONFIG="${KERNEL_SOURCES}/.config" \ + ARCH="${KARCH}" KCONFIG_CONFIG="${KERNEL_SOURCES}/.config" \ "${KERNEL_SOURCES}/scripts/kconfig/merge_config.sh" \ -m -O "${KERNEL_SOURCES}" \ "${KERNEL_SOURCES}/.config" \ openshell.kconfig # Re-run olddefconfig to fill in any new symbols introduced by the fragment. - make -C "${KERNEL_SOURCES}" ARCH=arm64 olddefconfig + make -C "${KERNEL_SOURCES}" ARCH="${KARCH}" olddefconfig # Verify that the key options were actually applied. all_ok=true @@ -185,7 +202,7 @@ if [ -f openshell.kconfig ]; then # The kernel binary and kernel.c from the previous (bad) build must be # removed so make rebuilds them with the updated .config. - rm -f kernel.c "${KERNEL_SOURCES}/arch/arm64/boot/Image" \ + rm -f kernel.c "${KERNEL_SOURCES}/${KERNEL_IMAGE_PATH}" \ "${KERNEL_SOURCES}/vmlinux" libkrunfw.so* fi @@ -196,6 +213,22 @@ make -j"$(nproc)" cp libkrunfw.so* "$OUTPUT_DIR/" echo " Built: $(ls "$OUTPUT_DIR"/libkrunfw.so* | xargs -n1 basename | tr '\n' ' ')" +# ── Export kernel.c for cross-platform builds ─────────────────────────── +# kernel.c is a C source file containing the compiled Linux kernel as a byte +# array. It is architecture-specific (aarch64 vs x86_64) but OS-agnostic — +# any C compiler can turn it into a .so or .dylib. We export it so the macOS +# job can produce libkrunfw.dylib without rebuilding the kernel. 
+ +ABI_VERSION="$(grep -oE 'ABI_VERSION\s*=\s*[0-9]+' Makefile | head -1 | sed 's/[^0-9]//g')" + +if [ -f kernel.c ]; then + cp kernel.c "$OUTPUT_DIR/kernel.c" + echo "${ABI_VERSION}" > "$OUTPUT_DIR/ABI_VERSION" + echo " Exported kernel.c ($(du -sh kernel.c | cut -f1)) and ABI_VERSION=${ABI_VERSION}" +else + echo "Warning: kernel.c not found — cross-platform builds will not work" >&2 +fi + cd "$BUILD_DIR" # ── Build libkrun (VMM) ───────────────────────────────────────────────── @@ -242,6 +275,7 @@ echo "==> Build complete!" echo " Output directory: ${OUTPUT_DIR}" echo "" echo " Artifacts:" +ls -lah "$OUTPUT_DIR"/*.so* "$OUTPUT_DIR"/kernel.c "$OUTPUT_DIR"/ABI_VERSION 2>/dev/null || \ ls -lah "$OUTPUT_DIR"/*.so* echo "" diff --git a/tasks/scripts/vm/vm-setup.sh b/tasks/scripts/vm/vm-setup.sh index 16eb2aaa..e7ae06d0 100755 --- a/tasks/scripts/vm/vm-setup.sh +++ b/tasks/scripts/vm/vm-setup.sh @@ -64,10 +64,19 @@ if [ "$FROM_SOURCE" = "1" ]; then case "$PLATFORM" in darwin-aarch64) - # macOS: build custom libkrunfw (kernel) then portable libkrun - "${ROOT}/crates/openshell-vm/runtime/build-custom-libkrunfw.sh" - echo "" - "${ROOT}/tasks/scripts/vm/build-libkrun-macos.sh" + # macOS: compile pre-built kernel.c into libkrunfw.dylib, then build libkrun.dylib. + # The kernel.c file must be obtained from a Linux ARM64 build first. + KERNEL_DIR="${ROOT}/target/libkrun-build" + if [ ! -f "${KERNEL_DIR}/kernel.c" ]; then + echo "Error: kernel.c not found at ${KERNEL_DIR}/kernel.c" >&2 + echo "" >&2 + echo "On macOS, the Linux kernel must be cross-compiled on a Linux host first." >&2 + echo "Either:" >&2 + echo " 1. Download pre-built runtime (default): mise run vm:setup" >&2 + echo " 2. Build kernel.c on Linux, copy to ${KERNEL_DIR}/, then re-run." 
>&2 + exit 1 + fi + "${ROOT}/tasks/scripts/vm/build-libkrun-macos.sh" --kernel-dir "${KERNEL_DIR}" ;; linux-*) # Linux: build both libkrunfw and libkrun in one go From 709bc441e4c44320e87228c11701e7f9972149a6 Mon Sep 17 00:00:00 2001 From: Drew Newberry Date: Wed, 8 Apr 2026 22:33:18 -0700 Subject: [PATCH 03/20] fix(vm): fix CI kernel build failures - Add cpio to build dependencies (required by CONFIG_IKHEADERS) - Disable CONFIG_IKHEADERS in kconfig fragment (not needed in VM) - Add pip fallback for pyelftools when apt package isn't importable - Add python3-pip to apt dependencies --- .../openshell-vm/runtime/kernel/openshell.kconfig | 3 +++ tasks/scripts/vm/build-libkrun.sh | 14 +++++++++++--- 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/crates/openshell-vm/runtime/kernel/openshell.kconfig b/crates/openshell-vm/runtime/kernel/openshell.kconfig index cc219f50..068be8cc 100644 --- a/crates/openshell-vm/runtime/kernel/openshell.kconfig +++ b/crates/openshell-vm/runtime/kernel/openshell.kconfig @@ -116,6 +116,9 @@ CONFIG_CGROUP_CPUACCT=y CONFIG_CGROUP_PIDS=y CONFIG_MEMCG=y +# ── Disable kernel headers archive (avoids cpio issues in CI) ────────── +# CONFIG_IKHEADERS is not set + # ── Security features required by the sandbox runtime ─────────────────── CONFIG_SECURITY_LANDLOCK=y CONFIG_SECCOMP_FILTER=y diff --git a/tasks/scripts/vm/build-libkrun.sh b/tasks/scripts/vm/build-libkrun.sh index 5873a4a4..7bfa5c2c 100755 --- a/tasks/scripts/vm/build-libkrun.sh +++ b/tasks/scripts/vm/build-libkrun.sh @@ -66,7 +66,7 @@ install_deps() { if command -v apt-get &>/dev/null; then # Debian/Ubuntu - DEPS="build-essential git python3 python3-pyelftools flex bison libelf-dev libssl-dev bc curl libclang-dev" + DEPS="build-essential git python3 python3-pip python3-pyelftools flex bison libelf-dev libssl-dev bc curl libclang-dev cpio" MISSING="" for dep in $DEPS; do if ! 
dpkg -s "$dep" &>/dev/null; then @@ -83,14 +83,22 @@ install_deps() { elif command -v dnf &>/dev/null; then # Fedora/RHEL - DEPS="make git python3 python3-pyelftools gcc flex bison elfutils-libelf-devel openssl-devel bc glibc-static curl clang-devel" + DEPS="make git python3 python3-pyelftools gcc flex bison elfutils-libelf-devel openssl-devel bc glibc-static curl clang-devel cpio" echo " Installing dependencies via dnf..." $SUDO dnf install -y $DEPS else echo "Warning: Unknown package manager. Please install manually:" >&2 echo " build-essential git python3 python3-pyelftools flex bison" >&2 - echo " libelf-dev libssl-dev bc curl" >&2 + echo " libelf-dev libssl-dev bc curl cpio" >&2 + fi + + # Ensure pyelftools is importable by the Python that will run bin2cbundle.py. + # The apt package may install to a different Python than the default python3. + if ! python3 -c "import elftools" &>/dev/null; then + echo " pyelftools not importable, installing via pip..." + python3 -m pip install --break-system-packages pyelftools 2>/dev/null || \ + python3 -m pip install pyelftools || true fi } From 4df66b07e2f57974bf8d405367f2a5b870136d99 Mon Sep 17 00:00:00 2001 From: Drew Newberry Date: Wed, 8 Apr 2026 22:41:01 -0700 Subject: [PATCH 04/20] fix(vm): add zstd and jq to build dependencies Both are needed by package-vm-runtime.sh for tarball compression and provenance metadata generation. 
--- tasks/scripts/vm/build-libkrun.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tasks/scripts/vm/build-libkrun.sh b/tasks/scripts/vm/build-libkrun.sh index 7bfa5c2c..5b8b775b 100755 --- a/tasks/scripts/vm/build-libkrun.sh +++ b/tasks/scripts/vm/build-libkrun.sh @@ -66,7 +66,7 @@ install_deps() { if command -v apt-get &>/dev/null; then # Debian/Ubuntu - DEPS="build-essential git python3 python3-pip python3-pyelftools flex bison libelf-dev libssl-dev bc curl libclang-dev cpio" + DEPS="build-essential git python3 python3-pip python3-pyelftools flex bison libelf-dev libssl-dev bc curl libclang-dev cpio zstd jq" MISSING="" for dep in $DEPS; do if ! dpkg -s "$dep" &>/dev/null; then @@ -83,7 +83,7 @@ install_deps() { elif command -v dnf &>/dev/null; then # Fedora/RHEL - DEPS="make git python3 python3-pyelftools gcc flex bison elfutils-libelf-devel openssl-devel bc glibc-static curl clang-devel cpio" + DEPS="make git python3 python3-pyelftools gcc flex bison elfutils-libelf-devel openssl-devel bc glibc-static curl clang-devel cpio zstd jq" echo " Installing dependencies via dnf..." $SUDO dnf install -y $DEPS From 7f127e4cb4f60270a544fc8abe4e0d357c8204d0 Mon Sep 17 00:00:00 2001 From: Drew Newberry Date: Wed, 8 Apr 2026 22:47:59 -0700 Subject: [PATCH 05/20] fix(vm): resolve kernel-dir to absolute path before cd The macOS build script changes directory to the build dir before compiling kernel.c, so relative paths passed via --kernel-dir would fail to resolve. 
--- tasks/scripts/vm/build-libkrun-macos.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tasks/scripts/vm/build-libkrun-macos.sh b/tasks/scripts/vm/build-libkrun-macos.sh index 556ae155..4e89deef 100755 --- a/tasks/scripts/vm/build-libkrun-macos.sh +++ b/tasks/scripts/vm/build-libkrun-macos.sh @@ -40,7 +40,7 @@ KERNEL_DIR="" while [[ $# -gt 0 ]]; do case "$1" in --kernel-dir) - KERNEL_DIR="$2"; shift 2 ;; + KERNEL_DIR="$(cd "$2" && pwd)"; shift 2 ;; --help|-h) echo "Usage: $0 --kernel-dir " echo "" From 242e9f475cd2afd9c73a340838d7ec7c1ebd53dc Mon Sep 17 00:00:00 2001 From: Drew Newberry Date: Wed, 8 Apr 2026 22:57:13 -0700 Subject: [PATCH 06/20] fix(vm): use mise for Rust on macOS to avoid LLVM conflicts Homebrew's rust package links against Homebrew's llvm, which conflicts with the lld package's LLVM version. Use mise to install a standalone Rust toolchain (via rustup) that ships its own LLVM. --- .github/workflows/release-vm-kernel.yml | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/.github/workflows/release-vm-kernel.yml b/.github/workflows/release-vm-kernel.yml index 01992cdf..ae686a8b 100644 --- a/.github/workflows/release-vm-kernel.yml +++ b/.github/workflows/release-vm-kernel.yml @@ -130,10 +130,22 @@ jobs: steps: - uses: actions/checkout@v4 - - name: Install dependencies + - name: Install mise and toolchain run: | set -euo pipefail - brew install rust lld dtc xz + curl https://mise.run | sh + echo "$HOME/.local/bin" >> "$GITHUB_PATH" + echo "$HOME/.local/share/mise/shims" >> "$GITHUB_PATH" + + - name: Install tools via mise + env: + MISE_GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: | + set -euo pipefail + mise trust + mise install rust + mise reshim + brew install lld dtc xz - name: Download pre-built kernel.c uses: actions/download-artifact@v4 From 1b79bdbc97bac7182183101318f179c5499849ca Mon Sep 17 00:00:00 2001 From: Drew Newberry Date: Wed, 8 Apr 2026 23:04:27 -0700 Subject: [PATCH 07/20] 
fix(vm): use rustup directly on macOS, disable sccache Use rustup instead of mise or Homebrew for Rust to avoid LLVM conflicts. Set RUSTC_WRAPPER='' to disable sccache which is not available on the macOS runner. --- .github/workflows/release-vm-kernel.yml | 18 +++++------------- 1 file changed, 5 insertions(+), 13 deletions(-) diff --git a/.github/workflows/release-vm-kernel.yml b/.github/workflows/release-vm-kernel.yml index ae686a8b..c735c691 100644 --- a/.github/workflows/release-vm-kernel.yml +++ b/.github/workflows/release-vm-kernel.yml @@ -127,24 +127,16 @@ jobs: needs: [build-runtime-linux-arm64] runs-on: macos-latest-xlarge timeout-minutes: 30 + env: + RUSTC_WRAPPER: "" steps: - uses: actions/checkout@v4 - - name: Install mise and toolchain - run: | - set -euo pipefail - curl https://mise.run | sh - echo "$HOME/.local/bin" >> "$GITHUB_PATH" - echo "$HOME/.local/share/mise/shims" >> "$GITHUB_PATH" - - - name: Install tools via mise - env: - MISE_GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + - name: Install dependencies run: | set -euo pipefail - mise trust - mise install rust - mise reshim + curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y + echo "$HOME/.cargo/bin" >> "$GITHUB_PATH" brew install lld dtc xz - name: Download pre-built kernel.c From 18a5c586f61b9d412e2032788c31e86805e3b1bb Mon Sep 17 00:00:00 2001 From: Drew Newberry Date: Wed, 8 Apr 2026 23:37:40 -0700 Subject: [PATCH 08/20] fix(ci): fix release-vm-dev pipeline failures - Add CI container to download-kernel-runtime job so gh CLI is available (bare build-amd64 runner does not have gh installed) - Fall back to plain cargo build in build-rootfs.sh when cargo-zigbuild is not available (works for native builds in CI where arch matches) --- .github/workflows/release-vm-dev.yml | 5 +++++ crates/openshell-vm/scripts/build-rootfs.sh | 18 ++++++++++-------- 2 files changed, 15 insertions(+), 8 deletions(-) diff --git a/.github/workflows/release-vm-dev.yml 
b/.github/workflows/release-vm-dev.yml index d08a208b..f2eb3577 100644 --- a/.github/workflows/release-vm-dev.yml +++ b/.github/workflows/release-vm-dev.yml @@ -65,6 +65,11 @@ jobs: name: Download Kernel Runtime runs-on: build-amd64 timeout-minutes: 10 + container: + image: ghcr.io/nvidia/openshell/ci:latest + credentials: + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} steps: - uses: actions/checkout@v4 diff --git a/crates/openshell-vm/scripts/build-rootfs.sh b/crates/openshell-vm/scripts/build-rootfs.sh index 16a0a23d..d43046d4 100755 --- a/crates/openshell-vm/scripts/build-rootfs.sh +++ b/crates/openshell-vm/scripts/build-rootfs.sh @@ -302,16 +302,18 @@ SUPERVISOR_TARGET="${RUST_TARGET}" SUPERVISOR_BIN="${PROJECT_ROOT}/target/${SUPERVISOR_TARGET}/release/openshell-sandbox" echo "==> Building openshell-sandbox supervisor binary (${SUPERVISOR_TARGET})..." -if ! command -v cargo-zigbuild >/dev/null 2>&1; then - echo "ERROR: cargo-zigbuild is not installed." - echo " Install it with: cargo install cargo-zigbuild" - echo " Also requires: zig (brew install zig)" - exit 1 +if command -v cargo-zigbuild >/dev/null 2>&1; then + cargo zigbuild --release -p openshell-sandbox --target "${SUPERVISOR_TARGET}" \ + --manifest-path "${PROJECT_ROOT}/Cargo.toml" 2>&1 | tail -5 +else + # Fallback: use plain cargo build when cargo-zigbuild is not available. + # This works for native builds (e.g. building x86_64 on x86_64) but + # will fail for true cross-compilation without a cross toolchain. + echo " cargo-zigbuild not found, falling back to cargo build..." + cargo build --release -p openshell-sandbox --target "${SUPERVISOR_TARGET}" \ + --manifest-path "${PROJECT_ROOT}/Cargo.toml" 2>&1 | tail -5 fi -cargo zigbuild --release -p openshell-sandbox --target "${SUPERVISOR_TARGET}" \ - --manifest-path "${PROJECT_ROOT}/Cargo.toml" 2>&1 | tail -5 - if [ ! 
-f "${SUPERVISOR_BIN}" ]; then echo "ERROR: supervisor binary not found at ${SUPERVISOR_BIN}" exit 1 From 04076c5463136e5b6f140fdc8ea6a4e8b01c585b Mon Sep 17 00:00:00 2001 From: Drew Newberry Date: Wed, 8 Apr 2026 23:44:39 -0700 Subject: [PATCH 09/20] fix(ci): add zstd to CI environment for rootfs compression - Install zstd inline in build-rootfs workflow step (immediate fix) - Add zstd to CI Dockerfile for future image builds --- .github/workflows/release-vm-dev.yml | 3 +++ deploy/docker/Dockerfile.ci | 1 + 2 files changed, 4 insertions(+) diff --git a/.github/workflows/release-vm-dev.yml b/.github/workflows/release-vm-dev.yml index f2eb3577..5d14f15b 100644 --- a/.github/workflows/release-vm-dev.yml +++ b/.github/workflows/release-vm-dev.yml @@ -155,6 +155,9 @@ jobs: - name: Install tools run: mise install + - name: Install zstd + run: apt-get update && apt-get install -y --no-install-recommends zstd && rm -rf /var/lib/apt/lists/* + - name: Build base rootfs tarball run: | set -euo pipefail diff --git a/deploy/docker/Dockerfile.ci b/deploy/docker/Dockerfile.ci index b87962b7..fe3241cb 100644 --- a/deploy/docker/Dockerfile.ci +++ b/deploy/docker/Dockerfile.ci @@ -34,6 +34,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ xz-utils \ jq \ rsync \ + zstd \ && apt-get install -y --only-upgrade gpgv python3 \ && rm -rf /var/lib/apt/lists/* From 0e1d0415737a9e613b01a7a4d572d3147722f341 Mon Sep 17 00:00:00 2001 From: Drew Newberry Date: Thu, 9 Apr 2026 00:00:04 -0700 Subject: [PATCH 10/20] fix(ci): add zstd to build-vm-linux and build-vm-macos jobs The CI container image does not include zstd, which is needed to decompress kernel runtime tarballs and re-compress individual files for embedding into the openshell-vm binary. 
--- .github/workflows/release-vm-dev.yml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/.github/workflows/release-vm-dev.yml b/.github/workflows/release-vm-dev.yml index 5d14f15b..ce0fef01 100644 --- a/.github/workflows/release-vm-dev.yml +++ b/.github/workflows/release-vm-dev.yml @@ -231,6 +231,9 @@ jobs: cache-directories: .cache/sccache cache-targets: "true" + - name: Install zstd + run: apt-get update && apt-get install -y --no-install-recommends zstd && rm -rf /var/lib/apt/lists/* + - name: Download kernel runtime tarball uses: actions/download-artifact@v4 with: @@ -342,6 +345,9 @@ jobs: - name: Set up Docker Buildx uses: ./.github/actions/setup-buildx + - name: Install zstd + run: apt-get update && apt-get install -y --no-install-recommends zstd && rm -rf /var/lib/apt/lists/* + - name: Download kernel runtime tarball uses: actions/download-artifact@v4 with: From 3a6bde0d7ebef4fe8ab1e97174c8f9fc7fd17b3d Mon Sep 17 00:00:00 2001 From: Drew Newberry Date: Thu, 9 Apr 2026 00:26:14 -0700 Subject: [PATCH 11/20] feat(vm): add install-vm.sh one-liner for quick VM binary install Adds a POSIX sh install script (modeled after install.sh) that detects the platform, downloads the correct binary from the vm-dev release, verifies checksums, and codesigns on macOS automatically. Works when piped from curl into any shell (bash, zsh, fish, etc.). Updates both release-vm-dev and release-vm-kernel workflow bodies to include the quick install snippet. 
--- .github/workflows/release-vm-dev.yml | 7 +- .github/workflows/release-vm-kernel.yml | 7 +- install-vm.sh | 249 ++++++++++++++++++++++++ 3 files changed, 257 insertions(+), 6 deletions(-) create mode 100755 install-vm.sh diff --git a/.github/workflows/release-vm-dev.yml b/.github/workflows/release-vm-dev.yml index ce0fef01..436b326f 100644 --- a/.github/workflows/release-vm-dev.yml +++ b/.github/workflows/release-vm-dev.yml @@ -520,9 +520,10 @@ jobs: | Linux x86_64 | `openshell-vm-x86_64-unknown-linux-gnu.tar.gz` | | macOS ARM64 | `openshell-vm-aarch64-apple-darwin.tar.gz` | - **macOS users:** The binary must be codesigned with the Hypervisor entitlement: - ```bash - codesign --entitlements crates/openshell-vm/entitlements.plist --force -s - ./openshell-vm + ### Quick install + + ``` + curl -fsSL https://raw.githubusercontent.com/NVIDIA/OpenShell/main/install-vm.sh | sh ``` files: | diff --git a/.github/workflows/release-vm-kernel.yml b/.github/workflows/release-vm-kernel.yml index c735c691..198d8b2e 100644 --- a/.github/workflows/release-vm-kernel.yml +++ b/.github/workflows/release-vm-kernel.yml @@ -254,9 +254,10 @@ jobs: | Linux x86_64 | `openshell-vm-x86_64-unknown-linux-gnu.tar.gz` | | macOS ARM64 | `openshell-vm-aarch64-apple-darwin.tar.gz` | - **macOS users:** The binary must be codesigned with the Hypervisor entitlement: - ```bash - codesign --entitlements crates/openshell-vm/entitlements.plist --force -s - ./openshell-vm + ### Quick install + + ``` + curl -fsSL https://raw.githubusercontent.com/NVIDIA/OpenShell/main/install-vm.sh | sh ``` files: | diff --git a/install-vm.sh b/install-vm.sh new file mode 100755 index 00000000..beadebf7 --- /dev/null +++ b/install-vm.sh @@ -0,0 +1,249 @@ +#!/bin/sh +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Install the openshell-vm binary. 
+# +# Usage: +# curl -fsSL https://raw.githubusercontent.com/NVIDIA/OpenShell/main/install-vm.sh | sh +# +# Or run directly: +# ./install-vm.sh +# +# Environment variables: +# OPENSHELL_VM_INSTALL_DIR - Directory to install into (default: ~/.local/bin) +# +set -eu + +APP_NAME="openshell-vm" +REPO="NVIDIA/OpenShell" +GITHUB_URL="https://github.com/${REPO}" +RELEASE_TAG="vm-dev" + +# --------------------------------------------------------------------------- +# Logging +# --------------------------------------------------------------------------- + +info() { + printf '%s: %s\n' "$APP_NAME" "$*" >&2 +} + +error() { + printf '%s: error: %s\n' "$APP_NAME" "$*" >&2 + exit 1 +} + +# --------------------------------------------------------------------------- +# HTTP helpers +# --------------------------------------------------------------------------- + +has_cmd() { + command -v "$1" >/dev/null 2>&1 +} + +check_downloader() { + if has_cmd curl; then + return 0 + elif has_cmd wget; then + return 0 + else + error "either 'curl' or 'wget' is required to download files" + fi +} + +download() { + _url="$1" + _output="$2" + + if has_cmd curl; then + curl -fLsS --retry 3 --max-redirs 5 -o "$_output" "$_url" + elif has_cmd wget; then + wget -q --tries=3 --max-redirect=5 -O "$_output" "$_url" + fi +} + +# --------------------------------------------------------------------------- +# Platform detection +# --------------------------------------------------------------------------- + +get_target() { + _arch="$(uname -m)" + _os="$(uname -s)" + + case "$_os" in + Darwin) + case "$_arch" in + arm64|aarch64) echo "aarch64-apple-darwin" ;; + *) error "macOS x86_64 is not supported; use Apple Silicon" ;; + esac + ;; + Linux) + case "$_arch" in + x86_64|amd64) echo "x86_64-unknown-linux-gnu" ;; + aarch64|arm64) echo "aarch64-unknown-linux-gnu" ;; + *) error "unsupported architecture: $_arch" ;; + esac + ;; + *) error "unsupported OS: $_os" ;; + esac +} + +# 
--------------------------------------------------------------------------- +# Checksum verification +# --------------------------------------------------------------------------- + +verify_checksum() { + _vc_archive="$1" + _vc_checksums="$2" + _vc_filename="$3" + + if ! has_cmd shasum && ! has_cmd sha256sum; then + error "neither 'shasum' nor 'sha256sum' found; cannot verify download integrity" + fi + + _vc_expected="$(grep -F "$_vc_filename" "$_vc_checksums" | awk '{print $1}')" + + if [ -z "$_vc_expected" ]; then + error "no checksum entry found for $_vc_filename in checksums file" + fi + + if has_cmd sha256sum; then + echo "$_vc_expected $_vc_archive" | sha256sum -c --quiet 2>/dev/null + elif has_cmd shasum; then + echo "$_vc_expected $_vc_archive" | shasum -a 256 -c --quiet 2>/dev/null + fi +} + +# --------------------------------------------------------------------------- +# Install location +# --------------------------------------------------------------------------- + +get_install_dir() { + if [ -n "${OPENSHELL_VM_INSTALL_DIR:-}" ]; then + echo "$OPENSHELL_VM_INSTALL_DIR" + else + echo "${HOME}/.local/bin" + fi +} + +is_on_path() { + case ":${PATH}:" in + *":$1:"*) return 0 ;; + *) return 1 ;; + esac +} + +# --------------------------------------------------------------------------- +# macOS codesign +# --------------------------------------------------------------------------- + +codesign_binary() { + _binary="$1" + + if [ "$(uname -s)" != "Darwin" ]; then + return 0 + fi + + if ! has_cmd codesign; then + info "warning: codesign not found; the binary will fail without the Hypervisor entitlement" + return 0 + fi + + info "codesigning with Hypervisor entitlement..." 
+ _entitlements="$(mktemp)" + cat > "$_entitlements" <<'PLIST' +<?xml version="1.0" encoding="UTF-8"?> +<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd"> +<plist version="1.0"> +<dict> + <key>com.apple.security.hypervisor</key> + <true/> +</dict> +</plist> +PLIST + codesign --entitlements "$_entitlements" --force -s - "$_binary" + rm -f "$_entitlements" +} + +# --------------------------------------------------------------------------- +# Main +# --------------------------------------------------------------------------- + +main() { + for arg in "$@"; do + case "$arg" in + --help) + cat </dev/null || true + + if [ -w "$_install_dir" ] || mkdir -p "$_install_dir" 2>/dev/null; then + install -m 755 "${_tmpdir}/${APP_NAME}" "${_install_dir}/${APP_NAME}" + else + info "elevated permissions required to install to ${_install_dir}" + sudo mkdir -p "$_install_dir" + sudo install -m 755 "${_tmpdir}/${APP_NAME}" "${_install_dir}/${APP_NAME}" + fi + + codesign_binary "${_install_dir}/${APP_NAME}" + + info "installed ${APP_NAME} to ${_install_dir}/${APP_NAME}" + + if ! is_on_path "$_install_dir"; then + echo "" + info "${_install_dir} is not on your PATH."
+ info "" + info "Add it by appending the following to your shell config:" + info "" + + _current_shell="$(basename "${SHELL:-sh}" 2>/dev/null || echo "sh")" + case "$_current_shell" in + fish) info " fish_add_path ${_install_dir}" ;; + *) info " export PATH=\"${_install_dir}:\$PATH\"" ;; + esac + info "" + fi +} + +main "$@" From 70e53fbb06385786e4fe4992c7593a6aa1d54646 Mon Sep 17 00:00:00 2001 From: Drew Newberry Date: Thu, 9 Apr 2026 00:31:29 -0700 Subject: [PATCH 12/20] fix(vm): align install-vm.sh safeguards with install.sh - Add redirect origin validation (MITM defense, ref issue #638) - Add resolve_redirect() helper matching install.sh - Add warn() helper - Fix checksum tool preference order (shasum first, matching macOS default) - Use caller's tmpdir for entitlements plist (cleaned by trap) - Add version probe after install - Add full PATH guidance with shell config file hints - Add examples to --help output --- install-vm.sh | 78 +++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 67 insertions(+), 11 deletions(-) diff --git a/install-vm.sh b/install-vm.sh index beadebf7..37674427 100755 --- a/install-vm.sh +++ b/install-vm.sh @@ -28,6 +28,10 @@ info() { printf '%s: %s\n' "$APP_NAME" "$*" >&2 } +warn() { + printf '%s: warning: %s\n' "$APP_NAME" "$*" >&2 +} + error() { printf '%s: error: %s\n' "$APP_NAME" "$*" >&2 exit 1 @@ -62,6 +66,35 @@ download() { fi } +# Follow a URL and print the final resolved URL (for detecting redirect targets). +resolve_redirect() { + _url="$1" + + if has_cmd curl; then + curl -fLsS -o /dev/null -w '%{url_effective}' "$_url" + elif has_cmd wget; then + # wget --spider follows redirects; capture the final Location from stderr + wget --spider --max-redirect=10 "$_url" 2>&1 | sed -n 's/^.*Location: \([^ ]*\).*/\1/p' | tail -1 + fi +} + +# Validate that a download URL resolves to the expected GitHub origin. 
+# A MITM or DNS hijack could redirect to an attacker-controlled domain, +# which would also serve a matching checksums file (making checksum +# verification useless). See: https://github.com/NVIDIA/OpenShell/issues/638 +validate_download_origin() { + _vdo_url="$1" + _resolved="$(resolve_redirect "$_vdo_url")" || return 0 # best-effort + + case "$_resolved" in + https://github.com/${REPO}/*) ;; + https://objects.githubusercontent.com/*) ;; + *) + error "unexpected redirect target: ${_resolved} (expected github.com/${REPO}/...)" + ;; + esac +} + # --------------------------------------------------------------------------- # Platform detection # --------------------------------------------------------------------------- @@ -107,10 +140,10 @@ verify_checksum() { error "no checksum entry found for $_vc_filename in checksums file" fi - if has_cmd sha256sum; then - echo "$_vc_expected $_vc_archive" | sha256sum -c --quiet 2>/dev/null - elif has_cmd shasum; then + if has_cmd shasum; then echo "$_vc_expected $_vc_archive" | shasum -a 256 -c --quiet 2>/dev/null + elif has_cmd sha256sum; then + echo "$_vc_expected $_vc_archive" | sha256sum -c --quiet 2>/dev/null fi } @@ -139,18 +172,19 @@ is_on_path() { codesign_binary() { _binary="$1" + _cs_tmpdir="$2" # reuse caller's tmpdir for cleanup-safe temp files if [ "$(uname -s)" != "Darwin" ]; then return 0 fi if ! has_cmd codesign; then - info "warning: codesign not found; the binary will fail without the Hypervisor entitlement" + warn "codesign not found; the binary will fail without the Hypervisor entitlement" return 0 fi info "codesigning with Hypervisor entitlement..." 
- _entitlements="$(mktemp)" + _entitlements="${_cs_tmpdir}/entitlements.plist" cat > "$_entitlements" <<'PLIST' @@ -162,7 +196,6 @@ codesign_binary() { PLIST codesign --entitlements "$_entitlements" --force -s - "$_binary" - rm -f "$_entitlements" } # --------------------------------------------------------------------------- @@ -178,9 +211,20 @@ install-vm.sh — Install the openshell-vm MicroVM runtime USAGE: curl -fsSL https://raw.githubusercontent.com/NVIDIA/OpenShell/main/install-vm.sh | sh + ./install-vm.sh [OPTIONS] + +OPTIONS: + --help Print this help message ENVIRONMENT VARIABLES: OPENSHELL_VM_INSTALL_DIR Directory to install into (default: ~/.local/bin) + +EXAMPLES: + # Install latest dev build + curl -fsSL https://raw.githubusercontent.com/NVIDIA/OpenShell/main/install-vm.sh | sh + + # Install to /usr/local/bin + curl -fsSL https://raw.githubusercontent.com/NVIDIA/OpenShell/main/install-vm.sh | OPENSHELL_VM_INSTALL_DIR=/usr/local/bin sh EOF exit 0 ;; @@ -198,6 +242,9 @@ EOF info "downloading ${APP_NAME} (${_target})..." + # Validate that the download URL resolves to the expected GitHub origin. + validate_download_origin "$_download_url" + _tmpdir="$(mktemp -d)" trap 'rm -rf "$_tmpdir"' EXIT @@ -226,23 +273,32 @@ EOF sudo install -m 755 "${_tmpdir}/${APP_NAME}" "${_install_dir}/${APP_NAME}" fi - codesign_binary "${_install_dir}/${APP_NAME}" + codesign_binary "${_install_dir}/${APP_NAME}" "$_tmpdir" - info "installed ${APP_NAME} to ${_install_dir}/${APP_NAME}" + _installed_version="$("${_install_dir}/${APP_NAME}" --version 2>/dev/null || echo "${RELEASE_TAG}")" + info "installed ${_installed_version} to ${_install_dir}/${APP_NAME}" + # If the install directory isn't on PATH, print instructions if ! is_on_path "$_install_dir"; then echo "" info "${_install_dir} is not on your PATH." info "" - info "Add it by appending the following to your shell config:" + info "Add it by appending the following to your shell configuration file" + info "(e.g. 
~/.bashrc, ~/.zshrc, or ~/.config/fish/config.fish):" info "" _current_shell="$(basename "${SHELL:-sh}" 2>/dev/null || echo "sh")" case "$_current_shell" in - fish) info " fish_add_path ${_install_dir}" ;; - *) info " export PATH=\"${_install_dir}:\$PATH\"" ;; + fish) + info " fish_add_path ${_install_dir}" + ;; + *) + info " export PATH=\"${_install_dir}:\$PATH\"" + ;; esac + info "" + info "Then restart your shell or run the command above in your current session." fi } From 463eb2b54740c1f7dd0b7d9dc4eb07ebccf7742b Mon Sep 17 00:00:00 2001 From: Drew Newberry Date: Thu, 9 Apr 2026 00:32:07 -0700 Subject: [PATCH 13/20] docs(vm): expand release notes with install-vm.sh usage Add custom install dir example and description of what the installer does (platform detection, checksum verification, macOS codesign). --- .github/workflows/release-vm-dev.yml | 9 +++++++++ .github/workflows/release-vm-kernel.yml | 9 +++++++++ 2 files changed, 18 insertions(+) diff --git a/.github/workflows/release-vm-dev.yml b/.github/workflows/release-vm-dev.yml index 436b326f..057d8bf7 100644 --- a/.github/workflows/release-vm-dev.yml +++ b/.github/workflows/release-vm-dev.yml @@ -526,6 +526,15 @@ jobs: curl -fsSL https://raw.githubusercontent.com/NVIDIA/OpenShell/main/install-vm.sh | sh ``` + Install to a custom directory: + + ``` + curl -fsSL https://raw.githubusercontent.com/NVIDIA/OpenShell/main/install-vm.sh | OPENSHELL_VM_INSTALL_DIR=/usr/local/bin sh + ``` + + The installer auto-detects your platform, verifies checksums, and codesigns + on macOS (Hypervisor entitlement). Defaults to `~/.local/bin`. 
+ files: | release-final/openshell-vm-aarch64-unknown-linux-gnu.tar.gz release-final/openshell-vm-x86_64-unknown-linux-gnu.tar.gz diff --git a/.github/workflows/release-vm-kernel.yml b/.github/workflows/release-vm-kernel.yml index 198d8b2e..fab67598 100644 --- a/.github/workflows/release-vm-kernel.yml +++ b/.github/workflows/release-vm-kernel.yml @@ -260,6 +260,15 @@ jobs: curl -fsSL https://raw.githubusercontent.com/NVIDIA/OpenShell/main/install-vm.sh | sh ``` + Install to a custom directory: + + ``` + curl -fsSL https://raw.githubusercontent.com/NVIDIA/OpenShell/main/install-vm.sh | OPENSHELL_VM_INSTALL_DIR=/usr/local/bin sh + ``` + + The installer auto-detects your platform, verifies checksums, and codesigns + on macOS (Hypervisor entitlement). Defaults to `~/.local/bin`. + files: | release/vm-runtime-linux-aarch64.tar.zst release/vm-runtime-linux-x86_64.tar.zst From 0765fa2c73c819fc7eaa9be946961eff9d437267 Mon Sep 17 00:00:00 2001 From: Drew Newberry Date: Thu, 9 Apr 2026 00:32:32 -0700 Subject: [PATCH 14/20] docs(vm): simplify release notes install section --- .github/workflows/release-vm-dev.yml | 9 +-------- .github/workflows/release-vm-kernel.yml | 9 +-------- 2 files changed, 2 insertions(+), 16 deletions(-) diff --git a/.github/workflows/release-vm-dev.yml b/.github/workflows/release-vm-dev.yml index 057d8bf7..06ba474a 100644 --- a/.github/workflows/release-vm-dev.yml +++ b/.github/workflows/release-vm-dev.yml @@ -526,14 +526,7 @@ jobs: curl -fsSL https://raw.githubusercontent.com/NVIDIA/OpenShell/main/install-vm.sh | sh ``` - Install to a custom directory: - - ``` - curl -fsSL https://raw.githubusercontent.com/NVIDIA/OpenShell/main/install-vm.sh | OPENSHELL_VM_INSTALL_DIR=/usr/local/bin sh - ``` - - The installer auto-detects your platform, verifies checksums, and codesigns - on macOS (Hypervisor entitlement). Defaults to `~/.local/bin`. + Auto-detects your platform, verifies checksums, and codesigns on macOS. 
files: | release-final/openshell-vm-aarch64-unknown-linux-gnu.tar.gz diff --git a/.github/workflows/release-vm-kernel.yml b/.github/workflows/release-vm-kernel.yml index fab67598..c1593da3 100644 --- a/.github/workflows/release-vm-kernel.yml +++ b/.github/workflows/release-vm-kernel.yml @@ -260,14 +260,7 @@ jobs: curl -fsSL https://raw.githubusercontent.com/NVIDIA/OpenShell/main/install-vm.sh | sh ``` - Install to a custom directory: - - ``` - curl -fsSL https://raw.githubusercontent.com/NVIDIA/OpenShell/main/install-vm.sh | OPENSHELL_VM_INSTALL_DIR=/usr/local/bin sh - ``` - - The installer auto-detects your platform, verifies checksums, and codesigns - on macOS (Hypervisor entitlement). Defaults to `~/.local/bin`. + Auto-detects your platform, verifies checksums, and codesigns on macOS. files: | release/vm-runtime-linux-aarch64.tar.zst From 3ffb0516b430dc07c021706ed4cb08a05c5930a7 Mon Sep 17 00:00:00 2001 From: Drew Newberry Date: Thu, 9 Apr 2026 00:38:16 -0700 Subject: [PATCH 15/20] fix(vm): allow release-assets.githubusercontent.com in origin validation GitHub now redirects release asset downloads to this domain in addition to objects.githubusercontent.com. 
--- install-vm.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/install-vm.sh b/install-vm.sh index 37674427..9ea07654 100755 --- a/install-vm.sh +++ b/install-vm.sh @@ -89,6 +89,7 @@ validate_download_origin() { case "$_resolved" in https://github.com/${REPO}/*) ;; https://objects.githubusercontent.com/*) ;; + https://release-assets.githubusercontent.com/*) ;; *) error "unexpected redirect target: ${_resolved} (expected github.com/${REPO}/...)" ;; From b5590571f193fbda6bcb6263c82d58e16a0f6ceb Mon Sep 17 00:00:00 2001 From: Drew Newberry Date: Thu, 9 Apr 2026 01:00:08 -0700 Subject: [PATCH 16/20] fix(vm): create libkrunfw.dylib symlink on macOS after extraction libkrun.dylib references libkrunfw via @loader_path/libkrunfw.dylib (unversioned name set by build-libkrun-macos.sh), but the embedded runtime only extracts libkrunfw.5.dylib (versioned). Create an unversioned symlink so dyld can resolve the dependency. --- crates/openshell-vm/src/embedded.rs | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/crates/openshell-vm/src/embedded.rs b/crates/openshell-vm/src/embedded.rs index 15eaf4be..731f34b1 100644 --- a/crates/openshell-vm/src/embedded.rs +++ b/crates/openshell-vm/src/embedded.rs @@ -135,6 +135,18 @@ pub fn ensure_runtime_extracted() -> Result { )?; extract_resource(resources::GVPROXY, &cache_dir.join("gvproxy"))?; + // On macOS, libkrun.dylib references libkrunfw via @loader_path/libkrunfw.dylib + // (the unversioned name), but we embed as libkrunfw.5.dylib. Create the + // unversioned name so dyld can resolve the dependency. 
+ #[cfg(target_os = "macos")] + { + let unversioned = cache_dir.join("libkrunfw.dylib"); + if !unversioned.exists() { + std::os::unix::fs::symlink(resources::LIBKRUNFW_NAME, &unversioned) + .map_err(|e| VmError::HostSetup(format!("symlink libkrunfw.dylib: {e}")))?; + } + } + // Make gvproxy executable #[cfg(unix)] { From de7e439dd797bfdbef909c6835b6aed389c31090 Mon Sep 17 00:00:00 2001 From: Drew Newberry Date: Thu, 9 Apr 2026 01:17:41 -0700 Subject: [PATCH 17/20] fix(vm): re-exec with DYLD_LIBRARY_PATH on macOS for libkrunfw discovery libkrun internally does dlopen("libkrunfw.5.dylib") with a bare name. On macOS, dyld ignores DYLD_FALLBACK_LIBRARY_PATH set after process start, and the RTLD_GLOBAL preload doesn't help because dyld's dlopen search doesn't match against install names of already-loaded libraries. Fix by re-execing the binary early in main() with DYLD_LIBRARY_PATH set to the runtime directory, so the dynamic linker can find libkrunfw.5.dylib when libkrun requests it. --- crates/openshell-vm/src/lib.rs | 2 +- crates/openshell-vm/src/main.rs | 38 +++++++++++++++++++++++++++++++++ 2 files changed, 39 insertions(+), 1 deletion(-) diff --git a/crates/openshell-vm/src/lib.rs b/crates/openshell-vm/src/lib.rs index 4593dd60..7ac112d6 100644 --- a/crates/openshell-vm/src/lib.rs +++ b/crates/openshell-vm/src/lib.rs @@ -368,7 +368,7 @@ fn c_string_array(strings: &[&str]) -> Result<(Vec, Vec<*const libc::c_ const VM_RUNTIME_DIR_ENV: &str = "OPENSHELL_VM_RUNTIME_DIR"; -pub(crate) fn configured_runtime_dir() -> Result { +pub fn configured_runtime_dir() -> Result { // Allow override for development if let Some(path) = std::env::var_os(VM_RUNTIME_DIR_ENV) { let path = PathBuf::from(path); diff --git a/crates/openshell-vm/src/main.rs b/crates/openshell-vm/src/main.rs index ba7c7d6b..bb9d854b 100644 --- a/crates/openshell-vm/src/main.rs +++ b/crates/openshell-vm/src/main.rs @@ -120,6 +120,44 @@ enum GatewayCommand { } fn main() { + // On macOS, libkrun loads 
libkrunfw.5.dylib via dlopen() with a bare name. + // The dynamic linker only finds it if DYLD_LIBRARY_PATH includes the runtime + // directory, but env vars set after process start are ignored by dyld. To work + // around this, re-exec the binary with DYLD_LIBRARY_PATH set if the runtime + // is available and the variable is not already configured. + #[cfg(target_os = "macos")] + { + if std::env::var_os("__OPENSHELL_VM_REEXEC").is_none() { + if let Ok(runtime_dir) = openshell_vm::configured_runtime_dir() { + let needs_reexec = std::env::var_os("DYLD_LIBRARY_PATH").map_or(true, |v| { + !v.to_string_lossy() + .contains(runtime_dir.to_str().unwrap_or("")) + }); + if needs_reexec { + let mut dyld_paths = vec![runtime_dir]; + if let Some(existing) = std::env::var_os("DYLD_LIBRARY_PATH") { + dyld_paths.extend(std::env::split_paths(&existing)); + } + let joined = std::env::join_paths(&dyld_paths).expect("join DYLD_LIBRARY_PATH"); + let exe = std::env::current_exe().expect("current_exe"); + let args: Vec = std::env::args().skip(1).collect(); + let err = std::process::Command::new(exe) + .args(&args) + .env("DYLD_LIBRARY_PATH", &joined) + .env("__OPENSHELL_VM_REEXEC", "1") + .status(); + match err { + Ok(status) => std::process::exit(status.code().unwrap_or(1)), + Err(e) => { + eprintln!("Error: failed to re-exec with DYLD_LIBRARY_PATH: {e}"); + std::process::exit(1); + } + } + } + } + } + } + tracing_subscriber::fmt::init(); let cli = Cli::parse(); From 4ebfa45ec7ecea795a03f2774da3e1c39a79d0fe Mon Sep 17 00:00:00 2001 From: Drew Newberry Date: Thu, 9 Apr 2026 08:01:04 -0700 Subject: [PATCH 18/20] fix(vm): retry gvproxy port forwarding with exponential backoff gvproxy's internal netstack may not be ready when the expose API is called immediately after socket creation, causing HTTP 500 responses. The port forward silently failed, leaving host port 30051 unmapped and causing the gateway health check to time out after 90s. 
Add retry logic with exponential backoff (100ms to 1s, 10s budget) and fail the launch if retries are exhausted instead of silently continuing. --- crates/openshell-vm/src/lib.rs | 33 +++++++++++++++++++++++++++------ 1 file changed, 27 insertions(+), 6 deletions(-) diff --git a/crates/openshell-vm/src/lib.rs b/crates/openshell-vm/src/lib.rs index 7ac112d6..638d8fc6 100644 --- a/crates/openshell-vm/src/lib.rs +++ b/crates/openshell-vm/src/lib.rs @@ -1532,14 +1532,35 @@ pub fn launch(config: &VmConfig) -> Result { r#"{{"local":":{host_port}","remote":"{guest_ip}:{guest_port}","protocol":"tcp"}}"# ); - match gvproxy_expose(api_sock, &expose_body) { - Ok(()) => { - eprintln!(" port {host_port} -> {guest_ip}:{guest_port}"); - } - Err(e) => { - eprintln!(" port {host_port}: {e}"); + // Retry with exponential backoff — gvproxy's internal + // netstack may not be ready immediately after socket creation. + let mut expose_ok = false; + let mut retry_interval = std::time::Duration::from_millis(100); + let expose_deadline = + Instant::now() + std::time::Duration::from_secs(10); + loop { + match gvproxy_expose(api_sock, &expose_body) { + Ok(()) => { + eprintln!(" port {host_port} -> {guest_ip}:{guest_port}"); + expose_ok = true; + break; + } + Err(e) => { + if Instant::now() >= expose_deadline { + eprintln!(" port {host_port}: {e} (retries exhausted)"); + break; + } + std::thread::sleep(retry_interval); + retry_interval = (retry_interval * 2) + .min(std::time::Duration::from_secs(1)); + } } } + if !expose_ok { + return Err(VmError::HostSetup(format!( + "failed to forward port {host_port} via gvproxy" + ))); + } } eprintln!( "Port forwarding ready [{:.1}s]", From be30c762f3360d494ab73103b92fd1a536c766bb Mon Sep 17 00:00:00 2001 From: Drew Newberry Date: Thu, 9 Apr 2026 08:40:39 -0700 Subject: [PATCH 19/20] fix(vm): kill stale gvproxy holding target ports when state file is missing When the user deletes the data directory while a VM is running (e.g. 
rm -rf ~/.local/share/openshell/openshell-vm/), the VM state file containing the gvproxy PID is lost. The stale gvproxy keeps holding the target port, causing new launches to fail with HTTP 500 on the expose API. Add a port-based fallback that uses lsof to find and kill any gvproxy process holding the target ports before starting a new instance. --- crates/openshell-vm/src/lib.rs | 93 ++++++++++++++++++++++++---------- 1 file changed, 67 insertions(+), 26 deletions(-) diff --git a/crates/openshell-vm/src/lib.rs b/crates/openshell-vm/src/lib.rs index 638d8fc6..eb3e382c 100644 --- a/crates/openshell-vm/src/lib.rs +++ b/crates/openshell-vm/src/lib.rs @@ -892,14 +892,19 @@ fn gvproxy_expose(api_sock: &Path, body: &str) -> Result<(), String> { /// Kill a stale gvproxy process from a previous openshell-vm run. /// /// If the CLI crashes or is killed before cleanup, gvproxy keeps running -/// and holds port 2222. A new gvproxy instance then fails with -/// "bind: address already in use". +/// and holds its ports. A new gvproxy instance then fails with +/// "bind: address already in use" when trying to forward ports. /// -/// We only kill the specific gvproxy PID recorded in the VM runtime state -/// to avoid disrupting unrelated gvproxy instances (e.g. Podman Desktop). -/// Before sending SIGTERM, we verify the process name contains "gvproxy" -/// to guard against PID reuse. +/// We first try to kill the specific gvproxy PID recorded in the VM +/// runtime state. If the state file was deleted (e.g. the user ran +/// `rm -rf` on the data directory), we fall back to killing any gvproxy +/// process holding the target ports. fn kill_stale_gvproxy(rootfs: &Path) { + kill_stale_gvproxy_by_state(rootfs); +} + +/// Kill stale gvproxy using the PID from the VM state file. 
+fn kill_stale_gvproxy_by_state(rootfs: &Path) { let state_path = vm_state_path(rootfs); let pid = std::fs::read(&state_path) .ok() @@ -907,29 +912,59 @@ fn kill_stale_gvproxy(rootfs: &Path) { .and_then(|state| state.gvproxy_pid); if let Some(gvproxy_pid) = pid { - // Verify the process is still alive before killing it. - let pid_i32 = gvproxy_pid as libc::pid_t; - let is_alive = unsafe { libc::kill(pid_i32, 0) } == 0; - if is_alive { - // Verify the process is actually gvproxy before killing. - // Without this check, PID reuse could cause us to kill an - // unrelated process. - if !is_process_named(pid_i32, "gvproxy") { - eprintln!( - "Stale gvproxy pid {gvproxy_pid} is no longer gvproxy (PID reused), skipping kill" - ); - return; - } - unsafe { - libc::kill(pid_i32, libc::SIGTERM); + kill_gvproxy_pid(gvproxy_pid); + } +} + +/// Kill any gvproxy process holding a specific TCP port. +/// +/// Used as a fallback when the VM state file is missing (e.g. after the +/// user deleted the data directory while a VM was running). +fn kill_stale_gvproxy_by_port(port: u16) { + // Use lsof to find PIDs listening on the target port. + let output = std::process::Command::new("lsof") + .args(["-ti", &format!(":{port}")]) + .output(); + + let pids = match output { + Ok(o) if o.status.success() => { + String::from_utf8_lossy(&o.stdout).to_string() + } + _ => return, + }; + + for line in pids.lines() { + if let Ok(pid) = line.trim().parse::() { + let pid_i32 = pid as libc::pid_t; + if is_process_named(pid_i32, "gvproxy") { + kill_gvproxy_pid(pid); } - eprintln!("Killed stale gvproxy process (pid {gvproxy_pid})"); - // Brief pause for the port to be released. - std::thread::sleep(std::time::Duration::from_millis(200)); } } } +fn kill_gvproxy_pid(gvproxy_pid: u32) { + let pid_i32 = gvproxy_pid as libc::pid_t; + let is_alive = unsafe { libc::kill(pid_i32, 0) } == 0; + if is_alive { + // Verify the process is actually gvproxy before killing. 
+ // Without this check, PID reuse could cause us to kill an + // unrelated process. + if !is_process_named(pid_i32, "gvproxy") { + eprintln!( + "Stale gvproxy pid {gvproxy_pid} is no longer gvproxy (PID reused), skipping kill" + ); + return; + } + unsafe { + libc::kill(pid_i32, libc::SIGTERM); + } + eprintln!("Killed stale gvproxy process (pid {gvproxy_pid})"); + // Brief pause for the port to be released. + std::thread::sleep(std::time::Duration::from_millis(200)); + } +} + /// Check whether a process with the given PID has the expected name. /// /// On macOS, shells out to `ps` to query the process name. On Linux, reads @@ -1289,9 +1324,15 @@ pub fn launch(config: &VmConfig) -> Result { let api_sock = sock_base.with_extension("a"); // Kill any stale gvproxy process from a previous run. - // If gvproxy is still holding port 2222, the new instance - // will fail with "bind: address already in use". + // First try via the saved PID in the state file, then fall + // back to killing any gvproxy holding our target ports (covers + // the case where the state file was deleted). kill_stale_gvproxy(&config.rootfs); + for pm in &config.port_map { + if let Some(host_port) = pm.split(':').next().and_then(|p| p.parse::().ok()) { + kill_stale_gvproxy_by_port(host_port); + } + } // Clean stale sockets (including the -krun.sock file that // libkrun creates as its datagram endpoint on macOS). 
From bbe30a22abb47ec891180dd620f9f7c1f85fb8e2 Mon Sep 17 00:00:00 2001 From: Drew Newberry Date: Thu, 9 Apr 2026 08:48:59 -0700 Subject: [PATCH 20/20] chore(vm): fix rustfmt formatting --- crates/openshell-vm/src/lib.rs | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/crates/openshell-vm/src/lib.rs b/crates/openshell-vm/src/lib.rs index eb3e382c..9e44c996 100644 --- a/crates/openshell-vm/src/lib.rs +++ b/crates/openshell-vm/src/lib.rs @@ -927,9 +927,7 @@ fn kill_stale_gvproxy_by_port(port: u16) { .output(); let pids = match output { - Ok(o) if o.status.success() => { - String::from_utf8_lossy(&o.stdout).to_string() - } + Ok(o) if o.status.success() => String::from_utf8_lossy(&o.stdout).to_string(), _ => return, }; @@ -1577,8 +1575,7 @@ pub fn launch(config: &VmConfig) -> Result { // netstack may not be ready immediately after socket creation. let mut expose_ok = false; let mut retry_interval = std::time::Duration::from_millis(100); - let expose_deadline = - Instant::now() + std::time::Duration::from_secs(10); + let expose_deadline = Instant::now() + std::time::Duration::from_secs(10); loop { match gvproxy_expose(api_sock, &expose_body) { Ok(()) => { @@ -1592,8 +1589,8 @@ pub fn launch(config: &VmConfig) -> Result { break; } std::thread::sleep(retry_interval); - retry_interval = (retry_interval * 2) - .min(std::time::Duration::from_secs(1)); + retry_interval = + (retry_interval * 2).min(std::time::Duration::from_secs(1)); } } }