diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..e53c95a --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,178 @@ +name: CI + +# Trigger model: +# push (any branch) → lints only +# pull_request → lints + build-smoke + e2e +# push to default branch → lints + build-full (TODO) +# workflow_dispatch (release) → push artifacts (TODO) +on: + pull_request: + push: + +permissions: + contents: read + +concurrency: + group: ci-${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: ${{ github.event_name == 'pull_request' }} + +jobs: + devshell: + runs-on: [self-hosted, shared] + steps: + - uses: actions/checkout@v4 + - name: Build dev shell + run: nix develop --command true + + lints-matrix: + runs-on: [self-hosted, shared] + outputs: + matrix: ${{ steps.set-matrix.outputs.matrix }} + steps: + - uses: actions/checkout@v4 + - id: set-matrix + name: Enumerate CI checks + run: | + set -Eeu + matrix="$( + nix eval --json '.#ci.x86_64-linux.checks' --apply ' + cs: { include = map (name: { inherit name; }) (builtins.attrNames cs); } + ' + )" + echo "matrix=$matrix" >> "$GITHUB_OUTPUT" + + lints: + runs-on: [self-hosted, shared] + name: lint:${{ matrix.name }} + needs: + - devshell + - lints-matrix + strategy: + fail-fast: false + matrix: ${{ fromJSON(needs.lints-matrix.outputs.matrix) }} + steps: + - uses: actions/checkout@v4 + - name: nix run .#check-${{ matrix.name }} + run: nix run '.#check-${{ matrix.name }}' + + lints-passed: + runs-on: [self-hosted, shared] + if: always() + name: Lints passed + needs: + - devshell + - lints-matrix + - lints + steps: + - name: Require devshell + lints-matrix + all lints succeeded + run: | + set -Eeu + test '${{ needs.devshell.result }}' = 'success' + test '${{ needs.lints-matrix.result }}' = 'success' + test '${{ needs.lints.result }}' = 'success' + + # ─── build-smoke (PR only) ────────────────────────────────────────────── + # Slow native + cross builds gated to 
PRs. The native builds run cargo test + # as part of buildRustPackage (`doCheck = true` by default), so this stage + # also covers test execution. + + build-smoke-matrix: + if: github.event_name == 'pull_request' + needs: lints-passed + runs-on: [self-hosted, shared] + outputs: + matrix: ${{ steps.set-matrix.outputs.matrix }} + steps: + - uses: actions/checkout@v4 + - id: set-matrix + name: Enumerate smoke builds + run: | + set -Eeu + matrix="$( + nix eval --json '.#ci.x86_64-linux.builds' --apply ' + builds: { + include = builtins.attrValues ( + builtins.mapAttrs (name: attr: { inherit name attr; }) builds + ); + } + ' + )" + echo "matrix=$matrix" >> "$GITHUB_OUTPUT" + + build-smoke: + if: github.event_name == 'pull_request' + needs: build-smoke-matrix + runs-on: [self-hosted, shared] + name: build:${{ matrix.name }} + strategy: + fail-fast: false + matrix: ${{ fromJSON(needs.build-smoke-matrix.outputs.matrix) }} + steps: + - uses: actions/checkout@v4 + - name: nix build .#${{ matrix.attr }} + run: nix build --print-build-logs --no-link '.#packages.x86_64-linux.${{ matrix.attr }}' + + build-smoke-passed: + if: always() && github.event_name == 'pull_request' + runs-on: [self-hosted, shared] + name: Build smoke passed + needs: + - build-smoke-matrix + - build-smoke + steps: + - name: Require build-smoke-matrix + all smoke builds succeeded + run: | + set -Eeu + test '${{ needs.build-smoke-matrix.result }}' = 'success' + test '${{ needs.build-smoke.result }}' = 'success' + + # ─── e2e (PR only) ────────────────────────────────────────────────────── + # nixosTests run in QEMU VMs. They depend on the same package derivations + # the build-smoke stage built, so this stage reuses them via the nix store + # — no rebuilds. 
+ + e2e-matrix: + if: github.event_name == 'pull_request' + needs: build-smoke-passed + runs-on: [self-hosted, shared] + outputs: + matrix: ${{ steps.set-matrix.outputs.matrix }} + steps: + - uses: actions/checkout@v4 + - id: set-matrix + name: Enumerate nixosTests + run: | + set -Eeu + matrix="$( + nix eval --json '.#nixosTests.x86_64-linux' --apply ' + tests: { include = map (name: { inherit name; }) (builtins.attrNames tests); } + ' + )" + echo "matrix=$matrix" >> "$GITHUB_OUTPUT" + + e2e: + if: github.event_name == 'pull_request' + needs: e2e-matrix + runs-on: [self-hosted, shared] + name: e2e:${{ matrix.name }} + strategy: + fail-fast: false + matrix: ${{ fromJSON(needs.e2e-matrix.outputs.matrix) }} + steps: + - uses: actions/checkout@v4 + - name: nix build .#nixosTests.x86_64-linux.${{ matrix.name }} + run: nix build --print-build-logs --no-link '.#nixosTests.x86_64-linux.${{ matrix.name }}' + + e2e-passed: + if: always() && github.event_name == 'pull_request' + runs-on: [self-hosted, shared] + name: E2E passed + needs: + - e2e-matrix + - e2e + steps: + - name: Require e2e-matrix + all e2e succeeded + run: | + set -Eeu + test '${{ needs.e2e-matrix.result }}' = 'success' + test '${{ needs.e2e.result }}' = 'success' diff --git a/.gitignore b/.gitignore index a371a14..160af21 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,5 @@ /target -/result +/result* .direnv .envrc .claude diff --git a/Cargo.lock b/Cargo.lock index 616d3eb..6f09ffe 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4,14 +4,15 @@ version = 4 [[package]] name = "acto" -version = "0.7.4" +version = "0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a026259da4f1a13b4af60cda453c392de64c58c12d239c560923e0382f42f2b9" +checksum = "148541f13c28e3e840354ee4d6c99046c10be2c81068bbd23b9e3a38f95a917e" dependencies = [ "parking_lot", "pin-project-lite", "rustc_version", "smol_str", + "sync_wrapper", "tokio", "tracing", ] @@ -24,13 +25,37 @@ checksum = 
"320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa" [[package]] name = "aead" -version = "0.6.0-rc.2" +version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ac8202ab55fcbf46ca829833f347a82a2a4ce0596f0304ac322c2d100030cd56" +checksum = "d122413f284cf2d62fb1b7db97e02edb8cda96d769b16e443a4f6195e35662b0" dependencies = [ - "bytes", - "crypto-common", - "inout", + "crypto-common 0.1.7", + "generic-array", +] + +[[package]] +name = "aes" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b169f7a6d4742236a0a00c541b845991d0ac43e546831af1249753ab4c3aa3a0" +dependencies = [ + "cfg-if", + "cipher", + "cpufeatures 0.2.17", +] + +[[package]] +name = "aes-gcm" +version = "0.10.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "831010a0f742e1209b3bcea8fab6a8e149051ba6099432c8cb2cc117dec3ead1" +dependencies = [ + "aead", + "aes", + "cipher", + "ctr", + "ghash", + "subtle", ] [[package]] @@ -56,6 +81,24 @@ dependencies = [ "memchr", ] +[[package]] +name = "aligned" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ee4508988c62edf04abd8d92897fca0c2995d907ce1dfeaf369dac3716a40685" +dependencies = [ + "as-slice", +] + +[[package]] +name = "aligned-vec" +version = "0.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc890384c8602f339876ded803c97ad529f3842aba97f6392b3dba0dd171769b" +dependencies = [ + "equator", +] + [[package]] name = "allocator-api2" version = "0.2.21" @@ -73,9 +116,9 @@ dependencies = [ [[package]] name = "anstream" -version = "0.6.21" +version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "43d5b281e737544384e969a5ccad3f1cdd24b48086a0fc1b2a5262a26b8f4f4a" +checksum = "824a212faf96e9acacdbd09febd34438f8f711fb84e09a8916013cd7815ca28d" dependencies = [ "anstyle", "anstyle-parse", @@ -88,15 +131,15 @@ dependencies = [ 
[[package]] name = "anstyle" -version = "1.0.13" +version = "1.0.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5192cca8006f1fd4f7237516f40fa183bb07f8fbdfedaa0036de5ea9b0b45e78" +checksum = "940b3a0ca603d1eade50a4846a2afffd5ef57a9feac2c0e2ec2e14f9ead76000" [[package]] name = "anstyle-parse" -version = "0.2.7" +version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4e7644824f0aa2c7b9384579234ef10eb7efb6a0deb83f9630a49594dd9c15c2" +checksum = "52ce7f38b242319f7cabaa6813055467063ecdc9d355bbb4ce0c68908cd8130e" dependencies = [ "utf8parse", ] @@ -123,9 +166,26 @@ dependencies = [ [[package]] name = "anyhow" -version = "1.0.100" +version = "1.0.102" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f202df86484c868dbad7eaa557ef785d5c66295e41b460ef922eca0723b842c" + +[[package]] +name = "arbitrary" +version = "1.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c3d036a3c4ab069c7b410a2ce876bd74808d2d0888a82667669f8e783a898bf1" + +[[package]] +name = "arg_enum_proc_macro" +version = "0.3.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a23eb6b1614318a8071c9b2521f36b424b2c83db5eb3a0fead4a6c0809af6e61" +checksum = "0ae92a5119aa49cdbcf6b9f893fe4e1d98b04ccbf82ee0584ad948a44a734dea" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] [[package]] name = "arrayref" @@ -140,16 +200,34 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" [[package]] -name = "async-compat" -version = "0.2.5" +name = "as-slice" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "516b6b4f0e40d50dcda9365d53964ec74560ad4284da2e7fc97122cd83174516" +dependencies = [ + "stable_deref_trait", +] + +[[package]] +name = "async-stream" +version = "0.3.6" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "a1ba85bc55464dcbf728b56d97e119d673f4cf9062be330a9a26f3acf504a590" +checksum = "0b5a71a6f37880a80d1d7f19efd781e4b5de42c88f0722cc13bcb6cc2cfe8476" dependencies = [ + "async-stream-impl", "futures-core", - "futures-io", - "once_cell", "pin-project-lite", - "tokio", +] + +[[package]] +name = "async-stream-impl" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c7c24de15d275a1ecfd47a380fb4d5ec9bfe0933f309ed5e705b775596a3574d" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", ] [[package]] @@ -160,7 +238,7 @@ checksum = "9035ad2d096bed7955a320ee7e2230574d28fd3c3a0f186cbea1ff3c7eed5dbb" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.117", ] [[package]] @@ -207,18 +285,86 @@ version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" +[[package]] +name = "av-scenechange" +version = "0.14.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0f321d77c20e19b92c39e7471cf986812cbb46659d2af674adc4331ef3f18394" +dependencies = [ + "aligned", + "anyhow", + "arg_enum_proc_macro", + "arrayvec", + "log", + "num-rational", + "num-traits", + "pastey", + "rayon", + "thiserror 2.0.18", + "v_frame", + "y4m", +] + +[[package]] +name = "av1-grain" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8cfddb07216410377231960af4fcab838eaa12e013417781b78bd95ee22077f8" +dependencies = [ + "anyhow", + "arrayvec", + "log", + "nom 8.0.0", + "num-rational", + "v_frame", +] + +[[package]] +name = "avif-serialize" +version = "0.8.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e7178fe5f7d460b13895ebb9dcb28a3a6216d2df2574a0806cb51b555d297f38" +dependencies = [ + "arrayvec", +] + +[[package]] +name = "aws-lc-rs" +version = "1.16.3" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ec6fb3fe69024a75fa7e1bfb48aa6cf59706a101658ea01bfd33b2b248a038f" +dependencies = [ + "aws-lc-sys", + "zeroize", +] + +[[package]] +name = "aws-lc-sys" +version = "0.40.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f50037ee5e1e41e7b8f9d161680a725bd1626cb6f8c7e901f91f942850852fe7" +dependencies = [ + "cc", + "cmake", + "dunce", + "fs_extra", +] + [[package]] name = "axum" -version = "0.8.8" +version = "0.8.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8b52af3cb4058c895d37317bb27508dccc8e5f2d39454016b297bf4a400597b8" +checksum = "31b698c5f9a010f6573133b09e0de5408834d0c82f8d7475a89fc1867a71cd90" dependencies = [ "axum-core", "bytes", + "form_urlencoded", "futures-util", "http", "http-body", "http-body-util", + "hyper", + "hyper-util", "itoa", "matchit", "memchr", @@ -226,10 +372,15 @@ dependencies = [ "percent-encoding", "pin-project-lite", "serde_core", + "serde_json", + "serde_path_to_error", + "serde_urlencoded", "sync_wrapper", - "tower 0.5.2", + "tokio", + "tower", "tower-layer", "tower-service", + "tracing", ] [[package]] @@ -248,6 +399,7 @@ dependencies = [ "sync_wrapper", "tower-layer", "tower-service", + "tracing", ] [[package]] @@ -261,17 +413,46 @@ dependencies = [ "tokio", ] +[[package]] +name = "bao-tree" +version = "0.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06384416b1825e6e04fde63262fda2dc408f5b64c02d04e0d8b70ae72c17a52b" +dependencies = [ + "blake3", + "bytes", + "futures-lite", + "genawaiter", + "iroh-io", + "positioned-io", + "range-collections", + "self_cell", + "serde", + "smallvec", + "tokio", +] + +[[package]] +name = "base-x" +version = "0.2.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4cbbc9d0964165b47557570cce6c952866c2678457aca742aafc9fb771d30270" + [[package]] name = "base16ct" -version = "1.0.0" +version = "0.2.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "fd307490d624467aa6f74b0eabb77633d1f758a7b25f12bceb0b22e08d9726f6" +checksum = "4c7f02d4ea65f2c1853089ffd8d2787bdbc63de2f0d29dedbcf8ccdfa0ccd4cf" [[package]] -name = "base32" -version = "0.5.1" +name = "base256emoji" +version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "022dfe9eb35f19ebbcb51e0b40a5ab759f46ad60cadf7297e0bd085afb50e076" +checksum = "b5e9430d9a245a77c92176e649af6e275f20839a48389859d1661e9a128d077c" +dependencies = [ + "const-str", + "match-lookup", +] [[package]] name = "base64" @@ -287,50 +468,122 @@ checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" [[package]] name = "base64ct" -version = "1.8.2" +version = "1.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2af50177e190e07a26ab74f8b1efbfe2ef87da2116221318cb1c2e82baf7de06" + +[[package]] +name = "binary-merge" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "597bb81c80a54b6a4381b23faba8d7774b144c94cbd1d6fe3f1329bd776554ab" + +[[package]] +name = "bit-set" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08807e080ed7f9d5433fa9b275196cfc35414f66a0c79d864dc51a0d825231a3" +dependencies = [ + "bit-vec", +] + +[[package]] +name = "bit-vec" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e764a1d40d510daf35e07be9eb06e75770908c27d411ee6c92109c9840eaaf7" + +[[package]] +name = "bit_field" +version = "0.10.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e4b40c7323adcfc0a41c4b88143ed58346ff65a288fc144329c5c45e05d70c6" + +[[package]] +name = "bitflags" +version = "1.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7d809780667f4410e7c41b07f52439b94d2bdf8528eeedc287fa38d3b7f95d82" +checksum = 
"bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" [[package]] name = "bitflags" -version = "2.10.0" +version = "2.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c4512299f36f043ab09a583e57bceb5a5aab7a73db1805848e8fef3c9e8c78b3" + +[[package]] +name = "bitstream-io" +version = "4.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "812e12b5285cc515a9c72a5c1d3b6d46a19dac5acfef5265968c166106e31dd3" +checksum = "7eff00be299a18769011411c9def0d827e8f2d7bf0c3dbf53633147a8867fd1f" +dependencies = [ + "no_std_io2", +] [[package]] name = "blake3" -version = "1.8.2" +version = "1.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3888aaa89e4b2a40fca9848e400f6a658a5a3978de7be858e209cafa8be9a4a0" +checksum = "0aa83c34e62843d924f905e0f5c866eb1dd6545fc4d719e803d9ba6030371fce" dependencies = [ "arrayref", "arrayvec", "cc", "cfg-if", "constant_time_eq", + "cpufeatures 0.3.0", +] + +[[package]] +name = "block" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0d8c1fef690941d3e7788d328517591fecc684c084084702d6ff1641e993699a" + +[[package]] +name = "block-buffer" +version = "0.10.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71" +dependencies = [ + "generic-array", ] [[package]] name = "block-buffer" -version = "0.11.0" +version = "0.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "96eb4cdd6cf1b31d671e9efe75c5d1ec614776856cefbe109ca373554a6d514f" +checksum = "cdd35008169921d80bc60d3d0ab416eecb028c4cd653352907921d95084790be" dependencies = [ "hybrid-array", - "zeroize", ] +[[package]] +name = "block2" +version = "0.6.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cdeb9d870516001442e364c5220d3574d2da8dc765554b4a617230d33fa58ef5" +dependencies = [ + "objc2", +] 
+ +[[package]] +name = "built" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f4ad8f11f288f48ca24471bbd51ac257aaeaaa07adae295591266b792902ae64" + [[package]] name = "bumpalo" -version = "3.19.1" +version = "3.20.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5dd9dc738b7a8311c7ade152424974d8115f2cdad61e8dab8dac9f2362298510" +checksum = "5d20789868f4b01b2f2caec9f5c4e0213b41e3e5702a50157d699ae31ced2fcb" [[package]] name = "bytemuck" -version = "1.24.0" +version = "1.25.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1fbdf580320f38b612e485521afda1ee26d10cc9884efaaa750d383e13e3c5f4" +checksum = "c8efb64bd706a16a1bdde310ae86b351e4d21550d98d056f22f8a7f7a2183fec" dependencies = [ "bytemuck_derive", ] @@ -343,7 +596,7 @@ checksum = "f9abbd1bc6865053c427f7198e6af43bfdedc55ab791faed4fbd361d789575ff" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.117", ] [[package]] @@ -352,15 +605,86 @@ version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" +[[package]] +name = "byteorder-lite" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f1fe948ff07f4bd06c30984e69f5b4899c516a3ef74f34df92a2df2ab535495" + [[package]] name = "bytes" -version = "1.11.0" +version = "1.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b35204fbdc0b3f4446b89fc1ac2cf84a8a68971995d0bf2e925ec7cd960f9cb3" +checksum = "1e748733b7cbc798e1434b6ac524f0c1ff2ab456fe201501e6497c8417a4fc33" dependencies = [ "serde", ] +[[package]] +name = "candle-core" +version = "0.10.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6bd9895436c1ba5dc1037a19935d084b838db066ff4e15ef7dded020b7c12a4a" +dependencies = [ + "byteorder", + "candle-kernels", + "candle-metal-kernels", + "candle-ug", + 
"cudarc 0.19.4", + "float8", + "gemm 0.19.0", + "half", + "libm", + "memmap2", + "num-traits", + "num_cpus", + "objc2-foundation", + "objc2-metal", + "rand 0.9.4", + "rand_distr", + "rayon", + "safetensors 0.7.0", + "thiserror 2.0.18", + "tokenizers 0.22.2", + "yoke 0.8.2", + "zip", +] + +[[package]] +name = "candle-kernels" +version = "0.10.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "742e2ac226b777134436e9e692f44e77c278b8a7abb1554dc10e44dc911b349f" +dependencies = [ + "cudaforge", +] + +[[package]] +name = "candle-metal-kernels" +version = "0.10.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4b6b5a4cae6b4e1ab0efcee4dc05272d11b374a3d1ba121b3a961e36be54ab60" +dependencies = [ + "half", + "objc2", + "objc2-foundation", + "objc2-metal", + "once_cell", + "thiserror 2.0.18", + "tracing", +] + +[[package]] +name = "candle-ug" +version = "0.10.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ca0fc3167cbc99c8ec1be618cb620aa21dca95038f118c3579a79370e3dc5f77" +dependencies = [ + "ug", + "ug-cuda", + "ug-metal", +] + [[package]] name = "castaway" version = "0.2.4" @@ -373,60 +697,65 @@ dependencies = [ [[package]] name = "catgrad" version = "0.2.1" -source = "git+https://github.com/hellas-ai/catgrad#4e4d09b62081acc4b9fdefc16180a27f03b61c55" +source = "git+https://github.com/georgewhewell/catgrad?branch=grw%2Ffeat%2Fchat-types-on-chatgrad#5c39f6bacbb126c7b32710a83383aa90286312fc" dependencies = [ - "ndarray", + "candle-core", + "float8", + "half", "open-hypergraphs", "serde", ] [[package]] -name = "catgrad-legacy" -version = "0.1.1" -source = "git+https://github.com/hellas-ai/catgrad#4e4d09b62081acc4b9fdefc16180a27f03b61c55" +name = "catgrad-llm" +version = "0.2.1" +source = "git+https://github.com/georgewhewell/catgrad?branch=grw%2Ffeat%2Fchat-types-on-chatgrad#5c39f6bacbb126c7b32710a83383aa90286312fc" dependencies = [ - "gemm", + "catgrad", + "float8", "half", + "hf-hub 
0.4.3", + "hound", + "image", "log", "memmap2", - "num-traits", - "num_cpus", "open-hypergraphs", "rayon", + "rustfft", + "safetensors 0.7.0", "serde", "serde_json", - "test-log", + "serde_path_to_error", + "thiserror 2.0.18", + "tokenizers 0.21.4", ] [[package]] -name = "catgrad-llm" +name = "catnix" version = "0.2.1" -source = "git+https://github.com/hellas-ai/catgrad#4e4d09b62081acc4b9fdefc16180a27f03b61c55" +source = "git+https://github.com/georgewhewell/catgrad?branch=grw%2Ffeat%2Fchat-types-on-chatgrad#5c39f6bacbb126c7b32710a83383aa90286312fc" +dependencies = [ + "blake3", +] + +[[package]] +name = "cbor4ii" +version = "0.2.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b544cf8c89359205f4f990d0e6f3828db42df85b5dac95d09157a250eb0749c4" dependencies = [ - "catgrad", - "catgrad-legacy", - "half", - "hf-hub", - "log", - "memmap2", - "minijinja", - "minijinja-contrib", - "open-hypergraphs", - "rayon", - "safetensors", "serde", - "serde_json", - "thiserror 2.0.17", - "tokenizers", ] [[package]] name = "cc" -version = "1.2.51" +version = "1.2.62" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a0aeaff4ff1a90589618835a598e545176939b97874f7abc7851caa0618f203" +checksum = "a1dce859f0832a7d088c4f1119888ab94ef4b5d6795d1ce05afb7fe159d79f98" dependencies = [ "find-msvc-tools", + "jobserver", + "libc", "shlex", ] @@ -450,45 +779,76 @@ checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724" [[package]] name = "chacha20" -version = "0.10.0-rc.2" +version = "0.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9bd162f2b8af3e0639d83f28a637e4e55657b7a74508dba5a9bf4da523d5c9e9" +checksum = "6f8d983286843e49675a4b7a2d174efe136dc93a18d69130dd18198a6c167601" dependencies = [ "cfg-if", - "cipher", - "cpufeatures", - "zeroize", + "cpufeatures 0.3.0", + "rand_core 0.10.1", ] [[package]] -name = "chrono" -version = "0.4.42" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "145052bdd345b87320e369255277e3fb5152762ad123a901ef5c262dd38fe8d2" +name = "chatgrad" +version = "0.2.1" +source = "git+https://github.com/georgewhewell/catgrad?branch=grw%2Ffeat%2Fchat-types-on-chatgrad#5c39f6bacbb126c7b32710a83383aa90286312fc" dependencies = [ - "iana-time-zone", - "num-traits", + "catgrad", + "catgrad-llm", + "chrono", + "minijinja", + "minijinja-contrib", + "serde", + "serde_json", + "serde_with", + "tokenizers 0.21.4", + "typed-builder", + "ureq 2.12.1", + "url", +] + +[[package]] +name = "chrono" +version = "0.4.44" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c673075a2e0e5f4a1dde27ce9dee1ea4558c7ffe648f576438a20ca1d2acc4b0" +dependencies = [ + "iana-time-zone", + "js-sys", + "num-traits", "serde", + "wasm-bindgen", "windows-link", ] +[[package]] +name = "cid" +version = "0.11.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "21a304f95f84d169a6f31c4d0a30d784643aaa0bbc9c1e449a2c23e963ec4971" +dependencies = [ + "multibase", + "multihash", + "serde", + "serde_bytes", + "unsigned-varint", +] + [[package]] name = "cipher" -version = "0.5.0-rc.1" +version = "0.4.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e12a13eb01ded5d32ee9658d94f553a19e804204f2dc811df69ab4d9e0cb8c7" +checksum = "773f3b9af64447d2ce9850330c473515014aa235e6a783b02db81ff39e4a3dad" dependencies = [ - "block-buffer", - "crypto-common", + "crypto-common 0.1.7", "inout", - "zeroize", ] [[package]] name = "clap" -version = "4.5.54" +version = "4.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c6e6ff9dcd79cff5cd969a17a545d79e84ab086e444102a591e288a8aa3ce394" +checksum = "1ddb117e43bbf7dacf0a4190fef4d345b9bad68dfc649cb349e7d17d28428e51" dependencies = [ "clap_builder", "clap_derive", @@ -496,9 +856,9 @@ dependencies = [ [[package]] name = "clap_builder" -version = "4.5.54" +version = 
"4.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fa42cf4d2b7a41bc8f663a7cab4031ebafa1bf3875705bfaf8466dc60ab52c00" +checksum = "714a53001bf66416adb0e2ef5ac857140e7dc3a0c48fb28b2f10762fc4b5069f" dependencies = [ "anstream", "anstyle", @@ -508,21 +868,36 @@ dependencies = [ [[package]] name = "clap_derive" -version = "4.5.49" +version = "4.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2a0b5487afeab2deb2ff4e03a807ad1a03ac532ff5a2cee5d86884440c7f7671" +checksum = "f2ce8604710f6733aa641a2b3731eaa1e8b3d9973d5e3565da11800813f997a9" dependencies = [ "heck", "proc-macro2", "quote", - "syn", + "syn 2.0.117", ] [[package]] name = "clap_lex" -version = "0.7.6" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8d4a3bb8b1e0c1050499d1815f5ab16d04f0959b233085fb31653fbfc9d98f9" + +[[package]] +name = "cmake" +version = "0.1.58" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c0f78a02292a74a88ac736019ab962ece0bc380e3f977bf72e376c5d78ff0678" +dependencies = [ + "cc", +] + +[[package]] +name = "cmov" +version = "0.5.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a1d728cc89cf3aee9ff92b05e62b19ee65a02b5702cff7d5a377e32c6ae29d8d" +checksum = "3f88a43d011fc4a6876cb7344703e297c71dda42494fee094d5f7c76bf13f746" [[package]] name = "cobs" @@ -530,14 +905,20 @@ version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0fa961b519f0b462e3a3b4a34b64d119eeaca1d59af726fe450bbba07a9fc0a1" dependencies = [ - "thiserror 2.0.17", + "thiserror 2.0.18", ] +[[package]] +name = "color_quant" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d7b894f5411737b7867f4827955924d7c254fc9f4d91a6aad6b097804b1018b" + [[package]] name = "colorchoice" -version = "1.0.4" +version = "1.0.5" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75" +checksum = "1d07550c9036bf2ae0c684c4297d503f838287c83c53686d05370d0e139ae570" [[package]] name = "combine" @@ -577,17 +958,41 @@ dependencies = [ "windows-sys 0.59.0", ] +[[package]] +name = "console" +version = "0.16.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d64e8af5551369d19cf50138de61f1c42074ab970f74e99be916646777f8fc87" +dependencies = [ + "encode_unicode", + "libc", + "unicode-width", + "windows-sys 0.61.2", +] + [[package]] name = "const-oid" -version = "0.10.1" +version = "0.9.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c2459377285ad874054d797f3ccebf984978aa39129f6eafde5cdc8315b612f8" + +[[package]] +name = "const-oid" +version = "0.10.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a6ef517f0926dd24a1582492c791b6a4818a4d94e789a334894aa15b0d12f55c" + +[[package]] +name = "const-str" +version = "0.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0dabb6555f92fb9ee4140454eb5dcd14c7960e1225c6d1a6cc361f032947713e" +checksum = "2f421161cb492475f1661ddc9815a745a1c894592070661180fdec3d4872e9c3" [[package]] name = "constant_time_eq" -version = "0.3.1" +version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7c74b8349d32d297c9134b8c88677813a227df8f779daa29bfc29c183fe3dca6" +checksum = "3d52eff69cd5e647efe296129160853a42795992097e8af39800e1060caeea9b" [[package]] name = "convert_case" @@ -598,6 +1003,35 @@ dependencies = [ "unicode-segmentation", ] +[[package]] +name = "cookie" +version = "0.18.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ddef33a339a91ea89fb53151bd0a4689cfce27055c291dfa69945475d22c747" +dependencies = [ + "percent-encoding", + "time", + "version_check", +] + +[[package]] +name = "cookie_store" 
+version = "0.22.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "15b2c103cf610ec6cae3da84a766285b42fd16aad564758459e6ecf128c75206" +dependencies = [ + "cookie", + "document-features", + "idna", + "indexmap", + "log", + "serde", + "serde_derive", + "serde_json", + "time", + "url", +] + [[package]] name = "cordyceps" version = "0.3.4" @@ -634,6 +1068,17 @@ version = "0.8.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" +[[package]] +name = "core-graphics-types" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "45390e6114f68f718cc7a830514a96f903cccd70d02a8f6d9f643ac4ba45afaf" +dependencies = [ + "bitflags 1.3.2", + "core-foundation 0.9.4", + "libc", +] + [[package]] name = "cpufeatures" version = "0.2.17" @@ -643,6 +1088,15 @@ dependencies = [ "libc", ] +[[package]] +name = "cpufeatures" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b2a41393f66f16b0823bb79094d54ac5fbd34ab292ddafb9a0456ac9f87d201" +dependencies = [ + "libc", +] + [[package]] name = "crc" version = "3.4.0" @@ -654,9 +1108,9 @@ dependencies = [ [[package]] name = "crc-catalog" -version = "2.4.0" +version = "2.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "19d374276b40fb8bbdee95aef7c7fa6b5316ec764510eb64b8dd0e2ed0d7e7f5" +checksum = "217698eaf96b4a3f0bc4f3662aaa55bdf913cd54d7204591faa790070c6d0853" [[package]] name = "crc32fast" @@ -713,60 +1167,107 @@ version = "0.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "460fbee9c2c2f33933d720630a6a0bac33ba7053db5344fac858d4b8952d77d5" +[[package]] +name = "crypto-bigint" +version = "0.5.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0dc92fb57ca44df6db8059111ab3af99a63d5d0f8375d9972e319a379c6bab76" +dependencies = [ + 
"generic-array", + "rand_core 0.6.4", + "subtle", + "zeroize", +] + [[package]] name = "crypto-common" -version = "0.2.0-rc.4" +version = "0.1.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6a8235645834fbc6832939736ce2f2d08192652269e11010a6240f61b908a1c6" +checksum = "78c8292055d1c1df0cce5d180393dc8cce0abec0a7102adb6c7b1eef6016d60a" dependencies = [ - "hybrid-array", - "rand_core 0.9.3", + "generic-array", + "typenum", ] [[package]] -name = "crypto_box" -version = "0.10.0-pre.0" +name = "crypto-common" +version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2bda4de3e070830cf3a27a394de135b6709aefcc54d1e16f2f029271254a6ed9" +checksum = "77727bb15fa921304124b128af125e7e3b968275d1b108b379190264f4423710" dependencies = [ - "aead", - "chacha20", - "crypto_secretbox", - "curve25519-dalek", - "salsa20", - "serdect", - "subtle", - "zeroize", + "hybrid-array", ] [[package]] -name = "crypto_secretbox" -version = "0.2.0-pre.0" +name = "ctr" +version = "0.9.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "54532aae6546084a52cef855593daf9555945719eeeda9974150e0def854873e" +checksum = "0369ee1ad671834580515889b80f2ea915f23b8be8d0daa4bbaf2ac5c7590835" dependencies = [ - "aead", - "chacha20", "cipher", - "hybrid-array", - "poly1305", - "salsa20", - "subtle", - "zeroize", +] + +[[package]] +name = "ctutils" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7d5515a3834141de9eafb9717ad39eea8247b5674e6066c404e8c4b365d2a29e" +dependencies = [ + "cmov", +] + +[[package]] +name = "cudaforge" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6f7a0d45b139b5beeeb1c34188717e12241c44a0120afb498815ce7f5373c691" +dependencies = [ + "anyhow", + "fs2", + "glob", + "num_cpus", + "rayon", + "serde", + "serde_json", + "sha2 0.10.9", + "thiserror 2.0.18", + "walkdir", + "which", +] + +[[package]] +name 
= "cudarc" +version = "0.17.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0bf99ab37ee7072d64d906aa2dada9a3422f1d975cdf8c8055a573bc84897ed8" +dependencies = [ + "half", + "libloading 0.8.9", +] + +[[package]] +name = "cudarc" +version = "0.19.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f071cd6a7b5d51607df76aa2d426aaabc7a74bc6bdb885b8afa63a880572ad9b" +dependencies = [ + "float8", + "half", + "libloading 0.9.0", ] [[package]] name = "curve25519-dalek" -version = "5.0.0-pre.1" +version = "5.0.0-pre.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6f9200d1d13637f15a6acb71e758f64624048d85b31a5fdbfd8eca1e2687d0b7" +checksum = "335f1947f241137a14106b6f5acc5918a5ede29c9d71d3f2cb1678d5075d9fc3" dependencies = [ "cfg-if", - "cpufeatures", + "cpufeatures 0.2.17", "curve25519-dalek-derive", - "digest", + "digest 0.11.3", "fiat-crypto", - "rand_core 0.9.3", + "rand_core 0.10.1", "rustc_version", "serde", "subtle", @@ -781,7 +1282,7 @@ checksum = "f46882e17999c6cc590af592290432be3bce0428cb0d5f8b6715e4dc7b383eb3" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.117", ] [[package]] @@ -790,8 +1291,18 @@ version = "0.20.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fc7f46116c46ff9ab3eb1597a45688b6715c6e628b5c133e288e709a29bcb4ee" dependencies = [ - "darling_core", - "darling_macro", + "darling_core 0.20.11", + "darling_macro 0.20.11", +] + +[[package]] +name = "darling" +version = "0.23.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "25ae13da2f202d56bd7f91c25fba009e7717a1e4a1cc98a76d844b65ae912e9d" +dependencies = [ + "darling_core 0.23.0", + "darling_macro 0.23.0", ] [[package]] @@ -805,7 +1316,20 @@ dependencies = [ "proc-macro2", "quote", "strsim", - "syn", + "syn 2.0.117", +] + +[[package]] +name = "darling_core" +version = "0.23.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "9865a50f7c335f53564bb694ef660825eb8610e0a53d3e11bf1b0d3df31e03b0" +dependencies = [ + "ident_case", + "proc-macro2", + "quote", + "strsim", + "syn 2.0.117", ] [[package]] @@ -814,25 +1338,66 @@ version = "0.20.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fc34b93ccb385b40dc71c6fceac4b2ad23662c7eeb248cf10d529b7e055b6ead" dependencies = [ - "darling_core", + "darling_core 0.20.11", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "darling_macro" +version = "0.23.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3984ec7bd6cfa798e62b4a642426a5be0e68f9401cfc2a01e3fa9ea2fcdb8d" +dependencies = [ + "darling_core 0.23.0", "quote", - "syn", + "syn 2.0.117", ] [[package]] name = "dary_heap" -version = "0.3.8" +version = "0.3.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "06d2e3287df1c007e74221c49ca10a95d557349e54b3a75dc2fb14712c751f04" +checksum = "8b1e3a325bc115f096c8b77bbf027a7c2592230e70be2d985be950d3d5e60ebe" dependencies = [ "serde", ] [[package]] name = "data-encoding" -version = "2.9.0" +version = "2.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a4ae5f15dda3c708c0ade84bfee31ccab44a3da4f88015ed22f63732abe300c8" + +[[package]] +name = "data-encoding-macro" +version = "0.1.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3259c913752a86488b501ed8680446a5ed2d5aeac6e596cb23ba3800768ea32c" +dependencies = [ + "data-encoding", + "data-encoding-macro-internal", +] + +[[package]] +name = "data-encoding-macro-internal" +version = "0.1.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ccc2776f0c61eca1ca32528f85548abd1a4be8fb53d1b21c013e4f18da1e7090" +dependencies = [ + "data-encoding", + "syn 2.0.117", +] + +[[package]] +name = "der" +version = "0.7.10" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "2a2330da5de22e8a3cb63252ce2abb30116bf5265e89c0e01bc17015ce30a476" +checksum = "e7c1832837b905bbfb5101e07cc24c8deddf52f93225eee6ead5f4d63d53ddcb" +dependencies = [ + "const-oid 0.9.6", + "zeroize", +] [[package]] name = "der" @@ -840,16 +1405,16 @@ version = "0.8.0-rc.10" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "02c1d73e9668ea6b6a28172aa55f3ebec38507131ce179051c8033b5c6037653" dependencies = [ - "const-oid", + "const-oid 0.10.2", "pem-rfc7468", "zeroize", ] [[package]] name = "deranged" -version = "0.5.5" +version = "0.5.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ececcb659e7ba858fb4f10388c250a7252eb0a27373f1a72b8748afdd248e587" +checksum = "7cd812cc2bc1d69d4764bd80df88b4317eaef9e773c75226407d9bc0876b211c" dependencies = [ "powerfmt", ] @@ -869,10 +1434,10 @@ version = "0.20.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2d5bcf7b024d6835cfb3d473887cd966994907effbe9227e8c8219824d06c4e8" dependencies = [ - "darling", + "darling 0.20.11", "proc-macro2", "quote", - "syn", + "syn 2.0.117", ] [[package]] @@ -882,7 +1447,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ab63b0e2bf4d5928aff72e83a7dace85d7bba5fe12dcc3c5a572d78caffd3f3c" dependencies = [ "derive_builder_core", - "syn", + "syn 2.0.117", ] [[package]] @@ -904,7 +1469,7 @@ dependencies = [ "proc-macro2", "quote", "rustc_version", - "syn", + "syn 2.0.117", "unicode-xid", ] @@ -916,13 +1481,25 @@ checksum = "ab03c107fafeb3ee9f5925686dbb7a73bc76e3932abb0d2b365cb64b169cf04c" [[package]] name = "digest" -version = "0.11.0-rc.3" +version = "0.10.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" +dependencies = [ + "block-buffer 0.10.4", + "const-oid 0.9.6", + "crypto-common 0.1.7", + "subtle", +] + +[[package]] +name = 
"digest" +version = "0.11.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dac89f8a64533a9b0eaa73a68e424db0fb1fd6271c74cc0125336a05f090568d" +checksum = "f1dd6dbb5841937940781866fa1281a1ff7bd3bf827091440879f9994983d5c2" dependencies = [ - "block-buffer", - "const-oid", - "crypto-common", + "block-buffer 0.12.0", + "const-oid 0.10.2", + "crypto-common 0.2.1", ] [[package]] @@ -946,6 +1523,18 @@ dependencies = [ "windows-sys 0.61.2", ] +[[package]] +name = "dispatch2" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e0e367e4e7da84520dedcac1901e4da967309406d1e51017ae1abfb97adbd38" +dependencies = [ + "bitflags 2.11.1", + "block2", + "libc", + "objc2", +] + [[package]] name = "displaydoc" version = "0.2.5" @@ -954,14 +1543,14 @@ checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.117", ] [[package]] name = "dlopen2" -version = "0.5.0" +version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "09b4f5f101177ff01b8ec4ecc81eead416a8aa42819a2869311b3420fa114ffa" +checksum = "5e2c5bd4158e66d1e215c49b837e11d62f3267b30c92f1d171c4d3105e3dc4d4" dependencies = [ "libc", "once_cell", @@ -977,6 +1566,18 @@ dependencies = [ "litrs", ] +[[package]] +name = "dtoa" +version = "1.0.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4c3cf4824e2d5f025c7b531afcb2325364084a16806f6d47fbc1f5fbd9960590" + +[[package]] +name = "dunce" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "92773504d58c093f6de2459af4af33faa518c13451eb8f2b5698ed3d36e7c813" + [[package]] name = "dyn-clone" version = "1.0.20" @@ -999,29 +1600,43 @@ version = "0.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e1d926b4d407d372f141f93bb444696142c29d32962ccbd3531117cf3aa0bfa9" +[[package]] +name = "ecdsa" 
+version = "0.16.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ee27f32b5c5292967d2d4a9d7f1e0b0aed2c15daded5a60300e4abb9d8020bca" +dependencies = [ + "der 0.7.10", + "digest 0.10.7", + "elliptic-curve", + "rfc6979", + "signature 2.2.0", + "spki 0.7.3", +] + [[package]] name = "ed25519" -version = "3.0.0-rc.2" +version = "3.0.0-rc.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "594435fe09e345ee388e4e8422072ff7dfeca8729389fbd997b3f5504c44cd47" +checksum = "c6e914c7c52decb085cea910552e24c63ac019e3ab8bf001ff736da9a9d9d890" dependencies = [ - "pkcs8", + "pkcs8 0.11.0-rc.10", "serde", - "signature", + "signature 3.0.0", ] [[package]] name = "ed25519-dalek" -version = "3.0.0-pre.1" +version = "3.0.0-pre.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ad207ed88a133091f83224265eac21109930db09bedcad05d5252f2af2de20a1" +checksum = "053618a4c3d3bc24f188aa660ae75a46eeab74ef07fb415c61431e5e7cd4749b" dependencies = [ "curve25519-dalek", "ed25519", - "rand_core 0.9.3", + "rand_core 0.10.1", "serde", - "sha2", - "signature", + "sha2 0.11.0-rc.5", + "signature 3.0.0", "subtle", "zeroize", ] @@ -1032,6 +1647,25 @@ version = "1.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" +[[package]] +name = "elliptic-curve" +version = "0.13.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b5e6043086bf7973472e0c7dff2142ea0b680d30e18d9cc40f267efbf222bd47" +dependencies = [ + "base16ct", + "crypto-bigint", + "digest 0.10.7", + "ff", + "generic-array", + "group", + "pkcs8 0.10.2", + "rand_core 0.6.4", + "sec1", + "subtle", + "zeroize", +] + [[package]] name = "embedded-io" version = "0.4.0" @@ -1050,15 +1684,6 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"34aa73646ffb006b8f5147f3dc182bd4bcb190227ce861fc4a4844bf8e3cb2c0" -[[package]] -name = "encoding_rs" -version = "0.8.35" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "75030f3c4f45dafd7586dd6780965a8c7e8e285a5ecb86713e63a79c5b2766f3" -dependencies = [ - "cfg-if", -] - [[package]] name = "enum-as-inner" version = "0.6.1" @@ -1068,28 +1693,44 @@ dependencies = [ "heck", "proc-macro2", "quote", - "syn", + "syn 2.0.117", ] [[package]] -name = "env_filter" -version = "0.1.4" +name = "enum-assoc" +version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1bf3c259d255ca70051b30e2e95b5446cdb8949ac4cd22c0d7fd634d89f568e2" +checksum = "3ed8956bd5c1f0415200516e78ff07ec9e16415ade83c056c230d7b7ea0d55b7" dependencies = [ - "log", + "proc-macro2", + "quote", + "syn 2.0.117", ] [[package]] -name = "env_logger" -version = "0.11.8" +name = "env_home" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c7f84e12ccf0a7ddc17a6c41c93326024c42920d7ee630d04950e6926645c0fe" + +[[package]] +name = "equator" +version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "13c863f0904021b108aa8b2f55046443e6b1ebde8fd4a15c399893aae4fa069f" +checksum = "4711b213838dfee0117e3be6ac926007d7f433d7bbe33595975d4190cb07e6fc" dependencies = [ - "anstream", - "anstyle", - "env_filter", - "log", + "equator-macro", +] + +[[package]] +name = "equator-macro" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "44f23cf4b44bfce11a86ace86f8a73ffdec849c9fd00a386a53d278bd9e81fb3" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", ] [[package]] @@ -1117,11 +1758,62 @@ dependencies = [ "cc", ] +[[package]] +name = "exr" +version = "1.74.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4300e043a56aa2cb633c01af81ca8f699a321879a7854d3896a0ba89056363be" +dependencies = [ + "bit_field", 
+ "half", + "lebe", + "miniz_oxide", + "rayon-core", + "smallvec", + "zune-inflate", +] + +[[package]] +name = "fancy-regex" +version = "0.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e24cb5a94bcae1e5408b0effca5cd7172ea3c5755049c5f3af4cd283a165298" +dependencies = [ + "bit-set", + "regex-automata", + "regex-syntax", +] + [[package]] name = "fastrand" -version = "2.3.0" +version = "2.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9f1f227452a390804cdb637b74a86990f2a7d7ba4b7d5693aac9b4dd6defd8d6" + +[[package]] +name = "fax" +version = "0.2.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "caf1079563223d5d59d83c85886a56e586cfd5c1a26292e971a0fa266531ac5a" + +[[package]] +name = "fdeflate" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e6853b52649d4ac5c0bd02320cddc5ba956bdb407c4b75a2c6b75bf51500f8c" +dependencies = [ + "simd-adler32", +] + +[[package]] +name = "ff" +version = "0.13.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" +checksum = "c0b50bfb653653f9ca9095b427bed08ab8d75a137839d9ad64eb11810d5b6393" +dependencies = [ + "rand_core 0.6.4", + "subtle", +] [[package]] name = "fiat-crypto" @@ -1131,9 +1823,9 @@ checksum = "64cd1e32ddd350061ae6edb1b082d7c54915b5c672c389143b9a63403a109f24" [[package]] name = "find-msvc-tools" -version = "0.1.6" +version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "645cbb3a84e60b7531617d5ae4e57f7e27308f6445f5abf653209ea76dec8dff" +checksum = "5baebc0774151f905a1a2cc41989300b1e6fbb29aff0ceffa1064fdd3088d582" [[package]] name = "fixedbitset" @@ -1143,19 +1835,31 @@ checksum = "1d674e81391d1e1ab681a28d99df07927c6d4aa5b027d7da16ba32d1d21ecd99" [[package]] name = "flate2" -version = "1.1.5" +version = "1.1.9" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "bfe33edd8e85a12a67454e37f8c75e730830d83e313556ab9ebf9ee7fbeb3bfb" +checksum = "843fba2746e448b37e26a819579957415c8cef339bf08564fe8b7ddbd959573c" dependencies = [ "crc32fast", "miniz_oxide", ] +[[package]] +name = "float8" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c2d1f04709a8ac06e8e8042875a3c466cc4832d3c1a18dbcb9dba3c6e83046bc" +dependencies = [ + "half", + "num-traits", + "rand 0.9.4", + "rand_distr", +] + [[package]] name = "flume" -version = "0.11.1" +version = "0.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "da0e4dd2a88388a1f4ccc7c9ce104604dab68d9f408dc34cd45823d5a9069095" +checksum = "5e139bc46ca777eb5efaf62df0ab8cc5fd400866427e56c68b22e414e53bd3be" dependencies = [ "futures-core", "futures-sink", @@ -1168,6 +1872,12 @@ version = "1.0.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" +[[package]] +name = "foldhash" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2" + [[package]] name = "foldhash" version = "0.2.0" @@ -1176,18 +1886,30 @@ checksum = "77ce24cb58228fbb8aa041425bb1050850ac19177686ea6e0f41a70416f56fdb" [[package]] name = "foreign-types" -version = "0.3.2" +version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f6f339eb8adc052cd2ca78910fda869aefa38d22d5cb648e6485e4d3fc06f3b1" +checksum = "d737d9aa519fb7b749cbc3b962edcf310a8dd1f4b67c91c4f83975dbdd17d965" dependencies = [ + "foreign-types-macros", "foreign-types-shared", ] +[[package]] +name = "foreign-types-macros" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a5c6c585bc94aaf2c7b51dd4c2ba22680844aba4c687be581871a6f518c5742" +dependencies = [ + 
"proc-macro2", + "quote", + "syn 2.0.117", +] + [[package]] name = "foreign-types-shared" -version = "0.1.1" +version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "00b0228411908ca8685dba7fc2cdd70ec9990a6e753e89b6ac91a84c40fbaf4b" +checksum = "aa9a19cbb55df58761df49b23516a86d432839add4af60fc256da840f66ed35b" [[package]] name = "form_urlencoded" @@ -1198,11 +1920,27 @@ dependencies = [ "percent-encoding", ] +[[package]] +name = "fs2" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9564fc758e15025b46aa6643b1b77d047d1a56a1aea6e01002ac0c7026876213" +dependencies = [ + "libc", + "winapi", +] + +[[package]] +name = "fs_extra" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42703706b716c37f96a77aea830392ad231f44c9e9a67872fa5548707e11b11c" + [[package]] name = "futures" -version = "0.3.31" +version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "65bc07b1a8bc7c85c5f2e110c476c7389b4554ba72af57d8445ea63a576b0876" +checksum = "8b147ee9d1f6d097cef9ce628cd2ee62288d963e16fb287bd9286455b241382d" dependencies = [ "futures-channel", "futures-core", @@ -1215,9 +1953,9 @@ dependencies = [ [[package]] name = "futures-buffered" -version = "0.2.12" +version = "0.2.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a8e0e1f38ec07ba4abbde21eed377082f17ccb988be9d988a5adbf4bafc118fd" +checksum = "4421cb78ee172b6b06080093479d3c50f058e7c81b7d577bbb8d118d551d4cd5" dependencies = [ "cordyceps", "diatomic-waker", @@ -1228,40 +1966,25 @@ dependencies = [ [[package]] name = "futures-channel" -version = "0.3.31" +version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2dff15bf788c671c1934e366d07e30c1814a8ef514e1af724a602e8a2fbe1b10" +checksum = "07bbe89c50d7a535e539b8c17bc0b49bdb77747034daa8087407d655f3f7cc1d" dependencies = [ "futures-core", 
"futures-sink", ] -[[package]] -name = "futures-concurrency" -version = "7.6.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0eb68017df91f2e477ed4bea586c59eaecaa47ed885a770d0444e21e62572cd2" -dependencies = [ - "fixedbitset", - "futures-buffered", - "futures-core", - "futures-lite", - "pin-project", - "slab", - "smallvec", -] - [[package]] name = "futures-core" -version = "0.3.31" +version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "05f29059c0c2090612e8d742178b0580d2dc940c837851ad723096f87af6663e" +checksum = "7e3450815272ef58cec6d564423f6e755e25379b217b0bc688e295ba24df6b1d" [[package]] name = "futures-executor" -version = "0.3.31" +version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e28d1d997f585e54aebc3f97d39e72338912123a67330d723fdbb564d646c9f" +checksum = "baf29c38818342a3b26b5b923639e7b1f4a61fc5e76102d4b1981c6dc7a7579d" dependencies = [ "futures-core", "futures-task", @@ -1270,9 +1993,9 @@ dependencies = [ [[package]] name = "futures-io" -version = "0.3.31" +version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9e5c1b78ca4aae1ac06c48a526a655760685149f0d465d21f37abfe57ce075c6" +checksum = "cecba35d7ad927e23624b22ad55235f2239cfa44fd10428eecbeba6d6a717718" [[package]] name = "futures-lite" @@ -1289,32 +2012,32 @@ dependencies = [ [[package]] name = "futures-macro" -version = "0.3.31" +version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650" +checksum = "e835b70203e41293343137df5c0664546da5745f82ec9b84d40be8336958447b" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.117", ] [[package]] name = "futures-sink" -version = "0.3.31" +version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"e575fab7d1e0dcb8d0c7bcf9a63ee213816ab51902e6d244a95819acacf1d4f7" +checksum = "c39754e157331b013978ec91992bde1ac089843443c49cbc7f46150b0fad0893" [[package]] name = "futures-task" -version = "0.3.31" +version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f90f7dce0722e95104fcb095585910c0977252f286e354b5e3bd38902cd99988" +checksum = "037711b3d59c33004d3856fbdc83b99d4ff37a24768fa1be9ce3538a1cde4393" [[package]] name = "futures-util" -version = "0.3.31" +version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9fa08315bb612088cc391249efdc3bc77536f16c91f6cf495e6fbe85b20a4a81" +checksum = "389ca41296e6190b48053de0321d02a77f32f8a5d2461dd38762c0593805c6d6" dependencies = [ "futures-channel", "futures-core", @@ -1324,7 +2047,6 @@ dependencies = [ "futures-task", "memchr", "pin-project-lite", - "pin-utils", "slab", ] @@ -1335,12 +2057,32 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ab96b703d31950f1aeddded248bc95543c9efc7ac9c4a21fda8703a83ee35451" dependencies = [ "dyn-stack", - "gemm-c32", - "gemm-c64", - "gemm-common", - "gemm-f16", - "gemm-f32", - "gemm-f64", + "gemm-c32 0.18.2", + "gemm-c64 0.18.2", + "gemm-common 0.18.2", + "gemm-f16 0.18.2", + "gemm-f32 0.18.2", + "gemm-f64 0.18.2", + "num-complex", + "num-traits", + "paste", + "raw-cpuid", + "seq-macro", +] + +[[package]] +name = "gemm" +version = "0.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aa0673db364b12263d103b68337a68fbecc541d6f6b61ba72fe438654709eacb" +dependencies = [ + "dyn-stack", + "gemm-c32 0.19.0", + "gemm-c64 0.19.0", + "gemm-common 0.19.0", + "gemm-f16 0.19.0", + "gemm-f32 0.19.0", + "gemm-f64 0.19.0", "num-complex", "num-traits", "paste", @@ -1355,7 +2097,22 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f6db9fd9f40421d00eea9dd0770045a5603b8d684654816637732463f4073847" dependencies = [ "dyn-stack", - "gemm-common", 
+ "gemm-common 0.18.2", + "num-complex", + "num-traits", + "paste", + "raw-cpuid", + "seq-macro", +] + +[[package]] +name = "gemm-c32" +version = "0.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "086936dbdcb99e37aad81d320f98f670e53c1e55a98bee70573e83f95beb128c" +dependencies = [ + "dyn-stack", + "gemm-common 0.19.0", "num-complex", "num-traits", "paste", @@ -1370,7 +2127,22 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dfcad8a3d35a43758330b635d02edad980c1e143dc2f21e6fd25f9e4eada8edf" dependencies = [ "dyn-stack", - "gemm-common", + "gemm-common 0.18.2", + "num-complex", + "num-traits", + "paste", + "raw-cpuid", + "seq-macro", +] + +[[package]] +name = "gemm-c64" +version = "0.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "20c8aeeeec425959bda4d9827664029ba1501a90a0d1e6228e48bef741db3a3f" +dependencies = [ + "dyn-stack", + "gemm-common 0.19.0", "num-complex", "num-traits", "paste", @@ -1392,7 +2164,28 @@ dependencies = [ "num-traits", "once_cell", "paste", - "pulp", + "pulp 0.21.5", + "raw-cpuid", + "rayon", + "seq-macro", + "sysctl", +] + +[[package]] +name = "gemm-common" +version = "0.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "88027625910cc9b1085aaaa1c4bc46bb3a36aad323452b33c25b5e4e7c8e2a3e" +dependencies = [ + "bytemuck", + "dyn-stack", + "half", + "libm", + "num-complex", + "num-traits", + "once_cell", + "paste", + "pulp 0.22.2", "raw-cpuid", "rayon", "seq-macro", @@ -1406,8 +2199,26 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cff95ae3259432f3c3410eaa919033cd03791d81cebd18018393dc147952e109" dependencies = [ "dyn-stack", - "gemm-common", - "gemm-f32", + "gemm-common 0.18.2", + "gemm-f32 0.18.2", + "half", + "num-complex", + "num-traits", + "paste", + "raw-cpuid", + "rayon", + "seq-macro", +] + +[[package]] +name = "gemm-f16" +version = "0.19.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3df7a55202e6cd6739d82ae3399c8e0c7e1402859b30e4cb780e61525d9486e" +dependencies = [ + "dyn-stack", + "gemm-common 0.19.0", + "gemm-f32 0.19.0", "half", "num-complex", "num-traits", @@ -1424,7 +2235,22 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bc8d3d4385393304f407392f754cd2dc4b315d05063f62cf09f47b58de276864" dependencies = [ "dyn-stack", - "gemm-common", + "gemm-common 0.18.2", + "num-complex", + "num-traits", + "paste", + "raw-cpuid", + "seq-macro", +] + +[[package]] +name = "gemm-f32" +version = "0.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "02e0b8c9da1fbec6e3e3ab2ce6bc259ef18eb5f6f0d3e4edf54b75f9fd41a81c" +dependencies = [ + "dyn-stack", + "gemm-common 0.19.0", "num-complex", "num-traits", "paste", @@ -1439,7 +2265,22 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "35b2a4f76ce4b8b16eadc11ccf2e083252d8237c1b589558a49b0183545015bd" dependencies = [ "dyn-stack", - "gemm-common", + "gemm-common 0.18.2", + "num-complex", + "num-traits", + "paste", + "raw-cpuid", + "seq-macro", +] + +[[package]] +name = "gemm-f64" +version = "0.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "056131e8f2a521bfab322f804ccd652520c79700d81209e9d9275bbdecaadc6a" +dependencies = [ + "dyn-stack", + "gemm-common 0.19.0", "num-complex", "num-traits", "paste", @@ -1447,6 +2288,37 @@ dependencies = [ "seq-macro", ] +[[package]] +name = "genawaiter" +version = "0.99.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c86bd0361bcbde39b13475e6e36cb24c329964aa2611be285289d1e4b751c1a0" +dependencies = [ + "futures-core", + "genawaiter-macro", + "genawaiter-proc-macro", + "proc-macro-hack", +] + +[[package]] +name = "genawaiter-macro" +version = "0.99.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"0b32dfe1fdfc0bbde1f22a5da25355514b5e450c33a6af6770884c8750aedfbc" + +[[package]] +name = "genawaiter-proc-macro" +version = "0.99.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "784f84eebc366e15251c4a8c3acee82a6a6f427949776ecb88377362a9621738" +dependencies = [ + "proc-macro-error", + "proc-macro-hack", + "proc-macro2", + "quote", + "syn 1.0.109", +] + [[package]] name = "generator" version = "0.8.8" @@ -1462,11 +2334,22 @@ dependencies = [ "windows-result", ] +[[package]] +name = "generic-array" +version = "0.14.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a" +dependencies = [ + "typenum", + "version_check", + "zeroize", +] + [[package]] name = "getrandom" -version = "0.2.16" +version = "0.2.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "335ff9f135e4384c8150d6f27c6daed433577f86b4750418338c01a1a2528592" +checksum = "ff2abc00be7fca6ebc474524697ae276ad847ad0a6b3faa4bcb027e9a4614ad0" dependencies = [ "cfg-if", "js-sys", @@ -1484,37 +2367,90 @@ dependencies = [ "cfg-if", "js-sys", "libc", - "r-efi", + "r-efi 5.3.0", "wasip2", "wasm-bindgen", ] [[package]] -name = "gloo-timers" -version = "0.3.0" +name = "getrandom" +version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bbb143cf96099802033e0d4f4963b19fd2e0b728bcf076cd9cf7f6634f092994" +checksum = "0de51e6874e94e7bf76d726fc5d13ba782deca734ff60d5bb2fb2607c7406555" dependencies = [ - "futures-channel", - "futures-core", + "cfg-if", "js-sys", + "libc", + "r-efi 6.0.0", + "rand_core 0.10.1", + "wasip2", + "wasip3", "wasm-bindgen", ] [[package]] -name = "h2" -version = "0.4.13" +name = "ghash" +version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2f44da3a8150a6703ed5d34e164b875fd14c2cdab9af1252a9a1020bde2bdc54" +checksum = 
"f0d8a4362ccb29cb0b265253fb0a2728f592895ee6854fd9bc13f2ffda266ff1" dependencies = [ - "atomic-waker", - "bytes", - "fnv", - "futures-core", - "futures-sink", - "http", - "indexmap", - "slab", + "opaque-debug", + "polyval", +] + +[[package]] +name = "gif" +version = "0.14.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ee8cfcc411d9adbbaba82fb72661cc1bcca13e8bba98b364e62b2dba8f960159" +dependencies = [ + "color_quant", + "weezl", +] + +[[package]] +name = "glob" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0cc23270f6e1808e30a928bdc84dea0b9b4136a8bc82338574f23baf47bbd280" + +[[package]] +name = "gloo-timers" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbb143cf96099802033e0d4f4963b19fd2e0b728bcf076cd9cf7f6634f092994" +dependencies = [ + "futures-channel", + "futures-core", + "js-sys", + "wasm-bindgen", +] + +[[package]] +name = "group" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0f9ef7462f7c099f518d754361858f86d8a07af53ba9af0fe635bbccb151a63" +dependencies = [ + "ff", + "rand_core 0.6.4", + "subtle", +] + +[[package]] +name = "h2" +version = "0.4.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "171fefbc92fe4a4de27e0698d6a5b392d6a0e333506bc49133760b3bcf948733" +dependencies = [ + "atomic-waker", + "bytes", + "fnv", + "futures-core", + "futures-sink", + "http", + "indexmap", + "slab", "tokio", "tokio-util", "tracing", @@ -1530,6 +2466,8 @@ dependencies = [ "cfg-if", "crunchy", "num-traits", + "rand 0.9.4", + "rand_distr", "zerocopy", ] @@ -1542,6 +2480,15 @@ dependencies = [ "byteorder", ] +[[package]] +name = "hashbrown" +version = "0.15.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1" +dependencies = [ + "foldhash 0.1.5", +] + [[package]] name = 
"hashbrown" version = "0.16.1" @@ -1550,11 +2497,17 @@ checksum = "841d1cc9bed7f9236f321df977030373f4a4163ae1a7dbfe1a51a2c1a51d9100" dependencies = [ "allocator-api2", "equivalent", - "foldhash", + "foldhash 0.2.0", "serde", "serde_core", ] +[[package]] +name = "hashbrown" +version = "0.17.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed5909b6e89a2db4456e54cd5f673791d7eca6732202bbf2a9cc504fe2f9b84a" + [[package]] name = "heapless" version = "0.7.17" @@ -1580,48 +2533,119 @@ name = "hellas-cli" version = "0.1.0" dependencies = [ "anyhow", + "async-stream", + "axum", + "base64 0.22.1", + "catgrad", + "catgrad-llm", + "chatgrad", "clap", + "futures", + "hellas-core", "hellas-executor", + "hellas-pb", "hellas-rpc", + "iroh-metrics", + "libc", + "minijinja", + "minijinja-contrib", + "opentelemetry", + "opentelemetry-otlp", + "opentelemetry_sdk", + "prometheus-client", + "qrcode", + "rand 0.9.4", + "reqwest 0.13.1", + "serde", + "serde_json", + "tempfile", + "test-log", "tokio", "tokio-stream", "tonic", "tonic-iroh-transport", + "tower", "tracing", + "tracing-opentelemetry", "tracing-subscriber", ] +[[package]] +name = "hellas-core" +version = "0.1.0" +dependencies = [ + "blake3", + "k256", + "serde", + "serde_bytes", + "serde_ipld_dagcbor", + "serde_json", + "thiserror 2.0.18", +] + [[package]] name = "hellas-executor" version = "0.1.0" dependencies = [ + "async-stream", + "blake3", "catgrad", "catgrad-llm", + "catnix", + "chatgrad", + "half", + "hellas-core", + "hellas-pb", "hellas-rpc", - "hf-hub", - "minijinja", - "minijinja-contrib", + "hf-hub 0.5.0", + "iroh-blobs", + "prometheus-client", + "proptest", "serde", + "serde_bytes", + "serde_ipld_dagcbor", "serde_json", - "thiserror 1.0.69", - "tokenizers", + "thiserror 2.0.18", "tokio", "tokio-stream", + "tokio-util", "tonic", "tracing", + "uuid", ] [[package]] -name = "hellas-rpc" +name = "hellas-pb" version = "0.1.0" dependencies = [ + "glob", "prost", "tonic", - 
"tonic-iroh-transport", "tonic-prost", "tonic-prost-build", ] +[[package]] +name = "hellas-rpc" +version = "0.1.0" +dependencies = [ + "catgrad", + "catgrad-llm", + "chatgrad", + "futures", + "futures-core", + "hellas-pb", + "hf-hub 0.5.0", + "mainline", + "serde", + "serde_json", + "thiserror 2.0.18", + "tokenizers 0.21.4", + "tokio", + "tonic", + "tonic-iroh-transport", +] + [[package]] name = "hermit-abi" version = "0.5.2" @@ -1641,46 +2665,59 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "629d8f3bbeda9d148036d6b0de0a3ab947abd08ce90626327fc3547a49d59d97" dependencies = [ "dirs", - "futures", "http", - "indicatif", + "indicatif 0.17.11", "libc", "log", - "native-tls", - "num_cpus", - "rand 0.9.2", - "reqwest", + "rand 0.9.4", "serde", "serde_json", - "thiserror 2.0.17", - "tokio", - "ureq", + "thiserror 2.0.18", + "ureq 2.12.1", "windows-sys 0.60.2", ] [[package]] -name = "hickory-proto" -version = "0.25.2" +name = "hf-hub" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aef3982638978efa195ff11b305f51f1f22f4f0a6cabee7af79b383ebee6a213" +dependencies = [ + "dirs", + "http", + "indicatif 0.18.4", + "libc", + "log", + "rand 0.9.4", + "serde", + "serde_json", + "thiserror 2.0.18", + "ureq 3.3.0", + "windows-sys 0.61.2", +] + +[[package]] +name = "hickory-net" +version = "0.26.0-beta.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f8a6fe56c0038198998a6f217ca4e7ef3a5e51f46163bd6dd60b5c71ca6c6502" +checksum = "1e232f503c4cfe3f4ea6594971255ecab9f6a0080c4c8e0e17630cc701322aa4" dependencies = [ "async-trait", "bytes", "cfg-if", "data-encoding", - "enum-as-inner", "futures-channel", "futures-io", "futures-util", "h2", + "hickory-proto", "http", "idna", "ipnet", - "once_cell", - "rand 0.9.2", - "ring", + "jni 0.22.4", + "rand 0.10.1", "rustls", - "thiserror 2.0.17", + "thiserror 2.0.18", "tinyvec", "tokio", "tokio-rustls", @@ -1688,29 +2725,69 @@ dependencies = 
[ "url", ] +[[package]] +name = "hickory-proto" +version = "0.26.0-beta.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fcca12171ce774c549f35510be702f4da00ef12ca486f0f2acb2ee96f2f5ca0f" +dependencies = [ + "data-encoding", + "idna", + "ipnet", + "jni 0.22.4", + "once_cell", + "prefix-trie", + "rand 0.10.1", + "ring", + "thiserror 2.0.18", + "tinyvec", + "tracing", + "url", +] + [[package]] name = "hickory-resolver" -version = "0.25.2" +version = "0.26.0-beta.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dc62a9a99b0bfb44d2ab95a7208ac952d31060efc16241c87eaf36406fecf87a" +checksum = "1e7d2c928fa078e6640f26cf1b537b212e1688829c3944780025c7084e8bbbf6" dependencies = [ "cfg-if", "futures-util", + "hickory-net", "hickory-proto", "ipconfig", + "ipnet", + "jni 0.22.4", "moka", + "ndk-context", "once_cell", "parking_lot", - "rand 0.9.2", + "rand 0.10.1", "resolv-conf", "rustls", "smallvec", - "thiserror 2.0.17", + "system-configuration", + "thiserror 2.0.18", "tokio", "tokio-rustls", "tracing", ] +[[package]] +name = "hmac" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c49c37c09c17a53d937dfbb742eb3a961d65a994e6bcdcf37e7399d0cc8ab5e" +dependencies = [ + "digest 0.10.7", +] + +[[package]] +name = "hound" +version = "3.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "62adaabb884c94955b19907d60019f4e145d091c75345379e70d1ee696f7854f" + [[package]] name = "http" version = "1.4.0" @@ -1758,19 +2835,18 @@ checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9" [[package]] name = "hybrid-array" -version = "0.4.5" +version = "0.4.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f471e0a81b2f90ffc0cb2f951ae04da57de8baa46fa99112b062a5173a5088d0" +checksum = "08d46837a0ed51fe95bd3b05de33cd64a1ee88fc797477ca48446872504507c5" dependencies = [ "typenum", - "zeroize", ] [[package]] 
name = "hyper" -version = "1.8.1" +version = "1.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2ab2d4f250c3d7b1c9fcdff1cece94ea4e2dfbec68614f7b87cb205f24ca9d11" +checksum = "6299f016b246a94207e63da54dbe807655bf9e00044f73ded42c3ac5305fbcca" dependencies = [ "atomic-waker", "bytes", @@ -1783,7 +2859,6 @@ dependencies = [ "httpdate", "itoa", "pin-project-lite", - "pin-utils", "smallvec", "tokio", "want", @@ -1791,19 +2866,18 @@ dependencies = [ [[package]] name = "hyper-rustls" -version = "0.27.7" +version = "0.27.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e3c93eb611681b207e1fe55d5a71ecf91572ec8a6705cdb6857f7d8d5242cf58" +checksum = "33ca68d021ef39cf6463ab54c1d0f5daf03377b70561305bb89a8f83aab66e0f" dependencies = [ "http", "hyper", "hyper-util", "rustls", - "rustls-pki-types", "tokio", "tokio-rustls", "tower-service", - "webpki-roots 1.0.5", + "webpki-roots 1.0.7", ] [[package]] @@ -1819,32 +2893,15 @@ dependencies = [ "tower-service", ] -[[package]] -name = "hyper-tls" -version = "0.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "70206fc6890eaca9fde8a0bf71caa2ddfc9fe045ac9e5c70df101a7dbde866e0" -dependencies = [ - "bytes", - "http-body-util", - "hyper", - "hyper-util", - "native-tls", - "tokio", - "tokio-native-tls", - "tower-service", -] - [[package]] name = "hyper-util" -version = "0.1.19" +version = "0.1.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "727805d60e7938b76b826a6ef209eb70eaa1812794f9424d4a4e2d740662df5f" +checksum = "96547c2556ec9d12fb1578c4eaf448b04993e7fb79cbaad930a656880a6bdfa0" dependencies = [ "base64 0.22.1", "bytes", "futures-channel", - "futures-core", "futures-util", "http", "http-body", @@ -1853,19 +2910,17 @@ dependencies = [ "libc", "percent-encoding", "pin-project-lite", - "socket2 0.6.1", - "system-configuration", + "socket2", "tokio", "tower-service", "tracing", - "windows-registry", ] 
[[package]] name = "iana-time-zone" -version = "0.1.64" +version = "0.1.65" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "33e57f83510bb73707521ebaffa789ec8caf86f9657cad665b092b581d40e9fb" +checksum = "e31bc9ad994ba00e440a8aa5c9ef0ec67d5cb5e5cb0cc7f8b744a35b389cc470" dependencies = [ "android_system_properties", "core-foundation-sys", @@ -1887,22 +2942,23 @@ dependencies = [ [[package]] name = "icu_collections" -version = "2.1.1" +version = "2.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4c6b649701667bbe825c3b7e6388cb521c23d88644678e83c0c4d0a621a34b43" +checksum = "2984d1cd16c883d7935b9e07e44071dca8d917fd52ecc02c04d5fa0b5a3f191c" dependencies = [ "displaydoc", "potential_utf", - "yoke", + "utf8_iter", + "yoke 0.8.2", "zerofrom", "zerovec", ] [[package]] name = "icu_locale_core" -version = "2.1.1" +version = "2.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "edba7861004dd3714265b4db54a3c390e880ab658fec5f7db895fae2046b5bb6" +checksum = "92219b62b3e2b4d88ac5119f8904c10f8f61bf7e95b640d25ba3075e6cac2c29" dependencies = [ "displaydoc", "litemap", @@ -1913,9 +2969,9 @@ dependencies = [ [[package]] name = "icu_normalizer" -version = "2.1.1" +version = "2.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f6c8828b67bf8908d82127b2054ea1b4427ff0230ee9141c54251934ab1b599" +checksum = "c56e5ee99d6e3d33bd91c5d85458b6005a22140021cc324cea84dd0e72cff3b4" dependencies = [ "icu_collections", "icu_normalizer_data", @@ -1927,15 +2983,15 @@ dependencies = [ [[package]] name = "icu_normalizer_data" -version = "2.1.1" +version = "2.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7aedcccd01fc5fe81e6b489c15b247b8b0690feb23304303a9e560f37efc560a" +checksum = "da3be0ae77ea334f4da67c12f149704f19f81d1adf7c51cf482943e84a2bad38" [[package]] name = "icu_properties" -version = "2.1.2" +version = "2.2.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "020bfc02fe870ec3a66d93e677ccca0562506e5872c650f893269e08615d74ec" +checksum = "bee3b67d0ea5c2cca5003417989af8996f8604e34fb9ddf96208a033901e70de" dependencies = [ "icu_collections", "icu_locale_core", @@ -1947,31 +3003,43 @@ dependencies = [ [[package]] name = "icu_properties_data" -version = "2.1.2" +version = "2.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "616c294cf8d725c6afcd8f55abc17c56464ef6211f9ed59cccffe534129c77af" +checksum = "8e2bbb201e0c04f7b4b3e14382af113e17ba4f63e2c9d2ee626b720cbce54a14" [[package]] name = "icu_provider" -version = "2.1.1" +version = "2.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "85962cf0ce02e1e0a629cc34e7ca3e373ce20dda4c4d7294bbd0bf1fdb59e614" +checksum = "139c4cf31c8b5f33d7e199446eff9c1e02decfc2f0eec2c8d71f65befa45b421" dependencies = [ "displaydoc", "icu_locale_core", "writeable", - "yoke", + "yoke 0.8.2", "zerofrom", "zerotrie", "zerovec", ] +[[package]] +name = "id-arena" +version = "2.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d3067d79b975e8844ca9eb072e16b31c3c1c36928edf9c6789548c524d0d954" + [[package]] name = "ident_case" version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39" +[[package]] +name = "identity-hash" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dfdd7caa900436d8f13b2346fe10257e0c05c1f1f9e351f4f5d57c03bd5f45da" + [[package]] name = "idna" version = "1.1.0" @@ -1985,9 +3053,9 @@ dependencies = [ [[package]] name = "idna_adapter" -version = "1.2.1" +version = "1.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3acae9609540aa318d1bc588455225fb2085b9ed0c4f6bd0d9d5bcd86f1a0344" +checksum = 
"cb68373c0d6620ef8105e855e7745e18b0d00d3bdb07fb532e434244cdb9a714" dependencies = [ "icu_normalizer", "icu_properties", @@ -1995,11 +3063,10 @@ dependencies = [ [[package]] name = "igd-next" -version = "0.16.2" +version = "0.17.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "516893339c97f6011282d5825ac94fc1c7aad5cad26bdc2d0cee068c0bf97f97" +checksum = "bac9a3c8278f43b4cd8463380f4a25653ac843e5b177e1d3eaf849cc9ba10d4d" dependencies = [ - "async-trait", "attohttpc", "bytes", "futures", @@ -2008,20 +3075,62 @@ dependencies = [ "hyper", "hyper-util", "log", - "rand 0.9.2", + "rand 0.10.1", "tokio", "url", "xmltree", ] +[[package]] +name = "image" +version = "0.25.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85ab80394333c02fe689eaf900ab500fbd0c2213da414687ebf995a65d5a6104" +dependencies = [ + "bytemuck", + "byteorder-lite", + "color_quant", + "exr", + "gif", + "image-webp", + "moxcms", + "num-traits", + "png", + "qoi", + "ravif", + "rayon", + "rgb", + "tiff", + "zune-core", + "zune-jpeg", +] + +[[package]] +name = "image-webp" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "525e9ff3e1a4be2fbea1fdf0e98686a6d98b4d8f937e1bf7402245af1909e8c3" +dependencies = [ + "byteorder-lite", + "quick-error 2.0.1", +] + +[[package]] +name = "imgref" +version = "1.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "40fac9d56ed6437b198fddba683305e8e2d651aa42647f00f5ae542e7f5c94a2" + [[package]] name = "indexmap" -version = "2.12.1" +version = "2.14.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0ad4bb2b565bca0645f4d68c5c9af97fba094e9791da685bf83cb5f3ce74acf2" +checksum = "d466e9454f08e4a911e14806c24e16fba1b4c121d1ea474396f396069cf949d9" dependencies = [ "equivalent", - "hashbrown", + "hashbrown 0.17.1", + "serde", + "serde_core", ] [[package]] @@ -2030,102 +3139,130 @@ version = "0.17.11" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "183b3088984b400f4cfac3620d5e076c84da5364016b4f49473de574b2586235" dependencies = [ - "console", + "console 0.15.11", "number_prefix", "portable-atomic", "unicode-width", "web-time", ] +[[package]] +name = "indicatif" +version = "0.18.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "25470f23803092da7d239834776d653104d551bc4d7eacaf31e6837854b8e9eb" +dependencies = [ + "console 0.16.3", + "portable-atomic", + "unicode-width", + "unit-prefix", + "web-time", +] + [[package]] name = "inout" -version = "0.2.2" +version = "0.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4250ce6452e92010fdf7268ccc5d14faa80bb12fc741938534c58f16804e03c7" +checksum = "879f10e63c20629ecabbb64a8010319738c66a5cd0c29b02d63d272b03751d01" dependencies = [ - "hybrid-array", + "generic-array", ] [[package]] -name = "instant" -version = "0.1.13" +name = "inplace-vec-builder" +version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e0242819d153cba4b4b05a5a8f2a7e9bbf97b6055b2a002b395c96b5ff3c0222" +checksum = "cf64c2edc8226891a71f127587a2861b132d2b942310843814d5001d99a1d307" dependencies = [ - "cfg-if", - "js-sys", - "wasm-bindgen", - "web-sys", + "smallvec", +] + +[[package]] +name = "interpolate_name" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c34819042dc3d3971c46c2190835914dfbe0c3c13f61449b2997f4e9722dfa60" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", ] [[package]] name = "ipconfig" -version = "0.3.2" +version = "0.3.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b58db92f96b720de98181bbbe63c831e87005ab460c1bf306eb2622b4707997f" +checksum = "4d40460c0ce33d6ce4b0630ad68ff63d6661961c48b6dba35e5a4d81cfb48222" dependencies = [ - "socket2 0.5.10", + "socket2", "widestring", - "windows-sys 0.48.0", - "winreg", + 
"windows-registry", + "windows-result", + "windows-sys 0.61.2", ] [[package]] -name = "ipnet" -version = "2.11.0" +name = "ipld-core" +version = "0.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "469fb0b9cefa57e3ef31275ee7cacb78f2fdca44e4765491884a2b119d4eb130" +checksum = "090f624976d72f0b0bb71b86d58dc16c15e069193067cb3a3a09d655246cbbda" +dependencies = [ + "cid", + "serde", + "serde_bytes", +] [[package]] -name = "iri-string" -version = "0.7.10" +name = "ipnet" +version = "2.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c91338f0783edbd6195decb37bae672fd3b165faffb89bf7b9e6942f8b1a731a" +checksum = "d98f6fed1fde3f8c21bc40a1abb88dd75e67924f9cffc3ef95607bad8017f8e2" dependencies = [ - "memchr", "serde", ] [[package]] name = "iroh" -version = "0.95.1" +version = "0.98.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2374ba3cdaac152dc6ada92d971f7328e6408286faab3b7350842b2ebbed4789" +checksum = "9881b221c7c645d90594cbd331012f7cccb914894288a6cf5538a9115f6d0f3e" dependencies = [ - "aead", "backon", + "blake3", "bytes", "cfg_aliases", - "crypto_box", + "ctutils", "data-encoding", + "der 0.8.0-rc.10", "derive_more", "ed25519-dalek", "futures-util", - "getrandom 0.3.4", + "getrandom 0.4.2", "hickory-resolver", "http", - "igd-next", - "instant", + "ipnet", "iroh-base", + "iroh-dns", "iroh-metrics", - "iroh-quinn", - "iroh-quinn-proto", - "iroh-quinn-udp", "iroh-relay", + "mainline", "n0-error", "n0-future", "n0-watcher", - "netdev", "netwatch", + "noq", + "noq-proto", + "noq-udp", + "papaya", "pin-project", - "pkarr", - "pkcs8", + "pkcs8 0.11.0-rc.10", + "portable-atomic", "portmapper", - "rand 0.9.2", - "reqwest", + "rand 0.10.1", + "reqwest 0.13.1", + "rustc-hash", "rustls", "rustls-pki-types", - "rustls-platform-verifier", "rustls-webpki", "serde", "smallvec", @@ -2138,174 +3275,160 @@ dependencies = [ "tracing", "url", "wasm-bindgen-futures", - "webpki-roots 1.0.5", - 
"z32", + "webpki-roots 1.0.7", ] [[package]] name = "iroh-base" -version = "0.95.1" +version = "0.98.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "25a8c5fb1cc65589f0d7ab44269a76f615a8c4458356952c9b0ef1c93ea45ff8" +checksum = "738865784637830fb14204ebd3047922db83bc1816a59027af29579b9c27bd99" dependencies = [ "curve25519-dalek", "data-encoding", + "data-encoding-macro", "derive_more", + "digest 0.11.3", "ed25519-dalek", + "getrandom 0.4.2", "n0-error", - "rand_core 0.9.3", + "rand 0.10.1", "serde", + "sha2 0.11.0-rc.5", "url", "zeroize", "zeroize_derive", ] [[package]] -name = "iroh-gossip" -version = "0.95.0" +name = "iroh-blobs" +version = "0.100.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "026dd31b487ec5e80ac0240f4eb70cd6c0a2800f6ef44beca5329443c194bb22" +checksum = "04dd8da14b7c35d8c0e82a246939ee532ce4d9eb30b0e353a5a9470bc8f52b34" dependencies = [ - "blake3", + "arrayvec", + "bao-tree", "bytes", + "cfg_aliases", + "chrono", + "constant_time_eq", "data-encoding", "derive_more", - "ed25519-dalek", - "futures-concurrency", "futures-lite", - "futures-util", + "genawaiter", + "getrandom 0.4.2", "hex", - "indexmap", "iroh", "iroh-base", + "iroh-io", "iroh-metrics", + "iroh-tickets", "irpc", "n0-error", "n0-future", + "nested_enum_utils", "postcard", - "rand 0.9.2", + "rand 0.10.1", + "range-collections", + "redb", + "ref-cast", + "reflink-copy", + "self_cell", "serde", + "smallvec", "tokio", - "tokio-util", "tracing", ] [[package]] -name = "iroh-metrics" -version = "0.37.0" +name = "iroh-dns" +version = "0.98.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "79e3381da7c93c12d353230c74bba26131d1c8bf3a4d8af0fec041546454582e" +checksum = "ca474630d1e62ddef83149db6babe6a1055d901df9054349d31b22df99811b92" dependencies = [ - "iroh-metrics-derive", - "itoa", + "derive_more", + "iroh-base", "n0-error", - "postcard", - "ryu", - "serde", - "tracing", -] - -[[package]] -name = 
"iroh-metrics-derive" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d4e12bd0763fd16062f5cc5e8db15dd52d26e75a8af4c7fb57ccee3589b344b8" -dependencies = [ - "heck", - "proc-macro2", - "quote", - "syn", + "n0-future", + "simple-dns", + "strum", ] [[package]] -name = "iroh-quinn" -version = "0.14.0" +name = "iroh-io" +version = "0.6.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0cde160ebee7aabede6ae887460cd303c8b809054224815addf1469d54a6fcf7" +checksum = "e0a5feb781017b983ff1b155cd1faf8174da2acafd807aa482876da2d7e6577a" dependencies = [ "bytes", - "cfg_aliases", - "iroh-quinn-proto", - "iroh-quinn-udp", - "pin-project-lite", - "rustc-hash", - "rustls", - "socket2 0.5.10", - "thiserror 2.0.17", + "futures-lite", + "pin-project", + "smallvec", "tokio", - "tracing", - "web-time", ] [[package]] -name = "iroh-quinn-proto" -version = "0.13.0" +name = "iroh-metrics" +version = "0.38.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "929d5d8fa77d5c304d3ee7cae9aede31f13908bd049f9de8c7c0094ad6f7c535" +checksum = "761b45ba046134b11eb3e432fa501616b45c4bf3a30c21717578bc07aa6461dd" dependencies = [ - "bytes", - "getrandom 0.2.16", - "rand 0.8.5", - "ring", - "rustc-hash", - "rustls", - "rustls-pki-types", - "slab", - "thiserror 2.0.17", - "tinyvec", + "iroh-metrics-derive", + "itoa", + "n0-error", + "portable-atomic", + "postcard", + "ryu", + "serde", "tracing", - "web-time", ] [[package]] -name = "iroh-quinn-udp" -version = "0.5.7" +name = "iroh-metrics-derive" +version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c53afaa1049f7c83ea1331f5ebb9e6ebc5fdd69c468b7a22dd598b02c9bcc973" +checksum = "cab063c2bfd6c3d5a33a913d4fdb5252f140db29ec67c704f20f3da7e8f92dbf" dependencies = [ - "cfg_aliases", - "libc", - "once_cell", - "socket2 0.5.10", - "tracing", - "windows-sys 0.59.0", + "heck", + "proc-macro2", + "quote", + "syn 2.0.117", ] 
[[package]] name = "iroh-relay" -version = "0.95.1" +version = "0.98.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "43fbdf2aeffa7d6ede1a31f6570866c2199b1cee96a0b563994623795d1bac2c" +checksum = "4aa6e9a7277bfbb439739c52b57eb5f9288030983928412022b8e94a43d4d838" dependencies = [ "blake3", "bytes", "cfg_aliases", "data-encoding", "derive_more", - "getrandom 0.3.4", + "getrandom 0.4.2", "hickory-resolver", "http", "http-body-util", "hyper", "hyper-util", "iroh-base", + "iroh-dns", "iroh-metrics", - "iroh-quinn", - "iroh-quinn-proto", - "lru 0.16.3", + "lru", "n0-error", "n0-future", + "noq", + "noq-proto", "num_enum", "pin-project", - "pkarr", "postcard", - "rand 0.9.2", - "reqwest", + "rand 0.10.1", + "reqwest 0.13.1", "rustls", "rustls-pki-types", "serde", "serde_bytes", - "sha1", "strum", "tokio", "tokio-rustls", @@ -2313,22 +3436,38 @@ dependencies = [ "tokio-websockets", "tracing", "url", - "webpki-roots 1.0.5", + "vergen-gitcl", + "webpki-roots 1.0.7", "ws_stream_wasm", - "z32", +] + +[[package]] +name = "iroh-tickets" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09579438a34a147dcdce8a67cdf59bd53a197bfefe71da1a8e94df9aec0583ae" +dependencies = [ + "data-encoding", + "derive_more", + "iroh-base", + "n0-error", + "postcard", + "serde", ] [[package]] name = "irpc" -version = "0.11.0" +version = "0.14.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0bee97aaa18387c4f0aae61058195dc9f9dea3e41c0e272973fe3e9bf611563d" +checksum = "26bacc8d71f54f16cb5ae82745cfca440ad8ecd09b4480d415b8d9dc78146432" dependencies = [ "futures-util", "irpc-derive", "n0-error", "n0-future", + "postcard", "serde", + "smallvec", "tokio", "tokio-util", "tracing", @@ -2336,13 +3475,13 @@ dependencies = [ [[package]] name = "irpc-derive" -version = "0.9.0" +version = "0.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"58148196d2230183c9679431ac99b57e172000326d664e8456fa2cd27af6505a" +checksum = "4651422b9d7af09fa1437a5fabbd9e074162b502a1af7f5bae8b439eaf3e049f" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.117", ] [[package]] @@ -2362,9 +3501,9 @@ dependencies = [ [[package]] name = "itoa" -version = "1.0.17" +version = "1.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "92ecc6618181def0457392ccd0ee51198e065e016d1d527a7ac1b6dc7c1f09d2" +checksum = "8f42a60cbdf9a97f5d2305f08a87dc4e09308d1276d28c869c684d7777685682" [[package]] name = "jni" @@ -2375,68 +3514,187 @@ dependencies = [ "cesu8", "cfg-if", "combine", - "jni-sys", + "jni-sys 0.3.1", "log", "thiserror 1.0.69", "walkdir", "windows-sys 0.45.0", ] +[[package]] +name = "jni" +version = "0.22.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5efd9a482cf3a427f00d6b35f14332adc7902ce91efb778580e180ff90fa3498" +dependencies = [ + "cfg-if", + "combine", + "jni-macros", + "jni-sys 0.4.1", + "log", + "simd_cesu8", + "thiserror 2.0.18", + "walkdir", + "windows-link", +] + +[[package]] +name = "jni-macros" +version = "0.22.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a00109accc170f0bdb141fed3e393c565b6f5e072365c3bd58f5b062591560a3" +dependencies = [ + "proc-macro2", + "quote", + "rustc_version", + "simd_cesu8", + "syn 2.0.117", +] + [[package]] name = "jni-sys" -version = "0.3.0" +version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8eaf4bc02d17cbdd7ff4c7438cafcdf7fb9a4613313ad11b4f8fefe7d3fa0130" +checksum = "41a652e1f9b6e0275df1f15b32661cf0d4b78d4d87ddec5e0c3c20f097433258" +dependencies = [ + "jni-sys 0.4.1", +] + +[[package]] +name = "jni-sys" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c6377a88cb3910bee9b0fa88d4f42e1d2da8e79915598f65fb0c7ee14c878af2" +dependencies = [ + "jni-sys-macros", +] + +[[package]] +name = 
"jni-sys-macros" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "38c0b942f458fe50cdac086d2f946512305e5631e720728f2a61aabcd47a6264" +dependencies = [ + "quote", + "syn 2.0.117", +] + +[[package]] +name = "jobserver" +version = "0.1.34" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9afb3de4395d6b3e67a780b6de64b51c978ecf11cb9a462c66be7d4ca9039d33" +dependencies = [ + "getrandom 0.3.4", + "libc", +] [[package]] name = "js-sys" -version = "0.3.83" +version = "0.3.98" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "464a3709c7f55f1f721e5389aa6ea4e3bc6aba669353300af094b29ffbdde1d8" +checksum = "67df7112613f8bfd9150013a0314e196f4800d3201ae742489d999db2f979f08" dependencies = [ + "cfg-if", + "futures-util", "once_cell", "wasm-bindgen", ] +[[package]] +name = "k256" +version = "0.13.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f6e3919bbaa2945715f0bb6d3934a173d1e9a59ac23767fbaaef277265a7411b" +dependencies = [ + "cfg-if", + "ecdsa", + "elliptic-curve", + "once_cell", + "sha2 0.10.9", + "signature 2.2.0", +] + [[package]] name = "lazy_static" version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" +[[package]] +name = "leb128fmt" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09edd9e8b54e49e587e4f6295a7d29c3ea94d469cb40ab8ca70b288248a81db2" + +[[package]] +name = "lebe" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a79a3332a6609480d7d0c9eab957bca6b455b91bb84e66d19f5ff66294b85b8" + [[package]] name = "libc" -version = "0.2.179" +version = "0.2.186" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68ab91017fe16c622486840e4c83c9a37afeff978bd239b5293d61ece587de66" + +[[package]] +name = 
"libfuzzer-sys" +version = "0.4.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f12a681b7dd8ce12bff52488013ba614b869148d54dd79836ab85aafdd53f08d" +dependencies = [ + "arbitrary", + "cc", +] + +[[package]] +name = "libloading" +version = "0.8.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c5a2d376baa530d1238d133232d15e239abad80d05838b4b59354e5268af431f" +checksum = "d7c4b02199fee7c5d21a5ae7d8cfa79a6ef5bb2fc834d6e9058e89c825efdc55" +dependencies = [ + "cfg-if", + "windows-link", +] + +[[package]] +name = "libloading" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "754ca22de805bb5744484a5b151a9e1a8e837d5dc232c2d7d8c2e3492edc8b60" +dependencies = [ + "cfg-if", + "windows-link", +] [[package]] name = "libm" -version = "0.2.15" +version = "0.2.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f9fbbcab51052fe104eb5e5d351cf728d30a5be1fe14d9be8a3b097481fb97de" +checksum = "b6d2cec3eae94f9f509c767b45932f1ada8350c4bdb85af2fcab4a3c14807981" [[package]] name = "libredox" -version = "0.1.12" +version = "0.1.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3d0b95e02c851351f877147b7deea7b1afb1df71b63aa5f8270716e0c5720616" +checksum = "e02f3bb43d335493c96bf3fd3a321600bf6bd07ed34bc64118e9293bdffea46c" dependencies = [ - "bitflags", "libc", ] [[package]] name = "linux-raw-sys" -version = "0.11.0" +version = "0.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "df1d3c3b53da64cf5760482273a98e575c651a67eec7f77df96b5b642de8f039" +checksum = "32a66949e030da00e8c7d4434b251670a91556f4144941d37452769c25d58a53" [[package]] name = "litemap" -version = "0.8.1" +version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6373607a59f0be73a39b6fe456b8192fcc3585f602af20751600e974dd455e77" +checksum = 
"92daf443525c4cce67b150400bc2316076100ce0b3686209eb8cf3c31612e6f0" [[package]] name = "litrs" @@ -2473,18 +3731,21 @@ dependencies = [ ] [[package]] -name = "lru" -version = "0.13.0" +name = "loop9" +version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "227748d55f2f0ab4735d87fd623798cb6b664512fe979705f829c9f81c934465" +checksum = "0fae87c125b03c1d2c0150c90365d7d6bcc53fb73a9acaef207d2d065860f062" +dependencies = [ + "imgref", +] [[package]] name = "lru" -version = "0.16.3" +version = "0.16.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a1dc47f592c06f33f8e3aea9591776ec7c9f9e4124778ff8a3c3b87159f7e593" +checksum = "7f66e8d5d03f609abc3a39e6f08e4164ebf1447a732906d39eb9b99b7919ef39" dependencies = [ - "hashbrown", + "hashbrown 0.16.1", ] [[package]] @@ -2493,6 +3754,12 @@ version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "112b39cec0b298b6c1999fee3e31427f74f676e4cb9879ed1a121b43661a4154" +[[package]] +name = "mac-addr" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3d25b0e0b648a86960ac23b7ad4abb9717601dec6f66c165f5b037f3f03065f" + [[package]] name = "macro_rules_attribute" version = "0.2.2" @@ -2511,26 +3778,47 @@ checksum = "670fdfda89751bc4a84ac13eaa63e205cf0fd22b4c9a5fbfa085b63c1f1d3a30" [[package]] name = "mainline" -version = "6.0.1" +version = "6.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6ff27d378ca495eaf3be8616d5d7319c1c18e93fd60e13698fcdc7e19448f1a4" +checksum = "578beb3b6dcbe6f3f60a89547a13b34d36bda41dc056540bac5f4e4340ebf25c" dependencies = [ "crc", + "digest 0.11.3", "document-features", "dyn-clone", "ed25519-dalek", "flume", "futures-lite", - "getrandom 0.3.4", - "lru 0.16.3", + "getrandom 0.4.2", + "lru", "serde", "serde_bencode", "serde_bytes", "sha1_smol", - "thiserror 2.0.17", + "thiserror 2.0.18", "tracing", ] +[[package]] +name = "malloc_buf" 
+version = "0.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "62bb907fe88d54d8d9ce32a3cceab4218ed2f6b7d35617cafe9adf84e43919cb" +dependencies = [ + "libc", +] + +[[package]] +name = "match-lookup" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "757aee279b8bdbb9f9e676796fd459e4207a1f986e87886700abf589f5abf771" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + [[package]] name = "matchers" version = "0.2.0" @@ -2547,28 +3835,50 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "47e1ffaa40ddd1f3ed91f717a33c8c0ee23fff369e3aa8772b9605cc1d22f4c3" [[package]] -name = "matrixmultiply" -version = "0.3.10" +name = "maybe-rayon" +version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a06de3016e9fae57a36fd14dba131fccf49f74b40b7fbdb472f96e361ec71a08" +checksum = "8ea1f30cedd69f0a2954655f7188c6a834246d2bcf1e315e2ac40c4b24dc9519" dependencies = [ - "autocfg", - "rawpointer", + "cfg-if", + "rayon", ] [[package]] name = "memchr" -version = "2.7.6" +version = "2.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f52b00d39961fc5b2736ea853c9cc86238e165017a493d1d5c8eac6bdc4cc273" +checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79" [[package]] name = "memmap2" -version = "0.9.9" +version = "0.9.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "744133e4a0e0a658e1374cf3bf8e415c4052a15a111acd372764c55b4177d490" +checksum = "714098028fe011992e1c3962653c96b2d578c4b4bce9036e15ff220319b1e0e3" dependencies = [ "libc", + "stable_deref_trait", +] + +[[package]] +name = "memo-map" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "38d1115007560874e373613744c6fba374c17688327a71c1476d1a5954cc857b" + +[[package]] +name = "metal" +version = "0.29.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "7ecfd3296f8c56b7c1f6fbac3c71cefa9d78ce009850c45000015f206dc7fa21" +dependencies = [ + "bitflags 2.11.1", + "block", + "core-graphics-types", + "foreign-types", + "log", + "objc", + "paste", ] [[package]] @@ -2579,18 +3889,21 @@ checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a" [[package]] name = "minijinja" -version = "2.14.0" +version = "2.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "12ea9ac0a51fb5112607099560fdf0f90366ab088a2a9e6e8ae176794e9806aa" +checksum = "805bfd7352166bae857ee569628b52bcd85a1cecf7810861ebceb1686b72b75d" dependencies = [ + "indexmap", + "memo-map", "serde", + "serde_json", ] [[package]] name = "minijinja-contrib" -version = "2.14.0" +version = "2.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "be6ad8bbc21c256d5f2f5494699d5d69d519b8510d672a0e43b7bfa3a56c388a" +checksum = "45092d80391870622fcf3bd82f5d2af18f99533ea60debb4bc9db0c76f0e809a" dependencies = [ "minijinja", "serde", @@ -2614,9 +3927,9 @@ dependencies = [ [[package]] name = "mio" -version = "1.1.1" +version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a69bcab0ad47271a0234d9422b131806bf3968021e5dc9328caf2d4cd58557fc" +checksum = "50b7e5b27aa02a74bac8c3f23f448f8d87ff11f92d3aac1a6ed369ee08cc56c1" dependencies = [ "libc", "wasi", @@ -2625,9 +3938,9 @@ dependencies = [ [[package]] name = "moka" -version = "0.12.12" +version = "0.12.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a3dec6bd31b08944e08b58fd99373893a6c17054d6f3ea5006cc894f4f4eee2a" +checksum = "957228ad12042ee839f93c8f257b62b4c0ab5eaae1d4fa60de53b27c9d7c5046" dependencies = [ "crossbeam-channel", "crossbeam-epoch", @@ -2659,7 +3972,39 @@ checksum = "e4db6d5580af57bf992f59068d4ea26fd518574ff48d7639b255a36f9de6e7e9" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.117", +] + 
+[[package]] +name = "moxcms" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bb85c154ba489f01b25c0d36ae69a87e4a1c73a72631fc6c0eb6dde34a73e44b" +dependencies = [ + "num-traits", + "pxfm", +] + +[[package]] +name = "multibase" +version = "0.9.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8694bb4835f452b0e3bb06dbebb1d6fc5385b6ca1caf2e55fd165c042390ec77" +dependencies = [ + "base-x", + "base256emoji", + "data-encoding", + "data-encoding-macro", +] + +[[package]] +name = "multihash" +version = "0.19.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "577c63b00ad74d57e8c9aa870b5fccebf2fd64a308a5aee9f1bb88e4aea19447" +dependencies = [ + "serde", + "unsigned-varint", ] [[package]] @@ -2670,24 +4015,23 @@ checksum = "1d87ecb2933e8aeadb3e3a02b828fed80a7528047e68b4f424523a0981a3a084" [[package]] name = "n0-error" -version = "0.1.2" +version = "0.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c7d5969a2f40e9d9ed121a789c415f4114ac2b28e5731c080bdefee217d3b3fb" +checksum = "af4782b4baf92d686d161c15460c83d16ebcfd215918763903e9619842665cae" dependencies = [ - "anyhow", "n0-error-macros", "spez", ] [[package]] name = "n0-error-macros" -version = "0.1.2" +version = "0.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9a6908df844696d9af91c7c3950d50e52d67df327d02a95367f95bbf177d6556" +checksum = "03755949235714b2b307e5ae89dd8c1c2531fb127d9b8b7b4adf9c876cd3ed18" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.117", ] [[package]] @@ -2713,9 +4057,9 @@ dependencies = [ [[package]] name = "n0-watcher" -version = "0.5.0" +version = "0.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "38acf13c1ddafc60eb7316d52213467f8ccb70b6f02b65e7d97f7799b1f50be4" +checksum = "38795f7932e6e9d1c6e989270ef5b3ff24ebb910e2c9d4bed2d28d8bae3007dc" dependencies = [ "derive_more", 
"n0-error", @@ -2723,52 +4067,43 @@ dependencies = [ ] [[package]] -name = "native-tls" -version = "0.2.14" +name = "ndk-context" +version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "87de3442987e9dbec73158d5c715e7ad9072fda936bb03d19d7fa10e00520f0e" -dependencies = [ - "libc", - "log", - "openssl", - "openssl-probe 0.1.6", - "openssl-sys", - "schannel", - "security-framework 2.11.1", - "security-framework-sys", - "tempfile", -] +checksum = "27b02d87554356db9e9a873add8782d4ea6e3e58ea071a9adb9a2e8ddb884a8b" [[package]] -name = "ndarray" -version = "0.17.1" +name = "nested_enum_utils" +version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0c7c9125e8f6f10c9da3aad044cc918cf8784fa34de857b1aa68038eb05a50a9" +checksum = "b1d5475271bdd36a4a2769eac1ef88df0f99428ea43e52dfd8b0ee5cb674695f" dependencies = [ - "matrixmultiply", - "num-complex", - "num-integer", - "num-traits", - "portable-atomic", - "portable-atomic-util", - "rawpointer", + "proc-macro-crate", + "proc-macro2", + "quote", + "syn 2.0.117", ] [[package]] name = "netdev" -version = "0.38.2" +version = "0.42.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "67ab878b4c90faf36dab10ea51d48c69ae9019bcca47c048a7c9b273d5d7a823" +checksum = "e30af1a5073b82356d9317c18226826370b4288eba2f71c7e84e18bae51b3847" dependencies = [ + "block2", + "dispatch2", "dlopen2", "ipnet", "libc", + "mac-addr", "netlink-packet-core", - "netlink-packet-route", + "netlink-packet-route 0.29.0", "netlink-sys", + "objc2-core-foundation", + "objc2-system-configuration", "once_cell", - "system-configuration", - "windows-sys 0.59.0", + "plist", + "windows-sys 0.61.2", ] [[package]] @@ -2782,11 +4117,23 @@ dependencies = [ [[package]] name = "netlink-packet-route" -version = "0.25.1" +version = "0.29.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"df9854ea6ad14e3f4698a7f03b65bce0833dd2d81d594a0e4a984170537146b6" +dependencies = [ + "bitflags 2.11.1", + "libc", + "log", + "netlink-packet-core", +] + +[[package]] +name = "netlink-packet-route" +version = "0.30.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3ec2f5b6839be2a19d7fa5aab5bc444380f6311c2b693551cb80f45caaa7b5ef" +checksum = "be8919612f6028ab4eacbbfe1234a9a43e3722c6e0915e7ff519066991905092" dependencies = [ - "bitflags", + "bitflags 2.11.1", "libc", "log", "netlink-packet-core", @@ -2803,17 +4150,17 @@ dependencies = [ "log", "netlink-packet-core", "netlink-sys", - "thiserror 2.0.17", + "thiserror 2.0.18", ] [[package]] name = "netlink-sys" -version = "0.8.7" +version = "0.8.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "16c903aa70590cb93691bf97a767c8d1d6122d2cc9070433deb3bbf36ce8bd23" +checksum = "cd6c30ed10fa69cc491d491b85cc971f6bdeb8e7367b7cde2ee6cc878d583fae" dependencies = [ "bytes", - "futures", + "futures-util", "libc", "log", "tokio", @@ -2821,15 +4168,14 @@ dependencies = [ [[package]] name = "netwatch" -version = "0.12.0" +version = "0.16.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "26f2acd376ef48b6c326abf3ba23c449e0cb8aa5c2511d189dd8a8a3bfac889b" +checksum = "6fc0d4b4134425d9834e591b1a6f807ea365c6d941d738942215564af5f28a97" dependencies = [ "atomic-waker", "bytes", "cfg_aliases", "derive_more", - "iroh-quinn-udp", "js-sys", "libc", "n0-error", @@ -2837,12 +4183,15 @@ dependencies = [ "n0-watcher", "netdev", "netlink-packet-core", - "netlink-packet-route", + "netlink-packet-route 0.30.0", "netlink-proto", "netlink-sys", + "noq-udp", + "objc2-core-foundation", + "objc2-system-configuration", "pin-project-lite", "serde", - "socket2 0.6.1", + "socket2", "time", "tokio", "tokio-util", @@ -2853,6 +4202,21 @@ dependencies = [ "wmi", ] +[[package]] +name = "new_debug_unreachable" +version = "1.0.6" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "650eef8c711430f1a879fdd01d4745a7deea475becfb90269c06775983bbf086" + +[[package]] +name = "no_std_io2" +version = "0.9.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "418abd1b6d34fbf6cae440dc874771b0525a604428704c76e48b29a5e67b8003" +dependencies = [ + "memchr", +] + [[package]] name = "nom" version = "7.1.3" @@ -2864,18 +4228,79 @@ dependencies = [ ] [[package]] -name = "ntimestamp" -version = "1.0.0" +name = "nom" +version = "8.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c50f94c405726d3e0095e89e72f75ce7f6587b94a8bd8dc8054b73f65c0fd68c" +checksum = "df9761775871bdef83bee530e60050f7e54b1105350d6884eb0fb4f46c2f9405" dependencies = [ - "base32", - "document-features", - "getrandom 0.2.16", - "httpdate", - "js-sys", - "once_cell", - "serde", + "memchr", +] + +[[package]] +name = "noop_proc_macro" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0676bb32a98c1a483ce53e500a81ad9c3d5b3f7c920c28c24e9cb0980d0b5bc8" + +[[package]] +name = "noq" +version = "0.18.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4b969bd157c3bd3bab239a1a8b14f67f2033fa012770367fcbd5b42d71ae3548" +dependencies = [ + "bytes", + "cfg_aliases", + "derive_more", + "noq-proto", + "noq-udp", + "pin-project-lite", + "rustc-hash", + "rustls", + "socket2", + "thiserror 2.0.18", + "tokio", + "tokio-stream", + "tracing", + "web-time", +] + +[[package]] +name = "noq-proto" +version = "0.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cdec6f5039d98ee5377b2f532d495a555eb664c53161b1b5780dcaeac678b60e" +dependencies = [ + "aes-gcm", + "bytes", + "derive_more", + "enum-assoc", + "getrandom 0.4.2", + "identity-hash", + "lru-slab", + "rand 0.10.1", + "ring", + "rustc-hash", + "rustls", + "rustls-pki-types", + "slab", + "sorted-index-buffer", + "thiserror 
2.0.18", + "tinyvec", + "tracing", + "web-time", +] + +[[package]] +name = "noq-udp" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ee91b05f4f3353290936ba1f3233518868fb4e2da99cb4c90d1f8cebb064e527" +dependencies = [ + "cfg_aliases", + "libc", + "socket2", + "tracing", + "windows-sys 0.61.2", ] [[package]] @@ -2887,6 +4312,30 @@ dependencies = [ "windows-sys 0.61.2", ] +[[package]] +name = "num" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "35bd024e8b2ff75562e5f34e7f4905839deb4b22955ef5e73d2fea1b9813cb23" +dependencies = [ + "num-bigint", + "num-complex", + "num-integer", + "num-iter", + "num-rational", + "num-traits", +] + +[[package]] +name = "num-bigint" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a5e44f723f1133c9deac646763579fdb3ac745e418f2a7af9cd0c431da1f20b9" +dependencies = [ + "num-integer", + "num-traits", +] + [[package]] name = "num-complex" version = "0.4.6" @@ -2899,9 +4348,20 @@ dependencies = [ [[package]] name = "num-conv" -version = "0.1.0" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c6673768db2d862beb9b39a78fdcb1a69439615d5794a1be50caa9bc92c81967" + +[[package]] +name = "num-derive" +version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "51d515d32fb182ee37cda2ccdcb92950d6a3c2893aa280e540671c2cd0f3b1d9" +checksum = "ed3955f1a9c7c0c15e092f9c887db08b1fc683305fdf6eb6684f22555355e202" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] [[package]] name = "num-integer" @@ -2912,6 +4372,28 @@ dependencies = [ "num-traits", ] +[[package]] +name = "num-iter" +version = "0.1.45" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1429034a0490724d0075ebb2bc9e875d6503c3cf69e235a8941aa757d83ef5bf" +dependencies = [ + "autocfg", + "num-integer", + "num-traits", +] + 
+[[package]] +name = "num-rational" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f83d14da390562dca69fc84082e73e548e1ad308d24accdedd2720017cb37824" +dependencies = [ + "num-bigint", + "num-integer", + "num-traits", +] + [[package]] name = "num-traits" version = "0.2.19" @@ -2934,9 +4416,9 @@ dependencies = [ [[package]] name = "num_enum" -version = "0.7.5" +version = "0.7.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b1207a7e20ad57b847bbddc6776b968420d38292bbfe2089accff5e19e82454c" +checksum = "5d0bca838442ec211fa11de3a8b0e0e8f3a4522575b5c4c06ed722e005036f26" dependencies = [ "num_enum_derive", "rustversion", @@ -2944,14 +4426,23 @@ dependencies = [ [[package]] name = "num_enum_derive" -version = "0.7.5" +version = "0.7.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ff32365de1b6743cb203b710788263c44a03de03802daf96092f2da4fe6ba4d7" +checksum = "680998035259dcfcafe653688bf2aa6d3e2dc05e98be6ab46afb089dc84f1df8" dependencies = [ "proc-macro-crate", "proc-macro2", "quote", - "syn", + "syn 2.0.117", +] + +[[package]] +name = "num_threads" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c7398b9c8b70908f6371f47ed36737907c87c52af34c268fed0bf0ceb92ead9" +dependencies = [ + "libc", ] [[package]] @@ -2960,11 +4451,100 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "830b246a0e5f20af87141b25c173cd1b609bd7779a4617d6ec582abaf90870f3" +[[package]] +name = "objc" +version = "0.2.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "915b1b472bc21c53464d6c8461c9d3af805ba1ef837e1cac254428f4a77177b1" +dependencies = [ + "malloc_buf", +] + +[[package]] +name = "objc2" +version = "0.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3a12a8ed07aefc768292f076dc3ac8c48f3781c8f2d5851dd3d98950e8c5a89f" +dependencies 
= [ + "objc2-encode", +] + +[[package]] +name = "objc2-core-foundation" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2a180dd8642fa45cdb7dd721cd4c11b1cadd4929ce112ebd8b9f5803cc79d536" +dependencies = [ + "bitflags 2.11.1", + "block2", + "dispatch2", + "libc", + "objc2", +] + +[[package]] +name = "objc2-encode" +version = "4.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ef25abbcd74fb2609453eb695bd2f860d389e457f67dc17cafc8b8cbc89d0c33" + +[[package]] +name = "objc2-foundation" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3e0adef53c21f888deb4fa59fc59f7eb17404926ee8a6f59f5df0fd7f9f3272" +dependencies = [ + "bitflags 2.11.1", + "block2", + "libc", + "objc2", + "objc2-core-foundation", +] + +[[package]] +name = "objc2-metal" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a0125f776a10d00af4152d74616409f0d4a2053a6f57fa5b7d6aa2854ac04794" +dependencies = [ + "bitflags 2.11.1", + "block2", + "dispatch2", + "objc2", + "objc2-core-foundation", + "objc2-foundation", +] + +[[package]] +name = "objc2-security" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "709fe137109bd1e8b5a99390f77a7d8b2961dafc1a1c5db8f2e60329ad6d895a" +dependencies = [ + "bitflags 2.11.1", + "objc2", + "objc2-core-foundation", +] + +[[package]] +name = "objc2-system-configuration" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7216bd11cbda54ccabcab84d523dc93b858ec75ecfb3a7d89513fa22464da396" +dependencies = [ + "bitflags 2.11.1", + "dispatch2", + "libc", + "objc2", + "objc2-core-foundation", + "objc2-security", +] + [[package]] name = "once_cell" -version = "1.21.3" +version = "1.21.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" +checksum = "9f7c3e4beb33f85d45ae3e3a1792185706c8e16d043238c593331cc7cd313b50" dependencies = [ "critical-section", "portable-atomic", @@ -2978,11 +4558,11 @@ checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe" [[package]] name = "onig" -version = "6.5.1" +version = "6.5.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "336b9c63443aceef14bea841b899035ae3abe89b7c486aaf4c5bd8aafedac3f0" +checksum = "0cc3cbf698f9438986c11a880c90a6d04b9de27575afd28bbf45b154b6c709e2" dependencies = [ - "bitflags", + "bitflags 2.11.1", "libc", "once_cell", "onig_sys", @@ -2990,72 +4570,107 @@ dependencies = [ [[package]] name = "onig_sys" -version = "69.9.1" +version = "69.9.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c7f86c6eef3d6df15f23bcfb6af487cbd2fed4e5581d58d5bf1f5f8b7f6727dc" +checksum = "1e68317604e77e53b85896388e1a803c1d21b74c899ec9e5e1112db90735edd7" dependencies = [ "cc", "pkg-config", ] +[[package]] +name = "opaque-debug" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c08d65885ee38876c4f86fa503fb49d7b507c2b62552df7c70b2fce627e06381" + [[package]] name = "open-hypergraphs" -version = "0.2.9" +version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8c1e6b890bbd53b03344882387c36bdc3be51b401bcdead164066e8926f43a1f" +checksum = "35368b8ccf2a61fdb493242cb5b0420d6c46f0e285d1f5ab14dbd2f94e7e4f6a" dependencies = [ "num-traits", "serde", ] [[package]] -name = "openssl" -version = "0.10.75" +name = "openssl-probe" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7c87def4c32ab89d880effc9e097653c8da5d6ef28e6b539d313baaacfbafcbe" + +[[package]] +name = "opentelemetry" +version = "0.31.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"08838db121398ad17ab8531ce9de97b244589089e290a384c900cb9ff7434328" +checksum = "b84bcd6ae87133e903af7ef497404dda70c60d0ea14895fc8a5e6722754fc2a0" dependencies = [ - "bitflags", - "cfg-if", - "foreign-types", - "libc", - "once_cell", - "openssl-macros", - "openssl-sys", + "futures-core", + "futures-sink", + "js-sys", + "pin-project-lite", + "thiserror 2.0.18", + "tracing", ] [[package]] -name = "openssl-macros" -version = "0.1.1" +name = "opentelemetry-http" +version = "0.31.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c" +checksum = "d7a6d09a73194e6b66df7c8f1b680f156d916a1a942abf2de06823dd02b7855d" dependencies = [ - "proc-macro2", - "quote", - "syn", + "async-trait", + "bytes", + "http", + "opentelemetry", + "reqwest 0.12.28", ] [[package]] -name = "openssl-probe" -version = "0.1.6" +name = "opentelemetry-otlp" +version = "0.31.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d05e27ee213611ffe7d6348b942e8f942b37114c00cc03cec254295a4a17852e" +checksum = "1f69cd6acbb9af919df949cd1ec9e5e7fdc2ef15d234b6b795aaa525cc02f71f" +dependencies = [ + "http", + "opentelemetry", + "opentelemetry-http", + "opentelemetry-proto", + "opentelemetry_sdk", + "prost", + "reqwest 0.12.28", + "thiserror 2.0.18", +] [[package]] -name = "openssl-probe" -version = "0.2.0" +name = "opentelemetry-proto" +version = "0.31.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9f50d9b3dabb09ecd771ad0aa242ca6894994c130308ca3d7684634df8037391" +checksum = "a7175df06de5eaee9909d4805a3d07e28bb752c34cab57fa9cff549da596b30f" +dependencies = [ + "opentelemetry", + "opentelemetry_sdk", + "prost", + "tonic", + "tonic-prost", +] [[package]] -name = "openssl-sys" -version = "0.9.111" +name = "opentelemetry_sdk" +version = "0.31.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"82cab2d520aa75e3c58898289429321eb788c3106963d0dc886ec7a5f4adc321" +checksum = "e14ae4f5991976fd48df6d843de219ca6d31b01daaab2dad5af2badeded372bd" dependencies = [ - "cc", - "libc", - "pkg-config", - "vcpkg", + "futures-channel", + "futures-executor", + "futures-util", + "opentelemetry", + "percent-encoding", + "rand 0.9.4", + "thiserror 2.0.18", + "tokio", + "tokio-stream", ] [[package]] @@ -3064,6 +4679,16 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "04744f49eae99ab78e0d5c0b603ab218f515ea8cfe5a456d7629ad883a3b6e7d" +[[package]] +name = "papaya" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "997ee03cd38c01469a7046643714f0ad28880bcb9e6679ff0666e24817ca19b7" +dependencies = [ + "equivalent", + "seize", +] + [[package]] name = "parking" version = "2.2.1" @@ -3099,6 +4724,12 @@ version = "1.0.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a" +[[package]] +name = "pastey" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "35fb2e5f958ec131621fdd531e9fc186ed768cbe395337403ae56c17a74c68ec" + [[package]] name = "pem-rfc7468" version = "1.0.0" @@ -3116,11 +4747,12 @@ checksum = "9b4f627cb1b25917193a259e49bdad08f671f8d9708acfd5fe0a8c1455d87220" [[package]] name = "petgraph" -version = "0.7.1" +version = "0.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3672b37090dbd86368a4145bc067582552b29c27377cad4e0a306c97f9bd7772" +checksum = "8701b58ea97060d5e5b155d383a69952a60943f0e6dfe30b04c287beb0b27455" dependencies = [ "fixedbitset", + "hashbrown 0.15.5", "indexmap", ] @@ -3136,114 +4768,108 @@ dependencies = [ [[package]] name = "pin-project" -version = "1.1.10" +version = "1.1.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"677f1add503faace112b9f1373e43e9e054bfdd22ff1a63c1bc485eaec6a6a8a" +checksum = "cbf0d9e68100b3a7989b4901972f265cd542e560a3a8a724e1e20322f4d06ce9" dependencies = [ "pin-project-internal", ] [[package]] name = "pin-project-internal" -version = "1.1.10" +version = "1.1.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6e918e4ff8c4549eb882f14b3a4bc8c8bc93de829416eacf579f1207a8fbf861" +checksum = "a990e22f43e84855daf260dded30524ef4a9021cc7541c26540500a50b624389" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.117", ] [[package]] name = "pin-project-lite" -version = "0.2.16" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3b3cff922bd51709b605d9ead9aa71031d81447142d828eb4a6eba76fe619f9b" - -[[package]] -name = "pin-utils" -version = "0.1.0" +version = "0.2.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" +checksum = "a89322df9ebe1c1578d689c92318e070967d1042b512afbe49518723f4e6d5cd" [[package]] -name = "pkarr" -version = "5.0.0" +name = "pkcs8" +version = "0.10.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "792c1328860f6874e90e3b387b4929819cc7783a6bd5a4728e918706eb436a48" +checksum = "f950b2377845cebe5cf8b5165cb3cc1a5e0fa5cfa3e1f7f55707d8fd82e0a7b7" dependencies = [ - "async-compat", - "base32", - "bytes", - "cfg_aliases", - "document-features", - "dyn-clone", - "ed25519-dalek", - "futures-buffered", - "futures-lite", - "getrandom 0.3.4", - "log", - "lru 0.13.0", - "mainline", - "ntimestamp", - "reqwest", - "self_cell", - "serde", - "sha1_smol", - "simple-dns", - "thiserror 2.0.17", - "tokio", - "tracing", - "url", - "wasm-bindgen-futures", + "der 0.7.10", + "spki 0.7.3", ] [[package]] name = "pkcs8" -version = "0.11.0-rc.8" +version = "0.11.0-rc.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"77089aec8290d0b7bb01b671b091095cf1937670725af4fd73d47249f03b12c0" +checksum = "b226d2cc389763951db8869584fd800cbbe2962bf454e2edeb5172b31ee99774" dependencies = [ - "der", - "spki", + "der 0.8.0-rc.10", + "spki 0.8.0-rc.4", ] [[package]] name = "pkg-config" -version = "0.3.32" +version = "0.3.33" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c" +checksum = "19f132c84eca552bf34cab8ec81f1c1dcc229b811638f9d283dceabe58c5569e" [[package]] -name = "poly1305" -version = "0.9.0-rc.2" +name = "plist" +version = "1.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fb78a635f75d76d856374961deecf61031c0b6f928c83dc9c0924ab6c019c298" +checksum = "092791278e026273c1b65bbdcfbba3a300f2994c896bd01ab01da613c29c46f1" dependencies = [ - "cpufeatures", - "universal-hash", + "base64 0.22.1", + "indexmap", + "quick-xml", + "serde", + "time", ] [[package]] -name = "portable-atomic" -version = "1.13.0" +name = "png" +version = "0.18.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "60769b8b31b2a9f263dae2776c37b1b28ae246943cf719eb6946a1db05128a61" +dependencies = [ + "bitflags 2.11.1", + "crc32fast", + "fdeflate", + "flate2", + "miniz_oxide", +] + +[[package]] +name = "polyval" +version = "0.6.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f89776e4d69bb58bc6993e99ffa1d11f228b839984854c7daeb5d37f87cbe950" +checksum = "9d1fe60d06143b2430aa532c94cfe9e29783047f06c0d7fd359a9a51b729fa25" +dependencies = [ + "cfg-if", + "cpufeatures 0.2.17", + "opaque-debug", + "universal-hash", +] [[package]] -name = "portable-atomic-util" -version = "0.2.4" +name = "portable-atomic" +version = "1.13.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d8a2f0d8d040d7848a709caf78912debcc3f33ee4b3cac47d73d1e1069e83507" +checksum = "c33a9471896f1c69cecef8d20cbe2f7accd12527ce60845ff44c153bb2a21b49" 
dependencies = [ - "portable-atomic", + "serde", ] [[package]] name = "portmapper" -version = "0.12.0" +version = "0.16.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7b575f975dcf03e258b0c7ab3f81497d7124f508884c37da66a7314aa2a8d467" +checksum = "a145e62ddd9aecc9c7b1a3c84cea2a803386c7f4da7795bf9f0d50d90dc52549" dependencies = [ "base64 0.22.1", "bytes", @@ -3257,10 +4883,10 @@ dependencies = [ "n0-error", "netwatch", "num_enum", - "rand 0.9.2", + "rand 0.10.1", "serde", "smallvec", - "socket2 0.6.1", + "socket2", "time", "tokio", "tokio-util", @@ -3269,6 +4895,16 @@ dependencies = [ "url", ] +[[package]] +name = "positioned-io" +version = "0.3.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d4ec4b80060f033312b99b6874025d9503d2af87aef2dd4c516e253fbfcdada7" +dependencies = [ + "libc", + "winapi", +] + [[package]] name = "postcard" version = "1.1.3" @@ -3291,14 +4927,14 @@ checksum = "e0232bd009a197ceec9cc881ba46f727fcd8060a2d8d6a9dde7a69030a6fe2bb" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.117", ] [[package]] name = "potential_utf" -version = "0.1.4" +version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b73949432f5e2a09657003c25bca5e19a0e9c84f8058ca374f49e0ebe605af77" +checksum = "0103b1cef7ec0cf76490e969665504990193874ea05c85ff9bab8b911d0a0564" dependencies = [ "zerovec", ] @@ -3318,6 +4954,17 @@ dependencies = [ "zerocopy", ] +[[package]] +name = "prefix-trie" +version = "0.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "90f561214012d3fc240a1f9c817cc4d57f5310910d066069c1b093f766bb5966" +dependencies = [ + "either", + "ipnet", + "num-traits", +] + [[package]] name = "prettyplease" version = "0.2.37" @@ -3325,32 +4972,134 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "479ca8adacdd7ce8f1fb39ce9ecccbfe93a3f1344b3d0d97f20bc0196208f62b" dependencies = [ "proc-macro2", - "syn", 
+ "syn 2.0.117", +] + +[[package]] +name = "primal-check" +version = "0.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc0d895b311e3af9902528fbb8f928688abbd95872819320517cc24ca6b2bd08" +dependencies = [ + "num-integer", ] [[package]] name = "proc-macro-crate" -version = "3.4.0" +version = "3.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "219cb19e96be00ab2e37d6e299658a0cfa83e52429179969b0f0121b4ac46983" +checksum = "e67ba7e9b2b56446f1d419b1d807906278ffa1a658a8a5d8a39dcb1f5a78614f" dependencies = [ "toml_edit", ] +[[package]] +name = "proc-macro-error" +version = "0.4.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "18f33027081eba0a6d8aba6d1b1c3a3be58cbb12106341c2d5759fcd9b5277e7" +dependencies = [ + "proc-macro-error-attr", + "proc-macro2", + "quote", + "syn 1.0.109", + "version_check", +] + +[[package]] +name = "proc-macro-error-attr" +version = "0.4.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a5b4b77fdb63c1eca72173d68d24501c54ab1269409f6b672c85deb18af69de" +dependencies = [ + "proc-macro2", + "quote", + "syn 1.0.109", + "syn-mid", + "version_check", +] + +[[package]] +name = "proc-macro-hack" +version = "0.5.20+deprecated" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc375e1527247fe1a97d8b7156678dfe7c1af2fc075c9a4db3690ecd2a148068" + [[package]] name = "proc-macro2" -version = "1.0.105" +version = "1.0.106" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "535d180e0ecab6268a3e718bb9fd44db66bbbc256257165fc699dadf70d16fe7" +checksum = "8fd00f0bb2e90d81d1044c2b32617f68fcb9fa3bb7640c23e9c748e53fb30934" dependencies = [ "unicode-ident", ] +[[package]] +name = "profiling" +version = "1.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d595e54a326bc53c1c197b32d295e14b169e3cfeaa8dc82b529f947fba6bcf5" +dependencies = [ + 
"profiling-procmacros", +] + +[[package]] +name = "profiling-procmacros" +version = "1.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4488a4a36b9a4ba6b9334a32a39971f77c1436ec82c38707bce707699cc3bbcb" +dependencies = [ + "quote", + "syn 2.0.117", +] + +[[package]] +name = "prometheus-client" +version = "0.24.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cca3d75b4566b9a29fe1ed623587fb058e826eb329a0be4b7c4da1ebb2d7a6ca" +dependencies = [ + "dtoa", + "itoa", + "parking_lot", + "prometheus-client-derive-encode", +] + +[[package]] +name = "prometheus-client-derive-encode" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9adf1691c04c0a5ff46ff8f262b58beb07b0dbb61f96f9f54f6cbd82106ed87f" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "proptest" +version = "1.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4b45fcc2344c680f5025fe57779faef368840d0bd1f42f216291f0dc4ace4744" +dependencies = [ + "bit-set", + "bit-vec", + "bitflags 2.11.1", + "num-traits", + "rand 0.9.4", + "rand_chacha", + "rand_xorshift", + "regex-syntax", + "rusty-fork", + "tempfile", + "unarray", +] + [[package]] name = "prost" -version = "0.14.1" +version = "0.14.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7231bd9b3d3d33c86b58adbac74b5ec0ad9f496b19d22801d773636feaa95f3d" +checksum = "d2ea70524a2f82d518bce41317d0fae74151505651af45faf1ffbd6fd33f0568" dependencies = [ "bytes", "prost-derive", @@ -3358,15 +5107,14 @@ dependencies = [ [[package]] name = "prost-build" -version = "0.14.1" +version = "0.14.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ac6c3320f9abac597dcbc668774ef006702672474aad53c6d596b62e487b40b1" +checksum = "343d3bd7056eda839b03204e68deff7d1b13aba7af2b2fd16890697274262ee7" dependencies = [ "heck", "itertools", "log", 
"multimap", - "once_cell", "petgraph", "prettyplease", "prost", @@ -3374,48 +5122,48 @@ dependencies = [ "pulldown-cmark", "pulldown-cmark-to-cmark", "regex", - "syn", + "syn 2.0.117", "tempfile", ] [[package]] name = "prost-derive" -version = "0.14.1" +version = "0.14.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9120690fafc389a67ba3803df527d0ec9cbbc9cc45e4cc20b332996dfb672425" +checksum = "27c6023962132f4b30eb4c172c91ce92d933da334c59c23cddee82358ddafb0b" dependencies = [ "anyhow", "itertools", "proc-macro2", "quote", - "syn", + "syn 2.0.117", ] [[package]] name = "prost-types" -version = "0.14.1" +version = "0.14.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b9b4db3d6da204ed77bb26ba83b6122a73aeb2e87e25fbf7ad2e84c4ccbf8f72" +checksum = "8991c4cbdb8bc5b11f0b074ffe286c30e523de90fee5ba8132f1399f23cb3dd7" dependencies = [ "prost", ] [[package]] name = "pulldown-cmark" -version = "0.13.0" +version = "0.13.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e8bbe1a966bd2f362681a44f6edce3c2310ac21e4d5067a6e7ec396297a6ea0" +checksum = "7c3a14896dfa883796f1cb410461aef38810ea05f2b2c33c5aded3649095fdad" dependencies = [ - "bitflags", + "bitflags 2.11.1", "memchr", "unicase", ] [[package]] name = "pulldown-cmark-to-cmark" -version = "21.1.0" +version = "22.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8246feae3db61428fd0bb94285c690b460e4517d83152377543ca802357785f1" +checksum = "50793def1b900256624a709439404384204a5dc3a6ec580281bfaac35e882e90" dependencies = [ "pulldown-cmark", ] @@ -3434,6 +5182,71 @@ dependencies = [ "version_check", ] +[[package]] +name = "pulp" +version = "0.22.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2e205bb30d5b916c55e584c22201771bcf2bad9aabd5d4127f38387140c38632" +dependencies = [ + "bytemuck", + "cfg-if", + "libm", + "num-complex", + "paste", + "pulp-wasm-simd-flag", + 
"raw-cpuid", + "reborrow", + "version_check", +] + +[[package]] +name = "pulp-wasm-simd-flag" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "40e24eee682d89fb193496edf918a7f407d30175b2e785fe057e4392dfd182e0" + +[[package]] +name = "pxfm" +version = "0.1.29" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e0c5ccf5294c6ccd63a74f1565028353830a9c2f5eb0c682c355c471726a6e3f" + +[[package]] +name = "qoi" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f6d64c71eb498fe9eae14ce4ec935c555749aef511cca85b5568910d6e48001" +dependencies = [ + "bytemuck", +] + +[[package]] +name = "qrcode" +version = "0.14.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d68782463e408eb1e668cf6152704bd856c78c5b6417adaee3203d8f4c1fc9ec" + +[[package]] +name = "quick-error" +version = "1.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1d01941d82fa2ab50be1e79e6714289dd7cde78eba4c074bc5a4374f650dfe0" + +[[package]] +name = "quick-error" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a993555f31e5a609f617c12db6250dedcac1b0a85076912c436e6fc9b2c8e6a3" + +[[package]] +name = "quick-xml" +version = "0.39.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cdcc8dd4e2f670d309a5f0e83fe36dfdc05af317008fea29144da1a2ac858e5e" +dependencies = [ + "memchr", +] + [[package]] name = "quinn" version = "0.11.9" @@ -3447,8 +5260,8 @@ dependencies = [ "quinn-udp", "rustc-hash", "rustls", - "socket2 0.6.1", - "thiserror 2.0.17", + "socket2", + "thiserror 2.0.18", "tokio", "tracing", "web-time", @@ -3456,20 +5269,21 @@ dependencies = [ [[package]] name = "quinn-proto" -version = "0.11.13" +version = "0.11.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"f1906b49b0c3bc04b5fe5d86a77925ae6524a19b816ae38ce1e426255f1d8a31" +checksum = "434b42fec591c96ef50e21e886936e66d3cc3f737104fdb9b737c40ffb94c098" dependencies = [ + "aws-lc-rs", "bytes", "getrandom 0.3.4", "lru-slab", - "rand 0.9.2", + "rand 0.9.4", "ring", "rustc-hash", "rustls", "rustls-pki-types", "slab", - "thiserror 2.0.17", + "thiserror 2.0.18", "tinyvec", "tracing", "web-time", @@ -3484,16 +5298,16 @@ dependencies = [ "cfg_aliases", "libc", "once_cell", - "socket2 0.6.1", + "socket2", "tracing", "windows-sys 0.60.2", ] [[package]] name = "quote" -version = "1.0.43" +version = "1.0.45" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dc74d9a594b72ae6656596548f56f667211f8a97b3d4c3d467150794690dc40a" +checksum = "41f2619966050689382d2b44f664f4bc593e129785a36d6ee376ddf37259b924" dependencies = [ "proc-macro2", ] @@ -3504,63 +5318,147 @@ version = "5.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f" +[[package]] +name = "r-efi" +version = "6.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8dcc9c7d52a811697d2151c701e0d08956f92b0e24136cf4cf27b57a6a0d9bf" + [[package]] name = "rand" -version = "0.8.5" +version = "0.9.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" +checksum = "44c5af06bb1b7d3216d91932aed5265164bf384dc89cd6ba05cf59a35f5f76ea" dependencies = [ - "libc", - "rand_chacha 0.3.1", - "rand_core 0.6.4", + "rand_chacha", + "rand_core 0.9.5", ] [[package]] name = "rand" -version = "0.9.2" +version = "0.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6db2770f06117d490610c7488547d543617b21bfa07796d7a12f6f1bd53850d1" +checksum = "d2e8e8bcc7961af1fdac401278c6a831614941f6164ee3bf4ce61b7edb162207" dependencies = [ - "rand_chacha 0.9.0", - "rand_core 0.9.3", + "chacha20", + 
"getrandom 0.4.2", + "rand_core 0.10.1", ] [[package]] name = "rand_chacha" -version = "0.3.1" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb" +dependencies = [ + "ppv-lite86", + "rand_core 0.9.5", +] + +[[package]] +name = "rand_core" +version = "0.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" +dependencies = [ + "getrandom 0.2.17", +] + +[[package]] +name = "rand_core" +version = "0.9.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "76afc826de14238e6e8c374ddcc1fa19e374fd8dd986b0d2af0d02377261d83c" +dependencies = [ + "getrandom 0.3.4", +] + +[[package]] +name = "rand_core" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "63b8176103e19a2643978565ca18b50549f6101881c443590420e4dc998a3c69" + +[[package]] +name = "rand_distr" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a8615d50dcf34fa31f7ab52692afec947c4dd0ab803cc87cb3b0b4570ff7463" +dependencies = [ + "num-traits", + "rand 0.9.4", +] + +[[package]] +name = "rand_xorshift" +version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" +checksum = "513962919efc330f829edb2535844d1b912b0fbe2ca165d613e4e8788bb05a5a" dependencies = [ - "ppv-lite86", - "rand_core 0.6.4", + "rand_core 0.9.5", ] [[package]] -name = "rand_chacha" -version = "0.9.0" +name = "range-collections" +version = "0.4.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb" +checksum = "861706ea9c4aded7584c5cd1d241cec2ea7f5f50999f236c22b65409a1f1a0d0" dependencies = [ - "ppv-lite86", - "rand_core 0.9.3", + 
"binary-merge", + "inplace-vec-builder", + "ref-cast", + "serde", + "smallvec", ] [[package]] -name = "rand_core" -version = "0.6.4" +name = "rav1e" +version = "0.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" +checksum = "43b6dd56e85d9483277cde964fd1bdb0428de4fec5ebba7540995639a21cb32b" dependencies = [ - "getrandom 0.2.16", + "aligned-vec", + "arbitrary", + "arg_enum_proc_macro", + "arrayvec", + "av-scenechange", + "av1-grain", + "bitstream-io", + "built", + "cfg-if", + "interpolate_name", + "itertools", + "libc", + "libfuzzer-sys", + "log", + "maybe-rayon", + "new_debug_unreachable", + "noop_proc_macro", + "num-derive", + "num-traits", + "paste", + "profiling", + "rand 0.9.4", + "rand_chacha", + "simd_helpers", + "thiserror 2.0.18", + "v_frame", + "wasm-bindgen", ] [[package]] -name = "rand_core" -version = "0.9.3" +name = "ravif" +version = "0.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "99d9a13982dcf210057a8a78572b2217b667c3beacbf3a0d8b454f6f82837d38" +checksum = "e52310197d971b0f5be7fe6b57530dcd27beb35c1b013f29d66c1ad73fbbcc45" dependencies = [ - "getrandom 0.3.4", + "avif-serialize", + "imgref", + "loop9", + "quick-error 2.0.1", + "rav1e", + "rayon", + "rgb", ] [[package]] @@ -3569,20 +5467,14 @@ version = "11.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "498cd0dc59d73224351ee52a95fee0f1a617a2eae0e7d9d720cc622c73a54186" dependencies = [ - "bitflags", + "bitflags 2.11.1", ] -[[package]] -name = "rawpointer" -version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "60a357793950651c4ed0f3f52338f53b2f809f32d83a07f72909fa13e4c6c1e3" - [[package]] name = "rayon" -version = "1.11.0" +version = "1.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "368f01d005bf8fd9b1206fb6fa653e6c4a81ceb1466406b81792d87c5677a58f" 
+checksum = "fb39b166781f92d482534ef4b4b1b2568f42613b53e5b6c160e24cfbfa30926d" dependencies = [ "either", "rayon-core", @@ -3615,13 +5507,22 @@ version = "0.5.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "03251193000f4bd3b042892be858ee50e8b3719f2b08e5833ac4353724632430" +[[package]] +name = "redb" +version = "2.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8eca1e9d98d5a7e9002d0013e18d5a9b000aee942eb134883a82f06ebffb6c01" +dependencies = [ + "libc", +] + [[package]] name = "redox_syscall" version = "0.5.18" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ed2bf2547551a7053d6fdfafda3f938979645c44812fbfcda098faae3f1a362d" dependencies = [ - "bitflags", + "bitflags 2.11.1", ] [[package]] @@ -3630,16 +5531,48 @@ version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a4e608c6638b9c18977b00b475ac1f28d14e84b27d8d42f70e0bf1e3dec127ac" dependencies = [ - "getrandom 0.2.16", + "getrandom 0.2.17", "libredox", - "thiserror 2.0.17", + "thiserror 2.0.18", +] + +[[package]] +name = "ref-cast" +version = "1.0.25" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f354300ae66f76f1c85c5f84693f0ce81d747e2c3f21a45fef496d89c960bf7d" +dependencies = [ + "ref-cast-impl", +] + +[[package]] +name = "ref-cast-impl" +version = "1.0.25" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b7186006dcb21920990093f30e3dea63b7d6e977bf1256be20c3563a5db070da" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "reflink-copy" +version = "0.1.29" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "13362233b147e57674c37b802d216b7c5e3dcccbed8967c84f0d8d223868ae27" +dependencies = [ + "cfg-if", + "libc", + "rustix", + "windows", ] [[package]] name = "regex" -version = "1.12.2" +version = "1.12.3" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "843bc0191f75f3e22651ae5f1e72939ab2f72a4bc30fa80a066bd66edefc24d4" +checksum = "e10754a14b9137dd7b1e3e5b0493cc9171fdd105e0ab477f51b72e7f3ac0e276" dependencies = [ "aho-corasick", "memchr", @@ -3649,9 +5582,9 @@ dependencies = [ [[package]] name = "regex-automata" -version = "0.4.13" +version = "0.4.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5276caf25ac86c8d810222b3dbb938e512c55c6831a10f3e6ed1c93b84041f1c" +checksum = "6e1dd4122fc1595e8162618945476892eefca7b88c52820e74af6262213cae8f" dependencies = [ "aho-corasick", "memchr", @@ -3660,9 +5593,9 @@ dependencies = [ [[package]] name = "regex-syntax" -version = "0.8.8" +version = "0.8.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a2d987857b319362043e95f5353c0535c1f58eec5336fdfcf626430af7def58" +checksum = "dc897dd8d9e8bd1ed8cdad82b5966c3e0ecae09fb1907d58efaa013543185d0a" [[package]] name = "reqwest" @@ -3672,21 +5605,17 @@ checksum = "eddd3ca559203180a307f12d114c268abf583f59b03cb906fd0b3ff8646c1147" dependencies = [ "base64 0.22.1", "bytes", - "encoding_rs", + "futures-channel", "futures-core", "futures-util", - "h2", "http", "http-body", "http-body-util", "hyper", "hyper-rustls", - "hyper-tls", "hyper-util", "js-sys", "log", - "mime", - "native-tls", "percent-encoding", "pin-project-lite", "quinn", @@ -3697,10 +5626,46 @@ dependencies = [ "serde_urlencoded", "sync_wrapper", "tokio", - "tokio-native-tls", + "tokio-rustls", + "tower", + "tower-http", + "tower-service", + "url", + "wasm-bindgen", + "wasm-bindgen-futures", + "web-sys", + "webpki-roots 1.0.7", +] + +[[package]] +name = "reqwest" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "04e9018c9d814e5f30cc16a0f03271aeab3571e609612d9fe78c1aa8d11c2f62" +dependencies = [ + "base64 0.22.1", + "bytes", + "futures-core", + "futures-util", + "http", + "http-body", + 
"http-body-util", + "hyper", + "hyper-rustls", + "hyper-util", + "js-sys", + "log", + "percent-encoding", + "pin-project-lite", + "quinn", + "rustls", + "rustls-pki-types", + "rustls-platform-verifier", + "sync_wrapper", + "tokio", "tokio-rustls", "tokio-util", - "tower 0.5.2", + "tower", "tower-http", "tower-service", "url", @@ -3708,7 +5673,7 @@ dependencies = [ "wasm-bindgen-futures", "wasm-streams", "web-sys", - "webpki-roots 1.0.5", + "webpki-roots 1.0.7", ] [[package]] @@ -3717,6 +5682,22 @@ version = "0.7.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1e061d1b48cb8d38042de4ae0a7a6401009d6143dc80d2e2d6f31f0bdd6470c7" +[[package]] +name = "rfc6979" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8dd2a808d456c4a54e300a23e9f5a67e122c3024119acbfd73e3bf664491cb2" +dependencies = [ + "hmac", + "subtle", +] + +[[package]] +name = "rgb" +version = "0.8.53" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "47b34b781b31e5d73e9fbc8689c70551fd1ade9a19e3e28cfec8580a79290cc4" + [[package]] name = "ring" version = "0.17.14" @@ -3725,7 +5706,7 @@ checksum = "a4689e6c2294d81e88dc6261c768b63bc4fcdb852be6d1352498b114f61383b7" dependencies = [ "cc", "cfg-if", - "getrandom 0.2.16", + "getrandom 0.2.17", "libc", "untrusted", "windows-sys 0.52.0", @@ -3733,9 +5714,9 @@ dependencies = [ [[package]] name = "rustc-hash" -version = "2.1.1" +version = "2.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "357703d41365b4b27c590e3ed91eabb1b663f07c4c084095e60cbed4362dff0d" +checksum = "94300abf3f1ae2e2b8ffb7b58043de3d399c73fa6f4b73826402a5c457614dbe" [[package]] name = "rustc_version" @@ -3746,13 +5727,27 @@ dependencies = [ "semver", ] +[[package]] +name = "rustfft" +version = "6.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "21db5f9893e91f41798c88680037dba611ca6674703c1a18601b01a72c8adb89" +dependencies = [ + 
"num-complex", + "num-integer", + "num-traits", + "primal-check", + "strength_reduce", + "transpose", +] + [[package]] name = "rustix" -version = "1.1.3" +version = "1.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "146c9e247ccc180c1f61615433868c99f3de3ae256a30a43b49f67c2d9171f34" +checksum = "b6fe4565b9518b83ef4f91bb47ce29620ca828bd32cb7e408f0062e9930ba190" dependencies = [ - "bitflags", + "bitflags 2.11.1", "errno", "libc", "linux-raw-sys", @@ -3761,10 +5756,11 @@ dependencies = [ [[package]] name = "rustls" -version = "0.23.36" +version = "0.23.40" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c665f33d38cea657d9614f766881e4d510e0eda4239891eea56b4cadcf01801b" +checksum = "ef86cd5876211988985292b91c96a8f2d298df24e75989a43a3c73f2d4d8168b" dependencies = [ + "aws-lc-rs", "log", "once_cell", "ring", @@ -3780,17 +5776,17 @@ version = "0.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "612460d5f7bea540c490b2b6395d8e34a953e52b491accd6c86c8164c5932a63" dependencies = [ - "openssl-probe 0.2.0", + "openssl-probe", "rustls-pki-types", "schannel", - "security-framework 3.5.1", + "security-framework", ] [[package]] name = "rustls-pki-types" -version = "1.13.2" +version = "1.14.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "21e6f2ab2928ca4291b86736a8bd920a277a399bba1589409d72154ff87c1282" +checksum = "30a7197ae7eb376e574fe940d068c30fe0462554a3ddbe4eca7838e049c937a9" dependencies = [ "web-time", "zeroize", @@ -3798,23 +5794,23 @@ dependencies = [ [[package]] name = "rustls-platform-verifier" -version = "0.5.3" +version = "0.6.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "19787cda76408ec5404443dc8b31795c87cd8fec49762dc75fa727740d34acc1" +checksum = "1d99feebc72bae7ab76ba994bb5e121b8d83d910ca40b36e0921f53becc41784" dependencies = [ "core-foundation 0.10.1", "core-foundation-sys", - "jni", + "jni 0.21.1", "log", 
"once_cell", "rustls", "rustls-native-certs", "rustls-platform-verifier-android", "rustls-webpki", - "security-framework 3.5.1", + "security-framework", "security-framework-sys", - "webpki-root-certs 0.26.11", - "windows-sys 0.59.0", + "webpki-root-certs", + "windows-sys 0.61.2", ] [[package]] @@ -3825,10 +5821,11 @@ checksum = "f87165f0995f63a9fbeea62b64d10b4d9d8e78ec6d7d51fb2125fda7bb36788f" [[package]] name = "rustls-webpki" -version = "0.103.8" +version = "0.103.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2ffdfa2f5286e2247234e03f680868ac2815974dc39e00ea15adc445d0aafe52" +checksum = "61c429a8649f110dddef65e2a5ad240f747e85f7758a6bccc7e5777bd33f756e" dependencies = [ + "aws-lc-rs", "ring", "rustls-pki-types", "untrusted", @@ -3840,31 +5837,43 @@ version = "1.0.22" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d" +[[package]] +name = "rusty-fork" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cc6bf79ff24e648f6da1f8d1f011e9cac26491b619e6b9280f2b47f1774e6ee2" +dependencies = [ + "fnv", + "quick-error 1.2.3", + "tempfile", + "wait-timeout", +] + [[package]] name = "ryu" -version = "1.0.22" +version = "1.0.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a50f4cf475b65d88e057964e0e9bb1f0aa9bbb2036dc65c64596b42932536984" +checksum = "9774ba4a74de5f7b1c1451ed6cd5285a32eddb5cccb8cc655a4e50009e06477f" [[package]] name = "safetensors" -version = "0.7.0" +version = "0.4.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "675656c1eabb620b921efea4f9199f97fc86e36dd6ffd1fbbe48d0f59a4987f5" +checksum = "44560c11236a6130a46ce36c836a62936dc81ebf8c36a37947423571be0e55b6" dependencies = [ - "hashbrown", "serde", "serde_json", ] [[package]] -name = "salsa20" -version = "0.11.0-rc.1" +name = "safetensors" +version = "0.7.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "d3ff3b81c8a6e381bc1673768141383f9328048a60edddcfc752a8291a138443" +checksum = "675656c1eabb620b921efea4f9199f97fc86e36dd6ffd1fbbe48d0f59a4987f5" dependencies = [ - "cfg-if", - "cipher", + "hashbrown 0.16.1", + "serde", + "serde_json", ] [[package]] @@ -3878,9 +5887,9 @@ dependencies = [ [[package]] name = "schannel" -version = "0.1.28" +version = "0.1.29" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "891d81b926048e76efe18581bf793546b4c0eaf8448d72be8de2bbee5fd166e1" +checksum = "91c1b7e4904c873ef0710c1f407dde2e6287de2bebc1bbbf7d430bb7cbffd939" dependencies = [ "windows-sys 0.61.2", ] @@ -3898,25 +5907,26 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" [[package]] -name = "security-framework" -version = "2.11.1" +name = "sec1" +version = "0.7.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "897b2245f0b511c87893af39b033e5ca9cce68824c4d7e7630b5a1d339658d02" +checksum = "d3e97a565f76233a6003f9f5c54be1d9c5bdfa3eccfb189469f11ec4901c47dc" dependencies = [ - "bitflags", - "core-foundation 0.9.4", - "core-foundation-sys", - "libc", - "security-framework-sys", + "base16ct", + "der 0.7.10", + "generic-array", + "pkcs8 0.10.2", + "subtle", + "zeroize", ] [[package]] name = "security-framework" -version = "3.5.1" +version = "3.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b3297343eaf830f66ede390ea39da1d462b6b0c1b000f420d0a83f898bbbe6ef" +checksum = "b7f4bc775c73d9a02cde8bf7b2ec4c9d12743edf609006c7facc23998404cd1d" dependencies = [ - "bitflags", + "bitflags 2.11.1", "core-foundation 0.10.1", "core-foundation-sys", "libc", @@ -3925,14 +5935,24 @@ dependencies = [ [[package]] name = "security-framework-sys" -version = "2.15.0" +version = "2.17.0" source = "registry+https://github.com/rust-lang/crates.io-index" 
-checksum = "cc1f0cbffaac4852523ce30d8bd3c5cdc873501d96ff467ca09b6767bb8cd5c0" +checksum = "6ce2691df843ecc5d231c0b14ece2acc3efb62c0a398c7e1d875f3983ce020e3" dependencies = [ "core-foundation-sys", "libc", ] +[[package]] +name = "seize" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b55fb86dfd3a2f5f76ea78310a88f96c4ea21a3031f8d212443d56123fd0521" +dependencies = [ + "libc", + "windows-sys 0.61.2", +] + [[package]] name = "self_cell" version = "1.2.2" @@ -3941,9 +5961,9 @@ checksum = "b12e76d157a900eb52e81bc6e9f3069344290341720e9178cde2407113ac8d89" [[package]] name = "semver" -version = "1.0.27" +version = "1.0.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d767eb0aabc880b29956c35734170f26ed551a859dbd361d140cdbeca61ab1e2" +checksum = "8a7852d02fc848982e0c167ef163aaff9cd91dc640ba85e263cb1ce46fae51cd" [[package]] name = "send_wrapper" @@ -4004,7 +6024,19 @@ checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.117", +] + +[[package]] +name = "serde_ipld_dagcbor" +version = "0.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "46182f4f08349a02b45c998ba3215d3f9de826246ba02bb9dddfe9a2a2100778" +dependencies = [ + "cbor4ii", + "ipld-core", + "scopeguard", + "serde", ] [[package]] @@ -4013,6 +6045,7 @@ version = "1.0.149" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "83fc039473c5595ace860d8c4fafa220ff474b3fc6bfdb4293327f1a37e94d86" dependencies = [ + "indexmap", "itoa", "memchr", "serde", @@ -4020,6 +6053,17 @@ dependencies = [ "zmij", ] +[[package]] +name = "serde_path_to_error" +version = "0.1.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "10a9ff822e371bb5403e391ecd83e182e0e77ba7f6fe0160b795797109d1b457" +dependencies = [ + "itoa", + "serde", + "serde_core", +] + [[package]] name = "serde_urlencoded" 
version = "0.7.1" @@ -4033,24 +6077,25 @@ dependencies = [ ] [[package]] -name = "serdect" -version = "0.4.2" +name = "serde_with" +version = "3.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9af4a3e75ebd5599b30d4de5768e00b5095d518a79fefc3ecbaf77e665d1ec06" +checksum = "f05839ce67618e14a09b286535c0d9c94e85ef25469b0e13cb4f844e5593eb19" dependencies = [ - "base16ct", - "serde", + "serde_core", + "serde_with_macros", ] [[package]] -name = "sha1" -version = "0.11.0-rc.2" +name = "serde_with_macros" +version = "3.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c5e046edf639aa2e7afb285589e5405de2ef7e61d4b0ac1e30256e3eab911af9" +checksum = "cf2ebbe86054f9b45bc3881e865683ccfaccce97b9b4cb53f3039d67f355a334" dependencies = [ - "cfg-if", - "cpufeatures", - "digest", + "darling 0.23.0", + "proc-macro2", + "quote", + "syn 2.0.117", ] [[package]] @@ -4061,13 +6106,24 @@ checksum = "bbfa15b3dddfee50a0fff136974b3e1bde555604ba463834a7eb7deb6417705d" [[package]] name = "sha2" -version = "0.11.0-rc.2" +version = "0.10.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a7507d819769d01a365ab707794a4084392c824f54a7a6a7862f8c3d0892b283" +dependencies = [ + "cfg-if", + "cpufeatures 0.2.17", + "digest 0.10.7", +] + +[[package]] +name = "sha2" +version = "0.11.0-rc.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d1e3878ab0f98e35b2df35fe53201d088299b41a6bb63e3e34dada2ac4abd924" +checksum = "7c5f3b1e2dc8aad28310d8410bd4d7e180eca65fca176c52ab00d364475d0024" dependencies = [ "cfg-if", - "cpufeatures", - "digest", + "cpufeatures 0.2.17", + "digest 0.11.3", ] [[package]] @@ -4097,15 +6153,44 @@ dependencies = [ [[package]] name = "signature" -version = "3.0.0-rc.6" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77549399552de45a898a580c1b41d445bf730df867cc44e6c0233bbc4b8329de" +dependencies = [ + "digest 0.10.7", 
+ "rand_core 0.6.4", +] + +[[package]] +name = "signature" +version = "3.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "597a96996ccff7dfa16f052bd995b4cecc72af22c35138738dc029f0ead6608d" +checksum = "28d567dcbaf0049cb8ac2608a76cd95ff9e4412e1899d389ee400918ca7537f5" [[package]] name = "simd-adler32" -version = "0.3.8" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "703d5c7ef118737c72f1af64ad2f6f8c5e1921f818cdcb97b8fe6fc69bf66214" + +[[package]] +name = "simd_cesu8" +version = "1.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94f90157bb87cddf702797c5dadfa0be7d266cdf49e22da2fcaa32eff75b2c33" +dependencies = [ + "rustc_version", + "simdutf8", +] + +[[package]] +name = "simd_helpers" +version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e320a6c5ad31d271ad523dcf3ad13e2767ad8b1cb8f047f75a8aeaf8da139da2" +checksum = "95890f873bec569a0362c235787f3aca6e1e887302ba4840839bcc6459c42da6" +dependencies = [ + "quote", +] [[package]] name = "simdutf8" @@ -4119,20 +6204,23 @@ version = "0.9.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dee851d0e5e7af3721faea1843e8015e820a234f81fda3dea9247e15bac9a86a" dependencies = [ - "bitflags", + "bitflags 2.11.1", ] [[package]] name = "slab" -version = "0.4.11" +version = "0.4.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a2ae44ef20feb57a68b23d846850f861394c2e02dc425a50098ae8c90267589" +checksum = "0c790de23124f9ab44544d7ac05d60440adc586479ce501c1d6d7da3cd8c9cf5" [[package]] name = "smallvec" version = "1.15.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" +dependencies = [ + "serde", +] [[package]] name = "smol_str" @@ -4142,22 +6230,12 @@ checksum = "fad6c857cbab2627dcf01ec85a623ca4e7dcb5691cbaa3d7fb7653671f0d09c9" 
[[package]] name = "socket2" -version = "0.5.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e22376abed350d73dd1cd119b57ffccad95b4e585a7cda43e286245ce23c0678" -dependencies = [ - "libc", - "windows-sys 0.52.0", -] - -[[package]] -name = "socket2" -version = "0.6.1" +version = "0.6.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "17129e116933cf371d018bb80ae557e889637989d8638274fb25622827b03881" +checksum = "3a766e1110788c36f4fa1c2b71b387a7815aa65f88ce0229841826633d93723e" dependencies = [ "libc", - "windows-sys 0.60.2", + "windows-sys 0.61.2", ] [[package]] @@ -4171,6 +6249,12 @@ dependencies = [ "winapi", ] +[[package]] +name = "sorted-index-buffer" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ea06cc588e43c632923a55450401b8f25e628131571d4e1baea1bdfdb2b5ed06" + [[package]] name = "spez" version = "0.1.2" @@ -4179,7 +6263,7 @@ checksum = "c87e960f4dca2788eeb86bbdde8dd246be8948790b7618d656e68f9b720a86e8" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.117", ] [[package]] @@ -4197,6 +6281,16 @@ version = "0.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d5fe4ccb98d9c292d56fec89a5e07da7fc4cf0dc11e156b41793132775d3e591" +[[package]] +name = "spki" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d91ed6c858b01f942cd56b37a94b3e0a1798290327d1236e4d9cf4eaca44d29d" +dependencies = [ + "base64ct", + "der 0.7.10", +] + [[package]] name = "spki" version = "0.8.0-rc.4" @@ -4204,7 +6298,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8baeff88f34ed0691978ec34440140e1572b68c7dd4a495fd14a3dc1944daa80" dependencies = [ "base64ct", - "der", + "der 0.8.0-rc.10", ] [[package]] @@ -4214,7 +6308,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5851699c4033c63636f7ea4cf7b7c1f1bf06d0cc03cfb42e711de5a5c46cf326" 
dependencies = [ "base64 0.13.1", - "nom", + "nom 7.1.3", "serde", "unicode-segmentation", ] @@ -4231,6 +6325,12 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" +[[package]] +name = "strength_reduce" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fe895eb47f22e2ddd4dabc02bce419d2e643c8e3b585c78158b349195bc24d82" + [[package]] name = "strsim" version = "0.11.1" @@ -4239,23 +6339,23 @@ checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" [[package]] name = "strum" -version = "0.27.2" +version = "0.28.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "af23d6f6c1a224baef9d3f61e287d2761385a5b88fdab4eb4c6f11aeb54c4bcf" +checksum = "9628de9b8791db39ceda2b119bbe13134770b56c138ec1d3af810d045c04f9bd" dependencies = [ "strum_macros", ] [[package]] name = "strum_macros" -version = "0.27.2" +version = "0.28.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7695ce3845ea4b33927c055a39dc438a45b059f7c1b3d91d38d10355fb8cbca7" +checksum = "ab85eea0270ee17587ed4156089e10b9e6880ee688791d45a905f5b1ca36f664" dependencies = [ "heck", "proc-macro2", "quote", - "syn", + "syn 2.0.117", ] [[package]] @@ -4266,30 +6366,52 @@ checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292" [[package]] name = "swarm-discovery" -version = "0.4.1" +version = "0.6.0-alpha.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "790d8444f7db1e88f70aed3234cab8e42c48e05360bfc86ca7dce0d9a5d95d26" +checksum = "cf5ccbd3c5abd6e7314768de12649c1b0a29bea38fca4370f9408340c0f364a6" dependencies = [ "acto", "hickory-proto", - "rand 0.9.2", - "socket2 0.5.10", - "thiserror 2.0.17", + "rand 0.10.1", + "socket2", + "thiserror 2.0.18", "tokio", "tracing", ] [[package]] name = "syn" -version = "2.0.114" +version = "1.0.109" +source 
= "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "syn" +version = "2.0.117" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d4d107df263a3013ef9b1879b0df87d706ff80f65a86ea879bd9c31f9b307c2a" +checksum = "e665b8803e7b1d2a727f4023456bbbbe74da67099c585258af0ad9c5013b9b99" dependencies = [ "proc-macro2", "quote", "unicode-ident", ] +[[package]] +name = "syn-mid" +version = "0.5.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fea305d57546cc8cd04feb14b62ec84bf17f50e3f7b12560d7bfa9265f39d9ed" +dependencies = [ + "proc-macro2", + "quote", + "syn 1.0.109", +] + [[package]] name = "sync_wrapper" version = "1.0.2" @@ -4307,7 +6429,7 @@ checksum = "728a70f3dbaf5bab7f0c4b1ac8d7ae5ea60a4b5549c8a5914361c99147a709d2" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.117", ] [[package]] @@ -4316,7 +6438,7 @@ version = "0.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "01198a2debb237c62b6826ec7081082d951f46dbb64b0e8c7649a452230d1dfc" dependencies = [ - "bitflags", + "bitflags 2.11.1", "byteorder", "enum-as-inner", "libc", @@ -4326,11 +6448,11 @@ dependencies = [ [[package]] name = "system-configuration" -version = "0.6.1" +version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3c879d448e9d986b661742763247d3693ed13609438cf3d006f51f5368a5ba6b" +checksum = "a13f3d0daba03132c0aa9767f98351b3488edc2c100cda2d2ec2b04f3d8d3c8b" dependencies = [ - "bitflags", + "bitflags 2.11.1", "core-foundation 0.9.4", "system-configuration-sys", ] @@ -4353,12 +6475,12 @@ checksum = "7b2093cf4c8eb1e67749a6762251bc9cd836b6fc171623bd0a9d324d37af2417" [[package]] name = "tempfile" -version = "3.24.0" +version = "3.27.0" source = "registry+https://github.com/rust-lang/crates.io-index" 
-checksum = "655da9c7eb6305c55742045d5a8d2037996d61d8de95806335c7c86ce0f82e9c" +checksum = "32497e9a4c7b38532efcdebeef879707aa9f794296a4f0244f6f69e9bc8574bd" dependencies = [ "fastrand", - "getrandom 0.3.4", + "getrandom 0.4.2", "once_cell", "rustix", "windows-sys 0.61.2", @@ -4366,24 +6488,33 @@ dependencies = [ [[package]] name = "test-log" -version = "0.2.19" +version = "0.2.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "37d53ac171c92a39e4769491c4b4dde7022c60042254b5fc044ae409d34a24d4" +checksum = "2f46bf474f0a4afebf92f076d54fd5e63423d9438b8c278a3d2ccb0f47f7cdb3" dependencies = [ - "env_logger", "test-log-macros", "tracing-subscriber", ] [[package]] -name = "test-log-macros" -version = "0.2.19" +name = "test-log-core" +version = "0.2.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "be35209fd0781c5401458ab66e4f98accf63553e8fae7425503e92fdd319783b" +checksum = "37d4d41320b48bc4a211a9021678fcc0c99569b594ea31c93735b8e517102b4c" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.117", +] + +[[package]] +name = "test-log-macros" +version = "0.2.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9beb9249a81e430dffd42400a49019bcf548444f1968ff23080a625de0d4d320" +dependencies = [ + "syn 2.0.117", + "test-log-core", ] [[package]] @@ -4397,11 +6528,11 @@ dependencies = [ [[package]] name = "thiserror" -version = "2.0.17" +version = "2.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f63587ca0f12b72a0600bcba1d40081f830876000bb46dd2337a3051618f4fc8" +checksum = "4288b5bcbc7920c07a1149a35cf9590a2aa808e0bc1eafaade0b80947865fbc4" dependencies = [ - "thiserror-impl 2.0.17", + "thiserror-impl 2.0.18", ] [[package]] @@ -4412,18 +6543,18 @@ checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.117", ] [[package]] name = "thiserror-impl" -version = "2.0.17" 
+version = "2.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3ff15c8ecd7de3849db632e14d18d2571fa09dfc5ed93479bc4485c7a517c913" +checksum = "ebc4ee7f67670e9b64d05fa4253e753e016c6c95ff35b89b7941d6b856dec1d5" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.117", ] [[package]] @@ -4435,31 +6566,59 @@ dependencies = [ "cfg-if", ] +[[package]] +name = "tiff" +version = "0.11.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b63feaf3343d35b6ca4d50483f94843803b0f51634937cc2ec519fc32232bc52" +dependencies = [ + "fax", + "flate2", + "half", + "quick-error 2.0.1", + "weezl", + "zune-jpeg", +] + [[package]] name = "time" -version = "0.3.44" +version = "0.3.47" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "91e7d9e3bb61134e77bde20dd4825b97c010155709965fedf0f49bb138e52a9d" +checksum = "743bd48c283afc0388f9b8827b976905fb217ad9e647fae3a379a9283c4def2c" dependencies = [ "deranged", + "itoa", "js-sys", + "libc", "num-conv", + "num_threads", "powerfmt", - "serde", + "serde_core", "time-core", + "time-macros", ] [[package]] name = "time-core" -version = "0.1.6" +version = "0.1.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7694e1cfe791f8d31026952abf09c69ca6f6fa4e1a1229e18988f06a04a12dca" + +[[package]] +name = "time-macros" +version = "0.2.27" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "40868e7c1d2f0b8d73e4a8c7f0ff63af4f6d19be117e90bd73eb1d62cf831c6b" +checksum = "2e70e4c5a0e0a8a4823ad65dfe1a6930e4f4d756dcd9dd7939022b5e8c501215" +dependencies = [ + "num-conv", + "time-core", +] [[package]] name = "tinystr" -version = "0.8.2" +version = "0.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "42d3e9c45c09de15d06dd8acf5f4e0e399e85927b7f00711024eb7ae10fa4869" +checksum = "c8323304221c2a851516f22236c5722a72eaa19749016521d6dff0824447d96d" dependencies = [ "displaydoc", "zerovec", @@ 
-4467,9 +6626,9 @@ dependencies = [ [[package]] name = "tinyvec" -version = "1.10.0" +version = "1.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bfa5fdc3bce6191a1dbc8c02d5c8bffcf557bafa17c124c5264a458f1b0613fa" +checksum = "3e61e67053d25a4e82c844e8424039d9745781b3fc4f32b8d55ed50f5f667ef3" dependencies = [ "tinyvec_macros", ] @@ -4485,6 +6644,42 @@ name = "tokenizers" version = "0.21.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a620b996116a59e184c2fa2dfd8251ea34a36d0a514758c6f966386bd2e03476" +dependencies = [ + "ahash", + "aho-corasick", + "compact_str", + "dary_heap", + "derive_builder", + "esaxx-rs", + "fancy-regex", + "getrandom 0.3.4", + "hf-hub 0.4.3", + "indicatif 0.17.11", + "itertools", + "log", + "macro_rules_attribute", + "monostate", + "onig", + "paste", + "rand 0.9.4", + "rayon", + "rayon-cond", + "regex", + "regex-syntax", + "serde", + "serde_json", + "spm_precompiled", + "thiserror 2.0.18", + "unicode-normalization-alignments", + "unicode-segmentation", + "unicode_categories", +] + +[[package]] +name = "tokenizers" +version = "0.22.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b238e22d44a15349529690fb07bd645cf58149a1b1e44d6cb5bd1641ff1a6223" dependencies = [ "ahash", "aho-corasick", @@ -4493,15 +6688,13 @@ dependencies = [ "derive_builder", "esaxx-rs", "getrandom 0.3.4", - "hf-hub", - "indicatif", "itertools", "log", "macro_rules_attribute", "monostate", "onig", "paste", - "rand 0.9.2", + "rand 0.9.4", "rayon", "rayon-cond", "regex", @@ -4509,7 +6702,7 @@ dependencies = [ "serde", "serde_json", "spm_precompiled", - "thiserror 2.0.17", + "thiserror 2.0.18", "unicode-normalization-alignments", "unicode-segmentation", "unicode_categories", @@ -4517,39 +6710,29 @@ dependencies = [ [[package]] name = "tokio" -version = "1.49.0" +version = "1.52.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"72a2903cd7736441aac9df9d7688bd0ce48edccaadf181c3b90be801e81d3d86" +checksum = "8fc7f01b389ac15039e4dc9531aa973a135d7a4135281b12d7c1bc79fd57fffe" dependencies = [ "bytes", "libc", "mio", "pin-project-lite", "signal-hook-registry", - "socket2 0.6.1", + "socket2", "tokio-macros", "windows-sys 0.61.2", ] [[package]] name = "tokio-macros" -version = "2.6.0" +version = "2.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "af407857209536a95c8e56f8231ef2c2e2aff839b22e07a1ffcbc617e9db9fa5" +checksum = "385a6cb71ab9ab790c5fe8d67f1645e6c450a7ce006a33de03daa956cf70a496" dependencies = [ "proc-macro2", "quote", - "syn", -] - -[[package]] -name = "tokio-native-tls" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bbae76ab933c85776efabc971569dd6119c580d8f5d448769dec1764bf796ef2" -dependencies = [ - "native-tls", - "tokio", + "syn 2.0.117", ] [[package]] @@ -4590,20 +6773,21 @@ dependencies = [ [[package]] name = "tokio-websockets" -version = "0.12.3" +version = "0.13.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b1b6348ebfaaecd771cecb69e832961d277f59845d4220a584701f72728152b7" +checksum = "dad543404f98bfc969aeb71994105c592acfc6c43323fddcd016bb208d1c65cb" dependencies = [ "base64 0.22.1", "bytes", "futures-core", "futures-sink", - "getrandom 0.3.4", + "getrandom 0.4.2", "http", "httparse", - "rand 0.9.2", + "rand 0.10.1", "ring", "rustls-pki-types", + "sha1_smol", "simdutf8", "tokio", "tokio-rustls", @@ -4612,18 +6796,18 @@ dependencies = [ [[package]] name = "toml_datetime" -version = "0.7.5+spec-1.1.0" +version = "1.1.1+spec-1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "92e1cfed4a3038bc5a127e35a2d360f145e1f4b971b551a2ba5fd7aedf7e1347" +checksum = "3165f65f62e28e0115a00b2ebdd37eb6f3b641855f9d636d3cd4103767159ad7" dependencies = [ "serde_core", ] [[package]] name = "toml_edit" -version = "0.23.10+spec-1.0.0" +version = 
"0.25.11+spec-1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "84c8b9f757e028cee9fa244aea147aab2a9ec09d5325a9b01e0a49730c2b5269" +checksum = "0b59c4d22ed448339746c59b905d24568fcbb3ab65a500494f7b8c3e97739f2b" dependencies = [ "indexmap", "toml_datetime", @@ -4633,23 +6817,24 @@ dependencies = [ [[package]] name = "toml_parser" -version = "1.0.6+spec-1.1.0" +version = "1.1.2+spec-1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a3198b4b0a8e11f09dd03e133c0280504d0801269e9afa46362ffde1cbeebf44" +checksum = "a2abe9b86193656635d2411dc43050282ca48aa31c2451210f4202550afb7526" dependencies = [ "winnow", ] [[package]] name = "tonic" -version = "0.14.2" +version = "0.14.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eb7613188ce9f7df5bfe185db26c5814347d110db17920415cf2fbcad85e7203" +checksum = "ac2a5518c70fa84342385732db33fb3f44bc4cc748936eb5833d2df34d6445ef" dependencies = [ "async-trait", "axum", "base64 0.22.1", "bytes", + "flate2", "h2", "http", "http-body", @@ -4659,60 +6844,64 @@ dependencies = [ "hyper-util", "percent-encoding", "pin-project", - "socket2 0.6.1", + "socket2", "sync_wrapper", "tokio", "tokio-stream", - "tower 0.5.2", + "tower", "tower-layer", "tower-service", "tracing", + "zstd", ] [[package]] name = "tonic-build" -version = "0.14.2" +version = "0.14.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4c40aaccc9f9eccf2cd82ebc111adc13030d23e887244bc9cfa5d1d636049de3" +checksum = "c68f61875ac5293cf72e6c8cf0158086428c82c37229e98c840878f1706b0322" dependencies = [ "prettyplease", "proc-macro2", "quote", - "syn", + "syn 2.0.117", ] [[package]] name = "tonic-iroh-transport" -version = "0.2.0" +version = "0.9.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "037b740079cc5cf9d92d16038e91c857c3a725a0e29826c336b5f41414040b53" +checksum = 
"1d344e841f9ba4f1a81e81217c4ae2c9871c9855dc33335143f56395bc4d33a2" dependencies = [ - "async-trait", + "async-stream", "axum", - "blake3", "bytes", + "data-encoding", "futures-util", + "h2", "http", "http-body", - "hyper", - "hyper-util", "iroh", - "iroh-gossip", - "prost", - "thiserror 2.0.17", + "mainline", + "n0-future", + "opentelemetry", + "postcard", + "serde", + "sha2 0.10.9", + "thiserror 2.0.18", "tokio", "tokio-stream", "tonic", - "tonic-prost-build", - "tower 0.4.13", + "tower", "tracing", + "tracing-opentelemetry", ] [[package]] name = "tonic-prost" -version = "0.14.2" +version = "0.14.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "66bd50ad6ce1252d87ef024b3d64fe4c3cf54a86fb9ef4c631fdd0ded7aeaa67" +checksum = "50849f68853be452acf590cde0b146665b8d507b3b8af17261df47e02c209ea0" dependencies = [ "bytes", "prost", @@ -4721,39 +6910,25 @@ dependencies = [ [[package]] name = "tonic-prost-build" -version = "0.14.2" +version = "0.14.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b4a16cba4043dc3ff43fcb3f96b4c5c154c64cbd18ca8dce2ab2c6a451d058a2" +checksum = "654e5643eff75d7f8c99197ce1440ed19a3474eada74c12bbac488b2cafdae27" dependencies = [ "prettyplease", "proc-macro2", "prost-build", "prost-types", "quote", - "syn", + "syn 2.0.117", "tempfile", "tonic-build", ] [[package]] name = "tower" -version = "0.4.13" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b8fa9be0de6cf49e536ce1851f987bd21a43b771b09473c3549a6c853db37c1c" -dependencies = [ - "futures-core", - "futures-util", - "pin-project", - "pin-project-lite", - "tower-layer", - "tower-service", -] - -[[package]] -name = "tower" -version = "0.5.2" +version = "0.5.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d039ad9159c98b70ecfd540b2573b97f7f52c3e8d9f8ad57a24b916a536975f9" +checksum = "ebe5ef63511595f1344e2d5cfa636d973292adc0eec1f0ad45fae9f0851ab1d4" dependencies = [ "futures-core", 
"futures-util", @@ -4770,20 +6945,20 @@ dependencies = [ [[package]] name = "tower-http" -version = "0.6.8" +version = "0.6.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d4e6559d53cc268e5031cd8429d05415bc4cb4aefc4aa5d6cc35fbf5b924a1f8" +checksum = "68d6fdd9f81c2819c9a8b0e0cd91660e7746a8e6ea2ba7c6b2b057985f6bcb51" dependencies = [ - "bitflags", + "bitflags 2.11.1", "bytes", "futures-util", "http", "http-body", - "iri-string", "pin-project-lite", - "tower 0.5.2", + "tower", "tower-layer", "tower-service", + "url", ] [[package]] @@ -4818,7 +6993,7 @@ checksum = "7490cfa5ec963746568740651ac6781f701c9c5ea257c58e057f3ba8cf69e8da" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.117", ] [[package]] @@ -4842,11 +7017,27 @@ dependencies = [ "tracing-core", ] +[[package]] +name = "tracing-opentelemetry" +version = "0.32.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ac28f2d093c6c477eaa76b23525478f38de514fa9aeb1285738d4b97a9552fc" +dependencies = [ + "js-sys", + "opentelemetry", + "smallvec", + "tracing", + "tracing-core", + "tracing-log", + "tracing-subscriber", + "web-time", +] + [[package]] name = "tracing-subscriber" -version = "0.3.22" +version = "0.3.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2f30143827ddab0d256fd843b7a66d164e9f271cfa0dde49142c5ca0ca291f1e" +checksum = "cb7f578e5945fb242538965c2d0b04418d38ec25c79d160cd279bf0731c8d319" dependencies = [ "matchers", "nu-ansi-term", @@ -4860,17 +7051,107 @@ dependencies = [ "tracing-log", ] +[[package]] +name = "transpose" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ad61aed86bc3faea4300c7aee358b4c6d0c8d6ccc36524c96e4c92ccf26e77e" +dependencies = [ + "num-integer", + "strength_reduce", +] + [[package]] name = "try-lock" version = "0.2.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b" +[[package]] +name = "typed-builder" +version = "0.23.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "31aa81521b70f94402501d848ccc0ecaa8f93c8eb6999eb9747e72287757ffda" +dependencies = [ + "typed-builder-macro", +] + +[[package]] +name = "typed-builder-macro" +version = "0.23.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "076a02dc54dd46795c2e9c8282ed40bcfb1e22747e955de9389a1de28190fb26" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "typed-path" +version = "0.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e28f89b80c87b8fb0cf04ab448d5dd0dd0ade2f8891bae878de66a75a28600e" + [[package]] name = "typenum" -version = "1.19.0" +version = "1.20.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "40ce102ab67701b8526c123c1bab5cbe42d7040ccfd0f64af1a385808d2f43de" + +[[package]] +name = "ug" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "76b761acf8af3494640d826a8609e2265e19778fb43306c7f15379c78c9b05b0" +dependencies = [ + "gemm 0.18.2", + "half", + "libloading 0.8.9", + "memmap2", + "num", + "num-traits", + "num_cpus", + "rayon", + "safetensors 0.4.5", + "serde", + "thiserror 1.0.69", + "tracing", + "yoke 0.7.5", +] + +[[package]] +name = "ug-cuda" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9f0a1fa748f26166778c33b8498255ebb7c6bffb472bcc0a72839e07ebb1d9b5" +dependencies = [ + "cudarc 0.17.8", + "half", + "serde", + "thiserror 1.0.69", + "ug", +] + +[[package]] +name = "ug-metal" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9f7adf545a99a086d362efc739e7cf4317c18cbeda22706000fd434d70ea3d95" +dependencies = [ + "half", + "metal", + "objc", + "serde", + "thiserror 1.0.69", + "ug", +] + 
+[[package]] +name = "unarray" +version = "0.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "562d481066bde0658276a35467c4af00bdc6ee726305698a55b86e61d7ad82bb" +checksum = "eaea85b334db583fe3274d12b4cd1880032beab409c0d774be044d4480ab9a94" [[package]] name = "unicase" @@ -4880,9 +7161,9 @@ checksum = "dbc4bc3a9f746d862c45cb89d705aa10f187bb96c76001afab07a0d35ce60142" [[package]] name = "unicode-ident" -version = "1.0.22" +version = "1.0.24" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9312f7c4f6ff9069b165498234ce8be658059c6728633667c526e27dc2cf1df5" +checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75" [[package]] name = "unicode-normalization-alignments" @@ -4895,9 +7176,9 @@ dependencies = [ [[package]] name = "unicode-segmentation" -version = "1.12.0" +version = "1.13.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f6ccf251212114b54433ec949fd6a7841275f9ada20dddd2f29e9ceea4501493" +checksum = "9629274872b2bfaf8d66f5f15725007f635594914870f65218920345aa11aa8c" [[package]] name = "unicode-width" @@ -4917,16 +7198,28 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "39ec24b3121d976906ece63c9daad25b85969647682eee313cb5779fdd69e14e" +[[package]] +name = "unit-prefix" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "81e544489bf3d8ef66c953931f56617f423cd4b5494be343d9b9d3dda037b9a3" + [[package]] name = "universal-hash" -version = "0.6.0-rc.2" +version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a55be643b40a21558f44806b53ee9319595bc7ca6896372e4e08e5d7d83c9cd6" +checksum = "fc1de2c688dc15305988b563c3854064043356019f97a4b46276fe734c4f07ea" dependencies = [ - "crypto-common", + "crypto-common 0.1.7", "subtle", ] +[[package]] +name = "unsigned-varint" +version = "0.8.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "eb066959b24b5196ae73cb057f45598450d2c5f71460e98c49b738086eff9c06" + [[package]] name = "untrusted" version = "0.9.0" @@ -4942,7 +7235,6 @@ dependencies = [ "base64 0.22.1", "flate2", "log", - "native-tls", "once_cell", "rustls", "rustls-pki-types", @@ -4953,6 +7245,39 @@ dependencies = [ "webpki-roots 0.26.11", ] +[[package]] +name = "ureq" +version = "3.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dea7109cdcd5864d4eeb1b58a1648dc9bf520360d7af16ec26d0a9354bafcfc0" +dependencies = [ + "base64 0.22.1", + "cookie_store", + "flate2", + "log", + "percent-encoding", + "rustls", + "rustls-pki-types", + "serde", + "serde_json", + "socks", + "ureq-proto", + "utf8-zero", + "webpki-roots 1.0.7", +] + +[[package]] +name = "ureq-proto" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e994ba84b0bd1b1b0cf92878b7ef898a5c1760108fe7b6010327e274917a808c" +dependencies = [ + "base64 0.22.1", + "http", + "httparse", + "log", +] + [[package]] name = "url" version = "2.5.8" @@ -4966,40 +7291,99 @@ dependencies = [ "serde_derive", ] +[[package]] +name = "utf8-zero" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8c0a043c9540bae7c578c88f91dda8bd82e59ae27c21baca69c8b191aaf5a6e" + [[package]] name = "utf8_iter" version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be" +checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be" + +[[package]] +name = "utf8parse" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" + +[[package]] +name = "uuid" +version = "1.23.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"ddd74a9687298c6858e9b88ec8935ec45d22e8fd5e6394fa1bd4e99a87789c76" +dependencies = [ + "getrandom 0.4.2", + "js-sys", + "wasm-bindgen", +] + +[[package]] +name = "v_frame" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "666b7727c8875d6ab5db9533418d7c764233ac9c0cff1d469aec8fa127597be2" +dependencies = [ + "aligned-vec", + "num-traits", + "wasm-bindgen", +] + +[[package]] +name = "valuable" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba73ea9cf16a25df0c8caa16c51acb937d5712a8429db78a3ee29d5dcacd3a65" [[package]] -name = "utf8parse" -version = "0.2.2" +name = "vergen" +version = "9.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" +checksum = "b849a1f6d8639e8de261e81ee0fc881e3e3620db1af9f2e0da015d4382ceaf75" +dependencies = [ + "anyhow", + "derive_builder", + "rustversion", + "vergen-lib 9.1.0", +] [[package]] -name = "uuid" -version = "1.19.0" +name = "vergen-gitcl" +version = "1.0.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e2e054861b4bd027cd373e18e8d8d8e6548085000e41290d95ce0c373a654b4a" +checksum = "b9dfc1de6eb2e08a4ddf152f1b179529638bedc0ea95e6d667c014506377aefe" dependencies = [ - "getrandom 0.3.4", - "js-sys", - "wasm-bindgen", + "anyhow", + "derive_builder", + "rustversion", + "time", + "vergen", + "vergen-lib 0.1.6", ] [[package]] -name = "valuable" -version = "0.1.1" +name = "vergen-lib" +version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ba73ea9cf16a25df0c8caa16c51acb937d5712a8429db78a3ee29d5dcacd3a65" +checksum = "9b07e6010c0f3e59fcb164e0163834597da68d1f864e2b8ca49f74de01e9c166" +dependencies = [ + "anyhow", + "derive_builder", + "rustversion", +] [[package]] -name = "vcpkg" -version = "0.2.15" +name = "vergen-lib" +version = "9.1.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" +checksum = "b34a29ba7e9c59e62f229ae1932fb1b8fb8a6fdcc99215a641913f5f5a59a569" +dependencies = [ + "anyhow", + "derive_builder", + "rustversion", +] [[package]] name = "version_check" @@ -5007,6 +7391,15 @@ version = "0.9.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" +[[package]] +name = "wait-timeout" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09ac3b126d3914f9849036f826e054cbabdc8519970b8998ddaf3b5bd3c65f11" +dependencies = [ + "libc", +] + [[package]] name = "walkdir" version = "2.5.0" @@ -5034,18 +7427,27 @@ checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b" [[package]] name = "wasip2" -version = "1.0.1+wasi-0.2.4" +version = "1.0.3+wasi-0.2.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "20064672db26d7cdc89c7798c48a0fdfac8213434a1186e5ef29fd560ae223d6" +dependencies = [ + "wit-bindgen 0.57.1", +] + +[[package]] +name = "wasip3" +version = "0.4.0+wasi-0.3.0-rc-2026-01-06" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0562428422c63773dad2c345a1882263bbf4d65cf3f42e90921f787ef5ad58e7" +checksum = "5428f8bf88ea5ddc08faddef2ac4a67e390b88186c703ce6dbd955e1c145aca5" dependencies = [ - "wit-bindgen", + "wit-bindgen 0.51.0", ] [[package]] name = "wasm-bindgen" -version = "0.2.106" +version = "0.2.121" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0d759f433fa64a2d763d1340820e46e111a7a5ab75f993d1852d70b03dbb80fd" +checksum = "49ace1d07c165b0864824eee619580c4689389afa9dc9ed3a4c75040d82e6790" dependencies = [ "cfg-if", "once_cell", @@ -5056,22 +7458,19 @@ dependencies = [ [[package]] name = "wasm-bindgen-futures" -version = "0.4.56" +version = "0.4.71" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "836d9622d604feee9e5de25ac10e3ea5f2d65b41eac0d9ce72eb5deae707ce7c" +checksum = "96492d0d3ffba25305a7dc88720d250b1401d7edca02cc3bcd50633b424673b8" dependencies = [ - "cfg-if", "js-sys", - "once_cell", "wasm-bindgen", - "web-sys", ] [[package]] name = "wasm-bindgen-macro" -version = "0.2.106" +version = "0.2.121" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "48cb0d2638f8baedbc542ed444afc0644a29166f1595371af4fecf8ce1e7eeb3" +checksum = "8e68e6f4afd367a562002c05637acb8578ff2dea1943df76afb9e83d177c8578" dependencies = [ "quote", "wasm-bindgen-macro-support", @@ -5079,26 +7478,48 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.106" +version = "0.2.121" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cefb59d5cd5f92d9dcf80e4683949f15ca4b511f4ac0a6e14d4e1ac60c6ecd40" +checksum = "d95a9ec35c64b2a7cb35d3fead40c4238d0940c86d107136999567a4703259f2" dependencies = [ "bumpalo", "proc-macro2", "quote", - "syn", + "syn 2.0.117", "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-shared" -version = "0.2.106" +version = "0.2.121" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cbc538057e648b67f72a982e708d485b2efa771e1ac05fec311f9f63e5800db4" +checksum = "c4e0100b01e9f0d03189a92b96772a1fb998639d981193d7dbab487302513441" dependencies = [ "unicode-ident", ] +[[package]] +name = "wasm-encoder" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "990065f2fe63003fe337b932cfb5e3b80e0b4d0f5ff650e6985b1048f62c8319" +dependencies = [ + "leb128fmt", + "wasmparser", +] + +[[package]] +name = "wasm-metadata" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bb0e353e6a2fbdc176932bbaab493762eb1255a7900fe0fea1a2f96c296cc909" +dependencies = [ + "anyhow", + "indexmap", + "wasm-encoder", + "wasmparser", +] 
+ [[package]] name = "wasm-streams" version = "0.4.2" @@ -5112,11 +7533,23 @@ dependencies = [ "web-sys", ] +[[package]] +name = "wasmparser" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "47b807c72e1bac69382b3a6fb3dbe8ea4c0ed87ff5629b8685ae6b9a611028fe" +dependencies = [ + "bitflags 2.11.1", + "hashbrown 0.15.5", + "indexmap", + "semver", +] + [[package]] name = "web-sys" -version = "0.3.83" +version = "0.3.98" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9b32828d774c412041098d182a8b38b16ea816958e07cf40eec2bc080ae137ac" +checksum = "4b572dff8bcf38bad0fa19729c89bb5748b2b9b1d8be70cf90df697e3a8f32aa" dependencies = [ "js-sys", "wasm-bindgen", @@ -5134,18 +7567,9 @@ dependencies = [ [[package]] name = "webpki-root-certs" -version = "0.26.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "75c7f0ef91146ebfb530314f5f1d24528d7f0767efbfd31dce919275413e393e" -dependencies = [ - "webpki-root-certs 1.0.5", -] - -[[package]] -name = "webpki-root-certs" -version = "1.0.5" +version = "1.0.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "36a29fc0408b113f68cf32637857ab740edfafdf460c326cd2afaa2d84cc05dc" +checksum = "f31141ce3fc3e300ae89b78c0dd67f9708061d1d2eda54b8209346fd6be9a92c" dependencies = [ "rustls-pki-types", ] @@ -5156,18 +7580,36 @@ version = "0.26.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "521bc38abb08001b01866da9f51eb7c5d647a19260e00054a8c7fd5f9e57f7a9" dependencies = [ - "webpki-roots 1.0.5", + "webpki-roots 1.0.7", ] [[package]] name = "webpki-roots" -version = "1.0.5" +version = "1.0.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "12bed680863276c63889429bfd6cab3b99943659923822de1c8a39c49e4d722c" +checksum = "52f5ee44c96cf55f1b349600768e3ece3a8f26010c05265ab73f945bb1a2eb9d" dependencies = [ "rustls-pki-types", ] +[[package]] +name = "weezl" +version 
= "0.1.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a28ac98ddc8b9274cb41bb4d9d4d5c425b6020c50c46f25559911905610b4a88" + +[[package]] +name = "which" +version = "7.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24d643ce3fd3e5b54854602a080f34fb10ab75e0b813ee32d00ca2b44fa74762" +dependencies = [ + "either", + "env_home", + "rustix", + "winsafe", +] + [[package]] name = "widestring" version = "1.2.1" @@ -5258,7 +7700,7 @@ checksum = "053e2e040ab57b9dc951b72c264860db7eb3b0200ba345b4e4c3b14f67855ddf" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.117", ] [[package]] @@ -5269,7 +7711,7 @@ checksum = "3f316c4a2570ba26bbec722032c4099d8c8bc095efccdc15688708623367e358" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.117", ] [[package]] @@ -5326,15 +7768,6 @@ dependencies = [ "windows-targets 0.42.2", ] -[[package]] -name = "windows-sys" -version = "0.48.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9" -dependencies = [ - "windows-targets 0.48.5", -] - [[package]] name = "windows-sys" version = "0.52.0" @@ -5386,21 +7819,6 @@ dependencies = [ "windows_x86_64_msvc 0.42.2", ] -[[package]] -name = "windows-targets" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9a2fa6e2155d7247be68c096456083145c183cbbbc2764150dda45a87197940c" -dependencies = [ - "windows_aarch64_gnullvm 0.48.5", - "windows_aarch64_msvc 0.48.5", - "windows_i686_gnu 0.48.5", - "windows_i686_msvc 0.48.5", - "windows_x86_64_gnu 0.48.5", - "windows_x86_64_gnullvm 0.48.5", - "windows_x86_64_msvc 0.48.5", -] - [[package]] name = "windows-targets" version = "0.52.6" @@ -5449,12 +7867,6 @@ version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "597a5118570b68bc08d8d59125332c54f1ba9d9adeedeef5b99b02ba2b0698f8" -[[package]] -name = 
"windows_aarch64_gnullvm" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8" - [[package]] name = "windows_aarch64_gnullvm" version = "0.52.6" @@ -5473,12 +7885,6 @@ version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e08e8864a60f06ef0d0ff4ba04124db8b0fb3be5776a5cd47641e942e58c4d43" -[[package]] -name = "windows_aarch64_msvc" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc" - [[package]] name = "windows_aarch64_msvc" version = "0.52.6" @@ -5497,12 +7903,6 @@ version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c61d927d8da41da96a81f029489353e68739737d3beca43145c8afec9a31a84f" -[[package]] -name = "windows_i686_gnu" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e" - [[package]] name = "windows_i686_gnu" version = "0.52.6" @@ -5533,12 +7933,6 @@ version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "44d840b6ec649f480a41c8d80f9c65108b92d89345dd94027bfe06ac444d1060" -[[package]] -name = "windows_i686_msvc" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406" - [[package]] name = "windows_i686_msvc" version = "0.52.6" @@ -5557,12 +7951,6 @@ version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8de912b8b8feb55c064867cf047dda097f92d51efad5b491dfb98f6bbb70cb36" -[[package]] -name = "windows_x86_64_gnu" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e" - [[package]] name = "windows_x86_64_gnu" version = "0.52.6" @@ -5581,12 +7969,6 @@ version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "26d41b46a36d453748aedef1486d5c7a85db22e56aff34643984ea85514e94a3" -[[package]] -name = "windows_x86_64_gnullvm" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc" - [[package]] name = "windows_x86_64_gnullvm" version = "0.52.6" @@ -5605,12 +7987,6 @@ version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9aec5da331524158c6d1a4ac0ab1541149c0b9505fde06423b02f5ef0106b9f0" -[[package]] -name = "windows_x86_64_msvc" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538" - [[package]] name = "windows_x86_64_msvc" version = "0.52.6" @@ -5625,49 +8001,133 @@ checksum = "d6bbff5f0aada427a1e5a6da5f1f98158182f26556f345ac9e04d36d0ebed650" [[package]] name = "winnow" -version = "0.7.14" +version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5a5364e9d77fcdeeaa6062ced926ee3381faa2ee02d3eb83a5c27a8825540829" +checksum = "2ee1708bef14716a11bae175f579062d4554d95be2c6829f518df847b7b3fdd0" dependencies = [ "memchr", ] [[package]] -name = "winreg" -version = "0.50.0" +name = "winsafe" +version = "0.0.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d135d17ab770252ad95e9a872d365cf3090e3be864a34ab46f48555993efc904" + +[[package]] +name = "wit-bindgen" +version = "0.51.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "524e57b2c537c0f9b1e69f1965311ec12182b4122e45035b1508cd24d2adadb1" +checksum = "d7249219f66ced02969388cf2bb044a09756a083d0fab1e566056b04d9fbcaa5" dependencies = [ - 
"cfg-if", - "windows-sys 0.48.0", + "wit-bindgen-rust-macro", ] [[package]] name = "wit-bindgen" -version = "0.46.0" +version = "0.57.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ebf944e87a7c253233ad6766e082e3cd714b5d03812acc24c318f549614536e" + +[[package]] +name = "wit-bindgen-core" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ea61de684c3ea68cb082b7a88508a8b27fcc8b797d738bfc99a82facf1d752dc" +dependencies = [ + "anyhow", + "heck", + "wit-parser", +] + +[[package]] +name = "wit-bindgen-rust" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b7c566e0f4b284dd6561c786d9cb0142da491f46a9fbed79ea69cdad5db17f21" +dependencies = [ + "anyhow", + "heck", + "indexmap", + "prettyplease", + "syn 2.0.117", + "wasm-metadata", + "wit-bindgen-core", + "wit-component", +] + +[[package]] +name = "wit-bindgen-rust-macro" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c0f9bfd77e6a48eccf51359e3ae77140a7f50b1e2ebfe62422d8afdaffab17a" +dependencies = [ + "anyhow", + "prettyplease", + "proc-macro2", + "quote", + "syn 2.0.117", + "wit-bindgen-core", + "wit-bindgen-rust", +] + +[[package]] +name = "wit-component" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9d66ea20e9553b30172b5e831994e35fbde2d165325bec84fc43dbf6f4eb9cb2" +dependencies = [ + "anyhow", + "bitflags 2.11.1", + "indexmap", + "log", + "serde", + "serde_derive", + "serde_json", + "wasm-encoder", + "wasm-metadata", + "wasmparser", + "wit-parser", +] + +[[package]] +name = "wit-parser" +version = "0.244.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f17a85883d4e6d00e8a97c586de764dabcc06133f7f1d55dce5cdc070ad7fe59" +checksum = "ecc8ac4bc1dc3381b7f59c34f00b67e18f910c2c0f50015669dde7def656a736" +dependencies = [ + "anyhow", + "id-arena", + "indexmap", + 
"log", + "semver", + "serde", + "serde_derive", + "serde_json", + "unicode-xid", + "wasmparser", +] [[package]] name = "wmi" -version = "0.17.3" +version = "0.18.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "120d8c2b6a7c96c27bf4a7947fd7f02d73ca7f5958b8bd72a696e46cb5521ee6" +checksum = "7c81b85c57a57500e56669586496bf2abd5cf082b9d32995251185d105208b64" dependencies = [ "chrono", "futures", "log", "serde", - "thiserror 2.0.17", + "thiserror 2.0.18", "windows", "windows-core", ] [[package]] name = "writeable" -version = "0.6.2" +version = "0.6.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9edde0db4769d2dc68579893f2306b26c6ecfbe0ef499b013d731b7b9247e0b9" +checksum = "1ffae5123b2d3fc086436f8834ae3ab053a283cfac8fe0a0b8eaae044768a4c4" [[package]] name = "ws_stream_wasm" @@ -5682,7 +8142,7 @@ dependencies = [ "pharos", "rustc_version", "send_wrapper", - "thiserror 2.0.17", + "thiserror 2.0.18", "wasm-bindgen", "wasm-bindgen-futures", "web-sys", @@ -5703,73 +8163,97 @@ dependencies = [ "xml-rs", ] +[[package]] +name = "y4m" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a5a4b21e1a62b67a2970e6831bc091d7b87e119e7f9791aef9702e3bef04448" + [[package]] name = "yoke" -version = "0.8.1" +version = "0.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "120e6aef9aa629e3d4f52dc8cc43a015c7724194c97dfaf45180d2daf2b77f40" +dependencies = [ + "serde", + "stable_deref_trait", + "yoke-derive 0.7.5", + "zerofrom", +] + +[[package]] +name = "yoke" +version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "72d6e5c6afb84d73944e5cedb052c4680d5657337201555f9f2a16b7406d4954" +checksum = "abe8c5fda708d9ca3df187cae8bfb9ceda00dd96231bed36e445a1a48e66f9ca" dependencies = [ "stable_deref_trait", - "yoke-derive", + "yoke-derive 0.8.2", "zerofrom", ] [[package]] name = "yoke-derive" -version = "0.8.1" +version = 
"0.7.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b659052874eb698efe5b9e8cf382204678a0086ebf46982b79d6ca3182927e5d" +checksum = "2380878cad4ac9aac1e2435f3eb4020e8374b5f13c296cb75b4620ff8e229154" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.117", "synstructure", ] [[package]] -name = "z32" -version = "1.3.0" +name = "yoke-derive" +version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2164e798d9e3d84ee2c91139ace54638059a3b23e361f5c11781c2c6459bde0f" +checksum = "de844c262c8848816172cef550288e7dc6c7b7814b4ee56b3e1553f275f1858e" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", + "synstructure", +] [[package]] name = "zerocopy" -version = "0.8.32" +version = "0.8.48" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1fabae64378cb18147bb18bca364e63bdbe72a0ffe4adf0addfec8aa166b2c56" +checksum = "eed437bf9d6692032087e337407a86f04cd8d6a16a37199ed57949d415bd68e9" dependencies = [ "zerocopy-derive", ] [[package]] name = "zerocopy-derive" -version = "0.8.32" +version = "0.8.48" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c9c2d862265a8bb4471d87e033e730f536e2a285cc7cb05dbce09a2a97075f90" +checksum = "70e3cd084b1788766f53af483dd21f93881ff30d7320490ec3ef7526d203bad4" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.117", ] [[package]] name = "zerofrom" -version = "0.1.6" +version = "0.1.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "50cc42e0333e05660c3587f3bf9d0478688e15d870fab3346451ce7f8c9fbea5" +checksum = "69faa1f2a1ea75661980b013019ed6687ed0e83d069bc1114e2cc74c6c04c4df" dependencies = [ "zerofrom-derive", ] [[package]] name = "zerofrom-derive" -version = "0.1.6" +version = "0.1.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d71e5d6e06ab090c67b5e44993ec16b72dcbaabc526db883a360057678b48502" +checksum = 
"11532158c46691caf0f2593ea8358fed6bbf68a0315e80aae9bd41fbade684a1" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.117", "synstructure", ] @@ -5790,44 +8274,108 @@ checksum = "85a5b4158499876c763cb03bc4e49185d3cccbabb15b33c627f7884f43db852e" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.117", ] [[package]] name = "zerotrie" -version = "0.2.3" +version = "0.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2a59c17a5562d507e4b54960e8569ebee33bee890c70aa3fe7b97e85a9fd7851" +checksum = "0f9152d31db0792fa83f70fb2f83148effb5c1f5b8c7686c3459e361d9bc20bf" dependencies = [ "displaydoc", - "yoke", + "yoke 0.8.2", "zerofrom", ] [[package]] name = "zerovec" -version = "0.11.5" +version = "0.11.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6c28719294829477f525be0186d13efa9a3c602f7ec202ca9e353d310fb9a002" +checksum = "90f911cbc359ab6af17377d242225f4d75119aec87ea711a880987b18cd7b239" dependencies = [ - "yoke", + "yoke 0.8.2", "zerofrom", "zerovec-derive", ] [[package]] name = "zerovec-derive" -version = "0.11.2" +version = "0.11.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eadce39539ca5cb3985590102671f2567e659fca9666581ad3411d59207951f3" +checksum = "625dc425cab0dca6dc3c3319506e6593dcb08a9f387ea3b284dbd52a92c40555" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.117", +] + +[[package]] +name = "zip" +version = "7.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c42e33efc22a0650c311c2ef19115ce232583abbe80850bc8b66509ebef02de0" +dependencies = [ + "crc32fast", + "indexmap", + "memchr", + "typed-path", ] [[package]] name = "zmij" -version = "1.0.12" +version = "1.0.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8848ee67ecc8aedbaf3e4122217aff892639231befc6a1b58d29fff4c2cabaa" + +[[package]] +name = "zstd" +version = "0.13.3" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "e91ee311a569c327171651566e07972200e76fcfe2242a4fa446149a3881c08a" +dependencies = [ + "zstd-safe", +] + +[[package]] +name = "zstd-safe" +version = "7.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f49c4d5f0abb602a93fb8736af2a4f4dd9512e36f7f570d66e65ff867ed3b9d" +dependencies = [ + "zstd-sys", +] + +[[package]] +name = "zstd-sys" +version = "2.0.16+zstd.1.5.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91e19ebc2adc8f83e43039e79776e3fda8ca919132d68a1fed6a5faca2683748" +dependencies = [ + "cc", + "pkg-config", +] + +[[package]] +name = "zune-core" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cb8a0807f7c01457d0379ba880ba6322660448ddebc890ce29bb64da71fb40f9" + +[[package]] +name = "zune-inflate" +version = "0.2.54" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "73ab332fe2f6680068f3582b16a24f90ad7096d5d39b974d1c0aff0125116f02" +dependencies = [ + "simd-adler32", +] + +[[package]] +name = "zune-jpeg" +version = "0.5.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2fc5a66a20078bf1251bde995aa2fdcc4b800c70b5d92dd2c62abc5c60f679f8" +checksum = "27bc9d5b815bc103f142aa054f561d9187d191692ec7c2d1e2b4737f8dbd7296" +dependencies = [ + "zune-core", +] diff --git a/Cargo.toml b/Cargo.toml index 04f3977..59aaa04 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,31 +1,81 @@ [workspace] members = [ - "crates/cli", - "crates/rpc", - "crates/executor", -] -default-members = [ - "crates/cli", + "crates/cli", + "crates/core", + "crates/executor", + "crates/pb", + "crates/rpc", ] +default-members = ["crates/cli"] resolver = "2" [workspace.package] version = "0.1.0" -edition = "2021" +edition = "2024" license = "MIT" repository = "https://github.com/hellas-ai/node" documentation = "https://docs.rs" [workspace.dependencies] -thiserror = 
"1" -tokio = { version = "1", features = ["rt-multi-thread", "macros", "sync", "time"] } +base64 = "0.22" +blake3 = "1" +catgrad = { git = "https://github.com/georgewhewell/catgrad", branch = "grw/feat/chat-types-on-chatgrad", default-features = false, features = [ + "serde", +] } +catgrad-llm = { git = "https://github.com/georgewhewell/catgrad", branch = "grw/feat/chat-types-on-chatgrad", default-features = false } +catnix = { git = "https://github.com/georgewhewell/catgrad", branch = "grw/feat/chat-types-on-chatgrad", default-features = false } +chatgrad = { git = "https://github.com/georgewhewell/catgrad", branch = "grw/feat/chat-types-on-chatgrad", default-features = false } +half = "2.7.1" +hellas-core = { path = "crates/core", default-features = false } +hellas-executor = { path = "crates/executor", default-features = false } +hellas-pb = { path = "crates/pb", default-features = false } +# tonic-iroh-transport = { path = "../tonic-iroh-transport", default-features = false, features = ["tls-ring", "portmapper", "fast-apple-datapath"] } +# tonic-iroh-transport = { git = "https://github.com/hellas-ai/tonic-iroh-transport", branch = "grw/feat/iroh-0.98", default-features = false, features = ["tls-ring", "portmapper", "fast-apple-datapath"] } + +hellas-rpc = { path = "crates/rpc", default-features = false } +hf-hub = { version = "0.5", default-features = false, features = ["ureq"] } +iroh-blobs = { version = "0.100", default-features = false } +k256 = { version = "0.13", features = ["ecdsa"] } +opentelemetry = "0.31" +opentelemetry-otlp = { version = "0.31", default-features = false, features = [ + "http-proto", + "trace", + "reqwest-blocking-client", + "reqwest-rustls-webpki-roots", +] } +opentelemetry_sdk = { version = "0.31", features = ["rt-tokio"] } +reqwest = { version = "0.13", default-features = false, features = [ + "rustls", + "webpki-roots", +] } +rustls-webpki = "0.103.9" +serde = { version = "1", features = ["derive"] } +serde_bytes = "0.11" 
+serde_ipld_dagcbor = "=0.6.4" +serde_json = "1" +thiserror = "2" +tokio = { version = "1", features = [ + "rt-multi-thread", + "macros", + "signal", + "sync", + "time", + "process", +] } tokio-stream = { version = "0.1", features = ["sync"] } -tonic = "0.14" -tonic-iroh-transport = "0.2" -# tonic-iroh-transport = {path = "../tonic-iroh" } -hellas-rpc = { path = "crates/rpc" } -hellas-executor = { path = "crates/executor" } +tonic = { version = "0.14", features = ["gzip"] } +tonic-iroh-transport = { version = "0.9.2", default-features = false, features = [ + "tls-ring", + "portmapper", + "fast-apple-datapath", +] } tracing = "0.1" +tracing-opentelemetry = "0.32" tracing-subscriber = { version = "0.3", features = ["env-filter"] } -serde = { version = "1", features = ["derive"] } -serde_json = "1" + +# [patch."https://github.com/hellas-ai/catgrad"] +# catgrad = { path = "../catgrad/catgrad" } +# catgrad-llm = { path = "../catgrad/catgrad-llm" } + +# [patch.crates-io] +# tonic-iroh-transport = { path = "../tonic-iroh-transport" } diff --git a/README.md b/README.md index d26dd26..bae8ef7 100644 --- a/README.md +++ b/README.md @@ -11,7 +11,22 @@ cargo install --git https://github.com/hellas-ai/node Execute: ```bash -cargo run -- execute run -p hey +cargo run -- execute -p hey +``` + +Execute locally with the catgrad backend: + +```bash +cargo run -- execute --local -p hey +``` + +Local execution uses the same catgrad executor backend as `serve` and prefers +accelerated backends when available (Metal on macOS, `--features cuda` on Linux). 
+ +Verify a remote execution against the local catgrad backend: + +```bash +cargo run -- execute --verify-local -p hey ``` ## End-to-end @@ -25,14 +40,83 @@ cargo install --git https://github.com/hellas-ai/node --features serve Run server: ```bash -hellas-cli serve --discovery +hellas-cli serve --download-policy=eager --execute-policy=eager Node Address: bb18ebc065d836ecc7e1f33972d2c17eac9894cd33ce4916f66cb1165ccc7550 RPC server running. Press Ctrl+C to stop ``` +`hellas-cli serve` without policy flags now starts in deny-by-default mode +(`--download-policy=skip --execute-policy=skip`). Only pass eager or allow-list +policies when you intentionally want a node to serve remote work. + +Preload weights on startup: + +```bash +hellas-cli serve \ + --download-policy=eager \ + --execute-policy=eager \ + --preload HuggingFaceTB/SmolLM2-135M-Instruct +``` + +Repeat `--preload` to warm multiple models before the node starts serving. + Run client: ```bash -cargo run -- execute run -p hey bb18ebc065d836ecc7e1f33972d2c17eac9894cd33ce4916f66cb1165ccc7550 -Hello! How can I help you today?<|im_end|>% +cargo run -- execute bb18ebc065d836ecc7e1f33972d2c17eac9894cd33ce4916f66cb1165ccc7550 -p hey +Hello! How can I help you today? +``` + +Monitor discovery and peer health: + +```bash +cargo run -- monitor --timeout-secs 30 +``` + +Run HTTP gateway (OpenAI / Anthropic / plain completions over Hellas network): + +```bash +cargo run -- gateway --port 8080 +``` + +Routes: + +```bash +POST /v1/chat/completions +POST /v1/messages +POST /v1/completions +``` + +## Docker + +Docker images: `.#docker-cpu`, `.#docker-cuda12-sm89`, etc. They stream to stdout. 
+ +```bash +$(nix build .#docker-cuda12-sm89 --print-out-paths) | docker load +nix run .#docker-push-all # push all images to ghcr.io/hellas-ai/node +``` + +Run a CUDA server with persistent HF cache, metrics, and Jaeger tracing: + +```bash +docker run --rm -it \ + --device=nvidia.com/gpu=all \ + -p 31145:31145/udp \ + -p 9090:9090 \ + -v ~/.cache/huggingface:/home/hellas/.cache/huggingface \ + -e OTEL_EXPORTER_OTLP_TRACES_ENDPOINT=http://jaeger:4318/v1/traces \ + ghcr.io/hellas-ai/node:cuda12-sm89 \ + --download-policy=eager --execute-policy=eager \ + --metrics-port=9090 \ + --preload HuggingFaceTB/SmolLM2-135M-Instruct +``` + +## Dependency maintenance + +Available in the dev shell (`nix develop`): + +```bash +cargo audit # security advisories +cargo outdated --workspace --root-deps-only # outdated deps +cargo update --workspace # update Cargo.lock ``` diff --git a/buf.yaml b/buf.yaml new file mode 100644 index 0000000..e8b5562 --- /dev/null +++ b/buf.yaml @@ -0,0 +1,20 @@ +version: v2 +modules: + - path: proto +lint: + use: + - STANDARD + except: + # Hellas service names are used directly as transport service names. + # Adding "Service" to every proto service makes the wire/API names worse. + - SERVICE_SUFFIX + # CreateTicket returns the generic Ticket object and RunTicket streams the + # generic WorkEvent. Both are intentional reusable protocol shapes. + - RPC_RESPONSE_STANDARD_NAME + # Scheme services intentionally accept the scheme request itself. Wrapping + # SymbolicRequest/OpaqueRequest in one-field *CreateTicketRequest messages + # would add wire ceremony without adding protocol state. + - RPC_REQUEST_STANDARD_NAME + # Ticket is the single reusable protocol object returned by every + # scheme-specific ticket creation surface. 
+ - RPC_REQUEST_RESPONSE_UNIQUE diff --git a/crates/cli/Cargo.toml b/crates/cli/Cargo.toml index 4cc1ea0..3bcdfe2 100644 --- a/crates/cli/Cargo.toml +++ b/crates/cli/Cargo.toml @@ -9,21 +9,93 @@ documentation.workspace = true [features] default = [] -serve = ["hellas-executor"] + +# Backend variants: each pulls in the optional `hellas-executor` dep plus the +# RPC/transport server bits. Source code gates on the implicit `hellas-executor` +# feature that cargo creates from the optional dep (no `dep:` prefix used). +candle = [ + "hellas-executor/candle", + "hellas-pb/server", + "hellas-rpc/server", + "tonic-iroh-transport/server", +] +candle-cuda = ["candle", "hellas-executor/candle-cuda"] +candle-metal = ["candle", "hellas-executor/candle-metal"] + +# OpenTelemetry / OTLP exporter + iroh-internal metrics. When off, none of the +# opentelemetry crates compile, the trace context propagation paths in +# `execution.rs` / `serve/node.rs` collapse to identity, and iroh's metrics +# module is not built. 
+otel = [ + "dep:opentelemetry", + "dep:opentelemetry_sdk", + "dep:opentelemetry-otlp", + "dep:tracing-opentelemetry", + "dep:reqwest", + "dep:iroh-metrics", + "tonic-iroh-transport/otel", + "tonic-iroh-transport/metrics", +] + +[target.'cfg(unix)'.dependencies] +libc = "0.2" [dependencies] -tokio.workspace = true -tracing.workspace = true -tracing-subscriber.workspace = true anyhow = "1" +async-stream = "0.3" +axum = "0.8" +base64.workspace = true +catgrad = { workspace = true, default-features = false } +catgrad-llm.workspace = true +chatgrad.workspace = true clap = { version = "4", features = ["derive"] } -hellas-rpc = { workspace = true, features = ["client", "server"] } -hellas-executor = { workspace = true, optional = true } -tonic-iroh-transport = { workspace = true, features = ["gossip"] } -tonic = { workspace = true } +futures = "0.3" +hellas-core.workspace = true +hellas-executor = { workspace = true, default-features = false, optional = true } +hellas-pb = { workspace = true, features = [ + "hellas", + "symbolic", + "opaque", + "courtesy", + "swarm", + "client", +] } +hellas-rpc = { workspace = true, default-features = false, features = [ + "node", + "client", + "compression", + "discovery", +] } +iroh-metrics = { version = "0.38", default-features = false, features = [ + "metrics", +], optional = true } +minijinja = "2" +minijinja-contrib = { version = "2", features = ["pycompat"] } +opentelemetry = { workspace = true, optional = true } +opentelemetry-otlp = { workspace = true, optional = true } +opentelemetry_sdk = { workspace = true, optional = true } +prometheus-client = "0.24" +qrcode = { version = "0.14", default-features = false } +rand = "0.9" +reqwest = { workspace = true, optional = true } +serde.workspace = true +serde_json.workspace = true +tempfile = "3" +tokio.workspace = true tokio-stream = { workspace = true } +tonic = { workspace = true } +tonic-iroh-transport = { workspace = true, default-features = false, features = [ + "client", + 
"discovery-mdns", + "discovery-dht", +] } +tower = { version = "0.5", default-features = false, features = ["util"] } +tracing.workspace = true +tracing-opentelemetry = { workspace = true, optional = true } +tracing-subscriber.workspace = true -# dev-dependencies- add 'compile' feature to hellas-rpc +# dev-dependencies: enable `hellas-pb/compile` when regenerating checked-in protos. [dev-dependencies] -# hellas-rpc = { workspace = true, features = ["compile"] } +# hellas-pb = { workspace = true, features = ["compile"] } +test-log = { version = "0.2", default-features = false, features = ["trace"] } diff --git a/crates/cli/src/bootstrap_peers.rs b/crates/cli/src/bootstrap_peers.rs deleted file mode 100644 index ca61109..0000000 --- a/crates/cli/src/bootstrap_peers.rs +++ /dev/null @@ -1,15 +0,0 @@ -use tonic_iroh_transport::iroh::EndpointId; - -// Hardcoded bootstrap peers for gossip discovery. -// -// These should be stable public nodes that publish their addresses (e.g. via pkarr/DHT), -// so we can dial them by `EndpointId` without having to discover them on the LAN. 
-const BOOTSTRAP_PEERS: &[&str] = - &["bad6b59cd14afc9c15ab944ce3cc699d50ecaa56241882f85c111b546feea410"]; - -pub fn bootstrap_peer_ids() -> Vec { - BOOTSTRAP_PEERS - .iter() - .filter_map(|s| s.parse::().ok()) - .collect() -} diff --git a/crates/cli/src/commands/artifact.rs b/crates/cli/src/commands/artifact.rs new file mode 100644 index 0000000..f5595a7 --- /dev/null +++ b/crates/cli/src/commands/artifact.rs @@ -0,0 +1,157 @@ +use crate::commands::CliResult; +use anyhow::{Context, bail}; +use clap::Subcommand; +use hellas_core::Digest; +use hellas_pb::courtesy::courtesy_client::CourtesyClient; +use hellas_pb::courtesy::{GetArtifactRequest, PutArtifactRequest}; +use hellas_rpc::GRPC_MESSAGE_LIMIT; +use hellas_rpc::discovery::DiscoveryEndpoint; +use hellas_rpc::service::CourtesyService; +use std::net::SocketAddr; +use std::path::PathBuf; +use tonic_iroh_transport::iroh::{EndpointAddr, EndpointId, SecretKey, TransportAddr}; +use tonic_iroh_transport::{ConnectionPool, IrohChannel, IrohConnect, PoolOptions}; + +#[derive(Debug, Subcommand)] +pub enum ArtifactCommand { + /// Store exact canonical artifact bytes on a provider and print the CID + Put { + /// Node ID of the provider to store on + node_id: EndpointId, + /// Direct UDP address hint for the provider. Repeat or use commas. + #[arg(long = "node-addr", value_delimiter = ',')] + node_addrs: Vec, + /// File containing exact canonical artifact bytes + path: PathBuf, + }, + /// Fetch canonical artifact bytes by CID from a provider + Get { + /// Node ID of the provider to fetch from + node_id: EndpointId, + /// Direct UDP address hint for the provider. Repeat or use commas. 
+ #[arg(long = "node-addr", value_delimiter = ',')] + node_addrs: Vec, + /// 32-byte artifact CID as hex + cid: String, + /// File to write the fetched canonical artifact bytes + #[arg(short = 'o', long = "output")] + output: PathBuf, + }, +} + +pub async fn run(command: ArtifactCommand, secret_key: SecretKey) -> CliResult<()> { + match command { + ArtifactCommand::Put { + node_id, + node_addrs, + path, + } => put(node_id, node_addrs, path, secret_key).await, + ArtifactCommand::Get { + node_id, + node_addrs, + cid, + output, + } => get(node_id, node_addrs, cid, output, secret_key).await, + } +} + +async fn put( + node_id: EndpointId, + node_addrs: Vec, + path: PathBuf, + secret_key: SecretKey, +) -> CliResult<()> { + let canonical_artifact = tokio::fs::read(&path) + .await + .with_context(|| format!("failed to read artifact bytes from {}", path.display()))?; + let mut client = connect(node_id, node_addrs, secret_key).await?; + let response = client + .put_artifact(PutArtifactRequest { canonical_artifact }) + .await + .context("put_artifact RPC failed")? + .into_inner(); + let cid = + Digest::from_slice(&response.cid).context("provider returned invalid artifact cid")?; + println!("{cid}"); + Ok(()) +} + +async fn get( + node_id: EndpointId, + node_addrs: Vec, + cid: String, + output: PathBuf, + secret_key: SecretKey, +) -> CliResult<()> { + let cid = parse_digest_hex(&cid)?; + let mut client = connect(node_id, node_addrs, secret_key).await?; + let response = client + .get_artifact(GetArtifactRequest { + cid: cid.as_bytes().to_vec(), + }) + .await + .context("get_artifact RPC failed")? 
+ .into_inner(); + let actual = Digest::hash(&response.canonical_artifact); + if actual != cid { + bail!("provider returned bytes with cid {actual}, expected {cid}"); + } + tokio::fs::write(&output, response.canonical_artifact) + .await + .with_context(|| format!("failed to write artifact bytes to {}", output.display()))?; + Ok(()) +} + +async fn connect( + node_id: EndpointId, + node_addrs: Vec, + secret_key: SecretKey, +) -> CliResult> { + let endpoint = DiscoveryEndpoint::bind(Some(secret_key)).await?.endpoint; + let channel = if node_addrs.is_empty() { + let pool = ConnectionPool::for_service::( + endpoint.clone(), + PoolOptions::default(), + ); + pool.channel(node_id) + .await + .with_context(|| format!("failed to connect to courtesy service on node {node_id}"))? + } else { + CourtesyService::connect( + &endpoint, + EndpointAddr::from_parts(node_id, node_addrs.into_iter().map(TransportAddr::Ip)), + ) + .await + .with_context(|| format!("failed to connect to courtesy service on node {node_id}"))? 
+ }; + Ok(CourtesyClient::new(channel) + .max_decoding_message_size(GRPC_MESSAGE_LIMIT) + .max_encoding_message_size(GRPC_MESSAGE_LIMIT)) +} + +fn parse_digest_hex(raw: &str) -> CliResult { + let bytes = raw.as_bytes(); + if bytes.len() != Digest::LEN * 2 { + bail!( + "artifact cid must be {} hex chars, got {}", + Digest::LEN * 2, + bytes.len() + ); + } + let mut out = [0_u8; Digest::LEN]; + for (idx, chunk) in bytes.chunks_exact(2).enumerate() { + let high = hex_value(chunk[0]).with_context(|| format!("invalid artifact cid {raw:?}"))?; + let low = hex_value(chunk[1]).with_context(|| format!("invalid artifact cid {raw:?}"))?; + out[idx] = (high << 4) | low; + } + Ok(Digest::from_bytes(out)) +} + +fn hex_value(byte: u8) -> Option { + match byte { + b'0'..=b'9' => Some(byte - b'0'), + b'a'..=b'f' => Some(byte - b'a' + 10), + b'A'..=b'F' => Some(byte - b'A' + 10), + _ => None, + } +} diff --git a/crates/cli/src/commands/execute.rs b/crates/cli/src/commands/execute.rs deleted file mode 100644 index c97925b..0000000 --- a/crates/cli/src/commands/execute.rs +++ /dev/null @@ -1,299 +0,0 @@ -use crate::commands::CliResult; -use crate::bootstrap_peers::bootstrap_peer_ids; -use anyhow::Context; -use hellas_rpc::pb::hellas::execute_client::ExecuteClient; -use hellas_rpc::pb::hellas::execute_server::ExecuteServer; -use hellas_rpc::pb::hellas::{ - get_quote_request, ExecuteRequest, ExecuteStatusRequest, GetQuoteRequest, LlmQuoteRequest, - Presence, -}; -use std::io::{self, Write}; -use tokio::time::{timeout, Duration, Instant}; -use tokio_stream::StreamExt; -use tonic_iroh_transport::gossip::join; -use tonic_iroh_transport::iroh::discovery::mdns::{DiscoveryEvent, MdnsDiscovery}; -use tonic_iroh_transport::iroh::discovery::pkarr::dht::DhtDiscovery; -use tonic_iroh_transport::iroh::{Endpoint, EndpointId, Watcher}; -use tonic_iroh_transport::{IrohConnect, TransportBuilder, TransportGuard}; - -const GRPC_MESSAGE_LIMIT: usize = 32 * 1024 * 1024; -const DISCOVERY_TIMEOUT: Duration = 
Duration::from_secs(10); -const BOOTSTRAP_JOIN_TIMEOUT: Duration = Duration::from_secs(3); - -pub async fn run( - node_id: Option, - model: String, - prompt: String, - max_seq: u32, -) -> CliResult<()> { - let endpoint = Endpoint::builder() - .bind() - .await - .context("failed to create iroh endpoint")?; - - // Needed for local-network bootstrap discovery when the user doesn't provide a node id. - let mdns = MdnsDiscovery::builder() - .advertise(false) - .service_name("hellas") - .build(endpoint.id()) - .context("failed to start mDNS discovery")?; - endpoint.discovery().add(mdns.clone()); - - // Add internet discovery via pkarr+DHT as a resolver (no publish). - // `Endpoint::builder()` already includes pkarr publisher + DNS resolver via the N0 preset. - let dht = DhtDiscovery::builder() - .n0_dns_pkarr_relay() - .no_publish() - .build() - .context("failed to initialize pkarr+DHT discovery")?; - endpoint.discovery().add(dht); - - let (node_id, _transport) = match node_id { - Some(id) => (id, None), - None => { - let (id, transport) = discover_executor(&endpoint, &mdns, &model).await?; - (id, Some(transport)) - } - }; - - let channel = ExecuteServer::<()>::connect(&endpoint, node_id.into()) - .await - .with_context(|| format!("failed to connect to node {node_id}"))?; - - let mut client = ExecuteClient::new(channel) - .max_decoding_message_size(GRPC_MESSAGE_LIMIT) - .max_encoding_message_size(GRPC_MESSAGE_LIMIT); - - // 1. Get quote - let req = GetQuoteRequest { - payload: Some(get_quote_request::Payload::LlmPrompt(LlmQuoteRequest { - huggingface_model_id: model.clone(), - prompt: prompt.clone(), - max_seq, - })), - }; - info!("Getting quote... {req:?}"); - let quote = client - .get_quote(req) - .await - .context("GetQuote RPC failed")? - .into_inner(); - - info!("Got quote: {quote:?}"); - - // 2. 
Execute - let req = ExecuteRequest { - quote_id: quote.quote_id.as_bytes().to_vec(), - }; - info!("Req: {req:?}"); - let exec = client - .execute(req) - .await - .context("Execute RPC failed")? - .into_inner(); - info!("Executing: {exec:?}"); - - // 3. Stream status until completed - let req = ExecuteStatusRequest { - execution_id: exec.execution_id.clone(), - }; - info!("Streaming status: {req:?}"); - let mut stream = client - .execute_stream(req) - .await - .context("ExecuteStream RPC failed")? - .into_inner(); - - while let Some(progress) = stream.next().await { - let progress = progress.context("ExecuteStream RPC progress failed")?; - if let Some(decoded) = progress.decoded.as_deref() { - debug!( - "Status: {} | Progress: {} | Decoded chunk: {}", - progress.status, progress.progress, decoded - ); - print!("{}", decoded); - io::stdout().flush()?; - } else if progress.chunk.is_empty() { - debug!( - "Status: {} | Progress: {}", - progress.status, progress.progress - ); - } else { - debug!( - "Status: {} | Progress: {} | Chunk bytes: {}", - progress.status, - progress.progress, - progress.chunk.len() - ); - } - if progress.status == "completed" || progress.status == "failed" { - break; - } - } - - Ok(()) -} - -async fn discover_executor( - endpoint: &Endpoint, - mdns: &MdnsDiscovery, - model: &str, -) -> CliResult<(EndpointId, TransportGuard)> { - info!("No node ID provided, discovering executor via gossip..."); - - // Wait for endpoint to have addresses before starting gossip - let mut addr_stream = endpoint.watch_addr().stream(); - let _ = timeout(DISCOVERY_TIMEOUT, async { - while let Some(addr) = addr_stream.next().await { - let addrs: Vec<_> = addr.ip_addrs().collect(); - if !addrs.is_empty() { - info!("endpoint ready with {} addresses", addrs.len()); - return; - } - } - }) - .await; - - // Gossip won't send anything unless we have at least one connected neighbor. - // Use mDNS to discover local peers for the bootstrap dial. 
- let mut bootstrap: Vec = Vec::new(); - let mut mdns_events = mdns.subscribe().await; - let mdns_deadline = Instant::now() + Duration::from_secs(2); - while Instant::now() < mdns_deadline { - let remaining = mdns_deadline.saturating_duration_since(Instant::now()); - match timeout(remaining, mdns_events.next()).await { - Ok(Some(DiscoveryEvent::Discovered { endpoint_info, .. })) => { - if endpoint_info.endpoint_id == endpoint.id() { - continue; - } - if !bootstrap.contains(&endpoint_info.endpoint_id) { - bootstrap.push(endpoint_info.endpoint_id); - } - } - Ok(Some(DiscoveryEvent::Expired { .. })) => {} - Ok(None) => break, - Err(_) => break, - } - } - - if bootstrap.is_empty() { - info!("No peers discovered via mDNS, falling back to compiled-in bootstrap peers"); - } else { - info!(peers = bootstrap.len(), "Discovered local peers via mDNS"); - } - - for peer in bootstrap_peer_ids() { - if peer == endpoint.id() { - continue; - } - if !bootstrap.contains(&peer) { - bootstrap.push(peer); - } - } - - if bootstrap.is_empty() { - return Err(anyhow::anyhow!( - "No bootstrap peers available (mDNS found none and BOOTSTRAP_PEERS is empty); pass a `node_id`." 
- )); - } - - let transport = TransportBuilder::new(endpoint.clone()) - .with_gossip_config(Default::default()) - .spawn() - .await - .context("failed to start gossip transport")?; - - let gossip = transport - .gossip() - .cloned() - .context("gossip handle missing from transport")?; - - let mut topic = match timeout( - BOOTSTRAP_JOIN_TIMEOUT, - join::(&gossip, bootstrap.clone()), - ) - .await - { - Ok(Ok(topic)) => topic, - Ok(Err(err)) => { - warn!( - peers = bootstrap.len(), - "failed to join presence topic with full bootstrap set: {err}" - ); - let mut last_err: Option = None; - let mut topic: Option<_> = None; - for peer in bootstrap { - match timeout(BOOTSTRAP_JOIN_TIMEOUT, join::(&gossip, vec![peer])).await { - Ok(Ok(t)) => { - topic = Some(t); - break; - } - Ok(Err(e)) => { - last_err = Some(anyhow::anyhow!(e)); - } - Err(e) => { - last_err = Some(anyhow::anyhow!("bootstrap join timeout: {e}")); - } - } - } - topic.ok_or_else(|| { - last_err.unwrap_or_else(|| anyhow::anyhow!("failed to join presence topic")) - })? 
- } - Err(e) => { - return Err(anyhow::anyhow!("bootstrap join timeout: {e}")); - } - }; - - // optional but gives us feedback on connectivity before we broadcast - if let Err(e) = timeout(DISCOVERY_TIMEOUT, topic.joined()).await { - debug!("gossip join wait timed out: {e:?}"); - } - - let req_id = format!( - "{}-{}", - endpoint.id(), - std::time::SystemTime::now() - .duration_since(std::time::UNIX_EPOCH) - .unwrap_or_default() - .as_millis() - ); - - let presence = Presence { - hf_id: model.to_string(), - req_id: req_id.clone(), - peer_id: endpoint.id().to_string(), - ttl_ms: DISCOVERY_TIMEOUT.as_millis() as u64, - is_executor: false, - }; - - topic - .broadcast(&presence) - .await - .context("failed to broadcast presence request")?; - - let selected = timeout(DISCOVERY_TIMEOUT, async { - while let Some(event) = topic.recv().await { - let (_ctx, msg) = event.context("gossip receive error")?; - if msg.req_id != req_id || msg.hf_id != model { - continue; - } - if !msg.is_executor { - continue; - } - let node_id: EndpointId = msg - .peer_id - .parse() - .context("failed to parse executor peer id")?; - info!("Discovered executor {}", node_id); - return Ok::(node_id); - } - Err(anyhow::anyhow!( - "gossip stream closed before discovery completed" - )) - }) - .await - .context("discovery timed out waiting for executor")??; - - Ok((selected, transport)) -} diff --git a/crates/cli/src/commands/gateway/anthropic.rs b/crates/cli/src/commands/gateway/anthropic.rs new file mode 100644 index 0000000..5d4c7c3 --- /dev/null +++ b/crates/cli/src/commands/gateway/anthropic.rs @@ -0,0 +1,299 @@ +use super::hellas_ext::{HellasExt, WithHellas}; +use super::state::{GatewayState, GenerationEvent, PreparedGeneration}; +use super::{next_id, parse_json_body, sse_event_data, sse_response}; +use crate::execution::{Outcome, ReceiptArtifact, StopReason}; +use async_stream::stream; +use axum::Json; +use axum::body::Bytes; +use axum::extract::State; +use axum::http::StatusCode; +use 
axum::response::sse::Event; +use axum::response::{IntoResponse, Response}; +use chatgrad::types::anthropic; +use futures::StreamExt; +use serde_json::json; +use std::sync::Arc; + +pub(super) async fn handle(State(state): State>, body: Bytes) -> Response { + let req = match parse_json_body::(&body, "Anthropic") { + Ok(req) => req, + Err(err) => return err.into_response(), + }; + let stream_response_flag = req.stream == Some(true); + let prepared = match state.prepare_anthropic(&req).await { + Ok(prepared) => prepared, + Err(err) => return err.into_response(), + }; + + if stream_response_flag { + return stream_response(prepared); + } + respond(prepared).await +} + +async fn respond(prepared: PreparedGeneration) -> Response { + let id = next_id("msg"); + let model = prepared.model.clone(); + let prompt_tokens = prepared.prompt_tokens; + let provenance = prepared.provenance.clone(); + let deadline = prepared.deadline(); + + let stream = prepared.stream(); + tokio::pin!(stream); + let mut text = String::new(); + let outcome = loop { + match tokio::time::timeout_at(deadline, stream.next()).await { + Ok(Some(Ok(GenerationEvent::Delta(d)))) => text.push_str(&d), + Ok(Some(Ok(GenerationEvent::Done(o)))) => break Ok(o), + Ok(Some(Err(err))) => break Err(format!("Inference error: {err:#}")), + Ok(None) => break Err("execution stream ended without terminal outcome".to_string()), + Err(_) => { + break Err(format!( + "inference timed out after {}s", + super::timeout_secs_until(deadline) + )); + } + } + }; + + let (completion_tokens, stop_reason, receipt) = match outcome { + Ok(Outcome::Completed { + total_tokens, + stop_reason, + receipt, + }) => { + info!( + receipt = %receipt.encoded(), + ?provenance, + total_tokens, + ?stop_reason, + "anthropic message completion ready" + ); + (total_tokens, map_stop_reason(stop_reason), receipt) + } + Ok(Outcome::Failed { position, error }) => { + warn!(position, %error, "anthropic message request failed"); + return super::json_error( + 
StatusCode::INTERNAL_SERVER_ERROR, + format!("Inference error: {error}"), + ); + } + Err(message) => { + error!(%message, "anthropic message request failed"); + return super::json_error(StatusCode::INTERNAL_SERVER_ERROR, message); + } + }; + + let response = anthropic::MessageResponse::builder() + .id(id) + .message_type(Some("message".to_string())) + .role("assistant".to_string()) + .content(vec![anthropic::ContentBlock::Text { text }]) + .model(model) + .stop_reason(Some(stop_reason)) + .usage(anthropic::AnthropicUsage::new( + prompt_tokens, + u32::try_from(completion_tokens).unwrap_or(u32::MAX), + )) + .build(); + + let hellas = match provenance.as_ref() { + Some(prov) => HellasExt::both(prov, &receipt), + None => HellasExt::receipt(&receipt), + }; + let body = WithHellas::new(response, hellas); + + let mut response = Json(body).into_response(); + if let Some(prov) = provenance { + response.extensions_mut().insert(prov); + } + response.extensions_mut().insert(receipt); + response +} + +#[cfg_attr(test, derive(Debug))] +struct AnthropicSsePayload { + name: &'static str, + json: serde_json::Value, +} + +impl AnthropicSsePayload { + fn into_event(self) -> Event { + sse_event_data(self.name, &self.json) + } +} + +fn stream_response(prepared: PreparedGeneration) -> Response { + let id = next_id("msg"); + let model = prepared.model.clone(); + let prompt_tokens = prepared.prompt_tokens; + let provenance = prepared.provenance.clone(); + let deadline = prepared.deadline(); + + let stream_provenance = provenance.clone(); + let payloads = stream! 
{ + let message = anthropic::MessageResponse::builder() + .id(id.clone()) + .message_type(Some("message".to_string())) + .role("assistant".to_string()) + .content(vec![]) + .model(model.clone()) + .usage(anthropic::AnthropicUsage::new(prompt_tokens, 0)) + .build(); + let message_hellas = match stream_provenance.as_ref() { + Some(prov) => HellasExt::commitment(prov), + None => HellasExt::default(), + }; + yield AnthropicSsePayload { + name: "message_start", + json: json!({ + "type": "message_start", + "message": WithHellas::new(message, message_hellas), + }), + }; + + let inner = prepared.stream(); + tokio::pin!(inner); + let mut content_started = false; + let mut completed: Option<(anthropic::StopReason, u64, ReceiptArtifact)> = None; + let mut error_message: Option = None; + + loop { + match tokio::time::timeout_at(deadline, inner.next()).await { + Ok(Some(Ok(GenerationEvent::Delta(text)))) => { + if !content_started { + content_started = true; + yield content_block_start(); + } + yield AnthropicSsePayload { + name: "content_block_delta", + json: serde_json::to_value( + anthropic::MessageStreamEvent::ContentBlockDelta { + index: 0, + delta: anthropic::ContentBlockDelta::TextDelta { text }, + }, + ) + .unwrap(), + }; + } + Ok(Some(Ok(GenerationEvent::Done(Outcome::Completed { + stop_reason, + total_tokens, + receipt, + })))) => { + info!( + receipt = %receipt.encoded(), + provenance = ?stream_provenance, + total_tokens, + ?stop_reason, + "anthropic message completion ready" + ); + completed = Some((map_stop_reason(stop_reason), total_tokens, receipt)); + break; + } + Ok(Some(Ok(GenerationEvent::Done(Outcome::Failed { error, .. 
})))) => { + error_message = Some(error); + break; + } + Ok(Some(Err(err))) => { + error_message = Some(format!("{err:#}")); + break; + } + Ok(None) => { + error_message = + Some("execution stream ended without terminal outcome".to_string()); + break; + } + Err(_) => { + error_message = Some(format!( + "inference timed out after {}s", + super::timeout_secs_until(deadline) + )); + break; + } + } + } + + if let Some(err) = error_message { + if content_started { + yield content_block_stop(); + } + yield error_payload(format!("Inference error: {err}")); + return; + } + + if let Some((stop_reason, total_tokens, receipt)) = completed { + if content_started { + yield content_block_stop(); + } + yield AnthropicSsePayload { + name: "message_delta", + json: serde_json::to_value(anthropic::MessageStreamEvent::MessageDelta { + delta: anthropic::StreamMessageDelta { + stop_reason: Some(stop_reason), + }, + usage: anthropic::AnthropicUsage::new( + prompt_tokens, + u32::try_from(total_tokens).unwrap_or(u32::MAX), + ), + }) + .unwrap(), + }; + yield AnthropicSsePayload { + name: "message_stop", + json: serde_json::to_value(WithHellas::new( + anthropic::MessageStreamEvent::MessageStop, + HellasExt::receipt(&receipt), + )) + .unwrap(), + }; + } + }; + let events = payloads.map(|payload| Ok::<_, std::convert::Infallible>(payload.into_event())); + let mut response = sse_response(events); + if let Some(prov) = provenance { + response.extensions_mut().insert(prov); + } + response +} + +fn content_block_start() -> AnthropicSsePayload { + AnthropicSsePayload { + name: "content_block_start", + json: serde_json::to_value(anthropic::MessageStreamEvent::ContentBlockStart { + index: 0, + content_block: anthropic::ContentBlock::Text { + text: String::new(), + }, + }) + .unwrap(), + } +} + +fn content_block_stop() -> AnthropicSsePayload { + AnthropicSsePayload { + name: "content_block_stop", + json: serde_json::to_value(anthropic::MessageStreamEvent::ContentBlockStop { index: 0 }) + .unwrap(), + 
} +} + +fn error_payload(message: String) -> AnthropicSsePayload { + AnthropicSsePayload { + name: "error", + json: serde_json::to_value(anthropic::MessageStreamEvent::Error { + error: anthropic::StreamError { + error_type: "invalid_request_error".to_string(), + message, + }, + }) + .unwrap(), + } +} + +fn map_stop_reason(stop: StopReason) -> anthropic::StopReason { + match stop { + StopReason::EndOfSequence | StopReason::Cancelled => anthropic::StopReason::EndTurn, + StopReason::MaxNewTokens => anthropic::StopReason::MaxTokens, + } +} diff --git a/crates/cli/src/commands/gateway/hellas_ext.rs b/crates/cli/src/commands/gateway/hellas_ext.rs new file mode 100644 index 0000000..528c95a --- /dev/null +++ b/crates/cli/src/commands/gateway/hellas_ext.rs @@ -0,0 +1,147 @@ +//! Wire-extension helpers for stamping hellas-namespaced metadata onto +//! protocol-native streaming/non-streaming JSON envelopes. +//! +//! The wrapper pattern keeps catgrad-llm's wire types +//! (`openai::ChatCompletionChunk`, `anthropic::MessageStreamEvent`, +//! `plain::CompletionChunk`, etc.) clean and protocol-neutral — +//! `WithHellas` adds a sibling `"hellas"` field at the gateway +//! emission boundary via `#[serde(flatten)]`. +//! +//! The public shape is `hellas.commitment` plus `hellas.receipt`; HTTP uses +//! the matching `x-hellas-commitment` and `x-hellas-receipt` headers. 
+ +use crate::execution::ReceiptArtifact; +use hellas_rpc::provenance::{ExecutionProvenance, encode_hex}; +use serde::Serialize; + +#[derive(Serialize, Default, Debug, Clone)] +pub(super) struct HellasExt { + #[serde(skip_serializing_if = "Option::is_none")] + pub commitment: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub receipt: Option, +} + +impl HellasExt { + pub fn is_empty(&self) -> bool { + self.commitment.is_none() && self.receipt.is_none() + } + + pub fn commitment(prov: &ExecutionProvenance) -> Self { + Self { + commitment: Some(encode_hex(&prov.commitment_id)), + receipt: None, + } + } + + pub fn receipt(receipt: &ReceiptArtifact) -> Self { + Self { + commitment: None, + receipt: Some(receipt.encoded()), + } + } + + pub fn both(prov: &ExecutionProvenance, receipt: &ReceiptArtifact) -> Self { + Self { + commitment: Some(encode_hex(&prov.commitment_id)), + receipt: Some(receipt.encoded()), + } + } +} + +/// Wraps any `Serialize` value with a sibling `"hellas"` field. +/// `#[serde(flatten)]` on `inner` produces the merged JSON, so wrapping +/// `ChatCompletionChunk` yields `{...chunk fields..., "hellas": {...}}`. +/// +/// Empty `HellasExt` is skipped at serialization, so `WithHellas` with +/// a default-constructed `hellas` is wire-equivalent to the unwrapped +/// inner value. 
+#[derive(Serialize, Debug)] +pub(super) struct WithHellas { + #[serde(flatten)] + pub inner: T, + #[serde(skip_serializing_if = "HellasExt::is_empty")] + pub hellas: HellasExt, +} + +impl WithHellas { + pub fn new(inner: T, hellas: HellasExt) -> Self { + Self { inner, hellas } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use serde_json::json; + + #[test] + fn empty_hellas_skipped() { + #[derive(Serialize)] + struct Inner { + a: u32, + } + let wrapped = WithHellas::new(Inner { a: 1 }, HellasExt::default()); + let v = serde_json::to_value(&wrapped).unwrap(); + assert_eq!(v, json!({ "a": 1 })); + } + + #[test] + fn commitment_renders_as_lowercase_hex() { + let prov = ExecutionProvenance { + commitment_id: [0xab; 32], + }; + let hellas = HellasExt::commitment(&prov); + assert_eq!(hellas.commitment.as_deref(), Some("ab".repeat(32).as_str())); + assert!(hellas.receipt.is_none()); + } + + #[test] + fn receipt_renders_as_base64url_envelope() { + let receipt = ReceiptArtifact::from_test_bytes(vec![0xcd; 32]); + let expected = receipt.encoded(); + let hellas = HellasExt::receipt(&receipt); + assert_eq!(hellas.receipt.as_deref(), Some(expected.as_str())); + assert!(hellas.commitment.is_none()); + } + + #[test] + fn flatten_merges_sibling_hellas_field() { + #[derive(Serialize)] + struct Inner { + id: &'static str, + choices: Vec, + } + let prov = ExecutionProvenance { + commitment_id: [0x12; 32], + }; + let wrapped = WithHellas::new( + Inner { + id: "chatcmpl-1", + choices: vec![0], + }, + HellasExt::commitment(&prov), + ); + let v = serde_json::to_value(&wrapped).unwrap(); + assert_eq!( + v, + json!({ + "id": "chatcmpl-1", + "choices": [0], + "hellas": { "commitment": "12".repeat(32) }, + }) + ); + } + + #[test] + fn both_carries_commitment_and_receipt() { + let prov = ExecutionProvenance { + commitment_id: [1; 32], + }; + let receipt = ReceiptArtifact::from_test_bytes(vec![2; 32]); + let expected = receipt.encoded(); + let hellas = HellasExt::both(&prov, &receipt); 
+ assert_eq!(hellas.commitment.as_deref(), Some("01".repeat(32).as_str())); + assert_eq!(hellas.receipt.as_deref(), Some(expected.as_str())); + } +} diff --git a/crates/cli/src/commands/gateway/mod.rs b/crates/cli/src/commands/gateway/mod.rs new file mode 100644 index 0000000..8568472 --- /dev/null +++ b/crates/cli/src/commands/gateway/mod.rs @@ -0,0 +1,244 @@ +mod anthropic; +mod hellas_ext; +mod openai; +mod plain; +mod provenance_layer; +mod state; +mod wrap; + +use crate::commands::CliResult; +use anyhow::{Context, bail}; +use axum::body::Bytes; +use axum::http::StatusCode; +use axum::response::sse::{Event, KeepAlive, Sse}; +use axum::response::{IntoResponse, Response}; +use axum::routing::post; +use axum::{Json, Router}; +use catgrad::prelude::Dtype; +use futures::Stream; +use serde::Serialize; +use serde_json::json; +use std::convert::Infallible; +use std::net::SocketAddr; +#[cfg(feature = "hellas-executor")] +use std::path::PathBuf; +use std::sync::Arc; +use std::sync::atomic::{AtomicU64, Ordering}; +use std::time::{SystemTime, UNIX_EPOCH}; +use tonic_iroh_transport::iroh::{EndpointId, SecretKey}; + +use self::state::{GatewayState, HttpError}; + +const DEFAULT_HTTP_PORT: u16 = 8080; + +static NEXT_ID: AtomicU64 = AtomicU64::new(1); + +pub struct GatewayOptions { + pub host: String, + pub port: Option, + pub node_id: Option, + pub node_addrs: Vec, + #[cfg(feature = "hellas-executor")] + pub local: bool, + #[cfg(feature = "hellas-executor")] + pub verify_local: bool, + pub verify: Option, + #[cfg(feature = "hellas-executor")] + pub queue_size: usize, + pub retries: usize, + pub default_max_tokens: u32, + pub force_model: Option, + pub metrics_port: Option, + pub dtype: Dtype, + #[cfg(feature = "hellas-executor")] + pub producer_key_path: Option, + pub secret_key: SecretKey, + pub wrap: Option, + pub wrap_args: Vec, +} + +pub async fn run(options: GatewayOptions) -> CliResult<()> { + let state = Arc::new(GatewayState::from_options(&options)?); + + let app = 
Router::new() + .route("/v1/chat/completions", post(openai::handle)) + .route("/v1/messages", post(anthropic::handle)) + .route("/v1/completions", post(plain::handle)) + .with_state(state.clone()) + .layer(provenance_layer::ProvenanceLayer); + + let listener = bind_gateway(&options.host, options.port).await?; + let bound_addr = listener + .local_addr() + .context("listener has no local address")?; + info!("gateway listening on {bound_addr}"); + + if let Some(metrics_port) = options.metrics_port { + let registry = Arc::new(prometheus_client::registry::Registry::default()); + let bundle = crate::metrics::MetricsBundle::new(registry); + crate::metrics::spawn_metrics_server(metrics_port, bundle); + } + + #[cfg(feature = "hellas-executor")] + if state.local { + info!( + "local catgrad execution, queue size: {}", + options.queue_size + ); + } else if state.verify_local { + info!( + "local catgrad verification, queue size: {}", + options.queue_size + ); + } else if let Some(verify_node) = state.verify_node_id.as_ref() { + info!("Verifying primary node against remote shadow node {verify_node}"); + } + #[cfg(not(feature = "hellas-executor"))] + if let Some(verify_node) = state.verify_node_id.as_ref() { + info!("Verifying primary node against remote shadow node {verify_node}"); + } + + info!("timeout: {}s", state.inference_timeout.as_secs()); + if let Some(model) = state.force_model.as_deref() { + info!("Forcing request model override to `{model}`"); + } + + let wrap_child = if let Some(cmd) = options.wrap.as_deref() { + // Wrapped commands talk to us over loopback, so an unspecified bind + // address (0.0.0.0 / ::) becomes 127.0.0.1 in the URLs they see. + let host = if options.host == "0.0.0.0" || options.host == "::" { + "127.0.0.1" + } else { + options.host.as_str() + }; + let base = format!("http://{host}:{}", bound_addr.port()); + info!("wrapping `{cmd}` with gateway base {base}"); + Some(wrap::spawn(cmd, &options.wrap_args, &base)?) 
+ } else { + None + }; + + let shutdown = Arc::new(tokio::sync::Notify::new()); + let server_shutdown = shutdown.clone(); + let server = std::future::IntoFuture::into_future( + axum::serve(listener, app).with_graceful_shutdown(async move { + tokio::select! { + _ = tokio::signal::ctrl_c() => {} + _ = server_shutdown.notified() => {} + } + }), + ); + + match wrap_child { + Some(mut child) => { + tokio::pin!(server); + tokio::select! { + res = &mut server => { + // Gateway stopped (ctrl-c or error); kill_on_drop tears the + // wrapped child down too. + res.context("gateway server failed")?; + } + status = child.wait() => { + let status = status.context("waiting on wrapped child failed")?; + shutdown.notify_one(); + server.await.context("gateway server failed")?; + if !status.success() { + bail!("wrapped command exited with status {status}"); + } + } + } + } + None => { + server.await.context("gateway server failed")?; + } + } + + Ok(()) +} + +/// Bind the gateway listener. With `--port`, fail loud on conflict (the user +/// asked for that exact port). Without it, try 8080 first and fall back to +/// an OS-assigned port on EADDRINUSE so a stray dev gateway doesn't block a +/// fresh one. 
+async fn bind_gateway(host: &str, port: Option) -> CliResult { + if let Some(p) = port { + let addr = format!("{host}:{p}"); + return tokio::net::TcpListener::bind(&addr) + .await + .with_context(|| format!("failed to bind gateway on {addr}")); + } + let preferred = format!("{host}:{DEFAULT_HTTP_PORT}"); + match tokio::net::TcpListener::bind(&preferred).await { + Ok(listener) => Ok(listener), + Err(err) if err.kind() == std::io::ErrorKind::AddrInUse => { + let fallback = format!("{host}:0"); + info!("failed to bind {preferred}; attempting to bind {fallback}"); + tokio::net::TcpListener::bind(&fallback) + .await + .with_context(|| format!("failed to bind gateway on {fallback}")) + } + Err(err) => Err(err).with_context(|| format!("failed to bind gateway on {preferred}")), + } +} + +fn parse_json_body( + body: &Bytes, + protocol: &str, +) -> Result { + catgrad_llm::utils::from_json_slice::(body).map_err(|err| HttpError { + status: StatusCode::BAD_REQUEST, + message: format!("Invalid {protocol} request: {err}"), + }) +} + +fn json_error(status: StatusCode, message: impl Into) -> Response { + ( + status, + Json(json!({ "error": { "message": message.into() } })), + ) + .into_response() +} + +/// Wrap an event stream as an SSE response. The stream IS the producer — +/// no spawn, no channel. When axum drops the response body the stream is +/// dropped, propagating drop-cancellation through every layer (decoder, +/// inference, broadcast subscriber, executor's per-running cancel token). 
+fn sse_response(stream: S) -> Response +where + S: Stream> + Send + 'static, +{ + Sse::new(stream) + .keep_alive(KeepAlive::default()) + .into_response() +} + +fn sse_data(payload: &T) -> Event { + let data = serde_json::to_string(payload).unwrap_or_else(|_| "{}".to_string()); + Event::default().data(data) +} + +fn sse_event_data(event: &str, payload: &T) -> Event { + let data = serde_json::to_string(payload).unwrap_or_else(|_| "{}".to_string()); + Event::default().event(event).data(data) +} + +fn next_id(prefix: &str) -> String { + let n = NEXT_ID.fetch_add(1, Ordering::Relaxed); + format!("{prefix}-{n}") +} + +fn now_unix() -> i64 { + SystemTime::now() + .duration_since(UNIX_EPOCH) + .map(|duration| duration.as_secs() as i64) + .unwrap_or(0) +} + +/// How many seconds remain until `deadline`, clamped to at least one +/// second so timeout error messages don't report `0s`. +fn timeout_secs_until(deadline: tokio::time::Instant) -> u64 { + deadline + .saturating_duration_since(tokio::time::Instant::now()) + .as_secs() + .max(1) +} diff --git a/crates/cli/src/commands/gateway/openai.rs b/crates/cli/src/commands/gateway/openai.rs new file mode 100644 index 0000000..94f7dbd --- /dev/null +++ b/crates/cli/src/commands/gateway/openai.rs @@ -0,0 +1,282 @@ +use super::hellas_ext::{HellasExt, WithHellas}; +use super::state::{GatewayState, GenerationEvent, PreparedGeneration}; +use super::{next_id, now_unix, parse_json_body, sse_data, sse_response}; +use crate::execution::{Outcome, ReceiptArtifact, StopReason}; +use async_stream::stream; +use axum::Json; +use axum::body::Bytes; +use axum::extract::State; +use axum::http::StatusCode; +use axum::response::{IntoResponse, Response}; +use chatgrad::types::openai; +use futures::StreamExt; +use serde_json::json; +use std::sync::Arc; + +pub(super) async fn handle(State(state): State>, body: Bytes) -> Response { + let req = match parse_json_body::(&body, "OpenAI") { + Ok(req) => req, + Err(err) => return err.into_response(), + }; + 
let stream_response_flag = req.stream == Some(true); + let include_usage = req + .stream_options + .as_ref() + .and_then(|options| options.include_usage) + .unwrap_or(false); + let prepared = match state.prepare_openai(&req).await { + Ok(prepared) => prepared, + Err(err) => return err.into_response(), + }; + + if stream_response_flag { + return stream_response(prepared, include_usage); + } + respond(prepared).await +} + +async fn respond(prepared: PreparedGeneration) -> Response { + let id = next_id("chatcmpl"); + let created = now_unix(); + let model = prepared.model.clone(); + let prompt_tokens = prepared.prompt_tokens; + let provenance = prepared.provenance.clone(); + let deadline = prepared.deadline(); + + let stream = prepared.stream(); + tokio::pin!(stream); + let mut text = String::new(); + let outcome = loop { + match tokio::time::timeout_at(deadline, stream.next()).await { + Ok(Some(Ok(GenerationEvent::Delta(d)))) => text.push_str(&d), + Ok(Some(Ok(GenerationEvent::Done(o)))) => break Ok(o), + Ok(Some(Err(err))) => break Err(format!("Inference error: {err:#}")), + Ok(None) => break Err("execution stream ended without terminal outcome".to_string()), + Err(_) => { + break Err(format!( + "inference timed out after {}s", + super::timeout_secs_until(deadline) + )); + } + } + }; + + let (completion_tokens, finish_reason, receipt) = match outcome { + Ok(Outcome::Completed { + total_tokens, + stop_reason, + receipt, + }) => { + info!( + receipt = %receipt.encoded(), + ?provenance, + total_tokens, + ?stop_reason, + "openai chat completion ready" + ); + (total_tokens, map_finish_reason(stop_reason), receipt) + } + Ok(Outcome::Failed { position, error }) => { + warn!(position, %error, "openai chat request failed"); + return super::json_error( + StatusCode::INTERNAL_SERVER_ERROR, + format!("Inference error: {error}"), + ); + } + Err(message) => { + error!(%message, "openai chat request failed"); + return super::json_error(StatusCode::INTERNAL_SERVER_ERROR, message); + 
} + }; + + let response = openai::ChatCompletionResponse::builder() + .id(id) + .object("chat.completion".to_string()) + .created(created) + .model(model) + .choices(vec![ + openai::ChatChoice::builder() + .index(0) + .message(openai::ChatMessage::assistant(text)) + .finish_reason(Some(finish_reason)) + .build(), + ]) + .usage(Some(openai::Usage::from_counts( + prompt_tokens, + u32::try_from(completion_tokens).unwrap_or(u32::MAX), + ))) + .build(); + + let hellas = match provenance.as_ref() { + Some(prov) => HellasExt::both(prov, &receipt), + None => HellasExt::receipt(&receipt), + }; + let body = WithHellas::new(response, hellas); + + let mut response = Json(body).into_response(); + if let Some(prov) = provenance { + response.extensions_mut().insert(prov); + } + response.extensions_mut().insert(receipt); + response +} + +fn stream_response(prepared: PreparedGeneration, include_usage: bool) -> Response { + let id = next_id("chatcmpl"); + let created = now_unix(); + let model = prepared.model.clone(); + let prompt_tokens = prepared.prompt_tokens; + let provenance = prepared.provenance.clone(); + let deadline = prepared.deadline(); + + let stream_provenance = provenance.clone(); + let mut response = sse_response(stream! 
{ + let start_chunk = chat_chunk( + &id, + created, + &model, + openai::ChatDelta { + role: Some("assistant".to_string()), + ..Default::default() + }, + None, + ); + let start_hellas = match stream_provenance.as_ref() { + Some(prov) => HellasExt::commitment(prov), + None => HellasExt::default(), + }; + yield Ok(sse_data(&WithHellas::new(start_chunk, start_hellas))); + + let inner = prepared.stream(); + tokio::pin!(inner); + let mut completed: Option<(openai::FinishReason, u64, ReceiptArtifact)> = None; + let mut error_message: Option = None; + + loop { + match tokio::time::timeout_at(deadline, inner.next()).await { + Ok(Some(Ok(GenerationEvent::Delta(text)))) => { + let chunk = chat_chunk( + &id, + created, + &model, + openai::ChatDelta { + content: Some(text), + ..Default::default() + }, + None, + ); + yield Ok(sse_data(&chunk)); + } + Ok(Some(Ok(GenerationEvent::Done(Outcome::Completed { + stop_reason, + total_tokens, + receipt, + })))) => { + info!( + receipt = %receipt.encoded(), + provenance = ?stream_provenance, + total_tokens, + ?stop_reason, + "openai chat completion ready" + ); + completed = Some((map_finish_reason(stop_reason), total_tokens, receipt)); + break; + } + Ok(Some(Ok(GenerationEvent::Done(Outcome::Failed { error, .. 
})))) => { + error_message = Some(error); + break; + } + Ok(Some(Err(err))) => { + error_message = Some(format!("{err:#}")); + break; + } + Ok(None) => { + error_message = + Some("execution stream ended without terminal outcome".to_string()); + break; + } + Err(_) => { + error_message = Some(format!( + "inference timed out after {}s", + super::timeout_secs_until(deadline) + )); + break; + } + } + } + + if let Some(err) = error_message { + yield Ok(sse_data(&json!({ + "error": { "message": format!("Inference error: {err}") } + }))); + return; + } + + if let Some((finish_reason, total_tokens, receipt)) = completed { + let finish_chunk = chat_chunk( + &id, + created, + &model, + openai::ChatDelta::default(), + Some(finish_reason), + ); + if include_usage { + yield Ok(sse_data(&finish_chunk)); + let usage_chunk = openai::ChatCompletionChunk::builder() + .id(id.clone()) + .object("chat.completion.chunk".to_string()) + .created(created) + .model(model.clone()) + .choices(vec![]) + .usage(Some(openai::Usage::from_counts( + prompt_tokens, + u32::try_from(total_tokens).unwrap_or(u32::MAX), + ))) + .build(); + yield Ok(sse_data(&WithHellas::new( + usage_chunk, + HellasExt::receipt(&receipt), + ))); + } else { + yield Ok(sse_data(&WithHellas::new( + finish_chunk, + HellasExt::receipt(&receipt), + ))); + } + yield Ok(axum::response::sse::Event::default().data("[DONE]")); + } + }); + if let Some(prov) = provenance { + response.extensions_mut().insert(prov); + } + response +} + +fn chat_chunk( + id: &str, + created: i64, + model: &str, + delta: openai::ChatDelta, + finish_reason: Option, +) -> openai::ChatCompletionChunk { + openai::ChatCompletionChunk::builder() + .id(id.to_string()) + .object("chat.completion.chunk".to_string()) + .created(created) + .model(model.to_string()) + .choices(vec![ + openai::ChatStreamChoice::builder() + .index(0) + .delta(delta) + .finish_reason(finish_reason) + .build(), + ]) + .build() +} + +fn map_finish_reason(stop: StopReason) -> 
openai::FinishReason { + match stop { + StopReason::EndOfSequence | StopReason::Cancelled => openai::FinishReason::Stop, + StopReason::MaxNewTokens => openai::FinishReason::Length, + } +} diff --git a/crates/cli/src/commands/gateway/plain.rs b/crates/cli/src/commands/gateway/plain.rs new file mode 100644 index 0000000..0b9197e --- /dev/null +++ b/crates/cli/src/commands/gateway/plain.rs @@ -0,0 +1,259 @@ +use super::hellas_ext::{HellasExt, WithHellas}; +use super::state::{GatewayState, GenerationEvent, PreparedGeneration}; +use super::{next_id, now_unix, parse_json_body, sse_data, sse_response}; +use crate::execution::{Outcome, ReceiptArtifact, StopReason}; +use async_stream::stream; +use axum::Json; +use axum::body::Bytes; +use axum::extract::State; +use axum::response::{IntoResponse, Response}; +use chatgrad::types::{openai, plain}; +use futures::StreamExt; +use serde_json::json; +use std::sync::Arc; + +pub(super) async fn handle(State(state): State>, body: Bytes) -> Response { + let req = match parse_json_body::(&body, "completion") { + Ok(req) => req, + Err(err) => return err.into_response(), + }; + let stream_response_flag = req.stream == Some(true); + let prepared = match state.prepare_plain(&req).await { + Ok(prepared) => prepared, + Err(err) => return err.into_response(), + }; + + if stream_response_flag { + return stream_response(prepared); + } + respond(prepared).await +} + +fn stream_response(prepared: PreparedGeneration) -> Response { + let id = next_id("cmpl"); + let created = now_unix(); + let model = prepared.model.clone(); + let provenance = prepared.provenance.clone(); + let deadline = prepared.deadline(); + + let stream_provenance = provenance.clone(); + let mut response = sse_response(stream! { + let inner = prepared.stream(); + tokio::pin!(inner); + + let mut completed: Option<(openai::FinishReason, ReceiptArtifact)> = None; + let mut error_message: Option = None; + // Track whether the commitment has been stamped on a chunk + // yet. 
The first per-delta chunk carries it; if the stream + // terminates with zero deltas, the terminal chunk carries + // both commitment and receipt. + let mut commitment_pending = stream_provenance.is_some(); + + loop { + match tokio::time::timeout_at(deadline, inner.next()).await { + Ok(Some(Ok(GenerationEvent::Delta(text)))) => { + let chunk = plain::CompletionChunk::builder() + .id(id.clone()) + .object("text_completion".to_string()) + .created(created) + .model(model.clone()) + .choices(vec![ + plain::CompletionChoice::builder() + .index(0) + .text(text) + .build(), + ]) + .build(); + let hellas = if commitment_pending { + commitment_pending = false; + match stream_provenance.as_ref() { + Some(prov) => HellasExt::commitment(prov), + None => HellasExt::default(), + } + } else { + HellasExt::default() + }; + yield Ok(sse_data(&WithHellas::new(chunk, hellas))); + } + Ok(Some(Ok(GenerationEvent::Done(Outcome::Completed { + stop_reason, + total_tokens, + receipt, + })))) => { + info!( + receipt = %receipt.encoded(), + provenance = ?stream_provenance, + total_tokens, + ?stop_reason, + "completion request ready" + ); + completed = Some((map_finish_reason(stop_reason), receipt)); + break; + } + Ok(Some(Ok(GenerationEvent::Done(Outcome::Failed { error, .. })))) => { + error_message = Some(error); + break; + } + Ok(Some(Err(err))) => { + error_message = Some(format!("{err:#}")); + break; + } + Ok(None) => { + error_message = + Some("execution stream ended without terminal outcome".to_string()); + break; + } + Err(_) => { + error_message = + Some(format!("inference timed out after {}s", super::timeout_secs_until(deadline))); + break; + } + } + } + + if let Some(err) = error_message { + // Error path: receipt stays fenced inside the Completed + // arm. Commitment can still ride the error frame if it + // hasn't been stamped yet — the stream terminated before + // any delta carried it. 
+ let mut error_value = json!({ + "error": { "message": format!("Inference error: {err}") } + }); + if commitment_pending + && let (Some(prov), Some(map)) = ( + stream_provenance.as_ref(), + error_value.as_object_mut(), + ) { + map.insert( + "hellas".to_string(), + serde_json::to_value(HellasExt::commitment(prov)).unwrap(), + ); + } + yield Ok(sse_data(&error_value)); + } else if let Some((reason, receipt)) = completed { + let final_chunk = plain::CompletionChunk::builder() + .id(id.clone()) + .object("text_completion".to_string()) + .created(created) + .model(model.clone()) + .choices(vec![ + plain::CompletionChoice::builder() + .index(0) + .text(String::new()) + .finish_reason(Some(reason)) + .build(), + ]) + .build(); + // Terminal chunk carries the receipt. If zero deltas ran, + // it ALSO carries the commitment. + let hellas = if commitment_pending { + match stream_provenance.as_ref() { + Some(prov) => HellasExt::both(prov, &receipt), + None => HellasExt::receipt(&receipt), + } + } else { + HellasExt::receipt(&receipt) + }; + yield Ok(sse_data(&WithHellas::new(final_chunk, hellas))); + } + + yield Ok(axum::response::sse::Event::default().data("[DONE]")); + }); + if let Some(prov) = provenance { + response.extensions_mut().insert(prov); + } + response +} + +async fn respond(prepared: PreparedGeneration) -> Response { + let id = next_id("cmpl"); + let created = now_unix(); + let model = prepared.model.clone(); + let prompt_tokens = prepared.prompt_tokens; + let provenance = prepared.provenance.clone(); + let deadline = prepared.deadline(); + + let stream = prepared.stream(); + tokio::pin!(stream); + let mut text = String::new(); + let outcome = loop { + match tokio::time::timeout_at(deadline, stream.next()).await { + Ok(Some(Ok(GenerationEvent::Delta(d)))) => text.push_str(&d), + Ok(Some(Ok(GenerationEvent::Done(o)))) => break Ok(o), + Ok(Some(Err(err))) => break Err(format!("Inference error: {err:#}")), + Ok(None) => break Err("execution stream ended without 
terminal outcome".to_string()), + Err(_) => { + break Err(format!( + "inference timed out after {}s", + super::timeout_secs_until(deadline) + )); + } + } + }; + + let (completion_tokens, finish_reason, receipt) = match outcome { + Ok(Outcome::Completed { + total_tokens, + stop_reason, + receipt, + }) => { + info!( + receipt = %receipt.encoded(), + ?provenance, + total_tokens, + ?stop_reason, + "completion request ready" + ); + (total_tokens, map_finish_reason(stop_reason), receipt) + } + Ok(Outcome::Failed { position, error }) => { + warn!(position, %error, "completion request failed"); + return super::json_error( + axum::http::StatusCode::INTERNAL_SERVER_ERROR, + format!("Inference error: {error}"), + ); + } + Err(message) => { + error!(%message, "completion request failed"); + return super::json_error(axum::http::StatusCode::INTERNAL_SERVER_ERROR, message); + } + }; + + let response = plain::CompletionResponse::builder() + .id(id) + .object("text_completion".to_string()) + .created(created) + .model(model) + .choices(vec![ + plain::CompletionChoice::builder() + .index(0) + .text(text) + .finish_reason(Some(finish_reason)) + .build(), + ]) + .usage(Some(openai::Usage::from_counts( + prompt_tokens, + u32::try_from(completion_tokens).unwrap_or(u32::MAX), + ))) + .build(); + + let hellas = match provenance.as_ref() { + Some(prov) => HellasExt::both(prov, &receipt), + None => HellasExt::receipt(&receipt), + }; + let body = WithHellas::new(response, hellas); + + let mut response = Json(body).into_response(); + if let Some(prov) = provenance { + response.extensions_mut().insert(prov); + } + response.extensions_mut().insert(receipt); + response +} + +fn map_finish_reason(stop: StopReason) -> openai::FinishReason { + match stop { + StopReason::EndOfSequence | StopReason::Cancelled => openai::FinishReason::Stop, + StopReason::MaxNewTokens => openai::FinishReason::Length, + } +} diff --git a/crates/cli/src/commands/gateway/provenance_layer.rs 
b/crates/cli/src/commands/gateway/provenance_layer.rs new file mode 100644 index 0000000..eed5d0a --- /dev/null +++ b/crates/cli/src/commands/gateway/provenance_layer.rs @@ -0,0 +1,198 @@ +//! Tower middleware that lifts `ExecutionProvenance` and, when known before +//! headers are sent, a terminal signed receipt envelope from response +//! extensions into `x-hellas-*` HTTP response headers. +//! +//! Handlers stay free of header-attachment boilerplate: they insert the +//! typed values into `response.extensions_mut()` and this layer renders +//! them as headers on the way out. SSE bodies emit the same data as +//! in-band events for browser EventSource consumers — those are still +//! produced by the handlers themselves (the layer can't see into the +//! body's stream). + +use axum::body::Body; +use axum::http::{HeaderName, HeaderValue, Request, Response}; +use futures::future::BoxFuture; +use hellas_rpc::provenance::{COMMITMENT_HEADER, ExecutionProvenance, RECEIPT_HEADER, encode_hex}; +use std::task::{Context, Poll}; +use tower::{Layer, Service}; + +use crate::execution::ReceiptArtifact; + +#[derive(Clone, Default)] +pub(super) struct ProvenanceLayer; + +impl Layer for ProvenanceLayer { + type Service = ProvenanceService; + + fn layer(&self, inner: S) -> Self::Service { + ProvenanceService { inner } + } +} + +#[derive(Clone)] +pub(super) struct ProvenanceService { + inner: S, +} + +impl Service> for ProvenanceService +where + S: Service, Response = Response> + Clone + Send + 'static, + S::Future: Send + 'static, + B: Send + 'static, +{ + type Response = Response; + type Error = S::Error; + type Future = BoxFuture<'static, Result, S::Error>>; + + fn poll_ready(&mut self, cx: &mut Context<'_>) -> Poll> { + self.inner.poll_ready(cx) + } + + fn call(&mut self, request: Request) -> Self::Future { + // Standard tower/axum cloning idiom: own a "ready" clone of the + // inner service for the spawned future, leave the original behind. 
+ let clone = self.inner.clone(); + let mut inner = std::mem::replace(&mut self.inner, clone); + Box::pin(async move { + let mut response = inner.call(request).await?; + apply_provenance_headers(&mut response); + Ok(response) + }) + } +} + +fn apply_provenance_headers(response: &mut Response) { + let extensions = response.extensions().clone(); + if let Some(prov) = extensions.get::() { + response + .headers_mut() + .insert(commitment_header(), header_value(&prov.commitment_id)); + } + if let Some(receipt) = extensions.get::() { + response + .headers_mut() + .insert(receipt_header(), receipt_header_value(receipt)); + } +} + +fn commitment_header() -> HeaderName { + HeaderName::from_static(COMMITMENT_HEADER) +} + +fn receipt_header() -> HeaderName { + HeaderName::from_static(RECEIPT_HEADER) +} + +fn header_value(bytes: &[u8; 32]) -> HeaderValue { + HeaderValue::from_str(&encode_hex(bytes)) + .expect("64-char lowercase hex is always a valid header value") +} + +fn receipt_header_value(receipt: &ReceiptArtifact) -> HeaderValue { + HeaderValue::from_str(&receipt.encoded()) + .expect("base64url receipt envelope is always a valid header value") +} + +#[cfg(test)] +mod tests { + use super::*; + use axum::http::StatusCode; + + fn build_response_with_extensions( + prov: Option, + receipt: Option, + ) -> Response { + let mut response = Response::builder() + .status(StatusCode::OK) + .body(Body::empty()) + .unwrap(); + if let Some(prov) = prov { + response.extensions_mut().insert(prov); + } + if let Some(receipt) = receipt { + response.extensions_mut().insert(receipt); + } + response + } + + #[test] + fn applies_both_headers_when_present() { + let prov = ExecutionProvenance { + commitment_id: [0xab; 32], + }; + let receipt = ReceiptArtifact::from_test_bytes(vec![0xef; 32]); + let expected_receipt = receipt.encoded(); + let mut response = build_response_with_extensions(Some(prov.clone()), Some(receipt)); + apply_provenance_headers(&mut response); + assert_eq!( + response + 
.headers() + .get(COMMITMENT_HEADER) + .and_then(|v| v.to_str().ok()), + Some("ab".repeat(32).as_str()) + ); + assert_eq!( + response + .headers() + .get(RECEIPT_HEADER) + .and_then(|v| v.to_str().ok()), + Some(expected_receipt.as_str()) + ); + } + + #[test] + fn skips_receipt_header_when_absent() { + let prov = ExecutionProvenance { + commitment_id: [1; 32], + }; + let mut response = build_response_with_extensions(Some(prov), None); + apply_provenance_headers(&mut response); + assert!(response.headers().contains_key(COMMITMENT_HEADER)); + assert!(!response.headers().contains_key(RECEIPT_HEADER)); + } + + #[test] + fn no_extensions_yields_no_headers() { + let mut response = build_response_with_extensions(None, None); + apply_provenance_headers(&mut response); + assert!(!response.headers().contains_key(COMMITMENT_HEADER)); + assert!(!response.headers().contains_key(RECEIPT_HEADER)); + } + + /// End-to-end: dispatch a request through an axum `Router` wrapped with + /// `ProvenanceLayer` and confirm the layer lifts the handler-set + /// extensions into the outgoing response headers. 
+ #[tokio::test] + async fn router_layer_lifts_extensions_to_headers() { + use axum::Router; + use axum::body::Body; + use axum::routing::get; + use tower::ServiceExt; + + async fn handler() -> Response { + let prov = ExecutionProvenance { + commitment_id: [0x12; 32], + }; + let receipt = ReceiptArtifact::from_test_bytes(vec![0x56; 32]); + let mut response = Response::new(Body::empty()); + response.extensions_mut().insert(prov); + response.extensions_mut().insert(receipt); + response + } + + let app = Router::new() + .route("/", get(handler)) + .layer(ProvenanceLayer); + + let request = Request::builder().uri("/").body(Body::empty()).unwrap(); + let response = app.oneshot(request).await.unwrap(); + assert_eq!( + response.headers().get(COMMITMENT_HEADER).unwrap(), + &"12".repeat(32) + ); + assert_eq!( + response.headers().get(RECEIPT_HEADER).unwrap(), + &ReceiptArtifact::from_test_bytes(vec![0x56; 32]).encoded() + ); + } +} diff --git a/crates/cli/src/commands/gateway/state.rs b/crates/cli/src/commands/gateway/state.rs new file mode 100644 index 0000000..36332dd --- /dev/null +++ b/crates/cli/src/commands/gateway/state.rs @@ -0,0 +1,801 @@ +use super::{GatewayOptions, json_error}; +use crate::execution::{ + ExecutionEvent, ExecutionRequest, ExecutionRoute, ExecutionRuntime, ExecutionStrategy, Outcome, + PreparedExecution, RemoteNodeTarget, +}; +use crate::text_output::TextOutputDecoder; +use anyhow::Context; +use async_stream::try_stream; +use axum::http::StatusCode; +use axum::response::{IntoResponse, Response}; +use catgrad::prelude::Dtype; +use chatgrad::PreparedPrompt; +use chatgrad::types::Message; +use chatgrad::types::{anthropic, openai, plain}; +use futures::Stream; +use futures::StreamExt; +#[cfg(feature = "hellas-executor")] +use hellas_executor::Executor; +use hellas_rpc::model::ModelAssets; +#[cfg(feature = "hellas-executor")] +use hellas_rpc::policy::{DownloadPolicy, ExecutePolicy}; +use hellas_rpc::provenance::ExecutionProvenance; +use 
std::collections::HashMap; +use std::error::Error as StdError; +use std::net::SocketAddr; +use std::sync::Arc; +use tokio::sync::{Mutex, RwLock}; +use tokio::time::Duration; +use tonic_iroh_transport::iroh::EndpointId; + +/// End-to-end deadline applied at the consumer of `PreparedGeneration::stream`. +/// Covers preparation (quote / discovery) AND the entire decode stream. +pub(super) const DEFAULT_INFERENCE_TIMEOUT: Duration = Duration::from_secs(300); + +#[derive(Clone)] +pub(super) struct GatewayState { + pub(super) node_id: Option, + pub(super) node_addrs: Vec, + #[cfg(feature = "hellas-executor")] + pub(super) local: bool, + #[cfg(feature = "hellas-executor")] + pub(super) verify_local: bool, + pub(super) verify_node_id: Option, + pub(super) retries: usize, + default_max_tokens: u32, + pub(super) force_model: Option, + pub(super) inference_timeout: Duration, + pub(super) dtype: Dtype, + runtime: ExecutionRuntime, + model_cache: Arc>>>, + model_load_locks: Arc>>>>, +} + +pub(super) struct PreparedGeneration { + pub(super) model: String, + pub(super) prepared: PreparedExecution, + /// Pre-flight provenance the executor committed to. `None` for routes + /// that defer their quote until streaming starts (`RemoteDiscovery`); + /// in that case headers can't be set and clients must rely on the + /// in-band SSE `hellas-provenance` event. + pub(super) provenance: Option, + pub(super) prompt_tokens: u32, + pub(super) stop_token_ids: Vec, + pub(super) assets: Arc, + pub(super) inference_timeout: Duration, +} + +/// One observation from a generation. The `Done` event is the authoritative +/// terminal frame — its `Outcome::Completed.total_tokens` is what should +/// be reported in protocol-level usage frames. 
+#[derive(Debug, Clone)] +pub(super) enum GenerationEvent { + Delta(String), + Done(Outcome), +} + +pub(super) struct HttpError { + pub(super) status: StatusCode, + pub(super) message: String, +} + +impl GatewayState { + pub(super) fn from_options(options: &GatewayOptions) -> anyhow::Result { + #[cfg(feature = "hellas-executor")] + let runtime = if options.local || options.verify_local { + let producer_key = + crate::identity::load_or_create_producer_key(options.producer_key_path.as_deref())?; + ExecutionRuntime::with_local_executor( + Executor::spawn_with_producer_key( + DownloadPolicy::Eager, + ExecutePolicy::Eager, + options.queue_size, + vec![options.dtype], + producer_key, + ) + .context("failed to initialize local execution backend")?, + ) + .with_secret_key(options.secret_key.clone()) + } else { + ExecutionRuntime::default().with_secret_key(options.secret_key.clone()) + }; + #[cfg(not(feature = "hellas-executor"))] + let runtime = ExecutionRuntime::default().with_secret_key(options.secret_key.clone()); + + Ok(Self { + node_id: options.node_id, + node_addrs: options.node_addrs.clone(), + #[cfg(feature = "hellas-executor")] + local: options.local, + #[cfg(feature = "hellas-executor")] + verify_local: options.verify_local, + verify_node_id: options.verify, + retries: options.retries, + default_max_tokens: options.default_max_tokens, + force_model: options.force_model.clone(), + inference_timeout: DEFAULT_INFERENCE_TIMEOUT, + dtype: options.dtype, + runtime, + model_cache: Arc::new(RwLock::new(HashMap::new())), + model_load_locks: Arc::new(Mutex::new(HashMap::new())), + }) + } + + fn resolve_model(&self, request_model: &str) -> String { + self.force_model + .clone() + .unwrap_or_else(|| request_model.to_string()) + } + + fn execution_route(&self) -> ExecutionRoute { + #[cfg(feature = "hellas-executor")] + if self.local { + return ExecutionRoute::Local; + } + ExecutionRoute::remote(self.node_id, self.node_addrs.clone(), self.retries) + } + + fn 
execution_strategy(&self) -> ExecutionStrategy { + let primary = self.execution_route(); + + #[cfg(feature = "hellas-executor")] + if self.verify_local { + return ExecutionStrategy::Verify { + primary, + shadow: ExecutionRoute::Local, + }; + } + + if let Some(node_id) = self.verify_node_id { + return ExecutionStrategy::Verify { + primary, + shadow: ExecutionRoute::RemoteDirect(RemoteNodeTarget { + node_id, + node_addrs: Vec::new(), + }), + }; + } + + ExecutionStrategy::Run(primary) + } + + async fn model_assets(&self, model: &str) -> anyhow::Result> { + { + let cache = self.model_cache.read().await; + if let Some(assets) = cache.get(model) { + return Ok(assets.clone()); + } + } + + let load_lock = { + let mut locks = self.model_load_locks.lock().await; + locks + .entry(model.to_string()) + .or_insert_with(|| Arc::new(Mutex::new(()))) + .clone() + }; + let _load_guard = load_lock.lock().await; + + { + let cache = self.model_cache.read().await; + if let Some(assets) = cache.get(model) { + return Ok(assets.clone()); + } + } + + let model_name = model.to_string(); + let dtype = self.dtype; + let assets = tokio::task::spawn_blocking(move || ModelAssets::load(&model_name, dtype)) + .await + .context("local model loader panicked")??; + + let assets = Arc::new(assets); + let mut cache = self.model_cache.write().await; + cache.insert(model.to_string(), assets.clone()); + Ok(assets) + } + + /// Drive the executor quote step and assemble a `PreparedGeneration` + /// from already-prepared inputs. Surface-specific assembly + /// (`prepare_openai` / `prepare_anthropic` / `prepare_plain`) + /// produces the `PreparedPrompt` before calling here. 
+ async fn finalize_generation( + &self, + model: String, + assets: Arc, + prepared_prompt: PreparedPrompt, + max_tokens: u32, + prepare_error: &str, + ) -> Result { + let prompt_tokens = prepared_prompt.input_ids.len() as u32; + let stop_token_ids = prepared_prompt.stop_token_ids.clone(); + let request = ExecutionRequest::new( + self.runtime.clone(), + assets.clone(), + prepared_prompt, + max_tokens, + self.execution_strategy(), + ) + .map_err(|err| HttpError { + status: StatusCode::BAD_REQUEST, + message: format!("Failed to build execution request: {err}"), + })?; + let prepared = request.prepare().await.map_err(|err| HttpError { + status: StatusCode::BAD_GATEWAY, + message: format!("{prepare_error}: {}", format_error_causes(err.as_ref())), + })?; + let provenance = prepared.provenance().cloned(); + + Ok(PreparedGeneration { + model, + assets, + prepared, + provenance, + prompt_tokens, + stop_token_ids, + inference_timeout: self.inference_timeout, + }) + } + + pub(super) async fn prepare_openai( + &self, + req: &openai::ChatCompletionRequest, + ) -> Result { + let max_tokens = req.max_tokens.unwrap_or(self.default_max_tokens); + let messages: Vec = req.messages.iter().cloned().map(Message::from).collect(); + let enable_thinking = req + .reasoning_effort + .is_some_and(openai::ReasoningEffort::enables_thinking); + let model = self.resolve_model(&req.model); + let assets = self.model_assets(&model).await.map_err(|err| HttpError { + status: StatusCode::BAD_REQUEST, + message: format!("Failed to load local model assets for `{model}`: {err}"), + })?; + let prepared_prompt = assets + .prepare_chat_with_options(&messages, req.tools.as_deref(), enable_thinking) + .map_err(|err| HttpError { + status: StatusCode::BAD_REQUEST, + message: format!("Failed to prepare chat request: {err}"), + })?; + self.finalize_generation( + model, + assets, + prepared_prompt, + max_tokens, + "Failed to prepare chat request", + ) + .await + } + + pub(super) async fn prepare_anthropic( + 
&self, + req: &anthropic::MessageRequest, + ) -> Result { + let messages = anthropic_request_to_openai_messages(req) + .into_iter() + .map(Message::from) + .collect::>(); + let model = self.resolve_model(&req.model); + let assets = self.model_assets(&model).await.map_err(|err| HttpError { + status: StatusCode::BAD_REQUEST, + message: format!("Failed to load local model assets for `{model}`: {err}"), + })?; + let prepared_prompt = assets + .prepare_chat_with_options(&messages, req.tools.as_deref(), false) + .map_err(|err| HttpError { + status: StatusCode::BAD_REQUEST, + message: format!("Failed to prepare chat request: {err}"), + })?; + self.finalize_generation( + model, + assets, + prepared_prompt, + req.max_tokens, + "Failed to prepare chat request", + ) + .await + } + + pub(super) async fn prepare_plain( + &self, + req: &plain::CompletionRequest, + ) -> Result { + let max_tokens = req.max_tokens.unwrap_or(self.default_max_tokens); + let prompt = req.prompt.clone(); + let model = self.resolve_model(&req.model); + let assets = self.model_assets(&model).await.map_err(|err| HttpError { + status: StatusCode::BAD_REQUEST, + message: format!("Failed to load local model assets for `{model}`: {err}"), + })?; + let prepared_prompt = assets.prepare_plain(&prompt).map_err(|err| HttpError { + status: StatusCode::BAD_REQUEST, + message: format!( + "Failed to prepare completion prompt: {}", + format_error_causes(&err) + ), + })?; + self.finalize_generation( + model, + assets, + prepared_prompt, + max_tokens, + "Failed to prepare completion prompt", + ) + .await + } +} + +impl PreparedGeneration { + /// Drive the execution to completion as a stream of `GenerationEvent`s. + /// + /// Owning consumption: dropping the returned stream cancels everything + /// downstream (broadcast subscriber → executor's per-running cancel + /// token, or tonic stream → server-side close-monitor on remote). 
+ /// + /// The `inference_timeout` field on `PreparedGeneration` is *not* + /// applied here — callers wrap the stream with `tokio::time::timeout_at` + /// against `Self::deadline()` so the protocol can shape the timeout + /// frame in its own format. + pub(super) fn stream(self) -> impl Stream> + Send { + let Self { + prepared, + assets, + stop_token_ids, + .. + } = self; + try_stream! { + let mut decoder = TextOutputDecoder::new(assets, &stop_token_ids); + let inner = prepared.stream(); + tokio::pin!(inner); + while let Some(event) = inner.next().await { + match event? { + ExecutionEvent::Chunk { tokens, .. } => { + let delta = decoder.push_bytes(&tokens)?; + if !delta.is_empty() { + yield GenerationEvent::Delta(delta); + } + } + ExecutionEvent::Done(outcome) => { + yield GenerationEvent::Done(outcome); + return; + } + } + } + Err(anyhow::anyhow!("execution stream ended without terminal outcome"))?; + } + } + + /// Absolute deadline for this generation's stream consumption. + /// Computed at call time; covers the whole lifecycle from this point on. + pub(super) fn deadline(&self) -> tokio::time::Instant { + tokio::time::Instant::now() + self.inference_timeout + } +} + +/// Convert an Anthropic `MessageRequest` into a flat list of OpenAI chat +/// messages so the existing OpenAI-style chat templates can consume it. +/// +/// Rules: +/// - `req.system` becomes a leading `system` role message. +/// - Assistant messages with `ToolUse` blocks collapse into one OpenAI +/// assistant message whose `tool_calls` carries each call. +/// - User messages with `ToolResult` blocks expand into one `tool` role +/// message per result (optionally preceded by a `user` message if the same +/// Anthropic message also carried text blocks). 
+fn anthropic_request_to_openai_messages( + req: &anthropic::MessageRequest, +) -> Vec { + let mut out = Vec::new(); + + if let Some(system) = &req.system { + let text = match system { + anthropic::SystemPrompt::Text(text) => text.clone(), + anthropic::SystemPrompt::Blocks(blocks) => blocks + .iter() + .map(|block| block.text.as_str()) + .collect::>() + .join(""), + }; + out.push(openai::ChatMessage::system(text)); + } + + for msg in &req.messages { + let blocks = match &msg.content { + anthropic::MessageContent::Text(text) => { + vec![anthropic::ContentBlock::Text { text: text.clone() }] + } + anthropic::MessageContent::Blocks(blocks) => blocks.clone(), + }; + match msg.role.as_str() { + "user" => emit_user_turn(&mut out, blocks), + "assistant" => emit_assistant_turn(&mut out, blocks), + _ => { + let text = blocks + .iter() + .filter_map(|block| match block { + anthropic::ContentBlock::Text { text } => Some(text.as_str()), + _ => None, + }) + .collect(); + out.push( + openai::ChatMessage::builder() + .role(msg.role.clone()) + .content(Some(openai::MessageContent::Text(text))) + .build(), + ); + } + } + } + + out +} + +fn emit_user_turn(out: &mut Vec, blocks: Vec) { + let mut text_parts = Vec::new(); + let mut tool_results = Vec::new(); + for block in blocks { + match block { + anthropic::ContentBlock::Text { text } => text_parts.push(text), + anthropic::ContentBlock::ToolResult { + tool_use_id, + content, + .. + } => tool_results.push((tool_use_id, content)), + anthropic::ContentBlock::ToolUse { .. 
} => {} + } + } + if !text_parts.is_empty() { + out.push(openai::ChatMessage::user(text_parts.join(""))); + } + for (tool_use_id, content) in tool_results { + out.push( + openai::ChatMessage::builder() + .role("tool".to_string()) + .content(Some(openai::MessageContent::Text( + anthropic_tool_result_to_string(&content), + ))) + .tool_call_id(Some(tool_use_id)) + .build(), + ); + } +} + +fn emit_assistant_turn(out: &mut Vec, blocks: Vec) { + let mut text_parts = Vec::new(); + let mut tool_calls = Vec::new(); + for block in blocks { + match block { + anthropic::ContentBlock::Text { text } => text_parts.push(text), + anthropic::ContentBlock::ToolUse { id, name, input } => { + let arguments = serde_json::to_string(&input).unwrap_or_else(|_| "{}".to_string()); + tool_calls.push(serde_json::json!({ + "id": id, + "type": "function", + "function": { "name": name, "arguments": arguments }, + })); + } + anthropic::ContentBlock::ToolResult { .. } => {} + } + } + let content = if text_parts.is_empty() { + None + } else { + Some(openai::MessageContent::Text(text_parts.join(""))) + }; + let tool_calls = if tool_calls.is_empty() { + None + } else { + Some(tool_calls) + }; + out.push( + openai::ChatMessage::builder() + .role("assistant".to_string()) + .content(content) + .tool_calls(tool_calls) + .build(), + ); +} + +/// Convert an Anthropic `tool_result.content` payload to the single-string +/// shape OpenAI's `tool` role message carries. Accepts raw strings, arrays of +/// text blocks (Anthropic permits both), or falls back to JSON serialization. 
+fn anthropic_tool_result_to_string(content: &serde_json::Value) -> String { + match content { + serde_json::Value::String(text) => text.clone(), + serde_json::Value::Array(blocks) => blocks + .iter() + .filter_map(|block| { + block + .as_object() + .and_then(|obj| obj.get("text")) + .and_then(serde_json::Value::as_str) + }) + .collect(), + other => serde_json::to_string(other).unwrap_or_default(), + } +} + +fn format_error_causes(err: &(dyn StdError + 'static)) -> String { + let mut parts = Vec::new(); + let mut current = err.source().unwrap_or(err); + parts.push(current.to_string()); + while let Some(source) = current.source() { + parts.push(source.to_string()); + current = source; + } + parts.join(": ") +} + +impl IntoResponse for HttpError { + fn into_response(self) -> Response { + if self.status.is_server_error() { + error!( + status = %self.status, + message = %self.message, + "gateway request failed" + ); + } else { + warn!( + status = %self.status, + message = %self.message, + "gateway request rejected" + ); + } + json_error(self.status, self.message) + } +} + +#[cfg(all(test, feature = "hellas-executor"))] +mod tests { + use super::*; + use std::str::FromStr; + + fn endpoint(byte: u8) -> EndpointId { + match byte { + 1 => EndpointId::from_str( + "bb18ebc065d836ecc7e1f33972d2c17eac9894cd33ce4916f66cb1165ccc7550", + ) + .expect("valid endpoint id"), + 2 => EndpointId::from_str( + "edfadcefb3917925de1111087f11925542c97e14ab00cf42b9447f7567a25b62", + ) + .expect("valid endpoint id"), + _ => panic!("unknown test endpoint"), + } + } + + fn state(local: bool, verify_local: bool, verify_node_id: Option) -> GatewayState { + GatewayState { + node_id: Some(endpoint(1)), + node_addrs: Vec::new(), + local, + verify_local, + verify_node_id, + retries: 2, + default_max_tokens: 128, + force_model: None, + inference_timeout: DEFAULT_INFERENCE_TIMEOUT, + dtype: Dtype::F32, + runtime: ExecutionRuntime::default(), + model_cache: Arc::default(), + model_load_locks: 
Arc::default(), + } + } + + #[test] + fn execution_strategy_uses_local_shadow_for_verify_local() { + let state = state(false, true, None); + assert_eq!( + state.execution_strategy(), + ExecutionStrategy::Verify { + primary: ExecutionRoute::RemoteDirect(RemoteNodeTarget { + node_id: endpoint(1), + node_addrs: Vec::new(), + }), + shadow: ExecutionRoute::Local, + } + ); + } + + #[test] + fn execution_strategy_uses_remote_shadow_for_verify_node() { + let verify_node = endpoint(2); + let state = state(false, false, Some(verify_node)); + assert_eq!( + state.execution_strategy(), + ExecutionStrategy::Verify { + primary: ExecutionRoute::RemoteDirect(RemoteNodeTarget { + node_id: endpoint(1), + node_addrs: Vec::new(), + }), + shadow: ExecutionRoute::RemoteDirect(RemoteNodeTarget { + node_id: endpoint(2), + node_addrs: Vec::new(), + }), + } + ); + } + + #[test] + fn execution_strategy_uses_local_run_when_local_is_enabled() { + let state = state(true, false, None); + assert_eq!( + state.execution_strategy(), + ExecutionStrategy::Run(ExecutionRoute::Local) + ); + } +} + +#[cfg(test)] +mod anthropic_conversion_tests { + use super::*; + use serde_json::json; + + fn assistant_tool_calls(msg: &openai::ChatMessage) -> &[serde_json::Value] { + msg.tool_calls.as_deref().expect("tool_calls populated") + } + + #[test] + fn system_prompt_text_becomes_leading_system_message() { + let req = anthropic::MessageRequest::builder() + .model("m".into()) + .messages(vec![anthropic::AnthropicMessage::user("hi")]) + .max_tokens(16) + .system(Some(anthropic::SystemPrompt::Text("be brief".into()))) + .build(); + let out = anthropic_request_to_openai_messages(&req); + assert_eq!(out[0].role, "system"); + assert_eq!( + out[0].content, + Some(openai::MessageContent::Text("be brief".into())) + ); + assert_eq!(out[1].role, "user"); + } + + #[test] + fn assistant_tool_use_collapses_to_openai_tool_calls() { + let req = anthropic::MessageRequest::builder() + .model("m".into()) + .messages(vec![ + 
anthropic::AnthropicMessage::user("what's the weather in Paris?"), + anthropic::AnthropicMessage { + role: "assistant".into(), + content: anthropic::MessageContent::Blocks(vec![ + anthropic::ContentBlock::Text { + text: "Let me check.".into(), + }, + anthropic::ContentBlock::ToolUse { + id: "toolu_1".into(), + name: "get_weather".into(), + input: json!({"city": "Paris"}), + }, + ]), + }, + ]) + .max_tokens(16) + .build(); + let out = anthropic_request_to_openai_messages(&req); + assert_eq!(out.len(), 2); + assert_eq!(out[1].role, "assistant"); + assert_eq!( + out[1].content, + Some(openai::MessageContent::Text("Let me check.".into())) + ); + let tool_calls = assistant_tool_calls(&out[1]); + assert_eq!(tool_calls.len(), 1); + assert_eq!(tool_calls[0]["id"], "toolu_1"); + assert_eq!(tool_calls[0]["type"], "function"); + assert_eq!(tool_calls[0]["function"]["name"], "get_weather"); + assert_eq!( + tool_calls[0]["function"]["arguments"], + r#"{"city":"Paris"}"# + ); + } + + #[test] + fn user_tool_result_becomes_tool_role_message() { + let req = anthropic::MessageRequest::builder() + .model("m".into()) + .messages(vec![anthropic::AnthropicMessage { + role: "user".into(), + content: anthropic::MessageContent::Blocks(vec![ + anthropic::ContentBlock::ToolResult { + tool_use_id: "toolu_1".into(), + content: json!("sunny, 22C"), + is_error: None, + }, + ]), + }]) + .max_tokens(16) + .build(); + let out = anthropic_request_to_openai_messages(&req); + assert_eq!(out.len(), 1); + assert_eq!(out[0].role, "tool"); + assert_eq!(out[0].tool_call_id.as_deref(), Some("toolu_1")); + assert_eq!( + out[0].content, + Some(openai::MessageContent::Text("sunny, 22C".into())) + ); + } + + #[test] + fn user_message_with_text_and_tool_result_splits() { + let req = anthropic::MessageRequest::builder() + .model("m".into()) + .messages(vec![anthropic::AnthropicMessage { + role: "user".into(), + content: anthropic::MessageContent::Blocks(vec![ + anthropic::ContentBlock::ToolResult { + tool_use_id: 
"toolu_1".into(), + content: json!("sunny"), + is_error: None, + }, + anthropic::ContentBlock::Text { + text: "thanks!".into(), + }, + ]), + }]) + .max_tokens(16) + .build(); + let out = anthropic_request_to_openai_messages(&req); + // Text flushes first, then the tool messages follow. + assert_eq!(out.len(), 2); + assert_eq!(out[0].role, "user"); + assert_eq!( + out[0].content, + Some(openai::MessageContent::Text("thanks!".into())) + ); + assert_eq!(out[1].role, "tool"); + assert_eq!(out[1].tool_call_id.as_deref(), Some("toolu_1")); + } + + #[test] + fn tool_result_content_accepts_blocks_or_object() { + assert_eq!( + anthropic_tool_result_to_string(&json!("plain")), + "plain".to_string() + ); + assert_eq!( + anthropic_tool_result_to_string(&json!([ + {"type": "text", "text": "alpha"}, + {"type": "text", "text": "beta"}, + ])), + "alphabeta".to_string() + ); + assert_eq!( + anthropic_tool_result_to_string(&json!({"result": 42})), + r#"{"result":42}"#.to_string() + ); + } + + #[test] + fn parallel_tool_calls_all_land_on_single_assistant_message() { + let req = anthropic::MessageRequest::builder() + .model("m".into()) + .messages(vec![anthropic::AnthropicMessage { + role: "assistant".into(), + content: anthropic::MessageContent::Blocks(vec![ + anthropic::ContentBlock::ToolUse { + id: "toolu_1".into(), + name: "get_weather".into(), + input: json!({"city": "Paris"}), + }, + anthropic::ContentBlock::ToolUse { + id: "toolu_2".into(), + name: "get_time".into(), + input: json!({"tz": "UTC"}), + }, + ]), + }]) + .max_tokens(16) + .build(); + let out = anthropic_request_to_openai_messages(&req); + assert_eq!(out.len(), 1); + assert_eq!(out[0].role, "assistant"); + assert_eq!(out[0].content, None); + let tool_calls = assistant_tool_calls(&out[0]); + assert_eq!(tool_calls.len(), 2); + assert_eq!(tool_calls[0]["id"], "toolu_1"); + assert_eq!(tool_calls[1]["id"], "toolu_2"); + } +} diff --git a/crates/cli/src/commands/gateway/wrap.rs b/crates/cli/src/commands/gateway/wrap.rs new 
file mode 100644 index 0000000..28b2b90 --- /dev/null +++ b/crates/cli/src/commands/gateway/wrap.rs @@ -0,0 +1,46 @@ +use std::process::Stdio; + +use anyhow::Context; +use tokio::process::{Child, Command}; + +use crate::commands::CliResult; + +/// Spawn the wrapped command. The child inherits this process's stdio so +/// shell redirection on the wrapped command works as the user expects, and +/// is configured with `kill_on_drop(true)` so dropping the returned `Child` +/// (e.g. on gateway shutdown) tears it down too. +pub fn spawn(cmd: &str, args: &[String], base_url: &str) -> CliResult { + let mut command = Command::new(cmd); + command + .args(args) + .env("OPENAI_BASE_URL", format!("{base_url}/v1")) + .env("ANTHROPIC_BASE_URL", base_url) + .stdin(Stdio::inherit()) + .stdout(Stdio::inherit()) + .stderr(Stdio::inherit()) + .kill_on_drop(true); + + // PR_SET_PDEATHSIG: if the gateway dies (panic / SIGKILL), the kernel + // delivers SIGTERM to the wrapped child instead of stranding it as an + // orphan. Linux-only; on macOS/BSD we rely on `kill_on_drop` for orderly + // shutdown but a hard parent kill leaves the child running. + #[cfg(target_os = "linux")] + unsafe { + command.pre_exec(|| { + if libc::prctl(libc::PR_SET_PDEATHSIG, libc::SIGTERM as libc::c_ulong) != 0 { + return Err(std::io::Error::last_os_error()); + } + // Race window: if the parent already died between fork and prctl, + // the signal will never fire. Re-check; bail if we're now reparented + // to init. 
+ if libc::getppid() == 1 { + libc::_exit(0); + } + Ok(()) + }); + } + + command + .spawn() + .with_context(|| format!("failed to spawn `{cmd}`")) +} diff --git a/crates/cli/src/commands/health.rs b/crates/cli/src/commands/health.rs deleted file mode 100644 index 3335d7f..0000000 --- a/crates/cli/src/commands/health.rs +++ /dev/null @@ -1,31 +0,0 @@ -use crate::commands::CliResult; -use anyhow::Context; -use hellas_rpc::pb::hellas::node_client::NodeClient; -use hellas_rpc::pb::hellas::node_server::NodeServer; -use hellas_rpc::pb::hellas::HealthCheckRequest; -use tonic_iroh_transport::iroh::{Endpoint, EndpointId}; -use tonic_iroh_transport::IrohConnect; - -pub async fn run(node_id: EndpointId) -> CliResult<()> { - let endpoint = Endpoint::builder() - .bind() - .await - .context("failed to create iroh endpoint")?; - - let channel = NodeServer::<()>::connect(&endpoint, node_id.into()) - .await - .with_context(|| format!("failed to connect to node {node_id}"))?; - - let mut client = NodeClient::new(channel); - let response = client - .health_check(HealthCheckRequest {}) - .await - .context("health check RPC failed")? 
- .into_inner(); - - println!("Version: {}", response.version); - println!("Uptime: {}s", response.uptime_seconds); - println!("Node ID: {}", response.node_id); - - Ok(()) -} diff --git a/crates/cli/src/commands/identity.rs b/crates/cli/src/commands/identity.rs new file mode 100644 index 0000000..b5f9f1b --- /dev/null +++ b/crates/cli/src/commands/identity.rs @@ -0,0 +1,25 @@ +use crate::commands::CliResult; +use hellas_core::ProducerSigningKey; +use tonic_iroh_transport::iroh::SecretKey; + +pub fn show_node_id(secret_key: &SecretKey) -> CliResult<()> { + println!("{}", secret_key.public()); + Ok(()) +} + +pub fn show_producer_key(key: &ProducerSigningKey) -> CliResult<()> { + let public_key = key.public_key(); + println!("signature_kind: secp256k1"); + println!("public_key: {}", hex(public_key.bytes())); + println!("producer_id: {}", hex(key.producer_id().as_bytes())); + Ok(()) +} + +fn hex(bytes: &[u8]) -> String { + let mut out = String::with_capacity(bytes.len() * 2); + for byte in bytes { + use std::fmt::Write; + let _ = write!(out, "{byte:02x}"); + } + out +} diff --git a/crates/cli/src/commands/llm.rs b/crates/cli/src/commands/llm.rs new file mode 100644 index 0000000..0aff26d --- /dev/null +++ b/crates/cli/src/commands/llm.rs @@ -0,0 +1,190 @@ +use crate::commands::CliResult; +use crate::execution::{ + ExecutionEvent, ExecutionRequest, ExecutionRoute, ExecutionRuntime, ExecutionStrategy, Outcome, +}; +use crate::text_output::TextOutputDecoder; +use catgrad::prelude::Dtype; +use chatgrad::types::{Message, openai::ChatMessage}; +use futures::StreamExt; +use hellas_rpc::ExecutorError; +use hellas_rpc::model::ModelAssets; +use std::io::{self, Write}; +use std::net::SocketAddr; +#[cfg(feature = "hellas-executor")] +use std::path::PathBuf; +use std::sync::Arc; +use tonic_iroh_transport::iroh::{EndpointId, SecretKey}; + +pub struct ExecuteOptions { + pub node_id: Option, + pub node_addrs: Vec, + pub model: String, + pub prompt: String, + pub max_seq: u32, + pub 
retries: usize, + #[cfg(feature = "hellas-executor")] + pub local: bool, + #[cfg(feature = "hellas-executor")] + pub verify_local: bool, + #[cfg(feature = "hellas-executor")] + pub producer_key_path: Option, + pub raw: bool, + /// Ordered preference list. The first entry is what the client *first* + /// builds the program at; later entries are tried via fallback if the + /// remote executor refuses with `DtypeNotSupported`. For `--local` / + /// `--verify-local` the embedded executor's `supported_dtypes` is the + /// full list so no fallback occurs. + pub dtype: Vec, +} + +/// Returns `true` if `err`'s chain carries an executor's +/// `DtypeNotSupported` decision — either as a local `ExecutorError` (the +/// `--local` route) or as a remote `tonic::Status` with `FailedPrecondition` +/// and the canonical message prefix. +fn is_dtype_not_supported(err: &anyhow::Error) -> bool { + for cause in err.chain() { + if let Some(ExecutorError::DtypeNotSupported { .. }) = cause.downcast_ref::() + { + return true; + } + if let Some(status) = cause.downcast_ref::() + && status.code() == tonic::Code::FailedPrecondition + && status.message().starts_with("program was built for dtype") + { + return true; + } + } + false +} + +pub async fn run(options: ExecuteOptions, secret_key: SecretKey) -> CliResult<()> { + if options.dtype.is_empty() { + anyhow::bail!("--dtype must list at least one of f32, f16, bf16"); + } + + // Pre-tokenize the prompt once. Tokenization is dtype-independent, so the + // `assets` we use here is throwaway; we reload per attempt below to get + // the dtype-specific courtesy request construction needs. 
+ let bootstrap_assets = Arc::new(ModelAssets::load(&options.model, options.dtype[0])?); + let messages = vec![Message::openai(ChatMessage::user(&options.prompt))]; + let prepared = if options.raw || !bootstrap_assets.has_chat_template() { + if options.raw { + info!("executing raw prompt without chat template"); + } else { + info!("model has no chat template; using raw prompt"); + } + bootstrap_assets.prepare_plain(&options.prompt)? + } else { + info!("executing prompt with model chat template"); + bootstrap_assets.prepare_chat(&messages)? + }; + let mut decoder = TextOutputDecoder::new(bootstrap_assets.clone(), &prepared.stop_token_ids); + + let last_index = options.dtype.len() - 1; + for (idx, &dtype) in options.dtype.iter().enumerate() { + if idx > 0 { + info!(?dtype, "previous dtype rejected, retrying"); + } + + // Per-attempt assets: same tokenizer/template as bootstrap, but the + // courtesy request below asks the provider for this dtype. + let assets = Arc::new(ModelAssets::load(&options.model, dtype)?); + + #[cfg(feature = "hellas-executor")] + let runtime = if options.local || options.verify_local { + let producer_key = + crate::identity::load_or_create_producer_key(options.producer_key_path.as_deref())?; + // Embedded executor accepts the full preference list so a future + // dialer can pin any of them. The CLI itself only ever builds + // the program at the first acceptable entry. + ExecutionRuntime::spawn_default_local_with_producer_key( + hellas_rpc::DEFAULT_EXECUTION_QUEUE_CAPACITY, + options.dtype.clone(), + producer_key, + )? 
+ .with_secret_key(secret_key.clone()) + } else { + ExecutionRuntime::default().with_secret_key(secret_key.clone()) + }; + #[cfg(not(feature = "hellas-executor"))] + let runtime = ExecutionRuntime::default().with_secret_key(secret_key.clone()); + + #[cfg(feature = "hellas-executor")] + let strategy = if options.verify_local { + if idx == 0 { + info!("executing remotely and verifying against local catgrad backend"); + } + ExecutionStrategy::Verify { + primary: ExecutionRoute::remote( + options.node_id, + options.node_addrs.clone(), + options.retries, + ), + shadow: ExecutionRoute::Local, + } + } else if options.local { + if idx == 0 { + info!(?dtype, "executing locally with catgrad backend"); + } + ExecutionStrategy::Run(ExecutionRoute::Local) + } else { + ExecutionStrategy::Run(ExecutionRoute::remote( + options.node_id, + options.node_addrs.clone(), + options.retries, + )) + }; + #[cfg(not(feature = "hellas-executor"))] + let strategy = ExecutionStrategy::Run(ExecutionRoute::remote( + options.node_id, + options.node_addrs.clone(), + options.retries, + )); + + let request = + ExecutionRequest::new(runtime, assets, prepared.clone(), options.max_seq, strategy)?; + let uses_remote = request.uses_remote_transport(); + + let result: anyhow::Result<()> = async { + let stream = request.stream(); + tokio::pin!(stream); + let mut completed = false; + while let Some(event) = stream.next().await { + match event? { + ExecutionEvent::Chunk { tokens, .. } => { + let delta = decoder.push_bytes(&tokens)?; + if !delta.is_empty() { + print!("{delta}"); + io::stdout().flush()?; + } + } + ExecutionEvent::Done(Outcome::Completed { .. }) => { + completed = true; + break; + } + ExecutionEvent::Done(Outcome::Failed { error, .. 
}) => { + anyhow::bail!("execution failed: {error}"); + } + } + } + if !completed { + anyhow::bail!("execution stream ended without terminal outcome"); + } + Ok(()) + } + .await; + + if uses_remote { + crate::tracing_config::suppress_execute_tail_logs(); + } + + match result { + Ok(()) => return Ok(()), + Err(err) if idx < last_index && is_dtype_not_supported(&err) => { + continue; + } + Err(err) => return Err(err), + } + } + unreachable!("loop returns on Ok or last-index error") +} diff --git a/crates/cli/src/commands/mod.rs b/crates/cli/src/commands/mod.rs index d2dfa68..dcda55f 100644 --- a/crates/cli/src/commands/mod.rs +++ b/crates/cli/src/commands/mod.rs @@ -1,6 +1,11 @@ pub type CliResult = anyhow::Result; -pub mod execute; -pub mod health; -#[cfg(feature = "serve")] +pub mod artifact; +pub mod gateway; +pub mod identity; +pub mod llm; +pub mod monitor; +pub mod opaque; +pub mod rpc; +#[cfg(feature = "hellas-executor")] pub mod serve; diff --git a/crates/cli/src/commands/monitor.rs b/crates/cli/src/commands/monitor.rs new file mode 100644 index 0000000..051b12b --- /dev/null +++ b/crates/cli/src/commands/monitor.rs @@ -0,0 +1,322 @@ +use crate::commands::CliResult; + +use anyhow::Context; +use futures::StreamExt; +use hellas_pb::swarm::node_client::NodeClient; +use hellas_pb::swarm::{GetKnownPeersRequest, GetNodeInfoRequest, GetNodeInfoResponse}; +use hellas_rpc::GRPC_MESSAGE_LIMIT; +use hellas_rpc::discovery::DiscoveryEndpoint; +use hellas_rpc::service::{ExecuteService, NodeService}; +use std::collections::HashSet; +use std::future; +use tokio::task::JoinSet; +use tokio::time::{Duration, timeout}; +use tonic_iroh_transport::iroh::{EndpointId, SecretKey}; +use tonic_iroh_transport::swarm::{ + DhtBackend, MdnsBackend, Peer, PeerExchangeBackend, ServiceRegistry, +}; +use tonic_iroh_transport::{ConnectionPool, PoolOptions}; + +const CONNECT_TIMEOUT: Duration = Duration::from_secs(3); +const RPC_TIMEOUT: Duration = Duration::from_secs(3); + +struct 
PeerInterrogationOutcome { + node_info: GetNodeInfoResponse, + known_peers: Vec, + invalid_known_peers: usize, + known_peers_error: Option, +} + +struct DiscoveryEventContext<'a> { + node_pool: &'a ConnectionPool, + interrogate: bool, + service_seen: &'a mut HashSet, + unique_peers: &'a mut HashSet, + interrogated: &'a mut HashSet, + interrogations: &'a mut JoinSet<(EndpointId, anyhow::Result)>, +} + +pub async fn run( + timeout_secs: Option, + interrogate: bool, + secret_key: SecretKey, +) -> CliResult<()> { + let bound = DiscoveryEndpoint::bind(Some(secret_key)).await?; + let endpoint = bound.endpoint; + let mdns = bound.bindings.mdns; + let shared_dht = bound.bindings.dht; + + let peer_exchange = PeerExchangeBackend::new(); + let mut registry = ServiceRegistry::new(&endpoint); + registry.with_pool_options(PoolOptions { + connect_timeout: CONNECT_TIMEOUT, + ..PoolOptions::default() + }); + registry.add(MdnsBackend::new(mdns)); + registry.add(DhtBackend::with_dht(&endpoint, shared_dht)); + registry.add(peer_exchange.clone()); + let node_pool = registry.pool::(); + + let mut node_discovery = Box::pin(registry.discover::()); + let mut execute_discovery = Box::pin(registry.discover::()); + + let mut interrogations = JoinSet::new(); + let mut node_seen = HashSet::new(); + let mut execute_seen = HashSet::new(); + let mut unique_peers = HashSet::new(); + let mut interrogated = HashSet::new(); + + let mut interrogation_ok = 0usize; + let mut interrogation_failed = 0usize; + let mut hinted_peers = 0usize; + let mut node_done = false; + let mut execute_done = false; + + println!( + "event=monitor-start local_peer={} interrogate={} timeout_secs={}", + endpoint.id(), + interrogate, + timeout_secs + .map(|secs| secs.to_string()) + .unwrap_or_else(|| "none".to_string()) + ); + println!("event=monitor-ready message=\"press Ctrl+C to stop\""); + + let monitor_timeout = async { + if let Some(secs) = timeout_secs { + tokio::time::sleep(Duration::from_secs(secs)).await; + } else { 
+ future::pending::<()>().await; + } + }; + tokio::pin!(monitor_timeout); + + loop { + tokio::select! { + _ = tokio::signal::ctrl_c() => { + println!("event=monitor-stop reason=signal"); + break; + } + _ = &mut monitor_timeout => { + println!("event=monitor-stop reason=timeout"); + break; + } + peer = node_discovery.next(), if !node_done => { + match peer { + Some(Ok(peer)) => { + handle_discovery_event( + "node", + &peer, + DiscoveryEventContext { + node_pool: &node_pool, + interrogate, + service_seen: &mut node_seen, + unique_peers: &mut unique_peers, + interrogated: &mut interrogated, + interrogations: &mut interrogations, + }, + ); + } + Some(Err(err)) => { + println!("event=discovery-error service=node error=\"{err}\""); + } + None => { + node_done = true; + println!("event=discovery-complete service=node"); + } + } + } + peer = execute_discovery.next(), if !execute_done => { + match peer { + Some(Ok(peer)) => { + handle_discovery_event( + "execute", + &peer, + DiscoveryEventContext { + node_pool: &node_pool, + interrogate, + service_seen: &mut execute_seen, + unique_peers: &mut unique_peers, + interrogated: &mut interrogated, + interrogations: &mut interrogations, + }, + ); + } + Some(Err(err)) => { + println!("event=discovery-error service=execute error=\"{err}\""); + } + None => { + execute_done = true; + println!("event=discovery-complete service=execute"); + } + } + } + joined = interrogations.join_next(), if !interrogations.is_empty() => { + match joined { + Some(Ok((peer_id, Ok(outcome)))) => { + interrogation_ok += 1; + let info = &outcome.node_info; + println!( + "event=node-info peer={} reported_node_id={} version={} build={} os={} uptime_seconds={} graffiti={}", + peer_id, + info.node_id, + info.version, + info.build, + info.os, + info.uptime_seconds, + String::from_utf8_lossy(&info.graffiti), + ); + + if let Some(err) = outcome.known_peers_error.as_deref() { + println!("event=known-peers-error peer={} error=\"{}\"", peer_id, err); + } + + if 
outcome.invalid_known_peers > 0 { + println!( + "event=known-peers-invalid peer={} invalid_count={}", + peer_id, + outcome.invalid_known_peers + ); + } + + println!( + "event=known-peers peer={} count={}", + peer_id, + outcome.known_peers.len() + ); + + if !outcome.known_peers.is_empty() { + hinted_peers += outcome.known_peers.len(); + for hinted in &outcome.known_peers { + println!("event=peer-hint from={} peer={}", peer_id, hinted); + } + peer_exchange.ingest_peers(outcome.known_peers.iter().copied()); + } + } + Some(Ok((peer_id, Err(err)))) => { + interrogation_failed += 1; + println!("event=interrogate-error peer={} error=\"{err:#}\"", peer_id); + } + Some(Err(err)) => { + interrogation_failed += 1; + println!("event=interrogate-error error=\"task join failed: {err}\""); + } + None => {} + } + } + } + + if node_done && execute_done && interrogations.is_empty() { + println!("event=monitor-stop reason=discovery-exhausted"); + break; + } + } + + println!( + "event=monitor-summary unique_peers={} node_service_peers={} execute_service_peers={} interrogated={} interrogation_ok={} interrogation_failed={} hinted_peers={}", + unique_peers.len(), + node_seen.len(), + execute_seen.len(), + interrogated.len(), + interrogation_ok, + interrogation_failed, + hinted_peers + ); + + Ok(()) +} + +fn handle_discovery_event(service: &str, peer: &Peer, context: DiscoveryEventContext<'_>) { + let peer_id = peer.id(); + if !context.service_seen.insert(peer_id) { + return; + } + + context.unique_peers.insert(peer_id); + println!( + "event=discovered service={} peer={} source={} trust={} remote_trust={} source_trust={}", + service, + peer_id, + peer.source(), + peer.trust(), + peer.remote_trust(), + peer.source_trust() + ); + + if context.interrogate && context.interrogated.insert(peer_id) { + println!("event=interrogate-start peer={}", peer_id); + let node_pool = context.node_pool.clone(); + context.interrogations.spawn(async move { + let result = interrogate_peer(node_pool, 
peer_id).await; + (peer_id, result) + }); + } +} + +async fn interrogate_peer( + node_pool: ConnectionPool, + peer_id: EndpointId, +) -> anyhow::Result { + let channel = node_pool + .channel(peer_id) + .await + .with_context(|| format!("failed to connect to node service on {peer_id}"))?; + + let mut client = NodeClient::new(channel) + .max_decoding_message_size(GRPC_MESSAGE_LIMIT) + .max_encoding_message_size(GRPC_MESSAGE_LIMIT); + + let node_info = timeout(RPC_TIMEOUT, client.get_node_info(GetNodeInfoRequest {})) + .await + .map_err(|_| anyhow::anyhow!("get_node_info timed out after {RPC_TIMEOUT:?}"))? + .context("get_node_info RPC failed")? + .into_inner(); + + let mut known_peers = Vec::new(); + let mut invalid_known_peers = 0usize; + let mut known_peers_error = None; + + match timeout( + RPC_TIMEOUT, + client.get_known_peers(GetKnownPeersRequest { + service_alpn: String::new(), + }), + ) + .await + { + Ok(Ok(resp)) => { + let mut dedupe = HashSet::new(); + for raw_id in resp.into_inner().peer_ids { + match decode_endpoint_id(&raw_id) { + Ok(id) if id != peer_id => { + if dedupe.insert(id) { + known_peers.push(id); + } + } + Ok(_) => {} + Err(_) => invalid_known_peers += 1, + } + } + } + Ok(Err(status)) => { + known_peers_error = Some(format!("get_known_peers RPC failed: {status}")); + } + Err(_) => { + known_peers_error = Some(format!("get_known_peers timed out after {RPC_TIMEOUT:?}")); + } + } + + Ok(PeerInterrogationOutcome { + node_info, + known_peers, + invalid_known_peers, + known_peers_error, + }) +} + +fn decode_endpoint_id(raw_id: &[u8]) -> anyhow::Result { + let bytes: [u8; 32] = raw_id + .try_into() + .map_err(|_| anyhow::anyhow!("invalid endpoint id length: {}", raw_id.len()))?; + EndpointId::from_bytes(&bytes).context("invalid endpoint id bytes") +} diff --git a/crates/cli/src/commands/opaque.rs b/crates/cli/src/commands/opaque.rs new file mode 100644 index 0000000..3419e6e --- /dev/null +++ b/crates/cli/src/commands/opaque.rs @@ -0,0 +1,91 @@ +use 
crate::commands::CliResult; +use crate::execution::{ + ExecutionRoute, ExecutionRuntime, OpaqueExecutionEvent, OpaqueExecutionRequest, OpaqueOutcome, +}; +#[cfg(feature = "hellas-executor")] +use catgrad::prelude::Dtype; +use futures::StreamExt; +use hellas_pb::opaque::OpaqueRequest; +use std::io::{self, Write}; +use std::net::SocketAddr; +#[cfg(feature = "hellas-executor")] +use std::path::PathBuf; +use tonic_iroh_transport::iroh::{EndpointId, SecretKey}; + +pub struct ExecuteOptions { + pub node_id: Option, + pub node_addrs: Vec, + pub service: String, + pub method: String, + pub payload: Vec, + pub retries: usize, + #[cfg(feature = "hellas-executor")] + pub local: bool, + #[cfg(feature = "hellas-executor")] + pub producer_key_path: Option, +} + +pub async fn run(options: ExecuteOptions, secret_key: SecretKey) -> CliResult<()> { + serde_json::from_slice::(&options.payload) + .map_err(|err| anyhow::anyhow!("--payload must be UTF-8 JSON: {err}"))?; + + #[cfg(feature = "hellas-executor")] + let route = if options.local { + ExecutionRoute::Local + } else { + ExecutionRoute::remote(options.node_id, options.node_addrs.clone(), options.retries) + }; + #[cfg(not(feature = "hellas-executor"))] + let route = + ExecutionRoute::remote(options.node_id, options.node_addrs.clone(), options.retries); + + #[cfg(feature = "hellas-executor")] + let runtime = if options.local { + let producer_key = + crate::identity::load_or_create_producer_key(options.producer_key_path.as_deref())?; + ExecutionRuntime::spawn_default_local_with_producer_key( + hellas_rpc::DEFAULT_EXECUTION_QUEUE_CAPACITY, + vec![Dtype::F32], + producer_key, + )? 
+ .with_secret_key(secret_key) + } else { + ExecutionRuntime::default().with_secret_key(secret_key) + }; + #[cfg(not(feature = "hellas-executor"))] + let runtime = ExecutionRuntime::default().with_secret_key(secret_key); + + let request = OpaqueRequest { + service: options.service, + method: options.method, + payload: options.payload, + }; + let execution = OpaqueExecutionRequest::new(runtime, request, route); + let uses_remote = execution.uses_remote_transport(); + let stream = execution.stream(); + tokio::pin!(stream); + + let mut completed = false; + while let Some(event) = stream.next().await { + match event? { + OpaqueExecutionEvent::Chunk { .. } => {} + OpaqueExecutionEvent::Done(OpaqueOutcome::Completed { output, .. }) => { + io::stdout().write_all(&output)?; + io::stdout().flush()?; + completed = true; + break; + } + OpaqueExecutionEvent::Done(OpaqueOutcome::Failed { error, .. }) => { + anyhow::bail!("opaque execution failed: {error}"); + } + } + } + + if uses_remote { + crate::tracing_config::suppress_execute_tail_logs(); + } + if !completed { + anyhow::bail!("opaque execution stream ended without terminal outcome"); + } + Ok(()) +} diff --git a/crates/cli/src/commands/rpc.rs b/crates/cli/src/commands/rpc.rs new file mode 100644 index 0000000..150ebd6 --- /dev/null +++ b/crates/cli/src/commands/rpc.rs @@ -0,0 +1,47 @@ +use crate::commands::CliResult; +use anyhow::Context; +use hellas_pb::swarm::GetNodeInfoRequest; +use hellas_pb::swarm::node_client::NodeClient; +use hellas_rpc::discovery::DiscoveryEndpoint; +use hellas_rpc::service::NodeService; +use std::net::SocketAddr; +use tonic_iroh_transport::iroh::{EndpointAddr, EndpointId, SecretKey, TransportAddr}; +use tonic_iroh_transport::{ConnectionPool, IrohConnect, PoolOptions}; + +pub async fn run( + node_id: EndpointId, + node_addrs: Vec, + secret_key: SecretKey, +) -> CliResult<()> { + let endpoint = DiscoveryEndpoint::bind(Some(secret_key)).await?.endpoint; + let channel = if node_addrs.is_empty() { + 
let pool = + ConnectionPool::for_service::(endpoint.clone(), PoolOptions::default()); + pool.channel(node_id) + .await + .with_context(|| format!("failed to connect to node {node_id}"))? + } else { + NodeService::connect( + &endpoint, + EndpointAddr::from_parts(node_id, node_addrs.into_iter().map(TransportAddr::Ip)), + ) + .await + .with_context(|| format!("failed to connect to node {node_id}"))? + }; + + let mut client = NodeClient::new(channel); + let response = client + .get_node_info(GetNodeInfoRequest {}) + .await + .context("get_node_info RPC failed")? + .into_inner(); + + println!("Node ID: {}", response.node_id); + println!("Version: {}", response.version); + println!("Build: {}", response.build); + println!("OS: {}", response.os); + println!("Uptime: {}s", response.uptime_seconds); + println!("Graffiti: {}", String::from_utf8_lossy(&response.graffiti)); + + Ok(()) +} diff --git a/crates/cli/src/commands/serve/mod.rs b/crates/cli/src/commands/serve/mod.rs index c14192b..208ab9a 100644 --- a/crates/cli/src/commands/serve/mod.rs +++ b/crates/cli/src/commands/serve/mod.rs @@ -1,18 +1,98 @@ use crate::commands::CliResult; use anyhow::Context; -use tokio::time::{timeout, Duration}; +use catgrad::prelude::Dtype; +use hellas_core::ProducerSigningKey; +use hellas_executor::ExecutorMetrics; +use hellas_rpc::policy::{DownloadPolicy, ExecutePolicy}; +use std::collections::HashSet; +use std::path::PathBuf; +use std::sync::Arc; +use tokio::time::{Duration, timeout}; +use tonic_iroh_transport::iroh::SecretKey; use tracing::warn; mod node; +mod peer_tracker; -pub async fn run(enable_discovery: bool) -> CliResult<()> { - let node = node::spawn_node(enable_discovery) - .await - .context("failed to start node server")?; +pub async fn run( + port: Option, + download_policy: DownloadPolicy, + execute_policy: ExecutePolicy, + queue_size: usize, + preload_weights: Vec, + artifact_store_path: Option, + metrics_port: Option, + graffiti: String, + dtype: Vec, + secret_key: 
SecretKey, + producer_key: ProducerSigningKey, +) -> CliResult<()> { + let preload_weights = dedupe_preload_weights(preload_weights); + let artifact_store_path = artifact_store_path + .map(Ok) + .unwrap_or_else(crate::identity::default_artifact_store_path)?; + let build = option_env!("GIT_REV").unwrap_or("unknown").to_string(); + let graffiti = { + let mut buf = [0u8; 16]; + let src = graffiti.as_bytes(); + let len = src.len().min(16); + buf[..len].copy_from_slice(&src[..len]); + buf.to_vec() + }; + // Counters live in the executor and are mutated inline; cloning the + // counter handles into a registry just adds a scrape view on the same + // underlying state. + let metrics = Arc::new(ExecutorMetrics::default()); + let node = node::spawn_node( + port, + download_policy.clone(), + execute_policy.clone(), + queue_size, + preload_weights.clone(), + build, + graffiti, + dtype, + artifact_store_path, + secret_key, + producer_key, + metrics.clone(), + ) + .await + .context("failed to start node server")?; + + if let Some(metrics_port) = metrics_port { + let mut registry = prometheus_client::registry::Registry::default(); + metrics.register_with(&mut registry); + let bundle = crate::metrics::MetricsBundle::new(Arc::new(registry)); + #[cfg(feature = "otel")] + let bundle = bundle.with_iroh(node.iroh_metrics()); + crate::metrics::spawn_metrics_server(metrics_port, bundle); + } + + let node_id = node.node_id(); + let add_url = format!("https://explorer.hellas.ai/executors/add/{node_id}"); + + eprintln!("Node ID: {node_id}"); + print_qr(&add_url); + eprintln!("Explorer: {add_url}"); - println!("Node Address: {}", node.node_id()); - if !enable_discovery { - warn!("discovery disabled; clients must pass a node id or start the server with `serve --discovery`"); + if !preload_weights.is_empty() { + info!("Preloaded weights: {}", preload_weights.join(", ")); + } + + if matches!(download_policy, DownloadPolicy::Skip) + && matches!(execute_policy, ExecutePolicy::Skip) + { + warn!( + 
"Node is running in deny-by-default mode. Pass explicit policies to allow remote downloads or execution." + ); + } else { + warn!( + %download_policy, + %execute_policy, + "node is permitting remote downloads and/or execution; only run this on trusted networks" + ); + warn!("warning: current policies allow remote peers to trigger downloads and/or execution"); } println!("RPC server running. Press Ctrl+C to stop."); @@ -20,7 +100,7 @@ pub async fn run(enable_discovery: bool) -> CliResult<()> { .await .context("failed to listen for shutdown signal")?; - println!("Shutting down RPC server..."); + println!("Shutting down..."); match timeout(Duration::from_secs(5), node.shutdown()).await { Ok(result) => result.context("failed to shut down RPC server")?, Err(_) => { @@ -32,3 +112,75 @@ pub async fn run(enable_discovery: bool) -> CliResult<()> { Ok(()) } + +/// Print a QR code to stderr using Unicode half-block characters. +fn print_qr(data: &str) { + use qrcode::QrCode; + let Ok(code) = QrCode::new(data.as_bytes()) else { + return; + }; + let width = code.width(); + let modules = code.into_colors(); + // Two rows per character using upper/lower half blocks. + // ██ = both dark, ▀ = top dark, ▄ = bottom dark, ' ' = both light. 
/// Normalize the preload-weights list: trim surrounding whitespace, drop
/// entries that are empty after trimming, and keep only the first
/// occurrence of each model id, preserving input order.
///
/// Single pass instead of the original retain-then-re-trim two passes:
/// each entry is trimmed once and allocated once when kept, with
/// identical results.
fn dedupe_preload_weights(models: Vec<String>) -> Vec<String> {
    let mut seen = HashSet::new();
    let mut result = Vec::with_capacity(models.len());
    for model in models {
        let trimmed = model.trim();
        // `insert` returns false on duplicates, so only first occurrences
        // survive.
        if !trimmed.is_empty() && seen.insert(trimmed.to_string()) {
            result.push(trimmed.to_string());
        }
    }
    result
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn dedupe_preload_weights_preserves_first_occurrence() {
        let models = dedupe_preload_weights(vec![
            "foo/bar".to_string(),
            "baz/qux".to_string(),
            "foo/bar".to_string(),
            "baz/qux@rev".to_string(),
        ]);
        assert_eq!(models, vec!["foo/bar", "baz/qux", "baz/qux@rev"]);
    }

    #[test]
    fn dedupe_preload_weights_trims_and_drops_empty_entries() {
        let models = dedupe_preload_weights(vec![
            "  foo/bar  ".to_string(),
            "".to_string(),
            "   ".to_string(),
            "baz/qux@rev".to_string(),
        ]);
        assert_eq!(models, vec!["foo/bar", "baz/qux@rev"]);
    }
}
ArtifactStoreConfig, CourtesyServer, ExecuteServer, Executor, ExecutorMetrics, OpaqueServer, + SymbolicServer, +}; +use hellas_pb::swarm::node_server::{Node, NodeServer}; +use hellas_pb::swarm::{ + GetKnownPeersRequest, GetKnownPeersResponse, GetNodeInfoRequest, GetNodeInfoResponse, +}; +use hellas_rpc::GRPC_MESSAGE_LIMIT; +use hellas_rpc::discovery::DiscoveryBindings; +use hellas_rpc::policy::{DownloadPolicy, ExecutePolicy}; +use std::net::{Ipv4Addr, Ipv6Addr, SocketAddrV4, SocketAddrV6}; +use std::path::PathBuf; +use std::sync::{Arc, Mutex}; use std::time::Instant; -use std::net::{Ipv4Addr, SocketAddrV4}; -use tokio_stream::StreamExt; +use tonic::codec::CompressionEncoding; +use tonic::service::interceptor::InterceptedService; use tonic::{Request, Response, Status}; -use tonic_iroh_transport::gossip::{topic_for, GossipHandler, GossipRequest}; -use tonic_iroh_transport::iroh::discovery::mdns::MdnsDiscovery; -use tonic_iroh_transport::iroh::discovery::EndpointData; -use tonic_iroh_transport::iroh::discovery::pkarr::dht::DhtDiscovery; -use tonic_iroh_transport::iroh::{Endpoint, EndpointId, TransportAddr}; -use tonic_iroh_transport::TransportBuilder; -use tonic_iroh_transport::iroh::discovery::Discovery; -use tonic_iroh_transport::iroh::Watcher; -use std::net::Ipv6Addr; -use std::net::SocketAddrV6; - -const GRPC_MESSAGE_LIMIT: usize = 32 * 1024 * 1024; +use tonic_iroh_transport::iroh::address_lookup::{DnsAddressLookup, PkarrPublisher}; +use tonic_iroh_transport::iroh::endpoint::{PathId, presets}; +use tonic_iroh_transport::iroh::{Endpoint, EndpointId}; +use tonic_iroh_transport::swarm::{DhtBackend, MdnsBackend, ServiceRegistry}; +use tonic_iroh_transport::{IrohContext, PoolOptions, TransportBuilder}; + +// `traced_service` wraps a tonic service with W3C trace context extraction when +// the `otel` feature is on; with the feature off it returns the service +// unchanged so the trace layer compiles to nothing. 
+#[cfg(feature = "otel")] +fn traced_service(svc: S) -> tonic_iroh_transport::otel::TraceContextService { + tower::Layer::layer(&tonic_iroh_transport::otel::TraceContextLayer, svc) +} +#[cfg(not(feature = "otel"))] +fn traced_service(svc: S) -> S { + svc +} + const DEFAULT_PORT: u16 = 31145; +const MAX_PORT_RETRIES: u16 = 100; + +struct NodeService { + start_time: Instant, + node_id: String, + build: String, + graffiti: Vec, + peer_tracker: Arc>, +} #[derive(Clone)] -struct PresenceResponder { - endpoint_id: EndpointId, +struct ExecutePeerInterceptor { + peer_tracker: Arc>, +} + +impl tonic::service::Interceptor for ExecutePeerInterceptor { + fn call(&mut self, request: Request<()>) -> Result, Status> { + if let Some((peer_id, observed_rtt)) = peer_observation(&request) + && let Ok(mut tracker) = self.peer_tracker.lock() + { + let _ = tracker.observe_request(peer_id, observed_rtt, RequestKind::ExecuteRpc); + } + Ok(request) + } } #[tonic::async_trait] -impl GossipHandler for PresenceResponder { - async fn handle(&self, request: GossipRequest) -> Result<(), Status> { - let msg = request.get_ref(); - if msg.is_executor { - return Ok(()); +impl Node for NodeService { + async fn get_node_info( + &self, + request: Request, + ) -> Result, Status> { + if let Some((peer_id, observed_rtt)) = peer_observation(&request) + && let Ok(mut tracker) = self.peer_tracker.lock() + { + let _ = tracker.observe_request(peer_id, observed_rtt, RequestKind::GetNodeInfo); } - info!( - hf_id = %msg.hf_id, - req_id = %msg.req_id, - from = %request.context().delivered_from.fmt_short(), - "responding to presence request" - ); + Ok(Response::new(GetNodeInfoResponse { + node_id: self.node_id.clone(), + uptime_seconds: self.start_time.elapsed().as_secs(), + version: env!("CARGO_PKG_VERSION").to_string(), + build: self.build.clone(), + os: format!("{}-{}", std::env::consts::ARCH, std::env::consts::OS), + graffiti: self.graffiti.clone(), + })) + } - let reply = Presence { - hf_id: msg.hf_id.clone(), 
- req_id: msg.req_id.clone(), - peer_id: self.endpoint_id.to_string(), - ttl_ms: msg.ttl_ms, - is_executor: true, + async fn get_known_peers( + &self, + request: Request, + ) -> Result, Status> { + let Some((requester_id, observed_rtt)) = peer_observation(&request) else { + return Err(Status::unauthenticated("missing peer context")); }; - request - .sender() - .broadcast(&reply) - .await - .map_err(|e| Status::internal(format!("failed to broadcast presence reply: {e}")))?; + let req = request.into_inner(); + if req.service_alpn.len() > MAX_SERVICE_ALPN_LEN { + if let Ok(mut tracker) = self.peer_tracker.lock() { + tracker.mark_invalid_request(requester_id); + } + return Err(Status::invalid_argument(format!( + "service_alpn too long (max {MAX_SERVICE_ALPN_LEN} bytes)" + ))); + } - Ok(()) + let mut tracker = self + .peer_tracker + .lock() + .map_err(|_| Status::internal("peer tracker is unavailable"))?; + + let admission = + tracker.observe_request(requester_id, observed_rtt, RequestKind::GetKnownPeers); + if !admission.allow { + warn!( + peer = %requester_id, + "rate-limited get_known_peers request" + ); + return Err(Status::resource_exhausted( + "rate-limited get_known_peers request", + )); + } + + let peers = tracker.ranked_known_peers( + requester_id, + req.service_alpn.as_str(), + admission.disclosure_limit, + ); + let peer_ids = peers + .into_iter() + .map(|peer_id| peer_id.as_bytes().to_vec()) + .collect(); + + Ok(Response::new(GetKnownPeersResponse { peer_ids })) } } -struct NodeService { - start_time: Instant, - node_id: String, +fn peer_observation(request: &Request) -> Option<(EndpointId, Option)> { + let context = request.extensions().get::()?; + Some((context.node_id, context.connection.rtt(PathId::ZERO))) } -#[tonic::async_trait] -impl Node for NodeService { - async fn health_check( - &self, - _request: Request, - ) -> Result, Status> { - Ok(Response::new(HealthCheckResponse { - version: env!("CARGO_PKG_VERSION").to_string(), - uptime_seconds: 
self.start_time.elapsed().as_secs(), - node_id: self.node_id.clone(), - })) - } +async fn bind_endpoint( + secret_key: tonic_iroh_transport::iroh::SecretKey, + port: u16, +) -> anyhow::Result { + Endpoint::builder(presets::N0) + .secret_key(secret_key) + .clear_address_lookup() + .address_lookup(PkarrPublisher::n0_dns()) + .address_lookup(DnsAddressLookup::n0_dns()) + .bind_addr(SocketAddrV4::new(Ipv4Addr::UNSPECIFIED, port))? + .bind_addr(SocketAddrV6::new(Ipv6Addr::UNSPECIFIED, port, 0, 0))? + .bind() + .await + .map_err(Into::into) } pub(super) struct NodeHandle { - endpoint: Endpoint, + node_id: EndpointId, guard: tonic_iroh_transport::TransportGuard, - addr_task: tokio::task::JoinHandle<()>, } impl NodeHandle { pub(super) fn node_id(&self) -> EndpointId { - self.endpoint.id() + self.node_id + } + + /// Snapshot of iroh's internal metrics. The returned `EndpointMetrics` + /// contains `Arc`s into the live metric storage, so values continue to + /// update as iroh records them. + #[cfg(feature = "otel")] + pub(super) fn iroh_metrics(&self) -> tonic_iroh_transport::iroh::metrics::EndpointMetrics { + self.guard.endpoint().metrics().clone() } pub(super) async fn shutdown(self) -> anyhow::Result<()> { - self.addr_task.abort(); - let _ = self.addr_task.await; - self.guard - .shutdown() - .await - .context("failed to shut down transport")?; + let Self { guard, .. } = self; + guard.endpoint().close().await; + drop(guard); Ok(()) } } -pub(super) async fn spawn_node(enable_discovery: bool) -> anyhow::Result { - let mut builder = Endpoint::builder() - .bind_addr_v4(SocketAddrV4::new(Ipv4Addr::UNSPECIFIED, DEFAULT_PORT)) - .bind_addr_v6(SocketAddrV6::new(Ipv6Addr::UNSPECIFIED, DEFAULT_PORT, 0, 0)); - - if enable_discovery { - builder = builder - .discovery(MdnsDiscovery::builder().service_name("hellas")) - // Adds internet discovery (DHT + optional pkarr relay); `Endpoint::builder()` - // already includes pkarr publisher + DNS resolver via the N0 preset. 
- .discovery(DhtDiscovery::builder().n0_dns_pkarr_relay()); +pub(super) async fn spawn_node( + port: Option, + download_policy: DownloadPolicy, + execute_policy: ExecutePolicy, + queue_size: usize, + preload_weights: Vec, + build: String, + graffiti: Vec, + supported_dtypes: Vec, + artifact_store_path: PathBuf, + secret_key: tonic_iroh_transport::iroh::SecretKey, + producer_key: ProducerSigningKey, + metrics: Arc, +) -> anyhow::Result { + let endpoint = if let Some(port) = port { + // Explicit port: fail if it can't bind. + bind_endpoint(secret_key.clone(), port) + .await + .with_context(|| format!("failed to bind on port {port}"))? } else { - builder = builder.clear_discovery(); - } - - let endpoint = builder - .bind() - .await - .context("failed to create iroh endpoint")?; - - // Seed discovery with current addresses and keep publishing updates. - let discovery = endpoint.discovery().clone(); - let mut addr_stream = endpoint.watch_addr().stream(); - let addr_task = tokio::spawn(async move { - while let Some(addr) = addr_stream.next().await { - let addrs: Vec<_> = addr.ip_addrs().map(|a| TransportAddr::Ip(*a)).collect(); - if addrs.is_empty() { - continue; + // Auto port: try DEFAULT_PORT, then increment until one works. + let mut endpoint = None; + for offset in 0..MAX_PORT_RETRIES { + let p = DEFAULT_PORT.wrapping_add(offset); + match bind_endpoint(secret_key.clone(), p).await { + Ok(ep) => { + if offset > 0 { + info!("port {DEFAULT_PORT} in use, bound to port {p}"); + } + endpoint = Some(ep); + break; + } + Err(e) => debug!("port {p} unavailable: {e:#}"), } - info!("discovery: {addrs:?}"); - let data = EndpointData::new(addrs); - discovery.publish(&data); } - }); + endpoint.ok_or_else(|| { + anyhow::anyhow!( + "failed to bind on any port in range {DEFAULT_PORT}..{}", + DEFAULT_PORT + MAX_PORT_RETRIES + ) + })? + }; + let shared_dht = DiscoveryBindings::attach(&endpoint, true, true) + .context("failed to attach node discovery lookups")? 
+ .dht; let node_service = NodeService { start_time: Instant::now(), node_id: endpoint.id().to_string(), + build, + graffiti, + peer_tracker: Arc::new(Mutex::new(PeerTracker::new(endpoint.id()))), + }; + + let peer_tracker = node_service.peer_tracker.clone(); + + let execute_interceptor = ExecutePeerInterceptor { + peer_tracker: peer_tracker.clone(), }; - let executor = Executor::spawn(); - let execute_service = ExecuteServer::new(executor) + info!( + path = %artifact_store_path.display(), + "using persistent artifact blob store" + ); + let executor = Executor::spawn_with_metrics_and_producer_key_and_artifact_store( + download_policy, + execute_policy, + queue_size, + supported_dtypes, + metrics, + Arc::new(producer_key), + ArtifactStoreConfig::fs(artifact_store_path.clone()), + ) + .await + .context("failed to initialize executor backend")?; + + let execute_service = ExecuteServer::new(executor.clone()) + .accept_compressed(CompressionEncoding::Zstd) + .send_compressed(CompressionEncoding::Zstd) + .max_decoding_message_size(GRPC_MESSAGE_LIMIT) + .max_encoding_message_size(GRPC_MESSAGE_LIMIT); + let symbolic_service = SymbolicServer::new(executor.clone()) + .accept_compressed(CompressionEncoding::Zstd) + .send_compressed(CompressionEncoding::Zstd) + .max_decoding_message_size(GRPC_MESSAGE_LIMIT) + .max_encoding_message_size(GRPC_MESSAGE_LIMIT); + let opaque_service = OpaqueServer::new(executor.clone()) + .accept_compressed(CompressionEncoding::Zstd) + .send_compressed(CompressionEncoding::Zstd) + .max_decoding_message_size(GRPC_MESSAGE_LIMIT) + .max_encoding_message_size(GRPC_MESSAGE_LIMIT); + let courtesy_service = CourtesyServer::new(executor.clone()) + .accept_compressed(CompressionEncoding::Zstd) + .send_compressed(CompressionEncoding::Zstd) .max_decoding_message_size(GRPC_MESSAGE_LIMIT) .max_encoding_message_size(GRPC_MESSAGE_LIMIT); - let presence_responder = PresenceResponder { - endpoint_id: endpoint.id(), - }; + let mut transport = 
TransportBuilder::new(endpoint.clone()) + .add_rpc(traced_service(NodeServer::new(node_service))) + .add_rpc(InterceptedService::new( + traced_service(execute_service), + execute_interceptor.clone(), + )) + .add_rpc(InterceptedService::new( + traced_service(symbolic_service), + execute_interceptor.clone(), + )) + .add_rpc(InterceptedService::new( + traced_service(opaque_service), + execute_interceptor, + )) + .add_rpc(traced_service(courtesy_service)); + + let dht = DhtBackend::with_dht(&endpoint, Arc::clone(&shared_dht)); + let publisher = dht.create_publisher(Default::default()); + transport = transport.with_publisher(publisher); - let guard = TransportBuilder::new(endpoint.clone()) - .add_gossip::(presence_responder) - .add_rpc(NodeServer::new(node_service)) - .add_rpc(execute_service) + let guard = transport .spawn() .await .context("failed to start transport")?; - info!( - topic = ?topic_for::(), - "listening for gossip presence requests" - ); + // Background peer discovery: watch DHT + mDNS for other executors and + // feed them into the PeerTracker so GetKnownPeers returns useful results. 
+ { + let peer_tracker = peer_tracker.clone(); + let disc_endpoint = endpoint.clone(); + let disc_dht = DhtBackend::with_dht(&disc_endpoint, Arc::clone(&shared_dht)); + tokio::spawn(async move { + use hellas_rpc::service::{ + CourtesyService as CourtesySvc, ExecuteService as ExecSvc, NodeService as NodeSvc, + OpaqueService as OpaqueSvc, SymbolicService as SymbolicSvc, + }; + let Ok(bindings) = DiscoveryBindings::client(disc_endpoint.id()) else { + warn!("failed to create discovery bindings for peer tracker"); + return; + }; + let mut registry = ServiceRegistry::new(&disc_endpoint); + registry.with_pool_options(PoolOptions::default()); + registry.add(MdnsBackend::new(bindings.mdns)); + registry.add(disc_dht); + let mut node_peers = Box::pin(registry.discover::()); + let mut exec_peers = Box::pin(registry.discover::()); + let mut symbolic_peers = Box::pin(registry.discover::()); + let mut opaque_peers = Box::pin(registry.discover::()); + let mut courtesy_peers = Box::pin(registry.discover::()); + loop { + let peer_id = tokio::select! { + Some(Ok(peer)) = node_peers.next() => peer.id(), + Some(Ok(peer)) = exec_peers.next() => peer.id(), + Some(Ok(peer)) = symbolic_peers.next() => peer.id(), + Some(Ok(peer)) = opaque_peers.next() => peer.id(), + Some(Ok(peer)) = courtesy_peers.next() => peer.id(), + else => break, + }; + if let Ok(mut tracker) = peer_tracker.lock() { + tracker.mark_service_provider(peer_id); + } + } + }); + } + + // Preload weights in the background so the node is reachable immediately. 
+ if !preload_weights.is_empty() { + let count = preload_weights.len(); + info!(count, "preloading startup weights in background"); + let preload_executor = executor.clone(); + tokio::spawn(async move { + let results = try_join_all(preload_weights.into_iter().map(|model| { + let executor = preload_executor.clone(); + async move { + executor + .preload_weights(model.clone()) + .await + .with_context(|| format!("failed to preload weights for {model}")) + } + })) + .await; + match results { + Ok(_) => info!(count, "startup weight preload complete"), + Err(e) => warn!("startup weight preload failed: {e:#}"), + } + }); + } - Ok(NodeHandle { endpoint, guard, addr_task }) + Ok(NodeHandle { + node_id: endpoint.id(), + guard, + }) } diff --git a/crates/cli/src/commands/serve/peer_tracker.rs b/crates/cli/src/commands/serve/peer_tracker.rs new file mode 100644 index 0000000..775aba9 --- /dev/null +++ b/crates/cli/src/commands/serve/peer_tracker.rs @@ -0,0 +1,564 @@ +use std::collections::HashMap; +use std::time::{Duration, Instant}; + +use tonic_iroh_transport::iroh::EndpointId; + +pub(super) const NODE_SERVICE_ALPN: &str = "/hellas.Node/1.0"; +pub(super) const EXECUTE_SERVICE_ALPN: &str = "/hellas.Execute/1.0"; +pub(super) const MAX_SERVICE_ALPN_LEN: usize = 128; + +const MAX_TRACKED_PEERS: usize = 2048; +const MAX_KNOWN_PEERS_RESPONSE: usize = 64; +const STALE_PEER_AFTER: Duration = Duration::from_secs(15 * 60); +const DEFAULT_LATENCY_SCORE: i64 = 450; + +/// Request classes with different admission costs. +#[derive(Clone, Copy, Debug)] +pub(super) enum RequestKind { + GetNodeInfo, + GetKnownPeers, + ExecuteRpc, +} + +#[derive(Clone, Copy, Debug)] +pub(super) struct RequestAdmission { + pub allow: bool, + pub disclosure_limit: usize, +} + +/// Bounded peer tracker used to prefer well-behaved and low-latency peers. 
+pub(super) struct PeerTracker { + local_id: EndpointId, + peers: HashMap, + known_peers_global_bucket: TokenBucket, +} + +impl PeerTracker { + pub(super) fn new(local_id: EndpointId) -> Self { + Self { + local_id, + peers: HashMap::new(), + // Bound global CPU/alloc pressure from many concurrent GetKnownPeers calls. + known_peers_global_bucket: TokenBucket::new(16.0, 4.0), + } + } + + pub(super) fn observe_request( + &mut self, + peer_id: EndpointId, + observed_rtt: Option, + kind: RequestKind, + ) -> RequestAdmission { + let now = Instant::now(); + let (cost, throttleable) = match kind { + RequestKind::GetNodeInfo => (0.5, false), + RequestKind::ExecuteRpc => (1.0, false), + RequestKind::GetKnownPeers => (4.0, true), + }; + + let (per_peer_ok, disclosure_limit) = { + let peer = self.get_or_insert_peer(peer_id, now); + peer.last_seen = now; + peer.total_requests = peer.total_requests.saturating_add(1); + peer.record_rtt(observed_rtt); + + let per_peer_ok = peer.bucket.take(cost, now); + if !per_peer_ok { + peer.rate_limited = peer.rate_limited.saturating_add(1); + } + + let disclosure_limit = { + let score = peer.recommendation_score(now); + if score < 600 { + 8 + } else if score < 1600 { + 24 + } else { + MAX_KNOWN_PEERS_RESPONSE + } + }; + + (per_peer_ok, disclosure_limit) + }; + + let global_ok = if matches!(kind, RequestKind::GetKnownPeers) { + self.known_peers_global_bucket.take(1.0, now) + } else { + true + }; + if throttleable + && !global_ok + && let Some(peer) = self.peers.get_mut(&peer_id) + { + peer.rate_limited = peer.rate_limited.saturating_add(1); + } + + let allow = if throttleable { + per_peer_ok && global_ok + } else { + true + }; + + RequestAdmission { + allow, + disclosure_limit, + } + } + + /// Mark a peer as a known service provider (e.g. discovered via DHT). + /// + /// Service capability must be signalled explicitly here. 
Observing an + /// inbound RPC alone only proves the peer is a *client* — without this + /// distinction, ephemeral browser sessions would get shared as "known + /// peers" even though they can't serve anything. + pub(super) fn mark_service_provider(&mut self, peer_id: EndpointId) { + let now = Instant::now(); + let peer = self.get_or_insert_peer(peer_id, now); + peer.seen_node_service = true; + } + + pub(super) fn mark_invalid_request(&mut self, peer_id: EndpointId) { + let now = Instant::now(); + let peer = self.get_or_insert_peer(peer_id, now); + peer.invalid_requests = peer.invalid_requests.saturating_add(1); + } + + pub(super) fn ranked_known_peers( + &self, + requester: EndpointId, + requested_service_alpn: &str, + disclosure_limit: usize, + ) -> Vec { + let now = Instant::now(); + let response_limit = disclosure_limit.min(MAX_KNOWN_PEERS_RESPONSE); + let mut candidates: Vec<(EndpointId, i64)> = self + .peers + .iter() + .filter_map(|(peer_id, stats)| { + if *peer_id == self.local_id || *peer_id == requester { + return None; + } + let age = now.saturating_duration_since(stats.last_seen); + if age > STALE_PEER_AFTER { + return None; + } + if !matches_service_filter(stats, requested_service_alpn) { + return None; + } + let score = stats.recommendation_score(now); + if score <= 0 { + return None; + } + Some((*peer_id, score)) + }) + .collect(); + + candidates.sort_by(|(_, left_score), (_, right_score)| right_score.cmp(left_score)); + candidates + .into_iter() + .take(response_limit) + .map(|(peer_id, _)| peer_id) + .collect() + } + + fn get_or_insert_peer(&mut self, peer_id: EndpointId, now: Instant) -> &mut PeerStats { + if !self.peers.contains_key(&peer_id) && self.peers.len() >= MAX_TRACKED_PEERS { + self.evict_worst(now); + } + self.peers + .entry(peer_id) + .or_insert_with(|| PeerStats::new(now)) + } + + fn evict_worst(&mut self, now: Instant) { + let Some(evict_id) = self + .peers + .iter() + .min_by_key(|(_, stats)| stats.recommendation_score(now)) + 
.map(|(peer_id, _)| *peer_id) + else { + return; + }; + self.peers.remove(&evict_id); + } +} + +fn matches_service_filter(stats: &PeerStats, requested_service_alpn: &str) -> bool { + match requested_service_alpn { + // Empty ALPN returns all known service providers (not raw clients). + "" | NODE_SERVICE_ALPN | EXECUTE_SERVICE_ALPN => stats.seen_node_service, + _ => false, + } +} + +#[derive(Debug)] +struct PeerStats { + first_seen: Instant, + last_seen: Instant, + ema_rtt_ms: Option, + total_requests: u32, + invalid_requests: u32, + rate_limited: u32, + seen_node_service: bool, + bucket: TokenBucket, +} + +impl PeerStats { + fn new(now: Instant) -> Self { + Self { + first_seen: now, + last_seen: now, + ema_rtt_ms: None, + total_requests: 0, + invalid_requests: 0, + rate_limited: 0, + seen_node_service: false, + // Keep per-peer burst tolerance small to avoid "easy win" spam. + bucket: TokenBucket::new(24.0, 2.0), + } + } + + fn record_rtt(&mut self, rtt: Option) { + let Some(rtt) = rtt else { + return; + }; + let ms = rtt.as_secs_f64() * 1000.0; + self.ema_rtt_ms = Some(match self.ema_rtt_ms { + Some(prev) => prev * 0.75 + ms * 0.25, + None => ms, + }); + } + + fn recommendation_score(&self, now: Instant) -> i64 { + let age = now.saturating_duration_since(self.last_seen); + let age_secs = age.as_secs_f64(); + let recency_score = + ((1.0 - (age_secs / STALE_PEER_AFTER.as_secs_f64())).clamp(0.0, 1.0) * 1000.0) as i64; + + let latency_score = self + .ema_rtt_ms + .map(latency_score) + .unwrap_or(DEFAULT_LATENCY_SCORE); + + let lifespan_secs = now.saturating_duration_since(self.first_seen).as_secs_f64(); + let stability_score = ((lifespan_secs / 60.0).clamp(0.0, 20.0) * 50.0) as i64; + + let request_score = (self.total_requests.min(60) as i64) * 8; + let behavior_penalty = + (self.invalid_requests as i64 * 350) + (self.rate_limited as i64 * 110); + + (latency_score * 4) + (recency_score * 3) + (stability_score * 2) + request_score + - behavior_penalty + } +} + +fn 
latency_score(rtt_ms: f64) -> i64 { + if rtt_ms <= 5.0 { + return 1000; + } + if rtt_ms >= 2_500.0 { + return 0; + } + (((2_500.0 - rtt_ms) / 2_495.0) * 1000.0) as i64 +} + +#[derive(Debug)] +struct TokenBucket { + tokens: f64, + capacity: f64, + refill_per_sec: f64, + last_refill: Instant, +} + +impl TokenBucket { + fn new(capacity: f64, refill_per_sec: f64) -> Self { + Self { + tokens: capacity, + capacity, + refill_per_sec, + last_refill: Instant::now(), + } + } + + fn take(&mut self, cost: f64, now: Instant) -> bool { + let elapsed = now + .saturating_duration_since(self.last_refill) + .as_secs_f64(); + if elapsed > 0.0 { + self.tokens = (self.tokens + elapsed * self.refill_per_sec).min(self.capacity); + self.last_refill = now; + } + if self.tokens >= cost { + self.tokens -= cost; + true + } else { + false + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use tonic_iroh_transport::iroh::SecretKey; + + fn endpoint_id(byte: u8) -> EndpointId { + SecretKey::from([byte; 32]).public() + } + + /// Two real servers publish via DHT. Three browser sessions open the + /// explorer, each health-checking the node and asking for peers. A CLI + /// monitor also calls get_known_peers. Only the two real servers should + /// ever appear in responses — browsers and CLI clients must not leak. + #[test] + fn mixed_servers_browsers_and_cli_clients() { + let node = endpoint_id(0); + let server_a = endpoint_id(1); + let server_b = endpoint_id(2); + let mut tracker = PeerTracker::new(node); + + // Two servers discovered via DHT — marked explicitly. + tracker.mark_service_provider(server_a); + let _ = tracker.observe_request( + server_a, + Some(Duration::from_millis(20)), + RequestKind::GetKnownPeers, + ); + tracker.mark_service_provider(server_b); + let _ = tracker.observe_request( + server_b, + Some(Duration::from_millis(80)), + RequestKind::GetKnownPeers, + ); + + // Three ephemeral browser sessions: get_node_info → get_known_peers. 
+ let browsers: Vec<_> = (10..13).map(endpoint_id).collect(); + for &browser in &browsers { + let _ = tracker.observe_request(browser, None, RequestKind::GetNodeInfo); + let admission = tracker.observe_request(browser, None, RequestKind::GetKnownPeers); + assert!(admission.allow); + + let peers = tracker.ranked_known_peers(browser, NODE_SERVICE_ALPN, 64); + assert_eq!(peers.len(), 2, "browser should see exactly the 2 servers"); + assert!(peers.contains(&server_a)); + assert!(peers.contains(&server_b)); + } + + // CLI monitor discovers and queries. + let cli = endpoint_id(20); + let _ = tracker.observe_request( + cli, + Some(Duration::from_millis(5)), + RequestKind::GetNodeInfo, + ); + let _ = tracker.observe_request( + cli, + Some(Duration::from_millis(5)), + RequestKind::GetKnownPeers, + ); + + let peers = tracker.ranked_known_peers(cli, NODE_SERVICE_ALPN, 64); + assert_eq!(peers.len(), 2, "CLI should also only see the 2 servers"); + // Lower-RTT server_a should rank first. + assert_eq!(peers[0], server_a); + } + + /// A server starts with no known peers. Browsers connect and ask for + /// peers repeatedly, getting rate-limited. Then a real server appears + /// via DHT. Subsequent browser queries should find it despite the + /// earlier rate limiting. + #[test] + fn late_server_discovery_after_browser_spam() { + let node = endpoint_id(0); + let mut tracker = PeerTracker::new(node); + + // Browser hammers get_known_peers before any servers exist. 
+ let browser = endpoint_id(10); + let mut denied = 0; + for _ in 0..20 { + let _ = tracker.observe_request(browser, None, RequestKind::GetNodeInfo); + let admission = tracker.observe_request(browser, None, RequestKind::GetKnownPeers); + if !admission.allow { + denied += 1; + } + let peers = tracker.ranked_known_peers(browser, NODE_SERVICE_ALPN, 64); + assert!(peers.is_empty(), "no servers registered yet"); + } + assert!(denied > 0, "browser should hit rate limit"); + + // Now a real server appears and health-checks the node. + let server = endpoint_id(1); + tracker.mark_service_provider(server); + let _ = tracker.observe_request( + server, + Some(Duration::from_millis(30)), + RequestKind::GetNodeInfo, + ); + + // A fresh browser session arrives. The global rate limit bucket may + // still be exhausted from the spam above (all calls happen at the + // same Instant in tests). This means one peer's GetKnownPeers spam + // can deny a fresh peer — a known trade-off for simplicity. + let browser2 = endpoint_id(11); + let _ = tracker.observe_request(browser2, None, RequestKind::GetNodeInfo); + let admission = tracker.observe_request(browser2, None, RequestKind::GetKnownPeers); + if admission.allow { + let peers = tracker.ranked_known_peers(browser2, NODE_SERVICE_ALPN, 64); + assert_eq!(peers, vec![server]); + } + // Regardless of rate limiting, when admitted the server should be visible. + // Simulate the global bucket refilling (in real life, time passes). + // We can verify by just calling ranked_known_peers directly. + let peers = tracker.ranked_known_peers(browser2, NODE_SERVICE_ALPN, 64); + assert_eq!( + peers, + vec![server], + "server should be visible once admitted" + ); + } + + /// Simulates a small network: node X knows about servers A, B, C. Server + /// A sends many invalid requests and gets penalised. Server C has very + /// high latency. A new peer asks for known peers and should get B first, + /// then C or A (or A excluded entirely due to penalty). 
+ #[test] + fn ranking_with_penalties_and_latency() { + let node = endpoint_id(0); + let a = endpoint_id(1); // will be penalised + let b = endpoint_id(2); // well-behaved, low latency + let c = endpoint_id(3); // high latency + let mut tracker = PeerTracker::new(node); + + // All three are real servers. + for &s in &[a, b, c] { + tracker.mark_service_provider(s); + } + let _ = + tracker.observe_request(a, Some(Duration::from_millis(40)), RequestKind::GetNodeInfo); + let _ = + tracker.observe_request(b, Some(Duration::from_millis(10)), RequestKind::GetNodeInfo); + let _ = tracker.observe_request( + c, + Some(Duration::from_millis(2000)), + RequestKind::GetNodeInfo, + ); + + // A sends garbage. + for _ in 0..15 { + tracker.mark_invalid_request(a); + } + + let requester = endpoint_id(10); + let _ = tracker.observe_request(requester, None, RequestKind::GetKnownPeers); + + let peers = tracker.ranked_known_peers(requester, NODE_SERVICE_ALPN, 64); + // B should be first (low latency, no penalties). + assert!(!peers.is_empty()); + assert_eq!( + peers[0], b, + "well-behaved low-latency server should rank first" + ); + // A may be excluded entirely (score ≤ 0) due to penalties. + assert!(!peers.contains(&a) || peers.last() == Some(&a)); + } + + /// Disclosure limit is based on recommendation_score. A peer that has + /// been penalised (invalid requests) gets a smaller window than a + /// well-behaved peer. + #[test] + fn penalised_peer_gets_smaller_disclosure_limit() { + let node = endpoint_id(0); + let mut tracker = PeerTracker::new(node); + + // Register some service providers. + for i in 1..=30u8 { + let s = endpoint_id(i); + tracker.mark_service_provider(s); + let _ = tracker.observe_request( + s, + Some(Duration::from_millis(50)), + RequestKind::GetNodeInfo, + ); + } + + // Well-behaved peer. 
+ let good_peer = endpoint_id(100); + let good_admission = tracker.observe_request( + good_peer, + Some(Duration::from_millis(20)), + RequestKind::GetKnownPeers, + ); + assert!(good_admission.allow); + + // Misbehaving peer — pile on enough invalid requests to drop below + // the highest disclosure tier (score < 1600 needs penalty > ~5400, + // i.e. 16+ invalid requests at 350 each). + let bad_peer = endpoint_id(101); + let _ = tracker.observe_request( + bad_peer, + Some(Duration::from_millis(20)), + RequestKind::GetNodeInfo, + ); + for _ in 0..20 { + tracker.mark_invalid_request(bad_peer); + } + let bad_admission = tracker.observe_request( + bad_peer, + Some(Duration::from_millis(20)), + RequestKind::GetKnownPeers, + ); + + assert!( + bad_admission.disclosure_limit < good_admission.disclosure_limit, + "penalised peer (limit={}) should get fewer peers than well-behaved (limit={})", + bad_admission.disclosure_limit, + good_admission.disclosure_limit, + ); + } + + /// Two servers know about each other. Server A calls get_known_peers on + /// the node repeatedly over time (like a monitor polling loop). The node + /// should consistently return server B without duplication or degradation. + #[test] + fn server_to_server_peer_exchange_over_time() { + let node = endpoint_id(0); + let server_a = endpoint_id(1); + let server_b = endpoint_id(2); + let mut tracker = PeerTracker::new(node); + + tracker.mark_service_provider(server_a); + tracker.mark_service_provider(server_b); + let _ = tracker.observe_request( + server_a, + Some(Duration::from_millis(25)), + RequestKind::GetNodeInfo, + ); + let _ = tracker.observe_request( + server_b, + Some(Duration::from_millis(30)), + RequestKind::GetNodeInfo, + ); + + // Server A polls get_known_peers 10 times (like monitor's periodic poll). 
+ for round in 0..10 { + let admission = tracker.observe_request( + server_a, + Some(Duration::from_millis(25)), + RequestKind::GetKnownPeers, + ); + // First few should be allowed, later ones may be throttled. + if admission.allow { + let peers = tracker.ranked_known_peers( + server_a, + NODE_SERVICE_ALPN, + admission.disclosure_limit, + ); + assert_eq!( + peers, + vec![server_b], + "round {round}: server A should consistently see server B" + ); + } + } + } +} diff --git a/crates/cli/src/execution.rs b/crates/cli/src/execution.rs new file mode 100644 index 0000000..89ebc42 --- /dev/null +++ b/crates/cli/src/execution.rs @@ -0,0 +1,1661 @@ +//! Stream-shaped CLI execution layer. +//! +//! The fundamental shape: every layer returns +//! `impl Stream>`. Drop-cancellation +//! propagates naturally — when a consumer drops the stream, the generator +//! is dropped, which drops every in-flight future, which drops every +//! resource, which (for local executions) drops the per-execution +//! `mpsc::Receiver` the worker pushes chunks into. The worker observes +//! the closed channel on its next chunk send and converts it into a +//! cancel that the runner sees between decode steps. +//! +//! ```text +//! ExecutionRequest::stream → PreparedExecution::stream +//! ├─ primary: PreparedRoute::stream +//! │ ├─ Local: execute_stream(executor) +//! │ ├─ RemoteDirect: execute_stream(remote) +//! │ └─ RemoteDiscovery: retry loop wrapping execute_stream +//! └─ shadow (verify): same shape, run after primary +//! ``` +//! +//! Stream items separate two failure modes: +//! - `Err(_)` — transport error: we don't know the executor's verdict. +//! - `Ok(Done(Outcome::Failed))` — executor's explicit failure verdict. +//! +//! Discovery retry policy: +//! - Transport error before any chunk → try the next peer. +//! - Transport error after a chunk → propagate (committed work can't be retried). +//! - `Done(Failed)` (executor verdict) → propagate, never retry. 
+ +#[cfg(feature = "hellas-executor")] +use anyhow::Error as AnyhowError; +use anyhow::{Context, anyhow, bail}; +use async_stream::try_stream; +use base64::Engine; +use base64::engine::general_purpose::URL_SAFE_NO_PAD; +#[cfg(feature = "hellas-executor")] +use catgrad::prelude::Dtype; +use chatgrad::PreparedPrompt; +use futures::StreamExt; +use futures::stream::{BoxStream, FuturesUnordered, Stream}; +#[cfg(feature = "hellas-executor")] +use hellas_core::ProducerSigningKey; +use hellas_core::{ + DeliveryOutput, DeliveryRequest, Digest, JsonBytes, OpaqueRequest as CoreOpaqueRequest, + SchemeId, SignedReceipt as CoreSignedReceipt, decode_dag_cbor, verify_delivery, verify_receipt, +}; +#[cfg(feature = "hellas-executor")] +use hellas_executor::{Executor, ExecutorHandle}; +use hellas_pb::courtesy::QuotePreparedTextRequest; +use hellas_pb::hellas::{self as pb, FinishStatus, RunTicketRequest, WorkEvent, work_event}; +use hellas_pb::opaque::OpaqueRequest as PbOpaqueRequest; +use hellas_rpc::discovery::DiscoveryBindings; +use hellas_rpc::driver::{ + ExecuteDriver, QuotedPreparedTextResponse, QuotedResponse, RemoteExecuteDriver, +}; +use hellas_rpc::model::ModelAssets; +#[cfg(feature = "hellas-executor")] +use hellas_rpc::policy::{DownloadPolicy, ExecutePolicy}; +use hellas_rpc::provenance::ExecutionProvenance; +use hellas_rpc::service::{CourtesyService, ExecuteService, OpaqueService}; +use std::collections::HashSet; +use std::net::SocketAddr; +use std::sync::Arc; +use tokio::time::Duration; +use tonic_iroh_transport::iroh::address_lookup::DnsAddressLookup; +use tonic_iroh_transport::iroh::{ + Endpoint, EndpointAddr, EndpointId, SecretKey, TransportAddr, endpoint::PortmapperConfig, +}; +use tonic_iroh_transport::swarm::{DhtBackend, MdnsBackend, ServiceRegistry}; +use tonic_iroh_transport::{ConnectionPool, IrohChannel, IrohConnect, PoolOptions}; +use tracing::instrument; + +// `TracedChannel` swaps under the `otel` feature: with otel on it wraps the +// channel in an 
interceptor that injects W3C traceparent headers; with otel +// off it's the bare channel. Construction sites use `traced(channel)`. +#[cfg(feature = "otel")] +type TracedChannel = tonic::service::interceptor::InterceptedService< + IrohChannel, + tonic_iroh_transport::otel::TraceContextInjector, +>; +#[cfg(not(feature = "otel"))] +type TracedChannel = IrohChannel; + +type TracedDriver = RemoteExecuteDriver; + +#[cfg(feature = "otel")] +fn traced(channel: IrohChannel) -> TracedChannel { + tonic::service::interceptor::InterceptedService::new( + channel, + tonic_iroh_transport::otel::TraceContextInjector, + ) +} +#[cfg(not(feature = "otel"))] +fn traced(channel: IrohChannel) -> TracedChannel { + channel +} + +const DISCOVERY_TIMEOUT: Duration = Duration::from_secs(30); +const REMOTE_CONNECT_TIMEOUT: Duration = Duration::from_secs(10); +/// Max quote RPCs in flight at once while draining the discovery stream. +/// Keep this high enough that we never stall the mDNS subscriber (the +/// consumer must drain at least as fast as iroh emits, i.e. ~1/sec per +/// peer), but low enough to avoid thundering-herd on the network. 
+const MAX_CONCURRENT_QUOTES: usize = 8; + +// --------------------------------------------------------------------------- +// Public configuration types +// --------------------------------------------------------------------------- + +#[derive(Clone, Debug, PartialEq, Eq)] +pub enum ExecutionRoute { + #[cfg(feature = "hellas-executor")] + Local, + RemoteDirect(RemoteNodeTarget), + RemoteDiscovery { + retries: usize, + }, +} + +impl ExecutionRoute { + pub fn remote( + node_id: Option, + node_addrs: Vec, + retries: usize, + ) -> Self { + match node_id { + Some(node_id) => Self::RemoteDirect(RemoteNodeTarget { + node_id, + node_addrs, + }), + None => Self::RemoteDiscovery { retries }, + } + } +} + +#[derive(Clone, Debug, PartialEq, Eq)] +pub struct RemoteNodeTarget { + pub node_id: EndpointId, + pub node_addrs: Vec, +} + +impl RemoteNodeTarget { + fn endpoint_addr(&self) -> EndpointAddr { + EndpointAddr::from_parts( + self.node_id, + self.node_addrs.iter().copied().map(TransportAddr::Ip), + ) + } +} + +#[derive(Clone, Debug, PartialEq, Eq)] +pub enum ExecutionStrategy { + Run(ExecutionRoute), + Verify { + primary: ExecutionRoute, + shadow: ExecutionRoute, + }, +} + +#[derive(Clone, Default)] +pub struct ExecutionRuntime { + #[cfg(feature = "hellas-executor")] + local_executor: Option, + secret_key: Option, +} + +// --------------------------------------------------------------------------- +// Stream item types +// --------------------------------------------------------------------------- + +/// One observation from a streaming execution. Stream protocol: zero or +/// more `Chunk` events, terminated by exactly one `Done`. +#[derive(Debug, Clone)] +pub enum ExecutionEvent { + Chunk { + /// Cumulative tokens emitted *after* this chunk. + position: u64, + /// Little-endian u32 token IDs. + tokens: Vec, + }, + Done(Outcome), +} + +/// Terminal verdict of an execution. 
+#[derive(Debug, Clone)] +pub enum Outcome { + Completed { + total_tokens: u64, + stop_reason: StopReason, + receipt: ReceiptArtifact, + }, + Failed { + /// Tokens emitted before the failure (for honest usage reporting). + position: u64, + error: String, + }, +} + +/// Verified signed receipt envelope bytes as delivered by the executor. +/// +/// The gateway exposes these bytes directly as `hellas.receipt`. Symbolic +/// callers that need the symbolic result artifact digest can project it from +/// the verified envelope, but that digest is not the universal receipt +/// identity. +#[derive(Debug, Clone)] +pub struct ReceiptArtifact { + dag_cbor: Vec, + symbolic_text_artifact: Option, +} + +impl ReceiptArtifact { + pub fn from_pb(envelope: Option) -> anyhow::Result { + let (dag_cbor, core) = decode_receipt_envelope(envelope)?; + verify_receipt(&core).context("receipt signature verification failed")?; + Ok(Self::from_verified_core(dag_cbor, &core)) + } + + pub fn encoded(&self) -> String { + URL_SAFE_NO_PAD.encode(&self.dag_cbor) + } + + pub fn symbolic_text_artifact(&self) -> Option { + self.symbolic_text_artifact + } + + fn from_verified_core(dag_cbor: Vec, core: &CoreSignedReceipt) -> Self { + let symbolic_text_artifact = match core.body().scheme() { + SchemeId::Symbolic => Some(core.body().result().digest()), + _ => None, + }; + Self { + dag_cbor, + symbolic_text_artifact, + } + } + + #[cfg(test)] + pub(crate) fn from_test_bytes(dag_cbor: Vec) -> Self { + Self { + dag_cbor, + symbolic_text_artifact: None, + } + } +} + +impl Outcome { + /// Cumulative token count at the moment the run terminated. + /// Authoritative for usage frames on both Completed and Failed. + pub fn position(&self) -> u64 { + match self { + Self::Completed { total_tokens, .. } => *total_tokens, + Self::Failed { position, .. 
} => *position, + } + } +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum StopReason { + EndOfSequence, + MaxNewTokens, + Cancelled, +} + +#[derive(Debug, Clone)] +pub enum OpaqueExecutionEvent { + Chunk { position: u64, bytes: Vec }, + Done(OpaqueOutcome), +} + +#[derive(Debug, Clone)] +pub enum OpaqueOutcome { + Completed { output: Vec }, + Failed { error: String }, +} + +// --------------------------------------------------------------------------- +// ExecutionRuntime +// --------------------------------------------------------------------------- + +impl ExecutionRuntime { + #[cfg(feature = "hellas-executor")] + pub fn with_local_executor(local_executor: ExecutorHandle) -> Self { + Self { + local_executor: Some(local_executor), + secret_key: None, + } + } + + pub fn with_secret_key(mut self, secret_key: SecretKey) -> Self { + self.secret_key = Some(secret_key); + self + } + + #[cfg(feature = "hellas-executor")] + pub fn spawn_default_local_with_producer_key( + queue_capacity: usize, + supported_dtypes: Vec, + producer_key: ProducerSigningKey, + ) -> anyhow::Result { + let local_executor = Executor::spawn_with_producer_key( + DownloadPolicy::Eager, + ExecutePolicy::Eager, + queue_capacity, + supported_dtypes, + producer_key, + ) + .context("failed to initialize local execution backend")?; + Ok(Self::with_local_executor(local_executor)) + } + + #[cfg(feature = "hellas-executor")] + fn require_local_executor(&self) -> Result { + self.local_executor + .clone() + .ok_or_else(|| anyhow!("local execution requested but no local executor is configured")) + } +} + +// --------------------------------------------------------------------------- +// ExecutionRequest — public entry point +// --------------------------------------------------------------------------- + +pub struct ExecutionRequest { + runtime: ExecutionRuntime, + quote_req: QuotePreparedTextRequest, + strategy: ExecutionStrategy, +} + +impl ExecutionRequest { + pub fn new( + runtime: 
ExecutionRuntime, + assets: Arc, + prepared_prompt: PreparedPrompt, + max_seq: u32, + strategy: ExecutionStrategy, + ) -> anyhow::Result { + Ok(Self { + runtime, + quote_req: assets.build_quote_prepared_text_request(&prepared_prompt, max_seq)?, + strategy, + }) + } + + /// True if any leg of this strategy talks to a remote executor. + pub fn uses_remote_transport(&self) -> bool { + #[cfg(feature = "hellas-executor")] + let is_remote = |r: &ExecutionRoute| !matches!(r, ExecutionRoute::Local); + #[cfg(not(feature = "hellas-executor"))] + let is_remote = |_r: &ExecutionRoute| true; + match &self.strategy { + ExecutionStrategy::Run(route) => is_remote(route), + ExecutionStrategy::Verify { primary, shadow } => { + is_remote(primary) || is_remote(shadow) + } + } + } + + /// Run the quote step (talking to the chosen executor) and return the + /// `PreparedExecution`. Splitting prepare from `stream` lets callers + /// (notably the gateway) read pre-flight provenance off + /// `PreparedExecution::provenance()` *before* the response stream + /// flushes its headers. + pub async fn prepare(self) -> anyhow::Result { + prepare_execution(&self.runtime, &self.quote_req, &self.strategy).await + } + + /// Drive this request to completion as a stream of events. + /// + /// Owning consumption: dropping the returned stream cancels everything + /// downstream (broadcast subscribers, tonic streams, the executor's + /// per-running cancel token). + pub fn stream(self) -> impl Stream> + Send { + try_stream! 
{ + let prepared = self.prepare().await?; + let inner = prepared.stream(); + tokio::pin!(inner); + while let Some(event) = inner.next().await { + yield event?; + } + } + } +} + +pub struct OpaqueExecutionRequest { + runtime: ExecutionRuntime, + request: PbOpaqueRequest, + route: ExecutionRoute, +} + +impl OpaqueExecutionRequest { + pub fn new(runtime: ExecutionRuntime, request: PbOpaqueRequest, route: ExecutionRoute) -> Self { + Self { + runtime, + request, + route, + } + } + + pub fn uses_remote_transport(&self) -> bool { + #[cfg(feature = "hellas-executor")] + return !matches!(self.route, ExecutionRoute::Local); + #[cfg(not(feature = "hellas-executor"))] + return true; + } + + pub fn stream(self) -> impl Stream> + Send { + try_stream! { + let prepared = prepare_opaque_route(&self.runtime, &self.request, &self.route).await?; + let inner = prepared.stream(); + tokio::pin!(inner); + while let Some(event) = inner.next().await { + yield event?; + } + } + } +} + +// --------------------------------------------------------------------------- +// PreparedExecution — primary + optional shadow for Verify +// --------------------------------------------------------------------------- + +pub struct PreparedExecution { + primary: PreparedRoute, + shadow: Option, +} + +async fn prepare_execution( + runtime: &ExecutionRuntime, + quote_req: &QuotePreparedTextRequest, + strategy: &ExecutionStrategy, +) -> anyhow::Result { + match strategy { + ExecutionStrategy::Run(route) => Ok(PreparedExecution { + primary: PreparedRoute::prepare(runtime, quote_req, route).await?, + shadow: None, + }), + ExecutionStrategy::Verify { primary, shadow } => Ok(PreparedExecution { + primary: PreparedRoute::prepare(runtime, quote_req, primary).await?, + shadow: Some(PreparedRoute::prepare(runtime, quote_req, shadow).await?), + }), + } +} + +impl PreparedExecution { + /// See [`PreparedRoute::provenance`] — this delegates to the primary + /// route. 
Shadow's provenance is intentionally not exposed (verify is + /// internal; the primary is what the user sees). + pub fn provenance(&self) -> Option<&ExecutionProvenance> { + self.primary.provenance() + } + + /// Stream primary's events live. If a shadow is configured, run it + /// after primary completes and only emit primary's `Done` once the two + /// receipts agree. Mismatch is reported as a `Done(Failed)` so the + /// terminal frame is honest about the disagreement. + pub fn stream(self) -> impl Stream> + Send { + let Self { primary, shadow } = self; + try_stream! { + // Yield primary's chunks live; hold its Done back until shadow + // (if any) agrees. + let mut primary_done: Option = None; + { + let primary = primary.stream(); + tokio::pin!(primary); + while let Some(event) = primary.next().await { + match event? { + ExecutionEvent::Chunk { position, tokens } => { + yield ExecutionEvent::Chunk { position, tokens }; + } + ExecutionEvent::Done(outcome) => { + primary_done = Some(outcome); + break; + } + } + } + } + let primary_outcome = primary_done + .ok_or_else(|| anyhow!("primary stream ended without terminal outcome"))?; + + let final_outcome = match shadow { + None => primary_outcome, + Some(shadow_route) => verify_shadow(primary_outcome, shadow_route).await?, + }; + yield ExecutionEvent::Done(final_outcome); + } + } +} + +/// Run the shadow stream to completion (discarding its chunks), extract +/// its terminal outcome, and return the reconciled outcome. +/// +/// Cases: +/// - Primary Failed → return primary unchanged. Shadow doesn't run; no +/// point burning verification compute on a failure. +/// - Primary Completed + shadow Completed + matching receipt CIDs → +/// primary unchanged. +/// - Primary Completed + shadow Completed + mismatched receipts → +/// synthetic Failed describing the divergence. +/// - Primary Completed + shadow Failed → synthetic Failed: the run is +/// unverified, even though the bytes the user saw were real. 
The +/// terminal frame is honest about that. +/// +/// Transport errors from the shadow stream propagate via `?` and surface +/// as stream-level errors (not Outcome::Failed) — they're also unverified +/// situations but distinguished for diagnostics. +async fn verify_shadow(primary: Outcome, shadow: PreparedRoute) -> anyhow::Result { + let primary_digest = match &primary { + Outcome::Completed { receipt, .. } => { + receipt.symbolic_text_artifact().ok_or_else(|| { + anyhow!("primary symbolic execution did not produce symbolic artifact digest") + })? + } + Outcome::Failed { .. } => return Ok(primary), + }; + + let shadow_outcome = drain_to_outcome(shadow.stream()).await?; + match shadow_outcome { + Outcome::Completed { + receipt: shadow_receipt, + .. + } => { + let shadow_digest = shadow_receipt.symbolic_text_artifact().ok_or_else(|| { + anyhow!("shadow symbolic execution did not produce symbolic artifact digest") + })?; + if primary_digest == shadow_digest { + Ok(primary) + } else { + Ok(Outcome::Failed { + position: primary.position(), + error: format!( + "verify mismatch: primary symbolic artifact {primary_digest} != shadow symbolic artifact {shadow_digest}" + ), + }) + } + } + Outcome::Failed { + error: shadow_error, + .. + } => Ok(Outcome::Failed { + position: primary.position(), + error: format!("shadow verification failed: {shadow_error}"), + }), + } +} + +/// Consume a stream to its terminal `Done`, discarding chunks. Errors if +/// the stream ends without a terminal event. +async fn drain_to_outcome( + stream: impl Stream>, +) -> anyhow::Result { + tokio::pin!(stream); + while let Some(event) = stream.next().await { + if let ExecutionEvent::Done(outcome) = event? 
{ + return Ok(outcome); + } + } + Err(anyhow!("shadow stream ended without terminal outcome")) +} + +// --------------------------------------------------------------------------- +// PreparedRoute — Local | RemoteDirect | RemoteDiscovery +// --------------------------------------------------------------------------- + +// `RemoteDirect` is boxed; `RemoteDiscovery` carries the full quote request and +// stays sizeable. The variant is short-lived (one per execution setup), so the +// remaining disparity isn't worth more boxing. +#[allow(clippy::large_enum_variant)] +enum PreparedRoute { + #[cfg(feature = "hellas-executor")] + Local { + executor: ExecutorHandle, + request_commitment: Vec, + provenance: ExecutionProvenance, + }, + RemoteDirect(Box), + RemoteDiscovery { + quote_req: QuotePreparedTextRequest, + retries: usize, + secret_key: Option, + }, +} + +impl PreparedRoute { + /// Pre-flight provenance — `Some` when the route's quote has already + /// happened (Local, RemoteDirect) so the gateway can attach + /// `x-hellas-*` response headers before any stream events flow. + /// `None` for `RemoteDiscovery`, where the quote is deferred until + /// the first peer responds during streaming; in that case the gateway + /// falls back to in-band SSE events for the same provenance. + fn provenance(&self) -> Option<&ExecutionProvenance> { + match self { + #[cfg(feature = "hellas-executor")] + PreparedRoute::Local { provenance, .. } => Some(provenance), + PreparedRoute::RemoteDirect(remote) => Some(&remote.provenance), + PreparedRoute::RemoteDiscovery { .. 
} => None, + } + } + + #[instrument(skip_all, fields(?route))] + async fn prepare( + runtime: &ExecutionRuntime, + quote_req: &QuotePreparedTextRequest, + route: &ExecutionRoute, + ) -> anyhow::Result { + match route { + #[cfg(feature = "hellas-executor")] + ExecutionRoute::Local => { + let mut executor = runtime.require_local_executor()?; + executor + .preload_weights(local_model_spec(quote_req)) + .await + .context("failed to preload local weights")?; + let quoted = quote_with_driver(quote_req, &mut executor, || { + "local quote failed".to_string() + }) + .await?; + let ticket = quoted + .response + .ticket + .ok_or_else(|| anyhow!("quote_prepared_text response missing ticket"))?; + Ok(Self::Local { + executor, + request_commitment: ticket.request_commitment, + provenance: quoted.provenance, + }) + } + ExecutionRoute::RemoteDirect(target) => { + let endpoint = bind_remote_endpoint(runtime.secret_key.as_ref()).await?; + let quote = quote_remote_target(quote_req, &endpoint, target).await?; + Ok(Self::RemoteDirect(Box::new(RemoteExecution::from_quoted( + endpoint, quote, + )))) + } + ExecutionRoute::RemoteDiscovery { retries } => Ok(Self::RemoteDiscovery { + quote_req: quote_req.clone(), + retries: *retries, + secret_key: runtime.secret_key.clone(), + }), + } + } + + fn stream(self) -> BoxStream<'static, anyhow::Result> { + match self { + #[cfg(feature = "hellas-executor")] + PreparedRoute::Local { + executor, + request_commitment, + provenance: _, + } => execute_stream(executor, request_commitment).boxed(), + PreparedRoute::RemoteDirect(remote) => remote.stream().boxed(), + PreparedRoute::RemoteDiscovery { + quote_req, + retries, + secret_key, + } => discovery_stream(quote_req, retries, secret_key).boxed(), + } + } +} + +#[allow(clippy::large_enum_variant)] // see PreparedRoute +enum OpaquePreparedRoute { + #[cfg(feature = "hellas-executor")] + Local { + executor: ExecutorHandle, + request: PbOpaqueRequest, + request_commitment: Vec, + }, + RemoteDirect(Box), + 
RemoteDiscovery { + request: PbOpaqueRequest, + retries: usize, + secret_key: Option, + }, +} + +async fn prepare_opaque_route( + runtime: &ExecutionRuntime, + request: &PbOpaqueRequest, + route: &ExecutionRoute, +) -> anyhow::Result { + match route { + #[cfg(feature = "hellas-executor")] + ExecutionRoute::Local => { + let mut executor = runtime.require_local_executor()?; + let quoted = quote_opaque_with_driver(request, &mut executor, || { + "local opaque quote failed".to_string() + }) + .await?; + Ok(OpaquePreparedRoute::Local { + executor, + request: request.clone(), + request_commitment: quoted.response.request_commitment, + }) + } + ExecutionRoute::RemoteDirect(target) => { + let endpoint = bind_remote_endpoint(runtime.secret_key.as_ref()).await?; + let quote = quote_opaque_remote_target(request, &endpoint, target).await?; + Ok(OpaquePreparedRoute::RemoteDirect(Box::new( + OpaqueRemoteExecution::from_quoted(endpoint, request.clone(), quote), + ))) + } + ExecutionRoute::RemoteDiscovery { retries } => Ok(OpaquePreparedRoute::RemoteDiscovery { + request: request.clone(), + retries: *retries, + secret_key: runtime.secret_key.clone(), + }), + } +} + +impl OpaquePreparedRoute { + fn stream(self) -> BoxStream<'static, anyhow::Result> { + match self { + #[cfg(feature = "hellas-executor")] + Self::Local { + executor, + request, + request_commitment, + } => execute_opaque_stream(executor, request_commitment, request).boxed(), + Self::RemoteDirect(remote) => remote.stream().boxed(), + Self::RemoteDiscovery { + request, + retries, + secret_key, + } => opaque_discovery_stream(request, retries, secret_key).boxed(), + } + } +} + +fn opaque_discovery_stream( + request: PbOpaqueRequest, + retries: usize, + secret_key: Option, +) -> impl Stream> + Send { + try_stream! 
{ + let max_attempts = retries.saturating_add(1); + let mut tried: HashSet = HashSet::new(); + let mut last_peer_error: Option = None; + info!("No node ID provided, discovering opaque executor"); + + for attempt in 1..=max_attempts { + let remote = prepare_discovered_opaque_remote(&request, secret_key.as_ref(), &tried).await?; + let peer_id = remote.peer_id; + let mut committed = false; + let mut transport_err: Option = None; + let mut got_terminal = false; + { + let inner = remote.stream(); + tokio::pin!(inner); + while let Some(event) = inner.next().await { + match event { + Ok(OpaqueExecutionEvent::Chunk { position, bytes }) => { + committed = true; + yield OpaqueExecutionEvent::Chunk { position, bytes }; + } + Ok(OpaqueExecutionEvent::Done(outcome)) => { + got_terminal = true; + yield OpaqueExecutionEvent::Done(outcome); + } + Err(e) => { + transport_err = Some(e); + break; + } + } + } + } + if got_terminal { return; } + + let err = transport_err + .unwrap_or_else(|| anyhow!("stream from {peer_id} ended without terminal outcome")); + if committed { + Err(err.context(format!( + "opaque execution failed on {peer_id} after output was emitted" + )))?; + unreachable!("Err(_)? always returns"); + } + warn!(attempt, %peer_id, "opaque execution failed before output, rediscovering: {err:#}"); + tried.insert(peer_id); + last_peer_error = Some(err); + } + + let err = last_peer_error + .unwrap_or_else(|| anyhow!("no opaque provider could serve the request")); + Err(err.context(format!("max retries ({retries}) exceeded")))?; + } +} + +/// Discovery+retry across providers. +/// +/// Per-attempt rules (matched off the inner Result so the failure-mode +/// distinction is visible): +/// - `Ok(Chunk)` → forward; mark `committed`. +/// - `Ok(Done)` → forward and finish (executor verdict, no retry). +/// - `Err(_)` before any `committed` chunk → exclude this peer, retry. +/// - `Err(_)` after `committed` chunks → propagate (can't retry committed work). 
+/// +/// `prepare_discovered_remote` failure aborts immediately — that's a +/// "couldn't find anyone" condition that retrying won't help with. +fn discovery_stream( + quote_req: QuotePreparedTextRequest, + retries: usize, + secret_key: Option, +) -> impl Stream> + Send { + try_stream! { + let max_attempts = retries.saturating_add(1); + let mut tried: HashSet = HashSet::new(); + let mut last_peer_error: Option = None; + info!("No node ID provided, discovering executor"); + + for attempt in 1..=max_attempts { + let remote = prepare_discovered_remote("e_req, secret_key.as_ref(), &tried).await?; + let peer_id = remote.peer_id; + let mut committed = false; + let mut transport_err: Option = None; + let mut got_terminal = false; + { + let inner = remote.stream(); + tokio::pin!(inner); + while let Some(event) = inner.next().await { + match event { + Ok(ExecutionEvent::Chunk { position, tokens }) => { + committed = true; + yield ExecutionEvent::Chunk { position, tokens }; + } + Ok(ExecutionEvent::Done(outcome)) => { + got_terminal = true; + yield ExecutionEvent::Done(outcome); + } + Err(e) => { + transport_err = Some(e); + break; + } + } + } + } + if got_terminal { return; } + + // No terminal — must be a transport error. The "stream ended + // without terminal" case manifests as None from the inner + // generator without an Err item; treat it the same way. + let err = transport_err + .unwrap_or_else(|| anyhow!("stream from {peer_id} ended without terminal outcome")); + if committed { + Err(err.context(format!( + "execution failed on {peer_id} after output was emitted" + )))?; + unreachable!("Err(_)? 
always returns"); + } + warn!(attempt, %peer_id, "execution failed before output, rediscovering: {err:#}"); + tried.insert(peer_id); + last_peer_error = Some(err); + } + + let err = last_peer_error + .unwrap_or_else(|| anyhow!("no provider could serve the request")); + Err(err.context(format!("max retries ({retries}) exceeded")))?; + } +} + +// --------------------------------------------------------------------------- +// RemoteExecution — owns one quoted remote driver + its endpoint +// --------------------------------------------------------------------------- + +struct RemoteExecution { + endpoint: Arc, + peer_id: EndpointId, + request_commitment: Vec, + provenance: ExecutionProvenance, + driver: TracedDriver, +} + +impl RemoteExecution { + fn from_quoted(endpoint: Arc, quoted: QuotedRemoteDriver) -> Self { + Self { + endpoint, + peer_id: quoted.peer_id, + request_commitment: quoted.quote.request_commitment, + provenance: quoted.provenance, + driver: quoted.driver, + } + } + + fn stream(self) -> impl Stream> + Send { + let Self { + endpoint, + peer_id: _, + request_commitment, + provenance: _, + driver, + } = self; + try_stream! { + // Hold the endpoint until the stream is dropped. Dropping the + // endpoint while the underlying QUIC connection is in-flight + // would tear down transport mid-execution. 
+ let _endpoint = endpoint; + let inner = execute_stream(driver, request_commitment); + tokio::pin!(inner); + while let Some(event) = inner.next().await { + yield event?; + } + } + } +} + +struct OpaqueRemoteExecution { + endpoint: Arc, + peer_id: EndpointId, + request: PbOpaqueRequest, + request_commitment: Vec, + driver: TracedDriver, +} + +impl OpaqueRemoteExecution { + fn from_quoted( + endpoint: Arc, + request: PbOpaqueRequest, + quoted: QuotedRemoteDriver, + ) -> Self { + Self { + endpoint, + peer_id: quoted.peer_id, + request, + request_commitment: quoted.quote.request_commitment, + driver: quoted.driver, + } + } + + fn stream(self) -> impl Stream> + Send { + let Self { + endpoint, + peer_id: _, + request, + request_commitment, + driver, + } = self; + try_stream! { + let _endpoint = endpoint; + let inner = execute_opaque_stream(driver, request_commitment, request); + tokio::pin!(inner); + while let Some(event) = inner.next().await { + yield event?; + } + } + } +} + +// --------------------------------------------------------------------------- +// execute_stream — the bottom layer that maps wire events → ExecutionEvent +// --------------------------------------------------------------------------- + +fn execute_stream( + mut driver: D, + request_commitment: Vec, +) -> impl Stream> + Send { + try_stream! { + // Provenance arrives in `streamed.provenance` (from response + // metadata server-side) but the gateway already has it from the + // quote step, so we drop it here and only forward the event stream. + let mut wire = driver + .execute_streaming(RunTicketRequest { + request_commitment, + }) + .await + .context("failed to start execution stream")? 
+ .stream; + + let mut got_terminal = false; + while let Some(item) = wire.next().await { + let event = convert_wire_event(item.context("execution stream failed")?)?; + let is_done = matches!(event, ExecutionEvent::Done(_)); + yield event; + if is_done { + got_terminal = true; + break; + } + } + + if !got_terminal { + Err(anyhow!("execution stream ended without terminal outcome"))?; + } + // Hold the driver until end of stream so the underlying transport + // (tonic streaming response) stays attached. + drop(driver); + } +} + +fn execute_opaque_stream( + mut driver: D, + request_commitment: Vec, + request: PbOpaqueRequest, +) -> impl Stream> + Send { + try_stream! { + let core_request = core_opaque_request(&request)?; + let mut wire = driver + .execute_streaming(RunTicketRequest { + request_commitment, + }) + .await + .context("failed to start opaque execution stream")? + .stream; + + let mut got_terminal = false; + while let Some(item) = wire.next().await { + let event = convert_opaque_wire_event( + item.context("opaque execution stream failed")?, + &core_request, + )?; + let is_done = matches!(event, OpaqueExecutionEvent::Done(_)); + yield event; + if is_done { + got_terminal = true; + break; + } + } + + if !got_terminal { + Err(anyhow!("opaque execution stream ended without terminal outcome"))?; + } + drop(driver); + } +} + +/// Translate one wire `WorkEvent` into one `ExecutionEvent`. 
+fn convert_wire_event(event: WorkEvent) -> anyhow::Result { + let Some(event) = event.kind else { + bail!("wire event with no body"); + }; + match event { + work_event::Kind::Chunk(chunk) => Ok(ExecutionEvent::Chunk { + position: chunk.position, + tokens: chunk.bytes, + }), + work_event::Kind::Finished(finished) => Ok(ExecutionEvent::Done(parse_finished(finished)?)), + work_event::Kind::Failed(failed) => Ok(ExecutionEvent::Done(Outcome::Failed { + position: failed.position, + error: failed.error, + })), + } +} + +fn convert_opaque_wire_event( + event: WorkEvent, + request: &CoreOpaqueRequest, +) -> anyhow::Result { + let Some(event) = event.kind else { + bail!("wire event with no body"); + }; + match event { + work_event::Kind::Chunk(chunk) => Ok(OpaqueExecutionEvent::Chunk { + position: chunk.position, + bytes: chunk.bytes, + }), + work_event::Kind::Finished(finished) => Ok(OpaqueExecutionEvent::Done( + parse_opaque_finished(finished, request)?, + )), + work_event::Kind::Failed(failed) => Ok(OpaqueExecutionEvent::Done(OpaqueOutcome::Failed { + error: failed.error, + })), + } +} + +fn parse_finished(finished: pb::WorkFinished) -> anyhow::Result { + let receipt = ReceiptArtifact::from_pb(finished.receipt)?; + if receipt.symbolic_text_artifact().is_none() { + bail!("symbolic execution returned an opaque receipt"); + } + let stop_reason = stop_reason_from_pb(finished.status)?; + Ok(Outcome::Completed { + total_tokens: finished.total_units, + stop_reason, + receipt, + }) +} + +fn parse_opaque_finished( + finished: pb::WorkFinished, + request: &CoreOpaqueRequest, +) -> anyhow::Result { + stop_reason_from_pb(finished.status)?; + serde_json::from_slice::(&finished.output) + .context("opaque output must be UTF-8 JSON")?; + let output = JsonBytes::new(finished.output.clone()); + let (_dag_cbor, core) = decode_receipt_envelope(finished.receipt)?; + verify_delivery( + DeliveryRequest::Opaque(request), + DeliveryOutput::Opaque(&output), + &core, + ) + .context("opaque receipt 
verification failed")?; + if core.body().scheme() != SchemeId::Opaque { + bail!("opaque execution returned a symbolic receipt"); + } + Ok(OpaqueOutcome::Completed { + output: output.into_bytes(), + }) +} + +fn core_opaque_request(request: &PbOpaqueRequest) -> anyhow::Result { + if request.service.is_empty() { + bail!("opaque service must not be empty"); + } + if request.method.is_empty() { + bail!("opaque method must not be empty"); + } + serde_json::from_slice::(&request.payload) + .context("opaque payload must be UTF-8 JSON")?; + Ok(CoreOpaqueRequest { + service: request.service.clone(), + method: request.method.clone(), + payload: JsonBytes::new(request.payload.clone()), + }) +} + +fn decode_receipt_envelope( + envelope: Option, +) -> anyhow::Result<(Vec, CoreSignedReceipt)> { + let envelope = envelope.ok_or_else(|| anyhow!("finished event missing receipt envelope"))?; + let core: CoreSignedReceipt = decode_dag_cbor(&envelope.dag_cbor) + .context("failed to decode receipt envelope dag-cbor")?; + Ok((envelope.dag_cbor, core)) +} + +fn stop_reason_from_pb(value: i32) -> anyhow::Result { + let pb_value = + FinishStatus::try_from(value).with_context(|| format!("unknown finish status {value}"))?; + match pb_value { + FinishStatus::Unspecified => bail!("wire finish status is unspecified"), + FinishStatus::EndOfSequence => Ok(StopReason::EndOfSequence), + FinishStatus::MaxOutput => Ok(StopReason::MaxNewTokens), + FinishStatus::Cancelled => Ok(StopReason::Cancelled), + } +} + +// --------------------------------------------------------------------------- +// Quote / discovery / endpoint helpers (largely unchanged) +// --------------------------------------------------------------------------- + +struct QuotedRemoteDriver { + peer_id: EndpointId, + quote: hellas_pb::hellas::Ticket, + provenance: ExecutionProvenance, + driver: TracedDriver, +} + +#[derive(Debug)] +enum QuoteCandidateError { + Declined(anyhow::Error), + Connect(anyhow::Error), +} + +#[instrument(skip_all, 
fields(model = %quote_req.huggingface_model_id))] +async fn quote_with_driver( + quote_req: &QuotePreparedTextRequest, + driver: &mut D, + context: impl FnOnce() -> String, +) -> anyhow::Result +where + D: ExecuteDriver, +{ + let quoted = driver + .quote_prepared_text(quote_req.clone()) + .await + .with_context(context)?; + let ticket = quoted + .response + .ticket + .as_ref() + .ok_or_else(|| anyhow!("quote_prepared_text response missing ticket"))?; + tracing::Span::current().record( + "request_commitment", + tracing::field::display(format_hex(&ticket.request_commitment)), + ); + Ok(quoted) +} + +#[instrument(skip_all, fields(service = %request.service, method = %request.method))] +async fn quote_opaque_with_driver( + request: &PbOpaqueRequest, + driver: &mut D, + context: impl FnOnce() -> String, +) -> anyhow::Result +where + D: ExecuteDriver, +{ + core_opaque_request(request)?; + let quoted = driver + .create_opaque_ticket(request.clone()) + .await + .with_context(context)?; + tracing::Span::current().record( + "request_commitment", + tracing::field::display(format_hex("ed.response.request_commitment)), + ); + Ok(quoted) +} + +async fn bind_remote_endpoint(secret_key: Option<&SecretKey>) -> anyhow::Result> { + let (endpoint, _bindings) = bind_remote_endpoint_with_bindings(secret_key).await?; + Ok(endpoint) +} + +/// Bind a client endpoint and attach the full discovery stack (DNS + Pkarr +/// publisher + mDNS + DHT resolver). Without mDNS attached to the endpoint's +/// address lookup, peers on the same LAN can only be resolved via the Pkarr +/// DHT / n0 DNS relay, so LAN connections take minutes instead of milliseconds. 
+async fn bind_remote_endpoint_with_bindings( + secret_key: Option<&SecretKey>, +) -> anyhow::Result<(Arc, DiscoveryBindings)> { + use tonic_iroh_transport::iroh::address_lookup::PkarrPublisher; + use tonic_iroh_transport::iroh::endpoint::presets; + + let mut builder = Endpoint::builder(presets::N0) + .clear_address_lookup() + .address_lookup(DnsAddressLookup::n0_dns()) + .address_lookup(PkarrPublisher::n0_dns()) + .portmapper_config(PortmapperConfig::Disabled); + if let Some(key) = secret_key { + builder = builder.secret_key(key.clone()); + } + let endpoint = builder + .bind() + .await + .context("failed to create client transport endpoint")?; + let bindings = DiscoveryBindings::attach(&endpoint, false, false) + .context("failed to attach client discovery lookups")?; + Ok((Arc::new(endpoint), bindings)) +} + +fn bind_remote_pool(endpoint: &Endpoint) -> ConnectionPool { + ConnectionPool::for_service::( + endpoint.clone(), + PoolOptions { + connect_timeout: REMOTE_CONNECT_TIMEOUT, + ..PoolOptions::default() + }, + ) +} + +fn bind_courtesy_pool(endpoint: &Endpoint) -> ConnectionPool { + ConnectionPool::for_service::( + endpoint.clone(), + PoolOptions { + connect_timeout: REMOTE_CONNECT_TIMEOUT, + ..PoolOptions::default() + }, + ) +} + +fn bind_opaque_pool(endpoint: &Endpoint) -> ConnectionPool { + ConnectionPool::for_service::( + endpoint.clone(), + PoolOptions { + connect_timeout: REMOTE_CONNECT_TIMEOUT, + ..PoolOptions::default() + }, + ) +} + +#[instrument(skip_all, fields(%peer_id, service = %request.service, method = %request.method))] +async fn quote_opaque_remote_endpoint( + request: &PbOpaqueRequest, + execute_pool: &ConnectionPool, + opaque_pool: &ConnectionPool, + peer_id: EndpointId, +) -> Result { + let opaque_channel = opaque_pool + .channel(peer_id) + .await + .with_context(|| format!("failed to connect to node {peer_id}")) + .map_err(QuoteCandidateError::Connect)?; + let execute_channel = execute_pool + .channel(peer_id) + .await + .with_context(|| 
format!("failed to connect to node {peer_id}")) + .map_err(QuoteCandidateError::Connect)?; + let mut driver = RemoteExecuteDriver::with_execute_and_opaque( + traced(execute_channel), + traced(opaque_channel), + ); + let quoted = match quote_opaque_with_driver(request, &mut driver, || { + format!("node {peer_id} declined opaque ticket") + }) + .await + { + Ok(quoted) => quoted, + Err(err) => return Err(QuoteCandidateError::Declined(err)), + }; + Ok(QuotedRemoteDriver { + peer_id, + quote: quoted.response, + provenance: quoted.provenance, + driver, + }) +} + +#[instrument(skip_all, fields(%peer_id, model = %quote_req.huggingface_model_id))] +async fn quote_remote_endpoint( + quote_req: &QuotePreparedTextRequest, + execute_pool: &ConnectionPool, + courtesy_pool: &ConnectionPool, + peer_id: EndpointId, +) -> Result { + let courtesy_channel = courtesy_pool + .channel(peer_id) + .await + .with_context(|| format!("failed to connect to node {peer_id}")) + .map_err(QuoteCandidateError::Connect)?; + let execute_channel = execute_pool + .channel(peer_id) + .await + .with_context(|| format!("failed to connect to node {peer_id}")) + .map_err(QuoteCandidateError::Connect)?; + let mut driver = RemoteExecuteDriver::with_execute_and_courtesy( + traced(execute_channel), + traced(courtesy_channel), + ); + let quoted = match quote_with_driver(quote_req, &mut driver, || { + format!("node {peer_id} declined ticket") + }) + .await + { + Ok(quoted) => quoted, + Err(err) => return Err(QuoteCandidateError::Declined(err)), + }; + Ok(QuotedRemoteDriver { + peer_id, + quote: quoted.response.ticket.ok_or_else(|| { + QuoteCandidateError::Declined(anyhow!("quote_prepared_text response missing ticket")) + })?, + provenance: quoted.provenance, + driver, + }) +} + +async fn quote_opaque_remote_peer( + request: &PbOpaqueRequest, + endpoint: &Endpoint, + peer_id: EndpointId, +) -> anyhow::Result { + let execute_pool = bind_remote_pool(endpoint); + let opaque_pool = bind_opaque_pool(endpoint); + 
quote_opaque_remote_endpoint(request, &execute_pool, &opaque_pool, peer_id) + .await + .map_err(|err| match err { + QuoteCandidateError::Declined(err) => { + err.context(format!("node {peer_id} declined opaque quote")) + } + QuoteCandidateError::Connect(err) => err, + }) +} + +async fn quote_remote_peer( + quote_req: &QuotePreparedTextRequest, + endpoint: &Endpoint, + peer_id: EndpointId, +) -> anyhow::Result { + let execute_pool = bind_remote_pool(endpoint); + let courtesy_pool = bind_courtesy_pool(endpoint); + quote_remote_endpoint(quote_req, &execute_pool, &courtesy_pool, peer_id) + .await + .map_err(|err| match err { + QuoteCandidateError::Declined(err) => { + err.context(format!("node {peer_id} declined quote")) + } + QuoteCandidateError::Connect(err) => err, + }) +} + +async fn quote_opaque_remote_target( + request: &PbOpaqueRequest, + endpoint: &Endpoint, + target: &RemoteNodeTarget, +) -> anyhow::Result { + if target.node_addrs.is_empty() { + return quote_opaque_remote_peer(request, endpoint, target.node_id).await; + } + + let execute_channel = ExecuteService::connect(endpoint, target.endpoint_addr()) + .connect_timeout(REMOTE_CONNECT_TIMEOUT) + .await + .with_context(|| format!("failed to connect to node {}", target.node_id))?; + let opaque_channel = OpaqueService::connect(endpoint, target.endpoint_addr()) + .connect_timeout(REMOTE_CONNECT_TIMEOUT) + .await + .with_context(|| format!("failed to connect to node {}", target.node_id))?; + let mut driver = RemoteExecuteDriver::with_execute_and_opaque( + traced(execute_channel), + traced(opaque_channel), + ); + let quoted = quote_opaque_with_driver(request, &mut driver, || { + format!("node {} declined opaque quote", target.node_id) + }) + .await?; + + Ok(QuotedRemoteDriver { + peer_id: target.node_id, + quote: quoted.response, + provenance: quoted.provenance, + driver, + }) +} + +async fn quote_remote_target( + quote_req: &QuotePreparedTextRequest, + endpoint: &Endpoint, + target: &RemoteNodeTarget, +) -> 
anyhow::Result { + if target.node_addrs.is_empty() { + return quote_remote_peer(quote_req, endpoint, target.node_id).await; + } + + let execute_channel = ExecuteService::connect(endpoint, target.endpoint_addr()) + .connect_timeout(REMOTE_CONNECT_TIMEOUT) + .await + .with_context(|| format!("failed to connect to node {}", target.node_id))?; + let courtesy_channel = CourtesyService::connect(endpoint, target.endpoint_addr()) + .connect_timeout(REMOTE_CONNECT_TIMEOUT) + .await + .with_context(|| format!("failed to connect to node {}", target.node_id))?; + let mut driver = RemoteExecuteDriver::with_execute_and_courtesy( + traced(execute_channel), + traced(courtesy_channel), + ); + let quoted = quote_with_driver(quote_req, &mut driver, || { + format!("node {} declined quote", target.node_id) + }) + .await?; + + Ok(QuotedRemoteDriver { + peer_id: target.node_id, + quote: quoted + .response + .ticket + .ok_or_else(|| anyhow!("quote_prepared_text response missing ticket"))?, + provenance: quoted.provenance, + driver, + }) +} + +#[instrument(skip_all, fields(service = %request.service, method = %request.method, excluded = exclude.len()))] +async fn discover_opaque_remote_quote( + request: &PbOpaqueRequest, + endpoint: &Endpoint, + bindings: DiscoveryBindings, + exclude: &HashSet, +) -> anyhow::Result { + let mut registry = ServiceRegistry::new(endpoint); + registry.with_pool_options(PoolOptions { + connect_timeout: REMOTE_CONNECT_TIMEOUT, + ..PoolOptions::default() + }); + registry.add(MdnsBackend::new(bindings.mdns)); + registry.add(DhtBackend::with_dht(endpoint, bindings.dht)); + let execute_pool = registry.pool::(); + let opaque_pool = registry.pool::(); + + let peers = Box::pin(registry.discover::()); + tokio::time::timeout(DISCOVERY_TIMEOUT, async { + let mut last_decline: Option = None; + let mut last_connect_error: Option = None; + let mut peers_done = false; + let mut in_flight: FuturesUnordered<_> = FuturesUnordered::new(); + futures::pin_mut!(peers); + + loop { + 
tokio::select! { + biased; + + Some(result) = in_flight.next(), if !in_flight.is_empty() => { + match result { + Ok(accepted) => return Ok(accepted), + Err(QuoteCandidateError::Declined(err)) => { + info!("opaque provider declined quote: {err:#}"); + last_decline = Some(err); + } + Err(QuoteCandidateError::Connect(err)) => { + debug!("opaque candidate connect error: {err:#}"); + last_connect_error = Some(err); + } + } + } + + peer = peers.next(), if !peers_done && in_flight.len() < MAX_CONCURRENT_QUOTES => { + match peer { + Some(Ok(peer)) => { + let peer_id = peer.id(); + if exclude.contains(&peer_id) { + debug!(%peer_id, "skipping previously-failed opaque peer"); + continue; + } + let execute_pool = execute_pool.clone(); + let opaque_pool = opaque_pool.clone(); + let req = request.clone(); + in_flight.push(async move { + quote_opaque_remote_endpoint( + &req, + &execute_pool, + &opaque_pool, + peer_id, + ).await + }); + } + Some(Err(err)) => last_connect_error = Some(err.into()), + None => peers_done = true, + } + } + + else => { + if peers_done && in_flight.is_empty() { + break; + } + } + } + } + + if let Some(status) = last_decline { + return Err(status).context("all discovered opaque providers declined the quote"); + } + if let Some(err) = last_connect_error { + return Err(err).context("failed to connect to discovered opaque providers"); + } + + anyhow::bail!("no opaque provider could serve the request"); + }) + .await + .context("opaque discovery timed out")? 
+} + +#[instrument(skip_all, fields(model = %quote_req.huggingface_model_id, excluded = exclude.len()))] +async fn discover_remote_quote( + quote_req: &QuotePreparedTextRequest, + endpoint: &Endpoint, + bindings: DiscoveryBindings, + exclude: &HashSet, +) -> anyhow::Result { + let mut registry = ServiceRegistry::new(endpoint); + registry.with_pool_options(PoolOptions { + connect_timeout: REMOTE_CONNECT_TIMEOUT, + ..PoolOptions::default() + }); + registry.add(MdnsBackend::new(bindings.mdns)); + registry.add(DhtBackend::with_dht(endpoint, bindings.dht)); + let execute_pool = registry.pool::(); + let courtesy_pool = registry.pool::(); + + let peers = Box::pin(registry.discover::()); + tokio::time::timeout(DISCOVERY_TIMEOUT, async { + let mut last_decline: Option = None; + let mut last_connect_error: Option = None; + let mut peers_done = false; + let mut in_flight: FuturesUnordered<_> = FuturesUnordered::new(); + futures::pin_mut!(peers); + + loop { + tokio::select! { + biased; + + // Consume completed quote attempts first; an early success short-circuits. + Some(result) = in_flight.next(), if !in_flight.is_empty() => { + match result { + Ok(accepted) => return Ok(accepted), + Err(QuoteCandidateError::Declined(err)) => { + info!("provider declined quote: {err:#}"); + last_decline = Some(err); + } + Err(QuoteCandidateError::Connect(err)) => { + debug!("candidate connect error: {err:#}"); + last_connect_error = Some(err); + } + } + } + + // Drain the mDNS/DHT stream as fast as we can, up to the concurrency cap, + // so iroh's subscriber buffer doesn't fill up and start dropping items. 
+ peer = peers.next(), if !peers_done && in_flight.len() < MAX_CONCURRENT_QUOTES => { + match peer { + Some(Ok(peer)) => { + let peer_id = peer.id(); + if exclude.contains(&peer_id) { + debug!(%peer_id, "skipping previously-failed peer"); + continue; + } + let execute_pool = execute_pool.clone(); + let courtesy_pool = courtesy_pool.clone(); + let req = quote_req.clone(); + in_flight.push(async move { + quote_remote_endpoint( + &req, + &execute_pool, + &courtesy_pool, + peer_id, + ).await + }); + } + Some(Err(err)) => last_connect_error = Some(err.into()), + None => peers_done = true, + } + } + + else => { + if peers_done && in_flight.is_empty() { + break; + } + } + } + } + + if let Some(status) = last_decline { + return Err(status).context("all discovered providers declined the quote"); + } + if let Some(err) = last_connect_error { + return Err(err).context("failed to connect to discovered providers"); + } + + anyhow::bail!("no provider could serve the request"); + }) + .await + .context("discovery timed out")? 
+} + +async fn prepare_discovered_opaque_remote( + request: &PbOpaqueRequest, + secret_key: Option<&SecretKey>, + exclude: &HashSet, +) -> anyhow::Result { + let (endpoint, bindings) = bind_remote_endpoint_with_bindings(secret_key).await?; + let quote = discover_opaque_remote_quote(request, &endpoint, bindings, exclude).await?; + Ok(OpaqueRemoteExecution::from_quoted( + endpoint, + request.clone(), + quote, + )) +} + +async fn prepare_discovered_remote( + quote_req: &QuotePreparedTextRequest, + secret_key: Option<&SecretKey>, + exclude: &HashSet, +) -> anyhow::Result { + let (endpoint, bindings) = bind_remote_endpoint_with_bindings(secret_key).await?; + let quote = discover_remote_quote(quote_req, &endpoint, bindings, exclude).await?; + Ok(RemoteExecution::from_quoted(endpoint, quote)) +} + +#[cfg(feature = "hellas-executor")] +fn local_model_spec(quote_req: &QuotePreparedTextRequest) -> String { + let revision = quote_req.huggingface_revision.trim(); + if revision.is_empty() { + quote_req.huggingface_model_id.clone() + } else { + format!("{}@{revision}", quote_req.huggingface_model_id) + } +} + +fn format_hex(bytes: &[u8]) -> String { + let mut out = String::with_capacity(bytes.len() * 2); + for byte in bytes { + use std::fmt::Write as _; + let _ = write!(out, "{byte:02x}"); + } + out +} diff --git a/crates/cli/src/identity.rs b/crates/cli/src/identity.rs new file mode 100644 index 0000000..cf96a87 --- /dev/null +++ b/crates/cli/src/identity.rs @@ -0,0 +1,386 @@ +use anyhow::Context; +use hellas_core::ProducerSigningKey; +use std::fs; +use std::io::ErrorKind; +use std::path::{Path, PathBuf}; +use tonic_iroh_transport::iroh::SecretKey; + +const IDENTITY_DIR: &str = ".hellas"; +const IDENTITY_FILE: &str = "identity"; +const PRODUCER_KEY_FILE: &str = "signing-key.secp256k1"; +#[cfg(feature = "hellas-executor")] +const ARTIFACT_STORE_DIR: &str = "artifacts"; +const KEY_LEN: usize = 32; + +/// Resolve the identity file path and load or create the secret key. 
+/// +/// If `path` is `Some`, uses it directly. Otherwise defaults to `$HOME/.hellas/identity`. +/// Creates a new random key if the file does not exist, using atomic rename to avoid races. +pub fn load_or_create(path: Option<&Path>) -> anyhow::Result { + let path = match path { + Some(p) => p.to_owned(), + None => default_identity_path()?, + }; + match fs::read(&path) { + Ok(bytes) => load_from_bytes(&path, &bytes), + Err(e) if e.kind() == ErrorKind::NotFound => create_new(&path), + Err(e) => { + Err(e).with_context(|| format!("failed to read identity file {}", path.display())) + } + } +} + +#[cfg(feature = "hellas-executor")] +pub fn load_or_create_producer_key(path: Option<&Path>) -> anyhow::Result { + let path = match path { + Some(p) => p.to_owned(), + None => default_producer_key_path()?, + }; + match fs::read(&path) { + Ok(bytes) => load_producer_key_from_bytes(&path, &bytes), + Err(e) if e.kind() == ErrorKind::NotFound => create_new_producer_key(&path), + Err(e) => { + Err(e).with_context(|| format!("failed to read producer key file {}", path.display())) + } + } +} + +/// Load an existing identity file; error if missing. +/// +/// Unlike `load_or_create`, this never creates a new key. Use this for +/// read-only queries (e.g. printing the node ID of a running service) to avoid +/// racing the file creator. 
+pub fn load_existing(path: Option<&Path>) -> anyhow::Result { + let path = match path { + Some(p) => p.to_owned(), + None => default_identity_path()?, + }; + let bytes = fs::read(&path) + .with_context(|| format!("failed to read identity file {}", path.display()))?; + load_from_bytes(&path, &bytes) +} + +pub fn load_existing_producer_key(path: Option<&Path>) -> anyhow::Result { + let path = match path { + Some(p) => p.to_owned(), + None => default_producer_key_path()?, + }; + let bytes = fs::read(&path) + .with_context(|| format!("failed to read producer key file {}", path.display()))?; + load_producer_key_from_bytes(&path, &bytes) +} + +fn default_identity_path() -> anyhow::Result { + default_hellas_path(IDENTITY_FILE, "--identity") +} + +fn default_producer_key_path() -> anyhow::Result { + default_hellas_path(PRODUCER_KEY_FILE, "--producer-key-path") +} + +#[cfg(feature = "hellas-executor")] +pub fn default_artifact_store_path() -> anyhow::Result { + default_hellas_path(ARTIFACT_STORE_DIR, "--artifact-store-path") +} + +fn default_hellas_path(file: &str, flag: &str) -> anyhow::Result { + let home = std::env::var("HOME").with_context(|| { + format!("HOME environment variable not set; use {flag} to specify path") + })?; + Ok(PathBuf::from(home).join(IDENTITY_DIR).join(file)) +} + +fn load_from_bytes(path: &Path, bytes: &[u8]) -> anyhow::Result { + let bytes: [u8; KEY_LEN] = bytes.try_into().map_err(|_| { + anyhow::anyhow!( + "identity file at {} has invalid size ({} bytes, expected {KEY_LEN})", + path.display(), + bytes.len(), + ) + })?; + let key = SecretKey::from(bytes); + info!(node_id = %key.public(), path = %path.display(), "loaded identity"); + Ok(key) +} + +fn create_new(path: &Path) -> anyhow::Result { + let dir = path + .parent() + .context("identity path has no parent directory")?; + + create_dir_restricted(dir) + .with_context(|| format!("failed to create identity directory {}", dir.display()))?; + + let key = SecretKey::generate(); + let bytes = 
key.to_bytes(); + + // Write to a temp file, then atomic rename. If rename fails because another + // process created the file first, read the existing one instead. + let tmp_path = dir.join(format!( + ".identity.tmp.{}.{:?}", + std::process::id(), + std::thread::current().id() + )); + write_file_restricted(&tmp_path, &bytes) + .with_context(|| format!("failed to write temp identity file {}", tmp_path.display()))?; + + match fs::rename(&tmp_path, path) { + Ok(()) => { + info!(node_id = %key.public(), path = %path.display(), "created new identity"); + Ok(key) + } + Err(e) => { + // Clean up temp file on failure. + let _ = fs::remove_file(&tmp_path); + // If the target appeared (race), read it. + if path.exists() { + let bytes = fs::read(path) + .with_context(|| format!("failed to read identity file {}", path.display()))?; + load_from_bytes(path, &bytes) + } else { + Err(e) + .with_context(|| format!("failed to persist identity file {}", path.display())) + } + } + } +} + +fn load_producer_key_from_bytes(path: &Path, bytes: &[u8]) -> anyhow::Result { + let bytes: [u8; KEY_LEN] = bytes.try_into().map_err(|_| { + anyhow::anyhow!( + "producer key file at {} has invalid size ({} bytes, expected {KEY_LEN})", + path.display(), + bytes.len(), + ) + })?; + let key = ProducerSigningKey::from_secret_bytes(bytes) + .with_context(|| format!("producer key file {} is invalid", path.display()))?; + info!( + producer_id = ?key.producer_id(), + path = %path.display(), + "loaded producer signing key" + ); + Ok(key) +} + +#[cfg(feature = "hellas-executor")] +fn create_new_producer_key(path: &Path) -> anyhow::Result { + let dir = path + .parent() + .context("producer key path has no parent directory")?; + + create_dir_restricted(dir) + .with_context(|| format!("failed to create producer key directory {}", dir.display()))?; + + let key = ProducerSigningKey::generate(); + let bytes = key.to_secret_bytes(); + + let tmp_path = dir.join(format!( + ".signing-key.secp256k1.tmp.{}.{:?}", + 
std::process::id(), + std::thread::current().id() + )); + write_file_restricted(&tmp_path, &bytes).with_context(|| { + format!( + "failed to write temp producer key file {}", + tmp_path.display() + ) + })?; + + match fs::rename(&tmp_path, path) { + Ok(()) => { + info!( + producer_id = ?key.producer_id(), + path = %path.display(), + "created new producer signing key" + ); + Ok(key) + } + Err(e) => { + let _ = fs::remove_file(&tmp_path); + if path.exists() { + let bytes = fs::read(path).with_context(|| { + format!("failed to read producer key file {}", path.display()) + })?; + load_producer_key_from_bytes(path, &bytes) + } else { + Err(e).with_context(|| { + format!("failed to persist producer key file {}", path.display()) + }) + } + } + } +} + +/// Create a directory with restricted permissions (0700 on Unix). +fn create_dir_restricted(path: &Path) -> std::io::Result<()> { + #[cfg(unix)] + { + use std::os::unix::fs::DirBuilderExt; + fs::DirBuilder::new() + .recursive(true) + .mode(0o700) + .create(path) + } + #[cfg(not(unix))] + { + fs::create_dir_all(path) + } +} + +/// Write a file with restricted permissions (0600 on Unix). 
+fn write_file_restricted(path: &Path, data: &[u8]) -> std::io::Result<()> { + #[cfg(unix)] + { + use std::io::Write; + use std::os::unix::fs::OpenOptionsExt; + let mut file = fs::OpenOptions::new() + .write(true) + .create_new(true) + .mode(0o600) + .open(path)?; + file.write_all(data)?; + file.sync_all() + } + #[cfg(not(unix))] + { + fs::write(path, data) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use std::env; + + #[test] + fn creates_new_identity_in_temp_dir() { + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("identity"); + + let key = load_or_create(Some(&path)).unwrap(); + + assert!(path.exists()); + let bytes = fs::read(&path).unwrap(); + assert_eq!(bytes.len(), KEY_LEN); + assert_eq!( + SecretKey::from(<[u8; 32]>::try_from(bytes.as_slice()).unwrap()).to_bytes(), + key.to_bytes() + ); + } + + #[cfg(feature = "hellas-executor")] + #[test] + fn creates_new_producer_key_in_temp_dir() { + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("signing-key.secp256k1"); + + let key = load_or_create_producer_key(Some(&path)).unwrap(); + + assert!(path.exists()); + let bytes = fs::read(&path).unwrap(); + assert_eq!(bytes.len(), KEY_LEN); + let reloaded = + ProducerSigningKey::from_secret_bytes(<[u8; 32]>::try_from(bytes.as_slice()).unwrap()) + .unwrap(); + assert_eq!(reloaded.producer_id(), key.producer_id()); + } + + #[cfg(feature = "hellas-executor")] + #[test] + fn reloads_existing_producer_key() { + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("signing-key.secp256k1"); + + let key1 = load_or_create_producer_key(Some(&path)).unwrap(); + let key2 = load_or_create_producer_key(Some(&path)).unwrap(); + + assert_eq!(key1.producer_id(), key2.producer_id()); + } + + #[test] + fn reloads_existing_identity() { + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("identity"); + + let key1 = load_or_create(Some(&path)).unwrap(); + let key2 = load_or_create(Some(&path)).unwrap(); + + 
assert_eq!(key1.to_bytes(), key2.to_bytes()); + } + + #[test] + fn rejects_wrong_size_file() { + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("identity"); + fs::write(&path, [0u8; 16]).unwrap(); + + let err = load_or_create(Some(&path)).unwrap_err(); + assert!(err.to_string().contains("invalid size")); + assert!(err.to_string().contains("16 bytes")); + } + + #[test] + fn creates_parent_directory() { + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("sub").join("dir").join("identity"); + + let _key = load_or_create(Some(&path)).unwrap(); + + assert!(path.exists()); + assert!(path.parent().unwrap().is_dir()); + } + + #[test] + fn default_path_uses_home() { + let dir = tempfile::tempdir().unwrap(); + // SAFETY: test is single-threaded and restores the value immediately. + unsafe { env::set_var("HOME", dir.path()) }; + + let path = default_identity_path().unwrap(); + assert_eq!(path, dir.path().join(".hellas").join("identity")); + + let path = default_producer_key_path().unwrap(); + assert_eq!( + path, + dir.path().join(".hellas").join("signing-key.secp256k1") + ); + + #[cfg(feature = "hellas-executor")] + { + let path = default_artifact_store_path().unwrap(); + assert_eq!(path, dir.path().join(".hellas").join("artifacts")); + } + + unsafe { env::remove_var("HOME") }; + } + + #[test] + fn concurrent_creation_produces_valid_key() { + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("identity"); + + let handles: Vec<_> = (0..4) + .map(|_| { + let p = path.clone(); + std::thread::spawn(move || load_or_create(Some(&p)).unwrap().to_bytes()) + }) + .collect(); + + let results: Vec<[u8; 32]> = handles.into_iter().map(|h| h.join().unwrap()).collect(); + + // All threads should get a valid 32-byte key (the first one created wins). + for result in &results { + assert_eq!(result.len(), KEY_LEN); + } + // At most one unique key should exist (all should converge on the same file). 
+ // Some threads may have generated their own key before rename, but the file + // content should be consistent — all reads after the first create should match. + let file_bytes = fs::read(&path).unwrap(); + let file_key: [u8; 32] = file_bytes.try_into().unwrap(); + // The last reader should have gotten the persisted key. + // (We can't guarantee all threads saw the same key due to create_new vs rename races, + // but the file on disk should be a valid 32-byte key.) + assert_eq!(file_key.len(), KEY_LEN); + } +} diff --git a/crates/cli/src/main.rs b/crates/cli/src/main.rs index 8020b4f..b398e87 100644 --- a/crates/cli/src/main.rs +++ b/crates/cli/src/main.rs @@ -1,79 +1,968 @@ #[macro_use] extern crate tracing; +use catgrad::prelude::Dtype; use clap::{Parser, Subcommand}; +use std::net::SocketAddr; +use std::path::PathBuf; +use std::str::FromStr; use tonic_iroh_transport::iroh::EndpointId; -mod bootstrap_peers; mod commands; +mod execution; +mod identity; +mod metrics; +mod text_output; +mod tracing_config; + +/// `clap` value parser for `--dtype`. Accepts model floating-point dtypes. +/// Rejects `u32`, which is the catgrad token-tensor dtype, never a model dtype. +fn parse_model_dtype(s: &str) -> Result { + let dtype = Dtype::from_str(s)?; + match dtype { + Dtype::F32 | Dtype::F16 | Dtype::BF16 | Dtype::F8 => Ok(dtype), + Dtype::U32 => Err("model dtype must be f32, f16, bf16, or f8".to_string()), + } +} + +/// Default dtype per build configuration. CUDA / Metal builds assume modern +/// hardware (Ampere+, M2+) where `bf16` matches the dtype most current models +/// are trained at and gives a real perf/VRAM win. CPU / unspecified-backend +/// builds default to `f32` because CPUs typically emulate bf16 via f32 anyway, +/// and `f32` is the safest broadly-correct choice. Used for `serve --dtype` +/// and `gateway --dtype`. 
+#[cfg(any(feature = "candle-cuda", feature = "candle-metal"))] +const DEFAULT_DTYPE_STR: &str = "bf16"; +#[cfg(not(any(feature = "candle-cuda", feature = "candle-metal")))] +const DEFAULT_DTYPE_STR: &str = "f32"; + +/// Default `--dtype` preference list for `llm`, resolved at dispatch. +/// +/// - **Network mode** (no `--local` / `--verify-local`): `[bf16, f32, f16]` +/// regardless of build. The remote executor decides what it can run; the +/// CLI's local hardware capability is irrelevant to the wire request. +/// - **Local-ish mode on a cuda/metal build**: same `[bf16, f32, f16]`. +/// The operator opted into a GPU-backend feature, so the build assumes +/// Ampere+/M2+ where bf16 is natively supported. If the GPU lacks bf16 +/// the weight load will fail loudly at first attempt — that's a build / +/// hardware mismatch the operator should fix, not something we paper over. +/// - **Local-ish mode on a cpu / unspecified build**: `[f32, f16]`. Skips +/// bf16 because CPU bf16 throughput is rarely a win and we want a default +/// that loads on every backend including older GPUs an operator might +/// bring in via a non-standard build. +fn default_llm_dtypes(is_local_mode: bool) -> Vec { + let cuda_or_metal = cfg!(any(feature = "candle-cuda", feature = "candle-metal")); + if is_local_mode && !cuda_or_metal { + vec![Dtype::F32, Dtype::F16] + } else { + vec![Dtype::BF16, Dtype::F32, Dtype::F16] + } +} #[derive(Parser)] #[command(name = "hellas")] #[command(version)] #[command(about = "Hellas node CLI")] struct Cli { + /// Path to node identity file (default: $HOME/.hellas/identity) + #[arg(long = "identity", global = true)] + identity: Option, + + /// Path to producer signing key (default: $HOME/.hellas/signing-key.secp256k1) + #[arg(long = "producer-key-path", global = true)] + producer_key_path: Option, + + /// Also append tracing output to this file. 
+ #[arg(long = "log-file", global = true)] + log_file: Option, + #[command(subcommand)] command: Commands, } +#[derive(Subcommand)] +enum IdentityCommand { + /// Print the node ID (hex public key) derived from the identity file + ShowNodeId, +} + +#[derive(Subcommand)] +enum ProducerKeyCommand { + /// Print the producer public key and derived producer id + Show, +} + #[derive(Subcommand)] enum Commands { - #[cfg(feature = "serve")] + #[cfg(feature = "hellas-executor")] /// Run the RPC server Serve { - /// Enable discovery (LAN mDNS + internet discovery via pkarr/DNS + DHT). - #[arg(long, default_value_t = false)] - discovery: bool, + /// Port to listen on (auto-selects if not specified or if in use) + #[arg(long)] + port: Option, + /// Download policy: 'skip' (default, cache-only, never download), + /// 'eager' (download freely), + /// or 'allow(pattern,...)' (download only matching HF models) + #[arg(long = "download-policy", default_value = "skip")] + download_policy: hellas_rpc::policy::DownloadPolicy, + /// Execute policy: 'skip' (default, refuse all executions), + /// 'eager' (execute any graph), + /// or 'allow(hf/pattern,...,graph/pattern,...)' (execute only matching) + #[arg(long = "execute-policy", default_value = "skip")] + execute_policy: hellas_rpc::policy::ExecutePolicy, + /// Maximum number of queued executions waiting behind the active worker + #[arg( + long = "queue-size", + default_value_t = hellas_rpc::DEFAULT_EXECUTION_QUEUE_CAPACITY + )] + queue_size: usize, + /// Preload model weights on startup. Repeat or use commas: --preload foo/bar --preload baz/qux@rev + #[arg(long = "preload", value_delimiter = ',')] + preload_weights: Vec, + /// Persistent canonical artifact blob store path (default: $HOME/.hellas/artifacts) + #[arg(long = "artifact-store-path")] + artifact_store_path: Option, + /// Prometheus metrics port (e.g. 
9090) + #[arg(long = "metrics-port")] + metrics_port: Option, + /// Operator graffiti tag (up to 16 bytes, padded/truncated) + #[arg(long = "graffiti", default_value = "")] + graffiti: String, + /// Dtypes this executor will accept, comma-separated. The first entry + /// is the executor's preferred dtype (used when the server constructs + /// a program itself, e.g. for `QuotePromptRequest`). Other entries are + /// also accepted on a per-request basis. Each accepted dtype loads its + /// own bundle of weights, so listing more dtypes costs more VRAM. + /// Defaults to `f32`. + #[arg( + long = "dtype", + default_value = DEFAULT_DTYPE_STR, + value_delimiter = ',', + value_parser = parse_model_dtype + )] + dtype: Vec, }, - /// Check health of a remote node - Health { + /// Run HTTP gateway exposing OpenAI/Anthropic/plain APIs over Hellas network + Gateway { + /// Host interface to bind + #[arg(long, default_value = "127.0.0.1")] + host: String, + /// Port to listen on. Omit to try 8080 with fallback to an OS-assigned port. + #[arg(long)] + port: Option, + /// Direct target node id (omit to use discovery) + #[arg(long)] + node_id: Option, + /// Direct UDP address hint for the target node. Repeat or use commas. 
+ #[arg(long = "node-addr", value_delimiter = ',', requires = "node_id")] + node_addrs: Vec, + /// Run locally with the catgrad backend instead of the Hellas network + #[cfg(feature = "hellas-executor")] + #[arg(long = "local", default_value_t = false, conflicts_with_all = ["node_id", "node_addrs"])] + local: bool, + /// Run remotely and verify that the response matches a local catgrad execution + #[cfg(feature = "hellas-executor")] + #[arg( + long = "verify-local", + default_value_t = false, + conflicts_with_all = ["local", "verify"] + )] + verify_local: bool, + /// Verify the primary remote node against a second remote node + #[cfg_attr( + feature = "hellas-executor", + arg( + long = "verify", + conflicts_with_all = ["local", "verify_local"], + requires = "node_id" + ) + )] + #[cfg_attr( + not(feature = "hellas-executor"), + arg(long = "verify", requires = "node_id") + )] + verify: Option, + /// Maximum number of queued local executions when `--local` is set + #[cfg(feature = "hellas-executor")] + #[arg( + long = "queue-size", + default_value_t = hellas_rpc::DEFAULT_EXECUTION_QUEUE_CAPACITY + )] + queue_size: usize, + /// Max execution retries on failure (discovery mode) + #[arg(long = "retries", default_value_t = 2)] + retries: usize, + /// Fallback max new tokens when request omits max_tokens + #[arg(long = "default-max-tokens", default_value_t = 128)] + default_max_tokens: u32, + /// Override request model and force this HuggingFace model id, optionally with @revision + #[arg(long = "force-model")] + force_model: Option, + /// Prometheus metrics port (e.g. 9090) + #[arg(long = "metrics-port")] + metrics_port: Option, + /// Dtype the local executor (when `--local` or `--verify-local`) runs at, + /// and the dtype the client builds the quote program at: f32, f16, or bf16 + #[arg(long = "dtype", default_value = DEFAULT_DTYPE_STR, value_parser = parse_model_dtype)] + dtype: Dtype, + /// Wrap a child command with the gateway as its OpenAI/Anthropic backend. 
+ #[arg(long = "wrap")] + wrap: Option, + /// Trailing args forwarded verbatim to the wrapped command (after `--`). + #[arg(last = true, allow_hyphen_values = true, requires = "wrap")] + wrap_args: Vec, + }, + /// Query a remote node via RPC + Rpc { /// Node ID to check node_id: EndpointId, + /// Direct UDP address hint for the target node. Repeat or use commas. + #[arg(long = "node-addr", value_delimiter = ',')] + node_addrs: Vec, + }, + /// Store or fetch canonical artifact bytes on a provider + Artifact { + #[command(subcommand)] + command: commands::artifact::ArtifactCommand, }, - /// Execute a job on a remote node - Execute { - /// Node ID to execute on (omit to auto-discover) + /// Run LLM inference remotely or locally + Llm { + /// Node ID to run on remotely (omit to auto-discover) node_id: Option, - /// HuggingFace model id used to fetch weights (e.g. HuggingFaceTB/SmolLM2-135M-Instruct) - #[arg( - short = 'm', - long = "model", - default_value = "HuggingFaceTB/SmolLM2-135M-Instruct" - )] + /// Direct UDP address hint for the target node. Repeat or use commas. 
+ #[arg(long = "node-addr", value_delimiter = ',', requires = "node_id")] + node_addrs: Vec, + /// HuggingFace model id used to fetch weights, optionally with @revision + #[arg(short = 'm', long = "model", default_value = "Qwen/Qwen3-0.6B")] model: String, - /// Prompt to execute (required) + /// Prompt to send (required) #[arg(short = 'p', long = "prompt")] prompt: String, + /// Pass the prompt through unchanged instead of applying the model chat template + #[arg(long = "raw", default_value_t = false)] + raw: bool, /// Maximum number of new tokens to generate #[arg(long = "max-seq", default_value_t = 16)] max_seq: u32, + /// Max execution retries on failure (discovery path only) + #[arg(long = "retries", default_value_t = 2)] + retries: usize, + /// Run locally with the catgrad backend instead of the Hellas network + #[cfg(feature = "hellas-executor")] + #[arg(long = "local", default_value_t = false, conflicts_with_all = ["verify_local", "node_id", "node_addrs"])] + local: bool, + /// Run remotely and locally, then verify that both outputs match + #[cfg(feature = "hellas-executor")] + #[arg( + long = "verify-local", + default_value_t = false, + conflicts_with = "local" + )] + verify_local: bool, + /// Comma-separated preference list (each one of `f32`, `f16`, + /// `bf16`). The client builds the quote program at the first entry, + /// then on a remote `DtypeNotSupported` rejection retries at the + /// next. For `--local` / `--verify-local` the embedded executor's + /// `supported_dtypes` is the full list. If omitted the default + /// depends on the build and mode (cuda/metal builds and any network + /// mode prefer `bf16,f32,f16`; cpu builds in local-ish mode prefer + /// `f32,f16` to stay safe on hardware without bf16/f16 support). 
+ #[arg(long = "dtype", value_delimiter = ',', value_parser = parse_model_dtype)] + dtype: Vec, + }, + /// Run trust-based opaque JSON work + Opaque { + /// Node ID to run on remotely (omit to auto-discover) + node_id: Option, + /// Direct UDP address hint for the target node. Repeat or use commas. + #[arg(long = "node-addr", value_delimiter = ',', requires = "node_id")] + node_addrs: Vec, + /// Opaque service label. The protocol records it but does not interpret it. + #[arg(long)] + service: String, + /// Opaque method label. The protocol records it but does not interpret it. + #[arg(long)] + method: String, + /// Exact UTF-8 JSON payload bytes. + #[arg( + long, + conflicts_with = "payload_file", + required_unless_present = "payload_file" + )] + payload: Option, + /// Read exact UTF-8 JSON payload bytes from a file. + #[arg(long = "payload-file")] + payload_file: Option, + /// Max execution retries on failure (discovery path only) + #[arg(long = "retries", default_value_t = 2)] + retries: usize, + /// Run locally with the in-process executor instead of the Hellas network + #[cfg(feature = "hellas-executor")] + #[arg(long = "local", default_value_t = false, conflicts_with_all = ["node_id", "node_addrs"])] + local: bool, + }, + /// Inspect the local identity file + Identity { + #[command(subcommand)] + command: IdentityCommand, + }, + /// Inspect the local producer signing key + ProducerKey { + #[command(subcommand)] + command: ProducerKeyCommand, + }, + /// Discover peers and log network events + Monitor { + /// Stop monitoring after N seconds (default: run until Ctrl+C) + #[arg(long = "timeout-secs")] + timeout_secs: Option, + /// Disable peer interrogation RPCs (health + known peers) + #[arg(long = "no-interrogate", default_value_t = false)] + no_interrogate: bool, }, } #[tokio::main] async fn main() { - tracing_subscriber::fmt() - .with_env_filter( - tracing_subscriber::EnvFilter::try_from_default_env() - .unwrap_or_else(|_| 
tracing_subscriber::EnvFilter::new("warn")), - ) - .init(); - + // Parse the CLI first so we can honour the global `--log-file` + // flag in the subscriber setup. clap's parser is cheap; doing it + // before tracing init means very early subscriber-internal failures + // (which print to stderr regardless) are the only thing that + // bypasses the requested log file. let cli = Cli::parse(); + let tracer_provider = tracing_config::init_tracing(cli.log_file.as_deref()); + let producer_key_path = cli.producer_key_path.clone(); + + if let Commands::ProducerKey { + command: ProducerKeyCommand::Show, + } = &cli.command + { + let result = identity::load_existing_producer_key(producer_key_path.as_deref()) + .and_then(|key| commands::identity::show_producer_key(&key)); + tracer_provider.shutdown(); + if let Err(err) = result { + eprintln!("error: {err:#}"); + std::process::exit(1); + } + return; + } + + // show-node-id is a read-only query; never create an identity file as a + // side effect of it (would race with a running service's own creator). 
+ let load_identity = match &cli.command { + Commands::Identity { + command: IdentityCommand::ShowNodeId, + } => identity::load_existing, + _ => identity::load_or_create, + }; + let secret_key = match load_identity(cli.identity.as_deref()) { + Ok(key) => key, + Err(err) => { + eprintln!("error: {err:#}"); + std::process::exit(1); + } + }; + let result = match cli.command { - #[cfg(feature = "serve")] - Commands::Serve { discovery } => commands::serve::run(discovery).await, - Commands::Health { node_id } => commands::health::run(node_id).await, - Commands::Execute { + #[cfg(feature = "hellas-executor")] + Commands::Serve { + port, + download_policy, + execute_policy, + queue_size, + preload_weights, + artifact_store_path, + metrics_port, + graffiti, + dtype, + } => { + let producer_key = + match identity::load_or_create_producer_key(producer_key_path.as_deref()) { + Ok(key) => key, + Err(err) => { + eprintln!("error: {err:#}"); + std::process::exit(1); + } + }; + commands::serve::run( + port, + download_policy, + execute_policy, + queue_size, + preload_weights, + artifact_store_path, + metrics_port, + graffiti, + dtype, + secret_key, + producer_key, + ) + .await + } + Commands::Gateway { + host, + port, + node_id, + node_addrs, + #[cfg(feature = "hellas-executor")] + local, + #[cfg(feature = "hellas-executor")] + verify_local, + verify, + #[cfg(feature = "hellas-executor")] + queue_size, + retries, + default_max_tokens, + force_model, + metrics_port, + dtype, + wrap, + wrap_args, + } => { + commands::gateway::run(commands::gateway::GatewayOptions { + host, + port, + node_id, + node_addrs, + #[cfg(feature = "hellas-executor")] + local, + #[cfg(feature = "hellas-executor")] + verify_local, + verify, + #[cfg(feature = "hellas-executor")] + queue_size, + retries, + default_max_tokens, + force_model, + metrics_port, + dtype, + #[cfg(feature = "hellas-executor")] + producer_key_path: producer_key_path.clone(), + secret_key, + wrap, + wrap_args, + }) + .await + } + 
Commands::Rpc { + node_id, + node_addrs, + } => commands::rpc::run(node_id, node_addrs, secret_key).await, + Commands::Artifact { command } => commands::artifact::run(command, secret_key).await, + Commands::Llm { node_id, + node_addrs, model, prompt, + raw, max_seq, - } => commands::execute::run(node_id, model, prompt, max_seq).await, + retries, + #[cfg(feature = "hellas-executor")] + local, + #[cfg(feature = "hellas-executor")] + verify_local, + dtype, + } => { + #[cfg(feature = "hellas-executor")] + let is_local_mode = local || verify_local; + #[cfg(not(feature = "hellas-executor"))] + let is_local_mode = false; + let dtype = if dtype.is_empty() { + default_llm_dtypes(is_local_mode) + } else { + dtype + }; + commands::llm::run( + commands::llm::ExecuteOptions { + node_id, + node_addrs, + model, + prompt, + raw, + max_seq, + retries, + #[cfg(feature = "hellas-executor")] + local, + #[cfg(feature = "hellas-executor")] + verify_local, + dtype, + #[cfg(feature = "hellas-executor")] + producer_key_path: producer_key_path.clone(), + }, + secret_key, + ) + .await + } + Commands::Opaque { + node_id, + node_addrs, + service, + method, + payload, + payload_file, + retries, + #[cfg(feature = "hellas-executor")] + local, + } => { + let payload = match (payload, payload_file) { + (Some(payload), None) => Ok(payload.into_bytes()), + (None, Some(path)) => tokio::fs::read(&path).await.map_err(|err| { + anyhow::anyhow!("failed to read --payload-file {}: {err}", path.display()) + }), + (None, None) => unreachable!("clap requires --payload or --payload-file"), + (Some(_), Some(_)) => unreachable!("clap rejects both payload sources"), + }; + match payload { + Ok(payload) => { + commands::opaque::run( + commands::opaque::ExecuteOptions { + node_id, + node_addrs, + service, + method, + payload, + retries, + #[cfg(feature = "hellas-executor")] + local, + #[cfg(feature = "hellas-executor")] + producer_key_path: producer_key_path.clone(), + }, + secret_key, + ) + .await + } + Err(err) => 
Err(err), + } + } + Commands::Identity { command } => match command { + IdentityCommand::ShowNodeId => commands::identity::show_node_id(&secret_key), + }, + Commands::ProducerKey { .. } => unreachable!("producer-key handled before identity load"), + Commands::Monitor { + timeout_secs, + no_interrogate, + } => commands::monitor::run(timeout_secs, !no_interrogate, secret_key).await, }; + tracer_provider.shutdown(); + if let Err(err) = result { eprintln!("error: {err:#}"); std::process::exit(1); } } + +#[cfg(test)] +mod tests { + use super::*; + + #[cfg(feature = "hellas-executor")] + #[test] + fn llm_accepts_local_mode() { + let cli = Cli::try_parse_from(["hellas", "llm", "--local", "-p", "hello"]).unwrap(); + match cli.command { + Commands::Llm { + node_id, + node_addrs, + local, + verify_local, + raw, + .. + } => { + assert!(node_id.is_none()); + assert!(node_addrs.is_empty()); + assert!(local); + assert!(!verify_local); + assert!(!raw); + } + _ => panic!("expected llm command"), + } + } + + #[test] + fn llm_accepts_raw_mode() { + let cli = Cli::try_parse_from(["hellas", "llm", "--raw", "-p", "hello"]).unwrap(); + match cli.command { + Commands::Llm { raw, .. 
} => assert!(raw), + _ => panic!("expected llm command"), + } + } + + #[cfg(feature = "hellas-executor")] + #[test] + fn llm_rejects_local_with_node_id() { + let result = Cli::try_parse_from([ + "hellas", + "llm", + "bb18ebc065d836ecc7e1f33972d2c17eac9894cd33ce4916f66cb1165ccc7550", + "--local", + "-p", + "hello", + ]); + + assert!(result.is_err()); + } + + #[cfg(feature = "hellas-executor")] + #[test] + fn llm_rejects_conflicting_local_modes() { + let result = + Cli::try_parse_from(["hellas", "llm", "--local", "--verify-local", "-p", "hello"]); + + assert!(result.is_err()); + } + + #[cfg(feature = "hellas-executor")] + #[test] + fn gateway_accepts_local_mode() { + let cli = Cli::try_parse_from(["hellas", "gateway", "--local"]).unwrap(); + match cli.command { + Commands::Gateway { + node_id, + node_addrs, + local, + .. + } => { + assert!(node_id.is_none()); + assert!(node_addrs.is_empty()); + assert!(local); + } + _ => panic!("expected gateway command"), + } + } + + #[cfg(feature = "hellas-executor")] + #[test] + fn gateway_rejects_local_with_node_id() { + let result = Cli::try_parse_from([ + "hellas", + "gateway", + "--local", + "--node-id", + "bb18ebc065d836ecc7e1f33972d2c17eac9894cd33ce4916f66cb1165ccc7550", + ]); + + assert!(result.is_err()); + } + + #[test] + fn llm_rejects_node_addr_without_node_id() { + let result = Cli::try_parse_from([ + "hellas", + "llm", + "--node-addr", + "127.0.0.1:31145", + "-p", + "hello", + ]); + + assert!(result.is_err()); + } + + #[test] + fn gateway_rejects_node_addr_without_node_id() { + let result = Cli::try_parse_from(["hellas", "gateway", "--node-addr", "127.0.0.1:31145"]); + + assert!(result.is_err()); + } + + #[test] + fn opaque_accepts_payload() { + let cli = Cli::try_parse_from([ + "hellas", + "opaque", + "--service", + "echo", + "--method", + "run", + "--payload", + r#"{"x":1}"#, + ]) + .unwrap(); + match cli.command { + Commands::Opaque { + service, + method, + payload, + .. 
+ } => { + assert_eq!(service, "echo"); + assert_eq!(method, "run"); + assert_eq!(payload.as_deref(), Some(r#"{"x":1}"#)); + } + _ => panic!("expected opaque command"), + } + } + + #[test] + fn opaque_rejects_node_addr_without_node_id() { + let result = Cli::try_parse_from([ + "hellas", + "opaque", + "--service", + "echo", + "--method", + "run", + "--payload", + r#"{"x":1}"#, + "--node-addr", + "127.0.0.1:31145", + ]); + + assert!(result.is_err()); + } + + #[test] + fn opaque_rejects_missing_payload() { + let result = + Cli::try_parse_from(["hellas", "opaque", "--service", "echo", "--method", "run"]); + + assert!(result.is_err()); + } + + #[test] + fn artifact_put_accepts_provider_and_path() { + let cli = Cli::try_parse_from([ + "hellas", + "artifact", + "put", + "bb18ebc065d836ecc7e1f33972d2c17eac9894cd33ce4916f66cb1165ccc7550", + "--node-addr", + "127.0.0.1:31145", + "/tmp/artifact.cbor", + ]) + .unwrap(); + match cli.command { + Commands::Artifact { + command: + commands::artifact::ArtifactCommand::Put { + node_id: _, + node_addrs, + path, + }, + } => { + assert_eq!(node_addrs.len(), 1); + assert_eq!(path, std::path::Path::new("/tmp/artifact.cbor")); + } + _ => panic!("expected artifact put command"), + } + } + + #[test] + fn artifact_get_accepts_cid_and_output() { + let cid = "00".repeat(32); + let cli = Cli::try_parse_from([ + "hellas", + "artifact", + "get", + "bb18ebc065d836ecc7e1f33972d2c17eac9894cd33ce4916f66cb1165ccc7550", + &cid, + "--output", + "/tmp/artifact.cbor", + ]) + .unwrap(); + match cli.command { + Commands::Artifact { + command: + commands::artifact::ArtifactCommand::Get { + node_id: _, + node_addrs, + cid: parsed_cid, + output, + }, + } => { + assert!(node_addrs.is_empty()); + assert_eq!(parsed_cid, cid); + assert_eq!(output, std::path::Path::new("/tmp/artifact.cbor")); + } + _ => panic!("expected artifact get command"), + } + } + + /// On CPU-only builds the default is `f32`; on CUDA/Metal builds it is + /// `bf16`. 
See [`DEFAULT_DTYPE_STR`]. Used for `serve` / `gateway`, + /// which still take a single dtype. + #[cfg(feature = "hellas-executor")] + fn expected_default_dtype() -> Dtype { + parse_model_dtype(DEFAULT_DTYPE_STR).unwrap() + } + + #[test] + fn llm_dtype_omitted_yields_empty_vec_for_runtime_resolution() { + // Clap parses no `--dtype` as an empty `Vec`; main resolves + // the per-mode default via [`default_llm_dtypes`]. + let cli = Cli::try_parse_from(["hellas", "llm", "-p", "hi"]).unwrap(); + match cli.command { + Commands::Llm { dtype, .. } => assert!(dtype.is_empty()), + _ => panic!("expected llm command"), + } + } + + #[test] + fn llm_accepts_single_dtype() { + let cli = Cli::try_parse_from(["hellas", "llm", "--dtype", "f16", "-p", "hi"]).unwrap(); + match cli.command { + Commands::Llm { dtype, .. } => assert_eq!(dtype, vec![Dtype::F16]), + _ => panic!("expected llm command"), + } + } + + #[test] + fn llm_accepts_dtype_preference_list() { + let cli = + Cli::try_parse_from(["hellas", "llm", "--dtype", "bf16,f32,f16", "-p", "hi"]).unwrap(); + match cli.command { + Commands::Llm { dtype, .. } => { + assert_eq!(dtype, vec![Dtype::BF16, Dtype::F32, Dtype::F16]); + } + _ => panic!("expected llm command"), + } + } + + #[test] + fn default_llm_dtypes_local_cpu_skips_bf16() { + let cuda_or_metal = cfg!(any(feature = "candle-cuda", feature = "candle-metal")); + let prefs = default_llm_dtypes(/* is_local_mode = */ true); + if cuda_or_metal { + assert_eq!(prefs, vec![Dtype::BF16, Dtype::F32, Dtype::F16]); + } else { + assert_eq!(prefs, vec![Dtype::F32, Dtype::F16]); + } + } + + #[test] + fn default_llm_dtypes_network_uses_bf16_first() { + let prefs = default_llm_dtypes(/* is_local_mode = */ false); + assert_eq!(prefs, vec![Dtype::BF16, Dtype::F32, Dtype::F16]); + } + + #[test] + fn gateway_accepts_dtype_bf16() { + let cli = Cli::try_parse_from(["hellas", "gateway", "--dtype", "bf16"]).unwrap(); + match cli.command { + Commands::Gateway { dtype, .. 
} => assert_eq!(dtype, Dtype::BF16), + _ => panic!("expected gateway command"), + } + } + + #[test] + fn gateway_wrap_forwards_trailing_args() { + let cli = Cli::try_parse_from([ + "hellas", + "gateway", + "--wrap", + "pi", + "--", + "-p", + "--no-session", + "say hello", + ]) + .unwrap(); + match cli.command { + Commands::Gateway { + wrap, wrap_args, .. + } => { + assert_eq!(wrap.as_deref(), Some("pi")); + assert_eq!(wrap_args, vec!["-p", "--no-session", "say hello"]); + } + _ => panic!("expected gateway command"), + } + } + + #[test] + fn gateway_wrap_args_require_wrap() { + let result = Cli::try_parse_from(["hellas", "gateway", "--", "-p", "hi"]); + assert!(result.is_err(), "trailing args without --wrap should error"); + } + + #[test] + fn producer_key_show_accepts_global_key_path() { + let cli = Cli::try_parse_from([ + "hellas", + "--producer-key-path", + "/tmp/hellas-producer-key", + "producer-key", + "show", + ]) + .unwrap(); + assert_eq!( + cli.producer_key_path.as_deref(), + Some(std::path::Path::new("/tmp/hellas-producer-key")) + ); + match cli.command { + Commands::ProducerKey { + command: ProducerKeyCommand::Show, + } => {} + _ => panic!("expected producer-key show command"), + } + } + + #[cfg(feature = "hellas-executor")] + #[test] + fn serve_accepts_artifact_store_path() { + let cli = Cli::try_parse_from([ + "hellas", + "serve", + "--artifact-store-path", + "/tmp/hellas-artifacts", + ]) + .unwrap(); + match cli.command { + Commands::Serve { + artifact_store_path, + .. + } => assert_eq!( + artifact_store_path.as_deref(), + Some(std::path::Path::new("/tmp/hellas-artifacts")) + ), + _ => panic!("expected serve command"), + } + } + + #[cfg(feature = "hellas-executor")] + #[test] + fn serve_accepts_dtype_f16() { + let cli = Cli::try_parse_from(["hellas", "serve", "--dtype", "f16"]).unwrap(); + match cli.command { + Commands::Serve { dtype, .. 
} => assert_eq!(dtype, vec![Dtype::F16]), + _ => panic!("expected serve command"), + } + } + + #[cfg(feature = "hellas-executor")] + #[test] + fn serve_accepts_multi_dtype() { + let cli = Cli::try_parse_from(["hellas", "serve", "--dtype", "f32,f16,bf16"]).unwrap(); + match cli.command { + Commands::Serve { dtype, .. } => { + assert_eq!(dtype, vec![Dtype::F32, Dtype::F16, Dtype::BF16]); + } + _ => panic!("expected serve command"), + } + } + + #[cfg(feature = "hellas-executor")] + #[test] + fn serve_dtype_defaults_to_build_default() { + let cli = Cli::try_parse_from(["hellas", "serve"]).unwrap(); + match cli.command { + Commands::Serve { dtype, .. } => { + assert_eq!(dtype, vec![expected_default_dtype()]); + } + _ => panic!("expected serve command"), + } + } + + #[cfg(feature = "hellas-executor")] + #[test] + fn serve_rejects_dtype_u32_in_list() { + let result = Cli::try_parse_from(["hellas", "serve", "--dtype", "f32,u32"]); + assert!(result.is_err()); + } + + #[test] + fn llm_rejects_dtype_u32() { + let result = Cli::try_parse_from(["hellas", "llm", "--dtype", "u32", "-p", "hi"]); + assert!(result.is_err()); + } +} diff --git a/crates/cli/src/metrics.rs b/crates/cli/src/metrics.rs new file mode 100644 index 0000000..3ed6fea --- /dev/null +++ b/crates/cli/src/metrics.rs @@ -0,0 +1,108 @@ +use prometheus_client::encoding::text::encode; +use prometheus_client::registry::Registry; +use std::net::SocketAddr; +use std::sync::Arc; +use tracing::info; + +/// Bundle of metric sources served by the prometheus HTTP endpoint. +/// +/// The `prometheus` registry is the workspace's primary metrics surface +/// (executor counters, gateway counters, etc.). When the `otel` feature is on, +/// iroh's internal `EndpointMetrics` are appended to the same response. 
+pub struct MetricsBundle { + pub prometheus: Arc, + #[cfg(feature = "otel")] + pub iroh: Option, +} + +impl MetricsBundle { + pub fn new(prometheus: Arc) -> Self { + Self { + prometheus, + #[cfg(feature = "otel")] + iroh: None, + } + } + + /// Attach iroh's `EndpointMetrics` so they are emitted alongside the + /// prometheus-client registry. Only the `serve` command currently calls + /// this — the gateway path could be wired up similarly once it has an + /// `Endpoint` handle to expose. + #[cfg(feature = "otel")] + #[allow(dead_code)] // unused in `--features otel` without `candle` + pub fn with_iroh(mut self, iroh: tonic_iroh_transport::iroh::metrics::EndpointMetrics) -> Self { + self.iroh = Some(iroh); + self + } +} + +pub fn spawn_metrics_server(port: u16, bundle: MetricsBundle) { + let addr: SocketAddr = ([0, 0, 0, 0], port).into(); + let bundle = Arc::new(bundle); + + tokio::spawn(async move { + let listener = match tokio::net::TcpListener::bind(addr).await { + Ok(l) => l, + Err(err) => { + eprintln!("warning: failed to bind metrics server on {addr}: {err}"); + return; + } + }; + + let app = axum::Router::new() + .route( + "/metrics", + axum::routing::get( + move |axum::extract::State(bundle): axum::extract::State< + Arc, + >| async move { + encode_metrics(&bundle) + .map(|buf| (axum::http::StatusCode::OK, buf)) + .unwrap_or(( + axum::http::StatusCode::INTERNAL_SERVER_ERROR, + "failed to encode metrics".to_string(), + )) + }, + ), + ) + .with_state(bundle); + + info!("prometheus metrics server listening on http://{addr}/metrics"); + + if let Err(err) = axum::serve(listener, app).await { + eprintln!("warning: metrics server failed: {err}"); + } + }); +} + +fn encode_metrics(bundle: &MetricsBundle) -> Result { + let mut buf = String::new(); + encode(&mut buf, &bundle.prometheus)?; + // prometheus-client's `encode` terminates with `# EOF\n`; we strip it so + // we can append iroh metrics in the same response. 
A single `# EOF\n` is + // re-added at the end below. + if let Some(pos) = buf.rfind("# EOF\n") { + buf.truncate(pos); + } + append_iroh_metrics(&mut buf, bundle); + if !buf.ends_with("# EOF\n") { + buf.push_str("# EOF\n"); + } + Ok(buf) +} + +#[cfg(feature = "otel")] +fn append_iroh_metrics(buf: &mut String, bundle: &MetricsBundle) { + use iroh_metrics::Registry as IrohRegistry; + + let Some(iroh) = bundle.iroh.as_ref() else { + return; + }; + + let mut reg = IrohRegistry::default(); + reg.register_all_prefixed(iroh); + let _ = reg.encode_openmetrics_to_writer(buf); +} + +#[cfg(not(feature = "otel"))] +fn append_iroh_metrics(_buf: &mut String, _bundle: &MetricsBundle) {} diff --git a/crates/cli/src/text_output.rs b/crates/cli/src/text_output.rs new file mode 100644 index 0000000..253e61f --- /dev/null +++ b/crates/cli/src/text_output.rs @@ -0,0 +1,52 @@ +use anyhow::{Context, anyhow}; +use catgrad_llm::{Detokenizer, LLMError}; +use hellas_rpc::decode_token_ids; +use hellas_rpc::model::ModelAssets; +use std::sync::Arc; + +/// Streaming detokenizer. Stateful — buffers partial UTF-8 sequences +/// across `push_bytes` calls so multi-byte glyphs aren't split mid-stream. +pub struct TextOutputDecoder { + decoder: Detokenizer<'static>, +} + +impl TextOutputDecoder { + pub fn new(assets: Arc, stop_token_ids: &[i32]) -> Self { + let decoder = Detokenizer::new( + move |token_ids| { + let token_ids: Vec = token_ids + .iter() + .map(|&token| { + u32::try_from(token).map_err(|_| { + LLMError::TokenizerError(format!( + "negative token id {token} cannot be decoded" + )) + }) + }) + .collect::>()?; + assets + .decode_tokens(&token_ids) + .map_err(|err| LLMError::TokenizerError(err.to_string())) + }, + stop_token_ids, + ); + Self { decoder } + } + + /// Push a chunk of token bytes; returns the incremental text delta. + /// May return an empty string if the chunk only contained the leading + /// bytes of a multi-byte UTF-8 character. 
+ pub fn push_bytes(&mut self, bytes: &[u8]) -> anyhow::Result { + let token_ids: Vec = decode_token_ids(bytes) + .context("failed to decode streamed output batch")? + .into_iter() + .map(|token| { + i32::try_from(token) + .map_err(|_| anyhow!("output token id {token} exceeds i32 range")) + }) + .collect::>()?; + self.decoder + .push_tokens(&token_ids) + .context("failed to detokenize streamed output batch") + } +} diff --git a/crates/cli/src/tracing_config.rs b/crates/cli/src/tracing_config.rs new file mode 100644 index 0000000..237d145 --- /dev/null +++ b/crates/cli/src/tracing_config.rs @@ -0,0 +1,210 @@ +use std::path::Path; +use std::sync::OnceLock; + +#[cfg(feature = "otel")] +use opentelemetry::trace::TracerProvider; +#[cfg(feature = "otel")] +use opentelemetry_otlp::{WithExportConfig, WithHttpConfig}; +use tracing_subscriber::EnvFilter; +use tracing_subscriber::layer::SubscriberExt; +use tracing_subscriber::reload; +use tracing_subscriber::util::SubscriberInitExt; + +type FilterHandle = reload::Handle; + +static LOG_FILTER: OnceLock = OnceLock::new(); + +fn base_env_filter() -> EnvFilter { + EnvFilter::try_from_default_env() + .unwrap_or_else(|_| EnvFilter::new("warn")) + .add_directive("noq::connection=error".parse().unwrap()) + .add_directive("netlink_packet_route=error".parse().unwrap()) +} + +/// Holds the OTLP tracer provider (when the `otel` feature is on) so the CLI +/// can flush spans on shutdown. With the feature off this is a zero-sized type +/// and `shutdown()` is a no-op. +pub struct TracerGuard { + #[cfg(feature = "otel")] + provider: Option, +} + +impl TracerGuard { + pub fn shutdown(self) { + #[cfg(feature = "otel")] + if let Some(provider) = self.provider + && let Err(err) = provider.shutdown() + { + eprintln!("warning: failed to flush traces: {err}"); + } + } +} + +/// Initialise the tracing subscriber. 
+/// +/// When the `otel` feature is enabled and `OTEL_EXPORTER_OTLP_TRACES_ENDPOINT` +/// is set (and non-empty), an OpenTelemetry OTLP layer is added that exports +/// traces over HTTP/protobuf. With the feature off, only the fmt + optional +/// file layers are registered. +/// +/// Supported environment variables (all standard OTEL, only consulted when +/// `otel` is enabled): +/// OTEL_EXPORTER_OTLP_TRACES_ENDPOINT — collector URL (e.g. https://jaeger.lsd-ag.ch/v1/traces) +/// OTEL_SERVICE_NAME — service name (default: hellas-node) +/// OTEL_TRACES_SAMPLER_ARG — sample rate 0.0–1.0 (default: 1.0) +/// OTEL_EXPORTER_OTLP_HEADERS — extra headers as k=v,k=v +/// (use for CF-Access-Client-Id / CF-Access-Client-Secret) +pub fn init_tracing(log_file: Option<&Path>) -> TracerGuard { + let (filter_layer, filter_handle) = reload::Layer::new(base_env_filter()); + let _ = LOG_FILTER.set(filter_handle); + + let fmt_layer = tracing_subscriber::fmt::layer().with_writer(std::io::stderr); + let file_layer = log_file.and_then(|path| { + // Open append-mode so successive runs accumulate; line-buffered + // happens naturally per-event because the fmt layer flushes + // after each record. + match std::fs::OpenOptions::new() + .create(true) + .append(true) + .open(path) + { + Ok(f) => Some( + tracing_subscriber::fmt::layer() + .with_writer(std::sync::Mutex::new(f)) + .with_ansi(false), + ), + Err(err) => { + eprintln!( + "warning: --log-file {} could not be opened: {err}", + path.display() + ); + None + } + } + }); + + let registry = tracing_subscriber::registry() + .with(filter_layer) + .with(fmt_layer) + .with(file_layer); + + install_with_otel(registry) +} + +/// Suppress known one-shot transport tail logs after CLI execute has already finished. 
+pub fn suppress_execute_tail_logs() { + let Some(handle) = LOG_FILTER.get() else { + return; + }; + + let filter = base_env_filter() + .add_directive("iroh::socket=off".parse().unwrap()) + .add_directive("noq::connection=off".parse().unwrap()) + .add_directive("noq_proto::connection=off".parse().unwrap()) + .add_directive("acto::tokio=off".parse().unwrap()); + + let _ = handle.reload(filter); +} + +#[cfg(feature = "otel")] +fn install_with_otel(registry: S) -> TracerGuard +where + S: tracing::Subscriber + + Send + + Sync + + 'static + + for<'a> tracing_subscriber::registry::LookupSpan<'a>, +{ + // Register W3C TraceContext propagator so trace IDs flow across RPC calls. + opentelemetry::global::set_text_map_propagator( + opentelemetry_sdk::propagation::TraceContextPropagator::new(), + ); + + let (otel_layer, provider) = build_otlp_layer::(); + registry.with(otel_layer).init(); + + TracerGuard { provider } +} + +#[cfg(not(feature = "otel"))] +fn install_with_otel(registry: S) -> TracerGuard +where + S: tracing::Subscriber + Send + Sync + 'static, +{ + registry.init(); + TracerGuard {} +} + +#[cfg(feature = "otel")] +fn build_otlp_layer() -> ( + Option>, + Option, +) +where + S: tracing::Subscriber + for<'span> tracing_subscriber::registry::LookupSpan<'span>, +{ + let endpoint = match std::env::var("OTEL_EXPORTER_OTLP_TRACES_ENDPOINT") { + Ok(v) if !v.trim().is_empty() => v, + _ => return (None, None), + }; + + let service_name = std::env::var("OTEL_SERVICE_NAME") + .ok() + .filter(|s| !s.trim().is_empty()) + .unwrap_or_else(|| "hellas-node".to_string()); + + let sample_rate: f64 = std::env::var("OTEL_TRACES_SAMPLER_ARG") + .ok() + .and_then(|s| s.parse().ok()) + .filter(|r: &f64| (0.0..=1.0).contains(r)) + .unwrap_or(1.0); + + let headers: std::collections::HashMap = + std::env::var("OTEL_EXPORTER_OTLP_HEADERS") + .ok() + .map(|raw| { + raw.split(',') + .filter_map(|pair| { + let (k, v) = pair.split_once('=')?; + Some((k.trim().to_string(), v.trim().to_string())) + 
}) + .collect() + }) + .unwrap_or_default(); + + let mut http = opentelemetry_otlp::SpanExporter::builder() + .with_http() + .with_endpoint(&endpoint); + + if !headers.is_empty() { + http = http.with_headers(headers); + } + + let exporter = match http.build() { + Ok(e) => e, + Err(err) => { + eprintln!("warning: failed to build OTLP exporter: {err}"); + return (None, None); + } + }; + + let provider = opentelemetry_sdk::trace::SdkTracerProvider::builder() + .with_batch_exporter(exporter) + .with_sampler(opentelemetry_sdk::trace::Sampler::TraceIdRatioBased( + sample_rate, + )) + .with_resource( + opentelemetry_sdk::Resource::builder() + .with_service_name(service_name.clone()) + .build(), + ) + .build(); + + opentelemetry::global::set_tracer_provider(provider.clone()); + let tracer = provider.tracer(service_name.clone()); + + eprintln!("otlp: enabled endpoint={endpoint} service={service_name} sample_rate={sample_rate}"); + + let layer = tracing_opentelemetry::layer().with_tracer(tracer); + (Some(layer), Some(provider)) +} diff --git a/crates/core/Cargo.toml b/crates/core/Cargo.toml new file mode 100644 index 0000000..5c3e8c0 --- /dev/null +++ b/crates/core/Cargo.toml @@ -0,0 +1,19 @@ +[package] +name = "hellas-core" +description = "Protocol primitives for Hellas commitments and receipts" +version.workspace = true +edition.workspace = true +license.workspace = true +repository.workspace = true +documentation.workspace = true + +[dependencies] +blake3.workspace = true +k256.workspace = true +serde.workspace = true +serde_bytes.workspace = true +serde_ipld_dagcbor.workspace = true +thiserror.workspace = true + +[dev-dependencies] +serde_json.workspace = true diff --git a/crates/core/src/commitment.rs b/crates/core/src/commitment.rs new file mode 100644 index 0000000..11b1ba8 --- /dev/null +++ b/crates/core/src/commitment.rs @@ -0,0 +1,129 @@ +use serde::de::{Error as DeError, Visitor}; +use serde::{Deserialize, Deserializer, Serialize, Serializer}; +use std::fmt; + 
+use crate::digest::Digest; +use crate::tags; + +#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] +#[repr(u8)] +pub enum SchemeId { + Symbolic = tags::SCHEME_SYMBOLIC, + Opaque = tags::SCHEME_OPAQUE, + ZkTls = tags::SCHEME_ZKTLS, +} + +impl SchemeId { + pub const fn to_byte(self) -> u8 { + self as u8 + } + + pub fn from_byte(byte: u8) -> Result { + match byte { + tags::SCHEME_SYMBOLIC => Ok(Self::Symbolic), + tags::SCHEME_OPAQUE => Ok(Self::Opaque), + tags::SCHEME_ZKTLS => Ok(Self::ZkTls), + _ => Err(TagError::UnknownScheme(byte)), + } + } +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, thiserror::Error)] +pub enum TagError { + #[error("unknown scheme id byte 0x{0:02x}")] + UnknownScheme(u8), +} + +macro_rules! impl_u8_serde { + ($ty:ty, $from:expr) => { + impl Serialize for $ty { + fn serialize(&self, serializer: S) -> Result + where + S: Serializer, + { + serializer.serialize_u8(self.to_byte()) + } + } + + impl<'de> Deserialize<'de> for $ty { + fn deserialize(deserializer: D) -> Result + where + D: Deserializer<'de>, + { + struct ByteVisitor; + + impl Visitor<'_> for ByteVisitor { + type Value = $ty; + + fn expecting(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.write_str("a one-byte protocol tag") + } + + fn visit_u8(self, v: u8) -> Result + where + E: DeError, + { + $from(v).map_err(E::custom) + } + + fn visit_u64(self, v: u64) -> Result + where + E: DeError, + { + let byte = u8::try_from(v).map_err(E::custom)?; + self.visit_u8(byte) + } + } + + deserializer.deserialize_u8(ByteVisitor) + } + } + }; +} + +impl_u8_serde!(SchemeId, SchemeId::from_byte); + +macro_rules! 
digest_commitment { + ($ty:ident) => { + #[derive( + Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize, + )] + pub struct $ty(pub Digest); + + impl $ty { + pub fn from_canonical_bytes(canonical_bytes: &[u8]) -> Self { + Self(Digest::hash(canonical_bytes)) + } + + pub const fn from_digest(digest: Digest) -> Self { + Self(digest) + } + + pub const fn digest(&self) -> Digest { + self.0 + } + + pub const fn as_bytes(&self) -> &[u8; Digest::LEN] { + self.0.as_bytes() + } + } + }; +} + +digest_commitment!(RequestCommitment); +digest_commitment!(ResultCommitment); +digest_commitment!(ReceiptCommitment); + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn commitment_newtypes_hash_exact_canonical_bytes() { + let bytes = b"\x82x\x19hellas.example.object.v1Ddata"; + assert_eq!( + RequestCommitment::from_canonical_bytes(bytes).as_bytes(), + Digest::hash(bytes).as_bytes() + ); + } +} diff --git a/crates/core/src/digest.rs b/crates/core/src/digest.rs new file mode 100644 index 0000000..cfb5a91 --- /dev/null +++ b/crates/core/src/digest.rs @@ -0,0 +1,144 @@ +use serde::de::{Error as DeError, Visitor}; +use serde::{Deserialize, Deserializer, Serialize, Serializer}; +use std::fmt; + +use crate::tags; + +#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Default)] +pub struct Digest([u8; 32]); + +impl Digest { + pub const LEN: usize = 32; + + pub fn hash(bytes: &[u8]) -> Self { + Self::from_bytes(*blake3::hash(bytes).as_bytes()) + } + + pub const fn from_bytes(bytes: [u8; Self::LEN]) -> Self { + Self(bytes) + } + + pub const fn as_bytes(&self) -> &[u8; Self::LEN] { + &self.0 + } + + pub fn into_bytes(self) -> [u8; Self::LEN] { + self.0 + } + + pub fn from_slice(bytes: &[u8]) -> Result { + let bytes: [u8; Self::LEN] = bytes + .try_into() + .map_err(|_| DigestError::WrongLength { len: bytes.len() })?; + Ok(Self(bytes)) + } +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, thiserror::Error)] +pub enum DigestError { + #[error("digest 
must be 32 bytes, got {len}")]
    WrongLength { len: usize },
}

impl fmt::Debug for Digest {
    // Hex-encodes the digest so debug output is compact and greppable.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "Digest(")?;
        for byte in &self.0 {
            write!(f, "{byte:02x}")?;
        }
        write!(f, ")")
    }
}

impl fmt::Display for Digest {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        for byte in &self.0 {
            write!(f, "{byte:02x}")?;
        }
        Ok(())
    }
}

// Serialized as a raw CBOR byte string, not an array of ints.
impl Serialize for Digest {
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: Serializer,
    {
        serializer.serialize_bytes(&self.0)
    }
}

impl<'de> Deserialize<'de> for Digest {
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
    where
        D: Deserializer<'de>,
    {
        struct DigestVisitor;

        impl Visitor<'_> for DigestVisitor {
            type Value = Digest;

            fn expecting(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
                f.write_str("a 32-byte digest")
            }

            fn visit_bytes<E>(self, v: &[u8]) -> Result<Self::Value, E>
            where
                E: DeError,
            {
                Digest::from_slice(v).map_err(E::custom)
            }

            fn visit_byte_buf<E>(self, v: Vec<u8>) -> Result<Self::Value, E>
            where
                E: DeError,
            {
                self.visit_bytes(&v)
            }
        }

        deserializer.deserialize_bytes(DigestVisitor)
    }
}

/// Domain-separated, length-delimited tuple hash: every tag and field is
/// length-prefixed so distinct tuples can never share a preimage.
pub fn hash_tuple(tag: &str, fields: &[&[u8]]) -> Digest {
    let mut hasher = blake3::Hasher::new();
    hasher.update(tags::HASH_TUPLE_V1.as_bytes());
    hasher.update(&(tag.len() as u32).to_be_bytes());
    hasher.update(tag.as_bytes());
    hasher.update(&(fields.len() as u32).to_be_bytes());
    for field in fields {
        hasher.update(&(field.len() as u64).to_be_bytes());
        hasher.update(field);
    }
    Digest::from_bytes(*hasher.finalize().as_bytes())
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn hash_tuple_is_length_delimited() {
        let a = hash_tuple("tag", &[b"ab", b"c"]);
        let b = hash_tuple("tag", &[b"a", b"bc"]);
        assert_ne!(a, b);
    }

    #[test]
    fn digest_hash_is_blake3_of_exact_bytes() {
        assert_eq!(
            Digest::hash(b"abc").as_bytes(),
            blake3::hash(b"abc").as_bytes()
        );
    }

#[test] + fn digest_serializes_as_bytes() { + let digest = Digest::from_bytes([7; 32]); + let bytes = serde_ipld_dagcbor::to_vec(&digest).unwrap(); + assert_eq!(bytes, [&[0x58, 0x20][..], &[7; 32][..]].concat()); + let decoded: Digest = serde_ipld_dagcbor::from_slice(&bytes).unwrap(); + assert_eq!(decoded, digest); + } +} diff --git a/crates/core/src/lib.rs b/crates/core/src/lib.rs new file mode 100644 index 0000000..ebb8c82 --- /dev/null +++ b/crates/core/src/lib.rs @@ -0,0 +1,27 @@ +//! Protocol primitives for Hellas commitments and producer receipts. + +pub mod commitment; +pub mod digest; +pub mod receipt; +pub mod scheme; +pub mod schemes; +pub mod signature; +pub mod tags; +pub mod value; + +pub use commitment::{ReceiptCommitment, RequestCommitment, ResultCommitment, SchemeId}; +pub use digest::{Digest, hash_tuple}; +pub use receipt::{ + DeliveryOutput, DeliveryRequest, ReceiptBody, SignedReceipt, VerifyError, verify_delivery, + verify_receipt, +}; +pub use scheme::CommitmentScheme; +pub use schemes::opaque::{Opaque, OpaqueRequest}; +pub use schemes::symbolic::{Symbolic, SymbolicOutput, SymbolicRequest}; +pub use signature::{ + ProducerId, ProducerSigningKey, PublicKey, Signature, SignatureError, SignatureKind, +}; +pub use value::{ + DagCborDecodeError, DagCborEncodeError, DagCborEncoder, JsonBytes, canonical_dag_cbor, + decode_dag_cbor, +}; diff --git a/crates/core/src/receipt.rs b/crates/core/src/receipt.rs new file mode 100644 index 0000000..7932067 --- /dev/null +++ b/crates/core/src/receipt.rs @@ -0,0 +1,337 @@ +use serde::{Deserialize, Serialize}; + +use crate::signature::verify_digest_signature; +use crate::{ + CommitmentScheme, DagCborEncoder, JsonBytes, Opaque, OpaqueRequest, ProducerId, + ProducerSigningKey, PublicKey, ReceiptCommitment, RequestCommitment, ResultCommitment, + SchemeId, Signature, SignatureError, Symbolic, SymbolicOutput, SymbolicRequest, hash_tuple, + tags, +}; + +#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)] +pub 
struct ReceiptBody {
    scheme: SchemeId,
    request: RequestCommitment,
    result: ResultCommitment,
    producer: ProducerId,
}

impl ReceiptBody {
    pub fn new(
        scheme: SchemeId,
        request: RequestCommitment,
        result: ResultCommitment,
        producer: ProducerId,
    ) -> Self {
        Self {
            scheme,
            request,
            result,
            producer,
        }
    }

    pub const fn scheme(&self) -> SchemeId {
        self.scheme
    }

    pub const fn request(&self) -> RequestCommitment {
        self.request
    }

    pub const fn result(&self) -> ResultCommitment {
        self.result
    }

    pub const fn producer(&self) -> ProducerId {
        self.producer
    }

    /// Canonical DAG-CBOR preimage of the body:
    /// `[tag, scheme, request, result, producer]` (5-element array).
    pub fn canonical_bytes(&self) -> Result<Vec<u8>, VerifyError> {
        let mut encoder = DagCborEncoder::new();
        encoder.array(5);
        encoder.str(tags::RECEIPT_BODY_V1);
        encoder.u64(self.scheme.to_byte() as u64);
        encoder.bytes(self.request.as_bytes());
        encoder.bytes(self.result.as_bytes());
        encoder.bytes(self.producer.as_bytes());
        Ok(encoder.into_bytes())
    }

    pub fn receipt_commitment(&self) -> Result<ReceiptCommitment, VerifyError> {
        Ok(ReceiptCommitment::from_canonical_bytes(
            &self.canonical_bytes()?,
        ))
    }

    /// Digest the producer actually signs; domain-separated from the
    /// receipt commitment via `RECEIPT_SIGNATURE_V1`.
    pub fn signature_preimage(&self) -> Result<crate::Digest, VerifyError> {
        Ok(hash_tuple(
            tags::RECEIPT_SIGNATURE_V1,
            &[&self.canonical_bytes()?],
        ))
    }
}

#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
pub struct SignedReceipt {
    body: ReceiptBody,
    signature: Signature,
    public_key: PublicKey,
}

impl SignedReceipt {
    /// Builds and signs a receipt for `request`/`output` under scheme `S`,
    /// then self-verifies before returning.
    pub fn sign<S>(
        request: &S::Request,
        output: &S::Output,
        key: &ProducerSigningKey,
    ) -> Result<Self, VerifyError>
    where
        S: CommitmentScheme,
    {
        let public_key = key.public_key();
        let body = ReceiptBody::new(
            S::SCHEME,
            S::commit_request(request),
            S::commit_output(output),
            ProducerId::from_public_key(&public_key),
        );
        let signature = key.sign_digest(body.signature_preimage()?)?;
        let receipt = Self {
            body,
            signature,
            public_key,
        };
        receipt.verify()?;
        Ok(receipt)
    }

    /// Reassembles a receipt from untrusted parts, verifying before returning.
    pub fn from_parts_verified(
        body: ReceiptBody,
        signature: Signature,
        public_key: PublicKey,
    ) -> Result<Self, VerifyError> {
        let receipt = Self {
            body,
            signature,
            public_key,
        };
        receipt.verify()?;
        Ok(receipt)
    }

    pub const fn body(&self) -> &ReceiptBody {
        &self.body
    }

    pub const fn signature(&self) -> &Signature {
        &self.signature
    }

    pub const fn public_key(&self) -> &PublicKey {
        &self.public_key
    }

    /// Checks producer-id binding and the signature over the body preimage.
    pub fn verify(&self) -> Result<(), VerifyError> {
        if ProducerId::from_public_key(&self.public_key) != self.body.producer {
            return Err(VerifyError::ProducerMismatch);
        }
        verify_digest_signature(
            &self.public_key,
            &self.signature,
            self.body.signature_preimage()?,
        )?;
        Ok(())
    }

    pub fn receipt_commitment(&self) -> Result<ReceiptCommitment, VerifyError> {
        self.body.receipt_commitment()
    }
}

pub enum DeliveryRequest<'a> {
    Symbolic(&'a SymbolicRequest),
    Opaque(&'a OpaqueRequest),
}

pub enum DeliveryOutput<'a> {
    Symbolic(&'a SymbolicOutput),
    Opaque(&'a JsonBytes),
}

pub fn verify_receipt(receipt: &SignedReceipt) -> Result<(), VerifyError> {
    receipt.verify()
}

/// Verifies the receipt, then checks the witnesses recommit to the same
/// request/result commitments under the receipt's scheme.
pub fn verify_delivery(
    request: DeliveryRequest<'_>,
    output: DeliveryOutput<'_>,
    receipt: &SignedReceipt,
) -> Result<(), VerifyError> {
    verify_receipt(receipt)?;

    match (request, output, receipt.body.scheme) {
        (
            DeliveryRequest::Symbolic(request),
            DeliveryOutput::Symbolic(output),
            SchemeId::Symbolic,
        ) => {
            if receipt.body.request != Symbolic::commit_request(request) {
                return Err(VerifyError::RequestCommitmentMismatch);
            }
            if receipt.body.result != Symbolic::commit_output(output) {
                return Err(VerifyError::ResultCommitmentMismatch);
            }
            Ok(())
        }
        (DeliveryRequest::Opaque(request), DeliveryOutput::Opaque(output), SchemeId::Opaque) => {
            if receipt.body.request != Opaque::commit_request(request) {
                return Err(VerifyError::RequestCommitmentMismatch);
            }
            if receipt.body.result != Opaque::commit_output(output) {
                return Err(VerifyError::ResultCommitmentMismatch);
            }
            Ok(())
        }
        _ => Err(VerifyError::SchemeMismatch),
    }
}

#[derive(Debug, Clone, PartialEq, Eq, thiserror::Error)]
pub enum VerifyError {
    #[error("producer id does not match public key")]
    ProducerMismatch,
    #[error("request commitment does not match request witness")]
    RequestCommitmentMismatch,
    #[error("result commitment does not match output witness")]
    ResultCommitmentMismatch,
    #[error("delivery witness scheme does not match receipt envelope")]
    SchemeMismatch,
    #[error("signature verification failed: {0}")]
    Signature(#[from] SignatureError),
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::{Digest, JsonBytes};

    fn symbolic_request() -> SymbolicRequest {
        SymbolicRequest {
            text_execution_cid: Digest::from_bytes([4; 32]),
        }
    }

    fn symbolic_output() -> SymbolicOutput {
        SymbolicOutput {
            text_artifact_cid: Digest::from_bytes([9; 32]),
        }
    }

    #[test]
    fn opaque_receipt_verifies_delivery() {
        let key = ProducerSigningKey::deterministic_for_tests();
        let request = OpaqueRequest {
            service: "vllm".to_string(),
            method: "generate".to_string(),
            payload: JsonBytes::new(br#"{"prompt":"hi"}"#.to_vec()),
        };
        let output = JsonBytes::new(br#"{"text":"hello"}"#.to_vec());
        let receipt = SignedReceipt::sign::<Opaque>(&request, &output, &key).unwrap();
        let envelope = receipt;

        verify_delivery(
            DeliveryRequest::Opaque(&request),
            DeliveryOutput::Opaque(&output),
            &envelope,
        )
        .unwrap();
    }

    #[test]
    fn symbolic_receipt_verifies_delivery() {
        let key = ProducerSigningKey::deterministic_for_tests();
        let request = symbolic_request();
        let output = symbolic_output();
        let receipt = SignedReceipt::sign::<Symbolic>(&request, &output, &key).unwrap();
        let envelope = receipt;

        verify_delivery(
            DeliveryRequest::Symbolic(&request),
            DeliveryOutput::Symbolic(&output),
            &envelope,
        )
        .unwrap();
    }

    #[test]
    fn verify_delivery_rejects_wrong_output() {
        let key = ProducerSigningKey::deterministic_for_tests();
        let request =
OpaqueRequest {
            service: "vllm".to_string(),
            method: "generate".to_string(),
            payload: JsonBytes::new(br#"{"prompt":"hi"}"#.to_vec()),
        };
        let output = JsonBytes::new(br#"{"text":"hello"}"#.to_vec());
        let wrong = JsonBytes::new(br#"{"text":"bye"}"#.to_vec());
        let receipt = SignedReceipt::sign::<Opaque>(&request, &output, &key).unwrap();
        let envelope = receipt;

        assert_eq!(
            verify_delivery(
                DeliveryRequest::Opaque(&request),
                DeliveryOutput::Opaque(&wrong),
                &envelope,
            )
            .unwrap_err(),
            VerifyError::ResultCommitmentMismatch
        );
    }

    #[test]
    fn receipt_commitment_excludes_signature() {
        let key = ProducerSigningKey::deterministic_for_tests();
        let request = OpaqueRequest {
            service: "vllm".to_string(),
            method: "generate".to_string(),
            payload: JsonBytes::new(br#"{"prompt":"hi"}"#.to_vec()),
        };
        let output = JsonBytes::new(br#"{"text":"hello"}"#.to_vec());
        let receipt = SignedReceipt::sign::<Opaque>(&request, &output, &key).unwrap();

        let body_commitment = receipt.body().receipt_commitment().unwrap();
        // Flip one signature bit: the commitment must be unchanged, but
        // verification must now fail.
        let mut changed_signature = *receipt.signature();
        let mut bytes = *changed_signature.bytes();
        bytes[0] ^= 0x01;
        changed_signature = Signature::from_compact_secp256k1(bytes);
        let rebuilt = SignedReceipt {
            body: receipt.body().clone(),
            signature: changed_signature,
            public_key: *receipt.public_key(),
        };

        assert_eq!(
            body_commitment,
            rebuilt.body().receipt_commitment().unwrap()
        );
        assert!(rebuilt.verify().is_err());
    }

    #[test]
    fn receipt_envelope_round_trips_through_dag_cbor() {
        let key = ProducerSigningKey::deterministic_for_tests();
        let request = OpaqueRequest {
            service: "vllm".to_string(),
            method: "generate".to_string(),
            payload: JsonBytes::new(br#"{"prompt":"hi"}"#.to_vec()),
        };
        let output = JsonBytes::new(br#"{"text":"hello"}"#.to_vec());
        let receipt = SignedReceipt::sign::<Opaque>(&request, &output, &key).unwrap();
        let envelope = receipt;

        let bytes = crate::canonical_dag_cbor(&envelope).unwrap();
        let decoded: SignedReceipt = crate::decode_dag_cbor(&bytes).unwrap();

        assert_eq!(decoded, envelope);
        verify_receipt(&decoded).unwrap();
    }
}
diff --git a/crates/core/src/scheme.rs b/crates/core/src/scheme.rs
new file mode 100644
index 0000000..def134c
--- /dev/null
+++ b/crates/core/src/scheme.rs
use crate::{RequestCommitment, ResultCommitment, SchemeId};

/// A commitment scheme binds request/output witnesses to fixed-size
/// commitments under a stable scheme id.
pub trait CommitmentScheme {
    type Request;
    type Output;

    const SCHEME: SchemeId;

    fn commit_request(request: &Self::Request) -> RequestCommitment;
    fn commit_output(output: &Self::Output) -> ResultCommitment;
}
diff --git a/crates/core/src/schemes/mod.rs b/crates/core/src/schemes/mod.rs
new file mode 100644
index 0000000..ac8b1a2
--- /dev/null
+++ b/crates/core/src/schemes/mod.rs
pub mod opaque;
pub mod symbolic;
diff --git a/crates/core/src/schemes/opaque.rs b/crates/core/src/schemes/opaque.rs
new file mode 100644
index 0000000..55684d7
--- /dev/null
+++ b/crates/core/src/schemes/opaque.rs
use serde::{Deserialize, Serialize};

use crate::{
    CommitmentScheme, DagCborEncoder, JsonBytes, RequestCommitment, ResultCommitment, SchemeId,
    tags,
};

#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
pub struct OpaqueRequest {
    pub service: String,
    pub method: String,
    pub payload: JsonBytes,
}

pub struct Opaque;

impl CommitmentScheme for Opaque {
    type Request = OpaqueRequest;
    type Output = JsonBytes;

    const SCHEME: SchemeId = SchemeId::Opaque;

    fn commit_request(request: &Self::Request) -> RequestCommitment {
        RequestCommitment::from_canonical_bytes(&Self::request_bytes(request))
    }

    fn commit_output(output: &Self::Output) -> ResultCommitment {
        ResultCommitment::from_canonical_bytes(&Self::output_bytes(output))
    }
}

impl Opaque {
    /// Canonical DAG-CBOR bytes for an opaque request:
    /// `[tag, service, method, payload]`.
    pub fn request_bytes(request: &OpaqueRequest) -> Vec<u8> {
        let mut encoder = DagCborEncoder::new();
encoder.array(4); + encoder.str(tags::OPAQUE_REQUEST_V1); + encoder.str(&request.service); + encoder.str(&request.method); + encoder.bytes(request.payload.as_bytes()); + encoder.into_bytes() + } + + pub fn output_bytes(output: &JsonBytes) -> Vec { + let mut encoder = DagCborEncoder::new(); + encoder.array(2); + encoder.str(tags::OPAQUE_RESULT_V1); + encoder.bytes(output.as_bytes()); + encoder.into_bytes() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn opaque_json_bytes_commit_exactly() { + let a = JsonBytes::new(br#"{"temp":0.7}"#.to_vec()); + let b = JsonBytes::new(br#"{"temp": 0.7}"#.to_vec()); + assert_ne!(Opaque::commit_output(&a), Opaque::commit_output(&b)); + } + + #[test] + fn opaque_request_schema_separates_identical_payload_from_output() { + let payload = JsonBytes::new(br#"{"x":1}"#.to_vec()); + let request = OpaqueRequest { + service: "svc".to_string(), + method: "run".to_string(), + payload: payload.clone(), + }; + + assert_ne!( + Opaque::commit_request(&request).digest(), + Opaque::commit_output(&payload).digest() + ); + } +} diff --git a/crates/core/src/schemes/symbolic.rs b/crates/core/src/schemes/symbolic.rs new file mode 100644 index 0000000..77f863d --- /dev/null +++ b/crates/core/src/schemes/symbolic.rs @@ -0,0 +1,32 @@ +use serde::{Deserialize, Serialize}; + +use crate::{CommitmentScheme, Digest, RequestCommitment, ResultCommitment, SchemeId}; + +#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)] +pub struct SymbolicRequest { + /// catnix InputId. + pub text_execution_cid: Digest, +} + +#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)] +pub struct SymbolicOutput { + /// catnix OutputId. 
+ pub text_artifact_cid: Digest, +} + +pub struct Symbolic; + +impl CommitmentScheme for Symbolic { + type Request = SymbolicRequest; + type Output = SymbolicOutput; + + const SCHEME: SchemeId = SchemeId::Symbolic; + + fn commit_request(request: &Self::Request) -> RequestCommitment { + RequestCommitment::from_digest(request.text_execution_cid) + } + + fn commit_output(output: &Self::Output) -> ResultCommitment { + ResultCommitment::from_digest(output.text_artifact_cid) + } +} diff --git a/crates/core/src/signature.rs b/crates/core/src/signature.rs new file mode 100644 index 0000000..4ff2254 --- /dev/null +++ b/crates/core/src/signature.rs @@ -0,0 +1,361 @@ +use serde::de::{Error as DeError, Visitor}; +use serde::{Deserialize, Deserializer, Serialize, Serializer}; +use std::fmt; + +use k256::ecdsa::signature::hazmat::{PrehashSigner, PrehashVerifier}; +use k256::ecdsa::{Signature as K256Signature, SigningKey, VerifyingKey}; + +use crate::digest::Digest; +use crate::{hash_tuple, tags}; + +#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] +#[repr(u8)] +pub enum SignatureKind { + Secp256k1 = tags::SIGNATURE_SECP256K1, +} + +impl SignatureKind { + pub const fn to_byte(self) -> u8 { + self as u8 + } + + pub fn from_byte(byte: u8) -> Result { + match byte { + tags::SIGNATURE_SECP256K1 => Ok(Self::Secp256k1), + _ => Err(SignatureError::UnknownSignatureKind(byte)), + } + } +} + +impl Serialize for SignatureKind { + fn serialize(&self, serializer: S) -> Result + where + S: Serializer, + { + serializer.serialize_u8(self.to_byte()) + } +} + +impl<'de> Deserialize<'de> for SignatureKind { + fn deserialize(deserializer: D) -> Result + where + D: Deserializer<'de>, + { + struct KindVisitor; + + impl Visitor<'_> for KindVisitor { + type Value = SignatureKind; + + fn expecting(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.write_str("a one-byte signature kind") + } + + fn visit_u8(self, v: u8) -> Result + where + E: DeError, + { + 
SignatureKind::from_byte(v).map_err(E::custom) + } + + fn visit_u64(self, v: u64) -> Result + where + E: DeError, + { + let byte = u8::try_from(v).map_err(E::custom)?; + self.visit_u8(byte) + } + } + + deserializer.deserialize_u8(KindVisitor) + } +} + +#[derive(Clone, Copy, PartialEq, Eq, Hash)] +pub struct PublicKey { + kind: SignatureKind, + bytes: [u8; 33], +} + +impl PublicKey { + pub const LEN: usize = 33; + + pub const fn from_compressed_sec1(bytes: [u8; Self::LEN]) -> Self { + Self { + kind: SignatureKind::Secp256k1, + bytes, + } + } + + pub const fn kind(&self) -> SignatureKind { + self.kind + } + + pub const fn bytes(&self) -> &[u8; Self::LEN] { + &self.bytes + } + + pub fn verifying_key(&self) -> Result { + match self.kind { + SignatureKind::Secp256k1 => { + VerifyingKey::from_sec1_bytes(&self.bytes).map_err(SignatureError::from) + } + } + } +} + +impl fmt::Debug for PublicKey { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("PublicKey") + .field("kind", &self.kind) + .field("producer_id", &ProducerId::from_public_key(self)) + .finish() + } +} + +impl Serialize for PublicKey { + fn serialize(&self, serializer: S) -> Result + where + S: Serializer, + { + (&self.kind, serde_bytes::Bytes::new(&self.bytes)).serialize(serializer) + } +} + +impl<'de> Deserialize<'de> for PublicKey { + fn deserialize(deserializer: D) -> Result + where + D: Deserializer<'de>, + { + let (kind, bytes): (SignatureKind, serde_bytes::ByteBuf) = + Deserialize::deserialize(deserializer)?; + if kind != SignatureKind::Secp256k1 { + return Err(D::Error::custom("unsupported public key kind")); + } + let bytes: [u8; Self::LEN] = bytes.into_vec().try_into().map_err(|bytes: Vec| { + D::Error::custom(format!("public key must be 33 bytes, got {}", bytes.len())) + })?; + Ok(Self { kind, bytes }) + } +} + +#[derive(Clone, Copy, PartialEq, Eq, Hash)] +pub struct Signature { + kind: SignatureKind, + bytes: [u8; 64], +} + +impl Signature { + pub const LEN: usize = 64; + 
+ pub const fn from_compact_secp256k1(bytes: [u8; Self::LEN]) -> Self { + Self { + kind: SignatureKind::Secp256k1, + bytes, + } + } + + pub const fn kind(&self) -> SignatureKind { + self.kind + } + + pub const fn bytes(&self) -> &[u8; Self::LEN] { + &self.bytes + } + + fn as_k256(&self) -> Result { + match self.kind { + SignatureKind::Secp256k1 => { + let sig = K256Signature::from_slice(&self.bytes).map_err(SignatureError::from)?; + if sig.normalize_s().is_some() { + return Err(SignatureError::HighS); + } + Ok(sig) + } + } + } +} + +impl fmt::Debug for Signature { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("Signature") + .field("kind", &self.kind) + .finish_non_exhaustive() + } +} + +impl Serialize for Signature { + fn serialize(&self, serializer: S) -> Result + where + S: Serializer, + { + (&self.kind, serde_bytes::Bytes::new(&self.bytes)).serialize(serializer) + } +} + +impl<'de> Deserialize<'de> for Signature { + fn deserialize(deserializer: D) -> Result + where + D: Deserializer<'de>, + { + let (kind, bytes): (SignatureKind, serde_bytes::ByteBuf) = + Deserialize::deserialize(deserializer)?; + if kind != SignatureKind::Secp256k1 { + return Err(D::Error::custom("unsupported signature kind")); + } + let bytes: [u8; Self::LEN] = bytes.into_vec().try_into().map_err(|bytes: Vec| { + D::Error::custom(format!("signature must be 64 bytes, got {}", bytes.len())) + })?; + Ok(Self { kind, bytes }) + } +} + +#[derive(Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)] +pub struct ProducerId(Digest); + +impl ProducerId { + pub fn from_public_key(public_key: &PublicKey) -> Self { + let kind = [public_key.kind().to_byte()]; + Self(hash_tuple( + tags::PRODUCER_ID_V1, + &[&kind, public_key.bytes()], + )) + } + + pub const fn digest(&self) -> Digest { + self.0 + } + + pub const fn as_bytes(&self) -> &[u8; Digest::LEN] { + self.0.as_bytes() + } +} + +impl fmt::Debug for ProducerId { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> 
fmt::Result { + f.debug_tuple("ProducerId").field(&self.0).finish() + } +} + +pub struct ProducerSigningKey { + inner: SigningKey, +} + +impl ProducerSigningKey { + pub fn generate() -> Self { + let inner = SigningKey::random(&mut k256::elliptic_curve::rand_core::OsRng); + Self { inner } + } + + pub fn from_secret_bytes(bytes: [u8; 32]) -> Result { + let field_bytes = k256::FieldBytes::from(bytes); + let inner = SigningKey::from_bytes(&field_bytes).map_err(SignatureError::from)?; + Ok(Self { inner }) + } + + pub fn to_secret_bytes(&self) -> [u8; 32] { + self.inner.to_bytes().into() + } + + pub fn public_key(&self) -> PublicKey { + let verifying_key = self.inner.verifying_key(); + let point = verifying_key.to_encoded_point(true); + let bytes: [u8; PublicKey::LEN] = point + .as_bytes() + .try_into() + .expect("compressed secp256k1 public key is 33 bytes"); + PublicKey::from_compressed_sec1(bytes) + } + + pub fn producer_id(&self) -> ProducerId { + ProducerId::from_public_key(&self.public_key()) + } + + pub fn sign_digest(&self, digest: Digest) -> Result { + let signature: K256Signature = self.inner.sign_prehash(digest.as_bytes())?; + let signature = signature.normalize_s().unwrap_or(signature); + Ok(Signature::from_compact_secp256k1( + signature.to_bytes().into(), + )) + } + + #[cfg(test)] + pub(crate) fn deterministic_for_tests() -> Self { + Self::from_secret_bytes([1; 32]).expect("valid deterministic test key") + } +} + +pub fn verify_digest_signature( + public_key: &PublicKey, + signature: &Signature, + digest: Digest, +) -> Result<(), SignatureError> { + if public_key.kind() != signature.kind() { + return Err(SignatureError::KindMismatch { + public_key: public_key.kind(), + signature: signature.kind(), + }); + } + + let verifying_key = public_key.verifying_key()?; + let signature = signature.as_k256()?; + verifying_key.verify_prehash(digest.as_bytes(), &signature)?; + Ok(()) +} + +#[derive(Debug, Clone, PartialEq, Eq, thiserror::Error)] +pub enum SignatureError { 
+ #[error("unknown signature kind byte 0x{0:02x}")] + UnknownSignatureKind(u8), + #[error("public key kind {public_key:?} does not match signature kind {signature:?}")] + KindMismatch { + public_key: SignatureKind, + signature: SignatureKind, + }, + #[error("secp256k1 signature is not normalized to low-S form")] + HighS, + #[error("secp256k1 error: {0}")] + Secp256k1(String), +} + +impl From for SignatureError { + fn from(error: k256::ecdsa::Error) -> Self { + Self::Secp256k1(error.to_string()) + } +} + +impl From for SignatureError { + fn from(error: k256::elliptic_curve::Error) -> Self { + Self::Secp256k1(error.to_string()) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn secp256k1_sign_verify_round_trip() { + let key = ProducerSigningKey::deterministic_for_tests(); + let digest = hash_tuple("test.digest", &[b"payload"]); + let signature = key.sign_digest(digest).unwrap(); + verify_digest_signature(&key.public_key(), &signature, digest).unwrap(); + } + + #[test] + fn invalid_signature_fails() { + let key = ProducerSigningKey::deterministic_for_tests(); + let digest = hash_tuple("test.digest", &[b"payload"]); + let mut signature = key.sign_digest(digest).unwrap(); + signature.bytes[0] ^= 0x01; + assert!(verify_digest_signature(&key.public_key(), &signature, digest).is_err()); + } + + #[test] + fn producer_id_is_stable() { + let key = ProducerSigningKey::deterministic_for_tests(); + assert_eq!( + ProducerId::from_public_key(&key.public_key()), + key.producer_id() + ); + } +} diff --git a/crates/core/src/tags.rs b/crates/core/src/tags.rs new file mode 100644 index 0000000..4e8d5de --- /dev/null +++ b/crates/core/src/tags.rs @@ -0,0 +1,13 @@ +pub const HASH_TUPLE_V1: &str = "hellas.hash_tuple.v1"; +pub const RECEIPT_SIGNATURE_V1: &str = "hellas.commitment.receipt.v1"; +pub const PRODUCER_ID_V1: &str = "hellas.producer_id.v1"; + +pub const OPAQUE_REQUEST_V1: &str = "hellas.opaque.request.v1"; +pub const OPAQUE_RESULT_V1: &str = 
"hellas.opaque.result.v1"; +pub const RECEIPT_BODY_V1: &str = "hellas.receipt.body.v1"; + +pub const SCHEME_SYMBOLIC: u8 = 0x00; +pub const SCHEME_OPAQUE: u8 = 0x01; +pub const SCHEME_ZKTLS: u8 = 0x02; + +pub const SIGNATURE_SECP256K1: u8 = 0x00; diff --git a/crates/core/src/value.rs b/crates/core/src/value.rs new file mode 100644 index 0000000..855c8c5 --- /dev/null +++ b/crates/core/src/value.rs @@ -0,0 +1,99 @@ +use serde::Serialize; +use serde::de::DeserializeOwned; + +pub type DagCborEncodeError = serde_ipld_dagcbor::EncodeError; +pub type DagCborDecodeError = serde_ipld_dagcbor::DecodeError; + +#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, serde::Deserialize)] +pub struct JsonBytes(#[serde(with = "serde_bytes")] pub Vec); + +impl JsonBytes { + pub fn new(bytes: Vec) -> Self { + Self(bytes) + } + + pub fn as_bytes(&self) -> &[u8] { + &self.0 + } + + pub fn into_bytes(self) -> Vec { + self.0 + } +} + +pub fn canonical_dag_cbor(value: &T) -> Result, DagCborEncodeError> { + serde_ipld_dagcbor::to_vec(value) +} + +pub fn decode_dag_cbor(bytes: &[u8]) -> Result { + serde_ipld_dagcbor::from_slice(bytes) +} + +/// Minimal strict DAG-CBOR encoder for commitment blobs whose byte layout is +/// part of the protocol. Use this when serde's struct/enum representation would +/// obscure the exact canonical preimage. 
pub struct DagCborEncoder {
    bytes: Vec<u8>,
}

impl DagCborEncoder {
    pub fn new() -> Self {
        Self { bytes: Vec::new() }
    }

    pub fn into_bytes(self) -> Vec<u8> {
        self.bytes
    }

    /// Array header (major type 4) for `len` items; items follow separately.
    pub fn array(&mut self, len: u64) {
        self.header(4, len);
    }

    /// Byte string (major type 2).
    pub fn bytes(&mut self, value: &[u8]) {
        self.header(2, value.len() as u64);
        self.bytes.extend_from_slice(value);
    }

    /// Text string (major type 3).
    pub fn str(&mut self, value: &str) {
        self.header(3, value.len() as u64);
        self.bytes.extend_from_slice(value.as_bytes());
    }

    /// Unsigned integer (major type 0).
    pub fn u64(&mut self, value: u64) {
        self.header(0, value);
    }

    /// Signed integer; negatives use major type 1 with value `-1 - n`.
    pub fn i64(&mut self, value: i64) {
        if value >= 0 {
            self.header(0, value as u64);
        } else {
            self.header(1, (-1_i128 - value as i128) as u64);
        }
    }

    // Emits the shortest (canonical) header encoding for `value`, per the
    // DAG-CBOR strictness requirement.
    fn header(&mut self, major: u8, value: u64) {
        let major = major << 5;
        match value {
            0..=23 => self.bytes.push(major | value as u8),
            24..=0xff => self.bytes.extend_from_slice(&[major | 24, value as u8]),
            0x100..=0xffff => {
                self.bytes.push(major | 25);
                self.bytes.extend_from_slice(&(value as u16).to_be_bytes());
            }
            0x1_0000..=0xffff_ffff => {
                self.bytes.push(major | 26);
                self.bytes.extend_from_slice(&(value as u32).to_be_bytes());
            }
            _ => {
                self.bytes.push(major | 27);
                self.bytes.extend_from_slice(&value.to_be_bytes());
            }
        }
    }
}

impl Default for DagCborEncoder {
    fn default() -> Self {
        Self::new()
    }
}
diff --git a/crates/executor/Cargo.toml b/crates/executor/Cargo.toml
index 6e3dc56..12160b4 100644
--- a/crates/executor/Cargo.toml
+++ b/crates/executor/Cargo.toml
@@ -7,18 +7,48 @@ license.workspace = true
 repository.workspace = true
 documentation.workspace = true
 
+[features]
+default = ["candle"]
+candle = ["catgrad/candle-backend"]
+candle-cuda = ["candle", "catgrad/cuda"]
+candle-metal = ["candle", "catgrad/metal"]
+
 [dependencies]
-hellas-rpc = { workspace = true, features = ["server"] }
+async-stream = "0.3"
+blake3 = "1"
+catgrad = { workspace = true, default-features = false, features =
["serde"] }
+catgrad-llm = { workspace = true, default-features = false }
+catnix.workspace = true
+chatgrad = { workspace = true, default-features = false }
+half = { workspace = true }
+hellas-core.workspace = true
+hellas-pb = { workspace = true, features = [
+  "hellas",
+  "symbolic",
+  "opaque",
+  "courtesy",
+  "server",
+] }
+hellas-rpc = { workspace = true, features = [
+  "server",
+  "client",
+  "compression",
+  "node",
+] }
+hf-hub = { version = "0.5", default-features = false, features = ["ureq"] }
+iroh-blobs = { workspace = true, features = ["fs-store"] }
+prometheus-client = "0.24"
+serde = { workspace = true }
+serde_bytes = { workspace = true }
+serde_ipld_dagcbor = { workspace = true }
+serde_json = { workspace = true }
+thiserror = { workspace = true }
 tokio = { workspace = true }
 tokio-stream = { workspace = true }
-thiserror = { workspace = true }
+tokio-util = "0.7"
 tonic = { workspace = true }
 tracing = { workspace = true }
-serde = { workspace = true }
-serde_json = { workspace = true }
-catgrad = { git = "https://github.com/hellas-ai/catgrad", default-features = false, features = ["serde", "ndarray-backend"] }
-catgrad-llm = { git = "https://github.com/hellas-ai/catgrad", default-features = false }
-hf-hub = "0.4"
-tokenizers = "0.21"
-minijinja = "2.11"
-minijinja-contrib = { version = "2.11", features = ["pycompat"] }
+uuid = { version = "1", features = ["v4"] }
+
+[dev-dependencies]
+proptest = "1"
diff --git a/crates/executor/proptest-regressions/runner/tests.txt b/crates/executor/proptest-regressions/runner/tests.txt
new file mode 100644
index 0000000..d76720f
--- /dev/null
+++ b/crates/executor/proptest-regressions/runner/tests.txt
+# Seeds for failure cases proptest has generated in the past. It is
+# automatically read and these particular cases re-run before any
+# novel cases are generated.
+# +# It is recommended to check this file in to source control so that +# everyone who runs the test benefits from these saved cases. +cc 8a1979835e517e5eab3152f3cdb2fe2fa0b0a80b6f32eef34ce1d4cb154422c6 # shrinks to prompt = [0], split_ratio = 100, max_new = 0, stop_count = 0, stop_seed = 0 diff --git a/crates/executor/src/artifacts.rs b/crates/executor/src/artifacts.rs new file mode 100644 index 0000000..18d9619 --- /dev/null +++ b/crates/executor/src/artifacts.rs @@ -0,0 +1,1170 @@ +use std::collections::{HashMap, hash_map::Entry}; +use std::fs; +use std::path::{Path, PathBuf}; +use std::str::FromStr; + +use catnix::{Canonical, CanonicalDecode, InputAddressed, OutputAddressed}; +use hellas_core::{Digest, SymbolicRequest, hash_tuple}; +use hellas_rpc::ExecutorError; +use serde::{Deserialize, Serialize}; + +use crate::state::{Invocation, ModelLocator, QuotePlan}; + +const SYMBOLIC_INDEX_FILE: &str = "symbolic-index.json"; + +#[derive(Clone, Debug, Eq, PartialEq)] +pub enum ArtifactStoreConfig { + Memory, + Fs(PathBuf), +} + +impl ArtifactStoreConfig { + pub fn memory() -> Self { + Self::Memory + } + + pub fn fs(path: impl Into) -> Self { + Self::Fs(path.into()) + } +} + +enum ArtifactBlobStore { + Memory(iroh_blobs::store::mem::MemStore), + Fs(iroh_blobs::store::fs::FsStore), +} + +impl Default for ArtifactBlobStore { + fn default() -> Self { + Self::memory() + } +} + +impl ArtifactBlobStore { + fn memory() -> Self { + Self::Memory(iroh_blobs::store::mem::MemStore::default()) + } + + async fn fs(path: impl AsRef) -> Result { + let path = path.as_ref(); + let store = iroh_blobs::store::fs::FsStore::load(path) + .await + .map_err(|err| { + ExecutorError::ArtifactStore(format!( + "failed to open artifact blob store {}: {err}", + path.display() + )) + })?; + Ok(Self::Fs(store)) + } + + async fn insert_canonical( + &self, + digest: catnix::Digest, + bytes: &[u8], + ) -> Result<(), ExecutorError> { + let expected = iroh_hash(digest); + let tag = match self { + 
Self::Memory(store) => store.add_slice(bytes).await, + Self::Fs(store) => store.add_slice(bytes).await, + } + .map_err(|err| ExecutorError::ArtifactStore(format!("blob insert failed: {err}")))?; + + if tag.hash != expected { + return Err(ExecutorError::ArtifactStore(format!( + "blob store hash mismatch: expected {}, got {}", + expected.to_hex(), + tag.hash.to_hex() + ))); + } + + Ok(()) + } + + async fn get_canonical( + &self, + digest: catnix::Digest, + ) -> Result>, ExecutorError> { + let hash = iroh_hash(digest); + let has_blob = match self { + Self::Memory(store) => store.has(hash).await, + Self::Fs(store) => store.has(hash).await, + } + .map_err(|err| ExecutorError::ArtifactStore(format!("blob lookup failed: {err}")))?; + if !has_blob { + return Ok(None); + } + + let bytes = match self { + Self::Memory(store) => store.get_bytes(hash).await, + Self::Fs(store) => store.get_bytes(hash).await, + } + .map_err(|err| ExecutorError::ArtifactStore(format!("blob read failed: {err}")))? + .to_vec(); + + if catnix::Digest::from_canonical_bytes(&bytes) != digest { + return Err(ExecutorError::ArtifactStore(format!( + "blob store returned bytes that do not match requested digest {digest}" + ))); + } + + Ok(Some(bytes)) + } + + #[cfg(test)] + async fn shutdown(&self) -> Result<(), ExecutorError> { + match self { + Self::Memory(store) => store.shutdown().await, + Self::Fs(store) => store.shutdown().await, + } + .map_err(|err| ExecutorError::ArtifactStore(format!("blob store shutdown failed: {err}"))) + } +} + +#[derive(Default)] +struct SymbolicIndexData { + bound_terms: HashMap, + outputs_by_execution: HashMap, +} + +#[derive(Default, Serialize, Deserialize)] +struct PersistedSymbolicIndex { + #[serde(default)] + bound_terms: Vec, + #[serde(default)] + outputs_by_execution: Vec, +} + +#[derive(Serialize, Deserialize)] +struct PersistedBoundTerm { + bound_term: String, + model_id: String, + revision: String, + dtype: String, +} + +#[derive(Serialize, Deserialize)] +struct 
PersistedExecutionOutput { + execution: String, + artifact: String, +} + +#[derive(Clone, Debug)] +pub(crate) struct ResolvedSymbolicExecution { + pub symbolic_request: SymbolicRequest, + pub locator: ModelLocator, + pub invocation: Invocation, +} + +pub(crate) struct SymbolicArtifactStore { + blob_store: ArtifactBlobStore, + index_path: Option, + canonical_blobs: HashMap>, + bound_terms: HashMap, + token_ids: HashMap, + policies: HashMap, + text_executions: HashMap, + text_states: HashMap, + text_artifacts: HashMap, + outputs_by_execution: HashMap, +} + +struct MaterializedTextSource { + locator: ModelLocator, + tokens: Vec, +} + +impl Default for SymbolicArtifactStore { + fn default() -> Self { + Self::memory() + } +} + +impl SymbolicArtifactStore { + pub(crate) fn memory() -> Self { + Self::new(ArtifactBlobStore::memory()) + } + + pub(crate) async fn open(config: ArtifactStoreConfig) -> Result { + match config { + ArtifactStoreConfig::Memory => Ok(Self::memory()), + ArtifactStoreConfig::Fs(path) => { + let index_path = path.join(SYMBOLIC_INDEX_FILE); + let index = load_symbolic_index(&index_path)?; + Ok(Self::with_index( + ArtifactBlobStore::fs(path).await?, + Some(index_path), + index, + )) + } + } + } + + fn new(blob_store: ArtifactBlobStore) -> Self { + Self::with_index(blob_store, None, SymbolicIndexData::default()) + } + + fn with_index( + blob_store: ArtifactBlobStore, + index_path: Option, + index: SymbolicIndexData, + ) -> Self { + Self { + blob_store, + index_path, + canonical_blobs: HashMap::new(), + bound_terms: index.bound_terms, + token_ids: HashMap::new(), + policies: HashMap::new(), + text_executions: HashMap::new(), + text_states: HashMap::new(), + text_artifacts: HashMap::new(), + outputs_by_execution: index.outputs_by_execution, + } + } + + pub async fn record_prepared_text( + &mut self, + plan: &QuotePlan, + ) -> Result { + let bound_term_id = + catnix::BoundTermId::from_digest(to_catnix_digest(binding_digest(&plan.locator))); + if let 
Entry::Vacant(entry) = self.bound_terms.entry(bound_term_id) { + entry.insert(plan.locator.clone()); + self.persist_symbolic_index()?; + } + + let from = match plan.initial_artifact_id { + Some(artifact_id) => { + let artifact_id = + catnix::TextArtifactId::from_digest(to_catnix_digest(artifact_id)); + let _ = self.materialize_artifact(artifact_id).await?; + catnix::SourceRef::output(artifact_id) + } + None => { + let identity = catnix::TextArtifact::identity(bound_term_id); + let identity_id = identity.output_id(); + self.insert_text_artifact(identity).await?; + catnix::SourceRef::output(identity_id) + } + }; + + let prompt_tokens = catnix::TokenIds::from(plan.invocation.input_ids.clone()); + let prompt_tokens_id = self.insert_token_ids(prompt_tokens).await?; + let policy = text_policy(&plan.invocation)?; + let policy_id = self.insert_policy(policy).await?; + let execution = catnix::TextExecution::new(from, prompt_tokens_id, policy_id); + let execution_id = self.insert_text_execution(execution).await?; + let symbolic_request = SymbolicRequest { + text_execution_cid: from_catnix_digest(execution_id.digest()), + }; + + Ok(ResolvedSymbolicExecution { + symbolic_request, + locator: plan.locator.clone(), + invocation: plan.invocation.clone(), + }) + } + + pub async fn resolve_symbolic_request( + &mut self, + symbolic_request: SymbolicRequest, + ) -> Result { + let execution_id = catnix::TextExecutionId::from_digest(to_catnix_digest( + symbolic_request.text_execution_cid, + )); + let execution = self.text_execution(execution_id).await?; + let source = self.materialize_source(execution.from()).await?; + let prompt_tokens = self.token_ids(execution.prompt_tokens()).await?; + let policy = self.text_policy(execution.policy()).await?; + let mut input_ids = source.tokens; + input_ids.extend(token_ids_to_u32(&prompt_tokens)); + let stop_token_ids = policy + .stop_token_ids() + .iter() + .map(|token| { + i32::try_from(token.as_u32()).map_err(|_| { + 
ExecutorError::InvalidTokenPayload(format!( + "stop token id {} exceeds i32 range", + token.as_u32() + )) + }) + }) + .collect::, _>>()?; + + Ok(ResolvedSymbolicExecution { + symbolic_request, + locator: source.locator, + invocation: Invocation { + input_ids, + max_new_tokens: policy.max_new_tokens(), + stop_token_ids, + }, + }) + } + + pub async fn publish_canonical_bytes( + &mut self, + bytes: Vec, + ) -> Result { + let digest = catnix::Digest::from_canonical_bytes(&bytes); + if !self.canonical_blobs.contains_key(&digest) { + self.blob_store.insert_canonical(digest, &bytes).await?; + self.canonical_blobs.insert(digest, bytes); + } + Ok(from_catnix_digest(digest)) + } + + pub async fn get_canonical_bytes(&mut self, digest: Digest) -> Result, ExecutorError> { + let digest = to_catnix_digest(digest); + if let Some(bytes) = self.canonical_blobs.get(&digest) { + return Ok(bytes.clone()); + } + let bytes = self + .blob_store + .get_canonical(digest) + .await? + .ok_or_else(|| ExecutorError::ArtifactNotFound(digest.to_string()))?; + self.canonical_blobs.insert(digest, bytes.clone()); + Ok(bytes) + } + + pub async fn record_completed_text( + &mut self, + symbolic_request: &SymbolicRequest, + invocation: &Invocation, + output_tokens: &[u32], + ) -> Result { + let execution_id = catnix::TextExecutionId::from_digest(to_catnix_digest( + symbolic_request.text_execution_cid, + )); + let _ = self.text_execution(execution_id).await?; + + let generated_tokens_id = self + .insert_token_ids(catnix::TokenIds::from(output_tokens.to_vec())) + .await?; + let mut state_tokens = invocation.input_ids.clone(); + state_tokens.extend_from_slice(output_tokens); + let state_tokens_id = self + .insert_token_ids(catnix::TokenIds::from(state_tokens)) + .await?; + let state_id = self + .insert_text_state(catnix::TextState::new(state_tokens_id)) + .await?; + let artifact = catnix::TextArtifact::output( + execution_id, + output_tokens.len() as u64, + state_id, + generated_tokens_id, + ); + let 
artifact_id = self.insert_text_artifact(artifact).await?; + if let Entry::Vacant(entry) = self.outputs_by_execution.entry(execution_id) { + entry.insert(artifact_id); + self.persist_symbolic_index()?; + } + Ok(from_catnix_digest(artifact_id.digest())) + } + + async fn materialize_source( + &mut self, + source: &catnix::TextSource, + ) -> Result { + match source { + catnix::SourceRef::Input(execution_id) => { + let artifact_id = self.output_artifact_for_execution(*execution_id)?; + self.materialize_execution_output(*execution_id, artifact_id) + .await + } + catnix::SourceRef::Output(artifact_id) => self.materialize_artifact(*artifact_id).await, + } + } + + async fn materialize_artifact( + &mut self, + artifact_id: catnix::TextArtifactId, + ) -> Result { + let artifact = self.text_artifact(artifact_id).await?; + self.materialize_decoded_artifact(artifact).await + } + + async fn materialize_execution_output( + &mut self, + execution_id: catnix::TextExecutionId, + artifact_id: catnix::TextArtifactId, + ) -> Result { + let artifact = self.text_artifact(artifact_id).await?; + validate_execution_output_mapping(execution_id, artifact_id, &artifact)?; + self.materialize_decoded_artifact(artifact).await + } + + async fn materialize_decoded_artifact( + &mut self, + artifact: catnix::TextArtifact, + ) -> Result { + match artifact { + catnix::TextArtifact::Identity { bound_term } => { + let locator = self.bound_term_locator(bound_term)?; + Ok(MaterializedTextSource { + locator, + tokens: Vec::new(), + }) + } + catnix::TextArtifact::Output(output) => { + let execution = self.text_execution(output.execution()).await?; + let locator = self.source_locator(execution.from().clone()).await?; + let state = self.text_state(output.state()).await?; + let tokens = self.token_ids(state.tokens()).await?; + Ok(MaterializedTextSource { + locator, + tokens: token_ids_to_u32(&tokens), + }) + } + } + } + + async fn source_locator( + &mut self, + source: catnix::TextSource, + ) -> Result { + let 
mut source = source; + loop { + let (artifact_id, expected_execution) = match source { + catnix::SourceRef::Input(id) => (self.output_artifact_for_execution(id)?, Some(id)), + catnix::SourceRef::Output(id) => (id, None), + }; + let artifact = self.text_artifact(artifact_id).await?; + if let Some(expected_execution) = expected_execution { + validate_execution_output_mapping(expected_execution, artifact_id, &artifact)?; + } + match artifact { + catnix::TextArtifact::Identity { bound_term } => { + return self.bound_term_locator(bound_term); + } + catnix::TextArtifact::Output(output) => { + source = self + .text_execution(output.execution()) + .await? + .from() + .clone(); + } + } + } + } + + fn bound_term_locator( + &self, + bound_term: catnix::BoundTermId, + ) -> Result { + self.bound_terms.get(&bound_term).cloned().ok_or_else(|| { + ExecutorError::InvalidQuoteRequest(format!("missing bound term metadata {bound_term}")) + }) + } + + fn output_artifact_for_execution( + &self, + execution_id: catnix::TextExecutionId, + ) -> Result { + self.outputs_by_execution + .get(&execution_id) + .copied() + .ok_or_else(|| { + ExecutorError::InvalidQuoteRequest(format!( + "lazy symbolic source {execution_id} has no cached output artifact" + )) + }) + } + + async fn token_ids( + &mut self, + id: catnix::TokenIdsId, + ) -> Result { + if let Some(value) = self.token_ids.get(&id) { + return Ok(value.clone()); + } + let value = self + .decode_canonical::(id.digest(), "TokenIds") + .await?; + if value.output_id() != id { + return Err(canonical_type_mismatch("TokenIds", id.digest())); + } + self.token_ids.insert(id, value.clone()); + Ok(value) + } + + async fn text_policy( + &mut self, + id: catnix::TextPolicyId, + ) -> Result { + if let Some(value) = self.policies.get(&id) { + return Ok(value.clone()); + } + let value = self + .decode_canonical::(id.digest(), "TextPolicy") + .await?; + if value.output_id() != id { + return Err(canonical_type_mismatch("TextPolicy", id.digest())); + } + 
self.policies.insert(id, value.clone()); + Ok(value) + } + + async fn text_execution( + &mut self, + id: catnix::TextExecutionId, + ) -> Result { + if let Some(value) = self.text_executions.get(&id) { + return Ok(value.clone()); + } + let value = self + .decode_canonical::(id.digest(), "TextExecution") + .await?; + if value.input_id() != id { + return Err(canonical_type_mismatch("TextExecution", id.digest())); + } + self.text_executions.insert(id, value.clone()); + Ok(value) + } + + async fn text_state( + &mut self, + id: catnix::TextStateId, + ) -> Result { + if let Some(value) = self.text_states.get(&id) { + return Ok(*value); + } + let value = self + .decode_canonical::(id.digest(), "TextState") + .await?; + if value.output_id() != id { + return Err(canonical_type_mismatch("TextState", id.digest())); + } + self.text_states.insert(id, value); + Ok(value) + } + + async fn text_artifact( + &mut self, + id: catnix::TextArtifactId, + ) -> Result { + if let Some(value) = self.text_artifacts.get(&id) { + return Ok(value.clone()); + } + let value = self + .decode_canonical::(id.digest(), "TextArtifact") + .await?; + if value.output_id() != id { + return Err(canonical_type_mismatch("TextArtifact", id.digest())); + } + self.text_artifacts.insert(id, value.clone()); + Ok(value) + } + + async fn decode_canonical( + &mut self, + digest: catnix::Digest, + kind: &str, + ) -> Result { + let bytes = self.load_canonical(digest, kind).await?; + T::from_canonical_bytes(&bytes).map_err(|err| { + ExecutorError::ArtifactStore(format!("invalid {kind} artifact {digest}: {err}")) + }) + } + + async fn load_canonical( + &mut self, + digest: catnix::Digest, + kind: &str, + ) -> Result, ExecutorError> { + if let Some(bytes) = self.canonical_blobs.get(&digest) { + return Ok(bytes.clone()); + } + let bytes = self + .blob_store + .get_canonical(digest) + .await? 
+ .ok_or_else(|| { + ExecutorError::InvalidQuoteRequest(format!("missing {kind} artifact {digest}")) + })?; + self.canonical_blobs.insert(digest, bytes.clone()); + Ok(bytes) + } + + async fn insert_token_ids( + &mut self, + value: catnix::TokenIds, + ) -> Result { + let id = value.output_id(); + self.insert_canonical(id.digest(), &value).await?; + self.token_ids.entry(id).or_insert(value); + Ok(id) + } + + async fn insert_policy( + &mut self, + value: catnix::TextPolicy, + ) -> Result { + let id = value.output_id(); + self.insert_canonical(id.digest(), &value).await?; + self.policies.entry(id).or_insert(value); + Ok(id) + } + + async fn insert_text_execution( + &mut self, + value: catnix::TextExecution, + ) -> Result { + let id = value.input_id(); + self.insert_canonical(id.digest(), &value).await?; + self.text_executions.entry(id).or_insert(value); + Ok(id) + } + + async fn insert_text_state( + &mut self, + value: catnix::TextState, + ) -> Result { + let id = value.output_id(); + self.insert_canonical(id.digest(), &value).await?; + self.text_states.entry(id).or_insert(value); + Ok(id) + } + + async fn insert_text_artifact( + &mut self, + value: catnix::TextArtifact, + ) -> Result { + let id = value.output_id(); + self.insert_canonical(id.digest(), &value).await?; + self.text_artifacts.entry(id).or_insert(value); + Ok(id) + } + + async fn insert_canonical( + &mut self, + digest: catnix::Digest, + value: &impl Canonical, + ) -> Result<(), ExecutorError> { + if self.canonical_blobs.contains_key(&digest) { + return Ok(()); + } + + let bytes = value.canonical_bytes(); + self.blob_store.insert_canonical(digest, &bytes).await?; + self.canonical_blobs.insert(digest, bytes); + Ok(()) + } + + fn persist_symbolic_index(&self) -> Result<(), ExecutorError> { + let Some(path) = &self.index_path else { + return Ok(()); + }; + persist_symbolic_index(path, self) + } + + #[cfg(test)] + async fn shutdown(&self) -> Result<(), ExecutorError> { + self.blob_store.shutdown().await + } +} 
+ +fn canonical_type_mismatch(kind: &str, digest: catnix::Digest) -> ExecutorError { + ExecutorError::ArtifactStore(format!( + "decoded {kind} artifact does not re-address to requested digest {digest}" + )) +} + +fn validate_execution_output_mapping( + execution_id: catnix::TextExecutionId, + artifact_id: catnix::TextArtifactId, + artifact: &catnix::TextArtifact, +) -> Result<(), ExecutorError> { + match artifact { + catnix::TextArtifact::Output(output) if output.execution() == execution_id => Ok(()), + catnix::TextArtifact::Output(output) => Err(ExecutorError::InvalidQuoteRequest(format!( + "lazy symbolic source {execution_id} maps to artifact {artifact_id}, but that artifact realizes {}", + output.execution() + ))), + catnix::TextArtifact::Identity { .. } => Err(ExecutorError::InvalidQuoteRequest(format!( + "lazy symbolic source {execution_id} maps to identity artifact {artifact_id}" + ))), + } +} + +fn load_symbolic_index(path: &Path) -> Result { + let bytes = match fs::read(path) { + Ok(bytes) => bytes, + Err(err) if err.kind() == std::io::ErrorKind::NotFound => { + return Ok(SymbolicIndexData::default()); + } + Err(err) => { + return Err(ExecutorError::ArtifactStore(format!( + "failed to read symbolic artifact index {}: {err}", + path.display() + ))); + } + }; + let persisted: PersistedSymbolicIndex = serde_json::from_slice(&bytes).map_err(|err| { + ExecutorError::ArtifactStore(format!( + "failed to decode symbolic artifact index {}: {err}", + path.display() + )) + })?; + persisted.try_into_index() +} + +fn persist_symbolic_index(path: &Path, store: &SymbolicArtifactStore) -> Result<(), ExecutorError> { + let persisted = PersistedSymbolicIndex::from_store(store); + let bytes = serde_json::to_vec_pretty(&persisted).map_err(|err| { + ExecutorError::ArtifactStore(format!("failed to encode symbolic artifact index: {err}")) + })?; + let parent = path.parent().ok_or_else(|| { + ExecutorError::ArtifactStore(format!( + "symbolic artifact index path {} has no parent", 
+ path.display() + )) + })?; + fs::create_dir_all(parent).map_err(|err| { + ExecutorError::ArtifactStore(format!( + "failed to create symbolic artifact index directory {}: {err}", + parent.display() + )) + })?; + let tmp = path.with_file_name(format!( + ".{}.tmp.{}", + SYMBOLIC_INDEX_FILE, + std::process::id() + )); + fs::write(&tmp, bytes).map_err(|err| { + ExecutorError::ArtifactStore(format!( + "failed to write symbolic artifact index temp file {}: {err}", + tmp.display() + )) + })?; + fs::rename(&tmp, path).map_err(|err| { + let _ = fs::remove_file(&tmp); + ExecutorError::ArtifactStore(format!( + "failed to persist symbolic artifact index {}: {err}", + path.display() + )) + }) +} + +impl PersistedSymbolicIndex { + fn from_store(store: &SymbolicArtifactStore) -> Self { + let mut bound_terms: Vec<_> = store + .bound_terms + .iter() + .map(|(bound_term, locator)| PersistedBoundTerm { + bound_term: bound_term.to_string(), + model_id: locator.model_id.clone(), + revision: locator.revision.clone(), + dtype: dtype_to_wire(locator.dtype), + }) + .collect(); + bound_terms.sort_by(|a, b| a.bound_term.cmp(&b.bound_term)); + + let mut outputs_by_execution: Vec<_> = store + .outputs_by_execution + .iter() + .map(|(execution, artifact)| PersistedExecutionOutput { + execution: execution.to_string(), + artifact: artifact.to_string(), + }) + .collect(); + outputs_by_execution.sort_by(|a, b| a.execution.cmp(&b.execution)); + + Self { + bound_terms, + outputs_by_execution, + } + } + + fn try_into_index(self) -> Result { + let mut index = SymbolicIndexData::default(); + for entry in self.bound_terms { + let bound_term = catnix::BoundTermId::from_digest(parse_catnix_digest( + &entry.bound_term, + "bound_term", + )?); + let dtype = catgrad::prelude::Dtype::from_str(&entry.dtype).map_err(|err| { + ExecutorError::ArtifactStore(format!( + "invalid dtype {:?} in symbolic artifact index: {err}", + entry.dtype + )) + })?; + index.bound_terms.insert( + bound_term, + ModelLocator { + 
model_id: entry.model_id, + revision: entry.revision, + dtype, + }, + ); + } + + for entry in self.outputs_by_execution { + let execution = catnix::TextExecutionId::from_digest(parse_catnix_digest( + &entry.execution, + "execution", + )?); + let artifact = catnix::TextArtifactId::from_digest(parse_catnix_digest( + &entry.artifact, + "artifact", + )?); + index.outputs_by_execution.insert(execution, artifact); + } + + Ok(index) + } +} + +fn parse_catnix_digest(raw: &str, field: &str) -> Result { + if raw.len() != 64 { + return Err(ExecutorError::ArtifactStore(format!( + "invalid {field} digest length {}, expected 64 hex chars", + raw.len() + ))); + } + let mut bytes = [0u8; 32]; + for (index, chunk) in raw.as_bytes().chunks_exact(2).enumerate() { + let high = hex_value(chunk[0]).ok_or_else(|| invalid_hex(field, raw))?; + let low = hex_value(chunk[1]).ok_or_else(|| invalid_hex(field, raw))?; + bytes[index] = (high << 4) | low; + } + Ok(catnix::Digest::from_bytes(bytes)) +} + +fn invalid_hex(field: &str, raw: &str) -> ExecutorError { + ExecutorError::ArtifactStore(format!("invalid {field} digest hex {raw:?}")) +} + +fn hex_value(byte: u8) -> Option { + match byte { + b'0'..=b'9' => Some(byte - b'0'), + b'a'..=b'f' => Some(byte - b'a' + 10), + b'A'..=b'F' => Some(byte - b'A' + 10), + _ => None, + } +} + +fn token_ids_to_u32(tokens: &catnix::TokenIds) -> Vec { + tokens + .as_slice() + .iter() + .map(|token| token.as_u32()) + .collect() +} + +fn text_policy(invocation: &Invocation) -> Result { + let stop_token_ids = invocation + .stop_token_ids + .iter() + .copied() + .map(catnix::TokenId::try_from) + .collect::, _>>() + .map_err(|err| ExecutorError::InvalidTokenPayload(err.to_string()))?; + Ok(catnix::TextPolicy::new( + invocation.max_new_tokens, + stop_token_ids, + )) +} + +fn binding_digest(locator: &ModelLocator) -> Digest { + hash_tuple( + "hellas.executor.synthetic_binding.v1", + &[ + locator.model_id.as_bytes(), + locator.revision.as_bytes(), + 
dtype_to_wire(locator.dtype).as_bytes(), + ], + ) +} + +fn dtype_to_wire(dtype: catgrad::prelude::Dtype) -> String { + match dtype { + catgrad::prelude::Dtype::F32 => "f32".to_string(), + catgrad::prelude::Dtype::F16 => "f16".to_string(), + catgrad::prelude::Dtype::BF16 => "bf16".to_string(), + catgrad::prelude::Dtype::F8 => "f8".to_string(), + catgrad::prelude::Dtype::U32 => "u32".to_string(), + } +} + +fn to_catnix_digest(digest: Digest) -> catnix::Digest { + catnix::Digest::from_bytes(digest.into_bytes()) +} + +fn from_catnix_digest(digest: catnix::Digest) -> Digest { + Digest::from_bytes(*digest.as_bytes()) +} + +fn iroh_hash(digest: catnix::Digest) -> iroh_blobs::Hash { + iroh_blobs::Hash::from_bytes(*digest.as_bytes()) +} + +#[cfg(test)] +mod tests { + use super::*; + use catgrad::prelude::Dtype; + + fn plan() -> QuotePlan { + QuotePlan { + locator: ModelLocator { + model_id: "model".to_string(), + revision: "main".to_string(), + dtype: Dtype::F32, + }, + invocation: Invocation { + input_ids: vec![1, 2, 3], + max_new_tokens: 8, + stop_token_ids: vec![4, 5], + }, + initial_artifact_id: None, + } + } + + #[tokio::test] + async fn prepared_text_round_trips_through_store() { + let mut store = SymbolicArtifactStore::default(); + let recorded = store.record_prepared_text(&plan()).await.unwrap(); + let resolved = store + .resolve_symbolic_request(recorded.symbolic_request.clone()) + .await + .unwrap(); + + assert_eq!(resolved.symbolic_request, recorded.symbolic_request); + assert_eq!(resolved.locator, recorded.locator); + assert_eq!(resolved.invocation.input_ids, recorded.invocation.input_ids); + assert_eq!( + resolved.invocation.max_new_tokens, + recorded.invocation.max_new_tokens + ); + assert_eq!( + resolved.invocation.stop_token_ids, + recorded.invocation.stop_token_ids + ); + } + + #[tokio::test] + async fn completed_text_artifact_can_start_a_followup() { + let mut store = SymbolicArtifactStore::default(); + let first = 
store.record_prepared_text(&plan()).await.unwrap(); + let first_artifact = store + .record_completed_text(&first.symbolic_request, &first.invocation, &[10, 11]) + .await + .unwrap(); + let mut next_plan = plan(); + next_plan.invocation.input_ids = vec![20]; + next_plan.initial_artifact_id = Some(first_artifact); + let next = store.record_prepared_text(&next_plan).await.unwrap(); + let resolved = store + .resolve_symbolic_request(next.symbolic_request) + .await + .unwrap(); + + assert_eq!(resolved.invocation.input_ids, vec![1, 2, 3, 10, 11, 20]); + } + + #[tokio::test] + async fn lazy_input_source_uses_cached_output_artifact() { + let mut store = SymbolicArtifactStore::default(); + let first = store.record_prepared_text(&plan()).await.unwrap(); + store + .record_completed_text(&first.symbolic_request, &first.invocation, &[10, 11]) + .await + .unwrap(); + let first_execution = catnix::TextExecutionId::from_digest(to_catnix_digest( + first.symbolic_request.text_execution_cid, + )); + let prompt_tokens = store + .insert_token_ids(catnix::TokenIds::from([20])) + .await + .unwrap(); + let policy = store + .insert_policy(catnix::TextPolicy::from_u32_stop_tokens(4, [])) + .await + .unwrap(); + let lazy = catnix::TextExecution::new( + catnix::SourceRef::input(first_execution), + prompt_tokens, + policy, + ); + let lazy_id = store.insert_text_execution(lazy).await.unwrap(); + let resolved = store + .resolve_symbolic_request(SymbolicRequest { + text_execution_cid: from_catnix_digest(lazy_id.digest()), + }) + .await + .unwrap(); + + assert_eq!(resolved.invocation.input_ids, vec![1, 2, 3, 10, 11, 20]); + } + + #[tokio::test] + async fn lazy_input_rejects_metadata_that_does_not_realize_execution() { + let mut store = SymbolicArtifactStore::default(); + let first = store.record_prepared_text(&plan()).await.unwrap(); + let first_execution = catnix::TextExecutionId::from_digest(to_catnix_digest( + first.symbolic_request.text_execution_cid, + )); + let first_execution_value = 
store.text_execution(first_execution).await.unwrap(); + let identity_id = match first_execution_value.from() { + catnix::SourceRef::Output(id) => *id, + catnix::SourceRef::Input(_) => panic!("prepared genesis text should start at output"), + }; + store + .outputs_by_execution + .insert(first_execution, identity_id); + let prompt_tokens = store + .insert_token_ids(catnix::TokenIds::from([20])) + .await + .unwrap(); + let policy = store + .insert_policy(catnix::TextPolicy::from_u32_stop_tokens(4, [])) + .await + .unwrap(); + let lazy = catnix::TextExecution::new( + catnix::SourceRef::input(first_execution), + prompt_tokens, + policy, + ); + let lazy_id = store.insert_text_execution(lazy).await.unwrap(); + let err = store + .resolve_symbolic_request(SymbolicRequest { + text_execution_cid: from_catnix_digest(lazy_id.digest()), + }) + .await + .unwrap_err(); + + assert!(err.to_string().contains("maps to identity artifact")); + } + + #[tokio::test] + async fn unknown_text_execution_is_rejected() { + let mut store = SymbolicArtifactStore::default(); + let err = store + .resolve_symbolic_request(SymbolicRequest { + text_execution_cid: Digest::from_bytes([7; 32]), + }) + .await + .unwrap_err(); + + assert!(err.to_string().contains("missing TextExecution artifact")); + } + + #[tokio::test] + async fn canonical_artifact_bytes_can_be_published_and_fetched() { + let mut store = SymbolicArtifactStore::default(); + let tokens = catnix::TokenIds::from([1, 2, 3]); + let bytes = tokens.canonical_bytes(); + let digest = store.publish_canonical_bytes(bytes.clone()).await.unwrap(); + + assert_eq!(digest, from_catnix_digest(tokens.output_id().digest())); + assert_eq!(store.get_canonical_bytes(digest).await.unwrap(), bytes); + } + + #[tokio::test] + async fn fs_store_reopens_typed_artifacts_from_canonical_blobs() { + let path = temp_artifact_store_path("reopen"); + let _ = std::fs::remove_dir_all(&path); + + let first_artifact; + let first_request; + { + let mut store = 
SymbolicArtifactStore::open(ArtifactStoreConfig::fs(&path)) + .await + .unwrap(); + let first = store.record_prepared_text(&plan()).await.unwrap(); + first_artifact = store + .record_completed_text(&first.symbolic_request, &first.invocation, &[10, 11]) + .await + .unwrap(); + first_request = first.symbolic_request; + store.shutdown().await.unwrap(); + } + + { + let mut store = SymbolicArtifactStore::open(ArtifactStoreConfig::fs(&path)) + .await + .unwrap(); + let resolved = store.resolve_symbolic_request(first_request).await.unwrap(); + assert_eq!(resolved.invocation.input_ids, vec![1, 2, 3]); + + let mut next_plan = plan(); + next_plan.invocation.input_ids = vec![20]; + next_plan.initial_artifact_id = Some(first_artifact); + let next = store.record_prepared_text(&next_plan).await.unwrap(); + let resolved = store + .resolve_symbolic_request(next.symbolic_request) + .await + .unwrap(); + assert_eq!(resolved.invocation.input_ids, vec![1, 2, 3, 10, 11, 20]); + store.shutdown().await.unwrap(); + } + + let _ = std::fs::remove_dir_all(&path); + } + + #[tokio::test] + async fn fs_store_reopens_cached_lazy_substitutions() { + let path = temp_artifact_store_path("lazy"); + let _ = std::fs::remove_dir_all(&path); + + let first_execution; + { + let mut store = SymbolicArtifactStore::open(ArtifactStoreConfig::fs(&path)) + .await + .unwrap(); + let first = store.record_prepared_text(&plan()).await.unwrap(); + store + .record_completed_text(&first.symbolic_request, &first.invocation, &[10, 11]) + .await + .unwrap(); + first_execution = catnix::TextExecutionId::from_digest(to_catnix_digest( + first.symbolic_request.text_execution_cid, + )); + store.shutdown().await.unwrap(); + } + + { + let mut store = SymbolicArtifactStore::open(ArtifactStoreConfig::fs(&path)) + .await + .unwrap(); + let prompt_tokens = store + .insert_token_ids(catnix::TokenIds::from([20])) + .await + .unwrap(); + let policy = store + .insert_policy(catnix::TextPolicy::from_u32_stop_tokens(4, [])) + .await + 
.unwrap(); + let lazy = catnix::TextExecution::new( + catnix::SourceRef::input(first_execution), + prompt_tokens, + policy, + ); + let lazy_id = store.insert_text_execution(lazy).await.unwrap(); + let resolved = store + .resolve_symbolic_request(SymbolicRequest { + text_execution_cid: from_catnix_digest(lazy_id.digest()), + }) + .await + .unwrap(); + + assert_eq!(resolved.invocation.input_ids, vec![1, 2, 3, 10, 11, 20]); + store.shutdown().await.unwrap(); + } + + let _ = std::fs::remove_dir_all(&path); + } + + fn temp_artifact_store_path(test: &str) -> PathBuf { + let nanos = std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .unwrap() + .as_nanos(); + std::env::temp_dir().join(format!( + "hellas-executor-artifacts-{test}-{}-{nanos}", + std::process::id() + )) + } +} diff --git a/crates/executor/src/backend.rs b/crates/executor/src/backend.rs new file mode 100644 index 0000000..5a668c1 --- /dev/null +++ b/crates/executor/src/backend.rs @@ -0,0 +1,47 @@ +use catgrad::interpreter::backend::candle::CandleBackend; +use hellas_rpc::error::BackendInitError; +use std::any::Any; +use std::panic::{AssertUnwindSafe, catch_unwind}; +use std::sync::OnceLock; +use tracing::info; + +pub type ExecBackend = CandleBackend; + +static EXEC_BACKEND: OnceLock> = OnceLock::new(); + +fn init_backend() -> Result { + let backend = catch_unwind(AssertUnwindSafe(|| { + #[cfg(any(feature = "candle-cuda", feature = "candle-metal"))] + { + CandleBackend::new_accel(true) + } + + #[cfg(not(any(feature = "candle-cuda", feature = "candle-metal")))] + { + CandleBackend::new() + } + })) + .map_err(|panic| { + BackendInitError::new(format!( + "failed to initialize executor backend: {}", + panic_message(&panic) + )) + })?; + + info!(?backend, "executor backend selected"); + Ok(backend) +} + +pub fn create_backend() -> Result { + EXEC_BACKEND.get_or_init(init_backend).clone() +} + +pub(crate) fn panic_message(panic: &(dyn Any + Send)) -> String { + if let Some(message) = 
panic.downcast_ref::<&'static str>() { + (*message).to_string() + } else if let Some(message) = panic.downcast_ref::() { + message.clone() + } else { + "unknown panic".to_string() + } +} diff --git a/crates/executor/src/catgrad_support.rs b/crates/executor/src/catgrad_support.rs deleted file mode 100644 index 2086d49..0000000 --- a/crates/executor/src/catgrad_support.rs +++ /dev/null @@ -1,154 +0,0 @@ -use crate::weights::ModelBundle; -use crate::ExecutorError; -use catgrad::interpreter::{self, backend::ndarray::NdArrayBackend, Backend, Interpreter}; -use catgrad::prelude::*; -use catgrad_llm::utils::get_model; -use minijinja::{context, Environment}; -use minijinja_contrib::pycompat::unknown_method_callback; -use tracing::warn; - -/// Format a user prompt using the model's chat template when available. -/// Falls back to the raw prompt if no template exists or rendering fails. -fn prepare_prompt(model_id: &str, chat_template: Option<&str>, prompt: &str) -> String { - let Some(template) = chat_template.filter(|t| !t.trim().is_empty()) else { - return prompt.to_string(); - }; - - // SmolLM3 and a few other templates wrap generation blocks we don't need for single-shot use. - let template = template - .replace("{% generation %}", "") - .replace("{% endgeneration %}", ""); - - let mut env = Environment::new(); - env.set_unknown_method_callback(unknown_method_callback); - - if let Err(err) = env.add_template("chat", &template) { - warn!("failed to parse chat template for {model_id}: {err}"); - return prompt.to_string(); - } - - let tmpl = match env.get_template("chat") { - Ok(t) => t, - Err(err) => { - warn!("failed to load chat template for {model_id}: {err}"); - return prompt.to_string(); - } - }; - - match tmpl.render(context! 
{ - messages => vec![context!(role => "user", content => prompt)], - add_generation_prompt => true, - }) { - Ok(r) => r, - Err(err) => { - warn!("failed to render chat template for {model_id}: {err}"); - prompt.to_string() - } - } -} - -/// Build and serialize a catgrad graph for a HF model id and prompt, returning the templated input. -pub fn build_graph_from_llm_prompt( - bundle: &ModelBundle, - prompt: &str, - max_new_tokens: u32, -) -> Result<(Vec, String), ExecutorError> { - use catgrad_llm::LLMError; - - let prepared_prompt = prepare_prompt( - &bundle.key.model_id.0, - bundle.chat_template.as_deref(), - prompt, - ); - let config = &bundle.config; - let tokenizer = &bundle.tokenizer; - - let encoding = tokenizer - .encode(prepared_prompt.clone(), true) - .map_err(LLMError::from)?; - let prompt_tokens = encoding.get_ids().len(); - let max_sequence_length = prompt_tokens + max_new_tokens as usize; - - let model = get_model(config, max_sequence_length)?; - let typed_term = model - .term() - .ok_or_else(|| ExecutorError::ModelConstruction(model.path().to_string()))?; - - let graph_bytes = serde_json::to_vec_pretty(&typed_term)?; - Ok((graph_bytes, prepared_prompt)) -} - -/// Fetch weights, build the environment, and execute the provided TypedTerm, streaming decoded text. 
-pub fn run_graph_streaming( - bundle: &ModelBundle, - prepared_input: &str, - typed_term: &catgrad::category::lang::TypedTerm, - max_seq: u32, - mut on_progress: impl FnMut(u64, &[u8], Option<&str>, bool), -) -> Result<(), ExecutorError> { - use catgrad_llm::LLMError; - - let backend = NdArrayBackend; - let config = &bundle.config; - let tokenizer = &bundle.tokenizer; - let parameter_values = &bundle.parameter_values; - let parameter_types = &bundle.parameter_types; - - let encoding = tokenizer - .encode(prepared_input, true) - .map_err(LLMError::from)?; - let tokens: Vec = encoding.get_ids().to_vec(); - - let max_sequence_length = tokens.len() + max_seq as usize; - let model = get_model(config, max_sequence_length)?; - - let mut env = stdlib(); - env.declarations - .extend(to_load_ops(model.path(), parameter_types.keys())); - - let interpreter = Interpreter::new(backend.clone(), env, parameter_values.clone()); - - let mut decoded = String::new(); - let mut current_tokens = tokens; - let mut progress: u64 = 0; - - for _ in 0..max_seq { - let input_tensor = interpreter::tensor( - &interpreter.backend, - Shape(vec![1, current_tokens.len()]), - current_tokens.clone(), - ) - .map_err(ExecutorError::Backend)?; - - let mut results = interpreter.run(typed_term.term.clone(), vec![input_tensor])?; - - let output = results.pop().ok_or(ExecutorError::NoOutput)?; - - let next_token = match output { - interpreter::Value::Tensor(arr) => match interpreter.backend.to_vec(arr) { - interpreter::TaggedVec::U32(v) => v.last().copied(), - _ => None, - }, - _ => None, - } - .ok_or(ExecutorError::UnexpectedOutput)?; - - // Decode and append - let piece = tokenizer - .decode(&[next_token], false) - .unwrap_or_else(|_| next_token.to_string()); - decoded.push_str(&piece); - current_tokens.push(next_token); - progress += 1; - - let done = config.get_eos_token_ids().contains(&(next_token as i32)); - on_progress(progress, piece.as_bytes(), Some(piece.as_str()), done); - - // Stop if EOS - if 
done { - break; - } - } - - Ok(()) -} diff --git a/crates/executor/src/error.rs b/crates/executor/src/error.rs deleted file mode 100644 index 9fce5e3..0000000 --- a/crates/executor/src/error.rs +++ /dev/null @@ -1,64 +0,0 @@ -use crate::state::StateError; -use catgrad::abstract_interpreter::types::InterpreterError; -use catgrad::interpreter::backend::BackendError; -use catgrad_llm::LLMError; -use thiserror::Error; -use tonic::Status; - -#[derive(Debug, Error)] -pub enum ExecutorError { - #[error("executor channel closed")] - ChannelClosed, - #[error("executor is busy")] - Busy, - #[error("invalid catgrad graph: {0}")] - InvalidGraph(#[from] serde_json::Error), - #[error("LLM error: {0}")] - Llm(#[from] LLMError), - #[error("interpreter error: {0}")] - Interpreter(#[from] InterpreterError), - #[error("backend error: {0:?}")] - Backend(BackendError), - #[error("failed to construct model term for {0}")] - ModelConstruction(String), - #[error("missing quote payload")] - MissingPayload, - #[error("missing weights hint model id")] - MissingWeightsHint, - #[error("weights not ready for model {0}")] - WeightsNotReady(String), - #[error("weights error: {0}")] - WeightsError(String), - #[error("no output from graph")] - NoOutput, - #[error("unexpected output value")] - UnexpectedOutput, - #[error(transparent)] - State(#[from] StateError), -} - -impl From for Status { - fn from(err: ExecutorError) -> Self { - match &err { - ExecutorError::ChannelClosed => Status::internal(err.to_string()), - ExecutorError::Busy => Status::resource_exhausted(err.to_string()), - ExecutorError::InvalidGraph(_) => Status::invalid_argument(err.to_string()), - ExecutorError::Llm(_) => Status::internal(err.to_string()), - ExecutorError::Interpreter(_) => Status::internal(err.to_string()), - ExecutorError::Backend(_) => Status::internal(err.to_string()), - ExecutorError::ModelConstruction(_) => Status::internal(err.to_string()), - ExecutorError::MissingPayload => 
Status::invalid_argument(err.to_string()), - ExecutorError::MissingWeightsHint => Status::invalid_argument(err.to_string()), - ExecutorError::WeightsNotReady(_) => Status::failed_precondition(err.to_string()), - ExecutorError::WeightsError(_) => Status::internal(err.to_string()), - ExecutorError::NoOutput => Status::internal(err.to_string()), - ExecutorError::UnexpectedOutput => Status::internal(err.to_string()), - ExecutorError::State(StateError::QuoteNotFound(_)) => { - Status::not_found(err.to_string()) - } - ExecutorError::State(StateError::ExecutionNotFound(_)) => { - Status::not_found(err.to_string()) - } - } - } -} diff --git a/crates/executor/src/execute_worker.rs b/crates/executor/src/execute_worker.rs deleted file mode 100644 index aac4ca9..0000000 --- a/crates/executor/src/execute_worker.rs +++ /dev/null @@ -1,194 +0,0 @@ -use crate::catgrad_support; -use crate::state::ExecutionPlan; -use crate::weights::ModelBundle; -use catgrad::category::lang::TypedTerm; -use std::sync::atomic::{AtomicBool, Ordering}; -use std::sync::{mpsc, Arc}; -use tracing::{info, warn}; - -use super::{ExecutorError, ExecutorMessage}; - -pub struct ExecuteWorker { - tx: mpsc::Sender, - busy: Arc, -} - -pub struct ExecuteReservation { - tx: mpsc::Sender, - busy: Arc, - committed: bool, -} - -struct BusyGuard { - busy: Arc, -} - -impl Drop for BusyGuard { - fn drop(&mut self) { - self.busy.store(false, Ordering::Release); - } -} - -#[derive(Debug)] -pub enum ExecuteWorkerError { - Busy, - Stopped, -} - -impl Drop for ExecuteReservation { - fn drop(&mut self) { - if !self.committed { - self.busy.store(false, Ordering::Release); - } - } -} - -impl ExecuteReservation { - pub fn enqueue(mut self, job: ExecuteJob) -> Result<(), ExecuteWorkerError> { - if self.tx.send(job).is_err() { - self.busy.store(false, Ordering::Release); - return Err(ExecuteWorkerError::Stopped); - } - self.committed = true; - Ok(()) - } -} - -pub struct ExecuteJob { - pub execution_id: String, - pub plan: 
ExecutionPlan, - pub bundle: Option>, -} - -impl ExecuteWorker { - pub fn spawn(executor_tx: tokio::sync::mpsc::UnboundedSender) -> Self { - let (tx, rx) = mpsc::channel::(); - let busy = Arc::new(AtomicBool::new(false)); - - let busy2 = busy.clone(); - std::thread::Builder::new() - .name("hellas-execute-worker".to_string()) - .spawn(move || worker_loop(rx, executor_tx, busy2)) - .expect("failed to spawn execute worker thread"); - - Self { tx, busy } - } - - pub fn is_busy(&self) -> bool { - self.busy.load(Ordering::Acquire) - } - - pub fn reserve(&self) -> Result { - match self - .busy - .compare_exchange(false, true, Ordering::AcqRel, Ordering::Acquire) - { - Ok(false) => Ok(ExecuteReservation { - tx: self.tx.clone(), - busy: self.busy.clone(), - committed: false, - }), - _ => Err(ExecuteWorkerError::Busy), - } - } -} - -fn worker_loop( - rx: mpsc::Receiver, - executor_tx: tokio::sync::mpsc::UnboundedSender, - busy: Arc, -) { - while let Ok(job) = rx.recv() { - let _busy_guard = BusyGuard { busy: busy.clone() }; - let exec_id = job.execution_id.clone(); - - let outcome = std::panic::catch_unwind(|| run_job(job, executor_tx.clone())); - match outcome { - Ok(Ok(())) => {} - Ok(Err(err)) => { - warn!("execute worker job {exec_id} failed: {err}"); - let _ = executor_tx.send(ExecutorMessage::Complete { - execution_id: exec_id, - result: None, - decoded: None, - success: false, - }); - } - Err(_) => { - warn!("execute worker job {exec_id} panicked"); - let _ = executor_tx.send(ExecutorMessage::Complete { - execution_id: exec_id, - result: None, - decoded: None, - success: false, - }); - } - } - } -} - -fn run_job( - job: ExecuteJob, - tx: tokio::sync::mpsc::UnboundedSender, -) -> Result<(), ExecutorError> { - let execution_id = job.execution_id; - let (result, decoded) = execute_plan_sync(&execution_id, job.plan, job.bundle.as_deref(), &tx)?; - let _ = tx.send(ExecutorMessage::Complete { - execution_id, - result: Some(result), - decoded, - success: true, - }); - Ok(()) 
-} - -fn execute_plan_sync( - execution_id: &str, - plan: ExecutionPlan, - bundle: Option<&ModelBundle>, - tx: &tokio::sync::mpsc::UnboundedSender, -) -> Result<(Vec, Option), ExecutorError> { - let term: TypedTerm = - serde_json::from_slice(&plan.graph).map_err(ExecutorError::InvalidGraph)?; - - let prompt = plan.input.clone(); - - let Some(key) = plan.weights_hint.clone() else { - return Err(ExecutorError::MissingWeightsHint); - }; - let Some(bundle) = bundle else { - return Err(ExecutorError::WeightsNotReady(key.model_id.0)); - }; - - info!(execution_id, "execute worker running plan"); - - let mut full_result: Vec = Vec::new(); - let mut full_decoded = String::new(); - - catgrad_support::run_graph_streaming( - bundle, - &prompt, - &term, - plan.max_seq, - |progress, chunk, decoded_chunk, done| { - full_result.extend_from_slice(chunk); - - if let Some(decoded_chunk) = decoded_chunk { - full_decoded.push_str(decoded_chunk); - } - - let _ = tx.send(ExecutorMessage::Progress { - execution_id: execution_id.to_string(), - chunk: chunk.to_vec(), - decoded_chunk: decoded_chunk.map(|s| s.to_string()), - progress, - }); - - if done { - return; - } - }, - )?; - - Ok((full_result, Some(full_decoded))) -} diff --git a/crates/executor/src/executor/actor/execution.rs b/crates/executor/src/executor/actor/execution.rs new file mode 100644 index 0000000..9afacea --- /dev/null +++ b/crates/executor/src/executor/actor/execution.rs @@ -0,0 +1,278 @@ +use crate::executor::ExecuteOutcome; +use crate::state::{QuoteKind, new_execution_id}; +use crate::worker::{EnqueueError, ExecuteJob, WorkerCompletion, WorkerCompletionResult}; +use hellas_core::{Digest, Opaque, SignedReceipt, canonical_dag_cbor}; +use hellas_core::{Symbolic, SymbolicOutput}; +use hellas_pb::hellas::{ + FinishStatus, ReceiptEnvelope as PbReceiptEnvelope, RunTicketRequest, WorkEvent, WorkFinished, + work_event, +}; +use hellas_rpc::ExecutorError; +use hellas_rpc::provenance::ExecutionProvenance; +use std::time::Instant; 
+use tokio::sync::mpsc; +use tokio_util::sync::CancellationToken; + +use super::Executor; + +/// Backpressure buffer for the per-execution event channel. Small enough +/// that a slow consumer stalls the worker quickly (preventing unbounded +/// memory growth); large enough to absorb minor jitter without blocking +/// decode on every chunk. +const PER_EXECUTION_CHANNEL_CAPACITY: usize = 64; + +impl Executor { + pub(super) async fn handle_execute( + &mut self, + request: RunTicketRequest, + ) -> Result { + let request_commitment = request.request_commitment; + let stream_batch_size = 1; + self.store.prune_expired_quotes(Instant::now()); + let quote = self + .store + .get_quote(&request_commitment, Instant::now())? + .clone(); + match quote.kind { + QuoteKind::Symbolic { + symbolic_request, + locator, + invocation, + } => { + let provenance = ExecutionProvenance { + commitment_id: *quote.request_commitment.as_bytes(), + }; + + let stat_prompt = invocation.input_ids.len() as u64; + let stat_cached_output = 0; + + let model_id = quote.model_id.clone(); + let execution_id = new_execution_id(); + let (sender, receiver) = mpsc::channel(PER_EXECUTION_CHANNEL_CAPACITY); + let job = ExecuteJob { + execution_id: execution_id.clone(), + model_id: model_id.clone(), + symbolic_request, + locator, + invocation, + stream_batch_size, + accepted_at: Instant::now(), + cancel: CancellationToken::new(), + sender, + }; + + let queued = match self.try_start_execution(job) { + Ok(()) => false, + Err(StartExecutionError::Busy(job)) => { + if self.pending_executions.len() >= self.queue_capacity { + return Err(ExecutorError::QueueFull { + capacity: self.queue_capacity, + }); + } + self.pending_executions.push_back(*job); + true + } + Err(StartExecutionError::Closed) => return Err(ExecutorError::ChannelClosed), + }; + + // Counters update after the queue accepts the job — no rollback path. 
+ self.metrics.record_execution_started( + &model_id, + stat_prompt, + /* cached_prompt= */ 0, + stat_cached_output, + /* prefill= */ stat_prompt, + ); + let _ = self.store.remove_quote(&request_commitment); + + info!( + %execution_id, + request_commitment = %format_request_commitment(&request_commitment), + queued, + queue_len = self.pending_executions.len(), + "accepted symbolic execution" + ); + + Ok(ExecuteOutcome { + provenance, + events: receiver, + }) + } + QuoteKind::Opaque { request, output } => { + let provenance = ExecutionProvenance { + commitment_id: *quote.request_commitment.as_bytes(), + }; + let model_id = quote.model_id.clone(); + let execution_id = new_execution_id(); + let total_units = output.as_bytes().len() as u64; + let receipt = SignedReceipt::sign::(&request, &output, &self.producer_key) + .map_err(|err| { + ExecutorError::WeightsError(format!("opaque receipt signing failed: {err}")) + })?; + let receipt_dag_cbor = canonical_dag_cbor(&receipt).map_err(|err| { + ExecutorError::WeightsError(format!("opaque receipt encoding failed: {err}")) + })?; + let (sender, receiver) = mpsc::channel(PER_EXECUTION_CHANNEL_CAPACITY); + sender + .send(Ok(WorkEvent { + kind: Some(work_event::Kind::Finished(WorkFinished { + output: output.into_bytes(), + receipt: Some(PbReceiptEnvelope { + dag_cbor: receipt_dag_cbor, + }), + status: FinishStatus::EndOfSequence as i32, + total_units, + })), + })) + .await + .map_err(|_| ExecutorError::ChannelClosed)?; + + self.metrics.record_execution_started( + &model_id, /* prompt= */ 0, /* cached_prompt= */ 0, + /* cached_output= */ 0, /* prefill= */ 0, + ); + self.metrics + .record_execution_completed(&model_id, total_units); + let _ = self.store.remove_quote(&request_commitment); + + info!( + %execution_id, + request_commitment = %format_request_commitment(&request_commitment), + total_units, + "accepted opaque execution" + ); + + Ok(ExecuteOutcome { + provenance, + events: receiver, + }) + } + } + } + + fn 
try_start_execution(&mut self, job: ExecuteJob) -> Result<(), StartExecutionError> { + match self.worker.try_enqueue(job) { + Ok(()) => Ok(()), + Err(EnqueueError::Busy(job)) => Err(StartExecutionError::Busy(job)), + Err(EnqueueError::Stopped(_job)) => Err(StartExecutionError::Closed), + } + } + + pub(super) async fn handle_worker_finished(&mut self, completion: WorkerCompletion) { + let WorkerCompletion { + execution_id, + model_id, + symbolic_request, + invocation, + sender, + result, + } = completion; + + let generated = result.position(); + let termination = match result { + WorkerCompletionResult::Completed { + stop_reason, + output_tokens, + } => { + match self + .completed_symbolic_termination( + &symbolic_request, + &invocation, + stop_reason, + output_tokens, + ) + .await + { + Ok(termination) => termination, + Err(err) => { + let msg = format!("{err:#}"); + warn!( + "execute worker job {execution_id} failed while recording/signing receipt: {msg}" + ); + crate::state::Termination::Failed { + position: generated, + error: msg, + } + } + } + } + WorkerCompletionResult::Failed { position, error } => { + crate::state::Termination::Failed { position, error } + } + }; + + if termination.is_completed() { + self.metrics + .record_execution_completed(&model_id, generated); + } else { + self.metrics.record_execution_failed(&model_id, generated); + } + + let _ = sender.send(Ok(termination.into_pb())).await; + self.dispatch_next_execution(); + } + + async fn completed_symbolic_termination( + &mut self, + symbolic_request: &hellas_core::SymbolicRequest, + invocation: &crate::state::Invocation, + stop_reason: crate::state::StopReason, + output_tokens: Vec, + ) -> Result { + let text_artifact_cid = self + .artifacts + .record_completed_text(symbolic_request, invocation, &output_tokens) + .await?; + let symbolic_output = SymbolicOutput { text_artifact_cid }; + let receipt = + SignedReceipt::sign::(symbolic_request, &symbolic_output, &self.producer_key) + .map_err(|err| { 
+ ExecutorError::WeightsError(format!("receipt signing failed: {err}")) + })?; + let receipt_dag_cbor = canonical_dag_cbor(&receipt).map_err(|err| { + ExecutorError::WeightsError(format!("receipt encoding failed: {err}")) + })?; + + Ok(crate::state::Termination::Completed { + stop_reason, + output_tokens, + receipt_dag_cbor, + }) + } + + /// Pop pending jobs and dispatch the first one whose consumer is still + /// listening. Stale entries (consumer dropped while queued) are discarded + /// silently — the consumer already lost interest. + pub(super) fn dispatch_next_execution(&mut self) { + while let Some(job) = self.pending_executions.pop_front() { + if job.sender.is_closed() { + debug!( + execution_id = %job.execution_id, + "dropping queued execution: consumer disconnected before dispatch" + ); + continue; + } + match self.try_start_execution(job) { + Ok(()) => return, + Err(StartExecutionError::Busy(job)) => { + self.pending_executions.push_front(*job); + return; + } + Err(StartExecutionError::Closed) => { + warn!("failed to start queued execution: executor channel closed"); + } + } + } + } +} + +fn format_request_commitment(bytes: &[u8]) -> String { + Digest::from_slice(bytes) + .map(|digest| digest.to_string()) + .unwrap_or_else(|_| format!("invalid:{}bytes", bytes.len())) +} + +enum StartExecutionError { + Busy(Box), + Closed, +} diff --git a/crates/executor/src/executor/actor/mod.rs b/crates/executor/src/executor/actor/mod.rs new file mode 100644 index 0000000..80a2033 --- /dev/null +++ b/crates/executor/src/executor/actor/mod.rs @@ -0,0 +1,224 @@ +mod execution; +mod quote; + +use crate::artifacts::{ArtifactStoreConfig, SymbolicArtifactStore}; +use crate::backend; +use crate::metrics::ExecutorMetrics; +use crate::state::{ExecutorState, LocalModelStatus, ModelLocator}; +use crate::worker::{ExecuteJob, ExecuteWorker}; +use catgrad::prelude::Dtype; +use hellas_core::ProducerSigningKey; +use hellas_pb::courtesy::{GetModelStatsResponse, GetStatsResponse, 
ModelTokenStats}; +use hellas_rpc::ExecutorError; +use hellas_rpc::policy::{DownloadPolicy, ExecutePolicy}; +use std::collections::{HashMap, VecDeque}; +use std::sync::Arc; +use tokio::sync::mpsc; + +use super::{ExecutorHandle, ExecutorMessage}; + +pub struct Executor { + pub(super) rx: mpsc::UnboundedReceiver, + pub(super) store: ExecutorState, + pub(super) artifacts: SymbolicArtifactStore, + pub(super) pending_executions: VecDeque, + pub(super) queue_capacity: usize, + pub(super) models: HashMap, + pub(super) worker: ExecuteWorker, + pub(super) execute_policy: ExecutePolicy, + pub(super) metrics: Arc, + pub(super) producer_key: Arc, + /// Dtypes this executor will accept. The first entry is the *preferred* + /// dtype, used whenever the executor itself constructs a program (e.g. + /// the `QuotePromptRequest` convenience path or `handle_preload`, which + /// don't carry a wire dtype). + pub(super) supported_dtypes: Vec, +} + +impl Executor { + pub fn spawn( + download_policy: DownloadPolicy, + execute_policy: ExecutePolicy, + queue_capacity: usize, + supported_dtypes: Vec, + ) -> Result { + Self::spawn_with_metrics( + download_policy, + execute_policy, + queue_capacity, + supported_dtypes, + Arc::new(ExecutorMetrics::default()), + ) + } + + pub fn spawn_with_producer_key( + download_policy: DownloadPolicy, + execute_policy: ExecutePolicy, + queue_capacity: usize, + supported_dtypes: Vec, + producer_key: ProducerSigningKey, + ) -> Result { + Self::spawn_with_metrics_and_producer_key( + download_policy, + execute_policy, + queue_capacity, + supported_dtypes, + Arc::new(ExecutorMetrics::default()), + Arc::new(producer_key), + ) + } + + pub fn spawn_with_metrics( + download_policy: DownloadPolicy, + execute_policy: ExecutePolicy, + queue_capacity: usize, + supported_dtypes: Vec, + metrics: Arc, + ) -> Result { + Self::spawn_with_metrics_and_producer_key( + download_policy, + execute_policy, + queue_capacity, + supported_dtypes, + metrics, + 
Arc::new(ProducerSigningKey::generate()), + ) + } + + pub fn spawn_with_metrics_and_producer_key( + _download_policy: DownloadPolicy, + execute_policy: ExecutePolicy, + queue_capacity: usize, + supported_dtypes: Vec, + metrics: Arc, + producer_key: Arc, + ) -> Result { + Self::spawn_with_metrics_producer_key_and_artifacts( + execute_policy, + queue_capacity, + supported_dtypes, + metrics, + producer_key, + SymbolicArtifactStore::memory(), + ) + } + + pub async fn spawn_with_metrics_and_producer_key_and_artifact_store( + _download_policy: DownloadPolicy, + execute_policy: ExecutePolicy, + queue_capacity: usize, + supported_dtypes: Vec, + metrics: Arc, + producer_key: Arc, + artifact_store: ArtifactStoreConfig, + ) -> Result { + let artifacts = SymbolicArtifactStore::open(artifact_store).await?; + Self::spawn_with_metrics_producer_key_and_artifacts( + execute_policy, + queue_capacity, + supported_dtypes, + metrics, + producer_key, + artifacts, + ) + } + + fn spawn_with_metrics_producer_key_and_artifacts( + execute_policy: ExecutePolicy, + queue_capacity: usize, + supported_dtypes: Vec, + metrics: Arc, + producer_key: Arc, + artifacts: SymbolicArtifactStore, + ) -> Result { + assert!( + !supported_dtypes.is_empty(), + "executor must support at least one dtype" + ); + let (tx, rx) = mpsc::unbounded_channel(); + backend::create_backend()?; + let executor = Self { + rx, + store: ExecutorState::new(), + artifacts, + pending_executions: VecDeque::new(), + queue_capacity, + models: HashMap::new(), + worker: ExecuteWorker::spawn(tx.clone()), + execute_policy, + metrics, + producer_key, + supported_dtypes, + }; + tokio::spawn(executor.run()); + Ok(ExecutorHandle { tx }) + } + + /// First entry of [`Executor::supported_dtypes`]. Used when this + /// executor must pick a dtype itself (e.g. preload, prompt-build + /// convenience RPCs). 
+ pub(super) fn preferred_dtype(&self) -> Dtype { + self.supported_dtypes[0] + } + + async fn run(mut self) { + while let Some(message) = self.rx.recv().await { + match message { + ExecutorMessage::QuoteSymbolic { request, reply } => { + let _ = reply.send(self.handle_quote_symbolic(request).await); + } + ExecutorMessage::QuoteOpaque { request, reply } => { + let _ = reply.send(self.handle_quote_opaque(request).await); + } + ExecutorMessage::QuotePrompt { request, reply } => { + let _ = reply.send(self.handle_quote_prompt(request).await); + } + ExecutorMessage::QuotePreparedText { request, reply } => { + let _ = reply.send(self.handle_quote_prepared_text(request).await); + } + ExecutorMessage::QuoteChatPrompt { request, reply } => { + let _ = reply.send(self.handle_quote_chat_prompt(request).await); + } + ExecutorMessage::PutArtifact { request, reply } => { + let _ = reply.send(self.handle_put_artifact(request).await); + } + ExecutorMessage::GetArtifact { request, reply } => { + let _ = reply.send(self.handle_get_artifact(request).await); + } + ExecutorMessage::Preload { model, reply } => { + let _ = reply.send(self.handle_preload(model).await); + } + ExecutorMessage::Execute { request, reply } => { + let _ = reply.send(self.handle_execute(request).await); + } + ExecutorMessage::WorkerFinished(completion) => { + self.handle_worker_finished(completion).await; + } + ExecutorMessage::ListModels { reply } => { + let _ = reply.send(Ok(self.handle_list_models().await)); + } + ExecutorMessage::GetStats { reply } => { + let model_stats = self + .metrics + .known_model_ids() + .into_iter() + .map(|model_id| ModelTokenStats { + stats: Some(self.metrics.model_snapshot(&model_id)), + model_id, + }) + .collect(); + let _ = reply.send(Ok(GetStatsResponse { + stats: Some(self.metrics.global_snapshot()), + model_stats, + })); + } + ExecutorMessage::GetModelStats { request, reply } => { + let _ = reply.send(Ok(GetModelStatsResponse { + stats: 
Some(self.metrics.model_snapshot(&request.model_id)), + model_id: request.model_id, + })); + } + } + } + } +} diff --git a/crates/executor/src/executor/actor/quote.rs b/crates/executor/src/executor/actor/quote.rs new file mode 100644 index 0000000..b79ebc2 --- /dev/null +++ b/crates/executor/src/executor/actor/quote.rs @@ -0,0 +1,373 @@ +use crate::executor::TicketOutcome; +use crate::state::{ + LocalModelStatus, ModelLocator, QuoteKind, QuotePlan, QuoteRecord, model_spec, + resolve_accept_dtypes, symbolic_request_from_pb, symbolic_request_to_pb, +}; +use catgrad::prelude::Dtype; +use chatgrad::types; +use hellas_core::{CommitmentScheme, Digest, JsonBytes, Opaque, OpaqueRequest, Symbolic}; +use hellas_pb::courtesy::{ + GetArtifactRequest, GetArtifactResponse, ListModelsResponse, ModelInfo, ModelStatus, + PutArtifactRequest, PutArtifactResponse, QuoteChatPromptRequest, QuoteChatPromptResponse, + QuotePreparedTextRequest, QuotePreparedTextResponse, QuotePromptRequest, QuotePromptResponse, +}; +use hellas_pb::hellas::Ticket; +use hellas_pb::opaque::OpaqueRequest as PbOpaqueRequest; +use hellas_pb::symbolic::SymbolicRequest as PbSymbolicRequest; +use hellas_rpc::ExecutorError; +use hellas_rpc::model::ModelAssets; +use hellas_rpc::provenance::ExecutionProvenance; +use hellas_rpc::spec::ModelSpec; +use std::time::{Duration, Instant}; + +use super::Executor; + +const STATIC_QUOTE_AMOUNT: u64 = 1000; +const QUOTE_TTL: Duration = Duration::from_secs(30); + +fn dtype_to_wire(dtype: Dtype) -> String { + match dtype { + Dtype::F32 => "f32".to_string(), + Dtype::F16 => "f16".to_string(), + Dtype::BF16 => "bf16".to_string(), + Dtype::F8 => "f8".to_string(), + Dtype::U32 => "u32".to_string(), + } +} + +impl Executor { + pub(super) fn resolve_accept_dtypes(&self, prefs: &[String]) -> Result { + resolve_accept_dtypes(prefs, &self.supported_dtypes) + } + + pub(super) async fn handle_preload(&mut self, model: String) -> Result<(), ExecutorError> { + let spec = 
ModelSpec::parse(&model).map_err(hellas_rpc::ModelAssetsError::from)?; + let locator = ModelLocator { + model_id: spec.id, + revision: spec.revision, + dtype: self.preferred_dtype(), + }; + let key = locator.clone(); + match ModelAssets::load(&locator.spec(), locator.dtype) { + Ok(_) => { + self.models.insert(key.clone(), LocalModelStatus::Ready); + info!( + model = %key.model_id, + requested_revision = %key.revision, + dtype = %dtype_to_wire(key.dtype), + "preloaded model metadata" + ); + Ok(()) + } + Err(err) => { + self.models + .insert(key.clone(), LocalModelStatus::Failed(err.to_string())); + Err(err.into()) + } + } + } + + pub(super) async fn handle_quote_symbolic( + &mut self, + request: PbSymbolicRequest, + ) -> Result, ExecutorError> { + self.store.prune_expired_quotes(Instant::now()); + let symbolic_request = symbolic_request_from_pb(request)?; + let resolved = self + .artifacts + .resolve_symbolic_request(symbolic_request.clone()) + .await?; + if !self.supported_dtypes.contains(&resolved.locator.dtype) { + return Err(ExecutorError::DtypeNotSupported { + request: resolved.locator.dtype, + supported: self.supported_dtypes.clone(), + }); + } + if !self.execute_policy.allows_execute( + &resolved.locator.spec(), + Some(resolved.locator.model_id.as_str()), + ) { + return Err(ExecutorError::PolicyDenied(format!( + "execute policy denied model {}", + resolved.locator.spec() + ))); + } + let request_commitment = Symbolic::commit_request(&symbolic_request); + let request_commitment_bytes = self.store.create_quote(QuoteRecord { + request_commitment, + expires_at: Instant::now() + QUOTE_TTL, + model_id: resolved.locator.spec(), + kind: QuoteKind::Symbolic { + symbolic_request, + locator: resolved.locator, + invocation: resolved.invocation, + }, + }); + + Ok(TicketOutcome { + response: Ticket { + request_commitment: request_commitment_bytes.to_vec(), + amount: STATIC_QUOTE_AMOUNT, + ttl_ms: QUOTE_TTL.as_millis() as u64, + }, + provenance: ExecutionProvenance { + 
commitment_id: request_commitment_bytes, + }, + }) + } + + pub(super) async fn handle_quote_opaque( + &mut self, + request: PbOpaqueRequest, + ) -> Result, ExecutorError> { + self.store.prune_expired_quotes(Instant::now()); + + let service = request.service; + if service.is_empty() { + return Err(ExecutorError::InvalidQuoteRequest( + "opaque service must not be empty".to_string(), + )); + } + let method = request.method; + if method.is_empty() { + return Err(ExecutorError::InvalidQuoteRequest( + "opaque method must not be empty".to_string(), + )); + } + serde_json::from_slice::(&request.payload).map_err(|err| { + ExecutorError::InvalidQuoteRequest(format!("opaque payload must be UTF-8 JSON: {err}")) + })?; + + let opaque_request = OpaqueRequest { + service: service.clone(), + method: method.clone(), + payload: JsonBytes::new(request.payload), + }; + let output = opaque_request.payload.clone(); + let request_commitment = Opaque::commit_request(&opaque_request); + let request_commitment_bytes = self.store.create_quote(QuoteRecord { + request_commitment, + expires_at: Instant::now() + QUOTE_TTL, + model_id: format!("opaque:{service}/{method}"), + kind: QuoteKind::Opaque { + request: opaque_request, + output, + }, + }); + + info!( + request_commitment = %format_request_commitment(&request_commitment_bytes), + service, + method, + amount = STATIC_QUOTE_AMOUNT, + "quoted opaque execution" + ); + + Ok(TicketOutcome { + response: Ticket { + request_commitment: request_commitment_bytes.to_vec(), + amount: STATIC_QUOTE_AMOUNT, + ttl_ms: QUOTE_TTL.as_millis() as u64, + }, + provenance: ExecutionProvenance { + commitment_id: request_commitment_bytes, + }, + }) + } + + pub(super) async fn handle_quote_prepared_text( + &mut self, + request: QuotePreparedTextRequest, + ) -> Result, ExecutorError> { + let total_start = Instant::now(); + self.store.prune_expired_quotes(Instant::now()); + let plan = QuotePlan::from_prepared_text_request(request, &self.supported_dtypes)?; + + if 
!self + .execute_policy + .allows_execute(&plan.locator.spec(), Some(plan.locator.model_id.as_str())) + { + return Err(ExecutorError::PolicyDenied(format!( + "execute policy denied model {}", + plan.locator.spec() + ))); + } + + let resolved = self.artifacts.record_prepared_text(&plan).await?; + let symbolic_request = resolved.symbolic_request.clone(); + let symbolic_request_pb = symbolic_request_to_pb(&symbolic_request); + let request_commitment = Symbolic::commit_request(&symbolic_request); + let commitment_id = request_commitment.digest(); + let request_commitment_bytes = self.store.create_quote(QuoteRecord { + request_commitment, + expires_at: Instant::now() + QUOTE_TTL, + model_id: plan.locator.spec(), + kind: QuoteKind::Symbolic { + symbolic_request, + locator: resolved.locator, + invocation: resolved.invocation, + }, + }); + + info!( + request_commitment = %format_request_commitment(&request_commitment_bytes), + commitment_id = %commitment_id, + model = %plan.locator.model_id, + requested_revision = %plan.locator.revision, + dtype = %dtype_to_wire(plan.locator.dtype), + prompt_tokens = plan.invocation.input_ids.len(), + max_new_tokens = plan.invocation.max_new_tokens, + amount = STATIC_QUOTE_AMOUNT, + total_ms = total_start.elapsed().as_millis(), + "quoted prepared symbolic text execution" + ); + + Ok(TicketOutcome { + response: QuotePreparedTextResponse { + ticket: Some(Ticket { + request_commitment: request_commitment_bytes.to_vec(), + amount: STATIC_QUOTE_AMOUNT, + ttl_ms: QUOTE_TTL.as_millis() as u64, + }), + prompt_tokens: plan.invocation.input_ids.len() as u32, + dtype: dtype_to_wire(plan.locator.dtype), + symbolic_request: Some(symbolic_request_pb), + }, + provenance: ExecutionProvenance { + commitment_id: *commitment_id.as_bytes(), + }, + }) + } + + pub(super) async fn handle_quote_prompt( + &mut self, + request: QuotePromptRequest, + ) -> Result, ExecutorError> { + let dtype = self.resolve_accept_dtypes(&request.accept_dtypes)?; + let assets = 
load_assets( + &request.huggingface_model_id, + &request.huggingface_revision, + dtype, + )?; + let prepared = assets.prepare_plain(&request.prompt)?; + let prompt_tokens = prepared.input_ids.len() as u32; + let mut prepared_request = + assets.build_quote_prepared_text_request(&prepared, request.max_new_tokens)?; + prepared_request.accept_dtypes = vec![dtype_to_wire(dtype)]; + let inner = self.handle_quote_prepared_text(prepared_request).await?; + + Ok(TicketOutcome { + response: QuotePromptResponse { + ticket: inner.response.ticket, + prompt_tokens, + dtype: inner.response.dtype, + symbolic_request: inner.response.symbolic_request, + }, + provenance: inner.provenance, + }) + } + + pub(super) async fn handle_quote_chat_prompt( + &mut self, + request: QuoteChatPromptRequest, + ) -> Result, ExecutorError> { + let dtype = self.resolve_accept_dtypes(&request.accept_dtypes)?; + let assets = load_assets( + &request.huggingface_model_id, + &request.huggingface_revision, + dtype, + )?; + + let mut messages: Vec = Vec::new(); + if !request.system_prompt.is_empty() { + messages.push(types::Message::openai(types::openai::ChatMessage::system( + &request.system_prompt, + ))); + } + for m in &request.messages { + let msg = match m.role.as_str() { + "assistant" => types::openai::ChatMessage::assistant(&m.content), + _ => types::openai::ChatMessage::user(&m.content), + }; + messages.push(types::Message::openai(msg)); + } + let prepared = assets.prepare_chat(&messages)?; + let prompt_tokens = prepared.input_ids.len() as u32; + let mut prepared_request = + assets.build_quote_prepared_text_request(&prepared, request.max_new_tokens)?; + prepared_request.accept_dtypes = vec![dtype_to_wire(dtype)]; + let inner = self.handle_quote_prepared_text(prepared_request).await?; + + Ok(TicketOutcome { + response: QuoteChatPromptResponse { + ticket: inner.response.ticket, + prompt_tokens, + dtype: inner.response.dtype, + symbolic_request: inner.response.symbolic_request, + }, + provenance: 
inner.provenance, + }) + } + + pub(super) async fn handle_put_artifact( + &mut self, + request: PutArtifactRequest, + ) -> Result { + let cid = self + .artifacts + .publish_canonical_bytes(request.canonical_artifact) + .await?; + Ok(PutArtifactResponse { + cid: cid.as_bytes().to_vec(), + }) + } + + pub(super) async fn handle_get_artifact( + &mut self, + request: GetArtifactRequest, + ) -> Result { + let canonical_artifact = self + .artifacts + .get_canonical_bytes(digest_from_slice(&request.cid, "cid")?) + .await?; + Ok(GetArtifactResponse { canonical_artifact }) + } + + pub(super) async fn handle_list_models(&self) -> ListModelsResponse { + let models = self + .models + .iter() + .map(|(locator, status)| { + let (proto_status, error) = match status { + LocalModelStatus::Ready => (ModelStatus::Ready, String::new()), + LocalModelStatus::Failed(err) => (ModelStatus::Failed, err.clone()), + }; + ModelInfo { + model_id: locator.model_id.clone(), + revision: locator.revision.clone(), + status: proto_status.into(), + error, + } + }) + .collect(); + ListModelsResponse { models } + } +} + +fn digest_from_slice(bytes: &[u8], field: &str) -> Result { + Digest::from_slice(bytes).map_err(|_| { + ExecutorError::InvalidQuoteRequest(format!("{field} must be 32 bytes, got {}", bytes.len())) + }) +} + +fn format_request_commitment(bytes: &[u8; 32]) -> String { + Digest::from_bytes(*bytes).to_string() +} + +fn load_assets( + model_id: &str, + revision: &str, + dtype: Dtype, +) -> Result { + ModelAssets::load(&model_spec(model_id, revision), dtype) +} diff --git a/crates/executor/src/executor/handle.rs b/crates/executor/src/executor/handle.rs new file mode 100644 index 0000000..c977565 --- /dev/null +++ b/crates/executor/src/executor/handle.rs @@ -0,0 +1,366 @@ +use hellas_pb::courtesy::courtesy_server::Courtesy; +use hellas_pb::courtesy::{ + DecodeTokensRequest, DecodeTokensResponse, GetArtifactRequest, GetArtifactResponse, + GetModelStatsRequest, GetModelStatsResponse, 
GetStatsRequest, GetStatsResponse, + ListModelsRequest, ListModelsResponse, PutArtifactRequest, PutArtifactResponse, + QuoteChatPromptRequest, QuoteChatPromptResponse, QuotePreparedTextRequest, + QuotePreparedTextResponse, QuotePromptRequest, QuotePromptResponse, +}; +use hellas_pb::hellas::execute_server::Execute; +use hellas_pb::hellas::{RunTicketRequest, Ticket, WorkEvent}; +use hellas_pb::opaque::OpaqueRequest as PbOpaqueRequest; +use hellas_pb::opaque::opaque_server::Opaque; +use hellas_pb::symbolic::SymbolicRequest as PbSymbolicRequest; +use hellas_pb::symbolic::symbolic_server::Symbolic; +use hellas_rpc::ExecutorError; +use hellas_rpc::driver::{ + ExecuteDriver, QuotedPreparedTextResponse, QuotedResponse, StreamedExecution, +}; +use hellas_rpc::provenance::write_provenance_metadata; +use std::pin::Pin; +use tokio::sync::oneshot; +use tokio_stream::wrappers::ReceiverStream; +use tonic::{Request, Response, Status}; + +use super::{ExecuteOutcome, ExecutorHandle, ExecutorMessage, TicketOutcome}; + +type ExecuteStream = Pin> + Send>>; + +impl ExecutorHandle { + async fn send( + &self, + make_message: impl FnOnce(oneshot::Sender>) -> ExecutorMessage, + ) -> Result { + let (reply_tx, reply_rx) = oneshot::channel(); + self.tx + .send(make_message(reply_tx)) + .map_err(|_| ExecutorError::ChannelClosed)?; + reply_rx.await.map_err(|_| ExecutorError::ChannelClosed)? 
+ } + + pub async fn create_symbolic_ticket( + &self, + request: PbSymbolicRequest, + ) -> Result, ExecutorError> { + self.send(|reply| ExecutorMessage::QuoteSymbolic { request, reply }) + .await + } + + pub async fn create_opaque_ticket( + &self, + request: PbOpaqueRequest, + ) -> Result, ExecutorError> { + self.send(|reply| ExecutorMessage::QuoteOpaque { request, reply }) + .await + } + + pub async fn quote_prompt( + &self, + request: QuotePromptRequest, + ) -> Result, ExecutorError> { + self.send(|reply| ExecutorMessage::QuotePrompt { request, reply }) + .await + } + + pub async fn quote_prepared_text( + &self, + request: QuotePreparedTextRequest, + ) -> Result, ExecutorError> { + self.send(|reply| ExecutorMessage::QuotePreparedText { request, reply }) + .await + } + + pub async fn quote_chat_prompt( + &self, + request: QuoteChatPromptRequest, + ) -> Result, ExecutorError> { + self.send(|reply| ExecutorMessage::QuoteChatPrompt { request, reply }) + .await + } + + pub async fn put_artifact( + &self, + request: PutArtifactRequest, + ) -> Result { + self.send(|reply| ExecutorMessage::PutArtifact { request, reply }) + .await + } + + pub async fn get_artifact( + &self, + request: GetArtifactRequest, + ) -> Result { + self.send(|reply| ExecutorMessage::GetArtifact { request, reply }) + .await + } + + pub async fn list_models(&self) -> Result { + self.send(|reply| ExecutorMessage::ListModels { reply }) + .await + } + + pub async fn preload_weights(&self, model: String) -> Result<(), ExecutorError> { + self.send(|reply| ExecutorMessage::Preload { model, reply }) + .await + } + + pub async fn run_ticket( + &self, + request: RunTicketRequest, + ) -> Result { + self.send(|reply| ExecutorMessage::Execute { request, reply }) + .await + } + + pub async fn get_stats(&self) -> Result { + self.send(|reply| ExecutorMessage::GetStats { reply }).await + } + + pub async fn get_model_stats( + &self, + request: GetModelStatsRequest, + ) -> Result { + self.send(|reply| 
ExecutorMessage::GetModelStats { request, reply }) + .await + } +} + +fn response_with_provenance(outcome: TicketOutcome) -> Response { + let mut response = Response::new(outcome.response); + write_provenance_metadata(response.metadata_mut(), &outcome.provenance); + response +} + +fn stream_response_with_provenance(outcome: ExecuteOutcome) -> Response { + let mut response = + Response::new(Box::pin(ReceiverStream::new(outcome.events)) as ExecuteStream); + write_provenance_metadata(response.metadata_mut(), &outcome.provenance); + response +} + +#[tonic::async_trait] +impl Execute for ExecutorHandle { + type RunTicketStream = ExecuteStream; + + async fn run_ticket( + &self, + request: Request, + ) -> Result, Status> { + let outcome = self.run_ticket(request.into_inner()).await?; + Ok(stream_response_with_provenance(outcome)) + } +} + +#[tonic::async_trait] +impl Symbolic for ExecutorHandle { + async fn create_ticket( + &self, + request: Request, + ) -> Result, Status> { + let outcome = self.create_symbolic_ticket(request.into_inner()).await?; + Ok(response_with_provenance(outcome)) + } +} + +#[tonic::async_trait] +impl Opaque for ExecutorHandle { + async fn create_ticket( + &self, + request: Request, + ) -> Result, Status> { + let outcome = self.create_opaque_ticket(request.into_inner()).await?; + Ok(response_with_provenance(outcome)) + } +} + +#[tonic::async_trait] +impl Courtesy for ExecutorHandle { + async fn quote_prompt( + &self, + request: Request, + ) -> Result, Status> { + let outcome = self.quote_prompt(request.into_inner()).await?; + Ok(response_with_provenance(outcome)) + } + + async fn quote_prepared_text( + &self, + request: Request, + ) -> Result, Status> { + let outcome = self.quote_prepared_text(request.into_inner()).await?; + Ok(response_with_provenance(outcome)) + } + + async fn quote_chat_prompt( + &self, + request: Request, + ) -> Result, Status> { + let outcome = self.quote_chat_prompt(request.into_inner()).await?; + 
Ok(response_with_provenance(outcome)) + } + + async fn put_artifact( + &self, + request: Request, + ) -> Result, Status> { + Ok(Response::new( + self.put_artifact(request.into_inner()).await?, + )) + } + + async fn get_artifact( + &self, + request: Request, + ) -> Result, Status> { + Ok(Response::new( + self.get_artifact(request.into_inner()).await?, + )) + } + + async fn list_models( + &self, + _request: Request, + ) -> Result, Status> { + Ok(Response::new(self.list_models().await?)) + } + + async fn get_stats( + &self, + _request: Request, + ) -> Result, Status> { + Ok(Response::new(self.get_stats().await?)) + } + + async fn get_model_stats( + &self, + request: Request, + ) -> Result, Status> { + Ok(Response::new( + self.get_model_stats(request.into_inner()).await?, + )) + } + + type DecodeTokensStream = + Pin> + Send>>; + + async fn decode_tokens( + &self, + request: Request>, + ) -> Result, Status> { + use hellas_rpc::decode_token_ids; + use hellas_rpc::model::ModelAssets; + use tokio_stream::StreamExt; + + let mut stream = request.into_inner(); + + // First message must contain the model ID. + let first = stream + .next() + .await + .ok_or_else(|| Status::invalid_argument("empty stream"))??; + + let model_spec = if first.huggingface_revision.is_empty() { + first.huggingface_model_id.clone() + } else { + format!( + "{}@{}", + first.huggingface_model_id, first.huggingface_revision + ) + }; + // Tokenizer-only path. The dtype is irrelevant for `decode_tokens`; + // F32 is just the cheapest valid value for the model-graph build that + // `ModelAssets::load` does for EOS-id extraction. + let assets = ModelAssets::load(&model_spec, catgrad::prelude::Dtype::F32)?; + + let output_stream = async_stream::stream! 
{ + let decode = |bytes: &[u8]| -> Result { + let ids = decode_token_ids(bytes)?; + let text = assets.decode_tokens(&ids)?; + Ok(DecodeTokensResponse { text }) + }; + + if !first.token_bytes.is_empty() { + yield decode(&first.token_bytes); + } + + tokio::pin!(stream); + while let Some(result) = stream.next().await { + let req = match result { + Ok(req) => req, + Err(status) => { + yield Err(status); + break; + } + }; + if req.token_bytes.is_empty() { + continue; + } + let response = decode(&req.token_bytes); + let stop = response.is_err(); + yield response; + if stop { + break; + } + } + }; + + Ok(Response::new( + Box::pin(output_stream) as Self::DecodeTokensStream + )) + } +} + +#[tonic::async_trait] +impl ExecuteDriver for ExecutorHandle { + async fn create_symbolic_ticket( + &mut self, + request: PbSymbolicRequest, + ) -> Result { + let outcome = ExecutorHandle::create_symbolic_ticket(self, request) + .await + .map_err(>::into)?; + Ok(QuotedResponse { + response: outcome.response, + provenance: outcome.provenance, + }) + } + + async fn create_opaque_ticket( + &mut self, + request: PbOpaqueRequest, + ) -> Result { + let outcome = ExecutorHandle::create_opaque_ticket(self, request) + .await + .map_err(>::into)?; + Ok(QuotedResponse { + response: outcome.response, + provenance: outcome.provenance, + }) + } + + async fn quote_prepared_text( + &mut self, + request: QuotePreparedTextRequest, + ) -> Result { + let outcome = ExecutorHandle::quote_prepared_text(self, request) + .await + .map_err(>::into)?; + Ok(QuotedPreparedTextResponse { + response: outcome.response, + provenance: outcome.provenance, + }) + } + + async fn execute_streaming( + &mut self, + request: RunTicketRequest, + ) -> Result { + let outcome = ExecutorHandle::run_ticket(self, request) + .await + .map_err(>::into)?; + Ok(StreamedExecution { + stream: Box::pin(ReceiverStream::new(outcome.events)), + provenance: outcome.provenance, + }) + } +} diff --git a/crates/executor/src/executor/mod.rs 
b/crates/executor/src/executor/mod.rs new file mode 100644 index 0000000..fddead6 --- /dev/null +++ b/crates/executor/src/executor/mod.rs @@ -0,0 +1,106 @@ +mod actor; +mod handle; + +use hellas_pb::courtesy::{ + GetArtifactRequest, GetArtifactResponse, GetModelStatsRequest, GetModelStatsResponse, + GetStatsResponse, ListModelsResponse, PutArtifactRequest, PutArtifactResponse, + QuoteChatPromptRequest, QuoteChatPromptResponse, QuotePreparedTextRequest, + QuotePreparedTextResponse, QuotePromptRequest, QuotePromptResponse, +}; +use hellas_pb::hellas::{RunTicketRequest, Ticket, WorkEvent}; +use hellas_pb::opaque::OpaqueRequest as PbOpaqueRequest; +use hellas_pb::symbolic::SymbolicRequest as PbSymbolicRequest; +use hellas_rpc::ExecutorError; +use hellas_rpc::provenance::ExecutionProvenance; +use tokio::sync::{mpsc, oneshot}; +use tonic::Status; + +use crate::worker::WorkerCompletion; +pub use actor::Executor; + +/// Per-execution receiver returned to the streaming `Execute` consumer. +/// Dropping it closes the matching sender held by the worker, which the +/// worker observes on its next chunk send and converts into a cancel. +pub(crate) type ExecuteEventReceiver = mpsc::Receiver>; + +/// Quote response paired with the provenance the executor committed to. +/// `provenance` is the same value the executor logs at quote/accept time; +/// callers (the tonic Execute impl) attach it to outgoing Response +/// metadata so gateways/clients can correlate the wire response with the +/// commitment that produced it. +#[derive(Debug)] +pub struct TicketOutcome { + pub response: R, + pub provenance: ExecutionProvenance, +} + +/// Streaming execution paired with the provenance committed to at +/// quote-acceptance time. The producer receipt is terminal and travels via +/// the final `WorkFinished.receipt` event — it's not part of +/// `ExecutionProvenance`. 
+#[derive(Debug)] +pub struct ExecuteOutcome { + pub provenance: ExecutionProvenance, + pub events: ExecuteEventReceiver, +} + +pub(crate) enum ExecutorMessage { + QuoteSymbolic { + request: PbSymbolicRequest, + reply: oneshot::Sender, ExecutorError>>, + }, + QuoteOpaque { + request: PbOpaqueRequest, + reply: oneshot::Sender, ExecutorError>>, + }, + QuotePrompt { + request: QuotePromptRequest, + reply: oneshot::Sender, ExecutorError>>, + }, + QuotePreparedText { + request: QuotePreparedTextRequest, + reply: oneshot::Sender, ExecutorError>>, + }, + QuoteChatPrompt { + request: QuoteChatPromptRequest, + reply: oneshot::Sender, ExecutorError>>, + }, + PutArtifact { + request: PutArtifactRequest, + reply: oneshot::Sender>, + }, + GetArtifact { + request: GetArtifactRequest, + reply: oneshot::Sender>, + }, + Preload { + model: String, + reply: oneshot::Sender>, + }, + /// Single streaming entry point: validate the quote, accept the job + /// (queueing if the worker is busy), and return a Receiver wired to + /// the worker's per-execution sender. + Execute { + request: RunTicketRequest, + reply: oneshot::Sender>, + }, + /// Worker → actor: this execution finished (or failed). The actor records + /// terminal artifacts, signs the receipt, sends the final event, and + /// advances the pending queue. 
+ WorkerFinished(WorkerCompletion), + ListModels { + reply: oneshot::Sender>, + }, + GetStats { + reply: oneshot::Sender>, + }, + GetModelStats { + request: GetModelStatsRequest, + reply: oneshot::Sender>, + }, +} + +#[derive(Clone)] +pub struct ExecutorHandle { + pub(super) tx: mpsc::UnboundedSender, +} diff --git a/crates/executor/src/lib.rs b/crates/executor/src/lib.rs index a29bd55..7e141c0 100644 --- a/crates/executor/src/lib.rs +++ b/crates/executor/src/lib.rs @@ -1,672 +1,19 @@ #[macro_use] extern crate tracing; -pub mod catgrad_support; -mod error; -mod execute_worker; +mod artifacts; +mod backend; +mod executor; +mod metrics; mod state; -mod weights; +mod worker; -pub use error::ExecutorError; -pub use hellas_rpc::pb::hellas::execute_server::ExecuteServer; +pub use artifacts::ArtifactStoreConfig; +pub use executor::{Executor, ExecutorHandle}; +pub use hellas_pb::courtesy::courtesy_server::CourtesyServer; +pub use hellas_pb::hellas::execute_server::ExecuteServer; +pub use hellas_pb::opaque::opaque_server::OpaqueServer; +pub use hellas_pb::symbolic::symbolic_server::SymbolicServer; +pub use metrics::ExecutorMetrics; -use execute_worker::{ExecuteJob, ExecuteWorker, ExecuteWorkerError}; -use state::{ExecutionPlan, ExecutionStatus, ExecutorState, StateError}; -use weights::{default_ref_cached, EnsureDisposition, ModelId, WeightsManager}; - -use hellas_rpc::pb::hellas::execute_server::Execute; -use hellas_rpc::pb::hellas::{ - get_quote_request, ExecuteProgress, ExecuteRequest, ExecuteResponse, ExecuteResultRequest, - ExecuteResultResponse, ExecuteStatusRequest, ExecuteStatusResponse, GetGraphRequest, - GetGraphResponse, GetQuoteRequest, GetQuoteResponse, WeightsHint as RpcWeightsHint, -}; -use std::collections::HashMap; -use std::pin::Pin; -use tokio::sync::{mpsc, oneshot}; -use tokio_stream::StreamExt; -use tonic::Status as TonicStatus; -use tonic::{Request, Response, Status}; - -const DEFAULT_MAX_SEQ: u32 = 16; - -enum ExecutorMessage { - Quote { - request: 
GetQuoteRequest, - reply: oneshot::Sender>, - }, - Graph { - request: GetGraphRequest, - reply: oneshot::Sender>, - }, - Subscribe { - execution_id: String, - reply: oneshot::Sender< - Result<(ExecuteProgress, mpsc::UnboundedReceiver), ExecutorError>, - >, - }, - Execute { - request: ExecuteRequest, - reply: oneshot::Sender>, - }, - Status { - request: ExecuteStatusRequest, - reply: oneshot::Sender>, - }, - Result { - request: ExecuteResultRequest, - reply: oneshot::Sender>, - }, - Progress { - execution_id: String, - chunk: Vec, - decoded_chunk: Option, - progress: u64, - }, - Complete { - execution_id: String, - result: Option>, - decoded: Option, - success: bool, - }, -} - -pub struct Executor { - rx: mpsc::UnboundedReceiver, - state: ExecutorState, - watchers: HashMap>>, - weights: WeightsManager, - execute_worker: ExecuteWorker, -} - -impl Executor { - pub fn spawn() -> ExecutorHandle { - let (tx, rx) = mpsc::unbounded_channel(); - let weights = WeightsManager::spawn(); - let execute_worker = ExecuteWorker::spawn(tx.clone()); - let executor = Self { - rx, - state: ExecutorState::new(), - watchers: HashMap::new(), - weights, - execute_worker, - }; - tokio::spawn(executor.run()); - ExecutorHandle { tx } - } - - async fn run(mut self) { - while let Some(msg) = self.rx.recv().await { - match msg { - ExecutorMessage::Quote { request, reply } => { - let _ = reply.send(self.handle_quote(request).await); - } - ExecutorMessage::Graph { request, reply } => { - let _ = reply.send(self.handle_graph(request)); - } - ExecutorMessage::Subscribe { - execution_id, - reply, - } => { - let _ = reply.send(self.handle_subscribe(execution_id)); - } - ExecutorMessage::Execute { request, reply } => { - let _ = reply.send(self.handle_execute(request).await); - } - ExecutorMessage::Status { request, reply } => { - let _ = reply.send(self.handle_status(request)); - } - ExecutorMessage::Result { request, reply } => { - let _ = reply.send(self.handle_result(request)); - } - 
ExecutorMessage::Progress { - execution_id, - chunk, - decoded_chunk, - progress, - } => { - let _ = self.state.append_output_chunk( - &execution_id, - &chunk, - decoded_chunk.as_deref(), - progress, - ); - self.send_progress( - &execution_id, - ExecutionStatus::Running, - progress, - chunk, - decoded_chunk, - ); - } - ExecutorMessage::Complete { - execution_id, - result, - decoded, - success, - } => { - self.handle_complete(execution_id, result, decoded, success); - } - } - } - } - - fn handle_graph(&self, request: GetGraphRequest) -> Result { - let graph = self - .state - .get_graph(&request.graph_id) - .cloned() - .ok_or_else(|| ExecutorError::State(StateError::QuoteNotFound(request.graph_id)))?; - Ok(GetGraphResponse { graph }) - } - - fn handle_subscribe( - &mut self, - execution_id: String, - ) -> Result<(ExecuteProgress, mpsc::UnboundedReceiver), ExecutorError> { - // Validate existence and grab current snapshot - let status = *self.state.get_status(&execution_id)?; - let progress = self.state.get_progress(&execution_id).unwrap_or(0); - - let (tx, rx) = mpsc::unbounded_channel(); - - // Only keep watchers alive when more updates are expected - if !matches!(status, ExecutionStatus::Completed | ExecutionStatus::Failed) { - self.watchers.entry(execution_id).or_default().push(tx); - } - - Ok(( - ExecuteProgress { - status: status.as_str().to_string(), - progress, - chunk: Vec::new(), - decoded: None, - }, - rx, - )) - } - - async fn handle_quote( - &mut self, - request: GetQuoteRequest, - ) -> Result { - let payload = request.payload.ok_or(ExecutorError::MissingPayload)?; - - enum QuoteKind { - Graph, - Llm { model_id: String, max_seq: u32 }, - } - - let (graph, input, weights_hint, max_seq, kind) = match payload { - get_quote_request::Payload::Graph(graph) => ( - graph, - String::new(), - None, - DEFAULT_MAX_SEQ, - QuoteKind::Graph, - ), - get_quote_request::Payload::LlmPrompt(llm) => { - let max_seq = if llm.max_seq == 0 { - DEFAULT_MAX_SEQ - } else { - 
llm.max_seq - }; - - let model_id = llm.huggingface_model_id.clone(); - let model_id_typed = ModelId(model_id.clone()); - let disposition = self - .weights - .ensure_default_ready(model_id_typed.clone()) - .await; - - let key = match disposition { - EnsureDisposition::Ready(key) => key, - EnsureDisposition::Queued | EnsureDisposition::InFlight => { - if default_ref_cached(&model_id) { - let key = self - .weights - .ensure_default_ready_wait( - model_id_typed, - tokio::time::Duration::from_secs(2), - ) - .await - .map_err(|e| match e { - weights::WeightsError::NotReady => { - ExecutorError::WeightsNotReady(model_id.clone()) - } - other => ExecutorError::WeightsError(other.to_string()), - })?; - key - } else { - return Err(ExecutorError::WeightsNotReady(model_id)); - } - } - EnsureDisposition::Failed(err) => { - return Err(ExecutorError::WeightsError(err)); - } - }; - - let bundle = self - .weights - .bundle(&key) - .await - .map_err(|e| ExecutorError::WeightsError(e.to_string()))?; - - let (graph_bytes, templated_input) = catgrad_support::build_graph_from_llm_prompt( - bundle.as_ref(), - &llm.prompt, - max_seq, - )?; - - ( - graph_bytes, - templated_input, - Some(key), - max_seq, - QuoteKind::Llm { model_id, max_seq }, - ) - } - }; - - let plan = ExecutionPlan { - graph: graph.clone(), - weights_hint: weights_hint.clone(), - input: input.clone(), - max_seq, - }; - let graph_id = format!("{:x}", simple_hash(&graph)); - let amount = 1000; // stub - let quote_id = self.state.create_quote(graph_id.clone(), plan); - - match kind { - QuoteKind::Graph => { - info!(%quote_id, %graph_id, amount, "quoted raw graph"); - } - QuoteKind::Llm { model_id, max_seq } => { - info!( - %quote_id, - %graph_id, - amount, - model = model_id, - max_seq, - input_len = input.len(), - "quoted llm prompt" - ); - } - } - - Ok(GetQuoteResponse { - quote_id, - graph_id, - amount, - input, - resolved_weights: weights_hint.map(|hint| RpcWeightsHint { - huggingface_model_id: hint.model_id.0, - 
revision: hint.revision.0, - }), - }) - } - - async fn handle_execute( - &mut self, - request: ExecuteRequest, - ) -> Result { - let quote_id = String::from_utf8_lossy(&request.quote_id).to_string(); - let plan = self.state.get_quote("e_id)?.plan.clone(); - - if self.execute_worker.is_busy() { - return Err(ExecutorError::Busy); - } - - let bundle = match plan.weights_hint.clone() { - Some(key) => Some(self.weights.bundle(&key).await.map_err(|e| match e { - weights::WeightsError::NotReady => { - ExecutorError::WeightsNotReady(key.model_id.0.clone()) - } - weights::WeightsError::Failed(msg) => ExecutorError::WeightsError(msg), - other => ExecutorError::WeightsError(other.to_string()), - })?), - None => None, - }; - - let reservation = self.execute_worker.reserve().map_err(|e| match e { - ExecuteWorkerError::Busy => ExecutorError::Busy, - ExecuteWorkerError::Stopped => ExecutorError::ChannelClosed, - })?; - - let execution_id = self.state.create_execution(quote_id.clone())?; - self.state - .set_status(&execution_id, ExecutionStatus::Running)?; - - info!( - %execution_id, - %quote_id, - input_len = plan.input.len(), - "starting execution" - ); - - reservation - .enqueue(ExecuteJob { - execution_id: execution_id.clone(), - plan, - bundle, - }) - .map_err(|e| match e { - ExecuteWorkerError::Busy => ExecutorError::Busy, - ExecuteWorkerError::Stopped => ExecutorError::ChannelClosed, - })?; - - Ok(ExecuteResponse { - execution_id, - quote_id, - }) - } - - fn handle_complete( - &mut self, - execution_id: String, - result: Option>, - decoded: Option, - success: bool, - ) { - let status = if success { - ExecutionStatus::Completed - } else { - ExecutionStatus::Failed - }; - info!( - %execution_id, - success, - decoded_len = decoded.as_ref().map(|s| s.len()).unwrap_or(0), - "execution finished" - ); - if let Err(e) = self.state.set_status(&execution_id, status) { - warn!("failed to set status for {execution_id}: {e}"); - return; - } - if let Some(result) = result { - if let 
Err(e) = self.state.set_result(&execution_id, result, decoded) { - warn!("failed to set result for {execution_id}: {e}"); - } - } - self.send_status(&execution_id, status); - } - - fn handle_status( - &self, - request: ExecuteStatusRequest, - ) -> Result { - let status = self.state.get_status(&request.execution_id)?; - let progress = self.state.get_progress(&request.execution_id).unwrap_or(0); - let result_bytes = self - .state - .get_result(&request.execution_id) - .map(|s| s.to_vec()) - .unwrap_or_default(); - let decoded = self - .state - .get_decoded(&request.execution_id)? - .map(|s| s.to_string()); - Ok(ExecuteStatusResponse { - status: status.as_str().to_string(), - progress, - result: result_bytes, - decoded, - }) - } - - fn handle_result( - &self, - request: ExecuteResultRequest, - ) -> Result { - let result = self.state.get_result(&request.execution_id)?; - let decoded = self - .state - .get_decoded(&request.execution_id)? - .unwrap_or_default(); - Ok(ExecuteResultResponse { - result: result.to_vec(), - decoded: decoded.to_string(), - }) - } - - fn send_progress( - &mut self, - execution_id: &str, - status: ExecutionStatus, - progress: u64, - chunk: Vec, - decoded: Option, - ) { - if let Some(watchers) = self.watchers.get_mut(execution_id) { - watchers.retain(|tx| { - tx.send(ExecuteProgress { - status: status.as_str().to_string(), - progress, - chunk: chunk.clone(), - decoded: decoded.clone(), - }) - .is_ok() - }); - - if matches!(status, ExecutionStatus::Completed | ExecutionStatus::Failed) { - self.watchers.remove(execution_id); - } - } - } - - fn send_status(&mut self, execution_id: &str, status: ExecutionStatus) { - let progress = self.state.get_progress(execution_id).unwrap_or(0); - self.send_progress(execution_id, status, progress, Vec::new(), None); - } -} - -#[derive(Clone)] -pub struct ExecutorHandle { - tx: mpsc::UnboundedSender, -} - -impl ExecutorHandle { - async fn send( - &self, - make_msg: impl FnOnce(oneshot::Sender>) -> ExecutorMessage, 
- ) -> Result { - let (reply_tx, reply_rx) = oneshot::channel(); - self.tx - .send(make_msg(reply_tx)) - .map_err(|_| ExecutorError::ChannelClosed)?; - reply_rx.await.map_err(|_| ExecutorError::ChannelClosed)? - } - - async fn quote(&self, request: GetQuoteRequest) -> Result { - self.send(|reply| ExecutorMessage::Quote { request, reply }) - .await - } - - async fn graph(&self, request: GetGraphRequest) -> Result { - self.send(|reply| ExecutorMessage::Graph { request, reply }) - .await - } - - async fn execute(&self, request: ExecuteRequest) -> Result { - self.send(|reply| ExecutorMessage::Execute { request, reply }) - .await - } - - async fn status( - &self, - request: ExecuteStatusRequest, - ) -> Result { - self.send(|reply| ExecutorMessage::Status { request, reply }) - .await - } - - async fn result( - &self, - request: ExecuteResultRequest, - ) -> Result { - self.send(|reply| ExecutorMessage::Result { request, reply }) - .await - } - - async fn subscribe( - &self, - execution_id: String, - ) -> Result<(ExecuteProgress, mpsc::UnboundedReceiver), ExecutorError> { - self.send(|reply| ExecutorMessage::Subscribe { - execution_id, - reply, - }) - .await - } -} - -#[tonic::async_trait] -impl Execute for ExecutorHandle { - async fn get_quote( - &self, - request: Request, - ) -> Result, Status> { - Ok(Response::new(self.quote(request.into_inner()).await?)) - } - - async fn get_graph( - &self, - request: Request, - ) -> Result, Status> { - Ok(Response::new(self.graph(request.into_inner()).await?)) - } - - async fn execute( - &self, - request: Request, - ) -> Result, Status> { - Ok(Response::new(self.execute(request.into_inner()).await?)) - } - - async fn execute_status( - &self, - request: Request, - ) -> Result, Status> { - Ok(Response::new(self.status(request.into_inner()).await?)) - } - - type ExecuteStreamStream = - Pin> + Send>>; - - async fn execute_stream( - &self, - request: Request, - ) -> Result, Status> { - let exec_id = request.into_inner().execution_id; - let 
(initial, rx) = self.subscribe(exec_id).await?; - let initial_stream = tokio_stream::once(Ok::<_, TonicStatus>(initial)); - let updates = - tokio_stream::wrappers::UnboundedReceiverStream::new(rx).map(Ok::<_, TonicStatus>); - let stream = initial_stream.chain(updates); - Ok(Response::new(Box::pin(stream) as Self::ExecuteStreamStream)) - } - - async fn execute_result( - &self, - request: Request, - ) -> Result, Status> { - Ok(Response::new(self.result(request.into_inner()).await?)) - } -} - -fn simple_hash(data: &[u8]) -> u64 { - let mut hash: u64 = 0; - for (i, &byte) in data.iter().enumerate() { - hash = hash.wrapping_add((byte as u64).wrapping_mul(31_u64.wrapping_pow(i as u32))); - } - hash -} - -#[cfg(test)] -mod tests { - use super::*; - - #[tokio::test] - async fn quote_and_execute() { - let handle = Executor::spawn(); - - // Get quote - let quote = handle - .quote(GetQuoteRequest { - payload: Some(get_quote_request::Payload::Graph(b"test-graph".to_vec())), - }) - .await - .expect("should return quote"); - assert!(quote.quote_id.starts_with("quote-")); - - // Execute with quote - let exec = handle - .execute(ExecuteRequest { - quote_id: quote.quote_id.as_bytes().to_vec(), - }) - .await - .expect("should return execution"); - assert!(exec.execution_id.starts_with("exec-")); - assert_eq!(exec.quote_id, quote.quote_id); - } - - #[tokio::test] - async fn execute_with_invalid_quote_fails() { - let handle = Executor::spawn(); - - let result = handle - .execute(ExecuteRequest { - quote_id: b"invalid-quote".to_vec(), - }) - .await; - assert!(result.is_err()); - } - - #[tokio::test] - async fn subscribe_sends_snapshot_immediately() { - let (tx, rx) = mpsc::unbounded_channel(); - let tx2 = tx.clone(); - let mut executor = Executor { - rx, - state: ExecutorState::new(), - watchers: HashMap::new(), - weights: WeightsManager::spawn(), - execute_worker: ExecuteWorker::spawn(tx2), - }; - - let quote_id = executor.state.create_quote( - "graph-0".to_string(), - ExecutionPlan { 
- graph: Vec::new(), - weights_hint: None, - input: String::new(), - max_seq: DEFAULT_MAX_SEQ, - }, - ); - let execution_id = executor - .state - .create_execution(quote_id) - .expect("execution should be created"); - executor - .state - .set_status(&execution_id, ExecutionStatus::Running) - .unwrap(); - - let (initial, mut updates) = executor - .handle_subscribe(execution_id.clone()) - .expect("subscribe should succeed"); - - assert_eq!(initial.status, "running"); - assert_eq!(initial.progress, 0); - assert!(initial.chunk.is_empty()); - assert!(initial.decoded.is_none()); - - executor.send_status(&execution_id, ExecutionStatus::Completed); - let completed = updates.recv().await.expect("should receive completion"); - assert_eq!(completed.status, "completed"); - assert_eq!(completed.progress, 0); - assert!(completed.chunk.is_empty()); - assert!(completed.decoded.is_none()); - assert!(updates.recv().await.is_none()); - } -} +pub(crate) const DEFAULT_MAX_SEQ: u32 = 16; diff --git a/crates/executor/src/metrics.rs b/crates/executor/src/metrics.rs new file mode 100644 index 0000000..e9db2b9 --- /dev/null +++ b/crates/executor/src/metrics.rs @@ -0,0 +1,257 @@ +//! Live executor counters. +//! +//! Counters are mutated inline at the event source (start/complete/fail), +//! so there is no polling step that copies internal state into a separate +//! prometheus registry. Detached metrics can be created with +//! [`ExecutorMetrics::default`] for tests and non-server callers. + +use prometheus_client::encoding::EncodeLabelSet; +use prometheus_client::metrics::counter::Counter; +use prometheus_client::metrics::family::Family; +use prometheus_client::registry::Registry; +use std::collections::BTreeSet; +use std::sync::Mutex; +use std::sync::atomic::AtomicU64; + +type U64Counter = Counter; + +#[derive(Clone, Debug, Hash, PartialEq, Eq, EncodeLabelSet)] +pub struct ModelLabel { + pub model_id: String, +} + +/// Single source of truth for executor counters. 
Each field is a prometheus +/// counter that can be both registered for scraping and read directly via +/// [`Counter::get`] (used by the GetStats RPC path). +#[derive(Default)] +pub struct ExecutorMetrics { + pub(crate) executions_started: U64Counter, + pub(crate) executions_completed: U64Counter, + pub(crate) executions_failed: U64Counter, + pub(crate) prompt_tokens: U64Counter, + pub(crate) cached_prompt_tokens: U64Counter, + pub(crate) cached_output_tokens: U64Counter, + pub(crate) prefill_tokens: U64Counter, + pub(crate) generated_tokens: U64Counter, + + pub(crate) by_model_executions_started: Family, + pub(crate) by_model_executions_completed: Family, + pub(crate) by_model_executions_failed: Family, + pub(crate) by_model_prompt_tokens: Family, + pub(crate) by_model_cached_prompt_tokens: Family, + pub(crate) by_model_cached_output_tokens: Family, + pub(crate) by_model_prefill_tokens: Family, + pub(crate) by_model_generated_tokens: Family, + + // `Family::read()` is private in prometheus-client, so we mirror the set + // of model ids we've ever incremented to power the GetStats RPC. + seen_models: Mutex>, +} + +impl ExecutorMetrics { + /// Register all counters with the supplied registry under the `hellas` + /// (global) and `hellas_model_*` (per-model labelled) prefixes. The + /// counter handles are shared (`Arc` internally), so clones registered + /// here observe the same updates as the executor's `Arc`. 
+ pub fn register_with(&self, registry: &mut Registry) { + let sub = registry.sub_registry_with_prefix("hellas"); + for (name, desc, ctr) in [ + ( + "executions_started", + "Executions started", + &self.executions_started, + ), + ( + "executions_completed", + "Executions completed", + &self.executions_completed, + ), + ( + "executions_failed", + "Executions failed", + &self.executions_failed, + ), + ("prompt_tokens", "Total prompt tokens", &self.prompt_tokens), + ( + "cached_prompt_tokens", + "Prompt tokens from cache", + &self.cached_prompt_tokens, + ), + ( + "cached_output_tokens", + "Output tokens from cache", + &self.cached_output_tokens, + ), + ( + "prefill_tokens", + "Prefill tokens computed", + &self.prefill_tokens, + ), + ( + "generated_tokens", + "Output tokens generated", + &self.generated_tokens, + ), + ] { + sub.register(name, desc, ctr.clone()); + } + let model_sub = sub.sub_registry_with_prefix("model"); + for (name, desc, fam) in [ + ( + "executions_started", + "Executions started", + &self.by_model_executions_started, + ), + ( + "executions_completed", + "Executions completed", + &self.by_model_executions_completed, + ), + ( + "executions_failed", + "Executions failed", + &self.by_model_executions_failed, + ), + ( + "prompt_tokens", + "Total prompt tokens", + &self.by_model_prompt_tokens, + ), + ( + "cached_prompt_tokens", + "Prompt tokens from cache", + &self.by_model_cached_prompt_tokens, + ), + ( + "cached_output_tokens", + "Output tokens from cache", + &self.by_model_cached_output_tokens, + ), + ( + "prefill_tokens", + "Prefill tokens computed", + &self.by_model_prefill_tokens, + ), + ( + "generated_tokens", + "Output tokens generated", + &self.by_model_generated_tokens, + ), + ] { + model_sub.register(name, desc, fam.clone()); + } + } + + fn note_model(&self, model_id: &str) -> ModelLabel { + if let Ok(mut seen) = self.seen_models.lock() + && !seen.contains(model_id) + { + seen.insert(model_id.to_string()); + } + ModelLabel { + model_id: 
model_id.to_string(), + } + } + + pub(crate) fn record_execution_started( + &self, + model_id: &str, + prompt: u64, + cached_prompt: u64, + cached_output: u64, + prefill: u64, + ) { + self.executions_started.inc(); + self.prompt_tokens.inc_by(prompt); + self.cached_prompt_tokens.inc_by(cached_prompt); + self.cached_output_tokens.inc_by(cached_output); + self.prefill_tokens.inc_by(prefill); + + let label = self.note_model(model_id); + self.by_model_executions_started.get_or_create(&label).inc(); + self.by_model_prompt_tokens + .get_or_create(&label) + .inc_by(prompt); + self.by_model_cached_prompt_tokens + .get_or_create(&label) + .inc_by(cached_prompt); + self.by_model_cached_output_tokens + .get_or_create(&label) + .inc_by(cached_output); + self.by_model_prefill_tokens + .get_or_create(&label) + .inc_by(prefill); + } + + pub(crate) fn record_execution_completed(&self, model_id: &str, generated: u64) { + self.generated_tokens.inc_by(generated); + self.executions_completed.inc(); + let label = self.note_model(model_id); + self.by_model_generated_tokens + .get_or_create(&label) + .inc_by(generated); + self.by_model_executions_completed + .get_or_create(&label) + .inc(); + } + + pub(crate) fn record_execution_failed(&self, model_id: &str, generated: u64) { + self.generated_tokens.inc_by(generated); + self.executions_failed.inc(); + let label = self.note_model(model_id); + self.by_model_generated_tokens + .get_or_create(&label) + .inc_by(generated); + self.by_model_executions_failed.get_or_create(&label).inc(); + } + + /// Snapshot the global counters for the GetStats RPC. 
+ pub(crate) fn global_snapshot(&self) -> hellas_pb::courtesy::TokenStats { + hellas_pb::courtesy::TokenStats { + executions_started: self.executions_started.get(), + executions_completed: self.executions_completed.get(), + executions_failed: self.executions_failed.get(), + prompt_tokens: self.prompt_tokens.get(), + cached_prompt_tokens: self.cached_prompt_tokens.get(), + cached_output_tokens: self.cached_output_tokens.get(), + prefill_tokens: self.prefill_tokens.get(), + generated_tokens: self.generated_tokens.get(), + } + } + + /// Snapshot a per-model row for the GetStats RPC. Only counters that have + /// observed events for this model are nonzero. + pub(crate) fn model_snapshot(&self, model_id: &str) -> hellas_pb::courtesy::TokenStats { + let label = ModelLabel { + model_id: model_id.to_string(), + }; + hellas_pb::courtesy::TokenStats { + executions_started: self.by_model_executions_started.get_or_create(&label).get(), + executions_completed: self + .by_model_executions_completed + .get_or_create(&label) + .get(), + executions_failed: self.by_model_executions_failed.get_or_create(&label).get(), + prompt_tokens: self.by_model_prompt_tokens.get_or_create(&label).get(), + cached_prompt_tokens: self + .by_model_cached_prompt_tokens + .get_or_create(&label) + .get(), + cached_output_tokens: self + .by_model_cached_output_tokens + .get_or_create(&label) + .get(), + prefill_tokens: self.by_model_prefill_tokens.get_or_create(&label).get(), + generated_tokens: self.by_model_generated_tokens.get_or_create(&label).get(), + } + } + + /// Iterate over all model ids that have ever been observed, for + /// enumerating per-model rows in the GetStats RPC. 
+ pub(crate) fn known_model_ids(&self) -> Vec { + self.seen_models + .lock() + .map(|seen| seen.iter().cloned().collect()) + .unwrap_or_default() + } +} diff --git a/crates/executor/src/state.rs b/crates/executor/src/state.rs index f44ac0c..5936f7b 100644 --- a/crates/executor/src/state.rs +++ b/crates/executor/src/state.rs @@ -1,198 +1,334 @@ use std::collections::HashMap; -use thiserror::Error; +use std::str::FromStr; +use std::time::Instant; -use crate::weights::ResolvedWeightKey; +use crate::DEFAULT_MAX_SEQ; +use catgrad::prelude::Dtype; +use hellas_core::{Digest, JsonBytes, OpaqueRequest, RequestCommitment, SymbolicRequest}; +use hellas_pb::courtesy::{ + QuotePreparedTextRequest, SymbolicStart as PbSymbolicStart, symbolic_start, +}; +use hellas_pb::hellas::{ + FinishStatus as PbFinishStatus, ReceiptEnvelope as PbReceiptEnvelope, WorkEvent as PbWorkEvent, + WorkFailed as PbWorkFailed, WorkFinished as PbWorkFinished, work_event, +}; +use hellas_pb::symbolic::SymbolicRequest as PbSymbolicRequest; +use hellas_rpc::ExecutorError; +use hellas_rpc::encode_token_ids; +use hellas_rpc::spec::DEFAULT_MODEL_REVISION; +use uuid::Uuid; -#[derive(Debug, Error)] -pub enum StateError { - #[error("quote not found: {0}")] - QuoteNotFound(String), - #[error("execution not found: {0}")] - ExecutionNotFound(String), +pub use hellas_rpc::error::StateError; + +#[derive(Clone, Debug, PartialEq, Eq, Hash)] +pub(crate) struct ModelLocator { + pub model_id: String, + pub revision: String, + pub dtype: Dtype, } -#[derive(Clone)] -pub struct ExecutionPlan { - pub graph: Vec, - pub weights_hint: Option, - pub input: String, - pub max_seq: u32, +impl ModelLocator { + pub(crate) fn spec(&self) -> String { + model_spec(&self.model_id, &self.revision) + } } -pub struct Quote { - pub plan: ExecutionPlan, +#[derive(Clone, Debug)] +pub struct Invocation { + pub input_ids: Vec, + pub max_new_tokens: u32, + pub stop_token_ids: Vec, } -pub struct Execution { - pub status: ExecutionStatus, - pub 
progress: u64, - pub result: Option>, - pub decoded: Option, +pub(crate) struct QuotePlan { + pub locator: ModelLocator, + pub invocation: Invocation, + pub initial_artifact_id: Option, } -#[derive(Clone, Copy)] -pub enum ExecutionStatus { - Pending, - Running, - Completed, - Failed, +impl QuotePlan { + pub(crate) fn from_prepared_text_request( + request: QuotePreparedTextRequest, + supported_dtypes: &[Dtype], + ) -> Result { + let model_id = request.huggingface_model_id.trim(); + if model_id.is_empty() { + return Err(ExecutorError::InvalidQuoteRequest( + "missing huggingface_model_id".to_string(), + )); + } + + let revision = request.huggingface_revision.trim(); + let revision = if revision.is_empty() { + DEFAULT_MODEL_REVISION + } else { + revision + } + .to_string(); + + let dtype = resolve_accept_dtypes(&request.accept_dtypes, supported_dtypes)?; + let max_new_tokens = if request.max_new_tokens == 0 { + DEFAULT_MAX_SEQ + } else { + request.max_new_tokens + }; + + let input_ids = request.prompt_token_ids.clone(); + if input_ids.is_empty() { + return Err(ExecutorError::InvalidTokenPayload( + "prompt is empty after decoding".to_string(), + )); + } + let stop_token_ids = request + .stop_token_ids + .iter() + .copied() + .map(|token| { + i32::try_from(token).map_err(|_| { + ExecutorError::InvalidTokenPayload(format!( + "stop token id {token} exceeds i32 range" + )) + }) + }) + .collect::, _>>()?; + let initial_artifact_id = parse_symbolic_start(request.start)?; + + Ok(Self { + locator: ModelLocator { + model_id: model_id.to_string(), + revision, + dtype, + }, + invocation: Invocation { + input_ids, + max_new_tokens, + stop_token_ids, + }, + initial_artifact_id, + }) + } } -impl ExecutionStatus { - pub fn as_str(&self) -> &'static str { - match self { - Self::Pending => "pending", - Self::Running => "running", - Self::Completed => "completed", - Self::Failed => "failed", +pub(crate) fn resolve_accept_dtypes( + prefs: &[String], + supported_dtypes: &[Dtype], +) -> 
Result { + if supported_dtypes.is_empty() { + return Err(ExecutorError::InvalidQuoteRequest( + "executor must support at least one dtype".to_string(), + )); + } + if prefs.is_empty() { + return Ok(supported_dtypes[0]); + } + let mut parsed = Vec::with_capacity(prefs.len()); + for raw in prefs { + let dtype = Dtype::from_str(raw).map_err(|e| { + ExecutorError::InvalidQuoteRequest(format!("invalid dtype `{raw}`: {e}")) + })?; + if matches!(dtype, Dtype::U32) { + return Err(ExecutorError::InvalidQuoteRequest( + "model dtype must be f32, f16, bf16, or f8".to_string(), + )); + } + parsed.push(dtype); + } + for dtype in &parsed { + if supported_dtypes.contains(dtype) { + return Ok(*dtype); } } + Err(ExecutorError::DtypeNotSupported { + request: parsed[0], + supported: supported_dtypes.to_vec(), + }) +} + +pub(crate) fn symbolic_request_to_pb(request: &SymbolicRequest) -> PbSymbolicRequest { + PbSymbolicRequest { + text_execution_cid: request.text_execution_cid.as_bytes().to_vec(), + } +} + +pub(crate) fn symbolic_request_from_pb( + request: PbSymbolicRequest, +) -> Result { + Ok(SymbolicRequest { + text_execution_cid: Digest::from_bytes(bytes32( + &request.text_execution_cid, + "text_execution_cid", + )?), + }) +} + +fn parse_symbolic_start(start: Option) -> Result, ExecutorError> { + let start = start + .and_then(|start| start.kind) + .ok_or_else(|| ExecutorError::InvalidQuoteRequest("missing symbolic start".to_string()))?; + match start { + symbolic_start::Kind::Genesis(_) => Ok(None), + symbolic_start::Kind::Artifact(artifact) => Ok(Some(Digest::from_bytes(bytes32( + &artifact.artifact_cid, + "artifact_cid", + )?))), + } +} + +fn bytes32(bytes: &[u8], field: &str) -> Result<[u8; 32], ExecutorError> { + bytes.try_into().map_err(|_| { + ExecutorError::InvalidQuoteRequest(format!("{field} must be 32 bytes, got {}", bytes.len())) + }) +} + +fn hex32(bytes: &[u8; 32]) -> String { + Digest::from_bytes(*bytes).to_string() +} + +pub(crate) fn model_spec(model_id: &str, 
revision: &str) -> String { + if revision.is_empty() { + model_id.to_string() + } else { + format!("{model_id}@{revision}") + } +} + +#[derive(Clone, Debug)] +pub(crate) enum LocalModelStatus { + Ready, + Failed(String), +} + +#[derive(Clone)] +pub struct QuoteRecord { + pub request_commitment: RequestCommitment, + pub expires_at: Instant, + pub model_id: String, + pub kind: QuoteKind, +} + +#[derive(Clone)] +pub enum QuoteKind { + Symbolic { + symbolic_request: SymbolicRequest, + locator: ModelLocator, + invocation: Invocation, + }, + Opaque { + request: OpaqueRequest, + output: JsonBytes, + }, } +#[derive(Default)] pub struct ExecutorState { - quotes: HashMap, - executions: HashMap, - graphs: HashMap>, - next_quote_id: u64, - next_execution_id: u64, + quotes: HashMap<[u8; 32], QuoteRecord>, } impl ExecutorState { pub fn new() -> Self { - Self { - quotes: HashMap::new(), - executions: HashMap::new(), - graphs: HashMap::new(), - next_quote_id: 0, - next_execution_id: 0, - } + Self::default() } - pub fn create_quote(&mut self, graph_id: String, plan: ExecutionPlan) -> String { - let quote_id = format!("quote-{}", self.next_quote_id); - self.next_quote_id += 1; - self.graphs.insert(graph_id.clone(), plan.graph.clone()); - self.quotes.insert(quote_id.clone(), Quote { plan }); - quote_id + pub fn create_quote(&mut self, quote: QuoteRecord) -> [u8; 32] { + let key = *quote.request_commitment.as_bytes(); + self.quotes.insert(key, quote); + key } - pub fn get_quote(&self, quote_id: &str) -> Result<&Quote, StateError> { - self.quotes - .get(quote_id) - .ok_or_else(|| StateError::QuoteNotFound(quote_id.to_string())) + pub fn get_quote( + &self, + request_commitment: &[u8], + now: Instant, + ) -> Result<&QuoteRecord, StateError> { + let key: [u8; 32] = request_commitment.try_into().map_err(|_| { + StateError::QuoteNotFound(format!( + "invalid request_commitment length {}", + request_commitment.len() + )) + })?; + let quote = self + .quotes + .get(&key) + .ok_or_else(|| 
StateError::QuoteNotFound(hex32(&key)))?; + if quote.expires_at <= now { + return Err(StateError::QuoteExpired(hex32(&key))); + } + Ok(quote) } - pub fn get_graph(&self, graph_id: &str) -> Option<&Vec> { - self.graphs.get(graph_id) + pub fn remove_quote(&mut self, request_commitment: &[u8]) -> Option { + let key: [u8; 32] = request_commitment.try_into().ok()?; + self.quotes.remove(&key) } - pub fn create_execution(&mut self, quote_id: String) -> Result { - if !self.quotes.contains_key("e_id) { - return Err(StateError::QuoteNotFound(quote_id)); - } - let execution_id = format!("exec-{}", self.next_execution_id); - self.next_execution_id += 1; - self.executions.insert( - execution_id.clone(), - Execution { - status: ExecutionStatus::Pending, - progress: 0, - result: None, - decoded: None, - }, - ); - Ok(execution_id) - } - - pub fn get_status(&self, execution_id: &str) -> Result<&ExecutionStatus, StateError> { - self.executions - .get(execution_id) - .map(|e| &e.status) - .ok_or_else(|| StateError::ExecutionNotFound(execution_id.to_string())) - } - - pub fn get_result(&self, execution_id: &str) -> Result<&[u8], StateError> { - self.executions - .get(execution_id) - .and_then(|e| e.result.as_deref()) - .ok_or_else(|| StateError::ExecutionNotFound(execution_id.to_string())) - } - - pub fn get_progress(&self, execution_id: &str) -> Result { - self.executions - .get(execution_id) - .map(|e| e.progress) - .ok_or_else(|| StateError::ExecutionNotFound(execution_id.to_string())) - } - - pub fn get_decoded(&self, execution_id: &str) -> Result, StateError> { - let decoded = self - .executions - .get(execution_id) - .map(|e| e.decoded.as_deref()); - decoded.ok_or_else(|| StateError::ExecutionNotFound(execution_id.to_string())) - } - - pub fn set_status( - &mut self, - execution_id: &str, - status: ExecutionStatus, - ) -> Result<(), StateError> { - self.executions - .get_mut(execution_id) - .map(|exec| exec.status = status) - .ok_or_else(|| 
StateError::ExecutionNotFound(execution_id.to_string())) - } - - pub fn set_result( - &mut self, - execution_id: &str, - result: Vec, - decoded: Option, - ) -> Result<(), StateError> { - self.executions - .get_mut(execution_id) - .map(|exec| { - exec.result = Some(result); - exec.decoded = decoded; - }) - .ok_or_else(|| StateError::ExecutionNotFound(execution_id.to_string())) - } - - pub fn append_output_chunk( - &mut self, - execution_id: &str, - chunk: &[u8], - decoded_chunk: Option<&str>, - progress: u64, - ) -> Result<(), StateError> { - let exec = self - .executions - .get_mut(execution_id) - .ok_or_else(|| StateError::ExecutionNotFound(execution_id.to_string()))?; - - exec.progress = progress; - - if !chunk.is_empty() { - exec.result - .get_or_insert_with(Vec::new) - .extend_from_slice(chunk); - } + pub fn prune_expired_quotes(&mut self, now: Instant) -> usize { + let before = self.quotes.len(); + self.quotes.retain(|_, quote| quote.expires_at > now); + before - self.quotes.len() + } +} - if let Some(decoded_chunk) = decoded_chunk { - if !decoded_chunk.is_empty() { - exec.decoded - .get_or_insert_with(String::new) - .push_str(decoded_chunk); - } - } +pub fn new_execution_id() -> String { + make_id("exec") +} + +fn make_id(prefix: &str) -> String { + format!("{prefix}-{}", Uuid::new_v4().simple()) +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum StopReason { + EndOfSequence, + MaxNewTokens, + Cancelled, +} - Ok(()) +impl StopReason { + pub fn to_pb(self) -> PbFinishStatus { + match self { + Self::EndOfSequence => PbFinishStatus::EndOfSequence, + Self::MaxNewTokens => PbFinishStatus::MaxOutput, + Self::Cancelled => PbFinishStatus::Cancelled, + } } } -impl Default for ExecutorState { - fn default() -> Self { - Self::new() +#[derive(Debug, Clone)] +pub enum Termination { + Completed { + stop_reason: StopReason, + output_tokens: Vec, + receipt_dag_cbor: Vec, + }, + Failed { + position: u64, + error: String, + }, +} + +impl Termination { + pub fn 
is_completed(&self) -> bool { + matches!(self, Self::Completed { .. }) + } + + pub fn into_pb(self) -> PbWorkEvent { + let kind = match self { + Self::Completed { + stop_reason, + output_tokens, + receipt_dag_cbor, + } => work_event::Kind::Finished(PbWorkFinished { + total_units: output_tokens.len() as u64, + status: stop_reason.to_pb() as i32, + output: encode_token_ids(&output_tokens), + receipt: Some(PbReceiptEnvelope { + dag_cbor: receipt_dag_cbor, + }), + }), + Self::Failed { position, error } => { + work_event::Kind::Failed(PbWorkFailed { position, error }) + } + }; + PbWorkEvent { kind: Some(kind) } } } diff --git a/crates/executor/src/weights.rs b/crates/executor/src/weights.rs deleted file mode 100644 index dd02799..0000000 --- a/crates/executor/src/weights.rs +++ /dev/null @@ -1,478 +0,0 @@ -use crate::ExecutorError; -use catgrad::interpreter::{self, backend::ndarray::NdArrayBackend}; -use catgrad::typecheck; -use catgrad_llm::legacy::models::utils::Config; -use catgrad_llm::utils::{get_model_chat_template, get_model_files, load_model}; -use hf_hub::Cache; -use std::collections::{HashMap, VecDeque}; -use std::path::Path; -use std::sync::Arc; -use thiserror::Error; -use tokio::sync::{mpsc, oneshot}; -use tokio::time::{sleep, Duration, Instant}; -use tokenizers::Tokenizer; -use tracing::{info, warn}; - -const DEFAULT_REF: &str = "main"; - -#[derive(Clone, Debug, PartialEq, Eq, Hash)] -pub struct ModelId(pub String); - -#[derive(Clone, Debug, PartialEq, Eq, Hash)] -pub struct ModelRevision(pub String); - -#[derive(Clone, Debug, PartialEq, Eq, Hash)] -pub struct ResolvedWeightKey { - pub model_id: ModelId, - pub revision: ModelRevision, -} - -#[derive(Clone)] -pub struct ModelBundle { - pub key: ResolvedWeightKey, - pub config: Config, - pub tokenizer: Tokenizer, - pub chat_template: Option, - pub parameter_values: interpreter::Parameters, - pub parameter_types: typecheck::Parameters, -} - -#[derive(Clone, Debug)] -pub enum EnsureDisposition { - 
Ready(ResolvedWeightKey), - Queued, - InFlight, - Failed(String), -} - -#[derive(Debug, Error, Clone)] -pub enum WeightsError { - #[error("weights not ready")] - NotReady, - #[error("weights failed: {0}")] - Failed(String), - #[error("unknown weights key")] - UnknownKey, - #[error("weights manager closed")] - ManagerClosed, -} - -#[allow(dead_code)] -#[derive(Clone, Debug)] -pub enum WeightsStatus { - Queued, - Resolving, - Downloading { revision: Option }, - Ready { revision: ModelRevision }, - Failed { error: String }, -} - -#[allow(dead_code)] -#[derive(Clone, Debug, Default)] -pub struct WeightsSnapshot { - pub per_model: HashMap, - pub active: Option, - pub queue: Vec, -} - -#[derive(Clone)] -pub struct WeightsManager { - tx: mpsc::UnboundedSender, -} - -#[allow(dead_code)] -enum Command { - EnsureDefaultReady { - model_id: ModelId, - reply: oneshot::Sender, - }, - Bundle { - key: ResolvedWeightKey, - reply: oneshot::Sender, WeightsError>>, - }, - Snapshot { - reply: oneshot::Sender, - }, -} - -enum JobEvent { - Resolved { model_id: ModelId, revision: ModelRevision }, - Completed { - model_id: ModelId, - revision: ModelRevision, - bundle: Arc, - }, - Failed { model_id: ModelId, error: String }, -} - -struct Entry { - status: WeightsStatus, - bundle: Option>, -} - -impl Default for Entry { - fn default() -> Self { - Self { - status: WeightsStatus::Queued, - bundle: None, - } - } -} - -struct ManagerState { - entries: HashMap, - active: Option, - queue: VecDeque, -} - -impl WeightsManager { - pub fn spawn() -> Self { - let (tx, mut rx) = mpsc::unbounded_channel::(); - let (job_tx, mut job_rx) = mpsc::unbounded_channel::(); - - tokio::spawn(async move { - let mut state = ManagerState { - entries: HashMap::new(), - active: None, - queue: VecDeque::new(), - }; - - loop { - tokio::select! 
{ - cmd = rx.recv() => { - let Some(cmd) = cmd else { break }; - handle_command(&mut state, cmd, job_tx.clone()); - } - evt = job_rx.recv() => { - let Some(evt) = evt else { break }; - handle_job_event(&mut state, evt); - maybe_start_next(&mut state, job_tx.clone()); - } - } - } - }); - - Self { tx } - } - - pub async fn ensure_default_ready(&self, model_id: ModelId) -> EnsureDisposition { - let (reply_tx, reply_rx) = oneshot::channel(); - if self - .tx - .send(Command::EnsureDefaultReady { - model_id, - reply: reply_tx, - }) - .is_err() - { - return EnsureDisposition::Failed("weights manager closed".to_string()); - } - reply_rx - .await - .unwrap_or_else(|_| EnsureDisposition::Failed("weights manager closed".to_string())) - } - - pub async fn ensure_default_ready_wait( - &self, - model_id: ModelId, - timeout: Duration, - ) -> Result { - let start = Instant::now(); - loop { - match self.ensure_default_ready(model_id.clone()).await { - EnsureDisposition::Ready(key) => return Ok(key), - EnsureDisposition::Failed(err) => return Err(WeightsError::Failed(err)), - EnsureDisposition::Queued | EnsureDisposition::InFlight => {} - } - if start.elapsed() >= timeout { - return Err(WeightsError::NotReady); - } - sleep(Duration::from_millis(25)).await; - } - } - - pub async fn bundle( - &self, - key: &ResolvedWeightKey, - ) -> Result, WeightsError> { - let (reply_tx, reply_rx) = oneshot::channel(); - self.tx - .send(Command::Bundle { - key: key.clone(), - reply: reply_tx, - }) - .map_err(|_| WeightsError::ManagerClosed)?; - reply_rx.await.map_err(|_| WeightsError::ManagerClosed)? 
- } - - #[allow(dead_code)] - pub async fn snapshot(&self) -> Result { - let (reply_tx, reply_rx) = oneshot::channel(); - self.tx - .send(Command::Snapshot { reply: reply_tx }) - .map_err(|_| WeightsError::ManagerClosed)?; - reply_rx.await.map_err(|_| WeightsError::ManagerClosed) - } -} - -pub fn default_ref_cached(model_id: &str) -> bool { - let repo = Cache::default().model(model_id.to_string()); - let has_config = repo.get("config.json").is_some(); - let has_tokenizer = repo.get("tokenizer.json").is_some(); - let has_weights = repo.get("model.safetensors").is_some() - || repo.get("model.safetensors.index.json").is_some(); - has_config && has_tokenizer && has_weights -} - -fn handle_command(state: &mut ManagerState, cmd: Command, job_tx: mpsc::UnboundedSender) { - match cmd { - Command::EnsureDefaultReady { model_id, reply } => { - let entry = state - .entries - .entry(model_id.clone()) - .or_insert_with(|| Entry { - status: WeightsStatus::Queued, - bundle: None, - }); - - let disposition = match &entry.status { - WeightsStatus::Ready { revision } => EnsureDisposition::Ready(ResolvedWeightKey { - model_id: model_id.clone(), - revision: revision.clone(), - }), - WeightsStatus::Failed { error } => { - if !state.queue.contains(&model_id) && state.active.as_ref() != Some(&model_id) - { - entry.status = WeightsStatus::Queued; - state.queue.push_back(model_id.clone()); - maybe_start_next(state, job_tx); - EnsureDisposition::Queued - } else { - EnsureDisposition::Failed(error.clone()) - } - } - WeightsStatus::Queued | WeightsStatus::Resolving | WeightsStatus::Downloading { .. 
} => { - if !state.queue.contains(&model_id) && state.active.as_ref() != Some(&model_id) - { - state.queue.push_back(model_id.clone()); - maybe_start_next(state, job_tx); - EnsureDisposition::Queued - } else { - EnsureDisposition::InFlight - } - } - }; - let _ = reply.send(disposition); - } - Command::Bundle { key, reply } => { - let entry = state.entries.get(&key.model_id); - let result = match entry.map(|e| (&e.status, &e.bundle)) { - Some((WeightsStatus::Ready { revision }, Some(bundle))) - if *revision == key.revision => - { - Ok(bundle.clone()) - } - Some((WeightsStatus::Ready { .. }, _)) => Err(WeightsError::UnknownKey), - Some((WeightsStatus::Failed { error }, _)) => Err(WeightsError::Failed(error.clone())), - Some((_status, _)) => Err(WeightsError::NotReady), - None => Err(WeightsError::UnknownKey), - }; - let _ = reply.send(result); - } - Command::Snapshot { reply } => { - let snapshot = WeightsSnapshot { - per_model: state - .entries - .iter() - .map(|(k, v)| (k.clone(), v.status.clone())) - .collect(), - active: state.active.clone(), - queue: state.queue.iter().cloned().collect(), - }; - let _ = reply.send(snapshot); - } - } -} - -fn handle_job_event(state: &mut ManagerState, evt: JobEvent) { - match evt { - JobEvent::Resolved { model_id, revision } => { - let entry = state - .entries - .entry(model_id.clone()) - .or_insert_with(Entry::default); - entry.status = WeightsStatus::Downloading { - revision: Some(revision), - }; - } - JobEvent::Completed { - model_id, - revision, - bundle, - } => { - let entry = state - .entries - .entry(model_id.clone()) - .or_insert_with(Entry::default); - entry.status = WeightsStatus::Ready { - revision: revision.clone(), - }; - entry.bundle = Some(bundle); - state.active = None; - info!(model = model_id.0, revision = revision.0, "weights ready"); - } - JobEvent::Failed { model_id, error } => { - let entry = state - .entries - .entry(model_id.clone()) - .or_insert_with(Entry::default); - entry.status = WeightsStatus::Failed 
{ - error: error.clone(), - }; - entry.bundle = None; - state.active = None; - warn!(model = model_id.0, error, "weights failed"); - } - } -} - -fn maybe_start_next(state: &mut ManagerState, job_tx: mpsc::UnboundedSender) { - if state.active.is_some() { - return; - } - - let Some(model_id) = state.queue.pop_front() else { - return; - }; - - state.active = Some(model_id.clone()); - if let Some(entry) = state.entries.get_mut(&model_id) { - entry.status = WeightsStatus::Resolving; - } - - info!(model = model_id.0, "weights ensure started"); - tokio::spawn(async move { - let model_id2 = model_id.clone(); - let job_tx2 = job_tx.clone(); - let result = tokio::task::spawn_blocking(move || load_default_bundle(&model_id2, job_tx2)) - .await - .map_err(|e| format!("weights worker join error: {e}")) - .and_then(|r| r.map_err(|e| e.to_string())); - - match result { - Ok(_) => {} - Err(error) => { - let _ = job_tx.send(JobEvent::Failed { model_id, error }); - } - } - }); -} - -fn load_default_bundle( - model_id: &ModelId, - job_tx: mpsc::UnboundedSender, -) -> Result<(), ExecutorError> { - let backend = NdArrayBackend; - - // Ensure at least config is present and derive the resolved snapshot SHA from its path. - let (_weights, config_path, _tokenizer_path, _tok_config) = - get_model_files(&model_id.0, DEFAULT_REF)?; - let revision = - extract_revision_from_snapshot_path(&config_path).ok_or_else(|| { - ExecutorError::WeightsError(format!( - "unexpected hf cache path (no snapshots/): {config_path:?}" - )) - })?; - - info!(model = model_id.0, revision = revision.0, "weights resolved"); - let _ = job_tx.send(JobEvent::Resolved { - model_id: model_id.clone(), - revision: revision.clone(), - }); - - // Load full model weights + tokenizer + config into memory. 
- let (parameter_values, parameter_types, config, tokenizer) = - load_model(&model_id.0, DEFAULT_REF, &backend)?; - - let chat_template = match get_model_chat_template(&model_id.0, DEFAULT_REF) { - Ok(t) if !t.trim().is_empty() => Some(t), - Ok(_) => None, - Err(err) => { - warn!(model = model_id.0, "failed to load chat template: {err}"); - None - } - }; - - let key = ResolvedWeightKey { - model_id: model_id.clone(), - revision: revision.clone(), - }; - let bundle = Arc::new(ModelBundle { - key: key.clone(), - config, - tokenizer, - chat_template, - parameter_values, - parameter_types, - }); - - let _ = job_tx.send(JobEvent::Completed { - model_id: model_id.clone(), - revision, - bundle, - }); - Ok(()) -} - -fn extract_revision_from_snapshot_path(path: &Path) -> Option { - let mut components = path.components().map(|c| c.as_os_str().to_string_lossy()); - while let Some(comp) = components.next() { - if comp == "snapshots" { - if let Some(sha) = components.next() { - let sha = sha.to_string(); - if !sha.trim().is_empty() { - return Some(ModelRevision(sha)); - } - } - return None; - } - } - None -} - -#[cfg(test)] -mod tests { - use super::*; - use std::path::PathBuf; - - #[test] - fn extracts_revision_from_snapshot_path() { - let p = PathBuf::from("/x/.cache/huggingface/hub/models--foo--bar/snapshots/abcd1234/config.json"); - assert_eq!( - extract_revision_from_snapshot_path(&p).unwrap().0, - "abcd1234" - ); - } - - #[test] - fn no_snapshot_segment_returns_none() { - let p = PathBuf::from("/x/config.json"); - assert!(extract_revision_from_snapshot_path(&p).is_none()); - } - - #[tokio::test] - async fn snapshot_is_available_without_network() { - let weights = WeightsManager::spawn(); - let snap = weights.snapshot().await.unwrap(); - assert!(snap.per_model.is_empty()); - assert!(snap.active.is_none()); - assert!(snap.queue.is_empty()); - - let status = WeightsStatus::Downloading { - revision: Some(ModelRevision("deadbeef".to_string())), - }; - if let 
WeightsStatus::Downloading { revision } = status { - assert_eq!(revision.unwrap().0, "deadbeef"); - } - } -} diff --git a/crates/executor/src/worker.rs b/crates/executor/src/worker.rs new file mode 100644 index 0000000..38ab77e --- /dev/null +++ b/crates/executor/src/worker.rs @@ -0,0 +1,266 @@ +use crate::executor::ExecutorMessage; +use crate::state::{Invocation, ModelLocator, StopReason}; +use chatgrad::PreparedPrompt; +use chatgrad::run::{GenerationControl, GenerationTermination, ModelEngine}; +use hellas_core::SymbolicRequest; +use hellas_pb::hellas::{ + WorkChunk as PbChunk, WorkEvent as PbWorkEvent, work_event::Kind as PbEvent, +}; +use std::collections::HashMap; +use std::sync::Arc; +use std::sync::atomic::{AtomicU64, Ordering}; +use std::sync::mpsc::{self, Receiver, SyncSender, TrySendError}; +use std::time::Instant; +use tokio::sync::mpsc as tokio_mpsc; +use tokio_util::sync::CancellationToken; +use tonic::Status; +use tracing::warn; + +pub(crate) struct ExecuteWorker { + tx: SyncSender, +} + +pub(crate) enum EnqueueError { + Busy(Box), + Stopped(Box), +} + +pub(crate) struct ExecuteJob { + pub execution_id: String, + pub model_id: String, + pub symbolic_request: SymbolicRequest, + pub locator: ModelLocator, + pub invocation: Invocation, + pub stream_batch_size: u32, + pub accepted_at: Instant, + pub cancel: CancellationToken, + pub sender: tokio_mpsc::Sender>, +} + +struct DecodeOutcome { + stop_reason: StopReason, + output_tokens: Vec, +} + +pub(crate) struct WorkerCompletion { + pub execution_id: String, + pub model_id: String, + pub symbolic_request: SymbolicRequest, + pub invocation: Invocation, + pub sender: tokio_mpsc::Sender>, + pub result: WorkerCompletionResult, +} + +pub(crate) enum WorkerCompletionResult { + Completed { + stop_reason: StopReason, + output_tokens: Vec, + }, + Failed { + position: u64, + error: String, + }, +} + +impl WorkerCompletionResult { + pub(crate) fn position(&self) -> u64 { + match self { + Self::Completed { 
output_tokens, .. } => output_tokens.len() as u64, + Self::Failed { position, .. } => *position, + } + } +} + +impl ExecuteWorker { + pub(crate) fn spawn(executor_tx: tokio_mpsc::UnboundedSender) -> Self { + let (tx, rx) = mpsc::sync_channel::(0); + std::thread::Builder::new() + .name("hellas-execute-worker".to_string()) + .spawn(move || worker_loop(rx, executor_tx)) + .expect("failed to spawn execute worker thread"); + Self { tx } + } + + pub(crate) fn try_enqueue(&self, job: ExecuteJob) -> Result<(), EnqueueError> { + match self.tx.try_send(job) { + Ok(()) => Ok(()), + Err(TrySendError::Full(job)) => Err(EnqueueError::Busy(Box::new(job))), + Err(TrySendError::Disconnected(job)) => Err(EnqueueError::Stopped(Box::new(job))), + } + } +} + +fn worker_loop( + rx: Receiver, + executor_tx: tokio_mpsc::UnboundedSender, +) { + let mut engines: HashMap = HashMap::new(); + while let Ok(job) = rx.recv() { + let execution_id = job.execution_id.clone(); + let model_id = job.model_id.clone(); + let sender = job.sender.clone(); + let cancel = job.cancel.clone(); + let symbolic_request = job.symbolic_request.clone(); + let invocation = job.invocation.clone(); + + let position = Arc::new(AtomicU64::new(0)); + let on_progress = make_on_progress( + Arc::clone(&position), + sender.clone(), + cancel.clone(), + execution_id.clone(), + ); + + let termination = match std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| { + run_job(job, on_progress, &mut engines) + })) { + Ok(Ok(outcome)) => WorkerCompletionResult::Completed { + stop_reason: outcome.stop_reason, + output_tokens: outcome.output_tokens, + }, + Ok(Err(err)) => { + let msg = format!("{err:#}"); + warn!("execute worker job {execution_id} failed: {msg}"); + WorkerCompletionResult::Failed { + position: position.load(Ordering::Relaxed), + error: msg, + } + } + Err(panic) => { + let msg = format!("worker panicked: {}", crate::backend::panic_message(&panic)); + warn!("execute worker job {execution_id} {msg}"); + 
WorkerCompletionResult::Failed { + position: position.load(Ordering::Relaxed), + error: msg, + } + } + }; + + let _ = executor_tx.send(ExecutorMessage::WorkerFinished(WorkerCompletion { + execution_id, + model_id, + symbolic_request, + invocation, + sender, + result: termination, + })); + } +} + +fn run_job( + job: ExecuteJob, + mut on_progress: impl FnMut(u64, &[u8]), + engines: &mut HashMap, +) -> Result { + let ExecuteJob { + execution_id, + locator, + invocation, + stream_batch_size, + accepted_at, + cancel, + .. + } = job; + + debug!(execution_id = %execution_id, "execute worker running model"); + debug!( + execution_id = %execution_id, + queue_wait_ms = accepted_at.elapsed().as_millis(), + prompt_tokens = invocation.input_ids.len(), + "execute worker starting" + ); + + let engine = match engines.get(&locator) { + Some(engine) => engine.clone(), + None => { + let backend = crate::backend::create_backend()?; + let engine = ModelEngine::new_with_backend( + &locator.model_id, + &locator.revision, + backend, + true, + locator.dtype, + ) + .map_err(|err| hellas_rpc::ExecutorError::WeightsError(err.to_string()))?; + engines.insert(locator.clone(), engine.clone()); + engine + } + }; + let prepared = PreparedPrompt::new( + input_ids_to_i32(&invocation.input_ids)?, + invocation.stop_token_ids, + ); + let batch_size = usize::try_from(stream_batch_size.max(1)) + .unwrap_or(usize::MAX) + .max(1); + let mut output_tokens = Vec::new(); + let mut pending = Vec::with_capacity(batch_size); + let mut generated = 0u64; + + let generated_output = engine + .generate_tokens_from_prepared(&prepared, invocation.max_new_tokens, |token| { + generated = generated.saturating_add(1); + output_tokens.push(token.token_id); + pending.push(token.token_id); + if pending.len() >= batch_size { + on_progress(generated, &hellas_rpc::encode_token_ids(&pending)); + pending.clear(); + } + if cancel.is_cancelled() { + Ok(GenerationControl::Cancel) + } else { + Ok(GenerationControl::Continue) + } + }) 
+ .map_err(|err| hellas_rpc::ExecutorError::WeightsError(err.to_string()))?; + + if !pending.is_empty() { + on_progress(generated, &hellas_rpc::encode_token_ids(&pending)); + } + + let stop_reason = match generated_output.termination { + GenerationTermination::Stop => StopReason::EndOfSequence, + GenerationTermination::MaxTokens => StopReason::MaxNewTokens, + GenerationTermination::Cancelled => StopReason::Cancelled, + }; + + Ok(DecodeOutcome { + stop_reason, + output_tokens, + }) +} + +fn input_ids_to_i32(input_ids: &[u32]) -> Result, hellas_rpc::ExecutorError> { + input_ids + .iter() + .copied() + .map(|token| { + i32::try_from(token).map_err(|_| { + hellas_rpc::ExecutorError::InvalidTokenPayload(format!( + "token id {token} exceeds i32 range" + )) + }) + }) + .collect() +} + +fn make_on_progress( + position: Arc, + sender: tokio_mpsc::Sender>, + cancel: CancellationToken, + execution_id: String, +) -> impl FnMut(u64, &[u8]) + Send { + move |progress: u64, chunk: &[u8]| { + position.store(progress, Ordering::Relaxed); + let event = PbWorkEvent { + kind: Some(PbEvent::Chunk(PbChunk { + position: progress, + bytes: chunk.to_vec(), + })), + }; + if sender.blocking_send(Ok(event)).is_err() { + debug!(%execution_id, "consumer dropped; cancelling worker"); + cancel.cancel(); + } + } +} diff --git a/crates/pb/Cargo.toml b/crates/pb/Cargo.toml new file mode 100644 index 0000000..0642826 --- /dev/null +++ b/crates/pb/Cargo.toml @@ -0,0 +1,30 @@ +[package] +name = "hellas-pb" +description = "Generated protobuf types for the Hellas protocol" +version.workspace = true +edition.workspace = true +license.workspace = true +repository.workspace = true +documentation.workspace = true + +[features] +default = [] +hellas = [] +symbolic = ["hellas"] +opaque = ["hellas"] +swarm = [] +courtesy = ["hellas", "symbolic"] +settlement = ["hellas"] +client = [] +server = [] +all = ["hellas", "symbolic", "opaque", "swarm", "courtesy", "client", "server"] +compile = ["dep:glob", 
"dep:tonic-prost-build", "all"] + +[dependencies] +prost = "0.14" +tonic = { version = "0.14", default-features = false, features = ["codegen"] } +tonic-prost = "0.14" + +[build-dependencies] +glob = { version = "0.3", optional = true } +tonic-prost-build = { version = "0.14", optional = true } diff --git a/crates/pb/README.md b/crates/pb/README.md new file mode 100644 index 0000000..ed5c98f --- /dev/null +++ b/crates/pb/README.md @@ -0,0 +1,44 @@ +# hellas-pb + +Generated protobuf bindings for Hellas. + +The source `.proto` files live under `../../proto/hellas`. Generated Rust files +are checked in under `src/` so normal builds do not need `protoc`, `buf`, or the +protobuf compiler toolchain. + +## Features + +Package features select which protobuf packages are exposed: + +- `hellas` - core shared protocol package. +- `symbolic` - symbolic work package; enables `hellas`. +- `opaque` - opaque work package; enables `hellas`. +- `swarm` - node / peer discovery package. +- `courtesy` - non-core convenience package; enables `hellas` and `symbolic`. + +Transport features select generated client/server stubs: + +- `client` - export generated gRPC clients for enabled packages. +- `server` - export generated gRPC server traits and service wrappers for + enabled packages. + +Convenience features: + +- `all` - enable every package plus `client` and `server`. +- `compile` - regenerate checked-in Rust bindings during build. This also + enables `all` and pulls in the optional codegen build dependencies. + +## Regenerating + +After editing files under `proto/`, run: + +```sh +cargo check -p hellas-pb --features compile +``` + +This writes regenerated files into `crates/pb/src/`. Commit the generated files +with the proto changes. + +`compile` is intentionally not a default feature. Downstream crates should +depend on the checked-in bindings and enable only the package/client/server +features they actually need. 
diff --git a/crates/pb/build.rs b/crates/pb/build.rs new file mode 100644 index 0000000..9fa7859 --- /dev/null +++ b/crates/pb/build.rs @@ -0,0 +1,35 @@ +fn main() { + #[cfg(feature = "compile")] + { + use std::path::Path; + const PROTO_ROOT: &str = "../../proto"; + + let pattern = format!("{PROTO_ROOT}/hellas/**/*.proto"); + let mut protos = glob::glob(&pattern) + .expect("invalid proto glob") + .collect::, _>>() + .expect("failed to read proto glob"); + protos.sort(); + + for proto in &protos { + println!("cargo:rerun-if-changed={}", proto.display()); + } + + let mut prost_config = tonic_prost_build::Config::new(); + prost_config.enable_type_names(); + + let proto_refs = protos + .iter() + .map(std::path::PathBuf::as_path) + .collect::>(); + + tonic_prost_build::configure() + .out_dir("src") + .emit_package(true) + .build_client(true) + .build_server(true) + .build_transport(false) + .compile_with_config(prost_config, &proto_refs, &[Path::new(PROTO_ROOT)]) + .expect("failed to compile Hellas protobuf definitions"); + } +} diff --git a/crates/pb/src/hellas.courtesy.v1.rs b/crates/pb/src/hellas.courtesy.v1.rs new file mode 100644 index 0000000..872268f --- /dev/null +++ b/crates/pb/src/hellas.courtesy.v1.rs @@ -0,0 +1,1446 @@ +// This file is @generated by prost-build. +#[derive(Clone, PartialEq, Eq, Hash, ::prost::Message)] +pub struct SymbolicStart { + #[prost(oneof = "symbolic_start::Kind", tags = "1, 2")] + pub kind: ::core::option::Option, +} +/// Nested message and enum types in `SymbolicStart`. 
+pub mod symbolic_start { + #[derive(Clone, PartialEq, Eq, Hash, ::prost::Oneof)] + pub enum Kind { + #[prost(message, tag = "1")] + Genesis(super::SymbolicGenesisStart), + #[prost(message, tag = "2")] + Artifact(super::SymbolicArtifactStart), + } +} +impl ::prost::Name for SymbolicStart { + const NAME: &'static str = "SymbolicStart"; + const PACKAGE: &'static str = "hellas.courtesy.v1"; + fn full_name() -> ::prost::alloc::string::String { + "hellas.courtesy.v1.SymbolicStart".into() + } + fn type_url() -> ::prost::alloc::string::String { + "/hellas.courtesy.v1.SymbolicStart".into() + } +} +#[derive(Clone, Copy, PartialEq, Eq, Hash, ::prost::Message)] +pub struct SymbolicGenesisStart {} +impl ::prost::Name for SymbolicGenesisStart { + const NAME: &'static str = "SymbolicGenesisStart"; + const PACKAGE: &'static str = "hellas.courtesy.v1"; + fn full_name() -> ::prost::alloc::string::String { + "hellas.courtesy.v1.SymbolicGenesisStart".into() + } + fn type_url() -> ::prost::alloc::string::String { + "/hellas.courtesy.v1.SymbolicGenesisStart".into() + } +} +#[derive(Clone, PartialEq, Eq, Hash, ::prost::Message)] +pub struct SymbolicArtifactStart { + /// catnix OutputId; exactly 32 bytes. 
+ #[prost(bytes = "vec", tag = "1")] + pub artifact_cid: ::prost::alloc::vec::Vec, +} +impl ::prost::Name for SymbolicArtifactStart { + const NAME: &'static str = "SymbolicArtifactStart"; + const PACKAGE: &'static str = "hellas.courtesy.v1"; + fn full_name() -> ::prost::alloc::string::String { + "hellas.courtesy.v1.SymbolicArtifactStart".into() + } + fn type_url() -> ::prost::alloc::string::String { + "/hellas.courtesy.v1.SymbolicArtifactStart".into() + } +} +#[derive(Clone, PartialEq, Eq, Hash, ::prost::Message)] +pub struct QuotePreparedTextRequest { + #[prost(string, tag = "1")] + pub huggingface_model_id: ::prost::alloc::string::String, + #[prost(string, tag = "2")] + pub huggingface_revision: ::prost::alloc::string::String, + #[prost(uint32, repeated, tag = "3")] + pub prompt_token_ids: ::prost::alloc::vec::Vec, + #[prost(uint32, tag = "4")] + pub max_new_tokens: u32, + #[prost(uint32, repeated, tag = "5")] + pub stop_token_ids: ::prost::alloc::vec::Vec, + #[prost(message, optional, tag = "6")] + pub start: ::core::option::Option, + /// Ordered preference list (each one of "f32", "f16", "bf16"). The server + /// picks the first entry it supports. Empty list lets the server pick its + /// preferred dtype freely. None of the entries supported -> request is + /// refused with FailedPrecondition. The chosen dtype is reported back in + /// QuotePreparedTextResponse.dtype. 
+ #[prost(string, repeated, tag = "7")] + pub accept_dtypes: ::prost::alloc::vec::Vec<::prost::alloc::string::String>, +} +impl ::prost::Name for QuotePreparedTextRequest { + const NAME: &'static str = "QuotePreparedTextRequest"; + const PACKAGE: &'static str = "hellas.courtesy.v1"; + fn full_name() -> ::prost::alloc::string::String { + "hellas.courtesy.v1.QuotePreparedTextRequest".into() + } + fn type_url() -> ::prost::alloc::string::String { + "/hellas.courtesy.v1.QuotePreparedTextRequest".into() + } +} +#[derive(Clone, PartialEq, Eq, Hash, ::prost::Message)] +pub struct QuotePreparedTextResponse { + #[prost(message, optional, tag = "1")] + pub ticket: ::core::option::Option, + #[prost(uint32, tag = "2")] + pub prompt_tokens: u32, + /// The dtype the server actually committed to running this quote at. + #[prost(string, tag = "3")] + pub dtype: ::prost::alloc::string::String, + #[prost(message, optional, tag = "4")] + pub symbolic_request: ::core::option::Option< + super::super::symbolic::v1::SymbolicRequest, + >, +} +impl ::prost::Name for QuotePreparedTextResponse { + const NAME: &'static str = "QuotePreparedTextResponse"; + const PACKAGE: &'static str = "hellas.courtesy.v1"; + fn full_name() -> ::prost::alloc::string::String { + "hellas.courtesy.v1.QuotePreparedTextResponse".into() + } + fn type_url() -> ::prost::alloc::string::String { + "/hellas.courtesy.v1.QuotePreparedTextResponse".into() + } +} +/// Convenience RPC: the server handles tokenization and symbolic request +/// construction. Intended for lightweight clients (browsers) that don't have +/// the tokenizer. 
+#[derive(Clone, PartialEq, Eq, Hash, ::prost::Message)] +pub struct QuotePromptRequest { + #[prost(string, tag = "1")] + pub huggingface_model_id: ::prost::alloc::string::String, + #[prost(string, tag = "2")] + pub huggingface_revision: ::prost::alloc::string::String, + #[prost(string, tag = "3")] + pub prompt: ::prost::alloc::string::String, + #[prost(uint32, tag = "4")] + pub max_new_tokens: u32, + /// Ordered preference list (each one of "f32", "f16", "bf16"). The server + /// picks the first entry it supports. Empty list lets the server pick its + /// preferred dtype freely. None of the entries supported -> request is + /// refused with FailedPrecondition. The chosen dtype is reported back in + /// QuotePromptResponse.dtype. + #[prost(string, repeated, tag = "5")] + pub accept_dtypes: ::prost::alloc::vec::Vec<::prost::alloc::string::String>, +} +impl ::prost::Name for QuotePromptRequest { + const NAME: &'static str = "QuotePromptRequest"; + const PACKAGE: &'static str = "hellas.courtesy.v1"; + fn full_name() -> ::prost::alloc::string::String { + "hellas.courtesy.v1.QuotePromptRequest".into() + } + fn type_url() -> ::prost::alloc::string::String { + "/hellas.courtesy.v1.QuotePromptRequest".into() + } +} +#[derive(Clone, PartialEq, Eq, Hash, ::prost::Message)] +pub struct QuotePromptResponse { + #[prost(message, optional, tag = "1")] + pub ticket: ::core::option::Option, + #[prost(uint32, tag = "2")] + pub prompt_tokens: u32, + /// The dtype the server actually committed to running this quote at. 
+ #[prost(string, tag = "3")] + pub dtype: ::prost::alloc::string::String, + #[prost(message, optional, tag = "4")] + pub symbolic_request: ::core::option::Option< + super::super::symbolic::v1::SymbolicRequest, + >, +} +impl ::prost::Name for QuotePromptResponse { + const NAME: &'static str = "QuotePromptResponse"; + const PACKAGE: &'static str = "hellas.courtesy.v1"; + fn full_name() -> ::prost::alloc::string::String { + "hellas.courtesy.v1.QuotePromptResponse".into() + } + fn type_url() -> ::prost::alloc::string::String { + "/hellas.courtesy.v1.QuotePromptResponse".into() + } +} +/// Convenience RPC: chat-style prompt quoting. +/// Like QuotePrompt but accepts a message array + system prompt. +/// The server applies the model's chat template to produce the prompt. +#[derive(Clone, PartialEq, Eq, Hash, ::prost::Message)] +pub struct ChatMessage { + /// "user", "assistant" + #[prost(string, tag = "1")] + pub role: ::prost::alloc::string::String, + #[prost(string, tag = "2")] + pub content: ::prost::alloc::string::String, +} +impl ::prost::Name for ChatMessage { + const NAME: &'static str = "ChatMessage"; + const PACKAGE: &'static str = "hellas.courtesy.v1"; + fn full_name() -> ::prost::alloc::string::String { + "hellas.courtesy.v1.ChatMessage".into() + } + fn type_url() -> ::prost::alloc::string::String { + "/hellas.courtesy.v1.ChatMessage".into() + } +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct QuoteChatPromptRequest { + #[prost(string, tag = "1")] + pub huggingface_model_id: ::prost::alloc::string::String, + #[prost(string, tag = "2")] + pub huggingface_revision: ::prost::alloc::string::String, + #[prost(message, repeated, tag = "3")] + pub messages: ::prost::alloc::vec::Vec, + #[prost(uint32, tag = "4")] + pub max_new_tokens: u32, + #[prost(string, tag = "5")] + pub system_prompt: ::prost::alloc::string::String, + /// Ordered preference list (each one of "f32", "f16", "bf16"). The server + /// picks the first entry it supports. 
Empty list lets the server pick its + /// preferred dtype freely. None of the entries supported -> request is + /// refused with FailedPrecondition. The chosen dtype is reported back in + /// QuoteChatPromptResponse.dtype. + #[prost(string, repeated, tag = "6")] + pub accept_dtypes: ::prost::alloc::vec::Vec<::prost::alloc::string::String>, +} +impl ::prost::Name for QuoteChatPromptRequest { + const NAME: &'static str = "QuoteChatPromptRequest"; + const PACKAGE: &'static str = "hellas.courtesy.v1"; + fn full_name() -> ::prost::alloc::string::String { + "hellas.courtesy.v1.QuoteChatPromptRequest".into() + } + fn type_url() -> ::prost::alloc::string::String { + "/hellas.courtesy.v1.QuoteChatPromptRequest".into() + } +} +#[derive(Clone, PartialEq, Eq, Hash, ::prost::Message)] +pub struct QuoteChatPromptResponse { + #[prost(message, optional, tag = "1")] + pub ticket: ::core::option::Option, + #[prost(uint32, tag = "2")] + pub prompt_tokens: u32, + /// The dtype the server actually committed to running this quote at. + #[prost(string, tag = "3")] + pub dtype: ::prost::alloc::string::String, + #[prost(message, optional, tag = "4")] + pub symbolic_request: ::core::option::Option< + super::super::symbolic::v1::SymbolicRequest, + >, +} +impl ::prost::Name for QuoteChatPromptResponse { + const NAME: &'static str = "QuoteChatPromptResponse"; + const PACKAGE: &'static str = "hellas.courtesy.v1"; + fn full_name() -> ::prost::alloc::string::String { + "hellas.courtesy.v1.QuoteChatPromptResponse".into() + } + fn type_url() -> ::prost::alloc::string::String { + "/hellas.courtesy.v1.QuoteChatPromptResponse".into() + } +} +/// Store one canonical catnix artifact by its BLAKE3 CID. This API does not +/// publish symbolic metadata such as model locators or lazy substitutions; those +/// are separate provider-local interpretation state. 
+#[derive(Clone, PartialEq, Eq, Hash, ::prost::Message)] +pub struct PutArtifactRequest { + #[prost(bytes = "vec", tag = "1")] + pub canonical_artifact: ::prost::alloc::vec::Vec, +} +impl ::prost::Name for PutArtifactRequest { + const NAME: &'static str = "PutArtifactRequest"; + const PACKAGE: &'static str = "hellas.courtesy.v1"; + fn full_name() -> ::prost::alloc::string::String { + "hellas.courtesy.v1.PutArtifactRequest".into() + } + fn type_url() -> ::prost::alloc::string::String { + "/hellas.courtesy.v1.PutArtifactRequest".into() + } +} +#[derive(Clone, PartialEq, Eq, Hash, ::prost::Message)] +pub struct PutArtifactResponse { + /// BLAKE3 digest of canonical_artifact; exactly 32 bytes. + #[prost(bytes = "vec", tag = "1")] + pub cid: ::prost::alloc::vec::Vec, +} +impl ::prost::Name for PutArtifactResponse { + const NAME: &'static str = "PutArtifactResponse"; + const PACKAGE: &'static str = "hellas.courtesy.v1"; + fn full_name() -> ::prost::alloc::string::String { + "hellas.courtesy.v1.PutArtifactResponse".into() + } + fn type_url() -> ::prost::alloc::string::String { + "/hellas.courtesy.v1.PutArtifactResponse".into() + } +} +#[derive(Clone, PartialEq, Eq, Hash, ::prost::Message)] +pub struct GetArtifactRequest { + /// BLAKE3 digest of canonical artifact bytes; exactly 32 bytes. 
+ #[prost(bytes = "vec", tag = "1")] + pub cid: ::prost::alloc::vec::Vec, +} +impl ::prost::Name for GetArtifactRequest { + const NAME: &'static str = "GetArtifactRequest"; + const PACKAGE: &'static str = "hellas.courtesy.v1"; + fn full_name() -> ::prost::alloc::string::String { + "hellas.courtesy.v1.GetArtifactRequest".into() + } + fn type_url() -> ::prost::alloc::string::String { + "/hellas.courtesy.v1.GetArtifactRequest".into() + } +} +#[derive(Clone, PartialEq, Eq, Hash, ::prost::Message)] +pub struct GetArtifactResponse { + #[prost(bytes = "vec", tag = "1")] + pub canonical_artifact: ::prost::alloc::vec::Vec, +} +impl ::prost::Name for GetArtifactResponse { + const NAME: &'static str = "GetArtifactResponse"; + const PACKAGE: &'static str = "hellas.courtesy.v1"; + fn full_name() -> ::prost::alloc::string::String { + "hellas.courtesy.v1.GetArtifactResponse".into() + } + fn type_url() -> ::prost::alloc::string::String { + "/hellas.courtesy.v1.GetArtifactResponse".into() + } +} +/// List models known to the executor and their readiness status. +#[derive(Clone, Copy, PartialEq, Eq, Hash, ::prost::Message)] +pub struct ListModelsRequest {} +impl ::prost::Name for ListModelsRequest { + const NAME: &'static str = "ListModelsRequest"; + const PACKAGE: &'static str = "hellas.courtesy.v1"; + fn full_name() -> ::prost::alloc::string::String { + "hellas.courtesy.v1.ListModelsRequest".into() + } + fn type_url() -> ::prost::alloc::string::String { + "/hellas.courtesy.v1.ListModelsRequest".into() + } +} +#[derive(Clone, PartialEq, Eq, Hash, ::prost::Message)] +pub struct ModelInfo { + #[prost(string, tag = "1")] + pub model_id: ::prost::alloc::string::String, + #[prost(string, tag = "2")] + pub revision: ::prost::alloc::string::String, + #[prost(enumeration = "ModelStatus", tag = "3")] + pub status: i32, + /// Human-readable error when status is FAILED. 
+ #[prost(string, tag = "4")] + pub error: ::prost::alloc::string::String, +} +impl ::prost::Name for ModelInfo { + const NAME: &'static str = "ModelInfo"; + const PACKAGE: &'static str = "hellas.courtesy.v1"; + fn full_name() -> ::prost::alloc::string::String { + "hellas.courtesy.v1.ModelInfo".into() + } + fn type_url() -> ::prost::alloc::string::String { + "/hellas.courtesy.v1.ModelInfo".into() + } +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct ListModelsResponse { + #[prost(message, repeated, tag = "1")] + pub models: ::prost::alloc::vec::Vec, +} +impl ::prost::Name for ListModelsResponse { + const NAME: &'static str = "ListModelsResponse"; + const PACKAGE: &'static str = "hellas.courtesy.v1"; + fn full_name() -> ::prost::alloc::string::String { + "hellas.courtesy.v1.ListModelsResponse".into() + } + fn type_url() -> ::prost::alloc::string::String { + "/hellas.courtesy.v1.ListModelsResponse".into() + } +} +/// Convenience RPC: stateless token decoding. +/// Client streams raw token bytes, server decodes with the model's tokenizer +/// and streams back text chunks. +#[derive(Clone, PartialEq, Eq, Hash, ::prost::Message)] +pub struct DecodeTokensRequest { + #[prost(string, tag = "1")] + pub huggingface_model_id: ::prost::alloc::string::String, + #[prost(string, tag = "2")] + pub huggingface_revision: ::prost::alloc::string::String, + /// Raw token bytes (little-endian u32 token IDs, same format as Symbolic output). 
+ #[prost(bytes = "vec", tag = "3")] + pub token_bytes: ::prost::alloc::vec::Vec, +} +impl ::prost::Name for DecodeTokensRequest { + const NAME: &'static str = "DecodeTokensRequest"; + const PACKAGE: &'static str = "hellas.courtesy.v1"; + fn full_name() -> ::prost::alloc::string::String { + "hellas.courtesy.v1.DecodeTokensRequest".into() + } + fn type_url() -> ::prost::alloc::string::String { + "/hellas.courtesy.v1.DecodeTokensRequest".into() + } +} +#[derive(Clone, PartialEq, Eq, Hash, ::prost::Message)] +pub struct DecodeTokensResponse { + /// Decoded text (incremental delta; concatenate all responses for full output). + #[prost(string, tag = "1")] + pub text: ::prost::alloc::string::String, +} +impl ::prost::Name for DecodeTokensResponse { + const NAME: &'static str = "DecodeTokensResponse"; + const PACKAGE: &'static str = "hellas.courtesy.v1"; + fn full_name() -> ::prost::alloc::string::String { + "hellas.courtesy.v1.DecodeTokensResponse".into() + } + fn type_url() -> ::prost::alloc::string::String { + "/hellas.courtesy.v1.DecodeTokensResponse".into() + } +} +/// Cumulative token statistics since node start. 
+#[derive(Clone, Copy, PartialEq, Eq, Hash, ::prost::Message)] +pub struct GetStatsRequest {} +impl ::prost::Name for GetStatsRequest { + const NAME: &'static str = "GetStatsRequest"; + const PACKAGE: &'static str = "hellas.courtesy.v1"; + fn full_name() -> ::prost::alloc::string::String { + "hellas.courtesy.v1.GetStatsRequest".into() + } + fn type_url() -> ::prost::alloc::string::String { + "/hellas.courtesy.v1.GetStatsRequest".into() + } +} +#[derive(Clone, Copy, PartialEq, Eq, Hash, ::prost::Message)] +pub struct TokenStats { + #[prost(uint64, tag = "1")] + pub executions_started: u64, + #[prost(uint64, tag = "2")] + pub executions_completed: u64, + #[prost(uint64, tag = "3")] + pub executions_failed: u64, + #[prost(uint64, tag = "4")] + pub prompt_tokens: u64, + #[prost(uint64, tag = "5")] + pub cached_prompt_tokens: u64, + #[prost(uint64, tag = "6")] + pub cached_output_tokens: u64, + #[prost(uint64, tag = "7")] + pub prefill_tokens: u64, + #[prost(uint64, tag = "8")] + pub generated_tokens: u64, +} +impl ::prost::Name for TokenStats { + const NAME: &'static str = "TokenStats"; + const PACKAGE: &'static str = "hellas.courtesy.v1"; + fn full_name() -> ::prost::alloc::string::String { + "hellas.courtesy.v1.TokenStats".into() + } + fn type_url() -> ::prost::alloc::string::String { + "/hellas.courtesy.v1.TokenStats".into() + } +} +#[derive(Clone, PartialEq, Eq, Hash, ::prost::Message)] +pub struct ModelTokenStats { + #[prost(string, tag = "1")] + pub model_id: ::prost::alloc::string::String, + #[prost(message, optional, tag = "2")] + pub stats: ::core::option::Option, +} +impl ::prost::Name for ModelTokenStats { + const NAME: &'static str = "ModelTokenStats"; + const PACKAGE: &'static str = "hellas.courtesy.v1"; + fn full_name() -> ::prost::alloc::string::String { + "hellas.courtesy.v1.ModelTokenStats".into() + } + fn type_url() -> ::prost::alloc::string::String { + "/hellas.courtesy.v1.ModelTokenStats".into() + } +} +#[derive(Clone, PartialEq, ::prost::Message)] 
+pub struct GetStatsResponse { + #[prost(message, optional, tag = "1")] + pub stats: ::core::option::Option, + #[prost(message, repeated, tag = "2")] + pub model_stats: ::prost::alloc::vec::Vec, +} +impl ::prost::Name for GetStatsResponse { + const NAME: &'static str = "GetStatsResponse"; + const PACKAGE: &'static str = "hellas.courtesy.v1"; + fn full_name() -> ::prost::alloc::string::String { + "hellas.courtesy.v1.GetStatsResponse".into() + } + fn type_url() -> ::prost::alloc::string::String { + "/hellas.courtesy.v1.GetStatsResponse".into() + } +} +#[derive(Clone, PartialEq, Eq, Hash, ::prost::Message)] +pub struct GetModelStatsRequest { + #[prost(string, tag = "1")] + pub model_id: ::prost::alloc::string::String, +} +impl ::prost::Name for GetModelStatsRequest { + const NAME: &'static str = "GetModelStatsRequest"; + const PACKAGE: &'static str = "hellas.courtesy.v1"; + fn full_name() -> ::prost::alloc::string::String { + "hellas.courtesy.v1.GetModelStatsRequest".into() + } + fn type_url() -> ::prost::alloc::string::String { + "/hellas.courtesy.v1.GetModelStatsRequest".into() + } +} +#[derive(Clone, PartialEq, Eq, Hash, ::prost::Message)] +pub struct GetModelStatsResponse { + #[prost(string, tag = "1")] + pub model_id: ::prost::alloc::string::String, + #[prost(message, optional, tag = "2")] + pub stats: ::core::option::Option, +} +impl ::prost::Name for GetModelStatsResponse { + const NAME: &'static str = "GetModelStatsResponse"; + const PACKAGE: &'static str = "hellas.courtesy.v1"; + fn full_name() -> ::prost::alloc::string::String { + "hellas.courtesy.v1.GetModelStatsResponse".into() + } + fn type_url() -> ::prost::alloc::string::String { + "/hellas.courtesy.v1.GetModelStatsResponse".into() + } +} +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, ::prost::Enumeration)] +#[repr(i32)] +pub enum ModelStatus { + Unspecified = 0, + Queued = 1, + Loading = 2, + Ready = 3, + Failed = 4, +} +impl ModelStatus { + /// String value of the enum field names 
used in the ProtoBuf definition. + /// + /// The values are not transformed in any way and thus are considered stable + /// (if the ProtoBuf definition does not change) and safe for programmatic use. + pub fn as_str_name(&self) -> &'static str { + match self { + Self::Unspecified => "MODEL_STATUS_UNSPECIFIED", + Self::Queued => "MODEL_STATUS_QUEUED", + Self::Loading => "MODEL_STATUS_LOADING", + Self::Ready => "MODEL_STATUS_READY", + Self::Failed => "MODEL_STATUS_FAILED", + } + } + /// Creates an enum from field names used in the ProtoBuf definition. + pub fn from_str_name(value: &str) -> ::core::option::Option { + match value { + "MODEL_STATUS_UNSPECIFIED" => Some(Self::Unspecified), + "MODEL_STATUS_QUEUED" => Some(Self::Queued), + "MODEL_STATUS_LOADING" => Some(Self::Loading), + "MODEL_STATUS_READY" => Some(Self::Ready), + "MODEL_STATUS_FAILED" => Some(Self::Failed), + _ => None, + } + } +} +/// Generated client implementations. +pub mod courtesy_client { + #![allow( + unused_variables, + dead_code, + missing_docs, + clippy::wildcard_imports, + clippy::let_unit_value, + )] + use tonic::codegen::*; + use tonic::codegen::http::Uri; + #[derive(Debug, Clone)] + pub struct CourtesyClient { + inner: tonic::client::Grpc, + } + impl CourtesyClient + where + T: tonic::client::GrpcService, + T::Error: Into, + T::ResponseBody: Body + std::marker::Send + 'static, + ::Error: Into + std::marker::Send, + { + pub fn new(inner: T) -> Self { + let inner = tonic::client::Grpc::new(inner); + Self { inner } + } + pub fn with_origin(inner: T, origin: Uri) -> Self { + let inner = tonic::client::Grpc::with_origin(inner, origin); + Self { inner } + } + pub fn with_interceptor( + inner: T, + interceptor: F, + ) -> CourtesyClient> + where + F: tonic::service::Interceptor, + T::ResponseBody: Default, + T: tonic::codegen::Service< + http::Request, + Response = http::Response< + >::ResponseBody, + >, + >, + , + >>::Error: Into + std::marker::Send + std::marker::Sync, + { + 
CourtesyClient::new(InterceptedService::new(inner, interceptor)) + } + /// Compress requests with the given encoding. + /// + /// This requires the server to support it otherwise it might respond with an + /// error. + #[must_use] + pub fn send_compressed(mut self, encoding: CompressionEncoding) -> Self { + self.inner = self.inner.send_compressed(encoding); + self + } + /// Enable decompressing responses. + #[must_use] + pub fn accept_compressed(mut self, encoding: CompressionEncoding) -> Self { + self.inner = self.inner.accept_compressed(encoding); + self + } + /// Limits the maximum size of a decoded message. + /// + /// Default: `4MB` + #[must_use] + pub fn max_decoding_message_size(mut self, limit: usize) -> Self { + self.inner = self.inner.max_decoding_message_size(limit); + self + } + /// Limits the maximum size of an encoded message. + /// + /// Default: `usize::MAX` + #[must_use] + pub fn max_encoding_message_size(mut self, limit: usize) -> Self { + self.inner = self.inner.max_encoding_message_size(limit); + self + } + pub async fn quote_prepared_text( + &mut self, + request: impl tonic::IntoRequest, + ) -> std::result::Result< + tonic::Response, + tonic::Status, + > { + self.inner + .ready() + .await + .map_err(|e| { + tonic::Status::unknown( + format!("Service was not ready: {}", e.into()), + ) + })?; + let codec = tonic_prost::ProstCodec::default(); + let path = http::uri::PathAndQuery::from_static( + "/hellas.courtesy.v1.Courtesy/QuotePreparedText", + ); + let mut req = request.into_request(); + req.extensions_mut() + .insert( + GrpcMethod::new("hellas.courtesy.v1.Courtesy", "QuotePreparedText"), + ); + self.inner.unary(req, path, codec).await + } + pub async fn quote_prompt( + &mut self, + request: impl tonic::IntoRequest, + ) -> std::result::Result< + tonic::Response, + tonic::Status, + > { + self.inner + .ready() + .await + .map_err(|e| { + tonic::Status::unknown( + format!("Service was not ready: {}", e.into()), + ) + })?; + let codec = 
tonic_prost::ProstCodec::default(); + let path = http::uri::PathAndQuery::from_static( + "/hellas.courtesy.v1.Courtesy/QuotePrompt", + ); + let mut req = request.into_request(); + req.extensions_mut() + .insert(GrpcMethod::new("hellas.courtesy.v1.Courtesy", "QuotePrompt")); + self.inner.unary(req, path, codec).await + } + pub async fn quote_chat_prompt( + &mut self, + request: impl tonic::IntoRequest, + ) -> std::result::Result< + tonic::Response, + tonic::Status, + > { + self.inner + .ready() + .await + .map_err(|e| { + tonic::Status::unknown( + format!("Service was not ready: {}", e.into()), + ) + })?; + let codec = tonic_prost::ProstCodec::default(); + let path = http::uri::PathAndQuery::from_static( + "/hellas.courtesy.v1.Courtesy/QuoteChatPrompt", + ); + let mut req = request.into_request(); + req.extensions_mut() + .insert( + GrpcMethod::new("hellas.courtesy.v1.Courtesy", "QuoteChatPrompt"), + ); + self.inner.unary(req, path, codec).await + } + pub async fn put_artifact( + &mut self, + request: impl tonic::IntoRequest, + ) -> std::result::Result< + tonic::Response, + tonic::Status, + > { + self.inner + .ready() + .await + .map_err(|e| { + tonic::Status::unknown( + format!("Service was not ready: {}", e.into()), + ) + })?; + let codec = tonic_prost::ProstCodec::default(); + let path = http::uri::PathAndQuery::from_static( + "/hellas.courtesy.v1.Courtesy/PutArtifact", + ); + let mut req = request.into_request(); + req.extensions_mut() + .insert(GrpcMethod::new("hellas.courtesy.v1.Courtesy", "PutArtifact")); + self.inner.unary(req, path, codec).await + } + pub async fn get_artifact( + &mut self, + request: impl tonic::IntoRequest, + ) -> std::result::Result< + tonic::Response, + tonic::Status, + > { + self.inner + .ready() + .await + .map_err(|e| { + tonic::Status::unknown( + format!("Service was not ready: {}", e.into()), + ) + })?; + let codec = tonic_prost::ProstCodec::default(); + let path = http::uri::PathAndQuery::from_static( + 
"/hellas.courtesy.v1.Courtesy/GetArtifact", + ); + let mut req = request.into_request(); + req.extensions_mut() + .insert(GrpcMethod::new("hellas.courtesy.v1.Courtesy", "GetArtifact")); + self.inner.unary(req, path, codec).await + } + pub async fn list_models( + &mut self, + request: impl tonic::IntoRequest, + ) -> std::result::Result< + tonic::Response, + tonic::Status, + > { + self.inner + .ready() + .await + .map_err(|e| { + tonic::Status::unknown( + format!("Service was not ready: {}", e.into()), + ) + })?; + let codec = tonic_prost::ProstCodec::default(); + let path = http::uri::PathAndQuery::from_static( + "/hellas.courtesy.v1.Courtesy/ListModels", + ); + let mut req = request.into_request(); + req.extensions_mut() + .insert(GrpcMethod::new("hellas.courtesy.v1.Courtesy", "ListModels")); + self.inner.unary(req, path, codec).await + } + pub async fn decode_tokens( + &mut self, + request: impl tonic::IntoStreamingRequest< + Message = super::DecodeTokensRequest, + >, + ) -> std::result::Result< + tonic::Response>, + tonic::Status, + > { + self.inner + .ready() + .await + .map_err(|e| { + tonic::Status::unknown( + format!("Service was not ready: {}", e.into()), + ) + })?; + let codec = tonic_prost::ProstCodec::default(); + let path = http::uri::PathAndQuery::from_static( + "/hellas.courtesy.v1.Courtesy/DecodeTokens", + ); + let mut req = request.into_streaming_request(); + req.extensions_mut() + .insert(GrpcMethod::new("hellas.courtesy.v1.Courtesy", "DecodeTokens")); + self.inner.streaming(req, path, codec).await + } + pub async fn get_stats( + &mut self, + request: impl tonic::IntoRequest, + ) -> std::result::Result< + tonic::Response, + tonic::Status, + > { + self.inner + .ready() + .await + .map_err(|e| { + tonic::Status::unknown( + format!("Service was not ready: {}", e.into()), + ) + })?; + let codec = tonic_prost::ProstCodec::default(); + let path = http::uri::PathAndQuery::from_static( + "/hellas.courtesy.v1.Courtesy/GetStats", + ); + let mut req = 
request.into_request(); + req.extensions_mut() + .insert(GrpcMethod::new("hellas.courtesy.v1.Courtesy", "GetStats")); + self.inner.unary(req, path, codec).await + } + pub async fn get_model_stats( + &mut self, + request: impl tonic::IntoRequest, + ) -> std::result::Result< + tonic::Response, + tonic::Status, + > { + self.inner + .ready() + .await + .map_err(|e| { + tonic::Status::unknown( + format!("Service was not ready: {}", e.into()), + ) + })?; + let codec = tonic_prost::ProstCodec::default(); + let path = http::uri::PathAndQuery::from_static( + "/hellas.courtesy.v1.Courtesy/GetModelStats", + ); + let mut req = request.into_request(); + req.extensions_mut() + .insert(GrpcMethod::new("hellas.courtesy.v1.Courtesy", "GetModelStats")); + self.inner.unary(req, path, codec).await + } + } +} +/// Generated server implementations. +pub mod courtesy_server { + #![allow( + unused_variables, + dead_code, + missing_docs, + clippy::wildcard_imports, + clippy::let_unit_value, + )] + use tonic::codegen::*; + /// Generated trait containing gRPC methods that should be implemented for use with CourtesyServer. 
+ #[async_trait] + pub trait Courtesy: std::marker::Send + std::marker::Sync + 'static { + async fn quote_prepared_text( + &self, + request: tonic::Request, + ) -> std::result::Result< + tonic::Response, + tonic::Status, + >; + async fn quote_prompt( + &self, + request: tonic::Request, + ) -> std::result::Result< + tonic::Response, + tonic::Status, + >; + async fn quote_chat_prompt( + &self, + request: tonic::Request, + ) -> std::result::Result< + tonic::Response, + tonic::Status, + >; + async fn put_artifact( + &self, + request: tonic::Request, + ) -> std::result::Result< + tonic::Response, + tonic::Status, + >; + async fn get_artifact( + &self, + request: tonic::Request, + ) -> std::result::Result< + tonic::Response, + tonic::Status, + >; + async fn list_models( + &self, + request: tonic::Request, + ) -> std::result::Result< + tonic::Response, + tonic::Status, + >; + /// Server streaming response type for the DecodeTokens method. + type DecodeTokensStream: tonic::codegen::tokio_stream::Stream< + Item = std::result::Result, + > + + std::marker::Send + + 'static; + async fn decode_tokens( + &self, + request: tonic::Request>, + ) -> std::result::Result< + tonic::Response, + tonic::Status, + >; + async fn get_stats( + &self, + request: tonic::Request, + ) -> std::result::Result< + tonic::Response, + tonic::Status, + >; + async fn get_model_stats( + &self, + request: tonic::Request, + ) -> std::result::Result< + tonic::Response, + tonic::Status, + >; + } + #[derive(Debug)] + pub struct CourtesyServer { + inner: Arc, + accept_compression_encodings: EnabledCompressionEncodings, + send_compression_encodings: EnabledCompressionEncodings, + max_decoding_message_size: Option, + max_encoding_message_size: Option, + } + impl CourtesyServer { + pub fn new(inner: T) -> Self { + Self::from_arc(Arc::new(inner)) + } + pub fn from_arc(inner: Arc) -> Self { + Self { + inner, + accept_compression_encodings: Default::default(), + send_compression_encodings: Default::default(), + 
max_decoding_message_size: None, + max_encoding_message_size: None, + } + } + pub fn with_interceptor( + inner: T, + interceptor: F, + ) -> InterceptedService + where + F: tonic::service::Interceptor, + { + InterceptedService::new(Self::new(inner), interceptor) + } + /// Enable decompressing requests with the given encoding. + #[must_use] + pub fn accept_compressed(mut self, encoding: CompressionEncoding) -> Self { + self.accept_compression_encodings.enable(encoding); + self + } + /// Compress responses with the given encoding, if the client supports it. + #[must_use] + pub fn send_compressed(mut self, encoding: CompressionEncoding) -> Self { + self.send_compression_encodings.enable(encoding); + self + } + /// Limits the maximum size of a decoded message. + /// + /// Default: `4MB` + #[must_use] + pub fn max_decoding_message_size(mut self, limit: usize) -> Self { + self.max_decoding_message_size = Some(limit); + self + } + /// Limits the maximum size of an encoded message. + /// + /// Default: `usize::MAX` + #[must_use] + pub fn max_encoding_message_size(mut self, limit: usize) -> Self { + self.max_encoding_message_size = Some(limit); + self + } + } + impl tonic::codegen::Service> for CourtesyServer + where + T: Courtesy, + B: Body + std::marker::Send + 'static, + B::Error: Into + std::marker::Send + 'static, + { + type Response = http::Response; + type Error = std::convert::Infallible; + type Future = BoxFuture; + fn poll_ready( + &mut self, + _cx: &mut Context<'_>, + ) -> Poll> { + Poll::Ready(Ok(())) + } + fn call(&mut self, req: http::Request) -> Self::Future { + match req.uri().path() { + "/hellas.courtesy.v1.Courtesy/QuotePreparedText" => { + #[allow(non_camel_case_types)] + struct QuotePreparedTextSvc(pub Arc); + impl< + T: Courtesy, + > tonic::server::UnaryService + for QuotePreparedTextSvc { + type Response = super::QuotePreparedTextResponse; + type Future = BoxFuture< + tonic::Response, + tonic::Status, + >; + fn call( + &mut self, + request: 
tonic::Request, + ) -> Self::Future { + let inner = Arc::clone(&self.0); + let fut = async move { + ::quote_prepared_text(&inner, request).await + }; + Box::pin(fut) + } + } + let accept_compression_encodings = self.accept_compression_encodings; + let send_compression_encodings = self.send_compression_encodings; + let max_decoding_message_size = self.max_decoding_message_size; + let max_encoding_message_size = self.max_encoding_message_size; + let inner = self.inner.clone(); + let fut = async move { + let method = QuotePreparedTextSvc(inner); + let codec = tonic_prost::ProstCodec::default(); + let mut grpc = tonic::server::Grpc::new(codec) + .apply_compression_config( + accept_compression_encodings, + send_compression_encodings, + ) + .apply_max_message_size_config( + max_decoding_message_size, + max_encoding_message_size, + ); + let res = grpc.unary(method, req).await; + Ok(res) + }; + Box::pin(fut) + } + "/hellas.courtesy.v1.Courtesy/QuotePrompt" => { + #[allow(non_camel_case_types)] + struct QuotePromptSvc(pub Arc); + impl< + T: Courtesy, + > tonic::server::UnaryService + for QuotePromptSvc { + type Response = super::QuotePromptResponse; + type Future = BoxFuture< + tonic::Response, + tonic::Status, + >; + fn call( + &mut self, + request: tonic::Request, + ) -> Self::Future { + let inner = Arc::clone(&self.0); + let fut = async move { + ::quote_prompt(&inner, request).await + }; + Box::pin(fut) + } + } + let accept_compression_encodings = self.accept_compression_encodings; + let send_compression_encodings = self.send_compression_encodings; + let max_decoding_message_size = self.max_decoding_message_size; + let max_encoding_message_size = self.max_encoding_message_size; + let inner = self.inner.clone(); + let fut = async move { + let method = QuotePromptSvc(inner); + let codec = tonic_prost::ProstCodec::default(); + let mut grpc = tonic::server::Grpc::new(codec) + .apply_compression_config( + accept_compression_encodings, + send_compression_encodings, + ) + 
.apply_max_message_size_config( + max_decoding_message_size, + max_encoding_message_size, + ); + let res = grpc.unary(method, req).await; + Ok(res) + }; + Box::pin(fut) + } + "/hellas.courtesy.v1.Courtesy/QuoteChatPrompt" => { + #[allow(non_camel_case_types)] + struct QuoteChatPromptSvc(pub Arc); + impl< + T: Courtesy, + > tonic::server::UnaryService + for QuoteChatPromptSvc { + type Response = super::QuoteChatPromptResponse; + type Future = BoxFuture< + tonic::Response, + tonic::Status, + >; + fn call( + &mut self, + request: tonic::Request, + ) -> Self::Future { + let inner = Arc::clone(&self.0); + let fut = async move { + ::quote_chat_prompt(&inner, request).await + }; + Box::pin(fut) + } + } + let accept_compression_encodings = self.accept_compression_encodings; + let send_compression_encodings = self.send_compression_encodings; + let max_decoding_message_size = self.max_decoding_message_size; + let max_encoding_message_size = self.max_encoding_message_size; + let inner = self.inner.clone(); + let fut = async move { + let method = QuoteChatPromptSvc(inner); + let codec = tonic_prost::ProstCodec::default(); + let mut grpc = tonic::server::Grpc::new(codec) + .apply_compression_config( + accept_compression_encodings, + send_compression_encodings, + ) + .apply_max_message_size_config( + max_decoding_message_size, + max_encoding_message_size, + ); + let res = grpc.unary(method, req).await; + Ok(res) + }; + Box::pin(fut) + } + "/hellas.courtesy.v1.Courtesy/PutArtifact" => { + #[allow(non_camel_case_types)] + struct PutArtifactSvc(pub Arc); + impl< + T: Courtesy, + > tonic::server::UnaryService + for PutArtifactSvc { + type Response = super::PutArtifactResponse; + type Future = BoxFuture< + tonic::Response, + tonic::Status, + >; + fn call( + &mut self, + request: tonic::Request, + ) -> Self::Future { + let inner = Arc::clone(&self.0); + let fut = async move { + ::put_artifact(&inner, request).await + }; + Box::pin(fut) + } + } + let accept_compression_encodings = 
self.accept_compression_encodings; + let send_compression_encodings = self.send_compression_encodings; + let max_decoding_message_size = self.max_decoding_message_size; + let max_encoding_message_size = self.max_encoding_message_size; + let inner = self.inner.clone(); + let fut = async move { + let method = PutArtifactSvc(inner); + let codec = tonic_prost::ProstCodec::default(); + let mut grpc = tonic::server::Grpc::new(codec) + .apply_compression_config( + accept_compression_encodings, + send_compression_encodings, + ) + .apply_max_message_size_config( + max_decoding_message_size, + max_encoding_message_size, + ); + let res = grpc.unary(method, req).await; + Ok(res) + }; + Box::pin(fut) + } + "/hellas.courtesy.v1.Courtesy/GetArtifact" => { + #[allow(non_camel_case_types)] + struct GetArtifactSvc(pub Arc); + impl< + T: Courtesy, + > tonic::server::UnaryService + for GetArtifactSvc { + type Response = super::GetArtifactResponse; + type Future = BoxFuture< + tonic::Response, + tonic::Status, + >; + fn call( + &mut self, + request: tonic::Request, + ) -> Self::Future { + let inner = Arc::clone(&self.0); + let fut = async move { + ::get_artifact(&inner, request).await + }; + Box::pin(fut) + } + } + let accept_compression_encodings = self.accept_compression_encodings; + let send_compression_encodings = self.send_compression_encodings; + let max_decoding_message_size = self.max_decoding_message_size; + let max_encoding_message_size = self.max_encoding_message_size; + let inner = self.inner.clone(); + let fut = async move { + let method = GetArtifactSvc(inner); + let codec = tonic_prost::ProstCodec::default(); + let mut grpc = tonic::server::Grpc::new(codec) + .apply_compression_config( + accept_compression_encodings, + send_compression_encodings, + ) + .apply_max_message_size_config( + max_decoding_message_size, + max_encoding_message_size, + ); + let res = grpc.unary(method, req).await; + Ok(res) + }; + Box::pin(fut) + } + "/hellas.courtesy.v1.Courtesy/ListModels" => { 
+ #[allow(non_camel_case_types)] + struct ListModelsSvc(pub Arc); + impl< + T: Courtesy, + > tonic::server::UnaryService + for ListModelsSvc { + type Response = super::ListModelsResponse; + type Future = BoxFuture< + tonic::Response, + tonic::Status, + >; + fn call( + &mut self, + request: tonic::Request, + ) -> Self::Future { + let inner = Arc::clone(&self.0); + let fut = async move { + ::list_models(&inner, request).await + }; + Box::pin(fut) + } + } + let accept_compression_encodings = self.accept_compression_encodings; + let send_compression_encodings = self.send_compression_encodings; + let max_decoding_message_size = self.max_decoding_message_size; + let max_encoding_message_size = self.max_encoding_message_size; + let inner = self.inner.clone(); + let fut = async move { + let method = ListModelsSvc(inner); + let codec = tonic_prost::ProstCodec::default(); + let mut grpc = tonic::server::Grpc::new(codec) + .apply_compression_config( + accept_compression_encodings, + send_compression_encodings, + ) + .apply_max_message_size_config( + max_decoding_message_size, + max_encoding_message_size, + ); + let res = grpc.unary(method, req).await; + Ok(res) + }; + Box::pin(fut) + } + "/hellas.courtesy.v1.Courtesy/DecodeTokens" => { + #[allow(non_camel_case_types)] + struct DecodeTokensSvc(pub Arc); + impl< + T: Courtesy, + > tonic::server::StreamingService + for DecodeTokensSvc { + type Response = super::DecodeTokensResponse; + type ResponseStream = T::DecodeTokensStream; + type Future = BoxFuture< + tonic::Response, + tonic::Status, + >; + fn call( + &mut self, + request: tonic::Request< + tonic::Streaming, + >, + ) -> Self::Future { + let inner = Arc::clone(&self.0); + let fut = async move { + ::decode_tokens(&inner, request).await + }; + Box::pin(fut) + } + } + let accept_compression_encodings = self.accept_compression_encodings; + let send_compression_encodings = self.send_compression_encodings; + let max_decoding_message_size = self.max_decoding_message_size; + let 
max_encoding_message_size = self.max_encoding_message_size; + let inner = self.inner.clone(); + let fut = async move { + let method = DecodeTokensSvc(inner); + let codec = tonic_prost::ProstCodec::default(); + let mut grpc = tonic::server::Grpc::new(codec) + .apply_compression_config( + accept_compression_encodings, + send_compression_encodings, + ) + .apply_max_message_size_config( + max_decoding_message_size, + max_encoding_message_size, + ); + let res = grpc.streaming(method, req).await; + Ok(res) + }; + Box::pin(fut) + } + "/hellas.courtesy.v1.Courtesy/GetStats" => { + #[allow(non_camel_case_types)] + struct GetStatsSvc(pub Arc); + impl tonic::server::UnaryService + for GetStatsSvc { + type Response = super::GetStatsResponse; + type Future = BoxFuture< + tonic::Response, + tonic::Status, + >; + fn call( + &mut self, + request: tonic::Request, + ) -> Self::Future { + let inner = Arc::clone(&self.0); + let fut = async move { + ::get_stats(&inner, request).await + }; + Box::pin(fut) + } + } + let accept_compression_encodings = self.accept_compression_encodings; + let send_compression_encodings = self.send_compression_encodings; + let max_decoding_message_size = self.max_decoding_message_size; + let max_encoding_message_size = self.max_encoding_message_size; + let inner = self.inner.clone(); + let fut = async move { + let method = GetStatsSvc(inner); + let codec = tonic_prost::ProstCodec::default(); + let mut grpc = tonic::server::Grpc::new(codec) + .apply_compression_config( + accept_compression_encodings, + send_compression_encodings, + ) + .apply_max_message_size_config( + max_decoding_message_size, + max_encoding_message_size, + ); + let res = grpc.unary(method, req).await; + Ok(res) + }; + Box::pin(fut) + } + "/hellas.courtesy.v1.Courtesy/GetModelStats" => { + #[allow(non_camel_case_types)] + struct GetModelStatsSvc(pub Arc); + impl< + T: Courtesy, + > tonic::server::UnaryService + for GetModelStatsSvc { + type Response = super::GetModelStatsResponse; + type 
Future = BoxFuture< + tonic::Response, + tonic::Status, + >; + fn call( + &mut self, + request: tonic::Request, + ) -> Self::Future { + let inner = Arc::clone(&self.0); + let fut = async move { + ::get_model_stats(&inner, request).await + }; + Box::pin(fut) + } + } + let accept_compression_encodings = self.accept_compression_encodings; + let send_compression_encodings = self.send_compression_encodings; + let max_decoding_message_size = self.max_decoding_message_size; + let max_encoding_message_size = self.max_encoding_message_size; + let inner = self.inner.clone(); + let fut = async move { + let method = GetModelStatsSvc(inner); + let codec = tonic_prost::ProstCodec::default(); + let mut grpc = tonic::server::Grpc::new(codec) + .apply_compression_config( + accept_compression_encodings, + send_compression_encodings, + ) + .apply_max_message_size_config( + max_decoding_message_size, + max_encoding_message_size, + ); + let res = grpc.unary(method, req).await; + Ok(res) + }; + Box::pin(fut) + } + _ => { + Box::pin(async move { + let mut response = http::Response::new( + tonic::body::Body::default(), + ); + let headers = response.headers_mut(); + headers + .insert( + tonic::Status::GRPC_STATUS, + (tonic::Code::Unimplemented as i32).into(), + ); + headers + .insert( + http::header::CONTENT_TYPE, + tonic::metadata::GRPC_CONTENT_TYPE, + ); + Ok(response) + }) + } + } + } + } + impl Clone for CourtesyServer { + fn clone(&self) -> Self { + let inner = self.inner.clone(); + Self { + inner, + accept_compression_encodings: self.accept_compression_encodings, + send_compression_encodings: self.send_compression_encodings, + max_decoding_message_size: self.max_decoding_message_size, + max_encoding_message_size: self.max_encoding_message_size, + } + } + } + /// Generated gRPC service name + pub const SERVICE_NAME: &str = "hellas.courtesy.v1.Courtesy"; + impl tonic::server::NamedService for CourtesyServer { + const NAME: &'static str = SERVICE_NAME; + } +} diff --git 
a/crates/pb/src/hellas.opaque.v1.rs b/crates/pb/src/hellas.opaque.v1.rs new file mode 100644 index 0000000..a8e35c5 --- /dev/null +++ b/crates/pb/src/hellas.opaque.v1.rs @@ -0,0 +1,307 @@ +// This file is @generated by prost-build. +#[derive(Clone, PartialEq, Eq, Hash, ::prost::Message)] +pub struct OpaqueRequest { + #[prost(string, tag = "1")] + pub service: ::prost::alloc::string::String, + #[prost(string, tag = "2")] + pub method: ::prost::alloc::string::String, + /// exact UTF-8 JSON bytes + #[prost(bytes = "vec", tag = "3")] + pub payload: ::prost::alloc::vec::Vec, +} +impl ::prost::Name for OpaqueRequest { + const NAME: &'static str = "OpaqueRequest"; + const PACKAGE: &'static str = "hellas.opaque.v1"; + fn full_name() -> ::prost::alloc::string::String { + "hellas.opaque.v1.OpaqueRequest".into() + } + fn type_url() -> ::prost::alloc::string::String { + "/hellas.opaque.v1.OpaqueRequest".into() + } +} +/// Generated client implementations. +pub mod opaque_client { + #![allow( + unused_variables, + dead_code, + missing_docs, + clippy::wildcard_imports, + clippy::let_unit_value, + )] + use tonic::codegen::*; + use tonic::codegen::http::Uri; + #[derive(Debug, Clone)] + pub struct OpaqueClient { + inner: tonic::client::Grpc, + } + impl OpaqueClient + where + T: tonic::client::GrpcService, + T::Error: Into, + T::ResponseBody: Body + std::marker::Send + 'static, + ::Error: Into + std::marker::Send, + { + pub fn new(inner: T) -> Self { + let inner = tonic::client::Grpc::new(inner); + Self { inner } + } + pub fn with_origin(inner: T, origin: Uri) -> Self { + let inner = tonic::client::Grpc::with_origin(inner, origin); + Self { inner } + } + pub fn with_interceptor( + inner: T, + interceptor: F, + ) -> OpaqueClient> + where + F: tonic::service::Interceptor, + T::ResponseBody: Default, + T: tonic::codegen::Service< + http::Request, + Response = http::Response< + >::ResponseBody, + >, + >, + , + >>::Error: Into + std::marker::Send + std::marker::Sync, + { + 
OpaqueClient::new(InterceptedService::new(inner, interceptor)) + } + /// Compress requests with the given encoding. + /// + /// This requires the server to support it otherwise it might respond with an + /// error. + #[must_use] + pub fn send_compressed(mut self, encoding: CompressionEncoding) -> Self { + self.inner = self.inner.send_compressed(encoding); + self + } + /// Enable decompressing responses. + #[must_use] + pub fn accept_compressed(mut self, encoding: CompressionEncoding) -> Self { + self.inner = self.inner.accept_compressed(encoding); + self + } + /// Limits the maximum size of a decoded message. + /// + /// Default: `4MB` + #[must_use] + pub fn max_decoding_message_size(mut self, limit: usize) -> Self { + self.inner = self.inner.max_decoding_message_size(limit); + self + } + /// Limits the maximum size of an encoded message. + /// + /// Default: `usize::MAX` + #[must_use] + pub fn max_encoding_message_size(mut self, limit: usize) -> Self { + self.inner = self.inner.max_encoding_message_size(limit); + self + } + pub async fn create_ticket( + &mut self, + request: impl tonic::IntoRequest, + ) -> std::result::Result< + tonic::Response, + tonic::Status, + > { + self.inner + .ready() + .await + .map_err(|e| { + tonic::Status::unknown( + format!("Service was not ready: {}", e.into()), + ) + })?; + let codec = tonic_prost::ProstCodec::default(); + let path = http::uri::PathAndQuery::from_static( + "/hellas.opaque.v1.Opaque/CreateTicket", + ); + let mut req = request.into_request(); + req.extensions_mut() + .insert(GrpcMethod::new("hellas.opaque.v1.Opaque", "CreateTicket")); + self.inner.unary(req, path, codec).await + } + } +} +/// Generated server implementations. +pub mod opaque_server { + #![allow( + unused_variables, + dead_code, + missing_docs, + clippy::wildcard_imports, + clippy::let_unit_value, + )] + use tonic::codegen::*; + /// Generated trait containing gRPC methods that should be implemented for use with OpaqueServer. 
+ #[async_trait] + pub trait Opaque: std::marker::Send + std::marker::Sync + 'static { + async fn create_ticket( + &self, + request: tonic::Request, + ) -> std::result::Result< + tonic::Response, + tonic::Status, + >; + } + #[derive(Debug)] + pub struct OpaqueServer { + inner: Arc, + accept_compression_encodings: EnabledCompressionEncodings, + send_compression_encodings: EnabledCompressionEncodings, + max_decoding_message_size: Option, + max_encoding_message_size: Option, + } + impl OpaqueServer { + pub fn new(inner: T) -> Self { + Self::from_arc(Arc::new(inner)) + } + pub fn from_arc(inner: Arc) -> Self { + Self { + inner, + accept_compression_encodings: Default::default(), + send_compression_encodings: Default::default(), + max_decoding_message_size: None, + max_encoding_message_size: None, + } + } + pub fn with_interceptor( + inner: T, + interceptor: F, + ) -> InterceptedService + where + F: tonic::service::Interceptor, + { + InterceptedService::new(Self::new(inner), interceptor) + } + /// Enable decompressing requests with the given encoding. + #[must_use] + pub fn accept_compressed(mut self, encoding: CompressionEncoding) -> Self { + self.accept_compression_encodings.enable(encoding); + self + } + /// Compress responses with the given encoding, if the client supports it. + #[must_use] + pub fn send_compressed(mut self, encoding: CompressionEncoding) -> Self { + self.send_compression_encodings.enable(encoding); + self + } + /// Limits the maximum size of a decoded message. + /// + /// Default: `4MB` + #[must_use] + pub fn max_decoding_message_size(mut self, limit: usize) -> Self { + self.max_decoding_message_size = Some(limit); + self + } + /// Limits the maximum size of an encoded message. 
+ /// + /// Default: `usize::MAX` + #[must_use] + pub fn max_encoding_message_size(mut self, limit: usize) -> Self { + self.max_encoding_message_size = Some(limit); + self + } + } + impl tonic::codegen::Service> for OpaqueServer + where + T: Opaque, + B: Body + std::marker::Send + 'static, + B::Error: Into + std::marker::Send + 'static, + { + type Response = http::Response; + type Error = std::convert::Infallible; + type Future = BoxFuture; + fn poll_ready( + &mut self, + _cx: &mut Context<'_>, + ) -> Poll> { + Poll::Ready(Ok(())) + } + fn call(&mut self, req: http::Request) -> Self::Future { + match req.uri().path() { + "/hellas.opaque.v1.Opaque/CreateTicket" => { + #[allow(non_camel_case_types)] + struct CreateTicketSvc(pub Arc); + impl tonic::server::UnaryService + for CreateTicketSvc { + type Response = super::super::super::v1::Ticket; + type Future = BoxFuture< + tonic::Response, + tonic::Status, + >; + fn call( + &mut self, + request: tonic::Request, + ) -> Self::Future { + let inner = Arc::clone(&self.0); + let fut = async move { + ::create_ticket(&inner, request).await + }; + Box::pin(fut) + } + } + let accept_compression_encodings = self.accept_compression_encodings; + let send_compression_encodings = self.send_compression_encodings; + let max_decoding_message_size = self.max_decoding_message_size; + let max_encoding_message_size = self.max_encoding_message_size; + let inner = self.inner.clone(); + let fut = async move { + let method = CreateTicketSvc(inner); + let codec = tonic_prost::ProstCodec::default(); + let mut grpc = tonic::server::Grpc::new(codec) + .apply_compression_config( + accept_compression_encodings, + send_compression_encodings, + ) + .apply_max_message_size_config( + max_decoding_message_size, + max_encoding_message_size, + ); + let res = grpc.unary(method, req).await; + Ok(res) + }; + Box::pin(fut) + } + _ => { + Box::pin(async move { + let mut response = http::Response::new( + tonic::body::Body::default(), + ); + let headers = 
response.headers_mut(); + headers + .insert( + tonic::Status::GRPC_STATUS, + (tonic::Code::Unimplemented as i32).into(), + ); + headers + .insert( + http::header::CONTENT_TYPE, + tonic::metadata::GRPC_CONTENT_TYPE, + ); + Ok(response) + }) + } + } + } + } + impl Clone for OpaqueServer { + fn clone(&self) -> Self { + let inner = self.inner.clone(); + Self { + inner, + accept_compression_encodings: self.accept_compression_encodings, + send_compression_encodings: self.send_compression_encodings, + max_decoding_message_size: self.max_decoding_message_size, + max_encoding_message_size: self.max_encoding_message_size, + } + } + } + /// Generated gRPC service name + pub const SERVICE_NAME: &str = "hellas.opaque.v1.Opaque"; + impl tonic::server::NamedService for OpaqueServer { + const NAME: &'static str = SERVICE_NAME; + } +} diff --git a/crates/pb/src/hellas.swarm.v1.rs b/crates/pb/src/hellas.swarm.v1.rs new file mode 100644 index 0000000..30d9ba5 --- /dev/null +++ b/crates/pb/src/hellas.swarm.v1.rs @@ -0,0 +1,457 @@ +// This file is @generated by prost-build. +#[derive(Clone, Copy, PartialEq, Eq, Hash, ::prost::Message)] +pub struct GetNodeInfoRequest {} +impl ::prost::Name for GetNodeInfoRequest { + const NAME: &'static str = "GetNodeInfoRequest"; + const PACKAGE: &'static str = "hellas.swarm.v1"; + fn full_name() -> ::prost::alloc::string::String { + "hellas.swarm.v1.GetNodeInfoRequest".into() + } + fn type_url() -> ::prost::alloc::string::String { + "/hellas.swarm.v1.GetNodeInfoRequest".into() + } +} +#[derive(Clone, PartialEq, Eq, Hash, ::prost::Message)] +pub struct GetNodeInfoResponse { + #[prost(string, tag = "1")] + pub node_id: ::prost::alloc::string::String, + #[prost(uint64, tag = "2")] + pub uptime_seconds: u64, + /// Semver string, e.g. "0.1.0". Self-reported; treat as untrusted. + #[prost(string, tag = "3")] + pub version: ::prost::alloc::string::String, + /// Build commit hash (short hex). Self-reported; treat as untrusted. 
+ #[prost(string, tag = "4")] + pub build: ::prost::alloc::string::String, + /// Platform triple, e.g. "x86_64-linux". Self-reported; treat as untrusted. + #[prost(string, tag = "5")] + pub os: ::prost::alloc::string::String, + /// Operator-chosen tag, exactly 16 bytes. Self-reported; treat as untrusted. + #[prost(bytes = "vec", tag = "6")] + pub graffiti: ::prost::alloc::vec::Vec, +} +impl ::prost::Name for GetNodeInfoResponse { + const NAME: &'static str = "GetNodeInfoResponse"; + const PACKAGE: &'static str = "hellas.swarm.v1"; + fn full_name() -> ::prost::alloc::string::String { + "hellas.swarm.v1.GetNodeInfoResponse".into() + } + fn type_url() -> ::prost::alloc::string::String { + "/hellas.swarm.v1.GetNodeInfoResponse".into() + } +} +#[derive(Clone, PartialEq, Eq, Hash, ::prost::Message)] +pub struct GetKnownPeersRequest { + #[prost(string, tag = "1")] + pub service_alpn: ::prost::alloc::string::String, +} +impl ::prost::Name for GetKnownPeersRequest { + const NAME: &'static str = "GetKnownPeersRequest"; + const PACKAGE: &'static str = "hellas.swarm.v1"; + fn full_name() -> ::prost::alloc::string::String { + "hellas.swarm.v1.GetKnownPeersRequest".into() + } + fn type_url() -> ::prost::alloc::string::String { + "/hellas.swarm.v1.GetKnownPeersRequest".into() + } +} +#[derive(Clone, PartialEq, Eq, Hash, ::prost::Message)] +pub struct GetKnownPeersResponse { + #[prost(bytes = "vec", repeated, tag = "1")] + pub peer_ids: ::prost::alloc::vec::Vec<::prost::alloc::vec::Vec>, +} +impl ::prost::Name for GetKnownPeersResponse { + const NAME: &'static str = "GetKnownPeersResponse"; + const PACKAGE: &'static str = "hellas.swarm.v1"; + fn full_name() -> ::prost::alloc::string::String { + "hellas.swarm.v1.GetKnownPeersResponse".into() + } + fn type_url() -> ::prost::alloc::string::String { + "/hellas.swarm.v1.GetKnownPeersResponse".into() + } +} +#[derive(Clone, PartialEq, Eq, Hash, ::prost::Message)] +pub struct Presence { + #[prost(string, tag = "1")] + pub hf_id: 
::prost::alloc::string::String, + #[prost(string, tag = "2")] + pub req_id: ::prost::alloc::string::String, + #[prost(string, tag = "3")] + pub peer_id: ::prost::alloc::string::String, + #[prost(uint64, tag = "4")] + pub ttl_ms: u64, + #[prost(bool, tag = "5")] + pub is_executor: bool, +} +impl ::prost::Name for Presence { + const NAME: &'static str = "Presence"; + const PACKAGE: &'static str = "hellas.swarm.v1"; + fn full_name() -> ::prost::alloc::string::String { + "hellas.swarm.v1.Presence".into() + } + fn type_url() -> ::prost::alloc::string::String { + "/hellas.swarm.v1.Presence".into() + } +} +/// Generated client implementations. +pub mod node_client { + #![allow( + unused_variables, + dead_code, + missing_docs, + clippy::wildcard_imports, + clippy::let_unit_value, + )] + use tonic::codegen::*; + use tonic::codegen::http::Uri; + #[derive(Debug, Clone)] + pub struct NodeClient { + inner: tonic::client::Grpc, + } + impl NodeClient + where + T: tonic::client::GrpcService, + T::Error: Into, + T::ResponseBody: Body + std::marker::Send + 'static, + ::Error: Into + std::marker::Send, + { + pub fn new(inner: T) -> Self { + let inner = tonic::client::Grpc::new(inner); + Self { inner } + } + pub fn with_origin(inner: T, origin: Uri) -> Self { + let inner = tonic::client::Grpc::with_origin(inner, origin); + Self { inner } + } + pub fn with_interceptor( + inner: T, + interceptor: F, + ) -> NodeClient> + where + F: tonic::service::Interceptor, + T::ResponseBody: Default, + T: tonic::codegen::Service< + http::Request, + Response = http::Response< + >::ResponseBody, + >, + >, + , + >>::Error: Into + std::marker::Send + std::marker::Sync, + { + NodeClient::new(InterceptedService::new(inner, interceptor)) + } + /// Compress requests with the given encoding. + /// + /// This requires the server to support it otherwise it might respond with an + /// error. 
+ #[must_use] + pub fn send_compressed(mut self, encoding: CompressionEncoding) -> Self { + self.inner = self.inner.send_compressed(encoding); + self + } + /// Enable decompressing responses. + #[must_use] + pub fn accept_compressed(mut self, encoding: CompressionEncoding) -> Self { + self.inner = self.inner.accept_compressed(encoding); + self + } + /// Limits the maximum size of a decoded message. + /// + /// Default: `4MB` + #[must_use] + pub fn max_decoding_message_size(mut self, limit: usize) -> Self { + self.inner = self.inner.max_decoding_message_size(limit); + self + } + /// Limits the maximum size of an encoded message. + /// + /// Default: `usize::MAX` + #[must_use] + pub fn max_encoding_message_size(mut self, limit: usize) -> Self { + self.inner = self.inner.max_encoding_message_size(limit); + self + } + pub async fn get_node_info( + &mut self, + request: impl tonic::IntoRequest, + ) -> std::result::Result< + tonic::Response, + tonic::Status, + > { + self.inner + .ready() + .await + .map_err(|e| { + tonic::Status::unknown( + format!("Service was not ready: {}", e.into()), + ) + })?; + let codec = tonic_prost::ProstCodec::default(); + let path = http::uri::PathAndQuery::from_static( + "/hellas.swarm.v1.Node/GetNodeInfo", + ); + let mut req = request.into_request(); + req.extensions_mut() + .insert(GrpcMethod::new("hellas.swarm.v1.Node", "GetNodeInfo")); + self.inner.unary(req, path, codec).await + } + pub async fn get_known_peers( + &mut self, + request: impl tonic::IntoRequest, + ) -> std::result::Result< + tonic::Response, + tonic::Status, + > { + self.inner + .ready() + .await + .map_err(|e| { + tonic::Status::unknown( + format!("Service was not ready: {}", e.into()), + ) + })?; + let codec = tonic_prost::ProstCodec::default(); + let path = http::uri::PathAndQuery::from_static( + "/hellas.swarm.v1.Node/GetKnownPeers", + ); + let mut req = request.into_request(); + req.extensions_mut() + .insert(GrpcMethod::new("hellas.swarm.v1.Node", "GetKnownPeers")); 
+ self.inner.unary(req, path, codec).await + } + } +} +/// Generated server implementations. +pub mod node_server { + #![allow( + unused_variables, + dead_code, + missing_docs, + clippy::wildcard_imports, + clippy::let_unit_value, + )] + use tonic::codegen::*; + /// Generated trait containing gRPC methods that should be implemented for use with NodeServer. + #[async_trait] + pub trait Node: std::marker::Send + std::marker::Sync + 'static { + async fn get_node_info( + &self, + request: tonic::Request, + ) -> std::result::Result< + tonic::Response, + tonic::Status, + >; + async fn get_known_peers( + &self, + request: tonic::Request, + ) -> std::result::Result< + tonic::Response, + tonic::Status, + >; + } + #[derive(Debug)] + pub struct NodeServer { + inner: Arc, + accept_compression_encodings: EnabledCompressionEncodings, + send_compression_encodings: EnabledCompressionEncodings, + max_decoding_message_size: Option, + max_encoding_message_size: Option, + } + impl NodeServer { + pub fn new(inner: T) -> Self { + Self::from_arc(Arc::new(inner)) + } + pub fn from_arc(inner: Arc) -> Self { + Self { + inner, + accept_compression_encodings: Default::default(), + send_compression_encodings: Default::default(), + max_decoding_message_size: None, + max_encoding_message_size: None, + } + } + pub fn with_interceptor( + inner: T, + interceptor: F, + ) -> InterceptedService + where + F: tonic::service::Interceptor, + { + InterceptedService::new(Self::new(inner), interceptor) + } + /// Enable decompressing requests with the given encoding. + #[must_use] + pub fn accept_compressed(mut self, encoding: CompressionEncoding) -> Self { + self.accept_compression_encodings.enable(encoding); + self + } + /// Compress responses with the given encoding, if the client supports it. + #[must_use] + pub fn send_compressed(mut self, encoding: CompressionEncoding) -> Self { + self.send_compression_encodings.enable(encoding); + self + } + /// Limits the maximum size of a decoded message. 
+ /// + /// Default: `4MB` + #[must_use] + pub fn max_decoding_message_size(mut self, limit: usize) -> Self { + self.max_decoding_message_size = Some(limit); + self + } + /// Limits the maximum size of an encoded message. + /// + /// Default: `usize::MAX` + #[must_use] + pub fn max_encoding_message_size(mut self, limit: usize) -> Self { + self.max_encoding_message_size = Some(limit); + self + } + } + impl tonic::codegen::Service> for NodeServer + where + T: Node, + B: Body + std::marker::Send + 'static, + B::Error: Into + std::marker::Send + 'static, + { + type Response = http::Response; + type Error = std::convert::Infallible; + type Future = BoxFuture; + fn poll_ready( + &mut self, + _cx: &mut Context<'_>, + ) -> Poll> { + Poll::Ready(Ok(())) + } + fn call(&mut self, req: http::Request) -> Self::Future { + match req.uri().path() { + "/hellas.swarm.v1.Node/GetNodeInfo" => { + #[allow(non_camel_case_types)] + struct GetNodeInfoSvc(pub Arc); + impl tonic::server::UnaryService + for GetNodeInfoSvc { + type Response = super::GetNodeInfoResponse; + type Future = BoxFuture< + tonic::Response, + tonic::Status, + >; + fn call( + &mut self, + request: tonic::Request, + ) -> Self::Future { + let inner = Arc::clone(&self.0); + let fut = async move { + ::get_node_info(&inner, request).await + }; + Box::pin(fut) + } + } + let accept_compression_encodings = self.accept_compression_encodings; + let send_compression_encodings = self.send_compression_encodings; + let max_decoding_message_size = self.max_decoding_message_size; + let max_encoding_message_size = self.max_encoding_message_size; + let inner = self.inner.clone(); + let fut = async move { + let method = GetNodeInfoSvc(inner); + let codec = tonic_prost::ProstCodec::default(); + let mut grpc = tonic::server::Grpc::new(codec) + .apply_compression_config( + accept_compression_encodings, + send_compression_encodings, + ) + .apply_max_message_size_config( + max_decoding_message_size, + max_encoding_message_size, + ); + let res 
= grpc.unary(method, req).await; + Ok(res) + }; + Box::pin(fut) + } + "/hellas.swarm.v1.Node/GetKnownPeers" => { + #[allow(non_camel_case_types)] + struct GetKnownPeersSvc(pub Arc); + impl< + T: Node, + > tonic::server::UnaryService + for GetKnownPeersSvc { + type Response = super::GetKnownPeersResponse; + type Future = BoxFuture< + tonic::Response, + tonic::Status, + >; + fn call( + &mut self, + request: tonic::Request, + ) -> Self::Future { + let inner = Arc::clone(&self.0); + let fut = async move { + ::get_known_peers(&inner, request).await + }; + Box::pin(fut) + } + } + let accept_compression_encodings = self.accept_compression_encodings; + let send_compression_encodings = self.send_compression_encodings; + let max_decoding_message_size = self.max_decoding_message_size; + let max_encoding_message_size = self.max_encoding_message_size; + let inner = self.inner.clone(); + let fut = async move { + let method = GetKnownPeersSvc(inner); + let codec = tonic_prost::ProstCodec::default(); + let mut grpc = tonic::server::Grpc::new(codec) + .apply_compression_config( + accept_compression_encodings, + send_compression_encodings, + ) + .apply_max_message_size_config( + max_decoding_message_size, + max_encoding_message_size, + ); + let res = grpc.unary(method, req).await; + Ok(res) + }; + Box::pin(fut) + } + _ => { + Box::pin(async move { + let mut response = http::Response::new( + tonic::body::Body::default(), + ); + let headers = response.headers_mut(); + headers + .insert( + tonic::Status::GRPC_STATUS, + (tonic::Code::Unimplemented as i32).into(), + ); + headers + .insert( + http::header::CONTENT_TYPE, + tonic::metadata::GRPC_CONTENT_TYPE, + ); + Ok(response) + }) + } + } + } + } + impl Clone for NodeServer { + fn clone(&self) -> Self { + let inner = self.inner.clone(); + Self { + inner, + accept_compression_encodings: self.accept_compression_encodings, + send_compression_encodings: self.send_compression_encodings, + max_decoding_message_size: 
self.max_decoding_message_size, + max_encoding_message_size: self.max_encoding_message_size, + } + } + } + /// Generated gRPC service name + pub const SERVICE_NAME: &str = "hellas.swarm.v1.Node"; + impl tonic::server::NamedService for NodeServer { + const NAME: &'static str = SERVICE_NAME; + } +} diff --git a/crates/pb/src/hellas.symbolic.v1.rs b/crates/pb/src/hellas.symbolic.v1.rs new file mode 100644 index 0000000..d250a6e --- /dev/null +++ b/crates/pb/src/hellas.symbolic.v1.rs @@ -0,0 +1,303 @@ +// This file is @generated by prost-build. +#[derive(Clone, PartialEq, Eq, Hash, ::prost::Message)] +pub struct SymbolicRequest { + /// catnix InputId; exactly 32 bytes. + #[prost(bytes = "vec", tag = "1")] + pub text_execution_cid: ::prost::alloc::vec::Vec, +} +impl ::prost::Name for SymbolicRequest { + const NAME: &'static str = "SymbolicRequest"; + const PACKAGE: &'static str = "hellas.symbolic.v1"; + fn full_name() -> ::prost::alloc::string::String { + "hellas.symbolic.v1.SymbolicRequest".into() + } + fn type_url() -> ::prost::alloc::string::String { + "/hellas.symbolic.v1.SymbolicRequest".into() + } +} +/// Generated client implementations. 
+pub mod symbolic_client { + #![allow( + unused_variables, + dead_code, + missing_docs, + clippy::wildcard_imports, + clippy::let_unit_value, + )] + use tonic::codegen::*; + use tonic::codegen::http::Uri; + #[derive(Debug, Clone)] + pub struct SymbolicClient { + inner: tonic::client::Grpc, + } + impl SymbolicClient + where + T: tonic::client::GrpcService, + T::Error: Into, + T::ResponseBody: Body + std::marker::Send + 'static, + ::Error: Into + std::marker::Send, + { + pub fn new(inner: T) -> Self { + let inner = tonic::client::Grpc::new(inner); + Self { inner } + } + pub fn with_origin(inner: T, origin: Uri) -> Self { + let inner = tonic::client::Grpc::with_origin(inner, origin); + Self { inner } + } + pub fn with_interceptor( + inner: T, + interceptor: F, + ) -> SymbolicClient> + where + F: tonic::service::Interceptor, + T::ResponseBody: Default, + T: tonic::codegen::Service< + http::Request, + Response = http::Response< + >::ResponseBody, + >, + >, + , + >>::Error: Into + std::marker::Send + std::marker::Sync, + { + SymbolicClient::new(InterceptedService::new(inner, interceptor)) + } + /// Compress requests with the given encoding. + /// + /// This requires the server to support it otherwise it might respond with an + /// error. + #[must_use] + pub fn send_compressed(mut self, encoding: CompressionEncoding) -> Self { + self.inner = self.inner.send_compressed(encoding); + self + } + /// Enable decompressing responses. + #[must_use] + pub fn accept_compressed(mut self, encoding: CompressionEncoding) -> Self { + self.inner = self.inner.accept_compressed(encoding); + self + } + /// Limits the maximum size of a decoded message. + /// + /// Default: `4MB` + #[must_use] + pub fn max_decoding_message_size(mut self, limit: usize) -> Self { + self.inner = self.inner.max_decoding_message_size(limit); + self + } + /// Limits the maximum size of an encoded message. 
+ /// + /// Default: `usize::MAX` + #[must_use] + pub fn max_encoding_message_size(mut self, limit: usize) -> Self { + self.inner = self.inner.max_encoding_message_size(limit); + self + } + pub async fn create_ticket( + &mut self, + request: impl tonic::IntoRequest, + ) -> std::result::Result< + tonic::Response, + tonic::Status, + > { + self.inner + .ready() + .await + .map_err(|e| { + tonic::Status::unknown( + format!("Service was not ready: {}", e.into()), + ) + })?; + let codec = tonic_prost::ProstCodec::default(); + let path = http::uri::PathAndQuery::from_static( + "/hellas.symbolic.v1.Symbolic/CreateTicket", + ); + let mut req = request.into_request(); + req.extensions_mut() + .insert(GrpcMethod::new("hellas.symbolic.v1.Symbolic", "CreateTicket")); + self.inner.unary(req, path, codec).await + } + } +} +/// Generated server implementations. +pub mod symbolic_server { + #![allow( + unused_variables, + dead_code, + missing_docs, + clippy::wildcard_imports, + clippy::let_unit_value, + )] + use tonic::codegen::*; + /// Generated trait containing gRPC methods that should be implemented for use with SymbolicServer. 
+ #[async_trait] + pub trait Symbolic: std::marker::Send + std::marker::Sync + 'static { + async fn create_ticket( + &self, + request: tonic::Request, + ) -> std::result::Result< + tonic::Response, + tonic::Status, + >; + } + #[derive(Debug)] + pub struct SymbolicServer { + inner: Arc, + accept_compression_encodings: EnabledCompressionEncodings, + send_compression_encodings: EnabledCompressionEncodings, + max_decoding_message_size: Option, + max_encoding_message_size: Option, + } + impl SymbolicServer { + pub fn new(inner: T) -> Self { + Self::from_arc(Arc::new(inner)) + } + pub fn from_arc(inner: Arc) -> Self { + Self { + inner, + accept_compression_encodings: Default::default(), + send_compression_encodings: Default::default(), + max_decoding_message_size: None, + max_encoding_message_size: None, + } + } + pub fn with_interceptor( + inner: T, + interceptor: F, + ) -> InterceptedService + where + F: tonic::service::Interceptor, + { + InterceptedService::new(Self::new(inner), interceptor) + } + /// Enable decompressing requests with the given encoding. + #[must_use] + pub fn accept_compressed(mut self, encoding: CompressionEncoding) -> Self { + self.accept_compression_encodings.enable(encoding); + self + } + /// Compress responses with the given encoding, if the client supports it. + #[must_use] + pub fn send_compressed(mut self, encoding: CompressionEncoding) -> Self { + self.send_compression_encodings.enable(encoding); + self + } + /// Limits the maximum size of a decoded message. + /// + /// Default: `4MB` + #[must_use] + pub fn max_decoding_message_size(mut self, limit: usize) -> Self { + self.max_decoding_message_size = Some(limit); + self + } + /// Limits the maximum size of an encoded message. 
+ /// + /// Default: `usize::MAX` + #[must_use] + pub fn max_encoding_message_size(mut self, limit: usize) -> Self { + self.max_encoding_message_size = Some(limit); + self + } + } + impl tonic::codegen::Service> for SymbolicServer + where + T: Symbolic, + B: Body + std::marker::Send + 'static, + B::Error: Into + std::marker::Send + 'static, + { + type Response = http::Response; + type Error = std::convert::Infallible; + type Future = BoxFuture; + fn poll_ready( + &mut self, + _cx: &mut Context<'_>, + ) -> Poll> { + Poll::Ready(Ok(())) + } + fn call(&mut self, req: http::Request) -> Self::Future { + match req.uri().path() { + "/hellas.symbolic.v1.Symbolic/CreateTicket" => { + #[allow(non_camel_case_types)] + struct CreateTicketSvc(pub Arc); + impl tonic::server::UnaryService + for CreateTicketSvc { + type Response = super::super::super::v1::Ticket; + type Future = BoxFuture< + tonic::Response, + tonic::Status, + >; + fn call( + &mut self, + request: tonic::Request, + ) -> Self::Future { + let inner = Arc::clone(&self.0); + let fut = async move { + ::create_ticket(&inner, request).await + }; + Box::pin(fut) + } + } + let accept_compression_encodings = self.accept_compression_encodings; + let send_compression_encodings = self.send_compression_encodings; + let max_decoding_message_size = self.max_decoding_message_size; + let max_encoding_message_size = self.max_encoding_message_size; + let inner = self.inner.clone(); + let fut = async move { + let method = CreateTicketSvc(inner); + let codec = tonic_prost::ProstCodec::default(); + let mut grpc = tonic::server::Grpc::new(codec) + .apply_compression_config( + accept_compression_encodings, + send_compression_encodings, + ) + .apply_max_message_size_config( + max_decoding_message_size, + max_encoding_message_size, + ); + let res = grpc.unary(method, req).await; + Ok(res) + }; + Box::pin(fut) + } + _ => { + Box::pin(async move { + let mut response = http::Response::new( + tonic::body::Body::default(), + ); + let headers = 
response.headers_mut(); + headers + .insert( + tonic::Status::GRPC_STATUS, + (tonic::Code::Unimplemented as i32).into(), + ); + headers + .insert( + http::header::CONTENT_TYPE, + tonic::metadata::GRPC_CONTENT_TYPE, + ); + Ok(response) + }) + } + } + } + } + impl Clone for SymbolicServer { + fn clone(&self) -> Self { + let inner = self.inner.clone(); + Self { + inner, + accept_compression_encodings: self.accept_compression_encodings, + send_compression_encodings: self.send_compression_encodings, + max_decoding_message_size: self.max_decoding_message_size, + max_encoding_message_size: self.max_encoding_message_size, + } + } + } + /// Generated gRPC service name + pub const SERVICE_NAME: &str = "hellas.symbolic.v1.Symbolic"; + impl tonic::server::NamedService for SymbolicServer { + const NAME: &'static str = SERVICE_NAME; + } +} diff --git a/crates/pb/src/hellas.v1.rs b/crates/pb/src/hellas.v1.rs new file mode 100644 index 0000000..cb8d52d --- /dev/null +++ b/crates/pb/src/hellas.v1.rs @@ -0,0 +1,467 @@ +// This file is @generated by prost-build. 
+#[derive(Clone, PartialEq, Eq, Hash, ::prost::Message)] +pub struct Ticket { + /// exactly 32 bytes + #[prost(bytes = "vec", tag = "1")] + pub request_commitment: ::prost::alloc::vec::Vec, + #[prost(uint64, tag = "2")] + pub amount: u64, + #[prost(uint64, tag = "3")] + pub ttl_ms: u64, +} +impl ::prost::Name for Ticket { + const NAME: &'static str = "Ticket"; + const PACKAGE: &'static str = "hellas.v1"; + fn full_name() -> ::prost::alloc::string::String { + "hellas.v1.Ticket".into() + } + fn type_url() -> ::prost::alloc::string::String { + "/hellas.v1.Ticket".into() + } +} +#[derive(Clone, PartialEq, Eq, Hash, ::prost::Message)] +pub struct RunTicketRequest { + /// exactly 32 bytes + #[prost(bytes = "vec", tag = "1")] + pub request_commitment: ::prost::alloc::vec::Vec, +} +impl ::prost::Name for RunTicketRequest { + const NAME: &'static str = "RunTicketRequest"; + const PACKAGE: &'static str = "hellas.v1"; + fn full_name() -> ::prost::alloc::string::String { + "hellas.v1.RunTicketRequest".into() + } + fn type_url() -> ::prost::alloc::string::String { + "/hellas.v1.RunTicketRequest".into() + } +} +/// Wire protocol: zero or more WorkChunk events, terminated by exactly one +/// WorkFinished or WorkFailed, after which the stream ends. Streaming chunks +/// are transport-only; the terminal output is the object committed to by the +/// receipt. +#[derive(Clone, PartialEq, Eq, Hash, ::prost::Message)] +pub struct WorkEvent { + #[prost(oneof = "work_event::Kind", tags = "1, 2, 3")] + pub kind: ::core::option::Option, +} +/// Nested message and enum types in `WorkEvent`. 
+pub mod work_event { + #[derive(Clone, PartialEq, Eq, Hash, ::prost::Oneof)] + pub enum Kind { + #[prost(message, tag = "1")] + Chunk(super::WorkChunk), + #[prost(message, tag = "2")] + Finished(super::WorkFinished), + #[prost(message, tag = "3")] + Failed(super::WorkFailed), + } +} +impl ::prost::Name for WorkEvent { + const NAME: &'static str = "WorkEvent"; + const PACKAGE: &'static str = "hellas.v1"; + fn full_name() -> ::prost::alloc::string::String { + "hellas.v1.WorkEvent".into() + } + fn type_url() -> ::prost::alloc::string::String { + "/hellas.v1.WorkEvent".into() + } +} +#[derive(Clone, PartialEq, Eq, Hash, ::prost::Message)] +pub struct WorkChunk { + /// Cumulative position AFTER this chunk. + #[prost(uint64, tag = "1")] + pub position: u64, + #[prost(bytes = "vec", tag = "2")] + pub bytes: ::prost::alloc::vec::Vec, +} +impl ::prost::Name for WorkChunk { + const NAME: &'static str = "WorkChunk"; + const PACKAGE: &'static str = "hellas.v1"; + fn full_name() -> ::prost::alloc::string::String { + "hellas.v1.WorkChunk".into() + } + fn type_url() -> ::prost::alloc::string::String { + "/hellas.v1.WorkChunk".into() + } +} +#[derive(Clone, PartialEq, Eq, Hash, ::prost::Message)] +pub struct WorkFinished { + /// Complete output object. Symbolic text uses little-endian u32 token IDs. + /// Opaque uses exact UTF-8 JSON bytes. 
+ #[prost(bytes = "vec", tag = "1")] + pub output: ::prost::alloc::vec::Vec, + #[prost(message, optional, tag = "2")] + pub receipt: ::core::option::Option, + #[prost(enumeration = "FinishStatus", tag = "3")] + pub status: i32, + #[prost(uint64, tag = "4")] + pub total_units: u64, +} +impl ::prost::Name for WorkFinished { + const NAME: &'static str = "WorkFinished"; + const PACKAGE: &'static str = "hellas.v1"; + fn full_name() -> ::prost::alloc::string::String { + "hellas.v1.WorkFinished".into() + } + fn type_url() -> ::prost::alloc::string::String { + "/hellas.v1.WorkFinished".into() + } +} +#[derive(Clone, PartialEq, Eq, Hash, ::prost::Message)] +pub struct WorkFailed { + /// Units emitted before failure (tokens for symbolic text, bytes for opaque). + #[prost(uint64, tag = "1")] + pub position: u64, + #[prost(string, tag = "2")] + pub error: ::prost::alloc::string::String, +} +impl ::prost::Name for WorkFailed { + const NAME: &'static str = "WorkFailed"; + const PACKAGE: &'static str = "hellas.v1"; + fn full_name() -> ::prost::alloc::string::String { + "hellas.v1.WorkFailed".into() + } + fn type_url() -> ::prost::alloc::string::String { + "/hellas.v1.WorkFailed".into() + } +} +/// Canonical hellas-core SignedReceipt encoded as strict dag-cbor. 
+#[derive(Clone, PartialEq, Eq, Hash, ::prost::Message)] +pub struct ReceiptEnvelope { + #[prost(bytes = "vec", tag = "1")] + pub dag_cbor: ::prost::alloc::vec::Vec, +} +impl ::prost::Name for ReceiptEnvelope { + const NAME: &'static str = "ReceiptEnvelope"; + const PACKAGE: &'static str = "hellas.v1"; + fn full_name() -> ::prost::alloc::string::String { + "hellas.v1.ReceiptEnvelope".into() + } + fn type_url() -> ::prost::alloc::string::String { + "/hellas.v1.ReceiptEnvelope".into() + } +} +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, ::prost::Enumeration)] +#[repr(i32)] +pub enum FinishStatus { + Unspecified = 0, + EndOfSequence = 1, + MaxOutput = 2, + Cancelled = 3, +} +impl FinishStatus { + /// String value of the enum field names used in the ProtoBuf definition. + /// + /// The values are not transformed in any way and thus are considered stable + /// (if the ProtoBuf definition does not change) and safe for programmatic use. + pub fn as_str_name(&self) -> &'static str { + match self { + Self::Unspecified => "FINISH_STATUS_UNSPECIFIED", + Self::EndOfSequence => "FINISH_STATUS_END_OF_SEQUENCE", + Self::MaxOutput => "FINISH_STATUS_MAX_OUTPUT", + Self::Cancelled => "FINISH_STATUS_CANCELLED", + } + } + /// Creates an enum from field names used in the ProtoBuf definition. + pub fn from_str_name(value: &str) -> ::core::option::Option { + match value { + "FINISH_STATUS_UNSPECIFIED" => Some(Self::Unspecified), + "FINISH_STATUS_END_OF_SEQUENCE" => Some(Self::EndOfSequence), + "FINISH_STATUS_MAX_OUTPUT" => Some(Self::MaxOutput), + "FINISH_STATUS_CANCELLED" => Some(Self::Cancelled), + _ => None, + } + } +} +/// Generated client implementations. 
+pub mod execute_client { + #![allow( + unused_variables, + dead_code, + missing_docs, + clippy::wildcard_imports, + clippy::let_unit_value, + )] + use tonic::codegen::*; + use tonic::codegen::http::Uri; + #[derive(Debug, Clone)] + pub struct ExecuteClient { + inner: tonic::client::Grpc, + } + impl ExecuteClient + where + T: tonic::client::GrpcService, + T::Error: Into, + T::ResponseBody: Body + std::marker::Send + 'static, + ::Error: Into + std::marker::Send, + { + pub fn new(inner: T) -> Self { + let inner = tonic::client::Grpc::new(inner); + Self { inner } + } + pub fn with_origin(inner: T, origin: Uri) -> Self { + let inner = tonic::client::Grpc::with_origin(inner, origin); + Self { inner } + } + pub fn with_interceptor( + inner: T, + interceptor: F, + ) -> ExecuteClient> + where + F: tonic::service::Interceptor, + T::ResponseBody: Default, + T: tonic::codegen::Service< + http::Request, + Response = http::Response< + >::ResponseBody, + >, + >, + , + >>::Error: Into + std::marker::Send + std::marker::Sync, + { + ExecuteClient::new(InterceptedService::new(inner, interceptor)) + } + /// Compress requests with the given encoding. + /// + /// This requires the server to support it otherwise it might respond with an + /// error. + #[must_use] + pub fn send_compressed(mut self, encoding: CompressionEncoding) -> Self { + self.inner = self.inner.send_compressed(encoding); + self + } + /// Enable decompressing responses. + #[must_use] + pub fn accept_compressed(mut self, encoding: CompressionEncoding) -> Self { + self.inner = self.inner.accept_compressed(encoding); + self + } + /// Limits the maximum size of a decoded message. + /// + /// Default: `4MB` + #[must_use] + pub fn max_decoding_message_size(mut self, limit: usize) -> Self { + self.inner = self.inner.max_decoding_message_size(limit); + self + } + /// Limits the maximum size of an encoded message. 
+ /// + /// Default: `usize::MAX` + #[must_use] + pub fn max_encoding_message_size(mut self, limit: usize) -> Self { + self.inner = self.inner.max_encoding_message_size(limit); + self + } + pub async fn run_ticket( + &mut self, + request: impl tonic::IntoRequest, + ) -> std::result::Result< + tonic::Response>, + tonic::Status, + > { + self.inner + .ready() + .await + .map_err(|e| { + tonic::Status::unknown( + format!("Service was not ready: {}", e.into()), + ) + })?; + let codec = tonic_prost::ProstCodec::default(); + let path = http::uri::PathAndQuery::from_static( + "/hellas.v1.Execute/RunTicket", + ); + let mut req = request.into_request(); + req.extensions_mut() + .insert(GrpcMethod::new("hellas.v1.Execute", "RunTicket")); + self.inner.server_streaming(req, path, codec).await + } + } +} +/// Generated server implementations. +pub mod execute_server { + #![allow( + unused_variables, + dead_code, + missing_docs, + clippy::wildcard_imports, + clippy::let_unit_value, + )] + use tonic::codegen::*; + /// Generated trait containing gRPC methods that should be implemented for use with ExecuteServer. + #[async_trait] + pub trait Execute: std::marker::Send + std::marker::Sync + 'static { + /// Server streaming response type for the RunTicket method. 
+ type RunTicketStream: tonic::codegen::tokio_stream::Stream< + Item = std::result::Result, + > + + std::marker::Send + + 'static; + async fn run_ticket( + &self, + request: tonic::Request, + ) -> std::result::Result, tonic::Status>; + } + #[derive(Debug)] + pub struct ExecuteServer { + inner: Arc, + accept_compression_encodings: EnabledCompressionEncodings, + send_compression_encodings: EnabledCompressionEncodings, + max_decoding_message_size: Option, + max_encoding_message_size: Option, + } + impl ExecuteServer { + pub fn new(inner: T) -> Self { + Self::from_arc(Arc::new(inner)) + } + pub fn from_arc(inner: Arc) -> Self { + Self { + inner, + accept_compression_encodings: Default::default(), + send_compression_encodings: Default::default(), + max_decoding_message_size: None, + max_encoding_message_size: None, + } + } + pub fn with_interceptor( + inner: T, + interceptor: F, + ) -> InterceptedService + where + F: tonic::service::Interceptor, + { + InterceptedService::new(Self::new(inner), interceptor) + } + /// Enable decompressing requests with the given encoding. + #[must_use] + pub fn accept_compressed(mut self, encoding: CompressionEncoding) -> Self { + self.accept_compression_encodings.enable(encoding); + self + } + /// Compress responses with the given encoding, if the client supports it. + #[must_use] + pub fn send_compressed(mut self, encoding: CompressionEncoding) -> Self { + self.send_compression_encodings.enable(encoding); + self + } + /// Limits the maximum size of a decoded message. + /// + /// Default: `4MB` + #[must_use] + pub fn max_decoding_message_size(mut self, limit: usize) -> Self { + self.max_decoding_message_size = Some(limit); + self + } + /// Limits the maximum size of an encoded message. 
+ /// + /// Default: `usize::MAX` + #[must_use] + pub fn max_encoding_message_size(mut self, limit: usize) -> Self { + self.max_encoding_message_size = Some(limit); + self + } + } + impl tonic::codegen::Service> for ExecuteServer + where + T: Execute, + B: Body + std::marker::Send + 'static, + B::Error: Into + std::marker::Send + 'static, + { + type Response = http::Response; + type Error = std::convert::Infallible; + type Future = BoxFuture; + fn poll_ready( + &mut self, + _cx: &mut Context<'_>, + ) -> Poll> { + Poll::Ready(Ok(())) + } + fn call(&mut self, req: http::Request) -> Self::Future { + match req.uri().path() { + "/hellas.v1.Execute/RunTicket" => { + #[allow(non_camel_case_types)] + struct RunTicketSvc(pub Arc); + impl< + T: Execute, + > tonic::server::ServerStreamingService + for RunTicketSvc { + type Response = super::WorkEvent; + type ResponseStream = T::RunTicketStream; + type Future = BoxFuture< + tonic::Response, + tonic::Status, + >; + fn call( + &mut self, + request: tonic::Request, + ) -> Self::Future { + let inner = Arc::clone(&self.0); + let fut = async move { + ::run_ticket(&inner, request).await + }; + Box::pin(fut) + } + } + let accept_compression_encodings = self.accept_compression_encodings; + let send_compression_encodings = self.send_compression_encodings; + let max_decoding_message_size = self.max_decoding_message_size; + let max_encoding_message_size = self.max_encoding_message_size; + let inner = self.inner.clone(); + let fut = async move { + let method = RunTicketSvc(inner); + let codec = tonic_prost::ProstCodec::default(); + let mut grpc = tonic::server::Grpc::new(codec) + .apply_compression_config( + accept_compression_encodings, + send_compression_encodings, + ) + .apply_max_message_size_config( + max_decoding_message_size, + max_encoding_message_size, + ); + let res = grpc.server_streaming(method, req).await; + Ok(res) + }; + Box::pin(fut) + } + _ => { + Box::pin(async move { + let mut response = http::Response::new( + 
tonic::body::Body::default(), + ); + let headers = response.headers_mut(); + headers + .insert( + tonic::Status::GRPC_STATUS, + (tonic::Code::Unimplemented as i32).into(), + ); + headers + .insert( + http::header::CONTENT_TYPE, + tonic::metadata::GRPC_CONTENT_TYPE, + ); + Ok(response) + }) + } + } + } + } + impl Clone for ExecuteServer { + fn clone(&self) -> Self { + let inner = self.inner.clone(); + Self { + inner, + accept_compression_encodings: self.accept_compression_encodings, + send_compression_encodings: self.send_compression_encodings, + max_decoding_message_size: self.max_decoding_message_size, + max_encoding_message_size: self.max_encoding_message_size, + } + } + } + /// Generated gRPC service name + pub const SERVICE_NAME: &str = "hellas.v1.Execute"; + impl tonic::server::NamedService for ExecuteServer { + const NAME: &'static str = SERVICE_NAME; + } +} diff --git a/crates/pb/src/lib.rs b/crates/pb/src/lib.rs new file mode 100644 index 0000000..5b43c6f --- /dev/null +++ b/crates/pb/src/lib.rs @@ -0,0 +1,115 @@ +//! Generated protobuf bindings for the Hellas protocol. +//! +//! The source `.proto` files live under `proto/hellas` at the workspace root. + +mod generated { + pub mod hellas { + #[cfg(feature = "courtesy")] + #[allow(dead_code)] + pub mod courtesy { + pub mod v1 { + include!("hellas.courtesy.v1.rs"); + } + } + + #[cfg(feature = "hellas")] + #[allow(dead_code)] + pub mod v1 { + include!("hellas.v1.rs"); + } + + #[cfg(feature = "opaque")] + #[allow(dead_code)] + pub mod opaque { + pub mod v1 { + include!("hellas.opaque.v1.rs"); + } + } + + #[cfg(feature = "swarm")] + #[allow(dead_code)] + pub mod swarm { + pub mod v1 { + include!("hellas.swarm.v1.rs"); + } + } + + #[cfg(feature = "symbolic")] + #[allow(dead_code)] + pub mod symbolic { + pub mod v1 { + include!("hellas.symbolic.v1.rs"); + } + } + } +} + +#[allow(unused_macros)] +macro_rules! 
service_exports { + ($($path:ident)::+, $client:ident, $server:ident) => { + #[cfg(feature = "client")] + pub use $($path)::+::$client; + #[cfg(feature = "server")] + pub use $($path)::+::$server; + }; +} + +#[cfg(feature = "hellas")] +pub mod hellas { + pub use crate::generated::hellas::v1::{ + FinishStatus, ReceiptEnvelope, RunTicketRequest, Ticket, WorkChunk, WorkEvent, WorkFailed, + WorkFinished, work_event, + }; + service_exports!(crate::generated::hellas::v1, execute_client, execute_server); +} + +#[cfg(feature = "symbolic")] +pub mod symbolic { + pub use crate::generated::hellas::symbolic::v1::SymbolicRequest; + service_exports!( + crate::generated::hellas::symbolic::v1, + symbolic_client, + symbolic_server + ); +} + +#[cfg(feature = "opaque")] +pub mod opaque { + pub use crate::generated::hellas::opaque::v1::OpaqueRequest; + service_exports!( + crate::generated::hellas::opaque::v1, + opaque_client, + opaque_server + ); +} + +#[cfg(feature = "courtesy")] +pub mod courtesy { + pub use crate::generated::hellas::courtesy::v1::{ + ChatMessage, DecodeTokensRequest, DecodeTokensResponse, GetArtifactRequest, + GetArtifactResponse, GetModelStatsRequest, GetModelStatsResponse, GetStatsRequest, + GetStatsResponse, ListModelsRequest, ListModelsResponse, ModelInfo, ModelStatus, + ModelTokenStats, PutArtifactRequest, PutArtifactResponse, QuoteChatPromptRequest, + QuoteChatPromptResponse, QuotePreparedTextRequest, QuotePreparedTextResponse, + QuotePromptRequest, QuotePromptResponse, SymbolicArtifactStart, SymbolicGenesisStart, + SymbolicStart, TokenStats, symbolic_start, + }; + service_exports!( + crate::generated::hellas::courtesy::v1, + courtesy_client, + courtesy_server + ); +} + +#[cfg(feature = "swarm")] +pub mod swarm { + pub use crate::generated::hellas::swarm::v1::{ + GetKnownPeersRequest, GetKnownPeersResponse, GetNodeInfoRequest, GetNodeInfoResponse, + Presence, + }; + service_exports!( + crate::generated::hellas::swarm::v1, + node_client, + node_server + ); +} 
diff --git a/crates/rpc/Cargo.toml b/crates/rpc/Cargo.toml index 53b9323..1cadf9b 100644 --- a/crates/rpc/Cargo.toml +++ b/crates/rpc/Cargo.toml @@ -9,15 +9,66 @@ documentation.workspace = true [features] default = [] -client = ["tonic/channel"] -server = ["tonic/server", "tonic-iroh-transport/discovery"] -compile = ["dep:tonic-prost-build"] +compression = ["tonic/gzip", "tonic/zstd"] +client = [ + "tonic/channel", + "hellas-pb/client", + "hellas-pb/hellas", + "hellas-pb/symbolic", + "hellas-pb/opaque", + "hellas-pb/courtesy", +] +discovery = [ + "client", + "dep:futures", + "dep:mainline", + "dep:tonic-iroh-transport", + "tonic-iroh-transport/discovery-mdns", + "tonic-iroh-transport/discovery-dht", +] +server = ["tonic/server", "hellas-pb/server"] +node = [ + "dep:catgrad", + "dep:catgrad-llm", + "dep:chatgrad", + "hellas-pb/hellas", + "hellas-pb/symbolic", + "hellas-pb/opaque", + "hellas-pb/courtesy", + "dep:serde", + "dep:serde_json", + "dep:tokenizers", + "dep:hf-hub", +] + +[target.'cfg(not(any(target_env = "musl", target_os = "windows")))'.dependencies] +tokenizers = { version = "0.21", features = [ + "onig", + "esaxx_fast", +], optional = true } [dependencies] -tonic-iroh-transport = { workspace = true} +catgrad = { workspace = true, default-features = false, features = [ + "serde", +], optional = true } +catgrad-llm = { workspace = true, default-features = false, optional = true } +chatgrad = { workspace = true, default-features = false, optional = true } +futures = { version = "0.3", optional = true } +futures-core = "0.3" +hellas-pb.workspace = true +hf-hub = { version = "0.5", default-features = false, features = [ + "ureq", +], optional = true } +mainline = { version = "6", optional = true } +serde = { workspace = true, optional = true } +serde_json = { workspace = true, optional = true } +thiserror = { workspace = true } +tokenizers = { version = "0.21", default-features = false, features = [ + "progressbar", + "fancy-regex", +], optional = true } 
tonic = { version = "0.14", default-features = false, features = ["codegen"] } -tonic-prost = "0.14" -prost = "0.14" +tonic-iroh-transport = { workspace = true, default-features = false, optional = true } -[build-dependencies] -tonic-prost-build = { version = "0.14", optional = true } +[dev-dependencies] +tokio.workspace = true diff --git a/crates/rpc/build.rs b/crates/rpc/build.rs index 9859470..6e51789 100644 --- a/crates/rpc/build.rs +++ b/crates/rpc/build.rs @@ -1,21 +1,20 @@ fn main() { - #[cfg(feature = "compile")] - compile(); -} - -#[cfg(feature = "compile")] -fn compile() { - println!("cargo:rerun-if-changed=proto/*.proto"); - let mut prost_config = tonic_prost_build::Config::new(); - prost_config.enable_type_names(); - - tonic_prost_build::configure() - .out_dir("src/pb") - .include_file("mod.rs") - .emit_package(true) - .build_client(cfg!(feature = "client")) - .build_server(cfg!(feature = "server")) - .build_transport(false) // we use our own transport - .compile_with_config(prost_config, &["proto/hellas.proto"], &["proto"]) - .expect("Failed to compile protos"); + // Capture git rev for version info. + // Try git from this crate's own repo first (correct for cross-workspace path deps), + // then fall back to GIT_REV env var (set by nix where git is unavailable). 
+ let manifest_dir = std::env::var("CARGO_MANIFEST_DIR").unwrap(); + let rev = std::process::Command::new("git") + .args(["rev-parse", "--short", "HEAD"]) + .current_dir(&manifest_dir) + .output() + .ok() + .filter(|o| o.status.success()) + .map(|o| String::from_utf8_lossy(&o.stdout).trim().to_string()); + if let Some(rev) = rev { + println!("cargo:rustc-env=GIT_REV={rev}"); + } else if let Ok(rev) = std::env::var("GIT_REV") { + println!("cargo:rustc-env=GIT_REV={rev}"); + } + println!("cargo:rerun-if-changed=../../.git/HEAD"); + println!("cargo:rerun-if-changed=../../.git/refs"); } diff --git a/crates/rpc/proto/execute.proto b/crates/rpc/proto/execute.proto deleted file mode 100644 index 418e510..0000000 --- a/crates/rpc/proto/execute.proto +++ /dev/null @@ -1,58 +0,0 @@ -syntax = "proto3"; - -package hellas; - -message WeightsHint { - string huggingface_model_id = 1; - string revision = 2; -} - -message LlmQuoteRequest { - string huggingface_model_id = 1; - string prompt = 2; - // Optional; default to 16 when unset/zero - uint32 max_seq = 3; -} - -message GetQuoteRequest { - oneof payload { - bytes graph = 1; - LlmQuoteRequest llm_prompt = 2; - } -} -message GetQuoteResponse { - string quote_id = 1; - string graph_id = 2; - uint64 amount = 3; - string input = 4; - WeightsHint resolved_weights = 5; -} - -message GetGraphRequest { string graph_id = 1; } -message GetGraphResponse { bytes graph = 1; } - -message ExecuteRequest { bytes quote_id = 1; } -message ExecuteResponse { - string execution_id = 1; - string quote_id = 2; -} - -message ExecuteStatusRequest { string execution_id = 1; } -message ExecuteStatusResponse { - string status = 1; - uint64 progress = 2; - bytes result = 3; - optional string decoded = 4; -} -message ExecuteProgress { - string status = 1; - uint64 progress = 2; - bytes chunk = 3; - optional string decoded = 4; -} - -message ExecuteResultRequest { string execution_id = 1; } -message ExecuteResultResponse { - bytes result = 1; - string decoded 
= 2; -} diff --git a/crates/rpc/proto/hellas.proto b/crates/rpc/proto/hellas.proto deleted file mode 100644 index cf64134..0000000 --- a/crates/rpc/proto/hellas.proto +++ /dev/null @@ -1,27 +0,0 @@ -syntax = "proto3"; - -package hellas; - -import "execute.proto"; -import "node.proto"; - -service Node { - rpc HealthCheck(HealthCheckRequest) returns (HealthCheckResponse); -} - -service Execute { - rpc GetQuote(GetQuoteRequest) returns (GetQuoteResponse); - rpc GetGraph(GetGraphRequest) returns (GetGraphResponse); - rpc Execute(ExecuteRequest) returns (ExecuteResponse); - rpc ExecuteStatus(ExecuteStatusRequest) returns (ExecuteStatusResponse); - rpc ExecuteStream(ExecuteStatusRequest) returns (stream ExecuteProgress); - rpc ExecuteResult(ExecuteResultRequest) returns (ExecuteResultResponse); -} - -message Presence { - string hf_id = 1; - string req_id = 2; - string peer_id = 3; - uint64 ttl_ms = 4; - bool is_executor = 5; -} diff --git a/crates/rpc/proto/node.proto b/crates/rpc/proto/node.proto deleted file mode 100644 index 9ad6060..0000000 --- a/crates/rpc/proto/node.proto +++ /dev/null @@ -1,10 +0,0 @@ -syntax = "proto3"; - -package hellas; - -message HealthCheckRequest {} -message HealthCheckResponse { - string version = 1; - uint64 uptime_seconds = 2; - string node_id = 3; -} diff --git a/crates/rpc/src/discovery.rs b/crates/rpc/src/discovery.rs new file mode 100644 index 0000000..e227179 --- /dev/null +++ b/crates/rpc/src/discovery.rs @@ -0,0 +1,139 @@ +use std::sync::Arc; + +use mainline::Dht; +use thiserror::Error; +use tonic_iroh_transport::iroh::Endpoint; +use tonic_iroh_transport::iroh::EndpointId; +use tonic_iroh_transport::iroh::SecretKey; +use tonic_iroh_transport::iroh::address_lookup::AddressLookupBuilderError; +use tonic_iroh_transport::iroh::address_lookup::mdns::MdnsAddressLookup; +use tonic_iroh_transport::iroh::address_lookup::pkarr::dht::DhtAddressLookup; +use tonic_iroh_transport::iroh::endpoint::{BindError, EndpointError, presets}; + +const 
MDNS_SERVICE_NAME: &str = "hellas"; + +pub struct DiscoveryBindings { + pub mdns: MdnsAddressLookup, + pub dht: Arc, +} + +pub struct DiscoveryEndpoint { + pub endpoint: Endpoint, + pub bindings: DiscoveryBindings, +} + +#[derive(Debug, Error)] +pub enum DiscoveryError { + #[error("failed to create iroh endpoint")] + BindEndpoint { + #[source] + source: BindError, + }, + #[error("failed to start mDNS discovery")] + BuildMdnsLookup { + #[source] + source: AddressLookupBuilderError, + }, + #[error("failed to initialize DHT client")] + BuildDhtClient { + #[source] + source: std::io::Error, + }, + #[error("failed to initialize DHT address lookup")] + BuildPkarrLookup { + #[source] + source: AddressLookupBuilderError, + }, + #[error("failed to access endpoint address lookup services")] + AddressLookupUnavailable { + #[source] + source: EndpointError, + }, +} + +impl DiscoveryBindings { + pub fn client(endpoint_id: EndpointId) -> Result { + Ok(Self { + mdns: build_mdns(endpoint_id, false)?, + dht: build_dht()?, + }) + } + + pub fn attach( + endpoint: &Endpoint, + advertise_mdns: bool, + publish_pkarr: bool, + ) -> Result { + let address_lookup = endpoint + .address_lookup() + .map_err(|source| DiscoveryError::AddressLookupUnavailable { source })?; + let mdns = build_mdns(endpoint.id(), advertise_mdns)?; + address_lookup.add(mdns.clone()); + + // Standalone DHT handle for the sharded-service DhtBackend; iroh's + // DhtAddressLookup builds its own Dht internally (0.98 changed the + // constructor to take a DhtBuilder rather than a shared pkarr client). 
+ let dht = build_dht()?; + + let mut dht_lookup = DhtAddressLookup::builder(); + if !publish_pkarr { + dht_lookup = dht_lookup.no_publish(); + } + let dht_lookup = dht_lookup + .build() + .map_err(|source| DiscoveryError::BuildPkarrLookup { source })?; + address_lookup.add(dht_lookup); + + Ok(Self { mdns, dht }) + } +} + +fn build_mdns( + endpoint_id: EndpointId, + advertise: bool, +) -> Result { + MdnsAddressLookup::builder() + .advertise(advertise) + .service_name(MDNS_SERVICE_NAME) + .build(endpoint_id) + .map_err(|source| DiscoveryError::BuildMdnsLookup { source }) +} + +fn build_dht() -> Result, DiscoveryError> { + Dht::client() + .map(Arc::new) + .map_err(|source| DiscoveryError::BuildDhtClient { source }) +} + +impl DiscoveryEndpoint { + pub async fn bind(secret_key: Option) -> Result { + let mut builder = Endpoint::builder(presets::N0); + if let Some(key) = secret_key { + builder = builder.secret_key(key); + } + let endpoint = builder + .bind() + .await + .map_err(|source| DiscoveryError::BindEndpoint { source })?; + let bindings = DiscoveryBindings::attach(&endpoint, false, false)?; + Ok(Self { endpoint, bindings }) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + // `DiscoveryBindings::client` internally calls `MdnsAddressLookup::builder().build()`, + // which spawns a background task and so needs a running Tokio runtime. + #[tokio::test] + async fn client_bindings_builds_unattached_resources() { + // EndpointId is an Ed25519 public key — not every 32-byte sequence + // decompresses to a valid Edwards point. Any 32-byte secret does + // yield a valid public key though, so derive one deterministically. 
+ let endpoint_id = SecretKey::from_bytes(&[1u8; 32]).public(); + let bindings = DiscoveryBindings::client(endpoint_id).expect("client bindings"); + let _ = bindings.mdns; + let _ = bindings.dht; + } +} diff --git a/crates/rpc/src/driver.rs b/crates/rpc/src/driver.rs new file mode 100644 index 0000000..36a37a4 --- /dev/null +++ b/crates/rpc/src/driver.rs @@ -0,0 +1,280 @@ +use std::pin::Pin; + +use futures_core::Stream; +use tonic::Status; +#[cfg(feature = "compression")] +use tonic::codec::CompressionEncoding; +use tonic::codegen::*; +#[cfg(feature = "discovery")] +use tonic_iroh_transport::IrohChannel; + +use crate::GRPC_MESSAGE_LIMIT; +use crate::provenance::{ExecutionProvenance, read_provenance_metadata}; +use hellas_pb::courtesy::courtesy_client::CourtesyClient; +use hellas_pb::courtesy::{ + GetArtifactRequest, GetArtifactResponse, PutArtifactRequest, PutArtifactResponse, + QuotePreparedTextRequest, QuotePreparedTextResponse, +}; +use hellas_pb::hellas::execute_client::ExecuteClient; +use hellas_pb::hellas::{RunTicketRequest, Ticket, WorkEvent}; +use hellas_pb::opaque::OpaqueRequest; +use hellas_pb::opaque::opaque_client::OpaqueClient; +use hellas_pb::symbolic::SymbolicRequest; +use hellas_pb::symbolic::symbolic_client::SymbolicClient; + +pub type ExecuteEventStream = Pin> + Send>>; + +/// Ticket response paired with the provenance the executor committed to. +/// Carried alongside `Ticket` so callers (the gateway) can +/// expose the same hashes the executor logged at quote/accept time. +#[derive(Debug)] +pub struct QuotedResponse { + pub response: Ticket, + pub provenance: ExecutionProvenance, +} + +#[derive(Debug)] +pub struct QuotedPreparedTextResponse { + pub response: QuotePreparedTextResponse, + pub provenance: ExecutionProvenance, +} + +/// Streaming execution paired with the provenance committed to at +/// quote-acceptance time. 
The producer receipt is terminal and reaches the +/// caller via the streamed `WorkFinished.receipt` field, not through +/// `ExecutionProvenance`. +pub struct StreamedExecution { + pub stream: ExecuteEventStream, + pub provenance: ExecutionProvenance, +} + +#[tonic::async_trait] +pub trait ExecuteDriver: Send { + async fn create_symbolic_ticket( + &mut self, + request: SymbolicRequest, + ) -> Result; + async fn create_opaque_ticket( + &mut self, + request: OpaqueRequest, + ) -> Result; + async fn quote_prepared_text( + &mut self, + request: QuotePreparedTextRequest, + ) -> Result; + async fn execute_streaming( + &mut self, + request: RunTicketRequest, + ) -> Result; +} + +pub struct RemoteExecuteDriver { + execute: ExecuteClient, + symbolic: Option>, + opaque: Option>, + courtesy: Option>, +} + +#[cfg(feature = "discovery")] +impl RemoteExecuteDriver { + pub fn new(channel: IrohChannel) -> Self { + Self::with_service(channel) + } +} + +impl RemoteExecuteDriver +where + T: tonic::client::GrpcService + Clone, + T::Error: Into, + T::ResponseBody: Body + Send + 'static, + ::Error: Into + Send, +{ + pub fn with_service(service: T) -> Self { + let symbolic = service.clone(); + let opaque = service.clone(); + let courtesy = service.clone(); + Self { + execute: Self::configure_execute(ExecuteClient::new(service)), + symbolic: Some(Self::configure_symbolic(SymbolicClient::new(symbolic))), + opaque: Some(Self::configure_opaque(OpaqueClient::new(opaque))), + courtesy: Some(Self::configure_courtesy(CourtesyClient::new(courtesy))), + } + } + + pub fn with_services(execute: T, symbolic: T, opaque: T, courtesy: T) -> Self { + Self { + execute: Self::configure_execute(ExecuteClient::new(execute)), + symbolic: Some(Self::configure_symbolic(SymbolicClient::new(symbolic))), + opaque: Some(Self::configure_opaque(OpaqueClient::new(opaque))), + courtesy: Some(Self::configure_courtesy(CourtesyClient::new(courtesy))), + } + } + + pub fn with_execute_and_courtesy(execute: T, courtesy: T) 
-> Self { + Self { + execute: Self::configure_execute(ExecuteClient::new(execute)), + symbolic: None, + opaque: None, + courtesy: Some(Self::configure_courtesy(CourtesyClient::new(courtesy))), + } + } + + pub fn with_execute_and_opaque(execute: T, opaque: T) -> Self { + Self { + execute: Self::configure_execute(ExecuteClient::new(execute)), + symbolic: None, + opaque: Some(Self::configure_opaque(OpaqueClient::new(opaque))), + courtesy: None, + } + } + + fn configure_execute(client: ExecuteClient) -> ExecuteClient { + let client = client + .max_decoding_message_size(GRPC_MESSAGE_LIMIT) + .max_encoding_message_size(GRPC_MESSAGE_LIMIT); + #[cfg(feature = "compression")] + let client = client + .send_compressed(CompressionEncoding::Zstd) + .accept_compressed(CompressionEncoding::Zstd); + client + } + + fn configure_symbolic(client: SymbolicClient) -> SymbolicClient { + let client = client + .max_decoding_message_size(GRPC_MESSAGE_LIMIT) + .max_encoding_message_size(GRPC_MESSAGE_LIMIT); + #[cfg(feature = "compression")] + let client = client + .send_compressed(CompressionEncoding::Zstd) + .accept_compressed(CompressionEncoding::Zstd); + client + } + + fn configure_opaque(client: OpaqueClient) -> OpaqueClient { + let client = client + .max_decoding_message_size(GRPC_MESSAGE_LIMIT) + .max_encoding_message_size(GRPC_MESSAGE_LIMIT); + #[cfg(feature = "compression")] + let client = client + .send_compressed(CompressionEncoding::Zstd) + .accept_compressed(CompressionEncoding::Zstd); + client + } + + fn configure_courtesy(client: CourtesyClient) -> CourtesyClient { + let client = client + .max_decoding_message_size(GRPC_MESSAGE_LIMIT) + .max_encoding_message_size(GRPC_MESSAGE_LIMIT); + #[cfg(feature = "compression")] + let client = client + .send_compressed(CompressionEncoding::Zstd) + .accept_compressed(CompressionEncoding::Zstd); + client + } + + pub async fn put_artifact( + &mut self, + request: PutArtifactRequest, + ) -> Result + where + T: tonic::client::GrpcService + 
Send + 'static, + T::Error: Into, + T::ResponseBody: Body + Send + 'static, + ::Error: Into + Send, + T::Future: Send, + { + let courtesy = self + .courtesy + .as_mut() + .ok_or_else(|| Status::unimplemented("courtesy service is not configured"))?; + Ok(courtesy.put_artifact(request).await?.into_inner()) + } + + pub async fn get_artifact( + &mut self, + request: GetArtifactRequest, + ) -> Result + where + T: tonic::client::GrpcService + Send + 'static, + T::Error: Into, + T::ResponseBody: Body + Send + 'static, + ::Error: Into + Send, + T::Future: Send, + { + let courtesy = self + .courtesy + .as_mut() + .ok_or_else(|| Status::unimplemented("courtesy service is not configured"))?; + Ok(courtesy.get_artifact(request).await?.into_inner()) + } +} + +#[tonic::async_trait] +impl ExecuteDriver for RemoteExecuteDriver +where + T: tonic::client::GrpcService + Send + 'static, + T::Error: Into, + T::ResponseBody: Body + Send + 'static, + ::Error: Into + Send, + T::Future: Send, +{ + async fn create_symbolic_ticket( + &mut self, + request: SymbolicRequest, + ) -> Result { + let symbolic = self + .symbolic + .as_mut() + .ok_or_else(|| Status::unimplemented("symbolic service is not configured"))?; + let resp = symbolic.create_ticket(request).await?; + let provenance = read_provenance_metadata(resp.metadata())?; + Ok(QuotedResponse { + response: resp.into_inner(), + provenance, + }) + } + + async fn create_opaque_ticket( + &mut self, + request: OpaqueRequest, + ) -> Result { + let opaque = self + .opaque + .as_mut() + .ok_or_else(|| Status::unimplemented("opaque service is not configured"))?; + let resp = opaque.create_ticket(request).await?; + let provenance = read_provenance_metadata(resp.metadata())?; + Ok(QuotedResponse { + response: resp.into_inner(), + provenance, + }) + } + + async fn quote_prepared_text( + &mut self, + request: QuotePreparedTextRequest, + ) -> Result { + let courtesy = self + .courtesy + .as_mut() + .ok_or_else(|| Status::unimplemented("courtesy service 
is not configured"))?; + let resp = courtesy.quote_prepared_text(request).await?; + let provenance = read_provenance_metadata(resp.metadata())?; + Ok(QuotedPreparedTextResponse { + response: resp.into_inner(), + provenance, + }) + } + + async fn execute_streaming( + &mut self, + request: RunTicketRequest, + ) -> Result { + let resp = self.execute.run_ticket(request).await?; + let provenance = read_provenance_metadata(resp.metadata())?; + Ok(StreamedExecution { + stream: Box::pin(resp.into_inner()), + provenance, + }) + } +} diff --git a/crates/rpc/src/error.rs b/crates/rpc/src/error.rs new file mode 100644 index 0000000..a7296dc --- /dev/null +++ b/crates/rpc/src/error.rs @@ -0,0 +1,118 @@ +use crate::TokenBytesError; +use crate::model::ModelAssetsError; +use catgrad_llm::LLMError; +use thiserror::Error; +use tonic::Status; + +/// Error returned when the backend fails to initialize. +/// +/// Defined here (rather than alongside the concrete backend) so that +/// `ExecutorError` — which the CLI carries across feature configurations — +/// stays in a single backend-free crate. +#[derive(Clone, Debug, Error)] +#[error("{message}")] +pub struct BackendInitError { + pub message: String, +} + +impl BackendInitError { + pub fn new(message: impl Into) -> Self { + Self { + message: message.into(), + } + } +} + +/// Errors from the in-memory quote/execution state machine. 
+#[derive(Debug, Error)] +pub enum StateError { + #[error("quote not found: {0}")] + QuoteNotFound(String), + #[error("quote expired: {0}")] + QuoteExpired(String), +} + +#[derive(Debug, Error)] +pub enum ExecutorError { + #[error("executor channel closed")] + ChannelClosed, + #[error("execution queue is full (capacity {capacity})")] + QueueFull { capacity: usize }, + #[error("invalid quote request: {0}")] + InvalidQuoteRequest(String), + #[error(transparent)] + BackendInit(#[from] BackendInitError), + #[error(transparent)] + ModelAssets(#[from] ModelAssetsError), + #[error("LLM error: {0}")] + Llm(#[from] LLMError), + #[error("weights not ready for {0}")] + WeightsNotReady(String), + #[error("weights error: {0}")] + WeightsError(String), + #[error("artifact not found: {0}")] + ArtifactNotFound(String), + #[error("artifact store error: {0}")] + ArtifactStore(String), + #[error("policy denied: {0}")] + PolicyDenied(String), + #[error("invalid token payload: {0}")] + InvalidTokenPayload(String), + #[error(transparent)] + TokenBytes(#[from] TokenBytesError), + #[error( + "program was built for dtype {request:?} but this executor only supports {supported:?}; rebuild the program at one of the supported dtypes or run an executor with --dtype {request:?} in its supported set" + )] + DtypeNotSupported { + request: catgrad::prelude::Dtype, + supported: Vec, + }, + #[error(transparent)] + State(#[from] StateError), +} + +fn model_assets_status_code(err: &ModelAssetsError) -> tonic::Code { + match err { + ModelAssetsError::Spec(_) + | ModelAssetsError::ParseModelConfig { .. } + | ModelAssetsError::ConstructModelConfig { .. } + | ModelAssetsError::NegativePromptTokenId { .. } + | ModelAssetsError::NegativeStopTokenId { .. } => tonic::Code::InvalidArgument, + _ => tonic::Code::Internal, + } +} + +fn executor_status_code(err: &ExecutorError) -> tonic::Code { + match err { + ExecutorError::QueueFull { .. 
} => tonic::Code::ResourceExhausted, + ExecutorError::InvalidQuoteRequest(_) + | ExecutorError::InvalidTokenPayload(_) + | ExecutorError::TokenBytes(_) => tonic::Code::InvalidArgument, + ExecutorError::DtypeNotSupported { .. } => tonic::Code::FailedPrecondition, + ExecutorError::ModelAssets(model_err) => model_assets_status_code(model_err), + ExecutorError::WeightsNotReady(_) | ExecutorError::State(StateError::QuoteExpired(_)) => { + tonic::Code::FailedPrecondition + } + ExecutorError::PolicyDenied(_) => tonic::Code::PermissionDenied, + ExecutorError::ArtifactNotFound(_) | ExecutorError::State(StateError::QuoteNotFound(_)) => { + tonic::Code::NotFound + } + ExecutorError::ChannelClosed + | ExecutorError::BackendInit(_) + | ExecutorError::Llm(_) + | ExecutorError::WeightsError(_) + | ExecutorError::ArtifactStore(_) => tonic::Code::Internal, + } +} + +impl From for Status { + fn from(err: ModelAssetsError) -> Self { + Status::new(model_assets_status_code(&err), err.to_string()) + } +} + +impl From for Status { + fn from(err: ExecutorError) -> Self { + Status::new(executor_status_code(&err), err.to_string()) + } +} diff --git a/crates/rpc/src/lib.rs b/crates/rpc/src/lib.rs index ffaf208..794ab63 100644 --- a/crates/rpc/src/lib.rs +++ b/crates/rpc/src/lib.rs @@ -1 +1,96 @@ -pub mod pb; +pub const VERSION: &str = env!("CARGO_PKG_VERSION"); +pub const GIT_REV: &str = match option_env!("GIT_REV") { + Some(rev) => rev, + None => "unknown", +}; + +#[cfg(feature = "discovery")] +pub mod discovery; +#[cfg(feature = "client")] +pub mod driver; +#[cfg(feature = "node")] +pub mod error; +#[cfg(feature = "node")] +pub mod model; +#[cfg(feature = "node")] +pub mod policy; +pub mod provenance; +pub mod service; +pub mod spec; + +pub use spec::ModelSpec; + +#[cfg(feature = "node")] +pub use error::ExecutorError; +#[cfg(feature = "node")] +pub use model::ModelAssetsError; + +/// Default bound on the in-memory execution queue carried by `hellas_executor::Executor`. 
+#[cfg(feature = "node")] +pub const DEFAULT_EXECUTION_QUEUE_CAPACITY: usize = 8; + +// Graph execution requests can carry full serialized model graphs for large models. +pub const GRPC_MESSAGE_LIMIT: usize = 128 * 1024 * 1024; +const TOKEN_BYTES_LEN: usize = std::mem::size_of::(); + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub struct TokenBytesError { + len: usize, +} + +impl std::fmt::Display for TokenBytesError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!( + f, + "token byte payload length {} is not divisible by 4", + self.len + ) + } +} + +impl std::error::Error for TokenBytesError {} + +impl From for tonic::Status { + fn from(err: TokenBytesError) -> Self { + tonic::Status::invalid_argument(err.to_string()) + } +} + +pub fn encode_token_ids(token_ids: &[u32]) -> Vec { + let mut bytes = Vec::with_capacity(token_ids.len() * TOKEN_BYTES_LEN); + for token_id in token_ids { + bytes.extend_from_slice(&token_id.to_le_bytes()); + } + bytes +} + +pub fn decode_token_ids(bytes: &[u8]) -> Result, TokenBytesError> { + let (chunks, remainder) = bytes.as_chunks::(); + if !remainder.is_empty() { + return Err(TokenBytesError { len: bytes.len() }); + } + + Ok(chunks + .iter() + .map(|chunk| u32::from_le_bytes(*chunk)) + .collect()) +} + +#[cfg(test)] +mod tests { + use super::{TokenBytesError, decode_token_ids, encode_token_ids}; + + #[test] + fn token_ids_round_trip_through_bytes() { + let token_ids = [1, 42, u32::MAX, 7]; + let encoded = encode_token_ids(&token_ids); + let decoded = decode_token_ids(&encoded).expect("token bytes should decode"); + assert_eq!(decoded, token_ids); + } + + #[test] + fn decode_rejects_partial_token_bytes() { + let err = decode_token_ids(&[1, 2, 3]).expect_err("partial token bytes must fail"); + assert_eq!(err, TokenBytesError { len: 3 }); + } +} diff --git a/crates/rpc/src/model/assets.rs b/crates/rpc/src/model/assets.rs new file mode 100644 index 0000000..8e2eb5b --- /dev/null +++ 
b/crates/rpc/src/model/assets.rs @@ -0,0 +1,172 @@ +use std::sync::Arc; + +use catgrad::prelude::Dtype; +use catgrad_llm::LLMError; +use catgrad_llm::utils::{get_model, get_model_architecture, get_model_chat_template}; +use chatgrad::types::Message; +use chatgrad::{PreparedPrompt, RenderChatTemplateOptions}; +use hellas_pb::courtesy::{ + QuotePreparedTextRequest, SymbolicGenesisStart, SymbolicStart, symbolic_start, +}; +use serde_json::Value; +use tokenizers::Tokenizer; + +use super::config::encode_i32_tokens; +use super::hf::get_model_metadata_files; +use super::{ModelAssetsError, Result}; +use crate::spec::ModelSpec; + +pub struct ModelAssets { + model: ModelSpec, + config: Value, + tokenizer: Arc, + tokenizer_config: Arc, + chat_template: Option>, + stop_token_ids: Arc<[i32]>, + dtype: Dtype, +} + +impl ModelAssets { + pub fn load(model_name: &str, dtype: Dtype) -> Result { + let model = ModelSpec::parse(model_name)?; + let (config_path, tokenizer_path, tokenizer_config_path) = + get_model_metadata_files(&model)?; + let config_bytes = + std::fs::read(&config_path).map_err(|source| ModelAssetsError::ReadModelConfig { + path: config_path.clone(), + source, + })?; + let config: Value = serde_json::from_slice(&config_bytes) + .map_err(|source| ModelAssetsError::ParseModelConfig { source })?; + let tokenizer_config_bytes = std::fs::read(&tokenizer_config_path).map_err(|source| { + ModelAssetsError::ReadModelConfig { + path: tokenizer_config_path.clone(), + source, + } + })?; + let tokenizer_config: Value = serde_json::from_slice(&tokenizer_config_bytes) + .map_err(|source| ModelAssetsError::ParseModelConfig { source })?; + + let graph_model = get_model(&config, 1, None, dtype) + .map_err(|source| ModelAssetsError::ConstructModelConfig { source })?; + let stop_token_ids: Vec = graph_model.config().get_eos_token_ids(); + + let tokenizer = Tokenizer::from_file(&tokenizer_path).map_err(|source| { + ModelAssetsError::LoadTokenizer { + path: tokenizer_path, + source, + } + 
})?; + + let chat_template = get_model_chat_template(&model.id, &model.revision) + .ok() + .map(Arc::::from); + + Ok(Self { + model, + config, + tokenizer: Arc::new(tokenizer), + tokenizer_config: Arc::new(tokenizer_config), + chat_template, + stop_token_ids: Arc::from(stop_token_ids.as_slice()), + dtype, + }) + } + + pub fn build_quote_prepared_text_request( + &self, + prepared_prompt: &PreparedPrompt, + max_seq: u32, + ) -> Result { + let input_ids = encode_i32_tokens(&prepared_prompt.input_ids, |token| { + ModelAssetsError::NegativePromptTokenId { token } + })?; + let stop_token_ids = encode_i32_tokens(&prepared_prompt.stop_token_ids, |token| { + ModelAssetsError::NegativeStopTokenId { token } + })?; + + Ok(QuotePreparedTextRequest { + huggingface_model_id: self.model.id.clone(), + huggingface_revision: self.model.revision.clone(), + prompt_token_ids: input_ids, + max_new_tokens: max_seq, + stop_token_ids, + start: Some(SymbolicStart { + kind: Some(symbolic_start::Kind::Genesis(SymbolicGenesisStart {})), + }), + accept_dtypes: vec![dtype_to_wire(self.dtype).to_string()], + }) + } + + pub fn has_chat_template(&self) -> bool { + self.chat_template.is_some() + } + + pub fn prepare_chat(&self, messages: &[Message]) -> Result { + let template = self.chat_template.as_deref().ok_or_else(|| { + ModelAssetsError::PreparePromptRequest { + source: LLMError::InvalidModelConfig("model has no chat template".to_string()), + } + })?; + PreparedPrompt::from_messages( + &self.tokenizer, + template, + &self.tokenizer_config, + messages, + &self.stop_token_ids, + ) + .map_err(|source| ModelAssetsError::PreparePromptRequest { source }) + } + + pub fn prepare_chat_with_options( + &self, + messages: &[Message], + tools: Option<&[serde_json::Value]>, + enable_thinking: bool, + ) -> Result { + let template = self.chat_template.as_deref().ok_or_else(|| { + ModelAssetsError::PreparePromptRequest { + source: LLMError::InvalidModelConfig("model has no chat template".to_string()), + } + })?; 
+ PreparedPrompt::from_messages_with_options( + &self.tokenizer, + template, + &self.tokenizer_config, + messages, + &self.stop_token_ids, + RenderChatTemplateOptions { + enable_thinking, + tools, + }, + ) + .map_err(|source| ModelAssetsError::PreparePromptRequest { source }) + } + + pub fn prepare_plain(&self, prompt: &str) -> Result { + PreparedPrompt::from_prompt(&self.tokenizer, prompt, &self.stop_token_ids) + .map_err(|source| ModelAssetsError::PreparePromptRequest { source }) + } + + pub fn decode_tokens(&self, token_ids: &[u32]) -> Result { + self.tokenizer + .decode(token_ids, false) + .map_err(|source| ModelAssetsError::DecodeTokens { source }) + } + + pub fn architecture(&self) -> Result { + get_model_architecture(&self.config) + .map(str::to_string) + .map_err(|source| ModelAssetsError::PreparePromptRequest { source }) + } +} + +fn dtype_to_wire(dtype: Dtype) -> &'static str { + match dtype { + Dtype::F32 => "f32", + Dtype::F16 => "f16", + Dtype::BF16 => "bf16", + Dtype::F8 => "f8", + Dtype::U32 => "u32", + } +} diff --git a/crates/rpc/src/model/config.rs b/crates/rpc/src/model/config.rs new file mode 100644 index 0000000..90b586a --- /dev/null +++ b/crates/rpc/src/model/config.rs @@ -0,0 +1,11 @@ +use super::{ModelAssetsError, Result}; + +pub(super) fn encode_i32_tokens( + token_ids: &[i32], + make_error: impl Fn(i32) -> ModelAssetsError, +) -> Result> { + token_ids + .iter() + .map(|&token| u32::try_from(token).map_err(|_| make_error(token))) + .collect() +} diff --git a/crates/rpc/src/model/hf.rs b/crates/rpc/src/model/hf.rs new file mode 100644 index 0000000..e1fca63 --- /dev/null +++ b/crates/rpc/src/model/hf.rs @@ -0,0 +1,43 @@ +use std::path::PathBuf; + +use hf_hub::api::sync::ApiBuilder; +use hf_hub::{Repo, RepoType}; + +use super::{ModelAssetsError, Result}; +use crate::spec::ModelSpec; + +pub(super) fn get_model_metadata_files(model: &ModelSpec) -> Result<(PathBuf, PathBuf, PathBuf)> { + let mut builder = ApiBuilder::from_env(); + let env_token 
= std::env::var("HF_TOKEN") + .ok() + .or_else(|| std::env::var("HUGGING_FACE_HUB_TOKEN").ok()) + .map(|token| token.trim().to_string()) + .filter(|token| !token.is_empty()); + if let Some(token) = env_token { + builder = builder.with_token(Some(token)); + } + + let api = builder + .build() + .map_err(|source| ModelAssetsError::BuildHfApi { source })?; + let repo = api.repo(Repo::with_revision( + model.id.clone(), + RepoType::Model, + model.revision.clone(), + )); + + let fetch = |file: &'static str| { + repo.get(file) + .map_err(|source| ModelAssetsError::FetchModelMetadata { + model_id: model.id.clone(), + revision: model.revision.clone(), + file, + source, + }) + }; + let config = fetch("config.json")?; + let tokenizer = fetch("tokenizer.json")?; + let tokenizer_config = fetch("tokenizer_config.json")?; + + Ok((config, tokenizer, tokenizer_config)) +} diff --git a/crates/rpc/src/model/mod.rs b/crates/rpc/src/model/mod.rs new file mode 100644 index 0000000..afd4111 --- /dev/null +++ b/crates/rpc/src/model/mod.rs @@ -0,0 +1,81 @@ +mod assets; +mod config; +mod hf; + +use std::path::PathBuf; + +use catgrad_llm::LLMError; +use hf_hub::api::sync::ApiError; +use thiserror::Error; +use tokenizers::Error as TokenizerError; + +use crate::spec::ModelSpecError; + +pub use assets::ModelAssets; + +type Result = std::result::Result; + +#[derive(Debug, Error)] +pub enum ModelAssetsError { + #[error(transparent)] + Spec(#[from] ModelSpecError), + #[error("failed to initialize Hugging Face API")] + BuildHfApi { + #[source] + source: ApiError, + }, + #[error("failed to fetch {file} for {model_id}@{revision}")] + FetchModelMetadata { + model_id: String, + revision: String, + file: &'static str, + #[source] + source: ApiError, + }, + #[error("failed to read model config {path:?}")] + ReadModelConfig { + path: PathBuf, + #[source] + source: std::io::Error, + }, + #[error("failed to parse model config JSON")] + ParseModelConfig { + #[source] + source: serde_json::Error, + }, + 
#[error("failed to construct model config")] + ConstructModelConfig { + #[source] + source: LLMError, + }, + #[error("failed to load tokenizer {path:?}")] + LoadTokenizer { + path: PathBuf, + #[source] + source: TokenizerError, + }, + #[error("failed to prepare prompt request")] + PreparePromptRequest { + #[source] + source: LLMError, + }, + #[error("negative prompt token id {token} cannot be encoded")] + NegativePromptTokenId { token: i32 }, + #[error("negative stop token id {token} cannot be encoded")] + NegativeStopTokenId { token: i32 }, + #[error("failed to build program model")] + BuildProgramModel { + #[source] + source: LLMError, + }, + #[error("failed to serialize program")] + SerializeProgram { + #[source] + source: LLMError, + }, + #[error("failed to decode tokens")] + DecodeTokens { + #[source] + source: TokenizerError, + }, +} diff --git a/crates/rpc/src/pb/hellas.rs b/crates/rpc/src/pb/hellas.rs deleted file mode 100644 index b41d523..0000000 --- a/crates/rpc/src/pb/hellas.rs +++ /dev/null @@ -1,1231 +0,0 @@ -// This file is @generated by prost-build. 
-#[derive(Clone, PartialEq, Eq, Hash, ::prost::Message)] -pub struct WeightsHint { - #[prost(string, tag = "1")] - pub huggingface_model_id: ::prost::alloc::string::String, - #[prost(string, tag = "2")] - pub revision: ::prost::alloc::string::String, -} -impl ::prost::Name for WeightsHint { - const NAME: &'static str = "WeightsHint"; - const PACKAGE: &'static str = "hellas"; - fn full_name() -> ::prost::alloc::string::String { - "hellas.WeightsHint".into() - } - fn type_url() -> ::prost::alloc::string::String { - "/hellas.WeightsHint".into() - } -} -#[derive(Clone, PartialEq, Eq, Hash, ::prost::Message)] -pub struct LlmQuoteRequest { - #[prost(string, tag = "1")] - pub huggingface_model_id: ::prost::alloc::string::String, - #[prost(string, tag = "2")] - pub prompt: ::prost::alloc::string::String, - /// Optional; default to 16 when unset/zero - #[prost(uint32, tag = "3")] - pub max_seq: u32, -} -impl ::prost::Name for LlmQuoteRequest { - const NAME: &'static str = "LlmQuoteRequest"; - const PACKAGE: &'static str = "hellas"; - fn full_name() -> ::prost::alloc::string::String { - "hellas.LlmQuoteRequest".into() - } - fn type_url() -> ::prost::alloc::string::String { - "/hellas.LlmQuoteRequest".into() - } -} -#[derive(Clone, PartialEq, Eq, Hash, ::prost::Message)] -pub struct GetQuoteRequest { - #[prost(oneof = "get_quote_request::Payload", tags = "1, 2")] - pub payload: ::core::option::Option, -} -/// Nested message and enum types in `GetQuoteRequest`. 
-pub mod get_quote_request { - #[derive(Clone, PartialEq, Eq, Hash, ::prost::Oneof)] - pub enum Payload { - #[prost(bytes, tag = "1")] - Graph(::prost::alloc::vec::Vec), - #[prost(message, tag = "2")] - LlmPrompt(super::LlmQuoteRequest), - } -} -impl ::prost::Name for GetQuoteRequest { - const NAME: &'static str = "GetQuoteRequest"; - const PACKAGE: &'static str = "hellas"; - fn full_name() -> ::prost::alloc::string::String { - "hellas.GetQuoteRequest".into() - } - fn type_url() -> ::prost::alloc::string::String { - "/hellas.GetQuoteRequest".into() - } -} -#[derive(Clone, PartialEq, Eq, Hash, ::prost::Message)] -pub struct GetQuoteResponse { - #[prost(string, tag = "1")] - pub quote_id: ::prost::alloc::string::String, - #[prost(string, tag = "2")] - pub graph_id: ::prost::alloc::string::String, - #[prost(uint64, tag = "3")] - pub amount: u64, - #[prost(string, tag = "4")] - pub input: ::prost::alloc::string::String, - #[prost(message, optional, tag = "5")] - pub resolved_weights: ::core::option::Option, -} -impl ::prost::Name for GetQuoteResponse { - const NAME: &'static str = "GetQuoteResponse"; - const PACKAGE: &'static str = "hellas"; - fn full_name() -> ::prost::alloc::string::String { - "hellas.GetQuoteResponse".into() - } - fn type_url() -> ::prost::alloc::string::String { - "/hellas.GetQuoteResponse".into() - } -} -#[derive(Clone, PartialEq, Eq, Hash, ::prost::Message)] -pub struct GetGraphRequest { - #[prost(string, tag = "1")] - pub graph_id: ::prost::alloc::string::String, -} -impl ::prost::Name for GetGraphRequest { - const NAME: &'static str = "GetGraphRequest"; - const PACKAGE: &'static str = "hellas"; - fn full_name() -> ::prost::alloc::string::String { - "hellas.GetGraphRequest".into() - } - fn type_url() -> ::prost::alloc::string::String { - "/hellas.GetGraphRequest".into() - } -} -#[derive(Clone, PartialEq, Eq, Hash, ::prost::Message)] -pub struct GetGraphResponse { - #[prost(bytes = "vec", tag = "1")] - pub graph: ::prost::alloc::vec::Vec, -} 
-impl ::prost::Name for GetGraphResponse { - const NAME: &'static str = "GetGraphResponse"; - const PACKAGE: &'static str = "hellas"; - fn full_name() -> ::prost::alloc::string::String { - "hellas.GetGraphResponse".into() - } - fn type_url() -> ::prost::alloc::string::String { - "/hellas.GetGraphResponse".into() - } -} -#[derive(Clone, PartialEq, Eq, Hash, ::prost::Message)] -pub struct ExecuteRequest { - #[prost(bytes = "vec", tag = "1")] - pub quote_id: ::prost::alloc::vec::Vec, -} -impl ::prost::Name for ExecuteRequest { - const NAME: &'static str = "ExecuteRequest"; - const PACKAGE: &'static str = "hellas"; - fn full_name() -> ::prost::alloc::string::String { - "hellas.ExecuteRequest".into() - } - fn type_url() -> ::prost::alloc::string::String { - "/hellas.ExecuteRequest".into() - } -} -#[derive(Clone, PartialEq, Eq, Hash, ::prost::Message)] -pub struct ExecuteResponse { - #[prost(string, tag = "1")] - pub execution_id: ::prost::alloc::string::String, - #[prost(string, tag = "2")] - pub quote_id: ::prost::alloc::string::String, -} -impl ::prost::Name for ExecuteResponse { - const NAME: &'static str = "ExecuteResponse"; - const PACKAGE: &'static str = "hellas"; - fn full_name() -> ::prost::alloc::string::String { - "hellas.ExecuteResponse".into() - } - fn type_url() -> ::prost::alloc::string::String { - "/hellas.ExecuteResponse".into() - } -} -#[derive(Clone, PartialEq, Eq, Hash, ::prost::Message)] -pub struct ExecuteStatusRequest { - #[prost(string, tag = "1")] - pub execution_id: ::prost::alloc::string::String, -} -impl ::prost::Name for ExecuteStatusRequest { - const NAME: &'static str = "ExecuteStatusRequest"; - const PACKAGE: &'static str = "hellas"; - fn full_name() -> ::prost::alloc::string::String { - "hellas.ExecuteStatusRequest".into() - } - fn type_url() -> ::prost::alloc::string::String { - "/hellas.ExecuteStatusRequest".into() - } -} -#[derive(Clone, PartialEq, Eq, Hash, ::prost::Message)] -pub struct ExecuteStatusResponse { - #[prost(string, tag = 
"1")] - pub status: ::prost::alloc::string::String, - #[prost(uint64, tag = "2")] - pub progress: u64, - #[prost(bytes = "vec", tag = "3")] - pub result: ::prost::alloc::vec::Vec, - #[prost(string, optional, tag = "4")] - pub decoded: ::core::option::Option<::prost::alloc::string::String>, -} -impl ::prost::Name for ExecuteStatusResponse { - const NAME: &'static str = "ExecuteStatusResponse"; - const PACKAGE: &'static str = "hellas"; - fn full_name() -> ::prost::alloc::string::String { - "hellas.ExecuteStatusResponse".into() - } - fn type_url() -> ::prost::alloc::string::String { - "/hellas.ExecuteStatusResponse".into() - } -} -#[derive(Clone, PartialEq, Eq, Hash, ::prost::Message)] -pub struct ExecuteProgress { - #[prost(string, tag = "1")] - pub status: ::prost::alloc::string::String, - #[prost(uint64, tag = "2")] - pub progress: u64, - #[prost(bytes = "vec", tag = "3")] - pub chunk: ::prost::alloc::vec::Vec, - #[prost(string, optional, tag = "4")] - pub decoded: ::core::option::Option<::prost::alloc::string::String>, -} -impl ::prost::Name for ExecuteProgress { - const NAME: &'static str = "ExecuteProgress"; - const PACKAGE: &'static str = "hellas"; - fn full_name() -> ::prost::alloc::string::String { - "hellas.ExecuteProgress".into() - } - fn type_url() -> ::prost::alloc::string::String { - "/hellas.ExecuteProgress".into() - } -} -#[derive(Clone, PartialEq, Eq, Hash, ::prost::Message)] -pub struct ExecuteResultRequest { - #[prost(string, tag = "1")] - pub execution_id: ::prost::alloc::string::String, -} -impl ::prost::Name for ExecuteResultRequest { - const NAME: &'static str = "ExecuteResultRequest"; - const PACKAGE: &'static str = "hellas"; - fn full_name() -> ::prost::alloc::string::String { - "hellas.ExecuteResultRequest".into() - } - fn type_url() -> ::prost::alloc::string::String { - "/hellas.ExecuteResultRequest".into() - } -} -#[derive(Clone, PartialEq, Eq, Hash, ::prost::Message)] -pub struct ExecuteResultResponse { - #[prost(bytes = "vec", tag = "1")] 
- pub result: ::prost::alloc::vec::Vec, - #[prost(string, tag = "2")] - pub decoded: ::prost::alloc::string::String, -} -impl ::prost::Name for ExecuteResultResponse { - const NAME: &'static str = "ExecuteResultResponse"; - const PACKAGE: &'static str = "hellas"; - fn full_name() -> ::prost::alloc::string::String { - "hellas.ExecuteResultResponse".into() - } - fn type_url() -> ::prost::alloc::string::String { - "/hellas.ExecuteResultResponse".into() - } -} -#[derive(Clone, Copy, PartialEq, Eq, Hash, ::prost::Message)] -pub struct HealthCheckRequest {} -impl ::prost::Name for HealthCheckRequest { - const NAME: &'static str = "HealthCheckRequest"; - const PACKAGE: &'static str = "hellas"; - fn full_name() -> ::prost::alloc::string::String { - "hellas.HealthCheckRequest".into() - } - fn type_url() -> ::prost::alloc::string::String { - "/hellas.HealthCheckRequest".into() - } -} -#[derive(Clone, PartialEq, Eq, Hash, ::prost::Message)] -pub struct HealthCheckResponse { - #[prost(string, tag = "1")] - pub version: ::prost::alloc::string::String, - #[prost(uint64, tag = "2")] - pub uptime_seconds: u64, - #[prost(string, tag = "3")] - pub node_id: ::prost::alloc::string::String, -} -impl ::prost::Name for HealthCheckResponse { - const NAME: &'static str = "HealthCheckResponse"; - const PACKAGE: &'static str = "hellas"; - fn full_name() -> ::prost::alloc::string::String { - "hellas.HealthCheckResponse".into() - } - fn type_url() -> ::prost::alloc::string::String { - "/hellas.HealthCheckResponse".into() - } -} -#[derive(Clone, PartialEq, Eq, Hash, ::prost::Message)] -pub struct Presence { - #[prost(string, tag = "1")] - pub hf_id: ::prost::alloc::string::String, - #[prost(string, tag = "2")] - pub req_id: ::prost::alloc::string::String, - #[prost(string, tag = "3")] - pub peer_id: ::prost::alloc::string::String, - #[prost(uint64, tag = "4")] - pub ttl_ms: u64, - #[prost(bool, tag = "5")] - pub is_executor: bool, -} -impl ::prost::Name for Presence { - const NAME: &'static str 
= "Presence"; - const PACKAGE: &'static str = "hellas"; - fn full_name() -> ::prost::alloc::string::String { - "hellas.Presence".into() - } - fn type_url() -> ::prost::alloc::string::String { - "/hellas.Presence".into() - } -} -/// Generated client implementations. -pub mod node_client { - #![allow( - unused_variables, - dead_code, - missing_docs, - clippy::wildcard_imports, - clippy::let_unit_value, - )] - use tonic::codegen::*; - use tonic::codegen::http::Uri; - #[derive(Debug, Clone)] - pub struct NodeClient { - inner: tonic::client::Grpc, - } - impl NodeClient - where - T: tonic::client::GrpcService, - T::Error: Into, - T::ResponseBody: Body + std::marker::Send + 'static, - ::Error: Into + std::marker::Send, - { - pub fn new(inner: T) -> Self { - let inner = tonic::client::Grpc::new(inner); - Self { inner } - } - pub fn with_origin(inner: T, origin: Uri) -> Self { - let inner = tonic::client::Grpc::with_origin(inner, origin); - Self { inner } - } - pub fn with_interceptor( - inner: T, - interceptor: F, - ) -> NodeClient> - where - F: tonic::service::Interceptor, - T::ResponseBody: Default, - T: tonic::codegen::Service< - http::Request, - Response = http::Response< - >::ResponseBody, - >, - >, - , - >>::Error: Into + std::marker::Send + std::marker::Sync, - { - NodeClient::new(InterceptedService::new(inner, interceptor)) - } - /// Compress requests with the given encoding. - /// - /// This requires the server to support it otherwise it might respond with an - /// error. - #[must_use] - pub fn send_compressed(mut self, encoding: CompressionEncoding) -> Self { - self.inner = self.inner.send_compressed(encoding); - self - } - /// Enable decompressing responses. - #[must_use] - pub fn accept_compressed(mut self, encoding: CompressionEncoding) -> Self { - self.inner = self.inner.accept_compressed(encoding); - self - } - /// Limits the maximum size of a decoded message. 
- /// - /// Default: `4MB` - #[must_use] - pub fn max_decoding_message_size(mut self, limit: usize) -> Self { - self.inner = self.inner.max_decoding_message_size(limit); - self - } - /// Limits the maximum size of an encoded message. - /// - /// Default: `usize::MAX` - #[must_use] - pub fn max_encoding_message_size(mut self, limit: usize) -> Self { - self.inner = self.inner.max_encoding_message_size(limit); - self - } - pub async fn health_check( - &mut self, - request: impl tonic::IntoRequest, - ) -> std::result::Result< - tonic::Response, - tonic::Status, - > { - self.inner - .ready() - .await - .map_err(|e| { - tonic::Status::unknown( - format!("Service was not ready: {}", e.into()), - ) - })?; - let codec = tonic_prost::ProstCodec::default(); - let path = http::uri::PathAndQuery::from_static("/hellas.Node/HealthCheck"); - let mut req = request.into_request(); - req.extensions_mut().insert(GrpcMethod::new("hellas.Node", "HealthCheck")); - self.inner.unary(req, path, codec).await - } - } -} -/// Generated server implementations. -pub mod node_server { - #![allow( - unused_variables, - dead_code, - missing_docs, - clippy::wildcard_imports, - clippy::let_unit_value, - )] - use tonic::codegen::*; - /// Generated trait containing gRPC methods that should be implemented for use with NodeServer. 
- #[async_trait] - pub trait Node: std::marker::Send + std::marker::Sync + 'static { - async fn health_check( - &self, - request: tonic::Request, - ) -> std::result::Result< - tonic::Response, - tonic::Status, - >; - } - #[derive(Debug)] - pub struct NodeServer { - inner: Arc, - accept_compression_encodings: EnabledCompressionEncodings, - send_compression_encodings: EnabledCompressionEncodings, - max_decoding_message_size: Option, - max_encoding_message_size: Option, - } - impl NodeServer { - pub fn new(inner: T) -> Self { - Self::from_arc(Arc::new(inner)) - } - pub fn from_arc(inner: Arc) -> Self { - Self { - inner, - accept_compression_encodings: Default::default(), - send_compression_encodings: Default::default(), - max_decoding_message_size: None, - max_encoding_message_size: None, - } - } - pub fn with_interceptor( - inner: T, - interceptor: F, - ) -> InterceptedService - where - F: tonic::service::Interceptor, - { - InterceptedService::new(Self::new(inner), interceptor) - } - /// Enable decompressing requests with the given encoding. - #[must_use] - pub fn accept_compressed(mut self, encoding: CompressionEncoding) -> Self { - self.accept_compression_encodings.enable(encoding); - self - } - /// Compress responses with the given encoding, if the client supports it. - #[must_use] - pub fn send_compressed(mut self, encoding: CompressionEncoding) -> Self { - self.send_compression_encodings.enable(encoding); - self - } - /// Limits the maximum size of a decoded message. - /// - /// Default: `4MB` - #[must_use] - pub fn max_decoding_message_size(mut self, limit: usize) -> Self { - self.max_decoding_message_size = Some(limit); - self - } - /// Limits the maximum size of an encoded message. 
- /// - /// Default: `usize::MAX` - #[must_use] - pub fn max_encoding_message_size(mut self, limit: usize) -> Self { - self.max_encoding_message_size = Some(limit); - self - } - } - impl tonic::codegen::Service> for NodeServer - where - T: Node, - B: Body + std::marker::Send + 'static, - B::Error: Into + std::marker::Send + 'static, - { - type Response = http::Response; - type Error = std::convert::Infallible; - type Future = BoxFuture; - fn poll_ready( - &mut self, - _cx: &mut Context<'_>, - ) -> Poll> { - Poll::Ready(Ok(())) - } - fn call(&mut self, req: http::Request) -> Self::Future { - match req.uri().path() { - "/hellas.Node/HealthCheck" => { - #[allow(non_camel_case_types)] - struct HealthCheckSvc(pub Arc); - impl tonic::server::UnaryService - for HealthCheckSvc { - type Response = super::HealthCheckResponse; - type Future = BoxFuture< - tonic::Response, - tonic::Status, - >; - fn call( - &mut self, - request: tonic::Request, - ) -> Self::Future { - let inner = Arc::clone(&self.0); - let fut = async move { - ::health_check(&inner, request).await - }; - Box::pin(fut) - } - } - let accept_compression_encodings = self.accept_compression_encodings; - let send_compression_encodings = self.send_compression_encodings; - let max_decoding_message_size = self.max_decoding_message_size; - let max_encoding_message_size = self.max_encoding_message_size; - let inner = self.inner.clone(); - let fut = async move { - let method = HealthCheckSvc(inner); - let codec = tonic_prost::ProstCodec::default(); - let mut grpc = tonic::server::Grpc::new(codec) - .apply_compression_config( - accept_compression_encodings, - send_compression_encodings, - ) - .apply_max_message_size_config( - max_decoding_message_size, - max_encoding_message_size, - ); - let res = grpc.unary(method, req).await; - Ok(res) - }; - Box::pin(fut) - } - _ => { - Box::pin(async move { - let mut response = http::Response::new( - tonic::body::Body::default(), - ); - let headers = response.headers_mut(); - headers - 
.insert( - tonic::Status::GRPC_STATUS, - (tonic::Code::Unimplemented as i32).into(), - ); - headers - .insert( - http::header::CONTENT_TYPE, - tonic::metadata::GRPC_CONTENT_TYPE, - ); - Ok(response) - }) - } - } - } - } - impl Clone for NodeServer { - fn clone(&self) -> Self { - let inner = self.inner.clone(); - Self { - inner, - accept_compression_encodings: self.accept_compression_encodings, - send_compression_encodings: self.send_compression_encodings, - max_decoding_message_size: self.max_decoding_message_size, - max_encoding_message_size: self.max_encoding_message_size, - } - } - } - /// Generated gRPC service name - pub const SERVICE_NAME: &str = "hellas.Node"; - impl tonic::server::NamedService for NodeServer { - const NAME: &'static str = SERVICE_NAME; - } -} -/// Generated client implementations. -pub mod execute_client { - #![allow( - unused_variables, - dead_code, - missing_docs, - clippy::wildcard_imports, - clippy::let_unit_value, - )] - use tonic::codegen::*; - use tonic::codegen::http::Uri; - #[derive(Debug, Clone)] - pub struct ExecuteClient { - inner: tonic::client::Grpc, - } - impl ExecuteClient - where - T: tonic::client::GrpcService, - T::Error: Into, - T::ResponseBody: Body + std::marker::Send + 'static, - ::Error: Into + std::marker::Send, - { - pub fn new(inner: T) -> Self { - let inner = tonic::client::Grpc::new(inner); - Self { inner } - } - pub fn with_origin(inner: T, origin: Uri) -> Self { - let inner = tonic::client::Grpc::with_origin(inner, origin); - Self { inner } - } - pub fn with_interceptor( - inner: T, - interceptor: F, - ) -> ExecuteClient> - where - F: tonic::service::Interceptor, - T::ResponseBody: Default, - T: tonic::codegen::Service< - http::Request, - Response = http::Response< - >::ResponseBody, - >, - >, - , - >>::Error: Into + std::marker::Send + std::marker::Sync, - { - ExecuteClient::new(InterceptedService::new(inner, interceptor)) - } - /// Compress requests with the given encoding. 
- /// - /// This requires the server to support it otherwise it might respond with an - /// error. - #[must_use] - pub fn send_compressed(mut self, encoding: CompressionEncoding) -> Self { - self.inner = self.inner.send_compressed(encoding); - self - } - /// Enable decompressing responses. - #[must_use] - pub fn accept_compressed(mut self, encoding: CompressionEncoding) -> Self { - self.inner = self.inner.accept_compressed(encoding); - self - } - /// Limits the maximum size of a decoded message. - /// - /// Default: `4MB` - #[must_use] - pub fn max_decoding_message_size(mut self, limit: usize) -> Self { - self.inner = self.inner.max_decoding_message_size(limit); - self - } - /// Limits the maximum size of an encoded message. - /// - /// Default: `usize::MAX` - #[must_use] - pub fn max_encoding_message_size(mut self, limit: usize) -> Self { - self.inner = self.inner.max_encoding_message_size(limit); - self - } - pub async fn get_quote( - &mut self, - request: impl tonic::IntoRequest, - ) -> std::result::Result< - tonic::Response, - tonic::Status, - > { - self.inner - .ready() - .await - .map_err(|e| { - tonic::Status::unknown( - format!("Service was not ready: {}", e.into()), - ) - })?; - let codec = tonic_prost::ProstCodec::default(); - let path = http::uri::PathAndQuery::from_static("/hellas.Execute/GetQuote"); - let mut req = request.into_request(); - req.extensions_mut().insert(GrpcMethod::new("hellas.Execute", "GetQuote")); - self.inner.unary(req, path, codec).await - } - pub async fn get_graph( - &mut self, - request: impl tonic::IntoRequest, - ) -> std::result::Result< - tonic::Response, - tonic::Status, - > { - self.inner - .ready() - .await - .map_err(|e| { - tonic::Status::unknown( - format!("Service was not ready: {}", e.into()), - ) - })?; - let codec = tonic_prost::ProstCodec::default(); - let path = http::uri::PathAndQuery::from_static("/hellas.Execute/GetGraph"); - let mut req = request.into_request(); - 
req.extensions_mut().insert(GrpcMethod::new("hellas.Execute", "GetGraph")); - self.inner.unary(req, path, codec).await - } - pub async fn execute( - &mut self, - request: impl tonic::IntoRequest, - ) -> std::result::Result< - tonic::Response, - tonic::Status, - > { - self.inner - .ready() - .await - .map_err(|e| { - tonic::Status::unknown( - format!("Service was not ready: {}", e.into()), - ) - })?; - let codec = tonic_prost::ProstCodec::default(); - let path = http::uri::PathAndQuery::from_static("/hellas.Execute/Execute"); - let mut req = request.into_request(); - req.extensions_mut().insert(GrpcMethod::new("hellas.Execute", "Execute")); - self.inner.unary(req, path, codec).await - } - pub async fn execute_status( - &mut self, - request: impl tonic::IntoRequest, - ) -> std::result::Result< - tonic::Response, - tonic::Status, - > { - self.inner - .ready() - .await - .map_err(|e| { - tonic::Status::unknown( - format!("Service was not ready: {}", e.into()), - ) - })?; - let codec = tonic_prost::ProstCodec::default(); - let path = http::uri::PathAndQuery::from_static( - "/hellas.Execute/ExecuteStatus", - ); - let mut req = request.into_request(); - req.extensions_mut() - .insert(GrpcMethod::new("hellas.Execute", "ExecuteStatus")); - self.inner.unary(req, path, codec).await - } - pub async fn execute_stream( - &mut self, - request: impl tonic::IntoRequest, - ) -> std::result::Result< - tonic::Response>, - tonic::Status, - > { - self.inner - .ready() - .await - .map_err(|e| { - tonic::Status::unknown( - format!("Service was not ready: {}", e.into()), - ) - })?; - let codec = tonic_prost::ProstCodec::default(); - let path = http::uri::PathAndQuery::from_static( - "/hellas.Execute/ExecuteStream", - ); - let mut req = request.into_request(); - req.extensions_mut() - .insert(GrpcMethod::new("hellas.Execute", "ExecuteStream")); - self.inner.server_streaming(req, path, codec).await - } - pub async fn execute_result( - &mut self, - request: impl tonic::IntoRequest, - ) -> 
std::result::Result< - tonic::Response, - tonic::Status, - > { - self.inner - .ready() - .await - .map_err(|e| { - tonic::Status::unknown( - format!("Service was not ready: {}", e.into()), - ) - })?; - let codec = tonic_prost::ProstCodec::default(); - let path = http::uri::PathAndQuery::from_static( - "/hellas.Execute/ExecuteResult", - ); - let mut req = request.into_request(); - req.extensions_mut() - .insert(GrpcMethod::new("hellas.Execute", "ExecuteResult")); - self.inner.unary(req, path, codec).await - } - } -} -/// Generated server implementations. -pub mod execute_server { - #![allow( - unused_variables, - dead_code, - missing_docs, - clippy::wildcard_imports, - clippy::let_unit_value, - )] - use tonic::codegen::*; - /// Generated trait containing gRPC methods that should be implemented for use with ExecuteServer. - #[async_trait] - pub trait Execute: std::marker::Send + std::marker::Sync + 'static { - async fn get_quote( - &self, - request: tonic::Request, - ) -> std::result::Result< - tonic::Response, - tonic::Status, - >; - async fn get_graph( - &self, - request: tonic::Request, - ) -> std::result::Result< - tonic::Response, - tonic::Status, - >; - async fn execute( - &self, - request: tonic::Request, - ) -> std::result::Result, tonic::Status>; - async fn execute_status( - &self, - request: tonic::Request, - ) -> std::result::Result< - tonic::Response, - tonic::Status, - >; - /// Server streaming response type for the ExecuteStream method. 
- type ExecuteStreamStream: tonic::codegen::tokio_stream::Stream< - Item = std::result::Result, - > - + std::marker::Send - + 'static; - async fn execute_stream( - &self, - request: tonic::Request, - ) -> std::result::Result< - tonic::Response, - tonic::Status, - >; - async fn execute_result( - &self, - request: tonic::Request, - ) -> std::result::Result< - tonic::Response, - tonic::Status, - >; - } - #[derive(Debug)] - pub struct ExecuteServer { - inner: Arc, - accept_compression_encodings: EnabledCompressionEncodings, - send_compression_encodings: EnabledCompressionEncodings, - max_decoding_message_size: Option, - max_encoding_message_size: Option, - } - impl ExecuteServer { - pub fn new(inner: T) -> Self { - Self::from_arc(Arc::new(inner)) - } - pub fn from_arc(inner: Arc) -> Self { - Self { - inner, - accept_compression_encodings: Default::default(), - send_compression_encodings: Default::default(), - max_decoding_message_size: None, - max_encoding_message_size: None, - } - } - pub fn with_interceptor( - inner: T, - interceptor: F, - ) -> InterceptedService - where - F: tonic::service::Interceptor, - { - InterceptedService::new(Self::new(inner), interceptor) - } - /// Enable decompressing requests with the given encoding. - #[must_use] - pub fn accept_compressed(mut self, encoding: CompressionEncoding) -> Self { - self.accept_compression_encodings.enable(encoding); - self - } - /// Compress responses with the given encoding, if the client supports it. - #[must_use] - pub fn send_compressed(mut self, encoding: CompressionEncoding) -> Self { - self.send_compression_encodings.enable(encoding); - self - } - /// Limits the maximum size of a decoded message. - /// - /// Default: `4MB` - #[must_use] - pub fn max_decoding_message_size(mut self, limit: usize) -> Self { - self.max_decoding_message_size = Some(limit); - self - } - /// Limits the maximum size of an encoded message. 
- /// - /// Default: `usize::MAX` - #[must_use] - pub fn max_encoding_message_size(mut self, limit: usize) -> Self { - self.max_encoding_message_size = Some(limit); - self - } - } - impl tonic::codegen::Service> for ExecuteServer - where - T: Execute, - B: Body + std::marker::Send + 'static, - B::Error: Into + std::marker::Send + 'static, - { - type Response = http::Response; - type Error = std::convert::Infallible; - type Future = BoxFuture; - fn poll_ready( - &mut self, - _cx: &mut Context<'_>, - ) -> Poll> { - Poll::Ready(Ok(())) - } - fn call(&mut self, req: http::Request) -> Self::Future { - match req.uri().path() { - "/hellas.Execute/GetQuote" => { - #[allow(non_camel_case_types)] - struct GetQuoteSvc(pub Arc); - impl tonic::server::UnaryService - for GetQuoteSvc { - type Response = super::GetQuoteResponse; - type Future = BoxFuture< - tonic::Response, - tonic::Status, - >; - fn call( - &mut self, - request: tonic::Request, - ) -> Self::Future { - let inner = Arc::clone(&self.0); - let fut = async move { - ::get_quote(&inner, request).await - }; - Box::pin(fut) - } - } - let accept_compression_encodings = self.accept_compression_encodings; - let send_compression_encodings = self.send_compression_encodings; - let max_decoding_message_size = self.max_decoding_message_size; - let max_encoding_message_size = self.max_encoding_message_size; - let inner = self.inner.clone(); - let fut = async move { - let method = GetQuoteSvc(inner); - let codec = tonic_prost::ProstCodec::default(); - let mut grpc = tonic::server::Grpc::new(codec) - .apply_compression_config( - accept_compression_encodings, - send_compression_encodings, - ) - .apply_max_message_size_config( - max_decoding_message_size, - max_encoding_message_size, - ); - let res = grpc.unary(method, req).await; - Ok(res) - }; - Box::pin(fut) - } - "/hellas.Execute/GetGraph" => { - #[allow(non_camel_case_types)] - struct GetGraphSvc(pub Arc); - impl tonic::server::UnaryService - for GetGraphSvc { - type Response = 
super::GetGraphResponse; - type Future = BoxFuture< - tonic::Response, - tonic::Status, - >; - fn call( - &mut self, - request: tonic::Request, - ) -> Self::Future { - let inner = Arc::clone(&self.0); - let fut = async move { - ::get_graph(&inner, request).await - }; - Box::pin(fut) - } - } - let accept_compression_encodings = self.accept_compression_encodings; - let send_compression_encodings = self.send_compression_encodings; - let max_decoding_message_size = self.max_decoding_message_size; - let max_encoding_message_size = self.max_encoding_message_size; - let inner = self.inner.clone(); - let fut = async move { - let method = GetGraphSvc(inner); - let codec = tonic_prost::ProstCodec::default(); - let mut grpc = tonic::server::Grpc::new(codec) - .apply_compression_config( - accept_compression_encodings, - send_compression_encodings, - ) - .apply_max_message_size_config( - max_decoding_message_size, - max_encoding_message_size, - ); - let res = grpc.unary(method, req).await; - Ok(res) - }; - Box::pin(fut) - } - "/hellas.Execute/Execute" => { - #[allow(non_camel_case_types)] - struct ExecuteSvc(pub Arc); - impl tonic::server::UnaryService - for ExecuteSvc { - type Response = super::ExecuteResponse; - type Future = BoxFuture< - tonic::Response, - tonic::Status, - >; - fn call( - &mut self, - request: tonic::Request, - ) -> Self::Future { - let inner = Arc::clone(&self.0); - let fut = async move { - ::execute(&inner, request).await - }; - Box::pin(fut) - } - } - let accept_compression_encodings = self.accept_compression_encodings; - let send_compression_encodings = self.send_compression_encodings; - let max_decoding_message_size = self.max_decoding_message_size; - let max_encoding_message_size = self.max_encoding_message_size; - let inner = self.inner.clone(); - let fut = async move { - let method = ExecuteSvc(inner); - let codec = tonic_prost::ProstCodec::default(); - let mut grpc = tonic::server::Grpc::new(codec) - .apply_compression_config( - 
accept_compression_encodings, - send_compression_encodings, - ) - .apply_max_message_size_config( - max_decoding_message_size, - max_encoding_message_size, - ); - let res = grpc.unary(method, req).await; - Ok(res) - }; - Box::pin(fut) - } - "/hellas.Execute/ExecuteStatus" => { - #[allow(non_camel_case_types)] - struct ExecuteStatusSvc(pub Arc); - impl< - T: Execute, - > tonic::server::UnaryService - for ExecuteStatusSvc { - type Response = super::ExecuteStatusResponse; - type Future = BoxFuture< - tonic::Response, - tonic::Status, - >; - fn call( - &mut self, - request: tonic::Request, - ) -> Self::Future { - let inner = Arc::clone(&self.0); - let fut = async move { - ::execute_status(&inner, request).await - }; - Box::pin(fut) - } - } - let accept_compression_encodings = self.accept_compression_encodings; - let send_compression_encodings = self.send_compression_encodings; - let max_decoding_message_size = self.max_decoding_message_size; - let max_encoding_message_size = self.max_encoding_message_size; - let inner = self.inner.clone(); - let fut = async move { - let method = ExecuteStatusSvc(inner); - let codec = tonic_prost::ProstCodec::default(); - let mut grpc = tonic::server::Grpc::new(codec) - .apply_compression_config( - accept_compression_encodings, - send_compression_encodings, - ) - .apply_max_message_size_config( - max_decoding_message_size, - max_encoding_message_size, - ); - let res = grpc.unary(method, req).await; - Ok(res) - }; - Box::pin(fut) - } - "/hellas.Execute/ExecuteStream" => { - #[allow(non_camel_case_types)] - struct ExecuteStreamSvc(pub Arc); - impl< - T: Execute, - > tonic::server::ServerStreamingService - for ExecuteStreamSvc { - type Response = super::ExecuteProgress; - type ResponseStream = T::ExecuteStreamStream; - type Future = BoxFuture< - tonic::Response, - tonic::Status, - >; - fn call( - &mut self, - request: tonic::Request, - ) -> Self::Future { - let inner = Arc::clone(&self.0); - let fut = async move { - 
::execute_stream(&inner, request).await - }; - Box::pin(fut) - } - } - let accept_compression_encodings = self.accept_compression_encodings; - let send_compression_encodings = self.send_compression_encodings; - let max_decoding_message_size = self.max_decoding_message_size; - let max_encoding_message_size = self.max_encoding_message_size; - let inner = self.inner.clone(); - let fut = async move { - let method = ExecuteStreamSvc(inner); - let codec = tonic_prost::ProstCodec::default(); - let mut grpc = tonic::server::Grpc::new(codec) - .apply_compression_config( - accept_compression_encodings, - send_compression_encodings, - ) - .apply_max_message_size_config( - max_decoding_message_size, - max_encoding_message_size, - ); - let res = grpc.server_streaming(method, req).await; - Ok(res) - }; - Box::pin(fut) - } - "/hellas.Execute/ExecuteResult" => { - #[allow(non_camel_case_types)] - struct ExecuteResultSvc(pub Arc); - impl< - T: Execute, - > tonic::server::UnaryService - for ExecuteResultSvc { - type Response = super::ExecuteResultResponse; - type Future = BoxFuture< - tonic::Response, - tonic::Status, - >; - fn call( - &mut self, - request: tonic::Request, - ) -> Self::Future { - let inner = Arc::clone(&self.0); - let fut = async move { - ::execute_result(&inner, request).await - }; - Box::pin(fut) - } - } - let accept_compression_encodings = self.accept_compression_encodings; - let send_compression_encodings = self.send_compression_encodings; - let max_decoding_message_size = self.max_decoding_message_size; - let max_encoding_message_size = self.max_encoding_message_size; - let inner = self.inner.clone(); - let fut = async move { - let method = ExecuteResultSvc(inner); - let codec = tonic_prost::ProstCodec::default(); - let mut grpc = tonic::server::Grpc::new(codec) - .apply_compression_config( - accept_compression_encodings, - send_compression_encodings, - ) - .apply_max_message_size_config( - max_decoding_message_size, - max_encoding_message_size, - ); - let res 
= grpc.unary(method, req).await; - Ok(res) - }; - Box::pin(fut) - } - _ => { - Box::pin(async move { - let mut response = http::Response::new( - tonic::body::Body::default(), - ); - let headers = response.headers_mut(); - headers - .insert( - tonic::Status::GRPC_STATUS, - (tonic::Code::Unimplemented as i32).into(), - ); - headers - .insert( - http::header::CONTENT_TYPE, - tonic::metadata::GRPC_CONTENT_TYPE, - ); - Ok(response) - }) - } - } - } - } - impl Clone for ExecuteServer { - fn clone(&self) -> Self { - let inner = self.inner.clone(); - Self { - inner, - accept_compression_encodings: self.accept_compression_encodings, - send_compression_encodings: self.send_compression_encodings, - max_decoding_message_size: self.max_decoding_message_size, - max_encoding_message_size: self.max_encoding_message_size, - } - } - } - /// Generated gRPC service name - pub const SERVICE_NAME: &str = "hellas.Execute"; - impl tonic::server::NamedService for ExecuteServer { - const NAME: &'static str = SERVICE_NAME; - } -} diff --git a/crates/rpc/src/pb/mod.rs b/crates/rpc/src/pb/mod.rs deleted file mode 100644 index 934e064..0000000 --- a/crates/rpc/src/pb/mod.rs +++ /dev/null @@ -1,4 +0,0 @@ -// This file is @generated by prost-build. -pub mod hellas { - include!("hellas.rs"); -} diff --git a/crates/rpc/src/policy/download.rs b/crates/rpc/src/policy/download.rs new file mode 100644 index 0000000..fec5de3 --- /dev/null +++ b/crates/rpc/src/policy/download.rs @@ -0,0 +1,114 @@ +use std::fmt; +use std::str::FromStr; + +use super::glob; +use super::parse_allow_patterns; + +/// Controls whether the executor may download model weights from `HuggingFace`. +#[derive(Clone, Debug, Default)] +pub enum DownloadPolicy { + /// Download any model if not cached (default). + #[default] + Eager, + /// Download only models whose `HuggingFace` model ID matches one of the + /// given glob patterns; deny all others unless already cached locally. 
+ Allow(Vec), + /// Never download; only use models already present in the local HF cache. + Skip, +} + +impl DownloadPolicy { + /// Returns `true` if this policy permits downloading the given model. + pub fn allows_download(&self, model_id: &str) -> bool { + match self { + Self::Eager => true, + Self::Skip => false, + Self::Allow(patterns) => patterns + .iter() + .any(|pattern| glob::matches(pattern, model_id)), + } + } +} + +impl FromStr for DownloadPolicy { + type Err = String; + + fn from_str(policy: &str) -> Result { + let trimmed = policy.trim(); + match trimmed { + "eager" => Ok(Self::Eager), + "skip" => Ok(Self::Skip), + _ if trimmed.starts_with("allow(") => Ok(Self::Allow(parse_allow_patterns(trimmed)?)), + _ => Err(format!( + "invalid download policy '{trimmed}': expected 'eager', 'skip', or 'allow(pattern,...)'" + )), + } + } +} + +impl fmt::Display for DownloadPolicy { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Self::Eager => write!(f, "eager"), + Self::Skip => write!(f, "skip"), + Self::Allow(patterns) => write!(f, "allow({})", patterns.join(",")), + } + } +} + +#[cfg(test)] +mod tests { + use super::DownloadPolicy; + + #[test] + fn parse_eager() { + let policy: DownloadPolicy = "eager".parse().unwrap(); + assert!(matches!(policy, DownloadPolicy::Eager)); + assert_eq!(policy.to_string(), "eager"); + } + + #[test] + fn parse_skip() { + let policy: DownloadPolicy = "skip".parse().unwrap(); + assert!(matches!(policy, DownloadPolicy::Skip)); + assert_eq!(policy.to_string(), "skip"); + } + + #[test] + fn parse_allow_single() { + let policy: DownloadPolicy = "allow(Qwen3/*)".parse().unwrap(); + match &policy { + DownloadPolicy::Allow(patterns) => assert_eq!(patterns, &["Qwen3/*"]), + _ => panic!("expected Allow"), + } + assert_eq!(policy.to_string(), "allow(Qwen3/*)"); + } + + #[test] + fn parse_allow_multiple() { + let policy: DownloadPolicy = "allow(Qwen3/*, meta-llama/*)".parse().unwrap(); + match &policy { + 
DownloadPolicy::Allow(patterns) => { + assert_eq!(patterns, &["Qwen3/*", "meta-llama/*"]); + } + _ => panic!("expected Allow"), + } + } + + #[test] + fn parse_invalid() { + assert!("unknown".parse::().is_err()); + assert!("allow()".parse::().is_err()); + } + + #[test] + fn allows_download() { + assert!(DownloadPolicy::Eager.allows_download("anything")); + assert!(!DownloadPolicy::Skip.allows_download("anything")); + + let policy = DownloadPolicy::Allow(vec!["Qwen3/*".into(), "meta-llama/*".into()]); + assert!(policy.allows_download("Qwen3/Qwen3-0.6B")); + assert!(policy.allows_download("meta-llama/Llama-3.1-8B")); + assert!(!policy.allows_download("HuggingFaceTB/SmolLM2-135M")); + } +} diff --git a/crates/rpc/src/policy/execute.rs b/crates/rpc/src/policy/execute.rs new file mode 100644 index 0000000..5003ab2 --- /dev/null +++ b/crates/rpc/src/policy/execute.rs @@ -0,0 +1,208 @@ +use std::fmt; +use std::str::FromStr; + +use super::glob; +use super::parse_allow_patterns; + +/// A namespaced pattern for execute policy matching. +#[derive(Clone, Debug)] +pub enum ExecutePattern { + /// `hf/` matches on the `HuggingFace` model ID. + HuggingFace(String), + /// `graph/` matches on the blake3 graph hash. + Graph(String), +} + +/// Controls which graphs the executor will run. +#[derive(Clone, Debug, Default)] +pub enum ExecutePolicy { + /// Execute any graph (default). + #[default] + Eager, + /// Execute only graphs matching one of the given patterns. + Allow(Vec), + /// Refuse all executions. + Skip, +} + +impl ExecutePolicy { + /// Returns `true` if this policy permits executing a graph with the given + /// identifiers. For LLM graphs `hf_model_id` is `Some(id)`; for raw graphs + /// it is `None`. 
+ pub fn allows_execute(&self, graph_id: &str, hf_model_id: Option<&str>) -> bool { + match self { + Self::Eager => true, + Self::Skip => false, + Self::Allow(patterns) => patterns.iter().any(|pattern| match pattern { + ExecutePattern::HuggingFace(pattern) => { + hf_model_id.is_some_and(|model_id| glob::matches(pattern, model_id)) + } + ExecutePattern::Graph(pattern) => glob::matches(pattern, graph_id), + }), + } + } +} + +impl FromStr for ExecutePolicy { + type Err = String; + + fn from_str(policy: &str) -> Result { + let trimmed = policy.trim(); + match trimmed { + "eager" => Ok(Self::Eager), + "skip" => Ok(Self::Skip), + _ if trimmed.starts_with("allow(") => { + let patterns = parse_allow_patterns(trimmed)? + .iter() + .map(|pattern| ExecutePattern::parse(pattern)) + .collect::, _>>()?; + Ok(Self::Allow(patterns)) + } + _ => Err(format!( + "invalid execute policy '{trimmed}': expected 'eager', 'skip', or 'allow(hf/pattern,...,graph/pattern,...)'" + )), + } + } +} + +impl fmt::Display for ExecutePolicy { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Self::Eager => write!(f, "eager"), + Self::Skip => write!(f, "skip"), + Self::Allow(patterns) => { + write!(f, "allow(")?; + for (index, pattern) in patterns.iter().enumerate() { + if index > 0 { + write!(f, ",")?; + } + write!(f, "{pattern}")?; + } + write!(f, ")") + } + } + } +} + +impl ExecutePattern { + fn parse(pattern: &str) -> Result { + if let Some(rest) = pattern.strip_prefix("hf/") { + if rest.is_empty() { + return Err("hf/ pattern must not be empty".to_string()); + } + Ok(Self::HuggingFace(rest.to_string())) + } else if let Some(rest) = pattern.strip_prefix("graph/") { + if rest.is_empty() { + return Err("graph/ pattern must not be empty".to_string()); + } + Ok(Self::Graph(rest.to_string())) + } else { + Err(format!( + "execute pattern '{pattern}' must start with 'hf/' or 'graph/'" + )) + } + } +} + +impl fmt::Display for ExecutePattern { + fn fmt(&self, f: &mut 
fmt::Formatter<'_>) -> fmt::Result { + match self { + Self::HuggingFace(pattern) => write!(f, "hf/{pattern}"), + Self::Graph(pattern) => write!(f, "graph/{pattern}"), + } + } +} + +#[cfg(test)] +mod tests { + use super::{ExecutePattern, ExecutePolicy}; + + #[test] + fn parse_eager() { + let policy: ExecutePolicy = "eager".parse().unwrap(); + assert!(matches!(policy, ExecutePolicy::Eager)); + assert_eq!(policy.to_string(), "eager"); + } + + #[test] + fn parse_skip() { + let policy: ExecutePolicy = "skip".parse().unwrap(); + assert!(matches!(policy, ExecutePolicy::Skip)); + } + + #[test] + fn parse_allow_hf() { + let policy: ExecutePolicy = "allow(hf/Qwen3/*)".parse().unwrap(); + match &policy { + ExecutePolicy::Allow(patterns) => { + assert_eq!(patterns.len(), 1); + assert!( + matches!(&patterns[0], ExecutePattern::HuggingFace(pattern) if pattern == "Qwen3/*") + ); + } + _ => panic!("expected Allow"), + } + assert_eq!(policy.to_string(), "allow(hf/Qwen3/*)"); + } + + #[test] + fn parse_allow_graph() { + let policy: ExecutePolicy = "allow(graph/abc123*)".parse().unwrap(); + match &policy { + ExecutePolicy::Allow(patterns) => { + assert_eq!(patterns.len(), 1); + assert!( + matches!(&patterns[0], ExecutePattern::Graph(pattern) if pattern == "abc123*") + ); + } + _ => panic!("expected Allow"), + } + } + + #[test] + fn parse_allow_mixed() { + let policy: ExecutePolicy = "allow(hf/Qwen3/*,graph/abc*)".parse().unwrap(); + match &policy { + ExecutePolicy::Allow(patterns) => { + assert_eq!(patterns.len(), 2); + assert!( + matches!(&patterns[0], ExecutePattern::HuggingFace(pattern) if pattern == "Qwen3/*") + ); + assert!( + matches!(&patterns[1], ExecutePattern::Graph(pattern) if pattern == "abc*") + ); + } + _ => panic!("expected Allow"), + } + } + + #[test] + fn parse_invalid_namespace() { + assert!("allow(unknown/foo)".parse::().is_err()); + } + + #[test] + fn allows_execute() { + assert!(ExecutePolicy::Eager.allows_execute("anyhash", Some("any/model"))); + 
assert!(ExecutePolicy::Eager.allows_execute("anyhash", None)); + assert!(!ExecutePolicy::Skip.allows_execute("anyhash", Some("any/model"))); + + let hf_only = ExecutePolicy::Allow(vec![ExecutePattern::HuggingFace("Qwen3/*".into())]); + assert!(hf_only.allows_execute("", Some("Qwen3/Qwen3-0.6B"))); + assert!(!hf_only.allows_execute("", Some("meta-llama/X"))); + assert!(!hf_only.allows_execute("somehash", None)); + + let graph_only = ExecutePolicy::Allow(vec![ExecutePattern::Graph("abc*".into())]); + assert!(graph_only.allows_execute("abc123", None)); + assert!(!graph_only.allows_execute("def456", None)); + assert!(graph_only.allows_execute("abc123", Some("anything"))); + + let mixed = ExecutePolicy::Allow(vec![ + ExecutePattern::HuggingFace("Qwen3/*".into()), + ExecutePattern::Graph("abc*".into()), + ]); + assert!(mixed.allows_execute("xyz", Some("Qwen3/Qwen3-0.6B"))); + assert!(mixed.allows_execute("abc123", Some("unknown/model"))); + assert!(!mixed.allows_execute("def456", Some("unknown/model"))); + } +} diff --git a/crates/rpc/src/policy/glob.rs b/crates/rpc/src/policy/glob.rs new file mode 100644 index 0000000..892bf38 --- /dev/null +++ b/crates/rpc/src/policy/glob.rs @@ -0,0 +1,73 @@ +/// Simple glob match supporting `*` as a wildcard for any sequence of characters. 
+pub(super) fn matches(pattern: &str, text: &str) -> bool { + let parts: Vec<&str> = pattern.split('*').collect(); + if parts.len() == 1 { + return pattern == text; + } + + let mut pos = 0; + for (index, part) in parts.iter().enumerate() { + if part.is_empty() { + continue; + } + + match text[pos..].find(part) { + Some(found) => { + if index == 0 && found != 0 { + return false; + } + pos += found + part.len(); + } + None => return false, + } + } + + if parts.last().is_some_and(|last| !last.is_empty()) { + return pos == text.len(); + } + + true +} + +#[cfg(test)] +mod tests { + use super::matches; + + #[test] + fn exact_match() { + assert!(matches("exact", "exact")); + assert!(!matches("exact", "exactX")); + assert!(!matches("exact", "Xexact")); + } + + #[test] + fn trailing_star() { + assert!(matches("Qwen3/*", "Qwen3/Qwen3-0.6B")); + assert!(matches("Qwen3/*", "Qwen3/anything")); + assert!(!matches("Qwen3/*", "meta-llama/Llama-3")); + } + + #[test] + fn leading_star() { + assert!(matches("*-Instruct", "SmolLM2-135M-Instruct")); + assert!(!matches("*-Instruct", "SmolLM2-135M")); + } + + #[test] + fn middle_star() { + assert!(matches("meta-llama/Llama*8B", "meta-llama/Llama-3.1-8B")); + assert!(!matches("meta-llama/Llama*8B", "meta-llama/Llama-3.1-70B")); + } + + #[test] + fn star_matches_all() { + assert!(matches("*", "anything/at-all")); + assert!(matches("*", "")); + } + + #[test] + fn multiple_stars() { + assert!(matches("*llama*8B", "meta-llama/Llama-3.1-8B")); + assert!(!matches("*llama*70B", "meta-llama/Llama-3.1-8B")); + } +} diff --git a/crates/rpc/src/policy/mod.rs b/crates/rpc/src/policy/mod.rs new file mode 100644 index 0000000..4e64f27 --- /dev/null +++ b/crates/rpc/src/policy/mod.rs @@ -0,0 +1,25 @@ +mod download; +mod execute; +mod glob; + +pub use download::DownloadPolicy; +pub use execute::{ExecutePattern, ExecutePolicy}; + +fn parse_allow_patterns(policy: &str) -> Result, String> { + let trimmed = policy.trim(); + let inner = trimmed + 
.strip_prefix("allow(") + .and_then(|s| s.strip_suffix(')')) + .ok_or_else(|| format!("expected 'allow(pattern,...)' but got '{trimmed}'"))?; + + let patterns: Vec = inner + .split(',') + .map(|pattern| pattern.trim().to_string()) + .filter(|pattern| !pattern.is_empty()) + .collect(); + if patterns.is_empty() { + return Err("allow() requires at least one pattern".to_string()); + } + + Ok(patterns) +} diff --git a/crates/rpc/src/provenance.rs b/crates/rpc/src/provenance.rs new file mode 100644 index 0000000..3b52dc8 --- /dev/null +++ b/crates/rpc/src/provenance.rs @@ -0,0 +1,228 @@ +//! Execution provenance — content-addressed identifiers that travel +//! alongside every gateway/executor RPC. Two boundaries to cross: +//! +//! - **Executor → gateway** over tonic Response metadata using the +//! `x-hellas-*` keys defined below. Mirrors the OTel W3C trace-context +//! propagation pattern; this module is the read/write half on both sides. +//! - **Gateway → HTTP client** over response headers (same names) and named +//! SSE events. Translation happens in the gateway's tower layer and SSE +//! handlers, not here. +//! +//! Commitment wire form everywhere: 64-char lowercase hex of the underlying +//! 32-byte digest. We carry raw bytes in `ExecutionProvenance` rather than +//! typed CIDs so this module doesn't pull catgrad into the rpc crate's +//! `client` feature; callers reconstitute typed values at their boundary. + +use std::fmt::Write; +use thiserror::Error; +use tonic::metadata::{Ascii, MetadataMap, MetadataValue}; + +/// HTTP header / tonic metadata key for the work commitment. The commitment +/// transitively names the request, so we don't expose scheme-specific inputs +/// separately. +pub const COMMITMENT_HEADER: &str = "x-hellas-commitment"; + +/// HTTP header key for the terminal signed receipt envelope. On streaming +/// responses this only appears in-band on the terminal semantic event because +/// the receipt is unknown at header-flush time. 
+pub const RECEIPT_HEADER: &str = "x-hellas-receipt"; + +/// Pre-flight provenance for a single execution. The signed receipt envelope +/// is terminal and not part of this struct — it travels via the streaming +/// `Outcome::Completed` payload. +#[derive(Clone, PartialEq, Eq)] +pub struct ExecutionProvenance { + pub commitment_id: [u8; 32], +} + +/// Renders as the commitment's lowercase-hex string, matching how it +/// appears in tonic metadata and HTTP headers. Lets callers log +/// provenance with `%prov` (or `?Option` for the +/// `Some(deadbeef…) | None` form tracing produces) instead of +/// hand-rolling the hex render. +impl std::fmt::Display for ExecutionProvenance { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + for byte in &self.commitment_id { + write!(f, "{byte:02x}")?; + } + Ok(()) + } +} + +/// Debug == Display so `?provenance` and `?Option` +/// stay readable in tracing output. The default derive would render +/// `ExecutionProvenance { commitment_id: [171, 171, …] }` which is the +/// opposite of useful. +impl std::fmt::Debug for ExecutionProvenance { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + std::fmt::Display::fmt(self, f) + } +} + +#[derive(Clone, Debug, Error, PartialEq, Eq)] +pub enum ProvenanceError { + #[error("provenance metadata missing key `{key}`")] + Missing { key: &'static str }, + #[error("provenance metadata key `{key}` is not printable ASCII")] + NotAscii { key: &'static str }, + #[error("provenance metadata key `{key}` is not 64-char lowercase hex (got {len} chars)")] + BadLength { key: &'static str, len: usize }, + #[error("provenance metadata key `{key}` contains a non-hex character")] + BadHex { key: &'static str }, +} + +impl From for tonic::Status { + fn from(err: ProvenanceError) -> Self { + tonic::Status::internal(err.to_string()) + } +} + +/// Render a 32-byte digest as 64-char lowercase hex. 
+pub fn encode_hex(bytes: &[u8; 32]) -> String { + let mut s = String::with_capacity(64); + for byte in bytes { + write!(&mut s, "{byte:02x}").expect("writing to String never fails"); + } + s +} + +/// Build an ASCII-typed tonic metadata value from digest bytes. +pub fn cid_bytes_to_metadata(bytes: &[u8; 32]) -> MetadataValue { + encode_hex(bytes) + .parse() + .expect("64-char hex is always valid ASCII metadata") +} + +/// Read a single CID-bearing key out of a tonic metadata map and decode +/// the hex value back into raw bytes. +pub fn cid_bytes_from_metadata( + md: &MetadataMap, + key: &'static str, +) -> Result<[u8; 32], ProvenanceError> { + let value = md.get(key).ok_or(ProvenanceError::Missing { key })?; + let s = value + .to_str() + .map_err(|_| ProvenanceError::NotAscii { key })?; + if s.len() != 64 { + return Err(ProvenanceError::BadLength { key, len: s.len() }); + } + let bytes = s.as_bytes(); + let mut out = [0_u8; 32]; + for (idx, byte) in out.iter_mut().enumerate() { + let hi = hex_nibble(bytes[idx * 2]).ok_or(ProvenanceError::BadHex { key })?; + let lo = hex_nibble(bytes[idx * 2 + 1]).ok_or(ProvenanceError::BadHex { key })?; + *byte = (hi << 4) | lo; + } + Ok(out) +} + +/// Read the pre-flight provenance from a tonic metadata map. Returns +/// `Err(Missing)` if the commitment key is absent, so older servers that +/// don't set provenance are detectable. +pub fn read_provenance_metadata(md: &MetadataMap) -> Result { + Ok(ExecutionProvenance { + commitment_id: cid_bytes_from_metadata(md, COMMITMENT_HEADER)?, + }) +} + +/// Insert pre-flight provenance into a tonic metadata map. Used +/// server-side on `Response::metadata_mut()` for both unary and +/// streaming RPCs. 
+pub fn write_provenance_metadata(md: &mut MetadataMap, prov: &ExecutionProvenance) { + md.insert( + COMMITMENT_HEADER, + cid_bytes_to_metadata(&prov.commitment_id), + ); +} + +fn hex_nibble(byte: u8) -> Option { + match byte { + b'0'..=b'9' => Some(byte - b'0'), + b'a'..=b'f' => Some(byte - b'a' + 10), + _ => None, + } +} + +#[cfg(test)] +mod tests { + use super::*; + + fn sample() -> ExecutionProvenance { + ExecutionProvenance { + commitment_id: [0xab; 32], + } + } + + #[test] + fn encode_hex_renders_lowercase_hex() { + let s = encode_hex(&[0xab; 32]); + assert_eq!(s.len(), 64); + assert!( + s.chars() + .all(|c| c.is_ascii_hexdigit() && !c.is_ascii_uppercase()) + ); + assert_eq!(s, "ab".repeat(32)); + } + + #[test] + fn round_trip_through_metadata() { + let prov = sample(); + let mut md = MetadataMap::new(); + write_provenance_metadata(&mut md, &prov); + let decoded = read_provenance_metadata(&md).expect("round-trip should succeed"); + assert_eq!(decoded, prov); + } + + #[test] + fn missing_key_reports_which_key() { + let md = MetadataMap::new(); + let err = read_provenance_metadata(&md).expect_err("empty metadata must fail"); + assert_eq!( + err, + ProvenanceError::Missing { + key: COMMITMENT_HEADER + } + ); + } + + #[test] + fn bad_length_reports_actual_length() { + let mut md = MetadataMap::new(); + md.insert(COMMITMENT_HEADER, "deadbeef".parse().unwrap()); + let err = read_provenance_metadata(&md).expect_err("too-short value must fail"); + assert_eq!( + err, + ProvenanceError::BadLength { + key: COMMITMENT_HEADER, + len: 8 + } + ); + } + + #[test] + fn bad_hex_rejected() { + let mut md = MetadataMap::new(); + md.insert(COMMITMENT_HEADER, "z".repeat(64).parse().unwrap()); + let err = read_provenance_metadata(&md).expect_err("non-hex value must fail"); + assert_eq!( + err, + ProvenanceError::BadHex { + key: COMMITMENT_HEADER + } + ); + } + + #[test] + fn uppercase_hex_rejected() { + // Display is lowercase; we reject uppercase so the wire form is unambiguous. 
+ let mut md = MetadataMap::new(); + md.insert(COMMITMENT_HEADER, "AB".repeat(32).parse().unwrap()); + let err = read_provenance_metadata(&md).expect_err("uppercase hex must fail"); + assert_eq!( + err, + ProvenanceError::BadHex { + key: COMMITMENT_HEADER + } + ); + } +} diff --git a/crates/rpc/src/service.rs b/crates/rpc/src/service.rs new file mode 100644 index 0000000..21a7fa3 --- /dev/null +++ b/crates/rpc/src/service.rs @@ -0,0 +1,36 @@ +//! Client-side service markers used for ALPN selection with tonic-iroh transport. + +/// Service marker for the node RPC service. +pub struct NodeService; + +impl tonic::server::NamedService for NodeService { + const NAME: &'static str = "hellas.swarm.v1.Node"; +} + +/// Service marker for the execute RPC service. +pub struct ExecuteService; + +impl tonic::server::NamedService for ExecuteService { + const NAME: &'static str = "hellas.v1.Execute"; +} + +/// Service marker for the symbolic ticket RPC service. +pub struct SymbolicService; + +impl tonic::server::NamedService for SymbolicService { + const NAME: &'static str = "hellas.symbolic.v1.Symbolic"; +} + +/// Service marker for the opaque ticket RPC service. +pub struct OpaqueService; + +impl tonic::server::NamedService for OpaqueService { + const NAME: &'static str = "hellas.opaque.v1.Opaque"; +} + +/// Service marker for the provider courtesy RPC service. +pub struct CourtesyService; + +impl tonic::server::NamedService for CourtesyService { + const NAME: &'static str = "hellas.courtesy.v1.Courtesy"; +} diff --git a/crates/rpc/src/spec.rs b/crates/rpc/src/spec.rs new file mode 100644 index 0000000..ac1a88c --- /dev/null +++ b/crates/rpc/src/spec.rs @@ -0,0 +1,107 @@ +use thiserror::Error; + +pub const DEFAULT_MODEL_REVISION: &str = "main"; + +/// Parse errors for [`ModelSpec`]. Carries no external dependencies so it stays +/// WASM-safe for consumers that only need identifier parsing. 
+#[derive(Clone, Debug, Error, PartialEq, Eq)] +pub enum ModelSpecError { + #[error("model id is empty")] + EmptyId, + #[error("model revision is empty")] + EmptyRevision, +} + +/// A HuggingFace-style model identifier with an optional revision. +/// +/// Parsed from strings of the form `org/model` (revision defaults to +/// [`DEFAULT_MODEL_REVISION`]) or `org/model@revision`. +#[derive(Clone, Debug, PartialEq, Eq)] +pub struct ModelSpec { + pub id: String, + pub revision: String, +} + +impl ModelSpec { + pub fn parse(raw: &str) -> Result { + let raw = raw.trim(); + if raw.is_empty() { + return Err(ModelSpecError::EmptyId); + } + + let (id, revision) = match raw.rsplit_once('@') { + Some((id, revision)) => { + let id = id.trim(); + let revision = revision.trim(); + if id.is_empty() { + return Err(ModelSpecError::EmptyId); + } + if revision.is_empty() { + return Err(ModelSpecError::EmptyRevision); + } + (id.to_string(), revision.to_string()) + } + None => (raw.to_string(), DEFAULT_MODEL_REVISION.to_string()), + }; + + Ok(Self { id, revision }) + } +} + +impl std::fmt::Display for ModelSpec { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + if self.revision.is_empty() || self.revision == DEFAULT_MODEL_REVISION { + write!(f, "{}", self.id) + } else { + write!(f, "{}@{}", self.id, self.revision) + } + } +} + +#[cfg(test)] +mod tests { + use super::{DEFAULT_MODEL_REVISION, ModelSpec, ModelSpecError}; + + #[test] + fn parses_default_revision_when_not_specified() { + let spec = ModelSpec::parse("HuggingFaceTB/SmolLM2-135M-Instruct").unwrap(); + assert_eq!(spec.id, "HuggingFaceTB/SmolLM2-135M-Instruct"); + assert_eq!(spec.revision, DEFAULT_MODEL_REVISION); + } + + #[test] + fn parses_explicit_revision_suffix() { + let spec = ModelSpec::parse("foo/bar@refs/pr/7").unwrap(); + assert_eq!(spec.id, "foo/bar"); + assert_eq!(spec.revision, "refs/pr/7"); + } + + #[test] + fn rejects_empty_revision_suffix() { + assert_eq!( + 
ModelSpec::parse("foo/bar@").unwrap_err(), + ModelSpecError::EmptyRevision, + ); + } + + #[test] + fn rejects_empty_id() { + assert_eq!(ModelSpec::parse("").unwrap_err(), ModelSpecError::EmptyId,); + assert_eq!( + ModelSpec::parse("@main").unwrap_err(), + ModelSpecError::EmptyId, + ); + } + + #[test] + fn display_elides_default_revision() { + let spec = ModelSpec::parse("org/model").unwrap(); + assert_eq!(spec.to_string(), "org/model"); + } + + #[test] + fn display_renders_explicit_revision() { + let spec = ModelSpec::parse("org/model@v2").unwrap(); + assert_eq!(spec.to_string(), "org/model@v2"); + } +} diff --git a/flake.lock b/flake.lock index 2b78c6b..43768ad 100644 --- a/flake.lock +++ b/flake.lock @@ -1,5 +1,26 @@ { "nodes": { + "catgrad": { + "inputs": { + "flake-utils": "flake-utils", + "nixpkgs": [ + "nixpkgs" + ] + }, + "locked": { + "lastModified": 1777626809, + "narHash": "sha256-wc38eHVxW4xBqLrTeoAtOcbsGXokDS7ZOkDm7WVQrnY=", + "owner": "hellas-ai", + "repo": "catgrad", + "rev": "3cd07079ca27882baa7e89b25753dd9ccd170bf0", + "type": "github" + }, + "original": { + "owner": "hellas-ai", + "repo": "catgrad", + "type": "github" + } + }, "flake-utils": { "inputs": { "systems": "systems" @@ -20,11 +41,11 @@ }, "nixpkgs": { "locked": { - "lastModified": 1767640445, - "narHash": "sha256-UWYqmD7JFBEDBHWYcqE6s6c77pWdcU/i+bwD6XxMb8A=", + "lastModified": 1777578337, + "narHash": "sha256-Ad49moKWeXtKBJNy2ebiTQUEgdLyvGmTeykAQ9xM+Z4=", "owner": "NixOS", "repo": "nixpkgs", - "rev": "9f0c42f8bc7151b8e7e5840fb3bd454ad850d8c5", + "rev": "15f4ee454b1dce334612fa6843b3e05cf546efab", "type": "github" }, "original": { @@ -52,7 +73,7 @@ }, "root": { "inputs": { - "flake-utils": "flake-utils", + "catgrad": "catgrad", "nixpkgs": "nixpkgs", "rust-overlay": "rust-overlay" } @@ -62,11 +83,11 @@ "nixpkgs": "nixpkgs_2" }, "locked": { - "lastModified": 1767754000, - "narHash": "sha256-znoNJs2QZFl+wCFLd6FbUJ00c74kvzOjyQYXc45uFvo=", + "lastModified": 1777691680, + "narHash": 
"sha256-sdCAzrPAaKu+yo7L2pWddy5PN6U9bO++WEWc1zcr7aQ=", "owner": "oxalica", "repo": "rust-overlay", - "rev": "0b3a5ad260479f2c9bdadf3ba5b2a4be359cfcdd", + "rev": "4757db4358c77c1cbe878fa5990e6ea88d82f6b5", "type": "github" }, "original": { diff --git a/flake.nix b/flake.nix index f2b996f..2d0cc61 100644 --- a/flake.nix +++ b/flake.nix @@ -1,135 +1,70 @@ { description = "Hellas Node"; + + # CA derivations let the HF cache packages (and any other system-independent + # outputs) substitute across Linux/Darwin from a shared binary cache. + # extra-substituters / extra-trusted-public-keys are an opt-in for downstream + # users — nix will prompt to accept on first use (or auto-accept with + # `--accept-flake-config`). + nixConfig = { + extra-experimental-features = [ "ca-derivations" ]; + extra-substituters = [ "https://cache.hellas.ai" ]; + extra-trusted-public-keys = [ "cache.hellas.ai-1:PYolh95U/Ms5fKE+NQTcNZUHyEv4QikaNocg9I9iy0g=" ]; + }; + inputs = { nixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable"; - flake-utils.url = "github:numtide/flake-utils"; rust-overlay.url = "github:oxalica/rust-overlay"; + catgrad = { + url = "github:hellas-ai/catgrad"; + inputs.nixpkgs.follows = "nixpkgs"; + }; }; - outputs = { - self, - nixpkgs, - flake-utils, - rust-overlay, - }: - flake-utils.lib.eachDefaultSystem (system: let - overlays = [(import rust-overlay)]; - pkgs = import nixpkgs { - inherit system overlays; - }; - - rust-toolchain = pkgs.buildPackages.rust-bin.fromRustupToolchainFile ./rust-toolchain.toml; - rustPlatform = pkgs.makeRustPlatform { - rustc = rust-toolchain; - cargo = rust-toolchain; - }; - - commonArgs = { - pname = "hellas"; - version = "0.1.0"; - src = ./.; - cargoLock = { - lockFile = ./Cargo.lock; - outputHashes = { - "catgrad-0.2.1" = "sha256-rlhwlUACdJyIlRg2jTA5nb2KcPQ+lCpWnhu68Z2idbM="; - }; - }; - auditable = false; - defaultFeatures = false; - buildInputs = with pkgs; [openssl]; - nativeBuildInputs = with pkgs; [pkg-config protobuf]; - checkInputs = 
with pkgs; [cargo-deny cargo-outdated]; - separateDebugInfo = true; - meta.mainProgram = "hellas-cli"; - }; - - cli = rustPlatform.buildRustPackage commonArgs; - server = rustPlatform.buildRustPackage (commonArgs // {buildFeatures = ["serve"];}); - in { - packages = { - default = cli; - inherit cli server; - }; + outputs = + { + self, + nixpkgs, + rust-overlay, + catgrad, + }: + let + systems = [ + "x86_64-linux" + "aarch64-linux" + "aarch64-darwin" + ]; + forAllSystems = nixpkgs.lib.genAttrs systems; + perSystem = forAllSystems ( + system: + import ./nix { + inherit + self + system + nixpkgs + rust-overlay + catgrad + ; + } + ); + in + { + packages = forAllSystems (system: perSystem.${system}.packages); + apps = forAllSystems (system: perSystem.${system}.apps); + devShells = forAllSystems (system: perSystem.${system}.devShells); + checks = forAllSystems (system: perSystem.${system}.checks); + nixosTests = forAllSystems (system: perSystem.${system}.nixosTests); + ci = forAllSystems (system: perSystem.${system}.ci); overlays.default = final: _prev: { - hellas = self.packages.${final.system}.cli; - hellas-serve = self.packages.${final.system}.server; - }; - - devShells.default = pkgs.mkShell { - inputsFrom = [self.packages.${system}.default]; - buildInputs = with pkgs; [ - pre-commit - protobuf-language-server - cargo-watch - gh - ]; - }; - }) - // { - nixosModules.hellas = { - config, - lib, - pkgs, - ... 
- }: let - inherit (lib) mkEnableOption mkIf mkOption types concatStringsSep optional; - cfg = config.services.hellas; - cliArgs = concatStringsSep " " (["serve"] ++ optional cfg.discovery "--discovery" ++ cfg.extraArgs); - in { - options.services.hellas = { - enable = mkEnableOption "Hellas node server"; - package = mkOption { - type = types.package; - default = self.packages.${pkgs.stdenv.hostPlatform.system}.server; - description = "Package providing the hellas CLI (with serve feature)."; - }; - discovery = mkOption { - type = types.bool; - default = false; - description = "Enable discovery (LAN mDNS + internet discovery via pkarr/DNS + DHT)."; - }; - openFirewall = mkOption { - type = types.bool; - default = false; - description = "Open firewall port for the hellas node."; - }; - port = mkOption { - type = types.port; - default = 31145; - description = "Port for the hellas node to listen on."; - }; - extraArgs = mkOption { - type = types.listOf types.str; - default = []; - description = "Extra arguments to pass to `hellas-cli serve`."; - }; - }; - - config = mkIf cfg.enable { - systemd.services.hellas = { - description = "Hellas node server"; - wantedBy = ["multi-user.target"]; - after = ["network-online.target"]; - wants = ["network-online.target"]; - environment = { - HOME = "/var/lib/hellas"; - }; - serviceConfig = { - ExecStart = "${cfg.package}/bin/hellas-cli ${cliArgs}"; - Restart = "on-failure"; - DynamicUser = true; - StateDirectory = "hellas"; - WorkingDirectory = "/var/lib/hellas"; - }; - }; - - networking.firewall = mkIf cfg.openFirewall { - allowedUDPPorts = [cfg.port]; - }; - }; + hellas = self.packages.${final.system}; + hellasLib = import ./nix/lib { pkgs = final; }; }; + nixosModules.hellas = import ./nix/modules/nixos.nix { inherit self; }; nixosModules.default = self.nixosModules.hellas; + + homeManagerModules.hellas = import ./nix/modules/home-manager.nix { inherit self; }; + homeManagerModules.default = self.homeManagerModules.hellas; }; } 
diff --git a/nix/ci.nix b/nix/ci.nix new file mode 100644 index 0000000..0607be2 --- /dev/null +++ b/nix/ci.nix @@ -0,0 +1,123 @@ +{ + pkgs, + lib, + rustToolchain, + workspaceNativeBuildInputs, +}: +let + mk = + name: cmd: inputs: + pkgs.writeShellApplication { + inherit name; + text = '' + export PATH="${lib.makeBinPath inputs}" + ${cmd} + ''; + }; + + cargoEnv = + toolchain: + [ + toolchain + pkgs.stdenv.cc + ] + ++ workspaceNativeBuildInputs; + + # CI-gating checks. These surface as `apps..check-` and run in + # the GitHub Actions matrix. + checks = { + fmt = mk "check-fmt" "cargo fmt --all -- --check" [ rustToolchain ]; + clippy = mk "check-clippy" "cargo clippy --workspace --all-targets -- -D warnings" ( + cargoEnv rustToolchain + ); + sort = mk "check-sort" "cargo-sort --workspace --check" [ pkgs.cargo-sort ]; + taplo = mk "check-taplo" "taplo fmt --check '*.toml' 'crates/**/Cargo.toml'" [ pkgs.taplo ]; + buf = mk "check-buf" "buf lint" [ pkgs.buf ]; + deadnix = mk "check-deadnix" '' + shopt -s globstar + deadnix --fail flake.nix nix/**/*.nix + '' [ pkgs.deadnix ]; + statix = mk "check-statix" "statix check ." [ pkgs.statix ]; + nixfmt = mk "check-nixfmt" '' + shopt -s globstar + nixfmt --check flake.nix nix/**/*.nix + '' [ pkgs.nixfmt-rfc-style ]; + flake-check = mk "check-flake-check" "nix flake check --no-build" [ pkgs.nix ]; + wasm-rpc = mk "check-wasm-rpc" "cargo check -p hellas-rpc --target wasm32-unknown-unknown" ( + cargoEnv (rustToolchain.override { targets = [ "wasm32-unknown-unknown" ]; }) + ); + }; + + # Auto-fix variants. Not all checks have one (e.g. test, wasm-rpc). + fixes = { + fmt = mk "fix-fmt" "cargo fmt --all" [ rustToolchain ]; + clippy = + mk "fix-clippy" "cargo clippy --workspace --all-targets --fix --allow-dirty --allow-staged" + (cargoEnv rustToolchain); + sort = mk "fix-sort" "cargo-sort --workspace" [ pkgs.cargo-sort ]; + }; + + # Heuristic — noisy enough to keep out of CI. 
Surfaced only via + # `nix run .#check` for occasional dev use. + outdatedCheck = + mk "check-outdated" + '' + report="$(cargo outdated --workspace --root-deps-only --format json)" + breaking_updates="$( + echo "$report" | jq -r ' + . as $pkg + | .dependencies[]? + | select( + .kind != "Development" + and .latest != "Removed" + and .latest != "---" + and .compat == "---" + ) + | "\($pkg.crate_name)\t\(.name)\t\(.project)\t\(.latest)" + ' + )" + if [ -n "$breaking_updates" ]; then + echo "Semver-breaking root dependency updates available:" + printf "crate\tdependency\tcurrent\tlatest\n" + echo "$breaking_updates" + exit 1 + fi + echo "No semver-breaking root dependency updates detected." + '' + ( + with pkgs; + [ + rustToolchain + cargo-outdated + jq + ] + ); + + # `nix run .#check` runs every gating check plus outdated. + # `nix run .#fix` runs the auto-fix variants. + mkAggregate = + name: pkgList: + pkgs.writeShellApplication { + inherit name; + text = lib.concatMapStringsSep "\n" lib.getExe pkgList; + }; + + # Extended (post-gate) builds run only on push. Each value is an attribute + # path under `packages.` consumed by the GitHub Actions matrix as + # `nix build .#packages..`. 
+ ciBuilds = { + cli = "cli"; + cli-candle = "cli-candle"; + static-x86_64 = "cross-x86_64-linux-musl-cli"; + static-aarch64 = "cross-aarch64-linux-musl-cli"; + static-windows = "cross-x86_64-windows-cli"; + docker-cuda = "docker-cuda"; + hellas-rpc-wasm = "hellas-rpc-wasm"; + }; +in +{ + inherit checks fixes; + builds = ciBuilds; + checkAll = mkAggregate "check-all" ((lib.attrValues checks) ++ [ outdatedCheck ]); + fixAll = mkAggregate "fix-all" (lib.attrValues fixes); +} diff --git a/nix/default.nix b/nix/default.nix new file mode 100644 index 0000000..19387cc --- /dev/null +++ b/nix/default.nix @@ -0,0 +1,319 @@ +{ + self, + system, + nixpkgs, + rust-overlay, + catgrad, +}: +let + nativePkg = import ./package.nix { + inherit + self + system + nixpkgs + rust-overlay + ; + }; + inherit (nativePkg) + pkgs + lib + rustToolchain + workspaceNativeBuildInputs + ; + + # Template for the pi provider extension. Substituted by piShim at runtime. + piExtensionTemplate = pkgs.writeText "hellas-pi-extension.template.js" '' + export default function (pi) { + pi.registerProvider("hellas", { + baseUrl: "@@BASE@@", + apiKey: "unused", + api: "@@API@@", + models: [{ + id: "@@MODEL@@", + name: "@@MODEL@@ (Hellas)", + reasoning: false, + input: ["text"], + cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }, + contextWindow: 32768, + maxTokens: 2048, + }], + }); + } + ''; + + # Wrapper for running pi behind `hellas-cli gateway --wrap`. It reads the + # gateway base URL from env (set by `--wrap`), writes a one-shot provider + # extension, then execs pi against that provider. 
+ piShim = pkgs.writeShellApplication { + name = "hellas-pi-shim"; + runtimeInputs = [ + pkgs.coreutils + pkgs.gnused + ]; + text = '' + set -eu + model="''${HELLAS_MODEL:-Qwen/Qwen3-0.6B}" + api="''${HELLAS_API:-anthropic-messages}" + case "$api" in + anthropic-messages) base="''${ANTHROPIC_BASE_URL:?ANTHROPIC_BASE_URL not set}" ;; + openai-completions) base="''${OPENAI_BASE_URL:?OPENAI_BASE_URL not set}" ;; + *) echo "hellas-pi-shim: unsupported HELLAS_API='$api'" >&2; exit 2 ;; + esac + ext=$(mktemp --suffix=.js -t hellas-pi-XXXXXX) + sed -e "s|@@BASE@@|$base|g" -e "s|@@API@@|$api|g" -e "s|@@MODEL@@|$model|g" \ + ${piExtensionTemplate} > "$ext" + export ANTHROPIC_API_KEY=unused OPENAI_API_KEY=unused + exec ${pkgs.pi-coding-agent}/bin/pi -e "$ext" --provider hellas --model "$model" "$@" + ''; + }; + + piShimPath = pkgs.runCommand "hellas-pi-shim-path" { } '' + mkdir -p "$out/bin" + ln -s ${piShim}/bin/hellas-pi-shim "$out/bin/pi" + ''; + + mkHellasRun = + { gatewayCommand }: + pkgs.writeShellScriptBin "hellas-run" '' + # Usage: hellas-run [--gw-flag=value...] CMD [CMD-ARGS...] + # Leading flags (anything starting with `-`) go to `hellas-cli gateway`. + # First positional is the wrapped command; the rest are its args. + # Use `--flag=value` for gateway options that take a value. + set -eu + export PATH="${piShimPath}/bin:$PATH" + gw=() + while [ $# -gt 0 ]; do + case "$1" in -*) gw+=("$1"); shift ;; *) break ;; esac + done + [ $# -gt 0 ] || { echo "usage: hellas-run [--gw-flag=value...] CMD [args]" >&2; exit 2; } + cmd="$1"; shift + # `pi` doesn't honor *_BASE_URL env vars — route it through the packaged + # shim that runs inside the wrap and registers a hellas provider. 
+ case "$(${pkgs.coreutils}/bin/basename "$cmd")" in pi) cmd=${piShim}/bin/hellas-pi-shim ;; esac + exec ${gatewayCommand} "''${gw[@]}" --wrap "$cmd" -- "$@" + ''; + + hellasRunDev = mkHellasRun { + gatewayCommand = "cargo run --quiet --features candle --bin hellas-cli -- gateway"; + }; + + devShellPackages = with pkgs; [ + rustToolchain + pkg-config + protobuf + llvmPackages.lld + pre-commit + protobuf-language-server + cargo-watch + gh + cargo-audit + cargo-outdated + cargo-sort + skopeo + pi-coding-agent + piShim + hellasRunDev + ]; + + envShellHook = '' + if [ -f .env ]; then + set -a + source .env + set +a + fi + ''; + + ci = import ./ci.nix { + inherit + pkgs + lib + rustToolchain + workspaceNativeBuildInputs + ; + }; + + hfCaches = pkgs.hellasLib.hf; + + packagesFor = + crossSystem: + let + pkgSpec = import ./package.nix { + inherit + self + system + nixpkgs + rust-overlay + crossSystem + ; + }; + inherit (pkgSpec.pkgs.stdenv) hostPlatform; + in + { + cli = pkgSpec.mkHellasPackage { + buildInputs = [ ]; + }; + cli-candle = pkgSpec.mkHellasPackage { + buildNoDefaultFeatures = true; + buildFeatures = [ "candle" ]; + }; + } + // lib.optionalAttrs hostPlatform.isDarwin { + cli-candle-metal = pkgSpec.mkHellasPackage { + buildNoDefaultFeatures = true; + buildFeatures = [ "candle-metal" ]; + }; + }; + + crossTargets = { + "aarch64-linux" = nixpkgs.lib.systems.examples.aarch64-multiplatform; + "riscv64-linux" = nixpkgs.lib.systems.examples.riscv64; + "x86_64-linux-musl" = nixpkgs.lib.systems.examples.musl64 // { + isStatic = true; + }; + "aarch64-linux-musl" = nixpkgs.lib.systems.examples.aarch64-multiplatform-musl // { + isStatic = true; + }; + "x86_64-windows" = nixpkgs.lib.systems.examples.mingwW64; + }; + + nativePackages = packagesFor null; + hellasRun = mkHellasRun { + gatewayCommand = "${nativePackages.cli-candle}/bin/hellas-cli gateway"; + }; + # Flat `cross--` packages. 
Nested `packages..cross..` + # violates the flake schema (each entry must be a derivation), which `nix flake check` + # rightly flags. + crossPackages = lib.concatMapAttrs ( + tgt: lib.mapAttrs' (name: pkg: lib.nameValuePair "cross-${tgt}-${name}" pkg) + ) (lib.mapAttrs (_: packagesFor) crossTargets); + + # Wasm build of hellas-rpc. Native rustToolchain with an additional wasm32 + # target — not a nix crossSystem (that's for OS-level cross), just a rust + # target. Output is whatever cargo produces in + # `target/wasm32-unknown-unknown/release/` (rlib today; .wasm if/when the + # crate adds `crate-type = ["cdylib"]`). + hellasRpcWasm = + let + wasmRust = rustToolchain.override { targets = [ "wasm32-unknown-unknown" ]; }; + wasmPlatform = pkgs.makeRustPlatform { + rustc = wasmRust; + cargo = wasmRust; + stdenv = pkgs.clangStdenv; + }; + in + wasmPlatform.buildRustPackage ( + nativePkg.commonArgs + // { + pname = "hellas-rpc-wasm"; + cargoBuildFlags = [ + "-p" + "hellas-rpc" + ]; + CARGO_BUILD_TARGET = "wasm32-unknown-unknown"; + # wasm tests need wasm-bindgen-test infra (deferred). buildRustPackage's + # canExecute heuristic doesn't see our CARGO_BUILD_TARGET override, so + # without this it'd try to invoke `cargo test` against wasm and fail. 
+ doCheck = false; + } + ); + + linuxOutputs = lib.optionalAttrs pkgs.stdenv.hostPlatform.isLinux ( + let + docker = import ./docker.nix { + inherit + pkgs + lib + rustToolchain + catgrad + system + ; + inherit (nativePkg) mkHellasPackage; + cliCandle = nativePackages.cli-candle; + }; + + nixosTests = import ./tests { + inherit self pkgs lib; + package = nativePackages.cli-candle; + inherit hellasRun; + }; + in + { + packages = { + cli-candle-cuda = docker.defaultCudaCli; + docker-cuda = docker.defaultCudaImage; + } + // lib.mapAttrs' (name: value: lib.nameValuePair "docker-${name}" value) docker.dockerImages + // lib.mapAttrs' ( + name: value: lib.nameValuePair "cli-candle-cuda-${name}" value + ) docker.cudaCliPackages; + + apps."docker-push-all" = { + type = "app"; + program = "${docker.pushAll}/bin/docker-push-all"; + }; + + devShells.cuda = pkgs.mkShell { + packages = devShellPackages; + shellHook = envShellHook; + inherit (docker.defaultCudaEnv) nativeBuildInputs; + inherit (docker.defaultCudaEnv) buildInputs; + inherit (docker.defaultCudaEnv) CUDA_COMPUTE_CAP CUDA_TOOLKIT_ROOT_DIR; + LD_LIBRARY_PATH = "${docker.defaultCudaEnv.runtimeLibraryPath}:${docker.defaultCudaEnv.driverLink}/lib"; + }; + + inherit nixosTests; + } + ); +in +{ + packages = + nativePackages + // crossPackages + // { + default = nativePackages.cli; + "hf-cache-lfm2-350m" = hfCaches.lfm2_350MCache; + "hf-cache-qwen3-0_6b" = hfCaches.qwen3_0_6BCache; + "hellas-pi-shim" = piShim; + "hellas-run" = hellasRun; + "hellas-rpc-wasm" = hellasRpcWasm; + } + // (linuxOutputs.packages or { }); + + apps = { + check = { + type = "app"; + program = lib.getExe ci.checkAll; + meta.description = "Run all CI checks (sort, fmt, clippy, test, wasm-rpc, outdated)"; + }; + fix = { + type = "app"; + program = lib.getExe ci.fixAll; + meta.description = "Apply auto-fixes (fmt, sort, clippy)"; + }; + } + // (lib.mapAttrs' ( + name: pkg: + lib.nameValuePair "check-${name}" { + type = "app"; + program = lib.getExe pkg; 
+ } + ) ci.checks) + // (linuxOutputs.apps or { }); + + devShells = { + default = pkgs.mkShell { + packages = devShellPackages; + shellHook = envShellHook; + }; + } + // (linuxOutputs.devShells or { }); + + # Data exposed for the GitHub Actions matrix: + # .checks → { name → derivation } (workflow uses `attrNames`) + # .builds → { name → attrPath } (extended post-gate builds) + ci = { inherit (ci) checks builds; }; + + # nixosTests are also surfaced under `checks` so `nix flake check` runs them. + checks = linuxOutputs.nixosTests or { }; + nixosTests = linuxOutputs.nixosTests or { }; +} diff --git a/nix/docker.nix b/nix/docker.nix new file mode 100644 index 0000000..b87b86c --- /dev/null +++ b/nix/docker.nix @@ -0,0 +1,207 @@ +{ + pkgs, + lib, + mkHellasPackage, + rustToolchain, + catgrad, + system, + cliCandle, +}: +let + imageRepository = "ghcr.io/hellas-ai/node"; + runtimeCoreLibs = with pkgs; [ + stdenv.cc.cc.lib + glibc + ]; + + # Each variant maps to exactly one CUDA toolkit × SM architecture build. + # bindgen_cuda compiles kernels for a single --gpu-architecture, so we need + # one binary per target GPU generation. 
+ variants = [ + { + cuda = pkgs.cudaPackages_12; + sm = "80"; + tag = "cuda12-sm80"; + } # A100, A30 + { + cuda = pkgs.cudaPackages_12; + sm = "86"; + tag = "cuda12-sm86"; + } # RTX 3090/3080, A40 + { + cuda = pkgs.cudaPackages_12; + sm = "89"; + tag = "cuda12-sm89"; + } # RTX 4090/4080, L40S + { + cuda = pkgs.cudaPackages_13; + sm = "89"; + tag = "cuda13-sm89"; + } # RTX 4090/4080, L40S + { + cuda = pkgs.cudaPackages_13; + sm = "120"; + tag = "cuda13-sm120"; + } # RTX 5090/5080, Blackwell + ]; + defaultTag = "cuda12-sm89"; + + mkCudaEnv = + v: + catgrad.lib.${system}.mkCudaEnv { + cudaPackages = v.cuda; + cudaCapability = v.sm; + }; + + mkCliRuntime = + { + name, + pkg, + sourceBin, + }: + pkgs.runCommand name + { + nativeBuildInputs = [ pkgs.removeReferencesTo ]; + } + '' + mkdir -p "$out/bin" + cp "${pkg}/bin/${sourceBin}" "$out/bin/hellas-cli" + chmod u+w "$out/bin/hellas-cli" + remove-references-to -t ${rustToolchain} "$out/bin/hellas-cli" + chmod 0555 "$out/bin/hellas-cli" + ''; + + mkServerImage = + { + imageTag, + runtimePkg, + extraRuntimeContents ? [ ], + cudaEnv ? 
null, + }: + pkgs.dockerTools.streamLayeredImage { + name = imageRepository; + tag = imageTag; + contents = [ + runtimePkg + pkgs.cacert + pkgs.iana-etc + ] + ++ runtimeCoreLibs + ++ extraRuntimeContents; + config = { + Entrypoint = [ + "${runtimePkg}/bin/hellas-cli" + "serve" + ]; + WorkingDir = "/var/lib/hellas"; + Volumes = { + "/var/lib/hellas" = { }; + }; + ExposedPorts = { + "31145/udp" = { }; + }; + Env = [ + "HOME=/home/hellas" + "HF_HOME=/home/hellas/.cache/huggingface" + "SSL_CERT_FILE=${pkgs.cacert}/etc/ssl/certs/ca-bundle.crt" + "NIX_SSL_CERT_FILE=${pkgs.cacert}/etc/ssl/certs/ca-bundle.crt" + ] + ++ lib.optionals (cudaEnv != null) [ + "NVIDIA_VISIBLE_DEVICES=all" + "NVIDIA_DRIVER_CAPABILITIES=compute,utility" + "LD_LIBRARY_PATH=${cudaEnv.runtimeLibraryPath}:/usr/lib/x86_64-linux-gnu:/usr/lib64:/usr/local/nvidia/lib64" + ]; + }; + }; + + cliCandleRuntime = mkCliRuntime { + name = "hellas-cli-candle-runtime"; + pkg = cliCandle; + sourceBin = "hellas-cli"; + }; + + mkCudaImage = + v: + let + cudaEnv = mkCudaEnv v; + cliCuda = mkHellasPackage { + buildNoDefaultFeatures = true; + buildFeatures = [ "candle-cuda" ]; + doCheck = false; + nativeBuildInputs = + (with pkgs.buildPackages; [ + pkg-config + protobuf + llvmPackages.lld + makeWrapper + ]) + ++ cudaEnv.nativeBuildInputs; + inherit (cudaEnv) buildInputs; + inherit (cudaEnv) CUDA_COMPUTE_CAP CUDA_TOOLKIT_ROOT_DIR; + postInstall = '' + for bin in $out/bin/*; do + if [ -x "$bin" ] && [ ! 
-L "$bin" ]; then + wrapProgram "$bin" \ + --prefix LD_LIBRARY_PATH : "${cudaEnv.runtimeLibraryPath}" + fi + done + ''; + }; + runtime = mkCliRuntime { + name = "hellas-cli-${v.tag}-runtime"; + pkg = cliCuda; + sourceBin = ".hellas-cli-wrapped"; + }; + in + { + inherit cudaEnv; + cli = cliCuda; + image = mkServerImage { + imageTag = v.tag; + runtimePkg = runtime; + extraRuntimeContents = cudaEnv.buildInputs; + inherit cudaEnv; + }; + }; + + cudaImages = lib.listToAttrs ( + map (v: { + name = v.tag; + value = mkCudaImage v; + }) variants + ); + + defaultCuda = cudaImages.${defaultTag}; + + dockerImages = { + cpu = mkServerImage { + imageTag = "cpu"; + runtimePkg = cliCandleRuntime; + }; + } + // lib.mapAttrs (_: v: v.image) cudaImages; + + pushAll = pkgs.writeShellApplication { + name = "docker-push-all"; + runtimeInputs = [ pkgs.skopeo ]; + text = lib.concatStringsSep "\n" ( + lib.mapAttrsToList (name: image: '' + echo "pushing ${imageRepository}:${name}" + ${image} | skopeo copy docker-archive:/dev/stdin "docker://${imageRepository}:${name}" "$@" + '') dockerImages + ); + }; + cudaCliPackages = lib.mapAttrs (_: v: v.cli) cudaImages; + defaultCudaCli = defaultCuda.cli; + defaultCudaImage = defaultCuda.image; +in +{ + defaultCudaEnv = defaultCuda.cudaEnv; + inherit + dockerImages + pushAll + cudaCliPackages + defaultCudaCli + defaultCudaImage + ; +} diff --git a/nix/lib/default.nix b/nix/lib/default.nix new file mode 100644 index 0000000..6b6a6fa --- /dev/null +++ b/nix/lib/default.nix @@ -0,0 +1,16 @@ +{ pkgs }: +{ + apiFlavor = { + anthropic = "anthropic-messages"; + openai = "openai-completions"; + }; + # Canonical executor UDP port. Matches `DEFAULT_PORT` in + # `crates/cli/src/commands/serve/node.rs` and the `31145/udp` exposed by + # the docker images. + executorPort = 31145; + # Default state directory for the Hellas serve daemon. Used by the NixOS + # module as HOME / WorkingDirectory, and as the base for any documented + # path examples. 
+ defaultStateDir = "/var/lib/hellas"; + hf = import ./hf.nix { inherit pkgs; }; +} diff --git a/nix/lib/hf.nix b/nix/lib/hf.nix new file mode 100644 index 0000000..1c2f168 --- /dev/null +++ b/nix/lib/hf.nix @@ -0,0 +1,72 @@ +{ pkgs }: +let + inherit (pkgs) lib; +in +rec { + # Build a HuggingFace-shaped cache directory. `files` is an attrset mapping + # in-snapshot file name -> SRI hash; each fetched file is symlinked into the + # snapshot tree so HF_HOME= behaves like a populated hub cache. + mkHuggingFaceCache = + { + name, + repo, + revision, + files, + ref ? "main", + }: + let + repoPath = "models--${lib.replaceStrings [ "/" ] [ "--" ] repo}"; + snapshotPath = "$out/hub/${repoPath}/snapshots/${revision}"; + fetchFile = + file: hash: + pkgs.fetchurl { + url = "https://huggingface.co/${repo}/resolve/${revision}/${file}"; + sha256 = hash; + }; + linkCommands = lib.concatStringsSep "\n" ( + lib.mapAttrsToList (file: hash: '' + ln -s ${fetchFile file hash} "${snapshotPath}/${file}" + '') files + ); + in + pkgs.runCommand name + { + # Output is just symlinks to fetchurl FOD paths, byte-identical across + # systems. CA derivation -> store path derived from the NAR hash, so a + # cache built on Linux substitutes cleanly into a Darwin closure. 
+ __contentAddressed = true; + outputHashMode = "recursive"; + outputHashAlgo = "sha256"; + } + '' + mkdir -p "$out/hub/${repoPath}/refs" "${snapshotPath}" + printf '%s' '${revision}' > "$out/hub/${repoPath}/refs/${ref}" + ${linkCommands} + ''; + + lfm2_350MCache = mkHuggingFaceCache { + name = "hf-cache-lfm2-350m"; + repo = "LiquidAI/LFM2-350M"; + revision = "b29be27ca6f2a4f5523cd9efbfd4c6caa3951d36"; + files = { + "config.json" = "sha256-/Ts/uk5Q57miK9QcurWemyjjGbLeGWaNf9l3fI0am6E="; + "model.safetensors" = "sha256-OHY43Iif8aE5XDwquWBSEeTH4W8tN1Nh3U5CO5CaJU4="; + "special_tokens_map.json" = "sha256-dCrv4rfexJboyv/boDp10MGpkl1TvT8+DTiMlrWRtvQ="; + "tokenizer.json" = "sha256-mM/4O09tfp2JKb68YrB+ks8bP5nIDRa6/ouEp1RI9As="; + "tokenizer_config.json" = "sha256-Y87Y7oYn+ksGOMTAVzUfAPtOMyyiMnqaAO7MWXjoSDU="; + "chat_template.jinja" = "sha256-zvGHQA1ipZUHqrOmQuqajSou8mNWK8NDBWDhFpRSc88="; + }; + }; + + qwen3_0_6BCache = mkHuggingFaceCache { + name = "hf-cache-qwen3-0_6b"; + repo = "Qwen/Qwen3-0.6B"; + revision = "c1899de289a04d12100db370d81485cdf75e47ca"; + files = { + "config.json" = "sha256-Zg2ztz14gRnARTXkjPm+X1W8MQCEGnGGN65pW0QvJ90="; + "model.safetensors" = "sha256-9H9xF38yvNEBt1c+yRcealf09NMRSNOOOCMG9CmWh0s="; + "tokenizer.json" = "sha256-rrEzB6cazY/oGGHZStVKtonfdzMYgJ7tPL55S0SS2uQ="; + "tokenizer_config.json" = "sha256-1dCfB7SMMIbFCLMNHJEUvRGJFFt06YKiZTUMkjrNgQE="; + }; + }; +} diff --git a/nix/modules/hellas.nix b/nix/modules/hellas.nix new file mode 100644 index 0000000..c08afe1 --- /dev/null +++ b/nix/modules/hellas.nix @@ -0,0 +1,226 @@ +{ self }: +rec { + # Pick the best available hellas CLI variant for the target system: + # Darwin → cli-candle-metal + # Linux + cuda → cli-candle-cuda (requires `nixpkgs.config.cudaSupport = true`) + # otherwise → cli-candle + # Each step checks the package set for membership so a missing variant + # falls through instead of erroring. 
+ pickCliPackage = + pkgs: + let + pkgSet = self.packages.${pkgs.stdenv.hostPlatform.system}; + inherit (pkgs.stdenv.hostPlatform) isDarwin; + cudaEnabled = pkgs.config.cudaSupport or false; + in + if isDarwin && pkgSet ? cli-candle-metal then + pkgSet.cli-candle-metal + else if cudaEnabled && pkgSet ? cli-candle-cuda then + pkgSet.cli-candle-cuda + else + pkgSet.cli-candle; + + renderEnvironment = builtins.mapAttrs (_: toString); + + commonOptions = + { + lib, + package, + packageDescription, + }: + let + inherit (lib) mkEnableOption mkOption types; + in + { + enable = mkEnableOption "Hellas"; + package = mkOption { + type = types.package; + default = package; + description = packageDescription; + }; + environment = mkOption { + type = types.attrsOf ( + types.oneOf [ + types.str + types.path + types.package + types.int + ] + ); + default = { }; + example = { + HF_HOME = "/var/lib/hellas/huggingface"; + OTEL_SERVICE_NAME = "hellas"; + }; + description = "Environment variables exported to Hellas processes."; + }; + otel = otelOptions { inherit lib; }; + }; + + otelOptions = + { lib }: + let + inherit (lib) mkOption types; + in + { + endpoint = mkOption { + type = types.nullOr types.str; + default = null; + example = "https://jaeger.example.com/v1/traces"; + description = "OTEL_EXPORTER_OTLP_TRACES_ENDPOINT — OTLP collector URL. Enables trace export when set."; + }; + serviceName = mkOption { + type = types.str; + default = "hellas-node"; + description = "OTEL_SERVICE_NAME — service name attached to exported spans."; + }; + sampleRate = mkOption { + type = types.nullOr (types.numbers.between 0.0 1.0); + default = null; + example = 0.5; + description = "OTEL_TRACES_SAMPLER_ARG — trace sample rate (0.0–1.0). 
Null uses the CLI default of 1.0."; + }; + headers = mkOption { + type = types.attrsOf types.str; + default = { }; + example = { + CF-Access-Client-Id = "abc123"; + CF-Access-Client-Secret = "secret"; + }; + description = '' + OTEL_EXPORTER_OTLP_HEADERS — extra headers sent with each OTLP export request. + Useful for Cloudflare Access or other auth proxies. + ''; + }; + }; + + # Serve-daemon options. Reused by NixOS systemd, HM-on-darwin launchd, and + # any other future daemon surface. The keys here mirror `hellas-cli serve`'s + # CLI flags one-for-one — see `mkServeArgs` for the binding. + serveOptions = + { lib, pkgs }: + let + inherit (lib) mkOption types; + in + { + port = mkOption { + type = types.nullOr types.port; + default = pkgs.hellasLib.executorPort; + description = "Port for the Hellas node to listen on. Null lets the CLI auto-select."; + }; + downloadPolicy = mkOption { + type = types.nullOr (types.either types.str (types.listOf types.str)); + default = null; + example = [ + "Qwen3/*" + "meta-llama/*" + ]; + description = '' + Model download policy. + "skip" (CLI default) never downloads, + "eager" downloads any requested model, + and "allow(pattern,...)" downloads only matching Hugging Face models. + A list of patterns is shorthand for "allow(p1,p2,...)". + ''; + }; + executePolicy = mkOption { + type = types.nullOr (types.either types.str (types.listOf types.str)); + default = null; + example = [ + "hf/Qwen/*" + "graph/llm/*" + ]; + description = '' + Graph execution policy. + "skip" (CLI default) refuses all executions, + "eager" executes any graph, + and "allow(hf/pattern,...,graph/pattern,...)" executes only matching requests. + A list of patterns is shorthand for "allow(p1,p2,...)". 
+ ''; + }; + queueSize = mkOption { + type = types.nullOr types.ints.positive; + default = null; + description = "Maximum number of queued executions waiting behind the active worker."; + }; + preloadWeights = mkOption { + type = types.listOf types.str; + default = [ ]; + description = "Model identifiers to preload on startup."; + }; + metricsPort = mkOption { + type = types.nullOr types.port; + default = null; + description = "Optional Prometheus metrics port."; + }; + graffiti = mkOption { + type = types.nullOr types.str; + default = null; + description = "Operator graffiti tag (up to 16 bytes, padded/truncated). Self-reported to peers."; + }; + extraArgs = mkOption { + type = types.listOf types.str; + default = [ ]; + description = "Extra arguments to pass to `hellas-cli serve`."; + }; + }; + + # OTEL_EXPORTER_OTLP_* env vars derived from a resolved `otel` cfg. + # Returns {} when no endpoint is set so callers can `//`-merge unconditionally. + mkOtelEnv = + { + lib, + otel, + }: + lib.optionalAttrs (otel.endpoint != null) ( + { + OTEL_EXPORTER_OTLP_TRACES_ENDPOINT = otel.endpoint; + OTEL_SERVICE_NAME = otel.serviceName; + } + // lib.optionalAttrs (otel.sampleRate != null) { + OTEL_TRACES_SAMPLER_ARG = toString otel.sampleRate; + } + // lib.optionalAttrs (otel.headers != { }) { + OTEL_EXPORTER_OTLP_HEADERS = lib.concatStringsSep "," ( + lib.mapAttrsToList (k: v: "${k}=${v}") otel.headers + ); + } + ); + + # `hellas-cli serve ...` argv from a resolved serve cfg. The cfg shape is + # whatever attrset carries `serveOptions` keys — for NixOS that's the top- + # level `services.hellas`, for HM-darwin it's `programs.hellas.serve`. 
+ mkServeArgs = + { + lib, + serve, + }: + let + optArg = + flag: value: + lib.optionals (value != null) [ + flag + (toString value) + ]; + renderPolicy = + value: + if value == null then + null + else if lib.isList value then + "allow(${lib.concatStringsSep "," value})" + else + value; + in + [ "serve" ] + ++ optArg "--port" serve.port + ++ optArg "--download-policy" (renderPolicy serve.downloadPolicy) + ++ optArg "--execute-policy" (renderPolicy serve.executePolicy) + ++ optArg "--queue-size" serve.queueSize + ++ optArg "--metrics-port" serve.metricsPort + ++ optArg "--graffiti" serve.graffiti + ++ lib.concatMap (model: [ + "--preload" + model + ]) serve.preloadWeights + ++ serve.extraArgs; +} diff --git a/nix/modules/home-manager.nix b/nix/modules/home-manager.nix new file mode 100644 index 0000000..7c4289b --- /dev/null +++ b/nix/modules/home-manager.nix @@ -0,0 +1,90 @@ +{ + self, + hellas ? import ./hellas.nix { inherit self; }, +}: +{ + config, + lib, + pkgs, + ... +}: +let + inherit (lib) + mkEnableOption + mkIf + mkMerge + optionals + ; + cfg = config.programs.hellas; + inherit (pkgs.stdenv.hostPlatform) isDarwin; + + baseEnv = + hellas.mkOtelEnv { + inherit lib; + inherit (cfg) otel; + } + // cfg.environment; +in +{ + options.programs.hellas = + hellas.commonOptions { + inherit lib; + package = hellas.pickCliPackage pkgs; + packageDescription = '' + The hellas CLI package. Defaults to the best backend variant for + the host: cli-candle-metal on Darwin, cli-candle-cuda when + `nixpkgs.config.cudaSupport` is enabled on Linux, otherwise + cli-candle. Override to `pkgs.hellas.cli` (lean remote-only) if + you don't want a local backend. + ''; + } + // { + # User-space serve daemon. Currently darwin-only (uses HM's launchd + # integration). Linux users should use the NixOS module instead. 
+ serve = { + enable = mkEnableOption "Hellas serve daemon as a launchd user agent (darwin only)"; + } + // hellas.serveOptions { inherit lib pkgs; }; + }; + + config = mkMerge [ + (mkIf cfg.enable { + home.packages = [ cfg.package ]; + home.sessionVariables = hellas.renderEnvironment baseEnv; + }) + + # Surface a clear assertion on Linux rather than a "no such option" error + # when the user enables `programs.hellas.serve` on the wrong platform. + (mkIf cfg.serve.enable { + assertions = optionals (!isDarwin) [ + { + assertion = false; + message = '' + programs.hellas.serve is only supported on darwin (HM launchd). + On Linux, use the NixOS module `services.hellas` instead. + ''; + } + ]; + }) + + (mkIf (cfg.serve.enable && isDarwin) { + launchd.agents.hellas = { + enable = true; + config = { + ProgramArguments = [ + "${cfg.package}/bin/hellas-cli" + ] + ++ hellas.mkServeArgs { + inherit lib; + inherit (cfg) serve; + }; + RunAtLoad = true; + KeepAlive = true; + EnvironmentVariables = hellas.renderEnvironment (baseEnv // { HOME = config.home.homeDirectory; }); + StandardOutPath = "${config.home.homeDirectory}/Library/Logs/hellas/stdout.log"; + StandardErrorPath = "${config.home.homeDirectory}/Library/Logs/hellas/stderr.log"; + }; + }; + }) + ]; +} diff --git a/nix/modules/nixos.nix b/nix/modules/nixos.nix new file mode 100644 index 0000000..474d4ad --- /dev/null +++ b/nix/modules/nixos.nix @@ -0,0 +1,80 @@ +{ + self, + hellas ? import ./hellas.nix { inherit self; }, +}: +{ + config, + lib, + pkgs, + ... +}: +let + inherit (lib) mkIf mkOption types; + cfg = config.services.hellas; +in +{ + options.services.hellas = + hellas.commonOptions { + inherit lib; + package = hellas.pickCliPackage pkgs; + packageDescription = '' + The hellas CLI used to run the serve daemon. Defaults to the best + backend variant for the host: cli-candle-metal on Darwin, + cli-candle-cuda when `nixpkgs.config.cudaSupport` is enabled on + Linux, otherwise cli-candle. 
Override to a specific SM build (e.g. + `pkgs.hellas.cli-candle-cuda-cuda12-sm80`) to pin a particular GPU + generation. + ''; + } + // hellas.serveOptions { inherit lib pkgs; } + // { + openFirewall = mkOption { + type = types.bool; + default = false; + description = "Open the Hellas UDP listen port in the firewall."; + }; + }; + + config = mkIf cfg.enable { + assertions = [ + { + assertion = pkgs.stdenv.hostPlatform.isLinux; + message = "services.hellas is only supported on Linux."; + } + ]; + + systemd.services.hellas = { + description = "Hellas node server"; + wantedBy = [ "multi-user.target" ]; + after = [ "network-online.target" ]; + wants = [ "network-online.target" ]; + environment = hellas.renderEnvironment ( + hellas.mkOtelEnv { + inherit lib; + inherit (cfg) otel; + } + // cfg.environment + // { + HOME = pkgs.hellasLib.defaultStateDir; + } + ); + serviceConfig = { + ExecStart = lib.escapeShellArgs ( + [ "${cfg.package}/bin/hellas-cli" ] + ++ hellas.mkServeArgs { + inherit lib; + serve = cfg; + } + ); + Restart = "on-failure"; + DynamicUser = true; + StateDirectory = "hellas"; + WorkingDirectory = pkgs.hellasLib.defaultStateDir; + }; + }; + + networking.firewall = mkIf (cfg.openFirewall && cfg.port != null) { + allowedUDPPorts = [ cfg.port ]; + }; + }; +} diff --git a/nix/package.nix b/nix/package.nix new file mode 100644 index 0000000..a6dbeb5 --- /dev/null +++ b/nix/package.nix @@ -0,0 +1,103 @@ +{ + self, + system, + nixpkgs, + rust-overlay, + # When set, builds everything for this target triple via `pkgsCross`. + # Leave null for native builds. + crossSystem ? 
null, +}: +let + overlays = [ + (import rust-overlay) + (final: _prev: { + hellasLib = import ./lib { pkgs = final; }; + }) + ]; + pkgs = import nixpkgs ( + { + inherit system overlays; + config.allowUnfree = true; + } + // nixpkgs.lib.optionalAttrs (crossSystem != null) { inherit crossSystem; } + ); + inherit (pkgs) lib; + + isCross = crossSystem != null; + targetTriple = pkgs.stdenv.hostPlatform.rust.rustcTarget; + + rustToolchain = + (pkgs.buildPackages.rust-bin.fromRustupToolchainFile ../rust-toolchain.toml).override + { + targets = lib.optional isCross targetTriple; + }; + + # clangStdenv avoids the GCC 15 ICE in zstd-sys (gimple_lower_bitint crash). + # Under pkgsCross this is the *target* stdenv. + stdenv = pkgs.clangStdenv; + + rustPlatform = pkgs.makeRustPlatform { + rustc = rustToolchain; + cargo = rustToolchain; + inherit stdenv; + }; + + # Flake `self` is git-tracked-only; nothing in the previous filter list + # (.direnv, target, result-*, etc.) ever lands here in the first place. 
+ buildSrc = self; + + workspaceBuildInputs = [ ]; + workspaceNativeBuildInputs = with pkgs.buildPackages; [ + pkg-config + protobuf + llvmPackages.lld + ]; + + rev = self.rev or self.dirtyRev or "unknown"; + + rustEnvTarget = pkgs.stdenv.hostPlatform.rust.cargoEnvVarTarget; + + crossEnv = lib.optionalAttrs isCross { + CARGO_BUILD_TARGET = targetTriple; + "CARGO_TARGET_${rustEnvTarget}_LINKER" = "${stdenv.cc}/bin/${stdenv.cc.targetPrefix}cc"; + }; + + commonArgs = { + pname = "hellas"; + version = "0.1.0"; + src = buildSrc; + cargoLock = { + lockFile = ../Cargo.lock; + outputHashes = { + "catgrad-0.2.1" = "sha256-O/H2WGacF9Z4ZA6TXpYaGsgy6pWZAW71zvfE2Xyl2ZU="; + }; + }; + inherit stdenv; + auditable = false; + RUST_MIN_STACK = "16777216"; + GIT_REV = builtins.substring 0 12 rev; + buildInputs = workspaceBuildInputs; + nativeBuildInputs = workspaceNativeBuildInputs; + checkInputs = with pkgs; [ cargo-outdated ]; + separateDebugInfo = true; + # stdenv's default stripDebugList only does --strip-debug on bin/; + # stripAllList promotes it to --strip-all so .symtab goes too. 
+ stripAllList = [ "bin" ]; + meta.mainProgram = "hellas-cli"; + } + // crossEnv; + + mkHellasPackage = overrides: rustPlatform.buildRustPackage (commonArgs // overrides); +in +{ + inherit + pkgs + lib + rustToolchain + rustPlatform + workspaceNativeBuildInputs + buildSrc + commonArgs + mkHellasPackage + ; +} diff --git a/nix/tests/basic.nix b/nix/tests/basic.nix new file mode 100644 index 0000000..e7b8749 --- /dev/null +++ b/nix/tests/basic.nix @@ -0,0 +1,29 @@ +{ + pkgs, + package, +}: +{ + basic = + pkgs.runCommand "hellas-cli-basic" + { + nativeBuildInputs = with pkgs; [ + coreutils + gnugrep + ]; + } + '' + export HOME="$TMPDIR/home" + mkdir -p "$HOME" + + ${package}/bin/hellas-cli --version + ${package}/bin/hellas-cli --help | grep -F "Hellas node CLI" + ${package}/bin/hellas-cli gateway --help | grep -F -- "--wrap" + ${package}/bin/hellas-cli serve --help | grep -F -- "--preload" + + head -c 32 /dev/zero > "$TMPDIR/identity" + ${package}/bin/hellas-cli --identity "$TMPDIR/identity" identity show-node-id \ + | grep -E '^[0-9a-f]{64}$' + + touch "$out" + ''; +} diff --git a/nix/tests/default.nix b/nix/tests/default.nix new file mode 100644 index 0000000..e047873 --- /dev/null +++ b/nix/tests/default.nix @@ -0,0 +1,17 @@ +{ + self, + pkgs, + lib, + package, + hellasRun, +}: +(import ./basic.nix { inherit pkgs package; }) +// (import ./e2e.nix { + inherit + self + pkgs + lib + package + hellasRun + ; +}) diff --git a/nix/tests/e2e.nix b/nix/tests/e2e.nix new file mode 100644 index 0000000..8679afc --- /dev/null +++ b/nix/tests/e2e.nix @@ -0,0 +1,504 @@ +{ + self, + pkgs, + lib, + package, + hellasRun, +}: +let + inherit (pkgs.hellasLib) hf executorPort defaultStateDir; + flavors = pkgs.hellasLib.apiFlavor; + hellasModule = import ../modules/nixos.nix { inherit self; }; + + gatewayPort = 8080; + executorIdentityPath = "${defaultStateDir}/.hellas/identity"; + + models = { + lfm2_350m = { + id = "LiquidAI/LFM2-350M"; + hfHome = hf.lfm2_350MCache; + cores = 2; + 
memorySize = 6144; + }; + qwen3_0_6b = { + id = "Qwen/Qwen3-0.6B"; + hfHome = hf.qwen3_0_6BCache; + cores = 4; + memorySize = 12288; + }; + }; + + mkPrompt = + marker: proofPath: + "Use the bash tool to run: echo ${marker} > ${proofPath}. Confirm in your reply once the file has been written."; + + harnesses = { + pi = + { + marker ? "hellas-tool-loop-works", + }: + { + kind = "pi"; + inherit marker; + }; + }; + + topologies = { + local = attrs: { kind = "local"; } // attrs; + remote = attrs: { kind = "remote"; } // attrs; + gateway = attrs: { kind = "gateway"; } // attrs; + }; + + mkBaseNode = hellasPackage: { + networking.firewall = { + enable = true; + allowedUDPPorts = [ 5353 ]; + checkReversePath = false; + }; + environment.systemPackages = with pkgs; [ + coreutils + curl + jq + gnugrep + hellasPackage + ]; + }; + + mkHellasNode = + { + hellasPackage, + model, + executePolicy ? "skip", + preload ? false, + }: + { + services.hellas = { + enable = true; + package = hellasPackage; + port = executorPort; + openFirewall = true; + downloadPolicy = "skip"; + inherit executePolicy; + queueSize = 2; + preloadWeights = lib.optionals preload [ model.id ]; + environment.HF_HOME = model.hfHome; + }; + }; + + mkExecutorNode = + { + hellasPackage, + model, + cores ? model.cores, + memorySize ? model.memorySize, + }: + _: { + imports = [ hellasModule ]; + config = lib.mkMerge [ + (mkBaseNode hellasPackage) + (mkHellasNode { + inherit hellasPackage model; + executePolicy = "eager"; + preload = true; + }) + { + virtualisation.cores = cores; + virtualisation.memorySize = memorySize; + } + ]; + }; + + mkGatewayNode = + { + hellasPackage, + runner, + hfHome, + cores, + memorySize, + extraEnv ? 
{ }, + }: + _: { + config = lib.mkMerge [ + (mkBaseNode hellasPackage) + { + environment.systemPackages = [ runner ]; + environment.variables = { + HF_HOME = hfHome; + } + // extraEnv; + virtualisation.cores = cores; + virtualisation.memorySize = memorySize; + } + ]; + }; + + mkHarnessCommand = + harness: + if harness.kind == "pi" then + "pi -p --no-session --no-extensions --offline --verbose ${lib.escapeShellArg harness.prompt}" + else + throw "unsupported e2e harness: ${harness.kind}"; + + normalizeAgent = + index: agent: + let + agentName = agent.name or "agent-${toString index}"; + proofPath = agent.proofPath or "/tmp/${agentName}.proof"; + in + agent + // { + apiFlavor = agent.apiFlavor or flavors.openai; + name = agentName; + logPath = agent.logPath or "/tmp/${agentName}.log"; + statusPath = agent.statusPath or "/tmp/${agentName}.status"; + inherit proofPath; + harness = agent.harness // { + prompt = mkPrompt agent.harness.marker proofPath; + }; + }; + + mkAgentScript = + name: agents: + let + statusPaths = map (agent: agent.statusPath) agents; + cleanupPaths = statusPaths ++ map (agent: agent.proofPath) agents; + runAgents = lib.concatMapStringsSep "\n" ( + agent: + let + inherit (agent) apiFlavor harness; + in + '' + ( + HELLAS_API=${lib.escapeShellArg apiFlavor} \ + HELLAS_MODEL=${lib.escapeShellArg agent.model.id} \ + ${mkHarnessCommand harness} > ${agent.logPath} 2>&1 + echo $? 
> ${agent.statusPath} + ) & + '' + ) agents; + statusChecks = lib.concatMapStringsSep "\n" (statusPath: '' + status="$(cat ${statusPath} 2>/dev/null || echo 127)" + if [ "$status" -ne 0 ]; then failed=1; fi + '') statusPaths; + in + pkgs.writeShellScript "hellas-${name}-agents" '' + set +e + rm -f ${lib.concatStringsSep " " cleanupPaths} + ${runAgents} + wait + failed=0 + ${statusChecks} + exit "$failed" + ''; + + mkHfHome = + name: agents: + pkgs.symlinkJoin { + name = "hf-cache-${name}"; + paths = map (agent: agent.model.hfHome) agents; + }; + + mkCaseAssertions = + agents: + lib.concatMapStringsSep "\n" (agent: '' + print("==== ${agent.name} output ====") + print(gateway.succeed("cat ${agent.logPath}")) + assert int(gateway.succeed("cat ${agent.statusPath}").strip()) == 0, "${agent.name} exited nonzero" + proof = gateway.succeed("cat ${agent.proofPath}").strip() + assert "${agent.harness.marker}" in proof, ( + f"${agent.name}: proof file ${agent.proofPath} did not contain marker" + f" '${agent.harness.marker}' (got {proof!r})" + ) + '') agents; + + mkRunWrappedScript = + { + name, + runner, + agentScript, + flags, + agents, + expectReceipts ? 
false, + }: + '' + (run_status, _) = gateway.execute( + "${runner}/bin/hellas-run" + " --log-file=/tmp/gateway.log" + " --host=127.0.0.1 --port=${toString gatewayPort}" + " --retries=1" + ${flags} + " ${agentScript}" + " > /tmp/hellas-run.log 2>&1" + ) + + print("==== hellas-run output (${name}) ====") + print(gateway.succeed("cat /tmp/hellas-run.log")) + print("==== gateway log (${name}) ====") + journal = gateway.succeed("cat /tmp/gateway.log") + print(journal) + ${mkCaseAssertions agents} + assert run_status == 0, f"hellas-run exited {run_status}" + ${lib.optionalString expectReceipts '' + import re + receipts = set(re.findall(r"receipt=(\S+)", journal)) + commits = set(re.findall(r"provenance=Some\(([0-9a-fA-F]{64})\)", journal)) + assert len(receipts) >= ${toString (builtins.length agents)}, f"expected receipts, got {receipts}" + assert len(commits) >= ${toString (builtins.length agents)}, f"expected commitments, got {commits}" + ''} + ''; + + mkToolUseTest = + { + name, + topology, + model ? null, + harness ? null, + apiFlavor ? flavors.openai, + agents ? null, + hellasPackage ? package, + runner ? 
hellasRun, + }: + let + normalizedAgents = lib.imap0 normalizeAgent ( + if agents == null then + [ + { + inherit model harness apiFlavor; + name = harness.kind or "agent"; + } + ] + else + agents + ); + primaryModel = (lib.head normalizedAgents).model; + agentScript = mkAgentScript name normalizedAgents; + gatewayHfHome = + topology.hfHome + or (if topology.kind == "gateway" then mkHfHome name normalizedAgents else primaryModel.hfHome); + gatewayNode = mkGatewayNode { + inherit hellasPackage runner; + hfHome = gatewayHfHome; + cores = topology.gatewayCores or (if topology.kind == "local" then primaryModel.cores else 2); + memorySize = + topology.gatewayMemorySize or (if topology.kind == "local" then primaryModel.memorySize else 3072); + extraEnv = topology.gatewayEnv or { }; + }; + runLocal = mkRunWrappedScript { + inherit name runner agentScript; + agents = normalizedAgents; + flags = '' + " --local" + " --force-model=${primaryModel.id}" + ''; + }; + in + pkgs.testers.runNixOSTest ( + { + name = "hellas-${name}"; + } + // ( + if topology.kind == "local" then + { + nodes.gateway = gatewayNode; + testScript = '' + start_all() + gateway.wait_for_unit("multi-user.target") + ${runLocal} + ''; + } + else if topology.kind == "remote" then + let + executorPackage = topology.executorPackage or hellasPackage; + in + { + nodes = { + executor = mkExecutorNode { + hellasPackage = executorPackage; + model = primaryModel; + cores = topology.executorCores or primaryModel.cores; + memorySize = topology.executorMemorySize or primaryModel.memorySize; + }; + gateway = gatewayNode; + }; + testScript = + { nodes, ... 
}: + let + executorAddr = (lib.head nodes.executor.networking.interfaces.eth1.ipv4.addresses).address; + runRemote = mkRunWrappedScript { + inherit name runner agentScript; + agents = normalizedAgents; + flags = '' + f" --node-id={executor_node_id}" + " --node-addr=${executorAddr}:${toString executorPort}" + " --force-model=${primaryModel.id}" + ''; + }; + in + '' + start_all() + executor.wait_for_unit("hellas.service") + gateway.wait_for_unit("multi-user.target") + + executor_node_id = executor.wait_until_succeeds( + "${executorPackage}/bin/hellas-cli --identity ${executorIdentityPath} identity show-node-id" + ).strip() + + gateway.wait_until_succeeds( + f"${hellasPackage}/bin/hellas-cli rpc {executor_node_id} --node-addr ${executorAddr}:${toString executorPort}" + ) + + ${runRemote} + print("==== executor journal (${name}) ====") + print(executor.succeed("journalctl -u hellas.service --no-pager -o cat")) + ''; + } + else if topology.kind == "gateway" then + let + executorNodes = lib.mapAttrs ( + _nodeName: node: + mkExecutorNode { + hellasPackage = node.package or hellasPackage; + inherit (node) model; + cores = node.cores or node.model.cores; + memorySize = node.memorySize or node.model.memorySize; + } + ) topology.executors; + executorNames = lib.attrNames topology.executors; + targetedAgents = builtins.filter (agent: agent ? executor) normalizedAgents; + discoveryAgents = builtins.filter (agent: !(agent ? executor)) normalizedAgents; + waitForExecutors = lib.concatMapStringsSep "\n" ( + nodeName: ''${nodeName}.wait_for_unit("hellas.service")'' + ) executorNames; + printExecutorJournals = lib.concatMapStringsSep "\n" (nodeName: '' + print("==== ${nodeName} journal (${name}) ====") + print(${nodeName}.succeed("journalctl -u hellas.service --no-pager -o cat")) + '') executorNames; + in + { + nodes = executorNodes // { + gateway = gatewayNode; + }; + testScript = + { nodes, ... 
}: + let + executorAddrs = lib.mapAttrs ( + nodeName: _node: (lib.head nodes.${nodeName}.networking.interfaces.eth1.ipv4.addresses).address + ) topology.executors; + loadExecutorIds = lib.concatMapStringsSep "\n" ( + nodeName: + let + node = topology.executors.${nodeName}; + executorPackage = node.package or hellasPackage; + in + '' + ${nodeName}_node_id = ${nodeName}.wait_until_succeeds( + "${executorPackage}/bin/hellas-cli --identity ${executorIdentityPath} identity show-node-id" + ).strip() + + gateway.wait_until_succeeds( + f"${hellasPackage}/bin/hellas-cli rpc {${nodeName}_node_id} --node-addr ${executorAddrs.${nodeName}}:${toString executorPort}" + ) + '' + ) executorNames; + runTargetedGateway = lib.concatMapStringsSep "\n" ( + agent: + let + nodeName = agent.executor; + agentScriptForTarget = mkAgentScript "${name}-${agent.name}" [ agent ]; + in + mkRunWrappedScript { + name = "${name}-${agent.name}"; + inherit runner; + agentScript = agentScriptForTarget; + agents = [ agent ]; + expectReceipts = topology.expectReceipts or false; + flags = '' + f" --node-id={${nodeName}_node_id}" + " --node-addr=${executorAddrs.${nodeName}}:${toString executorPort}" + " --force-model=${agent.model.id}" + ''; + } + ) targetedAgents; + runDiscoveryGateway = lib.optionalString (discoveryAgents != [ ]) (mkRunWrappedScript { + inherit name runner; + agentScript = mkAgentScript "${name}-discovery" discoveryAgents; + agents = discoveryAgents; + expectReceipts = topology.expectReceipts or false; + flags = ""; + }); + in + '' + start_all() + ${waitForExecutors} + gateway.wait_for_unit("multi-user.target") + ${loadExecutorIds} + ${runTargetedGateway} + ${runDiscoveryGateway} + ${printExecutorJournals} + ''; + } + else + throw "unsupported e2e topology: ${topology.kind}" + ) + ); +in +{ + gateway-tool-use-local = mkToolUseTest { + name = "gateway-tool-use-local"; + model = models.qwen3_0_6b; + apiFlavor = flavors.openai; + harness = harnesses.pi { + marker = 
"hellas-local-tool-loop-works"; + }; + topology = topologies.local { }; + }; + + gateway-tool-use-openai = mkToolUseTest { + name = "gateway-tool-use-openai"; + model = models.qwen3_0_6b; + apiFlavor = flavors.openai; + harness = harnesses.pi { }; + topology = topologies.remote { }; + }; + + gateway-tool-use-anthropic = mkToolUseTest { + name = "gateway-tool-use-anthropic"; + model = models.qwen3_0_6b; + apiFlavor = flavors.anthropic; + harness = harnesses.pi { }; + topology = topologies.remote { }; + }; + + gateway-multi-model = mkToolUseTest { + name = "gateway-multi-model"; + topology = topologies.gateway { + gatewayMemorySize = 4096; + gatewayEnv.RUST_LOG = "info,iroh=warn,iroh_relay=warn,pkarr=warn,iroh_dns=warn"; + expectReceipts = true; + executors = { + executor_qwen.model = models.qwen3_0_6b; + executor_lfm2.model = models.lfm2_350m; + }; + }; + agents = [ + { + name = "pi-qwen"; + executor = "executor_qwen"; + model = models.qwen3_0_6b; + apiFlavor = flavors.openai; + harness = harnesses.pi { + marker = "qwen-marker-works"; + }; + } + { + name = "pi-lfm2"; + executor = "executor_lfm2"; + model = models.lfm2_350m; + apiFlavor = flavors.openai; + harness = harnesses.pi { + marker = "lfm2-marker-works"; + }; + } + ]; + }; +} diff --git a/proto/hellas/courtesy/v1/courtesy.proto b/proto/hellas/courtesy/v1/courtesy.proto new file mode 100644 index 0000000..7c4147f --- /dev/null +++ b/proto/hellas/courtesy/v1/courtesy.proto @@ -0,0 +1,205 @@ +syntax = "proto3"; + +package hellas.courtesy.v1; + +import "hellas/symbolic/v1/symbolic.proto"; +import "hellas/v1/hellas.proto"; + +// Non-core provider conveniences. These APIs are not settlement/protocol +// objects: providers may offer Hugging Face resolution, tokenization, chat +// templates, model listing, and metrics, or decline to serve them. 
+ +service Courtesy { + rpc QuotePreparedText(QuotePreparedTextRequest) returns (QuotePreparedTextResponse); + rpc QuotePrompt(QuotePromptRequest) returns (QuotePromptResponse); + rpc QuoteChatPrompt(QuoteChatPromptRequest) returns (QuoteChatPromptResponse); + rpc PutArtifact(PutArtifactRequest) returns (PutArtifactResponse); + rpc GetArtifact(GetArtifactRequest) returns (GetArtifactResponse); + rpc ListModels(ListModelsRequest) returns (ListModelsResponse); + rpc DecodeTokens(stream DecodeTokensRequest) returns (stream DecodeTokensResponse); + rpc GetStats(GetStatsRequest) returns (GetStatsResponse); + rpc GetModelStats(GetModelStatsRequest) returns (GetModelStatsResponse); +} + +message SymbolicStart { + oneof kind { + SymbolicGenesisStart genesis = 1; + SymbolicArtifactStart artifact = 2; + } +} + +message SymbolicGenesisStart {} + +message SymbolicArtifactStart { + // catnix OutputId; exactly 32 bytes. + bytes artifact_cid = 1; +} + +message QuotePreparedTextRequest { + string huggingface_model_id = 1; + string huggingface_revision = 2; + repeated uint32 prompt_token_ids = 3; + uint32 max_new_tokens = 4; + repeated uint32 stop_token_ids = 5; + SymbolicStart start = 6; + // Ordered preference list (each one of "f32", "f16", "bf16"). The server + // picks the first entry it supports. Empty list lets the server pick its + // preferred dtype freely. None of the entries supported -> request is + // refused with FailedPrecondition. The chosen dtype is reported back in + // QuotePreparedTextResponse.dtype. + repeated string accept_dtypes = 7; +} + +message QuotePreparedTextResponse { + .hellas.v1.Ticket ticket = 1; + uint32 prompt_tokens = 2; + // The dtype the server actually committed to running this quote at. + string dtype = 3; + .hellas.symbolic.v1.SymbolicRequest symbolic_request = 4; +} + +// Convenience RPC: the server handles tokenization and symbolic request +// construction. Intended for lightweight clients (browsers) that don't have +// the tokenizer. 
+message QuotePromptRequest { + string huggingface_model_id = 1; + string huggingface_revision = 2; + string prompt = 3; + uint32 max_new_tokens = 4; + // Ordered preference list (each one of "f32", "f16", "bf16"). The server + // picks the first entry it supports. Empty list lets the server pick its + // preferred dtype freely. None of the entries supported -> request is + // refused with FailedPrecondition. The chosen dtype is reported back in + // QuotePromptResponse.dtype. + repeated string accept_dtypes = 5; +} + +message QuotePromptResponse { + .hellas.v1.Ticket ticket = 1; + uint32 prompt_tokens = 2; + // The dtype the server actually committed to running this quote at. + string dtype = 3; + .hellas.symbolic.v1.SymbolicRequest symbolic_request = 4; +} + +// Convenience RPC: chat-style prompt quoting. +// Like QuotePrompt but accepts a message array + system prompt. +// The server applies the model's chat template to produce the prompt. +message ChatMessage { + string role = 1; // "user", "assistant" + string content = 2; +} + +message QuoteChatPromptRequest { + string huggingface_model_id = 1; + string huggingface_revision = 2; + repeated ChatMessage messages = 3; + uint32 max_new_tokens = 4; + string system_prompt = 5; + // Ordered preference list (each one of "f32", "f16", "bf16"). The server + // picks the first entry it supports. Empty list lets the server pick its + // preferred dtype freely. None of the entries supported -> request is + // refused with FailedPrecondition. The chosen dtype is reported back in + // QuoteChatPromptResponse.dtype. + repeated string accept_dtypes = 6; +} + +message QuoteChatPromptResponse { + .hellas.v1.Ticket ticket = 1; + uint32 prompt_tokens = 2; + // The dtype the server actually committed to running this quote at. + string dtype = 3; + .hellas.symbolic.v1.SymbolicRequest symbolic_request = 4; +} + +// Store one canonical catnix artifact by its BLAKE3 CID. 
This API does not +// publish symbolic metadata such as model locators or lazy substitutions; those +// are separate provider-local interpretation state. +message PutArtifactRequest { + bytes canonical_artifact = 1; +} + +message PutArtifactResponse { + // BLAKE3 digest of canonical_artifact; exactly 32 bytes. + bytes cid = 1; +} + +message GetArtifactRequest { + // BLAKE3 digest of canonical artifact bytes; exactly 32 bytes. + bytes cid = 1; +} + +message GetArtifactResponse { + bytes canonical_artifact = 1; +} + +// List models known to the executor and their readiness status. +message ListModelsRequest {} + +message ModelInfo { + string model_id = 1; + string revision = 2; + ModelStatus status = 3; + // Human-readable error when status is FAILED. + string error = 4; +} + +enum ModelStatus { + MODEL_STATUS_UNSPECIFIED = 0; + MODEL_STATUS_QUEUED = 1; + MODEL_STATUS_LOADING = 2; + MODEL_STATUS_READY = 3; + MODEL_STATUS_FAILED = 4; +} + +message ListModelsResponse { + repeated ModelInfo models = 1; +} + +// Convenience RPC: stateless token decoding. +// Client streams raw token bytes, server decodes with the model's tokenizer +// and streams back text chunks. +message DecodeTokensRequest { + string huggingface_model_id = 1; + string huggingface_revision = 2; + // Raw token bytes (little-endian u32 token IDs, same format as Symbolic output). + bytes token_bytes = 3; +} + +message DecodeTokensResponse { + // Decoded text (incremental delta; concatenate all responses for full output). + string text = 1; +} + +// Cumulative token statistics since node start. 
+message GetStatsRequest {} + +message TokenStats { + uint64 executions_started = 1; + uint64 executions_completed = 2; + uint64 executions_failed = 3; + uint64 prompt_tokens = 4; + uint64 cached_prompt_tokens = 5; + uint64 cached_output_tokens = 6; + uint64 prefill_tokens = 7; + uint64 generated_tokens = 8; +} + +message ModelTokenStats { + string model_id = 1; + TokenStats stats = 2; +} + +message GetStatsResponse { + TokenStats stats = 1; + repeated ModelTokenStats model_stats = 2; +} + +message GetModelStatsRequest { + string model_id = 1; +} + +message GetModelStatsResponse { + string model_id = 1; + TokenStats stats = 2; +} diff --git a/proto/hellas/opaque/v1/opaque.proto b/proto/hellas/opaque/v1/opaque.proto new file mode 100644 index 0000000..8e5c397 --- /dev/null +++ b/proto/hellas/opaque/v1/opaque.proto @@ -0,0 +1,19 @@ +syntax = "proto3"; + +package hellas.opaque.v1; + +import "hellas/v1/hellas.proto"; + +// Trust-based opaque work. The protocol commits to the exact bytes; it does +// not interpret service/method/payload or provide a non-cooperative validity +// path for them. + +service Opaque { + rpc CreateTicket(OpaqueRequest) returns (.hellas.v1.Ticket); +} + +message OpaqueRequest { + string service = 1; + string method = 2; + bytes payload = 3; // exact UTF-8 JSON bytes +} diff --git a/proto/hellas/swarm/v1/swarm.proto b/proto/hellas/swarm/v1/swarm.proto new file mode 100644 index 0000000..3592146 --- /dev/null +++ b/proto/hellas/swarm/v1/swarm.proto @@ -0,0 +1,42 @@ +syntax = "proto3"; + +package hellas.swarm.v1; + +// P2P/node-facing service. This is transport/discovery metadata, not the core +// execution protocol. + +service Node { + rpc GetNodeInfo(GetNodeInfoRequest) returns (GetNodeInfoResponse); + rpc GetKnownPeers(GetKnownPeersRequest) returns (GetKnownPeersResponse); +} + +message GetNodeInfoRequest {} + +message GetNodeInfoResponse { + string node_id = 1; + uint64 uptime_seconds = 2; + // Semver string, e.g. "0.1.0". 
Self-reported; treat as untrusted. + string version = 3; + // Build commit hash (short hex). Self-reported; treat as untrusted. + string build = 4; + // Platform triple, e.g. "x86_64-linux". Self-reported; treat as untrusted. + string os = 5; + // Operator-chosen tag, exactly 16 bytes. Self-reported; treat as untrusted. + bytes graffiti = 6; +} + +message GetKnownPeersRequest { + string service_alpn = 1; +} + +message GetKnownPeersResponse { + repeated bytes peer_ids = 1; +} + +message Presence { + string hf_id = 1; + string req_id = 2; + string peer_id = 3; + uint64 ttl_ms = 4; + bool is_executor = 5; +} diff --git a/proto/hellas/symbolic/v1/symbolic.proto b/proto/hellas/symbolic/v1/symbolic.proto new file mode 100644 index 0000000..5af268f --- /dev/null +++ b/proto/hellas/symbolic/v1/symbolic.proto @@ -0,0 +1,17 @@ +syntax = "proto3"; + +package hellas.symbolic.v1; + +import "hellas/v1/hellas.proto"; + +// Binding/verifiable symbolic work. This is the protocol-level Catgrad path: +// all large artifacts are named by CIDs and fetched/resolved outside protobuf. + +service Symbolic { + rpc CreateTicket(SymbolicRequest) returns (.hellas.v1.Ticket); +} + +message SymbolicRequest { + // catnix InputId; exactly 32 bytes. + bytes text_execution_cid = 1; +} diff --git a/proto/hellas/v1/hellas.proto b/proto/hellas/v1/hellas.proto new file mode 100644 index 0000000..fae37d2 --- /dev/null +++ b/proto/hellas/v1/hellas.proto @@ -0,0 +1,67 @@ +syntax = "proto3"; + +package hellas.v1; + +// Core Hellas work protocol. This file owns the generic ticket and execution +// surface plus transport-neutral event/receipt shapes. Scheme-specific ticket +// creation lives in symbolic.proto / opaque.proto; non-core helper APIs live in +// courtesy.proto; p2p/node discovery lives in swarm.proto. 
+ +service Execute { + rpc RunTicket(RunTicketRequest) returns (stream WorkEvent); +} + +message Ticket { + bytes request_commitment = 1; // exactly 32 bytes + uint64 amount = 2; + uint64 ttl_ms = 3; +} + +message RunTicketRequest { + bytes request_commitment = 1; // exactly 32 bytes +} + +// Wire protocol: zero or more WorkChunk events, terminated by exactly one +// WorkFinished or WorkFailed, after which the stream ends. Streaming chunks +// are transport-only; the terminal output is the object committed to by the +// receipt. +message WorkEvent { + oneof kind { + WorkChunk chunk = 1; + WorkFinished finished = 2; + WorkFailed failed = 3; + } +} + +message WorkChunk { + // Cumulative position AFTER this chunk. + uint64 position = 1; + bytes bytes = 2; +} + +message WorkFinished { + // Complete output object. Symbolic text uses little-endian u32 token IDs. + // Opaque uses exact UTF-8 JSON bytes. + bytes output = 1; + ReceiptEnvelope receipt = 2; + FinishStatus status = 3; + uint64 total_units = 4; +} + +message WorkFailed { + // Units emitted before failure (tokens for symbolic text, bytes for opaque). + uint64 position = 1; + string error = 2; +} + +enum FinishStatus { + FINISH_STATUS_UNSPECIFIED = 0; + FINISH_STATUS_END_OF_SEQUENCE = 1; + FINISH_STATUS_MAX_OUTPUT = 2; + FINISH_STATUS_CANCELLED = 3; +} + +// Canonical hellas-core SignedReceipt encoded as strict dag-cbor. +message ReceiptEnvelope { + bytes dag_cbor = 1; +} diff --git a/rust-analyzer.toml b/rust-analyzer.toml new file mode 100644 index 0000000..d495be6 --- /dev/null +++ b/rust-analyzer.toml @@ -0,0 +1,2 @@ +[cargo] +features = ["candle", "otel"]