From 382ed761ab19e54474f7d2be263ece4af25ec165 Mon Sep 17 00:00:00 2001 From: Martin Taillefer Date: Fri, 27 Feb 2026 11:04:01 -0800 Subject: [PATCH] feat: Add the file and sync_thunk crates --- .spelling | 101 +- CHANGELOG.md | 4 + Cargo.lock | 240 +- Cargo.toml | 55 +- README.md | 2 + crates/anyspawn/src/spawner.rs | 2 +- crates/automation/Cargo.toml | 2 +- crates/file/CHANGELOG.md | 1 + crates/file/Cargo.toml | 67 + crates/file/ECOSYSTEM-ANALYSIS.md | 508 ++++ crates/file/README.md | 321 +++ crates/file/TODO.md | 11 + crates/file/benches/fs_comparison.rs | 795 +++++++ crates/file/examples/basic_read_write.rs | 35 + crates/file/examples/directory_ops.rs | 62 + crates/file/examples/file_types.rs | 43 + crates/file/examples/open_options.rs | 62 + crates/file/examples/positional_io.rs | 39 + crates/file/examples/positional_types.rs | 56 + crates/file/examples/streaming_io.rs | 45 + crates/file/favicon.ico | 3 + crates/file/logo.png | 3 + crates/file/src/dir_builder.rs | 57 + crates/file/src/dir_entry.rs | 73 + crates/file/src/directory.rs | 576 +++++ crates/file/src/file.rs | 581 +++++ crates/file/src/file_inner.rs | 388 ++++ crates/file/src/io_helpers.rs | 30 + crates/file/src/lib.rs | 314 +++ crates/file/src/open_options.rs | 206 ++ crates/file/src/path_utils.rs | 167 ++ crates/file/src/positional_file.rs | 505 ++++ crates/file/src/positional_file_inner.rs | 441 ++++ crates/file/src/read_dir.rs | 49 + crates/file/src/read_only_file.rs | 399 ++++ crates/file/src/read_only_positional_file.rs | 334 +++ crates/file/src/root.rs | 43 + crates/file/src/shared_memory.rs | 55 + crates/file/src/write_only_file.rs | 399 ++++ crates/file/src/write_only_positional_file.rs | 337 +++ crates/file/tests/integration.rs | 2039 +++++++++++++++++ crates/seatbelt/src/context.rs | 6 +- crates/seatbelt/src/fallback/layer.rs | 3 +- crates/seatbelt/src/fallback/mod.rs | 4 +- crates/sync_thunk/CHANGELOG.md | 1 + crates/sync_thunk/Cargo.toml | 45 + crates/sync_thunk/README.md | 155 ++ 
crates/sync_thunk/examples/from_field.rs | 48 + crates/sync_thunk/examples/from_method.rs | 52 + crates/sync_thunk/examples/from_parameter.rs | 47 + crates/sync_thunk/examples/from_static.rs | 49 + crates/sync_thunk/favicon.ico | 3 + crates/sync_thunk/logo.png | 3 + crates/sync_thunk/src/lib.rs | 155 ++ crates/sync_thunk/src/macros.rs | 84 + crates/sync_thunk/src/stack_state.rs | 353 +++ crates/sync_thunk/src/thunk_future.rs | 183 ++ crates/sync_thunk/src/thunker.rs | 325 +++ crates/sync_thunk/src/thunker_builder.rs | 164 ++ crates/sync_thunk/src/work_item.rs | 87 + crates/sync_thunk/tests/adversarial.rs | 71 + crates/sync_thunk_macros/CHANGELOG.md | 1 + crates/sync_thunk_macros/Cargo.toml | 35 + crates/sync_thunk_macros/README.md | 24 + crates/sync_thunk_macros/favicon.ico | 3 + crates/sync_thunk_macros/logo.png | 3 + crates/sync_thunk_macros/src/lib.rs | 21 + crates/sync_thunk_macros_impl/CHANGELOG.md | 1 + crates/sync_thunk_macros_impl/Cargo.toml | 53 + crates/sync_thunk_macros_impl/README.md | 24 + crates/sync_thunk_macros_impl/favicon.ico | 3 + crates/sync_thunk_macros_impl/logo.png | 3 + crates/sync_thunk_macros_impl/src/lib.rs | 17 + crates/sync_thunk_macros_impl/src/thunk.rs | 360 +++ .../thunk_basic__from_field_mut_self.snap | 65 + .../thunk_basic__from_field_ref_self.snap | 65 + .../thunk_basic__from_method_call.snap | 65 + .../thunk_basic__from_parameter.snap | 69 + ...k_basic__from_parameter_multiple_args.snap | 85 + .../snapshots/thunk_basic__from_static.snap | 65 + .../thunk_basic__generic_return_type.snap | 69 + .../thunk_basic__multiple_params.snap | 77 + .../snapshots/thunk_basic__mut_ref_param.snap | 70 + .../thunk_basic__no_params_beyond_self.snap | 65 + .../snapshots/thunk_basic__owned_param.snap | 69 + .../thunk_basic__preserves_allow_attrs.snap | 66 + .../thunk_basic__preserves_expect_attrs.snap | 70 + .../thunk_basic__pub_crate_visibility.snap | 65 + .../thunk_basic__pub_visibility.snap | 65 + .../snapshots/thunk_basic__ref_param.snap | 69 + 
.../thunk_basic__result_return_type.snap | 74 + .../thunk_basic__unit_return_type.snap | 63 + .../thunk_errors__missing_equals.snap | 9 + .../snapshots/thunk_errors__missing_from.snap | 9 + .../thunk_errors__not_a_function.snap | 9 + .../snapshots/thunk_errors__unknown_key.snap | 9 + .../tests/thunk_basic.rs | 245 ++ .../tests/thunk_errors.rs | 56 + crates/sync_thunk_macros_impl/tests/util.rs | 49 + crates/tick/Cargo.toml | 2 +- 100 files changed, 13359 insertions(+), 101 deletions(-) create mode 100644 crates/file/CHANGELOG.md create mode 100644 crates/file/Cargo.toml create mode 100644 crates/file/ECOSYSTEM-ANALYSIS.md create mode 100644 crates/file/README.md create mode 100644 crates/file/TODO.md create mode 100644 crates/file/benches/fs_comparison.rs create mode 100644 crates/file/examples/basic_read_write.rs create mode 100644 crates/file/examples/directory_ops.rs create mode 100644 crates/file/examples/file_types.rs create mode 100644 crates/file/examples/open_options.rs create mode 100644 crates/file/examples/positional_io.rs create mode 100644 crates/file/examples/positional_types.rs create mode 100644 crates/file/examples/streaming_io.rs create mode 100644 crates/file/favicon.ico create mode 100644 crates/file/logo.png create mode 100644 crates/file/src/dir_builder.rs create mode 100644 crates/file/src/dir_entry.rs create mode 100644 crates/file/src/directory.rs create mode 100644 crates/file/src/file.rs create mode 100644 crates/file/src/file_inner.rs create mode 100644 crates/file/src/io_helpers.rs create mode 100644 crates/file/src/lib.rs create mode 100644 crates/file/src/open_options.rs create mode 100644 crates/file/src/path_utils.rs create mode 100644 crates/file/src/positional_file.rs create mode 100644 crates/file/src/positional_file_inner.rs create mode 100644 crates/file/src/read_dir.rs create mode 100644 crates/file/src/read_only_file.rs create mode 100644 crates/file/src/read_only_positional_file.rs create mode 100644 crates/file/src/root.rs 
create mode 100644 crates/file/src/shared_memory.rs create mode 100644 crates/file/src/write_only_file.rs create mode 100644 crates/file/src/write_only_positional_file.rs create mode 100644 crates/file/tests/integration.rs create mode 100644 crates/sync_thunk/CHANGELOG.md create mode 100644 crates/sync_thunk/Cargo.toml create mode 100644 crates/sync_thunk/README.md create mode 100644 crates/sync_thunk/examples/from_field.rs create mode 100644 crates/sync_thunk/examples/from_method.rs create mode 100644 crates/sync_thunk/examples/from_parameter.rs create mode 100644 crates/sync_thunk/examples/from_static.rs create mode 100644 crates/sync_thunk/favicon.ico create mode 100644 crates/sync_thunk/logo.png create mode 100644 crates/sync_thunk/src/lib.rs create mode 100644 crates/sync_thunk/src/macros.rs create mode 100644 crates/sync_thunk/src/stack_state.rs create mode 100644 crates/sync_thunk/src/thunk_future.rs create mode 100644 crates/sync_thunk/src/thunker.rs create mode 100644 crates/sync_thunk/src/thunker_builder.rs create mode 100644 crates/sync_thunk/src/work_item.rs create mode 100644 crates/sync_thunk/tests/adversarial.rs create mode 100644 crates/sync_thunk_macros/CHANGELOG.md create mode 100644 crates/sync_thunk_macros/Cargo.toml create mode 100644 crates/sync_thunk_macros/README.md create mode 100644 crates/sync_thunk_macros/favicon.ico create mode 100644 crates/sync_thunk_macros/logo.png create mode 100644 crates/sync_thunk_macros/src/lib.rs create mode 100644 crates/sync_thunk_macros_impl/CHANGELOG.md create mode 100644 crates/sync_thunk_macros_impl/Cargo.toml create mode 100644 crates/sync_thunk_macros_impl/README.md create mode 100644 crates/sync_thunk_macros_impl/favicon.ico create mode 100644 crates/sync_thunk_macros_impl/logo.png create mode 100644 crates/sync_thunk_macros_impl/src/lib.rs create mode 100644 crates/sync_thunk_macros_impl/src/thunk.rs create mode 100644 crates/sync_thunk_macros_impl/tests/snapshots/thunk_basic__from_field_mut_self.snap 
create mode 100644 crates/sync_thunk_macros_impl/tests/snapshots/thunk_basic__from_field_ref_self.snap create mode 100644 crates/sync_thunk_macros_impl/tests/snapshots/thunk_basic__from_method_call.snap create mode 100644 crates/sync_thunk_macros_impl/tests/snapshots/thunk_basic__from_parameter.snap create mode 100644 crates/sync_thunk_macros_impl/tests/snapshots/thunk_basic__from_parameter_multiple_args.snap create mode 100644 crates/sync_thunk_macros_impl/tests/snapshots/thunk_basic__from_static.snap create mode 100644 crates/sync_thunk_macros_impl/tests/snapshots/thunk_basic__generic_return_type.snap create mode 100644 crates/sync_thunk_macros_impl/tests/snapshots/thunk_basic__multiple_params.snap create mode 100644 crates/sync_thunk_macros_impl/tests/snapshots/thunk_basic__mut_ref_param.snap create mode 100644 crates/sync_thunk_macros_impl/tests/snapshots/thunk_basic__no_params_beyond_self.snap create mode 100644 crates/sync_thunk_macros_impl/tests/snapshots/thunk_basic__owned_param.snap create mode 100644 crates/sync_thunk_macros_impl/tests/snapshots/thunk_basic__preserves_allow_attrs.snap create mode 100644 crates/sync_thunk_macros_impl/tests/snapshots/thunk_basic__preserves_expect_attrs.snap create mode 100644 crates/sync_thunk_macros_impl/tests/snapshots/thunk_basic__pub_crate_visibility.snap create mode 100644 crates/sync_thunk_macros_impl/tests/snapshots/thunk_basic__pub_visibility.snap create mode 100644 crates/sync_thunk_macros_impl/tests/snapshots/thunk_basic__ref_param.snap create mode 100644 crates/sync_thunk_macros_impl/tests/snapshots/thunk_basic__result_return_type.snap create mode 100644 crates/sync_thunk_macros_impl/tests/snapshots/thunk_basic__unit_return_type.snap create mode 100644 crates/sync_thunk_macros_impl/tests/snapshots/thunk_errors__missing_equals.snap create mode 100644 crates/sync_thunk_macros_impl/tests/snapshots/thunk_errors__missing_from.snap create mode 100644 
crates/sync_thunk_macros_impl/tests/snapshots/thunk_errors__not_a_function.snap create mode 100644 crates/sync_thunk_macros_impl/tests/snapshots/thunk_errors__unknown_key.snap create mode 100644 crates/sync_thunk_macros_impl/tests/thunk_basic.rs create mode 100644 crates/sync_thunk_macros_impl/tests/thunk_errors.rs create mode 100644 crates/sync_thunk_macros_impl/tests/util.rs diff --git a/.spelling b/.spelling index 745abe2cc..cfb933fcc 100644 --- a/.spelling +++ b/.spelling @@ -1,12 +1,18 @@ -306 +356 +— → 0.X.Y 100k 10k 10ms +16K 1h +1K 1ms +4K +64K ACLs +accessor acyclic addrs agentic @@ -15,12 +21,13 @@ allocator Ani api APIs -AppError appender +AppError args AspNet async Async +atomicity auditable backend backends @@ -37,9 +44,7 @@ branch_name btree_map buildable bytesbuf -callee -cancelled -Cargo.toml +C #-Rust C-BITFLAG C-CONV C-CONV-SPECIFIC @@ -48,27 +53,36 @@ C-CTOR C-DEREF C-DTOR-BLOCK C-DTOR-FAIL +C-ITER +C-ITER-TY +C-MACRO-ATTR +C-MACRO-VIS +C-NEWTYPE +C-NUM-FMT +C-RELNOTES +C-RW-VALUE +C-SERDE +C-SMART-PTR +callee +cancelled +Cargo.toml certificate_generator.rs cfg chainable Changelog -chrono Chrono -C-ITER -C-ITER-TY +chrono clippt clippy Clippy clonable -C-MACRO-ATTR -C-MACRO-VIS -C-NEWTYPE -C-NUM-FMT +cloneable codebase codebases +combinators composability composable -combinators +condvar config const contoso @@ -76,11 +90,6 @@ CONTRIBUTING.md coverage.json CPUs crates.io -C-RELNOTES -C #-Rust -C-RW-VALUE -C-SERDE -C-SMART-PTR deallocate Debuggability Deduplicate @@ -89,14 +98,18 @@ deduplication deque Deque dereferenced +dereferencing +derefs deserialization -destructors +destructor Destructors +destructors dev Dev DevOps DI DLLs +DNS docs.rs docsrs docstring @@ -104,13 +117,17 @@ DotNet dSMS Dyn e.g. 
-enum Enum +enum +enqueued enums +EOF +FFI FFI-compatible fhl-scus4-app-win fhl-scus4-app-win2 filesystem +filesystems fn foldhash footguns @@ -119,6 +136,7 @@ frontend fundle Fundle Fundle's +getter getters glommio grey @@ -142,13 +160,15 @@ interop Interop interoperability interoperate -jitter -JSON +invariants IOCP IP +jitter +JSON KiB Kubernetes libc +lookups libs libunwind-devel lifecycle @@ -158,15 +178,16 @@ macros Macros Makefile MEMORYSTATUSEX +memset metadata Metas Microservices microsoft.com middleware -mimalloc Mimalloc -miri +mimalloc Miri +miri misconfigured mitigations mockable @@ -175,8 +196,10 @@ modularity monomorphization monomorphized msrc +MPMC MSRV msvc +Multithreaded mutex Namespace NeutralMemoryPool @@ -188,6 +211,7 @@ Nomicon non-mockable ns nuget +NUL NUMA observability ohno @@ -207,11 +231,13 @@ PowerShell pre-approved pre-generate pre-heating +PRN proc Proc profiler PullRequest RDME +reborrows recoverability recoverable Redis @@ -223,20 +249,22 @@ Reqwest Reusability RPC runtime +runtime's runtimes rustc -rustdoc Rustdoc +rustdoc rustfmt Sandana sans-io SCCACHE scopeguard SDKs +seekable seeked SemVer -serde Serde +serde Serde-based Serde's serializable @@ -252,8 +280,9 @@ stdlib stdout struct struct's -structs Structs +structs +subdirectory submodule submodules subtrait @@ -261,13 +290,21 @@ sudo supertrait supertraits Symcrypt +symlink +symlinks sys syscall +syscalls sysinfo tdnf testability +thunk +thunked +thunker +Thunker timestamp timestamps +TOCTOU Tokio Tokio's toml @@ -281,8 +318,8 @@ tuple tuples typesafe UDEPS +unaliased unconfigured -Multithreaded uncontended unhandleable unicode @@ -295,17 +332,20 @@ unregister unregistered unregisters unsized +unsynchronized untrusted UTC UTF-8 v4 v6 vec -versioning Versioning +versioning Vijay VMs vtable +w.r.t. +waker wildcard wildcards Win32 @@ -313,6 +353,5 @@ winsock workflow workflows workspace -w.r.t. 
Xamarin -xxH3 +xxH3 \ No newline at end of file diff --git a/CHANGELOG.md b/CHANGELOG.md index 8c22ef8e7..009ad70d7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,7 @@ Please see each crate's change log below: - [`data_privacy`](./crates/data_privacy/CHANGELOG.md) - [`data_privacy_macros`](./crates/data_privacy_macros/CHANGELOG.md) - [`data_privacy_macros_impl`](./crates/data_privacy_macros_impl/CHANGELOG.md) +- [`file`](./crates/file/CHANGELOG.md) - [`fundle`](./crates/fundle/CHANGELOG.md) - [`fundle_macros`](./crates/fundle_macros/CHANGELOG.md) - [`fundle_macros_impl`](./crates/fundle_macros_impl/CHANGELOG.md) @@ -16,6 +17,9 @@ Please see each crate's change log below: - [`ohno_macros`](./crates/ohno_macros/CHANGELOG.md) - [`recoverable`](./crates/recoverable/CHANGELOG.md) - [`seatbelt`](./crates/seatbelt/CHANGELOG.md) +- [`sync_thunk`](./crates/sync_thunk/CHANGELOG.md) +- [`sync_thunk_macros`](./crates/sync_thunk_macros/CHANGELOG.md) +- [`sync_thunk_macros_impl`](./crates/sync_thunk_macros_impl/CHANGELOG.md) - [`thread_aware`](./crates/thread_aware/CHANGELOG.md) - [`thread_aware_macros`](./crates/thread_aware_macros/CHANGELOG.md) - [`thread_aware_macros_impl`](./crates/thread_aware_macros_impl/CHANGELOG.md) diff --git a/Cargo.lock b/Cargo.lock index 71be6ba81..db7af291b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -197,6 +197,22 @@ version = "4.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8b75356056920673b02621b35afd0f7dda9306d03c79a30f5c56c44cf256e3de" +[[package]] +name = "async_file" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d0b6f097c7d987f73de3c760ec755c820340bb640b2a9f8b2005368633e1cb77" +dependencies = [ + "blocking", + "js-sys", + "logwise", + "priority", + "some_executor", + "thiserror", + "wasm-bindgen-futures", + "web-sys", +] + [[package]] name = "atomic-waker" version = "1.1.2" @@ -394,6 +410,17 @@ dependencies = [ "crossbeam-utils", ] +[[package]] 
+name = "continue" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c0f015c8a689633523d8df1e8bfdd7911d958042cc5f15fe1ff7d13542a64451" +dependencies = [ + "atomic-waker", + "thiserror", + "wasm-bindgen", +] + [[package]] name = "convert_case" version = "0.10.0" @@ -422,9 +449,9 @@ dependencies = [ [[package]] name = "criterion" -version = "0.8.1" +version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4d883447757bb0ee46f233e9dc22eb84d93a9508c9b868687b274fc431d886bf" +checksum = "950046b2aa2492f9a536f5f4f9a3de7b9e2476e575e05bd6c333371add4d98f3" dependencies = [ "alloca", "anes", @@ -446,9 +473,9 @@ dependencies = [ [[package]] name = "criterion-plot" -version = "0.8.1" +version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ed943f81ea2faa8dcecbbfa50164acf95d555afec96a27871663b300e387b2e4" +checksum = "d8d80a2f4f5b554395e47b5d8305bc3d27813bacb73493eb1001e8f76dae29ea" dependencies = [ "cast", "itertools 0.13.0", @@ -460,6 +487,18 @@ version = "0.8.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" +[[package]] +name = "crossfire" +version = "3.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3fb12e9c05ae4854f743f0acec2f817148ba59a902484f6aa298d4fc7df2fac4" +dependencies = [ + "crossbeam-utils", + "futures-core", + "parking_lot", + "smallvec", +] + [[package]] name = "crunchy" version = "0.2.4" @@ -635,6 +674,21 @@ version = "2.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" +[[package]] +name = "file" +version = "0.2.0" +dependencies = [ + "async-fs", + "async_file", + "bytesbuf", + "bytesbuf_io", + "criterion", + "futures-lite", + "sync_thunk", + "tempfile", + "tokio", +] + [[package]] name = "find-msvc-tools" 
version = "0.1.8" @@ -689,9 +743,9 @@ dependencies = [ [[package]] name = "futures" -version = "0.3.31" +version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "65bc07b1a8bc7c85c5f2e110c476c7389b4554ba72af57d8445ea63a576b0876" +checksum = "8b147ee9d1f6d097cef9ce628cd2ee62288d963e16fb287bd9286455b241382d" dependencies = [ "futures-channel", "futures-core", @@ -704,9 +758,9 @@ dependencies = [ [[package]] name = "futures-channel" -version = "0.3.31" +version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2dff15bf788c671c1934e366d07e30c1814a8ef514e1af724a602e8a2fbe1b10" +checksum = "07bbe89c50d7a535e539b8c17bc0b49bdb77747034daa8087407d655f3f7cc1d" dependencies = [ "futures-core", "futures-sink", @@ -714,15 +768,15 @@ dependencies = [ [[package]] name = "futures-core" -version = "0.3.31" +version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "05f29059c0c2090612e8d742178b0580d2dc940c837851ad723096f87af6663e" +checksum = "7e3450815272ef58cec6d564423f6e755e25379b217b0bc688e295ba24df6b1d" [[package]] name = "futures-executor" -version = "0.3.31" +version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e28d1d997f585e54aebc3f97d39e72338912123a67330d723fdbb564d646c9f" +checksum = "baf29c38818342a3b26b5b923639e7b1f4a61fc5e76102d4b1981c6dc7a7579d" dependencies = [ "futures-core", "futures-task", @@ -731,9 +785,9 @@ dependencies = [ [[package]] name = "futures-io" -version = "0.3.31" +version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9e5c1b78ca4aae1ac06c48a526a655760685149f0d465d21f37abfe57ce075c6" +checksum = "cecba35d7ad927e23624b22ad55235f2239cfa44fd10428eecbeba6d6a717718" [[package]] name = "futures-lite" @@ -750,9 +804,9 @@ dependencies = [ [[package]] name = "futures-macro" -version = "0.3.31" +version = "0.3.32" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650" +checksum = "e835b70203e41293343137df5c0664546da5745f82ec9b84d40be8336958447b" dependencies = [ "proc-macro2", "quote", @@ -761,21 +815,21 @@ dependencies = [ [[package]] name = "futures-sink" -version = "0.3.31" +version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e575fab7d1e0dcb8d0c7bcf9a63ee213816ab51902e6d244a95819acacf1d4f7" +checksum = "c39754e157331b013978ec91992bde1ac089843443c49cbc7f46150b0fad0893" [[package]] name = "futures-task" -version = "0.3.31" +version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f90f7dce0722e95104fcb095585910c0977252f286e354b5e3bd38902cd99988" +checksum = "037711b3d59c33004d3856fbdc83b99d4ff37a24768fa1be9ce3538a1cde4393" [[package]] name = "futures-util" -version = "0.3.31" +version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9fa08315bb612088cc391249efdc3bc77536f16c91f6cf495e6fbe85b20a4a81" +checksum = "389ca41296e6190b48053de0321d02a77f32f8a5d2461dd38762c0593805c6d6" dependencies = [ "futures-channel", "futures-core", @@ -785,7 +839,6 @@ dependencies = [ "futures-task", "memchr", "pin-project-lite", - "pin-utils", "slab", ] @@ -1023,15 +1076,15 @@ checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" [[package]] name = "libc" -version = "0.2.180" +version = "0.2.182" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bcc35a38544a891a5f7c865aca548a982ccb3b8650a5b06d0fd33a10283c56fc" +checksum = "6800badb6cb2082ffd7b6a67e6125bb39f18782f793520caee8cb8846be06112" [[package]] name = "linux-raw-sys" -version = "0.11.0" +version = "0.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "df1d3c3b53da64cf5760482273a98e575c651a67eec7f77df96b5b642de8f039" +checksum = 
"32a66949e030da00e8c7d4434b251670a91556f4144941d37452769c25d58a53" [[package]] name = "lock_api" @@ -1048,6 +1101,25 @@ version = "0.4.29" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897" +[[package]] +name = "logwise" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e8c740ba6b59c00738b2f6f3ce3a3179f58f60586f85425a7dd19e93dbe8a740" +dependencies = [ + "logwise_proc", + "wasm-bindgen", + "wasm_safe_thread", + "web-sys", + "web-time", +] + +[[package]] +name = "logwise_proc" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "de5e88465f84c94abea59d256aa42df32b532cec3b4bcd132293d74a647bb32e" + [[package]] name = "many_cpus" version = "2.1.0" @@ -1325,12 +1397,6 @@ version = "0.2.16" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3b3cff922bd51709b605d9ead9aa71031d81447142d828eb4a6eba76fe619f9b" -[[package]] -name = "pin-utils" -version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" - [[package]] name = "piper" version = "0.2.4" @@ -1422,6 +1488,12 @@ dependencies = [ "syn", ] +[[package]] +name = "priority" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3331288c73a29bd726cd92d947059399bf8e4b01f13d24f02caecd5187e5b5d5" + [[package]] name = "proc-macro2" version = "1.0.106" @@ -1579,9 +1651,9 @@ dependencies = [ [[package]] name = "rustix" -version = "1.1.3" +version = "1.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "146c9e247ccc180c1f61615433868c99f3de3ae256a30a43b49f67c2d9171f34" +checksum = "b6fe4565b9518b83ef4f91bb47ce29620ca828bd32cb7e408f0062e9930ba190" dependencies = [ "bitflags", "errno", @@ -1778,6 +1850,23 @@ dependencies = [ "futures-lite", ] 
+[[package]] +name = "some_executor" +version = "0.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f7cda54d8a5e4c26d6f4c1a69b65dc3cbcbdf71088b605d0d95cb9fd83cdadba" +dependencies = [ + "atomic-waker", + "continue", + "js-sys", + "priority", + "wasm-bindgen", + "wasm-bindgen-futures", + "wasm_safe_thread", + "web-sys", + "web-time", +] + [[package]] name = "stable_deref_trait" version = "1.2.1" @@ -1801,6 +1890,35 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "sync_thunk" +version = "0.1.0" +dependencies = [ + "crossfire", + "sync_thunk_macros", + "tokio", +] + +[[package]] +name = "sync_thunk_macros" +version = "0.1.0" +dependencies = [ + "mutants", + "sync_thunk_macros_impl", +] + +[[package]] +name = "sync_thunk_macros_impl" +version = "0.1.0" +dependencies = [ + "insta", + "mutants", + "prettyplease", + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "sync_wrapper" version = "1.0.2" @@ -1815,9 +1933,9 @@ checksum = "591ef38edfb78ca4771ee32cf494cb8771944bee237a9b91fc9c1424ac4b777b" [[package]] name = "tempfile" -version = "3.24.0" +version = "3.26.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "655da9c7eb6305c55742045d5a8d2037996d61d8de95806335c7c86ce0f82e9c" +checksum = "82a72c767771b47409d2345987fda8628641887d5466101319899796367354a0" dependencies = [ "fastrand", "getrandom", @@ -1983,6 +2101,7 @@ version = "1.49.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "72a2903cd7736441aac9df9d7688bd0ce48edccaadf181c3b90be801e81d3d86" dependencies = [ + "bytes", "pin-project-lite", "tokio-macros", ] @@ -2223,6 +2342,20 @@ dependencies = [ "wasm-bindgen-shared", ] +[[package]] +name = "wasm-bindgen-futures" +version = "0.4.58" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "70a6e77fd0ae8029c9ea0063f87c46fde723e7d887703d74ad2616d792e51e6f" +dependencies = [ + "cfg-if", + "futures-util", + "js-sys", + "once_cell", + 
"wasm-bindgen", + "web-sys", +] + [[package]] name = "wasm-bindgen-macro" version = "0.2.108" @@ -2255,6 +2388,39 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "wasm_safe_thread" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1cbd0fac61fdac75f6413e236f521802c4d5deea057b0c85c4cea98a856bfb04" +dependencies = [ + "continue", + "js-sys", + "wasm-bindgen", + "wasm-bindgen-futures", + "web-time", +] + +[[package]] +name = "web-sys" +version = "0.3.85" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "312e32e551d92129218ea9a2452120f4aabc03529ef03e4d0d82fb2780608598" +dependencies = [ + "js-sys", + "wasm-bindgen", +] + +[[package]] +name = "web-time" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a6580f308b1fad9207618087a65c04e7a10bc77e02c8e84e9b00dd4b12fa0bb" +dependencies = [ + "js-sys", + "wasm-bindgen", +] + [[package]] name = "winapi" version = "0.3.9" diff --git a/Cargo.toml b/Cargo.toml index 902ecd525..f8108e226 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -22,50 +22,47 @@ homepage = "https://github.com/microsoft/oxidizer" [workspace.dependencies] -# local dependencies -anyspawn = { path = "crates/anyspawn", default-features = false, version = "0.1.0" } -bytesbuf = { path = "crates/bytesbuf", default-features = false, version = "0.4.0" } -bytesbuf_io = { path = "crates/bytesbuf_io", default-features = false, version = "0.4.0" } -data_privacy = { path = "crates/data_privacy", default-features = false, version = "0.10.1" } -data_privacy_macros = { path = "crates/data_privacy_macros", default-features = false, version = "0.9.0" } -data_privacy_macros_impl = { path = "crates/data_privacy_macros_impl", default-features = false, version = "0.9.0" } -fundle = { path = "crates/fundle", default-features = false, version = "0.3.0" } -fundle_macros = { path = "crates/fundle_macros", default-features = false, version = "0.3.0" } 
-fundle_macros_impl = { path = "crates/fundle_macros_impl", default-features = false, version = "0.3.0" } -layered = { path = "crates/layered", default-features = false, version = "0.3.0" } -ohno = { path = "crates/ohno", default-features = false, version = "0.3.1" } -ohno_macros = { path = "crates/ohno_macros", default-features = false, version = "0.3.0" } -recoverable = { path = "crates/recoverable", default-features = false, version = "0.1.0" } -seatbelt = { path = "crates/seatbelt", default-features = false, version = "0.3.1" } -testing_aids = { path = "crates/testing_aids", default-features = false } -thread_aware = { path = "crates/thread_aware", default-features = false, version = "0.6.2" } -thread_aware_macros = { path = "crates/thread_aware_macros", default-features = false, version = "0.6.1" } -thread_aware_macros_impl = { path = "crates/thread_aware_macros_impl", default-features = false, version = "0.6.1" } -tick = { path = "crates/tick", default-features = false, version = "0.2.1" } -uniflight = { path = "crates/uniflight", default-features = false, version = "0.1.0" } - # external dependencies ahash = { version = "0.8", default-features = false } alloc_tracker = { version = "0.5.9", default-features = false } anyhow = { version = "1.0.100", default-features = false } + +# local dependencies (entries with a `path` key) are interleaved alphabetically with external ones below +anyspawn = { path = "crates/anyspawn", default-features = false, version = "0.1.0" } +async-fs = { version = "2", default-features = false } async-once-cell = { version = "0.5", default-features = false } +async-task = { version = "4.7.1", default-features = false } +async_file = { version = "0.1", default-features = false } bytes = { version = "1.11.1", default-features = false } +bytesbuf = { path = "crates/bytesbuf", default-features = false, version = "0.4.0" } +bytesbuf_io = { path = "crates/bytesbuf_io", default-features = false, version = "0.4.0" } chrono = { version = "0.4.40", default-features = false } chrono-tz = { version = "0.10.4", default-features = 
false } criterion = { version = "0.8.1", default-features = false } +crossfire = { version = "3.1", default-features = false } dashmap = { version = "6.1", default-features = false } +data_privacy = { path = "crates/data_privacy", default-features = false, version = "0.10.1" } +data_privacy_macros = { path = "crates/data_privacy_macros", default-features = false, version = "0.9.0" } +data_privacy_macros_impl = { path = "crates/data_privacy_macros_impl", default-features = false, version = "0.9.0" } derive_more = { version = "2.0.1", default-features = false } duct = { version = "1.1.1", default-features = false } dynosaur = { version = "0.3.0", default-features = false } fastrand = { version = "2.3.0", default-features = false, features = ["std"] } +file = { path = "crates/file", default-features = false, version = "0.2.0" } +flume = { version = "0.12", default-features = false } +fundle = { path = "crates/fundle", default-features = false, version = "0.3.0" } +fundle_macros = { path = "crates/fundle_macros", default-features = false, version = "0.3.0" } +fundle_macros_impl = { path = "crates/fundle_macros_impl", default-features = false, version = "0.3.0" } futures = { version = "0.3.31", default-features = false } futures-channel = { version = "0.3.31", default-features = false } futures-core = { version = "0.3.31", default-features = false } +futures-lite = { version = "2", default-features = false } futures-util = { version = "0.3.31", default-features = false } http = { version = "1.2.0", default-features = false, features = ["std"] } infinity_pool = { version = "0.8.1", default-features = false } insta = { version = "1.44.1", default-features = false } jiff = { version = "0.2.21", default-features = false } +layered = { path = "crates/layered", default-features = false, version = "0.3.0" } libc = { version = "0.2.178", default-features = false } many_cpus = { version = "2.1.0", default-features = false } mockall = { version = "0.14.0", default-features = 
false } @@ -73,6 +70,8 @@ mutants = { version = "0.0.3", default-features = false } new_zealand = { version = "1.0.1", default-features = false } nm = { version = "0.1.21", default-features = false } num-traits = { version = "0.2.19", default-features = false } +ohno = { path = "crates/ohno", default-features = false, version = "0.3.1" } +ohno_macros = { path = "crates/ohno_macros", default-features = false, version = "0.3.0" } once_cell = { version = "1.21.3", default-features = false } opentelemetry = { version = "0.31.0", default-features = false } opentelemetry-stdout = { version = "0.31.0", default-features = false } @@ -84,16 +83,27 @@ prettyplease = { version = "0.2.37", default-features = false } proc-macro2 = { version = "1.0.103", default-features = false } quote = { version = "1.0.42", default-features = false } rapidhash = { version = "4.1.1", default-features = false } +recoverable = { path = "crates/recoverable", default-features = false, version = "0.1.0" } regex = { version = "1.12.2", default-features = false } rstest = { version = "0.26", default-features = false } rustc-hash = { version = "2.1.0", default-features = false } +seatbelt = { path = "crates/seatbelt", default-features = false, version = "0.3.1" } serde = { version = "1.0.228", default-features = false } serde_core = { version = "1.0.228", default-features = false } serde_json = { version = "1.0.145", default-features = false } smallvec = { version = "1.15.1", default-features = false } static_assertions = { version = "1.1.0", default-features = false } syn = { version = "2.0.111", default-features = false } +sync_thunk = { path = "crates/sync_thunk", default-features = false, version = "0.1.0" } +sync_thunk_macros = { path = "crates/sync_thunk_macros", default-features = false, version = "0.1.0" } +sync_thunk_macros_impl = { path = "crates/sync_thunk_macros_impl", default-features = false, version = "0.1.0" } +tempfile = { version = "3", default-features = false } +testing_aids = { 
path = "crates/testing_aids", default-features = false } thiserror = { version = "2.0.17", default-features = false } +thread_aware = { path = "crates/thread_aware", default-features = false, version = "0.6.2" } +thread_aware_macros = { path = "crates/thread_aware_macros", default-features = false, version = "0.6.1" } +thread_aware_macros_impl = { path = "crates/thread_aware_macros_impl", default-features = false, version = "0.6.1" } +tick = { path = "crates/tick", default-features = false, version = "0.2.1" } time = { version = "0.3.47", default-features = false } tokio = { version = "1.48.0", default-features = false } tower = { version = "0.5.2", default-features = false } @@ -104,6 +114,7 @@ tracing-subscriber = { version = "0.3.20", default-features = false } trait-variant = { version = "0.1.2", default-features = false } trybuild = { version = "1.0.114", default-features = false } typeid = { version = "1.0.3", default-features = false } +uniflight = { path = "crates/uniflight", default-features = false, version = "0.1.0" } windows-sys = { version = "0.61.2", default-features = false } xutex = { version = "0.2.0", default-features = false } xxhash-rust = { version = "0.8.15", default-features = false } diff --git a/README.md b/README.md index 4f6f51408..ae4b782df 100644 --- a/README.md +++ b/README.md @@ -29,11 +29,13 @@ These are the primary crates built out of this repo: - [`bytesbuf`](./crates/bytesbuf/README.md) - Types for creating and manipulating byte sequences. - [`bytesbuf_io`](./crates/bytesbuf_io/README.md) - Asynchronous I/O abstractions expressed via `bytesbuf` types. - [`data_privacy`](./crates/data_privacy/README.md) - Mechanisms to classify, manipulate, and redact sensitive data. +- [`file`](./crates/file/README.md) - Zero-copy asynchronous filesystem API. - [`fundle`](./crates/fundle/README.md) - Compile-time safe dependency injection for Rust. 
- [`layered`](./crates/layered/README.md) - A foundational service abstraction for building composable, middleware-driven systems. - [`ohno`](./crates/ohno/README.md) - High-quality Rust error handling. - [`recoverable`](./crates/recoverable/README.md) - Recovery information and classification for resilience patterns. - [`seatbelt`](./crates/seatbelt/README.md) - Resilience and recovery mechanisms for fallible operations. +- [`sync_thunk`](./crates/sync_thunk/README.md) - Efficiently handle blocking calls in async code. - [`thread_aware`](./crates/thread_aware/README.md) - Facilities to support thread-isolated state. - [`tick`](./crates/tick/README.md) - Provides primitives to interact with and manipulate machine time. - [`uniflight`](./crates/uniflight/README.md) - Coalesces duplicate async tasks into a single execution. diff --git a/crates/anyspawn/src/spawner.rs b/crates/anyspawn/src/spawner.rs index 4adc60ab4..910995a8f 100644 --- a/crates/anyspawn/src/spawner.rs +++ b/crates/anyspawn/src/spawner.rs @@ -33,7 +33,7 @@ use crate::handle::JoinHandleInner; /// println!("Task running!"); /// }); /// handle.await; // Wait for task to complete -/// +/// /// # } /// ``` /// diff --git a/crates/automation/Cargo.toml b/crates/automation/Cargo.toml index 3fd1cee8a..1fc99e7eb 100644 --- a/crates/automation/Cargo.toml +++ b/crates/automation/Cargo.toml @@ -13,9 +13,9 @@ homepage.workspace = true publish = false [dependencies] -ohno = { workspace = true, features = ["app-err"] } duct = { workspace = true } +ohno = { workspace = true, features = ["app-err"] } serde = { workspace = true, features = ["derive", "alloc"] } serde_json = { workspace = true, features = ["std"] } diff --git a/crates/file/CHANGELOG.md b/crates/file/CHANGELOG.md new file mode 100644 index 000000000..825c32f0d --- /dev/null +++ b/crates/file/CHANGELOG.md @@ -0,0 +1 @@ +# Changelog diff --git a/crates/file/Cargo.toml b/crates/file/Cargo.toml new file mode 100644 index 000000000..b1a3daf50 --- /dev/null 
+++ b/crates/file/Cargo.toml @@ -0,0 +1,67 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + +[package] +name = "file" +description = "Zero-copy asynchronous filesystem API." +version = "0.1.0" +readme = "README.md" +keywords = ["oxidizer", "file", "async", "filesystem"] +categories = ["asynchronous", "filesystem"] + +edition.workspace = true +rust-version = "1.89" +authors.workspace = true +license.workspace = true +homepage.workspace = true +repository = "https://github.com/microsoft/oxidizer/tree/main/crates/file" + +[package.metadata.cargo_check_external_types] +allowed_external_types = ["bytesbuf::*", "bytesbuf_io::*", "sync_thunk::thunker::Thunker"] + +[package.metadata.docs.rs] +all-features = true + +[features] +sync-compat = [] + +[dependencies] +bytesbuf.workspace = true +bytesbuf_io.workspace = true +sync_thunk.workspace = true + +[dev-dependencies] +async-fs.workspace = true +async_file.workspace = true +criterion = { workspace = true, features = ["async_tokio"] } +futures-lite.workspace = true +tempfile.workspace = true +tokio = { workspace = true, features = ["rt", "rt-multi-thread", "macros", "fs", "io-util"] } + +[[bench]] +name = "fs_comparison" +harness = false + +[[example]] +name = "basic_read_write" + +[[example]] +name = "directory_ops" + +[[example]] +name = "file_types" + +[[example]] +name = "open_options" + +[[example]] +name = "positional_io" + +[[example]] +name = "positional_types" + +[[example]] +name = "streaming_io" + +[lints] +workspace = true diff --git a/crates/file/ECOSYSTEM-ANALYSIS.md b/crates/file/ECOSYSTEM-ANALYSIS.md new file mode 100644 index 000000000..b444b474a --- /dev/null +++ b/crates/file/ECOSYSTEM-ANALYSIS.md @@ -0,0 +1,508 @@ +# Ecosystem Analysis: `file` crate vs. Rust File I/O Alternatives + +A thorough comparison of the `file` crate against `std::fs`, `tokio::fs` +(tokio 1.49.0), `async-fs` (2.2.0), and `async_file` (0.1.3). 
+ +All benchmarks were collected on Windows using Criterion, with files in the +OS page cache. Times are medians from 100 samples. + +--- + +## 1. Introduction + +Rust's async ecosystem offers several approaches to file I/O, each with +different trade-offs in performance, safety, and API design. This document +compares five options: + +| Library | Role | +|------------------|-------------------------------------------------------------------------------------------| +| **`std::fs`** | The standard library's synchronous, blocking file I/O | +| **`tokio::fs`** | Tokio's async file I/O, wrapping blocking ops via `spawn_blocking` | +| **`async-fs`** | Runtime-agnostic async file I/O from the smol ecosystem | +| **`async_file`** | Platform-native async I/O (IOCP/io_uring) with a priority scheduler | +| **`file`** | This crate — `sync_thunk` dispatch, `BytesBuf` zero-copy buffers, capability-based access | + +The goal is to help developers choose the right abstraction for their +workload. We cover architecture, features, measured performance, allocation +behavior, thread models, and practical guidance. + +--- + +## 2. 
Architectural Comparison + +| Aspect | `std::fs` | `tokio::fs` | `async-fs` | `async_file` | **`file`** | +|---------------------|-------------------------|--------------------------------------------------------------|--------------------------------------------------|-----------------------------------------|---------------------------------------------------------------------------------------------------------| +| **I/O model** | Synchronous, blocking | Async wrapper over `spawn_blocking` | Async wrapper over `blocking::unblock` | Platform-native async (IOCP / io_uring) | Async via `sync_thunk` dispatch | +| **Thread pool** | None (caller's thread) | Global blocking pool (default 512 threads, unbounded growth) | `blocking` crate adaptive pool (grows on demand) | Internal pool with priority queue | Bounded dedicated worker pool (1–4 threads) | +| **Per-op dispatch** | Direct syscall | Box closure → `spawn_blocking` → task allocation | Box closure → `blocking::unblock` | OVERLAPPED struct or io_uring SQE | Enum-based `FileOp` dispatch, no closure boxing | +| **File handle** | Owned `File` | `Arc<std::fs::File>` + `Mutex<Inner>` | `Arc<Mutex<File>>` | Owned `File` | Bare `*mut File` (seekable) / `Arc<File>` (positional, Unix) / `Arc<Mutex<File>>` (positional, Windows) | +| **Buffer system** | `Vec<u8>` / `&mut [u8]` | `Vec<u8>` / `&mut [u8]` | `Vec<u8>` / `&mut [u8]` | `Vec<u8>` returned per read | `BytesBuf` / `BytesView` — pooled, reference-counted, zero-copy capable | +| **Positional I/O** | Manual seek + read | Manual seek + read | Manual seek + read | Not supported | Native `pread`/`pwrite` via dedicated positional types | +| **Access control** | None | None | None | None | Capability-based `Root` / `Directory` + 6 typed file handles | + +### Key architectural differences + +**`file` crate — enum dispatch, no closure boxing.** The `file` crate avoids +heap-allocating a closure for every I/O operation. 
Instead, operations are +encoded as `FileOp` enum variants and sent through a channel to a dedicated +worker thread. 
For seekable files, a raw `*mut File` pointer is passed +directly — no `Arc`, no `Mutex` — because `&mut self` on the async handle +guarantees exclusive access. Positional files on Unix use `Arc<File>` with +`pread`/`pwrite`, which are inherently thread-safe and require no locking. + +**`tokio::fs` — closure boxing per operation.** Every I/O call boxes a +closure and spawns it onto tokio's global blocking thread pool via +`spawn_blocking`. The file handle is wrapped in `Arc<Mutex<File>>`, and the +mutex is locked on the worker thread for each syscall. This means even +single-threaded sequential I/O pays lock acquisition overhead. + +**`async-fs` — similar to tokio, different pool.** Uses `blocking::unblock` +instead of `spawn_blocking`, delegating to the `blocking` crate's adaptive +thread pool. Uses `parking_lot::Mutex` (slightly cheaper than +`std::sync::Mutex`), but the per-operation model is the same: box a closure, +dispatch to a thread, lock the mutex, run the syscall. + +**`async_file` — platform-native async.** Uses IOCP on Windows and io_uring +on Linux for truly non-blocking file I/O without thread pools. In theory +this should be fastest, but in practice the overhead of the priority queue +scheduler and per-operation OVERLAPPED/SQE allocation makes it the slowest +option in benchmarks. + +--- + +## 3. 
Functional Comparison + +### 3.1 File type system + +| Feature | `file` | `tokio::fs` | `async-fs` | `async_file` | `std::fs` | +|-----------------------|---------------------------|-------------|------------|--------------|-----------| +| Read-only file type | `ReadOnlyFile` | ✗ | ✗ | ✗ | ✗ | +| Write-only file type | `WriteOnlyFile` | ✗ | ✗ | ✗ | ✗ | +| Read-write file type | `File` | `File` | `File` | `File` | `File` | +| Positional read-only | `ReadOnlyPositionalFile` | ✗ | ✗ | ✗ | ✗ | +| Positional write-only | `WriteOnlyPositionalFile` | ✗ | ✗ | ✗ | ✗ | +| Positional read-write | `PositionalFile` | ✗ | ✗ | ✗ | ✗ | +| Capability narrowing | `From` conversions | ✗ | ✗ | ✗ | ✗ | + +The `file` crate's six file types enforce read/write permissions at the Rust +type level. Seekable types take `&mut self` (preventing concurrent cursor +corruption); positional types take `&self` (enabling concurrent random access +from multiple tasks without any locking on Unix). + +### 3.2 Positional I/O (`pread` / `pwrite`) + +Only the `file` crate exposes OS-native positional I/O: + +- `read_at(offset, len)`, `read_exact_at(offset, len)`, `read_max_at(offset, len)` +- `write_at(offset, data)`, `write_all_at(offset, data)` + +Other crates require a seek-then-read/write pattern, which is not atomic and +requires `&mut self` (or a mutex) to prevent cursor races. + +### 3.3 Capability-based access control + +Only the `file` crate provides directory capability scoping: + +- `Root::bind(thunker, path)` — the sole entry point for absolute paths +- `Directory` — all subsequent operations are relative, cannot escape the + bound directory tree +- Path validation rejects leading `/`, `\`, and `..` traversals + +All other crates accept arbitrary `AsRef<Path>`, providing no sandbox +guarantees. 
+ +### 3.4 Buffer management + +| Feature | `file` | `tokio::fs` | `async-fs` | `async_file` | +|--------------------------|--------------------------|----------------------|----------------------|--------------| +| Pooled memory | `BytesBuf` / `BytesView` | ✗ | ✗ | ✗ | +| Custom memory providers | `_with_memory` variants | ✗ | ✗ | ✗ | +| Zero-copy pipeline | Via shared `BytesView` | ✗ | ✗ | ✗ | +| Read into caller buffer | `read_into_slice` | `AsyncReadExt::read` | `AsyncReadExt::read` | ✗ | +| Read into managed buffer | `read_into_bytesbuf` | ✗ | ✗ | ✗ | + +### 3.5 File locking + +| Feature | `file` | `tokio::fs` | `async-fs` | `async_file` | `std::fs` | +|-----------------------|------------------------------------|-------------|------------|--------------|-----------| +| Exclusive lock | `lock()` | ✗ | ✗ | ✗ | ✗ | +| Shared lock | `lock_shared()` | ✗ | ✗ | ✗ | ✗ | +| Non-blocking try-lock | `try_lock()` / `try_lock_shared()` | ✗ | ✗ | ✗ | ✗ | +| Unlock | `unlock()` | ✗ | ✗ | ✗ | ✗ | + +All six `file` crate handle types support advisory file locking. 
+ +### 3.6 Directory operations + +| Operation | `file` | `tokio::fs` | `async-fs` | `async_file` | `std::fs` | +|------------------|--------|-------------|------------|--------------|-----------| +| Create directory | ✓ | ✓ | ✓ | ✗ | ✓ | +| Read directory | ✓ | ✓ | ✓ | ✗ | ✓ | +| Remove file/dir | ✓ | ✓ | ✓ | ✗ | ✓ | +| Rename | ✓ | ✓ | ✓ | ✗ | ✓ | +| Metadata / stat | ✓ | ✓ | ✓ | ✗ | ✓ | +| Symlink creation | ✓ | ✓ | ✓ | ✗ | ✓ | + +### 3.7 Trait implementations + +| Trait | `file` (seekable) | `tokio::fs` | `async-fs` | `async_file` | +|---------------------------|-----------------------|-------------|------------|--------------| +| `bytesbuf_io::Read` | ✓ | ✗ | ✗ | ✗ | +| `bytesbuf_io::Write` | ✓ | ✗ | ✗ | ✗ | +| `tokio::io::AsyncRead` | ✗ | ✓ | ✗ | ✗ | +| `tokio::io::AsyncWrite` | ✗ | ✓ | ✗ | ✗ | +| `futures::AsyncRead` | ✗ | ✗ | ✓ | ✗ | +| `futures::AsyncWrite` | ✗ | ✗ | ✓ | ✗ | +| `std::io::Read` (sync) | `sync-compat` feature | ✗ | ✗ | ✗ | +| `std::io::Write` (sync) | `sync-compat` feature | ✗ | ✗ | ✗ | +| `AsRawFd` / `AsRawHandle` | ✓ | ✓ | ✓ | ✗ | +| `AsFd` / `AsHandle` | ✓ | ✓ | ✗ | ✗ | + +--- + +## 4. Performance Comparison + +All benchmarks use Criterion with a tokio multi-threaded runtime. Files are +in the OS page cache. Times are median values from 100 samples. + +### 4.1 Sequential whole-file write + +Write the entire file in a single operation, then `sync_all` and close. + +| Size | `std::fs` | `tokio::fs` | **`file`** | `async-fs` | +|-------|-----------|-------------|-------------|------------| +| 1 KB | 587 µs | 695 µs | **698 µs** | 724 µs | +| 64 KB | 3.24 ms | 4.91 ms | **5.16 ms** | 5.38 ms | +| 1 MB | 6.20 ms | 7.86 ms | **6.32 ms** | 7.78 ms | + +**Analysis:** + +- **At 1 MB, the `file` crate matches `std::fs` within 2%** (6.32 ms vs + 6.20 ms) and **beats `tokio::fs` by 20%** (6.32 ms vs 7.86 ms). 
At this + size the write syscall itself dominates, and `sync_thunk`'s enum-based + dispatch adds near-zero overhead compared to `tokio::fs`'s closure boxing + + `spawn_blocking` + mutex acquisition. + +- **At 1 KB, all async libraries cluster within ~650–725 µs.** The + `fsync`/flush cost (~580 µs synchronous baseline) dominates, making the + dispatch mechanism irrelevant. The ~110 µs async overhead is the cost of + one thread-hop round-trip. + +- The `file` crate **consistently beats `async-fs`** across all sizes and is + competitive with `tokio::fs` at small sizes while pulling ahead as the + write payload grows. + +### 4.2 Sequential whole-file read + +Read the entire file contents in a single operation. + +| Size | `std::fs` | `tokio::fs` | **`file`** | `async-fs` | `async_file` | +|-------|-----------|-------------|-------------|------------|--------------| +| 1 KB | 94 µs | 152 µs | **163 µs** | 153 µs | 241 µs | +| 64 KB | 115 µs | 182 µs | **884 µs** | 196 µs | 276 µs | +| 1 MB | 623 µs | 663 µs | **1.04 ms** | 661 µs | 761 µs | + +**Analysis:** + +- **At 1 KB, all async libraries are within ~10% of each other** + (~150–165 µs), dominated by the syscall + thread dispatch overhead. The + `file` crate's `BytesBuf` path adds negligible cost at this scale. + +- **At 64 KB, the `file` crate (884 µs) is notably slower than `tokio::fs` + (182 µs).** This is the cost of the zero-copy buffer architecture: + `Directory::read()` uses a `read_into_bytesbuf` loop that allocates from + the `BytesBuf` pool and reads in chunks, whereas `tokio::fs` delegates + directly to `std::fs::read` which does a single `Vec` allocation + + `read_to_end` that the OS can satisfy in one copy. The `BytesBuf` + approach trades raw whole-file-read speed for pooled, reference-counted + buffers that enable zero-copy data pipelines downstream. + +- **At 1 MB, the `file` crate (1.04 ms) is ~1.6× slower than `tokio::fs` + (663 µs)** — the same `BytesBuf` chunked-read overhead. 
The gap narrows + in relative terms as the file grows because the actual I/O time becomes + a larger fraction of total time. + +- **This is a known and intentional trade-off.** The `file` crate's read + path is optimized for buffer reuse and zero-copy handoff, not for the + `read-entire-file-into-Vec` pattern. Applications that primarily do + whole-file reads with no downstream buffer sharing may be better served + by `tokio::fs` or `std::fs::read`. + +### 4.3 Streaming read (1 MB file, 8 KB chunks) + +Read a 1 MB file in 128 × 8 KB chunks using each crate's streaming API. + +| Library | Time | Throughput | +|--------------|-------------|---------------| +| `std::fs` | 508 µs | 1.92 GiB/s | +| `async-fs` | 857 µs | 1.08 GiB/s | +| **`file`** | **5.89 ms** | **170 MiB/s** | +| `tokio::fs` | 5.95 ms | 168 MiB/s | +| `async_file` | 5.72 ms | 175 MiB/s | + +**Analysis:** + +- **`tokio::fs` streaming (5.95 ms for 1 MB in 8 KB chunks) is ~12× slower + than `std::fs` (508 µs)** because each chunk requires a separate + `spawn_blocking` → thread wakeup → task completion round-trip. With 128 + chunks, that's 128 thread-hops. + +- The `file` crate (5.89 ms) is **within 1% of `tokio::fs`** (5.95 ms) for + streaming reads. Both pay a per-chunk thread dispatch cost; `sync_thunk`'s + lower per-dispatch overhead is offset by the `BytesBuf` allocation path. + +- **`async-fs` (857 µs) is dramatically faster** for streaming because the + `blocking` crate keeps the operation on a blocking thread between chunks, + avoiding the per-chunk thread-hop that `tokio::fs` and `file` both pay. + +- For streaming workloads, the per-dispatch overhead is the bottleneck, not + the I/O itself. Both `tokio::fs` and `file` would benefit from batching + multiple chunks per dispatch. 
+ +### 4.4 Streaming write (128 × 8 KB chunks) + +| Library | Time | Throughput | +|-------------|-------------|---------------| +| `std::fs` | 6.31 ms | 159 MiB/s | +| `async-fs` | 5.65 ms | 177 MiB/s | +| **`file`** | **7.47 ms** | **134 MiB/s** | +| `tokio::fs` | 7.44 ms | 134 MiB/s | + +The `file` crate matches `tokio::fs` within 0.4% for streaming writes. +`async-fs` again benefits from keeping the blocking thread alive across +chunks. + +### 4.5 Many small files (100 × 256 B: create + write + read + delete) + +| Library | Time | Throughput | +|-------------|------------|-----------------| +| `std::fs` | 140 ms | 714 files/s | +| **`file`** | **154 ms** | **651 files/s** | +| `async-fs` | 154 ms | 651 files/s | +| `tokio::fs` | 156 ms | 641 files/s | + +All async crates are within 2% of each other for metadata-heavy small-file +workloads. The actual filesystem operations (create, fsync, delete) dominate; +dispatch overhead is negligible. + +### 4.6 Metadata (100 stat calls) + +| Library | Time | Throughput | +|--------------|-------------|-----------------| +| `std::fs` | 2.87 ms | 34.8K ops/s | +| `tokio::fs` | 7.37 ms | 13.6K ops/s | +| `async-fs` | 7.47 ms | 13.4K ops/s | +| **`file`** | **7.91 ms** | **12.6K ops/s** | +| `async_file` | 17.4 ms | 5.7K ops/s | + +The `file` crate's metadata path goes through `Directory::metadata()` which +performs `safe_join` path validation before dispatching. The ~7% overhead +vs `tokio::fs` is the cost of capability-based path validation — a security +feature, not an inefficiency. + +### 4.7 Positional read (128 × 8 KB scattered reads from 1 MB file) + +| Library | Time | Throughput | +|-------------------------|-------------|---------------| +| `std::fs` (seek+read) | 529 µs | 1.83 GiB/s | +| **`file`** (`pread`) | **5.95 ms** | **170 MiB/s** | +| `tokio::fs` (seek+read) | 10.3 ms | 98 MiB/s | +| `async-fs` (seek+read) | 26.0 ms | 39.7 MiB/s | + +**The `file` crate is 1.7× faster than `tokio::fs`** for positional reads. 
+Two factors contribute: (1) OS-native `pread` avoids the seek + read +two-syscall pattern, and (2) positional file types take `&self`, eliminating +mutex overhead on Unix (where `pread` is inherently thread-safe). + +`async-fs` is 2.5× slower than `tokio::fs` because each seek + read +pair requires two separate `blocking::unblock` dispatches. + +### 4.8 Positional write (128 × 8 KB scattered writes) + +| Library | Time | Throughput | +|--------------------------|-------------|---------------| +| `std::fs` (seek+write) | 6.17 ms | 162 MiB/s | +| **`file`** (`pwrite`) | **9.55 ms** | **105 MiB/s** | +| `tokio::fs` (seek+write) | 10.2 ms | 98 MiB/s | +| `async-fs` (seek+write) | 15.2 ms | 66 MiB/s | + +The `file` crate is 7% faster than `tokio::fs` for scattered positional +writes, again due to native `pwrite` and lower per-op overhead. + +### 4.9 Concurrent positional reads (4 × 256 KB from 1 MB file) + +| Variant | Time | Throughput | +|------------------------------------------|---------|------------| +| `std::fs` (sequential) | 190 µs | 5.0 GiB/s | +| `file` (sequential) | 1.25 ms | 809 MiB/s | +| `file` (4 concurrent via `tokio::join!`) | 1.25 ms | 806 MiB/s | + +On Windows, positional reads use `seek_read` which requires a `Mutex`, +serializing concurrent access. On Unix, `pread` requires no locking, so +concurrent positional reads would show true parallelism scaling. 
+ +### 4.10 Performance summary + +| Workload | Winner | `file` crate standing | +|-------------------------------|------------|---------------------------------------------------------| +| Large sequential write (1 MB) | `std::fs` | **Within 2% of `std::fs`**, 20% faster than `tokio::fs` | +| Small sequential write (1 KB) | `std::fs` | Competitive with all async options | +| Large sequential read (1 MB) | `std::fs` | 1.6× slower than `tokio::fs` (BytesBuf overhead) | +| Small sequential read (1 KB) | `std::fs` | Within 10% of all async options | +| Streaming read (chunked) | `async-fs` | Comparable to `tokio::fs` | +| Streaming write (chunked) | `async-fs` | Comparable to `tokio::fs` | +| Positional read | `std::fs` | **1.7× faster than `tokio::fs`** | +| Positional write | `std::fs` | 7% faster than `tokio::fs` | +| Many small files | `std::fs` | Comparable to all async options | +| Metadata | `std::fs` | ~7% slower than `tokio::fs` (path validation cost) | + +--- + +## 5. Allocation Behavior + +Per-operation heap allocation count for a single read or write call: + +| Library | Allocations per operation | What gets allocated | +|------------------|---------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| **`std::fs`** | 0 | Synchronous — no async machinery needed | +| **`tokio::fs`** | 2+ | `Box`ed closure + `Task` allocation + `spawn_blocking` overhead. Each call to `spawn_blocking` allocates a boxed `FnOnce`, a `JoinHandle`, and an internal task struct. | +| **`async-fs`** | 2+ | Similar to `tokio::fs` — `blocking::unblock` boxes a closure and allocates a task through the `blocking` crate's thread pool. Uses `parking_lot::Mutex` (no additional alloc, but still a per-op cost). 
| +| **`async_file`** | 1+ | OVERLAPPED struct (Windows) or io_uring SQE (Linux). The priority queue scheduler may allocate for insertion. | +| **`file`** | 1 | Single `Waker` clone via `sync_thunk`. No closure boxing — the operation is encoded as an enum variant sent through a pre-allocated channel. The `BytesBuf` read path may draw from a memory pool (amortized zero allocation if the pool has capacity). | + +### Why allocation count matters + +In high-frequency I/O loops (e.g., database page reads, log writes), +per-operation allocations compound. The `file` crate's enum-based dispatch +avoids the 2+ allocations that `tokio::fs` and `async-fs` incur on every +call. For a workload doing 10,000 reads/second, this is the difference +between ~10K allocations/s (`file`) and ~20–30K allocations/s (`tokio::fs`). + +--- + +## 6. Thread Model + +### `std::fs` — no threads + +Runs on the caller's thread. Simple and fast, but blocks the async +executor if called from async code. + +### `tokio::fs` — global blocking pool, unbounded growth + +Each file operation calls `tokio::task::spawn_blocking`, which dispatches +the closure to tokio's global blocking thread pool. The pool starts with a +small number of threads and grows up to 512 (configurable). Threads are +shared across all blocking work in the application (not just file I/O). + +**Implications:** +- No control over how many threads are used for file I/O specifically +- Under load, file operations compete with other `spawn_blocking` users + (database drivers, CPU-bound work, etc.) +- Thread creation/teardown overhead under bursty loads +- Each operation pays a full thread wakeup + task scheduling round-trip + +### `async-fs` — `blocking` crate adaptive pool + +Similar model to `tokio::fs`, but uses the `blocking` crate's thread pool +instead of tokio's. The pool grows adaptively based on demand and shrinks +when idle. + +**Implications:** +- Runtime-agnostic (works with tokio, async-std, smol, etc.) 
+- Pool behavior is controlled by the `blocking` crate, not the async runtime +- Same per-operation dispatch overhead as `tokio::fs` + +### `async_file` — internal pool with priority queue + +Maintains its own thread pool with a priority-based work queue. Operations +carry priority levels, allowing high-priority I/O to preempt lower-priority +work. + +**Implications:** +- Additional thread pool beyond the async runtime's own pool +- Priority scheduling adds overhead to every operation +- Not widely used or maintained + +### `file` crate — bounded dedicated pool via `sync_thunk` + +The `file` crate dispatches operations through `sync_thunk`, which uses a +bounded, dedicated worker pool (typically 1–4 threads). Operations are +encoded as `FileOp` enum variants and sent through a channel — no closure +boxing required. + +**Implications:** +- **Bounded and predictable** — the thread count is fixed at construction, + preventing runaway thread creation under load +- **Dedicated to file I/O** — no contention with other blocking work +- **Lower per-dispatch overhead** — enum dispatch + channel send vs. closure + boxing + task allocation + thread pool scheduling +- **Seekable files are lock-free** — `&mut self` guarantees exclusive access, + so no `Arc` or `Mutex` is needed on the hot path. The raw `*mut File` + pointer is sent directly to the worker thread. +- **Positional files on Unix are lock-free** — `pread`/`pwrite` are + thread-safe, so `Arc<File>` with no mutex suffices +- **Cancellation-safe** — `ScopedDispatchFuture` blocks on drop, ensuring + borrowed data remains valid even if the future is cancelled + +--- + +## 7. 
When to Use What + +### Use `std::fs` when… + +- You are not in an async context +- You need maximum raw throughput and can afford to block the thread +- You are doing a single large sequential read (`std::fs::read` is the + fastest way to slurp a file into a `Vec<u8>`) + +### Use `tokio::fs` when… + +- You need `AsyncRead` / `AsyncWrite` / `AsyncSeek` trait compatibility + (e.g., piping file data through tokio's codec/framing layer) +- You are already using tokio and want minimal dependencies +- Your workload is primarily whole-file reads where `BytesBuf` overhead + would hurt +- You don't need positional I/O, file locking, or type-level access control + +### Use `async-fs` when… + +- You need runtime-agnostic async file I/O (works with smol, async-std, + tokio, or any executor) +- You need `futures::AsyncRead` / `futures::AsyncWrite` trait compatibility +- Streaming read/write performance matters (the `blocking` crate's thread + reuse gives it a significant edge for chunked workloads) + +### Use `async_file` when… + +- You specifically need priority-based I/O scheduling +- You want to experiment with platform-native async I/O (IOCP/io_uring) +- **Caveat:** It is the slowest option in some benchmarks (small reads, metadata) and has the most + limited API (no seek, no metadata, no directory operations) + +### Use the `file` crate when… + +- **Write-heavy workloads** — within 2% of `std::fs` for large writes, + 20% faster than `tokio::fs` +- **Positional / random-access I/O** — 1.7× faster than `tokio::fs` for + scattered reads, with native `pread`/`pwrite` support +- **Type-safe access control** — six file types enforce read/write/seekable + permissions at compile time, preventing an entire class of bugs +- **Capability-based directory scoping** — `Root::bind` + `Directory` + prevent path traversal attacks by construction +- **Zero-copy buffer pipelines** — `BytesBuf`/`BytesView` enable + reference-counted buffer sharing across subsystems without copying +- **File 
locking** — advisory locking 
built into every file type +- **Bounded resource usage** — dedicated worker pool with fixed thread count, + no runaway thread creation under load +- **Cancellation safety** — scoped dispatch ensures borrowed data validity + across async cancellation boundaries + +**Accept these trade-offs:** +- Whole-file reads are ~1.6× slower than `tokio::fs` at 1 MB due to the + `BytesBuf` chunked-read path +- No `AsyncRead`/`AsyncWrite` trait implementations — not directly + composable with tokio/futures I/O combinators +- Per-chunk streaming overhead is comparable to `tokio::fs` (both pay a + thread-hop per chunk) +- Path validation is lexical only — symlinks can escape the directory + capability boundary diff --git a/crates/file/README.md b/crates/file/README.md new file mode 100644 index 000000000..32c81d52c --- /dev/null +++ b/crates/file/README.md @@ -0,0 +1,321 @@ +
+ File Logo + +# File + +[![crate.io](https://img.shields.io/crates/v/file.svg)](https://crates.io/crates/file) +[![docs.rs](https://docs.rs/file/badge.svg)](https://docs.rs/file) +[![MSRV](https://img.shields.io/crates/msrv/file)](https://crates.io/crates/file) +[![CI](https://github.com/microsoft/oxidizer/actions/workflows/main.yml/badge.svg?event=push)](https://github.com/microsoft/oxidizer/actions/workflows/main.yml) +[![Coverage](https://codecov.io/gh/microsoft/oxidizer/graph/badge.svg?token=FCUG0EL5TI)](https://codecov.io/gh/microsoft/oxidizer) +[![License](https://img.shields.io/badge/license-MIT-blue.svg)](../../LICENSE) +This crate was developed as part of the Oxidizer project + +
+ +Zero-copy asynchronous filesystem API. + +This crate provides a filesystem API that differs from [`std::fs`][__link0] in three key ways: + +1. **Fully asynchronous.** Every I/O operation is `async`. The implementation uses + a pool of dedicated background threads to perform blocking filesystem calls, + keeping the async executor free. + +1. **Managed buffers via [`bytesbuf`][__link1].** Reads produce + [`BytesView`][__link2] values backed by pooled memory; writes + accept them. This enables zero-copy data pipelines: data read from a file can + be written to a socket (or another file) without intermediate copies, as long + as both endpoints share a compatible memory provider. + +1. **Capability-based access control.** All filesystem operations are scoped to a + [`Directory`][__link3] capability obtained via [`Root::bind_std`][__link4]. Paths are always relative + to a directory, and path traversals that would escape the directory (such as + leading `/` or `..` above the root) are rejected. This makes it possible to + grant a subsystem access to a specific directory tree without risking access + to the rest of the filesystem. + +## Quick start + +```rust +use std::path::Path; + +use file::Root; +use sync_thunk::Thunker; + +// Bind to a directory — the only place an absolute path is accepted. +let thunker = Thunker::builder().build(); +let dir = Root::bind_std(&thunker, Path::new("/var/data")).await?; + +// Read and write whole files through the Directory capability. +dir.write_slice("greeting.txt", b"Hello!").await?; +let text = dir.read_to_string("greeting.txt").await?; + +// Narrow the capability to a subdirectory. +let sub = dir.open_dir("subdir").await?; +let data = sub.read("nested_file.txt").await?; +``` + +## File types + +The crate provides **six file types** organized into two families. Within each +family, three types enforce read, write, or read-write access at the type level. 
+ +### Seekable files — streaming I/O with a cursor + +Seekable files maintain an internal cursor that advances with each read or +write. They implement [`bytesbuf_io::Read`][__link5] and/or [`bytesbuf_io::Write`][__link6] for +streaming I/O and support [`seek`][__link7], +[`stream_position`][__link8], and [`rewind`][__link9]. + +Because the cursor is shared mutable state, all I/O methods take **`&mut self`**, +ensuring only one operation is in flight at a time. This makes seekable files +ideal for sequential processing: reading a log from top to bottom, writing a +report line by line, or appending to a file. + +|Type|Access|Obtained via| +|----|------|------------| +|[`ReadOnlyFile`][__link10]|Read + seek|[`ReadOnlyFile::open`][__link11]| +|[`WriteOnlyFile`][__link12]|Write + seek|[`WriteOnlyFile::create`][__link13], [`WriteOnlyFile::create_new`][__link14]| +|[`File`][__link15]|Read + write + seek|[`File::open`][__link16], [`File::create`][__link17], [`OpenOptions`][__link18]| + +```rust +use std::path::Path; + +use file::{ReadOnlyFile, Root}; +use sync_thunk::Thunker; + +let thunker = Thunker::builder().build(); +let dir = Root::bind_std(&thunker, Path::new("/var/data")).await?; +let mut file = ReadOnlyFile::open(&dir, "log.txt").await?; + +// Stream through the file in 8 KB chunks. +loop { + let chunk = file.read_max(8192).await?; + if chunk.is_empty() { + break; // EOF + } + // process chunk... +} +``` + +### Positional files — offset-based I/O without a cursor + +Positional files have **no cursor**. Every I/O operation specifies an explicit +byte offset. Because there is no shared mutable state, all I/O methods take +**`&self`**, enabling multiple operations to be dispatched concurrently from +different tasks on the same handle. + +Positional files are ideal when the access pattern is non-sequential: reading +scattered records from a database file, writing blocks to a pre-allocated +image, or serving range requests from a large static asset. 
+ +|Type|Access|Obtained via| +|----|------|------------| +|[`ReadOnlyPositionalFile`][__link19]|Positional read|[`ReadOnlyPositionalFile::open`][__link20]| +|[`WriteOnlyPositionalFile`][__link21]|Positional write|[`WriteOnlyPositionalFile::create`][__link22], [`WriteOnlyPositionalFile::create_new`][__link23]| +|[`PositionalFile`][__link24]|Positional read + write|[`PositionalFile::open`][__link25], [`PositionalFile::create`][__link26], [`OpenOptions`][__link27]| + +```rust +use std::path::Path; + +use file::{ReadOnlyPositionalFile, Root}; +use sync_thunk::Thunker; + +let thunker = Thunker::builder().build(); +let dir = Root::bind_std(&thunker, Path::new("/var/data")).await?; +let file = ReadOnlyPositionalFile::open(&dir, "db.bin").await?; + +// Read two disjoint regions concurrently — both calls use &self. +let (header, record) = tokio::join!(file.read_exact_at(0, 128), file.read_exact_at(4096, 256),); +let header = header?; +let record = record?; +``` + +### Choosing between seekable and positional + +|Use case|Recommended type| +|--------|----------------| +|Read a file from start to end|[`ReadOnlyFile`][__link28]| +|Append log entries|[`WriteOnlyFile`][__link29]| +|Build a file incrementally (write, then rewind and read)|[`File`][__link30]| +|Read scattered records from a database or index|[`ReadOnlyPositionalFile`][__link31]| +|Write blocks to a pre-allocated file|[`WriteOnlyPositionalFile`][__link32]| +|Serve concurrent range requests from a static asset|[`ReadOnlyPositionalFile`][__link33]| +|Read and update a memory-mapped-style structure|[`PositionalFile`][__link34]| + +### Narrowing capabilities + +Both [`File`][__link35] and [`PositionalFile`][__link36] can be permanently narrowed to their +single-access counterparts via [`From`][__link37] conversions. 
Once narrowed, the +dropped capability cannot be recovered: + +```rust +use std::path::Path; + +use file::{File, ReadOnlyFile, Root}; +use sync_thunk::Thunker; + +let thunker = Thunker::builder().build(); +let dir = Root::bind_std(&thunker, Path::new("/var/data")).await?; +let rw = File::open(&dir, "data.bin").await?; + +// Narrow to read-only — the write capability is permanently dropped. +let ro: ReadOnlyFile = rw.into(); +``` + +## Buffer management + +All I/O uses buffers from the [`bytesbuf`][__link38] crate. [`BytesBuf`][__link39] +is a mutable write buffer; [`BytesView`][__link40] is an immutable, +reference-counted read view. Buffers are allocated from a memory provider +(defaulting to [`GlobalPool`][__link41]). + +Each file type implements [`HasMemory`][__link42] and +[`Memory`][__link43], so you can reserve optimally-sized buffers +directly from the file: + +```rust +use std::path::Path; + +use bytesbuf::mem::Memory; +use file::{Root, WriteOnlyFile}; +use sync_thunk::Thunker; + +let thunker = Thunker::builder().build(); +let dir = Root::bind_std(&thunker, Path::new("/var/data")).await?; +let mut file = WriteOnlyFile::create(&dir, "output.bin").await?; + +let mut buf = file.reserve(4096); +buf.put_slice(*b"Hello, world!"); +file.write(buf.consume_all()).await?; +``` + +For zero-copy cross-subsystem transfers, constructors accept an optional custom +memory provider via `_with_memory` variants: + +```rust +// Open a file using the socket's memory provider. +let mut file = ReadOnlyFile::open_with_memory(&dir, "data.bin", socket.memory()).await?; + +// Data lands in memory optimal for the socket — zero copies on write. +let data = file.read_max(8192).await?; +socket.write(data).await?; +``` + +## Streaming I/O (seekable files) + +Seekable files support cursor-relative streaming. 
Use `read_max` to pull +data in chunks, or `write` / `write_slice` to push data sequentially: + +```rust +use std::path::Path; + +use bytesbuf::mem::Memory; +use file::{Root, WriteOnlyFile}; +use sync_thunk::Thunker; + +let thunker = Thunker::builder().build(); +let dir = Root::bind_std(&thunker, Path::new("/var/data")).await?; +let mut file = WriteOnlyFile::create(&dir, "output.bin").await?; +for i in 0..10 { + let mut buf = file.reserve(1024); + buf.put_slice(*b"some data\n"); + file.write(buf.consume_all()).await?; +} +file.flush().await?; +``` + +Callers working with plain `&[u8]` slices can use convenience methods like +[`WriteOnlyFile::write_slice`][__link44] and [`ReadOnlyFile::read_into_slice`][__link45]. Note +that these copy data internally; for large or performance-sensitive I/O, +prefer the [`BytesView`][__link46] methods. + +## Positional I/O (positional files) + +Positional files accept an explicit byte offset on every call. Because +they take `&self`, you can share a single handle across tasks: + +```rust +use std::path::Path; + +use file::{PositionalFile, Root}; +use sync_thunk::Thunker; + +let thunker = Thunker::builder().build(); +let dir = Root::bind_std(&thunker, Path::new("/var/data")).await?; + +// Pre-allocate a 1 MB file, then write four 256 KB regions concurrently. +let file = PositionalFile::create(&dir, "image.bin").await?; +file.set_len(1_048_576).await?; + +let data = vec![0xABu8; 262_144]; +let (a, b, c, d) = tokio::join!( + file.write_slice_at(0, &data), + file.write_slice_at(262_144, &data), + file.write_slice_at(524_288, &data), + file.write_slice_at(786_432, &data), +); +a?; +b?; +c?; +d?; +file.flush().await?; +``` + +Positional files also offer `read_into_slice_at` and `write_slice_at` for +callers working with plain byte slices. + + +
+ +This crate was developed as part of The Oxidizer Project. Browse this crate's source code. + + + [__cargo_doc2readme_dependencies_info]: ggGkYW0CYXSEGy4k8ldDFPOhG2VNeXtD5nnKG6EPY6OfW5wBG8g18NOFNdxpYXKEGyQuy9Sa1B0qG3UO309FTgy9G4q0XjDYy0wWG356-8AALNXcYWSCgmhieXRlc2J1ZmUwLjQuMIJrYnl0ZXNidWZfaW9lMC40LjA + [__link0]: https://doc.rust-lang.org/stable/std/?search=fs + [__link1]: https://crates.io/crates/bytesbuf/0.4.0 + [__link10]: https://doc.rust-lang.org/stable/std/?search=file::read_only_file::ReadOnlyFile + [__link11]: https://doc.rust-lang.org/stable/std/?search=file::read_only_file::ReadOnlyFile::open + [__link12]: https://doc.rust-lang.org/stable/std/?search=file::write_only_file::WriteOnlyFile + [__link13]: https://doc.rust-lang.org/stable/std/?search=file::write_only_file::WriteOnlyFile::create + [__link14]: https://doc.rust-lang.org/stable/std/?search=file::write_only_file::WriteOnlyFile::create_new + [__link15]: https://doc.rust-lang.org/stable/std/?search=file::file::File + [__link16]: https://doc.rust-lang.org/stable/std/?search=file::file::File::open + [__link17]: https://doc.rust-lang.org/stable/std/?search=file::file::File::create + [__link18]: https://doc.rust-lang.org/stable/std/?search=file::open_options::OpenOptions + [__link19]: https://doc.rust-lang.org/stable/std/?search=file::read_only_positional_file::ReadOnlyPositionalFile + [__link2]: https://docs.rs/bytesbuf/0.4.0/bytesbuf/?search=BytesView + [__link20]: https://doc.rust-lang.org/stable/std/?search=file::read_only_positional_file::ReadOnlyPositionalFile::open + [__link21]: https://doc.rust-lang.org/stable/std/?search=file::write_only_positional_file::WriteOnlyPositionalFile + [__link22]: https://doc.rust-lang.org/stable/std/?search=file::write_only_positional_file::WriteOnlyPositionalFile::create + [__link23]: https://doc.rust-lang.org/stable/std/?search=file::write_only_positional_file::WriteOnlyPositionalFile::create_new + [__link24]: 
https://doc.rust-lang.org/stable/std/?search=file::positional_file::PositionalFile + [__link25]: https://doc.rust-lang.org/stable/std/?search=file::positional_file::PositionalFile::open + [__link26]: https://doc.rust-lang.org/stable/std/?search=file::positional_file::PositionalFile::create + [__link27]: https://doc.rust-lang.org/stable/std/?search=file::open_options::OpenOptions + [__link28]: https://doc.rust-lang.org/stable/std/?search=file::read_only_file::ReadOnlyFile + [__link29]: https://doc.rust-lang.org/stable/std/?search=file::write_only_file::WriteOnlyFile + [__link3]: https://doc.rust-lang.org/stable/std/?search=file::directory::Directory + [__link30]: https://doc.rust-lang.org/stable/std/?search=file::file::File + [__link31]: https://doc.rust-lang.org/stable/std/?search=file::read_only_positional_file::ReadOnlyPositionalFile + [__link32]: https://doc.rust-lang.org/stable/std/?search=file::write_only_positional_file::WriteOnlyPositionalFile + [__link33]: https://doc.rust-lang.org/stable/std/?search=file::read_only_positional_file::ReadOnlyPositionalFile + [__link34]: https://doc.rust-lang.org/stable/std/?search=file::positional_file::PositionalFile + [__link35]: https://doc.rust-lang.org/stable/std/?search=file::file::File + [__link36]: https://doc.rust-lang.org/stable/std/?search=file::positional_file::PositionalFile + [__link37]: https://doc.rust-lang.org/stable/std/convert/trait.From.html + [__link38]: https://crates.io/crates/bytesbuf/0.4.0 + [__link39]: https://docs.rs/bytesbuf/0.4.0/bytesbuf/?search=BytesBuf + [__link4]: https://doc.rust-lang.org/stable/std/?search=file::root::Root::bind_std + [__link40]: https://docs.rs/bytesbuf/0.4.0/bytesbuf/?search=BytesView + [__link41]: https://docs.rs/bytesbuf/0.4.0/bytesbuf/?search=mem::GlobalPool + [__link42]: https://docs.rs/bytesbuf/0.4.0/bytesbuf/?search=mem::HasMemory + [__link43]: https://docs.rs/bytesbuf/0.4.0/bytesbuf/?search=mem::Memory + [__link44]: 
https://doc.rust-lang.org/stable/std/?search=file::write_only_file::WriteOnlyFile::write_slice + [__link45]: https://doc.rust-lang.org/stable/std/?search=file::read_only_file::ReadOnlyFile::read_into_slice + [__link46]: https://docs.rs/bytesbuf/0.4.0/bytesbuf/?search=BytesView + [__link5]: https://docs.rs/bytesbuf_io/0.4.0/bytesbuf_io/?search=Read + [__link6]: https://docs.rs/bytesbuf_io/0.4.0/bytesbuf_io/?search=Write + [__link7]: https://doc.rust-lang.org/stable/std/?search=file::file::File::seek + [__link8]: https://doc.rust-lang.org/stable/std/?search=file::file::File::stream_position + [__link9]: https://doc.rust-lang.org/stable/std/?search=file::file::File::rewind diff --git a/crates/file/TODO.md b/crates/file/TODO.md new file mode 100644 index 000000000..75e92e8aa --- /dev/null +++ b/crates/file/TODO.md @@ -0,0 +1,11 @@ +# TODO + +- Should this crate be using ohno instead of std::io::Error? + +- Should this crate be reexporting any types from std::fs in its public API or should we just clone the types so we're isolated? + +- Would be nice to enhance bytesbuf so the file crate doesn't need unsafe blocks to get max perf. + +- Could easily make the number of worker threads configurable. Should we? + +- Where does thread_aware make sense? diff --git a/crates/file/benches/fs_comparison.rs b/crates/file/benches/fs_comparison.rs new file mode 100644 index 000000000..212f9a3f4 --- /dev/null +++ b/crates/file/benches/fs_comparison.rs @@ -0,0 +1,795 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. 
+ +#![allow(missing_docs, reason = "Benchmark harness")] +#![allow(unused_results, reason = "Criterion builder returns are intentionally unused")] +#![allow(clippy::cast_possible_truncation, reason = "Intentional modular byte pattern")] + +use std::io::{Read, Seek, SeekFrom, Write}; + +use async_file::Priority; +use criterion::{BenchmarkId, Criterion, Throughput, criterion_group, criterion_main}; +use file::{ReadOnlyFile, ReadOnlyPositionalFile, Root, WriteOnlyFile, WriteOnlyPositionalFile}; +use sync_thunk::Thunker; +use tokio::io::{AsyncReadExt, AsyncSeekExt, AsyncWriteExt}; +use tokio::runtime::Runtime; + +fn make_data(size: usize) -> Vec<u8> { + (0..size).map(|i| (i % 251) as u8).collect() +} + +// --------------------------------------------------------------------------- +// A. Sequential Write +// --------------------------------------------------------------------------- + +fn bench_sequential_write(c: &mut Criterion) { + let sizes: &[(usize, &str)] = &[(1024, "1KB"), (64 * 1024, "64KB"), (1024 * 1024, "1MB")]; + + let mut group = c.benchmark_group("sequential_write"); + + for &(size, label) in sizes { + let data = make_data(size); + group.throughput(Throughput::Bytes(size as u64)); + + // std::fs + group.bench_with_input(BenchmarkId::new("std_fs", label), &size, |b, _| { + let tmp = tempfile::tempdir().expect("tempdir"); + let path = tmp.path().join("out.bin"); + b.iter(|| { + std::fs::write(&path, &data).expect("write"); + }); + }); + + // tokio::fs + group.bench_with_input(BenchmarkId::new("tokio_fs", label), &size, |b, _| { + let rt = Runtime::new().expect("runtime"); + let tmp = tempfile::tempdir().expect("tempdir"); + let path = tmp.path().join("out.bin"); + let data = data.clone(); + b.iter(|| { + rt.block_on(async { + tokio::fs::write(&path, &data).await.expect("write"); + }); + }); + }); + + // file crate + group.bench_with_input(BenchmarkId::new("file_crate", label), &size, |b, _| { + let rt = Runtime::new().expect("runtime"); + let tmp = 
tempfile::tempdir().expect("tempdir"); + let thunker = Thunker::builder().build(); + let dir = rt.block_on(Root::bind_std(&thunker, tmp.path())).expect("bind"); + let data = data.clone(); + b.iter(|| { + rt.block_on(async { + dir.write_slice("out.bin", &data).await.expect("write"); + }); + }); + }); + + // async-fs + group.bench_with_input(BenchmarkId::new("async_fs", label), &size, |b, _| { + let rt = Runtime::new().expect("runtime"); + let tmp = tempfile::tempdir().expect("tempdir"); + let path = tmp.path().join("out.bin"); + let data = data.clone(); + b.iter(|| { + rt.block_on(async { + async_fs::write(&path, &data).await.expect("write"); + }); + }); + }); + } + + group.finish(); +} + +// --------------------------------------------------------------------------- +// B. Sequential Read +// --------------------------------------------------------------------------- + +fn bench_sequential_read(c: &mut Criterion) { + let sizes: &[(usize, &str)] = &[(1024, "1KB"), (64 * 1024, "64KB"), (1024 * 1024, "1MB")]; + + let mut group = c.benchmark_group("sequential_read"); + + for &(size, label) in sizes { + let data = make_data(size); + group.throughput(Throughput::Bytes(size as u64)); + + // std::fs + group.bench_with_input(BenchmarkId::new("std_fs", label), &size, |b, _| { + let tmp = tempfile::tempdir().expect("tempdir"); + let path = tmp.path().join("in.bin"); + std::fs::write(&path, &data).expect("setup write"); + b.iter(|| { + let _ = std::fs::read(&path).expect("read"); + }); + }); + + // tokio::fs + group.bench_with_input(BenchmarkId::new("tokio_fs", label), &size, |b, _| { + let rt = Runtime::new().expect("runtime"); + let tmp = tempfile::tempdir().expect("tempdir"); + let path = tmp.path().join("in.bin"); + std::fs::write(&path, &data).expect("setup write"); + b.iter(|| { + rt.block_on(async { + let _ = tokio::fs::read(&path).await.expect("read"); + }); + }); + }); + + // file crate + group.bench_with_input(BenchmarkId::new("file_crate", label), &size, |b, _| { + 
let rt = Runtime::new().expect("runtime"); + let tmp = tempfile::tempdir().expect("tempdir"); + let path = tmp.path().join("in.bin"); + std::fs::write(&path, &data).expect("setup write"); + let thunker = Thunker::builder().build(); + let dir = rt.block_on(Root::bind_std(&thunker, tmp.path())).expect("bind"); + b.iter(|| { + rt.block_on(async { + let _ = dir.read("in.bin").await.expect("read"); + }); + }); + }); + + // async-fs + group.bench_with_input(BenchmarkId::new("async_fs", label), &size, |b, _| { + let rt = Runtime::new().expect("runtime"); + let tmp = tempfile::tempdir().expect("tempdir"); + let path = tmp.path().join("in.bin"); + std::fs::write(&path, &data).expect("setup write"); + b.iter(|| { + rt.block_on(async { + let _ = async_fs::read(&path).await.expect("read"); + }); + }); + }); + + // async_file + group.bench_with_input(BenchmarkId::new("async_file", label), &size, |b, _| { + let rt = Runtime::new().expect("runtime"); + let tmp = tempfile::tempdir().expect("tempdir"); + let path = tmp.path().join("in.bin"); + std::fs::write(&path, &data).expect("setup write"); + b.iter(|| { + rt.block_on(async { + let f = async_file::File::open(&path, Priority::unit_test()).await.expect("open"); + let _ = f.read_all(Priority::unit_test()).await.expect("read"); + }); + }); + }); + } + + group.finish(); +} + +// --------------------------------------------------------------------------- +// C. 
Streaming Read (1 MB file, 8 KB chunks) +// --------------------------------------------------------------------------- + +fn bench_streaming_read(c: &mut Criterion) { + const FILE_SIZE: usize = 1024 * 1024; + const CHUNK: usize = 8 * 1024; + + let data = make_data(FILE_SIZE); + + let mut group = c.benchmark_group("streaming_read"); + group.throughput(Throughput::Bytes(FILE_SIZE as u64)); + + // std::fs + group.bench_function("std_fs", |b| { + let tmp = tempfile::tempdir().expect("tempdir"); + let path = tmp.path().join("stream.bin"); + std::fs::write(&path, &data).expect("setup write"); + b.iter(|| { + let mut f = std::fs::File::open(&path).expect("open"); + let mut buf = [0u8; CHUNK]; + loop { + let n = f.read(&mut buf).expect("read"); + if n == 0 { + break; + } + } + }); + }); + + // tokio::fs + group.bench_function("tokio_fs", |b| { + let rt = Runtime::new().expect("runtime"); + let tmp = tempfile::tempdir().expect("tempdir"); + let path = tmp.path().join("stream.bin"); + std::fs::write(&path, &data).expect("setup write"); + b.iter(|| { + rt.block_on(async { + let mut f = tokio::fs::File::open(&path).await.expect("open"); + let mut buf = [0u8; CHUNK]; + loop { + let n = f.read(&mut buf).await.expect("read"); + if n == 0 { + break; + } + } + }); + }); + }); + + // file crate + group.bench_function("file_crate", |b| { + let rt = Runtime::new().expect("runtime"); + let tmp = tempfile::tempdir().expect("tempdir"); + let path = tmp.path().join("stream.bin"); + std::fs::write(&path, &data).expect("setup write"); + let thunker = Thunker::builder().build(); + let dir = rt.block_on(Root::bind_std(&thunker, tmp.path())).expect("bind"); + b.iter(|| { + rt.block_on(async { + let mut f = ReadOnlyFile::open(&dir, "stream.bin").await.expect("open"); + loop { + let buf = f.read_max(8192).await.expect("read"); + if buf.is_empty() { + break; + } + } + }); + }); + }); + + // async-fs + group.bench_function("async_fs", |b| { + let rt = Runtime::new().expect("runtime"); + let tmp = 
tempfile::tempdir().expect("tempdir"); + let path = tmp.path().join("stream.bin"); + std::fs::write(&path, &data).expect("setup write"); + b.iter(|| { + rt.block_on(async { + let mut f = async_fs::File::open(&path).await.expect("open"); + let mut buf = [0u8; CHUNK]; + loop { + let n = futures_lite::io::AsyncReadExt::read(&mut f, &mut buf).await.expect("read"); + if n == 0 { + break; + } + } + }); + }); + }); + + // async_file + group.bench_function("async_file", |b| { + let rt = Runtime::new().expect("runtime"); + let tmp = tempfile::tempdir().expect("tempdir"); + let path = tmp.path().join("stream.bin"); + std::fs::write(&path, &data).expect("setup write"); + b.iter(|| { + rt.block_on(async { + let f = async_file::File::open(&path, Priority::unit_test()).await.expect("open"); + loop { + let buf = f.read(CHUNK, Priority::unit_test()).await.expect("read"); + if buf.is_empty() { + break; + } + } + }); + }); + }); + + group.finish(); +} + +// --------------------------------------------------------------------------- +// D. 
Streaming Write (128 chunks of 8 KB) +// --------------------------------------------------------------------------- + +fn bench_streaming_write(c: &mut Criterion) { + const CHUNKS: usize = 128; + const CHUNK: usize = 8 * 1024; + const TOTAL: usize = CHUNKS * CHUNK; + + let chunk_data = make_data(CHUNK); + + let mut group = c.benchmark_group("streaming_write"); + group.throughput(Throughput::Bytes(TOTAL as u64)); + + // std::fs + group.bench_function("std_fs", |b| { + let tmp = tempfile::tempdir().expect("tempdir"); + let path = tmp.path().join("stream_w.bin"); + b.iter(|| { + let mut f = std::fs::File::create(&path).expect("create"); + for _ in 0..CHUNKS { + f.write_all(&chunk_data).expect("write"); + } + }); + }); + + // tokio::fs + group.bench_function("tokio_fs", |b| { + let rt = Runtime::new().expect("runtime"); + let tmp = tempfile::tempdir().expect("tempdir"); + let path = tmp.path().join("stream_w.bin"); + b.iter(|| { + rt.block_on(async { + let mut f = tokio::fs::File::create(&path).await.expect("create"); + for _ in 0..CHUNKS { + f.write_all(&chunk_data).await.expect("write"); + } + }); + }); + }); + + // file crate + group.bench_function("file_crate", |b| { + let rt = Runtime::new().expect("runtime"); + let tmp = tempfile::tempdir().expect("tempdir"); + let thunker = Thunker::builder().build(); + let dir = rt.block_on(Root::bind_std(&thunker, tmp.path())).expect("bind"); + b.iter(|| { + rt.block_on(async { + let mut f = WriteOnlyFile::create(&dir, "stream_w.bin").await.expect("create"); + for _ in 0..CHUNKS { + f.write_slice(&chunk_data).await.expect("write"); + } + f.flush().await.expect("flush"); + }); + }); + }); + + // async-fs + group.bench_function("async_fs", |b| { + let rt = Runtime::new().expect("runtime"); + let tmp = tempfile::tempdir().expect("tempdir"); + let path = tmp.path().join("stream_w.bin"); + b.iter(|| { + rt.block_on(async { + let mut f = async_fs::File::create(&path).await.expect("create"); + for _ in 0..CHUNKS { + 
futures_lite::io::AsyncWriteExt::write_all(&mut f, &chunk_data) + .await + .expect("write"); + } + futures_lite::io::AsyncWriteExt::flush(&mut f).await.expect("flush"); + }); + }); + }); + + group.finish(); +} + +// --------------------------------------------------------------------------- +// E. Many Small Files (100 files of 256 bytes: create + write + read + delete) +// --------------------------------------------------------------------------- + +fn bench_many_small_files(c: &mut Criterion) { + const COUNT: usize = 100; + const SIZE: usize = 256; + + let data = make_data(SIZE); + + let mut group = c.benchmark_group("many_small_files"); + group.throughput(Throughput::Elements(COUNT as u64)); + + // std::fs + group.bench_function("std_fs", |b| { + let tmp = tempfile::tempdir().expect("tempdir"); + let base = tmp.path().to_path_buf(); + b.iter(|| { + for i in 0..COUNT { + let p = base.join(format!("f{i}.bin")); + std::fs::write(&p, &data).expect("write"); + let _ = std::fs::read(&p).expect("read"); + std::fs::remove_file(&p).expect("remove"); + } + }); + }); + + // tokio::fs + group.bench_function("tokio_fs", |b| { + let rt = Runtime::new().expect("runtime"); + let tmp = tempfile::tempdir().expect("tempdir"); + let base = tmp.path().to_path_buf(); + b.iter(|| { + rt.block_on(async { + for i in 0..COUNT { + let p = base.join(format!("f{i}.bin")); + tokio::fs::write(&p, &data).await.expect("write"); + let _ = tokio::fs::read(&p).await.expect("read"); + tokio::fs::remove_file(&p).await.expect("remove"); + } + }); + }); + }); + + // file crate + group.bench_function("file_crate", |b| { + let rt = Runtime::new().expect("runtime"); + let tmp = tempfile::tempdir().expect("tempdir"); + let thunker = Thunker::builder().build(); + let dir = rt.block_on(Root::bind_std(&thunker, tmp.path())).expect("bind"); + b.iter(|| { + rt.block_on(async { + for i in 0..COUNT { + let name = format!("f{i}.bin"); + dir.write_slice(&name, &data).await.expect("write"); + let _ = 
dir.read(&name).await.expect("read"); + dir.remove_file(&name).await.expect("remove"); + } + }); + }); + }); + + // async-fs + group.bench_function("async_fs", |b| { + let rt = Runtime::new().expect("runtime"); + let tmp = tempfile::tempdir().expect("tempdir"); + let base = tmp.path().to_path_buf(); + b.iter(|| { + rt.block_on(async { + for i in 0..COUNT { + let p = base.join(format!("f{i}.bin")); + async_fs::write(&p, &data).await.expect("write"); + let _ = async_fs::read(&p).await.expect("read"); + async_fs::remove_file(&p).await.expect("remove"); + } + }); + }); + }); + + group.finish(); +} + +// --------------------------------------------------------------------------- +// F. Metadata (stat a file 100 times) +// --------------------------------------------------------------------------- + +fn bench_metadata(c: &mut Criterion) { + const ITERS: usize = 100; + + let mut group = c.benchmark_group("metadata"); + group.throughput(Throughput::Elements(ITERS as u64)); + + // std::fs + group.bench_function("std_fs", |b| { + let tmp = tempfile::tempdir().expect("tempdir"); + let path = tmp.path().join("meta.bin"); + std::fs::write(&path, b"x").expect("setup write"); + b.iter(|| { + for _ in 0..ITERS { + let _ = std::fs::metadata(&path).expect("metadata"); + } + }); + }); + + // tokio::fs + group.bench_function("tokio_fs", |b| { + let rt = Runtime::new().expect("runtime"); + let tmp = tempfile::tempdir().expect("tempdir"); + let path = tmp.path().join("meta.bin"); + std::fs::write(&path, b"x").expect("setup write"); + b.iter(|| { + rt.block_on(async { + for _ in 0..ITERS { + let _ = tokio::fs::metadata(&path).await.expect("metadata"); + } + }); + }); + }); + + // file crate + group.bench_function("file_crate", |b| { + let rt = Runtime::new().expect("runtime"); + let tmp = tempfile::tempdir().expect("tempdir"); + std::fs::write(tmp.path().join("meta.bin"), b"x").expect("setup write"); + let thunker = Thunker::builder().build(); + let dir = 
rt.block_on(Root::bind_std(&thunker, tmp.path())).expect("bind"); + b.iter(|| { + rt.block_on(async { + for _ in 0..ITERS { + let _ = dir.metadata("meta.bin").await.expect("metadata"); + } + }); + }); + }); + + // async-fs + group.bench_function("async_fs", |b| { + let rt = Runtime::new().expect("runtime"); + let tmp = tempfile::tempdir().expect("tempdir"); + let path = tmp.path().join("meta.bin"); + std::fs::write(&path, b"x").expect("setup write"); + b.iter(|| { + rt.block_on(async { + for _ in 0..ITERS { + let _ = async_fs::metadata(&path).await.expect("metadata"); + } + }); + }); + }); + + // async_file + group.bench_function("async_file", |b| { + let rt = Runtime::new().expect("runtime"); + let tmp = tempfile::tempdir().expect("tempdir"); + let path = tmp.path().join("meta.bin"); + std::fs::write(&path, b"x").expect("setup write"); + b.iter(|| { + rt.block_on(async { + for _ in 0..ITERS { + let f = async_file::File::open(&path, Priority::unit_test()).await.expect("open"); + let _ = f.metadata(Priority::unit_test()).await.expect("metadata"); + } + }); + }); + }); + + group.finish(); +} + +// --------------------------------------------------------------------------- +// G. 
Positional Read (1 MB file, 128 scattered 8 KB reads) +// --------------------------------------------------------------------------- + +fn bench_positional_read(c: &mut Criterion) { + const FILE_SIZE: usize = 1024 * 1024; + const CHUNKS: usize = 128; + const CHUNK: usize = 8192; + const TOTAL: usize = CHUNKS * CHUNK; + + let data = make_data(FILE_SIZE); + + let mut group = c.benchmark_group("positional_read"); + group.throughput(Throughput::Bytes(TOTAL as u64)); + + // std::fs + group.bench_function("std_fs", |b| { + let tmp = tempfile::tempdir().expect("tempdir"); + let path = tmp.path().join("pos_r.bin"); + std::fs::write(&path, &data).expect("setup write"); + b.iter(|| { + let mut f = std::fs::File::open(&path).expect("open"); + let mut buf = [0u8; CHUNK]; + for i in 0..CHUNKS { + let offset = (i * CHUNK) as u64; + f.seek(SeekFrom::Start(offset)).expect("seek"); + f.read_exact(&mut buf).expect("read"); + } + }); + }); + + // tokio::fs + group.bench_function("tokio_fs", |b| { + let rt = Runtime::new().expect("runtime"); + let tmp = tempfile::tempdir().expect("tempdir"); + let path = tmp.path().join("pos_r.bin"); + std::fs::write(&path, &data).expect("setup write"); + b.iter(|| { + rt.block_on(async { + let mut f = tokio::fs::File::open(&path).await.expect("open"); + let mut buf = [0u8; CHUNK]; + for i in 0..CHUNKS { + let offset = (i * CHUNK) as u64; + f.seek(SeekFrom::Start(offset)).await.expect("seek"); + f.read_exact(&mut buf).await.expect("read"); + } + }); + }); + }); + + // file crate + group.bench_function("file_crate", |b| { + let rt = Runtime::new().expect("runtime"); + let tmp = tempfile::tempdir().expect("tempdir"); + std::fs::write(tmp.path().join("pos_r.bin"), &data).expect("setup write"); + let thunker = Thunker::builder().build(); + let dir = rt.block_on(Root::bind_std(&thunker, tmp.path())).expect("bind"); + b.iter(|| { + rt.block_on(async { + let f = ReadOnlyPositionalFile::open(&dir, "pos_r.bin").await.expect("open"); + for i in 0..CHUNKS { + 
let offset = (i * CHUNK) as u64; + let _ = f.read_exact_at(offset, CHUNK).await.expect("read"); + } + }); + }); + }); + + // async-fs + group.bench_function("async_fs", |b| { + let rt = Runtime::new().expect("runtime"); + let tmp = tempfile::tempdir().expect("tempdir"); + let path = tmp.path().join("pos_r.bin"); + std::fs::write(&path, &data).expect("setup write"); + b.iter(|| { + rt.block_on(async { + let mut f = async_fs::File::open(&path).await.expect("open"); + let mut buf = [0u8; CHUNK]; + for i in 0..CHUNKS { + let offset = (i * CHUNK) as u64; + futures_lite::io::AsyncSeekExt::seek(&mut f, SeekFrom::Start(offset)) + .await + .expect("seek"); + futures_lite::io::AsyncReadExt::read_exact(&mut f, &mut buf).await.expect("read"); + } + }); + }); + }); + + group.finish(); +} + +// --------------------------------------------------------------------------- +// H. Positional Write (128 scattered 8 KB writes) +// --------------------------------------------------------------------------- + +fn bench_positional_write(c: &mut Criterion) { + const CHUNKS: usize = 128; + const CHUNK: usize = 8192; + const TOTAL: usize = CHUNKS * CHUNK; + + let chunk_data = make_data(CHUNK); + + let mut group = c.benchmark_group("positional_write"); + group.throughput(Throughput::Bytes(TOTAL as u64)); + + // std::fs + group.bench_function("std_fs", |b| { + let tmp = tempfile::tempdir().expect("tempdir"); + let path = tmp.path().join("pos_w.bin"); + b.iter(|| { + let mut f = std::fs::File::create(&path).expect("create"); + for i in 0..CHUNKS { + let offset = (i * CHUNK) as u64; + f.seek(SeekFrom::Start(offset)).expect("seek"); + f.write_all(&chunk_data).expect("write"); + } + }); + }); + + // tokio::fs + group.bench_function("tokio_fs", |b| { + let rt = Runtime::new().expect("runtime"); + let tmp = tempfile::tempdir().expect("tempdir"); + let path = tmp.path().join("pos_w.bin"); + b.iter(|| { + rt.block_on(async { + let mut f = tokio::fs::File::create(&path).await.expect("create"); + for i in 
0..CHUNKS { + let offset = (i * CHUNK) as u64; + f.seek(SeekFrom::Start(offset)).await.expect("seek"); + f.write_all(&chunk_data).await.expect("write"); + } + }); + }); + }); + + // file crate + group.bench_function("file_crate", |b| { + let rt = Runtime::new().expect("runtime"); + let tmp = tempfile::tempdir().expect("tempdir"); + let thunker = Thunker::builder().build(); + let dir = rt.block_on(Root::bind_std(&thunker, tmp.path())).expect("bind"); + b.iter(|| { + rt.block_on(async { + let f = WriteOnlyPositionalFile::create(&dir, "pos_w.bin").await.expect("create"); + for i in 0..CHUNKS { + let offset = (i * CHUNK) as u64; + f.write_slice_at(offset, &chunk_data).await.expect("write"); + } + f.flush().await.expect("flush"); + }); + }); + }); + + // async-fs + group.bench_function("async_fs", |b| { + let rt = Runtime::new().expect("runtime"); + let tmp = tempfile::tempdir().expect("tempdir"); + let path = tmp.path().join("pos_w.bin"); + b.iter(|| { + rt.block_on(async { + let mut f = async_fs::File::create(&path).await.expect("create"); + for i in 0..CHUNKS { + let offset = (i * CHUNK) as u64; + futures_lite::io::AsyncSeekExt::seek(&mut f, SeekFrom::Start(offset)) + .await + .expect("seek"); + futures_lite::io::AsyncWriteExt::write_all(&mut f, &chunk_data) + .await + .expect("write"); + } + }); + }); + }); + + group.finish(); +} + +// --------------------------------------------------------------------------- +// I. 
Concurrent Positional Reads (4 concurrent 256 KB reads from a 1 MB file)
+// ---------------------------------------------------------------------------
+
+fn bench_concurrent_positional_read(c: &mut Criterion) {
+    const FILE_SIZE: usize = 1024 * 1024;
+    const CHUNK: usize = 256 * 1024;
+
+    let data = make_data(FILE_SIZE);
+
+    let mut group = c.benchmark_group("concurrent_positional_read");
+    group.throughput(Throughput::Bytes(FILE_SIZE as u64));
+
+    // std::fs (sequential baseline: four CHUNK-sized reads cover the whole file)
+    group.bench_function("std_fs", |b| {
+        let tmp = tempfile::tempdir().expect("tempdir");
+        let path = tmp.path().join("conc_r.bin");
+        std::fs::write(&path, &data).expect("setup write");
+        b.iter(|| {
+            let mut f = std::fs::File::open(&path).expect("open");
+            let mut buf = vec![0u8; CHUNK];
+            for i in 0..4 {
+                let offset = (i * CHUNK) as u64;
+                f.seek(SeekFrom::Start(offset)).expect("seek");
+                f.read_exact(&mut buf).expect("read");
+            }
+        });
+    });
+
+    // file crate (sequential: same four reads, awaited one after another)
+    group.bench_function("file_crate_sequential", |b| {
+        let rt = Runtime::new().expect("runtime");
+        let tmp = tempfile::tempdir().expect("tempdir");
+        std::fs::write(tmp.path().join("conc_r.bin"), &data).expect("setup write");
+        let thunker = Thunker::builder().build();
+        let dir = rt.block_on(Root::bind_std(&thunker, tmp.path())).expect("bind");
+        b.iter(|| {
+            rt.block_on(async {
+                let f = ReadOnlyPositionalFile::open(&dir, "conc_r.bin").await.expect("open");
+                for i in 0..4 {
+                    let offset = (i * CHUNK) as u64;
+                    let _ = f.read_exact_at(offset, CHUNK).await.expect("read");
+                }
+            });
+        });
+    });
+
+    // file crate (concurrent — highlights &self positional advantage)
+    group.bench_function("file_crate_concurrent", |b| {
+        let rt = Runtime::new().expect("runtime");
+        let tmp = tempfile::tempdir().expect("tempdir");
+        std::fs::write(tmp.path().join("conc_r.bin"), &data).expect("setup write");
+        let thunker = Thunker::builder().build();
+        let dir = rt.block_on(Root::bind_std(&thunker, tmp.path())).expect("bind");
+        b.iter(|| {
+            rt.block_on(async {
+                let f = ReadOnlyPositionalFile::open(&dir, "conc_r.bin").await.expect("open");
+                let (r0, r1, r2, r3) = tokio::join!(
+                    f.read_exact_at(0, CHUNK),
+                    f.read_exact_at(CHUNK as u64, CHUNK),
+                    f.read_exact_at((2 * CHUNK) as u64, CHUNK),
+                    f.read_exact_at((3 * CHUNK) as u64, CHUNK),
+                );
+                r0.expect("read 0");
+                r1.expect("read 1");
+                r2.expect("read 2");
+                r3.expect("read 3");
+            });
+        });
+    });
+
+    group.finish();
+}
+
+criterion_group!(
+    benches,
+    bench_sequential_write,
+    bench_sequential_read,
+    bench_streaming_read,
+    bench_streaming_write,
+    bench_many_small_files,
+    bench_metadata,
+    bench_positional_read,
+    bench_positional_write,
+    bench_concurrent_positional_read,
+);
+criterion_main!(benches);
diff --git a/crates/file/examples/basic_read_write.rs b/crates/file/examples/basic_read_write.rs
new file mode 100644
index 000000000..4fbe6e411
--- /dev/null
+++ b/crates/file/examples/basic_read_write.rs
@@ -0,0 +1,35 @@
+// Copyright (c) Microsoft Corporation.
+// Licensed under the MIT License.
+
+//! Basic file read and write operations.
+//!
+//! Demonstrates binding to a directory with [`Root::bind_std`] and using
+//! the [`Directory`] capability to read and write files.
+
+use file::Root;
+use sync_thunk::Thunker;
+
+#[tokio::main]
+async fn main() -> std::io::Result<()> {
+    let tmp = tempfile::tempdir()?;
+    let thunker = Thunker::builder().build();
+    let dir = Root::bind_std(&thunker, tmp.path()).await?;
+
+    // Write a file from a byte slice.
+    dir.write_slice("greeting.txt", b"Hello, world!").await?;
+
+    // Read the entire file back as bytes.
+    let contents = dir.read("greeting.txt").await?;
+    println!("read {} bytes: {:?}", contents.len(), contents.first_slice());
+
+    // Read the file as a UTF-8 string.
+    let text = dir.read_to_string("greeting.txt").await?;
+    println!("text: {text}");
+
+    // Overwrite with new content.
+    dir.write_slice("greeting.txt", b"Goodbye!").await?;
+    let updated = dir.read_to_string("greeting.txt").await?;
+    println!("updated: {updated}");
+
+    Ok(())
+}
diff --git a/crates/file/examples/directory_ops.rs b/crates/file/examples/directory_ops.rs
new file mode 100644
index 000000000..6f498aaee
--- /dev/null
+++ b/crates/file/examples/directory_ops.rs
@@ -0,0 +1,62 @@
+// Copyright (c) Microsoft Corporation.
+// Licensed under the MIT License.
+
+//! Directory operations — creating, listing, copying, and removing entries.
+//!
+//! Demonstrates the capability-scoped directory API including subdirectory
+//! navigation, file copying across directories, and recursive removal.
+
+use file::Root;
+use sync_thunk::Thunker;
+
+#[tokio::main]
+async fn main() -> std::io::Result<()> {
+    let tmp = tempfile::tempdir()?;
+    let thunker = Thunker::builder().build();
+    let dir = Root::bind_std(&thunker, tmp.path()).await?;
+
+    // Create a nested directory structure.
+    dir.create_dir_all("src/utils").await?;
+    dir.create_dir("docs").await?;
+    println!("created directories");
+
+    // Populate some files.
+    dir.write_slice("src/main.rs", b"fn main() {}").await?;
+    dir.write_slice("src/utils/helpers.rs", b"// helpers").await?;
+    dir.write_slice("docs/README.md", b"# Docs").await?;
+
+    // List entries in the root.
+    let mut entries = dir.read_dir(".").await?;
+    println!("\nroot entries:");
+    while let Some(entry) = entries.next_entry().await? {
+        let file_type = entry.file_type().map_err(|e| std::io::Error::other(e.to_string()))?;
+        let kind = if file_type.is_dir() { "dir" } else { "file" };
+        println!(" [{kind}] {}", entry.file_name().to_string_lossy());
+    }
+
+    // Navigate into a subdirectory.
+    let src = dir.open_dir("src").await?;
+    let mut src_entries = src.read_dir(".").await?;
+    println!("\nsrc/ entries:");
+    while let Some(entry) = src_entries.next_entry().await? {
+        println!(" {}", entry.file_name().to_string_lossy());
+    }
+
+    // Copy a file across directories.
+    let docs = dir.open_dir("docs").await?;
+    dir.copy("src/main.rs", &docs, "main_backup.rs").await?;
+    let backup = docs.read_to_string("main_backup.rs").await?;
+    println!("\ncopied file contents: {backup}");
+
+    // Rename a file within a directory.
+    dir.rename("docs/README.md", &docs, "INDEX.md").await?;
+    println!("renamed README.md -> INDEX.md");
+
+    // Remove a single file, the now-empty `src/utils` directory, and then the whole `docs` tree.
+    dir.remove_file("src/utils/helpers.rs").await?;
+    dir.remove_dir("src/utils").await?;
+    dir.remove_dir_all("docs").await?;
+    println!("cleaned up files and directories");
+
+    Ok(())
+}
diff --git a/crates/file/examples/file_types.rs b/crates/file/examples/file_types.rs
new file mode 100644
index 000000000..0ee185381
--- /dev/null
+++ b/crates/file/examples/file_types.rs
@@ -0,0 +1,43 @@
+// Copyright (c) Microsoft Corporation.
+// Licensed under the MIT License.
+
+//! Typed file access with [`ReadOnlyFile`], [`WriteOnlyFile`], and [`File`].
+//!
+//! Each type enforces its access level at compile time. A [`File`] can
+//! be narrowed to either single-access type via [`From`] conversions.
+
+use bytesbuf::mem::Memory;
+use file::{File, ReadOnlyFile, Root, WriteOnlyFile};
+use sync_thunk::Thunker;
+
+#[tokio::main]
+async fn main() -> std::io::Result<()> {
+    let tmp = tempfile::tempdir()?;
+    let thunker = Thunker::builder().build();
+    let dir = Root::bind_std(&thunker, tmp.path()).await?;
+
+    // WriteOnlyFile — can write, cannot read.
+    let mut wf = WriteOnlyFile::create(&dir, "data.txt").await?;
+    let mut buf = wf.reserve(64);
+    buf.put_slice(*b"written via WriteOnlyFile");
+    wf.write(buf.consume_all()).await?;
+    wf.flush().await?;
+    println!("wrote via WriteOnlyFile");
+
+    // ReadOnlyFile — can read, cannot write.
+    let mut rf = ReadOnlyFile::open(&dir, "data.txt").await?;
+    let view = rf.read_max(1024).await?;
+    println!("read via ReadOnlyFile: {:?}", std::str::from_utf8(view.first_slice()));
+
+    // File — full access.
+    let mut rw = File::open(&dir, "data.txt").await?;
+    let meta = rw.metadata().await?;
+    println!("file length via File: {} bytes", meta.len());
+
+    // Narrow a File down to ReadOnlyFile.
+    let mut ro: ReadOnlyFile = rw.into();
+    let view = ro.read_max(1024).await?;
+    println!("after narrowing: {:?}", std::str::from_utf8(view.first_slice()));
+
+    Ok(())
+}
diff --git a/crates/file/examples/open_options.rs b/crates/file/examples/open_options.rs
new file mode 100644
index 000000000..8d857c911
--- /dev/null
+++ b/crates/file/examples/open_options.rs
@@ -0,0 +1,62 @@
+// Copyright (c) Microsoft Corporation.
+// Licensed under the MIT License.
+
+//! Flexible file creation with [`OpenOptions`].
+//!
+//! [`OpenOptions`] provides a builder for controlling exactly how a file
+//! is opened — analogous to [`std::fs::OpenOptions`] but fully async and
+//! capability-scoped.
+
+use file::{OpenOptions, Root};
+use sync_thunk::Thunker;
+
+#[tokio::main]
+async fn main() -> std::io::Result<()> {
+    let tmp = tempfile::tempdir()?;
+    let thunker = Thunker::builder().build();
+    let dir = Root::bind_std(&thunker, tmp.path()).await?;
+
+    // Create a new file for reading and writing.
+    let mut rw = OpenOptions::new()
+        .read(true)
+        .write(true)
+        .create(true)
+        .open(&dir, "options.txt")
+        .await?;
+
+    rw.write_slice(b"line 1\n").await?;
+    rw.write_slice(b"line 2\n").await?;
+    rw.flush().await?;
+
+    // Rewind and read everything back.
+    rw.rewind().await?;
+    let data = rw.read_max(4096).await?;
+    let text = std::str::from_utf8(data.first_slice()).unwrap_or("?");
+    println!("initial content:\n{text}");
+
+    // Re-open in truncate mode to clear the file.
+    let mut rw = OpenOptions::new()
+        .read(true)
+        .write(true)
+        .truncate(true)
+        .open(&dir, "options.txt")
+        .await?;
+
+    rw.write_slice(b"fresh start\n").await?;
+    rw.flush().await?;
+    rw.rewind().await?;
+    let data = rw.read_max(4096).await?;
+    let text = std::str::from_utf8(data.first_slice()).unwrap_or("?");
+    println!("after truncate:\n{text}");
+
+    // Open in append mode — writes always go to the end.
+    let mut appender = OpenOptions::new().append(true).open(&dir, "options.txt").await?;
+
+    appender.write_slice(b"appended line\n").await?;
+    appender.flush().await?;
+
+    let final_text = dir.read_to_string("options.txt").await?;
+    println!("final content:\n{final_text}");
+
+    Ok(())
+}
diff --git a/crates/file/examples/positional_io.rs b/crates/file/examples/positional_io.rs
new file mode 100644
index 000000000..253063b02
--- /dev/null
+++ b/crates/file/examples/positional_io.rs
@@ -0,0 +1,39 @@
+// Copyright (c) Microsoft Corporation.
+// Licensed under the MIT License.
+
+//! Positional I/O — reading and writing at specific offsets.
+//!
+//! Positional methods like [`PositionalFile::read_at`] and
+//! [`PositionalFile::write_at`] do not move the file cursor, enabling
+//! concurrent access to different regions of the same file.
+
+use file::{PositionalFile, Root};
+use sync_thunk::Thunker;
+
+#[tokio::main]
+async fn main() -> std::io::Result<()> {
+    let tmp = tempfile::tempdir()?;
+    let thunker = Thunker::builder().build();
+    let dir = Root::bind_std(&thunker, tmp.path()).await?;
+
+    // Seed a file with known content.
+    dir.write_slice("pos.bin", b"AAAA____BBBB").await?;
+
+    let pf = PositionalFile::open(&dir, "pos.bin").await?;
+
+    // Overwrite the middle section without touching the rest.
+    pf.write_slice_at(4, b"XXXX").await?;
+
+    // Read back individual regions — no cursor is involved.
+    let head = pf.read_exact_at(0, 4).await?;
+    let mid = pf.read_exact_at(4, 4).await?;
+    let tail = pf.read_exact_at(8, 4).await?;
+    println!(
+        "head={:?} mid={:?} tail={:?}",
+        std::str::from_utf8(head.first_slice()),
+        std::str::from_utf8(mid.first_slice()),
+        std::str::from_utf8(tail.first_slice()),
+    );
+
+    Ok(())
+}
diff --git a/crates/file/examples/positional_types.rs b/crates/file/examples/positional_types.rs
new file mode 100644
index 000000000..85e8296cd
--- /dev/null
+++ b/crates/file/examples/positional_types.rs
@@ -0,0 +1,56 @@
+// Copyright (c) Microsoft Corporation.
+// Licensed under the MIT License.
+
+//! Positional file types — [`ReadOnlyPositionalFile`], [`WriteOnlyPositionalFile`],
+//! and [`PositionalFile`].
+//!
+//! Positional files have no cursor. Every I/O operation specifies an explicit
+//! byte offset and takes `&self`, enabling concurrent access from multiple tasks.
+
+use file::{PositionalFile, ReadOnlyPositionalFile, Root, WriteOnlyPositionalFile};
+use sync_thunk::Thunker;
+
+#[tokio::main]
+async fn main() -> std::io::Result<()> {
+    let tmp = tempfile::tempdir()?;
+    let thunker = Thunker::builder().build();
+    let dir = Root::bind_std(&thunker, tmp.path()).await?;
+
+    // WriteOnlyPositionalFile — write-only access at explicit offsets.
+    let wf = WriteOnlyPositionalFile::create(&dir, "pos.txt").await?;
+    wf.write_slice_at(0, b"Hello, ").await?;
+    wf.write_slice_at(7, b"positional world!").await?;
+    wf.flush().await?;
+    println!("wrote via WriteOnlyPositionalFile");
+
+    // ReadOnlyPositionalFile — read-only access at explicit offsets.
+    let rf = ReadOnlyPositionalFile::open(&dir, "pos.txt").await?;
+    let view = rf.read_exact_at(0, 24).await?;
+    println!("read via ReadOnlyPositionalFile: {:?}", std::str::from_utf8(view.first_slice()),);
+    // Read a sub-range without affecting any cursor.
+    let mid = rf.read_exact_at(7, 10).await?;
+    println!(" sub-range [7..17]: {:?}", std::str::from_utf8(mid.first_slice()));
+
+    // PositionalFile — full read-write access.
+    let pf = PositionalFile::open(&dir, "pos.txt").await?;
+    // Overwrite part of the file and then read back.
+    pf.write_slice_at(7, b"POSITIONAL WORLD!").await?;
+    let view = pf.read_exact_at(0, 24).await?;
+    println!("read via PositionalFile: {:?}", std::str::from_utf8(view.first_slice()),);
+
+    // Narrow a PositionalFile down to ReadOnlyPositionalFile.
+    let ro: ReadOnlyPositionalFile = pf.into();
+    let view = ro.read_exact_at(0, 24).await?;
+    println!("after narrowing: {:?}", std::str::from_utf8(view.first_slice()),);
+
+    // Concurrent reads — positional I/O takes &self, so multiple
+    // reads can run in parallel without cursor conflicts.
+    let (a, b) = tokio::join!(ro.read_exact_at(0, 5), ro.read_exact_at(7, 10));
+    println!(
+        "concurrent reads: {:?} and {:?}",
+        std::str::from_utf8(a?.first_slice()),
+        std::str::from_utf8(b?.first_slice()),
+    );
+
+    Ok(())
+}
diff --git a/crates/file/examples/streaming_io.rs b/crates/file/examples/streaming_io.rs
new file mode 100644
index 000000000..1a11a2028
--- /dev/null
+++ b/crates/file/examples/streaming_io.rs
@@ -0,0 +1,45 @@
+// Copyright (c) Microsoft Corporation.
+// Licensed under the MIT License.
+
+//! Streaming I/O — reading and writing files in chunks.
+//!
+//! Shows how to write data incrementally with [`WriteOnlyFile`] and
+//! then stream it back with [`ReadOnlyFile::read_max`].
+
+use bytesbuf::mem::Memory;
+use file::{ReadOnlyFile, Root, WriteOnlyFile};
+use sync_thunk::Thunker;
+
+#[tokio::main]
+async fn main() -> std::io::Result<()> {
+    let tmp = tempfile::tempdir()?;
+    let thunker = Thunker::builder().build();
+    let dir = Root::bind_std(&thunker, tmp.path()).await?;
+
+    // Write 10 chunks into a file.
+ let mut wf = WriteOnlyFile::create(&dir, "stream.bin").await?; + for i in 0u8..10 { + let mut buf = wf.reserve(128); + let line = format!("chunk {i}\n"); + buf.put_slice(line.as_bytes()); + wf.write(buf.consume_all()).await?; + } + wf.flush().await?; + println!("wrote 10 chunks"); + + // Stream the file back in small pieces. + let mut rf = ReadOnlyFile::open(&dir, "stream.bin").await?; + let mut total = 0usize; + loop { + let chunk = rf.read_max(16).await?; + if chunk.is_empty() { + break; // EOF + } + total += chunk.len(); + let text = std::str::from_utf8(chunk.first_slice()).unwrap_or("?"); + print!("{text}"); + } + println!("---\nread {total} bytes total"); + + Ok(()) +} diff --git a/crates/file/favicon.ico b/crates/file/favicon.ico new file mode 100644 index 000000000..d35aef2f3 --- /dev/null +++ b/crates/file/favicon.ico @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:208219ee37d9022f63f7b9797e6e69de1d5931b00c0338271c65103e0bd6730c +size 189766 diff --git a/crates/file/logo.png b/crates/file/logo.png new file mode 100644 index 000000000..89b2a2259 --- /dev/null +++ b/crates/file/logo.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d74b2d9e398febd77398fa3ce3fc583d2ee96d6ad47632000eaf074dec5c987e +size 68517 diff --git a/crates/file/src/dir_builder.rs b/crates/file/src/dir_builder.rs new file mode 100644 index 000000000..ffa756a36 --- /dev/null +++ b/crates/file/src/dir_builder.rs @@ -0,0 +1,57 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +use std::io::Result; +use std::path::Path; + +use crate::directory::Directory; + +/// A builder used to create directories in various manners. +/// +/// This builder allows configuring whether directories should be created +/// recursively. +#[derive(Debug)] +pub struct DirBuilder { + recursive: bool, +} + +impl DirBuilder { + /// Creates a new set of options with default mode and recursive set to `false`. 
+ #[must_use] + pub const fn new() -> Self { + Self { recursive: false } + } + + /// Indicates that directories should be created recursively, creating all + /// parent components if they are missing. + /// + /// When set to `false` (the default), only a single directory level is + /// created. + pub const fn recursive(&mut self, recursive: bool) -> &mut Self { + self.recursive = recursive; + self + } + + /// Creates the specified directory with the options configured in this builder. + /// + /// The path is relative to the given directory capability. + /// + /// # Errors + /// + /// Returns an error if the directory already exists (when not recursive), + /// if the parent does not exist (when not recursive), or if the process + /// lacks permissions. + pub async fn create(&self, dir: &Directory, path: impl AsRef) -> Result<()> { + if self.recursive { + dir.create_dir_all(path).await + } else { + dir.create_dir(path).await + } + } +} + +impl Default for DirBuilder { + fn default() -> Self { + Self::new() + } +} diff --git a/crates/file/src/dir_entry.rs b/crates/file/src/dir_entry.rs new file mode 100644 index 000000000..5f70f161e --- /dev/null +++ b/crates/file/src/dir_entry.rs @@ -0,0 +1,73 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +use std::ffi::{OsStr, OsString}; +use std::fs::{FileType, Metadata}; +use std::io::{Error, Result}; + +/// An entry within a directory. +/// +/// This type is returned by [`ReadDir::next_entry`](crate::read_dir::ReadDir::next_entry). +/// Unlike `std::fs::DirEntry`, this type does not expose the full path to the +/// entry, preserving the capability-based access model. +/// +/// Metadata and file type are fetched eagerly during directory iteration, +/// so accessing them is allocation-free and instant. 
+#[derive(Debug)] +pub struct DirEntry { + file_name: OsString, + file_type: Result, + metadata: Result, +} + +impl DirEntry { + /// Creates a `DirEntry` by eagerly capturing all data from a `std::fs::DirEntry`. + pub(crate) fn from_std(entry: &std::fs::DirEntry) -> Self { + let file_name = entry.file_name(); + let metadata = entry.metadata(); + // Extract file_type from metadata when available, avoiding a + // separate syscall on platforms where file_type() would stat again. + let file_type = metadata.as_ref().map_or_else(|_| entry.file_type(), |m| Ok(m.file_type())); + Self { + file_name, + file_type, + metadata, + } + } + + /// Returns the bare file name of this directory entry without any other + /// leading path component. + #[must_use] + pub fn file_name(&self) -> &OsStr { + &self.file_name + } + + /// Returns the metadata for the file that this entry points at. + /// + /// This function will not traverse symlinks if this entry points at a + /// symlink. + /// + /// # Errors + /// + /// Returns an error if the metadata could not be read when the directory + /// was iterated. + pub const fn metadata(&self) -> core::result::Result<&Metadata, &Error> { + self.metadata.as_ref() + } + + /// Returns the file type for the file that this entry points at. + /// + /// This function will not traverse symlinks if this entry points at a + /// symlink. + /// + /// # Errors + /// + /// Returns an error if the file type could not be read when the directory + /// was iterated. + pub const fn file_type(&self) -> core::result::Result { + match &self.file_type { + Ok(ft) => Ok(*ft), + Err(e) => Err(e), + } + } +} diff --git a/crates/file/src/directory.rs b/crates/file/src/directory.rs new file mode 100644 index 000000000..34c574c6f --- /dev/null +++ b/crates/file/src/directory.rs @@ -0,0 +1,576 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. 
+ +use std::fs::{File, Metadata, Permissions}; +use std::io::{Error, ErrorKind, Result, Write as _}; +use std::path::{Path, PathBuf}; + +use bytesbuf::BytesView; +use bytesbuf::mem::{Memory, MemoryShared}; +use sync_thunk::{Thunker, thunk}; + +use crate::io_helpers::read_into_bytesbuf; +use crate::path_utils::safe_join; +use crate::shared_memory::SharedMemory; + +const DEFAULT_FALLBACK_SIZE: usize = 8192; + +/// A capability representing access to a directory on the filesystem. +/// +/// All paths used with a `Directory` are relative to the directory it represents. +/// Path components that would escape the directory (such as `..` at the root) are +/// rejected, enforcing capability-based access control. +#[derive(Debug)] +pub struct Directory { + base_path: PathBuf, + thunker: Thunker, +} + +impl Directory { + pub(crate) fn new(base_path: PathBuf, thunker: Thunker) -> Self { + Self { base_path, thunker } + } + + pub(crate) fn thunker(&self) -> &Thunker { + &self.thunker + } + + #[thunk(from = self.thunker)] + pub(crate) async fn open_std_file(&self, path: &Path, opts: std::fs::OpenOptions) -> Result { + let full_path = safe_join(&self.base_path, path)?; + opts.open(&full_path) + } + + /// Opens a subdirectory, returning a new [`Directory`] capability scoped to it. + /// + /// The returned `Directory` restricts all operations to the subdirectory and + /// its descendants. This is the primary mechanism for narrowing capabilities + /// in the capability-based access model. + /// + /// The given `path` is relative to this directory. + /// + /// # Errors + /// + /// Returns an error if the path does not exist, is not a directory, or + /// if the process lacks permission to access it. 
+ #[inline] + pub async fn open_dir(&self, path: impl AsRef) -> Result { + self.open_dir_impl(path.as_ref()).await + } + + #[thunk(from = self.thunker)] + async fn open_dir_impl(&self, path: &Path) -> Result { + let full_path = safe_join(&self.base_path, path)?; + let metadata = std::fs::metadata(&full_path)?; + if !metadata.is_dir() { + return Err(Error::new(ErrorKind::NotADirectory, "path is not a directory")); + } + Ok(Self { + base_path: full_path, + thunker: self.thunker.clone(), + }) + } + + /// Returns the canonical, absolute form of a path with all intermediate + /// components normalized and symbolic links resolved. + /// + /// The given `path` is relative to this directory. + /// + /// # Errors + /// + /// This function will return an error if `path` does not exist or if any + /// component in the path is not a directory (when used as an intermediate + /// component). + #[inline] + pub async fn canonicalize(&self, path: impl AsRef) -> Result { + self.canonicalize_impl(path.as_ref()).await + } + + #[thunk(from = self.thunker)] + async fn canonicalize_impl(&self, path: &Path) -> Result { + let full_path = safe_join(&self.base_path, path)?; + std::fs::canonicalize(&full_path) + } + + /// Copies the contents of one file to another. This function will also + /// copy the permission bits of the original file to the destination file. + /// This function will overwrite the contents of the destination. Note that + /// if `src` and `dst` both point to the same file, then the file will + /// likely get truncated by this operation. + /// + /// On success, the total number of bytes copied is returned. + /// + /// The `src` path is relative to this directory, while the `dst` path is + /// relative to `dst_dir`. + /// + /// # Errors + /// + /// This function will return an error if the source file does not exist, + /// if the user lacks permissions to read the source or write the + /// destination, or if any other I/O error occurs. 
+ #[inline] + pub async fn copy(&self, src: impl AsRef, dst_dir: &Self, dst: impl AsRef) -> Result { + self.copy_impl(src.as_ref(), dst_dir, dst.as_ref()).await + } + + #[thunk(from = self.thunker)] + async fn copy_impl(&self, src: &Path, dst_dir: &Directory, dst: &Path) -> Result { + let src_path = safe_join(&self.base_path, src)?; + let dst_path = safe_join(&dst_dir.base_path, dst)?; + std::fs::copy(&src_path, &dst_path) + } + + /// Creates a new, empty directory at the provided path. + /// + /// The given `path` is relative to this directory. + /// + /// # Errors + /// + /// This function will return an error if the user lacks permissions to + /// create the directory, if the parent directory of `path` does not exist, + /// or if `path` already exists. + #[inline] + pub async fn create_dir(&self, path: impl AsRef) -> Result<()> { + self.create_dir_impl(path.as_ref()).await + } + + #[thunk(from = self.thunker)] + async fn create_dir_impl(&self, path: &Path) -> Result<()> { + let full_path = safe_join(&self.base_path, path)?; + std::fs::create_dir(&full_path) + } + + /// Recursively creates a directory and all of its parent components if + /// they are missing. + /// + /// The given `path` is relative to this directory. + /// + /// # Errors + /// + /// This function will return an error if the user lacks permissions to + /// create any of the directories, or if any other I/O error occurs. + /// This function will succeed if the full directory path already exists. + #[inline] + pub async fn create_dir_all(&self, path: impl AsRef) -> Result<()> { + self.create_dir_all_impl(path.as_ref()).await + } + + #[thunk(from = self.thunker)] + async fn create_dir_all_impl(&self, path: &Path) -> Result<()> { + let full_path = safe_join(&self.base_path, path)?; + std::fs::create_dir_all(&full_path) + } + + /// Returns `Ok(true)` if the path points at an existing entity. + /// + /// This function will traverse symbolic links to query information about + /// the destination file. 
The given `path` is relative to this directory. + /// + /// Returns `Ok(false)` if the path does not exist or if existence cannot + /// be determined, and `Err` only on I/O errors unrelated to the existence + /// of the path. + /// + /// # Errors + /// + /// This function will return an error only if it encounters an I/O error + /// that is not related to whether the path exists. + #[inline] + pub async fn exists(&self, path: impl AsRef) -> Result { + self.exists_impl(path.as_ref()).await + } + + #[thunk(from = self.thunker)] + async fn exists_impl(&self, path: &Path) -> Result { + let full_path = safe_join(&self.base_path, path)?; + full_path.try_exists() + } + + /// Creates a new hard link on the filesystem. + /// + /// The `dst` file will be a link pointing to the `src` file. Neither path + /// may be a directory. The `src` path is relative to this directory, while + /// the `dst` path is relative to `dst_dir`. + /// + /// # Errors + /// + /// This function will return an error if `src` does not exist, if either + /// path is a directory, if the user lacks permissions, or if the source + /// and destination are on different filesystems. + #[inline] + pub async fn hard_link(&self, src: impl AsRef, dst_dir: &Self, dst: impl AsRef) -> Result<()> { + self.hard_link_impl(src.as_ref(), dst_dir, dst.as_ref()).await + } + + #[thunk(from = self.thunker)] + async fn hard_link_impl(&self, src: &Path, dst_dir: &Directory, dst: &Path) -> Result<()> { + let src_path = safe_join(&self.base_path, src)?; + let dst_path = safe_join(&dst_dir.base_path, dst)?; + std::fs::hard_link(&src_path, &dst_path) + } + + /// Given a path, queries the file system to get information about a file, + /// directory, etc. + /// + /// This function will traverse symbolic links to query information about + /// the destination file. The given `path` is relative to this directory. 
+ /// + /// # Errors + /// + /// This function will return an error if the path does not exist, if the + /// user lacks permissions to query metadata, or if any other I/O error + /// occurs. + #[inline] + pub async fn metadata(&self, path: impl AsRef) -> Result { + self.metadata_impl(path.as_ref()).await + } + + #[thunk(from = self.thunker)] + async fn metadata_impl(&self, path: &Path) -> Result { + let full_path = safe_join(&self.base_path, path)?; + std::fs::metadata(&full_path) + } + + /// Reads the entire contents of a file into a [`BytesView`]. + /// + /// This is a convenience function for opening a file, reading it, and + /// closing it. Returns the contents allocated from the default memory + /// pool. The given `path` is relative to this directory. + /// + /// # Errors + /// + /// This function will return an error if the file does not exist, if the + /// user lacks permissions to read it, or if any other I/O error occurs. + #[inline] + pub async fn read(&self, path: impl AsRef) -> Result { + self.read_impl(path.as_ref(), SharedMemory::global()).await + } + + /// Reads the entire contents of a file into a [`BytesView`] using the + /// specified memory provider. + /// + /// This allows the caller to control buffer allocation, enabling + /// zero-copy transfers to other subsystems that share the same memory + /// provider. The given `path` is relative to this directory. + /// + /// # Errors + /// + /// This function will return an error if the file does not exist, if the + /// user lacks permissions to read it, or if any other I/O error occurs. 
+ #[inline] + pub async fn read_with_memory(&self, path: impl AsRef, memory: impl MemoryShared) -> Result { + self.read_impl(path.as_ref(), SharedMemory::new(memory)).await + } + + #[thunk(from = self.thunker)] + async fn read_impl(&self, path: &Path, memory: SharedMemory) -> Result { + let full_path = safe_join(&self.base_path, path)?; + let mut file = File::open(&full_path)?; + let len = usize::try_from(file.metadata()?.len()).unwrap_or(usize::MAX); + if len > 0 { + let mut buf = memory.reserve(len); + let mut total = 0; + while total < len { + let n = read_into_bytesbuf(&mut file, &mut buf, len - total)?; + if n == 0 { + break; + } + total += n; + } + Ok(buf.consume_all()) + } else { + // Zero-length metadata (e.g. procfs files that report 0 size but + // have content). Try one read; if empty, return immediately without + // wasting a large allocation. + let mut buf = memory.reserve(DEFAULT_FALLBACK_SIZE); + let n = read_into_bytesbuf(&mut file, &mut buf, DEFAULT_FALLBACK_SIZE)?; + if n == 0 { + return Ok(buf.consume_all()); + } + // The file has content despite reporting 0 length — keep reading. + loop { + if buf.remaining_capacity() == 0 { + buf.reserve(DEFAULT_FALLBACK_SIZE, &memory); + } + let to_read = buf.remaining_capacity(); + let n = read_into_bytesbuf(&mut file, &mut buf, to_read)?; + if n == 0 { + break; + } + } + Ok(buf.consume_all()) + } + } + + /// Returns a [`ReadDir`](crate::read_dir::ReadDir) over the entries + /// within a directory. + /// + /// The given `path` is relative to this directory. + /// + /// # Errors + /// + /// This function will return an error if the path does not exist, if the + /// path is not a directory, if the user lacks permissions to read the + /// directory, or if any other I/O error occurs. 
+ #[inline] + pub async fn read_dir(&self, path: impl AsRef) -> Result { + self.read_dir_impl(path.as_ref()).await + } + + #[thunk(from = self.thunker)] + async fn read_dir_impl(&self, path: &Path) -> Result { + let full_path = safe_join(&self.base_path, path)?; + let read_dir = std::fs::read_dir(&full_path)?; + Ok(crate::read_dir::ReadDir::new(read_dir, self.thunker.clone())) + } + + /// Reads a symbolic link, returning the file that the link points to. + /// + /// The given `path` is relative to this directory. + /// + /// # Errors + /// + /// This function will return an error if the path does not exist, if the + /// path is not a symbolic link, if the user lacks permissions, or if any + /// other I/O error occurs. + #[inline] + pub async fn read_link(&self, path: impl AsRef) -> Result { + self.read_link_impl(path.as_ref()).await + } + + #[thunk(from = self.thunker)] + async fn read_link_impl(&self, path: &Path) -> Result { + let full_path = safe_join(&self.base_path, path)?; + std::fs::read_link(&full_path) + } + + /// Reads the entire contents of a file into a string. + /// + /// This is a convenience function for opening a file, reading it, and + /// closing it. The given `path` is relative to this directory. + /// + /// # Errors + /// + /// This function will return an error if the file does not exist, if the + /// user lacks permissions to read it, if the file's contents are not + /// valid UTF-8, or if any other I/O error occurs. + #[inline] + pub async fn read_to_string(&self, path: impl AsRef) -> Result { + self.read_to_string_impl(path.as_ref()).await + } + + #[thunk(from = self.thunker)] + async fn read_to_string_impl(&self, path: &Path) -> Result { + let full_path = safe_join(&self.base_path, path)?; + std::fs::read_to_string(&full_path) + } + + /// Removes an existing, empty directory. + /// + /// The given `path` is relative to this directory. 
+ /// + /// # Errors + /// + /// This function will return an error if the path does not exist, if the + /// directory is not empty, if the user lacks permissions, or if any other + /// I/O error occurs. + #[inline] + pub async fn remove_dir(&self, path: impl AsRef) -> Result<()> { + self.remove_dir_impl(path.as_ref()).await + } + + #[thunk(from = self.thunker)] + async fn remove_dir_impl(&self, path: &Path) -> Result<()> { + let full_path = safe_join(&self.base_path, path)?; + std::fs::remove_dir(&full_path) + } + + /// Removes a directory at this path, after removing all its contents. + /// Use carefully! + /// + /// The given `path` is relative to this directory. + /// + /// # Errors + /// + /// This function will return an error if the path does not exist, if the + /// user lacks permissions to remove the directory or any of its contents, + /// or if any other I/O error occurs. + #[inline] + pub async fn remove_dir_all(&self, path: impl AsRef) -> Result<()> { + self.remove_dir_all_impl(path.as_ref()).await + } + + #[thunk(from = self.thunker)] + async fn remove_dir_all_impl(&self, path: &Path) -> Result<()> { + let full_path = safe_join(&self.base_path, path)?; + std::fs::remove_dir_all(&full_path) + } + + /// Removes a file from the filesystem. + /// + /// There is no guarantee that the file is immediately deleted. The given + /// `path` is relative to this directory. + /// + /// # Errors + /// + /// This function will return an error if the path does not exist, if the + /// user lacks permissions to remove the file, or if any other I/O error + /// occurs. 
+ #[inline] + pub async fn remove_file(&self, path: impl AsRef) -> Result<()> { + self.remove_file_impl(path.as_ref()).await + } + + #[thunk(from = self.thunker)] + async fn remove_file_impl(&self, path: &Path) -> Result<()> { + let full_path = safe_join(&self.base_path, path)?; + std::fs::remove_file(&full_path) + } + + /// Renames a file or directory to a new name, replacing the original file + /// if the destination already exists. + /// + /// The `src` path is relative to this directory, while the `dst` path is + /// relative to `dst_dir`. + /// + /// # Errors + /// + /// This function will return an error if `src` does not exist, if the + /// user lacks permissions, or if any other I/O error occurs. + #[inline] + pub async fn rename(&self, src: impl AsRef, dst_dir: &Self, dst: impl AsRef) -> Result<()> { + self.rename_impl(src.as_ref(), dst_dir, dst.as_ref()).await + } + + #[thunk(from = self.thunker)] + async fn rename_impl(&self, src: &Path, dst_dir: &Directory, dst: &Path) -> Result<()> { + let src_path = safe_join(&self.base_path, src)?; + let dst_path = safe_join(&dst_dir.base_path, dst)?; + std::fs::rename(&src_path, &dst_path) + } + + /// Changes the permissions found on a file or a directory. + /// + /// The given `path` is relative to this directory. + /// + /// # Errors + /// + /// This function will return an error if the path does not exist, if the + /// user lacks permissions to change the file permissions, or if any other + /// I/O error occurs. + #[inline] + pub async fn set_permissions(&self, path: impl AsRef, perms: Permissions) -> Result<()> { + self.set_permissions_impl(path.as_ref(), perms).await + } + + #[thunk(from = self.thunker)] + async fn set_permissions_impl(&self, path: &Path, perms: Permissions) -> Result<()> { + let full_path = safe_join(&self.base_path, path)?; + std::fs::set_permissions(&full_path, perms) + } + + /// Creates a new symbolic link on the filesystem. 
+ /// + /// The `link` path will be a symbolic link pointing to the `original` + /// path. Both paths are relative to this directory. + /// + /// # Errors + /// + /// This function will return an error if `link` already exists, if the + /// user lacks permissions, or if any other I/O error occurs. + #[inline] + pub async fn symlink(&self, original: impl AsRef, link: impl AsRef) -> Result<()> { + self.symlink_impl(original.as_ref(), link.as_ref()).await + } + + #[thunk(from = self.thunker)] + async fn symlink_impl(&self, original: &Path, link: &Path) -> Result<()> { + let original_path = safe_join(&self.base_path, original)?; + let link_path = safe_join(&self.base_path, link)?; + #[cfg(unix)] + { + std::os::unix::fs::symlink(&original_path, &link_path) + } + #[cfg(windows)] + { + if std::fs::metadata(&original_path).map(|m| m.is_dir()).unwrap_or(false) { + std::os::windows::fs::symlink_dir(&original_path, &link_path) + } else { + std::os::windows::fs::symlink_file(&original_path, &link_path) + } + } + } + + /// Queries the metadata about a file without following symlinks. + /// + /// If the path is a symlink, metadata for the symlink itself is returned + /// rather than the file it points to. The given `path` is relative to + /// this directory. + /// + /// # Errors + /// + /// This function will return an error if the path does not exist, if the + /// user lacks permissions to query metadata, or if any other I/O error + /// occurs. + #[inline] + pub async fn symlink_metadata(&self, path: impl AsRef) -> Result { + self.symlink_metadata_impl(path.as_ref()).await + } + + #[thunk(from = self.thunker)] + async fn symlink_metadata_impl(&self, path: &Path) -> Result { + let full_path = safe_join(&self.base_path, path)?; + std::fs::symlink_metadata(&full_path) + } + + /// Writes the entire contents of a [`BytesView`] as a file. + /// + /// This is a convenience function for creating or truncating a file, + /// writing to it, and closing it. 
The given `path` is relative to this + /// directory. + /// + /// # Errors + /// + /// This function will return an error if the parent directory of the path + /// does not exist, if the user lacks permissions to write the file, or if + /// any other I/O error occurs. + #[inline] + pub async fn write(&self, path: impl AsRef, contents: BytesView) -> Result<()> { + self.write_impl(path.as_ref(), contents).await + } + + #[thunk(from = self.thunker)] + async fn write_impl(&self, path: &Path, contents: BytesView) -> Result<()> { + let full_path = safe_join(&self.base_path, path)?; + let mut file = File::create(&full_path)?; + let mut contents = contents; + while !contents.is_empty() { + let slice = contents.first_slice(); + let len = slice.len(); + file.write_all(slice)?; + contents.advance(len); + } + Ok(()) + } + + /// Writes a byte slice as the entire contents of a file. + /// + /// This is a convenience wrapper around [`write`](Self::write) for + /// callers working with `&[u8]` data. The given `path` is relative to + /// this directory. + /// + /// # Errors + /// + /// This function will return an error if the parent directory of the path + /// does not exist, if the user lacks permissions to write the file, or if + /// any other I/O error occurs. + #[inline] + pub async fn write_slice(&self, path: impl AsRef, contents: impl AsRef<[u8]>) -> Result<()> { + self.write_slice_impl(path.as_ref(), contents.as_ref()).await + } + + #[thunk(from = self.thunker)] + async fn write_slice_impl(&self, path: &Path, contents: &[u8]) -> Result<()> { + let full_path = safe_join(&self.base_path, path)?; + std::fs::write(&full_path, contents) + } +} diff --git a/crates/file/src/file.rs b/crates/file/src/file.rs new file mode 100644 index 000000000..6bc8fc62d --- /dev/null +++ b/crates/file/src/file.rs @@ -0,0 +1,581 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. 
+ +use core::mem::MaybeUninit; +use std::fs::{FileTimes, Metadata, Permissions, TryLockError}; +use std::io::{Error, Result, SeekFrom}; +use std::path::Path; + +use bytesbuf::mem::{HasMemory, Memory, MemoryShared}; +use bytesbuf::{BytesBuf, BytesView}; + +use crate::directory::Directory; +use crate::file_inner::FileInner; +use crate::open_options::OpenOptions; +use crate::shared_memory::SharedMemory; + +/// A seekable read-write file handle within a capability-based filesystem. +/// +/// A `File` provides both read and write access to a file. It implements +/// both [`bytesbuf_io::Read`] and [`bytesbuf_io::Write`] for streaming I/O using +/// managed buffers. +/// +/// Obtain a `File` by calling [`File::open`], [`File::create`], +/// [`File::create_new`], or through [`OpenOptions`]. +#[derive(Debug)] +pub struct File { + inner: FileInner, +} + +impl File { + pub(crate) const fn new(inner: FileInner) -> Self { + Self { inner } + } + + pub(crate) fn into_inner(self) -> FileInner { + self.inner + } + + /// Returns a new [`OpenOptions`] object. + /// + /// This allows opening a file with specific combinations of read, write, + /// append, truncate, and create options. + #[must_use] + #[inline] + pub const fn options() -> OpenOptions { + OpenOptions::new() + } + + /// Opens an existing file in read-write mode. + /// + /// The path is relative to the given directory capability. + /// + /// # Errors + /// + /// Returns an error if the file does not exist, if the path escapes the + /// directory capability, or on other I/O errors. + #[inline] + pub async fn open(dir: &Directory, path: impl AsRef) -> Result { + Ok(Self { + inner: FileInner::open_readwrite(dir, path, SharedMemory::global()).await?, + }) + } + + /// Opens an existing file in read-write mode using the specified memory provider. + /// + /// The custom memory provider allows the caller to control buffer allocation, + /// enabling zero-copy transfers with other subsystems sharing the same memory + /// provider. 
+ /// + /// # Errors + /// + /// Returns an error if the file does not exist, if the path escapes the + /// directory capability, or on other I/O errors. + #[inline] + pub async fn open_with_memory(dir: &Directory, path: impl AsRef, memory: impl MemoryShared) -> Result { + Ok(Self { + inner: FileInner::open_readwrite(dir, path, SharedMemory::new(memory)).await?, + }) + } + + /// Opens a file in read-write mode. + /// + /// This function will create the file if it does not exist, and will truncate + /// it if it does. + /// + /// # Errors + /// + /// Returns an error if the path escapes the directory capability or on other + /// I/O errors. + #[inline] + pub async fn create(dir: &Directory, path: impl AsRef) -> Result { + Ok(Self { + inner: FileInner::create_readwrite(dir, path, SharedMemory::global()).await?, + }) + } + + /// Opens a file in read-write mode using the specified memory provider. + /// + /// Creates the file if it does not exist, truncates it if it does. + /// + /// # Errors + /// + /// Returns an error if the path escapes the directory capability or on other + /// I/O errors. + #[inline] + pub async fn create_with_memory(dir: &Directory, path: impl AsRef, memory: impl MemoryShared) -> Result { + Ok(Self { + inner: FileInner::create_readwrite(dir, path, SharedMemory::new(memory)).await?, + }) + } + + /// Creates a new file in read-write mode; returns an error if the file exists. + /// + /// If the call succeeds, the file is guaranteed to be new. This is atomic, + /// avoiding TOCTOU race conditions. + /// + /// # Errors + /// + /// Returns an error if the file already exists, if the path escapes the + /// directory capability, or on other I/O errors. 
+ #[inline] + pub async fn create_new(dir: &Directory, path: impl AsRef) -> Result { + Ok(Self { + inner: FileInner::create_new_readwrite(dir, path, SharedMemory::global()).await?, + }) + } + + /// Creates a new file in read-write mode using the specified memory provider; + /// returns an error if the file exists. + /// + /// # Errors + /// + /// Returns an error if the file already exists, if the path escapes the + /// directory capability, or on other I/O errors. + #[inline] + pub async fn create_new_with_memory(dir: &Directory, path: impl AsRef, memory: impl MemoryShared) -> Result { + Ok(Self { + inner: FileInner::create_new_readwrite(dir, path, SharedMemory::new(memory)).await?, + }) + } + + /// Reads up to `len` bytes from the current position, making a best effort + /// to return the full amount. + /// + /// Performs multiple reads as necessary. May return fewer bytes only when + /// EOF is reached before `len` bytes are available. + /// + /// # Errors + /// + /// Returns an error if a read operation fails due to an I/O error. + #[inline] + pub async fn read(&mut self, len: usize) -> Result { + self.inner.read_into_bytesview(len).await + } + + /// Reads at most `len` bytes from the current position in a single + /// operation. + /// + /// May return fewer bytes than requested. A return of zero bytes indicates + /// EOF. + /// + /// # Errors + /// + /// Returns an error if the read operation fails due to an I/O error. + #[inline] + pub async fn read_max(&mut self, len: usize) -> Result { + self.inner.read_max_into_bytesview(len).await + } + + /// Reads exactly `len` bytes from the current position. + /// + /// Performs multiple reads as necessary. Returns an error if EOF is + /// reached before `len` bytes are read. + /// + /// # Errors + /// + /// Returns [`std::io::ErrorKind::UnexpectedEof`] if the file ends before `len` + /// bytes are read, or another error on I/O failure. 
+ #[inline] + pub async fn read_exact(&mut self, len: usize) -> Result { + self.inner.read_exact_into_bytesview(len).await + } + + /// Reads an implementation-chosen number of bytes into the provided buffer. + /// + /// Returns the number of bytes read and the updated buffer. A return of + /// 0 bytes indicates EOF. + /// + /// # Errors + /// + /// Returns an error if the read operation fails due to an I/O error. + #[inline] + pub async fn read_into_bytesbuf(&mut self, buf: &mut BytesBuf) -> Result { + self.inner.read_into_bytesbuf(buf).await + } + + /// Reads at most `len` bytes into the provided buffer in a single + /// operation. + /// + /// Returns the number of bytes read and the updated buffer. + /// + /// # Errors + /// + /// Returns an error if the read operation fails due to an I/O error. + #[inline] + pub async fn read_max_into_bytesbuf(&mut self, len: usize, buf: &mut BytesBuf) -> Result { + self.inner.read_max_into_bytesbuf(len, buf).await + } + + /// Reads exactly `len` bytes into the provided buffer. + /// + /// Performs multiple reads as necessary. Returns an error if EOF is + /// reached before `len` bytes are appended. + /// + /// # Errors + /// + /// Returns [`std::io::ErrorKind::UnexpectedEof`] if the file ends before `len` + /// bytes are read, or another error on I/O failure. + #[inline] + pub async fn read_exact_into_bytesbuf(&mut self, len: usize, buf: &mut BytesBuf) -> Result<()> { + self.inner.read_exact_into_bytesbuf(len, buf).await + } + + /// Reads into the provided slice, making a best effort to fill it + /// completely. + /// + /// Returns the total number of bytes read. May return fewer than + /// `buf.len()` only when EOF is reached. + /// + /// # Errors + /// + /// Returns an error if a read operation fails due to an I/O error. + #[inline] + pub async fn read_into_slice(&mut self, buf: &mut [u8]) -> Result { + self.inner.read_into_slice(buf).await + } + + /// Fills the provided slice with exactly `buf.len()` bytes. 
+ /// + /// Performs multiple reads as necessary. Returns an error if EOF is + /// reached before the slice is fully filled. + /// + /// # Errors + /// + /// Returns [`std::io::ErrorKind::UnexpectedEof`] if the file ends before the + /// slice is filled, or another error on I/O failure. + #[inline] + pub async fn read_exact_into_slice(&mut self, buf: &mut [u8]) -> Result<()> { + self.inner.read_exact_into_slice(buf).await + } + + /// Fills the provided uninitialized slice with exactly `buf.len()` bytes. + /// + /// On success every element in `buf` is initialized. + /// + /// # Errors + /// + /// Returns [`std::io::ErrorKind::UnexpectedEof`] if the file ends before the slice + /// is filled, or another error on I/O failure. + #[inline] + pub async fn read_exact_into_uninit(&mut self, buf: &mut [MaybeUninit]) -> Result<()> { + self.inner.read_exact_into_uninit(buf).await + } + + /// Writes the provided byte sequence to the file. + /// + /// The method completes when all bytes have been written. Partial writes are + /// considered a failure. + /// + /// For optimal efficiency, the data should originate from buffers allocated via + /// this file's memory provider (see [`Memory::reserve`] or [`HasMemory::memory`]). + /// + /// # Errors + /// + /// Returns an error if the underlying I/O operation fails. + #[inline] + pub async fn write(&mut self, data: BytesView) -> Result<()> { + self.inner.write(data).await + } + + /// Writes a byte slice to the file at the current cursor position. + /// + /// Convenience method for `&[u8]` callers. The data is copied internally; + /// prefer [`write`](Self::write) with [`BytesView`] for large writes. + /// + /// # Errors + /// + /// Returns an error if the underlying I/O operation fails. + #[inline] + pub async fn write_slice(&mut self, data: impl AsRef<[u8]>) -> Result<()> { + self.inner.write_slice(data.as_ref()).await + } + + /// Queries metadata about the underlying file. 
+ /// + /// # Errors + /// + /// Returns an error if the underlying I/O operation fails. + #[inline] + pub async fn metadata(&mut self) -> Result { + self.inner.metadata().await + } + + /// Truncates or extends the underlying file, updating the size to become `size`. + /// + /// If `size` is less than the current file size, the file shrinks. If greater, + /// it extends with zeroes. The file cursor is not changed. + /// + /// # Errors + /// + /// Returns an error if the underlying I/O operation fails. + #[inline] + pub async fn set_len(&mut self, size: u64) -> Result<()> { + self.inner.set_len(size).await + } + + /// Changes the modification time of the underlying file. + /// + /// # Errors + /// + /// Returns an error if the underlying I/O operation fails. + #[inline] + pub async fn set_modified(&mut self, modified: std::time::SystemTime) -> Result<()> { + self.inner.set_modified(modified).await + } + + /// Changes the permissions on the underlying file. + /// + /// # Errors + /// + /// Returns an error if the underlying I/O operation fails. + #[inline] + pub async fn set_permissions(&mut self, perms: Permissions) -> Result<()> { + self.inner.set_permissions(perms).await + } + + /// Changes the timestamps of the underlying file. + /// + /// # Errors + /// + /// Returns an error if the underlying I/O operation fails. + #[inline] + pub async fn set_times(&mut self, times: FileTimes) -> Result<()> { + self.inner.set_times(times).await + } + + /// Attempts to sync all OS-internal file content and metadata to disk. + /// + /// # Errors + /// + /// Returns an error if the underlying I/O operation fails. + #[inline] + pub async fn sync_all(&mut self) -> Result<()> { + self.inner.sync_all().await + } + + /// Similar to [`sync_all`](Self::sync_all), except that it might not synchronize + /// file metadata. + /// + /// # Errors + /// + /// Returns an error if the underlying I/O operation fails. 
+ #[inline] + pub async fn sync_data(&mut self) -> Result<()> { + self.inner.sync_data().await + } + + /// Flushes any buffered data to the underlying file. + /// + /// # Errors + /// + /// Returns an error if the underlying I/O operation fails. + #[inline] + pub async fn flush(&mut self) -> Result<()> { + self.inner.flush().await + } + + /// Acquires an exclusive lock on the file. + /// + /// Blocks until the lock is acquired. + /// + /// # Errors + /// + /// Returns an error if the underlying I/O operation fails. + #[inline] + pub async fn lock(&mut self) -> Result<()> { + self.inner.lock().await + } + + /// Acquires a shared (non-exclusive) lock on the file. + /// + /// Blocks until the lock is acquired. + /// + /// # Errors + /// + /// Returns an error if the underlying I/O operation fails. + #[inline] + pub async fn lock_shared(&mut self) -> Result<()> { + self.inner.lock_shared().await + } + + /// Tries to acquire an exclusive lock on the file. + /// + /// Returns `Err(TryLockError::WouldBlock)` if another lock is held. + /// + /// # Errors + /// + /// Returns an error if the lock cannot be acquired or on I/O failure. + #[inline] + pub async fn try_lock(&mut self) -> core::result::Result<(), TryLockError> { + self.inner.try_lock().await + } + + /// Tries to acquire a shared lock on the file. + /// + /// Returns `Err(TryLockError::WouldBlock)` if an exclusive lock is held. + /// + /// # Errors + /// + /// Returns an error if the lock cannot be acquired or on I/O failure. + #[inline] + pub async fn try_lock_shared(&mut self) -> core::result::Result<(), TryLockError> { + self.inner.try_lock_shared().await + } + + /// Releases all locks on the file. + /// + /// # Errors + /// + /// Returns an error if the underlying I/O operation fails. + #[inline] + pub async fn unlock(&mut self) -> Result<()> { + self.inner.unlock().await + } + + /// Seeks to a position in the file. + /// + /// Returns the new position from the start of the file. 
+ /// + /// # Errors + /// + /// Returns an error if the underlying I/O operation fails. + #[inline] + pub async fn seek(&mut self, pos: SeekFrom) -> Result { + self.inner.seek(pos).await + } + + /// Returns the current seek position from the start of the file. + /// + /// # Errors + /// + /// Returns an error if the underlying I/O operation fails. + #[inline] + pub async fn stream_position(&mut self) -> Result { + self.inner.stream_position().await + } + + /// Rewinds to the beginning of the file. + /// + /// # Errors + /// + /// Returns an error if the underlying I/O operation fails. + #[inline] + pub async fn rewind(&mut self) -> Result<()> { + self.inner.rewind().await + } + + /// Creates a new `File` instance that shares the same underlying file handle. + /// + /// Reads, writes, and seeks will affect both instances simultaneously. + /// + /// # Errors + /// + /// Returns an error if the underlying I/O operation fails. + #[inline] + pub async fn try_clone(&mut self) -> Result { + Ok(Self { + inner: self.inner.try_clone().await?, + }) + } + + /// Returns `true` if the underlying file descriptor refers to a terminal. 
+ #[must_use] + #[inline] + pub fn is_terminal(&self) -> bool { + self.inner.is_terminal() + } +} + +impl HasMemory for File { + fn memory(&self) -> impl MemoryShared { + self.inner.memory().clone() + } +} + +impl Memory for File { + fn reserve(&self, min_bytes: usize) -> BytesBuf { + self.inner.memory().reserve(min_bytes) + } +} + +impl bytesbuf_io::Read for File { + type Error = Error; + + async fn read_at_most_into(&mut self, len: usize, mut into: BytesBuf) -> core::result::Result<(usize, BytesBuf), Self::Error> { + let n = self.inner.read_max_into_bytesbuf(len, &mut into).await?; + Ok((n, into)) + } + + async fn read_more_into(&mut self, mut into: BytesBuf) -> core::result::Result<(usize, BytesBuf), Self::Error> { + let n = self.inner.read_into_bytesbuf(&mut into).await?; + Ok((n, into)) + } + + async fn read_any(&mut self) -> core::result::Result { + let mut buf = self.inner.memory().reserve(8192); + let _ = self.inner.read_into_bytesbuf(&mut buf).await?; + Ok(buf) + } +} + +impl bytesbuf_io::Write for File { + type Error = Error; + + async fn write(&mut self, data: BytesView) -> core::result::Result<(), Self::Error> { + Self::write(self, data).await + } +} + +#[cfg(feature = "sync-compat")] +impl std::io::Read for File { + fn read(&mut self, buf: &mut [u8]) -> Result { + self.inner.sync_read(buf) + } +} + +#[cfg(feature = "sync-compat")] +impl std::io::Write for File { + fn write(&mut self, buf: &[u8]) -> Result { + self.inner.sync_write(buf) + } + + fn flush(&mut self) -> Result<()> { + self.inner.sync_flush() + } +} + +#[cfg(feature = "sync-compat")] +impl std::io::Seek for File { + fn seek(&mut self, pos: SeekFrom) -> Result { + self.inner.sync_seek(pos) + } +} + +#[cfg(unix)] +impl std::os::unix::io::AsRawFd for File { + fn as_raw_fd(&self) -> std::os::unix::io::RawFd { + self.inner.as_raw_fd() + } +} + +#[cfg(unix)] +impl std::os::unix::io::AsFd for File { + fn as_fd(&self) -> std::os::unix::io::BorrowedFd<'_> { + self.inner.as_fd() + } +} + 
+#[cfg(windows)] +impl std::os::windows::io::AsRawHandle for File { + fn as_raw_handle(&self) -> std::os::windows::io::RawHandle { + self.inner.as_raw_handle() + } +} + +#[cfg(windows)] +impl std::os::windows::io::AsHandle for File { + fn as_handle(&self) -> std::os::windows::io::BorrowedHandle<'_> { + self.inner.as_handle() + } +} diff --git a/crates/file/src/file_inner.rs b/crates/file/src/file_inner.rs new file mode 100644 index 000000000..f7b33e819 --- /dev/null +++ b/crates/file/src/file_inner.rs @@ -0,0 +1,388 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +use std::fs::{File, FileTimes, Metadata, Permissions, TryLockError}; +use std::io::{ErrorKind, Read, Result, Seek as _, SeekFrom, Write as _}; + +use bytesbuf::mem::Memory; +use bytesbuf::{BytesBuf, BytesView}; +use sync_thunk::{Thunker, thunk}; + +use crate::io_helpers::read_into_bytesbuf; +use crate::shared_memory::SharedMemory; + +const DEFAULT_READ_SIZE: usize = 8192; + +#[derive(Debug)] +pub struct FileInner { + file: File, + thunker: Thunker, + memory: SharedMemory, +} + +impl FileInner { + /// Creates a `SeekableFileInner` from a standard `std::fs::File`. + pub fn from_std(file: File, dir: &crate::directory::Directory, memory: SharedMemory) -> Self { + Self { + file, + thunker: dir.thunker().clone(), + memory, + } + } + + /// Opens a file with the given options, dispatching the blocking open to a worker thread. + pub async fn open_file( + dir: &crate::directory::Directory, + path: impl AsRef, + memory: SharedMemory, + opts: std::fs::OpenOptions, + ) -> Result { + let file = dir.open_std_file(path.as_ref(), opts).await?; + Ok(Self::from_std(file, dir, memory)) + } + + /// Opens a file in read-only mode. 
+ pub async fn open_readonly(dir: &crate::directory::Directory, path: impl AsRef, memory: SharedMemory) -> Result { + let mut opts = std::fs::OpenOptions::new(); + opts.read(true); + Self::open_file(dir, path, memory, opts).await + } + + /// Creates (or truncates) a file in write-only mode. + pub async fn create_writeonly( + dir: &crate::directory::Directory, + path: impl AsRef, + memory: SharedMemory, + ) -> Result { + let mut opts = std::fs::OpenOptions::new(); + opts.write(true).create(true).truncate(true); + Self::open_file(dir, path, memory, opts).await + } + + /// Atomically creates a new file in write-only mode; fails if it exists. + pub async fn create_new_writeonly( + dir: &crate::directory::Directory, + path: impl AsRef, + memory: SharedMemory, + ) -> Result { + let mut opts = std::fs::OpenOptions::new(); + opts.write(true).create_new(true); + Self::open_file(dir, path, memory, opts).await + } + + /// Opens an existing file in read-write mode. + pub async fn open_readwrite( + dir: &crate::directory::Directory, + path: impl AsRef, + memory: SharedMemory, + ) -> Result { + let mut opts = std::fs::OpenOptions::new(); + opts.read(true).write(true); + Self::open_file(dir, path, memory, opts).await + } + + /// Creates (or truncates) a file in read-write mode. + pub async fn create_readwrite( + dir: &crate::directory::Directory, + path: impl AsRef, + memory: SharedMemory, + ) -> Result { + let mut opts = std::fs::OpenOptions::new(); + opts.read(true).write(true).create(true).truncate(true); + Self::open_file(dir, path, memory, opts).await + } + + /// Atomically creates a new file in read-write mode; fails if it exists. 
+ pub async fn create_new_readwrite( + dir: &crate::directory::Directory, + path: impl AsRef, + memory: SharedMemory, + ) -> Result { + let mut opts = std::fs::OpenOptions::new(); + opts.read(true).write(true).create_new(true); + Self::open_file(dir, path, memory, opts).await + } + + pub const fn memory(&self) -> &SharedMemory { + &self.memory + } + + /// Returns the raw file descriptor (Unix). + #[cfg(unix)] + pub fn as_raw_fd(&self) -> std::os::unix::io::RawFd { + use std::os::unix::io::AsRawFd; + self.file.as_raw_fd() + } + + /// Returns a borrowed file descriptor. + #[cfg(unix)] + pub fn as_fd(&self) -> std::os::unix::io::BorrowedFd<'_> { + use std::os::unix::io::AsFd; + self.file.as_fd() + } + + /// Returns the raw handle (Windows). + #[cfg(windows)] + pub fn as_raw_handle(&self) -> std::os::windows::io::RawHandle { + use std::os::windows::io::AsRawHandle; + self.file.as_raw_handle() + } + + /// Returns a borrowed handle. + #[cfg(windows)] + pub fn as_handle(&self) -> std::os::windows::io::BorrowedHandle<'_> { + use std::os::windows::io::AsHandle; + self.file.as_handle() + } + + /// Returns whether the underlying file descriptor refers to a terminal. + pub fn is_terminal(&self) -> bool { + use std::io::IsTerminal; + self.file.is_terminal() + } + + /// Synchronous read, bypassing the worker pool. + #[cfg(feature = "sync-compat")] + pub fn sync_read(&mut self, buf: &mut [u8]) -> Result { + self.file.read(buf) + } + + /// Synchronous write, bypassing the worker pool. + #[cfg(feature = "sync-compat")] + pub fn sync_write(&mut self, buf: &[u8]) -> Result { + use std::io::Write; + self.file.write(buf) + } + + /// Synchronous flush, bypassing the worker pool. + #[cfg(feature = "sync-compat")] + pub fn sync_flush(&mut self) -> Result<()> { + use std::io::Write; + self.file.flush() + } + + /// Synchronous seek, bypassing the worker pool. 
+ #[cfg(feature = "sync-compat")] + pub fn sync_seek(&mut self, pos: SeekFrom) -> Result { + use std::io::Seek; + self.file.seek(pos) + } + + #[thunk(from = self.thunker)] + pub async fn read_into_bytesview(&mut self, len: usize) -> Result { + let mut buf = self.memory.reserve(len); + read_bytesbuf_best_effort(&mut self.file, &mut buf, len)?; + Ok(buf.consume_all()) + } + + #[thunk(from = self.thunker)] + pub async fn read_exact_into_bytesview(&mut self, len: usize) -> Result { + let mut buf = self.memory.reserve(len); + read_bytesbuf_exact(&mut self.file, &mut buf, len)?; + Ok(buf.consume_all()) + } + + #[thunk(from = self.thunker)] + pub async fn read_max_into_bytesview(&mut self, len: usize) -> Result { + let mut buf = self.memory.reserve(len); + read_into_bytesbuf(&mut self.file, &mut buf, len)?; + Ok(buf.consume_all()) + } + + #[thunk(from = self.thunker)] + pub async fn read_exact_into_bytesbuf(&mut self, len: usize, buf: &mut BytesBuf) -> Result<()> { + if buf.remaining_capacity() < len { + buf.reserve(len - buf.remaining_capacity(), &self.memory); + } + read_bytesbuf_exact(&mut self.file, buf, len)?; + Ok(()) + } + + #[thunk(from = self.thunker)] + pub async fn read_max_into_bytesbuf(&mut self, len: usize, into: &mut BytesBuf) -> Result { + let needed = len.saturating_sub(into.remaining_capacity()); + if needed > 0 { + into.reserve(needed, &self.memory); + } + read_into_bytesbuf(&mut self.file, into, len) + } + + pub async fn read_into_bytesbuf(&mut self, into: &mut BytesBuf) -> Result { + self.read_max_into_bytesbuf(DEFAULT_READ_SIZE, into).await + } + + #[thunk(from = self.thunker)] + pub async fn read_into_slice(&mut self, buf: &mut [u8]) -> Result { + read_slice_best_effort(&mut self.file, buf) + } + + pub async fn read_exact_into_uninit(&mut self, buf: &mut [core::mem::MaybeUninit]) -> Result<()> { + // SAFETY: MaybeUninit has the same layout as u8. 
+ // read_exact_into_slice writes exactly buf.len() bytes on success, + // fully initializing the contents. + let initialized = unsafe { core::slice::from_raw_parts_mut(buf.as_mut_ptr().cast::(), buf.len()) }; + self.read_exact_into_slice(initialized).await + } + + #[thunk(from = self.thunker)] + pub async fn read_exact_into_slice(&mut self, buf: &mut [u8]) -> Result<()> { + self.file.read_exact(buf) + } + + #[thunk(from = self.thunker)] + pub async fn write(&mut self, data: BytesView) -> Result<()> { + write_all_bytesview(&mut self.file, &data) + } + + #[thunk(from = self.thunker)] + pub async fn write_slice(&mut self, data: &[u8]) -> Result<()> { + self.file.write_all(data) + } + + #[thunk(from = self.thunker)] + pub async fn metadata(&mut self) -> Result { + self.file.metadata() + } + + #[thunk(from = self.thunker)] + pub async fn set_len(&mut self, size: u64) -> Result<()> { + self.file.set_len(size) + } + + #[thunk(from = self.thunker)] + pub async fn set_modified(&mut self, modified: std::time::SystemTime) -> Result<()> { + self.file.set_modified(modified) + } + + #[thunk(from = self.thunker)] + pub async fn set_permissions(&mut self, perms: Permissions) -> Result<()> { + self.file.set_permissions(perms) + } + + #[thunk(from = self.thunker)] + pub async fn set_times(&mut self, times: FileTimes) -> Result<()> { + self.file.set_times(times) + } + + #[thunk(from = self.thunker)] + pub async fn sync_all(&mut self) -> Result<()> { + self.file.sync_all() + } + + #[thunk(from = self.thunker)] + pub async fn sync_data(&mut self) -> Result<()> { + self.file.sync_data() + } + + #[thunk(from = self.thunker)] + pub async fn flush(&mut self) -> Result<()> { + self.file.flush() + } + + #[thunk(from = self.thunker)] + pub async fn lock(&mut self) -> Result<()> { + self.file.lock() + } + + #[thunk(from = self.thunker)] + pub async fn lock_shared(&mut self) -> Result<()> { + self.file.lock_shared() + } + + #[thunk(from = self.thunker)] + pub async fn try_lock(&mut self) -> 
core::result::Result<(), TryLockError> { + self.file.try_lock() + } + + #[thunk(from = self.thunker)] + pub async fn try_lock_shared(&mut self) -> core::result::Result<(), TryLockError> { + self.file.try_lock_shared() + } + + #[thunk(from = self.thunker)] + pub async fn unlock(&mut self) -> Result<()> { + self.file.unlock() + } + + #[thunk(from = self.thunker)] + pub async fn seek(&mut self, pos: SeekFrom) -> Result { + self.file.seek(pos) + } + + #[thunk(from = self.thunker)] + pub async fn stream_position(&mut self) -> Result { + self.file.stream_position() + } + + #[thunk(from = self.thunker)] + pub async fn rewind(&mut self) -> Result<()> { + self.file.rewind() + } + + #[thunk(from = self.thunker)] + pub async fn try_clone(&mut self) -> Result { + let new_file = self.file.try_clone()?; + Ok(Self { + file: new_file, + thunker: self.thunker.clone(), + memory: self.memory.clone(), + }) + } +} + +/// Reads into a `BytesBuf` in a loop until `len` bytes are read or EOF. +fn read_bytesbuf_best_effort(reader: &mut impl Read, buf: &mut BytesBuf, len: usize) -> Result { + let mut total = 0; + while total < len { + let n = read_into_bytesbuf(reader, buf, len - total)?; + if n == 0 { + break; + } + total += n; + } + Ok(total) +} + +/// Reads into a `BytesBuf` in a loop until exactly `len` bytes are read; returns +/// `UnexpectedEof` on premature EOF. +fn read_bytesbuf_exact(reader: &mut impl Read, buf: &mut BytesBuf, len: usize) -> Result { + let start = buf.len(); + while buf.len() - start < len { + let remaining = len - (buf.len() - start); + let n = read_into_bytesbuf(reader, buf, remaining)?; + if n == 0 { + return Err(std::io::Error::new( + ErrorKind::UnexpectedEof, + "failed to read exact number of bytes", + )); + } + } + Ok(len) +} + +/// Reads into a slice in a loop until the buffer is full or EOF. 
+fn read_slice_best_effort(reader: &mut impl Read, buf: &mut [u8]) -> Result { + let mut total = 0; + while total < buf.len() { + let n = reader.read(&mut buf[total..])?; + if n == 0 { + break; + } + total += n; + } + Ok(total) +} + +/// Writes all slices of a [`BytesView`] to a writer. +fn write_all_bytesview(writer: &mut impl std::io::Write, data: &BytesView) -> Result<()> { + for (slice, _meta) in data.slices() { + writer.write_all(slice)?; + } + Ok(()) +} + +impl Memory for FileInner { + fn reserve(&self, min_bytes: usize) -> BytesBuf { + self.memory.reserve(min_bytes) + } +} diff --git a/crates/file/src/io_helpers.rs b/crates/file/src/io_helpers.rs new file mode 100644 index 000000000..c5b680a35 --- /dev/null +++ b/crates/file/src/io_helpers.rs @@ -0,0 +1,30 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +//! Shared I/O helpers for reading into [`BytesBuf`] buffers. + +use std::io::{Read, Result}; + +use bytesbuf::BytesBuf; + +/// Reads up to `len` bytes from `reader` directly into `buf`'s unfilled capacity, +/// avoiding a temporary `Vec` allocation. +pub fn read_into_bytesbuf(reader: &mut impl Read, buf: &mut BytesBuf, len: usize) -> Result { + let unfilled = buf.first_unfilled_slice(); + let read_len = len.min(unfilled.len()); + + // SAFETY: MaybeUninit has the same layout as u8. + // We are passing uninitialized memory to the reader. + // Since we know the reader is a file, this is safe in practice as the OS + // writes to the buffer without reading it. + // The read call writes `n` bytes; we only advance by `n` below. + let dst = unsafe { core::slice::from_raw_parts_mut(unfilled.as_mut_ptr().cast::(), read_len) }; + let n = reader.read(dst)?; + if n > 0 { + // SAFETY: `n` bytes were just written by the read call. 
+ unsafe { + buf.advance(n); + } + } + Ok(n) +} diff --git a/crates/file/src/lib.rs b/crates/file/src/lib.rs new file mode 100644 index 000000000..46e2ad4d7 --- /dev/null +++ b/crates/file/src/lib.rs @@ -0,0 +1,314 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +#![cfg_attr(coverage_nightly, feature(coverage_attribute))] +#![cfg_attr(docsrs, feature(doc_cfg))] +#![doc(html_logo_url = "https://media.githubusercontent.com/media/microsoft/oxidizer/refs/heads/main/crates/file/logo.png")] +#![doc(html_favicon_url = "https://media.githubusercontent.com/media/microsoft/oxidizer/refs/heads/main/crates/file/favicon.ico")] + +//! Zero-copy asynchronous filesystem API. +//! +//! This crate provides a filesystem API that differs from [`std::fs`] in three key ways: +//! +//! 1. **Fully asynchronous.** Every I/O operation is `async`. The implementation uses +//! a pool of dedicated background threads to perform blocking filesystem calls, +//! keeping the async executor free. +//! +//! 2. **Managed buffers via [`bytesbuf`].** Reads produce +//! [`BytesView`](bytesbuf::BytesView) values backed by pooled memory; writes +//! accept them. This enables zero-copy data pipelines: data read from a file can +//! be written to a socket (or another file) without intermediate copies, as long +//! as both endpoints share a compatible memory provider. +//! +//! 3. **Capability-based access control.** All filesystem operations are scoped to a +//! [`Directory`] capability obtained via [`Root::bind_std`]. Paths are always relative +//! to a directory, and path traversals that would escape the directory (such as +//! leading `/` or `..` above the root) are rejected. This makes it possible to +//! grant a subsystem access to a specific directory tree without risking access +//! to the rest of the filesystem. +//! +//! # Quick start +//! +//! ```no_run +//! # async fn example() -> std::io::Result<()> { +//! use std::path::Path; +//! +//! use file::Root; +//! 
use sync_thunk::Thunker; +//! +//! // Bind to a directory — the only place an absolute path is accepted. +//! let thunker = Thunker::builder().build(); +//! let dir = Root::bind_std(&thunker, Path::new("/var/data")).await?; +//! +//! // Read and write whole files through the Directory capability. +//! dir.write_slice("greeting.txt", b"Hello!").await?; +//! let text = dir.read_to_string("greeting.txt").await?; +//! +//! // Narrow the capability to a subdirectory. +//! let sub = dir.open_dir("subdir").await?; +//! let data = sub.read("nested_file.txt").await?; +//! # Ok(()) +//! # } +//! ``` +//! +//! # File types +//! +//! The crate provides **six file types** organized into two families. Within each +//! family, three types enforce read, write, or read-write access at the type level. +//! +//! ## Seekable files — streaming I/O with a cursor +//! +//! Seekable files maintain an internal cursor that advances with each read or +//! write. They implement [`bytesbuf_io::Read`] and/or [`bytesbuf_io::Write`] for +//! streaming I/O and support [`seek`](File::seek), +//! [`stream_position`](File::stream_position), and [`rewind`](File::rewind). +//! +//! Because the cursor is shared mutable state, all I/O methods take **`&mut self`**, +//! ensuring only one operation is in flight at a time. This makes seekable files +//! ideal for sequential processing: reading a log from top to bottom, writing a +//! report line by line, or appending to a file. +//! +//! | Type | Access | Obtained via | +//! |------|--------|-------------| +//! | [`ReadOnlyFile`] | Read + seek | [`ReadOnlyFile::open`] | +//! | [`WriteOnlyFile`] | Write + seek | [`WriteOnlyFile::create`], [`WriteOnlyFile::create_new`] | +//! | [`File`] | Read + write + seek | [`File::open`], [`File::create`], [`OpenOptions`] | +//! +//! ```no_run +//! # async fn example() -> std::io::Result<()> { +//! use std::path::Path; +//! +//! use file::{ReadOnlyFile, Root}; +//! use sync_thunk::Thunker; +//! +//! 
let thunker = Thunker::builder().build(); +//! let dir = Root::bind_std(&thunker, Path::new("/var/data")).await?; +//! let mut file = ReadOnlyFile::open(&dir, "log.txt").await?; +//! +//! // Stream through the file in 8 KB chunks. +//! loop { +//! let chunk = file.read_max(8192).await?; +//! if chunk.is_empty() { +//! break; // EOF +//! } +//! // process chunk... +//! } +//! # Ok(()) +//! # } +//! ``` +//! +//! ## Positional files — offset-based I/O without a cursor +//! +//! Positional files have **no cursor**. Every I/O operation specifies an explicit +//! byte offset. Because there is no shared mutable state, all I/O methods take +//! **`&self`**, enabling multiple operations to be dispatched concurrently from +//! different tasks on the same handle. +//! +//! Positional files are ideal when the access pattern is non-sequential: reading +//! scattered records from a database file, writing blocks to a pre-allocated +//! image, or serving range requests from a large static asset. +//! +//! | Type | Access | Obtained via | +//! |------|--------|-------------| +//! | [`ReadOnlyPositionalFile`] | Positional read | [`ReadOnlyPositionalFile::open`] | +//! | [`WriteOnlyPositionalFile`] | Positional write | [`WriteOnlyPositionalFile::create`], [`WriteOnlyPositionalFile::create_new`] | +//! | [`PositionalFile`] | Positional read + write | [`PositionalFile::open`], [`PositionalFile::create`], [`OpenOptions`] | +//! +//! ```no_run +//! # async fn example() -> std::io::Result<()> { +//! use std::path::Path; +//! +//! use file::{ReadOnlyPositionalFile, Root}; +//! use sync_thunk::Thunker; +//! +//! let thunker = Thunker::builder().build(); +//! let dir = Root::bind_std(&thunker, Path::new("/var/data")).await?; +//! let file = ReadOnlyPositionalFile::open(&dir, "db.bin").await?; +//! +//! // Read two disjoint regions concurrently — both calls use &self. +//! let (header, record) = tokio::join!(file.read_exact_at(0, 128), file.read_exact_at(4096, 256),); +//! 
let header = header?; +//! let record = record?; +//! # Ok(()) +//! # } +//! ``` +//! +//! ## Choosing between seekable and positional +//! +//! | Use case | Recommended type | +//! |----------|-----------------| +//! | Read a file from start to end | [`ReadOnlyFile`] | +//! | Append log entries | [`WriteOnlyFile`] | +//! | Build a file incrementally (write, then rewind and read) | [`File`] | +//! | Read scattered records from a database or index | [`ReadOnlyPositionalFile`] | +//! | Write blocks to a pre-allocated file | [`WriteOnlyPositionalFile`] | +//! | Serve concurrent range requests from a static asset | [`ReadOnlyPositionalFile`] | +//! | Read and update a memory-mapped-style structure | [`PositionalFile`] | +//! +//! ## Narrowing capabilities +//! +//! Both [`File`] and [`PositionalFile`] can be permanently narrowed to their +//! single-access counterparts via [`From`] conversions. Once narrowed, the +//! dropped capability cannot be recovered: +//! +//! ```no_run +//! # async fn example() -> std::io::Result<()> { +//! use std::path::Path; +//! +//! use file::{File, ReadOnlyFile, Root}; +//! use sync_thunk::Thunker; +//! +//! let thunker = Thunker::builder().build(); +//! let dir = Root::bind_std(&thunker, Path::new("/var/data")).await?; +//! let rw = File::open(&dir, "data.bin").await?; +//! +//! // Narrow to read-only — the write capability is permanently dropped. +//! let ro: ReadOnlyFile = rw.into(); +//! # Ok(()) +//! # } +//! ``` +//! +//! # Buffer management +//! +//! All I/O uses buffers from the [`bytesbuf`] crate. [`BytesBuf`](bytesbuf::BytesBuf) +//! is a mutable write buffer; [`BytesView`](bytesbuf::BytesView) is an immutable, +//! reference-counted read view. Buffers are allocated from a memory provider +//! (defaulting to [`GlobalPool`](bytesbuf::mem::GlobalPool)). +//! +//! Each file type implements [`HasMemory`](bytesbuf::mem::HasMemory) and +//! [`Memory`](bytesbuf::mem::Memory), so you can reserve optimally-sized buffers +//! 
directly from the file: +//! +//! ```no_run +//! # async fn example() -> std::io::Result<()> { +//! use std::path::Path; +//! +//! use bytesbuf::mem::Memory; +//! use file::{Root, WriteOnlyFile}; +//! use sync_thunk::Thunker; +//! +//! let thunker = Thunker::builder().build(); +//! let dir = Root::bind_std(&thunker, Path::new("/var/data")).await?; +//! let mut file = WriteOnlyFile::create(&dir, "output.bin").await?; +//! +//! let mut buf = file.reserve(4096); +//! buf.put_slice(*b"Hello, world!"); +//! file.write(buf.consume_all()).await?; +//! # Ok(()) +//! # } +//! ``` +//! +//! For zero-copy cross-subsystem transfers, constructors accept an optional custom +//! memory provider via `_with_memory` variants: +//! +//! ```ignore +//! // Open a file using the socket's memory provider. +//! let mut file = ReadOnlyFile::open_with_memory(&dir, "data.bin", socket.memory()).await?; +//! +//! // Data lands in memory optimal for the socket — zero copies on write. +//! let data = file.read_max(8192).await?; +//! socket.write(data).await?; +//! ``` +//! +//! # Streaming I/O (seekable files) +//! +//! Seekable files support cursor-relative streaming. Use `read_max` to pull +//! data in chunks, or `write` / `write_slice` to push data sequentially: +//! +//! ```no_run +//! # async fn example() -> std::io::Result<()> { +//! use std::path::Path; +//! +//! use bytesbuf::mem::Memory; +//! use file::{Root, WriteOnlyFile}; +//! use sync_thunk::Thunker; +//! +//! let thunker = Thunker::builder().build(); +//! let dir = Root::bind_std(&thunker, Path::new("/var/data")).await?; +//! let mut file = WriteOnlyFile::create(&dir, "output.bin").await?; +//! for _ in 0..10 { +//! let mut buf = file.reserve(1024); +//! buf.put_slice(*b"some data\n"); +//! file.write(buf.consume_all()).await?; +//! } +//! file.flush().await?; +//! # Ok(()) +//! # } +//! ``` +//! +//! Callers working with plain `&[u8]` slices can use convenience methods like +//! 
[`WriteOnlyFile::write_slice`] and [`ReadOnlyFile::read_into_slice`]. Note +//! that these copy data internally; for large or performance-sensitive I/O, +//! prefer the [`BytesView`](bytesbuf::BytesView) methods. +//! +//! # Positional I/O (positional files) +//! +//! Positional files accept an explicit byte offset on every call. Because +//! they take `&self`, you can share a single handle across tasks: +//! +//! ```no_run +//! # async fn example() -> std::io::Result<()> { +//! use std::path::Path; +//! +//! use file::{PositionalFile, Root}; +//! use sync_thunk::Thunker; +//! +//! let thunker = Thunker::builder().build(); +//! let dir = Root::bind_std(&thunker, Path::new("/var/data")).await?; +//! +//! // Pre-allocate a 1 MB file, then write four 256 KB regions concurrently. +//! let file = PositionalFile::create(&dir, "image.bin").await?; +//! file.set_len(1_048_576).await?; +//! +//! let data = vec![0xABu8; 262_144]; +//! let (a, b, c, d) = tokio::join!( +//! file.write_slice_at(0, &data), +//! file.write_slice_at(262_144, &data), +//! file.write_slice_at(524_288, &data), +//! file.write_slice_at(786_432, &data), +//! ); +//! a?; +//! b?; +//! c?; +//! d?; +//! file.flush().await?; +//! # Ok(()) +//! # } +//! ``` +//! +//! Positional files also offer `read_into_slice_at` and `write_slice_at` for +//! callers working with plain byte slices. 
+ +pub use std::fs::{FileTimes, FileType, Metadata, Permissions, TryLockError}; +pub use std::io::SeekFrom; + +pub use crate::dir_builder::DirBuilder; +pub use crate::dir_entry::DirEntry; +pub use crate::directory::Directory; +pub use crate::file::File; +pub use crate::open_options::OpenOptions; +pub use crate::positional_file::PositionalFile; +pub use crate::read_dir::ReadDir; +pub use crate::read_only_file::ReadOnlyFile; +pub use crate::read_only_positional_file::ReadOnlyPositionalFile; +pub use crate::root::Root; +pub use crate::write_only_file::WriteOnlyFile; +pub use crate::write_only_positional_file::WriteOnlyPositionalFile; + +mod dir_builder; +mod dir_entry; +mod directory; +mod file; +mod file_inner; +mod io_helpers; +mod open_options; +mod path_utils; +mod positional_file; +mod positional_file_inner; +mod read_dir; +mod read_only_file; +mod read_only_positional_file; +mod root; +mod shared_memory; +mod write_only_file; +mod write_only_positional_file; diff --git a/crates/file/src/open_options.rs b/crates/file/src/open_options.rs new file mode 100644 index 000000000..c8759b5c9 --- /dev/null +++ b/crates/file/src/open_options.rs @@ -0,0 +1,206 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +use std::io::Result; +use std::path::Path; + +use bytesbuf::mem::MemoryShared; + +use crate::directory::Directory; +use crate::file::File; +use crate::file_inner::FileInner; +use crate::positional_file_inner::PositionalFileInner; +use crate::shared_memory::SharedMemory; + +/// Options and flags which can be used to configure how a file is opened. +/// +/// This builder exposes the ability to configure how a [`File`] is opened +/// and what operations are permitted on the open file. The [`File::open`], +/// [`File::create`], and [`File::create_new`] methods are aliases +/// for commonly used options using this builder. 
+/// +/// Generally speaking, when using `OpenOptions`, you'll first call [`OpenOptions::new`], +/// then chain calls to methods to set each option, then call [`OpenOptions::open`], +/// passing a directory capability and a relative path. +#[derive(Clone, Copy, Debug)] +#[expect(clippy::struct_excessive_bools, reason = "mirrors std::fs::OpenOptions API")] +pub struct OpenOptions { + read: bool, + write: bool, + append: bool, + truncate: bool, + create: bool, + create_new: bool, +} + +impl OpenOptions { + /// Creates a blank new set of options ready for configuration. + /// + /// All options are initially set to `false`. + #[must_use] + pub const fn new() -> Self { + Self { + read: false, + write: false, + append: false, + truncate: false, + create: false, + create_new: false, + } + } + + /// Sets the option for read access. + /// + /// This option, when true, will indicate that the file should be readable + /// if opened. + pub const fn read(&mut self, read: bool) -> &mut Self { + self.read = read; + self + } + + /// Sets the option for write access. + /// + /// This option, when true, will indicate that the file should be writable + /// if opened. If the file already exists, any write calls on it will + /// overwrite its contents, without truncating it. + pub const fn write(&mut self, write: bool) -> &mut Self { + self.write = write; + self + } + + /// Sets the option for the append mode. + /// + /// This option, when true, means that writes will append to a file instead + /// of overwriting previous contents. Note that setting `.write(true).append(true)` + /// has the same effect as setting only `.append(true)`. + /// + /// This function doesn't create the file if it doesn't exist. Use the + /// [`create`](OpenOptions::create) method to do so. + pub const fn append(&mut self, append: bool) -> &mut Self { + self.append = append; + self + } + + /// Sets the option for truncating a previous file. 
+ /// + /// If a file is successfully opened with this option set to true, it will + /// truncate the file to 0 length if it already exists. The file must be + /// opened with write access for truncate to work. + pub const fn truncate(&mut self, truncate: bool) -> &mut Self { + self.truncate = truncate; + self + } + + /// Sets the option to create a new file, or open it if it already exists. + /// + /// In order for the file to be created, write or append access must be used. + pub const fn create(&mut self, create: bool) -> &mut Self { + self.create = create; + self + } + + /// Sets the option to always create a new file, failing if it already exists. + /// + /// No file is allowed to exist at the target location. This option is useful + /// because it is atomic, avoiding TOCTOU race conditions. If + /// `.create_new(true)` is set, `.create()` and `.truncate()` are ignored. + pub const fn create_new(&mut self, create_new: bool) -> &mut Self { + self.create_new = create_new; + self + } + + /// Opens a file at the given path relative to the directory capability with + /// the options specified by `self`. + /// + /// # Errors + /// + /// This function will return an error under a number of different + /// circumstances, including but not limited to: + /// + /// * [`NotFound`](std::io::ErrorKind::NotFound): the specified file does + /// not exist and neither `create` nor `create_new` is set. + /// * [`PermissionDenied`](std::io::ErrorKind::PermissionDenied): the user + /// lacks permission to get the specified access rights for the file, or + /// the file or one of its parent directories does not allow access. + /// * [`AlreadyExists`](std::io::ErrorKind::AlreadyExists): `create_new` + /// was specified and the file already exists. + /// * [`InvalidInput`](std::io::ErrorKind::InvalidInput): invalid + /// combinations of open options were used. 
+ pub async fn open(&self, dir: &Directory, path: impl AsRef) -> Result { + Ok(File::new(self.open_inner(dir, path, SharedMemory::global()).await?)) + } + + /// Opens a file with the specified options using a custom memory provider. + /// + /// The memory provider controls buffer allocation for subsequent I/O + /// operations on the returned file. + /// + /// # Errors + /// + /// Returns the same errors as [`open`](OpenOptions::open). + pub async fn open_with_memory(&self, dir: &Directory, path: impl AsRef, memory: impl MemoryShared) -> Result { + Ok(File::new(self.open_inner(dir, path, SharedMemory::new(memory)).await?)) + } + + /// Opens a positional file at the given path relative to the directory + /// capability with the options specified by `self`. + /// + /// The returned file provides only positional I/O — all methods take + /// `&self` and operate at explicit byte offsets. + /// + /// # Errors + /// + /// Returns the same errors as [`open`](OpenOptions::open). + pub async fn open_positional(&self, dir: &Directory, path: impl AsRef) -> Result { + Ok(crate::positional_file::PositionalFile::new( + self.open_inner_positional(dir, path, SharedMemory::global()).await?, + )) + } + + /// Opens a positional file with the specified options using a custom memory + /// provider. + /// + /// # Errors + /// + /// Returns the same errors as [`open`](OpenOptions::open). 
+ pub async fn open_positional_with_memory( + &self, + dir: &Directory, + path: impl AsRef, + memory: impl MemoryShared, + ) -> Result { + Ok(crate::positional_file::PositionalFile::new( + self.open_inner_positional(dir, path, SharedMemory::new(memory)).await?, + )) + } + + async fn open_inner(&self, dir: &Directory, path: impl AsRef, memory: SharedMemory) -> Result { + let mut std_opts = std::fs::OpenOptions::new(); + std_opts + .read(self.read) + .write(self.write) + .append(self.append) + .truncate(self.truncate) + .create(self.create) + .create_new(self.create_new); + FileInner::open_file(dir, path, memory, std_opts).await + } + + async fn open_inner_positional(&self, dir: &Directory, path: impl AsRef, memory: SharedMemory) -> Result { + let mut std_opts = std::fs::OpenOptions::new(); + std_opts + .read(self.read) + .write(self.write) + .append(self.append) + .truncate(self.truncate) + .create(self.create) + .create_new(self.create_new); + PositionalFileInner::open_file(dir, path, memory, std_opts).await + } +} + +impl Default for OpenOptions { + fn default() -> Self { + Self::new() + } +} diff --git a/crates/file/src/path_utils.rs b/crates/file/src/path_utils.rs new file mode 100644 index 000000000..0b734e4db --- /dev/null +++ b/crates/file/src/path_utils.rs @@ -0,0 +1,167 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +use std::io::{Error, ErrorKind, Result}; +use std::path::{Component, Path, PathBuf}; + +/// Joins a relative `path` onto `base`, rejecting any traversal that would +/// escape the directory cone rooted at `base`. +/// +/// Returns the fully resolved path on success. +/// +/// # Limitations +/// +/// This function performs purely lexical validation and does **not** resolve +/// symbolic links. A path such as `symlink_to_parent/../../etc/passwd` will +/// pass validation if the symlink component is treated as a normal directory +/// name. 
+pub fn safe_join(base: impl AsRef, relative: impl AsRef) -> Result { + let base = base.as_ref(); + let relative = relative.as_ref(); + + let mut result = PathBuf::with_capacity(base.as_os_str().len() + 1 + relative.as_os_str().len()); + result.push(base); + let mut depth: usize = 0; + + for component in relative.components() { + match component { + Component::Normal(c) => { + #[cfg(windows)] + if is_windows_reserved_name(c) { + return Err(Error::new( + ErrorKind::InvalidInput, + "Windows reserved device names are not permitted", + )); + } + result.push(c); + depth += 1; + } + Component::CurDir => {} // "." — skip + Component::ParentDir => { + if depth == 0 { + return Err(Error::new(ErrorKind::InvalidInput, "path escapes the directory")); + } + let _ = result.pop(); + depth -= 1; + } + Component::RootDir | Component::Prefix(_) => { + return Err(Error::new( + ErrorKind::InvalidInput, + "absolute paths are not permitted in capability-based access", + )); + } + } + } + + Ok(result) +} + +/// Returns `true` if the given name (ignoring extension and case) is a +/// Windows reserved device name such as CON, PRN, NUL, etc. +#[cfg(windows)] +fn is_windows_reserved_name(name: &std::ffi::OsStr) -> bool { + let s = name.to_ascii_uppercase(); + let s = s.to_string_lossy(); + // Strip any extension (e.g. 
"CON.txt" is still reserved) + let stem = s.split('.').next().unwrap_or(""); + matches!( + stem, + "CON" + | "PRN" + | "AUX" + | "NUL" + | "COM0" + | "COM1" + | "COM2" + | "COM3" + | "COM4" + | "COM5" + | "COM6" + | "COM7" + | "COM8" + | "COM9" + | "LPT0" + | "LPT1" + | "LPT2" + | "LPT3" + | "LPT4" + | "LPT5" + | "LPT6" + | "LPT7" + | "LPT8" + | "LPT9" + ) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn simple_relative() { + let base = Path::new("/data"); + assert_eq!( + safe_join(base, Path::new("foo/bar.txt")).expect("ok"), + PathBuf::from("/data/foo/bar.txt") + ); + } + + #[test] + fn dot_segments() { + let base = Path::new("/data"); + assert_eq!( + safe_join(base, Path::new("./foo/./bar.txt")).expect("ok"), + PathBuf::from("/data/foo/bar.txt") + ); + } + + #[test] + fn safe_dotdot() { + let base = Path::new("/data"); + assert_eq!( + safe_join(base, Path::new("foo/../bar.txt")).expect("ok"), + PathBuf::from("/data/bar.txt") + ); + } + + #[test] + fn escape_rejected() { + let base = Path::new("/data"); + let _ = safe_join(base, Path::new("../etc/passwd")).expect_err("should reject escape"); + } + + #[test] + fn deep_escape_rejected() { + let base = Path::new("/data"); + let _ = safe_join(base, Path::new("foo/../../etc/passwd")).expect_err("should reject deep escape"); + } + + #[test] + fn absolute_rejected() { + let base = Path::new("/data"); + let _ = safe_join(base, Path::new("/etc/passwd")).expect_err("should reject absolute path"); + } + + #[test] + fn empty_path() { + let base = Path::new("/data"); + assert_eq!(safe_join(base, Path::new("")).expect("ok"), PathBuf::from("/data")); + } + + #[cfg(windows)] + #[test] + fn windows_reserved_name_rejected() { + let base = Path::new("C:\\data"); + let _ = safe_join(base, Path::new("CON")).expect_err("should reject CON"); + let _ = safe_join(base, Path::new("nul")).expect_err("should reject NUL"); + let _ = safe_join(base, Path::new("COM1.txt")).expect_err("should reject COM1.txt"); + } + + 
#[cfg(windows)] + #[test] + fn windows_non_reserved_name_allowed() { + let base = Path::new("C:\\data"); + safe_join(base, Path::new("CONSOLE")).unwrap(); + safe_join(base, Path::new("connect.txt")).unwrap(); + } +} diff --git a/crates/file/src/positional_file.rs b/crates/file/src/positional_file.rs new file mode 100644 index 000000000..264de5e41 --- /dev/null +++ b/crates/file/src/positional_file.rs @@ -0,0 +1,505 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +use core::mem::MaybeUninit; +use std::fs::{FileTimes, Metadata, Permissions, TryLockError}; +use std::io::Result; +use std::path::Path; + +use bytesbuf::mem::{HasMemory, Memory, MemoryShared}; +use bytesbuf::{BytesBuf, BytesView}; + +use crate::directory::Directory; +use crate::open_options::OpenOptions; +use crate::positional_file_inner::PositionalFileInner; +use crate::shared_memory::SharedMemory; + +/// A positional read-write file handle within a capability-based filesystem. +/// +/// All I/O methods take `&self` and operate at explicit byte offsets, enabling +/// concurrent access from multiple tasks without cursor management. +/// +/// Obtain a `PositionalFile` by calling [`PositionalFile::open`], +/// [`PositionalFile::create`], [`PositionalFile::create_new`], or through +/// [`OpenOptions::open_positional`]. +#[derive(Debug)] +pub struct PositionalFile { + inner: PositionalFileInner, +} + +impl PositionalFile { + pub(crate) const fn new(inner: PositionalFileInner) -> Self { + Self { inner } + } + + pub(crate) fn into_inner(self) -> PositionalFileInner { + self.inner + } + + /// Returns a new [`OpenOptions`] object. + /// + /// This allows opening a file with specific combinations of read, write, + /// append, truncate, and create options. Use [`OpenOptions::open_positional`] + /// to obtain a `PositionalFile`. 
+ #[must_use] + #[inline] + pub const fn options() -> OpenOptions { + OpenOptions::new() + } + + /// Opens an existing file in read-write mode for positional access. + /// + /// The path is relative to the given directory capability. + /// + /// # Errors + /// + /// Returns an error if the file does not exist, if the path escapes the + /// directory capability, or on other I/O errors. + #[inline] + pub async fn open(dir: &Directory, path: impl AsRef) -> Result { + Ok(Self { + inner: PositionalFileInner::open_readwrite(dir, path, SharedMemory::global()).await?, + }) + } + + /// Opens an existing file in read-write mode for positional access using the + /// specified memory provider. + /// + /// The custom memory provider allows the caller to control buffer allocation, + /// enabling zero-copy transfers with other subsystems sharing the same memory + /// provider. + /// + /// # Errors + /// + /// Returns an error if the file does not exist, if the path escapes the + /// directory capability, or on other I/O errors. + #[inline] + pub async fn open_with_memory(dir: &Directory, path: impl AsRef, memory: impl MemoryShared) -> Result { + Ok(Self { + inner: PositionalFileInner::open_readwrite(dir, path, SharedMemory::new(memory)).await?, + }) + } + + /// Opens a file in read-write mode for positional access. + /// + /// This function will create the file if it does not exist, and will truncate + /// it if it does. + /// + /// # Errors + /// + /// Returns an error if the path escapes the directory capability or on other + /// I/O errors. + #[inline] + pub async fn create(dir: &Directory, path: impl AsRef) -> Result { + Ok(Self { + inner: PositionalFileInner::create_readwrite(dir, path, SharedMemory::global()).await?, + }) + } + + /// Opens a file in read-write mode for positional access using the specified + /// memory provider. + /// + /// Creates the file if it does not exist, truncates it if it does. 
+ /// + /// # Errors + /// + /// Returns an error if the path escapes the directory capability or on other + /// I/O errors. + #[inline] + pub async fn create_with_memory(dir: &Directory, path: impl AsRef, memory: impl MemoryShared) -> Result { + Ok(Self { + inner: PositionalFileInner::create_readwrite(dir, path, SharedMemory::new(memory)).await?, + }) + } + + /// Creates a new file in read-write mode for positional access; returns an + /// error if the file exists. + /// + /// If the call succeeds, the file is guaranteed to be new. This is atomic, + /// avoiding TOCTOU race conditions. + /// + /// # Errors + /// + /// Returns an error if the file already exists, if the path escapes the + /// directory capability, or on other I/O errors. + #[inline] + pub async fn create_new(dir: &Directory, path: impl AsRef) -> Result { + Ok(Self { + inner: PositionalFileInner::create_new_readwrite(dir, path, SharedMemory::global()).await?, + }) + } + + /// Creates a new file in read-write mode for positional access using the + /// specified memory provider; returns an error if the file exists. + /// + /// # Errors + /// + /// Returns an error if the file already exists, if the path escapes the + /// directory capability, or on other I/O errors. + #[inline] + pub async fn create_new_with_memory(dir: &Directory, path: impl AsRef, memory: impl MemoryShared) -> Result { + Ok(Self { + inner: PositionalFileInner::create_new_readwrite(dir, path, SharedMemory::new(memory)).await?, + }) + } + + /// Reads up to `len` bytes at `offset`, making a best effort to return + /// the full amount. + /// + /// Performs multiple reads as necessary. May return fewer bytes only when + /// EOF is reached before `len` bytes are available. + /// + /// # Errors + /// + /// Returns an error if a read operation fails due to an I/O error. 
+ #[inline] + pub async fn read_at(&self, offset: u64, len: usize) -> Result { + self.inner.read_at(offset, len).await + } + + /// Reads at most `len` bytes at `offset` in a single operation. + /// + /// May return fewer bytes than requested. A return of zero bytes indicates + /// EOF. + /// + /// # Errors + /// + /// Returns an error if the read operation fails due to an I/O error. + #[inline] + pub async fn read_max_at(&self, offset: u64, len: usize) -> Result { + self.inner.read_max_at(offset, len).await + } + + /// Reads exactly `len` bytes at `offset`. + /// + /// Performs multiple reads as necessary. Returns an error if EOF is + /// reached before `len` bytes are read. + /// + /// # Errors + /// + /// Returns [`std::io::ErrorKind::UnexpectedEof`] if the file ends before `len` + /// bytes are read, or another error on I/O failure. + #[inline] + pub async fn read_exact_at(&self, offset: u64, len: usize) -> Result { + self.inner.read_exact_at(offset, len).await + } + + /// Reads an implementation-chosen number of bytes at `offset` into the + /// provided buffer. + /// + /// Uses the buffer's remaining capacity (or 8192 if empty) to determine + /// the read size. Returns the number of bytes read and the updated buffer. + /// + /// # Errors + /// + /// Returns an error if the read operation fails due to an I/O error. + #[inline] + pub async fn read_into_bytesbuf_at(&self, offset: u64, buf: &mut BytesBuf) -> Result { + let len = if buf.remaining_capacity() > 0 { + buf.remaining_capacity() + } else { + 8192 + }; + self.inner.read_max_into_bytesbuf_at(offset, len, buf).await + } + + /// Reads at most `len` bytes at `offset` into the provided buffer in a + /// single operation. + /// + /// Returns the number of bytes read and the updated buffer. + /// + /// # Errors + /// + /// Returns an error if the read operation fails due to an I/O error. 
+ #[inline] + pub async fn read_max_into_bytesbuf_at(&self, offset: u64, len: usize, buf: &mut BytesBuf) -> Result { + self.inner.read_max_into_bytesbuf_at(offset, len, buf).await + } + + /// Reads exactly `len` bytes at `offset` into the provided buffer. + /// + /// Performs multiple reads as necessary. Returns an error if EOF is + /// reached before `len` bytes are appended. + /// + /// # Errors + /// + /// Returns [`std::io::ErrorKind::UnexpectedEof`] if the file ends before `len` + /// bytes are read, or another error on I/O failure. + #[inline] + pub async fn read_exact_into_bytesbuf_at(&self, offset: u64, len: usize, buf: &mut BytesBuf) -> Result<()> { + self.inner.read_exact_into_bytesbuf_at(offset, len, buf).await + } + + /// Reads into the provided slice at `offset`, making a best effort to + /// fill it completely. + /// + /// Returns the total number of bytes read. May return fewer than + /// `buf.len()` only when EOF is reached. + /// + /// # Errors + /// + /// Returns an error if a read operation fails due to an I/O error. + #[inline] + pub async fn read_into_slice_at(&self, offset: u64, buf: &mut [u8]) -> Result { + self.inner.read_into_slice_at(offset, buf).await + } + + /// Fills the provided slice with exactly `buf.len()` bytes starting at + /// `offset`. + /// + /// Performs multiple reads as necessary. Returns an error if EOF is + /// reached before the slice is fully filled. + /// + /// # Errors + /// + /// Returns [`std::io::ErrorKind::UnexpectedEof`] if the file ends before the + /// slice is filled, or another error on I/O failure. + #[inline] + pub async fn read_exact_into_slice_at(&self, offset: u64, buf: &mut [u8]) -> Result<()> { + self.inner.read_exact_into_slice_at(offset, buf).await + } + + /// Fills the provided uninitialized slice with exactly `buf.len()` bytes + /// starting at `offset`. + /// + /// On success every element in `buf` is initialized. 
+ /// + /// # Errors + /// + /// Returns [`std::io::ErrorKind::UnexpectedEof`] if the file ends before the slice + /// is filled, or another error on I/O failure. + #[inline] + pub async fn read_exact_into_uninit_at(&self, offset: u64, buf: &mut [MaybeUninit]) -> Result<()> { + self.inner.read_exact_into_uninit_at(offset, buf).await + } + + /// Writes the provided byte sequence to the file at `offset`. + /// + /// The method completes when all bytes have been written. Partial writes are + /// retried automatically. + /// + /// # Errors + /// + /// Returns an error if the write operation fails. + #[inline] + pub async fn write_at(&self, offset: u64, data: BytesView) -> Result<()> { + self.inner.write_at(offset, data).await + } + + /// Writes a byte slice to the file at `offset`. + /// + /// This is a convenience method for callers working with `&[u8]` rather than + /// managed buffers. The data is copied internally to transfer it to the + /// worker thread; prefer [`write_at`](Self::write_at) with [`BytesView`] for + /// large or performance-sensitive writes. + /// + /// # Errors + /// + /// Returns an error if the write operation fails. + #[inline] + pub async fn write_slice_at(&self, offset: u64, data: impl AsRef<[u8]>) -> Result<()> { + self.inner.write_slice_at(offset, data.as_ref()).await + } + + /// Queries metadata about the underlying file. + /// + /// # Errors + /// + /// Returns an error if the underlying I/O operation fails. + #[inline] + pub async fn metadata(&self) -> Result { + self.inner.metadata().await + } + + /// Truncates or extends the underlying file, updating the size to become `size`. + /// + /// If `size` is less than the current file size, the file shrinks. If greater, + /// it extends with zeroes. + /// + /// # Errors + /// + /// Returns an error if the underlying I/O operation fails. 
+ #[inline] + pub async fn set_len(&self, size: u64) -> Result<()> { + self.inner.set_len(size).await + } + + /// Changes the modification time of the underlying file. + /// + /// # Errors + /// + /// Returns an error if the underlying I/O operation fails. + #[inline] + pub async fn set_modified(&self, modified: std::time::SystemTime) -> Result<()> { + self.inner.set_modified(modified).await + } + + /// Changes the permissions on the underlying file. + /// + /// # Errors + /// + /// Returns an error if the underlying I/O operation fails. + #[inline] + pub async fn set_permissions(&self, perms: Permissions) -> Result<()> { + self.inner.set_permissions(perms).await + } + + /// Changes the timestamps of the underlying file. + /// + /// # Errors + /// + /// Returns an error if the underlying I/O operation fails. + #[inline] + pub async fn set_times(&self, times: FileTimes) -> Result<()> { + self.inner.set_times(times).await + } + + /// Attempts to sync all OS-internal file content and metadata to disk. + /// + /// # Errors + /// + /// Returns an error if the underlying I/O operation fails. + #[inline] + pub async fn sync_all(&self) -> Result<()> { + self.inner.sync_all().await + } + + /// Similar to [`sync_all`](Self::sync_all), except that it might not synchronize + /// file metadata. + /// + /// # Errors + /// + /// Returns an error if the underlying I/O operation fails. + #[inline] + pub async fn sync_data(&self) -> Result<()> { + self.inner.sync_data().await + } + + /// Flushes any buffered data to the underlying file. + /// + /// # Errors + /// + /// Returns an error if the underlying I/O operation fails. + #[inline] + pub async fn flush(&self) -> Result<()> { + self.inner.flush().await + } + + /// Acquires an exclusive lock on the file. + /// + /// Blocks until the lock is acquired. + /// + /// # Errors + /// + /// Returns an error if the underlying I/O operation fails. 
+ #[inline] + pub async fn lock(&self) -> Result<()> { + self.inner.lock().await + } + + /// Acquires a shared (non-exclusive) lock on the file. + /// + /// Blocks until the lock is acquired. + /// + /// # Errors + /// + /// Returns an error if the underlying I/O operation fails. + #[inline] + pub async fn lock_shared(&self) -> Result<()> { + self.inner.lock_shared().await + } + + /// Tries to acquire an exclusive lock on the file. + /// + /// Returns `Err(TryLockError::WouldBlock)` if another lock is held. + /// + /// # Errors + /// + /// Returns an error if the lock cannot be acquired or on I/O failure. + #[inline] + pub async fn try_lock(&self) -> core::result::Result<(), TryLockError> { + self.inner.try_lock().await + } + + /// Tries to acquire a shared lock on the file. + /// + /// Returns `Err(TryLockError::WouldBlock)` if an exclusive lock is held. + /// + /// # Errors + /// + /// Returns an error if the lock cannot be acquired or on I/O failure. + #[inline] + pub async fn try_lock_shared(&self) -> core::result::Result<(), TryLockError> { + self.inner.try_lock_shared().await + } + + /// Releases all locks on the file. + /// + /// # Errors + /// + /// Returns an error if the underlying I/O operation fails. + #[inline] + pub async fn unlock(&self) -> Result<()> { + self.inner.unlock().await + } + + /// Creates a new `PositionalFile` instance that shares the same underlying + /// file handle. + /// + /// # Errors + /// + /// Returns an error if the underlying I/O operation fails. + #[inline] + pub async fn try_clone(&self) -> Result { + Ok(Self { + inner: self.inner.try_clone().await?, + }) + } + + /// Returns `true` if the underlying file descriptor refers to a terminal. 
+ #[must_use] + #[inline] + pub fn is_terminal(&self) -> bool { + self.inner.is_terminal() + } +} + +impl HasMemory for PositionalFile { + fn memory(&self) -> impl MemoryShared { + self.inner.memory().clone() + } +} + +impl Memory for PositionalFile { + fn reserve(&self, min_bytes: usize) -> BytesBuf { + self.inner.memory().reserve(min_bytes) + } +} + +#[cfg(unix)] +impl std::os::unix::io::AsRawFd for PositionalFile { + fn as_raw_fd(&self) -> std::os::unix::io::RawFd { + self.inner.as_raw_fd() + } +} + +#[cfg(unix)] +impl std::os::unix::io::AsFd for PositionalFile { + fn as_fd(&self) -> std::os::unix::io::BorrowedFd<'_> { + self.inner.as_fd() + } +} + +#[cfg(windows)] +impl std::os::windows::io::AsRawHandle for PositionalFile { + fn as_raw_handle(&self) -> std::os::windows::io::RawHandle { + self.inner.as_raw_handle() + } +} + +#[cfg(windows)] +impl std::os::windows::io::AsHandle for PositionalFile { + fn as_handle(&self) -> std::os::windows::io::BorrowedHandle<'_> { + self.inner.as_handle() + } +} diff --git a/crates/file/src/positional_file_inner.rs b/crates/file/src/positional_file_inner.rs new file mode 100644 index 000000000..ff625c8d9 --- /dev/null +++ b/crates/file/src/positional_file_inner.rs @@ -0,0 +1,441 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +use std::fs::{File, FileTimes, Metadata, Permissions, TryLockError}; +use std::io::{Error, ErrorKind, Result}; +use std::sync::Arc; + +use bytesbuf::mem::Memory; +use bytesbuf::{BytesBuf, BytesView}; +use sync_thunk::{Thunker, thunk}; + +use crate::shared_memory::SharedMemory; + +#[cfg(unix)] +type FileHandle = Arc; +#[cfg(windows)] +type FileHandle = Arc>; + +#[derive(Debug)] +pub struct PositionalFileInner { + file: FileHandle, + thunker: Thunker, + memory: SharedMemory, +} + +impl PositionalFileInner { + /// Creates a `PositionalFileInner` from a standard `std::fs::File`. 
+ pub fn from_std(file: File, dir: &crate::directory::Directory, memory: SharedMemory) -> Self { + #[cfg(unix)] + let file = Arc::new(file); + #[cfg(windows)] + let file = Arc::new(std::sync::Mutex::new(file)); + Self { + file, + thunker: dir.thunker().clone(), + memory, + } + } + + /// Opens a file with the given options, dispatching the blocking open to a worker thread. + pub async fn open_file( + dir: &crate::directory::Directory, + path: impl AsRef, + memory: SharedMemory, + opts: std::fs::OpenOptions, + ) -> Result { + let file = dir.open_std_file(path.as_ref(), opts).await?; + Ok(Self::from_std(file, dir, memory)) + } + + /// Opens a file in read-only mode. + pub async fn open_readonly(dir: &crate::directory::Directory, path: impl AsRef, memory: SharedMemory) -> Result { + let mut opts = std::fs::OpenOptions::new(); + opts.read(true); + Self::open_file(dir, path, memory, opts).await + } + + /// Creates (or truncates) a file in write-only mode. + pub async fn create_writeonly( + dir: &crate::directory::Directory, + path: impl AsRef, + memory: SharedMemory, + ) -> Result { + let mut opts = std::fs::OpenOptions::new(); + opts.write(true).create(true).truncate(true); + Self::open_file(dir, path, memory, opts).await + } + + /// Atomically creates a new file in write-only mode; fails if it exists. + pub async fn create_new_writeonly( + dir: &crate::directory::Directory, + path: impl AsRef, + memory: SharedMemory, + ) -> Result { + let mut opts = std::fs::OpenOptions::new(); + opts.write(true).create_new(true); + Self::open_file(dir, path, memory, opts).await + } + + /// Opens an existing file in read-write mode. + pub async fn open_readwrite( + dir: &crate::directory::Directory, + path: impl AsRef, + memory: SharedMemory, + ) -> Result { + let mut opts = std::fs::OpenOptions::new(); + opts.read(true).write(true); + Self::open_file(dir, path, memory, opts).await + } + + /// Creates (or truncates) a file in read-write mode. 
+ pub async fn create_readwrite( + dir: &crate::directory::Directory, + path: impl AsRef, + memory: SharedMemory, + ) -> Result { + let mut opts = std::fs::OpenOptions::new(); + opts.read(true).write(true).create(true).truncate(true); + Self::open_file(dir, path, memory, opts).await + } + + /// Atomically creates a new file in read-write mode; fails if it exists. + pub async fn create_new_readwrite( + dir: &crate::directory::Directory, + path: impl AsRef, + memory: SharedMemory, + ) -> Result { + let mut opts = std::fs::OpenOptions::new(); + opts.read(true).write(true).create_new(true); + Self::open_file(dir, path, memory, opts).await + } + + pub const fn memory(&self) -> &SharedMemory { + &self.memory + } + + /// Returns the raw file descriptor (Unix). + #[cfg(unix)] + pub fn as_raw_fd(&self) -> std::os::unix::io::RawFd { + use std::os::unix::io::AsRawFd; + self.file.as_raw_fd() + } + + /// Returns a borrowed file descriptor. + #[cfg(unix)] + pub fn as_fd(&self) -> std::os::unix::io::BorrowedFd<'_> { + use std::os::unix::io::AsFd; + self.file.as_fd() + } + + /// Returns the raw handle (Windows). + #[cfg(windows)] + pub fn as_raw_handle(&self) -> std::os::windows::io::RawHandle { + use std::os::windows::io::AsRawHandle; + self.file.lock().expect("file mutex poisoned").as_raw_handle() + } + + /// Returns a borrowed handle. + #[cfg(windows)] + pub fn as_handle(&self) -> std::os::windows::io::BorrowedHandle<'_> { + // SAFETY: The handle is valid for the lifetime of &self + // because self holds an Arc> that keeps the handle open. + unsafe { std::os::windows::io::BorrowedHandle::borrow_raw(self.as_raw_handle()) } + } + + /// Returns whether the underlying file descriptor refers to a terminal. 
+ pub fn is_terminal(&self) -> bool { + use std::io::IsTerminal; + #[cfg(unix)] + { + self.file.is_terminal() + } + #[cfg(windows)] + { + self.file.lock().is_ok_and(|f| f.is_terminal()) + } + } + + /// Executes a closure with a `&File` reference, handling platform-specific locking. + #[cfg(unix)] + fn with_file(&self, f: impl FnOnce(&File) -> R) -> R { + f(&self.file) + } + + /// Executes a closure with a `&File` reference, handling platform-specific locking. + #[cfg(windows)] + fn with_file(&self, f: impl FnOnce(&File) -> R) -> R { + f(&self.file.lock().expect("file mutex poisoned")) + } + + #[thunk(from = self.thunker)] + pub async fn read_max_at(&self, offset: u64, len: usize) -> Result { + let mut buf = self.memory.reserve(len); + self.with_file(|f| positional_read_into_bytesbuf(f, &mut buf, len, offset))?; + Ok(buf.consume_all()) + } + + #[thunk(from = self.thunker)] + pub async fn read_at(&self, offset: u64, len: usize) -> Result { + let mut buf = self.memory.reserve(len); + self.with_file(|f| { + let mut total = 0; + while total < len { + let cur = offset.saturating_add(total as u64); + let n = positional_read_into_bytesbuf(f, &mut buf, len - total, cur)?; + if n == 0 { + break; + } + total += n; + } + Ok::<_, std::io::Error>(total) + })?; + Ok(buf.consume_all()) + } + + #[thunk(from = self.thunker)] + pub async fn read_exact_into_bytesbuf_at(&self, offset: u64, len: usize, buf: &mut BytesBuf) -> Result<()> { + if buf.remaining_capacity() < len { + buf.reserve(len - buf.remaining_capacity(), &self.memory); + } + self.with_file(|f| { + let mut total = 0; + while total < len { + let cur = offset.saturating_add(total as u64); + let n = positional_read_into_bytesbuf(f, buf, len - total, cur)?; + if n == 0 { + return Err(Error::new(ErrorKind::UnexpectedEof, "failed to fill whole buffer")); + } + total += n; + } + Ok(()) + }) + } + + #[thunk(from = self.thunker)] + pub async fn read_max_into_bytesbuf_at(&self, offset: u64, len: usize, buf: &mut BytesBuf) -> Result 
{ + let needed = len.saturating_sub(buf.remaining_capacity()); + if needed > 0 { + buf.reserve(needed, &self.memory); + } + self.with_file(|f| positional_read_into_bytesbuf(f, buf, len, offset)) + } + + #[thunk(from = self.thunker)] + pub async fn read_exact_at(&self, offset: u64, len: usize) -> Result { + let mut buf = self.memory.reserve(len); + self.with_file(|f| { + let mut total = 0; + while total < len { + let cur = offset.saturating_add(total as u64); + let n = positional_read_into_bytesbuf(f, &mut buf, len - total, cur)?; + if n == 0 { + return Err(Error::new(ErrorKind::UnexpectedEof, "failed to fill whole buffer")); + } + total += n; + } + Ok(()) + })?; + Ok(buf.consume_all()) + } + + #[thunk(from = self.thunker)] + pub async fn read_into_slice_at(&self, offset: u64, buf: &mut [u8]) -> Result { + self.with_file(|f| { + let mut total = 0; + while total < buf.len() { + let cur = offset.saturating_add(total as u64); + let n = positional_read(f, &mut buf[total..], cur)?; + if n == 0 { + break; + } + total += n; + } + Ok(total) + }) + } + + pub async fn read_exact_into_uninit_at(&self, offset: u64, buf: &mut [core::mem::MaybeUninit]) -> Result<()> { + // SAFETY: MaybeUninit has the same layout as u8. + // read_exact_into_slice_at writes exactly buf.len() bytes on success, + // fully initializing the contents. 
+ let initialized = unsafe { core::slice::from_raw_parts_mut(buf.as_mut_ptr().cast::(), buf.len()) }; + self.read_exact_into_slice_at(offset, initialized).await + } + + #[thunk(from = self.thunker)] + pub async fn read_exact_into_slice_at(&self, offset: u64, buf: &mut [u8]) -> Result<()> { + self.with_file(|f| { + let mut total = 0; + while total < buf.len() { + let cur = offset.saturating_add(total as u64); + let n = positional_read(f, &mut buf[total..], cur)?; + if n == 0 { + return Err(Error::new(ErrorKind::UnexpectedEof, "failed to fill whole buffer")); + } + total += n; + } + Ok(()) + }) + } + + #[thunk(from = self.thunker)] + pub async fn write_at(&self, offset: u64, data: BytesView) -> Result<()> { + self.with_file(|f| { + let mut current_offset = offset; + for (slice, _meta) in data.slices() { + positional_write_all(f, slice, current_offset)?; + current_offset += slice.len() as u64; + } + Ok(()) + }) + } + + #[thunk(from = self.thunker)] + pub async fn write_slice_at(&self, offset: u64, data: &[u8]) -> Result<()> { + self.with_file(|f| positional_write_all(f, data, offset)) + } + + #[thunk(from = self.thunker)] + pub async fn metadata(&self) -> Result { + self.with_file(File::metadata) + } + + #[thunk(from = self.thunker)] + pub async fn set_len(&self, size: u64) -> Result<()> { + self.with_file(|f| f.set_len(size)) + } + + #[thunk(from = self.thunker)] + pub async fn set_modified(&self, modified: std::time::SystemTime) -> Result<()> { + self.with_file(|f| f.set_modified(modified)) + } + + #[thunk(from = self.thunker)] + pub async fn set_permissions(&self, perms: Permissions) -> Result<()> { + self.with_file(|f| f.set_permissions(perms)) + } + + #[thunk(from = self.thunker)] + pub async fn set_times(&self, times: FileTimes) -> Result<()> { + self.with_file(|f| f.set_times(times)) + } + + #[thunk(from = self.thunker)] + pub async fn sync_all(&self) -> Result<()> { + self.with_file(File::sync_all) + } + + #[thunk(from = self.thunker)] + pub async fn 
sync_data(&self) -> Result<()> { + self.with_file(File::sync_data) + } + + #[thunk(from = self.thunker)] + pub async fn flush(&self) -> Result<()> { + // std::fs::File has no internal buffer, so flush() through a shared + // reference is effectively a no-op. Use sync_data() instead to + // actually ensure data reaches the disk. + self.with_file(File::sync_data) + } + + #[thunk(from = self.thunker)] + pub async fn lock(&self) -> Result<()> { + self.with_file(File::lock) + } + + #[thunk(from = self.thunker)] + pub async fn lock_shared(&self) -> Result<()> { + self.with_file(File::lock_shared) + } + + #[thunk(from = self.thunker)] + pub async fn try_lock(&self) -> core::result::Result<(), TryLockError> { + self.with_file(File::try_lock) + } + + #[thunk(from = self.thunker)] + pub async fn try_lock_shared(&self) -> core::result::Result<(), TryLockError> { + self.with_file(File::try_lock_shared) + } + + #[thunk(from = self.thunker)] + pub async fn unlock(&self) -> Result<()> { + self.with_file(File::unlock) + } + + #[expect(clippy::unused_async, reason = "async signature required to match public API contract")] + pub async fn try_clone(&self) -> Result { + Ok(Self { + file: Arc::clone(&self.file), + thunker: self.thunker.clone(), + memory: self.memory.clone(), + }) + } +} + +impl Memory for PositionalFileInner { + fn reserve(&self, min_bytes: usize) -> BytesBuf { + self.memory.reserve(min_bytes) + } +} + +/// Reads bytes at `offset` without affecting the cursor. +#[cfg(unix)] +pub fn positional_read(file: &File, buf: &mut [u8], offset: u64) -> Result { + use std::os::unix::fs::FileExt; + file.read_at(buf, offset) +} + +/// Reads bytes at `offset` without affecting the cursor. +#[cfg(windows)] +pub fn positional_read(file: &File, buf: &mut [u8], offset: u64) -> Result { + use std::os::windows::fs::FileExt; + file.seek_read(buf, offset) +} + +/// Writes bytes at `offset` without affecting the cursor. 
+#[cfg(unix)] +pub fn positional_write(file: &File, buf: &[u8], offset: u64) -> Result { + use std::os::unix::fs::FileExt; + file.write_at(buf, offset) +} + +/// Writes bytes at `offset` without affecting the cursor. +#[cfg(windows)] +pub fn positional_write(file: &File, buf: &[u8], offset: u64) -> Result { + use std::os::windows::fs::FileExt; + file.seek_write(buf, offset) +} + +/// Writes all of `buf` at `offset`, retrying on short writes. +pub fn positional_write_all(file: &File, mut buf: &[u8], mut offset: u64) -> Result<()> { + while !buf.is_empty() { + let n = positional_write(file, buf, offset)?; + if n == 0 { + return Err(Error::new(ErrorKind::WriteZero, "failed to write whole buffer")); + } + buf = &buf[n..]; + offset += n as u64; + } + Ok(()) +} + +/// Reads up to `len` bytes at `offset` directly into `buf`'s unfilled capacity, +/// without affecting the file cursor. +pub fn positional_read_into_bytesbuf(file: &File, buf: &mut BytesBuf, len: usize, offset: u64) -> Result { + let unfilled = buf.first_unfilled_slice(); + let read_len = len.min(unfilled.len()); + + // SAFETY: same as read_into_bytesbuf in io_helpers.rs — MaybeUninit has identical layout to u8. + // We skip zero-initialization because the OS file primitives only write to the buffer. + let dst = unsafe { core::slice::from_raw_parts_mut(unfilled.as_mut_ptr().cast::(), read_len) }; + let n = positional_read(file, dst, offset)?; + if n > 0 { + // SAFETY: `n` bytes were just written by the positional read. + unsafe { + buf.advance(n); + } + } + Ok(n) +} diff --git a/crates/file/src/read_dir.rs b/crates/file/src/read_dir.rs new file mode 100644 index 000000000..daad6be06 --- /dev/null +++ b/crates/file/src/read_dir.rs @@ -0,0 +1,49 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +use std::io::Result; + +use sync_thunk::{Thunker, thunk}; + +use crate::dir_entry::DirEntry; + +/// An asynchronous iterator over the entries in a directory. 
+/// +/// This struct is returned from [`Directory::read_dir`](crate::directory::Directory::read_dir) +/// and will yield instances of [`DirEntry`](crate::dir_entry::DirEntry). Through a `DirEntry`, +/// information like the entry's file name and possibly other metadata can be learned. +/// +/// The entries are fetched lazily from the underlying filesystem on each call to +/// [`next_entry`](ReadDir::next_entry), dispatching to a worker thread. +#[derive(Debug)] +pub struct ReadDir { + inner: std::fs::ReadDir, + thunker: Thunker, +} + +impl ReadDir { + pub(crate) fn new(read_dir: std::fs::ReadDir, thunker: Thunker) -> Self { + Self { inner: read_dir, thunker } + } + + /// Returns the next entry in the directory stream. + /// + /// Returns `Ok(None)` when there are no more entries. + /// + /// # Errors + /// + /// May return an I/O error if there was a problem reading a particular entry. + #[inline] + pub async fn next_entry(&mut self) -> Result> { + self.next_entry_impl().await + } + + #[thunk(from = self.thunker)] + async fn next_entry_impl(&mut self) -> Result> { + match self.inner.next() { + Some(Ok(entry)) => Ok(Some(DirEntry::from_std(&entry))), + Some(Err(e)) => Err(e), + None => Ok(None), + } + } +} diff --git a/crates/file/src/read_only_file.rs b/crates/file/src/read_only_file.rs new file mode 100644 index 000000000..be2bb8f5f --- /dev/null +++ b/crates/file/src/read_only_file.rs @@ -0,0 +1,399 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +use core::mem::MaybeUninit; +use std::fs::{Metadata, TryLockError}; +use std::io::{Error, Result, SeekFrom}; +use std::path::Path; + +use bytesbuf::mem::{HasMemory, Memory, MemoryShared}; +use bytesbuf::{BytesBuf, BytesView}; + +use crate::directory::Directory; +use crate::file_inner::FileInner; +use crate::shared_memory::SharedMemory; + +/// A seekable read-only file handle within a capability-based filesystem. +/// +/// A `ReadOnlyFile` provides read access to a file. 
It implements +/// [`bytesbuf_io::Read`] for streaming reads using managed buffers. +/// +/// Obtain a `ReadOnlyFile` by calling [`ReadOnlyFile::open`]. +#[derive(Debug)] +pub struct ReadOnlyFile { + inner: FileInner, +} + +impl ReadOnlyFile { + /// Attempts to open a file in read-only mode. + /// + /// The path is relative to the given directory capability. + /// + /// # Errors + /// + /// Returns an error if the path does not exist, if the path escapes the + /// directory capability, or due to other I/O errors. + #[inline] + pub async fn open(dir: &Directory, path: impl AsRef) -> Result { + Ok(Self { + inner: FileInner::open_readonly(dir, path, SharedMemory::global()).await?, + }) + } + + /// Attempts to open a file in read-only mode using the specified memory provider. + /// + /// This allows the caller to control buffer allocation, enabling zero-copy + /// transfers to other subsystems that share the same memory provider. + /// + /// # Errors + /// + /// Returns an error if the path does not exist, if the path escapes the + /// directory capability, or due to other I/O errors. + #[inline] + pub async fn open_with_memory(dir: &Directory, path: impl AsRef, memory: impl MemoryShared) -> Result { + Ok(Self { + inner: FileInner::open_readonly(dir, path, SharedMemory::new(memory)).await?, + }) + } + + /// Reads up to `len` bytes from the current position, making a best effort + /// to return the full amount. + /// + /// Performs multiple reads as necessary. May return fewer bytes only when + /// EOF is reached before `len` bytes are available. + /// + /// # Errors + /// + /// Returns an error if a read operation fails due to an I/O error. + #[inline] + pub async fn read(&mut self, len: usize) -> Result { + self.inner.read_into_bytesview(len).await + } + + /// Reads at most `len` bytes from the current position in a single + /// operation. + /// + /// May return fewer bytes than requested. A return of zero bytes indicates + /// EOF. 
+ /// + /// # Errors + /// + /// Returns an error if the read operation fails due to an I/O error. + #[inline] + pub async fn read_max(&mut self, len: usize) -> Result { + self.inner.read_max_into_bytesview(len).await + } + + /// Reads exactly `len` bytes from the current position. + /// + /// Performs multiple reads as necessary. Returns an error if EOF is + /// reached before `len` bytes are read. + /// + /// # Errors + /// + /// Returns [`std::io::ErrorKind::UnexpectedEof`] if the file ends before `len` + /// bytes are read, or another error on I/O failure. + #[inline] + pub async fn read_exact(&mut self, len: usize) -> Result { + self.inner.read_exact_into_bytesview(len).await + } + + /// Reads an implementation-chosen number of bytes into the provided buffer. + /// + /// Returns the number of bytes read and the updated buffer. A return of + /// 0 bytes indicates EOF. + /// + /// # Errors + /// + /// Returns an error if the read operation fails due to an I/O error. + #[inline] + pub async fn read_into_bytesbuf(&mut self, buf: &mut BytesBuf) -> Result { + self.inner.read_into_bytesbuf(buf).await + } + + /// Reads at most `len` bytes into the provided buffer in a single + /// operation. + /// + /// Returns the number of bytes read and the updated buffer. + /// + /// # Errors + /// + /// Returns an error if the read operation fails due to an I/O error. + #[inline] + pub async fn read_max_into_bytesbuf(&mut self, len: usize, buf: &mut BytesBuf) -> Result { + self.inner.read_max_into_bytesbuf(len, buf).await + } + + /// Reads exactly `len` bytes into the provided buffer. + /// + /// Performs multiple reads as necessary. Returns an error if EOF is + /// reached before `len` bytes are appended. + /// + /// # Errors + /// + /// Returns [`std::io::ErrorKind::UnexpectedEof`] if the file ends before `len` + /// bytes are read, or another error on I/O failure. 
+ #[inline] + pub async fn read_exact_into_bytesbuf(&mut self, len: usize, buf: &mut BytesBuf) -> Result<()> { + self.inner.read_exact_into_bytesbuf(len, buf).await + } + + /// Reads into the provided slice, making a best effort to fill it + /// completely. + /// + /// Returns the total number of bytes read. May return fewer than + /// `buf.len()` only when EOF is reached. + /// + /// # Errors + /// + /// Returns an error if a read operation fails due to an I/O error. + #[inline] + pub async fn read_into_slice(&mut self, buf: &mut [u8]) -> Result { + self.inner.read_into_slice(buf).await + } + + /// Fills the provided slice with exactly `buf.len()` bytes. + /// + /// Performs multiple reads as necessary. Returns an error if EOF is + /// reached before the slice is fully filled. + /// + /// # Errors + /// + /// Returns [`std::io::ErrorKind::UnexpectedEof`] if the file ends before the + /// slice is filled, or another error on I/O failure. + #[inline] + pub async fn read_exact_into_slice(&mut self, buf: &mut [u8]) -> Result<()> { + self.inner.read_exact_into_slice(buf).await + } + + /// Fills the provided uninitialized slice with exactly `buf.len()` bytes. + /// + /// On success every element in `buf` is initialized. + /// + /// # Errors + /// + /// Returns [`std::io::ErrorKind::UnexpectedEof`] if the file ends before the slice + /// is filled, or another error on I/O failure. + #[inline] + pub async fn read_exact_into_uninit(&mut self, buf: &mut [MaybeUninit]) -> Result<()> { + self.inner.read_exact_into_uninit(buf).await + } + + /// Queries metadata about the underlying file. + /// + /// # Errors + /// + /// Returns an error if the metadata cannot be retrieved due to an I/O error. + #[inline] + pub async fn metadata(&mut self) -> Result { + self.inner.metadata().await + } + + /// Acquires an exclusive lock on the file. + /// + /// Blocks until the lock can be acquired. No other file handle to this file + /// may acquire another lock while this lock is held. 
+ /// + /// # Errors + /// + /// Returns an error if the lock cannot be acquired due to an I/O error. + #[inline] + pub async fn lock(&mut self) -> Result<()> { + self.inner.lock().await + } + + /// Acquires a shared (non-exclusive) lock on the file. + /// + /// Blocks until the lock can be acquired. More than one handle may hold a + /// shared lock, but none may hold an exclusive lock at the same time. + /// + /// # Errors + /// + /// Returns an error if the lock cannot be acquired due to an I/O error. + #[inline] + pub async fn lock_shared(&mut self) -> Result<()> { + self.inner.lock_shared().await + } + + /// Tries to acquire an exclusive lock on the file. + /// + /// Returns `Err(TryLockError::WouldBlock)` if a different lock is already + /// held. + /// + /// # Errors + /// + /// Returns [`std::fs::TryLockError::WouldBlock`] if the lock is already + /// held, or [`std::fs::TryLockError::Error`] for other I/O errors. + #[inline] + pub async fn try_lock(&mut self) -> core::result::Result<(), TryLockError> { + self.inner.try_lock().await + } + + /// Tries to acquire a shared lock on the file. + /// + /// Returns `Err(TryLockError::WouldBlock)` if an exclusive lock is already + /// held. + /// + /// # Errors + /// + /// Returns [`std::fs::TryLockError::WouldBlock`] if an exclusive lock is + /// already held, or [`std::fs::TryLockError::Error`] for other I/O errors. + #[inline] + pub async fn try_lock_shared(&mut self) -> core::result::Result<(), TryLockError> { + self.inner.try_lock_shared().await + } + + /// Releases all locks on the file. + /// + /// All locks are also released when the file is closed. + /// + /// # Errors + /// + /// Returns an error if the unlock operation fails due to an I/O error. + #[inline] + pub async fn unlock(&mut self) -> Result<()> { + self.inner.unlock().await + } + + /// Seeks to a position in the file. + /// + /// The new position, measured in bytes from the start of the file, is + /// returned. 
+ /// + /// # Errors + /// + /// Returns an error if the seek operation fails due to an I/O error. + #[inline] + pub async fn seek(&mut self, pos: SeekFrom) -> Result { + self.inner.seek(pos).await + } + + /// Returns the current seek position from the start of the file. + /// + /// This is equivalent to `self.seek(SeekFrom::Current(0))`. + /// + /// # Errors + /// + /// Returns an error if the seek operation fails due to an I/O error. + #[inline] + pub async fn stream_position(&mut self) -> Result { + self.inner.stream_position().await + } + + /// Rewinds to the beginning of the file. + /// + /// This is equivalent to `self.seek(SeekFrom::Start(0))` but does not + /// return the new position. + /// + /// # Errors + /// + /// Returns an error if the seek operation fails due to an I/O error. + #[inline] + pub async fn rewind(&mut self) -> Result<()> { + self.inner.rewind().await + } + + /// Creates a new `ReadOnlyFile` instance that shares the same underlying + /// file handle. + /// + /// Reads and seeks will affect both instances simultaneously. + /// + /// # Errors + /// + /// Returns an error if the clone operation fails due to an I/O error. + #[inline] + pub async fn try_clone(&mut self) -> Result { + Ok(Self { + inner: self.inner.try_clone().await?, + }) + } + + /// Returns `true` if the underlying file descriptor refers to a terminal.
+ #[must_use] + #[inline] + pub fn is_terminal(&self) -> bool { + self.inner.is_terminal() + } +} + +impl HasMemory for ReadOnlyFile { + fn memory(&self) -> impl MemoryShared { + self.inner.memory().clone() + } +} + +impl Memory for ReadOnlyFile { + fn reserve(&self, min_bytes: usize) -> BytesBuf { + self.inner.memory().reserve(min_bytes) + } +} + +impl bytesbuf_io::Read for ReadOnlyFile { + type Error = Error; + + async fn read_at_most_into(&mut self, len: usize, mut into: BytesBuf) -> core::result::Result<(usize, BytesBuf), Self::Error> { + let n = self.inner.read_max_into_bytesbuf(len, &mut into).await?; + Ok((n, into)) + } + + async fn read_more_into(&mut self, mut into: BytesBuf) -> core::result::Result<(usize, BytesBuf), Self::Error> { + let n = self.inner.read_into_bytesbuf(&mut into).await?; + Ok((n, into)) + } + + async fn read_any(&mut self) -> core::result::Result { + let mut buf = self.inner.memory().reserve(8192); + let _ = self.inner.read_into_bytesbuf(&mut buf).await?; + Ok(buf) + } +} + +#[cfg(feature = "sync-compat")] +impl std::io::Read for ReadOnlyFile { + fn read(&mut self, buf: &mut [u8]) -> Result { + self.inner.sync_read(buf) + } +} + +#[cfg(feature = "sync-compat")] +impl std::io::Seek for ReadOnlyFile { + fn seek(&mut self, pos: SeekFrom) -> Result { + self.inner.sync_seek(pos) + } +} + +#[cfg(unix)] +impl std::os::unix::io::AsRawFd for ReadOnlyFile { + fn as_raw_fd(&self) -> std::os::unix::io::RawFd { + self.inner.as_raw_fd() + } +} + +#[cfg(unix)] +impl std::os::unix::io::AsFd for ReadOnlyFile { + fn as_fd(&self) -> std::os::unix::io::BorrowedFd<'_> { + self.inner.as_fd() + } +} + +#[cfg(windows)] +impl std::os::windows::io::AsRawHandle for ReadOnlyFile { + fn as_raw_handle(&self) -> std::os::windows::io::RawHandle { + self.inner.as_raw_handle() + } +} + +#[cfg(windows)] +impl std::os::windows::io::AsHandle for ReadOnlyFile { + fn as_handle(&self) -> std::os::windows::io::BorrowedHandle<'_> { + self.inner.as_handle() + } +} + +impl From 
for ReadOnlyFile { + /// Converts a [`File`](crate::File) into a `ReadOnlyFile`, + /// narrowing the capability to read-only access. + fn from(file: crate::file::File) -> Self { + Self { inner: file.into_inner() } + } +} diff --git a/crates/file/src/read_only_positional_file.rs b/crates/file/src/read_only_positional_file.rs new file mode 100644 index 000000000..5e87b8f7a --- /dev/null +++ b/crates/file/src/read_only_positional_file.rs @@ -0,0 +1,334 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +use core::mem::MaybeUninit; +use std::fs::{Metadata, TryLockError}; +use std::io::Result; +use std::path::Path; + +use bytesbuf::mem::{HasMemory, Memory, MemoryShared}; +use bytesbuf::{BytesBuf, BytesView}; + +use crate::directory::Directory; +use crate::positional_file_inner::PositionalFileInner; +use crate::shared_memory::SharedMemory; + +/// A positional read-only file handle within a capability-based filesystem. +/// +/// All I/O methods take `&self` and operate at explicit byte offsets, enabling +/// concurrent access from multiple tasks without cursor management. +/// +/// Obtain a `ReadOnlyPositionalFile` by calling [`ReadOnlyPositionalFile::open`], +/// or by narrowing a [`PositionalFile`](crate::PositionalFile) via [`From`]. +#[derive(Debug)] +pub struct ReadOnlyPositionalFile { + inner: PositionalFileInner, +} + +impl ReadOnlyPositionalFile { + /// Attempts to open a file in read-only mode. + /// + /// The path is relative to the given directory capability. + /// + /// # Errors + /// + /// Returns an error if the path does not exist, if the path escapes the + /// directory capability, or due to other I/O errors. + #[inline] + pub async fn open(dir: &Directory, path: impl AsRef) -> Result { + Ok(Self { + inner: PositionalFileInner::open_readonly(dir, path, SharedMemory::global()).await?, + }) + } + + /// Attempts to open a file in read-only mode using the specified memory provider. 
+ /// + /// This allows the caller to control buffer allocation, enabling zero-copy + /// transfers to other subsystems that share the same memory provider. + /// + /// # Errors + /// + /// Returns an error if the path does not exist, if the path escapes the + /// directory capability, or due to other I/O errors. + #[inline] + pub async fn open_with_memory(dir: &Directory, path: impl AsRef, memory: impl MemoryShared) -> Result { + Ok(Self { + inner: PositionalFileInner::open_readonly(dir, path, SharedMemory::new(memory)).await?, + }) + } + + /// Reads up to `len` bytes at `offset`, making a best effort to return + /// the full amount. + /// + /// Performs multiple reads as necessary. May return fewer bytes only when + /// EOF is reached before `len` bytes are available. + /// + /// # Errors + /// + /// Returns an error if a read operation fails due to an I/O error. + #[inline] + pub async fn read_at(&self, offset: u64, len: usize) -> Result { + self.inner.read_at(offset, len).await + } + + /// Reads at most `len` bytes at `offset` in a single operation. + /// + /// May return fewer bytes than requested. A return of zero bytes indicates + /// EOF. + /// + /// # Errors + /// + /// Returns an error if the read operation fails due to an I/O error. + #[inline] + pub async fn read_max_at(&self, offset: u64, len: usize) -> Result { + self.inner.read_max_at(offset, len).await + } + + /// Reads exactly `len` bytes at `offset`. + /// + /// Performs multiple reads as necessary. Returns an error if EOF is + /// reached before `len` bytes are read. + /// + /// # Errors + /// + /// Returns [`std::io::ErrorKind::UnexpectedEof`] if the file ends before `len` + /// bytes are read, or another error on I/O failure. + #[inline] + pub async fn read_exact_at(&self, offset: u64, len: usize) -> Result { + self.inner.read_exact_at(offset, len).await + } + + /// Reads an implementation-chosen number of bytes at `offset` into the + /// provided buffer. 
+ /// + /// Uses the buffer's remaining capacity (or 8192 if none remains) to determine + /// the read size. Returns the number of bytes read; `buf` is updated in place. + /// + /// # Errors + /// + /// Returns an error if the read operation fails due to an I/O error. + #[inline] + pub async fn read_into_bytesbuf_at(&self, offset: u64, buf: &mut BytesBuf) -> Result { + let len = if buf.remaining_capacity() > 0 { + buf.remaining_capacity() + } else { + 8192 + }; + self.inner.read_max_into_bytesbuf_at(offset, len, buf).await + } + + /// Reads at most `len` bytes at `offset` into the provided buffer in a + /// single operation. + /// + /// Returns the number of bytes read; `buf` is updated in place. + /// + /// # Errors + /// + /// Returns an error if the read operation fails due to an I/O error. + #[inline] + pub async fn read_max_into_bytesbuf_at(&self, offset: u64, len: usize, buf: &mut BytesBuf) -> Result { + self.inner.read_max_into_bytesbuf_at(offset, len, buf).await + } + + /// Reads exactly `len` bytes at `offset` into the provided buffer. + /// + /// Performs multiple reads as necessary. Returns an error if EOF is + /// reached before `len` bytes are appended. + /// + /// # Errors + /// + /// Returns [`std::io::ErrorKind::UnexpectedEof`] if the file ends before `len` + /// bytes are read, or another error on I/O failure. + #[inline] + pub async fn read_exact_into_bytesbuf_at(&self, offset: u64, len: usize, buf: &mut BytesBuf) -> Result<()> { + self.inner.read_exact_into_bytesbuf_at(offset, len, buf).await + } + + /// Reads into the provided slice at `offset`, making a best effort to + /// fill it completely. + /// + /// Returns the total number of bytes read. May return fewer than + /// `buf.len()` only when EOF is reached. + /// + /// # Errors + /// + /// Returns an error if a read operation fails due to an I/O error.
+ #[inline] + pub async fn read_into_slice_at(&self, offset: u64, buf: &mut [u8]) -> Result { + self.inner.read_into_slice_at(offset, buf).await + } + + /// Fills the provided slice with exactly `buf.len()` bytes starting at + /// `offset`. + /// + /// Performs multiple reads as necessary. Returns an error if EOF is + /// reached before the slice is fully filled. + /// + /// # Errors + /// + /// Returns [`std::io::ErrorKind::UnexpectedEof`] if the file ends before the + /// slice is filled, or another error on I/O failure. + #[inline] + pub async fn read_exact_into_slice_at(&self, offset: u64, buf: &mut [u8]) -> Result<()> { + self.inner.read_exact_into_slice_at(offset, buf).await + } + + /// Fills the provided uninitialized slice with exactly `buf.len()` bytes + /// starting at `offset`. + /// + /// On success every element in `buf` is initialized. + /// + /// # Errors + /// + /// Returns [`std::io::ErrorKind::UnexpectedEof`] if the file ends before the slice + /// is filled, or another error on I/O failure. + #[inline] + pub async fn read_exact_into_uninit_at(&self, offset: u64, buf: &mut [MaybeUninit]) -> Result<()> { + self.inner.read_exact_into_uninit_at(offset, buf).await + } + + /// Queries metadata about the underlying file. + /// + /// # Errors + /// + /// Returns an error if the metadata cannot be retrieved due to an I/O error. + #[inline] + pub async fn metadata(&self) -> Result { + self.inner.metadata().await + } + + /// Acquires an exclusive lock on the file. + /// + /// Blocks until the lock can be acquired. No other file handle to this file + /// may acquire another lock while this lock is held. + /// + /// # Errors + /// + /// Returns an error if the lock cannot be acquired due to an I/O error. + #[inline] + pub async fn lock(&self) -> Result<()> { + self.inner.lock().await + } + + /// Acquires a shared (non-exclusive) lock on the file. + /// + /// Blocks until the lock can be acquired. 
More than one handle may hold a + /// shared lock, but none may hold an exclusive lock at the same time. + /// + /// # Errors + /// + /// Returns an error if the lock cannot be acquired due to an I/O error. + #[inline] + pub async fn lock_shared(&self) -> Result<()> { + self.inner.lock_shared().await + } + + /// Tries to acquire an exclusive lock on the file. + /// + /// Returns `Err(TryLockError::WouldBlock)` if a different lock is already + /// held. + /// + /// # Errors + /// + /// Returns [`std::fs::TryLockError::WouldBlock`] if the lock is already + /// held, or [`std::fs::TryLockError::Error`] for other I/O errors. + #[inline] + pub async fn try_lock(&self) -> core::result::Result<(), TryLockError> { + self.inner.try_lock().await + } + + /// Tries to acquire a shared lock on the file. + /// + /// Returns `Err(TryLockError::WouldBlock)` if an exclusive lock is already + /// held. + /// + /// # Errors + /// + /// Returns [`std::fs::TryLockError::WouldBlock`] if an exclusive lock is + /// already held, or [`std::fs::TryLockError::Error`] for other I/O errors. + #[inline] + pub async fn try_lock_shared(&self) -> core::result::Result<(), TryLockError> { + self.inner.try_lock_shared().await + } + + /// Releases all locks on the file. + /// + /// All locks are also released when the file is closed. + /// + /// # Errors + /// + /// Returns an error if the unlock operation fails due to an I/O error. + #[inline] + pub async fn unlock(&self) -> Result<()> { + self.inner.unlock().await + } + + /// Creates a new `ReadOnlyPositionalFile` instance that shares the same + /// underlying file handle. + /// + /// # Errors + /// + /// Returns an error if the clone operation fails due to an I/O error. + #[inline] + pub async fn try_clone(&self) -> Result { + Ok(Self { + inner: self.inner.try_clone().await?, + }) + } + + /// Returns `true` if the underlying file descriptor refers to a terminal. 
+ #[must_use] + #[inline] + pub fn is_terminal(&self) -> bool { + self.inner.is_terminal() + } +} + +impl HasMemory for ReadOnlyPositionalFile { + fn memory(&self) -> impl MemoryShared { + self.inner.memory().clone() + } +} + +impl Memory for ReadOnlyPositionalFile { + fn reserve(&self, min_bytes: usize) -> BytesBuf { + self.inner.memory().reserve(min_bytes) + } +} + +#[cfg(unix)] +impl std::os::unix::io::AsRawFd for ReadOnlyPositionalFile { + fn as_raw_fd(&self) -> std::os::unix::io::RawFd { + self.inner.as_raw_fd() + } +} + +#[cfg(unix)] +impl std::os::unix::io::AsFd for ReadOnlyPositionalFile { + fn as_fd(&self) -> std::os::unix::io::BorrowedFd<'_> { + self.inner.as_fd() + } +} + +#[cfg(windows)] +impl std::os::windows::io::AsRawHandle for ReadOnlyPositionalFile { + fn as_raw_handle(&self) -> std::os::windows::io::RawHandle { + self.inner.as_raw_handle() + } +} + +#[cfg(windows)] +impl std::os::windows::io::AsHandle for ReadOnlyPositionalFile { + fn as_handle(&self) -> std::os::windows::io::BorrowedHandle<'_> { + self.inner.as_handle() + } +} + +impl From for ReadOnlyPositionalFile { + /// Converts a [`PositionalFile`](crate::PositionalFile) into a + /// `ReadOnlyPositionalFile`, narrowing the capability to read-only + /// positional access. + fn from(file: crate::positional_file::PositionalFile) -> Self { + Self { inner: file.into_inner() } + } +} diff --git a/crates/file/src/root.rs b/crates/file/src/root.rs new file mode 100644 index 000000000..dd73be1b2 --- /dev/null +++ b/crates/file/src/root.rs @@ -0,0 +1,43 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +use std::io::{Error, ErrorKind, Result}; +use std::path::Path; + +use sync_thunk::{Thunker, thunk}; + +use crate::directory::Directory; + +/// The entry point for capability-based filesystem access. +/// +/// `Root` provides the sole mechanism for obtaining a [`Directory`] capability. 
+/// Once a directory is bound, all filesystem operations are scoped to that +/// directory and its descendants. +#[derive(Debug)] +pub struct Root; + +#[expect(missing_docs, reason = "thunk macro generates a blocking helper without its own doc comment")] +impl Root { + /// Binds to a directory on the filesystem, returning a [`Directory`] capability. + /// + /// The given path is the only point where an absolute or arbitrary path is accepted. + /// All subsequent filesystem operations through the returned `Directory` use + /// paths relative to this root. + /// + /// The provided [`Thunker`] will be used for all blocking I/O dispatched by + /// the returned directory and any files opened through it. + /// + /// # Errors + /// + /// Returns an error if the path does not exist, is not a directory, or if + /// the process lacks permission to access it. + #[thunk(from = thunker)] + pub async fn bind_std(thunker: &Thunker, path: &Path) -> Result { + let canonical = std::fs::canonicalize(path)?; + let metadata = std::fs::metadata(&canonical)?; + if !metadata.is_dir() { + return Err(Error::new(ErrorKind::NotADirectory, "path is not a directory")); + } + Ok(Directory::new(canonical, thunker.clone())) + } +} diff --git a/crates/file/src/shared_memory.rs b/crates/file/src/shared_memory.rs new file mode 100644 index 000000000..4c6329933 --- /dev/null +++ b/crates/file/src/shared_memory.rs @@ -0,0 +1,55 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License.
+ +use std::fmt; +use std::sync::Arc; + +use bytesbuf::BytesBuf; +use bytesbuf::mem::{GlobalPool, Memory, MemoryShared}; + +/// A type-erased, cloneable, thread-safe memory provider. +/// +/// Uses an enum to avoid `Arc` allocation and dynamic dispatch for the +/// common [`GlobalPool`] case, falling back to `Arc` for custom +/// memory providers. +#[derive(Clone)] +pub struct SharedMemory { + inner: SharedMemoryInner, +} + +#[derive(Clone)] +enum SharedMemoryInner { + Global(GlobalPool), + Custom(Arc BytesBuf + Send + Sync>), +} + +impl SharedMemory { + /// Creates a `SharedMemory` from any `MemoryShared` implementation. + pub fn new(memory: impl MemoryShared) -> Self { + Self { + inner: SharedMemoryInner::Custom(Arc::new(move |min_bytes| memory.reserve(min_bytes))), + } + } + + /// Creates a `SharedMemory` backed by the default [`GlobalPool`]. + pub fn global() -> Self { + Self { + inner: SharedMemoryInner::Global(GlobalPool::new()), + } + } +} + +impl fmt::Debug for SharedMemory { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("SharedMemory").finish_non_exhaustive() + } +} + +impl Memory for SharedMemory { + fn reserve(&self, min_bytes: usize) -> BytesBuf { + match &self.inner { + SharedMemoryInner::Global(pool) => pool.reserve(min_bytes), + SharedMemoryInner::Custom(f) => f(min_bytes), + } + } +} diff --git a/crates/file/src/write_only_file.rs b/crates/file/src/write_only_file.rs new file mode 100644 index 000000000..d0ada0f66 --- /dev/null +++ b/crates/file/src/write_only_file.rs @@ -0,0 +1,399 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +use std::fs::{FileTimes, Metadata, Permissions, TryLockError}; +use std::io::{Error, Result, SeekFrom}; +use std::path::Path; + +use bytesbuf::mem::{HasMemory, Memory, MemoryShared}; +use bytesbuf::{BytesBuf, BytesView}; + +use crate::directory::Directory; +use crate::file_inner::FileInner; +use crate::shared_memory::SharedMemory; + +/// A seekable write-only file handle within a capability-based filesystem. +/// +/// A `WriteOnlyFile` provides write access to a file. It implements +/// [`bytesbuf_io::Write`] for streaming writes using managed buffers.
+/// +/// Obtain a `WriteOnlyFile` by calling [`WriteOnlyFile::create`] or +/// [`WriteOnlyFile::create_new`]. +#[derive(Debug)] +pub struct WriteOnlyFile { + inner: FileInner, +} + +impl WriteOnlyFile { + /// Opens a file in write-only mode. + /// + /// This function will create the file if it does not exist, and will truncate + /// it if it does. The path is relative to the given directory capability. + /// + /// # Errors + /// + /// Returns an error if the file cannot be created or opened. + #[inline] + pub async fn create(dir: &Directory, path: impl AsRef) -> Result { + Ok(Self { + inner: FileInner::create_writeonly(dir, path, SharedMemory::global()).await?, + }) + } + + /// Opens a file in write-only mode using the specified memory provider. + /// + /// This function will create the file if it does not exist, and will truncate + /// it if it does. The custom memory provider allows the caller to control + /// buffer allocation, enabling zero-copy transfers from other subsystems that + /// share the same memory provider. + /// + /// # Errors + /// + /// Returns an error if the file cannot be created or opened. + #[inline] + pub async fn create_with_memory(dir: &Directory, path: impl AsRef, memory: impl MemoryShared) -> Result { + Ok(Self { + inner: FileInner::create_writeonly(dir, path, SharedMemory::new(memory)).await?, + }) + } + + /// Creates a new file in write-only mode; returns an error if the file exists. + /// + /// If the call succeeds, the file returned is guaranteed to be new. This option + /// is useful because it is atomic. Otherwise, between checking whether a file + /// exists and creating a new one, the file may have been created by another + /// process (a TOCTOU race condition). + /// + /// # Errors + /// + /// Returns an error if the file already exists or cannot be created. 
+ #[inline] + pub async fn create_new(dir: &Directory, path: impl AsRef) -> Result { + Ok(Self { + inner: FileInner::create_new_writeonly(dir, path, SharedMemory::global()).await?, + }) + } + + /// Creates a new file in write-only mode using the specified memory provider; + /// returns an error if the file exists. + /// + /// Combines the atomicity guarantees of [`WriteOnlyFile::create_new`] with a + /// custom memory provider for zero-copy transfers. + /// + /// # Errors + /// + /// Returns an error if the file already exists or cannot be created. + #[inline] + pub async fn create_new_with_memory(dir: &Directory, path: impl AsRef, memory: impl MemoryShared) -> Result { + Ok(Self { + inner: FileInner::create_new_writeonly(dir, path, SharedMemory::new(memory)).await?, + }) + } + + /// Writes the provided byte sequence to the file. + /// + /// The method completes when all bytes have been written. Partial writes are + /// considered a failure. + /// + /// For optimal efficiency, the data should originate from buffers allocated via + /// this file's memory provider (see [`Memory::reserve`] or [`HasMemory::memory`]). + /// + /// # Errors + /// + /// Returns an error if the write operation fails. + #[inline] + pub async fn write(&mut self, data: BytesView) -> Result<()> { + self.inner.write(data).await + } + + /// Writes a byte slice to the file at the current cursor position. + /// + /// This is a convenience method for callers working with `&[u8]` rather than + /// managed buffers. The data is copied internally to transfer it to the + /// worker thread; prefer [`write`](Self::write) with [`BytesView`] for + /// large or performance-sensitive writes. + /// + /// # Errors + /// + /// Returns an error if the write operation fails. + #[inline] + pub async fn write_slice(&mut self, data: impl AsRef<[u8]>) -> Result<()> { + self.inner.write_slice(data.as_ref()).await + } + + /// Queries metadata about the underlying file. 
+ /// + /// # Errors + /// + /// Returns an error if the metadata cannot be retrieved. + #[inline] + pub async fn metadata(&mut self) -> Result { + self.inner.metadata().await + } + + /// Truncates or extends the underlying file, updating the size of this file + /// to become `size`. + /// + /// If `size` is less than the current file size, the file will be shrunk. + /// If it is greater, the file will be extended with zeroes. The file's cursor + /// isn't changed. + /// + /// # Errors + /// + /// Returns an error if the operation fails. + #[inline] + pub async fn set_len(&mut self, size: u64) -> Result<()> { + self.inner.set_len(size).await + } + + /// Changes the modification time of the underlying file. + /// + /// # Errors + /// + /// Returns an error if the operation fails. + #[inline] + pub async fn set_modified(&mut self, modified: std::time::SystemTime) -> Result<()> { + self.inner.set_modified(modified).await + } + + /// Changes the permissions on the underlying file. + /// + /// # Errors + /// + /// Returns an error if the operation fails. + #[inline] + pub async fn set_permissions(&mut self, perms: Permissions) -> Result<()> { + self.inner.set_permissions(perms).await + } + + /// Changes the timestamps of the underlying file. + /// + /// # Errors + /// + /// Returns an error if the operation fails. + #[inline] + pub async fn set_times(&mut self, times: FileTimes) -> Result<()> { + self.inner.set_times(times).await + } + + /// Attempts to sync all OS-internal file content and metadata to disk. + /// + /// # Errors + /// + /// Returns an error if the sync operation fails. + #[inline] + pub async fn sync_all(&mut self) -> Result<()> { + self.inner.sync_all().await + } + + /// Similar to [`WriteOnlyFile::sync_all`], except that it might not synchronize + /// file metadata to the filesystem. + /// + /// # Errors + /// + /// Returns an error if the sync operation fails. 
+ #[inline] + pub async fn sync_data(&mut self) -> Result<()> { + self.inner.sync_data().await + } + + /// Flushes any buffered data to the underlying file. + /// + /// Call this before dropping to ensure all data is written. + /// + /// # Errors + /// + /// Returns an error if the flush operation fails. + #[inline] + pub async fn flush(&mut self) -> Result<()> { + self.inner.flush().await + } + + /// Acquires an exclusive lock on the file. + /// + /// Blocks until the lock can be acquired. + /// + /// # Errors + /// + /// Returns an error if the lock cannot be acquired. + #[inline] + pub async fn lock(&mut self) -> Result<()> { + self.inner.lock().await + } + + /// Acquires a shared (non-exclusive) lock on the file. + /// + /// Blocks until the lock can be acquired. + /// + /// # Errors + /// + /// Returns an error if the lock cannot be acquired. + #[inline] + pub async fn lock_shared(&mut self) -> Result<()> { + self.inner.lock_shared().await + } + + /// Tries to acquire an exclusive lock on the file. + /// + /// Returns `Err(TryLockError::WouldBlock)` if a different lock is already held. + /// + /// # Errors + /// + /// Returns [`std::fs::TryLockError::WouldBlock`] if the lock is held by another + /// process, or [`std::fs::TryLockError::Error`] if the operation fails. + #[inline] + pub async fn try_lock(&mut self) -> core::result::Result<(), TryLockError> { + self.inner.try_lock().await + } + + /// Tries to acquire a shared lock on the file. + /// + /// Returns `Err(TryLockError::WouldBlock)` if an exclusive lock is already held. + /// + /// # Errors + /// + /// Returns [`std::fs::TryLockError::WouldBlock`] if an exclusive lock is held + /// by another process, or [`std::fs::TryLockError::Error`] if the operation fails. + #[inline] + pub async fn try_lock_shared(&mut self) -> core::result::Result<(), TryLockError> { + self.inner.try_lock_shared().await + } + + /// Releases all locks on the file. 
+ /// + /// # Errors + /// + /// Returns an error if the unlock operation fails. + #[inline] + pub async fn unlock(&mut self) -> Result<()> { + self.inner.unlock().await + } + + /// Seeks to a position in the file. + /// + /// The new position, measured in bytes from the start of the file, is returned. + /// + /// # Errors + /// + /// Returns an error if the seek operation fails. + #[inline] + pub async fn seek(&mut self, pos: SeekFrom) -> Result { + self.inner.seek(pos).await + } + + /// Returns the current seek position from the start of the file. + /// + /// # Errors + /// + /// Returns an error if the operation fails. + #[inline] + pub async fn stream_position(&mut self) -> Result { + self.inner.stream_position().await + } + + /// Rewinds to the beginning of the file. + /// + /// # Errors + /// + /// Returns an error if the seek operation fails. + #[inline] + pub async fn rewind(&mut self) -> Result<()> { + self.inner.rewind().await + } + + /// Creates a new `WriteOnlyFile` instance that shares the same underlying + /// file handle. + /// + /// Writes and seeks will affect both instances simultaneously. + /// + /// # Errors + /// + /// Returns an error if the file handle cannot be cloned. + #[inline] + pub async fn try_clone(&mut self) -> Result { + Ok(Self { + inner: self.inner.try_clone().await?, + }) + } + + /// Returns `true` if the underlying file descriptor refers to a terminal. 
+ #[must_use] + #[inline] + pub fn is_terminal(&self) -> bool { + self.inner.is_terminal() + } +} + +impl HasMemory for WriteOnlyFile { + fn memory(&self) -> impl MemoryShared { + self.inner.memory().clone() + } +} + +impl Memory for WriteOnlyFile { + fn reserve(&self, min_bytes: usize) -> BytesBuf { + self.inner.memory().reserve(min_bytes) + } +} + +impl bytesbuf_io::Write for WriteOnlyFile { + type Error = Error; + + async fn write(&mut self, data: BytesView) -> core::result::Result<(), Self::Error> { + Self::write(self, data).await + } +} + +#[cfg(feature = "sync-compat")] +impl std::io::Write for WriteOnlyFile { + fn write(&mut self, buf: &[u8]) -> Result { + self.inner.sync_write(buf) + } + + fn flush(&mut self) -> Result<()> { + self.inner.sync_flush() + } +} + +#[cfg(feature = "sync-compat")] +impl std::io::Seek for WriteOnlyFile { + fn seek(&mut self, pos: SeekFrom) -> Result { + self.inner.sync_seek(pos) + } +} + +#[cfg(unix)] +impl std::os::unix::io::AsRawFd for WriteOnlyFile { + fn as_raw_fd(&self) -> std::os::unix::io::RawFd { + self.inner.as_raw_fd() + } +} + +#[cfg(unix)] +impl std::os::unix::io::AsFd for WriteOnlyFile { + fn as_fd(&self) -> std::os::unix::io::BorrowedFd<'_> { + self.inner.as_fd() + } +} + +#[cfg(windows)] +impl std::os::windows::io::AsRawHandle for WriteOnlyFile { + fn as_raw_handle(&self) -> std::os::windows::io::RawHandle { + self.inner.as_raw_handle() + } +} + +#[cfg(windows)] +impl std::os::windows::io::AsHandle for WriteOnlyFile { + fn as_handle(&self) -> std::os::windows::io::BorrowedHandle<'_> { + self.inner.as_handle() + } +} + +impl From for WriteOnlyFile { + /// Converts a [`File`](crate::File) into a `WriteOnlyFile`, + /// narrowing the capability to write-only access. 
+    fn from(file: crate::file::File) -> Self {
+        Self { inner: file.into_inner() }
+    }
+}
diff --git a/crates/file/src/write_only_positional_file.rs b/crates/file/src/write_only_positional_file.rs
new file mode 100644
index 000000000..08ed61988
--- /dev/null
+++ b/crates/file/src/write_only_positional_file.rs
@@ -0,0 +1,337 @@
+// Copyright (c) Microsoft Corporation.
+// Licensed under the MIT License.
+
+use std::fs::{FileTimes, Metadata, Permissions, TryLockError};
+use std::io::Result;
+use std::path::Path;
+
+use bytesbuf::mem::{HasMemory, Memory, MemoryShared};
+use bytesbuf::{BytesBuf, BytesView};
+
+use crate::directory::Directory;
+use crate::positional_file_inner::PositionalFileInner;
+use crate::shared_memory::SharedMemory;
+
+/// A positional write-only file handle within a capability-based filesystem.
+///
+/// All I/O methods take `&self` and operate at explicit byte offsets, enabling
+/// concurrent access from multiple tasks without cursor management.
+///
+/// Obtain a `WriteOnlyPositionalFile` by calling [`WriteOnlyPositionalFile::create`]
+/// or [`WriteOnlyPositionalFile::create_new`], or by narrowing a
+/// [`PositionalFile`](crate::PositionalFile) via [`From`].
+#[derive(Debug)]
+pub struct WriteOnlyPositionalFile {
+    inner: PositionalFileInner,
+}
+
+impl WriteOnlyPositionalFile {
+    /// Opens a file in write-only mode.
+    ///
+    /// This function will create the file if it does not exist, and will truncate
+    /// it if it does. The path is relative to the given directory capability.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if the file cannot be created or opened.
+    #[inline]
+    pub async fn create(dir: &Directory, path: impl AsRef<Path>) -> Result<Self> {
+        Ok(Self {
+            inner: PositionalFileInner::create_writeonly(dir, path, SharedMemory::global()).await?,
+        })
+    }
+
+    /// Opens a file in write-only mode using the specified memory provider.
+    ///
+    /// This function will create the file if it does not exist, and will truncate
+    /// it if it does. The custom memory provider allows the caller to control
+    /// buffer allocation, enabling zero-copy transfers from other subsystems that
+    /// share the same memory provider.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if the file cannot be created or opened.
+    #[inline]
+    pub async fn create_with_memory(dir: &Directory, path: impl AsRef<Path>, memory: impl MemoryShared) -> Result<Self> {
+        Ok(Self {
+            inner: PositionalFileInner::create_writeonly(dir, path, SharedMemory::new(memory)).await?,
+        })
+    }
+
+    /// Creates a new file in write-only mode; returns an error if the file exists.
+    ///
+    /// If the call succeeds, the file returned is guaranteed to be new. This option
+    /// is useful because it is atomic. Otherwise, between checking whether a file
+    /// exists and creating a new one, the file may have been created by another
+    /// process (a TOCTOU race condition).
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if the file already exists or cannot be created.
+    #[inline]
+    pub async fn create_new(dir: &Directory, path: impl AsRef<Path>) -> Result<Self> {
+        Ok(Self {
+            inner: PositionalFileInner::create_new_writeonly(dir, path, SharedMemory::global()).await?,
+        })
+    }
+
+    /// Creates a new file in write-only mode using the specified memory provider;
+    /// returns an error if the file exists.
+    ///
+    /// Combines the atomicity guarantees of [`WriteOnlyPositionalFile::create_new`]
+    /// with a custom memory provider for zero-copy transfers.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if the file already exists or cannot be created.
+    #[inline]
+    pub async fn create_new_with_memory(dir: &Directory, path: impl AsRef<Path>, memory: impl MemoryShared) -> Result<Self> {
+        Ok(Self {
+            inner: PositionalFileInner::create_new_writeonly(dir, path, SharedMemory::new(memory)).await?,
+        })
+    }
+
+    /// Writes the provided byte sequence to the file at `offset`.
+    ///
+    /// The method completes when all bytes have been written. Partial writes are
+    /// retried automatically.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if the write operation fails.
+    #[inline]
+    pub async fn write_at(&self, offset: u64, data: BytesView) -> Result<()> {
+        self.inner.write_at(offset, data).await
+    }
+
+    /// Writes a byte slice to the file at `offset`.
+    ///
+    /// This is a convenience method for callers working with `&[u8]` rather than
+    /// managed buffers. The data is copied internally to transfer it to the
+    /// worker thread; prefer [`write_at`](Self::write_at) with [`BytesView`] for
+    /// large or performance-sensitive writes.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if the write operation fails.
+    #[inline]
+    pub async fn write_slice_at(&self, offset: u64, data: impl AsRef<[u8]>) -> Result<()> {
+        self.inner.write_slice_at(offset, data.as_ref()).await
+    }
+
+    /// Queries metadata about the underlying file.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if the metadata cannot be retrieved.
+    #[inline]
+    pub async fn metadata(&self) -> Result<Metadata> {
+        self.inner.metadata().await
+    }
+
+    /// Truncates or extends the underlying file, updating the size of this file
+    /// to become `size`.
+    ///
+    /// If `size` is less than the current file size, the file will be shrunk.
+    /// If it is greater, the file will be extended with zeroes.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if the operation fails.
+    #[inline]
+    pub async fn set_len(&self, size: u64) -> Result<()> {
+        self.inner.set_len(size).await
+    }
+
+    /// Changes the modification time of the underlying file.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if the operation fails.
+    #[inline]
+    pub async fn set_modified(&self, modified: std::time::SystemTime) -> Result<()> {
+        self.inner.set_modified(modified).await
+    }
+
+    /// Changes the permissions on the underlying file.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if the operation fails.
+    #[inline]
+    pub async fn set_permissions(&self, perms: Permissions) -> Result<()> {
+        self.inner.set_permissions(perms).await
+    }
+
+    /// Changes the timestamps of the underlying file.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if the operation fails.
+    #[inline]
+    pub async fn set_times(&self, times: FileTimes) -> Result<()> {
+        self.inner.set_times(times).await
+    }
+
+    /// Attempts to sync all OS-internal file content and metadata to disk.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if the sync operation fails.
+    #[inline]
+    pub async fn sync_all(&self) -> Result<()> {
+        self.inner.sync_all().await
+    }
+
+    /// Similar to [`WriteOnlyPositionalFile::sync_all`], except that it might not
+    /// synchronize file metadata to the filesystem.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if the sync operation fails.
+    #[inline]
+    pub async fn sync_data(&self) -> Result<()> {
+        self.inner.sync_data().await
+    }
+
+    /// Flushes any buffered data to the underlying file.
+    ///
+    /// Call this before dropping to ensure all data is written.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if the flush operation fails.
+    #[inline]
+    pub async fn flush(&self) -> Result<()> {
+        self.inner.flush().await
+    }
+
+    /// Acquires an exclusive lock on the file.
+    ///
+    /// The returned future completes once the lock has been acquired.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if the lock cannot be acquired.
+    #[inline]
+    pub async fn lock(&self) -> Result<()> {
+        self.inner.lock().await
+    }
+
+    /// Acquires a shared (non-exclusive) lock on the file.
+    ///
+    /// The returned future completes once the lock has been acquired.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if the lock cannot be acquired.
+    #[inline]
+    pub async fn lock_shared(&self) -> Result<()> {
+        self.inner.lock_shared().await
+    }
+
+    /// Tries to acquire an exclusive lock on the file.
+    ///
+    /// Returns `Err(TryLockError::WouldBlock)` if a different lock is already held.
+    ///
+    /// # Errors
+    ///
+    /// Returns [`std::fs::TryLockError::WouldBlock`] if the lock is held by another
+    /// process, or [`std::fs::TryLockError::Error`] if the operation fails.
+    #[inline]
+    pub async fn try_lock(&self) -> core::result::Result<(), TryLockError> {
+        self.inner.try_lock().await
+    }
+
+    /// Tries to acquire a shared lock on the file.
+    ///
+    /// Returns `Err(TryLockError::WouldBlock)` if an exclusive lock is already held.
+    ///
+    /// # Errors
+    ///
+    /// Returns [`std::fs::TryLockError::WouldBlock`] if an exclusive lock is held
+    /// by another process, or [`std::fs::TryLockError::Error`] if the operation fails.
+    #[inline]
+    pub async fn try_lock_shared(&self) -> core::result::Result<(), TryLockError> {
+        self.inner.try_lock_shared().await
+    }
+
+    /// Releases all locks on the file.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if the unlock operation fails.
+    #[inline]
+    pub async fn unlock(&self) -> Result<()> {
+        self.inner.unlock().await
+    }
+
+    /// Creates a new `WriteOnlyPositionalFile` instance that shares the same
+    /// underlying file handle.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if the file handle cannot be cloned.
+    #[inline]
+    pub async fn try_clone(&self) -> Result<Self> {
+        Ok(Self {
+            inner: self.inner.try_clone().await?,
+        })
+    }
+
+    /// Returns `true` if the underlying file descriptor refers to a terminal.
+    #[must_use]
+    #[inline]
+    pub fn is_terminal(&self) -> bool {
+        self.inner.is_terminal()
+    }
+}
+
+impl HasMemory for WriteOnlyPositionalFile {
+    fn memory(&self) -> impl MemoryShared {
+        self.inner.memory().clone() // hands out a clone of the inner handle's memory provider
+    }
+}
+
+impl Memory for WriteOnlyPositionalFile {
+    fn reserve(&self, min_bytes: usize) -> BytesBuf {
+        self.inner.memory().reserve(min_bytes) // reserves from the same provider `memory()` exposes
+    }
+}
+
+#[cfg(unix)]
+impl std::os::unix::io::AsRawFd for WriteOnlyPositionalFile {
+    fn as_raw_fd(&self) -> std::os::unix::io::RawFd {
+        self.inner.as_raw_fd()
+    }
+}
+
+#[cfg(unix)]
+impl std::os::unix::io::AsFd for WriteOnlyPositionalFile {
+    fn as_fd(&self) -> std::os::unix::io::BorrowedFd<'_> {
+        self.inner.as_fd()
+    }
+}
+
+#[cfg(windows)]
+impl std::os::windows::io::AsRawHandle for WriteOnlyPositionalFile {
+    fn as_raw_handle(&self) -> std::os::windows::io::RawHandle {
+        self.inner.as_raw_handle()
+    }
+}
+
+#[cfg(windows)]
+impl std::os::windows::io::AsHandle for WriteOnlyPositionalFile {
+    fn as_handle(&self) -> std::os::windows::io::BorrowedHandle<'_> {
+        self.inner.as_handle()
+    }
+}
+
+impl From<crate::positional_file::PositionalFile> for WriteOnlyPositionalFile {
+    /// Converts a [`PositionalFile`](crate::PositionalFile) into a
+    /// `WriteOnlyPositionalFile`, narrowing the capability to write-only
+    /// positional access.
+    fn from(file: crate::positional_file::PositionalFile) -> Self {
+        Self { inner: file.into_inner() }
+    }
+}
diff --git a/crates/file/tests/integration.rs b/crates/file/tests/integration.rs
new file mode 100644
index 000000000..84a240753
--- /dev/null
+++ b/crates/file/tests/integration.rs
@@ -0,0 +1,2039 @@
+// Copyright (c) Microsoft Corporation.
+// Licensed under the MIT License.
+
+// Skip under Miri — these tests perform real filesystem I/O which Miri cannot emulate.
+#![cfg(not(miri))]
+
+#![allow(clippy::unwrap_used, reason = "Tests use unwrap for brevity")]
+#![allow(clippy::missing_panics_doc, reason = "Tests")]
+#![allow(clippy::missing_errors_doc, reason = "Tests")]
+#![allow(unused_results, reason = "Tests")]
+#![allow(clippy::must_use_candidate, reason = "Tests")]
+#![allow(clippy::needless_pass_by_value, reason = "Tests")]
+#![allow(clippy::string_slice, reason = "Tests")]
+#![allow(missing_docs, reason = "Tests")]
+#![allow(clippy::assertions_on_result_states, reason = "Tests use assert!(x.is_err()) for clarity")]
+#![allow(clippy::std_instead_of_core, reason = "Tests prefer std imports")]
+#![allow(clippy::filetype_is_file, reason = "Test intentionally checks is_file()")]
+
+use std::ffi::OsString;
+use std::path::Path;
+use std::time::{Duration, SystemTime};
+
+use bytesbuf::mem::GlobalPool;
+use file::{
+    DirBuilder, File, OpenOptions, PositionalFile, ReadOnlyFile, ReadOnlyPositionalFile, Root, SeekFrom, WriteOnlyFile,
+    WriteOnlyPositionalFile,
+};
+use sync_thunk::Thunker;
+use tempfile::TempDir;
+
+// ---------------------------------------------------------------------------
+// Helpers shared by every test module below (`setup`, `make_view`)
+// ---------------------------------------------------------------------------
+
+async fn setup() -> (TempDir, file::Directory) { // fresh temp dir bound as a `Directory`; both are returned and callers hold the `TempDir` as `_tmp`
+    let tmp = TempDir::new().unwrap();
+    let thunker = Thunker::builder().build();
+    let dir = Root::bind_std(&thunker, tmp.path()).await.unwrap();
+    (tmp, dir)
+}
+
+fn make_view(data: &[u8]) -> bytesbuf::BytesView { // copies `data` into a buffer reserved from a `GlobalPool` and returns it as a `BytesView`
+    let mem = GlobalPool::new();
+    let mut buf = mem.reserve(data.len());
+    buf.put_slice(data);
+    buf.consume_all()
+}
+
+// ===========================================================================
+// Root tests: `Root::bind_std` on a directory, a missing path, and a regular file
+// ===========================================================================
+
+mod root {
+    use super::*;
+
+    #[tokio::test]
+    async fn bind_to_valid_directory_succeeds() {
+        let tmp = TempDir::new().unwrap();
+        let thunker = Thunker::builder().build();
+        let _dir =
Root::bind_std(&thunker, tmp.path()).await.unwrap();
+    }
+
+    #[tokio::test]
+    async fn bind_to_non_existent_path_fails() {
+        let thunker = Thunker::builder().build();
+        let result = Root::bind_std(&thunker, Path::new("/tmp/__nonexistent_path_12345__")).await; // NOTE(review): hard-coded host path outside any TempDir; assumes nothing creates it
+        assert!(result.is_err());
+    }
+
+    #[tokio::test]
+    async fn bind_to_file_fails() {
+        let tmp = TempDir::new().unwrap();
+        let file_path = tmp.path().join("a_file.txt");
+        std::fs::write(&file_path, b"hello").unwrap();
+        let thunker = Thunker::builder().build();
+        let result = Root::bind_std(&thunker, &file_path).await;
+        assert!(result.is_err());
+    }
+}
+
+// ===========================================================================
+// Directory tests: path-based operations on a bound `Directory`
+// ===========================================================================
+
+mod directory {
+    use super::*;
+
+    #[tokio::test]
+    async fn create_dir_and_exists() {
+        let (_tmp, dir) = setup().await;
+        dir.create_dir("sub").await.unwrap();
+        assert!(dir.exists("sub").await.unwrap());
+    }
+
+    #[tokio::test]
+    async fn create_dir_all_nested() {
+        let (_tmp, dir) = setup().await;
+        dir.create_dir_all("a/b/c").await.unwrap();
+        assert!(dir.exists("a/b/c").await.unwrap());
+    }
+
+    #[tokio::test]
+    async fn read_and_write_bytes_view() {
+        let (_tmp, dir) = setup().await;
+        let data = make_view(b"hello bytes");
+        dir.write("file.bin", data).await.unwrap();
+        let view = dir.read("file.bin").await.unwrap();
+        let mut collected = Vec::new();
+        let mut v = view;
+        while !v.is_empty() { // drain the view slice by slice; it is not assumed to be one contiguous slice
+            let s = v.first_slice();
+            collected.extend_from_slice(s);
+            let len = s.len();
+            v.advance(len);
+        }
+        assert_eq!(collected, b"hello bytes");
+    }
+
+    #[tokio::test]
+    async fn read_to_string_round_trip() {
+        let (_tmp, dir) = setup().await;
+        dir.write_slice("greeting.txt", b"Good morning").await.unwrap();
+        let s = dir.read_to_string("greeting.txt").await.unwrap();
+        assert_eq!(s, "Good morning");
+    }
+
+    #[tokio::test]
+    async fn write_slice_and_read() {
+        let (_tmp, dir) = setup().await;
+        dir.write_slice("data.txt", b"slice data").await.unwrap();
+        let s = dir.read_to_string("data.txt").await.unwrap();
+        assert_eq!(s, "slice data");
+    }
+
+    #[tokio::test]
+    async fn read_with_custom_memory() {
+        let (_tmp, dir) = setup().await;
+        dir.write_slice("mem.txt", b"custom memory").await.unwrap();
+        let mem = GlobalPool::new();
+        let view = dir.read_with_memory("mem.txt", mem).await.unwrap();
+        let mut collected = Vec::new();
+        let mut v = view;
+        while !v.is_empty() { // same slice-by-slice drain as in `read_and_write_bytes_view`
+            let s = v.first_slice();
+            collected.extend_from_slice(s);
+            let len = s.len();
+            v.advance(len);
+        }
+        assert_eq!(collected, b"custom memory");
+    }
+
+    #[tokio::test]
+    async fn exists_false_for_nonexistent() {
+        let (_tmp, dir) = setup().await;
+        assert!(!dir.exists("nope.txt").await.unwrap());
+    }
+
+    #[tokio::test]
+    async fn metadata_returns_info() {
+        let (_tmp, dir) = setup().await;
+        dir.write_slice("m.txt", b"12345").await.unwrap();
+        let md = dir.metadata("m.txt").await.unwrap();
+        assert!(md.is_file());
+        assert_eq!(md.len(), 5);
+    }
+
+    #[tokio::test]
+    async fn symlink_metadata() {
+        let (_tmp, dir) = setup().await;
+        dir.write_slice("orig.txt", b"content").await.unwrap();
+        let md = dir.symlink_metadata("orig.txt").await.unwrap(); // NOTE(review): target is a regular file, so this cannot tell `symlink_metadata` apart from `metadata`
+        assert!(md.is_file());
+    }
+
+    #[tokio::test]
+    async fn remove_file_works() {
+        let (_tmp, dir) = setup().await;
+        dir.write_slice("delete_me.txt", b"bye").await.unwrap();
+        assert!(dir.exists("delete_me.txt").await.unwrap());
+        dir.remove_file("delete_me.txt").await.unwrap();
+        assert!(!dir.exists("delete_me.txt").await.unwrap());
+    }
+
+    #[tokio::test]
+    async fn remove_dir_empty() {
+        let (_tmp, dir) = setup().await;
+        dir.create_dir("empty").await.unwrap();
+        dir.remove_dir("empty").await.unwrap();
+        assert!(!dir.exists("empty").await.unwrap());
+    }
+
+    #[tokio::test]
+    async fn remove_dir_all_recursive() {
+        let (_tmp, dir) = setup().await;
+        dir.create_dir_all("tree/branch").await.unwrap();
+        dir.write_slice("tree/branch/leaf.txt", b"leaf").await.unwrap();
+        dir.remove_dir_all("tree").await.unwrap();
+        assert!(!dir.exists("tree").await.unwrap());
+    }
+
+    #[tokio::test]
+    async fn rename_same_dir() {
+        let (_tmp, dir) = setup().await;
+        dir.write_slice("old.txt", b"data").await.unwrap();
+        dir.rename("old.txt", &dir, "new.txt").await.unwrap();
+        assert!(!dir.exists("old.txt").await.unwrap());
+        let s = dir.read_to_string("new.txt").await.unwrap();
+        assert_eq!(s, "data");
+    }
+
+    #[tokio::test]
+    async fn rename_cross_dir() {
+        let (_tmp, dir) = setup().await;
+        dir.create_dir("src_dir").await.unwrap();
+        dir.create_dir("dst_dir").await.unwrap();
+        dir.write_slice("src_dir/f.txt", b"moved").await.unwrap();
+        let src = dir.open_dir("src_dir").await.unwrap();
+        let dst = dir.open_dir("dst_dir").await.unwrap();
+        src.rename("f.txt", &dst, "f.txt").await.unwrap();
+        assert!(!src.exists("f.txt").await.unwrap());
+        let s = dst.read_to_string("f.txt").await.unwrap();
+        assert_eq!(s, "moved");
+    }
+
+    #[tokio::test]
+    async fn copy_same_dir() {
+        let (_tmp, dir) = setup().await;
+        dir.write_slice("src.txt", b"copy me").await.unwrap();
+        let n = dir.copy("src.txt", &dir, "dst.txt").await.unwrap();
+        assert_eq!(n, 7); // "copy me" is 7 bytes
+        let s = dir.read_to_string("dst.txt").await.unwrap();
+        assert_eq!(s, "copy me");
+    }
+
+    #[tokio::test]
+    async fn copy_cross_dir() {
+        let (_tmp, dir) = setup().await;
+        dir.create_dir("a").await.unwrap();
+        dir.create_dir("b").await.unwrap();
+        dir.write_slice("a/f.txt", b"cross").await.unwrap();
+        let a = dir.open_dir("a").await.unwrap();
+        let b = dir.open_dir("b").await.unwrap();
+        a.copy("f.txt", &b, "f.txt").await.unwrap();
+        let s = b.read_to_string("f.txt").await.unwrap();
+        assert_eq!(s, "cross");
+    }
+
+    #[tokio::test]
+    async fn hard_link_works() {
+        let (_tmp, dir) = setup().await;
+        dir.write_slice("original.txt", b"linked").await.unwrap();
+        dir.hard_link("original.txt", &dir, "link.txt").await.unwrap();
+        let s = dir.read_to_string("link.txt").await.unwrap();
+        assert_eq!(s, "linked");
+    }
+
+    #[tokio::test]
+    async fn canonicalize_returns_path() {
+        let (_tmp, dir) = setup().await;
+        dir.write_slice("c.txt", b"x").await.unwrap();
+        let canon = dir.canonicalize("c.txt").await.unwrap();
+        assert!(canon.is_absolute());
+    }
+
+    #[tokio::test]
+    async fn read_dir_multiple_entries() {
+        let (_tmp, dir) = setup().await;
+        dir.write_slice("a.txt", b"a").await.unwrap();
+        dir.write_slice("b.txt", b"b").await.unwrap();
+        dir.write_slice("c.txt", b"c").await.unwrap();
+        let mut rd = dir.read_dir(".").await.unwrap();
+        let mut names: Vec<OsString> = Vec::new();
+        while let Some(entry) = rd.next_entry().await.unwrap() {
+            names.push(entry.file_name().to_owned());
+        }
+        names.sort(); // sorted before comparing, so the test does not depend on listing order
+        assert_eq!(
+            names,
+            vec![OsString::from("a.txt"), OsString::from("b.txt"), OsString::from("c.txt")]
+        );
+    }
+
+    #[tokio::test]
+    async fn read_dir_empty() {
+        let (_tmp, dir) = setup().await;
+        dir.create_dir("empty").await.unwrap();
+        let mut rd = dir.read_dir("empty").await.unwrap();
+        assert!(rd.next_entry().await.unwrap().is_none());
+    }
+
+    #[tokio::test]
+    async fn open_dir_subdirectory() {
+        let (_tmp, dir) = setup().await;
+        dir.create_dir("sub").await.unwrap();
+        dir.write_slice("sub/f.txt", b"inner").await.unwrap();
+        let sub = dir.open_dir("sub").await.unwrap();
+        let s = sub.read_to_string("f.txt").await.unwrap();
+        assert_eq!(s, "inner");
+    }
+
+    #[tokio::test]
+    async fn path_escape_with_dotdot_rejected() {
+        let (_tmp, dir) = setup().await;
+        let result = dir.read_to_string("../etc/passwd").await;
+        assert!(result.is_err()); // NOTE(review): any error passes, including plain not-found; consider asserting the error kind
+    }
+
+    #[tokio::test]
+    async fn absolute_path_rejected() {
+        let (_tmp, dir) = setup().await;
+        let result = dir.read_to_string("/etc/passwd").await;
+        assert!(result.is_err()); // NOTE(review): any error passes; the rejection reason is not checked
+    }
+}
+
+// ===========================================================================
+// ReadOnlyFile tests
+// ===========================================================================
+
+mod
read_only_file { // each test seeds a file via `dir.write_slice`, then opens it with `ReadOnlyFile`
+    use super::*;
+
+    #[tokio::test]
+    async fn open_and_read_max() {
+        let (_tmp, dir) = setup().await;
+        dir.write_slice("r.txt", b"readable").await.unwrap();
+        let mut f = ReadOnlyFile::open(&dir, "r.txt").await.unwrap();
+        let buf = f.read_max(8192).await.unwrap();
+        assert_eq!(buf.len(), 8); // "readable" is 8 bytes
+    }
+
+    #[tokio::test]
+    async fn read_max_into_bytesbuf() {
+        let (_tmp, dir) = setup().await;
+        dir.write_slice("ram.txt", b"hello world").await.unwrap();
+        let mut f = ReadOnlyFile::open(&dir, "ram.txt").await.unwrap();
+        let mem = GlobalPool::new();
+        let mut buf = mem.reserve(32);
+        let n = f.read_max_into_bytesbuf(5, &mut buf).await.unwrap();
+        assert_eq!(n, 5); // the file holds 11 bytes; the result is expected to match the requested maximum of 5
+        assert_eq!(buf.len(), 5);
+    }
+
+    #[tokio::test]
+    async fn read_into_bytesbuf() {
+        let (_tmp, dir) = setup().await;
+        dir.write_slice("rmi.txt", b"more data here").await.unwrap();
+        let mut f = ReadOnlyFile::open(&dir, "rmi.txt").await.unwrap();
+        let mem = GlobalPool::new();
+        let mut buf = mem.reserve(128);
+        let n = f.read_into_bytesbuf(&mut buf).await.unwrap();
+        assert!(n > 0); // only checks that something was read, not how much
+    }
+
+    #[tokio::test]
+    async fn read_into_slice_works() {
+        let (_tmp, dir) = setup().await;
+        dir.write_slice("rs.txt", b"slice_test").await.unwrap();
+        let mut f = ReadOnlyFile::open(&dir, "rs.txt").await.unwrap();
+
+        let mut buf = [0u8; 5];
+        let n = f.read_into_slice(&mut buf).await.unwrap();
+        assert_eq!(n, 5);
+        assert_eq!(&buf[..n], b"slice");
+    }
+
+    #[tokio::test]
+    async fn open_with_memory() {
+        let (_tmp, dir) = setup().await;
+        dir.write_slice("owm.txt", b"with memory").await.unwrap();
+        let mem = GlobalPool::new();
+        let mut f = ReadOnlyFile::open_with_memory(&dir, "owm.txt", mem).await.unwrap();
+        let buf = f.read_max(8192).await.unwrap();
+        assert_eq!(buf.len(), 11); // "with memory" is 11 bytes
+    }
+
+    #[tokio::test]
+    async fn metadata_works() {
+        let (_tmp, dir) = setup().await;
+        dir.write_slice("md.txt", b"12345678").await.unwrap();
+        let mut f = ReadOnlyFile::open(&dir, "md.txt").await.unwrap();
+        let md = f.metadata().await.unwrap();
+        assert_eq!(md.len(), 8);
+        assert!(md.is_file());
+    }
+
+    #[tokio::test]
+    async fn seek_stream_position_rewind() {
+        let (_tmp, dir) = setup().await;
+        dir.write_slice("seek.txt", b"0123456789").await.unwrap();
+        let mut f = ReadOnlyFile::open(&dir, "seek.txt").await.unwrap();
+
+        let pos = f.seek(SeekFrom::Start(5)).await.unwrap();
+        assert_eq!(pos, 5);
+
+        let pos = f.stream_position().await.unwrap();
+        assert_eq!(pos, 5);
+
+        f.rewind().await.unwrap();
+        let pos = f.stream_position().await.unwrap();
+        assert_eq!(pos, 0);
+    }
+
+    #[tokio::test]
+    async fn try_clone_shares_state() {
+        let (_tmp, dir) = setup().await;
+        dir.write_slice("clone.txt", b"clone data").await.unwrap();
+        let mut f = ReadOnlyFile::open(&dir, "clone.txt").await.unwrap();
+        let _f2 = f.try_clone().await.unwrap(); // NOTE(review): `_f2` is never used, so shared state is not actually observed despite the test name
+        let md = f.metadata().await.unwrap();
+        assert_eq!(md.len(), 10);
+    }
+
+    #[tokio::test]
+    async fn open_nonexistent_fails() {
+        let (_tmp, dir) = setup().await;
+        let result = ReadOnlyFile::open(&dir, "nope.txt").await;
+        assert!(result.is_err());
+    }
+
+    #[tokio::test]
+    async fn lock_and_unlock() {
+        let (_tmp, dir) = setup().await;
+        dir.write_slice("lock.txt", b"x").await.unwrap();
+        let mut f = ReadOnlyFile::open(&dir, "lock.txt").await.unwrap();
+        f.lock().await.unwrap();
+        f.unlock().await.unwrap();
+    }
+
+    #[tokio::test]
+    async fn lock_shared_works() {
+        let (_tmp, dir) = setup().await;
+        dir.write_slice("slock.txt", b"x").await.unwrap();
+        let mut f = ReadOnlyFile::open(&dir, "slock.txt").await.unwrap();
+        f.lock_shared().await.unwrap();
+        f.unlock().await.unwrap();
+    }
+
+    #[tokio::test]
+    async fn try_lock_and_try_lock_shared() {
+        let (_tmp, dir) = setup().await;
+        dir.write_slice("tlock.txt", b"x").await.unwrap();
+        let mut f = ReadOnlyFile::open(&dir, "tlock.txt").await.unwrap();
+        f.try_lock().await.unwrap();
+        f.unlock().await.unwrap(); // release before the next attempt so each `try_*` starts unlocked
+        f.try_lock_shared().await.unwrap();
+        f.unlock().await.unwrap();
+    }
+}
+
+//
===========================================================================
+// WriteOnlyFile tests: content is written through the handle, then read back through `dir`
+// ===========================================================================
+
+mod write_only_file {
+    use super::*;
+
+    #[tokio::test]
+    async fn create_and_write_bytes_view() {
+        let (_tmp, dir) = setup().await;
+        let mut f = WriteOnlyFile::create(&dir, "w.txt").await.unwrap();
+        let data = make_view(b"written via BytesView");
+        f.write(data).await.unwrap();
+        drop(f); // handle is dropped before the content is read back through `dir`
+        let s = dir.read_to_string("w.txt").await.unwrap();
+        assert_eq!(s, "written via BytesView");
+    }
+
+    #[tokio::test]
+    async fn create_new_succeeds_then_fails() {
+        let (_tmp, dir) = setup().await;
+        let f = WriteOnlyFile::create_new(&dir, "new.txt").await.unwrap();
+        drop(f);
+        let result = WriteOnlyFile::create_new(&dir, "new.txt").await; // second `create_new` on the same name must fail
+        assert!(result.is_err());
+    }
+
+    #[tokio::test]
+    async fn create_with_memory_works() {
+        let (_tmp, dir) = setup().await;
+        let mem = GlobalPool::new();
+        let mut f = WriteOnlyFile::create_with_memory(&dir, "cwm.txt", mem).await.unwrap();
+        f.write_slice(b"with memory").await.unwrap();
+        drop(f);
+        let s = dir.read_to_string("cwm.txt").await.unwrap();
+        assert_eq!(s, "with memory");
+    }
+
+    #[tokio::test]
+    async fn create_new_with_memory_works() {
+        let (_tmp, dir) = setup().await;
+        let mem = GlobalPool::new();
+        let mut f = WriteOnlyFile::create_new_with_memory(&dir, "cnwm.txt", mem).await.unwrap();
+        f.write_slice(b"new with mem").await.unwrap();
+        drop(f);
+        let s = dir.read_to_string("cnwm.txt").await.unwrap();
+        assert_eq!(s, "new with mem");
+    }
+
+    #[tokio::test]
+    async fn write_slice_works() {
+        let (_tmp, dir) = setup().await;
+        let mut f = WriteOnlyFile::create(&dir, "ws.txt").await.unwrap();
+        f.write_slice(b"slice write").await.unwrap();
+        drop(f);
+        let s = dir.read_to_string("ws.txt").await.unwrap();
+        assert_eq!(s, "slice write");
+    }
+
+    #[tokio::test]
+    async fn set_len_truncate() {
+        let (_tmp, dir) = setup().await;
+        let mut f = WriteOnlyFile::create(&dir, "slt.txt").await.unwrap();
+        f.write_slice(b"1234567890").await.unwrap();
+        f.set_len(5).await.unwrap();
+        drop(f);
+        let s = dir.read_to_string("slt.txt").await.unwrap();
+        assert_eq!(s, "12345"); // first five of the ten bytes written survive the truncation
+    }
+
+    #[tokio::test]
+    async fn set_len_extend() {
+        let (_tmp, dir) = setup().await;
+        let mut f = WriteOnlyFile::create(&dir, "sle.txt").await.unwrap();
+        f.write_slice(b"AB").await.unwrap();
+        f.set_len(10).await.unwrap();
+        let md = f.metadata().await.unwrap();
+        assert_eq!(md.len(), 10); // only the new length is checked, not the fill bytes
+    }
+
+    #[tokio::test]
+    async fn flush_sync_all_sync_data() {
+        let (_tmp, dir) = setup().await;
+        let mut f = WriteOnlyFile::create(&dir, "sync.txt").await.unwrap();
+        f.write_slice(b"sync data").await.unwrap();
+        f.flush().await.unwrap();
+        f.sync_all().await.unwrap();
+        f.sync_data().await.unwrap();
+    }
+
+    #[tokio::test]
+    async fn set_permissions_works() {
+        let (_tmp, dir) = setup().await;
+        let mut f = WriteOnlyFile::create(&dir, "perms.txt").await.unwrap();
+        let md = f.metadata().await.unwrap();
+        let perms = md.permissions();
+        f.set_permissions(perms).await.unwrap(); // re-applies the permissions just read, so only the call's success is exercised
+    }
+
+    #[tokio::test]
+    async fn set_modified_works() {
+        let (_tmp, dir) = setup().await;
+        let mut f = WriteOnlyFile::create(&dir, "mod.txt").await.unwrap();
+        let t = SystemTime::now() - Duration::from_secs(3600);
+        f.set_modified(t).await.unwrap(); // NOTE(review): the timestamp is not read back; only success is checked
+    }
+
+    #[tokio::test]
+    async fn set_times_works() {
+        let (_tmp, dir) = setup().await;
+        let mut f = WriteOnlyFile::create(&dir, "times.txt").await.unwrap();
+        let now = SystemTime::now();
+        let times = file::FileTimes::new().set_modified(now).set_accessed(now);
+        f.set_times(times).await.unwrap(); // NOTE(review): the timestamps are not read back; only success is checked
+    }
+
+    #[tokio::test]
+    async fn seek_stream_position_rewind() {
+        let (_tmp, dir) = setup().await;
+        let mut f = WriteOnlyFile::create(&dir, "seek_w.txt").await.unwrap();
+        f.write_slice(b"0123456789").await.unwrap();
+        let pos = f.seek(SeekFrom::Start(5)).await.unwrap();
+        assert_eq!(pos, 5);
+        let pos = f.stream_position().await.unwrap();
+        assert_eq!(pos, 5);
+        f.rewind().await.unwrap();
+        let pos = f.stream_position().await.unwrap();
+        assert_eq!(pos, 0);
+    }
+
+    #[tokio::test]
+    async fn try_clone_works() {
+        let (_tmp, dir) = setup().await;
+        let mut f = WriteOnlyFile::create(&dir, "tc.txt").await.unwrap();
+        let _f2 = f.try_clone().await.unwrap(); // only checks that cloning succeeds; the clone is not used
+    }
+
+    #[tokio::test]
+    async fn metadata_works() {
+        let (_tmp, dir) = setup().await;
+        let mut f = WriteOnlyFile::create(&dir, "wmd.txt").await.unwrap();
+        f.write_slice(b"12345").await.unwrap();
+        let md = f.metadata().await.unwrap();
+        assert_eq!(md.len(), 5);
+    }
+}
+
+// ===========================================================================
+// File tests (read-write handle)
+// ===========================================================================
+
+mod read_write_file {
+    use super::*;
+
+    #[tokio::test]
+    async fn open_existing_for_rw() {
+        let (_tmp, dir) = setup().await;
+        dir.write_slice("rw.txt", b"existing").await.unwrap();
+        let _f = File::open(&dir, "rw.txt").await.unwrap();
+    }
+
+    #[tokio::test]
+    async fn create_new_file() {
+        let (_tmp, dir) = setup().await;
+        let mut f = File::create(&dir, "rw_new.txt").await.unwrap();
+        f.write_slice(b"created").await.unwrap();
+        drop(f);
+        let s = dir.read_to_string("rw_new.txt").await.unwrap();
+        assert_eq!(s, "created");
+    }
+
+    #[tokio::test]
+    async fn create_truncates_existing() {
+        let (_tmp, dir) = setup().await;
+        dir.write_slice("trunc.txt", b"old data old data").await.unwrap();
+        let mut f = File::create(&dir, "trunc.txt").await.unwrap();
+        f.write_slice(b"new").await.unwrap();
+        drop(f);
+        let s = dir.read_to_string("trunc.txt").await.unwrap();
+        assert_eq!(s, "new"); // none of the longer old content remains
+    }
+
+    #[tokio::test]
+    async fn create_new_fails_on_existing() {
+        let (_tmp, dir) = setup().await;
+        dir.write_slice("exists.txt", b"x").await.unwrap();
+        let result = File::create_new(&dir, "exists.txt").await;
+        assert!(result.is_err());
+    }
+
+    #[tokio::test]
+    async fn
create_new_succeeds_on_new() {
+        let (_tmp, dir) = setup().await;
+        let _f = File::create_new(&dir, "brand_new.txt").await.unwrap();
+    }
+
+    #[tokio::test]
+    async fn open_with_memory_works() {
+        let (_tmp, dir) = setup().await;
+        dir.write_slice("owm.txt", b"data").await.unwrap();
+        let mem = GlobalPool::new();
+        let _f = File::open_with_memory(&dir, "owm.txt", mem).await.unwrap();
+    }
+
+    #[tokio::test]
+    async fn create_with_memory_works() {
+        let (_tmp, dir) = setup().await;
+        let mem = GlobalPool::new();
+        let mut f = File::create_with_memory(&dir, "cwm.txt", mem).await.unwrap();
+        f.write_slice(b"mem data").await.unwrap();
+        drop(f);
+        let s = dir.read_to_string("cwm.txt").await.unwrap();
+        assert_eq!(s, "mem data");
+    }
+
+    #[tokio::test]
+    async fn create_new_with_memory_works() {
+        let (_tmp, dir) = setup().await;
+        let mem = GlobalPool::new();
+        let mut f = File::create_new_with_memory(&dir, "cnwm.txt", mem).await.unwrap();
+        f.write_slice(b"new mem").await.unwrap();
+        drop(f);
+        let s = dir.read_to_string("cnwm.txt").await.unwrap();
+        assert_eq!(s, "new mem");
+    }
+
+    #[tokio::test]
+    async fn write_then_seek_back_and_read() {
+        let (_tmp, dir) = setup().await;
+        let mut f = File::create(&dir, "wsb.txt").await.unwrap();
+        f.write_slice(b"Hello, World!").await.unwrap();
+        f.rewind().await.unwrap(); // move the cursor back to the start so the read sees what was just written
+        let buf = f.read_max(8192).await.unwrap();
+        assert_eq!(buf.len(), 13); // "Hello, World!" is 13 bytes
+    }
+
+    #[tokio::test]
+    async fn write_slice_then_read_slice_round_trip() {
+        let (_tmp, dir) = setup().await;
+        let mut f = File::create(&dir, "slrt.txt").await.unwrap();
+        f.write_slice(b"round trip").await.unwrap();
+        f.rewind().await.unwrap();
+        let mut buf = [0u8; 10];
+        let n = f.read_into_slice(&mut buf).await.unwrap();
+        assert_eq!(n, 10);
+        assert_eq!(&buf[..n], b"round trip");
+    }
+
+    #[tokio::test]
+    async fn metadata_and_set_len() {
+        let (_tmp, dir) = setup().await;
+        let mut f = File::create(&dir, "rwmd.txt").await.unwrap();
+        f.write_slice(b"12345").await.unwrap();
+        let md = f.metadata().await.unwrap();
+        assert_eq!(md.len(), 5);
+        f.set_len(3).await.unwrap();
+        let md2 = f.metadata().await.unwrap(); // metadata is queried again after `set_len` to observe the new length
+        assert_eq!(md2.len(), 3);
+    }
+
+    #[tokio::test]
+    async fn lock_unlock_cycle() { // every acquire variant is followed by `unlock` before the next one is tried
+        let (_tmp, dir) = setup().await;
+        let mut f = File::create(&dir, "rwlock.txt").await.unwrap();
+        f.lock().await.unwrap();
+        f.unlock().await.unwrap();
+        f.lock_shared().await.unwrap();
+        f.unlock().await.unwrap();
+        f.try_lock().await.unwrap();
+        f.unlock().await.unwrap();
+        f.try_lock_shared().await.unwrap();
+        f.unlock().await.unwrap();
+    }
+
+    #[tokio::test]
+    async fn seek_stream_position_rewind() {
+        let (_tmp, dir) = setup().await;
+        let mut f = File::create(&dir, "rwseek.txt").await.unwrap();
+        f.write_slice(b"0123456789").await.unwrap();
+        let pos = f.seek(SeekFrom::Start(3)).await.unwrap();
+        assert_eq!(pos, 3);
+        let pos = f.stream_position().await.unwrap();
+        assert_eq!(pos, 3);
+        f.rewind().await.unwrap();
+        let pos = f.stream_position().await.unwrap();
+        assert_eq!(pos, 0);
+    }
+
+    #[tokio::test]
+    async fn flush_sync_all_sync_data() {
+        let (_tmp, dir) = setup().await;
+        let mut f = File::create(&dir, "rwsync.txt").await.unwrap();
+        f.write_slice(b"sync").await.unwrap();
+        f.flush().await.unwrap();
+        f.sync_all().await.unwrap();
+        f.sync_data().await.unwrap();
+    }
+
+    #[tokio::test]
+    async fn set_permissions_and_set_modified_and_set_times() {
+        let (_tmp, dir) = setup().await;
+        let mut f = File::create(&dir, "rwperms.txt").await.unwrap();
+        let md = f.metadata().await.unwrap();
+        f.set_permissions(md.permissions()).await.unwrap(); // re-applies the permissions just read
+        let t = SystemTime::now() - Duration::from_secs(100);
+        f.set_modified(t).await.unwrap();
+        let times = file::FileTimes::new().set_modified(SystemTime::now());
+        f.set_times(times).await.unwrap(); // NOTE(review): none of the three setters is verified by reading the value back
+    }
+
+    #[tokio::test]
+    async fn options_returns_open_options() {
+        let mut opts = File::options();
+        // Verify that the value from `File::options()` can be configured and used to open a file.
+        let (_tmp, dir) = setup().await;
+        dir.write_slice("opts.txt", b"via options").await.unwrap();
+        let mut f = opts.read(true).write(true).open(&dir, "opts.txt").await.unwrap();
+        let buf = f.read_max(8192).await.unwrap();
+        assert_eq!(buf.len(), 11); // "via options" is 11 bytes
+    }
+
+    #[tokio::test]
+    async fn from_rw_into_read_only() {
+        let (_tmp, dir) = setup().await;
+        dir.write_slice("conv.txt", b"convert me").await.unwrap();
+        let rw = File::open(&dir, "conv.txt").await.unwrap();
+        let mut ro: ReadOnlyFile = rw.into(); // narrowing conversion; `rw` is consumed
+        let buf = ro.read_max(8192).await.unwrap();
+        assert_eq!(buf.len(), 10);
+    }
+
+    #[tokio::test]
+    async fn from_rw_into_write_only() {
+        let (_tmp, dir) = setup().await;
+        let rw = File::create(&dir, "conv_w.txt").await.unwrap();
+        let mut wo: WriteOnlyFile = rw.into(); // narrowing conversion; `rw` is consumed
+        wo.write_slice(b"write only now").await.unwrap();
+        drop(wo);
+        let s = dir.read_to_string("conv_w.txt").await.unwrap();
+        assert_eq!(s, "write only now");
+    }
+
+    #[tokio::test]
+    async fn try_clone_works() {
+        let (_tmp, dir) = setup().await;
+        let mut f = File::create(&dir, "rwtc.txt").await.unwrap();
+        let _f2 = f.try_clone().await.unwrap(); // only checks that cloning succeeds; the clone is not used
+    }
+}
+
+// ===========================================================================
+// OpenOptions tests: builder flags followed by `open(&dir, path)`
+// ===========================================================================
+
+mod open_options {
+    use super::*;
+
+    #[tokio::test]
+    async fn read_open_existing() {
+        let (_tmp, dir) = setup().await;
+        dir.write_slice("oo_r.txt", b"read me").await.unwrap();
+        let mut f = OpenOptions::new().read(true).open(&dir, "oo_r.txt").await.unwrap();
+        let buf = f.read_max(8192).await.unwrap();
+        assert_eq!(buf.len(), 7);
+    }
+
+    #[tokio::test]
+    async fn write_create_new_file() {
+        let (_tmp, dir) = setup().await;
+        let mut f = OpenOptions::new().write(true).create(true).open(&dir, "oo_wc.txt").await.unwrap();
+        f.write_slice(b"new file").await.unwrap();
+        drop(f);
+        let s = dir.read_to_string("oo_wc.txt").await.unwrap();
+        assert_eq!(s, "new file");
+    }
+
+    #[tokio::test]
+    async fn
create_new_fails_on_existing() { + let (_tmp, dir) = setup().await; + dir.write_slice("oo_cn.txt", b"exists").await.unwrap(); + let result = OpenOptions::new().write(true).create_new(true).open(&dir, "oo_cn.txt").await; + assert!(result.is_err()); + } + + #[tokio::test] + async fn append_mode() { + let (_tmp, dir) = setup().await; + dir.write_slice("oo_ap.txt", b"first").await.unwrap(); + let mut f = OpenOptions::new().append(true).open(&dir, "oo_ap.txt").await.unwrap(); + f.write_slice(b"_second").await.unwrap(); + drop(f); + let s = dir.read_to_string("oo_ap.txt").await.unwrap(); + assert_eq!(s, "first_second"); + } + + #[tokio::test] + async fn truncate_mode() { + let (_tmp, dir) = setup().await; + dir.write_slice("oo_tr.txt", b"old content here").await.unwrap(); + let mut f = OpenOptions::new().write(true).truncate(true).open(&dir, "oo_tr.txt").await.unwrap(); + f.write_slice(b"new").await.unwrap(); + drop(f); + let s = dir.read_to_string("oo_tr.txt").await.unwrap(); + assert_eq!(s, "new"); + } + + #[tokio::test] + async fn open_with_memory() { + let (_tmp, dir) = setup().await; + dir.write_slice("oo_mem.txt", b"memory").await.unwrap(); + let mem = GlobalPool::new(); + let mut f = OpenOptions::new() + .read(true) + .open_with_memory(&dir, "oo_mem.txt", mem) + .await + .unwrap(); + let buf = f.read_max(8192).await.unwrap(); + assert_eq!(buf.len(), 6); + } +} + +// =========================================================================== +// DirBuilder tests +// =========================================================================== + +mod dir_builder { + use super::*; + + #[tokio::test] + async fn create_non_recursive() { + let (_tmp, dir) = setup().await; + DirBuilder::new().create(&dir, Path::new("single_dir")).await.unwrap(); + assert!(dir.exists("single_dir").await.unwrap()); + } + + #[tokio::test] + async fn create_recursive() { + let (_tmp, dir) = setup().await; + DirBuilder::new().recursive(true).create(&dir, Path::new("x/y/z")).await.unwrap(); + 
assert!(dir.exists("x/y/z").await.unwrap()); + } + + #[tokio::test] + async fn create_existing_non_recursive_fails() { + let (_tmp, dir) = setup().await; + dir.create_dir("already").await.unwrap(); + let result = DirBuilder::new().create(&dir, Path::new("already")).await; + assert!(result.is_err()); + } +} + +// =========================================================================== +// DirEntry tests +// =========================================================================== + +mod dir_entry { + use super::*; + + #[tokio::test] + async fn file_name_returns_bare_name() { + let (_tmp, dir) = setup().await; + dir.write_slice("entry.txt", b"x").await.unwrap(); + let mut rd = dir.read_dir(".").await.unwrap(); + let entry = rd.next_entry().await.unwrap().unwrap(); + assert_eq!(entry.file_name(), OsString::from("entry.txt")); + } + + #[tokio::test] + async fn metadata_returns_correct_info() { + let (_tmp, dir) = setup().await; + dir.write_slice("info.txt", b"12345").await.unwrap(); + let mut rd = dir.read_dir(".").await.unwrap(); + let entry = rd.next_entry().await.unwrap().unwrap(); + let md = entry.metadata().unwrap(); + assert!(md.is_file()); + assert_eq!(md.len(), 5); + } + + #[tokio::test] + async fn file_type_for_file() { + let (_tmp, dir) = setup().await; + dir.write_slice("ft.txt", b"x").await.unwrap(); + let mut rd = dir.read_dir(".").await.unwrap(); + let entry = rd.next_entry().await.unwrap().unwrap(); + let ft = entry.file_type().unwrap(); + assert!(ft.is_file()); + } + + #[tokio::test] + async fn file_type_for_dir() { + let (_tmp, dir) = setup().await; + dir.create_dir("sub").await.unwrap(); + let mut rd = dir.read_dir(".").await.unwrap(); + let entry = rd.next_entry().await.unwrap().unwrap(); + let ft = entry.file_type().unwrap(); + assert!(ft.is_dir()); + } +} + +// =========================================================================== +// Edge case tests +// =========================================================================== + +mod 
edge_cases { + use super::*; + + #[tokio::test] + async fn empty_file_read_returns_zero_bytes() { + let (_tmp, dir) = setup().await; + dir.write_slice("empty.txt", b"").await.unwrap(); + let mut f = ReadOnlyFile::open(&dir, "empty.txt").await.unwrap(); + let buf = f.read_max(8192).await.unwrap(); + assert_eq!(buf.len(), 0); + } + + #[tokio::test] + async fn large_write_and_read_1mb() { + let (_tmp, dir) = setup().await; + let size = 1024 * 1024; // 1 MB + let data = vec![0xABu8; size]; + dir.write_slice("large.bin", &data).await.unwrap(); + let view = dir.read("large.bin").await.unwrap(); + let mut collected = Vec::new(); + let mut v = view; + while !v.is_empty() { + let s = v.first_slice(); + collected.extend_from_slice(s); + let len = s.len(); + v.advance(len); + } + assert_eq!(collected.len(), size); + assert!(collected.iter().all(|&b| b == 0xAB)); + } + + #[tokio::test] + async fn unicode_filenames() { + let (_tmp, dir) = setup().await; + dir.write_slice("日本語ファイル.txt", b"unicode").await.unwrap(); + let s = dir.read_to_string("日本語ファイル.txt").await.unwrap(); + assert_eq!(s, "unicode"); + } + + #[tokio::test] + async fn deeply_nested_directories() { + let (_tmp, dir) = setup().await; + let deep = "a/b/c/d/e/f/g/h/i/j"; + dir.create_dir_all(deep).await.unwrap(); + dir.write_slice(&format!("{deep}/deep.txt"), b"deep").await.unwrap(); + let s = dir.read_to_string(&format!("{deep}/deep.txt")).await.unwrap(); + assert_eq!(s, "deep"); + } + + #[tokio::test] + async fn concurrent_read_at_positional_io() { + let (_tmp, dir) = setup().await; + dir.write_slice("conc.txt", b"ABCDEFGHIJKLMNOPQRSTUVWXYZ").await.unwrap(); + let f = ReadOnlyPositionalFile::open(&dir, "conc.txt").await.unwrap(); + + let f1 = f.try_clone().await.unwrap(); + let f2 = f.try_clone().await.unwrap(); + let f3 = f.try_clone().await.unwrap(); + + let (r1, r2, r3) = tokio::join!(f1.read_at(0, 5), f2.read_at(10, 5), f3.read_at(20, 5),); + + let collect = |view: bytesbuf::BytesView| -> Vec<u8> { + let mut out = 
Vec::new(); + let mut v = view; + while !v.is_empty() { + let s = v.first_slice(); + out.extend_from_slice(s); + let len = s.len(); + v.advance(len); + } + out + }; + + assert_eq!(collect(r1.unwrap()), b"ABCDE"); + assert_eq!(collect(r2.unwrap()), b"KLMNO"); + assert_eq!(collect(r3.unwrap()), b"UVWXY"); + } + + #[tokio::test] + async fn emoji_filename() { + let (_tmp, dir) = setup().await; + dir.write_slice("🚀🎉.txt", b"rocket party").await.unwrap(); + let s = dir.read_to_string("🚀🎉.txt").await.unwrap(); + assert_eq!(s, "rocket party"); + } + + #[tokio::test] + async fn write_empty_file() { + let (_tmp, dir) = setup().await; + let mut f = WriteOnlyFile::create(&dir, "empty_w.txt").await.unwrap(); + f.write_slice(b"").await.unwrap(); + drop(f); + let md = dir.metadata("empty_w.txt").await.unwrap(); + assert_eq!(md.len(), 0); + } + + #[tokio::test] + async fn path_with_interior_dotdot_within_root() { + let (_tmp, dir) = setup().await; + dir.create_dir("a").await.unwrap(); + dir.create_dir("b").await.unwrap(); + dir.write_slice("b/f.txt", b"found").await.unwrap(); + // "a/../b/f.txt" should resolve within root + let result = dir.read_to_string("a/../b/f.txt").await; + // It should either succeed (path resolves within root) or fail + // gracefully (path traversal rejected). Both are valid. 
+ if let Ok(s) = result { + assert_eq!(s, "found"); + } + } + + #[tokio::test] + async fn read_at_beyond_eof() { + let (_tmp, dir) = setup().await; + dir.write_slice("small.txt", b"hi").await.unwrap(); + let f = ReadOnlyPositionalFile::open(&dir, "small.txt").await.unwrap(); + let view = f.read_at(100, 10).await.unwrap(); + assert_eq!(view.len(), 0); + } + + #[tokio::test] + async fn write_at_beyond_eof_extends_file() { + let (_tmp, dir) = setup().await; + let f = WriteOnlyPositionalFile::create(&dir, "extend.txt").await.unwrap(); + f.write_slice_at(0, b"AB").await.unwrap(); + f.write_slice_at(10, b"CD").await.unwrap(); + let md = f.metadata().await.unwrap(); + assert_eq!(md.len(), 12); + } + + #[tokio::test] + async fn multiple_sequential_writes() { + let (_tmp, dir) = setup().await; + let mut f = WriteOnlyFile::create(&dir, "multi.txt").await.unwrap(); + for i in 0..100 { + let line = format!("line {i}\n"); + f.write_slice(line.as_bytes()).await.unwrap(); + } + drop(f); + let s = dir.read_to_string("multi.txt").await.unwrap(); + assert!(s.contains("line 0\n")); + assert!(s.contains("line 99\n")); + } + + #[tokio::test] + async fn seek_from_end() { + let (_tmp, dir) = setup().await; + dir.write_slice("sfe.txt", b"0123456789").await.unwrap(); + let mut f = ReadOnlyFile::open(&dir, "sfe.txt").await.unwrap(); + let pos = f.seek(SeekFrom::End(-3)).await.unwrap(); + assert_eq!(pos, 7); + let mut buf = [0u8; 3]; + let n = f.read_into_slice(&mut buf).await.unwrap(); + assert_eq!(n, 3); + assert_eq!(&buf, b"789"); + } + + #[tokio::test] + async fn seek_from_current() { + let (_tmp, dir) = setup().await; + dir.write_slice("sfc.txt", b"ABCDEFGHIJ").await.unwrap(); + let mut f = ReadOnlyFile::open(&dir, "sfc.txt").await.unwrap(); + f.seek(SeekFrom::Start(2)).await.unwrap(); + let pos = f.seek(SeekFrom::Current(3)).await.unwrap(); + assert_eq!(pos, 5); + } + + #[tokio::test] + async fn read_dir_with_mixed_entries() { + let (_tmp, dir) = setup().await; + 
dir.write_slice("file1.txt", b"a").await.unwrap(); + dir.create_dir("subdir").await.unwrap(); + dir.write_slice("file2.txt", b"b").await.unwrap(); + let mut rd = dir.read_dir(".").await.unwrap(); + let mut count = 0; + while rd.next_entry().await.unwrap().is_some() { + count += 1; + } + assert_eq!(count, 3); + } +} + +// =========================================================================== +// New read API tests (ReadOnlyFile) +// =========================================================================== + +mod read_only_new_api { + use core::mem::MaybeUninit; + + use super::*; + + #[tokio::test] + async fn read_best_effort_fills_fully() { + let (_tmp, dir) = setup().await; + dir.write_slice("rbf.txt", b"0123456789").await.unwrap(); + let mut f = ReadOnlyFile::open(&dir, "rbf.txt").await.unwrap(); + let view = f.read(10).await.unwrap(); + assert_eq!(view.len(), 10); + } + + #[tokio::test] + async fn read_best_effort_partial_at_eof() { + let (_tmp, dir) = setup().await; + dir.write_slice("rbp.txt", b"short").await.unwrap(); + let mut f = ReadOnlyFile::open(&dir, "rbp.txt").await.unwrap(); + let view = f.read(100).await.unwrap(); + assert_eq!(view.len(), 5); + } + + #[tokio::test] + async fn read_exact_success() { + let (_tmp, dir) = setup().await; + dir.write_slice("re.txt", b"exact_data!").await.unwrap(); + let mut f = ReadOnlyFile::open(&dir, "re.txt").await.unwrap(); + let view = f.read_exact(5).await.unwrap(); + assert_eq!(view.len(), 5); + } + + #[tokio::test] + async fn read_exact_eof_is_error() { + let (_tmp, dir) = setup().await; + dir.write_slice("ree.txt", b"hi").await.unwrap(); + let mut f = ReadOnlyFile::open(&dir, "ree.txt").await.unwrap(); + let err = f.read_exact(100).await; + assert!(err.is_err()); + } + + #[tokio::test] + async fn read_max_at_single_operation() { + let (_tmp, dir) = setup().await; + dir.write_slice("rma.txt", b"0123456789").await.unwrap(); + let f = ReadOnlyPositionalFile::open(&dir, "rma.txt").await.unwrap(); + let view = 
f.read_max_at(3, 4).await.unwrap(); + assert!(view.len() <= 4); + assert!(!view.is_empty()); + } + + #[tokio::test] + async fn read_into_bytesbuf_at_works() { + let (_tmp, dir) = setup().await; + dir.write_slice("riba.txt", b"ABCDEFGHIJ").await.unwrap(); + let f = ReadOnlyPositionalFile::open(&dir, "riba.txt").await.unwrap(); + let mem = GlobalPool::new(); + let mut buf = mem.reserve(16); + let n = f.read_into_bytesbuf_at(5, &mut buf).await.unwrap(); + assert!(n > 0); + } + + #[tokio::test] + async fn read_exact_into_bytesbuf_success() { + let (_tmp, dir) = setup().await; + dir.write_slice("reib.txt", b"hello world").await.unwrap(); + let mut f = ReadOnlyFile::open(&dir, "reib.txt").await.unwrap(); + let mem = GlobalPool::new(); + let mut buf = mem.reserve(32); + f.read_exact_into_bytesbuf(5, &mut buf).await.unwrap(); + assert_eq!(buf.len(), 5); + } + + #[tokio::test] + async fn read_exact_into_bytesbuf_eof_is_error() { + let (_tmp, dir) = setup().await; + dir.write_slice("reibf.txt", b"hi").await.unwrap(); + let mut f = ReadOnlyFile::open(&dir, "reibf.txt").await.unwrap(); + let mem = GlobalPool::new(); + let mut buf = mem.reserve(32); + let err = f.read_exact_into_bytesbuf(100, &mut buf).await; + assert!(err.is_err()); + } + + #[tokio::test] + async fn read_exact_into_bytesbuf_at_success() { + let (_tmp, dir) = setup().await; + dir.write_slice("reiba.txt", b"0123456789").await.unwrap(); + let f = ReadOnlyPositionalFile::open(&dir, "reiba.txt").await.unwrap(); + let mem = GlobalPool::new(); + let mut buf = mem.reserve(32); + f.read_exact_into_bytesbuf_at(2, 4, &mut buf).await.unwrap(); + assert_eq!(buf.len(), 4); + } + + #[tokio::test] + async fn read_exact_into_bytesbuf_at_eof_is_error() { + let (_tmp, dir) = setup().await; + dir.write_slice("reibaf.txt", b"hi").await.unwrap(); + let f = ReadOnlyPositionalFile::open(&dir, "reibaf.txt").await.unwrap(); + let mem = GlobalPool::new(); + let mut buf = mem.reserve(32); + let err = f.read_exact_into_bytesbuf_at(0, 100, 
&mut buf).await; + assert!(err.is_err()); + } + + #[tokio::test] + async fn read_into_slice_fills_fully() { + let (_tmp, dir) = setup().await; + dir.write_slice("ris.txt", b"0123456789").await.unwrap(); + let mut f = ReadOnlyFile::open(&dir, "ris.txt").await.unwrap(); + let mut buf = [0u8; 10]; + let n = f.read_into_slice(&mut buf).await.unwrap(); + assert_eq!(n, 10); + assert_eq!(&buf, b"0123456789"); + } + + #[tokio::test] + async fn read_exact_into_slice_success() { + let (_tmp, dir) = setup().await; + dir.write_slice("reis.txt", b"hello").await.unwrap(); + let mut f = ReadOnlyFile::open(&dir, "reis.txt").await.unwrap(); + let mut buf = [0u8; 5]; + f.read_exact_into_slice(&mut buf).await.unwrap(); + assert_eq!(&buf, b"hello"); + } + + #[tokio::test] + async fn read_exact_into_slice_eof_is_error() { + let (_tmp, dir) = setup().await; + dir.write_slice("reisf.txt", b"hi").await.unwrap(); + let mut f = ReadOnlyFile::open(&dir, "reisf.txt").await.unwrap(); + let mut buf = [0u8; 100]; + let err = f.read_exact_into_slice(&mut buf).await; + assert!(err.is_err()); + } + + #[tokio::test] + async fn read_into_slice_at_fills() { + let (_tmp, dir) = setup().await; + dir.write_slice("risa.txt", b"ABCDEFGHIJ").await.unwrap(); + let f = ReadOnlyPositionalFile::open(&dir, "risa.txt").await.unwrap(); + let mut buf = [0u8; 5]; + let n = f.read_into_slice_at(5, &mut buf).await.unwrap(); + assert_eq!(n, 5); + assert_eq!(&buf, b"FGHIJ"); + } + + #[tokio::test] + async fn read_exact_into_slice_at_success() { + let (_tmp, dir) = setup().await; + dir.write_slice("reisa.txt", b"ABCDEFGHIJ").await.unwrap(); + let f = ReadOnlyPositionalFile::open(&dir, "reisa.txt").await.unwrap(); + let mut buf = [0u8; 3]; + f.read_exact_into_slice_at(7, &mut buf).await.unwrap(); + assert_eq!(&buf, b"HIJ"); + } + + #[tokio::test] + async fn read_exact_into_slice_at_eof_is_error() { + let (_tmp, dir) = setup().await; + dir.write_slice("reisaf.txt", b"hi").await.unwrap(); + let f = 
ReadOnlyPositionalFile::open(&dir, "reisaf.txt").await.unwrap(); + let mut buf = [0u8; 100]; + let err = f.read_exact_into_slice_at(0, &mut buf).await; + assert!(err.is_err()); + } + + #[tokio::test] + async fn read_exact_into_uninit_success() { + let (_tmp, dir) = setup().await; + dir.write_slice("reiu.txt", b"uninit_test").await.unwrap(); + let mut f = ReadOnlyFile::open(&dir, "reiu.txt").await.unwrap(); + let mut buf = [MaybeUninit::<u8>::uninit(); 6]; + f.read_exact_into_uninit(&mut buf).await.unwrap(); + // SAFETY: read_exact_into_uninit guarantees initialization on success. + let initialized = unsafe { core::slice::from_raw_parts(buf.as_ptr().cast::<u8>(), buf.len()) }; + assert_eq!(initialized, b"uninit"); + } + + #[tokio::test] + async fn read_exact_into_uninit_at_success() { + let (_tmp, dir) = setup().await; + dir.write_slice("reiua.txt", b"0123456789").await.unwrap(); + let f = ReadOnlyPositionalFile::open(&dir, "reiua.txt").await.unwrap(); + let mut buf = [MaybeUninit::<u8>::uninit(); 3]; + f.read_exact_into_uninit_at(7, &mut buf).await.unwrap(); + // SAFETY: read_exact_into_uninit_at guarantees initialization on success. 
+ let initialized = unsafe { core::slice::from_raw_parts(buf.as_ptr().cast::<u8>(), buf.len()) }; + assert_eq!(initialized, b"789"); + } +} + +// =========================================================================== +// Sync Read / Write / Seek trait tests +// =========================================================================== + +#[cfg(feature = "sync-compat")] +mod sync_io_traits { + use std::io::{BufRead, BufReader, Read, Seek, Write}; + + use super::*; + + #[tokio::test] + async fn read_only_file_sync_read() { + let (_tmp, dir) = setup().await; + dir.write_slice("sr.txt", b"sync read test").await.unwrap(); + let mut f = ReadOnlyFile::open(&dir, "sr.txt").await.unwrap(); + let mut buf = [0u8; 9]; + let n = Read::read(&mut f, &mut buf).unwrap(); + assert_eq!(n, 9); + assert_eq!(&buf, b"sync read"); + } + + #[tokio::test] + async fn read_only_file_sync_seek() { + let (_tmp, dir) = setup().await; + dir.write_slice("ss.txt", b"0123456789").await.unwrap(); + let mut f = ReadOnlyFile::open(&dir, "ss.txt").await.unwrap(); + let pos = Seek::seek(&mut f, SeekFrom::Start(5)).unwrap(); + assert_eq!(pos, 5); + let mut buf = [0u8; 5]; + let n = Read::read(&mut f, &mut buf).unwrap(); + assert_eq!(n, 5); + assert_eq!(&buf, b"56789"); + } + + #[tokio::test] + async fn read_only_file_with_bufreader() { + let (_tmp, dir) = setup().await; + dir.write_slice("br.txt", b"line1\nline2\nline3\n").await.unwrap(); + let f = ReadOnlyFile::open(&dir, "br.txt").await.unwrap(); + let reader = BufReader::new(f); + let lines: Vec<String> = reader.lines().map(|l| l.unwrap()).collect(); + assert_eq!(lines, vec!["line1", "line2", "line3"]); + } + + #[tokio::test] + async fn write_only_file_sync_write() { + let (_tmp, dir) = setup().await; + let mut f = WriteOnlyFile::create(&dir, "sw.txt").await.unwrap(); + let n = Write::write(&mut f, b"sync write").unwrap(); + assert_eq!(n, 10); + Write::flush(&mut f).unwrap(); + drop(f); + let s = dir.read_to_string("sw.txt").await.unwrap(); + assert_eq!(s, 
"sync write"); + } + + #[tokio::test] + async fn write_only_file_sync_seek() { + let (_tmp, dir) = setup().await; + let mut f = WriteOnlyFile::create(&dir, "sws.txt").await.unwrap(); + Write::write_all(&mut f, b"AAAAAAAAAA").unwrap(); + Seek::seek(&mut f, SeekFrom::Start(3)).unwrap(); + Write::write_all(&mut f, b"BB").unwrap(); + Write::flush(&mut f).unwrap(); + drop(f); + let s = dir.read_to_string("sws.txt").await.unwrap(); + assert_eq!(s, "AAABBAAAAA"); + } + + #[tokio::test] + async fn read_write_file_sync_read_write() { + let (_tmp, dir) = setup().await; + let mut f = File::create(&dir, "srw.txt").await.unwrap(); + Write::write_all(&mut f, b"hello world").unwrap(); + Seek::seek(&mut f, SeekFrom::Start(0)).unwrap(); + let mut buf = [0u8; 5]; + Read::read_exact(&mut f, &mut buf).unwrap(); + assert_eq!(&buf, b"hello"); + } + + #[tokio::test] + async fn read_write_file_sync_seek_stream_position() { + let (_tmp, dir) = setup().await; + let mut f = File::create(&dir, "ssp.txt").await.unwrap(); + Write::write_all(&mut f, b"0123456789").unwrap(); + let pos = Seek::stream_position(&mut f).unwrap(); + assert_eq!(pos, 10); + Seek::rewind(&mut f).unwrap(); + let pos = Seek::stream_position(&mut f).unwrap(); + assert_eq!(pos, 0); + } +} + +// =========================================================================== +// Platform fd/handle trait tests +// =========================================================================== + +mod platform_traits { + use super::*; + + #[tokio::test] + async fn read_only_file_as_raw_fd_or_handle() { + let (_tmp, dir) = setup().await; + dir.write_slice("fd.txt", b"x").await.unwrap(); + let f = ReadOnlyFile::open(&dir, "fd.txt").await.unwrap(); + + #[cfg(unix)] + { + use std::os::unix::io::{AsFd, AsRawFd}; + let raw = f.as_raw_fd(); + assert!(raw >= 0); + let borrowed = f.as_fd(); + assert_eq!(std::os::unix::io::AsRawFd::as_raw_fd(&borrowed), raw); + } + + #[cfg(windows)] + { + use std::os::windows::io::{AsHandle, AsRawHandle}; + let 
raw = f.as_raw_handle(); + assert!(!raw.is_null()); + let _borrowed = f.as_handle(); + } + } + + #[tokio::test] + async fn write_only_file_as_raw_fd_or_handle() { + let (_tmp, dir) = setup().await; + let f = WriteOnlyFile::create(&dir, "wfd.txt").await.unwrap(); + + #[cfg(unix)] + { + use std::os::unix::io::{AsFd, AsRawFd}; + let raw = f.as_raw_fd(); + assert!(raw >= 0); + let _borrowed = f.as_fd(); + } + + #[cfg(windows)] + { + use std::os::windows::io::{AsHandle, AsRawHandle}; + let raw = f.as_raw_handle(); + assert!(!raw.is_null()); + let _borrowed = f.as_handle(); + } + } + + #[tokio::test] + async fn read_write_file_as_raw_fd_or_handle() { + let (_tmp, dir) = setup().await; + let f = File::create(&dir, "rwfd.txt").await.unwrap(); + + #[cfg(unix)] + { + use std::os::unix::io::{AsFd, AsRawFd}; + let raw = f.as_raw_fd(); + assert!(raw >= 0); + let _borrowed = f.as_fd(); + } + + #[cfg(windows)] + { + use std::os::windows::io::{AsHandle, AsRawHandle}; + let raw = f.as_raw_handle(); + assert!(!raw.is_null()); + let _borrowed = f.as_handle(); + } + } +} + +// =========================================================================== +// File new read API tests +// =========================================================================== + +mod read_write_new_api { + use super::*; + + #[tokio::test] + async fn read_exact_success() { + let (_tmp, dir) = setup().await; + let mut f = File::create(&dir, "rwre.txt").await.unwrap(); + f.write_slice(b"exact_data!").await.unwrap(); + f.rewind().await.unwrap(); + let view = f.read_exact(5).await.unwrap(); + assert_eq!(view.len(), 5); + } + + #[tokio::test] + async fn read_exact_eof_is_error() { + let (_tmp, dir) = setup().await; + let mut f = File::create(&dir, "rwref.txt").await.unwrap(); + f.write_slice(b"hi").await.unwrap(); + f.rewind().await.unwrap(); + let err = f.read_exact(100).await; + assert!(err.is_err()); + } + + #[tokio::test] + async fn read_exact_into_uninit() { + let (_tmp, dir) = setup().await; + let mut f 
= File::create(&dir, "rwreiu.txt").await.unwrap(); + f.write_slice(b"uninit_test").await.unwrap(); + f.rewind().await.unwrap(); + let mut buf = [core::mem::MaybeUninit::<u8>::uninit(); 6]; + f.read_exact_into_uninit(&mut buf).await.unwrap(); + // SAFETY: read_exact_into_uninit guarantees initialization on success. + let initialized = unsafe { core::slice::from_raw_parts(buf.as_ptr().cast::<u8>(), buf.len()) }; + assert_eq!(initialized, b"uninit"); + } +} + +// =========================================================================== +// ReadOnlyPositionalFile tests +// =========================================================================== + +mod read_only_positional_file { + use core::mem::MaybeUninit; + + use super::*; + + #[tokio::test] + async fn open_and_read_at() { + let (_tmp, dir) = setup().await; + dir.write_slice("rat.txt", b"0123456789").await.unwrap(); + let f = ReadOnlyPositionalFile::open(&dir, "rat.txt").await.unwrap(); + let view = f.read_at(5, 5).await.unwrap(); + let mut collected = Vec::new(); + let mut v = view; + while !v.is_empty() { + let s = v.first_slice(); + collected.extend_from_slice(s); + let len = s.len(); + v.advance(len); + } + assert_eq!(collected, b"56789"); + } + + #[tokio::test] + async fn read_max_into_bytesbuf_at_works() { + let (_tmp, dir) = setup().await; + dir.write_slice("rai.txt", b"ABCDEFGHIJ").await.unwrap(); + let f = ReadOnlyPositionalFile::open(&dir, "rai.txt").await.unwrap(); + let mem = GlobalPool::new(); + let mut buf = mem.reserve(16); + let n = f.read_max_into_bytesbuf_at(2, 4, &mut buf).await.unwrap(); + assert!(n > 0); + assert!(!buf.is_empty()); + } + + #[tokio::test] + async fn read_exact_at_success() { + let (_tmp, dir) = setup().await; + dir.write_slice("rea.txt", b"exact read test!").await.unwrap(); + let f = ReadOnlyPositionalFile::open(&dir, "rea.txt").await.unwrap(); + let view = f.read_exact_at(6, 4).await.unwrap(); + let mut collected = Vec::new(); + let mut v = view; + while !v.is_empty() { + let s = 
v.first_slice(); + collected.extend_from_slice(s); + let len = s.len(); + v.advance(len); + } + assert_eq!(collected, b"read"); + } + + #[tokio::test] + async fn read_exact_at_unexpected_eof() { + let (_tmp, dir) = setup().await; + dir.write_slice("short.txt", b"hi").await.unwrap(); + let f = ReadOnlyPositionalFile::open(&dir, "short.txt").await.unwrap(); + let result = f.read_exact_at(0, 100).await; + assert!(result.is_err()); + } + + #[tokio::test] + async fn read_into_slice_at_single_read() { + let (_tmp, dir) = setup().await; + dir.write_slice("rs.txt", b"slice_test").await.unwrap(); + let f = ReadOnlyPositionalFile::open(&dir, "rs.txt").await.unwrap(); + let mut buf = [0u8; 4]; + let n = f.read_into_slice_at(6, &mut buf).await.unwrap(); + assert_eq!(n, 4); + assert_eq!(&buf[..n], b"test"); + } + + #[tokio::test] + async fn open_with_memory() { + let (_tmp, dir) = setup().await; + dir.write_slice("owm.txt", b"with memory").await.unwrap(); + let mem = GlobalPool::new(); + let f = ReadOnlyPositionalFile::open_with_memory(&dir, "owm.txt", mem).await.unwrap(); + let view = f.read_at(0, 11).await.unwrap(); + assert_eq!(view.len(), 11); + } + + #[tokio::test] + async fn metadata_works() { + let (_tmp, dir) = setup().await; + dir.write_slice("md.txt", b"12345678").await.unwrap(); + let f = ReadOnlyPositionalFile::open(&dir, "md.txt").await.unwrap(); + let md = f.metadata().await.unwrap(); + assert_eq!(md.len(), 8); + assert!(md.is_file()); + } + + #[tokio::test] + async fn try_clone_works() { + let (_tmp, dir) = setup().await; + dir.write_slice("clone.txt", b"clone data").await.unwrap(); + let f = ReadOnlyPositionalFile::open(&dir, "clone.txt").await.unwrap(); + let _f2 = f.try_clone().await.unwrap(); + let md = f.metadata().await.unwrap(); + assert_eq!(md.len(), 10); + } + + #[tokio::test] + async fn lock_and_unlock() { + let (_tmp, dir) = setup().await; + dir.write_slice("lock.txt", b"x").await.unwrap(); + let f = ReadOnlyPositionalFile::open(&dir, 
"lock.txt").await.unwrap(); + f.lock().await.unwrap(); + f.unlock().await.unwrap(); + } + + #[tokio::test] + async fn read_exact_into_uninit_at_success() { + let (_tmp, dir) = setup().await; + dir.write_slice("reiua.txt", b"0123456789").await.unwrap(); + let f = ReadOnlyPositionalFile::open(&dir, "reiua.txt").await.unwrap(); + let mut buf = [MaybeUninit::<u8>::uninit(); 3]; + f.read_exact_into_uninit_at(7, &mut buf).await.unwrap(); + // SAFETY: read_exact_into_uninit_at guarantees initialization on success. + let initialized = unsafe { core::slice::from_raw_parts(buf.as_ptr().cast::<u8>(), buf.len()) }; + assert_eq!(initialized, b"789"); + } +} + +// =========================================================================== +// WriteOnlyPositionalFile tests +// =========================================================================== + +mod write_only_positional_file { + use super::*; + + #[tokio::test] + async fn write_at_positional() { + let (_tmp, dir) = setup().await; + let f = WriteOnlyPositionalFile::create(&dir, "wat.txt").await.unwrap(); + f.write_slice_at(0, b"AAAAAAAAAA").await.unwrap(); + let data = make_view(b"BB"); + f.write_at(3, data).await.unwrap(); + drop(f); + let s = dir.read_to_string("wat.txt").await.unwrap(); + assert_eq!(s, "AAABBAAAAA"); + } + + #[tokio::test] + async fn write_at_positional_all() { + let (_tmp, dir) = setup().await; + let f = WriteOnlyPositionalFile::create(&dir, "waat.txt").await.unwrap(); + f.write_slice_at(0, b"XXXXXXXXXX").await.unwrap(); + let data = make_view(b"YYY"); + f.write_at(7, data).await.unwrap(); + drop(f); + let s = dir.read_to_string("waat.txt").await.unwrap(); + assert_eq!(s, "XXXXXXXYYY"); + } + + #[tokio::test] + async fn write_slice_at_positional() { + let (_tmp, dir) = setup().await; + let f = WriteOnlyPositionalFile::create(&dir, "wsa.txt").await.unwrap(); + f.write_slice_at(0, b"0000000000").await.unwrap(); + f.write_slice_at(2, b"11").await.unwrap(); + drop(f); + let s = 
dir.read_to_string("wsa.txt").await.unwrap(); + assert_eq!(s, "0011000000"); + } + + #[tokio::test] + async fn create_new_succeeds_then_fails() { + let (_tmp, dir) = setup().await; + let f = WriteOnlyPositionalFile::create_new(&dir, "new.txt").await.unwrap(); + drop(f); + let result = WriteOnlyPositionalFile::create_new(&dir, "new.txt").await; + assert!(result.is_err()); + } + + #[tokio::test] + async fn create_with_memory_works() { + let (_tmp, dir) = setup().await; + let mem = GlobalPool::new(); + let f = WriteOnlyPositionalFile::create_with_memory(&dir, "cwm.txt", mem).await.unwrap(); + f.write_slice_at(0, b"with memory").await.unwrap(); + drop(f); + let s = dir.read_to_string("cwm.txt").await.unwrap(); + assert_eq!(s, "with memory"); + } + + #[tokio::test] + async fn create_new_with_memory_works() { + let (_tmp, dir) = setup().await; + let mem = GlobalPool::new(); + let f = WriteOnlyPositionalFile::create_new_with_memory(&dir, "cnwm.txt", mem) + .await + .unwrap(); + f.write_slice_at(0, b"new with mem").await.unwrap(); + drop(f); + let s = dir.read_to_string("cnwm.txt").await.unwrap(); + assert_eq!(s, "new with mem"); + } + + #[tokio::test] + async fn set_len_truncate() { + let (_tmp, dir) = setup().await; + let f = WriteOnlyPositionalFile::create(&dir, "slt.txt").await.unwrap(); + f.write_slice_at(0, b"1234567890").await.unwrap(); + f.set_len(5).await.unwrap(); + drop(f); + let s = dir.read_to_string("slt.txt").await.unwrap(); + assert_eq!(s, "12345"); + } + + #[tokio::test] + async fn metadata_works() { + let (_tmp, dir) = setup().await; + let f = WriteOnlyPositionalFile::create(&dir, "wmd.txt").await.unwrap(); + f.write_slice_at(0, b"12345").await.unwrap(); + let md = f.metadata().await.unwrap(); + assert_eq!(md.len(), 5); + } + + #[tokio::test] + async fn flush_sync_all_sync_data() { + let (_tmp, dir) = setup().await; + let f = WriteOnlyPositionalFile::create(&dir, "sync.txt").await.unwrap(); + f.write_slice_at(0, b"sync data").await.unwrap(); + 
f.flush().await.unwrap(); + f.sync_all().await.unwrap(); + f.sync_data().await.unwrap(); + } + + #[tokio::test] + async fn try_clone_works() { + let (_tmp, dir) = setup().await; + let f = WriteOnlyPositionalFile::create(&dir, "tc.txt").await.unwrap(); + let _f2 = f.try_clone().await.unwrap(); + } + + #[tokio::test] + async fn lock_and_unlock() { + let (_tmp, dir) = setup().await; + let f = WriteOnlyPositionalFile::create(&dir, "lock.txt").await.unwrap(); + f.lock().await.unwrap(); + f.unlock().await.unwrap(); + } +} + +// =========================================================================== +// PositionalFile tests +// =========================================================================== + +mod positional_file { + use super::*; + + #[tokio::test] + async fn open_existing() { + let (_tmp, dir) = setup().await; + dir.write_slice("pf.txt", b"existing").await.unwrap(); + let _f = PositionalFile::open(&dir, "pf.txt").await.unwrap(); + } + + #[tokio::test] + async fn create_new_file() { + let (_tmp, dir) = setup().await; + let f = PositionalFile::create(&dir, "pf_new.txt").await.unwrap(); + f.write_slice_at(0, b"created").await.unwrap(); + drop(f); + let s = dir.read_to_string("pf_new.txt").await.unwrap(); + assert_eq!(s, "created"); + } + + #[tokio::test] + async fn create_truncates_existing() { + let (_tmp, dir) = setup().await; + dir.write_slice("trunc.txt", b"old data old data").await.unwrap(); + let f = PositionalFile::create(&dir, "trunc.txt").await.unwrap(); + f.write_slice_at(0, b"new").await.unwrap(); + drop(f); + let s = dir.read_to_string("trunc.txt").await.unwrap(); + assert_eq!(s, "new"); + } + + #[tokio::test] + async fn create_new_fails_on_existing() { + let (_tmp, dir) = setup().await; + dir.write_slice("exists.txt", b"x").await.unwrap(); + let result = PositionalFile::create_new(&dir, "exists.txt").await; + assert!(result.is_err()); + } + + #[tokio::test] + async fn create_new_succeeds_on_new() { + let (_tmp, dir) = setup().await; + let _f = 
PositionalFile::create_new(&dir, "brand_new.txt").await.unwrap(); + } + + #[tokio::test] + async fn open_with_memory_works() { + let (_tmp, dir) = setup().await; + dir.write_slice("owm.txt", b"data").await.unwrap(); + let mem = GlobalPool::new(); + let _f = PositionalFile::open_with_memory(&dir, "owm.txt", mem).await.unwrap(); + } + + #[tokio::test] + async fn create_with_memory_works() { + let (_tmp, dir) = setup().await; + let mem = GlobalPool::new(); + let f = PositionalFile::create_with_memory(&dir, "cwm.txt", mem).await.unwrap(); + f.write_slice_at(0, b"mem data").await.unwrap(); + drop(f); + let s = dir.read_to_string("cwm.txt").await.unwrap(); + assert_eq!(s, "mem data"); + } + + #[tokio::test] + async fn create_new_with_memory_works() { + let (_tmp, dir) = setup().await; + let mem = GlobalPool::new(); + let f = PositionalFile::create_new_with_memory(&dir, "cnwm.txt", mem).await.unwrap(); + f.write_slice_at(0, b"new mem").await.unwrap(); + drop(f); + let s = dir.read_to_string("cnwm.txt").await.unwrap(); + assert_eq!(s, "new mem"); + } + + #[tokio::test] + async fn read_at_and_write_at_interleaved() { + let (_tmp, dir) = setup().await; + let f = PositionalFile::create(&dir, "interl.txt").await.unwrap(); + f.write_slice_at(0, b"ABCDEFGHIJ").await.unwrap(); + + let view = f.read_at(0, 3).await.unwrap(); + let mut collected = Vec::new(); + let mut v = view; + while !v.is_empty() { + let s = v.first_slice(); + collected.extend_from_slice(s); + let len = s.len(); + v.advance(len); + } + assert_eq!(collected, b"ABC"); + + let data = make_view(b"XY"); + f.write_at(3, data).await.unwrap(); + + let view2 = f.read_at(3, 2).await.unwrap(); + let mut collected2 = Vec::new(); + let mut v2 = view2; + while !v2.is_empty() { + let s = v2.first_slice(); + collected2.extend_from_slice(s); + let len = s.len(); + v2.advance(len); + } + assert_eq!(collected2, b"XY"); + } + + #[tokio::test] + async fn read_exact_at_works() { + let (_tmp, dir) = setup().await; + 
dir.write_slice("rea_pf.txt", b"exactpfdata").await.unwrap(); + let f = PositionalFile::open(&dir, "rea_pf.txt").await.unwrap(); + let view = f.read_exact_at(5, 2).await.unwrap(); + let mut collected = Vec::new(); + let mut v = view; + while !v.is_empty() { + let s = v.first_slice(); + collected.extend_from_slice(s); + let len = s.len(); + v.advance(len); + } + assert_eq!(collected, b"pf"); + } + + #[tokio::test] + async fn read_slice_at_and_write_slice_at() { + let (_tmp, dir) = setup().await; + let f = PositionalFile::create(&dir, "rsa.txt").await.unwrap(); + f.write_slice_at(0, b"0123456789").await.unwrap(); + f.write_slice_at(2, b"AB").await.unwrap(); + let mut buf = [0u8; 4]; + let n = f.read_into_slice_at(1, &mut buf).await.unwrap(); + assert_eq!(n, 4); + assert_eq!(&buf[..n], b"1AB4"); + } + + #[tokio::test] + async fn metadata_and_set_len() { + let (_tmp, dir) = setup().await; + let f = PositionalFile::create(&dir, "pfmd.txt").await.unwrap(); + f.write_slice_at(0, b"12345").await.unwrap(); + let md = f.metadata().await.unwrap(); + assert_eq!(md.len(), 5); + f.set_len(3).await.unwrap(); + let md2 = f.metadata().await.unwrap(); + assert_eq!(md2.len(), 3); + } + + #[tokio::test] + async fn lock_unlock_cycle() { + let (_tmp, dir) = setup().await; + let f = PositionalFile::create(&dir, "pflock.txt").await.unwrap(); + f.lock().await.unwrap(); + f.unlock().await.unwrap(); + f.lock_shared().await.unwrap(); + f.unlock().await.unwrap(); + f.try_lock().await.unwrap(); + f.unlock().await.unwrap(); + f.try_lock_shared().await.unwrap(); + f.unlock().await.unwrap(); + } + + #[tokio::test] + async fn flush_sync_all_sync_data() { + let (_tmp, dir) = setup().await; + let f = PositionalFile::create(&dir, "pfsync.txt").await.unwrap(); + f.write_slice_at(0, b"sync").await.unwrap(); + f.flush().await.unwrap(); + f.sync_all().await.unwrap(); + f.sync_data().await.unwrap(); + } + + #[tokio::test] + async fn set_permissions_and_set_modified_and_set_times() { + let (_tmp, dir) = 
setup().await; + let f = PositionalFile::create(&dir, "pfperms.txt").await.unwrap(); + let md = f.metadata().await.unwrap(); + f.set_permissions(md.permissions()).await.unwrap(); + let t = SystemTime::now() - Duration::from_secs(100); + f.set_modified(t).await.unwrap(); + let times = file::FileTimes::new().set_modified(SystemTime::now()); + f.set_times(times).await.unwrap(); + } + + #[tokio::test] + async fn from_positional_into_read_only_positional() { + let (_tmp, dir) = setup().await; + dir.write_slice("conv.txt", b"convert me").await.unwrap(); + let pf = PositionalFile::open(&dir, "conv.txt").await.unwrap(); + let ro: ReadOnlyPositionalFile = pf.into(); + let view = ro.read_at(0, 10).await.unwrap(); + assert_eq!(view.len(), 10); + } + + #[tokio::test] + async fn from_positional_into_write_only_positional() { + let (_tmp, dir) = setup().await; + let pf = PositionalFile::create(&dir, "conv_w.txt").await.unwrap(); + let wo: WriteOnlyPositionalFile = pf.into(); + wo.write_slice_at(0, b"write only now").await.unwrap(); + drop(wo); + let s = dir.read_to_string("conv_w.txt").await.unwrap(); + assert_eq!(s, "write only now"); + } + + #[tokio::test] + async fn try_clone_works() { + let (_tmp, dir) = setup().await; + let f = PositionalFile::create(&dir, "pftc.txt").await.unwrap(); + let _f2 = f.try_clone().await.unwrap(); + } + + #[tokio::test] + async fn read_max_at_single_operation() { + let (_tmp, dir) = setup().await; + let f = PositionalFile::create(&dir, "pfrma.txt").await.unwrap(); + f.write_slice_at(0, b"0123456789").await.unwrap(); + let view = f.read_max_at(3, 4).await.unwrap(); + assert!(view.len() <= 4); + assert!(!view.is_empty()); + } + + #[tokio::test] + async fn read_into_bytesbuf_at_works() { + let (_tmp, dir) = setup().await; + let f = PositionalFile::create(&dir, "pfriba.txt").await.unwrap(); + f.write_slice_at(0, b"ABCDEFGHIJ").await.unwrap(); + let mem = GlobalPool::new(); + let mut buf = mem.reserve(16); + let n = f.read_into_bytesbuf_at(5, &mut 
buf).await.unwrap(); + assert!(n > 0); + } + + #[tokio::test] + async fn read_exact_into_uninit_at() { + let (_tmp, dir) = setup().await; + let f = PositionalFile::create(&dir, "pfreiu.txt").await.unwrap(); + f.write_slice_at(0, b"uninit_test").await.unwrap(); + let mut buf = [core::mem::MaybeUninit::::uninit(); 6]; + f.read_exact_into_uninit_at(0, &mut buf).await.unwrap(); + // SAFETY: read_exact_into_uninit_at guarantees initialization on success. + let initialized = unsafe { core::slice::from_raw_parts(buf.as_ptr().cast::(), buf.len()) }; + assert_eq!(initialized, b"uninit"); + } +} diff --git a/crates/seatbelt/src/context.rs b/crates/seatbelt/src/context.rs index 266b4b8a7..a0dbacf3c 100644 --- a/crates/seatbelt/src/context.rs +++ b/crates/seatbelt/src/context.rs @@ -3,10 +3,8 @@ use std::borrow::Cow; -use thread_aware::{ - ThreadAware, - affinity::{MemoryAffinity, PinnedAffinity}, -}; +use thread_aware::ThreadAware; +use thread_aware::affinity::{MemoryAffinity, PinnedAffinity}; use tick::Clock; use crate::TelemetryString; diff --git a/crates/seatbelt/src/fallback/layer.rs b/crates/seatbelt/src/fallback/layer.rs index 82365c5b8..17e15f2e8 100644 --- a/crates/seatbelt/src/fallback/layer.rs +++ b/crates/seatbelt/src/fallback/layer.rs @@ -6,8 +6,9 @@ use std::sync::Arc; use layered::Layer; +use crate::fallback::*; use crate::utils::{EnableIf, TelemetryHelper}; -use crate::{NotSet, ResilienceContext, Set, TelemetryString, fallback::*}; +use crate::{NotSet, ResilienceContext, Set, TelemetryString}; /// Builder for configuring fallback resilience middleware. /// diff --git a/crates/seatbelt/src/fallback/mod.rs b/crates/seatbelt/src/fallback/mod.rs index c8038746b..63af6df91 100644 --- a/crates/seatbelt/src/fallback/mod.rs +++ b/crates/seatbelt/src/fallback/mod.rs @@ -123,9 +123,7 @@ //! let stack = ( //! Fallback::layer("my_fallback", &context) //! .should_fallback(|output: &String| output == "error") -//! .fallback_async(|_output: String, _args| async { -//! 
"fetched_from_cache".to_string() -//! }), +//! .fallback_async(|_output: String, _args| async { "fetched_from_cache".to_string() }), //! Execute::new(execute_unreliable_operation), //! ); //! diff --git a/crates/sync_thunk/CHANGELOG.md b/crates/sync_thunk/CHANGELOG.md new file mode 100644 index 000000000..825c32f0d --- /dev/null +++ b/crates/sync_thunk/CHANGELOG.md @@ -0,0 +1 @@ +# Changelog diff --git a/crates/sync_thunk/Cargo.toml b/crates/sync_thunk/Cargo.toml new file mode 100644 index 000000000..11f0be82a --- /dev/null +++ b/crates/sync_thunk/Cargo.toml @@ -0,0 +1,45 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + +[package] +name = "sync_thunk" +description = "Efficiently handle blocking calls in async code." +version = "0.1.0" +readme = "README.md" +keywords = ["oxidizer", "blocking", "async"] +categories = ["asynchronous", "concurrency"] + +edition.workspace = true +rust-version.workspace = true +authors.workspace = true +license.workspace = true +homepage.workspace = true +repository = "https://github.com/microsoft/oxidizer/tree/main/crates/sync_thunk" + +[package.metadata.cargo_check_external_types] +allowed_external_types = ["sync_thunk_macros::*"] + +[package.metadata.docs.rs] +all-features = true + +[dependencies] +crossfire.workspace = true +sync_thunk_macros.workspace = true + +[dev-dependencies] +tokio = { workspace = true, features = ["macros", "rt-multi-thread"] } + +[lints] +workspace = true + +[[example]] +name = "from_field" + +[[example]] +name = "from_method" + +[[example]] +name = "from_parameter" + +[[example]] +name = "from_static" diff --git a/crates/sync_thunk/README.md b/crates/sync_thunk/README.md new file mode 100644 index 000000000..a35b8aba8 --- /dev/null +++ b/crates/sync_thunk/README.md @@ -0,0 +1,155 @@ +
+ Sync Thunk Logo + +# Sync Thunk + +[![crate.io](https://img.shields.io/crates/v/sync_thunk.svg)](https://crates.io/crates/sync_thunk) +[![docs.rs](https://docs.rs/sync_thunk/badge.svg)](https://docs.rs/sync_thunk) +[![MSRV](https://img.shields.io/crates/msrv/sync_thunk)](https://crates.io/crates/sync_thunk) +[![CI](https://github.com/microsoft/oxidizer/actions/workflows/main.yml/badge.svg?event=push)](https://github.com/microsoft/oxidizer/actions/workflows/main.yml) +[![Coverage](https://codecov.io/gh/microsoft/oxidizer/graph/badge.svg?token=FCUG0EL5TI)](https://codecov.io/gh/microsoft/oxidizer) +[![License](https://img.shields.io/badge/license-MIT-blue.svg)](../../LICENSE) +This crate was developed as part of the Oxidizer project + +
+ +Efficiently handle blocking calls in async code. + +Mark any async method with [`#[thunk]`][__link0] and its body will execute on a +dedicated worker thread, freeing the async executor to do other work. + +```rust +use sync_thunk::{Thunker, thunk}; + +struct MyService { + thunker: Thunker, +} + +impl MyService { + #[thunk(from = self.thunker)] + async fn blocking_work(&self) -> String { + // This body runs on a worker thread, not the async executor. + std::fs::read_to_string("/etc/hostname").unwrap() + } +} +``` + +## Why + +Async runtimes assume tasks yield quickly. Blocking operations — filesystem I/O, +DNS lookups, CPU-heavy computation — stall the executor and hurt throughput. +The traditional fix is `spawn_blocking`, but that allocates a closure, boxes the +return value, and may spawn an unbounded number of OS threads. + +`sync_thunk` solves this differently: + +* **Zero-allocation dispatch.** Arguments are packed into a stack-allocated struct + and sent through a pre-allocated bounded channel. No `Box`, no `Arc`, no closure. + +* **Zero-copy design.** Arguments are moved to the worker thread without requiring any copying or funny ownership gymnastics. + +* **Auto-scaling thread pool.** The [`Thunker`][__link1] starts with a single worker thread + and automatically scales up when the queue backs up — up to a configurable + maximum. Idle workers exit after a configurable cool-down interval, but at least + one worker is always kept alive. + +## Getting Started + +**1. Create a [`Thunker`][__link2]:** + +```rust +use sync_thunk::Thunker; + +let thunker = Thunker::builder() + .max_thread_count(4) // at most 4 workers + .cool_down_interval(std::time::Duration::from_secs(10)) + .build(); +``` + +**2. Annotate methods with [`#[thunk]`][__link3]:** + +The `from` parameter tells the macro where to find the [`Thunker`][__link4]. It can be a +struct field, a method call, a function parameter, or a static — anything that returns a `&Thunker`. 
+ +```rust +#[thunk(from = self.thunker)] +async fn do_io(&self) -> std::io::Result<Vec<u8>> { + std::fs::read("/some/file") +} +``` + +**3. Call it like any other async method:** + +```rust +let data = service.do_io().await?; +``` + +## Where the Thunker Comes From + +The `from` parameter is flexible. Here are the four common patterns: + +### From a struct field + +The most common pattern — the struct owns the thunker: + +```rust +struct MyService { thunker: Thunker } + +impl MyService { + #[thunk(from = self.thunker)] + async fn work(&self) -> u64 { /* ... */ } +} +``` + +### From a method call + +Useful when the thunker is behind a getter or shared via an accessor: + +```rust +impl MyService { + fn thunker(&self) -> &Thunker { &self.inner_thunker } + + #[thunk(from = self.thunker())] + async fn work(&self) -> u64 { /* ... */ } +} +``` + +### From a function parameter + +Useful for associated functions with no `self` receiver: + +```rust +impl MyService { + #[thunk(from = thunker)] + async fn create(thunker: &Thunker, path: &Path) -> std::io::Result<Self> { + let data = std::fs::read(path)?; + /* ... */ + } +} +``` + +### From a global static + +For applications that share a single pool without threading it through structs: + +```rust +static THUNKER: LazyLock<Thunker> = LazyLock::new(|| Thunker::builder().build()); + +impl MyService { + #[thunk(from = THUNKER)] + async fn work(&self) -> u64 { /* ... */ } +} +``` + + +
+ +This crate was developed as part of The Oxidizer Project. Browse this crate's source code. + + + [__cargo_doc2readme_dependencies_info]: ggGkYW0CYXSEGy4k8ldDFPOhG2VNeXtD5nnKG6EPY6OfW5wBG8g18NOFNdxpYXKEG_BuYsgMfBazG9TJGWvNurCDGwRx9icGpbylGxSUKrXYyeSHYWSBgmpzeW5jX3RodW5rZTAuMS4w + [__link0]: https://docs.rs/sync_thunk/0.1.0/sync_thunk/?search=thunk + [__link1]: https://docs.rs/sync_thunk/0.1.0/sync_thunk/?search=Thunker + [__link2]: https://docs.rs/sync_thunk/0.1.0/sync_thunk/?search=Thunker + [__link3]: https://docs.rs/sync_thunk/0.1.0/sync_thunk/?search=thunk + [__link4]: https://docs.rs/sync_thunk/0.1.0/sync_thunk/?search=Thunker diff --git a/crates/sync_thunk/examples/from_field.rs b/crates/sync_thunk/examples/from_field.rs new file mode 100644 index 000000000..c558c287d --- /dev/null +++ b/crates/sync_thunk/examples/from_field.rs @@ -0,0 +1,48 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +//! Shows `#[thunk(from = self.thunker)]` — the thunker is a struct field. +//! +//! This is the most common pattern: a struct holds a `Thunker` and its +//! methods use it for dispatch. + +use std::path::Path; +use std::time::Duration; + +use sync_thunk::{Thunker, thunk}; + +struct FileSystem { + thunker: Thunker, +} + +impl FileSystem { + /// Lists files in a directory — dispatched to a worker thread. + #[thunk(from = self.thunker)] + async fn list_files(&self, dir: &Path) -> std::io::Result> { + std::fs::read_dir(dir)? 
+ .map(|entry| entry.map(|e| e.file_name().to_string_lossy().into_owned())) + .collect() + } +} + +#[tokio::main] +async fn main() { + let fs = FileSystem { + thunker: Thunker::builder() + .max_thread_count(2) + .cool_down_interval(Duration::from_secs(5)) + .build(), + }; + + println!("calling thread: {:?}", std::thread::current().id()); + + match fs.list_files(Path::new(".")).await { + Ok(files) => { + println!("files in current directory:"); + for f in &files { + println!(" {f}"); + } + } + Err(e) => eprintln!("error: {e}"), + } +} diff --git a/crates/sync_thunk/examples/from_method.rs b/crates/sync_thunk/examples/from_method.rs new file mode 100644 index 000000000..c7db24d8e --- /dev/null +++ b/crates/sync_thunk/examples/from_method.rs @@ -0,0 +1,52 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +//! Shows `#[thunk(from = self.thunker())]` — the thunker is obtained via a method call. +//! +//! This is useful when the thunker is behind a getter, stored in an inner +//! type, or shared via an accessor. + +use std::path::Path; +use std::time::Duration; + +use sync_thunk::{Thunker, thunk}; + +struct FileSystem { + inner_thunker: Thunker, +} + +impl FileSystem { + fn thunker(&self) -> &Thunker { + &self.inner_thunker + } + + /// Lists files in a directory — dispatched to a worker thread. + #[thunk(from = self.thunker())] + async fn list_files(&self, dir: &Path) -> std::io::Result> { + std::fs::read_dir(dir)? 
+ .map(|entry| entry.map(|e| e.file_name().to_string_lossy().into_owned())) + .collect() + } +} + +#[tokio::main] +async fn main() { + let fs = FileSystem { + inner_thunker: Thunker::builder() + .max_thread_count(2) + .cool_down_interval(Duration::from_secs(5)) + .build(), + }; + + println!("calling thread: {:?}", std::thread::current().id()); + + match fs.list_files(Path::new(".")).await { + Ok(files) => { + println!("files in current directory:"); + for f in &files { + println!(" {f}"); + } + } + Err(e) => eprintln!("error: {e}"), + } +} diff --git a/crates/sync_thunk/examples/from_parameter.rs b/crates/sync_thunk/examples/from_parameter.rs new file mode 100644 index 000000000..1090b7bc9 --- /dev/null +++ b/crates/sync_thunk/examples/from_parameter.rs @@ -0,0 +1,47 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +//! Shows `#[thunk(from = thunker)]` — the thunker is a function parameter. +//! +//! This is useful for associated functions (no `self` receiver), free functions, +//! or bootstrap code where a thunker is passed in by the caller. + +use std::path::Path; +use std::time::Duration; + +use sync_thunk::{Thunker, thunk}; + +struct FileSystem; + +impl FileSystem { + /// Lists files in a directory — dispatched to a worker thread. + /// + /// The thunker is passed as a parameter because this is an associated + /// function with no `self` receiver. + #[thunk(from = thunker)] + async fn list_files(thunker: &Thunker, dir: &Path) -> std::io::Result> { + std::fs::read_dir(dir)? 
+ .map(|entry| entry.map(|e| e.file_name().to_string_lossy().into_owned())) + .collect() + } +} + +#[tokio::main] +async fn main() { + let thunker = Thunker::builder() + .max_thread_count(2) + .cool_down_interval(Duration::from_secs(5)) + .build(); + + println!("calling thread: {:?}", std::thread::current().id()); + + match FileSystem::list_files(&thunker, Path::new(".")).await { + Ok(files) => { + println!("files in current directory:"); + for f in &files { + println!(" {f}"); + } + } + Err(e) => eprintln!("error: {e}"), + } +} diff --git a/crates/sync_thunk/examples/from_static.rs b/crates/sync_thunk/examples/from_static.rs new file mode 100644 index 000000000..2b3062fed --- /dev/null +++ b/crates/sync_thunk/examples/from_static.rs @@ -0,0 +1,49 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +//! Shows `#[thunk(from = THUNKER)]` — the thunker is a global static. +//! +//! This is useful for applications that share a single thread pool across +//! many components without threading a `Thunker` through every struct. + +use std::path::Path; +use std::sync::LazyLock; +use std::time::Duration; + +use sync_thunk::{Thunker, thunk}; + +static THUNKER: LazyLock = LazyLock::new(|| { + Thunker::builder() + .max_thread_count(2) + .cool_down_interval(Duration::from_secs(5)) + .build() +}); + +struct FileSystem; + +impl FileSystem { + /// Lists files in a directory — dispatched to a worker thread. + #[thunk(from = THUNKER)] + async fn list_files(&self, dir: &Path) -> std::io::Result> { + std::fs::read_dir(dir)? 
+ .map(|entry| entry.map(|e| e.file_name().to_string_lossy().into_owned())) + .collect() + } +} + +#[tokio::main] +async fn main() { + let fs = FileSystem; + + println!("calling thread: {:?}", std::thread::current().id()); + + match fs.list_files(Path::new(".")).await { + Ok(files) => { + println!("files in current directory:"); + for f in &files { + println!(" {f}"); + } + } + Err(e) => eprintln!("error: {e}"), + } +} diff --git a/crates/sync_thunk/favicon.ico b/crates/sync_thunk/favicon.ico new file mode 100644 index 000000000..127c7d34f --- /dev/null +++ b/crates/sync_thunk/favicon.ico @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b2814201968ea7689ed59b84bf57f1552ab691b0e043f83e43f04b2fcad06e0d +size 198169 diff --git a/crates/sync_thunk/logo.png b/crates/sync_thunk/logo.png new file mode 100644 index 000000000..d72833407 --- /dev/null +++ b/crates/sync_thunk/logo.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:917b8ece75df7d5f9d2e0b65b1480564bd7b8d2b46a74e4fd754a36d69b1d652 +size 61948 diff --git a/crates/sync_thunk/src/lib.rs b/crates/sync_thunk/src/lib.rs new file mode 100644 index 000000000..8e7b35409 --- /dev/null +++ b/crates/sync_thunk/src/lib.rs @@ -0,0 +1,155 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +#![cfg_attr(coverage_nightly, feature(coverage_attribute))] +#![cfg_attr(docsrs, feature(doc_cfg))] + +//! Efficiently handle blocking calls in async code. +//! +//! Mark any async method with [`#[thunk]`](thunk) and its body will execute on a +//! dedicated worker thread, freeing the async executor to do other work. +//! +//! ```ignore +//! use sync_thunk::{Thunker, thunk}; +//! +//! struct MyService { +//! thunker: Thunker, +//! } +//! +//! impl MyService { +//! #[thunk(from = self.thunker)] +//! async fn blocking_work(&self) -> String { +//! // This body runs on a worker thread, not the async executor. +//! std::fs::read_to_string("/etc/hostname").unwrap() +//! 
} +//! } +//! ``` +//! +//! # Why +//! +//! Async runtimes assume tasks yield quickly. Blocking operations — filesystem I/O, +//! DNS lookups, CPU-heavy computation — stall the executor and hurt throughput. +//! The traditional fix is `spawn_blocking`, but that allocates a closure, boxes the +//! return value, and may spawn an unbounded number of OS threads. +//! +//! `sync_thunk` solves this differently: +//! +//! - **Zero-allocation dispatch.** Arguments are packed into a stack-allocated struct +//! and sent through a pre-allocated bounded channel. No `Box`, no `Arc`, no closure. +//! +//! - **Zero-copy design.** Arguments are moved to the worker thread without requiring any copying or funny ownership gymnastics. +//! +//! - **Auto-scaling thread pool.** The [`Thunker`] starts with a single worker thread +//! and automatically scales up when the queue backs up — up to a configurable +//! maximum. Idle workers exit after a configurable cool-down interval, but at least +//! one worker is always kept alive. +//! +//! # Getting Started +//! +//! **1. Create a [`Thunker`]:** +//! +//! ``` +//! use sync_thunk::Thunker; +//! +//! let thunker = Thunker::builder() +//! .max_thread_count(4) // at most 4 workers +//! .cool_down_interval(std::time::Duration::from_secs(10)) +//! .build(); +//! ``` +//! +//! **2. Annotate methods with [`#[thunk]`](thunk):** +//! +//! The `from` parameter tells the macro where to find the [`Thunker`]. It can be a +//! struct field, a method call, a function parameter, or a static — anything that returns a `&Thunker`. +//! +//! ```ignore +//! #[thunk(from = self.thunker)] +//! async fn do_io(&self) -> std::io::Result<Vec<u8>> { +//! std::fs::read("/some/file") +//! } +//! ``` +//! +//! **3. Call it like any other async method:** +//! +//! ```ignore +//! let data = service.do_io().await?; +//! ``` +//! +//! # Where the Thunker Comes From +//! +//! The `from` parameter is flexible. Here are the four common patterns: +//! +//! ## From a struct field +//! 
+//! The most common pattern — the struct owns the thunker: +//! +//! ```ignore +//! struct MyService { thunker: Thunker } +//! +//! impl MyService { +//! #[thunk(from = self.thunker)] +//! async fn work(&self) -> u64 { /* ... */ } +//! } +//! ``` +//! +//! ## From a method call +//! +//! Useful when the thunker is behind a getter or shared via an accessor: +//! +//! ```ignore +//! impl MyService { +//! fn thunker(&self) -> &Thunker { &self.inner_thunker } +//! +//! #[thunk(from = self.thunker())] +//! async fn work(&self) -> u64 { /* ... */ } +//! } +//! ``` +//! +//! ## From a function parameter +//! +//! Useful for associated functions with no `self` receiver: +//! +//! ```ignore +//! impl MyService { +//! #[thunk(from = thunker)] +//! async fn create(thunker: &Thunker, path: &Path) -> std::io::Result<Self> { +//! let data = std::fs::read(path)?; +//! /* ... */ +//! } +//! } +//! ``` +//! +//! ## From a global static +//! +//! For applications that share a single pool without threading it through structs: +//! +//! ```ignore +//! static THUNKER: LazyLock<Thunker> = LazyLock::new(|| Thunker::builder().build()); +//! +//! impl MyService { +//! #[thunk(from = THUNKER)] +//! async fn work(&self) -> u64 { /* ... */ } +//! } +//! 
``` + +#![doc(html_logo_url = "https://media.githubusercontent.com/media/microsoft/oxidizer/refs/heads/main/crates/sync_thunk/logo.png")] +#![doc(html_favicon_url = "https://media.githubusercontent.com/media/microsoft/oxidizer/refs/heads/main/crates/sync_thunk/favicon.ico")] + +extern crate self as sync_thunk; + +mod macros; +mod stack_state; +mod thunk_future; +mod thunker; +mod thunker_builder; +mod work_item; + +pub use macros::thunk; +#[doc(hidden)] +pub use stack_state::StackState; +#[doc(hidden)] +pub use thunk_future::ThunkFuture; +pub use thunker::Thunker; +pub use thunker_builder::ThunkerBuilder; +#[doc(hidden)] +pub use work_item::WorkItem; diff --git a/crates/sync_thunk/src/macros.rs b/crates/sync_thunk/src/macros.rs new file mode 100644 index 000000000..bc2b90b19 --- /dev/null +++ b/crates/sync_thunk/src/macros.rs @@ -0,0 +1,84 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +/// Runs a function body on a worker thread instead of the async executor. +/// +/// Apply `#[thunk]` to any `async fn` whose body performs blocking work — file I/O, +/// CPU-heavy computation, FFI calls, or anything else that should not block the +/// async runtime. The method signature stays `async`, but the body executes on a +/// [`Thunker`](crate::Thunker) worker thread. +/// +/// # The `from` parameter +/// +/// The `from` parameter tells the macro where to find the [`Thunker`](crate::Thunker). 
+/// There are four supported patterns: +/// +/// **Struct field** — the most common pattern: +/// ```ignore +/// #[thunk(from = self.thunker)] +/// async fn work(&self) -> u64 { /* blocking code */ } +/// ``` +/// +/// **Method call** — when the thunker is behind a getter: +/// ```ignore +/// #[thunk(from = self.thunker())] +/// async fn work(&self) -> u64 { /* blocking code */ } +/// ``` +/// +/// **Function parameter** — for associated functions with no `self`: +/// ```ignore +/// #[thunk(from = thunker)] +/// async fn create(thunker: &Thunker, path: &Path) -> Result { /* ... */ } +/// ``` +/// +/// **Global static** — for applications that share a single pool: +/// ```ignore +/// #[thunk(from = THUNKER)] +/// async fn work(&self) -> u64 { /* blocking code */ } +/// ``` +/// +/// # Parameters and return values +/// +/// All parameter types and return types work naturally: +/// +/// - **`&self` / `&mut self`** — fully supported. +/// - **References (`&T`, `&mut T`)** — borrowed data is safe because the future's +/// drop guard blocks until the worker completes. +/// - **Owned values** — moved to the worker thread and available in the body. +/// - **Any return type** — the result is written back to the caller's stack and +/// returned from the `.await`. +/// +/// # Cancellation +/// +/// If the future is dropped (canceled) before the worker finishes, a drop +/// guard blocks the dropping thread until the worker completes. The blocking +/// operation always runs to completion — it cannot be interrupted mid-flight. +/// This is the same guarantee that makes borrowed references safe across threads. +/// +/// # Panics +/// +/// If the function body panics, the panic propagates to the `.await` site on the +/// calling task, just as it would for a synchronous call. 
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.

use std::cell::UnsafeCell;
use std::sync::Mutex;
use std::sync::atomic::{AtomicBool, Ordering};
use std::task::Waker;

/// Stack-allocated state shared between the async caller and the worker thread.
///
/// `R` is the type of the result the worker writes back and `T` is the type of
/// the task arguments handed over to the worker.
///
/// # Cancellation Safety
///
/// The [`Drop`] implementation waits until the `ready` flag is set: it spins
/// for a short, bounded number of iterations and then yields the thread's time
/// slice between checks. This guarantees the worker thread can always safely
/// write its result to the stack-allocated slots, even if the future is
/// cancelled. However, it also means that dropping a `StackState` whose work
/// item was **never dispatched** (or whose worker will never call
/// [`complete`](Self::complete) or [`mark_panicked`](Self::mark_panicked))
/// will block the dropping thread indefinitely. Callers must ensure the work
/// item is submitted to the [`Thunker`](crate::Thunker) before the
/// `StackState` can be dropped.
pub struct StackState<R, T> {
    /// Set (with `Release`) once the worker is done with this state.
    ready: AtomicBool,
    /// Set before `ready` when the worker finished by panicking.
    panicked: AtomicBool,
    /// Result slot, written by `complete` and drained by `take_result`.
    result: UnsafeCell<Option<R>>,
    /// Waker of the task that is waiting for the result, if one is registered.
    waker: Mutex<Option<Waker>>,
    /// Task-argument slot, written by `set_task` and drained by `take_task`.
    task: UnsafeCell<Option<T>>,
}

impl<R, T> StackState<R, T> {
    /// Creates a new empty `StackState`: not ready, not panicked, no task,
    /// no result and no waker.
    pub fn new() -> Self {
        Self {
            ready: AtomicBool::new(false),
            panicked: AtomicBool::new(false),
            result: UnsafeCell::new(None),
            waker: Mutex::new(None),
            task: UnsafeCell::new(None),
        }
    }

    /// Stores the task arguments into the state.
    ///
    /// # Safety
    ///
    /// Must not be called concurrently with [`take_task`](Self::take_task).
    pub unsafe fn set_task(&self, task: T) {
        // SAFETY: Caller guarantees no concurrent access to the task slot.
        unsafe { *self.task.get() = Some(task) };
    }

    /// Takes the task arguments out of the state, returning `None` if already taken.
    ///
    /// # Safety
    ///
    /// Must not be called concurrently with [`set_task`](Self::set_task).
    pub unsafe fn take_task(&self) -> Option<T> {
        // SAFETY: Caller guarantees no concurrent access to the task slot.
        unsafe { (*self.task.get()).take() }
    }

    /// Writes the computed result and signals readiness.
    ///
    /// # Safety
    ///
    /// Must be called exactly once by the worker thread after computing the result.
    pub unsafe fn complete(&self, result: R) {
        // SAFETY: Caller guarantees exclusive access to the result slot at this point.
        unsafe { *self.result.get() = Some(result) };
        // Release pairs with the Acquire loads in `is_ready` and `Drop`, so the
        // result write above is visible to whoever observes `ready == true`.
        self.ready.store(true, Ordering::Release);
    }

    /// Takes the result out of the state, returning `None` if not yet written.
    ///
    /// # Safety
    ///
    /// Must only be called after [`is_ready`](Self::is_ready) returns `true`.
    pub unsafe fn take_result(&self) -> Option<R> {
        // SAFETY: Caller guarantees the result has been written and no concurrent access.
        unsafe { (*self.result.get()).take() }
    }

    /// Returns `true` if the worker has signaled completion.
    pub fn is_ready(&self) -> bool {
        self.ready.load(Ordering::Acquire)
    }

    /// Marks the state as panicked and signals readiness.
    ///
    /// This unblocks `Drop` and causes `ThunkFuture::poll` to re-raise the panic.
    pub fn mark_panicked(&self) {
        // Relaxed is sufficient here: the Release store to `ready` below
        // publishes this write to any thread that acquires `ready`.
        self.panicked.store(true, Ordering::Relaxed);
        self.ready.store(true, Ordering::Release);
    }

    /// Returns `true` if the worker panicked.
    pub fn has_panicked(&self) -> bool {
        self.panicked.load(Ordering::Relaxed)
    }

    /// Stores a waker to be notified when the result is ready.
    ///
    /// Any previously stored waker is replaced.
    ///
    /// # Panics
    ///
    /// Panics if the internal waker mutex is poisoned.
    pub fn set_waker(&self, waker: Waker) {
        let mut guard = self.waker.lock().expect("waker mutex is not poisoned");
        *guard = Some(waker);
    }

    /// Takes and wakes the stored waker, if present.
    ///
    /// # Panics
    ///
    /// Panics if the internal waker mutex is poisoned.
    pub fn wake(&self) {
        // The lock guard is a temporary of this statement, so the mutex is
        // released before the waker runs.
        let waker = self.waker.lock().expect("waker mutex is not poisoned").take();
        if let Some(w) = waker {
            w.wake();
        }
    }

    /// Returns a raw const pointer to this `StackState`.
    pub fn as_ptr(&self) -> *const Self {
        self
    }

    /// Returns a raw mutable pointer to this `StackState`.
    pub fn as_mut_ptr(&self) -> *mut Self {
        std::ptr::from_ref(self).cast_mut()
    }
}

impl<R, T> Default for StackState<R, T> {
    fn default() -> Self {
        Self::new()
    }
}

impl<R, T> std::fmt::Debug for StackState<R, T> {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Only the two flags are shown; the slots may hold non-`Debug` types.
        f.debug_struct("StackState")
            .field("ready", &self.ready.load(Ordering::Relaxed))
            .field("panicked", &self.panicked.load(Ordering::Relaxed))
            .finish_non_exhaustive()
    }
}

impl<R, T> Drop for StackState<R, T> {
    fn drop(&mut self) {
        /// Number of busy-wait iterations before falling back to yielding.
        const SPIN_LIMIT: u32 = 64;

        // Cancellation guard: prevent use-after-free if the future is dropped
        // before the worker finishes writing to our stack-allocated state.
        //
        // The worker may be in the middle of a long blocking operation, so
        // spinning is bounded; afterwards the thread yields between checks
        // instead of burning a core for the whole wait.
        let mut spins = 0_u32;
        while !self.ready.load(Ordering::Acquire) {
            if spins < SPIN_LIMIT {
                spins += 1;
                std::hint::spin_loop();
            } else {
                std::thread::yield_now();
            }
        }
    }
}

// SAFETY: StackState is designed for cross-thread sharing between an async
// poller and a worker thread. Access to UnsafeCell fields is synchronized
// by the `ready` atomic flag and the protocol enforced by the unsafe methods.
+unsafe impl Sync for StackState {} + +#[cfg(test)] +mod tests { + use std::sync::Arc; + use std::task::{RawWaker, RawWakerVTable}; + + use super::*; + + fn noop_clone(p: *const ()) -> RawWaker { + RawWaker::new(p, &NOOP_VTABLE) + } + fn noop(_: *const ()) {} + static NOOP_VTABLE: RawWakerVTable = RawWakerVTable::new(noop_clone, noop, noop, noop); + + /// Creates a no-op waker for testing. + fn noop_waker() -> Waker { + // SAFETY: The vtable functions are sound no-ops. + unsafe { Waker::from_raw(RawWaker::new(core::ptr::null(), &NOOP_VTABLE)) } + } + + fn flag_clone(data: *const ()) -> RawWaker { + // SAFETY: data points to a valid Arc. + let arc = unsafe { Arc::from_raw(data.cast::()) }; + let clone = Arc::clone(&arc); + core::mem::forget(arc); + RawWaker::new(Arc::into_raw(clone).cast(), &FLAG_VTABLE) + } + fn flag_wake(data: *const ()) { + // SAFETY: data points to a valid Arc. + let arc = unsafe { Arc::from_raw(data.cast::()) }; + arc.store(true, Ordering::SeqCst); + } + fn flag_wake_by_ref(data: *const ()) { + // SAFETY: data points to a valid Arc. + let arc = unsafe { Arc::from_raw(data.cast::()) }; + arc.store(true, Ordering::SeqCst); + core::mem::forget(arc); + } + fn flag_drop(data: *const ()) { + // SAFETY: data points to a valid Arc. + unsafe { drop(Arc::from_raw(data.cast::())) }; + } + static FLAG_VTABLE: RawWakerVTable = RawWakerVTable::new(flag_clone, flag_wake, flag_wake_by_ref, flag_drop); + + #[test] + fn new_is_not_ready() { + let state = StackState::::new(); + assert!(!state.is_ready()); + // SAFETY: No concurrent access — single-threaded test. + unsafe { state.complete(0) }; + } + + #[test] + fn default_is_not_ready() { + let state = StackState::::default(); + assert!(!state.is_ready()); + // SAFETY: No concurrent access — single-threaded test. + unsafe { state.complete(0) }; + } + + #[test] + fn set_and_take_task() { + let state = StackState::<(), String>::new(); + // SAFETY: No concurrent access — single-threaded test. 
+ unsafe { state.set_task(String::from("hello")) }; + // SAFETY: No concurrent access — single-threaded test. + let task = unsafe { state.take_task() }; + assert_eq!(task.as_deref(), Some("hello")); + // SAFETY: No concurrent access — single-threaded test. + let task2 = unsafe { state.take_task() }; + assert!(task2.is_none()); + // SAFETY: No concurrent access — single-threaded test. + unsafe { state.complete(()) }; + } + + #[test] + fn complete_and_take_result() { + let state = StackState::::new(); + assert!(!state.is_ready()); + // SAFETY: No concurrent access — single-threaded test. + unsafe { state.complete(String::from("result")) }; + assert!(state.is_ready()); + // SAFETY: is_ready() returned true; no concurrent access. + let val = unsafe { state.take_result() }; + assert_eq!(val.as_deref(), Some("result")); + // SAFETY: No concurrent access — single-threaded test. + let val2 = unsafe { state.take_result() }; + assert!(val2.is_none()); + } + + #[test] + fn set_waker_and_wake() { + let woken = Arc::new(AtomicBool::new(false)); + let woken2 = Arc::clone(&woken); + + let raw = RawWaker::new(Arc::into_raw(woken2).cast(), &FLAG_VTABLE); + // SAFETY: The vtable functions correctly manage Arc refcounts. + let waker = unsafe { Waker::from_raw(raw) }; + + let state = StackState::<(), ()>::new(); + state.set_waker(waker); + state.wake(); + assert!(woken.load(Ordering::SeqCst)); + // SAFETY: No concurrent access — single-threaded test. + unsafe { state.complete(()) }; + } + + #[test] + fn wake_without_waker_is_noop() { + let state = StackState::<(), ()>::new(); + state.wake(); + // SAFETY: No concurrent access — single-threaded test. + unsafe { state.complete(()) }; + } + + #[test] + fn as_ptr_and_as_mut_ptr() { + let state = StackState::::new(); + let p = state.as_ptr(); + let mp = state.as_mut_ptr(); + assert_eq!(p, mp.cast_const()); + assert_eq!(p, &raw const state); + // SAFETY: No concurrent access — single-threaded test. 
+ unsafe { state.complete(0) }; + } + + #[test] + fn debug_impl_not_ready() { + let state = StackState::::new(); + let debug = format!("{state:?}"); + assert!(debug.contains("StackState")); + assert!(debug.contains("false")); + // SAFETY: No concurrent access — single-threaded test. + unsafe { state.complete(0) }; + } + + #[test] + fn debug_impl_ready() { + let state = StackState::::new(); + // SAFETY: No concurrent access — single-threaded test. + unsafe { state.complete(42) }; + let debug = format!("{state:?}"); + assert!(debug.contains("true")); + } + + #[test] + fn drop_blocks_until_ready() { + let state = StackState::::new(); + // SAFETY: No concurrent access — single-threaded test. + unsafe { state.complete(99) }; + drop(state); + } + + #[test] + fn sync_trait_bounds() { + fn assert_sync() {} + assert_sync::>(); + } + + #[test] + fn set_waker_with_noop() { + let state = StackState::<(), ()>::new(); + state.set_waker(noop_waker()); + state.wake(); + state.wake(); + // SAFETY: No concurrent access — single-threaded test. + unsafe { state.complete(()) }; + } + + #[test] + fn complete_with_complex_type() { + let state = StackState::, ()>::new(); + let data = vec![String::from("a"), String::from("b")]; + // SAFETY: No concurrent access — single-threaded test. + unsafe { state.complete(data) }; + assert!(state.is_ready()); + // SAFETY: is_ready() returned true; no concurrent access. + let result = unsafe { state.take_result() }.unwrap(); + assert_eq!(result, vec!["a", "b"]); + } + + #[test] + fn task_with_complex_type() { + let state = StackState::<(), Vec>::new(); + // SAFETY: No concurrent access — single-threaded test. + unsafe { state.set_task(vec![1, 2, 3]) }; + // SAFETY: No concurrent access — single-threaded test. + let task = unsafe { state.take_task() }.unwrap(); + assert_eq!(task, vec![1, 2, 3]); + // SAFETY: No concurrent access — single-threaded test. 
+ unsafe { state.complete(()) }; + } +} diff --git a/crates/sync_thunk/src/thunk_future.rs b/crates/sync_thunk/src/thunk_future.rs new file mode 100644 index 000000000..677d41a9e --- /dev/null +++ b/crates/sync_thunk/src/thunk_future.rs @@ -0,0 +1,183 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +use std::pin::Pin; +use std::task::{Context, Poll}; + +use crate::StackState; + +/// A future that resolves when the worker thread writes a result into [`StackState`]. +pub struct ThunkFuture<'a, R, T> { + state: &'a StackState, +} + +impl<'a, R, T> ThunkFuture<'a, R, T> { + /// Creates a new `ThunkFuture` that will resolve when the given state becomes ready. + pub fn new(state: &'a StackState) -> Self { + Self { state } + } +} + +impl core::fmt::Debug for ThunkFuture<'_, R, T> { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + f.debug_struct("ThunkFuture").finish_non_exhaustive() + } +} + +// ThunkFuture holds only a plain reference — no self-referential state — so it +// is safe to move after pinning. +impl Unpin for ThunkFuture<'_, R, T> {} + +impl Future for ThunkFuture<'_, R, T> { + type Output = R; + + fn poll(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll { + if self.state.is_ready() { + assert!( + !self.state.has_panicked(), + "thunked function panicked on worker thread" + ); + // SAFETY: The worker has signalled completion via the atomic store. + // We are the only consumer of the result slot. + let val = unsafe { self.state.take_result().expect("guarded by ready flag to always contain a result") }; + return Poll::Ready(val); + } + + // Register the waker before re-checking readiness to avoid lost wakeups. + self.state.set_waker(cx.waker().clone()); + + // Re-check after waker registration: the worker may have completed + // between our first is_ready() check and the set_waker() call above. 
+ if self.state.is_ready() { + assert!( + !self.state.has_panicked(), + "thunked function panicked on worker thread" + ); + // SAFETY: Same as above — ready flag guarantees result is present. + let val = unsafe { self.state.take_result().expect("guarded by ready flag to always contain a result") }; + return Poll::Ready(val); + } + + Poll::Pending + } +} + +#[cfg(test)] +mod tests { + use std::task::{RawWaker, RawWakerVTable, Waker}; + + use super::*; + + fn noop_clone(p: *const ()) -> RawWaker { + RawWaker::new(p, &NOOP_VTABLE) + } + fn noop(_: *const ()) {} + static NOOP_VTABLE: RawWakerVTable = RawWakerVTable::new(noop_clone, noop, noop, noop); + + /// Creates a no-op waker for manual polling. + fn noop_waker() -> Waker { + // SAFETY: The vtable functions are sound no-ops. + unsafe { Waker::from_raw(RawWaker::new(core::ptr::null(), &NOOP_VTABLE)) } + } + + #[test] + fn poll_ready_immediately() { + let state = StackState::::new(); + // SAFETY: No concurrent access — single-threaded test. + unsafe { state.complete(42) }; + + let mut future = ThunkFuture::new(&state); + let waker = noop_waker(); + let mut cx = Context::from_waker(&waker); + match Pin::new(&mut future).poll(&mut cx) { + Poll::Ready(val) => assert_eq!(val, 42), + Poll::Pending => panic!("expected Ready"), + } + } + + #[test] + fn poll_pending_then_ready() { + let state = StackState::::new(); + + let mut future = ThunkFuture::new(&state); + let waker = noop_waker(); + let mut cx = Context::from_waker(&waker); + + assert!(Pin::new(&mut future).poll(&mut cx).is_pending()); + + // SAFETY: No concurrent access — single-threaded test. + unsafe { state.complete(String::from("done")) }; + + match Pin::new(&mut future).poll(&mut cx) { + Poll::Ready(val) => assert_eq!(val, "done"), + Poll::Pending => panic!("expected Ready"), + } + } + + #[test] + fn poll_ready_on_recheck_after_waker_registration() { + let state = StackState::::new(); + // SAFETY: No concurrent access — single-threaded test. 
+ unsafe { state.complete(-1) }; + + let mut future = ThunkFuture::new(&state); + let waker = noop_waker(); + let mut cx = Context::from_waker(&waker); + + match Pin::new(&mut future).poll(&mut cx) { + Poll::Ready(val) => assert_eq!(val, -1), + Poll::Pending => panic!("expected Ready"), + } + } + + #[test] + fn debug_impl() { + let state = StackState::::new(); + // SAFETY: No concurrent access — single-threaded test. + unsafe { state.complete(0) }; + let future = ThunkFuture::new(&state); + let debug = format!("{future:?}"); + assert!(debug.contains("ThunkFuture")); + } + + #[test] + fn unpin_trait() { + fn assert_unpin() {} + assert_unpin::>(); + } + + #[test] + fn poll_with_complex_return_type() { + let state = StackState::, ()>::new(); + let data = vec![String::from("a"), String::from("b")]; + // SAFETY: No concurrent access — single-threaded test. + unsafe { state.complete(data) }; + + let mut future = ThunkFuture::new(&state); + let waker = noop_waker(); + let mut cx = Context::from_waker(&waker); + match Pin::new(&mut future).poll(&mut cx) { + Poll::Ready(val) => assert_eq!(val, vec!["a", "b"]), + Poll::Pending => panic!("expected Ready"), + } + } + + #[test] + fn multiple_pending_polls_before_ready() { + let state = StackState::::new(); + let mut future = ThunkFuture::new(&state); + let waker = noop_waker(); + let mut cx = Context::from_waker(&waker); + + for _ in 0..5 { + assert!(Pin::new(&mut future).poll(&mut cx).is_pending()); + } + + // SAFETY: No concurrent access — single-threaded test. + unsafe { state.complete(100) }; + match Pin::new(&mut future).poll(&mut cx) { + Poll::Ready(val) => assert_eq!(val, 100), + Poll::Pending => panic!("expected Ready"), + } + } +} diff --git a/crates/sync_thunk/src/thunker.rs b/crates/sync_thunk/src/thunker.rs new file mode 100644 index 000000000..f5647883c --- /dev/null +++ b/crates/sync_thunk/src/thunker.rs @@ -0,0 +1,325 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. 
+ +use std::sync::Arc; +use std::sync::atomic::{AtomicUsize, Ordering}; +use std::time::Duration; + +use crossfire::mpmc; + +use crate::{ThunkerBuilder, WorkItem}; + +/// Shared state between the `Thunker` handle and its worker threads. +struct ThunkerInner { + sender: crossfire::MTx>, + receiver: crossfire::MRx>, + thread_count: AtomicUsize, + pending_count: AtomicUsize, + max_thread_count: usize, + cool_down_interval: Duration, +} + +/// An auto-scaling thread pool for dispatching blocking work from async code. +/// +/// `Thunker` manages a pool of worker threads that execute blocking operations on behalf of async tasks. +/// +/// # Thread Scaling +/// +/// The pool starts with **one** worker thread. When a new work item is enqueued and the +/// number of pending items meets or exceeds the current thread count, an additional worker +/// is spawned (up to [`max_thread_count`](Self::max_thread_count)). The scale-up decision +/// uses a compare-and-swap to prevent multiple threads from being spawned simultaneously. +/// +/// Workers that receive no work for [`cool_down_interval`](Self::cool_down_interval) exit +/// voluntarily, but at least one worker is always kept alive. +/// +/// # Examples +/// +/// Using default settings: +/// +/// ``` +/// use sync_thunk::Thunker; +/// +/// let thunker = Thunker::new(); +/// ``` +/// +/// Using the builder for custom configuration: +/// +/// ``` +/// use std::time::Duration; +/// +/// use sync_thunk::Thunker; +/// +/// let thunker = Thunker::builder() +/// .max_thread_count(8) +/// .cool_down_interval(Duration::from_secs(30)) +/// .build(); +/// ``` +#[derive(Clone)] +pub struct Thunker { + inner: Arc, +} + +impl Thunker { + /// Creates a new `Thunker` with default settings and spawns an initial worker thread. + #[must_use] + pub fn new() -> Self { + Self::builder().build() + } + + /// Returns a [`ThunkerBuilder`] for configuring a new `Thunker`. 
+ #[must_use] + pub fn builder() -> ThunkerBuilder { + ThunkerBuilder::new() + } + + /// Constructs a `Thunker` from a completed builder. + pub(crate) fn from_builder(builder: ThunkerBuilder) -> Self { + let (sender, receiver) = mpmc::bounded_blocking(builder.channel_capacity); + let thunker = Self { + inner: Arc::new(ThunkerInner { + sender, + receiver, + thread_count: AtomicUsize::new(0), + pending_count: AtomicUsize::new(0), + max_thread_count: builder.max_thread_count, + cool_down_interval: builder.cool_down_interval, + }), + }; + Self::spawn_worker(&thunker.inner); + thunker + } + + /// Returns the maximum number of worker threads. + #[must_use] + pub fn max_thread_count(&self) -> usize { + self.inner.max_thread_count + } + + /// Returns the cool-down interval for idle worker threads. + #[must_use] + pub fn cool_down_interval(&self) -> Duration { + self.inner.cool_down_interval + } + + /// Returns the current number of active worker threads. + #[must_use] + pub fn thread_count(&self) -> usize { + self.inner.thread_count.load(Ordering::Relaxed) + } + + /// Sends a work item to be executed on a worker thread. + /// + /// Automatically scales up the thread pool if the queue is backing up + /// and the current thread count is below the configured maximum. + /// + /// # Panics + /// + /// Panics if the channel is closed. + #[doc(hidden)] + pub fn send(&self, item: WorkItem) { + let prev_pending = self.inner.pending_count.fetch_add(1, Ordering::Relaxed); + let threads = self.inner.thread_count.load(Ordering::Acquire); + + // Scale up if the queue is backing up and we haven't hit the limit. 
+ if prev_pending >= threads + && threads < self.inner.max_thread_count + && self + .inner + .thread_count + .compare_exchange(threads, threads + 1, Ordering::AcqRel, Ordering::Relaxed) + .is_ok() + { + Self::spawn_worker_already_counted(&self.inner); + } + + self.inner.sender.send(item).expect("channel is closed"); + } + + /// Spawns a worker thread and increments the thread count. + fn spawn_worker(inner: &Arc) { + let _ = inner.thread_count.fetch_add(1, Ordering::AcqRel); + Self::spawn_worker_already_counted(inner); + } + + /// Spawns a worker thread, assuming the caller already incremented the count. + fn spawn_worker_already_counted(inner: &Arc) { + let inner = Arc::clone(inner); + std::thread::Builder::new() + .name("sync-thunk-worker".into()) + .spawn(move || { + Self::worker_loop(&inner); + }) + .expect("OS refused to spawn a sync-thunk worker thread; the system may be out of resources"); + } + + fn worker_loop(inner: &ThunkerInner) { + loop { + match inner.receiver.recv_timeout(inner.cool_down_interval) { + Ok(item) => { + // Decrement pending_count even if the work item panics. + struct DecrementOnDrop<'a>(&'a AtomicUsize); + impl Drop for DecrementOnDrop<'_> { + fn drop(&mut self) { + let _ = self.0.fetch_sub(1, Ordering::Relaxed); + } + } + let _guard = DecrementOnDrop(&inner.pending_count); + item.execute(); + } + Err(crossfire::RecvTimeoutError::Timeout) => { + // Scale down: CAS loop ensures at least one worker remains. + let mut count = inner.thread_count.load(Ordering::Relaxed); + loop { + if count <= 1 { + break; + } + match inner + .thread_count + .compare_exchange_weak(count, count - 1, Ordering::AcqRel, Ordering::Relaxed) + { + Ok(_) => return, + Err(actual) => count = actual, + } + } + // Last worker — keep running. 
+ } + Err(crossfire::RecvTimeoutError::Disconnected) => { + let _ = inner.thread_count.fetch_sub(1, Ordering::AcqRel); + return; + } + } + } + } +} + +impl Default for Thunker { + fn default() -> Self { + Self::new() + } +} + +impl core::fmt::Debug for Thunker { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + f.debug_struct("Thunker") + .field("max_thread_count", &self.inner.max_thread_count) + .field("cool_down_interval", &self.inner.cool_down_interval) + .field("thread_count", &self.inner.thread_count.load(Ordering::Relaxed)) + .field("pending_count", &self.inner.pending_count.load(Ordering::Relaxed)) + .finish() + } +} + +#[cfg(test)] +mod tests { + use std::sync::atomic::AtomicBool; + + use super::*; + + fn set_flag(ptr: *mut ()) { + // SAFETY: ptr is a valid Arc created via Arc::into_raw. + let arc = unsafe { Arc::from_raw(ptr.cast::()) }; + arc.store(true, Ordering::SeqCst); + } + + fn wait_on_barrier(ptr: *mut ()) { + // SAFETY: ptr is a valid Arc created via Arc::into_raw. 
+ let b = unsafe { Arc::from_raw(ptr.cast::()) }; + b.wait(); + } + + #[test] + fn new_has_defaults() { + let t = Thunker::new(); + assert_eq!(t.max_thread_count(), 4); + assert_eq!(t.cool_down_interval(), Duration::from_secs(10)); + assert!(t.thread_count() >= 1); + } + + #[test] + fn builder_returns_builder() { + let builder = Thunker::builder(); + let t = builder.max_thread_count(2).build(); + assert_eq!(t.max_thread_count(), 2); + } + + #[test] + fn from_builder_spawns_initial_worker() { + let t = Thunker::builder().build(); + std::thread::sleep(Duration::from_millis(10)); + assert!(t.thread_count() >= 1); + } + + #[test] + fn clone_shares_state() { + let t1 = Thunker::new(); + let t2 = t1.clone(); + assert_eq!(t1.max_thread_count(), t2.max_thread_count()); + assert_eq!(t1.cool_down_interval(), t2.cool_down_interval()); + } + + #[test] + fn default_same_as_new() { + let d = Thunker::default(); + let n = Thunker::new(); + assert_eq!(d.max_thread_count(), n.max_thread_count()); + assert_eq!(d.cool_down_interval(), n.cool_down_interval()); + } + + #[test] + fn debug_impl() { + let t = Thunker::new(); + let debug = format!("{t:?}"); + assert!(debug.contains("Thunker")); + assert!(debug.contains("max_thread_count")); + assert!(debug.contains("cool_down_interval")); + assert!(debug.contains("thread_count")); + assert!(debug.contains("pending_count")); + } + + #[test] + fn send_executes_work_item() { + let t = Thunker::new(); + let executed = Arc::new(AtomicBool::new(false)); + let executed2 = Arc::clone(&executed); + + let flag_ptr = Arc::into_raw(executed2).cast_mut().cast::<()>(); + let item = WorkItem::new(flag_ptr, set_flag); + t.send(item); + + for _ in 0..100 { + if executed.load(Ordering::SeqCst) { + break; + } + std::thread::sleep(Duration::from_millis(10)); + } + assert!(executed.load(Ordering::SeqCst)); + } + + #[test] + fn send_scales_up_threads() { + use std::sync::Barrier; + + let t = Thunker::builder().max_thread_count(4).channel_capacity(8).build(); + 
+ let barrier = Arc::new(Barrier::new(4)); + for _ in 0..3 { + let b = Arc::clone(&barrier); + let b_ptr = Arc::into_raw(b).cast_mut().cast::<()>(); + t.send(WorkItem::new(b_ptr, wait_on_barrier)); + } + + std::thread::sleep(Duration::from_millis(100)); + let count = t.thread_count(); + assert!(count >= 2, "expected at least 2 threads, got {count}"); + + barrier.wait(); + } + + #[test] + fn custom_cool_down_interval() { + let interval = Duration::from_millis(50); + let t = Thunker::builder().cool_down_interval(interval).build(); + assert_eq!(t.cool_down_interval(), interval); + } +} diff --git a/crates/sync_thunk/src/thunker_builder.rs b/crates/sync_thunk/src/thunker_builder.rs new file mode 100644 index 000000000..b80c037a5 --- /dev/null +++ b/crates/sync_thunk/src/thunker_builder.rs @@ -0,0 +1,164 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +use std::time::Duration; + +use crate::Thunker; + +/// Default maximum number of worker threads. +const DEFAULT_MAX_THREAD_COUNT: usize = 4; + +/// Default cool-down interval before an idle worker thread exits. +const DEFAULT_COOL_DOWN_INTERVAL: Duration = Duration::from_secs(10); + +/// Default channel capacity (number of pre-allocated work item slots). +const DEFAULT_CHANNEL_CAPACITY: usize = 64; + +/// Builder for configuring and constructing a [`Thunker`]. +/// +/// Obtain an instance via [`Thunker::builder()`], set parameters with chainable methods, +/// and finalize with [`build()`](Self::build). 
All parameters have sensible defaults: +/// +/// | Parameter | Default | Description | +/// |---|---|---| +/// | [`max_thread_count`](Self::max_thread_count) | 4 | Upper bound on worker threads | +/// | [`cool_down_interval`](Self::cool_down_interval) | 10 s | Idle timeout before a worker exits | +/// | [`channel_capacity`](Self::channel_capacity) | 64 | Pre-allocated ring-buffer slots | +/// +/// # Examples +/// +/// ``` +/// use std::time::Duration; +/// +/// use sync_thunk::Thunker; +/// +/// let thunker = Thunker::builder() +/// .max_thread_count(8) +/// .cool_down_interval(Duration::from_secs(30)) +/// .channel_capacity(128) +/// .build(); +/// ``` +#[derive(Debug, Clone, Copy)] +pub struct ThunkerBuilder { + pub(crate) max_thread_count: usize, + pub(crate) cool_down_interval: Duration, + pub(crate) channel_capacity: usize, +} + +impl ThunkerBuilder { + /// Creates a new builder with default settings. + pub(crate) fn new() -> Self { + Self { + max_thread_count: DEFAULT_MAX_THREAD_COUNT, + cool_down_interval: DEFAULT_COOL_DOWN_INTERVAL, + channel_capacity: DEFAULT_CHANNEL_CAPACITY, + } + } + + /// Sets the maximum number of worker threads the pool may scale up to. + /// + /// Defaults to 4. + #[must_use] + pub fn max_thread_count(mut self, count: usize) -> Self { + self.max_thread_count = count; + self + } + + /// Sets the duration a worker thread idles before shutting down. + /// + /// Defaults to 10 seconds. + #[must_use] + pub fn cool_down_interval(mut self, interval: Duration) -> Self { + self.cool_down_interval = interval; + self + } + + /// Sets the capacity of the pre-allocated work item channel. + /// + /// Using a bounded channel avoids per-send heap allocation. If the channel + /// is full, the async call blocks. + /// + /// Defaults to 64. + #[must_use] + pub fn channel_capacity(mut self, capacity: usize) -> Self { + self.channel_capacity = capacity; + self + } + + /// Builds and returns a [`Thunker`] with the configured settings. 
+ /// + /// Spawns an initial worker thread immediately. + #[must_use] + pub fn build(self) -> Thunker { + Thunker::from_builder(self) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn default_values() { + let builder = ThunkerBuilder::new(); + assert_eq!(builder.max_thread_count, DEFAULT_MAX_THREAD_COUNT); + assert_eq!(builder.cool_down_interval, DEFAULT_COOL_DOWN_INTERVAL); + assert_eq!(builder.channel_capacity, DEFAULT_CHANNEL_CAPACITY); + } + + #[test] + fn max_thread_count_setter() { + let builder = ThunkerBuilder::new().max_thread_count(16); + assert_eq!(builder.max_thread_count, 16); + } + + #[test] + fn cool_down_interval_setter() { + let interval = Duration::from_secs(30); + let builder = ThunkerBuilder::new().cool_down_interval(interval); + assert_eq!(builder.cool_down_interval, interval); + } + + #[test] + fn channel_capacity_setter() { + let builder = ThunkerBuilder::new().channel_capacity(128); + assert_eq!(builder.channel_capacity, 128); + } + + #[test] + fn chaining_all_setters() { + let builder = ThunkerBuilder::new() + .max_thread_count(8) + .cool_down_interval(Duration::from_millis(500)) + .channel_capacity(32); + assert_eq!(builder.max_thread_count, 8); + assert_eq!(builder.cool_down_interval, Duration::from_millis(500)); + assert_eq!(builder.channel_capacity, 32); + } + + #[test] + fn debug_impl() { + let builder = ThunkerBuilder::new(); + let debug = format!("{builder:?}"); + assert!(debug.contains("ThunkerBuilder")); + assert!(debug.contains("max_thread_count")); + assert!(debug.contains("cool_down_interval")); + assert!(debug.contains("channel_capacity")); + } + + #[test] + #[expect(clippy::clone_on_copy, reason = "deliberately testing Clone impl")] + fn clone_and_copy() { + let builder = ThunkerBuilder::new().max_thread_count(8); + let cloned = builder.clone(); + let copied = builder; // Copy + assert_eq!(cloned.max_thread_count, 8); + assert_eq!(copied.max_thread_count, 8); + } + + #[test] + fn build_produces_thunker() { 
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.

/// A unit of work to be executed on a worker thread.
///
/// A `WorkItem` is a type-erased pair of a data pointer and a plain function
/// pointer; executing the item calls the function with the pointer.
pub struct WorkItem {
    /// Opaque pointer handed to `vtable_fn` when the item is executed.
    data: *mut (),
    /// Function invoked with `data` by [`execute`](Self::execute).
    vtable_fn: fn(*mut ()),
}

impl WorkItem {
    /// Creates a new `WorkItem`.
    ///
    /// The item may be executed on another thread, so the caller is
    /// responsible for keeping whatever `data` points to valid until
    /// `vtable_fn` has completed.
    #[must_use]
    pub fn new(data: *mut (), vtable_fn: fn(*mut ())) -> Self {
        Self { data, vtable_fn }
    }

    /// Returns the pointer to the task data.
    #[must_use]
    pub fn data(&self) -> *mut () {
        self.data
    }

    /// Returns the function pointer that executes the task.
    #[must_use]
    pub fn vtable_fn(&self) -> fn(*mut ()) {
        self.vtable_fn
    }

    /// Executes the work item by invoking the function pointer with the data pointer.
    ///
    /// Consumes the item, so each `WorkItem` runs at most once.
    pub fn execute(self) {
        (self.vtable_fn)(self.data);
    }
}

impl std::fmt::Debug for WorkItem {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Both fields are addresses; printing them makes it possible to tell
        // work items apart in logs without dereferencing anything.
        f.debug_struct("WorkItem")
            .field("data", &self.data)
            .field("vtable_fn", &self.vtable_fn)
            .finish()
    }
}

// SAFETY: WorkItem is sent across threads by design; the caller guarantees
// that the pointed-to data remains valid until the vtable_fn completes.
unsafe impl Send for WorkItem {}
+ let vt = item.vtable_fn(); + let mut check: u32 = 0; + vt((&raw mut check).cast::<()>()); + assert_eq!(check, 42); + } + + #[test] + fn execute_invokes_vtable_fn() { + let mut val: u32 = 0; + let ptr = (&raw mut val).cast::<()>(); + let item = WorkItem::new(ptr, dummy_vtable); + item.execute(); + assert_eq!(val, 42); + } + + #[test] + fn debug_impl() { + let item = WorkItem::new(core::ptr::null_mut(), dummy_vtable); + let debug = format!("{item:?}"); + assert!(debug.contains("WorkItem")); + } + + #[test] + fn send_trait() { + fn assert_send() {} + assert_send::(); + } +} diff --git a/crates/sync_thunk/tests/adversarial.rs b/crates/sync_thunk/tests/adversarial.rs new file mode 100644 index 000000000..7b3afd1e5 --- /dev/null +++ b/crates/sync_thunk/tests/adversarial.rs @@ -0,0 +1,71 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +//! Tests for adversarial usage patterns — `mem::forget`, cancellation, etc. + +use std::sync::atomic::{AtomicBool, Ordering}; +use std::task::{RawWaker, RawWakerVTable, Waker}; +use std::time::Duration; + +use sync_thunk::{Thunker, thunk}; + +/// Creates a no-op waker for manual polling. +fn noop_waker() -> Waker { + fn noop(_: *const ()) {} + fn clone(p: *const ()) -> RawWaker { + RawWaker::new(p, &VTABLE) + } + const VTABLE: RawWakerVTable = RawWakerVTable::new(clone, noop, noop, noop); + // SAFETY: The vtable functions are valid no-ops. + unsafe { Waker::from_raw(RawWaker::new(std::ptr::null(), &VTABLE)) } +} + +struct Service { + thunker: Thunker, +} + +impl Service { + #[thunk(from = self.thunker)] + async fn blocking_work(&self, flag: &AtomicBool) -> u64 { + // Signal that the worker started executing. + flag.store(true, Ordering::Release); + std::thread::sleep(Duration::from_millis(10)); + 42 + } +} + +/// Calling `mem::forget` on a thunked future after it has been polled must not cause UB. 
+/// +/// When a future is forgotten after the work item has been dispatched, its +/// destructor (including `StackState::Drop`) never runs. The worker thread +/// still completes and writes its result into the leaked — but still valid — +/// memory. The result is a memory leak, not use-after-free. +#[tokio::test] +async fn mem_forget_on_thunked_future_does_not_cause_ub() { + let service = Service { + thunker: Thunker::builder() + .max_thread_count(2) + .cool_down_interval(Duration::from_secs(1)) + .build(), + }; + + let started = AtomicBool::new(false); + + // Box::pin the future so we can forget the Box (and thus the StackState inside). + let mut future = Box::pin(service.blocking_work(&started)); + + // Poll once — this dispatches the WorkItem to the thunker and returns Pending. + let waker = noop_waker(); + let mut cx = std::task::Context::from_waker(&waker); + let _ = std::future::Future::poll(future.as_mut(), &mut cx); + + // Forget the boxed future. Its destructor (StackState::Drop) never runs. + // The heap allocation is leaked, but the worker still has a valid pointer. + std::mem::forget(future); + + // Give the worker time to complete. It writes to the leaked StackState. + std::thread::sleep(Duration::from_millis(100)); + + // The worker should have executed. + assert!(started.load(Ordering::Acquire), "worker should have executed"); +} diff --git a/crates/sync_thunk_macros/CHANGELOG.md b/crates/sync_thunk_macros/CHANGELOG.md new file mode 100644 index 000000000..825c32f0d --- /dev/null +++ b/crates/sync_thunk_macros/CHANGELOG.md @@ -0,0 +1 @@ +# Changelog diff --git a/crates/sync_thunk_macros/Cargo.toml b/crates/sync_thunk_macros/Cargo.toml new file mode 100644 index 000000000..dd8e621e6 --- /dev/null +++ b/crates/sync_thunk_macros/Cargo.toml @@ -0,0 +1,35 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + +[package] +name = "sync_thunk_macros" +description = "Macros for the sync_thunk crate." 
+version = "0.1.0" +readme = "README.md" +keywords = ["oxidizer", "blocking", "async"] +categories = ["asynchronous", "concurrency"] + +edition.workspace = true +rust-version.workspace = true +authors.workspace = true +license.workspace = true +homepage.workspace = true +repository = "https://github.com/microsoft/oxidizer/tree/main/crates/sync_thunk_macros" + +[package.metadata.cargo_check_external_types] +allowed_external_types = ["sync_thunk_macros::*"] + +[package.metadata.docs.rs] +all-features = true + +[lib] +proc-macro = true + +[dependencies] +sync_thunk_macros_impl.workspace = true + +[dev-dependencies] +mutants.workspace = true + +[lints] +workspace = true diff --git a/crates/sync_thunk_macros/README.md b/crates/sync_thunk_macros/README.md new file mode 100644 index 000000000..272375142 --- /dev/null +++ b/crates/sync_thunk_macros/README.md @@ -0,0 +1,24 @@ +
+ Sync Thunk Macros Logo + +# Sync Thunk Macros + +[![crates.io](https://img.shields.io/crates/v/sync_thunk_macros.svg)](https://crates.io/crates/sync_thunk_macros) +[![docs.rs](https://docs.rs/sync_thunk_macros/badge.svg)](https://docs.rs/sync_thunk_macros) +[![MSRV](https://img.shields.io/crates/msrv/sync_thunk_macros)](https://crates.io/crates/sync_thunk_macros) +[![CI](https://github.com/microsoft/oxidizer/actions/workflows/main.yml/badge.svg?event=push)](https://github.com/microsoft/oxidizer/actions/workflows/main.yml) +[![Coverage](https://codecov.io/gh/microsoft/oxidizer/graph/badge.svg?token=FCUG0EL5TI)](https://codecov.io/gh/microsoft/oxidizer) +[![License](https://img.shields.io/badge/license-MIT-blue.svg)](../../LICENSE) +This crate was developed as part of the Oxidizer project + +
+ +Macros for the [`sync_thunk`][__link0] crate. + + +
+ +This crate was developed as part of The Oxidizer Project. Browse this crate's source code. + + + [__link0]: https://docs.rs/sync_thunk diff --git a/crates/sync_thunk_macros/favicon.ico b/crates/sync_thunk_macros/favicon.ico new file mode 100644 index 000000000..127c7d34f --- /dev/null +++ b/crates/sync_thunk_macros/favicon.ico @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b2814201968ea7689ed59b84bf57f1552ab691b0e043f83e43f04b2fcad06e0d +size 198169 diff --git a/crates/sync_thunk_macros/logo.png b/crates/sync_thunk_macros/logo.png new file mode 100644 index 000000000..d72833407 --- /dev/null +++ b/crates/sync_thunk_macros/logo.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:917b8ece75df7d5f9d2e0b65b1480564bd7b8d2b46a74e4fd754a36d69b1d652 +size 61948 diff --git a/crates/sync_thunk_macros/src/lib.rs b/crates/sync_thunk_macros/src/lib.rs new file mode 100644 index 000000000..d3205bbfc --- /dev/null +++ b/crates/sync_thunk_macros/src/lib.rs @@ -0,0 +1,21 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +#![cfg_attr(coverage_nightly, feature(coverage_attribute))] +#![cfg_attr(docsrs, feature(doc_cfg))] + +//! Macros for the [`sync_thunk`](https://docs.rs/sync_thunk) crate. 
+ +#![doc(html_logo_url = "https://media.githubusercontent.com/media/microsoft/oxidizer/refs/heads/main/crates/sync_thunk_macros/logo.png")] +#![doc( + html_favicon_url = "https://media.githubusercontent.com/media/microsoft/oxidizer/refs/heads/main/crates/sync_thunk_macros/favicon.ico" +)] + +#[expect(missing_docs, reason = "this is documented in the sync_thunk reexport")] +#[proc_macro_attribute] +#[cfg_attr(test, mutants::skip)] +pub fn thunk(attr_args: proc_macro::TokenStream, item: proc_macro::TokenStream) -> proc_macro::TokenStream { + sync_thunk_macros_impl::thunk_impl(attr_args.into(), item.into()) + .unwrap_or_else(|err| err.to_compile_error()) + .into() +} diff --git a/crates/sync_thunk_macros_impl/CHANGELOG.md b/crates/sync_thunk_macros_impl/CHANGELOG.md new file mode 100644 index 000000000..825c32f0d --- /dev/null +++ b/crates/sync_thunk_macros_impl/CHANGELOG.md @@ -0,0 +1 @@ +# Changelog diff --git a/crates/sync_thunk_macros_impl/Cargo.toml b/crates/sync_thunk_macros_impl/Cargo.toml new file mode 100644 index 000000000..0524ed62d --- /dev/null +++ b/crates/sync_thunk_macros_impl/Cargo.toml @@ -0,0 +1,53 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + +[package] +name = "sync_thunk_macros_impl" +description = "Macros for the sync_thunk crate." 
+version = "0.1.0" +readme = "README.md" +keywords = ["oxidizer", "blocking", "async"] +categories = ["asynchronous", "concurrency"] + +edition.workspace = true +rust-version.workspace = true +authors.workspace = true +license.workspace = true +homepage.workspace = true +repository = "https://github.com/microsoft/oxidizer/tree/main/crates/sync_thunk_macros_impl" + +[package.metadata.cargo_check_external_types] +allowed_external_types = [ + "proc_macro2::TokenStream", + "syn::error::Error", + "syn::error::Result", +] + +[package.metadata.docs.rs] +all-features = true + +[dependencies] +proc-macro2.workspace = true +quote.workspace = true +syn = { workspace = true, features = ["clone-impls", "derive", "full", "parsing", "printing", "proc-macro"] } + +[dev-dependencies] +insta.workspace = true +mutants.workspace = true +prettyplease.workspace = true +quote.workspace = true + +[dev-dependencies.syn] +workspace = true +features = [ + "clone-impls", + "derive", + "full", + "extra-traits", + "parsing", + "printing", + "proc-macro", +] + +[lints] +workspace = true diff --git a/crates/sync_thunk_macros_impl/README.md b/crates/sync_thunk_macros_impl/README.md new file mode 100644 index 000000000..6abeda084 --- /dev/null +++ b/crates/sync_thunk_macros_impl/README.md @@ -0,0 +1,24 @@ +
+ Sync Thunk Macros Impl Logo + +# Sync Thunk Macros Impl + +[![crates.io](https://img.shields.io/crates/v/sync_thunk_macros_impl.svg)](https://crates.io/crates/sync_thunk_macros_impl) +[![docs.rs](https://docs.rs/sync_thunk_macros_impl/badge.svg)](https://docs.rs/sync_thunk_macros_impl) +[![MSRV](https://img.shields.io/crates/msrv/sync_thunk_macros_impl)](https://crates.io/crates/sync_thunk_macros_impl) +[![CI](https://github.com/microsoft/oxidizer/actions/workflows/main.yml/badge.svg?event=push)](https://github.com/microsoft/oxidizer/actions/workflows/main.yml) +[![Coverage](https://codecov.io/gh/microsoft/oxidizer/graph/badge.svg?token=FCUG0EL5TI)](https://codecov.io/gh/microsoft/oxidizer) +[![License](https://img.shields.io/badge/license-MIT-blue.svg)](../../LICENSE) +This crate was developed as part of the Oxidizer project + +
+ +Macros for the [`sync_thunk`][__link0] crate. + + +
+ +This crate was developed as part of The Oxidizer Project. Browse this crate's source code. + + + [__link0]: https://docs.rs/sync_thunk diff --git a/crates/sync_thunk_macros_impl/favicon.ico b/crates/sync_thunk_macros_impl/favicon.ico new file mode 100644 index 000000000..127c7d34f --- /dev/null +++ b/crates/sync_thunk_macros_impl/favicon.ico @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b2814201968ea7689ed59b84bf57f1552ab691b0e043f83e43f04b2fcad06e0d +size 198169 diff --git a/crates/sync_thunk_macros_impl/logo.png b/crates/sync_thunk_macros_impl/logo.png new file mode 100644 index 000000000..d72833407 --- /dev/null +++ b/crates/sync_thunk_macros_impl/logo.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:917b8ece75df7d5f9d2e0b65b1480564bd7b8d2b46a74e4fd754a36d69b1d652 +size 61948 diff --git a/crates/sync_thunk_macros_impl/src/lib.rs b/crates/sync_thunk_macros_impl/src/lib.rs new file mode 100644 index 000000000..c58718fab --- /dev/null +++ b/crates/sync_thunk_macros_impl/src/lib.rs @@ -0,0 +1,17 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +#![cfg_attr(coverage_nightly, feature(coverage_attribute))] +#![cfg_attr(docsrs, feature(doc_cfg))] + +//! Macros for the [`sync_thunk`](https://docs.rs/sync_thunk) crate. + +#![doc(html_logo_url = "https://media.githubusercontent.com/media/microsoft/oxidizer/refs/heads/main/crates/sync_thunk_macros_impl/logo.png")] +#![doc( + html_favicon_url = "https://media.githubusercontent.com/media/microsoft/oxidizer/refs/heads/main/crates/sync_thunk_macros_impl/favicon.ico" +)] + +/// Implementation of the `thunk` attribute macro. +mod thunk; + +pub use thunk::thunk_impl; diff --git a/crates/sync_thunk_macros_impl/src/thunk.rs b/crates/sync_thunk_macros_impl/src/thunk.rs new file mode 100644 index 000000000..48c78635f --- /dev/null +++ b/crates/sync_thunk_macros_impl/src/thunk.rs @@ -0,0 +1,360 @@ +// Copyright (c) Microsoft Corporation. 
+// Licensed under the MIT License. + +use proc_macro2::TokenStream; +use quote::{format_ident, quote}; +use syn::parse::{Parse, ParseStream}; +use syn::{FnArg, ItemFn, Pat, ReturnType, Token, Type, parse2}; + +/// Parsed arguments for the `thunk` attribute macro. +pub struct ThunkArgs { + /// The path expression for the provider (specified via `from = ...`). + pub provider_path: syn::Expr, +} + +impl core::fmt::Debug for ThunkArgs { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + f.debug_struct("ThunkArgs").finish_non_exhaustive() + } +} + +impl Parse for ThunkArgs { + fn parse(input: ParseStream) -> syn::Result { + let mut provider_path: Option = None; + while !input.is_empty() { + let ident: syn::Ident = input.parse()?; + input.parse::()?; + if ident == "from" { + provider_path = Some(input.parse()?); + } + if !input.is_empty() { + input.parse::()?; + } + } + Ok(Self { + provider_path: provider_path.ok_or_else(|| input.error("Must specify 'from' source"))?, + }) + } +} + +/// Processes the `thunk` attribute macro. +/// +/// # Errors +/// +/// Returns an error if the macro input is invalid. +#[expect(clippy::too_many_lines, reason = "macro codegen is inherently verbose")] +pub fn thunk_impl(args: TokenStream, input: TokenStream) -> syn::Result { + let input_fn: ItemFn = parse2(input)?; + let args: ThunkArgs = parse2(args)?; + + let fn_name = &input_fn.sig.ident; + let inner_fn_name = format_ident!("__{}_inner", fn_name); + let raw_struct_name = format_ident!("__{fn_name}_RawTask"); + let provider = &args.provider_path; + let vis = &input_fn.vis; + + let ret_type = match &input_fn.sig.output { + ReturnType::Default => quote! { () }, + ReturnType::Type(_, ty) => quote! 
{ #ty }, + }; + + let mut raw_fields = Vec::new(); + let mut raw_init = Vec::new(); + let mut safe_unwrap = Vec::new(); + let mut call_args = Vec::new(); + + let has_receiver = matches!(input_fn.sig.inputs.first(), Some(FnArg::Receiver(_))); + let is_mut_self = if let Some(FnArg::Receiver(recv)) = input_fn.sig.inputs.first() { + recv.mutability.is_some() + } else { + false + }; + + let provider_str = quote!(#provider).to_string(); + for arg in &input_fn.sig.inputs { + if let FnArg::Typed(pat_type) = arg + && let Pat::Ident(pat_ident) = &*pat_type.pat + { + let name = &pat_ident.ident; + let ty = &pat_type.ty; + + // Skip fields that are accessed via self (e.g. self.thunker). + // Parameters that ARE the provider (exact name match) are still + // packed because the inner function needs them as arguments. + if provider_str.ends_with(&format!(".{name}")) { + continue; + } + + call_args.push(quote! { #name }); + if let Type::Reference(ref_ty) = &**ty { + let inner_ty = &ref_ty.elem; + if ref_ty.mutability.is_some() { + raw_fields.push(quote! { #name: *mut #inner_ty }); + raw_init.push(quote! { #name: #name as *mut #inner_ty }); + safe_unwrap.push(quote! { + // SAFETY: Pointer valid for the lifetime of the StackState. + let #name = unsafe { &mut *task.#name }; + }); + } else { + raw_fields.push(quote! { #name: *const #inner_ty }); + raw_init.push(quote! { #name: #name as *const #inner_ty }); + safe_unwrap.push(quote! { + // SAFETY: Pointer valid for the lifetime of the StackState. + let #name = unsafe { &*task.#name }; + }); + } + } else { + raw_fields.push(quote! { #name: #ty }); + raw_init.push(quote! { #name }); + safe_unwrap.push(quote! { let #name = task.#name; }); + } + } + } + + // The inner function is a sync copy of the original, emitted as a sibling. + // When there is a receiver, it stays as a method (so `self` works naturally). + // When there is no receiver, it stays as an associated/free function. 
+ let mut inner_fn = input_fn.clone(); + inner_fn.sig.ident = inner_fn_name.clone(); + inner_fn.sig.asyncness = None; + inner_fn.vis = syn::Visibility::Inherited; + inner_fn + .attrs + .retain(|attr| attr.path().is_ident("expect") || attr.path().is_ident("allow")); + + // Build the self-pointer field, initializer, shim reconstruction, and call + // expression depending on whether there is a receiver. + let (self_ptr_field, self_ptr_init, shim_self_ref, inner_call) = if has_receiver { + let cast = if is_mut_self { + quote! { &mut *task.self_ptr.cast::().cast_mut() } + } else { + quote! { &*task.self_ptr.cast::() } + }; + let init = if is_mut_self { + quote! { self_ptr: self as *mut Self as *const (), } + } else { + quote! { self_ptr: self as *const Self as *const (), } + }; + ( + Some(quote! { self_ptr: *const (), }), + Some(init), + Some(quote! { + // SAFETY: self_ptr is valid for the lifetime of the StackState. + let self_ref = unsafe { #cast }; + }), + quote! { self_ref.#inner_fn_name(#(#call_args),*) }, + ) + } else { + (None, None, None, quote! { Self::#inner_fn_name(#(#call_args),*) }) + }; + + let fn_inputs = &input_fn.sig.inputs; + + // The shim is a sibling associated fn so it can reference `Self` for the + // pointer cast. It is `#[doc(hidden)]` to keep the public API clean. + let shim_name = format_ident!("__{fn_name}_shim"); + + Ok(quote! { + // Sync inner function — sibling method/associated fn so `self`/`Self` work. + #[inline] + #[doc(hidden)] + #[allow( + clippy::needless_pass_by_ref_mut, + clippy::needless_pass_by_value, + clippy::use_self, + clippy::unused_self, + unused_variables, + )] + #inner_fn + + // Shim — runs on the worker thread. Sibling so `Self` is available for cast. + #[doc(hidden)] + #[allow(clippy::use_self)] + fn #shim_name(ptr: *mut ()) { + // The struct must be repeated here so the shim can name it. 
+ #[allow(non_camel_case_types, dead_code)] + struct #raw_struct_name { + #self_ptr_field + #(#raw_fields,)* + } + + // SAFETY: ptr points to a valid StackState on the caller's stack. + let state = unsafe { &*ptr.cast::<::sync_thunk::StackState::<#ret_type, #raw_struct_name>>() }; + // SAFETY: We are the sole consumer; the caller has set the task. + let task = unsafe { state.take_task() }.expect("thunk task was set before dispatch"); + + #shim_self_ref + #(#safe_unwrap)* + + let panic_result = ::std::panic::catch_unwind(::std::panic::AssertUnwindSafe(|| { + #inner_call + })); + + match panic_result { + Ok(result) => { + // SAFETY: Sole writer; poller reads only after ready flag is set. + unsafe { state.complete(result) }; + } + Err(_) => { + state.mark_panicked(); + } + } + state.wake(); + } + + // Async wrapper — the only user-visible function. + #vis async fn #fn_name(#fn_inputs) -> #ret_type { + #[allow(non_camel_case_types, dead_code)] + struct #raw_struct_name { + #self_ptr_field + #(#raw_fields,)* + } + // SAFETY: Pointers are valid for the lifetime of the StackState. + unsafe impl Send for #raw_struct_name {} + + let state = ::sync_thunk::StackState::<#ret_type, #raw_struct_name>::new(); + let raw_task = #raw_struct_name { + #self_ptr_init + #(#raw_init,)* + }; + // SAFETY: No concurrent access — work item not yet sent. + unsafe { state.set_task(raw_task); } + + let work_item = ::sync_thunk::WorkItem::new( + state.as_mut_ptr().cast::<()>(), + Self::#shim_name, + ); + #provider.send(work_item); + ::sync_thunk::ThunkFuture::new(&state).await + } + }) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn thunk_args_parse_from_field() { + let tokens = quote! { from = self.thunker }; + let args: ThunkArgs = parse2(tokens).unwrap(); + let provider = &args.provider_path; + let path_str = quote!(#provider).to_string(); + assert!(!path_str.is_empty()); + } + + #[test] + fn thunk_args_parse_from_ident() { + let tokens = quote! 
{ from = thunker }; + let parsed: ThunkArgs = parse2(tokens).unwrap(); + let provider = &parsed.provider_path; + let path_str = quote!(#provider).to_string(); + assert!(!path_str.is_empty()); + } + + #[test] + fn thunk_args_debug() { + let tokens = quote! { from = self.thunker }; + let parsed: ThunkArgs = parse2(tokens).unwrap(); + let debug = format!("{parsed:?}"); + assert!(debug.contains("ThunkArgs")); + } + + #[test] + fn thunk_args_missing_from() { + let tokens = quote! {}; + let result: syn::Result = parse2(tokens); + let err = result.unwrap_err().to_string(); + assert!(err.contains("Must specify 'from' source")); + } + + #[test] + fn thunk_args_missing_equals() { + let tokens = quote! { from }; + let result: syn::Result = parse2(tokens); + assert!(result.unwrap_err().to_string().contains("expected `=`")); + } + + #[test] + fn thunk_args_with_trailing_comma() { + let tokens = quote! { from = self.thunker, }; + let parsed: ThunkArgs = parse2(tokens).unwrap(); + let provider = &parsed.provider_path; + let path_str = quote!(#provider).to_string(); + assert!(!path_str.is_empty()); + } + + #[test] + fn thunk_impl_ref_self_no_params() { + let attr_args = quote! { from = self.thunker }; + let item = quote! { + async fn work(&self) -> u64 { + 42 + } + }; + let output = thunk_impl(attr_args, item).unwrap().to_string(); + assert!(output.contains("__work_inner")); + assert!(output.contains("__work_shim")); + assert!(output.contains("StackState")); + } + + #[test] + fn thunk_impl_mut_self() { + let attr_args = quote! { from = self.thunker }; + let item = quote! { + async fn work(&mut self) -> u64 { 42 } + }; + let output = thunk_impl(attr_args, item).unwrap().to_string(); + assert!(output.contains("cast_mut")); + } + + #[test] + fn thunk_impl_no_receiver() { + let attr_args = quote! { from = thunker }; + let item = quote! 
{ + async fn create(thunker: &Thunker, name: String) -> Self { + Self { name } + } + }; + let output = thunk_impl(attr_args, item).unwrap().to_string(); + assert!(!output.contains("self_ptr")); + } + + #[test] + fn thunk_impl_unit_return() { + let attr_args = quote! { from = self.thunker }; + let item = quote! { + async fn fire(&self) {} + }; + thunk_impl(attr_args, item).unwrap(); + } + + #[test] + fn thunk_impl_not_a_function() { + let attr_args = quote! { from = self.thunker }; + let item = quote! { + struct Foo; + }; + thunk_impl(attr_args, item).unwrap_err(); + } + + #[test] + fn thunk_impl_ref_and_mut_ref_params() { + let attr_args = quote! { from = self.thunker }; + let item = quote! { + async fn work(&self, a: &str, b: &mut Vec) -> usize { 0 } + }; + let output = thunk_impl(attr_args, item).unwrap().to_string(); + assert!(output.contains("* const str")); + assert!(output.contains("* mut Vec")); + } + + #[test] + fn thunk_impl_owned_params() { + let attr_args = quote! { from = self.thunker }; + let item = quote! 
{ + async fn work(&self, data: Vec) -> usize { data.len() } + }; + thunk_impl(attr_args, item).unwrap(); + } +} diff --git a/crates/sync_thunk_macros_impl/tests/snapshots/thunk_basic__from_field_mut_self.snap b/crates/sync_thunk_macros_impl/tests/snapshots/thunk_basic__from_field_mut_self.snap new file mode 100644 index 000000000..670ce8f8e --- /dev/null +++ b/crates/sync_thunk_macros_impl/tests/snapshots/thunk_basic__from_field_mut_self.snap @@ -0,0 +1,65 @@ +--- +source: crates/sync_thunk_macros_impl/tests/thunk_basic.rs +assertion_line: 33 +expression: expand_thunk!(item) +--- +impl Dummy { + #[inline] + #[doc(hidden)] + #[allow( + clippy::needless_pass_by_ref_mut, + clippy::needless_pass_by_value, + clippy::use_self, + clippy::unused_self, + unused_variables, + )] + fn __work_inner(&mut self) -> String { + String::from("hello") + } + #[doc(hidden)] + #[allow(clippy::use_self)] + fn __work_shim(ptr: *mut ()) { + #[allow(non_camel_case_types, dead_code)] + struct __work_RawTask { + self_ptr: *const (), + } + let state = unsafe { + &*ptr.cast::<::sync_thunk::StackState>() + }; + let task = unsafe { state.take_task() } + .expect("thunk task was set before dispatch"); + let self_ref = unsafe { &mut *task.self_ptr.cast::().cast_mut() }; + let panic_result = ::std::panic::catch_unwind( + ::std::panic::AssertUnwindSafe(|| { self_ref.__work_inner() }), + ); + match panic_result { + Ok(result) => { + unsafe { state.complete(result) }; + } + Err(_) => { + state.mark_panicked(); + } + } + state.wake(); + } + async fn work(&mut self) -> String { + #[allow(non_camel_case_types, dead_code)] + struct __work_RawTask { + self_ptr: *const (), + } + unsafe impl Send for __work_RawTask {} + let state = ::sync_thunk::StackState::::new(); + let raw_task = __work_RawTask { + self_ptr: self as *mut Self as *const (), + }; + unsafe { + state.set_task(raw_task); + } + let work_item = ::sync_thunk::WorkItem::new( + state.as_mut_ptr().cast::<()>(), + Self::__work_shim, + ); + 
self.thunker.send(work_item); + ::sync_thunk::ThunkFuture::new(&state).await + } +} diff --git a/crates/sync_thunk_macros_impl/tests/snapshots/thunk_basic__from_field_ref_self.snap b/crates/sync_thunk_macros_impl/tests/snapshots/thunk_basic__from_field_ref_self.snap new file mode 100644 index 000000000..2d63c864f --- /dev/null +++ b/crates/sync_thunk_macros_impl/tests/snapshots/thunk_basic__from_field_ref_self.snap @@ -0,0 +1,65 @@ +--- +source: crates/sync_thunk_macros_impl/tests/thunk_basic.rs +assertion_line: 20 +expression: expand_thunk!(item) +--- +impl Dummy { + #[inline] + #[doc(hidden)] + #[allow( + clippy::needless_pass_by_ref_mut, + clippy::needless_pass_by_value, + clippy::use_self, + clippy::unused_self, + unused_variables, + )] + fn __work_inner(&self) -> u64 { + 42 + } + #[doc(hidden)] + #[allow(clippy::use_self)] + fn __work_shim(ptr: *mut ()) { + #[allow(non_camel_case_types, dead_code)] + struct __work_RawTask { + self_ptr: *const (), + } + let state = unsafe { + &*ptr.cast::<::sync_thunk::StackState>() + }; + let task = unsafe { state.take_task() } + .expect("thunk task was set before dispatch"); + let self_ref = unsafe { &*task.self_ptr.cast::() }; + let panic_result = ::std::panic::catch_unwind( + ::std::panic::AssertUnwindSafe(|| { self_ref.__work_inner() }), + ); + match panic_result { + Ok(result) => { + unsafe { state.complete(result) }; + } + Err(_) => { + state.mark_panicked(); + } + } + state.wake(); + } + async fn work(&self) -> u64 { + #[allow(non_camel_case_types, dead_code)] + struct __work_RawTask { + self_ptr: *const (), + } + unsafe impl Send for __work_RawTask {} + let state = ::sync_thunk::StackState::::new(); + let raw_task = __work_RawTask { + self_ptr: self as *const Self as *const (), + }; + unsafe { + state.set_task(raw_task); + } + let work_item = ::sync_thunk::WorkItem::new( + state.as_mut_ptr().cast::<()>(), + Self::__work_shim, + ); + self.thunker.send(work_item); + ::sync_thunk::ThunkFuture::new(&state).await + } +} 
diff --git a/crates/sync_thunk_macros_impl/tests/snapshots/thunk_basic__from_method_call.snap b/crates/sync_thunk_macros_impl/tests/snapshots/thunk_basic__from_method_call.snap new file mode 100644 index 000000000..c58921733 --- /dev/null +++ b/crates/sync_thunk_macros_impl/tests/snapshots/thunk_basic__from_method_call.snap @@ -0,0 +1,65 @@ +--- +source: crates/sync_thunk_macros_impl/tests/thunk_basic.rs +assertion_line: 46 +expression: expand_thunk!(item) +--- +impl Dummy { + #[inline] + #[doc(hidden)] + #[allow( + clippy::needless_pass_by_ref_mut, + clippy::needless_pass_by_value, + clippy::use_self, + clippy::unused_self, + unused_variables, + )] + fn __work_inner(&self) -> u32 { + 1 + } + #[doc(hidden)] + #[allow(clippy::use_self)] + fn __work_shim(ptr: *mut ()) { + #[allow(non_camel_case_types, dead_code)] + struct __work_RawTask { + self_ptr: *const (), + } + let state = unsafe { + &*ptr.cast::<::sync_thunk::StackState>() + }; + let task = unsafe { state.take_task() } + .expect("thunk task was set before dispatch"); + let self_ref = unsafe { &*task.self_ptr.cast::() }; + let panic_result = ::std::panic::catch_unwind( + ::std::panic::AssertUnwindSafe(|| { self_ref.__work_inner() }), + ); + match panic_result { + Ok(result) => { + unsafe { state.complete(result) }; + } + Err(_) => { + state.mark_panicked(); + } + } + state.wake(); + } + async fn work(&self) -> u32 { + #[allow(non_camel_case_types, dead_code)] + struct __work_RawTask { + self_ptr: *const (), + } + unsafe impl Send for __work_RawTask {} + let state = ::sync_thunk::StackState::::new(); + let raw_task = __work_RawTask { + self_ptr: self as *const Self as *const (), + }; + unsafe { + state.set_task(raw_task); + } + let work_item = ::sync_thunk::WorkItem::new( + state.as_mut_ptr().cast::<()>(), + Self::__work_shim, + ); + self.thunker().send(work_item); + ::sync_thunk::ThunkFuture::new(&state).await + } +} diff --git a/crates/sync_thunk_macros_impl/tests/snapshots/thunk_basic__from_parameter.snap 
b/crates/sync_thunk_macros_impl/tests/snapshots/thunk_basic__from_parameter.snap new file mode 100644 index 000000000..4f0372d7e --- /dev/null +++ b/crates/sync_thunk_macros_impl/tests/snapshots/thunk_basic__from_parameter.snap @@ -0,0 +1,69 @@ +--- +source: crates/sync_thunk_macros_impl/tests/thunk_basic.rs +assertion_line: 59 +expression: expand_thunk!(item) +--- +impl Dummy { + #[inline] + #[doc(hidden)] + #[allow( + clippy::needless_pass_by_ref_mut, + clippy::needless_pass_by_value, + clippy::use_self, + clippy::unused_self, + unused_variables, + )] + fn __create_inner(thunker: &Thunker, name: String) -> Self { + Self { name } + } + #[doc(hidden)] + #[allow(clippy::use_self)] + fn __create_shim(ptr: *mut ()) { + #[allow(non_camel_case_types, dead_code)] + struct __create_RawTask { + thunker: *const Thunker, + name: String, + } + let state = unsafe { + &*ptr.cast::<::sync_thunk::StackState>() + }; + let task = unsafe { state.take_task() } + .expect("thunk task was set before dispatch"); + let thunker = unsafe { &*task.thunker }; + let name = task.name; + let panic_result = ::std::panic::catch_unwind( + ::std::panic::AssertUnwindSafe(|| { Self::__create_inner(thunker, name) }), + ); + match panic_result { + Ok(result) => { + unsafe { state.complete(result) }; + } + Err(_) => { + state.mark_panicked(); + } + } + state.wake(); + } + async fn create(thunker: &Thunker, name: String) -> Self { + #[allow(non_camel_case_types, dead_code)] + struct __create_RawTask { + thunker: *const Thunker, + name: String, + } + unsafe impl Send for __create_RawTask {} + let state = ::sync_thunk::StackState::::new(); + let raw_task = __create_RawTask { + thunker: thunker as *const Thunker, + name, + }; + unsafe { + state.set_task(raw_task); + } + let work_item = ::sync_thunk::WorkItem::new( + state.as_mut_ptr().cast::<()>(), + Self::__create_shim, + ); + thunker.send(work_item); + ::sync_thunk::ThunkFuture::new(&state).await + } +} diff --git 
a/crates/sync_thunk_macros_impl/tests/snapshots/thunk_basic__from_parameter_multiple_args.snap b/crates/sync_thunk_macros_impl/tests/snapshots/thunk_basic__from_parameter_multiple_args.snap new file mode 100644 index 000000000..c91414cf7 --- /dev/null +++ b/crates/sync_thunk_macros_impl/tests/snapshots/thunk_basic__from_parameter_multiple_args.snap @@ -0,0 +1,85 @@ +--- +source: crates/sync_thunk_macros_impl/tests/thunk_basic.rs +assertion_line: 203 +expression: expand_thunk!(item) +--- +impl Dummy { + #[inline] + #[doc(hidden)] + #[allow( + clippy::needless_pass_by_ref_mut, + clippy::needless_pass_by_value, + clippy::use_self, + clippy::unused_self, + unused_variables, + )] + fn __create_inner( + thunker: &Thunker, + path: &Path, + mode: u32, + ) -> std::io::Result { + Ok(Self) + } + #[doc(hidden)] + #[allow(clippy::use_self)] + fn __create_shim(ptr: *mut ()) { + #[allow(non_camel_case_types, dead_code)] + struct __create_RawTask { + thunker: *const Thunker, + path: *const Path, + mode: u32, + } + let state = unsafe { + &*ptr + .cast::< + ::sync_thunk::StackState, __create_RawTask>, + >() + }; + let task = unsafe { state.take_task() } + .expect("thunk task was set before dispatch"); + let thunker = unsafe { &*task.thunker }; + let path = unsafe { &*task.path }; + let mode = task.mode; + let panic_result = ::std::panic::catch_unwind( + ::std::panic::AssertUnwindSafe(|| { + Self::__create_inner(thunker, path, mode) + }), + ); + match panic_result { + Ok(result) => { + unsafe { state.complete(result) }; + } + Err(_) => { + state.mark_panicked(); + } + } + state.wake(); + } + async fn create(thunker: &Thunker, path: &Path, mode: u32) -> std::io::Result { + #[allow(non_camel_case_types, dead_code)] + struct __create_RawTask { + thunker: *const Thunker, + path: *const Path, + mode: u32, + } + unsafe impl Send for __create_RawTask {} + let state = ::sync_thunk::StackState::< + std::io::Result, + __create_RawTask, + >::new(); + let raw_task = __create_RawTask { + thunker: 
thunker as *const Thunker, + path: path as *const Path, + mode, + }; + unsafe { + state.set_task(raw_task); + } + let work_item = ::sync_thunk::WorkItem::new( + state.as_mut_ptr().cast::<()>(), + Self::__create_shim, + ); + thunker.send(work_item); + ::sync_thunk::ThunkFuture::new(&state).await + } +} diff --git a/crates/sync_thunk_macros_impl/tests/snapshots/thunk_basic__from_static.snap b/crates/sync_thunk_macros_impl/tests/snapshots/thunk_basic__from_static.snap new file mode 100644 index 000000000..f0d37b979 --- /dev/null +++ b/crates/sync_thunk_macros_impl/tests/snapshots/thunk_basic__from_static.snap @@ -0,0 +1,65 @@ +--- +source: crates/sync_thunk_macros_impl/tests/thunk_basic.rs +assertion_line: 72 +expression: expand_thunk!(item) +--- +impl Dummy { + #[inline] + #[doc(hidden)] + #[allow( + clippy::needless_pass_by_ref_mut, + clippy::needless_pass_by_value, + clippy::use_self, + clippy::unused_self, + unused_variables, + )] + fn __work_inner(&self) -> bool { + true + } + #[doc(hidden)] + #[allow(clippy::use_self)] + fn __work_shim(ptr: *mut ()) { + #[allow(non_camel_case_types, dead_code)] + struct __work_RawTask { + self_ptr: *const (), + } + let state = unsafe { + &*ptr.cast::<::sync_thunk::StackState>() + }; + let task = unsafe { state.take_task() } + .expect("thunk task was set before dispatch"); + let self_ref = unsafe { &*task.self_ptr.cast::() }; + let panic_result = ::std::panic::catch_unwind( + ::std::panic::AssertUnwindSafe(|| { self_ref.__work_inner() }), + ); + match panic_result { + Ok(result) => { + unsafe { state.complete(result) }; + } + Err(_) => { + state.mark_panicked(); + } + } + state.wake(); + } + async fn work(&self) -> bool { + #[allow(non_camel_case_types, dead_code)] + struct __work_RawTask { + self_ptr: *const (), + } + unsafe impl Send for __work_RawTask {} + let state = ::sync_thunk::StackState::::new(); + let raw_task = __work_RawTask { + self_ptr: self as *const Self as *const (), + }; + unsafe { + state.set_task(raw_task); + 
} + let work_item = ::sync_thunk::WorkItem::new( + state.as_mut_ptr().cast::<()>(), + Self::__work_shim, + ); + THUNKER.send(work_item); + ::sync_thunk::ThunkFuture::new(&state).await + } +} diff --git a/crates/sync_thunk_macros_impl/tests/snapshots/thunk_basic__generic_return_type.snap b/crates/sync_thunk_macros_impl/tests/snapshots/thunk_basic__generic_return_type.snap new file mode 100644 index 000000000..832fc3bd8 --- /dev/null +++ b/crates/sync_thunk_macros_impl/tests/snapshots/thunk_basic__generic_return_type.snap @@ -0,0 +1,69 @@ +--- +source: crates/sync_thunk_macros_impl/tests/thunk_basic.rs +assertion_line: 244 +expression: expand_thunk!(item) +--- +impl Dummy { + #[inline] + #[doc(hidden)] + #[allow( + clippy::needless_pass_by_ref_mut, + clippy::needless_pass_by_value, + clippy::use_self, + clippy::unused_self, + unused_variables, + )] + fn __get_option_inner(&self) -> Option { + Some(String::from("hi")) + } + #[doc(hidden)] + #[allow(clippy::use_self)] + fn __get_option_shim(ptr: *mut ()) { + #[allow(non_camel_case_types, dead_code)] + struct __get_option_RawTask { + self_ptr: *const (), + } + let state = unsafe { + &*ptr + .cast::<::sync_thunk::StackState, __get_option_RawTask>>() + }; + let task = unsafe { state.take_task() } + .expect("thunk task was set before dispatch"); + let self_ref = unsafe { &*task.self_ptr.cast::() }; + let panic_result = ::std::panic::catch_unwind( + ::std::panic::AssertUnwindSafe(|| { self_ref.__get_option_inner() }), + ); + match panic_result { + Ok(result) => { + unsafe { state.complete(result) }; + } + Err(_) => { + state.mark_panicked(); + } + } + state.wake(); + } + async fn get_option(&self) -> Option { + #[allow(non_camel_case_types, dead_code)] + struct __get_option_RawTask { + self_ptr: *const (), + } + unsafe impl Send for __get_option_RawTask {} + let state = ::sync_thunk::StackState::< + Option, + __get_option_RawTask, + >::new(); + let raw_task = __get_option_RawTask { + self_ptr: self as *const Self as *const 
(), + }; + unsafe { + state.set_task(raw_task); + } + let work_item = ::sync_thunk::WorkItem::new( + state.as_mut_ptr().cast::<()>(), + Self::__get_option_shim, + ); + self.thunker.send(work_item); + ::sync_thunk::ThunkFuture::new(&state).await + } +} diff --git a/crates/sync_thunk_macros_impl/tests/snapshots/thunk_basic__multiple_params.snap b/crates/sync_thunk_macros_impl/tests/snapshots/thunk_basic__multiple_params.snap new file mode 100644 index 000000000..b30cd019a --- /dev/null +++ b/crates/sync_thunk_macros_impl/tests/snapshots/thunk_basic__multiple_params.snap @@ -0,0 +1,77 @@ +--- +source: crates/sync_thunk_macros_impl/tests/thunk_basic.rs +assertion_line: 125 +expression: expand_thunk!(item) +--- +impl Dummy { + #[inline] + #[doc(hidden)] + #[allow( + clippy::needless_pass_by_ref_mut, + clippy::needless_pass_by_value, + clippy::use_self, + clippy::unused_self, + unused_variables, + )] + fn __multi_inner(&self, a: u32, b: &str, c: &mut Vec) -> bool { + true + } + #[doc(hidden)] + #[allow(clippy::use_self)] + fn __multi_shim(ptr: *mut ()) { + #[allow(non_camel_case_types, dead_code)] + struct __multi_RawTask { + self_ptr: *const (), + a: u32, + b: *const str, + c: *mut Vec, + } + let state = unsafe { + &*ptr.cast::<::sync_thunk::StackState>() + }; + let task = unsafe { state.take_task() } + .expect("thunk task was set before dispatch"); + let self_ref = unsafe { &*task.self_ptr.cast::() }; + let a = task.a; + let b = unsafe { &*task.b }; + let c = unsafe { &mut *task.c }; + let panic_result = ::std::panic::catch_unwind( + ::std::panic::AssertUnwindSafe(|| { self_ref.__multi_inner(a, b, c) }), + ); + match panic_result { + Ok(result) => { + unsafe { state.complete(result) }; + } + Err(_) => { + state.mark_panicked(); + } + } + state.wake(); + } + async fn multi(&self, a: u32, b: &str, c: &mut Vec) -> bool { + #[allow(non_camel_case_types, dead_code)] + struct __multi_RawTask { + self_ptr: *const (), + a: u32, + b: *const str, + c: *mut Vec, + } + unsafe impl 
Send for __multi_RawTask {} + let state = ::sync_thunk::StackState::::new(); + let raw_task = __multi_RawTask { + self_ptr: self as *const Self as *const (), + a, + b: b as *const str, + c: c as *mut Vec, + }; + unsafe { + state.set_task(raw_task); + } + let work_item = ::sync_thunk::WorkItem::new( + state.as_mut_ptr().cast::<()>(), + Self::__multi_shim, + ); + self.thunker.send(work_item); + ::sync_thunk::ThunkFuture::new(&state).await + } +} diff --git a/crates/sync_thunk_macros_impl/tests/snapshots/thunk_basic__mut_ref_param.snap b/crates/sync_thunk_macros_impl/tests/snapshots/thunk_basic__mut_ref_param.snap new file mode 100644 index 000000000..4d75dd4d2 --- /dev/null +++ b/crates/sync_thunk_macros_impl/tests/snapshots/thunk_basic__mut_ref_param.snap @@ -0,0 +1,70 @@ +--- +source: crates/sync_thunk_macros_impl/tests/thunk_basic.rs +assertion_line: 99 +expression: expand_thunk!(item) +--- +impl Dummy { + #[inline] + #[doc(hidden)] + #[allow( + clippy::needless_pass_by_ref_mut, + clippy::needless_pass_by_value, + clippy::use_self, + clippy::unused_self, + unused_variables, + )] + fn __fill_inner(&self, buf: &mut Vec) -> usize { + buf.push(1); + 1 + } + #[doc(hidden)] + #[allow(clippy::use_self)] + fn __fill_shim(ptr: *mut ()) { + #[allow(non_camel_case_types, dead_code)] + struct __fill_RawTask { + self_ptr: *const (), + buf: *mut Vec, + } + let state = unsafe { + &*ptr.cast::<::sync_thunk::StackState>() + }; + let task = unsafe { state.take_task() } + .expect("thunk task was set before dispatch"); + let self_ref = unsafe { &*task.self_ptr.cast::() }; + let buf = unsafe { &mut *task.buf }; + let panic_result = ::std::panic::catch_unwind( + ::std::panic::AssertUnwindSafe(|| { self_ref.__fill_inner(buf) }), + ); + match panic_result { + Ok(result) => { + unsafe { state.complete(result) }; + } + Err(_) => { + state.mark_panicked(); + } + } + state.wake(); + } + async fn fill(&self, buf: &mut Vec) -> usize { + #[allow(non_camel_case_types, dead_code)] + struct 
__fill_RawTask { + self_ptr: *const (), + buf: *mut Vec, + } + unsafe impl Send for __fill_RawTask {} + let state = ::sync_thunk::StackState::::new(); + let raw_task = __fill_RawTask { + self_ptr: self as *const Self as *const (), + buf: buf as *mut Vec, + }; + unsafe { + state.set_task(raw_task); + } + let work_item = ::sync_thunk::WorkItem::new( + state.as_mut_ptr().cast::<()>(), + Self::__fill_shim, + ); + self.thunker.send(work_item); + ::sync_thunk::ThunkFuture::new(&state).await + } +} diff --git a/crates/sync_thunk_macros_impl/tests/snapshots/thunk_basic__no_params_beyond_self.snap b/crates/sync_thunk_macros_impl/tests/snapshots/thunk_basic__no_params_beyond_self.snap new file mode 100644 index 000000000..0718826ea --- /dev/null +++ b/crates/sync_thunk_macros_impl/tests/snapshots/thunk_basic__no_params_beyond_self.snap @@ -0,0 +1,65 @@ +--- +source: crates/sync_thunk_macros_impl/tests/thunk_basic.rs +assertion_line: 190 +expression: expand_thunk!(item) +--- +impl Dummy { + #[inline] + #[doc(hidden)] + #[allow( + clippy::needless_pass_by_ref_mut, + clippy::needless_pass_by_value, + clippy::use_self, + clippy::unused_self, + unused_variables, + )] + fn __simple_inner(&self) -> i32 { + -1 + } + #[doc(hidden)] + #[allow(clippy::use_self)] + fn __simple_shim(ptr: *mut ()) { + #[allow(non_camel_case_types, dead_code)] + struct __simple_RawTask { + self_ptr: *const (), + } + let state = unsafe { + &*ptr.cast::<::sync_thunk::StackState>() + }; + let task = unsafe { state.take_task() } + .expect("thunk task was set before dispatch"); + let self_ref = unsafe { &*task.self_ptr.cast::() }; + let panic_result = ::std::panic::catch_unwind( + ::std::panic::AssertUnwindSafe(|| { self_ref.__simple_inner() }), + ); + match panic_result { + Ok(result) => { + unsafe { state.complete(result) }; + } + Err(_) => { + state.mark_panicked(); + } + } + state.wake(); + } + async fn simple(&self) -> i32 { + #[allow(non_camel_case_types, dead_code)] + struct __simple_RawTask { + 
self_ptr: *const (), + } + unsafe impl Send for __simple_RawTask {} + let state = ::sync_thunk::StackState::::new(); + let raw_task = __simple_RawTask { + self_ptr: self as *const Self as *const (), + }; + unsafe { + state.set_task(raw_task); + } + let work_item = ::sync_thunk::WorkItem::new( + state.as_mut_ptr().cast::<()>(), + Self::__simple_shim, + ); + self.thunker.send(work_item); + ::sync_thunk::ThunkFuture::new(&state).await + } +} diff --git a/crates/sync_thunk_macros_impl/tests/snapshots/thunk_basic__owned_param.snap b/crates/sync_thunk_macros_impl/tests/snapshots/thunk_basic__owned_param.snap new file mode 100644 index 000000000..91752e141 --- /dev/null +++ b/crates/sync_thunk_macros_impl/tests/snapshots/thunk_basic__owned_param.snap @@ -0,0 +1,69 @@ +--- +source: crates/sync_thunk_macros_impl/tests/thunk_basic.rs +assertion_line: 112 +expression: expand_thunk!(item) +--- +impl Dummy { + #[inline] + #[doc(hidden)] + #[allow( + clippy::needless_pass_by_ref_mut, + clippy::needless_pass_by_value, + clippy::use_self, + clippy::unused_self, + unused_variables, + )] + fn __consume_inner(&self, data: Vec) -> usize { + data.len() + } + #[doc(hidden)] + #[allow(clippy::use_self)] + fn __consume_shim(ptr: *mut ()) { + #[allow(non_camel_case_types, dead_code)] + struct __consume_RawTask { + self_ptr: *const (), + data: Vec, + } + let state = unsafe { + &*ptr.cast::<::sync_thunk::StackState>() + }; + let task = unsafe { state.take_task() } + .expect("thunk task was set before dispatch"); + let self_ref = unsafe { &*task.self_ptr.cast::() }; + let data = task.data; + let panic_result = ::std::panic::catch_unwind( + ::std::panic::AssertUnwindSafe(|| { self_ref.__consume_inner(data) }), + ); + match panic_result { + Ok(result) => { + unsafe { state.complete(result) }; + } + Err(_) => { + state.mark_panicked(); + } + } + state.wake(); + } + async fn consume(&self, data: Vec) -> usize { + #[allow(non_camel_case_types, dead_code)] + struct __consume_RawTask { + self_ptr: 
*const (), + data: Vec, + } + unsafe impl Send for __consume_RawTask {} + let state = ::sync_thunk::StackState::::new(); + let raw_task = __consume_RawTask { + self_ptr: self as *const Self as *const (), + data, + }; + unsafe { + state.set_task(raw_task); + } + let work_item = ::sync_thunk::WorkItem::new( + state.as_mut_ptr().cast::<()>(), + Self::__consume_shim, + ); + self.thunker.send(work_item); + ::sync_thunk::ThunkFuture::new(&state).await + } +} diff --git a/crates/sync_thunk_macros_impl/tests/snapshots/thunk_basic__preserves_allow_attrs.snap b/crates/sync_thunk_macros_impl/tests/snapshots/thunk_basic__preserves_allow_attrs.snap new file mode 100644 index 000000000..22052e43a --- /dev/null +++ b/crates/sync_thunk_macros_impl/tests/snapshots/thunk_basic__preserves_allow_attrs.snap @@ -0,0 +1,66 @@ +--- +source: crates/sync_thunk_macros_impl/tests/thunk_basic.rs +assertion_line: 217 +expression: expand_thunk!(item) +--- +impl Dummy { + #[inline] + #[doc(hidden)] + #[allow( + clippy::needless_pass_by_ref_mut, + clippy::needless_pass_by_value, + clippy::use_self, + clippy::unused_self, + unused_variables, + )] + #[allow(unused)] + fn __with_allow_inner(&self) -> u64 { + 42 + } + #[doc(hidden)] + #[allow(clippy::use_self)] + fn __with_allow_shim(ptr: *mut ()) { + #[allow(non_camel_case_types, dead_code)] + struct __with_allow_RawTask { + self_ptr: *const (), + } + let state = unsafe { + &*ptr.cast::<::sync_thunk::StackState>() + }; + let task = unsafe { state.take_task() } + .expect("thunk task was set before dispatch"); + let self_ref = unsafe { &*task.self_ptr.cast::() }; + let panic_result = ::std::panic::catch_unwind( + ::std::panic::AssertUnwindSafe(|| { self_ref.__with_allow_inner() }), + ); + match panic_result { + Ok(result) => { + unsafe { state.complete(result) }; + } + Err(_) => { + state.mark_panicked(); + } + } + state.wake(); + } + async fn with_allow(&self) -> u64 { + #[allow(non_camel_case_types, dead_code)] + struct __with_allow_RawTask { + 
self_ptr: *const (), + } + unsafe impl Send for __with_allow_RawTask {} + let state = ::sync_thunk::StackState::::new(); + let raw_task = __with_allow_RawTask { + self_ptr: self as *const Self as *const (), + }; + unsafe { + state.set_task(raw_task); + } + let work_item = ::sync_thunk::WorkItem::new( + state.as_mut_ptr().cast::<()>(), + Self::__with_allow_shim, + ); + self.thunker.send(work_item); + ::sync_thunk::ThunkFuture::new(&state).await + } +} diff --git a/crates/sync_thunk_macros_impl/tests/snapshots/thunk_basic__preserves_expect_attrs.snap b/crates/sync_thunk_macros_impl/tests/snapshots/thunk_basic__preserves_expect_attrs.snap new file mode 100644 index 000000000..5c4d33737 --- /dev/null +++ b/crates/sync_thunk_macros_impl/tests/snapshots/thunk_basic__preserves_expect_attrs.snap @@ -0,0 +1,70 @@ +--- +source: crates/sync_thunk_macros_impl/tests/thunk_basic.rs +assertion_line: 231 +expression: expand_thunk!(item) +--- +impl Dummy { + #[inline] + #[doc(hidden)] + #[allow( + clippy::needless_pass_by_ref_mut, + clippy::needless_pass_by_value, + clippy::use_self, + clippy::unused_self, + unused_variables, + )] + #[expect(clippy::needless_pass_by_value, reason = "testing")] + fn __with_expect_inner(&self, data: Vec) -> usize { + data.len() + } + #[doc(hidden)] + #[allow(clippy::use_self)] + fn __with_expect_shim(ptr: *mut ()) { + #[allow(non_camel_case_types, dead_code)] + struct __with_expect_RawTask { + self_ptr: *const (), + data: Vec, + } + let state = unsafe { + &*ptr.cast::<::sync_thunk::StackState>() + }; + let task = unsafe { state.take_task() } + .expect("thunk task was set before dispatch"); + let self_ref = unsafe { &*task.self_ptr.cast::() }; + let data = task.data; + let panic_result = ::std::panic::catch_unwind( + ::std::panic::AssertUnwindSafe(|| { self_ref.__with_expect_inner(data) }), + ); + match panic_result { + Ok(result) => { + unsafe { state.complete(result) }; + } + Err(_) => { + state.mark_panicked(); + } + } + state.wake(); + } + async 
fn with_expect(&self, data: Vec) -> usize { + #[allow(non_camel_case_types, dead_code)] + struct __with_expect_RawTask { + self_ptr: *const (), + data: Vec, + } + unsafe impl Send for __with_expect_RawTask {} + let state = ::sync_thunk::StackState::::new(); + let raw_task = __with_expect_RawTask { + self_ptr: self as *const Self as *const (), + data, + }; + unsafe { + state.set_task(raw_task); + } + let work_item = ::sync_thunk::WorkItem::new( + state.as_mut_ptr().cast::<()>(), + Self::__with_expect_shim, + ); + self.thunker.send(work_item); + ::sync_thunk::ThunkFuture::new(&state).await + } +} diff --git a/crates/sync_thunk_macros_impl/tests/snapshots/thunk_basic__pub_crate_visibility.snap b/crates/sync_thunk_macros_impl/tests/snapshots/thunk_basic__pub_crate_visibility.snap new file mode 100644 index 000000000..973c3806a --- /dev/null +++ b/crates/sync_thunk_macros_impl/tests/snapshots/thunk_basic__pub_crate_visibility.snap @@ -0,0 +1,65 @@ +--- +source: crates/sync_thunk_macros_impl/tests/thunk_basic.rs +assertion_line: 177 +expression: expand_thunk!(item) +--- +impl Dummy { + #[inline] + #[doc(hidden)] + #[allow( + clippy::needless_pass_by_ref_mut, + clippy::needless_pass_by_value, + clippy::use_self, + clippy::unused_self, + unused_variables, + )] + fn __internal_work_inner(&self) -> u64 { + 42 + } + #[doc(hidden)] + #[allow(clippy::use_self)] + fn __internal_work_shim(ptr: *mut ()) { + #[allow(non_camel_case_types, dead_code)] + struct __internal_work_RawTask { + self_ptr: *const (), + } + let state = unsafe { + &*ptr.cast::<::sync_thunk::StackState>() + }; + let task = unsafe { state.take_task() } + .expect("thunk task was set before dispatch"); + let self_ref = unsafe { &*task.self_ptr.cast::() }; + let panic_result = ::std::panic::catch_unwind( + ::std::panic::AssertUnwindSafe(|| { self_ref.__internal_work_inner() }), + ); + match panic_result { + Ok(result) => { + unsafe { state.complete(result) }; + } + Err(_) => { + state.mark_panicked(); + } + } + 
state.wake(); + } + pub(crate) async fn internal_work(&self) -> u64 { + #[allow(non_camel_case_types, dead_code)] + struct __internal_work_RawTask { + self_ptr: *const (), + } + unsafe impl Send for __internal_work_RawTask {} + let state = ::sync_thunk::StackState::::new(); + let raw_task = __internal_work_RawTask { + self_ptr: self as *const Self as *const (), + }; + unsafe { + state.set_task(raw_task); + } + let work_item = ::sync_thunk::WorkItem::new( + state.as_mut_ptr().cast::<()>(), + Self::__internal_work_shim, + ); + self.thunker.send(work_item); + ::sync_thunk::ThunkFuture::new(&state).await + } +} diff --git a/crates/sync_thunk_macros_impl/tests/snapshots/thunk_basic__pub_visibility.snap b/crates/sync_thunk_macros_impl/tests/snapshots/thunk_basic__pub_visibility.snap new file mode 100644 index 000000000..3b5386093 --- /dev/null +++ b/crates/sync_thunk_macros_impl/tests/snapshots/thunk_basic__pub_visibility.snap @@ -0,0 +1,65 @@ +--- +source: crates/sync_thunk_macros_impl/tests/thunk_basic.rs +assertion_line: 164 +expression: expand_thunk!(item) +--- +impl Dummy { + #[inline] + #[doc(hidden)] + #[allow( + clippy::needless_pass_by_ref_mut, + clippy::needless_pass_by_value, + clippy::use_self, + clippy::unused_self, + unused_variables, + )] + fn __public_work_inner(&self) -> u64 { + 42 + } + #[doc(hidden)] + #[allow(clippy::use_self)] + fn __public_work_shim(ptr: *mut ()) { + #[allow(non_camel_case_types, dead_code)] + struct __public_work_RawTask { + self_ptr: *const (), + } + let state = unsafe { + &*ptr.cast::<::sync_thunk::StackState>() + }; + let task = unsafe { state.take_task() } + .expect("thunk task was set before dispatch"); + let self_ref = unsafe { &*task.self_ptr.cast::() }; + let panic_result = ::std::panic::catch_unwind( + ::std::panic::AssertUnwindSafe(|| { self_ref.__public_work_inner() }), + ); + match panic_result { + Ok(result) => { + unsafe { state.complete(result) }; + } + Err(_) => { + state.mark_panicked(); + } + } + state.wake(); + } 
+ pub async fn public_work(&self) -> u64 { + #[allow(non_camel_case_types, dead_code)] + struct __public_work_RawTask { + self_ptr: *const (), + } + unsafe impl Send for __public_work_RawTask {} + let state = ::sync_thunk::StackState::::new(); + let raw_task = __public_work_RawTask { + self_ptr: self as *const Self as *const (), + }; + unsafe { + state.set_task(raw_task); + } + let work_item = ::sync_thunk::WorkItem::new( + state.as_mut_ptr().cast::<()>(), + Self::__public_work_shim, + ); + self.thunker.send(work_item); + ::sync_thunk::ThunkFuture::new(&state).await + } +} diff --git a/crates/sync_thunk_macros_impl/tests/snapshots/thunk_basic__ref_param.snap b/crates/sync_thunk_macros_impl/tests/snapshots/thunk_basic__ref_param.snap new file mode 100644 index 000000000..a6f8fc899 --- /dev/null +++ b/crates/sync_thunk_macros_impl/tests/snapshots/thunk_basic__ref_param.snap @@ -0,0 +1,69 @@ +--- +source: crates/sync_thunk_macros_impl/tests/thunk_basic.rs +assertion_line: 85 +expression: expand_thunk!(item) +--- +impl Dummy { + #[inline] + #[doc(hidden)] + #[allow( + clippy::needless_pass_by_ref_mut, + clippy::needless_pass_by_value, + clippy::use_self, + clippy::unused_self, + unused_variables, + )] + fn __read_inner(&self, path: &Path) -> Vec { + vec![] + } + #[doc(hidden)] + #[allow(clippy::use_self)] + fn __read_shim(ptr: *mut ()) { + #[allow(non_camel_case_types, dead_code)] + struct __read_RawTask { + self_ptr: *const (), + path: *const Path, + } + let state = unsafe { + &*ptr.cast::<::sync_thunk::StackState, __read_RawTask>>() + }; + let task = unsafe { state.take_task() } + .expect("thunk task was set before dispatch"); + let self_ref = unsafe { &*task.self_ptr.cast::() }; + let path = unsafe { &*task.path }; + let panic_result = ::std::panic::catch_unwind( + ::std::panic::AssertUnwindSafe(|| { self_ref.__read_inner(path) }), + ); + match panic_result { + Ok(result) => { + unsafe { state.complete(result) }; + } + Err(_) => { + state.mark_panicked(); + } + } + 
state.wake(); + } + async fn read(&self, path: &Path) -> Vec { + #[allow(non_camel_case_types, dead_code)] + struct __read_RawTask { + self_ptr: *const (), + path: *const Path, + } + unsafe impl Send for __read_RawTask {} + let state = ::sync_thunk::StackState::, __read_RawTask>::new(); + let raw_task = __read_RawTask { + self_ptr: self as *const Self as *const (), + path: path as *const Path, + }; + unsafe { + state.set_task(raw_task); + } + let work_item = ::sync_thunk::WorkItem::new( + state.as_mut_ptr().cast::<()>(), + Self::__read_shim, + ); + self.thunker.send(work_item); + ::sync_thunk::ThunkFuture::new(&state).await + } +} diff --git a/crates/sync_thunk_macros_impl/tests/snapshots/thunk_basic__result_return_type.snap b/crates/sync_thunk_macros_impl/tests/snapshots/thunk_basic__result_return_type.snap new file mode 100644 index 000000000..239bf625a --- /dev/null +++ b/crates/sync_thunk_macros_impl/tests/snapshots/thunk_basic__result_return_type.snap @@ -0,0 +1,74 @@ +--- +source: crates/sync_thunk_macros_impl/tests/thunk_basic.rs +assertion_line: 151 +expression: expand_thunk!(item) +--- +impl Dummy { + #[inline] + #[doc(hidden)] + #[allow( + clippy::needless_pass_by_ref_mut, + clippy::needless_pass_by_value, + clippy::use_self, + clippy::unused_self, + unused_variables, + )] + fn __fallible_inner(&self) -> std::io::Result> { + Ok(vec![]) + } + #[doc(hidden)] + #[allow(clippy::use_self)] + fn __fallible_shim(ptr: *mut ()) { + #[allow(non_camel_case_types, dead_code)] + struct __fallible_RawTask { + self_ptr: *const (), + } + let state = unsafe { + &*ptr + .cast::< + ::sync_thunk::StackState< + std::io::Result>, + __fallible_RawTask, + >, + >() + }; + let task = unsafe { state.take_task() } + .expect("thunk task was set before dispatch"); + let self_ref = unsafe { &*task.self_ptr.cast::() }; + let panic_result = ::std::panic::catch_unwind( + ::std::panic::AssertUnwindSafe(|| { self_ref.__fallible_inner() }), + ); + match panic_result { + Ok(result) => { + 
unsafe { state.complete(result) }; + } + Err(_) => { + state.mark_panicked(); + } + } + state.wake(); + } + async fn fallible(&self) -> std::io::Result> { + #[allow(non_camel_case_types, dead_code)] + struct __fallible_RawTask { + self_ptr: *const (), + } + unsafe impl Send for __fallible_RawTask {} + let state = ::sync_thunk::StackState::< + std::io::Result>, + __fallible_RawTask, + >::new(); + let raw_task = __fallible_RawTask { + self_ptr: self as *const Self as *const (), + }; + unsafe { + state.set_task(raw_task); + } + let work_item = ::sync_thunk::WorkItem::new( + state.as_mut_ptr().cast::<()>(), + Self::__fallible_shim, + ); + self.thunker.send(work_item); + ::sync_thunk::ThunkFuture::new(&state).await + } +} diff --git a/crates/sync_thunk_macros_impl/tests/snapshots/thunk_basic__unit_return_type.snap b/crates/sync_thunk_macros_impl/tests/snapshots/thunk_basic__unit_return_type.snap new file mode 100644 index 000000000..ceef7c24b --- /dev/null +++ b/crates/sync_thunk_macros_impl/tests/snapshots/thunk_basic__unit_return_type.snap @@ -0,0 +1,63 @@ +--- +source: crates/sync_thunk_macros_impl/tests/thunk_basic.rs +assertion_line: 138 +expression: expand_thunk!(item) +--- +impl Dummy { + #[inline] + #[doc(hidden)] + #[allow( + clippy::needless_pass_by_ref_mut, + clippy::needless_pass_by_value, + clippy::use_self, + clippy::unused_self, + unused_variables, + )] + fn __fire_and_forget_inner(&self) {} + #[doc(hidden)] + #[allow(clippy::use_self)] + fn __fire_and_forget_shim(ptr: *mut ()) { + #[allow(non_camel_case_types, dead_code)] + struct __fire_and_forget_RawTask { + self_ptr: *const (), + } + let state = unsafe { + &*ptr.cast::<::sync_thunk::StackState<(), __fire_and_forget_RawTask>>() + }; + let task = unsafe { state.take_task() } + .expect("thunk task was set before dispatch"); + let self_ref = unsafe { &*task.self_ptr.cast::() }; + let panic_result = ::std::panic::catch_unwind( + ::std::panic::AssertUnwindSafe(|| { self_ref.__fire_and_forget_inner() }), + 
); + match panic_result { + Ok(result) => { + unsafe { state.complete(result) }; + } + Err(_) => { + state.mark_panicked(); + } + } + state.wake(); + } + async fn fire_and_forget(&self) -> () { + #[allow(non_camel_case_types, dead_code)] + struct __fire_and_forget_RawTask { + self_ptr: *const (), + } + unsafe impl Send for __fire_and_forget_RawTask {} + let state = ::sync_thunk::StackState::<(), __fire_and_forget_RawTask>::new(); + let raw_task = __fire_and_forget_RawTask { + self_ptr: self as *const Self as *const (), + }; + unsafe { + state.set_task(raw_task); + } + let work_item = ::sync_thunk::WorkItem::new( + state.as_mut_ptr().cast::<()>(), + Self::__fire_and_forget_shim, + ); + self.thunker.send(work_item); + ::sync_thunk::ThunkFuture::new(&state).await + } +} diff --git a/crates/sync_thunk_macros_impl/tests/snapshots/thunk_errors__missing_equals.snap b/crates/sync_thunk_macros_impl/tests/snapshots/thunk_errors__missing_equals.snap new file mode 100644 index 000000000..3c8f45eb9 --- /dev/null +++ b/crates/sync_thunk_macros_impl/tests/snapshots/thunk_errors__missing_equals.snap @@ -0,0 +1,9 @@ +--- +source: crates/sync_thunk_macros_impl/tests/thunk_errors.rs +expression: "expand_thunk_error!(attr_args, item)" +--- +impl Dummy { + ::core::compile_error! { + "expected `=`" + } +} diff --git a/crates/sync_thunk_macros_impl/tests/snapshots/thunk_errors__missing_from.snap b/crates/sync_thunk_macros_impl/tests/snapshots/thunk_errors__missing_from.snap new file mode 100644 index 000000000..0b04342c5 --- /dev/null +++ b/crates/sync_thunk_macros_impl/tests/snapshots/thunk_errors__missing_from.snap @@ -0,0 +1,9 @@ +--- +source: crates/sync_thunk_macros_impl/tests/thunk_errors.rs +expression: "expand_thunk_error!(attr_args, item)" +--- +impl Dummy { + ::core::compile_error! 
{ + "unexpected end of input, Must specify 'from' source" + } +} diff --git a/crates/sync_thunk_macros_impl/tests/snapshots/thunk_errors__not_a_function.snap b/crates/sync_thunk_macros_impl/tests/snapshots/thunk_errors__not_a_function.snap new file mode 100644 index 000000000..1c6205943 --- /dev/null +++ b/crates/sync_thunk_macros_impl/tests/snapshots/thunk_errors__not_a_function.snap @@ -0,0 +1,9 @@ +--- +source: crates/sync_thunk_macros_impl/tests/thunk_errors.rs +expression: "expand_thunk_error!(attr_args, item)" +--- +impl Dummy { + ::core::compile_error! { + "expected `fn`" + } +} diff --git a/crates/sync_thunk_macros_impl/tests/snapshots/thunk_errors__unknown_key.snap b/crates/sync_thunk_macros_impl/tests/snapshots/thunk_errors__unknown_key.snap new file mode 100644 index 000000000..b8db80eaa --- /dev/null +++ b/crates/sync_thunk_macros_impl/tests/snapshots/thunk_errors__unknown_key.snap @@ -0,0 +1,9 @@ +--- +source: crates/sync_thunk_macros_impl/tests/thunk_errors.rs +expression: "expand_thunk_error!(attr_args, item)" +--- +impl Dummy { + ::core::compile_error! { + "expected `,`" + } +} diff --git a/crates/sync_thunk_macros_impl/tests/thunk_basic.rs b/crates/sync_thunk_macros_impl/tests/thunk_basic.rs new file mode 100644 index 000000000..7c2cf2356 --- /dev/null +++ b/crates/sync_thunk_macros_impl/tests/thunk_basic.rs @@ -0,0 +1,245 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +#![expect(missing_docs, reason = "Test code")] + +use syn::{ImplItemFn, parse_quote}; + +mod util; + +#[test] +#[cfg_attr(miri, ignore)] +fn from_field_ref_self() { + let item: ImplItemFn = parse_quote! { + #[thunk(from = self.thunker)] + async fn work(&self) -> u64 { + 42 + } + }; + + insta::assert_snapshot!(expand_thunk!(item)); +} + +#[test] +#[cfg_attr(miri, ignore)] +fn from_field_mut_self() { + let item: ImplItemFn = parse_quote! 
{ + #[thunk(from = self.thunker)] + async fn work(&mut self) -> String { + String::from("hello") + } + }; + + insta::assert_snapshot!(expand_thunk!(item)); +} + +#[test] +#[cfg_attr(miri, ignore)] +fn from_method_call() { + let item: ImplItemFn = parse_quote! { + #[thunk(from = self.thunker())] + async fn work(&self) -> u32 { + 1 + } + }; + + insta::assert_snapshot!(expand_thunk!(item)); +} + +#[test] +#[cfg_attr(miri, ignore)] +fn from_parameter() { + let item: ImplItemFn = parse_quote! { + #[thunk(from = thunker)] + async fn create(thunker: &Thunker, name: String) -> Self { + Self { name } + } + }; + + insta::assert_snapshot!(expand_thunk!(item)); +} + +#[test] +#[cfg_attr(miri, ignore)] +fn from_static() { + let item: ImplItemFn = parse_quote! { + #[thunk(from = THUNKER)] + async fn work(&self) -> bool { + true + } + }; + + insta::assert_snapshot!(expand_thunk!(item)); +} + +#[test] +#[cfg_attr(miri, ignore)] +fn ref_param() { + let item: ImplItemFn = parse_quote! { + #[thunk(from = self.thunker)] + async fn read(&self, path: &Path) -> Vec { + vec![] + } + }; + + insta::assert_snapshot!(expand_thunk!(item)); +} + +#[test] +#[cfg_attr(miri, ignore)] +fn mut_ref_param() { + let item: ImplItemFn = parse_quote! { + #[thunk(from = self.thunker)] + async fn fill(&self, buf: &mut Vec) -> usize { + buf.push(1); + 1 + } + }; + + insta::assert_snapshot!(expand_thunk!(item)); +} + +#[test] +#[cfg_attr(miri, ignore)] +fn owned_param() { + let item: ImplItemFn = parse_quote! { + #[thunk(from = self.thunker)] + async fn consume(&self, data: Vec) -> usize { + data.len() + } + }; + + insta::assert_snapshot!(expand_thunk!(item)); +} + +#[test] +#[cfg_attr(miri, ignore)] +fn multiple_params() { + let item: ImplItemFn = parse_quote! 
{ + #[thunk(from = self.thunker)] + async fn multi(&self, a: u32, b: &str, c: &mut Vec) -> bool { + true + } + }; + + insta::assert_snapshot!(expand_thunk!(item)); +} + +#[test] +#[cfg_attr(miri, ignore)] +fn unit_return_type() { + let item: ImplItemFn = parse_quote! { + #[thunk(from = self.thunker)] + async fn fire_and_forget(&self) { + // no return + } + }; + + insta::assert_snapshot!(expand_thunk!(item)); +} + +#[test] +#[cfg_attr(miri, ignore)] +fn result_return_type() { + let item: ImplItemFn = parse_quote! { + #[thunk(from = self.thunker)] + async fn fallible(&self) -> std::io::Result> { + Ok(vec![]) + } + }; + + insta::assert_snapshot!(expand_thunk!(item)); +} + +#[test] +#[cfg_attr(miri, ignore)] +fn pub_visibility() { + let item: ImplItemFn = parse_quote! { + #[thunk(from = self.thunker)] + pub async fn public_work(&self) -> u64 { + 42 + } + }; + + insta::assert_snapshot!(expand_thunk!(item)); +} + +#[test] +#[cfg_attr(miri, ignore)] +fn pub_crate_visibility() { + let item: ImplItemFn = parse_quote! { + #[thunk(from = self.thunker)] + pub(crate) async fn internal_work(&self) -> u64 { + 42 + } + }; + + insta::assert_snapshot!(expand_thunk!(item)); +} + +#[test] +#[cfg_attr(miri, ignore)] +fn no_params_beyond_self() { + let item: ImplItemFn = parse_quote! { + #[thunk(from = self.thunker)] + async fn simple(&self) -> i32 { + -1 + } + }; + + insta::assert_snapshot!(expand_thunk!(item)); +} + +#[test] +#[cfg_attr(miri, ignore)] +fn from_parameter_multiple_args() { + let item: ImplItemFn = parse_quote! { + #[thunk(from = thunker)] + async fn create(thunker: &Thunker, path: &Path, mode: u32) -> std::io::Result { + Ok(Self) + } + }; + + insta::assert_snapshot!(expand_thunk!(item)); +} + +#[test] +#[cfg_attr(miri, ignore)] +fn preserves_allow_attrs() { + let item: ImplItemFn = parse_quote! 
{ + #[allow(unused)] + #[thunk(from = self.thunker)] + async fn with_allow(&self) -> u64 { + 42 + } + }; + + insta::assert_snapshot!(expand_thunk!(item)); +} + +#[test] +#[cfg_attr(miri, ignore)] +fn preserves_expect_attrs() { + let item: ImplItemFn = parse_quote! { + #[expect(clippy::needless_pass_by_value, reason = "testing")] + #[thunk(from = self.thunker)] + async fn with_expect(&self, data: Vec) -> usize { + data.len() + } + }; + + insta::assert_snapshot!(expand_thunk!(item)); +} + +#[test] +#[cfg_attr(miri, ignore)] +fn generic_return_type() { + let item: ImplItemFn = parse_quote! { + #[thunk(from = self.thunker)] + async fn get_option(&self) -> Option { + Some(String::from("hi")) + } + }; + + insta::assert_snapshot!(expand_thunk!(item)); +} diff --git a/crates/sync_thunk_macros_impl/tests/thunk_errors.rs b/crates/sync_thunk_macros_impl/tests/thunk_errors.rs new file mode 100644 index 000000000..e4a9c21e2 --- /dev/null +++ b/crates/sync_thunk_macros_impl/tests/thunk_errors.rs @@ -0,0 +1,56 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +#![expect(missing_docs, reason = "Test code")] + +mod util; + +#[test] +#[cfg_attr(miri, ignore)] +fn missing_from() { + let attr_args = quote::quote! {}; + let item = quote::quote! { + async fn work(&self) -> u64 { + 42 + } + }; + + insta::assert_snapshot!(expand_thunk_error!(attr_args, item)); +} + +#[test] +#[cfg_attr(miri, ignore)] +fn unknown_key() { + let attr_args = quote::quote! { foo = bar }; + let item = quote::quote! { + async fn work(&self) -> u64 { + 42 + } + }; + + insta::assert_snapshot!(expand_thunk_error!(attr_args, item)); +} + +#[test] +#[cfg_attr(miri, ignore)] +fn missing_equals() { + let attr_args = quote::quote! { from }; + let item = quote::quote! { + async fn work(&self) -> u64 { + 42 + } + }; + + insta::assert_snapshot!(expand_thunk_error!(attr_args, item)); +} + +#[test] +#[cfg_attr(miri, ignore)] +fn not_a_function() { + let attr_args = quote::quote! 
{ from = self.thunker }; + let item = quote::quote! { + struct NotAFunction; + }; + + insta::assert_snapshot!(expand_thunk_error!(attr_args, item)); +} diff --git a/crates/sync_thunk_macros_impl/tests/util.rs b/crates/sync_thunk_macros_impl/tests/util.rs new file mode 100644 index 000000000..fa09c3dc3 --- /dev/null +++ b/crates/sync_thunk_macros_impl/tests/util.rs @@ -0,0 +1,49 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +//! Test utilities for snapshot testing the `thunk` macro expansion. + +/// Helper macro to expand a `#[thunk(...)]` attribute on an `async fn` item +/// inside an `impl` block and pretty-print the result for snapshot comparison. +#[macro_export] +macro_rules! expand_thunk { + ($item:expr) => {{ + let item_fn: &syn::ImplItemFn = &$item; + + // Extract the thunk attribute arguments. + let attr = item_fn + .attrs + .iter() + .find(|a| a.path().is_ident("thunk")) + .expect("item must have a #[thunk(...)] attribute"); + let attr_args = match &attr.meta { + syn::Meta::Path(_) => quote::quote! {}, + syn::Meta::List(list) => list.tokens.clone(), + syn::Meta::NameValue(_) => quote::quote! {}, + }; + + // Strip the thunk attribute before passing to the macro. + let mut clean = item_fn.clone(); + clean.attrs.retain(|a| !a.path().is_ident("thunk")); + let item_tokens = quote::quote! { #clean }; + + let output = sync_thunk_macros_impl::thunk_impl(attr_args, item_tokens).unwrap_or_else(|e| e.to_compile_error()); + + // Wrap in a dummy impl block so prettyplease can parse it. + let wrapped = quote::quote! { impl Dummy { #output } }; + let file: syn::File = syn::parse2(wrapped).unwrap(); + prettyplease::unparse(&file) + }}; +} + +/// Helper macro for cases that should produce a compile error. +#[macro_export] +macro_rules! 
expand_thunk_error { + ($attr_args:expr, $item_tokens:expr) => {{ + let output = sync_thunk_macros_impl::thunk_impl($attr_args, $item_tokens).unwrap_or_else(|e| e.to_compile_error()); + + let wrapped = quote::quote! { impl Dummy { #output } }; + let file: syn::File = syn::parse2(wrapped).unwrap(); + prettyplease::unparse(&file) + }}; +} diff --git a/crates/tick/Cargo.toml b/crates/tick/Cargo.toml index ca3ec4186..a0d8fab7f 100644 --- a/crates/tick/Cargo.toml +++ b/crates/tick/Cargo.toml @@ -48,7 +48,6 @@ thread_aware.workspace = true tokio = { workspace = true, optional = true, features = ["time", "rt"] } [dev-dependencies] -ohno = { workspace = true, features = ["app-err"] } #external chrono = { workspace = true, features = ["clock"] } @@ -58,6 +57,7 @@ futures = { workspace = true, features = ["default", "executor", "futures-execut insta = { workspace = true } jiff = { workspace = true, default-features = true } mutants = { workspace = true } +ohno = { workspace = true, features = ["app-err"] } serde = { workspace = true, features = ["std", "derive"] } serde_core = { workspace = true, features = ["std"] } serde_json = { workspace = true }