diff --git a/.buildkite/custom-tests.json b/.buildkite/custom-tests.json index 9100775..7bdae17 100644 --- a/.buildkite/custom-tests.json +++ b/.buildkite/custom-tests.json @@ -2,7 +2,7 @@ "tests": [ { "test_name": "build-fam-gnu", - "command": "cargo build --release --features=fam-wrappers", + "command": "cd kvm-bindings && cargo build --release --features=fam-wrappers", "platform": [ "x86_64", "aarch64", @@ -11,7 +11,7 @@ }, { "test_name": "build-fam-musl", - "command": "cargo build --release --features=fam-wrappers --target {target_platform}-unknown-linux-musl", + "command": "cd kvm-bindings && cargo build --release --features=fam-wrappers --target {target_platform}-unknown-linux-musl", "platform": [ "x86_64", "aarch64" @@ -19,7 +19,7 @@ }, { "test_name": "build-serde-gnu", - "command": "cargo build --release --features=serde", + "command": "cd kvm-bindings && cargo build --release --features=serde", "platform": [ "x86_64", "aarch64", @@ -28,7 +28,7 @@ }, { "test_name": "build-serde-musl", - "command": "cargo build --release --features=serde --target {target_platform}-unknown-linux-musl", + "command": "cd kvm-bindings && cargo build --release --features=serde --target {target_platform}-unknown-linux-musl", "platform": [ "x86_64", "aarch64" diff --git a/.cargo/config b/.cargo/config.toml similarity index 100% rename from .cargo/config rename to .cargo/config.toml diff --git a/.gitignore b/.gitignore index d889fbf..babee9d 100644 --- a/.gitignore +++ b/.gitignore @@ -2,3 +2,5 @@ **/*.rs.bk Cargo.lock .idea +**/.pytest_cache/ +**/__pycache__/* diff --git a/CODEOWNERS b/CODEOWNERS index 4d96c3f..88f51d5 100644 --- a/CODEOWNERS +++ b/CODEOWNERS @@ -1,2 +1,3 @@ -# Add the list of code owners here (using their GitHub username) -* gatekeeper-PullAssigner +# These owners will be the default owners for everything in +# the repo. +* gatekeeper-PullAssigner @acatangiu @aghecenco @andreeaflorescu @lauralt @sameo @roypat @ShadowCurse diff --git a/Cargo.toml b/Cargo.toml index df90ae7..03dd93b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,29 +1,7 @@ -[package] -name = "kvm-bindings" -version = "0.10.0" -authors = ["Amazon firecracker team <firecracker-devel@amazon.com>"] -description = "Rust FFI bindings to KVM generated using bindgen." -repository = "https://github.com/rust-vmm/kvm-bindings" -readme = "README.md" -keywords = ["kvm"] -license = "Apache-2.0" +[workspace] +resolver = "2" -[package.metadata.docs.rs] -all-features = true -rustdoc-args = ["--cfg", "docsrs"] - -[features] -fam-wrappers = ["vmm-sys-util"] -# It is not needed to enable the `serde` feature of `vmm-sys-util` here, because due to how cargo merges features, -# if a downstream crate enables vmm-sys-util in its Cargo.toml, it will get enabled globally.
-serde = ["dep:serde", "serde/derive", "dep:zerocopy"] - - -[dependencies] -vmm-sys-util = { version = "0.12.1", optional = true } -serde = { version = "1.0.0", optional = true, features = ["derive"] } -zerocopy = { version = "0.7.32", optional = true, features = ["derive"] } - -[dev-dependencies] -bincode = "1.3.3" -serde_json = "1.0.125" +members = [ + "kvm-bindings", + "kvm-ioctls", +] diff --git a/LICENSE b/LICENSE-APACHE similarity index 100% rename from LICENSE rename to LICENSE-APACHE diff --git a/LICENSE-MIT b/LICENSE-MIT new file mode 100644 index 0000000..5c6a646 --- /dev/null +++ b/LICENSE-MIT @@ -0,0 +1,24 @@ +Permission is hereby granted, free of charge, to any +person obtaining a copy of this software and associated +documentation files (the "Software"), to deal in the +Software without restriction, including without +limitation the rights to use, copy, modify, merge, +publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software +is furnished to do so, subject to the following +conditions: + +The above copyright notice and this permission notice +shall be included in all copies or substantial portions +of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF +ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED +TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A +PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT +SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR +IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. + diff --git a/CHANGELOG.md b/kvm-bindings/CHANGELOG.md similarity index 100% rename from CHANGELOG.md rename to kvm-bindings/CHANGELOG.md diff --git a/CONTRIBUTING.md b/kvm-bindings/CONTRIBUTING.md similarity index 100% rename from CONTRIBUTING.md rename to kvm-bindings/CONTRIBUTING.md diff --git a/kvm-bindings/Cargo.toml b/kvm-bindings/Cargo.toml new file mode 100644 index 0000000..df90ae7 --- /dev/null +++ b/kvm-bindings/Cargo.toml @@ -0,0 +1,29 @@ +[package] +name = "kvm-bindings" +version = "0.10.0" +authors = ["Amazon firecracker team <firecracker-devel@amazon.com>"] +description = "Rust FFI bindings to KVM generated using bindgen." +repository = "https://github.com/rust-vmm/kvm-bindings" +readme = "README.md" +keywords = ["kvm"] +license = "Apache-2.0" + +[package.metadata.docs.rs] +all-features = true +rustdoc-args = ["--cfg", "docsrs"] + +[features] +fam-wrappers = ["vmm-sys-util"] +# It is not needed to enable the `serde` feature of `vmm-sys-util` here, because due to how cargo merges features, +# if a downstream crate enables vmm-sys-util in its Cargo.toml, it will get enabled globally.
+serde = ["dep:serde", "serde/derive", "dep:zerocopy"] + + +[dependencies] +vmm-sys-util = { version = "0.12.1", optional = true } +serde = { version = "1.0.0", optional = true, features = ["derive"] } +zerocopy = { version = "0.7.32", optional = true, features = ["derive"] } + +[dev-dependencies] +bincode = "1.3.3" +serde_json = "1.0.125" diff --git a/kvm-bindings/LICENSE b/kvm-bindings/LICENSE new file mode 120000 index 0000000..965b606 --- /dev/null +++ b/kvm-bindings/LICENSE @@ -0,0 +1 @@ +../LICENSE-APACHE \ No newline at end of file diff --git a/README.md b/kvm-bindings/README.md similarity index 100% rename from README.md rename to kvm-bindings/README.md diff --git a/coverage_config_aarch64.json b/kvm-bindings/coverage_config_aarch64.json similarity index 100% rename from coverage_config_aarch64.json rename to kvm-bindings/coverage_config_aarch64.json diff --git a/coverage_config_x86_64.json b/kvm-bindings/coverage_config_x86_64.json similarity index 100% rename from coverage_config_x86_64.json rename to kvm-bindings/coverage_config_x86_64.json diff --git a/src/arm64/bindings.rs b/kvm-bindings/src/arm64/bindings.rs similarity index 100% rename from src/arm64/bindings.rs rename to kvm-bindings/src/arm64/bindings.rs diff --git a/src/arm64/fam_wrappers.rs b/kvm-bindings/src/arm64/fam_wrappers.rs similarity index 100% rename from src/arm64/fam_wrappers.rs rename to kvm-bindings/src/arm64/fam_wrappers.rs diff --git a/src/arm64/mod.rs b/kvm-bindings/src/arm64/mod.rs similarity index 100% rename from src/arm64/mod.rs rename to kvm-bindings/src/arm64/mod.rs diff --git a/src/arm64/serialize.rs b/kvm-bindings/src/arm64/serialize.rs similarity index 100% rename from src/arm64/serialize.rs rename to kvm-bindings/src/arm64/serialize.rs diff --git a/src/lib.rs b/kvm-bindings/src/lib.rs similarity index 100% rename from src/lib.rs rename to kvm-bindings/src/lib.rs diff --git a/src/riscv64/bindings.rs b/kvm-bindings/src/riscv64/bindings.rs similarity index 100% rename from src/riscv64/bindings.rs rename to kvm-bindings/src/riscv64/bindings.rs diff --git a/src/riscv64/fam_wrappers.rs b/kvm-bindings/src/riscv64/fam_wrappers.rs similarity index 100% rename from src/riscv64/fam_wrappers.rs rename to kvm-bindings/src/riscv64/fam_wrappers.rs diff --git a/src/riscv64/mod.rs b/kvm-bindings/src/riscv64/mod.rs similarity index 100% rename from src/riscv64/mod.rs rename to kvm-bindings/src/riscv64/mod.rs diff --git a/src/riscv64/serialize.rs b/kvm-bindings/src/riscv64/serialize.rs similarity index 100% rename from src/riscv64/serialize.rs rename to kvm-bindings/src/riscv64/serialize.rs diff --git a/src/serialize.rs b/kvm-bindings/src/serialize.rs similarity index 100% rename from src/serialize.rs rename to kvm-bindings/src/serialize.rs diff --git a/src/x86_64/bindings.rs b/kvm-bindings/src/x86_64/bindings.rs similarity index 100% rename from src/x86_64/bindings.rs rename to kvm-bindings/src/x86_64/bindings.rs diff --git a/src/x86_64/fam_wrappers.rs b/kvm-bindings/src/x86_64/fam_wrappers.rs similarity index 100% rename from src/x86_64/fam_wrappers.rs rename to kvm-bindings/src/x86_64/fam_wrappers.rs diff --git a/src/x86_64/mod.rs b/kvm-bindings/src/x86_64/mod.rs similarity index 100% rename from src/x86_64/mod.rs rename to kvm-bindings/src/x86_64/mod.rs diff --git a/src/x86_64/serialize.rs b/kvm-bindings/src/x86_64/serialize.rs similarity index 100% rename from src/x86_64/serialize.rs rename to kvm-bindings/src/x86_64/serialize.rs diff --git a/kvm-ioctls/CHANGELOG.md b/kvm-ioctls/CHANGELOG.md new file mode 
100644 index 0000000..022066e --- /dev/null +++ b/kvm-ioctls/CHANGELOG.md @@ -0,0 +1,296 @@ +# Changelog + +## Upcoming Release + +## v0.19.0 + +### Added + +- [[#275](https://github.com/rust-vmm/kvm-ioctls/pull/275)]: Introduce `riscv64` ioctls. + +### Removed + +- [[#289](https://github.com/rust-vmm/kvm-ioctls/pull/289)]: Drop `x86` 32-bit + and `arm` 32-bit support. + +### Changed + +- [[#273](https://github.com/rust-vmm/kvm-ioctls/pull/273)]: `DeviceFd::get_device_attr` is now + marked as unsafe. +- [[#277](https://github.com/rust-vmm/kvm-ioctls/pull/277)]: Updated kvm-bindings to 0.9.1. + +## v0.18.0 + +### Added + +- [[#264](https://github.com/rust-vmm/kvm-ioctls/pull/264)]: Added `KVM_SET_USER_MEMORY_REGION2`, + `KVM_CREATE_GUEST_MEMFD` and `KVM_SET_MEMORY_ATTRIBUTES` ioctls. +- [[#267](https://github.com/rust-vmm/kvm-ioctls/pull/267)]: Added `HypercallExit` field to + `VcpuExit::Hypercall` and added `ExitHypercall` to `Cap`. +- [[#270](https://github.com/rust-vmm/kvm-ioctls/pull/270)]: Added `MemoryFaultInfo` to `Cap` and + propagated `MemoryFault` exit reason in `KVM_RUN`. + +## v0.17.0 + +### Changed + +- [[#255](https://github.com/rust-vmm/kvm-ioctls/issues/255)]: Fixed a + soundness issue when accessing the `kvm_run` struct. `VcpuFd::run()` and + `VcpuFd::set_kvm_immediate_exit()` now take `&mut self` as a consequence. +- [[#260](https://github.com/rust-vmm/kvm-ioctls/pull/260)]: Updated kvm-bindings to 0.8.0. + +## v0.16.0 + +### Added +- [[#242](https://github.com/rust-vmm/kvm-ioctls/pull/242)] x86: add support + for SMI injection via `Vcpu::smi()` (`KVM_SMI` ioctl). +- [[#241](https://github.com/rust-vmm/kvm-ioctls/pull/241)] Add support for + userspace MSR handling. +- [[#246](https://github.com/rust-vmm/kvm-ioctls/pull/246)] Add support for + userspace NMI injection (`KVM_NMI` ioctl). +- [[#244](https://github.com/rust-vmm/kvm-ioctls/pull/244)] Add support for + coalesced MMIO (`KVM_CAP_COALESCED_MMIO` / `KVM_CAP_COALESCED_PIO`). + +### Changed +- [[#234](https://github.com/rust-vmm/kvm-ioctls/issues/234)] vcpu: export +reg_size as a public method. +- [[#243](https://github.com/rust-vmm/kvm-ioctls/pull/243)] derived the `Copy` + trait for `IoEventAddress` and `NoDatamatch`. + +## v0.15.0 + +### Added
- [[#230](https://github.com/rust-vmm/kvm-ioctls/pull/230)] Added + `check_extension_raw` method to use raw integer values instead + of `Cap` enum. +- [[#228](https://github.com/rust-vmm/kvm-ioctls/pull/228)] arm64: add +support for vCPU SVE feature. +- [[#226](https://github.com/rust-vmm/kvm-ioctls/pull/226)] Add `Cap::ArmPtrAuthAddress` + and `Cap::ArmPtrAuthGeneric` capabilities. + +## v0.14.0 + +### Added + +- [[#219](https://github.com/rust-vmm/kvm-ioctls/pull/219)] Support for + `KVM_GET_MSR_FEATURE_INDEX_LIST` and `KVM_GET_MSRS` system ioctls. +- [[#221](https://github.com/rust-vmm/kvm-ioctls/pull/221)] Add + `Cap::ArmPmuV3`. + +### Changed + +- [[#223](https://github.com/rust-vmm/kvm-ioctls/pull/223)] aarch64: + Updated `get/set_one_reg` to support different register sizes through + byte slices. + +## v0.13.0 + +### Added
- [[#213](https://github.com/rust-vmm/kvm-ioctls/pull/213)] Add `Kvm::new_with_path()` + and `Kvm::open_with_cloexec_at()` to allow using kvm device files other than + `/dev/kvm` (see the usage sketch below).
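To make the `Kvm::new_with_path()` addition above concrete, here is a minimal, hedged sketch (assuming a host where the chosen KVM device node exists and is accessible; `/dev/kvm` stands in for any custom path):

```rust
use std::ffi::CString;

use kvm_ioctls::Kvm;

fn main() {
    // Open a KVM device node by explicit path instead of the hard-coded
    // /dev/kvm that Kvm::new() uses internally.
    let kvm_path = CString::new("/dev/kvm").unwrap();
    let kvm = Kvm::new_with_path(&kvm_path).unwrap();
    println!("KVM API version: {}", kvm.get_api_version());
}
```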
+ +## v0.12.0 + +### Added + +- [[#187](https://github.com/rust-vmm/kvm-ioctls/pull/187)] Support for + `KVM_SET_IDENTITY_MAP_ADDR` +- Derive Debug for all exported structs and enums +- [[#189](https://github.com/rust-vmm/kvm-ioctls/pull/189)] Expose `KVM_SET_DEVICE_ATTR` and + `KVM_HAS_DEVICE_ATTR` for vcpu +- [[#191](https://github.com/rust-vmm/kvm-ioctls/pull/191)] Add `KVM_TRANSLATE` support and + the `translate_gva` function that translates a guest virtual address to the corresponding physical address +- [[#190](https://github.com/rust-vmm/kvm-ioctls/pull/190)] Enable usage of `sync_regs` + to allow bulk getting and setting of general purpose registers, reducing the number of + ioctls needed. +- [[#198](https://github.com/rust-vmm/kvm-ioctls/pull/198)] Return details about + `KVM_EXIT_FAIL_ENTRY` in vCPU run +- [[#199](https://github.com/rust-vmm/kvm-ioctls/pull/199)] Add `register_irqfd_with_resample` + so that `irqfd` + `resamplefd` can be registered through `KVM_IRQFD` +- [[#202](https://github.com/rust-vmm/kvm-ioctls/pull/202)] Add `KVM_CAP_GUEST_DEBUG_HW_BPS/WPS` +- [[#202](https://github.com/rust-vmm/kvm-ioctls/pull/202)] Added `check_extension_int` + which allows checking the capabilities that return numbers instead of booleans + +### Changed + +- Updated vmm-sys-util to 0.11.0 +- Updated kvm-bindings to 0.6.0 +- Upgraded to Rust 2021 edition +- Switched to specifying dependencies using caret requirements + instead of comparison requirements +- [[#195](https://github.com/rust-vmm/kvm-ioctls/pull/195)] Do not panic on unsupported + `KVM_EXIT` reason +- [[#196](https://github.com/rust-vmm/kvm-ioctls/pull/196)] Expose a mutable reference + to the `kvm_run` structure to allow proper handling of unsupported exit reasons +- [[#200](https://github.com/rust-vmm/kvm-ioctls/pull/200)] Fix wrong `target_arch` gate + preventing `set_guest_debug` from being exported on ARM +- [[#206](https://github.com/rust-vmm/kvm-ioctls/pull/206)] Use `u128` in `get/set_one_reg` + +## v0.11.0 + +### Added
- [[#178](https://github.com/rust-vmm/kvm-ioctls/pull/178)] Support for the AMD + Secure Encrypted Virtualization (SEV) through the following VM ioctls: + `encrypt_op`, `encrypt_op_sev`, `register_enc_memory_region` and + `unregister_enc_memory_region`. +- [[#184](https://github.com/rust-vmm/kvm-ioctls/pull/184)] `DeviceFd` now + derives `Debug`. + +## v0.10.0 + +### Changed
- Now depends on kvm-bindings >=0.5.0 which replaced the v4.20 KVM bindings + with the v5.13 ones. +- Updated `VcpuExit::Debug` to return architecture specific information for the + debug event. + +## v0.9.0 + +### Added
- Support for accessing and controlling the Time Stamp Counter on x86 platforms + through the `get_tsc_khz` and `set_tsc_khz` functions. + +### Changed
- Updated `create_vm` on `aarch64` to create a VM fd from the KVM fd using the + host's maximum IPA size. + +## v0.8.0 + +### Added
- Support for specifying VM type (an opaque platform and architecture specific + constant) when creating a VM (`KVM_CREATE_VM` ioctl) via the +`Kvm::create_vm_with_type` function. + +### Changed
- Now depends on kvm-bindings >=0.4.0 to support use of a newer vmm-sys-util + dependency. + +## v0.7.0 + +### Added
- Support for the system API that returns the maximum allowed vCPU ID + (`KVM_CAP_MAX_VCPU_ID`). +- Support for `KVM_MEMORY_ENCRYPT_OP`. + +### Fixed
- [[#119](https://github.com/rust-vmm/kvm-ioctls/issues/119)]: Disallow invalid + number of cpuid entries to be passed to `get_supported_cpuid` and + `get_emulated_cpuid` (see the sketch below).
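As a concrete illustration of the #119 fix above, the cpuid entry count is validated before reaching the kernel; a minimal, hedged sketch (x86_64 only, assuming `/dev/kvm` is present and accessible):

```rust
#[cfg(target_arch = "x86_64")]
fn main() {
    use kvm_bindings::KVM_MAX_CPUID_ENTRIES;
    use kvm_ioctls::Kvm;

    let kvm = Kvm::new().unwrap();
    // Anything above KVM_MAX_CPUID_ENTRIES is rejected with ENOMEM up front
    // instead of being handed to the KVM_GET_SUPPORTED_CPUID ioctl.
    assert!(kvm.get_supported_cpuid(KVM_MAX_CPUID_ENTRIES + 1).is_err());
    let cpuid = kvm.get_supported_cpuid(KVM_MAX_CPUID_ENTRIES).unwrap();
    println!("{} supported CPUID entries", cpuid.as_slice().len());
}

#[cfg(not(target_arch = "x86_64"))]
fn main() {}
```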
+ +### Changed +- [[#123](https://github.com/rust-vmm/kvm-ioctls/issues/123)]: Updated + `create_vcpu` to use `u64` as the parameter for the number of vCPUs. + +## v0.6.0 + +### Added
- Support for the vcpu ioctls: `KVM_SET_GUEST_DEBUG`, `KVM_KVMCLOCK_CTRL`, and + `KVM_GET_REG_LIST`. +- Support for the vm ioctl `KVM_GET_DEVICE_ATTR`. +- Support for the device ioctl `KVM_HAS_DEVICE_ATTR`. +- Support for `VcpuExit::Debug`. +- Support for enabling vcpu capabilities using `Vcpu::enable_cap`. +- Support for checking Hyper-V (`HypervSynic` and `HypervSynic2`), MSI + (`MsiDevid`), and IPA Size (`ArmVmIPASize`) capabilities + using `kvm.check_extension`. +- Support for checking the VM capabilities via `Vm::check_extension`. +- Create a VM with flexible IPA size using `Kvm::create_vm_with_ipa_size`. + +### Removed
- Removed `Kvm::new_with_fd_number`. The same functionality is offered by the + `Kvm` [FromRawFd](https://doc.rust-lang.org/std/os/unix/io/trait.FromRawFd.html) + trait implementation. + +### Changed
- The VM ioctl `unregister_ioevent` now correctly unregisters the events that + correspond to the data match passed as a parameter. +- The `SystemEvent` vCPU exit now also contains the relevant type and flags. +- Updated `get_dirty_log` such that it does not assume the page size is 4K, + but instead reads it using `libc::sysconf`. + +## v0.5.0 + +### Added
- Support for the vcpu ioctls `KVM_GET/SET_VCPU_EVENTS` and `KVM_GET_DIRTY_LOG` + on `aarch64`. +- Support for the vcpu ioctl `KVM_IRQ_LINE`. + +## v0.4.0 + +### Added
- Support for unregistering ioeventfds through `KVM_IOEVENTFD`. + +### Changed
- Functions working with event FDs now require + `vmm_sys_util::eventfd::EventFd` in their interface instead of + `RawFd` (see the sketch after this release entry). +- Functions working with the FAM structs `kvm_msr_list` and `kvm_msrs` were + changed to work with their safe counterparts, `MsrList` and + `Msrs` respectively. +- Now exporting the `kvm_ioctls::Error` type definition so that users of this + crate can create their own wrapping errors without having to know the + `Error` type used internally by this crate. +- No longer exporting `kvm_ioctls::Result`. Users of this crate should + not have to use `kvm_ioctls::Result` outside the crate. +- `kvm_ioctls::Error` now works with `errno::Error` instead of `io::Error`. + +### Removed
- `CpuId` safe wrapper over FAM struct `kvm_cpuid2`. The safe wrapper is + now provided by the kvm_bindings crate starting with v0.2.0. +- `KVM_MAX_MSR_ENTRIES` and `MAX_KVM_CPUID_ENTRIES`. Equivalent constants + are provided by the kvm_bindings crate starting with v0.2.0.
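To ground the v0.4.0 interface change above (the bullet on event FDs), here is a minimal, hedged sketch of registering and then unregistering an ioeventfd with `vmm_sys_util::eventfd::EventFd` instead of a `RawFd`. It assumes a host with `/dev/kvm` and the `libc` crate as a dependency; the port number 0xf4 is an arbitrary choice:

```rust
use kvm_ioctls::{IoEventAddress, Kvm, NoDatamatch};
use vmm_sys_util::eventfd::EventFd;

fn main() {
    let kvm = Kvm::new().unwrap();
    let vm = kvm.create_vm().unwrap();

    // The safe EventFd wrapper replaces the raw file descriptor in the API.
    let evtfd = EventFd::new(libc::EFD_NONBLOCK).unwrap();
    // Signal `evtfd` on guest writes to I/O port 0xf4, for any written value
    // (NoDatamatch).
    vm.register_ioevent(&evtfd, &IoEventAddress::Pio(0xf4), NoDatamatch)
        .unwrap();
    vm.unregister_ioevent(&evtfd, &IoEventAddress::Pio(0xf4), NoDatamatch)
        .unwrap();
}
```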
+ +## v0.3.0 + +### Added
- Support for setting the vcpu `kvm_immediate_exit` flag +- Support for the vcpu ioctl `KVM_GET_CPUID2` +- Support for the vcpu ioctl `KVM_GET_MP_STATE` +- Support for the vcpu ioctl `KVM_SET_MP_STATE` +- Support for the vcpu ioctl `KVM_GET_VCPU_EVENTS` +- Support for the vcpu ioctl `KVM_SET_VCPU_EVENTS` +- Support for the vcpu ioctl `KVM_GET_DEBUGREGS` +- Support for the vcpu ioctl `KVM_SET_DEBUGREGS` +- Support for the vcpu ioctl `KVM_GET_XSAVE` +- Support for the vcpu ioctl `KVM_SET_XSAVE` +- Support for the vcpu ioctl `KVM_GET_XCRS` +- Support for the vcpu ioctl `KVM_SET_XCRS` +- Support for the vm ioctl `KVM_GET_IRQCHIP` +- Support for the vm ioctl `KVM_SET_IRQCHIP` +- Support for the vm ioctl `KVM_GET_CLOCK` +- Support for the vm ioctl `KVM_SET_CLOCK` +- Support for the vm ioctl `KVM_GET_PIT2` +- Support for the vm ioctl `KVM_SET_PIT2` +- Support for the vcpu ioctl `KVM_GET_ONE_REG` + +### Changed
- Function offering support for `KVM_SET_MSRS` also returns the number + of MSR entries successfully written. + +## v0.2.0 + +### Added
- Add support for `KVM_ENABLE_CAP`. +- Add support for `KVM_SIGNAL_MSI`. + +### Fixed
- Fix bug in `KvmRunWrapper`. The memory for the `kvm_run` struct was not unmapped + after the `KvmRunWrapper` object went out of scope. +- Return proper value when receiving the EOI KVM exit. +- Mark `set_user_memory_region` as unsafe. + +## v0.1.0 + +First release of the kvm-ioctls crate. + +The kvm-ioctls crate provides safe wrappers over the KVM API, a set of ioctls +used for creating and configuring Virtual Machines (VMs) on Linux. +The ioctls are accessible through four structures: +- `Kvm` - wrappers over system ioctls +- `VmFd` - wrappers over VM ioctls +- `VcpuFd` - wrappers over vCPU ioctls +- `DeviceFd` - wrappers over device ioctls + +The kvm-ioctls crate can be used on x86_64 and aarch64. Right now the aarch64 +support is considered experimental.
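As a quick orientation for the four structures listed in the v0.1.0 notes, here is a minimal, hedged sketch of how they fit together (error handling reduced to `unwrap`; guest memory and register setup omitted, so an actual `vcpu.run()` would not get far):

```rust
use kvm_ioctls::Kvm;

fn main() {
    // `Kvm` wraps the system ioctls (opens /dev/kvm).
    let kvm = Kvm::new().unwrap();
    // `VmFd` wraps the VM ioctls.
    let vm = kvm.create_vm().unwrap();
    // `VcpuFd` wraps the vCPU ioctls. Without guest memory and register
    // setup, calling run() on it would just report an error or exit.
    let _vcpu = vm.create_vcpu(0).unwrap();
    // `DeviceFd` (not shown) wraps device ioctls and is obtained via
    // VmFd::create_device().
}
```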
diff --git a/kvm-ioctls/Cargo.toml b/kvm-ioctls/Cargo.toml new file mode 100644 index 0000000..3ea2ed3 --- /dev/null +++ b/kvm-ioctls/Cargo.toml @@ -0,0 +1,19 @@ +[package] +name = "kvm-ioctls" +version = "0.19.0" +authors = ["Amazon Firecracker Team <firecracker-devel@amazon.com>"] +description = "Safe wrappers over KVM ioctls" +repository = "https://github.com/rust-vmm/kvm-ioctls" +readme = "README.md" +keywords = ["kvm"] +license = "Apache-2.0 OR MIT" +edition = "2021" + +[dependencies] +libc = "0.2.39" +kvm-bindings = { path = "../kvm-bindings", version = "0.10.0", features = ["fam-wrappers"] } +vmm-sys-util = "0.12.1" +bitflags = "2.4.1" + +[dev-dependencies] +byteorder = "1.2.1" diff --git a/kvm-ioctls/LICENSE-APACHE b/kvm-ioctls/LICENSE-APACHE new file mode 120000 index 0000000..965b606 --- /dev/null +++ b/kvm-ioctls/LICENSE-APACHE @@ -0,0 +1 @@ +../LICENSE-APACHE \ No newline at end of file diff --git a/kvm-ioctls/LICENSE-MIT b/kvm-ioctls/LICENSE-MIT new file mode 120000 index 0000000..76219eb --- /dev/null +++ b/kvm-ioctls/LICENSE-MIT @@ -0,0 +1 @@ +../LICENSE-MIT \ No newline at end of file diff --git a/kvm-ioctls/README.md b/kvm-ioctls/README.md new file mode 100644 index 0000000..2b4a5fd --- /dev/null +++ b/kvm-ioctls/README.md @@ -0,0 +1,46 @@ +[![Build Status](https://badge.buildkite.com/9e0e6c88972a3248a0908506d6946624da84e4e18c0870c4d0.svg)](https://buildkite.com/rust-vmm/kvm-ioctls-ci) +[![crates.io](https://img.shields.io/crates/v/kvm-ioctls.svg)](https://crates.io/crates/kvm-ioctls) + +# kvm-ioctls + +The kvm-ioctls crate provides safe wrappers over the +[KVM API](https://www.kernel.org/doc/Documentation/virtual/kvm/api.txt), a set +of ioctls used for creating and configuring Virtual Machines (VMs) on Linux. +The ioctls are accessible through four structures: +- `Kvm` - wrappers over system ioctls +- `VmFd` - wrappers over VM ioctls +- `VcpuFd` - wrappers over vCPU ioctls +- `DeviceFd` - wrappers over device ioctls + +For further details check the +[KVM API](https://www.kernel.org/doc/Documentation/virtual/kvm/api.txt) as well +as the code documentation. + +## Supported Platforms + +The kvm-ioctls crate can be used on x86_64, aarch64, and riscv64 (experimental). + +## Running the tests + +Our Continuous Integration (CI) pipeline is implemented on top of +[Buildkite](https://buildkite.com/). +For the complete list of tests, check our +[CI pipeline](https://buildkite.com/rust-vmm/kvm-ioctls-ci). + +Each individual test runs in a container. To reproduce a test locally, you can +use the dev-container on x86_64, arm64 and riscv64. + +```bash +# For running riscv64 tests, replace v47 with v47-riscv. This provides an +# emulated riscv64 environment on an x86_64 host. +docker run --device=/dev/kvm \ + -it \ + --security-opt seccomp=unconfined \ + --volume $(pwd)/kvm-ioctls:/kvm-ioctls \ + rustvmm/dev:v47 +cd kvm-ioctls/ +cargo test +``` + +For more details about the integration tests that are run for `kvm-ioctls`, +check the [rust-vmm-ci](https://github.com/rust-vmm/rust-vmm-ci) readme. diff --git a/kvm-ioctls/THIRD-PARTY b/kvm-ioctls/THIRD-PARTY new file mode 100644 index 0000000..8bafca3 --- /dev/null +++ b/kvm-ioctls/THIRD-PARTY @@ -0,0 +1,27 @@ +// Copyright 2017 The Chromium OS Authors. All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/kvm-ioctls/build.rs b/kvm-ioctls/build.rs new file mode 100644 index 0000000..067a65d --- /dev/null +++ b/kvm-ioctls/build.rs @@ -0,0 +1,12 @@ +// Copyright 2021 Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 OR MIT + +fn main() { + // Add `has_sev` to expected attributes. + println!("cargo:rustc-check-cfg=cfg(has_sev)"); + // Define a `has_sev` attribute, which is used for conditional + // execution of SEV-specific tests and examples. + if std::path::Path::new("/dev/sev").exists() { + println!("cargo:rustc-cfg=has_sev"); + } +} diff --git a/kvm-ioctls/coverage_config_aarch64.json b/kvm-ioctls/coverage_config_aarch64.json new file mode 100644 index 0000000..27587e4 --- /dev/null +++ b/kvm-ioctls/coverage_config_aarch64.json @@ -0,0 +1,5 @@ +{ + "coverage_score": 77.1, + "exclude_path": "", + "crate_features": "" +} diff --git a/kvm-ioctls/coverage_config_x86_64.json b/kvm-ioctls/coverage_config_x86_64.json new file mode 100644 index 0000000..08c0327 --- /dev/null +++ b/kvm-ioctls/coverage_config_x86_64.json @@ -0,0 +1,5 @@ +{ + "coverage_score": 92.61, + "exclude_path": "", + "crate_features": "" +} diff --git a/kvm-ioctls/src/cap.rs b/kvm-ioctls/src/cap.rs new file mode 100644 index 0000000..617c2a9 --- /dev/null +++ b/kvm-ioctls/src/cap.rs @@ -0,0 +1,163 @@ +// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 OR MIT +// +// Portions Copyright 2017 The Chromium OS Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the THIRD-PARTY file. + +use kvm_bindings::*; + +/// Capabilities exposed by KVM. +/// +/// The capabilities list can be used in conjunction with +/// [Kvm::check_extension()](struct.Kvm.html#method.check_extension) to check if a particular +/// capability is available. +/// +/// The list of capabilities is based on the KVM_CAP_* defines from the +/// [Linux KVM header](https://elixir.bootlin.com/linux/latest/source/include/uapi/linux/kvm.h). +#[derive(Clone, Copy, Debug)] +#[repr(u32)] +// We are allowing docs to be missing here because this enum is a wrapper +// over auto-generated code.
+#[allow(missing_docs)] +#[derive(PartialEq, Eq)] +pub enum Cap { + Irqchip = KVM_CAP_IRQCHIP, + Hlt = KVM_CAP_HLT, + MmuShadowCacheControl = KVM_CAP_MMU_SHADOW_CACHE_CONTROL, + UserMemory = KVM_CAP_USER_MEMORY, + SetTssAddr = KVM_CAP_SET_TSS_ADDR, + Vapic = KVM_CAP_VAPIC, + ExtCpuid = KVM_CAP_EXT_CPUID, + Clocksource = KVM_CAP_CLOCKSOURCE, + NrVcpus = KVM_CAP_NR_VCPUS, + NrMemslots = KVM_CAP_NR_MEMSLOTS, + Pit = KVM_CAP_PIT, + NopIoDelay = KVM_CAP_NOP_IO_DELAY, + PvMmu = KVM_CAP_PV_MMU, + MpState = KVM_CAP_MP_STATE, + CoalescedMmio = KVM_CAP_COALESCED_MMIO, + SyncMmu = KVM_CAP_SYNC_MMU, + Iommu = KVM_CAP_IOMMU, + DestroyMemoryRegionWorks = KVM_CAP_DESTROY_MEMORY_REGION_WORKS, + UserNmi = KVM_CAP_USER_NMI, + #[cfg(any(target_arch = "x86_64", target_arch = "aarch64", target_arch = "s390x"))] + SetGuestDebug = KVM_CAP_SET_GUEST_DEBUG, + #[cfg(target_arch = "x86_64")] + ReinjectControl = KVM_CAP_REINJECT_CONTROL, + IrqRouting = KVM_CAP_IRQ_ROUTING, + IrqInjectStatus = KVM_CAP_IRQ_INJECT_STATUS, + AssignDevIrq = KVM_CAP_ASSIGN_DEV_IRQ, + JoinMemoryRegionsWorks = KVM_CAP_JOIN_MEMORY_REGIONS_WORKS, + #[cfg(target_arch = "x86_64")] + Mce = KVM_CAP_MCE, + Irqfd = KVM_CAP_IRQFD, + #[cfg(target_arch = "x86_64")] + Pit2 = KVM_CAP_PIT2, + SetBootCpuId = KVM_CAP_SET_BOOT_CPU_ID, + #[cfg(target_arch = "x86_64")] + PitState2 = KVM_CAP_PIT_STATE2, + Ioeventfd = KVM_CAP_IOEVENTFD, + SetIdentityMapAddr = KVM_CAP_SET_IDENTITY_MAP_ADDR, + #[cfg(target_arch = "x86_64")] + XenHvm = KVM_CAP_XEN_HVM, + AdjustClock = KVM_CAP_ADJUST_CLOCK, + InternalErrorData = KVM_CAP_INTERNAL_ERROR_DATA, + #[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))] + VcpuEvents = KVM_CAP_VCPU_EVENTS, + S390Psw = KVM_CAP_S390_PSW, + PpcSegstate = KVM_CAP_PPC_SEGSTATE, + Hyperv = KVM_CAP_HYPERV, + HypervVapic = KVM_CAP_HYPERV_VAPIC, + HypervSpin = KVM_CAP_HYPERV_SPIN, + PciSegment = KVM_CAP_PCI_SEGMENT, + PpcPairedSingles = KVM_CAP_PPC_PAIRED_SINGLES, + IntrShadow = KVM_CAP_INTR_SHADOW, + #[cfg(target_arch = "x86_64")] + Debugregs = KVM_CAP_DEBUGREGS, + X86RobustSinglestep = KVM_CAP_X86_ROBUST_SINGLESTEP, + PpcOsi = KVM_CAP_PPC_OSI, + PpcUnsetIrq = KVM_CAP_PPC_UNSET_IRQ, + EnableCap = KVM_CAP_ENABLE_CAP, + #[cfg(target_arch = "x86_64")] + Xsave = KVM_CAP_XSAVE, + #[cfg(target_arch = "x86_64")] + Xcrs = KVM_CAP_XCRS, + PpcGetPvinfo = KVM_CAP_PPC_GET_PVINFO, + PpcIrqLevel = KVM_CAP_PPC_IRQ_LEVEL, + AsyncPf = KVM_CAP_ASYNC_PF, + TscControl = KVM_CAP_TSC_CONTROL, + GetTscKhz = KVM_CAP_GET_TSC_KHZ, + PpcBookeSregs = KVM_CAP_PPC_BOOKE_SREGS, + SpaprTce = KVM_CAP_SPAPR_TCE, + PpcSmt = KVM_CAP_PPC_SMT, + PpcRma = KVM_CAP_PPC_RMA, + MaxVcpus = KVM_CAP_MAX_VCPUS, + MaxVcpuId = KVM_CAP_MAX_VCPU_ID, + PpcHior = KVM_CAP_PPC_HIOR, + PpcPapr = KVM_CAP_PPC_PAPR, + SwTlb = KVM_CAP_SW_TLB, + OneReg = KVM_CAP_ONE_REG, + S390Gmap = KVM_CAP_S390_GMAP, + TscDeadlineTimer = KVM_CAP_TSC_DEADLINE_TIMER, + S390Ucontrol = KVM_CAP_S390_UCONTROL, + SyncRegs = KVM_CAP_SYNC_REGS, + Pci23 = KVM_CAP_PCI_2_3, + KvmclockCtrl = KVM_CAP_KVMCLOCK_CTRL, + SignalMsi = KVM_CAP_SIGNAL_MSI, + PpcGetSmmuInfo = KVM_CAP_PPC_GET_SMMU_INFO, + S390Cow = KVM_CAP_S390_COW, + PpcAllocHtab = KVM_CAP_PPC_ALLOC_HTAB, + ReadonlyMem = KVM_CAP_READONLY_MEM, + IrqfdResample = KVM_CAP_IRQFD_RESAMPLE, + PpcBookeWatchdog = KVM_CAP_PPC_BOOKE_WATCHDOG, + PpcHtabFd = KVM_CAP_PPC_HTAB_FD, + S390CssSupport = KVM_CAP_S390_CSS_SUPPORT, + PpcEpr = KVM_CAP_PPC_EPR, + ArmPsci = KVM_CAP_ARM_PSCI, + ArmSetDeviceAddr = KVM_CAP_ARM_SET_DEVICE_ADDR, + DeviceCtrl = KVM_CAP_DEVICE_CTRL, + IrqMpic = 
KVM_CAP_IRQ_MPIC, + PpcRtas = KVM_CAP_PPC_RTAS, + IrqXics = KVM_CAP_IRQ_XICS, + ArmEl132bit = KVM_CAP_ARM_EL1_32BIT, + SpaprMultitce = KVM_CAP_SPAPR_MULTITCE, + ExtEmulCpuid = KVM_CAP_EXT_EMUL_CPUID, + HypervTime = KVM_CAP_HYPERV_TIME, + IoapicPolarityIgnored = KVM_CAP_IOAPIC_POLARITY_IGNORED, + EnableCapVm = KVM_CAP_ENABLE_CAP_VM, + S390Irqchip = KVM_CAP_S390_IRQCHIP, + IoeventfdNoLength = KVM_CAP_IOEVENTFD_NO_LENGTH, + VmAttributes = KVM_CAP_VM_ATTRIBUTES, + ArmPsci02 = KVM_CAP_ARM_PSCI_0_2, + PpcFixupHcall = KVM_CAP_PPC_FIXUP_HCALL, + PpcEnableHcall = KVM_CAP_PPC_ENABLE_HCALL, + CheckExtensionVm = KVM_CAP_CHECK_EXTENSION_VM, + S390UserSigp = KVM_CAP_S390_USER_SIGP, + #[cfg(target_arch = "x86_64")] + X86Smm = KVM_CAP_X86_SMM, + #[cfg(target_arch = "x86_64")] + SplitIrqchip = KVM_CAP_SPLIT_IRQCHIP, + ArmPmuV3 = KVM_CAP_ARM_PMU_V3, + ImmediateExit = KVM_CAP_IMMEDIATE_EXIT, + ArmVmIPASize = KVM_CAP_ARM_VM_IPA_SIZE, + MsiDevid = KVM_CAP_MSI_DEVID, + HypervSynic = KVM_CAP_HYPERV_SYNIC, + HypervSynic2 = KVM_CAP_HYPERV_SYNIC2, + DebugHwBps = KVM_CAP_GUEST_DEBUG_HW_BPS, + DebugHwWps = KVM_CAP_GUEST_DEBUG_HW_WPS, + GetMsrFeatures = KVM_CAP_GET_MSR_FEATURES, + CoalescedPio = KVM_CAP_COALESCED_PIO, + #[cfg(target_arch = "aarch64")] + ArmSve = KVM_CAP_ARM_SVE, + #[cfg(target_arch = "aarch64")] + ArmPtrAuthAddress = KVM_CAP_ARM_PTRAUTH_ADDRESS, + #[cfg(target_arch = "aarch64")] + ArmPtrAuthGeneric = KVM_CAP_ARM_PTRAUTH_GENERIC, + #[cfg(target_arch = "x86_64")] + X86UserSpaceMsr = KVM_CAP_X86_USER_SPACE_MSR, + #[cfg(target_arch = "x86_64")] + ExitHypercall = KVM_CAP_EXIT_HYPERCALL, + #[cfg(target_arch = "x86_64")] + MemoryFaultInfo = KVM_CAP_MEMORY_FAULT_INFO, +} diff --git a/kvm-ioctls/src/ioctls/device.rs b/kvm-ioctls/src/ioctls/device.rs new file mode 100644 index 0000000..fc13104 --- /dev/null +++ b/kvm-ioctls/src/ioctls/device.rs @@ -0,0 +1,412 @@ +// Copyright © 2024 Institute of Software, CAS. All rights reserved. +// +// Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 OR MIT + +use std::fs::File; +use std::os::unix::io::{AsRawFd, FromRawFd, RawFd}; + +use crate::ioctls::Result; +use crate::kvm_ioctls::{KVM_GET_DEVICE_ATTR, KVM_HAS_DEVICE_ATTR, KVM_SET_DEVICE_ATTR}; +use kvm_bindings::kvm_device_attr; +use vmm_sys_util::errno; +use vmm_sys_util::ioctl::{ioctl_with_mut_ref, ioctl_with_ref}; + +/// Wrapper over the file descriptor obtained when creating an emulated device in the kernel. +#[derive(Debug)] +pub struct DeviceFd { + fd: File, +} + +impl DeviceFd { + /// Tests whether a device supports a particular attribute. + /// + /// See the documentation for `KVM_HAS_DEVICE_ATTR`. + /// # Arguments + /// + /// * `device_attr` - The device attribute to be tested. `addr` field is ignored. + pub fn has_device_attr(&self, device_attr: &kvm_device_attr) -> Result<()> { + // SAFETY: We are calling this function with a Device fd, and we trust the kernel. + let ret = unsafe { ioctl_with_ref(self, KVM_HAS_DEVICE_ATTR(), device_attr) }; + if ret != 0 { + return Err(errno::Error::last()); + } + Ok(()) + } + + /// Sets a specified piece of device configuration and/or state. + /// + /// See the documentation for `KVM_SET_DEVICE_ATTR`. + /// # Arguments + /// + /// * `device_attr` - The device attribute to be set. + /// + /// # Example + /// + /// Configuring a VFIO device using `set_device_attr`. 
Note that VFIO + /// devices are not yet available on RISC-V. The QEMU patch + /// (https://lore.kernel.org/all/20240903201633.93182-1-dbarboza@ventanamicro.com/) + /// and the Linux kernel patches + /// (https://github.com/ventanamicro/linux/tree/dev-upstream) have not been + /// upstreamed yet, so the VFIO device test is disabled on RISC-V for the time being. + /// + /// ```rust + /// # extern crate kvm_ioctls; + /// # extern crate kvm_bindings; + /// # use kvm_ioctls::Kvm; + /// let kvm = Kvm::new().unwrap(); + /// let vm = kvm.create_vm().unwrap(); + /// + /// #[cfg(not(target_arch = "riscv64"))] + /// { + /// # use kvm_bindings::{ + /// # kvm_device_type_KVM_DEV_TYPE_VFIO, + /// # KVM_DEV_VFIO_GROUP, KVM_DEV_VFIO_GROUP_ADD, KVM_CREATE_DEVICE_TEST + /// # }; + /// let mut device = kvm_bindings::kvm_create_device { + /// type_: kvm_device_type_KVM_DEV_TYPE_VFIO, + /// fd: 0, + /// flags: KVM_CREATE_DEVICE_TEST, + /// }; + /// + /// let device_fd = vm + /// .create_device(&mut device) + /// .expect("Cannot create KVM device"); + /// + /// let dist_attr = kvm_bindings::kvm_device_attr { + /// group: KVM_DEV_VFIO_GROUP, + /// attr: u64::from(KVM_DEV_VFIO_GROUP_ADD), + /// addr: 0x0, + /// flags: 0, + /// }; + /// + /// if device_fd.has_device_attr(&dist_attr).is_ok() { + /// device_fd.set_device_attr(&dist_attr).unwrap(); + /// } + /// } + /// ``` + pub fn set_device_attr(&self, device_attr: &kvm_device_attr) -> Result<()> { + // SAFETY: We are calling this function with a Device fd, and we trust the kernel. + let ret = unsafe { ioctl_with_ref(self, KVM_SET_DEVICE_ATTR(), device_attr) }; + if ret != 0 { + return Err(errno::Error::last()); + } + Ok(()) + } + + /// Gets a specified piece of device configuration and/or state. + /// + /// See the documentation for `KVM_GET_DEVICE_ATTR`. + /// + /// # Arguments + /// + /// * `device_attr` - The device attribute to be retrieved. + /// Note: This argument serves as both input and output. + /// When calling this function, the user should explicitly provide + /// valid values for the `group` and the `attr` field of the + /// `kvm_device_attr` structure, and a valid userspace address + /// (i.e. the `addr` field) to access the returned device attribute + /// data. + /// + /// # Returns + /// + /// * Returns the last occurred `errno` wrapped in an `Err`. + /// * `device_attr` - The `addr` field of the `device_attr` structure will point to + /// the device attribute data. + /// + /// # Safety + /// + /// The caller is responsible for the validity of the `device_attr` argument, + /// including that it is safe to write to the `addr` member. + /// + /// # Examples + /// ```rust + /// # extern crate kvm_ioctls; + /// # extern crate kvm_bindings; + /// # use kvm_ioctls::Kvm; + /// + /// let kvm = Kvm::new().unwrap(); + /// let vm = kvm.create_vm().unwrap(); + /// + /// // On x86_64, `get_device_attr` is typically not needed, so the code + /// // example here is only for AArch64. + /// #[cfg(target_arch = "aarch64")] + /// { + /// use kvm_bindings::{ + /// kvm_device_type_KVM_DEV_TYPE_ARM_VGIC_V2, kvm_device_type_KVM_DEV_TYPE_ARM_VGIC_V3, + /// KVM_DEV_ARM_VGIC_GRP_NR_IRQS, + /// }; + /// + /// // Create a GIC device.
+ /// let mut gic_device = kvm_bindings::kvm_create_device { + /// type_: kvm_device_type_KVM_DEV_TYPE_ARM_VGIC_V3, + /// fd: 0, + /// flags: 0, + /// }; + /// let device_fd = match vm.create_device(&mut gic_device) { + /// Ok(fd) => fd, + /// Err(_) => { + /// gic_device.type_ = kvm_device_type_KVM_DEV_TYPE_ARM_VGIC_V2; + /// vm.create_device(&mut gic_device) + /// .expect("Cannot create KVM vGIC device") + /// } + /// }; + /// + /// let mut data: u32 = 0; + /// let mut gic_attr = kvm_bindings::kvm_device_attr::default(); + /// gic_attr.group = KVM_DEV_ARM_VGIC_GRP_NR_IRQS; + /// gic_attr.addr = &mut data as *mut u32 as u64; + /// + /// // SAFETY: gic_attr.addr is safe to write to. + /// unsafe { device_fd.get_device_attr(&mut gic_attr) }.unwrap(); + /// } + /// ``` + pub unsafe fn get_device_attr(&self, device_attr: &mut kvm_device_attr) -> Result<()> { + // SAFETY: Caller has ensured device_attr.addr is safe to write to. + // We are calling this function with a Device fd, we trust the kernel. + let ret = ioctl_with_mut_ref(self, KVM_GET_DEVICE_ATTR(), device_attr); + if ret != 0 { + return Err(errno::Error::last()); + } + Ok(()) + } +} + +/// Helper function for creating a new device. +pub fn new_device(dev_fd: File) -> DeviceFd { + DeviceFd { fd: dev_fd } +} + +impl AsRawFd for DeviceFd { + fn as_raw_fd(&self) -> RawFd { + self.fd.as_raw_fd() + } +} + +impl FromRawFd for DeviceFd { + /// # Safety + /// + /// This function is unsafe as the primitives currently returned have the contract that + /// they are the sole owner of the file descriptor they are wrapping. Usage of this function + /// could accidentally allow violating this contract which can cause memory unsafety in code + /// that relies on it being true. + unsafe fn from_raw_fd(fd: RawFd) -> Self { + DeviceFd { + fd: File::from_raw_fd(fd), + } + } +} + +#[cfg(test)] +mod tests { + #![allow(clippy::undocumented_unsafe_blocks)] + use super::*; + use crate::ioctls::system::Kvm; + #[cfg(target_arch = "x86_64")] + use kvm_bindings::{ + kvm_device_type_KVM_DEV_TYPE_ARM_VGIC_V3, kvm_device_type_KVM_DEV_TYPE_VFIO, + KVM_DEV_VFIO_GROUP, KVM_DEV_VFIO_GROUP_ADD, + }; + #[cfg(target_arch = "aarch64")] + use kvm_bindings::{KVM_DEV_VFIO_GROUP, KVM_DEV_VFIO_GROUP_ADD}; + + use kvm_bindings::KVM_CREATE_DEVICE_TEST; + + #[test] + #[cfg(target_arch = "x86_64")] + fn test_create_device() { + let kvm = Kvm::new().unwrap(); + let vm = kvm.create_vm().unwrap(); + + let mut gic_device = kvm_bindings::kvm_create_device { + type_: kvm_device_type_KVM_DEV_TYPE_ARM_VGIC_V3, + fd: 0, + flags: KVM_CREATE_DEVICE_TEST, + }; + // This fails on x86_64 because there is no VGIC there. + vm.create_device(&mut gic_device).unwrap_err(); + + gic_device.type_ = kvm_device_type_KVM_DEV_TYPE_VFIO; + + let device_fd = vm + .create_device(&mut gic_device) + .expect("Cannot create KVM device"); + + // Following lines to re-construct device_fd are used to test + // DeviceFd::from_raw_fd() and DeviceFd::as_raw_fd(). + let raw_fd = unsafe { libc::dup(device_fd.as_raw_fd()) }; + assert!(raw_fd >= 0); + let device_fd = unsafe { DeviceFd::from_raw_fd(raw_fd) }; + + let dist_attr = kvm_bindings::kvm_device_attr { + group: KVM_DEV_VFIO_GROUP, + attr: u64::from(KVM_DEV_VFIO_GROUP_ADD), + addr: 0x0, + flags: 0, + }; + + let mut dist_attr_mut = dist_attr; + + // We are just creating a test device. Creating a real device would make the CI dependent + // on host configuration (like having /dev/vfio). We expect this to fail. 
+ device_fd.has_device_attr(&dist_attr).unwrap_err(); + unsafe { device_fd.get_device_attr(&mut dist_attr_mut) }.unwrap_err(); + device_fd.set_device_attr(&dist_attr).unwrap_err(); + assert_eq!(errno::Error::last().errno(), 25); + } + + #[test] + #[cfg(target_arch = "aarch64")] + fn test_create_device() { + use crate::ioctls::vm::{create_gic_device, request_gic_init, set_supported_nr_irqs}; + use kvm_bindings::{ + kvm_device_type_KVM_DEV_TYPE_FSL_MPIC_20, KVM_DEV_ARM_VGIC_GRP_NR_IRQS, + }; + use vmm_sys_util::errno::Error; + + let kvm = Kvm::new().unwrap(); + let vm = kvm.create_vm().unwrap(); + + let mut gic_device = kvm_bindings::kvm_create_device { + type_: kvm_device_type_KVM_DEV_TYPE_FSL_MPIC_20, + fd: 0, + flags: KVM_CREATE_DEVICE_TEST, + }; + // This fails on aarch64 as it does not use MPIC (MultiProcessor Interrupt Controller), + // it uses the VGIC. + vm.create_device(&mut gic_device).unwrap_err(); + + let device_fd = create_gic_device(&vm, 0); + + // GICv3 on arm/aarch64 requires an online vCPU prior to setting device attributes, + // see: https://www.kernel.org/doc/html/latest/virt/kvm/devices/arm-vgic-v3.html + vm.create_vcpu(0).unwrap(); + + // Following lines to re-construct device_fd are used to test + // DeviceFd::from_raw_fd() and DeviceFd::as_raw_fd(). + let raw_fd = unsafe { libc::dup(device_fd.as_raw_fd()) }; + assert!(raw_fd >= 0); + let device_fd = unsafe { DeviceFd::from_raw_fd(raw_fd) }; + + // Set some attribute that does not apply to VGIC, expect the test to fail. + let dist_attr = kvm_bindings::kvm_device_attr { + group: KVM_DEV_VFIO_GROUP, + attr: u64::from(KVM_DEV_VFIO_GROUP_ADD), + addr: 0x0, + flags: 0, + }; + device_fd.has_device_attr(&dist_attr).unwrap_err(); + + // Set maximum supported number of IRQs of the vGIC device to 128. + set_supported_nr_irqs(&device_fd, 128); + + // Initialize valid vGIC device. + request_gic_init(&device_fd); + + // Test `get_device_attr`. Here we try to extract the maximum supported number of IRQs. + // This value should be saved in the address provided to the ioctl. + let mut data: u32 = 0; + + let mut gic_attr = kvm_bindings::kvm_device_attr { + group: KVM_DEV_ARM_VGIC_GRP_NR_IRQS, + addr: data as u64, + ..Default::default() + }; + + // Without properly providing the address to where the + // value will be stored, the ioctl fails with EFAULT. + let res = unsafe { device_fd.get_device_attr(&mut gic_attr) }; + assert_eq!(res, Err(Error::new(libc::EFAULT))); + + gic_attr.addr = &mut data as *mut u32 as u64; + unsafe { device_fd.get_device_attr(&mut gic_attr) }.unwrap(); + // The maximum supported number of IRQs should be 128, same as the value + // when we initialize the GIC. + assert_eq!(data, 128); + } + + #[test] + #[cfg(target_arch = "riscv64")] + fn test_create_device() { + use crate::ioctls::vm::{create_aia_device, request_aia_init, set_supported_nr_irqs}; + use kvm_bindings::{ + kvm_device_attr, kvm_device_type_KVM_DEV_TYPE_FSL_MPIC_20, + KVM_DEV_RISCV_AIA_ADDR_APLIC, KVM_DEV_RISCV_AIA_CONFIG_SRCS, + KVM_DEV_RISCV_AIA_GRP_ADDR, KVM_DEV_RISCV_AIA_GRP_CONFIG, + }; + use vmm_sys_util::errno::Error; + + let kvm = Kvm::new().unwrap(); + let vm = kvm.create_vm().unwrap(); + + let mut aia_device = kvm_bindings::kvm_create_device { + type_: kvm_device_type_KVM_DEV_TYPE_FSL_MPIC_20, + fd: 0, + flags: KVM_CREATE_DEVICE_TEST, + }; + // This fails on riscv64 as it does not use MPIC (MultiProcessor Interrupt Controller), + // it uses the vAIA. 
+ vm.create_device(&mut aia_device).unwrap_err(); + + let device_fd = create_aia_device(&vm, 0); + + // AIA on riscv64 requires at least one online vCPU prior to setting + // device attributes. Otherwise it would fail when trying to set the + // address of the IMSIC. + vm.create_vcpu(0).unwrap(); + + // Following lines to re-construct device_fd are used to test + // DeviceFd::from_raw_fd() and DeviceFd::as_raw_fd(). + let raw_fd = unsafe { libc::dup(device_fd.as_raw_fd()) }; + assert!(raw_fd >= 0); + let device_fd = unsafe { DeviceFd::from_raw_fd(raw_fd) }; + + // Set maximum supported number of IRQs of the vAIA device to 128. + set_supported_nr_irqs(&device_fd, 128); + + // Before requesting vAIA device initialization, the APLIC and IMSIC addresses must be set. + let aplic_addr: u64 = 0x4000; + device_fd + .set_device_attr(&kvm_device_attr { + group: KVM_DEV_RISCV_AIA_GRP_ADDR, + attr: u64::from(KVM_DEV_RISCV_AIA_ADDR_APLIC), + addr: &aplic_addr as *const u64 as u64, + flags: 0, + }) + .unwrap(); + let imsic_addr: u64 = 0x8000; + device_fd + .set_device_attr(&kvm_device_attr { + group: KVM_DEV_RISCV_AIA_GRP_ADDR, + attr: 1u64, + addr: &imsic_addr as *const u64 as u64, + flags: 0, + }) + .unwrap(); + + // Initialize valid vAIA device. + request_aia_init(&device_fd); + + // Test `get_device_attr`. Here we try to extract the maximum supported number of IRQs. + // This value should be saved in the address provided to the ioctl. + let mut data: u32 = 0; + + let mut aia_attr = kvm_bindings::kvm_device_attr { + group: KVM_DEV_RISCV_AIA_GRP_CONFIG, + attr: u64::from(KVM_DEV_RISCV_AIA_CONFIG_SRCS), + addr: data as u64, + ..Default::default() + }; + + // Without properly providing the address to where the + // value will be stored, the ioctl fails with EFAULT. + let res = unsafe { device_fd.get_device_attr(&mut aia_attr) }; + assert_eq!(res, Err(Error::new(libc::EFAULT))); + + aia_attr.addr = &mut data as *mut u32 as u64; + unsafe { device_fd.get_device_attr(&mut aia_attr) }.unwrap(); + // The maximum supported number of IRQs should be 128, same as the value + // when we initialize the AIA. + assert_eq!(data, 128); + } +} diff --git a/kvm-ioctls/src/ioctls/mod.rs b/kvm-ioctls/src/ioctls/mod.rs new file mode 100644 index 0000000..a5f96c1 --- /dev/null +++ b/kvm-ioctls/src/ioctls/mod.rs @@ -0,0 +1,200 @@ +// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 OR MIT +// +// Portions Copyright 2017 The Chromium OS Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the THIRD-PARTY file. + +use std::mem::size_of; +use std::os::unix::io::AsRawFd; +use std::ptr::{null_mut, NonNull}; + +use kvm_bindings::{ + kvm_coalesced_mmio, kvm_coalesced_mmio_ring, kvm_run, KVM_COALESCED_MMIO_PAGE_OFFSET, +}; +use vmm_sys_util::errno; + +/// Wrappers over KVM device ioctls. +pub mod device; +/// Wrappers over KVM system ioctls. +pub mod system; +/// Wrappers over KVM VCPU ioctls. +pub mod vcpu; +/// Wrappers over KVM Virtual Machine ioctls. +pub mod vm; + +/// A specialized `Result` type for KVM ioctls. +/// +/// This typedef is generally used to avoid writing out `errno::Error` directly and +/// is otherwise a direct mapping to `Result`. +pub type Result<T> = std::result::Result<T, errno::Error>; + +/// A wrapper around the coalesced MMIO ring page. +#[derive(Debug)] +pub(crate) struct KvmCoalescedIoRing { + addr: NonNull<kvm_coalesced_mmio_ring>, + page_size: usize, +} + +impl KvmCoalescedIoRing { + /// Maps the coalesced MMIO ring from the vCPU file descriptor.
+ pub(crate) fn mmap_from_fd<F: AsRawFd>(fd: &F) -> Result<Self> { + // SAFETY: We trust the sysconf libc function and we're calling it + // with a correct parameter. + let page_size = match unsafe { libc::sysconf(libc::_SC_PAGESIZE) } { + -1 => return Err(errno::Error::last()), + ps => ps as usize, + }; + + let offset = KVM_COALESCED_MMIO_PAGE_OFFSET * page_size as u32; + // SAFETY: KVM guarantees that there is a page at offset + // KVM_COALESCED_MMIO_PAGE_OFFSET * PAGE_SIZE if the appropriate + // capability is available. If it is not, the call will simply + // fail. + let addr = unsafe { + libc::mmap( + null_mut(), + page_size, + libc::PROT_READ | libc::PROT_WRITE, + libc::MAP_SHARED, + fd.as_raw_fd(), + offset.into(), + ) + }; + let addr = NonNull::new(addr) + .filter(|addr| addr.as_ptr() != libc::MAP_FAILED) + .ok_or_else(errno::Error::last)?; + + Ok(Self { + addr: addr.cast(), + page_size, + }) + } + + /// Compute the size of the MMIO ring. + /// Taken from [include/uapi/linux/kvm.h](https://elixir.bootlin.com/linux/v6.6/source/include/uapi/linux/kvm.h#L562) + const fn ring_max(&self) -> usize { + (self.page_size - size_of::<kvm_coalesced_mmio_ring>()) / size_of::<kvm_coalesced_mmio>() + } + + /// Gets a mutable reference to the ring. + fn ring_mut(&mut self) -> &mut kvm_coalesced_mmio_ring { + // SAFETY: We have a `&mut self` and the pointer is private, so this + // access is exclusive. + unsafe { self.addr.as_mut() } + } + + /// Reads a single entry from the MMIO ring. + /// + /// # Returns + /// + /// An entry from the MMIO ring buffer, or [`None`] if the ring is empty. + pub(crate) fn read_entry(&mut self) -> Option<kvm_coalesced_mmio> { + let ring_max = self.ring_max(); + + let ring = self.ring_mut(); + if ring.first == ring.last { + return None; + } + + let entries = ring.coalesced_mmio.as_ptr(); + // SAFETY: `ring.first` is a `u32` coming from mapped memory filled + // by the kernel, so we trust it. `entries` is a pointer coming from + // mmap(), so pointer arithmetic cannot overflow. We have a `&mut self`, + // so nobody else has access to the contents of the pointer. + let elem = unsafe { entries.add(ring.first as usize).read() }; + ring.first = (ring.first + 1) % ring_max as u32; + + Some(elem) + } +} + +impl Drop for KvmCoalescedIoRing { + fn drop(&mut self) { + // SAFETY: This is safe because we mmap the page ourselves, and nobody + // else is holding a reference to it. + unsafe { + libc::munmap(self.addr.as_ptr().cast(), self.page_size); + } + } +} + +// SAFETY: See safety comments about [`KvmRunWrapper`]. +unsafe impl Send for KvmCoalescedIoRing {} +// SAFETY: See safety comments about [`KvmRunWrapper`]. +unsafe impl Sync for KvmCoalescedIoRing {} + +/// Safe wrapper over the `kvm_run` struct. +/// +/// The wrapper is needed for sending the pointer to `kvm_run` between +/// threads as raw pointers do not implement `Send` and `Sync`. +#[derive(Debug)] +pub struct KvmRunWrapper { + kvm_run_ptr: NonNull<kvm_run>, + // This field is needed so we can `munmap` the memory mapped to hold `kvm_run`. + mmap_size: usize, +} + +// SAFETY: Send and Sync aren't automatically inherited for the raw address pointer. +// Accessing that pointer is only done through the stateless interface which +// allows the object to be shared by multiple threads without a decrease in +// safety. +unsafe impl Send for KvmRunWrapper {} +// SAFETY: See above. +unsafe impl Sync for KvmRunWrapper {} + +impl KvmRunWrapper { + /// Maps the first `size` bytes of the given `fd`. + /// + /// # Arguments + /// * `fd` - File descriptor to mmap from. + /// * `size` - Size of memory region in bytes.
+ pub fn mmap_from_fd<F: AsRawFd>(fd: &F, size: usize) -> Result<KvmRunWrapper> { + // SAFETY: This is safe because we are creating a mapping in a place not already used by + // any other area in this process. + let addr = unsafe { + libc::mmap( + null_mut(), + size, + libc::PROT_READ | libc::PROT_WRITE, + libc::MAP_SHARED, + fd.as_raw_fd(), + 0, + ) + }; + let addr = NonNull::new(addr) + .filter(|addr| addr.as_ptr() != libc::MAP_FAILED) + .ok_or_else(errno::Error::last)?; + + Ok(KvmRunWrapper { + kvm_run_ptr: addr.cast(), + mmap_size: size, + }) + } + + /// Returns a mutable reference to `kvm_run`. + pub fn as_mut_ref(&mut self) -> &mut kvm_run { + // SAFETY: Safe because we know we mapped enough memory to hold the kvm_run struct because + // the kernel told us how large it was. Nobody else has access to this pointer so it cannot + // be aliased. + unsafe { self.kvm_run_ptr.as_mut() } + } +} + +impl AsRef<kvm_run> for KvmRunWrapper { + fn as_ref(&self) -> &kvm_run { + // SAFETY: Safe because we know we mapped enough memory to hold the kvm_run struct because + // the kernel told us how large it was. + unsafe { self.kvm_run_ptr.as_ref() } + } +} + +impl Drop for KvmRunWrapper { + fn drop(&mut self) { + // SAFETY: This is safe because we mmap the area at kvm_run_ptr ourselves, + // and nobody else is holding a reference to it. + unsafe { + libc::munmap(self.kvm_run_ptr.as_ptr().cast(), self.mmap_size); + } + } +} diff --git a/kvm-ioctls/src/ioctls/system.rs b/kvm-ioctls/src/ioctls/system.rs new file mode 100644 index 0000000..83d41fc --- /dev/null +++ b/kvm-ioctls/src/ioctls/system.rs @@ -0,0 +1,986 @@ +// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 OR MIT +// +// Portions Copyright 2017 The Chromium OS Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the THIRD-PARTY file. +use libc::{open, O_CLOEXEC, O_RDWR}; +use std::ffi::CStr; +use std::fs::File; +use std::os::raw::{c_char, c_ulong}; +use std::os::unix::io::{AsRawFd, FromRawFd, RawFd}; + +use crate::cap::Cap; +use crate::ioctls::vm::{new_vmfd, VmFd}; +use crate::ioctls::Result; +use crate::kvm_ioctls::*; +#[cfg(target_arch = "aarch64")] +use kvm_bindings::KVM_VM_TYPE_ARM_IPA_SIZE_MASK; +#[cfg(target_arch = "x86_64")] +use kvm_bindings::{CpuId, MsrList, Msrs, KVM_MAX_CPUID_ENTRIES, KVM_MAX_MSR_ENTRIES}; +use vmm_sys_util::errno; +#[cfg(target_arch = "x86_64")] +use vmm_sys_util::ioctl::ioctl_with_mut_ptr; +use vmm_sys_util::ioctl::{ioctl, ioctl_with_val}; + +/// Wrapper over KVM system ioctls. +#[derive(Debug)] +pub struct Kvm { + kvm: File, +} + +impl Kvm { + /// Opens `/dev/kvm` and returns a `Kvm` object on success. + /// + /// # Example + /// + /// ``` + /// use kvm_ioctls::Kvm; + /// let kvm = Kvm::new().unwrap(); + /// ``` + #[allow(clippy::new_ret_no_self)] + pub fn new() -> Result<Self> { + // Open `/dev/kvm` using `O_CLOEXEC` flag. + let fd = Self::open_with_cloexec(true)?; + // SAFETY: Safe because we verify that the fd is valid in `open_with_cloexec` and we own + // the fd. + Ok(unsafe { Self::from_raw_fd(fd) }) + } + + /// Opens the KVM device at `kvm_path` and returns a `Kvm` object on success. + /// + /// # Arguments + /// + /// * `kvm_path`: path to the KVM device. Usually it is `/dev/kvm`.
+ /// + /// # Example + /// + /// ``` + /// use kvm_ioctls::Kvm; + /// use std::ffi::CString; + /// let kvm_path = CString::new("/dev/kvm").unwrap(); + /// let kvm = Kvm::new_with_path(&kvm_path).unwrap(); + /// ``` + #[allow(clippy::new_ret_no_self)] + pub fn new_with_path

(kvm_path: P) -> Result + where + P: AsRef, + { + // Open `kvm_path` using `O_CLOEXEC` flag. + let fd = Self::open_with_cloexec_at(kvm_path, true)?; + // SAFETY: Safe because we verify that the fd is valid in `open_with_cloexec_at` + // and we own the fd. + Ok(unsafe { Self::from_raw_fd(fd) }) + } + + /// Opens `/dev/kvm` and returns the fd number on success. + /// + /// One usecase for this method is opening `/dev/kvm` before exec-ing into a + /// process with seccomp filters enabled that blacklist the `sys_open` syscall. + /// For this usecase `open_with_cloexec` must be called with the `close_on_exec` + /// parameter set to false. + /// + /// # Arguments + /// + /// * `close_on_exec`: If true opens `/dev/kvm` using the `O_CLOEXEC` flag. + /// + /// # Example + /// + /// ``` + /// # use kvm_ioctls::Kvm; + /// # use std::os::unix::io::FromRawFd; + /// let kvm_fd = Kvm::open_with_cloexec(false).unwrap(); + /// // The `kvm_fd` can now be passed to another process where we can use + /// // `from_raw_fd` for creating a `Kvm` object: + /// let kvm = unsafe { Kvm::from_raw_fd(kvm_fd) }; + /// ``` + pub fn open_with_cloexec(close_on_exec: bool) -> Result { + // SAFETY: Safe because we give a constant nul-terminated string. + let kvm_path = c"/dev/kvm"; + Self::open_with_cloexec_at(kvm_path, close_on_exec) + } + + /// Opens the KVM device at `kvm_path` and returns the fd number on success. + /// Same as [open_with_cloexec()](struct.Kvm.html#method.open_with_cloexec) + /// except this method opens `kvm_path` instead of `/dev/kvm`. + /// + /// # Arguments + /// + /// * `kvm_path`: path to the KVM device. Usually it is `/dev/kvm`. + /// * `close_on_exec`: If true opens `kvm_path` using the `O_CLOEXEC` flag. + /// + /// # Example + /// + /// ``` + /// # use kvm_ioctls::Kvm; + /// # use std::ffi::CString; + /// # use std::os::unix::io::FromRawFd; + /// let kvm_path = CString::new("/dev/kvm").unwrap(); + /// let kvm_fd = Kvm::open_with_cloexec_at(kvm_path, false).unwrap(); + /// // The `kvm_fd` can now be passed to another process where we can use + /// // `from_raw_fd` for creating a `Kvm` object: + /// let kvm = unsafe { Kvm::from_raw_fd(kvm_fd) }; + /// ``` + pub fn open_with_cloexec_at
<P>
(path: P, close_on_exec: bool) -> Result + where + P: AsRef, + { + let open_flags = O_RDWR | if close_on_exec { O_CLOEXEC } else { 0 }; + // SAFETY: Safe because we verify the result. + let ret = unsafe { open(path.as_ref().as_ptr() as *const c_char, open_flags) }; + if ret < 0 { + Err(errno::Error::last()) + } else { + Ok(ret) + } + } + + /// Returns the KVM API version. + /// + /// See the documentation for `KVM_GET_API_VERSION`. + /// + /// # Example + /// + /// ``` + /// # use kvm_ioctls::Kvm; + /// let kvm = Kvm::new().unwrap(); + /// assert_eq!(kvm.get_api_version(), 12); + /// ``` + pub fn get_api_version(&self) -> i32 { + // SAFETY: Safe because we know that our file is a KVM fd and that the request is one of + // the ones defined by kernel. + unsafe { ioctl(self, KVM_GET_API_VERSION()) } + } + + /// AArch64 specific call to get the host Intermediate Physical Address space limit. + /// + /// Returns 0 if the capability is not available and an integer >= 32 otherwise. + #[cfg(target_arch = "aarch64")] + pub fn get_host_ipa_limit(&self) -> i32 { + self.check_extension_int(Cap::ArmVmIPASize) + } + + /// AArch64 specific call to get the number of supported hardware breakpoints. + /// + /// Returns 0 if the capability is not available and a positive integer otherwise. + #[cfg(target_arch = "aarch64")] + pub fn get_guest_debug_hw_bps(&self) -> i32 { + self.check_extension_int(Cap::DebugHwBps) + } + + /// AArch64 specific call to get the number of supported hardware watchpoints. + /// + /// Returns 0 if the capability is not available and a positive integer otherwise. + #[cfg(target_arch = "aarch64")] + pub fn get_guest_debug_hw_wps(&self) -> i32 { + self.check_extension_int(Cap::DebugHwWps) + } + + /// Wrapper over `KVM_CHECK_EXTENSION`. + /// + /// Returns 0 if the capability is not available and a positive integer otherwise. + /// See the documentation for `KVM_CHECK_EXTENSION`. + /// + /// # Arguments + /// + /// * `c` - KVM capability to check in a form of a raw integer. + /// + /// # Example + /// + /// ``` + /// # use kvm_ioctls::Kvm; + /// # use std::os::raw::c_ulong; + /// use kvm_ioctls::Cap; + /// + /// let kvm = Kvm::new().unwrap(); + /// assert!(kvm.check_extension_raw(Cap::MaxVcpus as c_ulong) > 0); + /// ``` + pub fn check_extension_raw(&self, c: c_ulong) -> i32 { + // SAFETY: Safe because we know that our file is a KVM fd. + // If `c` is not a known kernel extension, kernel will return 0. + unsafe { ioctl_with_val(self, KVM_CHECK_EXTENSION(), c) } + } + + /// Wrapper over `KVM_CHECK_EXTENSION`. + /// + /// Returns 0 if the capability is not available and a positive integer otherwise. + /// See the documentation for `KVM_CHECK_EXTENSION`. + /// + /// # Arguments + /// + /// * `c` - KVM capability to check. + /// + /// # Example + /// + /// ``` + /// # use kvm_ioctls::Kvm; + /// use kvm_ioctls::Cap; + /// + /// let kvm = Kvm::new().unwrap(); + /// assert!(kvm.check_extension_int(Cap::MaxVcpus) > 0); + /// ``` + pub fn check_extension_int(&self, c: Cap) -> i32 { + self.check_extension_raw(c as c_ulong) + } + + /// Checks if a particular `Cap` is available. + /// + /// Returns true if the capability is supported and false otherwise. + /// See the documentation for `KVM_CHECK_EXTENSION`. + /// + /// # Arguments + /// + /// * `c` - KVM capability to check. + /// + /// # Example + /// + /// ``` + /// # use kvm_ioctls::Kvm; + /// use kvm_ioctls::Cap; + /// + /// let kvm = Kvm::new().unwrap(); + /// // Check if `KVM_CAP_USER_MEMORY` is supported. 
+ /// assert!(kvm.check_extension(Cap::UserMemory)); + /// ``` + pub fn check_extension(&self, c: Cap) -> bool { + self.check_extension_int(c) > 0 + } + + /// Returns the size of the memory mapping required to use the vcpu's `kvm_run` structure. + /// + /// See the documentation for `KVM_GET_VCPU_MMAP_SIZE`. + /// + /// # Example + /// + /// ``` + /// # use kvm_ioctls::Kvm; + /// let kvm = Kvm::new().unwrap(); + /// assert!(kvm.get_vcpu_mmap_size().unwrap() > 0); + /// ``` + pub fn get_vcpu_mmap_size(&self) -> Result { + // SAFETY: Safe because we know that our file is a KVM fd and we verify the return result. + let res = unsafe { ioctl(self, KVM_GET_VCPU_MMAP_SIZE()) }; + if res > 0 { + Ok(res as usize) + } else { + Err(errno::Error::last()) + } + } + + /// Gets the recommended number of VCPUs per VM. + /// + /// See the documentation for `KVM_CAP_NR_VCPUS`. + /// Default to 4 when `KVM_CAP_NR_VCPUS` is not implemented. + /// + /// # Example + /// + /// ``` + /// # use kvm_ioctls::Kvm; + /// let kvm = Kvm::new().unwrap(); + /// // We expect the number of vCPUs to be > 0 as per KVM API documentation. + /// assert!(kvm.get_nr_vcpus() > 0); + /// ``` + pub fn get_nr_vcpus(&self) -> usize { + let x = self.check_extension_int(Cap::NrVcpus); + if x > 0 { + x as usize + } else { + 4 + } + } + + /// Returns the maximum allowed memory slots per VM. + /// + /// KVM reports the number of available memory slots (`KVM_CAP_NR_MEMSLOTS`) + /// using the extension interface. Both x86 and s390 implement this, ARM + /// and powerpc do not yet enable it. + /// Default to 32 when `KVM_CAP_NR_MEMSLOTS` is not implemented. + /// + /// # Example + /// + /// ``` + /// # use kvm_ioctls::Kvm; + /// let kvm = Kvm::new().unwrap(); + /// assert!(kvm.get_nr_memslots() > 0); + /// ``` + pub fn get_nr_memslots(&self) -> usize { + let x = self.check_extension_int(Cap::NrMemslots); + if x > 0 { + x as usize + } else { + 32 + } + } + + /// Gets the recommended maximum number of VCPUs per VM. + /// + /// See the documentation for `KVM_CAP_MAX_VCPUS`. + /// Returns [get_nr_vcpus()](struct.Kvm.html#method.get_nr_vcpus) when + /// `KVM_CAP_MAX_VCPUS` is not implemented. + /// + /// # Example + /// + /// ``` + /// # use kvm_ioctls::Kvm; + /// let kvm = Kvm::new().unwrap(); + /// assert!(kvm.get_max_vcpus() > 0); + /// ``` + pub fn get_max_vcpus(&self) -> usize { + match self.check_extension_int(Cap::MaxVcpus) { + 0 => self.get_nr_vcpus(), + x => x as usize, + } + } + + /// Gets the Maximum VCPU ID per VM. + /// + /// See the documentation for `KVM_CAP_MAX_VCPU_ID` + /// Returns [get_max_vcpus()](struct.Kvm.html#method.get_max_vcpus) when + /// `KVM_CAP_MAX_VCPU_ID` is not implemented + /// + /// # Example + /// + /// ``` + /// # use kvm_ioctls::Kvm; + /// let kvm = Kvm::new().unwrap(); + /// assert!(kvm.get_max_vcpu_id() > 0); + /// ``` + pub fn get_max_vcpu_id(&self) -> usize { + match self.check_extension_int(Cap::MaxVcpuId) { + 0 => self.get_max_vcpus(), + x => x as usize, + } + } + + #[cfg(target_arch = "x86_64")] + fn get_cpuid(&self, kind: u64, num_entries: usize) -> Result { + if num_entries > KVM_MAX_CPUID_ENTRIES { + // Returns the same error the underlying `ioctl` would have sent. + return Err(errno::Error::new(libc::ENOMEM)); + } + + let mut cpuid = CpuId::new(num_entries).map_err(|_| errno::Error::new(libc::ENOMEM))?; + // SAFETY: The kernel is trusted not to write beyond the bounds of the memory + // allocated for the struct. The limit is read from nent, which is set to the allocated + // size(num_entries) above. 
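+        // `kind` selects between `KVM_GET_SUPPORTED_CPUID()` and `KVM_GET_EMULATED_CPUID()`;
+        // on success the kernel updates `nent` to the number of entries it actually filled in.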
+ let ret = unsafe { ioctl_with_mut_ptr(self, kind, cpuid.as_mut_fam_struct_ptr()) }; + if ret < 0 { + return Err(errno::Error::last()); + } + + Ok(cpuid) + } + + /// X86 specific call to get the system emulated CPUID values. + /// + /// See the documentation for `KVM_GET_EMULATED_CPUID`. + /// + /// # Arguments + /// + /// * `num_entries` - Maximum number of CPUID entries. This function can return less than + /// this when the hardware does not support so many CPUID entries. + /// + /// Returns Error `errno::Error(libc::ENOMEM)` when the input `num_entries` is greater than + /// `KVM_MAX_CPUID_ENTRIES`. + /// + /// # Example + /// + /// ``` + /// extern crate kvm_bindings; + /// use kvm_bindings::KVM_MAX_CPUID_ENTRIES; + /// use kvm_ioctls::Kvm; + /// + /// let kvm = Kvm::new().unwrap(); + /// let mut cpuid = kvm.get_emulated_cpuid(KVM_MAX_CPUID_ENTRIES).unwrap(); + /// let cpuid_entries = cpuid.as_mut_slice(); + /// assert!(cpuid_entries.len() <= KVM_MAX_CPUID_ENTRIES); + /// ``` + #[cfg(target_arch = "x86_64")] + pub fn get_emulated_cpuid(&self, num_entries: usize) -> Result { + self.get_cpuid(KVM_GET_EMULATED_CPUID(), num_entries) + } + + /// X86 specific call to get the system supported CPUID values. + /// + /// See the documentation for `KVM_GET_SUPPORTED_CPUID`. + /// + /// # Arguments + /// + /// * `num_entries` - Maximum number of CPUID entries. This function can return less than + /// this when the hardware does not support so many CPUID entries. + /// + /// Returns Error `errno::Error(libc::ENOMEM)` when the input `num_entries` is greater than + /// `KVM_MAX_CPUID_ENTRIES`. + /// + /// # Example + /// + /// ``` + /// extern crate kvm_bindings; + /// use kvm_bindings::KVM_MAX_CPUID_ENTRIES; + /// use kvm_ioctls::Kvm; + /// + /// let kvm = Kvm::new().unwrap(); + /// let mut cpuid = kvm.get_supported_cpuid(KVM_MAX_CPUID_ENTRIES).unwrap(); + /// let cpuid_entries = cpuid.as_mut_slice(); + /// assert!(cpuid_entries.len() <= KVM_MAX_CPUID_ENTRIES); + /// ``` + #[cfg(target_arch = "x86_64")] + pub fn get_supported_cpuid(&self, num_entries: usize) -> Result { + self.get_cpuid(KVM_GET_SUPPORTED_CPUID(), num_entries) + } + + /// X86 specific call to get list of supported MSRS + /// + /// See the documentation for `KVM_GET_MSR_INDEX_LIST`. + /// + /// # Example + /// + /// ``` + /// use kvm_ioctls::Kvm; + /// + /// let kvm = Kvm::new().unwrap(); + /// let msr_index_list = kvm.get_msr_index_list().unwrap(); + /// ``` + #[cfg(target_arch = "x86_64")] + pub fn get_msr_index_list(&self) -> Result { + let mut msr_list = + MsrList::new(KVM_MAX_MSR_ENTRIES).map_err(|_| errno::Error::new(libc::ENOMEM))?; + + // SAFETY: The kernel is trusted not to write beyond the bounds of the memory + // allocated for the struct. The limit is read from nmsrs, which is set to the allocated + // size (KVM_MAX_MSR_ENTRIES) above. + let ret = unsafe { + ioctl_with_mut_ptr( + self, + KVM_GET_MSR_INDEX_LIST(), + msr_list.as_mut_fam_struct_ptr(), + ) + }; + if ret < 0 { + return Err(errno::Error::last()); + } + + // The ioctl will also update the internal `nmsrs` with the actual count. + Ok(msr_list) + } + + /// X86 specific call to get a list of MSRs that can be passed to the KVM_GET_MSRS system ioctl. + /// + /// See the documentation for `KVM_GET_MSR_FEATURE_INDEX_LIST`. 
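+    /// Unlike [get_msr_index_list()](struct.Kvm.html#method.get_msr_index_list), the
+    /// indices returned here describe MSR-based features, whose values can be read
+    /// with [get_msrs()](struct.Kvm.html#method.get_msrs) on the `Kvm` fd itself,
+    /// without creating a vCPU.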
+ /// + /// # Example + /// + /// ``` + /// use kvm_bindings::{kvm_msr_entry, Msrs}; + /// use kvm_ioctls::Kvm; + /// + /// let kvm = Kvm::new().unwrap(); + /// let msr_feature_index_list = kvm.get_msr_feature_index_list().unwrap(); + /// ``` + #[cfg(target_arch = "x86_64")] + pub fn get_msr_feature_index_list(&self) -> Result { + let mut msr_list = + MsrList::new(KVM_MAX_MSR_ENTRIES).map_err(|_| errno::Error::new(libc::ENOMEM))?; + + // SAFETY: The kernel is trusted not to write beyond the bounds of the memory + // allocated for the struct. The limit is read from nmsrs, which is set to the allocated + // size (KVM_MAX_MSR_ENTRIES) above. + let ret = unsafe { + ioctl_with_mut_ptr( + self, + KVM_GET_MSR_FEATURE_INDEX_LIST(), + msr_list.as_mut_fam_struct_ptr(), + ) + }; + if ret < 0 { + return Err(errno::Error::last()); + } + + Ok(msr_list) + } + + /// X86 specific call to read the values of MSR-based features that are available for the VM. + /// As opposed to `VcpuFd::get_msrs()`, this call returns all the MSRs supported by the + /// system, similar to `get_supported_cpuid()` for CPUID. + /// + /// See the documentation for `KVM_GET_MSRS`. + /// + /// # Arguments + /// + /// * `msrs` - MSRs (input/output). For details check the `kvm_msrs` structure in the + /// [KVM API doc](https://www.kernel.org/doc/Documentation/virtual/kvm/api.txt). + /// + /// # Example + /// + /// ``` + /// use kvm_bindings::{kvm_msr_entry, Msrs}; + /// use kvm_ioctls::Kvm; + /// + /// let kvm = Kvm::new().unwrap(); + /// let msr_feature_index_list = kvm.get_msr_feature_index_list().unwrap(); + /// let mut msrs = Msrs::from_entries( + /// &msr_feature_index_list + /// .as_slice() + /// .iter() + /// .map(|&idx| kvm_msr_entry { + /// index: idx, + /// ..Default::default() + /// }) + /// .collect::>(), + /// ) + /// .unwrap(); + /// let ret = kvm.get_msrs(&mut msrs).unwrap(); + /// ``` + #[cfg(target_arch = "x86_64")] + pub fn get_msrs(&self, msrs: &mut Msrs) -> Result { + // SAFETY: Here we trust the kernel not to read past the end of the kvm_msrs struct. + let ret = unsafe { ioctl_with_mut_ptr(self, KVM_GET_MSRS(), msrs.as_mut_fam_struct_ptr()) }; + if ret < 0 { + return Err(errno::Error::last()); + } + Ok(ret as usize) + } + + /// Creates a VM fd using the KVM fd. + /// + /// See the documentation for `KVM_CREATE_VM`. + /// A call to this function will also initialize the size of the vcpu mmap area using the + /// `KVM_GET_VCPU_MMAP_SIZE` ioctl. + /// + /// # Example + /// + /// ``` + /// # use kvm_ioctls::Kvm; + /// let kvm = Kvm::new().unwrap(); + /// let vm = kvm.create_vm().unwrap(); + /// // Check that the VM mmap size is the same reported by `KVM_GET_VCPU_MMAP_SIZE`. + /// assert!(vm.run_size() == kvm.get_vcpu_mmap_size().unwrap()); + /// ``` + #[cfg(not(any(target_arch = "aarch64")))] + pub fn create_vm(&self) -> Result { + self.create_vm_with_type(0) // Create using default VM type + } + + /// AArch64 specific create_vm to create a VM fd using the KVM fd using the host's maximum IPA size. + /// + /// See the arm64 section of KVM documentation for `KVM_CREATE_VM`. + /// A call to this function will also initialize the size of the vcpu mmap area using the + /// `KVM_GET_VCPU_MMAP_SIZE` ioctl. + /// + /// # Example + /// + /// ``` + /// # use kvm_ioctls::Kvm; + /// let kvm = Kvm::new().unwrap(); + /// let vm = kvm.create_vm().unwrap(); + /// // Check that the VM mmap size is the same reported by `KVM_GET_VCPU_MMAP_SIZE`. 
+ /// assert!(vm.run_size() == kvm.get_vcpu_mmap_size().unwrap()); + /// ``` + #[cfg(target_arch = "aarch64")] + pub fn create_vm(&self) -> Result { + let mut ipa_size = 0; // Create using default VM type + if self.check_extension(Cap::ArmVmIPASize) { + ipa_size = self.get_host_ipa_limit(); + } + self.create_vm_with_type(ipa_size as u64) + } + + /// AArch64 specific function to create a VM fd using the KVM fd with flexible IPA size. + /// + /// See the arm64 section of KVM documentation for `KVM_CREATE_VM`. + /// A call to this function will also initialize the size of the vcpu mmap area using the + /// `KVM_GET_VCPU_MMAP_SIZE` ioctl. + /// + /// Note: `Cap::ArmVmIPASize` should be checked using `check_extension` before calling + /// this function to determine if the host machine supports the IPA size capability. + /// + /// # Arguments + /// + /// * `ipa_size` - Guest VM IPA size, 32 <= ipa_size <= Host_IPA_Limit. + /// The value of `Host_IPA_Limit` may be different between hardware + /// implementations and can be extracted by calling `get_host_ipa_limit`. + /// Possible values can be found in documentation of registers `TCR_EL2` + /// and `VTCR_EL2`. + /// + /// # Example + /// + /// ``` + /// # use kvm_ioctls::{Kvm, Cap}; + /// let kvm = Kvm::new().unwrap(); + /// // Check if the ArmVmIPASize cap is supported. + /// if kvm.check_extension(Cap::ArmVmIPASize) { + /// let host_ipa_limit = kvm.get_host_ipa_limit(); + /// let vm = kvm.create_vm_with_ipa_size(host_ipa_limit as u32).unwrap(); + /// // Check that the VM mmap size is the same reported by `KVM_GET_VCPU_MMAP_SIZE`. + /// assert!(vm.run_size() == kvm.get_vcpu_mmap_size().unwrap()); + /// } + /// ``` + #[cfg(target_arch = "aarch64")] + pub fn create_vm_with_ipa_size(&self, ipa_size: u32) -> Result { + self.create_vm_with_type((ipa_size & KVM_VM_TYPE_ARM_IPA_SIZE_MASK).into()) + } + + /// Creates a VM fd using the KVM fd of a specific type. + /// + /// See the documentation for `KVM_CREATE_VM`. + /// A call to this function will also initialize the size of the vcpu mmap area using the + /// `KVM_GET_VCPU_MMAP_SIZE` ioctl. + /// + /// * `vm_type` - Platform and architecture specific platform VM type. A value of 0 is the equivalent + /// to using the default VM type. + /// # Example + /// + /// ``` + /// # use kvm_ioctls::Kvm; + /// let kvm = Kvm::new().unwrap(); + /// let vm = kvm.create_vm_with_type(0).unwrap(); + /// // Check that the VM mmap size is the same reported by `KVM_GET_VCPU_MMAP_SIZE`. + /// assert!(vm.run_size() == kvm.get_vcpu_mmap_size().unwrap()); + /// ``` + pub fn create_vm_with_type(&self, vm_type: u64) -> Result { + // SAFETY: Safe because we know `self.kvm` is a real KVM fd as this module is the only one + // that create Kvm objects. + let ret = unsafe { ioctl_with_val(&self.kvm, KVM_CREATE_VM(), vm_type) }; + if ret >= 0 { + // SAFETY: Safe because we verify the value of ret and we are the owners of the fd. + let vm_file = unsafe { File::from_raw_fd(ret) }; + let run_mmap_size = self.get_vcpu_mmap_size()?; + Ok(new_vmfd(vm_file, run_mmap_size)) + } else { + Err(errno::Error::last()) + } + } + + /// Creates a VmFd object from a VM RawFd. + /// + /// # Arguments + /// + /// * `fd` - the RawFd used for creating the VmFd object. + /// + /// # Safety + /// + /// This function is unsafe as the primitives currently returned have the contract that + /// they are the sole owner of the file descriptor they are wrapping. 
Usage of this function + /// could accidentally allow violating this contract which can cause memory unsafety in code + /// that relies on it being true. + /// + /// The caller of this method must make sure the fd is valid and nothing else uses it. + /// + /// # Example + /// + /// ```rust + /// # extern crate kvm_ioctls; + /// # use std::os::unix::io::AsRawFd; + /// # use kvm_ioctls::Kvm; + /// let kvm = Kvm::new().unwrap(); + /// let vm = kvm.create_vm().unwrap(); + /// let rawfd = unsafe { libc::dup(vm.as_raw_fd()) }; + /// assert!(rawfd >= 0); + /// let vm = unsafe { kvm.create_vmfd_from_rawfd(rawfd).unwrap() }; + /// ``` + pub unsafe fn create_vmfd_from_rawfd(&self, fd: RawFd) -> Result { + let run_mmap_size = self.get_vcpu_mmap_size()?; + Ok(new_vmfd(File::from_raw_fd(fd), run_mmap_size)) + } +} + +impl AsRawFd for Kvm { + fn as_raw_fd(&self) -> RawFd { + self.kvm.as_raw_fd() + } +} + +impl FromRawFd for Kvm { + /// Creates a new Kvm object assuming `fd` represents an existing open file descriptor + /// associated with `/dev/kvm`. + /// + /// For usage examples check [open_with_cloexec()](struct.Kvm.html#method.open_with_cloexec). + /// + /// # Arguments + /// + /// * `fd` - File descriptor for `/dev/kvm`. + /// + /// # Safety + /// + /// This function is unsafe as the primitives currently returned have the contract that + /// they are the sole owner of the file descriptor they are wrapping. Usage of this function + /// could accidentally allow violating this contract which can cause memory unsafety in code + /// that relies on it being true. + /// + /// The caller of this method must make sure the fd is valid and nothing else uses it. + /// + /// # Example + /// + /// ``` + /// # use kvm_ioctls::Kvm; + /// # use std::os::unix::io::FromRawFd; + /// let kvm_fd = Kvm::open_with_cloexec(true).unwrap(); + /// // Safe because we verify that the fd is valid in `open_with_cloexec` and we own the fd. 
+ /// let kvm = unsafe { Kvm::from_raw_fd(kvm_fd) }; + /// ``` + unsafe fn from_raw_fd(fd: RawFd) -> Self { + Kvm { + kvm: File::from_raw_fd(fd), + } + } +} + +#[cfg(test)] +mod tests { + #![allow(clippy::undocumented_unsafe_blocks)] + use super::*; + use libc::{fcntl, FD_CLOEXEC, F_GETFD}; + use std::os::fd::IntoRawFd; + #[cfg(target_arch = "x86_64")] + use vmm_sys_util::fam::FamStruct; + + #[test] + fn test_kvm_new() { + Kvm::new().unwrap(); + } + + #[test] + fn test_kvm_new_with_path() { + let kvm_path = c"/dev/kvm"; + Kvm::new_with_path(kvm_path).unwrap(); + } + + #[test] + fn test_open_with_cloexec() { + let fd = Kvm::open_with_cloexec(false).unwrap(); + let flags = unsafe { fcntl(fd, F_GETFD, 0) }; + assert_eq!(flags & FD_CLOEXEC, 0); + let fd = Kvm::open_with_cloexec(true).unwrap(); + let flags = unsafe { fcntl(fd, F_GETFD, 0) }; + assert_eq!(flags & FD_CLOEXEC, FD_CLOEXEC); + } + + #[test] + fn test_open_with_cloexec_at() { + let kvm_path = std::ffi::CString::new("/dev/kvm").unwrap(); + let fd = Kvm::open_with_cloexec_at(&kvm_path, false).unwrap(); + let flags = unsafe { fcntl(fd, F_GETFD, 0) }; + assert_eq!(flags & FD_CLOEXEC, 0); + let fd = Kvm::open_with_cloexec_at(&kvm_path, true).unwrap(); + let flags = unsafe { fcntl(fd, F_GETFD, 0) }; + assert_eq!(flags & FD_CLOEXEC, FD_CLOEXEC); + } + + #[test] + fn test_kvm_api_version() { + let kvm = Kvm::new().unwrap(); + assert_eq!(kvm.get_api_version(), 12); + assert!(kvm.check_extension(Cap::UserMemory)); + } + + #[test] + fn test_kvm_check_extension() { + let kvm = Kvm::new().unwrap(); + // unsupported extension will return 0 + assert_eq!(kvm.check_extension_raw(696969), 0); + } + + #[test] + #[cfg(target_arch = "aarch64")] + fn test_get_host_ipa_limit() { + let kvm = Kvm::new().unwrap(); + let host_ipa_limit = kvm.get_host_ipa_limit(); + + if host_ipa_limit > 0 { + assert!(host_ipa_limit >= 32); + } else { + // if unsupported, the return value should be 0. + assert_eq!(host_ipa_limit, 0); + } + } + + #[test] + #[cfg(target_arch = "aarch64")] + fn test_guest_debug_hw_capacity() { + let kvm = Kvm::new().unwrap(); + // The number of supported breakpoints and watchpoints may vary on + // different platforms. + // It could be 0 if no supported, or any positive integer otherwise. 
+ assert!(kvm.get_guest_debug_hw_bps() >= 0); + assert!(kvm.get_guest_debug_hw_wps() >= 0); + } + + #[test] + fn test_kvm_getters() { + let kvm = Kvm::new().unwrap(); + + // vCPU related getters + let nr_vcpus = kvm.get_nr_vcpus(); + assert!(nr_vcpus >= 4); + + assert!(kvm.get_max_vcpus() >= nr_vcpus); + + // Memory related getters + assert!(kvm.get_vcpu_mmap_size().unwrap() > 0); + assert!(kvm.get_nr_memslots() >= 32); + } + + #[test] + fn test_create_vm() { + let kvm = Kvm::new().unwrap(); + let vm = kvm.create_vm().unwrap(); + + // Test create_vmfd_from_rawfd() + let rawfd = unsafe { libc::dup(vm.as_raw_fd()) }; + assert!(rawfd >= 0); + let vm = unsafe { kvm.create_vmfd_from_rawfd(rawfd).unwrap() }; + + assert_eq!(vm.run_size(), kvm.get_vcpu_mmap_size().unwrap()); + } + + #[test] + fn test_create_vm_with_type() { + let kvm = Kvm::new().unwrap(); + let vm = kvm.create_vm_with_type(0).unwrap(); + + // Test create_vmfd_from_rawfd() + let rawfd = unsafe { libc::dup(vm.as_raw_fd()) }; + assert!(rawfd >= 0); + let vm = unsafe { kvm.create_vmfd_from_rawfd(rawfd).unwrap() }; + + assert_eq!(vm.run_size(), kvm.get_vcpu_mmap_size().unwrap()); + } + + #[test] + #[cfg(target_arch = "aarch64")] + fn test_create_vm_with_ipa_size() { + let kvm = Kvm::new().unwrap(); + if kvm.check_extension(Cap::ArmVmIPASize) { + let host_ipa_limit = kvm.get_host_ipa_limit(); + // Here we test with the maximum value that the host supports to both test the + // discoverability of supported IPA sizes and likely some other values than 40. + kvm.create_vm_with_ipa_size(host_ipa_limit as u32).unwrap(); + // Test invalid input values + // Case 1: IPA size is smaller than 32. + kvm.create_vm_with_ipa_size(31).unwrap_err(); + // Case 2: IPA size is bigger than Host_IPA_Limit. + kvm.create_vm_with_ipa_size((host_ipa_limit + 1) as u32) + .unwrap_err(); + } else { + // Unsupported, we can't provide an IPA size. Only KVM type=0 works. 
+ kvm.create_vm_with_type(0).unwrap_err(); + } + } + + #[cfg(target_arch = "x86_64")] + #[test] + fn test_get_supported_cpuid() { + let kvm = Kvm::new().unwrap(); + let mut cpuid = kvm.get_supported_cpuid(KVM_MAX_CPUID_ENTRIES).unwrap(); + let cpuid_entries = cpuid.as_mut_slice(); + assert!(!cpuid_entries.is_empty()); + assert!(cpuid_entries.len() <= KVM_MAX_CPUID_ENTRIES); + + // Test case for more than MAX entries + let cpuid_err = kvm.get_emulated_cpuid(KVM_MAX_CPUID_ENTRIES + 1_usize); + cpuid_err.unwrap_err(); + } + + #[test] + #[cfg(target_arch = "x86_64")] + fn test_get_emulated_cpuid() { + let kvm = Kvm::new().unwrap(); + let mut cpuid = kvm.get_emulated_cpuid(KVM_MAX_CPUID_ENTRIES).unwrap(); + let cpuid_entries = cpuid.as_mut_slice(); + assert!(!cpuid_entries.is_empty()); + assert!(cpuid_entries.len() <= KVM_MAX_CPUID_ENTRIES); + + // Test case for more than MAX entries + let cpuid_err = kvm.get_emulated_cpuid(KVM_MAX_CPUID_ENTRIES + 1_usize); + cpuid_err.unwrap_err(); + } + + #[cfg(target_arch = "x86_64")] + #[test] + fn test_cpuid_clone() { + let kvm = Kvm::new().unwrap(); + + // Test from_raw_fd() + let rawfd = unsafe { libc::dup(kvm.as_raw_fd()) }; + assert!(rawfd >= 0); + let kvm = unsafe { Kvm::from_raw_fd(rawfd) }; + + let cpuid_1 = kvm.get_supported_cpuid(KVM_MAX_CPUID_ENTRIES).unwrap(); + let _ = CpuId::new(cpuid_1.as_fam_struct_ref().len()).unwrap(); + } + + #[test] + #[cfg(target_arch = "x86_64")] + fn get_msr_index_list() { + let kvm = Kvm::new().unwrap(); + let msr_list = kvm.get_msr_index_list().unwrap(); + assert!(msr_list.as_slice().len() >= 2); + } + + #[test] + #[cfg(target_arch = "x86_64")] + fn get_msr_feature_index_list() { + let kvm = Kvm::new().unwrap(); + let msr_feature_index_list = kvm.get_msr_feature_index_list().unwrap(); + assert!(!msr_feature_index_list.as_slice().is_empty()); + } + + #[test] + #[cfg(target_arch = "x86_64")] + fn get_msrs() { + use kvm_bindings::kvm_msr_entry; + + let kvm = Kvm::new().unwrap(); + let mut msrs = Msrs::from_entries(&[ + kvm_msr_entry { + index: 0x0000010a, // MSR_IA32_ARCH_CAPABILITIES + ..Default::default() + }, + kvm_msr_entry { + index: 0x00000345, // MSR_IA32_PERF_CAPABILITIES + ..Default::default() + }, + ]) + .unwrap(); + let nmsrs = kvm.get_msrs(&mut msrs).unwrap(); + + assert_eq!(nmsrs, 2); + } + + #[test] + fn test_bad_kvm_fd() { + let badf_errno = libc::EBADF; + + let faulty_kvm = Kvm { + kvm: unsafe { File::from_raw_fd(-2) }, + }; + + assert_eq!( + faulty_kvm.get_vcpu_mmap_size().unwrap_err().errno(), + badf_errno + ); + assert_eq!(faulty_kvm.get_nr_vcpus(), 4); + assert_eq!(faulty_kvm.get_nr_memslots(), 32); + #[cfg(target_arch = "x86_64")] + { + assert_eq!( + faulty_kvm.get_emulated_cpuid(4).err().unwrap().errno(), + badf_errno + ); + assert_eq!( + faulty_kvm.get_supported_cpuid(4).err().unwrap().errno(), + badf_errno + ); + + assert_eq!( + faulty_kvm.get_msr_index_list().err().unwrap().errno(), + badf_errno + ); + } + assert_eq!(faulty_kvm.create_vm().err().unwrap().errno(), badf_errno); + + // Don't drop the File object, or it'll notice the file it's trying to close is + // invalid and abort the process. + let _ = faulty_kvm.kvm.into_raw_fd(); + } +} diff --git a/kvm-ioctls/src/ioctls/vcpu.rs b/kvm-ioctls/src/ioctls/vcpu.rs new file mode 100644 index 0000000..8b2caa9 --- /dev/null +++ b/kvm-ioctls/src/ioctls/vcpu.rs @@ -0,0 +1,3791 @@ +// Copyright © 2024 Institute of Software, CAS. All rights reserved. +// +// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 OR MIT +// +// Portions Copyright 2017 The Chromium OS Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the THIRD-PARTY file. + +use kvm_bindings::*; +use libc::EINVAL; +use std::fs::File; +use std::os::unix::io::{AsRawFd, RawFd}; + +use crate::ioctls::{KvmCoalescedIoRing, KvmRunWrapper, Result}; +use crate::kvm_ioctls::*; +use vmm_sys_util::errno; +use vmm_sys_util::ioctl::{ioctl, ioctl_with_mut_ref, ioctl_with_ref}; +#[cfg(target_arch = "x86_64")] +use vmm_sys_util::ioctl::{ioctl_with_mut_ptr, ioctl_with_ptr, ioctl_with_val}; + +/// Helper method to obtain the size of the register through its id +#[cfg(any(target_arch = "aarch64", target_arch = "riscv64"))] +pub fn reg_size(reg_id: u64) -> usize { + 2_usize.pow(((reg_id & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT) as u32) +} + +/// Information about a [`VcpuExit`] triggered by an Hypercall (`KVM_EXIT_HYPERCALL`). +#[derive(Debug)] +pub struct HypercallExit<'a> { + /// The hypercall number. + pub nr: u64, + /// The arguments for the hypercall. + pub args: [u64; 6], + /// The return code to be indicated to the guest. + pub ret: &'a mut u64, + /// Whether the hypercall was executed in long mode. + pub longmode: u32, +} + +/// Information about a [`VcpuExit`] triggered by an MSR read (`KVM_EXIT_X86_RDMSR`). +#[derive(Debug)] +pub struct ReadMsrExit<'a> { + /// Must be set to 1 by the the user if the read access should fail. This + /// will inject a #GP fault into the guest when the VCPU is executed + /// again. + pub error: &'a mut u8, + /// The reason for this exit. + pub reason: MsrExitReason, + /// The MSR the guest wants to read. + pub index: u32, + /// The data to be supplied by the user as the MSR Contents to the guest. + pub data: &'a mut u64, +} + +/// Information about a [`VcpuExit`] triggered by an MSR write (`KVM_EXIT_X86_WRMSR`). +#[derive(Debug)] +pub struct WriteMsrExit<'a> { + /// Must be set to 1 by the the user if the write access should fail. This + /// will inject a #GP fault into the guest when the VCPU is executed + /// again. + pub error: &'a mut u8, + /// The reason for this exit. + pub reason: MsrExitReason, + /// The MSR the guest wants to write. + pub index: u32, + /// The data the guest wants to write into the MSR. + pub data: u64, +} + +bitflags::bitflags! { + /// The reason for a [`VcpuExit::X86Rdmsr`] or[`VcpuExit::X86Wrmsr`]. This + /// is also used when enabling + /// [`Cap::X86UserSpaceMsr`](crate::Cap::X86UserSpaceMsr) to specify which + /// reasons should be forwarded to the user via those exits. + #[derive(Clone, Copy, Debug, PartialEq, Eq)] + pub struct MsrExitReason: u32 { + /// Corresponds to [`KVM_MSR_EXIT_REASON_UNKNOWN`]. The exit was + /// triggered by an access to an MSR that is unknown to KVM. + const Unknown = KVM_MSR_EXIT_REASON_UNKNOWN; + /// Corresponds to [`KVM_MSR_EXIT_REASON_INVAL`]. The exit was + /// triggered by an access to an invalid MSR or to reserved bits. + const Inval = KVM_MSR_EXIT_REASON_INVAL; + /// Corresponds to [`KVM_MSR_EXIT_REASON_FILTER`]. The exit was + /// triggered by an access to a filtered MSR. + const Filter = KVM_MSR_EXIT_REASON_FILTER; + } +} + +/// Reasons for vCPU exits. +/// +/// The exit reasons are mapped to the `KVM_EXIT_*` defines in the +/// [Linux KVM header](https://elixir.bootlin.com/linux/latest/source/include/uapi/linux/kvm.h). +#[derive(Debug)] +pub enum VcpuExit<'a> { + /// An out port instruction was run on the given port with the given data. 
+ IoOut(u16 /* port */, &'a [u8] /* data */), + /// An in port instruction was run on the given port. + /// + /// The given slice should be filled in before [run()](struct.VcpuFd.html#method.run) + /// is called again. + IoIn(u16 /* port */, &'a mut [u8] /* data */), + /// A read instruction was run against the given MMIO address. + /// + /// The given slice should be filled in before [run()](struct.VcpuFd.html#method.run) + /// is called again. + MmioRead(u64 /* address */, &'a mut [u8]), + /// A write instruction was run against the given MMIO address with the given data. + MmioWrite(u64 /* address */, &'a [u8]), + /// Corresponds to KVM_EXIT_UNKNOWN. + Unknown, + /// Corresponds to KVM_EXIT_EXCEPTION. + Exception, + /// Corresponds to KVM_EXIT_HYPERCALL. + Hypercall(HypercallExit<'a>), + /// Corresponds to KVM_EXIT_DEBUG. + /// + /// Provides architecture specific information for the debug event. + Debug(kvm_debug_exit_arch), + /// Corresponds to KVM_EXIT_HLT. + Hlt, + /// Corresponds to KVM_EXIT_IRQ_WINDOW_OPEN. + IrqWindowOpen, + /// Corresponds to KVM_EXIT_SHUTDOWN. + Shutdown, + /// Corresponds to KVM_EXIT_FAIL_ENTRY. + FailEntry( + u64, /* hardware_entry_failure_reason */ + u32, /* cpu */ + ), + /// Corresponds to KVM_EXIT_INTR. + Intr, + /// Corresponds to KVM_EXIT_SET_TPR. + SetTpr, + /// Corresponds to KVM_EXIT_TPR_ACCESS. + TprAccess, + /// Corresponds to KVM_EXIT_S390_SIEIC. + S390Sieic, + /// Corresponds to KVM_EXIT_S390_RESET. + S390Reset, + /// Corresponds to KVM_EXIT_DCR. + Dcr, + /// Corresponds to KVM_EXIT_NMI. + Nmi, + /// Corresponds to KVM_EXIT_INTERNAL_ERROR. + InternalError, + /// Corresponds to KVM_EXIT_OSI. + Osi, + /// Corresponds to KVM_EXIT_PAPR_HCALL. + PaprHcall, + /// Corresponds to KVM_EXIT_S390_UCONTROL. + S390Ucontrol, + /// Corresponds to KVM_EXIT_WATCHDOG. + Watchdog, + /// Corresponds to KVM_EXIT_S390_TSCH. + S390Tsch, + /// Corresponds to KVM_EXIT_EPR. + Epr, + /// Corresponds to KVM_EXIT_SYSTEM_EVENT. + SystemEvent(u32 /* type */, &'a [u64] /* data */), + /// Corresponds to KVM_EXIT_S390_STSI. + S390Stsi, + /// Corresponds to KVM_EXIT_IOAPIC_EOI. + IoapicEoi(u8 /* vector */), + /// Corresponds to KVM_EXIT_HYPERV. + Hyperv, + /// Corresponds to KVM_EXIT_X86_RDMSR. + X86Rdmsr(ReadMsrExit<'a>), + /// Corresponds to KVM_EXIT_X86_WRMSR. + X86Wrmsr(WriteMsrExit<'a>), + /// Corresponds to KVM_EXIT_MEMORY_FAULT. + MemoryFault { + /// flags + flags: u64, + /// gpa + gpa: u64, + /// size + size: u64, + }, + /// Corresponds to an exit reason that is unknown from the current version + /// of the kvm-ioctls crate. Let the consumer decide about what to do with + /// it. + Unsupported(u32), +} + +/// Wrapper over KVM vCPU ioctls. +#[derive(Debug)] +pub struct VcpuFd { + vcpu: File, + kvm_run_ptr: KvmRunWrapper, + /// A pointer to the coalesced MMIO page + coalesced_mmio_ring: Option, +} + +/// KVM Sync Registers used to tell KVM which registers to sync +#[repr(u32)] +#[derive(Debug, Copy, Clone)] +#[cfg(target_arch = "x86_64")] +pub enum SyncReg { + /// General purpose registers, + Register = KVM_SYNC_X86_REGS, + + /// System registers + SystemRegister = KVM_SYNC_X86_SREGS, + + /// CPU events + VcpuEvents = KVM_SYNC_X86_EVENTS, +} + +impl VcpuFd { + /// Returns the vCPU general purpose registers. + /// + /// The registers are returned in a `kvm_regs` structure as defined in the + /// [KVM API documentation](https://www.kernel.org/doc/Documentation/virtual/kvm/api.txt). + /// See documentation for `KVM_GET_REGS`. 
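+    /// This ioctl is not available on `aarch64` or `riscv64`; those architectures
+    /// access registers individually through `KVM_GET_ONE_REG`/`KVM_SET_ONE_REG`.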
+ /// + /// # Example + /// + /// ```rust + /// # extern crate kvm_ioctls; + /// # use kvm_ioctls::Kvm; + /// let kvm = Kvm::new().unwrap(); + /// let vm = kvm.create_vm().unwrap(); + /// let vcpu = vm.create_vcpu(0).unwrap(); + /// let regs = vcpu.get_regs().unwrap(); + /// ``` + #[cfg(not(any(target_arch = "aarch64", target_arch = "riscv64")))] + pub fn get_regs(&self) -> Result { + let mut regs = kvm_regs::default(); + // SAFETY: Safe because we know that our file is a vCPU fd, we know the kernel will only + // read the correct amount of memory from our pointer, and we verify the return result. + let ret = unsafe { ioctl_with_mut_ref(self, KVM_GET_REGS(), &mut regs) }; + if ret != 0 { + return Err(errno::Error::last()); + } + Ok(regs) + } + + /// Sets a specified piece of cpu configuration and/or state. + /// + /// See the documentation for `KVM_SET_DEVICE_ATTR` in + /// [KVM API doc](https://www.kernel.org/doc/Documentation/virtual/kvm/api.txt) + /// # Arguments + /// + /// * `device_attr` - The cpu attribute to be set. + /// + /// # Example + /// + /// ```rust + /// # extern crate kvm_ioctls; + /// # extern crate kvm_bindings; + /// # use kvm_ioctls::Kvm; + /// # use kvm_bindings::{ + /// KVM_ARM_VCPU_PMU_V3_CTRL, KVM_ARM_VCPU_PMU_V3_INIT + /// }; + /// let kvm = Kvm::new().unwrap(); + /// let vm = kvm.create_vm().unwrap(); + /// let vcpu = vm.create_vcpu(0).unwrap(); + /// + /// let dist_attr = kvm_bindings::kvm_device_attr { + /// group: KVM_ARM_VCPU_PMU_V3_CTRL, + /// attr: u64::from(KVM_ARM_VCPU_PMU_V3_INIT), + /// addr: 0x0, + /// flags: 0, + /// }; + /// + /// if (vcpu.has_device_attr(&dist_attr).is_ok()) { + /// vcpu.set_device_attr(&dist_attr).unwrap(); + /// } + /// ``` + #[cfg(target_arch = "aarch64")] + pub fn set_device_attr(&self, device_attr: &kvm_device_attr) -> Result<()> { + // SAFETY: Safe because we call this with a Vcpu fd and we trust the kernel. + let ret = unsafe { ioctl_with_ref(self, KVM_SET_DEVICE_ATTR(), device_attr) }; + if ret != 0 { + return Err(errno::Error::last()); + } + Ok(()) + } + + /// Tests whether a cpu supports a particular attribute. + /// + /// See the documentation for `KVM_HAS_DEVICE_ATTR` in + /// [KVM API doc](https://www.kernel.org/doc/Documentation/virtual/kvm/api.txt) + /// # Arguments + /// + /// * `device_attr` - The cpu attribute to be tested. `addr` field is ignored. + /// + /// # Example + /// + /// ```rust + /// # extern crate kvm_ioctls; + /// # extern crate kvm_bindings; + /// # use kvm_ioctls::Kvm; + /// # use kvm_bindings::{ + /// KVM_ARM_VCPU_PMU_V3_CTRL, KVM_ARM_VCPU_PMU_V3_INIT + /// }; + /// let kvm = Kvm::new().unwrap(); + /// let vm = kvm.create_vm().unwrap(); + /// let vcpu = vm.create_vcpu(0).unwrap(); + /// + /// let dist_attr = kvm_bindings::kvm_device_attr { + /// group: KVM_ARM_VCPU_PMU_V3_CTRL, + /// attr: u64::from(KVM_ARM_VCPU_PMU_V3_INIT), + /// addr: 0x0, + /// flags: 0, + /// }; + /// + /// vcpu.has_device_attr(&dist_attr); + /// ``` + #[cfg(target_arch = "aarch64")] + pub fn has_device_attr(&self, device_attr: &kvm_device_attr) -> Result<()> { + // SAFETY: Safe because we call this with a Vcpu fd and we trust the kernel. + let ret = unsafe { ioctl_with_ref(self, KVM_HAS_DEVICE_ATTR(), device_attr) }; + if ret != 0 { + return Err(errno::Error::last()); + } + Ok(()) + } + + /// Sets the vCPU general purpose registers using the `KVM_SET_REGS` ioctl. + /// + /// # Arguments + /// + /// * `regs` - general purpose registers. 
For details check the `kvm_regs` structure in the + /// [KVM API doc](https://www.kernel.org/doc/Documentation/virtual/kvm/api.txt). + /// + /// # Example + /// + /// ```rust + /// # extern crate kvm_ioctls; + /// # use kvm_ioctls::Kvm; + /// let kvm = Kvm::new().unwrap(); + /// let vm = kvm.create_vm().unwrap(); + /// let vcpu = vm.create_vcpu(0).unwrap(); + /// + /// // Get the current vCPU registers. + /// let mut regs = vcpu.get_regs().unwrap(); + /// // Set a new value for the Instruction Pointer. + /// regs.rip = 0x100; + /// vcpu.set_regs(®s).unwrap(); + /// ``` + #[cfg(not(any(target_arch = "aarch64", target_arch = "riscv64")))] + pub fn set_regs(&self, regs: &kvm_regs) -> Result<()> { + // SAFETY: Safe because we know that our file is a vCPU fd, we know the kernel will only + // read the correct amount of memory from our pointer, and we verify the return result. + let ret = unsafe { ioctl_with_ref(self, KVM_SET_REGS(), regs) }; + if ret != 0 { + return Err(errno::Error::last()); + } + Ok(()) + } + + /// Returns the vCPU special registers. + /// + /// The registers are returned in a `kvm_sregs` structure as defined in the + /// [KVM API documentation](https://www.kernel.org/doc/Documentation/virtual/kvm/api.txt). + /// See documentation for `KVM_GET_SREGS`. + /// + /// # Example + /// + /// ```rust + /// # extern crate kvm_ioctls; + /// # use kvm_ioctls::Kvm; + /// let kvm = Kvm::new().unwrap(); + /// let vm = kvm.create_vm().unwrap(); + /// let vcpu = vm.create_vcpu(0).unwrap(); + /// let sregs = vcpu.get_sregs().unwrap(); + /// ``` + #[cfg(target_arch = "x86_64")] + pub fn get_sregs(&self) -> Result { + let mut regs = kvm_sregs::default(); + // SAFETY: Safe because we know that our file is a vCPU fd, we know the kernel will only + // write the correct amount of memory to our pointer, and we verify the return result. + let ret = unsafe { ioctl_with_mut_ref(self, KVM_GET_SREGS(), &mut regs) }; + if ret != 0 { + return Err(errno::Error::last()); + } + Ok(regs) + } + + /// Sets the vCPU special registers using the `KVM_SET_SREGS` ioctl. + /// + /// # Arguments + /// + /// * `sregs` - Special registers. For details check the `kvm_sregs` structure in the + /// [KVM API doc](https://www.kernel.org/doc/Documentation/virtual/kvm/api.txt). + /// + /// # Example + /// + /// ```rust + /// # extern crate kvm_ioctls; + /// # use kvm_ioctls::Kvm; + /// let kvm = Kvm::new().unwrap(); + /// let vm = kvm.create_vm().unwrap(); + /// let vcpu = vm.create_vcpu(0).unwrap(); + /// + /// let mut sregs = vcpu.get_sregs().unwrap(); + /// // Update the code segment (cs). + /// sregs.cs.base = 0; + /// sregs.cs.selector = 0; + /// vcpu.set_sregs(&sregs).unwrap(); + /// ``` + #[cfg(target_arch = "x86_64")] + pub fn set_sregs(&self, sregs: &kvm_sregs) -> Result<()> { + // SAFETY: Safe because we know that our file is a vCPU fd, we know the kernel will only + // read the correct amount of memory from our pointer, and we verify the return result. + let ret = unsafe { ioctl_with_ref(self, KVM_SET_SREGS(), sregs) }; + if ret != 0 { + return Err(errno::Error::last()); + } + Ok(()) + } + + /// Returns the floating point state (FPU) from the vCPU. + /// + /// The state is returned in a `kvm_fpu` structure as defined in the + /// [KVM API doc](https://www.kernel.org/doc/Documentation/virtual/kvm/api.txt). + /// See the documentation for `KVM_GET_FPU`. 
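+    /// The `kvm_fpu` structure carries the x87 state (`fpr`, `fcw`, `fsw`) together
+    /// with the SSE state (`xmm`, `mxcsr`).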
+ /// + /// # Example + /// + /// ```rust + /// # extern crate kvm_ioctls; + /// # use kvm_ioctls::Kvm; + /// let kvm = Kvm::new().unwrap(); + /// let vm = kvm.create_vm().unwrap(); + /// let vcpu = vm.create_vcpu(0).unwrap(); + /// #[cfg(target_arch = "x86_64")] + /// let fpu = vcpu.get_fpu().unwrap(); + /// ``` + #[cfg(target_arch = "x86_64")] + pub fn get_fpu(&self) -> Result { + let mut fpu = kvm_fpu::default(); + // SAFETY: Here we trust the kernel not to read past the end of the kvm_fpu struct. + let ret = unsafe { ioctl_with_mut_ref(self, KVM_GET_FPU(), &mut fpu) }; + if ret != 0 { + return Err(errno::Error::last()); + } + Ok(fpu) + } + + /// Set the floating point state (FPU) of a vCPU using the `KVM_SET_FPU` ioct. + /// + /// # Arguments + /// + /// * `fpu` - FPU configuration. For details check the `kvm_fpu` structure in the + /// [KVM API doc](https://www.kernel.org/doc/Documentation/virtual/kvm/api.txt). + /// + /// # Example + /// + /// ```rust + /// # extern crate kvm_ioctls; + /// # extern crate kvm_bindings; + /// # use kvm_ioctls::Kvm; + /// # use kvm_bindings::kvm_fpu; + /// let kvm = Kvm::new().unwrap(); + /// let vm = kvm.create_vm().unwrap(); + /// let vcpu = vm.create_vcpu(0).unwrap(); + /// #[cfg(target_arch = "x86_64")] + /// { + /// let KVM_FPU_CWD: u16 = 0x37f; + /// let fpu = kvm_fpu { + /// fcw: KVM_FPU_CWD, + /// ..Default::default() + /// }; + /// vcpu.set_fpu(&fpu).unwrap(); + /// } + /// ``` + #[cfg(target_arch = "x86_64")] + pub fn set_fpu(&self, fpu: &kvm_fpu) -> Result<()> { + // SAFETY: Here we trust the kernel not to read past the end of the kvm_fpu struct. + let ret = unsafe { ioctl_with_ref(self, KVM_SET_FPU(), fpu) }; + if ret < 0 { + return Err(errno::Error::last()); + } + Ok(()) + } + + /// X86 specific call to setup the CPUID registers. + /// + /// See the documentation for `KVM_SET_CPUID2`. + /// + /// # Arguments + /// + /// * `cpuid` - CPUID registers. + /// + /// # Example + /// + /// ```rust + /// # extern crate kvm_ioctls; + /// # extern crate kvm_bindings; + /// # use kvm_bindings::KVM_MAX_CPUID_ENTRIES; + /// # use kvm_ioctls::Kvm; + /// let kvm = Kvm::new().unwrap(); + /// let mut kvm_cpuid = kvm.get_supported_cpuid(KVM_MAX_CPUID_ENTRIES).unwrap(); + /// let vm = kvm.create_vm().unwrap(); + /// let vcpu = vm.create_vcpu(0).unwrap(); + /// + /// // Update the CPUID entries to disable the EPB feature. + /// const ECX_EPB_SHIFT: u32 = 3; + /// { + /// let entries = kvm_cpuid.as_mut_slice(); + /// for entry in entries.iter_mut() { + /// match entry.function { + /// 6 => entry.ecx &= !(1 << ECX_EPB_SHIFT), + /// _ => (), + /// } + /// } + /// } + /// + /// vcpu.set_cpuid2(&kvm_cpuid).unwrap(); + /// ``` + /// + #[cfg(target_arch = "x86_64")] + pub fn set_cpuid2(&self, cpuid: &CpuId) -> Result<()> { + // SAFETY: Here we trust the kernel not to read past the end of the kvm_cpuid2 struct. + let ret = unsafe { ioctl_with_ptr(self, KVM_SET_CPUID2(), cpuid.as_fam_struct_ptr()) }; + if ret < 0 { + return Err(errno::Error::last()); + } + Ok(()) + } + + /// X86 specific call to retrieve the CPUID registers. + /// + /// It requires knowledge of how many `kvm_cpuid_entry2` entries there are to get. + /// See the documentation for `KVM_GET_CPUID2` in the + /// [KVM API doc](https://www.kernel.org/doc/Documentation/virtual/kvm/api.txt). + /// + /// # Arguments + /// + /// * `num_entries` - Number of CPUID entries to be read. 
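+    ///   Requesting more than `KVM_MAX_CPUID_ENTRIES` entries fails with `ENOMEM`,
+    ///   mirroring the error the underlying ioctl would return.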
+ /// + /// # Example + /// + /// ```rust + /// # extern crate kvm_ioctls; + /// # extern crate kvm_bindings; + /// # use kvm_bindings::KVM_MAX_CPUID_ENTRIES; + /// # use kvm_ioctls::Kvm; + /// let kvm = Kvm::new().unwrap(); + /// let vm = kvm.create_vm().unwrap(); + /// let vcpu = vm.create_vcpu(0).unwrap(); + /// let cpuid = vcpu.get_cpuid2(KVM_MAX_CPUID_ENTRIES).unwrap(); + /// ``` + /// + #[cfg(target_arch = "x86_64")] + pub fn get_cpuid2(&self, num_entries: usize) -> Result { + if num_entries > KVM_MAX_CPUID_ENTRIES { + // Returns the same error the underlying `ioctl` would have sent. + return Err(errno::Error::new(libc::ENOMEM)); + } + + let mut cpuid = CpuId::new(num_entries).map_err(|_| errno::Error::new(libc::ENOMEM))?; + let ret = + // SAFETY: Here we trust the kernel not to read past the end of the kvm_cpuid2 struct. + unsafe { ioctl_with_mut_ptr(self, KVM_GET_CPUID2(), cpuid.as_mut_fam_struct_ptr()) }; + if ret != 0 { + return Err(errno::Error::last()); + } + Ok(cpuid) + } + + /// + /// See the documentation for `KVM_ENABLE_CAP`. + /// + /// # Arguments + /// + /// * kvm_enable_cap - KVM capability structure. For details check the `kvm_enable_cap` + /// structure in the + /// [KVM API doc](https://www.kernel.org/doc/Documentation/virtual/kvm/api.txt). + /// + /// # Example + /// + /// ```rust + /// # extern crate kvm_ioctls; + /// # extern crate kvm_bindings; + /// # use kvm_bindings::{kvm_enable_cap, KVM_MAX_CPUID_ENTRIES, KVM_CAP_HYPERV_SYNIC, KVM_CAP_SPLIT_IRQCHIP}; + /// # use kvm_ioctls::{Kvm, Cap}; + /// let kvm = Kvm::new().unwrap(); + /// let vm = kvm.create_vm().unwrap(); + /// let mut cap: kvm_enable_cap = Default::default(); + /// if cfg!(target_arch = "x86_64") { + /// // KVM_CAP_HYPERV_SYNIC needs KVM_CAP_SPLIT_IRQCHIP enabled + /// cap.cap = KVM_CAP_SPLIT_IRQCHIP; + /// cap.args[0] = 24; + /// vm.enable_cap(&cap).unwrap(); + /// + /// let vcpu = vm.create_vcpu(0).unwrap(); + /// if kvm.check_extension(Cap::HypervSynic) { + /// let mut cap: kvm_enable_cap = Default::default(); + /// cap.cap = KVM_CAP_HYPERV_SYNIC; + /// vcpu.enable_cap(&cap).unwrap(); + /// } + /// } + /// ``` + /// + #[cfg(target_arch = "x86_64")] + pub fn enable_cap(&self, cap: &kvm_enable_cap) -> Result<()> { + // SAFETY: The ioctl is safe because we allocated the struct and we know the + // kernel will write exactly the size of the struct. + let ret = unsafe { ioctl_with_ref(self, KVM_ENABLE_CAP(), cap) }; + if ret == 0 { + Ok(()) + } else { + Err(errno::Error::last()) + } + } + + /// Returns the state of the LAPIC (Local Advanced Programmable Interrupt Controller). + /// + /// The state is returned in a `kvm_lapic_state` structure as defined in the + /// [KVM API doc](https://www.kernel.org/doc/Documentation/virtual/kvm/api.txt). + /// See the documentation for `KVM_GET_LAPIC`. + /// + /// # Example + /// + /// ```rust + /// # extern crate kvm_ioctls; + /// # use kvm_ioctls::Kvm; + /// let kvm = Kvm::new().unwrap(); + /// let vm = kvm.create_vm().unwrap(); + /// // For `get_lapic` to work, you first need to create a IRQ chip before creating the vCPU. + /// vm.create_irq_chip().unwrap(); + /// let vcpu = vm.create_vcpu(0).unwrap(); + /// let lapic = vcpu.get_lapic().unwrap(); + /// ``` + #[cfg(target_arch = "x86_64")] + pub fn get_lapic(&self) -> Result { + let mut klapic = kvm_lapic_state::default(); + + // SAFETY: The ioctl is unsafe unless you trust the kernel not to write past the end of the + // local_apic struct. 
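+        // `kvm_lapic_state` wraps the fixed 1 KiB APIC register page, so the kernel
+        // writes at most `klapic.regs.len()` bytes here.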
+ let ret = unsafe { ioctl_with_mut_ref(self, KVM_GET_LAPIC(), &mut klapic) }; + if ret < 0 { + return Err(errno::Error::last()); + } + Ok(klapic) + } + + /// Sets the state of the LAPIC (Local Advanced Programmable Interrupt Controller). + /// + /// See the documentation for `KVM_SET_LAPIC`. + /// + /// # Arguments + /// + /// * `klapic` - LAPIC state. For details check the `kvm_lapic_state` structure in the + /// [KVM API doc](https://www.kernel.org/doc/Documentation/virtual/kvm/api.txt). + /// # Example + /// + /// ```rust + /// # extern crate kvm_ioctls; + /// # use kvm_ioctls::Kvm; + /// use std::io::Write; + /// + /// let kvm = Kvm::new().unwrap(); + /// let vm = kvm.create_vm().unwrap(); + /// // For `get_lapic` to work, you first need to create a IRQ chip before creating the vCPU. + /// vm.create_irq_chip().unwrap(); + /// let vcpu = vm.create_vcpu(0).unwrap(); + /// let mut lapic = vcpu.get_lapic().unwrap(); + /// + /// // Write to APIC_ICR offset the value 2. + /// let apic_icr_offset = 0x300; + /// let write_value: &[u8] = &[2, 0, 0, 0]; + /// let mut apic_icr_slice = + /// unsafe { &mut *(&mut lapic.regs[apic_icr_offset..] as *mut [i8] as *mut [u8]) }; + /// apic_icr_slice.write(write_value).unwrap(); + /// + /// // Update the value of LAPIC. + /// vcpu.set_lapic(&lapic).unwrap(); + /// ``` + #[cfg(target_arch = "x86_64")] + pub fn set_lapic(&self, klapic: &kvm_lapic_state) -> Result<()> { + // SAFETY: The ioctl is safe because the kernel will only read from the klapic struct. + let ret = unsafe { ioctl_with_ref(self, KVM_SET_LAPIC(), klapic) }; + if ret < 0 { + return Err(errno::Error::last()); + } + Ok(()) + } + + /// Returns the model-specific registers (MSR) for this vCPU. + /// + /// It emulates `KVM_GET_MSRS` ioctl's behavior by returning the number of MSRs + /// successfully read upon success or the last error number in case of failure. + /// The MSRs are returned in the `msr` method argument. + /// + /// # Arguments + /// + /// * `msrs` - MSRs (input/output). For details check the `kvm_msrs` structure in the + /// [KVM API doc](https://www.kernel.org/doc/Documentation/virtual/kvm/api.txt). + /// + /// # Example + /// + /// ```rust + /// # extern crate kvm_ioctls; + /// # extern crate kvm_bindings; + /// # use kvm_ioctls::Kvm; + /// # use kvm_bindings::{kvm_msr_entry, Msrs}; + /// let kvm = Kvm::new().unwrap(); + /// let vm = kvm.create_vm().unwrap(); + /// let vcpu = vm.create_vcpu(0).unwrap(); + /// // Configure the struct to say which entries we want to get. + /// let mut msrs = Msrs::from_entries(&[ + /// kvm_msr_entry { + /// index: 0x0000_0174, + /// ..Default::default() + /// }, + /// kvm_msr_entry { + /// index: 0x0000_0175, + /// ..Default::default() + /// }, + /// ]) + /// .unwrap(); + /// let read = vcpu.get_msrs(&mut msrs).unwrap(); + /// assert_eq!(read, 2); + /// ``` + #[cfg(target_arch = "x86_64")] + pub fn get_msrs(&self, msrs: &mut Msrs) -> Result { + // SAFETY: Here we trust the kernel not to read past the end of the kvm_msrs struct. + let ret = unsafe { ioctl_with_mut_ptr(self, KVM_GET_MSRS(), msrs.as_mut_fam_struct_ptr()) }; + if ret < 0 { + return Err(errno::Error::last()); + } + Ok(ret as usize) + } + + /// Setup the model-specific registers (MSR) for this vCPU. + /// Returns the number of MSR entries actually written. + /// + /// See the documentation for `KVM_SET_MSRS`. + /// + /// # Arguments + /// + /// * `msrs` - MSRs. 
For details check the `kvm_msrs` structure in the + /// [KVM API doc](https://www.kernel.org/doc/Documentation/virtual/kvm/api.txt). + /// # Example + /// + /// ```rust + /// # extern crate kvm_ioctls; + /// # extern crate kvm_bindings; + /// # use kvm_ioctls::Kvm; + /// # use kvm_bindings::{kvm_msr_entry, Msrs}; + /// let kvm = Kvm::new().unwrap(); + /// let vm = kvm.create_vm().unwrap(); + /// let vcpu = vm.create_vcpu(0).unwrap(); + /// + /// // Configure the entries we want to set. + /// let mut msrs = Msrs::from_entries(&[kvm_msr_entry { + /// index: 0x0000_0174, + /// ..Default::default() + /// }]) + /// .unwrap(); + /// let written = vcpu.set_msrs(&msrs).unwrap(); + /// assert_eq!(written, 1); + /// ``` + #[cfg(target_arch = "x86_64")] + pub fn set_msrs(&self, msrs: &Msrs) -> Result { + // SAFETY: Here we trust the kernel not to read past the end of the kvm_msrs struct. + let ret = unsafe { ioctl_with_ptr(self, KVM_SET_MSRS(), msrs.as_fam_struct_ptr()) }; + // KVM_SET_MSRS actually returns the number of msr entries written. + if ret < 0 { + return Err(errno::Error::last()); + } + Ok(ret as usize) + } + + /// Returns the vcpu's current "multiprocessing state". + /// + /// See the documentation for `KVM_GET_MP_STATE` in the + /// [KVM API doc](https://www.kernel.org/doc/Documentation/virtual/kvm/api.txt). + /// + /// # Arguments + /// + /// * `kvm_mp_state` - multiprocessing state to be read. + /// + /// # Example + /// + /// ```rust + /// # extern crate kvm_ioctls; + /// # use kvm_ioctls::Kvm; + /// let kvm = Kvm::new().unwrap(); + /// let vm = kvm.create_vm().unwrap(); + /// let vcpu = vm.create_vcpu(0).unwrap(); + /// let mp_state = vcpu.get_mp_state().unwrap(); + /// ``` + #[cfg(any( + target_arch = "x86_64", + target_arch = "aarch64", + target_arch = "riscv64", + target_arch = "s390x" + ))] + pub fn get_mp_state(&self) -> Result { + let mut mp_state = Default::default(); + // SAFETY: Here we trust the kernel not to read past the end of the kvm_mp_state struct. + let ret = unsafe { ioctl_with_mut_ref(self, KVM_GET_MP_STATE(), &mut mp_state) }; + if ret != 0 { + return Err(errno::Error::last()); + } + Ok(mp_state) + } + + /// Sets the vcpu's current "multiprocessing state". + /// + /// See the documentation for `KVM_SET_MP_STATE` in the + /// [KVM API doc](https://www.kernel.org/doc/Documentation/virtual/kvm/api.txt). + /// + /// # Arguments + /// + /// * `kvm_mp_state` - multiprocessing state to be written. + /// + /// # Example + /// + /// ```rust + /// # extern crate kvm_ioctls; + /// # use kvm_ioctls::Kvm; + /// let kvm = Kvm::new().unwrap(); + /// let vm = kvm.create_vm().unwrap(); + /// let vcpu = vm.create_vcpu(0).unwrap(); + /// let mp_state = Default::default(); + /// // Your `mp_state` manipulation here. + /// vcpu.set_mp_state(mp_state).unwrap(); + /// ``` + #[cfg(any( + target_arch = "x86_64", + target_arch = "aarch64", + target_arch = "riscv64", + target_arch = "s390x" + ))] + pub fn set_mp_state(&self, mp_state: kvm_mp_state) -> Result<()> { + // SAFETY: Here we trust the kernel not to read past the end of the kvm_mp_state struct. + let ret = unsafe { ioctl_with_ref(self, KVM_SET_MP_STATE(), &mp_state) }; + if ret != 0 { + return Err(errno::Error::last()); + } + Ok(()) + } + + /// X86 specific call that returns the vcpu's current "xsave struct". + /// + /// See the documentation for `KVM_GET_XSAVE` in the + /// [KVM API doc](https://www.kernel.org/doc/Documentation/virtual/kvm/api.txt). + /// + /// # Arguments + /// + /// * `kvm_xsave` - xsave struct to be read. 
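+    ///   `kvm_xsave` is a fixed 4096-byte region (`region: [u32; 1024]`).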
+ /// + /// # Example + /// + /// ```rust + /// # extern crate kvm_ioctls; + /// # use kvm_ioctls::Kvm; + /// let kvm = Kvm::new().unwrap(); + /// let vm = kvm.create_vm().unwrap(); + /// let vcpu = vm.create_vcpu(0).unwrap(); + /// let xsave = vcpu.get_xsave().unwrap(); + /// ``` + #[cfg(target_arch = "x86_64")] + pub fn get_xsave(&self) -> Result { + let mut xsave = Default::default(); + // SAFETY: Here we trust the kernel not to read past the end of the kvm_xsave struct. + let ret = unsafe { ioctl_with_mut_ref(self, KVM_GET_XSAVE(), &mut xsave) }; + if ret != 0 { + return Err(errno::Error::last()); + } + Ok(xsave) + } + + /// X86 specific call that sets the vcpu's current "xsave struct". + /// + /// See the documentation for `KVM_SET_XSAVE` in the + /// [KVM API doc](https://www.kernel.org/doc/Documentation/virtual/kvm/api.txt). + /// + /// # Arguments + /// + /// * `kvm_xsave` - xsave struct to be written. + /// + /// # Example + /// + /// ```rust + /// # extern crate kvm_ioctls; + /// # use kvm_ioctls::Kvm; + /// let kvm = Kvm::new().unwrap(); + /// let vm = kvm.create_vm().unwrap(); + /// let vcpu = vm.create_vcpu(0).unwrap(); + /// let xsave = Default::default(); + /// // Your `xsave` manipulation here. + /// vcpu.set_xsave(&xsave).unwrap(); + /// ``` + #[cfg(target_arch = "x86_64")] + pub fn set_xsave(&self, xsave: &kvm_xsave) -> Result<()> { + // SAFETY: Here we trust the kernel not to read past the end of the kvm_xsave struct. + let ret = unsafe { ioctl_with_ref(self, KVM_SET_XSAVE(), xsave) }; + if ret != 0 { + return Err(errno::Error::last()); + } + Ok(()) + } + + /// X86 specific call that returns the vcpu's current "xcrs". + /// + /// See the documentation for `KVM_GET_XCRS` in the + /// [KVM API doc](https://www.kernel.org/doc/Documentation/virtual/kvm/api.txt). + /// + /// # Arguments + /// + /// * `kvm_xcrs` - xcrs to be read. + /// + /// # Example + /// + /// ```rust + /// # extern crate kvm_ioctls; + /// # use kvm_ioctls::Kvm; + /// let kvm = Kvm::new().unwrap(); + /// let vm = kvm.create_vm().unwrap(); + /// let vcpu = vm.create_vcpu(0).unwrap(); + /// let xcrs = vcpu.get_xcrs().unwrap(); + /// ``` + #[cfg(target_arch = "x86_64")] + pub fn get_xcrs(&self) -> Result { + let mut xcrs = Default::default(); + // SAFETY: Here we trust the kernel not to read past the end of the kvm_xcrs struct. + let ret = unsafe { ioctl_with_mut_ref(self, KVM_GET_XCRS(), &mut xcrs) }; + if ret != 0 { + return Err(errno::Error::last()); + } + Ok(xcrs) + } + + /// X86 specific call that sets the vcpu's current "xcrs". + /// + /// See the documentation for `KVM_SET_XCRS` in the + /// [KVM API doc](https://www.kernel.org/doc/Documentation/virtual/kvm/api.txt). + /// + /// # Arguments + /// + /// * `kvm_xcrs` - xcrs to be written. + /// + /// # Example + /// + /// ```rust + /// # extern crate kvm_ioctls; + /// # use kvm_ioctls::Kvm; + /// let kvm = Kvm::new().unwrap(); + /// let vm = kvm.create_vm().unwrap(); + /// let vcpu = vm.create_vcpu(0).unwrap(); + /// let xcrs = Default::default(); + /// // Your `xcrs` manipulation here. + /// vcpu.set_xcrs(&xcrs).unwrap(); + /// ``` + #[cfg(target_arch = "x86_64")] + pub fn set_xcrs(&self, xcrs: &kvm_xcrs) -> Result<()> { + // SAFETY: Here we trust the kernel not to read past the end of the kvm_xcrs struct. + let ret = unsafe { ioctl_with_ref(self, KVM_SET_XCRS(), xcrs) }; + if ret != 0 { + return Err(errno::Error::last()); + } + Ok(()) + } + + /// X86 specific call that returns the vcpu's current "debug registers". 
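+    /// The returned `kvm_debugregs` contains DR0-DR3 (in `db`) plus DR6 and DR7.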
+    ///
+    /// See the documentation for `KVM_GET_DEBUGREGS` in the
+    /// [KVM API doc](https://www.kernel.org/doc/Documentation/virtual/kvm/api.txt).
+    ///
+    /// # Arguments
+    ///
+    /// * `kvm_debugregs` - debug registers to be read.
+    ///
+    /// # Example
+    ///
+    /// ```rust
+    /// # extern crate kvm_ioctls;
+    /// # use kvm_ioctls::Kvm;
+    /// let kvm = Kvm::new().unwrap();
+    /// let vm = kvm.create_vm().unwrap();
+    /// let vcpu = vm.create_vcpu(0).unwrap();
+    /// let debug_regs = vcpu.get_debug_regs().unwrap();
+    /// ```
+    #[cfg(target_arch = "x86_64")]
+    pub fn get_debug_regs(&self) -> Result<kvm_debugregs> {
+        let mut debug_regs = Default::default();
+        // SAFETY: Here we trust the kernel not to read past the end of the kvm_debugregs struct.
+        let ret = unsafe { ioctl_with_mut_ref(self, KVM_GET_DEBUGREGS(), &mut debug_regs) };
+        if ret != 0 {
+            return Err(errno::Error::last());
+        }
+        Ok(debug_regs)
+    }
+
+    /// X86 specific call that sets the vcpu's current "debug registers".
+    ///
+    /// See the documentation for `KVM_SET_DEBUGREGS` in the
+    /// [KVM API doc](https://www.kernel.org/doc/Documentation/virtual/kvm/api.txt).
+    ///
+    /// # Arguments
+    ///
+    /// * `kvm_debugregs` - debug registers to be written.
+    ///
+    /// # Example
+    ///
+    /// ```rust
+    /// # extern crate kvm_ioctls;
+    /// # use kvm_ioctls::Kvm;
+    /// let kvm = Kvm::new().unwrap();
+    /// let vm = kvm.create_vm().unwrap();
+    /// let vcpu = vm.create_vcpu(0).unwrap();
+    /// let debug_regs = Default::default();
+    /// // Your `debug_regs` manipulation here.
+    /// vcpu.set_debug_regs(&debug_regs).unwrap();
+    /// ```
+    #[cfg(target_arch = "x86_64")]
+    pub fn set_debug_regs(&self, debug_regs: &kvm_debugregs) -> Result<()> {
+        // SAFETY: Here we trust the kernel not to read past the end of the kvm_debugregs struct.
+        let ret = unsafe { ioctl_with_ref(self, KVM_SET_DEBUGREGS(), debug_regs) };
+        if ret != 0 {
+            return Err(errno::Error::last());
+        }
+        Ok(())
+    }
+
+    /// Returns currently pending exceptions, interrupts, and NMIs as well as related
+    /// states of the vcpu.
+    ///
+    /// See the documentation for `KVM_GET_VCPU_EVENTS` in the
+    /// [KVM API doc](https://www.kernel.org/doc/Documentation/virtual/kvm/api.txt).
+    ///
+    /// # Arguments
+    ///
+    /// * `kvm_vcpu_events` - vcpu events to be read.
+    ///
+    /// # Example
+    ///
+    /// ```rust
+    /// # extern crate kvm_ioctls;
+    /// # use kvm_ioctls::{Kvm, Cap};
+    /// let kvm = Kvm::new().unwrap();
+    /// if kvm.check_extension(Cap::VcpuEvents) {
+    ///     let vm = kvm.create_vm().unwrap();
+    ///     let vcpu = vm.create_vcpu(0).unwrap();
+    ///     let vcpu_events = vcpu.get_vcpu_events().unwrap();
+    /// }
+    /// ```
+    #[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))]
+    pub fn get_vcpu_events(&self) -> Result<kvm_vcpu_events> {
+        let mut vcpu_events = Default::default();
+        // SAFETY: Here we trust the kernel not to read past the end of the kvm_vcpu_events struct.
+        let ret = unsafe { ioctl_with_mut_ref(self, KVM_GET_VCPU_EVENTS(), &mut vcpu_events) };
+        if ret != 0 {
+            return Err(errno::Error::last());
+        }
+        Ok(vcpu_events)
+    }
+
+    /// Sets pending exceptions, interrupts, and NMIs as well as related states of the vcpu.
+    ///
+    /// See the documentation for `KVM_SET_VCPU_EVENTS` in the
+    /// [KVM API doc](https://www.kernel.org/doc/Documentation/virtual/kvm/api.txt).
+    ///
+    /// # Arguments
+    ///
+    /// * `kvm_vcpu_events` - vcpu events to be written.
+    ///
+    /// # Example
+    ///
+    /// ```rust
+    /// # extern crate kvm_ioctls;
+    /// # use kvm_ioctls::{Kvm, Cap};
+    /// let kvm = Kvm::new().unwrap();
+    /// if kvm.check_extension(Cap::VcpuEvents) {
+    ///     let vm = kvm.create_vm().unwrap();
+    ///     let vcpu = vm.create_vcpu(0).unwrap();
+    ///     let vcpu_events = Default::default();
+    ///     // Your `vcpu_events` manipulation here.
+    ///     vcpu.set_vcpu_events(&vcpu_events).unwrap();
+    /// }
+    /// ```
+    #[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))]
+    pub fn set_vcpu_events(&self, vcpu_events: &kvm_vcpu_events) -> Result<()> {
+        // SAFETY: Here we trust the kernel not to read past the end of the kvm_vcpu_events struct.
+        let ret = unsafe { ioctl_with_ref(self, KVM_SET_VCPU_EVENTS(), vcpu_events) };
+        if ret != 0 {
+            return Err(errno::Error::last());
+        }
+        Ok(())
+    }
+
+    /// Sets the type of CPU to be exposed to the guest and optional features.
+    ///
+    /// This initializes an ARM vCPU to the specified type with the specified features
+    /// and resets the values of all of its registers to defaults. See the documentation for
+    /// `KVM_ARM_VCPU_INIT`.
+    ///
+    /// # Arguments
+    ///
+    /// * `kvi` - information about preferred CPU target type and recommended features for it.
+    ///   For details check the `kvm_vcpu_init` structure in the
+    ///   [KVM API doc](https://www.kernel.org/doc/Documentation/virtual/kvm/api.txt).
+    ///
+    /// # Example
+    /// ```rust
+    /// # extern crate kvm_ioctls;
+    /// # extern crate kvm_bindings;
+    /// # use kvm_ioctls::Kvm;
+    /// use kvm_bindings::kvm_vcpu_init;
+    /// let kvm = Kvm::new().unwrap();
+    /// let vm = kvm.create_vm().unwrap();
+    /// let vcpu = vm.create_vcpu(0).unwrap();
+    ///
+    /// let mut kvi = kvm_vcpu_init::default();
+    /// vm.get_preferred_target(&mut kvi).unwrap();
+    /// vcpu.vcpu_init(&kvi).unwrap();
+    /// ```
+    #[cfg(target_arch = "aarch64")]
+    pub fn vcpu_init(&self, kvi: &kvm_vcpu_init) -> Result<()> {
+        // SAFETY: This is safe because we allocated the struct and we know the kernel will read
+        // exactly the size of the struct.
+        let ret = unsafe { ioctl_with_ref(self, KVM_ARM_VCPU_INIT(), kvi) };
+        if ret < 0 {
+            return Err(errno::Error::last());
+        }
+        Ok(())
+    }
+
+    /// Finalizes the configuration of the specified vcpu feature.
+    ///
+    /// The vcpu must already have been initialized, enabling the affected feature,
+    /// by means of a successful `KVM_ARM_VCPU_INIT` call with the appropriate flag set
+    /// in `features[]`.
+    ///
+    /// For affected vcpu features, this is a mandatory step that must be performed before
+    /// the vcpu is fully usable.
+    ///
+    /// Between `KVM_ARM_VCPU_INIT` and `KVM_ARM_VCPU_FINALIZE`, the feature may be configured
+    /// by use of ioctls such as `KVM_SET_ONE_REG`. The exact configuration that should be
+    /// performed and how to do it are feature-dependent.
+    ///
+    /// Other calls that depend on a particular feature being finalized, such as `KVM_RUN`,
+    /// `KVM_GET_REG_LIST`, `KVM_GET_ONE_REG` and `KVM_SET_ONE_REG`, will fail with `-EPERM`
+    /// unless the feature has already been finalized by means of a `KVM_ARM_VCPU_FINALIZE` call.
+    ///
+    /// See `KVM_ARM_VCPU_INIT` for details of vcpu features that require finalization using
+    /// this ioctl:
+    /// [KVM_ARM_VCPU_FINALIZE](https://www.kernel.org/doc/html/latest/virt/kvm/api.html#kvm-arm-vcpu-finalize).
+    ///
+    /// # Arguments
+    ///
+    /// * `feature` - the vCPU feature that needs to be finalized.
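+    ///   At the time of writing, SVE (`KVM_ARM_VCPU_SVE`) is the only vCPU feature
+    ///   that requires finalization.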
+ /// + /// # Example + /// ```rust + /// # extern crate kvm_ioctls; + /// # extern crate kvm_bindings; + /// # use kvm_ioctls::Kvm; + /// use std::arch::is_aarch64_feature_detected; + /// + /// use kvm_bindings::{kvm_vcpu_init, KVM_ARM_VCPU_SVE}; + /// let kvm = Kvm::new().unwrap(); + /// let vm = kvm.create_vm().unwrap(); + /// let vcpu = vm.create_vcpu(0).unwrap(); + /// + /// let mut kvi = kvm_vcpu_init::default(); + /// vm.get_preferred_target(&mut kvi).unwrap(); + /// kvi.features[0] |= 1 << KVM_ARM_VCPU_SVE; + /// if is_aarch64_feature_detected!("sve2") || is_aarch64_feature_detected!("sve") { + /// vcpu.vcpu_init(&kvi).unwrap(); + /// let feature = KVM_ARM_VCPU_SVE as i32; + /// vcpu.vcpu_finalize(&feature).unwrap(); + /// } + /// ``` + #[cfg(target_arch = "aarch64")] + pub fn vcpu_finalize(&self, feature: &std::os::raw::c_int) -> Result<()> { + // SAFETY: This is safe because we know the kernel will only read this + // parameter to select the correct finalization case in KVM. + let ret = unsafe { ioctl_with_ref(self, KVM_ARM_VCPU_FINALIZE(), feature) }; + if ret < 0 { + return Err(errno::Error::last()); + } + Ok(()) + } + + /// Returns the guest registers that are supported for the + /// KVM_GET_ONE_REG/KVM_SET_ONE_REG calls. + /// + /// # Arguments + /// + /// * `reg_list` - list of registers (input/output). For details check the `kvm_reg_list` + /// structure in the + /// [KVM API doc](https://www.kernel.org/doc/Documentation/virtual/kvm/api.txt). + /// + /// # Example + /// + /// ```rust + /// # extern crate kvm_ioctls; + /// # extern crate kvm_bindings; + /// # use kvm_ioctls::Kvm; + /// # use kvm_bindings::RegList; + /// let kvm = Kvm::new().unwrap(); + /// let vm = kvm.create_vm().unwrap(); + /// let vcpu = vm.create_vcpu(0).unwrap(); + /// + /// // KVM_GET_REG_LIST on Aarch64 demands that the vcpus be initialized. + /// #[cfg(target_arch = "aarch64")] + /// { + /// let mut kvi = kvm_bindings::kvm_vcpu_init::default(); + /// vm.get_preferred_target(&mut kvi).unwrap(); + /// vcpu.vcpu_init(&kvi).expect("Cannot initialize vcpu"); + /// + /// let mut reg_list = RegList::new(500).unwrap(); + /// vcpu.get_reg_list(&mut reg_list).unwrap(); + /// assert!(reg_list.as_fam_struct_ref().n > 0); + /// } + /// ``` + #[cfg(any(target_arch = "aarch64", target_arch = "riscv64"))] + pub fn get_reg_list(&self, reg_list: &mut RegList) -> Result<()> { + let ret = + // SAFETY: This is safe because we allocated the struct and we trust the kernel will read + // exactly the size of the struct. + unsafe { ioctl_with_mut_ref(self, KVM_GET_REG_LIST(), reg_list.as_mut_fam_struct()) }; + if ret < 0 { + return Err(errno::Error::last()); + } + Ok(()) + } + + /// Sets processor-specific debug registers and configures the vcpu for handling + /// certain guest debug events using the `KVM_SET_GUEST_DEBUG` ioctl. + /// + /// # Arguments + /// + /// * `debug_struct` - control bitfields and debug registers, depending on the specific architecture. + /// For details check the `kvm_guest_debug` structure in the + /// [KVM API doc](https://www.kernel.org/doc/Documentation/virtual/kvm/api.txt). 
+    ///
+    /// # Example
+    ///
+    /// ```rust
+    /// # extern crate kvm_ioctls;
+    /// # extern crate kvm_bindings;
+    /// # use kvm_ioctls::Kvm;
+    /// # use kvm_bindings::{
+    /// #     KVM_GUESTDBG_ENABLE, KVM_GUESTDBG_USE_SW_BP, kvm_guest_debug_arch, kvm_guest_debug
+    /// # };
+    /// let kvm = Kvm::new().unwrap();
+    /// let vm = kvm.create_vm().unwrap();
+    /// let vcpu = vm.create_vcpu(0).unwrap();
+    ///
+    /// #[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))]
+    /// {
+    ///     let debug_struct = kvm_guest_debug {
+    ///         // Configure the vcpu so that a KVM_DEBUG_EXIT would be generated
+    ///         // when encountering a software breakpoint during execution
+    ///         control: KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP,
+    ///         pad: 0,
+    ///         // Reset all arch-specific debug registers
+    ///         arch: Default::default(),
+    ///     };
+    ///
+    ///     vcpu.set_guest_debug(&debug_struct).unwrap();
+    /// }
+    /// ```
+    #[cfg(any(
+        target_arch = "x86_64",
+        target_arch = "aarch64",
+        target_arch = "s390x",
+        target_arch = "powerpc"
+    ))]
+    pub fn set_guest_debug(&self, debug_struct: &kvm_guest_debug) -> Result<()> {
+        // SAFETY: Safe because we allocated the structure and we trust the kernel.
+        let ret = unsafe { ioctl_with_ref(self, KVM_SET_GUEST_DEBUG(), debug_struct) };
+        if ret < 0 {
+            return Err(errno::Error::last());
+        }
+        Ok(())
+    }
+
+    /// Sets the value of one register for this vCPU.
+    ///
+    /// The id of the register is encoded as specified in the kernel documentation
+    /// for `KVM_SET_ONE_REG`.
+    ///
+    /// # Arguments
+    ///
+    /// * `reg_id` - ID of the register for which we are setting the value.
+    /// * `data` - byte slice containing the value to be set.
+    ///
+    /// # Note
+    ///
+    /// `data` should be equal to or bigger than the register size,
+    /// otherwise the function will return an `EINVAL` error.
+    #[cfg(any(target_arch = "aarch64", target_arch = "riscv64"))]
+    pub fn set_one_reg(&self, reg_id: u64, data: &[u8]) -> Result<usize> {
+        let reg_size = reg_size(reg_id);
+        if data.len() < reg_size {
+            return Err(errno::Error::new(libc::EINVAL));
+        }
+        let onereg = kvm_one_reg {
+            id: reg_id,
+            addr: data.as_ptr() as u64,
+        };
+        // SAFETY: This is safe because we allocated the struct and we know the kernel will read
+        // exactly the size of the struct.
+        let ret = unsafe { ioctl_with_ref(self, KVM_SET_ONE_REG(), &onereg) };
+        if ret < 0 {
+            return Err(errno::Error::last());
+        }
+        Ok(reg_size)
+    }
+
+    /// Writes the value of the specified vCPU register into the provided buffer.
+    ///
+    /// The id of the register is encoded as specified in the kernel documentation
+    /// for `KVM_GET_ONE_REG`.
+    ///
+    /// # Arguments
+    ///
+    /// * `reg_id` - ID of the register.
+    /// * `data` - byte slice where the register value will be written to.
+    ///
+    /// # Note
+    ///
+    /// `data` should be equal to or bigger than the register size,
+    /// otherwise the function will return an `EINVAL` error.
+    #[cfg(any(target_arch = "aarch64", target_arch = "riscv64"))]
+    pub fn get_one_reg(&self, reg_id: u64, data: &mut [u8]) -> Result<usize> {
+        let reg_size = reg_size(reg_id);
+        if data.len() < reg_size {
+            return Err(errno::Error::new(libc::EINVAL));
+        }
+        let mut onereg = kvm_one_reg {
+            id: reg_id,
+            addr: data.as_mut_ptr() as u64,
+        };
+        // SAFETY: This is safe because we allocated the struct and we know the kernel will read
+        // exactly the size of the struct.
+        let ret = unsafe { ioctl_with_mut_ref(self, KVM_GET_ONE_REG(), &mut onereg) };
+        if ret < 0 {
+            return Err(errno::Error::last());
+        }
+        Ok(reg_size)
+    }
+
+    /// Notify the guest about the vCPU being paused.
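+    ///
+    /// A minimal usage sketch (assuming the host kernel exposes
+    /// `KVM_CAP_KVMCLOCK_CTRL`; a VMM would call this right before pausing the
+    /// vCPU thread, so the guest can attribute the pause and avoid spurious
+    /// soft-lockup reports):
+    ///
+    /// ```no_run
+    /// # extern crate kvm_ioctls;
+    /// # use kvm_ioctls::Kvm;
+    /// let kvm = Kvm::new().unwrap();
+    /// let vm = kvm.create_vm().unwrap();
+    /// let vcpu = vm.create_vcpu(0).unwrap();
+    /// #[cfg(target_arch = "x86_64")]
+    /// vcpu.kvmclock_ctrl().unwrap();
+    /// ```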
+    ///
+    /// See the documentation for `KVM_KVMCLOCK_CTRL` in the
+    /// [KVM API documentation](https://www.kernel.org/doc/Documentation/virtual/kvm/api.txt).
+    #[cfg(target_arch = "x86_64")]
+    pub fn kvmclock_ctrl(&self) -> Result<()> {
+        // SAFETY: Safe because we know that our file is a KVM fd and that the request
+        // is one of the ones defined by kernel.
+        let ret = unsafe { ioctl(self, KVM_KVMCLOCK_CTRL()) };
+        if ret != 0 {
+            return Err(errno::Error::last());
+        }
+        Ok(())
+    }
+
+    /// Triggers the running of the current virtual CPU returning an exit reason.
+    ///
+    /// See documentation for `KVM_RUN`.
+    ///
+    /// # Example
+    ///
+    /// ```rust
+    /// # extern crate kvm_ioctls;
+    /// # extern crate kvm_bindings;
+    /// # use std::io::Write;
+    /// # use std::ptr::null_mut;
+    /// # use std::slice;
+    /// # use kvm_ioctls::{Kvm, VcpuExit};
+    /// # use kvm_bindings::{kvm_userspace_memory_region, KVM_MEM_LOG_DIRTY_PAGES};
+    /// # let kvm = Kvm::new().unwrap();
+    /// # let vm = kvm.create_vm().unwrap();
+    /// // This is a dummy example for running on x86 based on https://lwn.net/Articles/658511/.
+    /// #[cfg(target_arch = "x86_64")]
+    /// {
+    ///     let mem_size = 0x4000;
+    ///     let guest_addr: u64 = 0x1000;
+    ///     let load_addr: *mut u8 = unsafe {
+    ///         libc::mmap(
+    ///             null_mut(),
+    ///             mem_size,
+    ///             libc::PROT_READ | libc::PROT_WRITE,
+    ///             libc::MAP_ANONYMOUS | libc::MAP_SHARED | libc::MAP_NORESERVE,
+    ///             -1,
+    ///             0,
+    ///         ) as *mut u8
+    ///     };
+    ///
+    ///     let mem_region = kvm_userspace_memory_region {
+    ///         slot: 0,
+    ///         guest_phys_addr: guest_addr,
+    ///         memory_size: mem_size as u64,
+    ///         userspace_addr: load_addr as u64,
+    ///         flags: 0,
+    ///     };
+    ///     unsafe { vm.set_user_memory_region(mem_region).unwrap() };
+    ///
+    ///     // Dummy x86 code that just calls halt.
+    ///     let x86_code = [0xf4 /* hlt */];
+    ///
+    ///     // Write the code in the guest memory. This will generate a dirty page.
+    ///     unsafe {
+    ///         let mut slice = slice::from_raw_parts_mut(load_addr, mem_size);
+    ///         slice.write(&x86_code).unwrap();
+    ///     }
+    ///
+    ///     let mut vcpu_fd = vm.create_vcpu(0).unwrap();
+    ///
+    ///     let mut vcpu_sregs = vcpu_fd.get_sregs().unwrap();
+    ///     vcpu_sregs.cs.base = 0;
+    ///     vcpu_sregs.cs.selector = 0;
+    ///     vcpu_fd.set_sregs(&vcpu_sregs).unwrap();
+    ///
+    ///     let mut vcpu_regs = vcpu_fd.get_regs().unwrap();
+    ///     // Set the Instruction Pointer to the guest address where we loaded the code.
+    ///     vcpu_regs.rip = guest_addr;
+    ///     vcpu_regs.rax = 2;
+    ///     vcpu_regs.rbx = 3;
+    ///     vcpu_regs.rflags = 2;
+    ///     vcpu_fd.set_regs(&vcpu_regs).unwrap();
+    ///
+    ///     loop {
+    ///         match vcpu_fd.run().expect("run failed") {
+    ///             VcpuExit::Hlt => {
+    ///                 break;
+    ///             }
+    ///             exit_reason => panic!("unexpected exit reason: {:?}", exit_reason),
+    ///         }
+    ///     }
+    /// }
+    /// ```
+    pub fn run(&mut self) -> Result<VcpuExit> {
+        // SAFETY: Safe because we know that our file is a vCPU fd and we verify the return result.
+        let ret = unsafe { ioctl(self, KVM_RUN()) };
+        if ret == 0 {
+            let run = self.kvm_run_ptr.as_mut_ref();
+            match run.exit_reason {
+                // Make sure to treat all possible exit reasons from include/uapi/linux/kvm.h
+                // accordingly when upgrading to a different kernel version.
+                KVM_EXIT_UNKNOWN => Ok(VcpuExit::Unknown),
+                KVM_EXIT_EXCEPTION => Ok(VcpuExit::Exception),
+                KVM_EXIT_IO => {
+                    let run_start = run as *mut kvm_run as *mut u8;
+                    // SAFETY: Safe because the exit_reason (which comes from the kernel) told us
+                    // which union field to use.
+                    let io = unsafe { run.__bindgen_anon_1.io };
+                    let port = io.port;
+                    let data_size = io.count as usize * io.size as usize;
+                    // SAFETY: The data_offset is defined by the kernel to be some number of bytes
+                    // into the kvm_run structure, which we have fully mmap'd.
+                    let data_ptr = unsafe { run_start.offset(io.data_offset as isize) };
+                    let data_slice =
+                        // SAFETY: The slice's lifetime is limited to the lifetime of this vCPU,
+                        // which is equal to the mmap of the `kvm_run` struct that this is
+                        // slicing from.
+                        unsafe { std::slice::from_raw_parts_mut::<u8>(data_ptr, data_size) };
+                    match u32::from(io.direction) {
+                        KVM_EXIT_IO_IN => Ok(VcpuExit::IoIn(port, data_slice)),
+                        KVM_EXIT_IO_OUT => Ok(VcpuExit::IoOut(port, data_slice)),
+                        _ => Err(errno::Error::new(EINVAL)),
+                    }
+                }
+                KVM_EXIT_HYPERCALL => {
+                    // SAFETY: Safe because the exit_reason (which comes from the kernel) told us
+                    // which union field to use.
+                    let hypercall = unsafe { &mut run.__bindgen_anon_1.hypercall };
+                    Ok(VcpuExit::Hypercall(HypercallExit {
+                        nr: hypercall.nr,
+                        args: hypercall.args,
+                        ret: &mut hypercall.ret,
+                        // SAFETY: Safe because the exit_reason (which comes from the kernel)
+                        // told us which union field to use.
+                        longmode: unsafe { hypercall.__bindgen_anon_1.longmode },
+                    }))
+                }
+                KVM_EXIT_DEBUG => {
+                    // SAFETY: Safe because the exit_reason (which comes from the kernel) told us
+                    // which union field to use.
+                    let debug = unsafe { run.__bindgen_anon_1.debug };
+                    Ok(VcpuExit::Debug(debug.arch))
+                }
+                KVM_EXIT_HLT => Ok(VcpuExit::Hlt),
+                KVM_EXIT_MMIO => {
+                    // SAFETY: Safe because the exit_reason (which comes from the kernel) told us
+                    // which union field to use.
+                    let mmio = unsafe { &mut run.__bindgen_anon_1.mmio };
+                    let addr = mmio.phys_addr;
+                    let len = mmio.len as usize;
+                    let data_slice = &mut mmio.data[..len];
+                    if mmio.is_write != 0 {
+                        Ok(VcpuExit::MmioWrite(addr, data_slice))
+                    } else {
+                        Ok(VcpuExit::MmioRead(addr, data_slice))
+                    }
+                }
+                KVM_EXIT_X86_RDMSR => {
+                    // SAFETY: Safe because the exit_reason (which comes from the kernel) told us
+                    // which union field to use.
+                    let msr = unsafe { &mut run.__bindgen_anon_1.msr };
+                    let exit = ReadMsrExit {
+                        error: &mut msr.error,
+                        reason: MsrExitReason::from_bits_truncate(msr.reason),
+                        index: msr.index,
+                        data: &mut msr.data,
+                    };
+                    Ok(VcpuExit::X86Rdmsr(exit))
+                }
+                KVM_EXIT_X86_WRMSR => {
+                    // SAFETY: Safe because the exit_reason (which comes from the kernel) told us
+                    // which union field to use.
+                    let msr = unsafe { &mut run.__bindgen_anon_1.msr };
+                    let exit = WriteMsrExit {
+                        error: &mut msr.error,
+                        reason: MsrExitReason::from_bits_truncate(msr.reason),
+                        index: msr.index,
+                        data: msr.data,
+                    };
+                    Ok(VcpuExit::X86Wrmsr(exit))
+                }
+                KVM_EXIT_IRQ_WINDOW_OPEN => Ok(VcpuExit::IrqWindowOpen),
+                KVM_EXIT_SHUTDOWN => Ok(VcpuExit::Shutdown),
+                KVM_EXIT_FAIL_ENTRY => {
+                    // SAFETY: Safe because the exit_reason (which comes from the kernel) told us
+                    // which union field to use.
+                    let fail_entry = unsafe { &mut run.__bindgen_anon_1.fail_entry };
+                    Ok(VcpuExit::FailEntry(
+                        fail_entry.hardware_entry_failure_reason,
+                        fail_entry.cpu,
+                    ))
+                }
+                KVM_EXIT_INTR => Ok(VcpuExit::Intr),
+                KVM_EXIT_SET_TPR => Ok(VcpuExit::SetTpr),
+                KVM_EXIT_TPR_ACCESS => Ok(VcpuExit::TprAccess),
+                KVM_EXIT_S390_SIEIC => Ok(VcpuExit::S390Sieic),
+                KVM_EXIT_S390_RESET => Ok(VcpuExit::S390Reset),
+                KVM_EXIT_DCR => Ok(VcpuExit::Dcr),
+                KVM_EXIT_NMI => Ok(VcpuExit::Nmi),
+                KVM_EXIT_INTERNAL_ERROR => Ok(VcpuExit::InternalError),
+                KVM_EXIT_OSI => Ok(VcpuExit::Osi),
+                KVM_EXIT_PAPR_HCALL => Ok(VcpuExit::PaprHcall),
+                KVM_EXIT_S390_UCONTROL => Ok(VcpuExit::S390Ucontrol),
+                KVM_EXIT_WATCHDOG => Ok(VcpuExit::Watchdog),
+                KVM_EXIT_S390_TSCH => Ok(VcpuExit::S390Tsch),
+                KVM_EXIT_EPR => Ok(VcpuExit::Epr),
+                KVM_EXIT_SYSTEM_EVENT => {
+                    // SAFETY: Safe because the exit_reason (which comes from the kernel) told us
+                    // which union field to use.
+                    let system_event = unsafe { &mut run.__bindgen_anon_1.system_event };
+                    let ndata = system_event.ndata;
+                    // SAFETY: Safe because we only populate with valid data (based on ndata)
+                    let data = unsafe { &system_event.__bindgen_anon_1.data[0..ndata as usize] };
+                    Ok(VcpuExit::SystemEvent(system_event.type_, data))
+                }
+                KVM_EXIT_S390_STSI => Ok(VcpuExit::S390Stsi),
+                KVM_EXIT_IOAPIC_EOI => {
+                    // SAFETY: Safe because the exit_reason (which comes from the kernel) told us
+                    // which union field to use.
+                    let eoi = unsafe { &mut run.__bindgen_anon_1.eoi };
+                    Ok(VcpuExit::IoapicEoi(eoi.vector))
+                }
+                KVM_EXIT_HYPERV => Ok(VcpuExit::Hyperv),
+                r => Ok(VcpuExit::Unsupported(r)),
+            }
+        } else {
+            let errno = errno::Error::last();
+            let run = self.kvm_run_ptr.as_mut_ref();
+            // From https://docs.kernel.org/virt/kvm/api.html#kvm-run :
+            //
+            // KVM_EXIT_MEMORY_FAULT is unique among all KVM exit reasons in that it accompanies
+            // a return code of ‘-1’, not ‘0’! errno will always be set to EFAULT or EHWPOISON
+            // when KVM exits with KVM_EXIT_MEMORY_FAULT, userspace should assume kvm_run.exit_reason
+            // is stale/undefined for all other error numbers.
+            if ret == -1
+                && (errno == errno::Error::new(libc::EFAULT)
+                    || errno == errno::Error::new(libc::EHWPOISON))
+                && run.exit_reason == KVM_EXIT_MEMORY_FAULT
+            {
+                // SAFETY: Safe because the exit_reason (which comes from the kernel) told us
+                // which union field to use.
+                let fault = unsafe { &mut run.__bindgen_anon_1.memory_fault };
+                Ok(VcpuExit::MemoryFault {
+                    flags: fault.flags,
+                    gpa: fault.gpa,
+                    size: fault.size,
+                })
+            } else {
+                Err(errno)
+            }
+        }
+    }
+
+    /// Returns a mutable reference to the kvm_run structure.
+    pub fn get_kvm_run(&mut self) -> &mut kvm_run {
+        self.kvm_run_ptr.as_mut_ref()
+    }
+
+    /// Sets the `immediate_exit` flag on the `kvm_run` struct associated with this vCPU to `val`.
+    pub fn set_kvm_immediate_exit(&mut self, val: u8) {
+        let kvm_run = self.kvm_run_ptr.as_mut_ref();
+        kvm_run.immediate_exit = val;
+    }
+
+    /// Returns the vCPU TSC frequency in KHz or an error if the host has unstable TSC.
+    ///
+    /// # Example
+    ///
+    /// ```rust
+    /// # extern crate kvm_ioctls;
+    /// # use kvm_ioctls::Kvm;
+    /// let kvm = Kvm::new().unwrap();
+    /// let vm = kvm.create_vm().unwrap();
+    /// let vcpu = vm.create_vcpu(0).unwrap();
+    /// let tsc_khz = vcpu.get_tsc_khz().unwrap();
+    /// ```
+    ///
+    #[cfg(target_arch = "x86_64")]
+    pub fn get_tsc_khz(&self) -> Result<u32> {
+        // SAFETY: Safe because we know that our file is a KVM fd and that the request is one of
+        // the ones defined by kernel.
+        let ret = unsafe { ioctl(self, KVM_GET_TSC_KHZ()) };
+        if ret >= 0 {
+            Ok(ret as u32)
+        } else {
+            Err(errno::Error::new(ret))
+        }
+    }
+
+    /// Sets the specified vCPU TSC frequency.
+    ///
+    /// # Arguments
+    ///
+    /// * `freq` - The frequency unit is KHz as per the KVM API documentation
+    ///   for `KVM_SET_TSC_KHZ`.
+    ///
+    /// # Example
+    ///
+    /// ```rust
+    /// # extern crate kvm_ioctls;
+    /// # use kvm_ioctls::{Cap, Kvm};
+    /// let kvm = Kvm::new().unwrap();
+    /// let vm = kvm.create_vm().unwrap();
+    /// let vcpu = vm.create_vcpu(0).unwrap();
+    /// if kvm.check_extension(Cap::GetTscKhz) && kvm.check_extension(Cap::TscControl) {
+    ///     vcpu.set_tsc_khz(1000).unwrap();
+    /// }
+    /// ```
+    ///
+    #[cfg(target_arch = "x86_64")]
+    pub fn set_tsc_khz(&self, freq: u32) -> Result<()> {
+        // SAFETY: Safe because we know that our file is a KVM fd and that the request is one of
+        // the ones defined by kernel.
+        let ret = unsafe { ioctl_with_val(self, KVM_SET_TSC_KHZ(), freq as u64) };
+        if ret < 0 {
+            Err(errno::Error::last())
+        } else {
+            Ok(())
+        }
+    }
+
+    /// Translates a virtual address according to the vCPU's current address translation mode.
+    ///
+    /// The physical address is returned in a `kvm_translation` structure as defined in the
+    /// [KVM API documentation](https://www.kernel.org/doc/Documentation/virtual/kvm/api.txt).
+    /// See documentation for `KVM_TRANSLATE`.
+    ///
+    /// # Arguments
+    ///
+    /// * `gva` - The virtual address to translate.
+    ///
+    /// # Example
+    ///
+    /// ```rust
+    /// # extern crate kvm_ioctls;
+    /// # use kvm_ioctls::Kvm;
+    /// let kvm = Kvm::new().unwrap();
+    /// let vm = kvm.create_vm().unwrap();
+    /// let vcpu = vm.create_vcpu(0).unwrap();
+    /// #[cfg(target_arch = "x86_64")]
+    /// let tr = vcpu.translate_gva(0x10000).unwrap();
+    /// ```
+    #[cfg(target_arch = "x86_64")]
+    pub fn translate_gva(&self, gva: u64) -> Result<kvm_translation> {
+        let mut tr = kvm_translation {
+            linear_address: gva,
+            ..Default::default()
+        };
+
+        // SAFETY: Safe because we know that our file is a vCPU fd, we know the kernel will only
+        // write the correct amount of memory to our pointer, and we verify the return result.
+ let ret = unsafe { ioctl_with_mut_ref(self, KVM_TRANSLATE(), &mut tr) }; + if ret != 0 { + return Err(errno::Error::last()); + } + Ok(tr) + } + + /// Enable the given [`SyncReg`] to be copied to userspace on the next exit + /// + /// # Arguments + /// + /// * `reg` - The [`SyncReg`] to copy out of the guest + /// + /// # Example + /// + /// ```rust + /// # extern crate kvm_ioctls; + /// # use kvm_ioctls::{Kvm, SyncReg, Cap}; + /// let kvm = Kvm::new().unwrap(); + /// let vm = kvm.create_vm().unwrap(); + /// let mut vcpu = vm.create_vcpu(0).unwrap(); + /// vcpu.set_sync_valid_reg(SyncReg::Register); + /// vcpu.set_sync_valid_reg(SyncReg::SystemRegister); + /// vcpu.set_sync_valid_reg(SyncReg::VcpuEvents); + /// ``` + #[cfg(target_arch = "x86_64")] + pub fn set_sync_valid_reg(&mut self, reg: SyncReg) { + let kvm_run: &mut kvm_run = self.kvm_run_ptr.as_mut_ref(); + kvm_run.kvm_valid_regs |= reg as u64; + } + + /// Tell KVM to copy the given [`SyncReg`] into the guest on the next entry + /// + /// # Arguments + /// + /// * `reg` - The [`SyncReg`] to copy into the guest + /// + /// # Example + /// + /// ```rust + /// # extern crate kvm_ioctls; + /// # use kvm_ioctls::{Kvm, SyncReg, Cap}; + /// let kvm = Kvm::new().unwrap(); + /// let vm = kvm.create_vm().unwrap(); + /// let mut vcpu = vm.create_vcpu(0).unwrap(); + /// vcpu.set_sync_dirty_reg(SyncReg::Register); + /// ``` + #[cfg(target_arch = "x86_64")] + pub fn set_sync_dirty_reg(&mut self, reg: SyncReg) { + let kvm_run: &mut kvm_run = self.kvm_run_ptr.as_mut_ref(); + kvm_run.kvm_dirty_regs |= reg as u64; + } + + /// Disable the given [`SyncReg`] to be copied to userspace on the next exit + /// + /// # Arguments + /// + /// * `reg` - The [`SyncReg`] to not copy out of the guest + /// + /// # Example + /// + /// ```rust + /// # extern crate kvm_ioctls; + /// # use kvm_ioctls::{Kvm, SyncReg, Cap}; + /// let kvm = Kvm::new().unwrap(); + /// let vm = kvm.create_vm().unwrap(); + /// let mut vcpu = vm.create_vcpu(0).unwrap(); + /// vcpu.clear_sync_valid_reg(SyncReg::Register); + /// ``` + #[cfg(target_arch = "x86_64")] + pub fn clear_sync_valid_reg(&mut self, reg: SyncReg) { + let kvm_run: &mut kvm_run = self.kvm_run_ptr.as_mut_ref(); + kvm_run.kvm_valid_regs &= !(reg as u64); + } + + /// Tell KVM to not copy the given [`SyncReg`] into the guest on the next entry + /// + /// # Arguments + /// + /// * `reg` - The [`SyncReg`] to not copy out into the guest + /// + /// # Example + /// + /// ```rust + /// # extern crate kvm_ioctls; + /// # use kvm_ioctls::{Kvm, SyncReg, Cap}; + /// let kvm = Kvm::new().unwrap(); + /// let vm = kvm.create_vm().unwrap(); + /// let mut vcpu = vm.create_vcpu(0).unwrap(); + /// vcpu.clear_sync_dirty_reg(SyncReg::Register); + /// ``` + #[cfg(target_arch = "x86_64")] + pub fn clear_sync_dirty_reg(&mut self, reg: SyncReg) { + let kvm_run: &mut kvm_run = self.kvm_run_ptr.as_mut_ref(); + kvm_run.kvm_dirty_regs &= !(reg as u64); + } + + /// Get the [`kvm_sync_regs`] from the VM + /// + /// # Example + /// + /// ```rust + /// # extern crate kvm_ioctls; + /// # use kvm_ioctls::{Kvm, SyncReg, Cap}; + /// let kvm = Kvm::new().unwrap(); + /// let vm = kvm.create_vm().unwrap(); + /// let mut vcpu = vm.create_vcpu(0).unwrap(); + /// if kvm.check_extension(Cap::SyncRegs) { + /// vcpu.set_sync_valid_reg(SyncReg::Register); + /// vcpu.run(); + /// let guest_rax = vcpu.sync_regs().regs.rax; + /// } + /// ``` + #[cfg(target_arch = "x86_64")] + pub fn sync_regs(&self) -> kvm_sync_regs { + let kvm_run = self.kvm_run_ptr.as_ref(); + + // 
SAFETY: Accessing this union field could be out of bounds if the `kvm_run` + // allocation isn't large enough. The `kvm_run` region is set using + // `get_vcpu_map_size`, so this region is in bounds + unsafe { kvm_run.s.regs } + } + + /// Get a mutable reference to the [`kvm_sync_regs`] from the VM + /// + /// # Example + /// + /// ```rust + /// # extern crate kvm_ioctls; + /// # use kvm_ioctls::{Kvm, SyncReg, Cap}; + /// let kvm = Kvm::new().unwrap(); + /// let vm = kvm.create_vm().unwrap(); + /// let mut vcpu = vm.create_vcpu(0).unwrap(); + /// if kvm.check_extension(Cap::SyncRegs) { + /// vcpu.set_sync_valid_reg(SyncReg::Register); + /// vcpu.run(); + /// // Set the guest RAX to 0xdeadbeef + /// vcpu.sync_regs_mut().regs.rax = 0xdeadbeef; + /// vcpu.set_sync_dirty_reg(SyncReg::Register); + /// vcpu.run(); + /// } + /// ``` + #[cfg(target_arch = "x86_64")] + pub fn sync_regs_mut(&mut self) -> &mut kvm_sync_regs { + let kvm_run: &mut kvm_run = self.kvm_run_ptr.as_mut_ref(); + + // SAFETY: Accessing this union field could be out of bounds if the `kvm_run` + // allocation isn't large enough. The `kvm_run` region is set using + // `get_vcpu_map_size`, so this region is in bounds + unsafe { &mut kvm_run.s.regs } + } + + /// Triggers an SMI on the virtual CPU. + /// + /// See documentation for `KVM_SMI`. + /// + /// ```rust + /// # use kvm_ioctls::{Kvm, Cap}; + /// let kvm = Kvm::new().unwrap(); + /// let vm = kvm.create_vm().unwrap(); + /// let vcpu = vm.create_vcpu(0).unwrap(); + /// if kvm.check_extension(Cap::X86Smm) { + /// vcpu.smi().unwrap(); + /// } + /// ``` + #[cfg(target_arch = "x86_64")] + pub fn smi(&self) -> Result<()> { + // SAFETY: Safe because we call this with a Vcpu fd and we trust the kernel. + let ret = unsafe { ioctl(self, KVM_SMI()) }; + match ret { + 0 => Ok(()), + _ => Err(errno::Error::last()), + } + } + + /// Queues an NMI on the thread's vcpu. Only usable if `KVM_CAP_USER_NMI` + /// is available. + /// + /// See the documentation for `KVM_NMI`. + /// + /// # Example + /// + /// ```rust + /// # use kvm_ioctls::{Kvm, Cap}; + /// let kvm = Kvm::new().unwrap(); + /// let vm = kvm.create_vm().unwrap(); + /// let vcpu = vm.create_vcpu(0).unwrap(); + /// if kvm.check_extension(Cap::UserNmi) { + /// vcpu.nmi().unwrap(); + /// } + /// ``` + #[cfg(target_arch = "x86_64")] + pub fn nmi(&self) -> Result<()> { + // SAFETY: Safe because we call this with a Vcpu fd and we trust the kernel. + let ret = unsafe { ioctl(self, KVM_NMI()) }; + match ret { + 0 => Ok(()), + _ => Err(errno::Error::last()), + } + } + + /// Maps the coalesced MMIO ring page. This allows reading entries from + /// the ring via [`coalesced_mmio_read()`](VcpuFd::coalesced_mmio_read). + /// + /// # Returns + /// + /// Returns an error if the buffer could not be mapped, usually because + /// `KVM_CAP_COALESCED_MMIO` ([`Cap::CoalescedMmio`](crate::Cap::CoalescedMmio)) + /// is not available. + /// + /// # Examples + /// + /// ```rust + /// # use kvm_ioctls::{Kvm, Cap}; + /// let kvm = Kvm::new().unwrap(); + /// let vm = kvm.create_vm().unwrap(); + /// let mut vcpu = vm.create_vcpu(0).unwrap(); + /// if kvm.check_extension(Cap::CoalescedMmio) { + /// vcpu.map_coalesced_mmio_ring().unwrap(); + /// } + /// ``` + pub fn map_coalesced_mmio_ring(&mut self) -> Result<()> { + if self.coalesced_mmio_ring.is_none() { + let ring = KvmCoalescedIoRing::mmap_from_fd(&self.vcpu)?; + self.coalesced_mmio_ring = Some(ring); + } + Ok(()) + } + + /// Read a single entry from the coalesced MMIO ring. 
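+    ///
+    /// A minimal drain-loop sketch (assuming `Cap::CoalescedMmio` is available and
+    /// that addresses of interest were registered beforehand via
+    /// `VmFd::register_coalesced_mmio`):
+    ///
+    /// ```no_run
+    /// # use kvm_ioctls::{Cap, Kvm};
+    /// let kvm = Kvm::new().unwrap();
+    /// let vm = kvm.create_vm().unwrap();
+    /// let mut vcpu = vm.create_vcpu(0).unwrap();
+    /// if kvm.check_extension(Cap::CoalescedMmio) {
+    ///     vcpu.map_coalesced_mmio_ring().unwrap();
+    ///     // After a vcpu.run() exit, drain whatever writes the kernel batched up.
+    ///     while let Some(entry) = vcpu.coalesced_mmio_read().unwrap() {
+    ///         println!("coalesced MMIO write of {} bytes at {:#x}", entry.len, entry.phys_addr);
+    ///     }
+    /// }
+    /// ```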
+    ///
+    /// For entries to be appended to the ring by the kernel, addresses must be registered
+    /// via [`VmFd::register_coalesced_mmio()`](crate::VmFd::register_coalesced_mmio()).
+    ///
+    /// [`map_coalesced_mmio_ring()`](VcpuFd::map_coalesced_mmio_ring) must have been called beforehand.
+    ///
+    /// See the documentation for `KVM_(UN)REGISTER_COALESCED_MMIO`.
+    ///
+    /// # Returns
+    ///
+    /// * An error if [`map_coalesced_mmio_ring()`](VcpuFd::map_coalesced_mmio_ring)
+    ///   was not called beforehand.
+    /// * `Ok(None)` if the ring is empty.
+    /// * `Ok(Some(entry))` if an entry was successfully read.
+    pub fn coalesced_mmio_read(&mut self) -> Result<Option<kvm_coalesced_mmio>> {
+        self.coalesced_mmio_ring
+            .as_mut()
+            .ok_or(errno::Error::new(libc::EIO))
+            .map(|ring| ring.read_entry())
+    }
+}
+
+/// Helper function to create a new `VcpuFd`.
+///
+/// This should not be exported as a public function because the preferred way is to use
+/// `create_vcpu` from `VmFd`. The function cannot be part of the `VcpuFd` implementation because
+/// then it would be exported with the public `VcpuFd` interface.
+pub fn new_vcpu(vcpu: File, kvm_run_ptr: KvmRunWrapper) -> VcpuFd {
+    VcpuFd {
+        vcpu,
+        kvm_run_ptr,
+        coalesced_mmio_ring: None,
+    }
+}
+
+impl AsRawFd for VcpuFd {
+    fn as_raw_fd(&self) -> RawFd {
+        self.vcpu.as_raw_fd()
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    #![allow(clippy::undocumented_unsafe_blocks)]
+    extern crate byteorder;
+
+    use super::*;
+    #[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))]
+    use crate::cap::Cap;
+    use crate::ioctls::system::Kvm;
+    use std::ptr::NonNull;
+
+    // Helper function for memory mapping `size` bytes of anonymous memory.
+    // Panics if the mmap fails.
+    fn mmap_anonymous(size: usize) -> NonNull<u8> {
+        use std::ptr::null_mut;
+
+        let addr = unsafe {
+            libc::mmap(
+                null_mut(),
+                size,
+                libc::PROT_READ | libc::PROT_WRITE,
+                libc::MAP_ANONYMOUS | libc::MAP_SHARED | libc::MAP_NORESERVE,
+                -1,
+                0,
+            )
+        };
+        if addr == libc::MAP_FAILED {
+            panic!("mmap failed.");
+        }
+
+        NonNull::new(addr).unwrap().cast()
+    }
+
+    #[test]
+    fn test_create_vcpu() {
+        let kvm = Kvm::new().unwrap();
+        let vm = kvm.create_vm().unwrap();
+
+        vm.create_vcpu(0).unwrap();
+    }
+
+    #[cfg(target_arch = "x86_64")]
+    #[test]
+    fn test_get_cpuid() {
+        let kvm = Kvm::new().unwrap();
+        if kvm.check_extension(Cap::ExtCpuid) {
+            let vm = kvm.create_vm().unwrap();
+            let cpuid = kvm.get_supported_cpuid(KVM_MAX_CPUID_ENTRIES).unwrap();
+            let ncpuids = cpuid.as_slice().len();
+            assert!(ncpuids <= KVM_MAX_CPUID_ENTRIES);
+            let nr_vcpus = kvm.get_nr_vcpus();
+            for cpu_idx in 0..nr_vcpus {
+                let vcpu = vm.create_vcpu(cpu_idx as u64).unwrap();
+                vcpu.set_cpuid2(&cpuid).unwrap();
+                let retrieved_cpuid = vcpu.get_cpuid2(ncpuids).unwrap();
+                // Only check the first few leaves as some (e.g. 13) are reserved.
+                assert_eq!(cpuid.as_slice()[..3], retrieved_cpuid.as_slice()[..3]);
+            }
+        }
+    }
+
+    #[cfg(target_arch = "x86_64")]
+    #[test]
+    fn test_get_cpuid_fail_num_entries_too_high() {
+        let kvm = Kvm::new().unwrap();
+        if kvm.check_extension(Cap::ExtCpuid) {
+            let vm = kvm.create_vm().unwrap();
+            let vcpu = vm.create_vcpu(0).unwrap();
+            let err_cpuid = vcpu.get_cpuid2(KVM_MAX_CPUID_ENTRIES + 1_usize).err();
+            assert_eq!(err_cpuid.unwrap().errno(), libc::ENOMEM);
+        }
+    }
+
+    #[cfg(target_arch = "x86_64")]
+    #[test]
+    fn test_get_cpuid_fail_num_entries_too_small() {
+        let kvm = Kvm::new().unwrap();
+        if kvm.check_extension(Cap::ExtCpuid) {
+            let vm = kvm.create_vm().unwrap();
+            let cpuid = kvm.get_supported_cpuid(KVM_MAX_CPUID_ENTRIES).unwrap();
+            let ncpuids = cpuid.as_slice().len();
+            assert!(ncpuids <= KVM_MAX_CPUID_ENTRIES);
+            let nr_vcpus = kvm.get_nr_vcpus();
+            for cpu_idx in 0..nr_vcpus {
+                let vcpu = vm.create_vcpu(cpu_idx as u64).unwrap();
+                vcpu.set_cpuid2(&cpuid).unwrap();
+                let err = vcpu.get_cpuid2(ncpuids - 1_usize).err();
+                assert_eq!(err.unwrap().errno(), libc::E2BIG);
+            }
+        }
+    }
+
+    #[cfg(target_arch = "x86_64")]
+    #[test]
+    fn test_set_cpuid() {
+        let kvm = Kvm::new().unwrap();
+        if kvm.check_extension(Cap::ExtCpuid) {
+            let vm = kvm.create_vm().unwrap();
+            let mut cpuid = kvm.get_supported_cpuid(KVM_MAX_CPUID_ENTRIES).unwrap();
+            let ncpuids = cpuid.as_slice().len();
+            assert!(ncpuids <= KVM_MAX_CPUID_ENTRIES);
+            let vcpu = vm.create_vcpu(0).unwrap();
+
+            // Setting Manufacturer ID
+            {
+                let entries = cpuid.as_mut_slice();
+                for entry in entries.iter_mut() {
+                    if entry.function == 0 {
+                        // " KVMKVMKVM "
+                        entry.ebx = 0x4b4d564b;
+                        entry.ecx = 0x564b4d56;
+                        entry.edx = 0x4d;
+                    }
+                }
+            }
+            vcpu.set_cpuid2(&cpuid).unwrap();
+            let cpuid_0 = vcpu.get_cpuid2(ncpuids).unwrap();
+            for entry in cpuid_0.as_slice() {
+                if entry.function == 0 {
+                    assert_eq!(entry.ebx, 0x4b4d564b);
+                    assert_eq!(entry.ecx, 0x564b4d56);
+                    assert_eq!(entry.edx, 0x4d);
+                }
+            }
+
+            // Disabling Intel SHA extensions.
+            const EBX_SHA_SHIFT: u32 = 29;
+            let mut ebx_sha_off = 0u32;
+            {
+                let entries = cpuid.as_mut_slice();
+                for entry in entries.iter_mut() {
+                    if entry.function == 7 && entry.ecx == 0 {
+                        entry.ebx &= !(1 << EBX_SHA_SHIFT);
+                        ebx_sha_off = entry.ebx;
+                    }
+                }
+            }
+            vcpu.set_cpuid2(&cpuid).unwrap();
+            let cpuid_1 = vcpu.get_cpuid2(ncpuids).unwrap();
+            for entry in cpuid_1.as_slice() {
+                if entry.function == 7 && entry.ecx == 0 {
+                    assert_eq!(entry.ebx, ebx_sha_off);
+                }
+            }
+        }
+    }
+
+    #[cfg(target_arch = "x86_64")]
+    #[allow(non_snake_case)]
+    #[test]
+    fn test_fpu() {
+        // as per https://github.com/torvalds/linux/blob/master/arch/x86/include/asm/fpu/internal.h
+        let KVM_FPU_CWD: usize = 0x37f;
+        let KVM_FPU_MXCSR: usize = 0x1f80;
+        let kvm = Kvm::new().unwrap();
+        let vm = kvm.create_vm().unwrap();
+        let vcpu = vm.create_vcpu(0).unwrap();
+        let fpu: kvm_fpu = kvm_fpu {
+            fcw: KVM_FPU_CWD as u16,
+            mxcsr: KVM_FPU_MXCSR as u32,
+            ..Default::default()
+        };
+
+        vcpu.set_fpu(&fpu).unwrap();
+        assert_eq!(vcpu.get_fpu().unwrap().fcw, KVM_FPU_CWD as u16);
+    }
+
+    #[cfg(target_arch = "x86_64")]
+    #[test]
+    fn lapic_test() {
+        use std::io::Cursor;
+        // We might get rid of byteorder if we replace mem::transmute with something safer.
+        use self::byteorder::{LittleEndian, ReadBytesExt, WriteBytesExt};
+        // As per https://github.com/torvalds/linux/arch/x86/kvm/lapic.c
+        // Try to write and read the APIC_ICR (0x300) register, which is not read-only and
+        // can simply be written to.
+        let kvm = Kvm::new().unwrap();
+        assert!(kvm.check_extension(Cap::Irqchip));
+        let vm = kvm.create_vm().unwrap();
+        // The get_lapic ioctl will fail if there is no irqchip created beforehand.
+        vm.create_irq_chip().unwrap();
+        let vcpu = vm.create_vcpu(0).unwrap();
+        let mut klapic: kvm_lapic_state = vcpu.get_lapic().unwrap();
+
+        let reg_offset = 0x300;
+        let value = 2_u32;
+        // Try to write and read the APIC_ICR (0x300).
+        let write_slice =
+            unsafe { &mut *(&mut klapic.regs[reg_offset..] as *mut [i8] as *mut [u8]) };
+        let mut writer = Cursor::new(write_slice);
+        writer.write_u32::<LittleEndian>(value).unwrap();
+        vcpu.set_lapic(&klapic).unwrap();
+        klapic = vcpu.get_lapic().unwrap();
+        let read_slice = unsafe { &*(&klapic.regs[reg_offset..] as *const [i8] as *const [u8]) };
+        let mut reader = Cursor::new(read_slice);
+        assert_eq!(reader.read_u32::<LittleEndian>().unwrap(), value);
+    }
+
+    #[cfg(target_arch = "x86_64")]
+    #[test]
+    fn msrs_test() {
+        use vmm_sys_util::fam::FamStruct;
+        let kvm = Kvm::new().unwrap();
+        let vm = kvm.create_vm().unwrap();
+        let vcpu = vm.create_vcpu(0).unwrap();
+
+        // Set the following MSRs.
+        let msrs_to_set = [
+            kvm_msr_entry {
+                index: 0x0000_0174,
+                data: 0x0,
+                ..Default::default()
+            },
+            kvm_msr_entry {
+                index: 0x0000_0175,
+                data: 0x1,
+                ..Default::default()
+            },
+        ];
+        let msrs_wrapper = Msrs::from_entries(&msrs_to_set).unwrap();
+        vcpu.set_msrs(&msrs_wrapper).unwrap();
+
+        // Now test that GET_MSRS returns the same.
+        // Configure the struct to say which entries we want.
+        let mut returned_kvm_msrs = Msrs::from_entries(&[
+            kvm_msr_entry {
+                index: 0x0000_0174,
+                ..Default::default()
+            },
+            kvm_msr_entry {
+                index: 0x0000_0175,
+                ..Default::default()
+            },
+        ])
+        .unwrap();
+        let nmsrs = vcpu.get_msrs(&mut returned_kvm_msrs).unwrap();
+
+        // Verify the lengths match.
+        assert_eq!(nmsrs, msrs_to_set.len());
+        assert_eq!(nmsrs, returned_kvm_msrs.as_fam_struct_ref().len());
+
+        // Verify the contents match.
+        let returned_kvm_msr_entries = returned_kvm_msrs.as_slice();
+        for (i, entry) in returned_kvm_msr_entries.iter().enumerate() {
+            assert_eq!(entry, &msrs_to_set[i]);
+        }
+    }
+
+    #[cfg(any(
+        target_arch = "x86_64",
+        target_arch = "aarch64",
+        target_arch = "riscv64",
+        target_arch = "s390x"
+    ))]
+    #[test]
+    fn mpstate_test() {
+        let kvm = Kvm::new().unwrap();
+        let vm = kvm.create_vm().unwrap();
+        let vcpu = vm.create_vcpu(0).unwrap();
+        let mp_state = vcpu.get_mp_state().unwrap();
+        vcpu.set_mp_state(mp_state).unwrap();
+        let other_mp_state = vcpu.get_mp_state().unwrap();
+        assert_eq!(mp_state, other_mp_state);
+    }
+
+    #[cfg(target_arch = "x86_64")]
+    #[test]
+    fn xsave_test() {
+        let kvm = Kvm::new().unwrap();
+        let vm = kvm.create_vm().unwrap();
+        let vcpu = vm.create_vcpu(0).unwrap();
+        let xsave = vcpu.get_xsave().unwrap();
+        vcpu.set_xsave(&xsave).unwrap();
+        let other_xsave = vcpu.get_xsave().unwrap();
+        assert_eq!(&xsave.region[..], &other_xsave.region[..]);
+    }
+
+    #[cfg(target_arch = "x86_64")]
+    #[test]
+    fn xcrs_test() {
+        let kvm = Kvm::new().unwrap();
+        let vm = kvm.create_vm().unwrap();
+        let vcpu = vm.create_vcpu(0).unwrap();
+        let xcrs = vcpu.get_xcrs().unwrap();
+        vcpu.set_xcrs(&xcrs).unwrap();
+        let other_xcrs = vcpu.get_xcrs().unwrap();
+        assert_eq!(xcrs, other_xcrs);
+    }
+
+    #[cfg(target_arch = "x86_64")]
+    #[test]
+    fn debugregs_test() {
+        let kvm = Kvm::new().unwrap();
+        let vm = kvm.create_vm().unwrap();
+        let vcpu = vm.create_vcpu(0).unwrap();
+        let debugregs = vcpu.get_debug_regs().unwrap();
+        vcpu.set_debug_regs(&debugregs).unwrap();
+        let other_debugregs = vcpu.get_debug_regs().unwrap();
+        assert_eq!(debugregs, other_debugregs);
+    }
+
+    #[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))]
+    #[test]
+    fn vcpu_events_test() {
+        let kvm = Kvm::new().unwrap();
+        if kvm.check_extension(Cap::VcpuEvents) {
+            let vm = kvm.create_vm().unwrap();
+            let vcpu = vm.create_vcpu(0).unwrap();
+            let vcpu_events = vcpu.get_vcpu_events().unwrap();
+            vcpu.set_vcpu_events(&vcpu_events).unwrap();
+            let other_vcpu_events = vcpu.get_vcpu_events().unwrap();
+            assert_eq!(vcpu_events, other_vcpu_events);
+        }
+    }
+
+    #[cfg(target_arch = "aarch64")]
+    #[test]
+    fn test_run_code() {
+        use std::io::Write;
+
+        let kvm = Kvm::new().unwrap();
+        let vm = kvm.create_vm().unwrap();
+        #[rustfmt::skip]
+        let code = [
+            0x40, 0x20, 0x80, 0x52, /* mov w0, #0x102 */
+            0x00, 0x01, 0x00, 0xb9, /* str w0, [x8]; test physical memory write */
+            0x81, 0x60, 0x80, 0x52, /* mov w1, #0x304 */
+            0x02, 0x00, 0x80, 0x52, /* mov w2, #0x0 */
+            0x20, 0x01, 0x40, 0xb9, /* ldr w0, [x9]; test MMIO read */
+            0x1f, 0x18, 0x14, 0x71, /* cmp w0, #0x506 */
+            0x20, 0x00, 0x82, 0x1a, /* csel w0, w1, w2, eq */
+            0x20, 0x01, 0x00, 0xb9, /* str w0, [x9]; test MMIO write */
+            0x00, 0x80, 0xb0, 0x52, /* mov w0, #0x84000000 */
+            0x00, 0x00, 0x1d, 0x32, /* orr w0, w0, #0x08 */
+            0x02, 0x00, 0x00, 0xd4, /* hvc #0x0 */
+            0x00, 0x00, 0x00, 0x14, /* b <this address>; shouldn't get here, but if so loop forever */
+        ];
+
+        let mem_size = 0x20000;
+        let load_addr = mmap_anonymous(mem_size).as_ptr();
+        let guest_addr: u64 = 0x10000;
+        let slot: u32 = 0;
+        let mem_region = kvm_userspace_memory_region {
+            slot,
+            guest_phys_addr: guest_addr,
+            memory_size: mem_size as u64,
+            userspace_addr: load_addr as u64,
+            flags: KVM_MEM_LOG_DIRTY_PAGES,
+        };
+        unsafe {
+            vm.set_user_memory_region(mem_region).unwrap();
+        }
+
+        unsafe {
+            // Get a mutable slice of `mem_size` from `load_addr`.
+            // This is safe because we mapped it before.
+ let mut slice = std::slice::from_raw_parts_mut(load_addr, mem_size); + slice.write_all(&code).unwrap(); + } + + let mut vcpu_fd = vm.create_vcpu(0).unwrap(); + let mut kvi = kvm_vcpu_init::default(); + vm.get_preferred_target(&mut kvi).unwrap(); + kvi.features[0] |= 1 << KVM_ARM_VCPU_PSCI_0_2; + vcpu_fd.vcpu_init(&kvi).unwrap(); + + let core_reg_base: u64 = 0x6030_0000_0010_0000; + let mmio_addr: u64 = guest_addr + mem_size as u64; + + // Set the PC to the guest address where we loaded the code. + vcpu_fd + .set_one_reg(core_reg_base + 2 * 32, &(guest_addr as u128).to_le_bytes()) + .unwrap(); + + // Set x8 and x9 to the addresses the guest test code needs + vcpu_fd + .set_one_reg( + core_reg_base + 2 * 8, + &(guest_addr as u128 + 0x10000).to_le_bytes(), + ) + .unwrap(); + vcpu_fd + .set_one_reg(core_reg_base + 2 * 9, &(mmio_addr as u128).to_le_bytes()) + .unwrap(); + + loop { + match vcpu_fd.run().expect("run failed") { + VcpuExit::MmioRead(addr, data) => { + assert_eq!(addr, mmio_addr); + assert_eq!(data.len(), 4); + data[3] = 0x0; + data[2] = 0x0; + data[1] = 0x5; + data[0] = 0x6; + } + VcpuExit::MmioWrite(addr, data) => { + assert_eq!(addr, mmio_addr); + assert_eq!(data.len(), 4); + assert_eq!(data[3], 0x0); + assert_eq!(data[2], 0x0); + assert_eq!(data[1], 0x3); + assert_eq!(data[0], 0x4); + // The code snippet dirties one page at guest_addr + 0x10000. + // The code page should not be dirty, as it's not written by the guest. + let dirty_pages_bitmap = vm.get_dirty_log(slot, mem_size).unwrap(); + let dirty_pages: u32 = dirty_pages_bitmap + .into_iter() + .map(|page| page.count_ones()) + .sum(); + assert_eq!(dirty_pages, 1); + } + VcpuExit::SystemEvent(type_, data) => { + assert_eq!(type_, KVM_SYSTEM_EVENT_SHUTDOWN); + assert_eq!(data[0], 0); + break; + } + r => panic!("unexpected exit reason: {:?}", r), + } + } + } + + #[cfg(target_arch = "riscv64")] + #[test] + fn test_run_code() { + use std::io::Write; + + let kvm = Kvm::new().unwrap(); + let vm = kvm.create_vm().unwrap(); + #[rustfmt::skip] + let code = [ + 0x13, 0x05, 0x50, 0x40, // li a0, 0x0405; + 0x23, 0x20, 0xac, 0x00, // sw a0, 0(s8); test physical memory write + 0x03, 0xa5, 0x0c, 0x00, // lw a0, 0(s9); test MMIO read + 0x93, 0x05, 0x70, 0x60, // li a1, 0x0607; + 0x23, 0xa0, 0xbc, 0x00, // sw a1, 0(s9); test MMIO write + 0x6f, 0x00, 0x00, 0x00, // j .; shouldn't get here, but if so loop forever + ]; + + let mem_size = 0x20000; + let load_addr = mmap_anonymous(mem_size).as_ptr(); + let guest_addr: u64 = 0x10000; + let slot: u32 = 0; + let mem_region = kvm_userspace_memory_region { + slot, + guest_phys_addr: guest_addr, + memory_size: mem_size as u64, + userspace_addr: load_addr as u64, + flags: KVM_MEM_LOG_DIRTY_PAGES, + }; + unsafe { + vm.set_user_memory_region(mem_region).unwrap(); + } + + unsafe { + // Get a mutable slice of `mem_size` from `load_addr`. + // This is safe because we mapped it before. + let mut slice = std::slice::from_raw_parts_mut(load_addr, mem_size); + slice.write_all(&code).unwrap(); + } + + let mut vcpu_fd = vm.create_vcpu(0).unwrap(); + + let core_reg_base: u64 = 0x8030_0000_0200_0000; + let mmio_addr: u64 = guest_addr + mem_size as u64; + + // Set the PC to the guest address where we loaded the code. 
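+        // (`core_reg_base` is the one-reg id of the first field of `kvm_riscv_core`,
+        // the pc; the GPRs follow it, so the offsets 24 and 25 used below select
+        // s8 (x24) and s9 (x25).)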
+ vcpu_fd + .set_one_reg(core_reg_base, &(guest_addr as u128).to_le_bytes()) + .unwrap(); + + // Set s8 and s9 to the addresses the guest test code needs + vcpu_fd + .set_one_reg( + core_reg_base + 24, + &(guest_addr as u128 + 0x10000).to_le_bytes(), + ) + .unwrap(); + vcpu_fd + .set_one_reg(core_reg_base + 25, &(mmio_addr as u128).to_le_bytes()) + .unwrap(); + + loop { + match vcpu_fd.run().expect("run failed") { + VcpuExit::MmioRead(addr, data) => { + assert_eq!(addr, mmio_addr); + assert_eq!(data.len(), 4); + data[3] = 0x0; + data[2] = 0x0; + data[1] = 0x5; + data[0] = 0x6; + } + VcpuExit::MmioWrite(addr, data) => { + assert_eq!(addr, mmio_addr); + assert_eq!(data.len(), 4); + assert_eq!(data[3], 0x0); + assert_eq!(data[2], 0x0); + assert_eq!(data[1], 0x6); + assert_eq!(data[0], 0x7); + // The code snippet dirties one page at guest_addr + 0x10000. + // The code page should not be dirty, as it's not written by the guest. + let dirty_pages_bitmap = vm.get_dirty_log(slot, mem_size).unwrap(); + let dirty_pages: u32 = dirty_pages_bitmap + .into_iter() + .map(|page| page.count_ones()) + .sum(); + assert_eq!(dirty_pages, 1); + break; + } + r => panic!("unexpected exit reason: {:?}", r), + } + } + } + + #[cfg(target_arch = "x86_64")] + #[test] + fn test_run_code() { + use std::io::Write; + + let kvm = Kvm::new().unwrap(); + let vm = kvm.create_vm().unwrap(); + // This example is based on https://lwn.net/Articles/658511/ + #[rustfmt::skip] + let code = [ + 0xba, 0xf8, 0x03, /* mov $0x3f8, %dx */ + 0x00, 0xd8, /* add %bl, %al */ + 0x04, b'0', /* add $'0', %al */ + 0xee, /* out %al, %dx */ + 0xec, /* in %dx, %al */ + 0xc6, 0x06, 0x00, 0x80, 0x00, /* movl $0, (0x8000); This generates a MMIO Write.*/ + 0x8a, 0x16, 0x00, 0x80, /* movl (0x8000), %dl; This generates a MMIO Read.*/ + 0xc6, 0x06, 0x00, 0x20, 0x00, /* movl $0, (0x2000); Dirty one page in guest mem. */ + 0xf4, /* hlt */ + ]; + let expected_rips: [u64; 3] = [0x1003, 0x1005, 0x1007]; + + let mem_size = 0x4000; + let load_addr = mmap_anonymous(mem_size).as_ptr(); + let guest_addr: u64 = 0x1000; + let slot: u32 = 0; + let mem_region = kvm_userspace_memory_region { + slot, + guest_phys_addr: guest_addr, + memory_size: mem_size as u64, + userspace_addr: load_addr as u64, + flags: KVM_MEM_LOG_DIRTY_PAGES, + }; + unsafe { + vm.set_user_memory_region(mem_region).unwrap(); + } + + unsafe { + // Get a mutable slice of `mem_size` from `load_addr`. + // This is safe because we mapped it before. + let mut slice = std::slice::from_raw_parts_mut(load_addr, mem_size); + slice.write_all(&code).unwrap(); + } + + let mut vcpu_fd = vm.create_vcpu(0).unwrap(); + + let mut vcpu_sregs = vcpu_fd.get_sregs().unwrap(); + assert_ne!(vcpu_sregs.cs.base, 0); + assert_ne!(vcpu_sregs.cs.selector, 0); + vcpu_sregs.cs.base = 0; + vcpu_sregs.cs.selector = 0; + vcpu_fd.set_sregs(&vcpu_sregs).unwrap(); + + let mut vcpu_regs = vcpu_fd.get_regs().unwrap(); + // Set the Instruction Pointer to the guest address where we loaded the code. 
+ vcpu_regs.rip = guest_addr; + vcpu_regs.rax = 2; + vcpu_regs.rbx = 3; + vcpu_regs.rflags = 2; + vcpu_fd.set_regs(&vcpu_regs).unwrap(); + + let mut debug_struct = kvm_guest_debug { + control: KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP, + pad: 0, + arch: kvm_guest_debug_arch { + debugreg: [0, 0, 0, 0, 0, 0, 0, 0], + }, + }; + vcpu_fd.set_guest_debug(&debug_struct).unwrap(); + + let mut instr_idx = 0; + loop { + match vcpu_fd.run().expect("run failed") { + VcpuExit::IoIn(addr, data) => { + assert_eq!(addr, 0x3f8); + assert_eq!(data.len(), 1); + } + VcpuExit::IoOut(addr, data) => { + assert_eq!(addr, 0x3f8); + assert_eq!(data.len(), 1); + assert_eq!(data[0], b'5'); + } + VcpuExit::MmioRead(addr, data) => { + assert_eq!(addr, 0x8000); + assert_eq!(data.len(), 1); + } + VcpuExit::MmioWrite(addr, data) => { + assert_eq!(addr, 0x8000); + assert_eq!(data.len(), 1); + assert_eq!(data[0], 0); + } + VcpuExit::Debug(debug) => { + if instr_idx == expected_rips.len() - 1 { + // Disabling debugging/single-stepping + debug_struct.control = 0; + vcpu_fd.set_guest_debug(&debug_struct).unwrap(); + } else if instr_idx >= expected_rips.len() { + unreachable!(); + } + let vcpu_regs = vcpu_fd.get_regs().unwrap(); + assert_eq!(vcpu_regs.rip, expected_rips[instr_idx]); + assert_eq!(debug.exception, 1); + assert_eq!(debug.pc, expected_rips[instr_idx]); + // Check first 15 bits of DR6 + let mask = (1 << 16) - 1; + assert_eq!(debug.dr6 & mask, 0b100111111110000); + // Bit 10 in DR7 is always 1 + assert_eq!(debug.dr7, 1 << 10); + instr_idx += 1; + } + VcpuExit::Hlt => { + // The code snippet dirties 2 pages: + // * one when the code itself is loaded in memory; + // * and one more from the `movl` that writes to address 0x8000 + let dirty_pages_bitmap = vm.get_dirty_log(slot, mem_size).unwrap(); + let dirty_pages: u32 = dirty_pages_bitmap + .into_iter() + .map(|page| page.count_ones()) + .sum(); + assert_eq!(dirty_pages, 2); + break; + } + r => panic!("unexpected exit reason: {:?}", r), + } + } + } + + #[test] + #[cfg(any( + target_arch = "x86_64", + target_arch = "aarch64", + target_arch = "riscv64" + ))] + fn test_faulty_vcpu_fd() { + use std::os::unix::io::{FromRawFd, IntoRawFd}; + + let badf_errno = libc::EBADF; + + let mut faulty_vcpu_fd = VcpuFd { + vcpu: unsafe { File::from_raw_fd(-2) }, + kvm_run_ptr: KvmRunWrapper { + kvm_run_ptr: mmap_anonymous(10).cast(), + mmap_size: 10, + }, + coalesced_mmio_ring: None, + }; + + assert_eq!( + faulty_vcpu_fd.get_mp_state().unwrap_err().errno(), + badf_errno + ); + assert_eq!( + faulty_vcpu_fd + .set_mp_state(kvm_mp_state::default()) + .unwrap_err() + .errno(), + badf_errno + ); + #[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))] + assert_eq!( + faulty_vcpu_fd.get_vcpu_events().unwrap_err().errno(), + badf_errno + ); + #[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))] + assert_eq!( + faulty_vcpu_fd + .set_vcpu_events(&kvm_vcpu_events::default()) + .unwrap_err() + .errno(), + badf_errno + ); + assert_eq!(faulty_vcpu_fd.run().unwrap_err().errno(), badf_errno); + + // Don't drop the File object, or it'll notice the file it's trying to close is + // invalid and abort the process. 
+        let _ = faulty_vcpu_fd.vcpu.into_raw_fd();
+    }
+
+    #[test]
+    #[cfg(target_arch = "x86_64")]
+    fn test_faulty_vcpu_fd_x86_64() {
+        use std::os::unix::io::{FromRawFd, IntoRawFd};
+
+        let badf_errno = libc::EBADF;
+
+        let faulty_vcpu_fd = VcpuFd {
+            vcpu: unsafe { File::from_raw_fd(-2) },
+            kvm_run_ptr: KvmRunWrapper {
+                kvm_run_ptr: mmap_anonymous(10).cast(),
+                mmap_size: 10,
+            },
+            coalesced_mmio_ring: None,
+        };
+
+        assert_eq!(faulty_vcpu_fd.get_regs().unwrap_err().errno(), badf_errno);
+        assert_eq!(
+            faulty_vcpu_fd
+                .set_regs(&unsafe { std::mem::zeroed() })
+                .unwrap_err()
+                .errno(),
+            badf_errno
+        );
+        assert_eq!(faulty_vcpu_fd.get_sregs().unwrap_err().errno(), badf_errno);
+        assert_eq!(
+            faulty_vcpu_fd
+                .set_sregs(&unsafe { std::mem::zeroed() })
+                .unwrap_err()
+                .errno(),
+            badf_errno
+        );
+        assert_eq!(faulty_vcpu_fd.get_fpu().unwrap_err().errno(), badf_errno);
+        assert_eq!(
+            faulty_vcpu_fd
+                .set_fpu(&unsafe { std::mem::zeroed() })
+                .unwrap_err()
+                .errno(),
+            badf_errno
+        );
+        assert_eq!(
+            faulty_vcpu_fd
+                .set_cpuid2(
+                    &Kvm::new()
+                        .unwrap()
+                        .get_supported_cpuid(KVM_MAX_CPUID_ENTRIES)
+                        .unwrap()
+                )
+                .unwrap_err()
+                .errno(),
+            badf_errno
+        );
+        assert_eq!(
+            faulty_vcpu_fd.get_cpuid2(1).err().unwrap().errno(),
+            badf_errno
+        );
+        // `kvm_lapic_state` does not implement debug by default so we cannot
+        // use unwrap_err here.
+        assert_eq!(
+            faulty_vcpu_fd.get_lapic().err().unwrap().errno(),
+            badf_errno
+        );
+        assert_eq!(
+            faulty_vcpu_fd
+                .set_lapic(&unsafe { std::mem::zeroed() })
+                .unwrap_err()
+                .errno(),
+            badf_errno
+        );
+        assert_eq!(
+            faulty_vcpu_fd
+                .get_msrs(&mut Msrs::new(1).unwrap())
+                .unwrap_err()
+                .errno(),
+            badf_errno
+        );
+        assert_eq!(
+            faulty_vcpu_fd
+                .set_msrs(&Msrs::new(1).unwrap())
+                .unwrap_err()
+                .errno(),
+            badf_errno
+        );
+        assert_eq!(
+            faulty_vcpu_fd.get_xsave().err().unwrap().errno(),
+            badf_errno
+        );
+        assert_eq!(
+            faulty_vcpu_fd
+                .set_xsave(&kvm_xsave::default())
+                .unwrap_err()
+                .errno(),
+            badf_errno
+        );
+        assert_eq!(faulty_vcpu_fd.get_xcrs().unwrap_err().errno(), badf_errno);
+        assert_eq!(
+            faulty_vcpu_fd
+                .set_xcrs(&kvm_xcrs::default())
+                .err()
+                .unwrap()
+                .errno(),
+            badf_errno
+        );
+        assert_eq!(
+            faulty_vcpu_fd.get_debug_regs().unwrap_err().errno(),
+            badf_errno
+        );
+        assert_eq!(
+            faulty_vcpu_fd
+                .set_debug_regs(&kvm_debugregs::default())
+                .unwrap_err()
+                .errno(),
+            badf_errno
+        );
+        assert_eq!(
+            faulty_vcpu_fd.kvmclock_ctrl().unwrap_err().errno(),
+            badf_errno
+        );
+        faulty_vcpu_fd.get_tsc_khz().unwrap_err();
+        faulty_vcpu_fd.set_tsc_khz(1000000).unwrap_err();
+        faulty_vcpu_fd.translate_gva(u64::MAX).unwrap_err();
+
+        // Don't drop the File object, or it'll notice the file it's trying to close is
+        // invalid and abort the process.
+ let _ = faulty_vcpu_fd.vcpu.into_raw_fd(); + } + + #[test] + #[cfg(target_arch = "aarch64")] + fn test_faulty_vcpu_target_aarch64() { + let kvm = Kvm::new().unwrap(); + let vm = kvm.create_vm().unwrap(); + let vcpu = vm.create_vcpu(0).unwrap(); + + // KVM defines valid targets as 0 to KVM_ARM_NUM_TARGETS-1, so pick a big raw number + // greater than that as target to be invalid + let kvi = kvm_vcpu_init { + target: 300, + ..Default::default() + }; + + vcpu.vcpu_init(&kvi).unwrap_err(); + } + + #[test] + #[cfg(target_arch = "aarch64")] + fn test_faulty_vcpu_fd_aarch64() { + use std::os::unix::io::{FromRawFd, IntoRawFd}; + + let badf_errno = libc::EBADF; + + let faulty_vcpu_fd = VcpuFd { + vcpu: unsafe { File::from_raw_fd(-2) }, + kvm_run_ptr: KvmRunWrapper { + kvm_run_ptr: mmap_anonymous(10).cast(), + mmap_size: 10, + }, + coalesced_mmio_ring: None, + }; + + let device_attr = kvm_device_attr { + group: KVM_ARM_VCPU_PMU_V3_CTRL, + attr: u64::from(KVM_ARM_VCPU_PMU_V3_INIT), + addr: 0x0, + flags: 0, + }; + + let reg_id = 0x6030_0000_0010_0042; + let mut reg_data = 0u128.to_le_bytes(); + + assert_eq!( + faulty_vcpu_fd + .set_device_attr(&device_attr) + .unwrap_err() + .errno(), + badf_errno + ); + assert_eq!( + faulty_vcpu_fd + .has_device_attr(&device_attr) + .unwrap_err() + .errno(), + badf_errno + ); + assert_eq!( + faulty_vcpu_fd + .vcpu_init(&kvm_vcpu_init::default()) + .unwrap_err() + .errno(), + badf_errno + ); + assert_eq!( + faulty_vcpu_fd + .vcpu_finalize(&(KVM_ARM_VCPU_SVE as i32)) + .unwrap_err() + .errno(), + badf_errno + ); + assert_eq!( + faulty_vcpu_fd + .get_reg_list(&mut RegList::new(500).unwrap()) + .unwrap_err() + .errno(), + badf_errno + ); + assert_eq!( + faulty_vcpu_fd + .set_one_reg(reg_id, ®_data) + .unwrap_err() + .errno(), + badf_errno + ); + assert_eq!( + faulty_vcpu_fd + .get_one_reg(reg_id, &mut reg_data) + .unwrap_err() + .errno(), + badf_errno + ); + + // Don't drop the File object, or it'll notice the file it's trying to close is + // invalid and abort the process. + faulty_vcpu_fd.vcpu.into_raw_fd(); + } + + #[test] + #[cfg(target_arch = "riscv64")] + fn test_faulty_vcpu_fd_riscv64() { + use std::os::unix::io::{FromRawFd, IntoRawFd}; + + let badf_errno = libc::EBADF; + + let faulty_vcpu_fd = VcpuFd { + vcpu: unsafe { File::from_raw_fd(-2) }, + kvm_run_ptr: KvmRunWrapper { + kvm_run_ptr: mmap_anonymous(10).cast(), + mmap_size: 10, + }, + coalesced_mmio_ring: None, + }; + + let reg_id = 0x8030_0000_0200_000a; + let mut reg_data = 0u128.to_le_bytes(); + + assert_eq!( + faulty_vcpu_fd + .get_reg_list(&mut RegList::new(200).unwrap()) + .unwrap_err() + .errno(), + badf_errno + ); + assert_eq!( + faulty_vcpu_fd + .set_one_reg(reg_id, ®_data) + .unwrap_err() + .errno(), + badf_errno + ); + assert_eq!( + faulty_vcpu_fd + .get_one_reg(reg_id, &mut reg_data) + .unwrap_err() + .errno(), + badf_errno + ); + + // Don't drop the File object, or it'll notice the file it's trying to close is + // invalid and abort the process. 
+        let _ = faulty_vcpu_fd.vcpu.into_raw_fd();
+    }
+
+    #[test]
+    #[cfg(target_arch = "aarch64")]
+    fn test_get_preferred_target() {
+        let kvm = Kvm::new().unwrap();
+        let vm = kvm.create_vm().unwrap();
+        let vcpu = vm.create_vcpu(0).unwrap();
+
+        let mut kvi = kvm_vcpu_init::default();
+
+        vm.get_preferred_target(&mut kvi)
+            .expect("Cannot get preferred target");
+        vcpu.vcpu_init(&kvi).unwrap();
+    }
+
+    #[test]
+    #[cfg(target_arch = "aarch64")]
+    fn test_set_one_reg() {
+        let kvm = Kvm::new().unwrap();
+        let vm = kvm.create_vm().unwrap();
+        let vcpu = vm.create_vcpu(0).unwrap();
+
+        let mut kvi = kvm_vcpu_init::default();
+        vm.get_preferred_target(&mut kvi)
+            .expect("Cannot get preferred target");
+        vcpu.vcpu_init(&kvi).expect("Cannot initialize vcpu");
+        let data: u128 = 0;
+        let reg_id: u64 = 0;
+
+        vcpu.set_one_reg(reg_id, &data.to_le_bytes()).unwrap_err();
+        // Exercising KVM_SET_ONE_REG by trying to alter the data inside the PSTATE
+        // register (an aarch64-specific register).
+        // This register is 64 bits (8 bytes) wide.
+        const PSTATE_REG_ID: u64 = 0x6030_0000_0010_0042;
+        vcpu.set_one_reg(PSTATE_REG_ID, &data.to_le_bytes())
+            .expect("Failed to set pstate register");
+
+        // Trying to set an 8-byte register with 7 bytes must fail.
+        vcpu.set_one_reg(PSTATE_REG_ID, &[0_u8; 7]).unwrap_err();
+    }
+
+    #[test]
+    #[cfg(target_arch = "aarch64")]
+    fn test_get_one_reg() {
+        let kvm = Kvm::new().unwrap();
+        let vm = kvm.create_vm().unwrap();
+        let vcpu = vm.create_vcpu(0).unwrap();
+
+        let mut kvi = kvm_vcpu_init::default();
+        vm.get_preferred_target(&mut kvi)
+            .expect("Cannot get preferred target");
+        vcpu.vcpu_init(&kvi).expect("Cannot initialize vcpu");
+
+        // PSR (Processor State Register) bits.
+        // Taken from arch/arm64/include/uapi/asm/ptrace.h.
+        const PSR_MODE_EL1H: u64 = 0x0000_0005;
+        const PSR_F_BIT: u64 = 0x0000_0040;
+        const PSR_I_BIT: u64 = 0x0000_0080;
+        const PSR_A_BIT: u64 = 0x0000_0100;
+        const PSR_D_BIT: u64 = 0x0000_0200;
+        const PSTATE_FAULT_BITS_64: u64 =
+            PSR_MODE_EL1H | PSR_A_BIT | PSR_F_BIT | PSR_I_BIT | PSR_D_BIT;
+        let data: u128 = PSTATE_FAULT_BITS_64 as u128;
+        const PSTATE_REG_ID: u64 = 0x6030_0000_0010_0042;
+        vcpu.set_one_reg(PSTATE_REG_ID, &data.to_le_bytes())
+            .expect("Failed to set pstate register");
+
+        let mut bytes = [0_u8; 16];
+        vcpu.get_one_reg(PSTATE_REG_ID, &mut bytes)
+            .expect("Failed to get pstate register");
+        let data = u128::from_le_bytes(bytes);
+        assert_eq!(data, PSTATE_FAULT_BITS_64 as u128);
+
+        // Trying to get an 8-byte register with 7 bytes must fail.
+        vcpu.get_one_reg(PSTATE_REG_ID, &mut [0_u8; 7]).unwrap_err();
+    }
+
+    #[test]
+    #[cfg(target_arch = "aarch64")]
+    fn test_get_reg_list() {
+        let kvm = Kvm::new().unwrap();
+        let vm = kvm.create_vm().unwrap();
+        let vcpu = vm.create_vcpu(0).unwrap();
+
+        let mut reg_list = RegList::new(1).unwrap();
+        // KVM_GET_REG_LIST demands that the vcpus be initialized, so we expect this to fail.
+        let err = vcpu.get_reg_list(&mut reg_list).unwrap_err();
+        assert!(err.errno() == libc::ENOEXEC);
+
+        let mut kvi = kvm_vcpu_init::default();
+        vm.get_preferred_target(&mut kvi)
+            .expect("Cannot get preferred target");
+        vcpu.vcpu_init(&kvi).expect("Cannot initialize vcpu");
+
+        // KVM_GET_REG_LIST offers us a number of registers for which we have
+        // not allocated memory, so the first time it fails.
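+        // KVM reports the required capacity by failing with E2BIG and writing the
+        // actual number of registers into the `n` field of the list header.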
+        let err = vcpu.get_reg_list(&mut reg_list).unwrap_err();
+        assert!(err.errno() == libc::E2BIG);
+        // SAFETY: This structure is a result from a specific vCPU ioctl
+        assert!(unsafe { reg_list.as_mut_fam_struct() }.n > 0);
+
+        // We make use of the number of registers returned to allocate memory and
+        // try one more time.
+        // SAFETY: This structure is a result from a specific vCPU ioctl
+        let mut reg_list =
+            RegList::new(unsafe { reg_list.as_mut_fam_struct() }.n as usize).unwrap();
+        vcpu.get_reg_list(&mut reg_list).unwrap();
+    }
+
+    #[test]
+    #[cfg(target_arch = "riscv64")]
+    fn test_set_one_reg() {
+        let kvm = Kvm::new().unwrap();
+        let vm = kvm.create_vm().unwrap();
+        let vcpu = vm.create_vcpu(0).unwrap();
+
+        let data: u128 = 0;
+        let reg_id: u64 = 0;
+
+        vcpu.set_one_reg(reg_id, &data.to_le_bytes()).unwrap_err();
+        // Exercising KVM_SET_ONE_REG by trying to alter the data inside the A0
+        // register.
+        // This register is 64 bits (8 bytes) wide.
+        const A0_REG_ID: u64 = 0x8030_0000_0200_000a;
+        vcpu.set_one_reg(A0_REG_ID, &data.to_le_bytes())
+            .expect("Failed to set a0 register");
+
+        // Trying to set an 8-byte register with 7 bytes must fail.
+        vcpu.set_one_reg(A0_REG_ID, &[0_u8; 7]).unwrap_err();
+    }
+
+    #[test]
+    #[cfg(target_arch = "riscv64")]
+    fn test_get_one_reg() {
+        let kvm = Kvm::new().unwrap();
+        let vm = kvm.create_vm().unwrap();
+        let vcpu = vm.create_vcpu(0).unwrap();
+
+        const PRESET: u64 = 0x7;
+        let data: u128 = PRESET as u128;
+        const A0_REG_ID: u64 = 0x8030_0000_0200_000a;
+        vcpu.set_one_reg(A0_REG_ID, &data.to_le_bytes())
+            .expect("Failed to set a0 register");
+
+        let mut bytes = [0_u8; 16];
+        vcpu.get_one_reg(A0_REG_ID, &mut bytes)
+            .expect("Failed to get a0 register");
+        let data = u128::from_le_bytes(bytes);
+        assert_eq!(data, PRESET as u128);
+
+        // Trying to get an 8-byte register with 7 bytes must fail.
+        vcpu.get_one_reg(A0_REG_ID, &mut [0_u8; 7]).unwrap_err();
+    }
+
+    #[test]
+    #[cfg(target_arch = "riscv64")]
+    fn test_get_reg_list() {
+        let kvm = Kvm::new().unwrap();
+        let vm = kvm.create_vm().unwrap();
+        let vcpu = vm.create_vcpu(0).unwrap();
+
+        let mut reg_list = RegList::new(1).unwrap();
+
+        // KVM_GET_REG_LIST offers us a number of registers for which we have
+        // not allocated memory, so the first time it fails.
+        let err = vcpu.get_reg_list(&mut reg_list).unwrap_err();
+        assert!(err.errno() == libc::E2BIG);
+        // SAFETY: This structure is a result from a specific vCPU ioctl
+        assert!(unsafe { reg_list.as_mut_fam_struct() }.n > 0);
+
+        // We make use of the number of registers returned to allocate memory and
+        // try one more time.
+        // SAFETY: This structure is a result from a specific vCPU ioctl
+        let mut reg_list =
+            RegList::new(unsafe { reg_list.as_mut_fam_struct() }.n as usize).unwrap();
+        vcpu.get_reg_list(&mut reg_list).unwrap();
+
+        // Also test getting a register list with room for exactly 200 registers.
+        let mut reg_list = RegList::new(200).unwrap();
+        vcpu.get_reg_list(&mut reg_list).unwrap();
+    }
+
+    #[test]
+    fn test_get_kvm_run() {
+        let kvm = Kvm::new().unwrap();
+        let vm = kvm.create_vm().unwrap();
+        let mut vcpu = vm.create_vcpu(0).unwrap();
+        vcpu.kvm_run_ptr.as_mut_ref().immediate_exit = 1;
+        assert_eq!(vcpu.get_kvm_run().immediate_exit, 1);
+    }
+
+    #[test]
+    fn test_set_kvm_immediate_exit() {
+        let kvm = Kvm::new().unwrap();
+        let vm = kvm.create_vm().unwrap();
+        let mut vcpu = vm.create_vcpu(0).unwrap();
+        assert_eq!(vcpu.kvm_run_ptr.as_ref().immediate_exit, 0);
+        vcpu.set_kvm_immediate_exit(1);
+        assert_eq!(vcpu.kvm_run_ptr.as_ref().immediate_exit, 1);
+    }
+
+    #[test]
+    #[cfg(target_arch = "x86_64")]
+    fn test_enable_cap() {
+        let kvm = Kvm::new().unwrap();
+        let vm = kvm.create_vm().unwrap();
+        let mut cap = kvm_enable_cap {
+            // KVM_CAP_HYPERV_SYNIC needs KVM_CAP_SPLIT_IRQCHIP enabled
+            cap: KVM_CAP_SPLIT_IRQCHIP,
+            ..Default::default()
+        };
+        // args[0] is the number of GSI routes reserved for the userspace IOAPIC.
+        cap.args[0] = 24;
+        vm.enable_cap(&cap).unwrap();
+
+        let vcpu = vm.create_vcpu(0).unwrap();
+        if kvm.check_extension(Cap::HypervSynic) {
+            let cap = kvm_enable_cap {
+                cap: KVM_CAP_HYPERV_SYNIC,
+                ..Default::default()
+            };
+            vcpu.enable_cap(&cap).unwrap();
+        }
+    }
+
+    #[cfg(target_arch = "x86_64")]
+    #[test]
+    fn test_get_tsc_khz() {
+        let kvm = Kvm::new().unwrap();
+        let vm = kvm.create_vm().unwrap();
+        let vcpu = vm.create_vcpu(0).unwrap();
+
+        if !kvm.check_extension(Cap::GetTscKhz) {
+            vcpu.get_tsc_khz().unwrap_err();
+        } else {
+            assert!(vcpu.get_tsc_khz().unwrap() > 0);
+        }
+    }
+
+    #[cfg(target_arch = "x86_64")]
+    #[test]
+    fn test_set_tsc_khz() {
+        let kvm = Kvm::new().unwrap();
+        let vm = kvm.create_vm().unwrap();
+        let vcpu = vm.create_vcpu(0).unwrap();
+        let freq = vcpu.get_tsc_khz().unwrap();
+
+        if !(kvm.check_extension(Cap::GetTscKhz) && kvm.check_extension(Cap::TscControl)) {
+            vcpu.set_tsc_khz(0).unwrap_err();
+        } else {
+            vcpu.set_tsc_khz(freq - 500000).unwrap();
+            assert_eq!(vcpu.get_tsc_khz().unwrap(), freq - 500000);
+            vcpu.set_tsc_khz(freq + 500000).unwrap();
+            assert_eq!(vcpu.get_tsc_khz().unwrap(), freq + 500000);
+        }
+    }
+
+    #[cfg(target_arch = "x86_64")]
+    #[test]
+    fn test_sync_regs() {
+        let kvm = Kvm::new().unwrap();
+        let vm = kvm.create_vm().unwrap();
+        let mut vcpu = vm.create_vcpu(0).unwrap();
+
+        // Test setting each valid register
+        let sync_regs = [
+            SyncReg::Register,
+            SyncReg::SystemRegister,
+            SyncReg::VcpuEvents,
+        ];
+        for reg in &sync_regs {
+            vcpu.set_sync_valid_reg(*reg);
+            assert_eq!(vcpu.kvm_run_ptr.as_ref().kvm_valid_regs, *reg as u64);
+            vcpu.clear_sync_valid_reg(*reg);
+            assert_eq!(vcpu.kvm_run_ptr.as_ref().kvm_valid_regs, 0);
+        }
+
+        // Test that multiple valid SyncRegs can be set at the same time
+        vcpu.set_sync_valid_reg(SyncReg::Register);
+        vcpu.set_sync_valid_reg(SyncReg::SystemRegister);
+        vcpu.set_sync_valid_reg(SyncReg::VcpuEvents);
+        assert_eq!(
+            vcpu.kvm_run_ptr.as_ref().kvm_valid_regs,
+            SyncReg::Register as u64 | SyncReg::SystemRegister as u64 | SyncReg::VcpuEvents as u64
+        );
+
+        // Test setting each dirty register
+        let sync_regs = [
+            SyncReg::Register,
+            SyncReg::SystemRegister,
+            SyncReg::VcpuEvents,
+        ];
+
+        for reg in &sync_regs {
+            vcpu.set_sync_dirty_reg(*reg);
+            assert_eq!(vcpu.kvm_run_ptr.as_ref().kvm_dirty_regs, *reg as u64);
+            vcpu.clear_sync_dirty_reg(*reg);
+            assert_eq!(vcpu.kvm_run_ptr.as_ref().kvm_dirty_regs, 0);
+        }
+
+        // Test that multiple dirty SyncRegs can be set at the same time
+        vcpu.set_sync_dirty_reg(SyncReg::Register);
+        vcpu.set_sync_dirty_reg(SyncReg::SystemRegister);
+        vcpu.set_sync_dirty_reg(SyncReg::VcpuEvents);
+        assert_eq!(
+            vcpu.kvm_run_ptr.as_ref().kvm_dirty_regs,
+            SyncReg::Register as u64 | SyncReg::SystemRegister as u64 | SyncReg::VcpuEvents as u64
+        );
+    }
+
+    #[cfg(target_arch = "x86_64")]
+    #[test]
+    fn test_sync_regs_with_run() {
+        use std::io::Write;
+
+        let kvm = Kvm::new().unwrap();
+        let vm = kvm.create_vm().unwrap();
+        if kvm.check_extension(Cap::SyncRegs) {
+            // This example is based on https://lwn.net/Articles/658511/
+            #[rustfmt::skip]
+            let code = [
+                0xff, 0xc0, /* inc eax */
+                0xf4,       /* hlt */
+            ];
+
+            let mem_size = 0x4000;
+            let load_addr = mmap_anonymous(mem_size).as_ptr();
+            let guest_addr: u64 = 0x1000;
+            let slot: u32 = 0;
+            let mem_region = kvm_userspace_memory_region {
+                slot,
+                guest_phys_addr: guest_addr,
+                memory_size: mem_size as u64,
+                userspace_addr: load_addr as u64,
+                flags: KVM_MEM_LOG_DIRTY_PAGES,
+            };
+            unsafe {
+                vm.set_user_memory_region(mem_region).unwrap();
+            }
+
+            unsafe {
+                // Get a mutable slice of `mem_size` from `load_addr`.
+                // This is safe because we mapped it before.
+                let mut slice = std::slice::from_raw_parts_mut(load_addr, mem_size);
+                slice.write_all(&code).unwrap();
+            }
+
+            let mut vcpu = vm.create_vcpu(0).unwrap();
+
+            let orig_sregs = vcpu.get_sregs().unwrap();
+
+            let sync_regs = vcpu.sync_regs_mut();
+
+            // Initialize the sregs in sync_regs to be the original sregs
+            sync_regs.sregs = orig_sregs;
+            sync_regs.sregs.cs.base = 0;
+            sync_regs.sregs.cs.selector = 0;
+
+            // Set up the guest to attempt to `inc eax`
+            sync_regs.regs.rip = guest_addr;
+            sync_regs.regs.rax = 0x8000;
+            sync_regs.regs.rflags = 2;
+
+            // Initialize the sync_reg flags
+            vcpu.set_sync_valid_reg(SyncReg::Register);
+            vcpu.set_sync_valid_reg(SyncReg::SystemRegister);
+            vcpu.set_sync_valid_reg(SyncReg::VcpuEvents);
+            vcpu.set_sync_dirty_reg(SyncReg::Register);
+            vcpu.set_sync_dirty_reg(SyncReg::SystemRegister);
+            vcpu.set_sync_dirty_reg(SyncReg::VcpuEvents);
+
+            // hlt is the only expected return from guest execution
+            assert!(matches!(vcpu.run().expect("run failed"), VcpuExit::Hlt));
+
+            let regs = vcpu.get_regs().unwrap();
+
+            let sync_regs = vcpu.sync_regs();
+            assert_eq!(regs, sync_regs.regs);
+            assert_eq!(sync_regs.regs.rax, 0x8001);
+        }
+    }
+
+    #[test]
+    #[cfg(target_arch = "x86_64")]
+    fn test_translate_gva() {
+        let kvm = Kvm::new().unwrap();
+        let vm = kvm.create_vm().unwrap();
+        let vcpu = vm.create_vcpu(0).unwrap();
+        vcpu.translate_gva(0x10000).unwrap();
+        assert_eq!(vcpu.translate_gva(0x10000).unwrap().valid, 1);
+        assert_eq!(
+            vcpu.translate_gva(0x10000).unwrap().physical_address,
+            0x10000
+        );
+        vcpu.translate_gva(u64::MAX).unwrap();
+        assert_eq!(vcpu.translate_gva(u64::MAX).unwrap().valid, 0);
+    }
+
+    #[test]
+    #[cfg(target_arch = "aarch64")]
+    fn test_vcpu_attr() {
+        let kvm = Kvm::new().unwrap();
+        let vm = kvm.create_vm().unwrap();
+        let vcpu = vm.create_vcpu(0).unwrap();
+
+        let dist_attr = kvm_device_attr {
+            group: KVM_ARM_VCPU_PMU_V3_CTRL,
+            attr: u64::from(KVM_ARM_VCPU_PMU_V3_INIT),
+            addr: 0x0,
+            flags: 0,
+        };
+
+        vcpu.has_device_attr(&dist_attr).unwrap_err();
+        vcpu.set_device_attr(&dist_attr).unwrap_err();
+        let mut kvi: kvm_vcpu_init = kvm_vcpu_init::default();
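+        // The PMU attribute only becomes available once the vCPU has been
+        // initialized with the PMUv3 feature bit set, which the init below provides.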
+        vm.get_preferred_target(&mut kvi)
+            .expect("Cannot get preferred target");
+        kvi.features[0] |= 1 << KVM_ARM_VCPU_PSCI_0_2 | 1 << KVM_ARM_VCPU_PMU_V3;
+        vcpu.vcpu_init(&kvi).unwrap();
+        vcpu.has_device_attr(&dist_attr).unwrap();
+        vcpu.set_device_attr(&dist_attr).unwrap();
+    }
+
+    #[test]
+    #[cfg(target_arch = "aarch64")]
+    fn test_pointer_authentication() {
+        let kvm = Kvm::new().unwrap();
+        let vm = kvm.create_vm().unwrap();
+        let vcpu = vm.create_vcpu(0).unwrap();
+
+        let mut kvi = kvm_vcpu_init::default();
+        vm.get_preferred_target(&mut kvi)
+            .expect("Cannot get preferred target");
+        if kvm.check_extension(Cap::ArmPtrAuthAddress) {
+            kvi.features[0] |= 1 << KVM_ARM_VCPU_PTRAUTH_ADDRESS;
+        }
+        if kvm.check_extension(Cap::ArmPtrAuthGeneric) {
+            kvi.features[0] |= 1 << KVM_ARM_VCPU_PTRAUTH_GENERIC;
+        }
+        vcpu.vcpu_init(&kvi).unwrap();
+    }
+
+    #[cfg(target_arch = "x86_64")]
+    #[test]
+    fn test_userspace_rdmsr_exit() {
+        use std::io::Write;
+
+        let kvm = Kvm::new().unwrap();
+        let vm = kvm.create_vm().unwrap();
+        #[rustfmt::skip]
+        let code = [
+            0x0F, 0x32, /* rdmsr */
+            0xF4        /* hlt */
+        ];
+
+        if !vm.check_extension(Cap::X86UserSpaceMsr) {
+            return;
+        }
+        let cap = kvm_enable_cap {
+            cap: Cap::X86UserSpaceMsr as u32,
+            args: [MsrExitReason::Unknown.bits() as u64, 0, 0, 0],
+            ..Default::default()
+        };
+        vm.enable_cap(&cap).unwrap();
+
+        let mem_size = 0x4000;
+        let load_addr = mmap_anonymous(mem_size).as_ptr();
+        let guest_addr: u64 = 0x1000;
+        let slot: u32 = 0;
+        let mem_region = kvm_userspace_memory_region {
+            slot,
+            guest_phys_addr: guest_addr,
+            memory_size: mem_size as u64,
+            userspace_addr: load_addr as u64,
+            flags: 0,
+        };
+        unsafe {
+            vm.set_user_memory_region(mem_region).unwrap();
+
+            // Get a mutable slice of `mem_size` from `load_addr`.
+            // This is safe because we mapped it before.
+            let mut slice = std::slice::from_raw_parts_mut(load_addr, mem_size);
+            slice.write_all(&code).unwrap();
+        }
+
+        let mut vcpu = vm.create_vcpu(0).unwrap();
+
+        // Set up special registers
+        let mut vcpu_sregs = vcpu.get_sregs().unwrap();
+        assert_ne!(vcpu_sregs.cs.base, 0);
+        assert_ne!(vcpu_sregs.cs.selector, 0);
+        vcpu_sregs.cs.base = 0;
+        vcpu_sregs.cs.selector = 0;
+        vcpu.set_sregs(&vcpu_sregs).unwrap();
+
+        // Set the Instruction Pointer to the guest address where we loaded
+        // the code, and RCX to the MSR to be read.
+        let mut vcpu_regs = vcpu.get_regs().unwrap();
+        vcpu_regs.rip = guest_addr;
+        vcpu_regs.rcx = 0x474f4f00;
+        vcpu.set_regs(&vcpu_regs).unwrap();
+
+        match vcpu.run().unwrap() {
+            VcpuExit::X86Rdmsr(exit) => {
+                assert_eq!(exit.reason, MsrExitReason::Unknown);
+                assert_eq!(exit.index, 0x474f4f00);
+            }
+            e => panic!("Unexpected exit: {:?}", e),
+        }
+    }
+
+    #[cfg(target_arch = "x86_64")]
+    #[test]
+    fn test_userspace_hypercall_exit() {
+        use std::io::Write;
+
+        let kvm = Kvm::new().unwrap();
+        let vm = kvm.create_vm().unwrap();
+
+        // Use `vmcall` or `vmmcall` depending on what's supported.
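+        // CPUID leaf 0x1 ECX bit 5 advertises VMX (Intel, `vmcall`); leaf
+        // 0x8000_0001 ECX bit 2 advertises SVM (AMD, `vmmcall`).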
+        let cpuid = kvm.get_supported_cpuid(KVM_MAX_CPUID_ENTRIES).unwrap();
+        let supports_vmcall = cpuid
+            .as_slice()
+            .iter()
+            .find(|entry| entry.function == 1)
+            .map_or(false, |entry| entry.ecx & (1 << 5) != 0);
+        let supports_vmmcall = cpuid
+            .as_slice()
+            .iter()
+            .find(|entry| entry.function == 0x8000_0001)
+            .map_or(false, |entry| entry.ecx & (1 << 2) != 0);
+        #[rustfmt::skip]
+        let code = if supports_vmcall {
+            [
+                0x0F, 0x01, 0xC1, /* vmcall */
+                0xF4              /* hlt */
+            ]
+        } else if supports_vmmcall {
+            [
+                0x0F, 0x01, 0xD9, /* vmmcall */
+                0xF4              /* hlt */
+            ]
+        } else {
+            return;
+        };
+
+        if !vm.check_extension(Cap::ExitHypercall) {
+            return;
+        }
+        const KVM_HC_MAP_GPA_RANGE: u64 = 12;
+        let cap = kvm_enable_cap {
+            cap: Cap::ExitHypercall as u32,
+            args: [1 << KVM_HC_MAP_GPA_RANGE, 0, 0, 0],
+            ..Default::default()
+        };
+        vm.enable_cap(&cap).unwrap();
+
+        let mem_size = 0x4000;
+        let load_addr = mmap_anonymous(mem_size).as_ptr();
+        let guest_addr: u64 = 0x1000;
+        let slot: u32 = 0;
+        let mem_region = kvm_userspace_memory_region {
+            slot,
+            guest_phys_addr: guest_addr,
+            memory_size: mem_size as u64,
+            userspace_addr: load_addr as u64,
+            flags: 0,
+        };
+        unsafe {
+            vm.set_user_memory_region(mem_region).unwrap();
+
+            // Get a mutable slice of `mem_size` from `load_addr`.
+            // This is safe because we mapped it before.
+            let mut slice = std::slice::from_raw_parts_mut(load_addr, mem_size);
+            slice.write_all(&code).unwrap();
+        }
+
+        let mut vcpu = vm.create_vcpu(0).unwrap();
+
+        // Set up special registers
+        let mut vcpu_sregs = vcpu.get_sregs().unwrap();
+        assert_ne!(vcpu_sregs.cs.base, 0);
+        assert_ne!(vcpu_sregs.cs.selector, 0);
+        vcpu_sregs.cs.base = 0;
+        vcpu_sregs.cs.selector = 0;
+        vcpu.set_sregs(&vcpu_sregs).unwrap();
+
+        // Set the Instruction Pointer to the guest address where we loaded
+        // the code, RAX to the hypercall number, and RBX/RCX/RDX to the
+        // KVM_HC_MAP_GPA_RANGE arguments (GPA, number of pages, attributes).
+        let mut vcpu_regs = vcpu.get_regs().unwrap();
+        vcpu_regs.rip = guest_addr;
+        vcpu_regs.rax = KVM_HC_MAP_GPA_RANGE;
+        vcpu_regs.rbx = 0x1234000;
+        vcpu_regs.rcx = 1;
+        vcpu_regs.rdx = 0;
+        vcpu.set_regs(&vcpu_regs).unwrap();
+
+        match vcpu.run().unwrap() {
+            VcpuExit::Hypercall(exit) => {
+                assert_eq!(exit.nr, KVM_HC_MAP_GPA_RANGE);
+                assert_eq!(exit.args[0], 0x1234000);
+                assert_eq!(exit.args[1], 1);
+                assert_eq!(exit.args[2], 0);
+            }
+            e => panic!("Unexpected exit: {:?}", e),
+        }
+    }
+
+    #[cfg(target_arch = "x86_64")]
+    #[test]
+    fn test_userspace_wrmsr_exit() {
+        use std::io::Write;
+
+        let kvm = Kvm::new().unwrap();
+        let vm = kvm.create_vm().unwrap();
+        #[rustfmt::skip]
+        let code = [
+            0x0F, 0x30, /* wrmsr */
+            0xF4        /* hlt */
+        ];
+
+        if !vm.check_extension(Cap::X86UserSpaceMsr) {
+            return;
+        }
+        let cap = kvm_enable_cap {
+            cap: Cap::X86UserSpaceMsr as u32,
+            args: [MsrExitReason::Unknown.bits() as u64, 0, 0, 0],
+            ..Default::default()
+        };
+        vm.enable_cap(&cap).unwrap();
+
+        let mem_size = 0x4000;
+        let load_addr = mmap_anonymous(mem_size).as_ptr();
+        let guest_addr: u64 = 0x1000;
+        let slot: u32 = 0;
+        let mem_region = kvm_userspace_memory_region {
+            slot,
+            guest_phys_addr: guest_addr,
+            memory_size: mem_size as u64,
+            userspace_addr: load_addr as u64,
+            flags: 0,
+        };
+        unsafe {
+            vm.set_user_memory_region(mem_region).unwrap();
+
+            // Get a mutable slice of `mem_size` from `load_addr`.
+            // This is safe because we mapped it before.
+            let mut slice = std::slice::from_raw_parts_mut(load_addr, mem_size);
+            slice.write_all(&code).unwrap();
+        }
+
+        let mut vcpu = vm.create_vcpu(0).unwrap();
+
+        // Set up special registers
+        let mut vcpu_sregs = vcpu.get_sregs().unwrap();
+        assert_ne!(vcpu_sregs.cs.base, 0);
+        assert_ne!(vcpu_sregs.cs.selector, 0);
+        vcpu_sregs.cs.base = 0;
+        vcpu_sregs.cs.selector = 0;
+        vcpu.set_sregs(&vcpu_sregs).unwrap();
+
+        // Set the Instruction Pointer to the guest address where we loaded
+        // the code, RCX to the MSR to be written, and EDX:EAX to the data to
+        // be written.
+        let mut vcpu_regs = vcpu.get_regs().unwrap();
+        vcpu_regs.rip = guest_addr;
+        vcpu_regs.rcx = 0x474f4f00;
+        vcpu_regs.rax = 0xdeadbeef;
+        vcpu_regs.rdx = 0xd0c0ffee;
+        vcpu.set_regs(&vcpu_regs).unwrap();
+
+        match vcpu.run().unwrap() {
+            VcpuExit::X86Wrmsr(exit) => {
+                assert_eq!(exit.reason, MsrExitReason::Unknown);
+                assert_eq!(exit.index, 0x474f4f00);
+                assert_eq!(exit.data & 0xffffffff, 0xdeadbeef);
+                assert_eq!((exit.data >> 32) & 0xffffffff, 0xd0c0ffee);
+            }
+            e => panic!("Unexpected exit: {:?}", e),
+        }
+    }
+
+    #[test]
+    #[cfg(target_arch = "x86_64")]
+    fn test_coalesced_pio() {
+        use crate::IoEventAddress;
+        use std::io::Write;
+
+        const PORT: u64 = 0x2c;
+        const DATA: u64 = 0x39;
+        const SIZE: u32 = 1;
+
+        #[rustfmt::skip]
+        let code = [
+            0xe6, 0x2c, // out 0x2c, al
+            0xf4,       // hlt
+            0xe6, 0x2c, // out 0x2c, al
+            0xf4,       // hlt
+        ];
+
+        let kvm = Kvm::new().unwrap();
+        let vm = kvm.create_vm().unwrap();
+        assert!(vm.check_extension(Cap::CoalescedPio));
+
+        // Prepare guest memory
+        let mem_size = 0x4000;
+        let load_addr = mmap_anonymous(mem_size).as_ptr();
+        let guest_addr: u64 = 0x1000;
+        let slot = 0;
+        let mem_region = kvm_userspace_memory_region {
+            slot,
+            guest_phys_addr: guest_addr,
+            memory_size: mem_size as u64,
+            userspace_addr: load_addr as u64,
+            flags: 0,
+        };
+
+        unsafe {
+            vm.set_user_memory_region(mem_region).unwrap();
+
+            // Get a mutable slice of `mem_size` from `load_addr`.
+            // This is safe because we mapped it before.
+            let mut slice = std::slice::from_raw_parts_mut(load_addr, mem_size);
+            slice.write_all(&code).unwrap();
+        }
+
+        let addr = IoEventAddress::Pio(PORT);
+        vm.register_coalesced_mmio(addr, SIZE).unwrap();
+
+        let mut vcpu = vm.create_vcpu(0).unwrap();
+
+        // Map the MMIO ring
+        vcpu.map_coalesced_mmio_ring().unwrap();
+
+        // Set regs
+        let mut regs = vcpu.get_regs().unwrap();
+        regs.rip = guest_addr;
+        regs.rax = DATA;
+        regs.rflags = 2;
+        vcpu.set_regs(&regs).unwrap();
+
+        // Set sregs
+        let mut sregs = vcpu.get_sregs().unwrap();
+        sregs.cs.base = 0;
+        sregs.cs.selector = 0;
+        vcpu.set_sregs(&sregs).unwrap();
+
+        // Run and check that the exit was caused by the hlt and not the
+        // port I/O.
+        let exit = vcpu.run().unwrap();
+        assert!(matches!(exit, VcpuExit::Hlt));
+
+        // Check that the ring buffer entry is what we expect
+        let entry = vcpu.coalesced_mmio_read().unwrap().unwrap();
+        assert_eq!(entry.phys_addr, PORT);
+        assert_eq!(entry.len, 1);
+        assert_eq!(entry.data[0] as u64, DATA);
+        // SAFETY: this field is a u32 in all variants of the union,
+        // so access is always safe.
+        let pio = unsafe { entry.__bindgen_anon_1.pio };
+        assert_eq!(pio, 1);
+
+        // The ring buffer should be empty now
+        assert!(vcpu.coalesced_mmio_read().unwrap().is_none());
+
+        // Unregister and check that the next PIO write triggers an exit
+        vm.unregister_coalesced_mmio(addr, SIZE).unwrap();
+        let exit = vcpu.run().unwrap();
+        let VcpuExit::IoOut(port, data) = exit else {
+            panic!("Unexpected VM exit: {:?}", exit);
+        };
+        assert_eq!(port, PORT as u16);
+        assert_eq!(data, (DATA as u8).to_le_bytes());
+    }
+
+    #[test]
+    #[cfg(target_arch = "x86_64")]
+    fn test_coalesced_mmio() {
+        use crate::IoEventAddress;
+        use std::io::Write;
+
+        const ADDR: u64 = 0x124;
+        const DATA: u64 = 0x39;
+        const SIZE: u32 = 2;
+
+        #[rustfmt::skip]
+        let code = [
+            0x66, 0x31, 0xFF,       // xor di,di
+            0x66, 0xBF, 0x24, 0x01, // mov di, 0x124
+            0x67, 0x66, 0x89, 0x05, // mov WORD PTR [di], ax
+            0xF4,                   // hlt
+            0x66, 0x31, 0xFF,       // xor di,di
+            0x66, 0xBF, 0x24, 0x01, // mov di, 0x124
+            0x67, 0x66, 0x89, 0x05, // mov WORD PTR [di], ax
+            0xF4,                   // hlt
+        ];
+
+        let kvm = Kvm::new().unwrap();
+        let vm = kvm.create_vm().unwrap();
+        assert!(vm.check_extension(Cap::CoalescedMmio));
+
+        // Prepare guest memory
+        let mem_size = 0x4000;
+        let load_addr = mmap_anonymous(mem_size).as_ptr();
+        let guest_addr: u64 = 0x1000;
+        let slot: u32 = 0;
+        let mem_region = kvm_userspace_memory_region {
+            slot,
+            guest_phys_addr: guest_addr,
+            memory_size: mem_size as u64,
+            userspace_addr: load_addr as u64,
+            flags: 0,
+        };
+
+        unsafe {
+            vm.set_user_memory_region(mem_region).unwrap();
+
+            // Get a mutable slice of `mem_size` from `load_addr`.
+            // This is safe because we mapped it before.
+            let mut slice = std::slice::from_raw_parts_mut(load_addr, mem_size);
+            slice.write_all(&code).unwrap();
+        }
+
+        let addr = IoEventAddress::Mmio(ADDR);
+        vm.register_coalesced_mmio(addr, SIZE).unwrap();
+
+        let mut vcpu = vm.create_vcpu(0).unwrap();
+
+        // Map the MMIO ring
+        vcpu.map_coalesced_mmio_ring().unwrap();
+
+        // Set regs
+        let mut regs = vcpu.get_regs().unwrap();
+        regs.rip = guest_addr;
+        regs.rax = DATA;
+        regs.rdx = ADDR;
+        regs.rflags = 2;
+        vcpu.set_regs(&regs).unwrap();
+
+        // Set sregs
+        let mut sregs = vcpu.get_sregs().unwrap();
+        sregs.cs.base = 0;
+        sregs.cs.selector = 0;
+        vcpu.set_sregs(&sregs).unwrap();
+
+        // Run and check that the exit was caused by the hlt and not the
+        // MMIO access.
+        let exit = vcpu.run().unwrap();
+        assert!(matches!(exit, VcpuExit::Hlt));
+
+        // Check that the ring buffer entry is what we expect
+        let entry = vcpu.coalesced_mmio_read().unwrap().unwrap();
+        assert_eq!(entry.phys_addr, ADDR);
+        assert_eq!(entry.len, SIZE);
+        assert_eq!(entry.data[0] as u64, DATA);
+        // SAFETY: this field is a u32 in all variants of the union,
+        // so access is always safe.
+        let pio = unsafe { entry.__bindgen_anon_1.pio };
+        assert_eq!(pio, 0);
+
+        // The ring buffer should be empty now
+        assert!(vcpu.coalesced_mmio_read().unwrap().is_none());
+
+        // Unregister and check that the next MMIO write triggers an exit
+        vm.unregister_coalesced_mmio(addr, SIZE).unwrap();
+        let exit = vcpu.run().unwrap();
+        let VcpuExit::MmioWrite(addr, data) = exit else {
+            panic!("Unexpected VM exit: {:?}", exit);
+        };
+        assert_eq!(addr, ADDR);
+        assert_eq!(data, (DATA as u16).to_le_bytes());
+    }
+}
diff --git a/kvm-ioctls/src/ioctls/vm.rs b/kvm-ioctls/src/ioctls/vm.rs
new file mode 100644
index 0000000..e4c7c02
--- /dev/null
+++ b/kvm-ioctls/src/ioctls/vm.rs
@@ -0,0 +1,2772 @@
+// Copyright © 2024 Institute of Software, CAS. All rights reserved.
+//
+// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+// SPDX-License-Identifier: Apache-2.0 OR MIT
+//
+// Portions Copyright 2017 The Chromium OS Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the THIRD-PARTY file.
+
+use kvm_bindings::*;
+use std::fs::File;
+use std::os::raw::c_void;
+use std::os::raw::{c_int, c_ulong};
+use std::os::unix::io::{AsRawFd, FromRawFd, RawFd};
+
+use crate::cap::Cap;
+use crate::ioctls::device::new_device;
+use crate::ioctls::device::DeviceFd;
+use crate::ioctls::vcpu::new_vcpu;
+use crate::ioctls::vcpu::VcpuFd;
+use crate::ioctls::{KvmRunWrapper, Result};
+use crate::kvm_ioctls::*;
+use vmm_sys_util::errno;
+use vmm_sys_util::eventfd::EventFd;
+#[cfg(target_arch = "x86_64")]
+use vmm_sys_util::ioctl::ioctl_with_mut_ptr;
+#[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))]
+use vmm_sys_util::ioctl::{ioctl, ioctl_with_mut_ref};
+use vmm_sys_util::ioctl::{ioctl_with_ref, ioctl_with_val};
+
+/// An address either in programmable I/O space or in memory mapped I/O space.
+///
+/// The `IoEventAddress` is used for specifying the type when registering an event
+/// in [register_ioevent](struct.VmFd.html#method.register_ioevent).
+#[derive(Debug, Clone, Copy)]
+pub enum IoEventAddress {
+    /// Representation of a programmable I/O address.
+    Pio(u64),
+    /// Representation of a memory-mapped I/O address.
+    Mmio(u64),
+}
+
+/// Helper structure for disabling datamatch.
+///
+/// The structure can be used as a parameter to
+/// [`register_ioevent`](struct.VmFd.html#method.register_ioevent)
+/// to disable filtering of events based on the datamatch flag. For details check the
+/// [KVM API documentation](https://www.kernel.org/doc/Documentation/virtual/kvm/api.txt).
+#[derive(Debug, Clone, Copy)]
+pub struct NoDatamatch;
+impl From<NoDatamatch> for u64 {
+    fn from(_: NoDatamatch) -> u64 {
+        0
+    }
+}
+
+/// Wrapper over KVM VM ioctls.
+#[derive(Debug)]
+pub struct VmFd {
+    vm: File,
+    run_size: usize,
+}
+
+impl VmFd {
+    /// Creates/modifies a guest physical memory slot.
+    ///
+    /// See the documentation for `KVM_SET_USER_MEMORY_REGION`.
+    ///
+    /// # Arguments
+    ///
+    /// * `user_memory_region` - Guest physical memory slot. For details check the
+    ///   `kvm_userspace_memory_region` structure in the
+    ///   [KVM API doc](https://www.kernel.org/doc/Documentation/virtual/kvm/api.txt).
+    ///
+    /// # Safety
+    ///
+    /// This function is unsafe because there is no guarantee `userspace_addr` points to a valid
+    /// memory region, nor the memory region lives as long as the kernel needs it to.
+    ///
+    /// The caller of this method must make sure that:
+    /// - the raw pointer (`userspace_addr`) points to valid memory
+    /// - the regions provided to KVM are not overlapping other memory regions.
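+    ///
+    /// Note that an existing slot can be deleted again by re-issuing this call
+    /// with its `memory_size` set to zero.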
+ /// + /// # Example + /// + /// ```rust + /// # extern crate kvm_ioctls; + /// extern crate kvm_bindings; + /// + /// use kvm_bindings::kvm_userspace_memory_region; + /// use kvm_ioctls::Kvm; + /// + /// let kvm = Kvm::new().unwrap(); + /// let vm = kvm.create_vm().unwrap(); + /// let mem_region = kvm_userspace_memory_region { + /// slot: 0, + /// guest_phys_addr: 0x10000 as u64, + /// memory_size: 0x10000 as u64, + /// userspace_addr: 0x0 as u64, + /// flags: 0, + /// }; + /// unsafe { + /// vm.set_user_memory_region(mem_region).unwrap(); + /// }; + /// ``` + pub unsafe fn set_user_memory_region( + &self, + user_memory_region: kvm_userspace_memory_region, + ) -> Result<()> { + let ret = ioctl_with_ref(self, KVM_SET_USER_MEMORY_REGION(), &user_memory_region); + if ret == 0 { + Ok(()) + } else { + Err(errno::Error::last()) + } + } + + /// Creates/modifies a guest physical memory slot. + /// + /// See the documentation for `KVM_SET_USER_MEMORY_REGION2`. + /// + /// # Arguments + /// + /// * `user_memory_region2` - Guest physical memory slot. For details check the + /// `kvm_userspace_memory_region2` structure in the + /// [KVM API doc](https://www.kernel.org/doc/Documentation/virtual/kvm/api.txt). + /// + /// # Safety + /// + /// This function is unsafe because there is no guarantee `userspace_addr` points to a valid + /// memory region, nor the memory region lives as long as the kernel needs it to. + /// + /// The caller of this method must make sure that: + /// - the raw pointer (`userspace_addr`) points to valid memory + /// - the regions provided to KVM are not overlapping other memory regions. + /// - the guest_memfd points at a file created via KVM_CREATE_GUEST_MEMFD on + /// the current VM, and the target range must not be bound to any other memory region + /// + /// # Example + /// + /// ```rust + /// # extern crate kvm_ioctls; + /// extern crate kvm_bindings; + /// + /// use kvm_bindings::{ + /// kvm_create_guest_memfd, kvm_enable_cap, kvm_userspace_memory_region2, KVM_CAP_GUEST_MEMFD, + /// KVM_CAP_USER_MEMORY2, KVM_MEM_GUEST_MEMFD, + /// }; + /// use kvm_ioctls::Kvm; + /// use std::os::fd::RawFd; + /// + /// # #[cfg(target_arch = "x86_64")] + /// { + /// let kvm = Kvm::new().unwrap(); + /// let vm = kvm.create_vm().unwrap(); + /// + /// let address_space = unsafe { libc::mmap(0 as _, 10000, 3, 34, -1, 0) }; + /// let userspace_addr = address_space as *const u8 as u64; + /// + /// let mut config = kvm_enable_cap { + /// cap: KVM_CAP_GUEST_MEMFD, + /// ..Default::default() + /// }; + /// + /// if vm.enable_cap(&config).is_err() { + /// return; + /// } + /// let gmem = kvm_create_guest_memfd { + /// size: 0x10000, + /// flags: 0, + /// reserved: [0; 6], + /// }; + /// + /// let fd: RawFd = unsafe { vm.create_guest_memfd(gmem).unwrap() }; + /// + /// config.cap = KVM_CAP_USER_MEMORY2; + /// + /// if vm.enable_cap(&config).is_err() { + /// return; + /// } + /// + /// let mem_region = kvm_userspace_memory_region2 { + /// slot: 0, + /// flags: KVM_MEM_GUEST_MEMFD, + /// guest_phys_addr: 0x10000 as u64, + /// memory_size: 0x10000 as u64, + /// userspace_addr, + /// guest_memfd_offset: 0, + /// guest_memfd: fd as u32, + /// pad1: 0, + /// pad2: [0; 14], + /// }; + /// unsafe { + /// vm.set_user_memory_region2(mem_region).unwrap(); + /// }; + /// } + /// ``` + pub unsafe fn set_user_memory_region2( + &self, + user_memory_region2: kvm_userspace_memory_region2, + ) -> Result<()> { + let ret = ioctl_with_ref(self, KVM_SET_USER_MEMORY_REGION2(), &user_memory_region2); + if ret == 0 { + Ok(()) 
+ } else { + Err(errno::Error::last()) + } + } + + /// Sets the address of the three-page region in the VM's address space. + /// + /// See the documentation for `KVM_SET_TSS_ADDR`. + /// + /// # Arguments + /// + /// * `offset` - Physical address of a three-page region in the guest's physical address space. + /// + /// # Example + /// + /// ```rust + /// # extern crate kvm_ioctls; + /// # use kvm_ioctls::Kvm; + /// let kvm = Kvm::new().unwrap(); + /// let vm = kvm.create_vm().unwrap(); + /// vm.set_tss_address(0xfffb_d000).unwrap(); + /// ``` + #[cfg(target_arch = "x86_64")] + pub fn set_tss_address(&self, offset: usize) -> Result<()> { + // SAFETY: Safe because we know that our file is a VM fd and we verify the return result. + let ret = unsafe { ioctl_with_val(self, KVM_SET_TSS_ADDR(), offset as c_ulong) }; + if ret == 0 { + Ok(()) + } else { + Err(errno::Error::last()) + } + } + + /// Sets the address of the one-page region in the VM's address space. + /// + /// See the documentation for `KVM_SET_IDENTITY_MAP_ADDR`. + /// + /// # Arguments + /// + /// * `address` - Physical address of a one-page region in the guest's physical address space. + /// + /// # Example + /// + /// ```rust + /// # extern crate kvm_ioctls; + /// # use kvm_ioctls::Kvm; + /// let kvm = Kvm::new().unwrap(); + /// let vm = kvm.create_vm().unwrap(); + /// vm.set_identity_map_address(0xfffb_c000).unwrap(); + /// ``` + #[cfg(target_arch = "x86_64")] + pub fn set_identity_map_address(&self, address: u64) -> Result<()> { + // SAFETY: Safe because we know that our file is a VM fd and we verify the return result. + let ret = unsafe { ioctl_with_ref(self, KVM_SET_IDENTITY_MAP_ADDR(), &address) }; + if ret == 0 { + Ok(()) + } else { + Err(errno::Error::last()) + } + } + + /// Creates an in-kernel interrupt controller. + /// + /// See the documentation for `KVM_CREATE_IRQCHIP`. + /// + /// # Example + /// + /// ```rust + /// # extern crate kvm_ioctls; + /// # extern crate kvm_bindings; + /// # use kvm_ioctls::Kvm; + /// let kvm = Kvm::new().unwrap(); + /// let vm = kvm.create_vm().unwrap(); + /// + /// #[cfg(target_arch = "x86_64")] + /// vm.create_irq_chip().unwrap(); + /// #[cfg(target_arch = "aarch64")] + /// { + /// use kvm_bindings::{ + /// kvm_create_device, kvm_device_type_KVM_DEV_TYPE_ARM_VGIC_V2, KVM_CREATE_DEVICE_TEST, + /// }; + /// let mut gic_device = kvm_bindings::kvm_create_device { + /// type_: kvm_device_type_KVM_DEV_TYPE_ARM_VGIC_V2, + /// fd: 0, + /// flags: KVM_CREATE_DEVICE_TEST, + /// }; + /// if vm.create_device(&mut gic_device).is_ok() { + /// vm.create_irq_chip().unwrap(); + /// } + /// } + /// ``` + #[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))] + pub fn create_irq_chip(&self) -> Result<()> { + // SAFETY: Safe because we know that our file is a VM fd and we verify the return result. + let ret = unsafe { ioctl(self, KVM_CREATE_IRQCHIP()) }; + if ret == 0 { + Ok(()) + } else { + Err(errno::Error::last()) + } + } + + /// X86 specific call to retrieve the state of a kernel interrupt controller. + /// + /// See the documentation for `KVM_GET_IRQCHIP` in the + /// [KVM API doc](https://www.kernel.org/doc/Documentation/virtual/kvm/api.txt). + /// + /// # Arguments + /// + /// * `irqchip` - `kvm_irqchip` (input/output) to be read. 
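+    ///   The `chip_id` field selects which interrupt controller (PIC master,
+    ///   PIC slave or IOAPIC) is read, and must be set before the call.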
+    ///
+    /// # Example
+    ///
+    /// ```rust
+    /// # extern crate kvm_bindings;
+    /// # extern crate kvm_ioctls;
+    /// # use kvm_bindings::{kvm_irqchip, KVM_IRQCHIP_PIC_MASTER};
+    /// # use kvm_ioctls::Kvm;
+    /// let kvm = Kvm::new().unwrap();
+    /// let vm = kvm.create_vm().unwrap();
+    ///
+    /// vm.create_irq_chip().unwrap();
+    /// let mut irqchip = kvm_irqchip::default();
+    /// irqchip.chip_id = KVM_IRQCHIP_PIC_MASTER;
+    /// vm.get_irqchip(&mut irqchip).unwrap();
+    /// ```
+    #[cfg(target_arch = "x86_64")]
+    pub fn get_irqchip(&self, irqchip: &mut kvm_irqchip) -> Result<()> {
+        // SAFETY: Here we trust the kernel not to read past the end of the kvm_irqchip struct.
+        let ret = unsafe { ioctl_with_mut_ref(self, KVM_GET_IRQCHIP(), irqchip) };
+        if ret == 0 {
+            Ok(())
+        } else {
+            Err(errno::Error::last())
+        }
+    }
+
+    /// X86 specific call to set the state of a kernel interrupt controller.
+    ///
+    /// See the documentation for `KVM_SET_IRQCHIP` in the
+    /// [KVM API doc](https://www.kernel.org/doc/Documentation/virtual/kvm/api.txt).
+    ///
+    /// # Arguments
+    ///
+    /// * `irqchip` - `kvm_irqchip` to be written.
+    ///
+    /// # Example
+    ///
+    /// ```rust
+    /// # extern crate kvm_bindings;
+    /// # extern crate kvm_ioctls;
+    /// # use kvm_bindings::{kvm_irqchip, KVM_IRQCHIP_PIC_MASTER};
+    /// # use kvm_ioctls::Kvm;
+    /// let kvm = Kvm::new().unwrap();
+    /// let vm = kvm.create_vm().unwrap();
+    ///
+    /// vm.create_irq_chip().unwrap();
+    /// let mut irqchip = kvm_irqchip::default();
+    /// irqchip.chip_id = KVM_IRQCHIP_PIC_MASTER;
+    /// // Your `irqchip` manipulation here.
+    /// vm.set_irqchip(&mut irqchip).unwrap();
+    /// ```
+    #[cfg(target_arch = "x86_64")]
+    pub fn set_irqchip(&self, irqchip: &kvm_irqchip) -> Result<()> {
+        // SAFETY: Here we trust the kernel not to read past the end of the kvm_irqchip struct.
+        let ret = unsafe { ioctl_with_ref(self, KVM_SET_IRQCHIP(), irqchip) };
+        if ret == 0 {
+            Ok(())
+        } else {
+            Err(errno::Error::last())
+        }
+    }
+
+    /// Creates a PIT as per the `KVM_CREATE_PIT2` ioctl.
+    ///
+    /// # Arguments
+    ///
+    /// * `pit_config` - PIT configuration. For details check the `kvm_pit_config` structure in the
+    ///   [KVM API doc](https://www.kernel.org/doc/Documentation/virtual/kvm/api.txt).
+    ///
+    /// # Example
+    ///
+    /// ```rust
+    /// # extern crate kvm_ioctls;
+    /// extern crate kvm_bindings;
+    /// # use kvm_ioctls::Kvm;
+    /// use kvm_bindings::kvm_pit_config;
+    ///
+    /// let kvm = Kvm::new().unwrap();
+    /// let vm = kvm.create_vm().unwrap();
+    /// vm.create_irq_chip().unwrap();
+    /// let pit_config = kvm_pit_config::default();
+    /// vm.create_pit2(pit_config).unwrap();
+    /// ```
+    #[cfg(target_arch = "x86_64")]
+    pub fn create_pit2(&self, pit_config: kvm_pit_config) -> Result<()> {
+        // SAFETY: Safe because we know that our file is a VM fd, we know the kernel will only read
+        // the correct amount of memory from our pointer, and we verify the return result.
+        let ret = unsafe { ioctl_with_ref(self, KVM_CREATE_PIT2(), &pit_config) };
+        if ret == 0 {
+            Ok(())
+        } else {
+            Err(errno::Error::last())
+        }
+    }
+
+    /// X86 specific call to retrieve the state of the in-kernel PIT model.
+    ///
+    /// See the documentation for `KVM_GET_PIT2` in the
+    /// [KVM API doc](https://www.kernel.org/doc/Documentation/virtual/kvm/api.txt).
+    ///
+    /// # Returns
+    ///
+    /// * `kvm_pit_state2` - the current state of the in-kernel PIT.
+    ///
+    /// # Example
+    ///
+    /// ```rust
+    /// # extern crate kvm_bindings;
+    /// # extern crate kvm_ioctls;
+    /// # use kvm_bindings::kvm_pit_config;
+    /// # use kvm_ioctls::Kvm;
+    /// let kvm = Kvm::new().unwrap();
+    /// let vm = kvm.create_vm().unwrap();
+    /// vm.create_irq_chip().unwrap();
+    ///
+    /// let pit_config = kvm_pit_config::default();
+    /// vm.create_pit2(pit_config).unwrap();
+    /// let pitstate = vm.get_pit2().unwrap();
+    /// ```
+    #[cfg(target_arch = "x86_64")]
+    pub fn get_pit2(&self) -> Result<kvm_pit_state2> {
+        let mut pitstate = Default::default();
+        // SAFETY: Here we trust the kernel not to read past the end of the kvm_pit_state2 struct.
+        let ret = unsafe { ioctl_with_mut_ref(self, KVM_GET_PIT2(), &mut pitstate) };
+        if ret == 0 {
+            Ok(pitstate)
+        } else {
+            Err(errno::Error::last())
+        }
+    }
+
+    /// X86 specific call to set the state of the in-kernel PIT model.
+    ///
+    /// See the documentation for `KVM_SET_PIT2` in the
+    /// [KVM API doc](https://www.kernel.org/doc/Documentation/virtual/kvm/api.txt).
+    ///
+    /// # Arguments
+    ///
+    /// * `pitstate` - `kvm_pit_state2` to be written.
+    ///
+    /// # Example
+    ///
+    /// ```rust
+    /// # extern crate kvm_bindings;
+    /// # extern crate kvm_ioctls;
+    /// # use kvm_bindings::{kvm_pit_config, kvm_pit_state2};
+    /// # use kvm_ioctls::Kvm;
+    /// let kvm = Kvm::new().unwrap();
+    /// let vm = kvm.create_vm().unwrap();
+    /// vm.create_irq_chip().unwrap();
+    ///
+    /// let pit_config = kvm_pit_config::default();
+    /// vm.create_pit2(pit_config).unwrap();
+    /// let mut pitstate = kvm_pit_state2::default();
+    /// // Your `pitstate` manipulation here.
+    /// vm.set_pit2(&mut pitstate).unwrap();
+    /// ```
+    #[cfg(target_arch = "x86_64")]
+    pub fn set_pit2(&self, pitstate: &kvm_pit_state2) -> Result<()> {
+        // SAFETY: Here we trust the kernel not to read past the end of the kvm_pit_state2 struct.
+        let ret = unsafe { ioctl_with_ref(self, KVM_SET_PIT2(), pitstate) };
+        if ret == 0 {
+            Ok(())
+        } else {
+            Err(errno::Error::last())
+        }
+    }
+
+    /// X86 specific call to retrieve the current timestamp of kvmclock.
+    ///
+    /// See the documentation for `KVM_GET_CLOCK` in the
+    /// [KVM API doc](https://www.kernel.org/doc/Documentation/virtual/kvm/api.txt).
+    ///
+    /// # Returns
+    ///
+    /// * `kvm_clock_data` - the current timestamp of kvmclock.
+    ///
+    /// # Example
+    ///
+    /// ```rust
+    /// # extern crate kvm_ioctls;
+    /// # use kvm_ioctls::Kvm;
+    /// let kvm = Kvm::new().unwrap();
+    /// let vm = kvm.create_vm().unwrap();
+    /// let clock = vm.get_clock().unwrap();
+    /// ```
+    #[cfg(target_arch = "x86_64")]
+    pub fn get_clock(&self) -> Result<kvm_clock_data> {
+        let mut clock = Default::default();
+        // SAFETY: Here we trust the kernel not to read past the end of the kvm_clock_data struct.
+        let ret = unsafe { ioctl_with_mut_ref(self, KVM_GET_CLOCK(), &mut clock) };
+        if ret == 0 {
+            Ok(clock)
+        } else {
+            Err(errno::Error::last())
+        }
+    }
+
+    /// X86 specific call to set the current timestamp of kvmclock.
+    ///
+    /// See the documentation for `KVM_SET_CLOCK` in the
+    /// [KVM API doc](https://www.kernel.org/doc/Documentation/virtual/kvm/api.txt).
+    ///
+    /// # Arguments
+    ///
+    /// * `clock` - `kvm_clock_data` to be written.
+    ///
+    /// # Example
+    ///
+    /// ```rust
+    /// # extern crate kvm_bindings;
+    /// # extern crate kvm_ioctls;
+    /// # use kvm_bindings::kvm_clock_data;
+    /// # use kvm_ioctls::Kvm;
+    /// let kvm = Kvm::new().unwrap();
+    /// let vm = kvm.create_vm().unwrap();
+    /// let mut clock = kvm_clock_data::default();
+    /// vm.set_clock(&mut clock).unwrap();
+    /// ```
+    #[cfg(target_arch = "x86_64")]
+    pub fn set_clock(&self, clock: &kvm_clock_data) -> Result<()> {
+        // SAFETY: Here we trust the kernel not to read past the end of the kvm_clock_data struct.
+        let ret = unsafe { ioctl_with_ref(self, KVM_SET_CLOCK(), clock) };
+        if ret == 0 {
+            Ok(())
+        } else {
+            Err(errno::Error::last())
+        }
+    }
+
+    /// Directly injects an MSI message as per the `KVM_SIGNAL_MSI` ioctl.
+    ///
+    /// See the documentation for `KVM_SIGNAL_MSI`.
+    ///
+    /// This ioctl returns > 0 when the MSI is successfully delivered and 0
+    /// when the guest blocked the MSI.
+    ///
+    /// # Arguments
+    ///
+    /// * `kvm_msi` - MSI message configuration. For details check the `kvm_msi` structure in the
+    ///   [KVM API doc](https://www.kernel.org/doc/Documentation/virtual/kvm/api.txt).
+    ///
+    /// # Example
+    ///
+    /// In this example the call to signal_msi() that would reach the actual ioctl is
+    /// commented out. MSI vectors are not chosen on the HW (VMM) side; the guest OS
+    /// (or whatever runs inside the VM) is expected to allocate them, usually
+    /// communicating them back through PCI configuration space. Sending an arbitrary
+    /// MSI vector through signal_msi() would therefore always fail, which is why the
+    /// call is commented out.
+    ///
+    /// ```rust
+    /// # extern crate kvm_ioctls;
+    /// extern crate kvm_bindings;
+    /// # use kvm_ioctls::Kvm;
+    /// use kvm_bindings::kvm_msi;
+    ///
+    /// let kvm = Kvm::new().unwrap();
+    /// let vm = kvm.create_vm().unwrap();
+    /// let msi = kvm_msi::default();
+    /// #[cfg(target_arch = "x86_64")]
+    /// vm.create_irq_chip().unwrap();
+    /// //vm.signal_msi(msi).unwrap();
+    /// ```
+    #[cfg(any(
+        target_arch = "x86_64",
+        target_arch = "aarch64",
+        target_arch = "riscv64"
+    ))]
+    pub fn signal_msi(&self, msi: kvm_msi) -> Result<c_int> {
+        // SAFETY: Safe because we allocated the structure and we know the kernel
+        // will read exactly the size of the structure.
+        let ret = unsafe { ioctl_with_ref(self, KVM_SIGNAL_MSI(), &msi) };
+        if ret >= 0 {
+            Ok(ret)
+        } else {
+            Err(errno::Error::last())
+        }
+    }
+
+    /// Sets the GSI routing table entries, overwriting any previously set
+    /// entries, as per the `KVM_SET_GSI_ROUTING` ioctl.
+    ///
+    /// See the documentation for `KVM_SET_GSI_ROUTING`.
+    ///
+    /// Returns an io::Error when the table could not be updated.
+    ///
+    /// # Arguments
+    ///
+    /// * `kvm_irq_routing` - IRQ routing configuration. Describes all the routes
+    ///   associated with GSI entries. For details check the `kvm_irq_routing` and
+    ///   `kvm_irq_routing_entry` structures in the
+    ///   [KVM API doc](https://www.kernel.org/doc/Documentation/virtual/kvm/api.txt).
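+    ///
+    ///   Since the new table replaces the old one wholesale, passing a default
+    ///   (empty) `kvm_irq_routing` clears all previously configured routes.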
+    ///
+    /// # Example
+    ///
+    /// ```rust
+    /// # extern crate kvm_ioctls;
+    /// extern crate kvm_bindings;
+    /// # use kvm_ioctls::Kvm;
+    /// use kvm_bindings::kvm_irq_routing;
+    ///
+    /// let kvm = Kvm::new().unwrap();
+    /// let vm = kvm.create_vm().unwrap();
+    ///
+    /// #[cfg(target_arch = "x86_64")]
+    /// vm.create_irq_chip().unwrap();
+    ///
+    /// #[cfg(target_arch = "riscv64")]
+    /// vm.create_device(&mut kvm_bindings::kvm_create_device {
+    ///     type_: kvm_bindings::kvm_device_type_KVM_DEV_TYPE_RISCV_AIA,
+    ///     fd: 0,
+    ///     flags: 0,
+    /// })
+    /// .expect("Cannot create KVM vAIA device.");
+    ///
+    /// let irq_routing = kvm_irq_routing::default();
+    /// vm.set_gsi_routing(&irq_routing).unwrap();
+    /// ```
+    #[cfg(any(
+        target_arch = "x86_64",
+        target_arch = "aarch64",
+        target_arch = "riscv64"
+    ))]
+    pub fn set_gsi_routing(&self, irq_routing: &kvm_irq_routing) -> Result<()> {
+        // SAFETY: Safe because we allocated the structure and we know the kernel
+        // will read exactly the size of the structure.
+        let ret = unsafe { ioctl_with_ref(self, KVM_SET_GSI_ROUTING(), irq_routing) };
+        if ret == 0 {
+            Ok(())
+        } else {
+            Err(errno::Error::last())
+        }
+    }
+
+    /// Registers an event to be signaled whenever a certain address is written to.
+    ///
+    /// See the documentation for `KVM_IOEVENTFD`.
+    ///
+    /// # Arguments
+    ///
+    /// * `fd` - `EventFd` which will be signaled. When signaling, the usual `vmexit` to userspace
+    ///   is prevented.
+    /// * `addr` - Address being written to.
+    /// * `datamatch` - Limits signaling `fd` to only the cases where the value being written is
+    ///   equal to this parameter. The size of `datamatch` is important and it must
+    ///   match the expected size of the guest's write.
+    ///
+    /// # Example
+    ///
+    /// ```rust
+    /// # extern crate kvm_ioctls;
+    /// extern crate libc;
+    /// extern crate vmm_sys_util;
+    /// # use kvm_ioctls::{IoEventAddress, Kvm, NoDatamatch};
+    /// use libc::{eventfd, EFD_NONBLOCK};
+    /// use vmm_sys_util::eventfd::EventFd;
+    /// let kvm = Kvm::new().unwrap();
+    /// let vm_fd = kvm.create_vm().unwrap();
+    /// let evtfd = EventFd::new(EFD_NONBLOCK).unwrap();
+    /// vm_fd
+    ///     .register_ioevent(&evtfd, &IoEventAddress::Pio(0xf4), NoDatamatch)
+    ///     .unwrap();
+    /// vm_fd
+    ///     .register_ioevent(&evtfd, &IoEventAddress::Mmio(0x1000), NoDatamatch)
+    ///     .unwrap();
+    /// ```
+    pub fn register_ioevent<T: Into<u64>>(
+        &self,
+        fd: &EventFd,
+        addr: &IoEventAddress,
+        datamatch: T,
+    ) -> Result<()> {
+        let mut flags = 0;
+        if std::mem::size_of::<T>() > 0 {
+            flags |= 1 << kvm_ioeventfd_flag_nr_datamatch
+        }
+        if let IoEventAddress::Pio(_) = *addr {
+            flags |= 1 << kvm_ioeventfd_flag_nr_pio
+        }
+
+        let ioeventfd = kvm_ioeventfd {
+            datamatch: datamatch.into(),
+            len: std::mem::size_of::<T>() as u32,
+            addr: match addr {
+                IoEventAddress::Pio(ref p) => *p,
+                IoEventAddress::Mmio(ref m) => *m,
+            },
+            fd: fd.as_raw_fd(),
+            flags,
+            ..Default::default()
+        };
+        // SAFETY: Safe because we know that our file is a VM fd, we know the kernel will only read
+        // the correct amount of memory from our pointer, and we verify the return result.
+        let ret = unsafe { ioctl_with_ref(self, KVM_IOEVENTFD(), &ioeventfd) };
+        if ret == 0 {
+            Ok(())
+        } else {
+            Err(errno::Error::last())
+        }
+    }
+
+    /// Unregisters an event from a certain address it has been previously registered to.
+    ///
+    /// See the documentation for `KVM_IOEVENTFD`.
+    ///
+    /// # Arguments
+    ///
+    /// * `fd` - FD which will be unregistered.
+    /// * `addr` - Address being written to.
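+    /// * `datamatch` - The same match value that was used when the event was registered.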
+    ///
+    /// Note that this relies on the raw file descriptor behind `fd`: the `EventFd`
+    /// passed here has to be the one that was used when registering the event.
+    ///
+    /// # Example
+    ///
+    /// ```rust
+    /// # extern crate kvm_ioctls;
+    /// extern crate libc;
+    /// extern crate vmm_sys_util;
+    /// # use kvm_ioctls::{IoEventAddress, Kvm, NoDatamatch};
+    /// use libc::EFD_NONBLOCK;
+    /// use vmm_sys_util::eventfd::EventFd;
+    ///
+    /// let kvm = Kvm::new().unwrap();
+    /// let vm_fd = kvm.create_vm().unwrap();
+    /// let evtfd = EventFd::new(EFD_NONBLOCK).unwrap();
+    /// let pio_addr = IoEventAddress::Pio(0xf4);
+    /// let mmio_addr = IoEventAddress::Mmio(0x1000);
+    /// vm_fd
+    ///     .register_ioevent(&evtfd, &pio_addr, NoDatamatch)
+    ///     .unwrap();
+    /// vm_fd
+    ///     .register_ioevent(&evtfd, &mmio_addr, 0x1234u32)
+    ///     .unwrap();
+    /// vm_fd
+    ///     .unregister_ioevent(&evtfd, &pio_addr, NoDatamatch)
+    ///     .unwrap();
+    /// vm_fd
+    ///     .unregister_ioevent(&evtfd, &mmio_addr, 0x1234u32)
+    ///     .unwrap();
+    /// ```
+    pub fn unregister_ioevent<T: Into<u64>>(
+        &self,
+        fd: &EventFd,
+        addr: &IoEventAddress,
+        datamatch: T,
+    ) -> Result<()> {
+        let mut flags = 1 << kvm_ioeventfd_flag_nr_deassign;
+        if std::mem::size_of::<T>() > 0 {
+            flags |= 1 << kvm_ioeventfd_flag_nr_datamatch
+        }
+        if let IoEventAddress::Pio(_) = *addr {
+            flags |= 1 << kvm_ioeventfd_flag_nr_pio
+        }
+
+        let ioeventfd = kvm_ioeventfd {
+            datamatch: datamatch.into(),
+            len: std::mem::size_of::<T>() as u32,
+            addr: match addr {
+                IoEventAddress::Pio(ref p) => *p,
+                IoEventAddress::Mmio(ref m) => *m,
+            },
+            fd: fd.as_raw_fd(),
+            flags,
+            ..Default::default()
+        };
+        // SAFETY: Safe because we know that our file is a VM fd, we know the kernel will only read
+        // the correct amount of memory from our pointer, and we verify the return result.
+        let ret = unsafe { ioctl_with_ref(self, KVM_IOEVENTFD(), &ioeventfd) };
+        if ret == 0 {
+            Ok(())
+        } else {
+            Err(errno::Error::last())
+        }
+    }
+
+    /// Gets the bitmap of pages dirtied since the last call of this function.
+    ///
+    /// Leverages the dirty page logging feature in KVM. As a side-effect, this also resets the
+    /// bitmap inside the kernel. For the dirty log to be available, you have to set the flag
+    /// `KVM_MEM_LOG_DIRTY_PAGES` when creating guest memory regions.
+    ///
+    /// Check the documentation for `KVM_GET_DIRTY_LOG`.
+    ///
+    /// # Arguments
+    ///
+    /// * `slot` - Guest memory slot identifier.
+    /// * `memory_size` - Size of the memory region.
+    ///
+    /// # Example
+    ///
+    /// ```rust
+    /// # extern crate kvm_ioctls;
+    /// # extern crate kvm_bindings;
+    /// # use std::io::Write;
+    /// # use std::ptr::null_mut;
+    /// # use std::slice;
+    /// # use kvm_ioctls::{Kvm, VcpuExit};
+    /// # use kvm_bindings::{kvm_userspace_memory_region, KVM_MEM_LOG_DIRTY_PAGES};
+    /// # let kvm = Kvm::new().unwrap();
+    /// # let vm = kvm.create_vm().unwrap();
+    /// // This example is based on https://lwn.net/Articles/658511/.
+    /// let mem_size = 0x4000;
+    /// let guest_addr: u64 = 0x1000;
+    /// let load_addr: *mut u8 = unsafe {
+    ///     libc::mmap(
+    ///         null_mut(),
+    ///         mem_size,
+    ///         libc::PROT_READ | libc::PROT_WRITE,
+    ///         libc::MAP_ANONYMOUS | libc::MAP_SHARED | libc::MAP_NORESERVE,
+    ///         -1,
+    ///         0,
+    ///     ) as *mut u8
+    /// };
+    ///
+    /// // Initialize a guest memory region using the flag `KVM_MEM_LOG_DIRTY_PAGES`.
+    /// let mem_region = kvm_userspace_memory_region {
+    ///     slot: 0,
+    ///     guest_phys_addr: guest_addr,
+    ///     memory_size: mem_size as u64,
+    ///     userspace_addr: load_addr as u64,
+    ///     flags: KVM_MEM_LOG_DIRTY_PAGES,
+    /// };
+    /// unsafe { vm.set_user_memory_region(mem_region).unwrap() };
+    ///
+    /// #[cfg(target_arch = "x86_64")]
+    /// // ASM code that just forces a MMIO Write.
+    /// let asm_code = [0xc6, 0x06, 0x00, 0x80, 0x00];
+    /// #[cfg(target_arch = "aarch64")]
+    /// let asm_code = [
+    ///     0x01, 0x00, 0x00, 0x10, /* adr x1, <this address> */
+    ///     0x22, 0x10, 0x00, 0xb9, /* str w2, [x1, #16]; write to this page */
+    ///     0x02, 0x00, 0x00, 0xb9, /* str w2, [x0]; force MMIO exit */
+    ///     0x00, 0x00, 0x00, 0x14, /* b <this address>; shouldn't get here, but if so loop forever */
+    /// ];
+    /// #[cfg(target_arch = "riscv64")]
+    /// let asm_code = [
+    ///     0x17, 0x03, 0x00, 0x00, // auipc t1, 0; <this address> -> t1
+    ///     0xa3, 0x23, 0x73, 0x00, // sw t2, t1 + 7; dirty current page
+    ///     0x23, 0x20, 0x75, 0x00, // sw t2, a0; trigger MMIO exit
+    ///     0x6f, 0x00, 0x00, 0x00, // j .; shouldn't get here, but if so loop forever
+    /// ];
+    ///
+    /// // Write the code in the guest memory. This will generate a dirty page.
+    /// unsafe {
+    ///     let mut slice = slice::from_raw_parts_mut(load_addr, mem_size);
+    ///     slice.write(&asm_code).unwrap();
+    /// }
+    ///
+    /// let mut vcpu_fd = vm.create_vcpu(0).unwrap();
+    ///
+    /// #[cfg(target_arch = "x86_64")]
+    /// {
+    ///     // x86_64 specific register setup.
+    ///     let mut vcpu_sregs = vcpu_fd.get_sregs().unwrap();
+    ///     vcpu_sregs.cs.base = 0;
+    ///     vcpu_sregs.cs.selector = 0;
+    ///     vcpu_fd.set_sregs(&vcpu_sregs).unwrap();
+    ///
+    ///     let mut vcpu_regs = vcpu_fd.get_regs().unwrap();
+    ///     // Set the Instruction Pointer to the guest address where we loaded the code.
+    ///     vcpu_regs.rip = guest_addr;
+    ///     vcpu_regs.rax = 2;
+    ///     vcpu_regs.rbx = 3;
+    ///     vcpu_regs.rflags = 2;
+    ///     vcpu_fd.set_regs(&vcpu_regs).unwrap();
+    /// }
+    ///
+    /// #[cfg(target_arch = "aarch64")]
+    /// {
+    ///     // aarch64 specific register setup.
+    ///     let mut kvi = kvm_bindings::kvm_vcpu_init::default();
+    ///     vm.get_preferred_target(&mut kvi).unwrap();
+    ///     vcpu_fd.vcpu_init(&kvi).unwrap();
+    ///
+    ///     let core_reg_base: u64 = 0x6030_0000_0010_0000;
+    ///     let mmio_addr: u64 = guest_addr + mem_size as u64;
+    ///     vcpu_fd.set_one_reg(core_reg_base + 2 * 32, &guest_addr.to_le_bytes()); // set PC
+    ///     vcpu_fd.set_one_reg(core_reg_base + 2 * 0, &mmio_addr.to_le_bytes()); // set X0
+    /// }
+    ///
+    /// #[cfg(target_arch = "riscv64")]
+    /// {
+    ///     let core_reg_base: u64 = 0x8030_0000_0200_0000;
+    ///     let mmio_addr: u64 = guest_addr + mem_size as u64;
+    ///     vcpu_fd.set_one_reg(core_reg_base, &guest_addr.to_le_bytes()); // set PC
+    ///     vcpu_fd.set_one_reg(core_reg_base + 10, &mmio_addr.to_le_bytes()); // set A0
+    /// }
+    ///
+    /// loop {
+    ///     match vcpu_fd.run().expect("run failed") {
+    ///         VcpuExit::MmioWrite(addr, data) => {
+    ///             // On x86_64, the code snippet dirties 1 page when loading the code in memory
+    ///             // while on aarch64 the dirty bit comes from writing to guest_addr (current PC).
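+    ///             // Each u64 in the returned bitmap covers 64 guest pages, so with
+    ///             // 4 KiB pages this 0x4000-byte slot (4 pages) fits in one word.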
+    ///             let dirty_pages_bitmap = vm.get_dirty_log(0, mem_size).unwrap();
+    ///             let dirty_pages = dirty_pages_bitmap
+    ///                 .into_iter()
+    ///                 .map(|page| page.count_ones())
+    ///                 .fold(0, |dirty_page_count, i| dirty_page_count + i);
+    ///             assert_eq!(dirty_pages, 1);
+    ///             break;
+    ///         }
+    ///         exit_reason => panic!("unexpected exit reason: {:?}", exit_reason),
+    ///     }
+    /// }
+    /// ```
+    pub fn get_dirty_log(&self, slot: u32, memory_size: usize) -> Result<Vec<u64>> {
+        // Compute the length of the bitmap needed for all dirty pages in one memory slot.
+        // One memory page is `page_size` bytes and `KVM_GET_DIRTY_LOG` returns one dirty bit for
+        // each page.
+        // SAFETY: We trust the sysconf libc function and we're calling it with a correct parameter.
+        let page_size = match unsafe { libc::sysconf(libc::_SC_PAGESIZE) } {
+            -1 => return Err(errno::Error::last()),
+            ps => ps as usize,
+        };
+
+        // For ease of access we are saving the bitmap in a u64 vector. We are using ceil to
+        // make sure we count all dirty pages even when `memory_size` is not a multiple of
+        // `page_size * 64`.
+        let bitmap_size = memory_size.div_ceil(page_size * 64);
+        let mut bitmap = vec![0u64; bitmap_size];
+        let dirtylog = kvm_dirty_log {
+            slot,
+            padding1: 0,
+            __bindgen_anon_1: kvm_dirty_log__bindgen_ty_1 {
+                dirty_bitmap: bitmap.as_mut_ptr() as *mut c_void,
+            },
+        };
+        // SAFETY: Safe because we know that our file is a VM fd, and we know that the amount of
+        // memory we allocated for the bitmap is at least one bit per page.
+        let ret = unsafe { ioctl_with_ref(self, KVM_GET_DIRTY_LOG(), &dirtylog) };
+        if ret == 0 {
+            Ok(bitmap)
+        } else {
+            Err(errno::Error::last())
+        }
+    }
+
+    /// Registers an event that will, when signaled, trigger the `gsi` IRQ.
+    ///
+    /// # Arguments
+    ///
+    /// * `fd` - `EventFd` to be signaled.
+    /// * `gsi` - IRQ to be triggered.
+    ///
+    /// # Example
+    ///
+    /// ```rust
+    /// # extern crate kvm_ioctls;
+    /// # extern crate libc;
+    /// # extern crate vmm_sys_util;
+    /// # use kvm_ioctls::Kvm;
+    /// # use libc::EFD_NONBLOCK;
+    /// # use vmm_sys_util::eventfd::EventFd;
+    /// let kvm = Kvm::new().unwrap();
+    /// let vm = kvm.create_vm().unwrap();
+    /// let evtfd = EventFd::new(EFD_NONBLOCK).unwrap();
+    /// #[cfg(target_arch = "x86_64")]
+    /// {
+    ///     vm.create_irq_chip().unwrap();
+    ///     vm.register_irqfd(&evtfd, 0).unwrap();
+    /// }
+    /// ```
+    #[cfg(any(
+        target_arch = "x86_64",
+        target_arch = "aarch64",
+        target_arch = "riscv64"
+    ))]
+    pub fn register_irqfd(&self, fd: &EventFd, gsi: u32) -> Result<()> {
+        let irqfd = kvm_irqfd {
+            fd: fd.as_raw_fd() as u32,
+            gsi,
+            ..Default::default()
+        };
+        // SAFETY: Safe because we know that our file is a VM fd, we know the kernel will only read
+        // the correct amount of memory from our pointer, and we verify the return result.
+        let ret = unsafe { ioctl_with_ref(self, KVM_IRQFD(), &irqfd) };
+        if ret == 0 {
+            Ok(())
+        } else {
+            Err(errno::Error::last())
+        }
+    }
+
+    /// Registers an event that will, when signaled, assert the `gsi` IRQ.
+    /// If the irqchip is resampled by the guest, the IRQ is de-asserted,
+    /// and `resamplefd` is notified.
+    ///
+    /// # Arguments
+    ///
+    /// * `fd` - `EventFd` to be signaled.
+    /// * `resamplefd` - `EventFd` to be notified on resample.
+    /// * `gsi` - IRQ to be triggered.
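+    ///
+    /// This pairing is intended for level-triggered interrupts: the guest's EOI
+    /// triggers the resample notification, after which the line can be asserted again.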
+ /// + /// # Example + /// + /// ```rust + /// # extern crate kvm_ioctls; + /// # extern crate libc; + /// # extern crate vmm_sys_util; + /// # use kvm_ioctls::Kvm; + /// # use libc::EFD_NONBLOCK; + /// # use vmm_sys_util::eventfd::EventFd; + /// let kvm = Kvm::new().unwrap(); + /// let vm = kvm.create_vm().unwrap(); + /// let evtfd = EventFd::new(EFD_NONBLOCK).unwrap(); + /// let resamplefd = EventFd::new(EFD_NONBLOCK).unwrap(); + /// #[cfg(target_arch = "x86_64")] + /// { + /// vm.create_irq_chip().unwrap(); + /// vm.register_irqfd_with_resample(&evtfd, &resamplefd, 0) + /// .unwrap(); + /// } + /// ``` + #[cfg(any( + target_arch = "x86_64", + target_arch = "aarch64", + target_arch = "riscv64" + ))] + pub fn register_irqfd_with_resample( + &self, + fd: &EventFd, + resamplefd: &EventFd, + gsi: u32, + ) -> Result<()> { + let irqfd = kvm_irqfd { + fd: fd.as_raw_fd() as u32, + resamplefd: resamplefd.as_raw_fd() as u32, + gsi, + flags: KVM_IRQFD_FLAG_RESAMPLE, + ..Default::default() + }; + // SAFETY: Safe because we know that our file is a VM fd, we know the kernel will only read + // the correct amount of memory from our pointer, and we verify the return result. + let ret = unsafe { ioctl_with_ref(self, KVM_IRQFD(), &irqfd) }; + if ret == 0 { + Ok(()) + } else { + Err(errno::Error::last()) + } + } + + /// Unregisters an event that will, when signaled, trigger the `gsi` IRQ. + /// + /// # Arguments + /// + /// * `fd` - `EventFd` to be signaled. + /// * `gsi` - IRQ to be triggered. + /// + /// # Example + /// + /// ```rust + /// # extern crate kvm_ioctls; + /// # extern crate libc; + /// # extern crate vmm_sys_util; + /// # use kvm_ioctls::Kvm; + /// # use libc::EFD_NONBLOCK; + /// # use vmm_sys_util::eventfd::EventFd; + /// let kvm = Kvm::new().unwrap(); + /// let vm = kvm.create_vm().unwrap(); + /// let evtfd = EventFd::new(EFD_NONBLOCK).unwrap(); + /// let resamplefd = EventFd::new(EFD_NONBLOCK).unwrap(); + /// #[cfg(target_arch = "x86_64")] + /// { + /// vm.create_irq_chip().unwrap(); + /// vm.register_irqfd(&evtfd, 0).unwrap(); + /// vm.unregister_irqfd(&evtfd, 0).unwrap(); + /// vm.register_irqfd_with_resample(&evtfd, &resamplefd, 0) + /// .unwrap(); + /// vm.unregister_irqfd(&evtfd, 0).unwrap(); + /// } + /// ``` + #[cfg(any( + target_arch = "x86_64", + target_arch = "aarch64", + target_arch = "riscv64" + ))] + pub fn unregister_irqfd(&self, fd: &EventFd, gsi: u32) -> Result<()> { + let irqfd = kvm_irqfd { + fd: fd.as_raw_fd() as u32, + gsi, + flags: KVM_IRQFD_FLAG_DEASSIGN, + ..Default::default() + }; + // SAFETY: Safe because we know that our file is a VM fd, we know the kernel will only read + // the correct amount of memory from our pointer, and we verify the return result. + let ret = unsafe { ioctl_with_ref(self, KVM_IRQFD(), &irqfd) }; + if ret == 0 { + Ok(()) + } else { + Err(errno::Error::last()) + } + } + + /// Sets the level on the given irq to 1 if `active` is true, and 0 otherwise. + /// + /// # Arguments + /// + /// * `irq` - IRQ to be set. + /// * `active` - Level of the IRQ input. + /// + /// # Errors + /// + /// Returns an io::Error when the irq field is invalid + /// + /// # Examples + /// + /// ```rust + /// # extern crate kvm_ioctls; + /// # extern crate libc; + /// # extern crate vmm_sys_util; + /// # use kvm_ioctls::{Kvm, VmFd}; + /// # use libc::EFD_NONBLOCK; + /// # use vmm_sys_util::eventfd::EventFd; + /// fn arch_setup(vm_fd: &VmFd) { + /// // Arch-specific setup: + /// // For x86 architectures, it simply means calling vm.create_irq_chip().unwrap(). 
+    /// # #[cfg(target_arch = "x86_64")]
+    /// # vm_fd.create_irq_chip().unwrap();
+    ///     // For Arm architectures, the IRQ controllers need to be set up first.
+    ///     // For details, please refer to the kernel documentation:
+    ///     // https://www.kernel.org/doc/Documentation/virtual/kvm/api.txt
+    /// # #[cfg(target_arch = "aarch64")] {
+    /// # vm_fd.create_vcpu(0).unwrap();
+    /// # // ... rest of setup for Arm goes here
+    /// # }
+    /// }
+    ///
+    /// let kvm = Kvm::new().unwrap();
+    /// let vm = kvm.create_vm().unwrap();
+    /// arch_setup(&vm);
+    /// #[cfg(target_arch = "x86_64")]
+    /// {
+    ///     vm.set_irq_line(4, true);
+    ///     // ...
+    /// }
+    /// #[cfg(target_arch = "aarch64")]
+    /// {
+    ///     vm.set_irq_line(0x01_00_0020, true);
+    ///     // ....
+    /// }
+    /// ```
+    #[cfg(any(
+        target_arch = "x86_64",
+        target_arch = "aarch64",
+        target_arch = "riscv64"
+    ))]
+    pub fn set_irq_line(&self, irq: u32, active: bool) -> Result<()> {
+        let mut irq_level = kvm_irq_level::default();
+        irq_level.__bindgen_anon_1.irq = irq;
+        irq_level.level = u32::from(active);
+
+        // SAFETY: Safe because we know that our file is a VM fd, we know the kernel will only read
+        // the correct amount of memory from our pointer, and we verify the return result.
+        let ret = unsafe { ioctl_with_ref(self, KVM_IRQ_LINE(), &irq_level) };
+        if ret == 0 {
+            Ok(())
+        } else {
+            Err(errno::Error::last())
+        }
+    }
+
+    /// Creates a new KVM vCPU file descriptor and maps the memory corresponding
+    /// to its `kvm_run` structure.
+    ///
+    /// See the documentation for `KVM_CREATE_VCPU`.
+    ///
+    /// # Arguments
+    ///
+    /// * `id` - The vCPU ID.
+    ///
+    /// # Errors
+    ///
+    /// Returns an io::Error when the VM fd is invalid or the vCPU memory cannot
+    /// be mapped correctly.
+    ///
+    /// # Example
+    ///
+    /// ```rust
+    /// # extern crate kvm_ioctls;
+    /// # use kvm_ioctls::Kvm;
+    /// let kvm = Kvm::new().unwrap();
+    /// let vm = kvm.create_vm().unwrap();
+    /// // Create one vCPU with the ID=0.
+    /// let vcpu = vm.create_vcpu(0);
+    /// ```
+    pub fn create_vcpu(&self, id: u64) -> Result<VcpuFd> {
+        #[allow(clippy::cast_lossless)]
+        // SAFETY: Safe because we know that vm is a VM fd and we verify the return result.
+        let vcpu_fd = unsafe { ioctl_with_val(&self.vm, KVM_CREATE_VCPU(), id as c_ulong) };
+        if vcpu_fd < 0 {
+            return Err(errno::Error::last());
+        }
+
+        // Wrap the vCPU now in case the following ? returns early.
+        // SAFETY: This is safe because we verified the value of the fd and we own the fd.
+        let vcpu = unsafe { File::from_raw_fd(vcpu_fd) };
+
+        let kvm_run_ptr = KvmRunWrapper::mmap_from_fd(&vcpu, self.run_size)?;
+
+        Ok(new_vcpu(vcpu, kvm_run_ptr))
+    }
+
+    /// Creates a VcpuFd object from a vcpu RawFd.
+    ///
+    /// # Arguments
+    ///
+    /// * `fd` - the RawFd used for creating the VcpuFd object.
+    ///
+    /// # Safety
+    ///
+    /// This function is unsafe as the primitives currently returned have the contract that
+    /// they are the sole owner of the file descriptor they are wrapping. Usage of this function
+    /// could accidentally allow violating this contract which can cause memory unsafety in code
+    /// that relies on it being true.
+    ///
+    /// The caller of this method must make sure the fd is valid and nothing else uses it.
+    ///
+    /// # Example
+    ///
+    /// ```rust
+    /// # extern crate kvm_ioctls;
+    /// # use std::os::unix::io::AsRawFd;
+    /// # use kvm_ioctls::Kvm;
+    /// let kvm = Kvm::new().unwrap();
+    /// let vm = kvm.create_vm().unwrap();
+    /// // Create one vCPU with the ID=0.
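+    /// // (dup() below gives the new wrapper its own descriptor; handing the same
+    /// // raw fd to two owners would violate the safety contract described above.)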
+    /// let vcpu = vm.create_vcpu(0).unwrap();
+    /// let rawfd = unsafe { libc::dup(vcpu.as_raw_fd()) };
+    /// assert!(rawfd >= 0);
+    /// let vcpu = unsafe { vm.create_vcpu_from_rawfd(rawfd).unwrap() };
+    /// ```
+    pub unsafe fn create_vcpu_from_rawfd(&self, fd: RawFd) -> Result<VcpuFd> {
+        let vcpu = File::from_raw_fd(fd);
+        let kvm_run_ptr = KvmRunWrapper::mmap_from_fd(&vcpu, self.run_size)?;
+        Ok(new_vcpu(vcpu, kvm_run_ptr))
+    }
+
+    /// Creates an emulated device in the kernel.
+    ///
+    /// See the documentation for `KVM_CREATE_DEVICE`.
+    ///
+    /// # Arguments
+    ///
+    /// * `device`: device configuration. For details check the `kvm_create_device` structure in the
+    ///   [KVM API doc](https://www.kernel.org/doc/Documentation/virtual/kvm/api.txt).
+    ///
+    /// # Example
+    ///
+    /// ```rust
+    /// # extern crate kvm_ioctls;
+    /// # extern crate kvm_bindings;
+    /// # use kvm_ioctls::Kvm;
+    /// use kvm_bindings::{
+    ///     kvm_device_type_KVM_DEV_TYPE_ARM_VGIC_V2, kvm_device_type_KVM_DEV_TYPE_ARM_VGIC_V3,
+    ///     kvm_device_type_KVM_DEV_TYPE_RISCV_AIA, kvm_device_type_KVM_DEV_TYPE_VFIO,
+    ///     KVM_CREATE_DEVICE_TEST,
+    /// };
+    /// let kvm = Kvm::new().unwrap();
+    /// let vm = kvm.create_vm().unwrap();
+    ///
+    /// // Creating a device with the KVM_CREATE_DEVICE_TEST flag to check
+    /// // whether the device type is supported. This will not create the device.
+    /// // To create the device the flag needs to be removed.
+    /// let mut device = kvm_bindings::kvm_create_device {
+    ///     #[cfg(target_arch = "x86_64")]
+    ///     type_: kvm_device_type_KVM_DEV_TYPE_VFIO,
+    ///     #[cfg(target_arch = "aarch64")]
+    ///     type_: kvm_device_type_KVM_DEV_TYPE_ARM_VGIC_V3,
+    ///     #[cfg(target_arch = "riscv64")]
+    ///     type_: kvm_device_type_KVM_DEV_TYPE_RISCV_AIA,
+    ///     fd: 0,
+    ///     flags: KVM_CREATE_DEVICE_TEST,
+    /// };
+    /// // On ARM, creating the VGICv3 may fail due to hardware dependencies.
+    /// // Retry with a VGICv2 in that case.
+    /// let device_fd = vm.create_device(&mut device).unwrap_or_else(|_| {
+    ///     #[cfg(target_arch = "x86_64")]
+    ///     panic!("Cannot create VFIO device.");
+    ///     #[cfg(target_arch = "aarch64")]
+    ///     {
+    ///         device.type_ = kvm_device_type_KVM_DEV_TYPE_ARM_VGIC_V2;
+    ///         vm.create_device(&mut device)
+    ///             .expect("Cannot create vGIC device")
+    ///     }
+    ///     #[cfg(target_arch = "riscv64")]
+    ///     panic!("Cannot create vAIA device.");
+    /// });
+    /// ```
+    pub fn create_device(&self, device: &mut kvm_create_device) -> Result<DeviceFd> {
+        // SAFETY: Safe because we are calling this with the VM fd and we trust the kernel.
+        let ret = unsafe { ioctl_with_ref(self, KVM_CREATE_DEVICE(), device) };
+        if ret == 0 {
+            // SAFETY: We validated the return of the function creating the fd and we trust the
+            // kernel.
+            Ok(new_device(unsafe { File::from_raw_fd(device.fd as i32) }))
+        } else {
+            Err(errno::Error::last())
+        }
+    }
+
+    /// Returns the preferred CPU target type which can be emulated by KVM on the underlying host.
+    ///
+    /// The preferred CPU target is returned in the `kvi` parameter.
+    /// See the documentation for `KVM_ARM_PREFERRED_TARGET`.
+    ///
+    /// # Arguments
+    /// * `kvi` - CPU target configuration (out). For details check the `kvm_vcpu_init`
+    ///   structure in the
+    ///   [KVM API doc](https://www.kernel.org/doc/Documentation/virtual/kvm/api.txt).
+ /// + /// # Example + /// + /// ```rust + /// # extern crate kvm_ioctls; + /// # extern crate kvm_bindings; + /// # use kvm_ioctls::Kvm; + /// use kvm_bindings::kvm_vcpu_init; + /// let kvm = Kvm::new().unwrap(); + /// let vm = kvm.create_vm().unwrap(); + /// let mut kvi = kvm_vcpu_init::default(); + /// vm.get_preferred_target(&mut kvi).unwrap(); + /// ``` + #[cfg(target_arch = "aarch64")] + pub fn get_preferred_target(&self, kvi: &mut kvm_vcpu_init) -> Result<()> { + // SAFETY: The ioctl is safe because we allocated the struct and we know the + // kernel will write exactly the size of the struct. + let ret = unsafe { ioctl_with_mut_ref(self, KVM_ARM_PREFERRED_TARGET(), kvi) }; + if ret != 0 { + return Err(errno::Error::last()); + } + Ok(()) + } + + /// Enable the specified capability as per the `KVM_ENABLE_CAP` ioctl. + /// + /// See the documentation for `KVM_ENABLE_CAP`. + /// + /// Returns an io::Error when the capability could not be enabled. + /// + /// # Arguments + /// + /// * kvm_enable_cap - KVM capability structure. For details check the `kvm_enable_cap` + /// structure in the + /// [KVM API doc](https://www.kernel.org/doc/Documentation/virtual/kvm/api.txt). + /// + /// # Example + /// + /// ```rust + /// # extern crate kvm_ioctls; + /// extern crate kvm_bindings; + /// + /// # use kvm_ioctls::Kvm; + /// use kvm_bindings::{kvm_enable_cap, KVM_CAP_SPLIT_IRQCHIP}; + /// + /// let kvm = Kvm::new().unwrap(); + /// let vm = kvm.create_vm().unwrap(); + /// let mut cap: kvm_enable_cap = Default::default(); + /// // This example cannot enable an arm/aarch64 capability since there + /// // is no capability available for these architectures. + /// if cfg!(target_arch = "x86_64") { + /// cap.cap = KVM_CAP_SPLIT_IRQCHIP; + /// // As per the KVM documentation, KVM_CAP_SPLIT_IRQCHIP only emulates + /// // the local APIC in kernel, expecting that a userspace IOAPIC will + /// // be implemented by the VMM. + /// // Along with this capability, the user needs to specify the number + /// // of pins reserved for the userspace IOAPIC. This number needs to be + /// // provided through the first argument of the capability structure, as + /// // specified in KVM documentation: + /// // args[0] - number of routes reserved for userspace IOAPICs + /// // + /// // Because an IOAPIC supports 24 pins, that's the reason why this test + /// // picked this number as reference. + /// cap.args[0] = 24; + /// vm.enable_cap(&cap).unwrap(); + /// } + /// ``` + #[cfg(not(any(target_arch = "aarch64", target_arch = "riscv64")))] + pub fn enable_cap(&self, cap: &kvm_enable_cap) -> Result<()> { + // SAFETY: The ioctl is safe because we allocated the struct and we know the + // kernel will write exactly the size of the struct. + let ret = unsafe { ioctl_with_ref(self, KVM_ENABLE_CAP(), cap) }; + if ret == 0 { + Ok(()) + } else { + Err(errno::Error::last()) + } + } + + /// Get the `kvm_run` size. + pub fn run_size(&self) -> usize { + self.run_size + } + + /// Wrapper over `KVM_CHECK_EXTENSION`. + /// + /// Returns 0 if the capability is not available and a positive integer otherwise. + fn check_extension_int(&self, c: Cap) -> i32 { + // SAFETY: Safe because we know that our file is a VM fd and that the extension is one of + // the ones defined by kernel. + unsafe { ioctl_with_val(self, KVM_CHECK_EXTENSION(), c as c_ulong) } + } + + /// Checks if a particular `Cap` is available. + /// + /// Returns true if the capability is supported and false otherwise. + /// See the documentation for `KVM_CHECK_EXTENSION`. 
+    ///
+    /// # Arguments
+    ///
+    /// * `c` - VM capability to check.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// # use kvm_ioctls::Kvm;
+    /// use kvm_ioctls::Cap;
+    ///
+    /// let kvm = Kvm::new().unwrap();
+    /// let vm = kvm.create_vm().unwrap();
+    /// // Check if `KVM_CAP_MP_STATE` is supported.
+    /// assert!(vm.check_extension(Cap::MpState));
+    /// ```
+    pub fn check_extension(&self, c: Cap) -> bool {
+        self.check_extension_int(c) > 0
+    }
+
+    /// Creates an anonymous file and returns a file descriptor that refers to it.
+    ///
+    /// See the documentation for `KVM_CREATE_GUEST_MEMFD`.
+    ///
+    /// Returns an io::Error when the file could not be created.
+    ///
+    /// # Arguments
+    ///
+    /// * kvm_create_guest_memfd - KVM create guest memfd structure. For details check the
+    ///   `kvm_create_guest_memfd` structure in the
+    ///   [KVM API doc](https://www.kernel.org/doc/Documentation/virtual/kvm/api.txt).
+    ///
+    /// # Example
+    ///
+    /// ```rust
+    /// # extern crate kvm_ioctls;
+    /// extern crate kvm_bindings;
+    ///
+    /// # use kvm_ioctls::Kvm;
+    /// use kvm_bindings::{kvm_create_guest_memfd, kvm_enable_cap, KVM_CAP_GUEST_MEMFD};
+    /// use std::os::fd::RawFd;
+    ///
+    /// # #[cfg(target_arch = "x86_64")]
+    /// {
+    ///     let kvm = Kvm::new().unwrap();
+    ///     let vm = kvm.create_vm().unwrap();
+    ///
+    ///     let config = kvm_enable_cap {
+    ///         cap: KVM_CAP_GUEST_MEMFD,
+    ///         ..Default::default()
+    ///     };
+    ///
+    ///     if vm.enable_cap(&config).is_err() {
+    ///         return;
+    ///     }
+    ///
+    ///     let gmem = kvm_create_guest_memfd {
+    ///         size: 0x1000,
+    ///         flags: 0,
+    ///         reserved: [0; 6],
+    ///     };
+    ///
+    ///     let id: RawFd = vm.create_guest_memfd(gmem).unwrap();
+    /// }
+    /// ```
+    #[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))]
+    pub fn create_guest_memfd(&self, gmem: kvm_create_guest_memfd) -> Result<RawFd> {
+        // SAFETY: Safe because we know that our file is a VM fd, we know the kernel will only
+        // read the correct amount of memory from our pointer, and we verify the return result.
+        let ret = unsafe { ioctl_with_ref(self, KVM_CREATE_GUEST_MEMFD(), &gmem) };
+        if ret < 0 {
+            return Err(errno::Error::last());
+        }
+        Ok(ret)
+    }
+
+    /// Allows userspace to set memory attributes for a range of guest physical memory.
+    ///
+    /// See the documentation for `KVM_SET_MEMORY_ATTRIBUTES`.
+    ///
+    /// Returns an io::Error when the attributes could not be set.
+    ///
+    /// # Arguments
+    ///
+    /// * kvm_memory_attributes - KVM set memory attributes structure. For details check the
+    ///   `kvm_memory_attributes` structure in the
+    ///   [KVM API doc](https://www.kernel.org/doc/Documentation/virtual/kvm/api.txt).
+    ///
+    /// # Example
+    ///
+    /// ```rust
+    /// # extern crate kvm_ioctls;
+    /// extern crate kvm_bindings;
+    ///
+    /// # use kvm_ioctls::Kvm;
+    /// use kvm_bindings::{
+    ///     kvm_create_guest_memfd, kvm_enable_cap, kvm_memory_attributes,
+    ///     kvm_userspace_memory_region2, KVM_CAP_GUEST_MEMFD, KVM_CAP_MEMORY_ATTRIBUTES,
+    ///     KVM_CAP_USER_MEMORY2, KVM_MEMORY_ATTRIBUTE_PRIVATE, KVM_MEM_GUEST_MEMFD,
+    /// };
+    /// use std::os::fd::RawFd;
+    ///
+    /// # #[cfg(target_arch = "x86_64")]
+    /// {
+    ///     let kvm = Kvm::new().unwrap();
+    ///     let vm = kvm.create_vm().unwrap();
+    ///     let gmem = kvm_create_guest_memfd {
+    ///         size: 0x10000,
+    ///         flags: 0,
+    ///         reserved: [0; 6],
+    ///     };
+    ///
+    ///     let address_space = unsafe {
+    ///         libc::mmap(
+    ///             0 as _,
+    ///             0x10000,
+    ///             libc::PROT_READ | libc::PROT_WRITE,
+    ///             libc::MAP_PRIVATE | libc::MAP_ANONYMOUS,
+    ///             -1,
+    ///             0,
+    ///         )
+    ///     };
+    ///     let userspace_addr = address_space as *const u8 as u64;
+    ///     let mut config = kvm_enable_cap {
+    ///         cap: KVM_CAP_GUEST_MEMFD,
+    ///         ..Default::default()
+    ///     };
+    ///
+    ///     if vm.enable_cap(&config).is_err() {
+    ///         return;
+    ///     }
+    ///
+    ///     config.cap = KVM_CAP_USER_MEMORY2;
+    ///
+    ///     if vm.enable_cap(&config).is_err() {
+    ///         return;
+    ///     }
+    ///     config.cap = KVM_CAP_MEMORY_ATTRIBUTES;
+    ///
+    ///     if vm.enable_cap(&config).is_err() {
+    ///         return;
+    ///     }
+    ///     let fd: RawFd = vm.create_guest_memfd(gmem).unwrap();
+    ///     let mem_region = kvm_userspace_memory_region2 {
+    ///         slot: 0,
+    ///         flags: KVM_MEM_GUEST_MEMFD,
+    ///         guest_phys_addr: 0x10000 as u64,
+    ///         memory_size: 0x10000 as u64,
+    ///         userspace_addr,
+    ///         guest_memfd_offset: 0,
+    ///         guest_memfd: fd as u32,
+    ///         pad1: 0,
+    ///         pad2: [0; 14],
+    ///     };
+    ///     unsafe {
+    ///         vm.set_user_memory_region2(mem_region).unwrap();
+    ///     };
+    ///
+    ///     let attr = kvm_memory_attributes {
+    ///         address: 0x10000,
+    ///         size: 0x10000,
+    ///         attributes: KVM_MEMORY_ATTRIBUTE_PRIVATE as u64,
+    ///         flags: 0,
+    ///     };
+    ///     vm.set_memory_attributes(attr).unwrap();
+    /// }
+    /// ```
+    #[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))]
+    pub fn set_memory_attributes(&self, attr: kvm_memory_attributes) -> Result<()> {
+        // SAFETY: Safe because we know that our file is a VM fd, we know the kernel will only read
+        // the correct amount of memory from our pointer, and we verify the return result.
+        let ret = unsafe { ioctl_with_ref(self, KVM_SET_MEMORY_ATTRIBUTES(), &attr) };
+        if ret == 0 {
+            Ok(())
+        } else {
+            Err(errno::Error::last())
+        }
+    }
+
+    /// Issues platform-specific memory encryption commands to manage encrypted VMs if
+    /// the platform supports creating those encrypted VMs.
+    ///
+    /// Currently, this ioctl is used for issuing Secure Encrypted Virtualization
+    /// (SEV) commands on AMD Processors.
+    ///
+    /// See the documentation for `KVM_MEMORY_ENCRYPT_OP` in the
+    /// [KVM API doc](https://www.kernel.org/doc/Documentation/virtual/kvm/api.txt).
+    ///
+    /// For SEV-specific functionality, prefer the safe wrapper:
+    /// - [`encrypt_op_sev`](Self::encrypt_op_sev)
+    ///
+    /// # Safety
+    ///
+    /// This function is unsafe because there is no guarantee that `T` is valid in this context,
+    /// how much data the kernel will read from memory, or where it will write data on error.
+    ///
+    /// # Arguments
+    ///
+    /// * `op` - an opaque platform-specific structure.
+    ///
+    /// # Example
+    #[cfg_attr(has_sev, doc = "```rust")]
+    #[cfg_attr(not(has_sev), doc = "```rust,no_run")]
+    /// # extern crate kvm_ioctls;
+    /// # extern crate kvm_bindings;
+    /// use kvm_bindings::bindings::kvm_sev_cmd;
+    /// # use kvm_ioctls::Kvm;
+    ///
+    /// let kvm = Kvm::new().unwrap();
+    /// let vm = kvm.create_vm().unwrap();
+    ///
+    /// // Initialize the SEV platform context.
+    /// let mut init: kvm_sev_cmd = Default::default();
+    /// unsafe { vm.encrypt_op(&mut init).unwrap() };
+    /// ```
+    #[cfg(target_arch = "x86_64")]
+    pub unsafe fn encrypt_op<T>(&self, op: *mut T) -> Result<()> {
+        let ret = ioctl_with_mut_ptr(self, KVM_MEMORY_ENCRYPT_OP(), op);
+        if ret == 0 {
+            Ok(())
+        } else {
+            Err(errno::Error::last())
+        }
+    }
+
+    /// Issues common lifecycle events of SEV guests, such as launching, running, snapshotting,
+    /// migrating and decommissioning, via the `KVM_MEMORY_ENCRYPT_OP` ioctl.
+    ///
+    /// The kernel documentation states that this ioctl can be used for testing whether SEV is
+    /// enabled by sending `NULL`. To do that, pass [`std::ptr::null_mut`](std::ptr::null_mut)
+    /// to [`encrypt_op`](Self::encrypt_op).
+    ///
+    /// See the documentation for Secure Encrypted Virtualization (SEV).
+    ///
+    /// # Arguments
+    ///
+    /// * `op` - SEV-specific structure. For details check the
+    ///   [Secure Encrypted Virtualization (SEV) doc](https://www.kernel.org/doc/Documentation/virtual/kvm/amd-memory-encryption.rst).
+    ///
+    /// # Example
+    #[cfg_attr(has_sev, doc = "```rust")]
+    #[cfg_attr(not(has_sev), doc = "```rust,no_run")]
+    /// # extern crate kvm_ioctls;
+    /// # extern crate kvm_bindings;
+    /// # use std::{os::raw::c_void, ptr::null_mut};
+    /// use kvm_bindings::bindings::kvm_sev_cmd;
+    /// # use kvm_ioctls::Kvm;
+    ///
+    /// let kvm = Kvm::new().unwrap();
+    /// let vm = kvm.create_vm().unwrap();
+    ///
+    /// // Check whether SEV is enabled, optional.
+    /// unsafe { vm.encrypt_op(null_mut() as *mut c_void) }.unwrap();
+    ///
+    /// // Initialize the SEV platform context.
+    /// let mut init: kvm_sev_cmd = Default::default();
+    /// vm.encrypt_op_sev(&mut init).unwrap();
+    /// ```
+    #[cfg(target_arch = "x86_64")]
+    pub fn encrypt_op_sev(&self, op: &mut kvm_sev_cmd) -> Result<()> {
+        // SAFETY: Safe because we know that the kernel will only read the correct amount of memory
+        // from our pointer and we know where it will write it (op.error).
+        unsafe { self.encrypt_op(op) }
+    }
+
+    /// Register a guest memory region which may contain encrypted data.
+    ///
+    /// It is used in the SEV-enabled guest.
+    ///
+    /// See the documentation for `KVM_MEMORY_ENCRYPT_REG_REGION` in the
+    /// [KVM API doc](https://www.kernel.org/doc/Documentation/virtual/kvm/api.txt).
+    ///
+    /// # Arguments
+    ///
+    /// * `memory_region` - Guest physical memory region.
+    ///
+    /// # Example
+    #[cfg_attr(has_sev, doc = "```rust")]
+    #[cfg_attr(not(has_sev), doc = "```rust,no_run")]
+    /// # extern crate kvm_bindings;
+    /// # extern crate kvm_ioctls;
+    /// # extern crate libc;
+    /// # use std::{fs::OpenOptions, ptr::null_mut};
+    /// # use std::os::unix::io::AsRawFd;
+    /// use kvm_bindings::bindings::{kvm_enc_region, kvm_sev_cmd, kvm_sev_launch_start, sev_cmd_id_KVM_SEV_LAUNCH_START};
+    /// # use kvm_ioctls::Kvm;
+    /// use libc;
+    ///
+    /// let kvm = Kvm::new().unwrap();
+    /// let vm = kvm.create_vm().unwrap();
+    /// let sev = OpenOptions::new()
+    ///     .read(true)
+    ///     .write(true)
+    ///     .open("/dev/sev")
+    ///     .unwrap();
+    ///
+    /// // Initialize the SEV platform context.
+ /// let mut init: kvm_sev_cmd = Default::default(); + /// vm.encrypt_op_sev(&mut init).unwrap(); + /// + /// // Create the memory encryption context. + /// let start_data: kvm_sev_launch_start = Default::default(); + /// let mut start = kvm_sev_cmd { + /// id: sev_cmd_id_KVM_SEV_LAUNCH_START, + /// data: &start_data as *const kvm_sev_launch_start as _, + /// sev_fd: sev.as_raw_fd() as _, + /// ..Default::default() + /// }; + /// vm.encrypt_op_sev(&mut start).unwrap(); + /// + /// let addr = unsafe { + /// libc::mmap( + /// null_mut(), + /// 4096, + /// libc::PROT_READ | libc::PROT_WRITE, + /// libc::MAP_PRIVATE | libc::MAP_ANONYMOUS, + /// -1, + /// 0, + /// ) + /// }; + /// assert_ne!(addr, libc::MAP_FAILED); + /// + /// let memory_region = kvm_enc_region { + /// addr: addr as _, + /// size: 4096, + /// }; + /// vm.register_enc_memory_region(&memory_region).unwrap(); + /// ``` + #[cfg(target_arch = "x86_64")] + pub fn register_enc_memory_region(&self, memory_region: &kvm_enc_region) -> Result<()> { + // SAFETY: Safe because we know that our file is a VM fd, we know the kernel will only read + // the correct amount of memory from our pointer, and we verify the return result. + let ret = unsafe { ioctl_with_ref(self, KVM_MEMORY_ENCRYPT_REG_REGION(), memory_region) }; + if ret == 0 { + Ok(()) + } else { + Err(errno::Error::last()) + } + } + + /// Unregister a guest memory region registered with + /// [`register_enc_memory_region`](Self::register_enc_memory_region). + /// + /// It is used in the SEV-enabled guest. + /// + /// See the documentation for `KVM_MEMORY_ENCRYPT_UNREG_REGION` in the + /// [KVM API doc](https://www.kernel.org/doc/Documentation/virtual/kvm/api.txt). + /// + /// # Arguments + /// + /// * `memory_region` - Guest physical memory region. + /// + /// # Example + #[cfg_attr(has_sev, doc = "```rust")] + #[cfg_attr(not(has_sev), doc = "```rust,no_run")] + /// # extern crate kvm_bindings; + /// # extern crate kvm_ioctls; + /// # extern crate libc; + /// # use std::{fs::OpenOptions, ptr::null_mut}; + /// # use std::os::unix::io::AsRawFd; + /// use kvm_bindings::bindings::{kvm_enc_region, kvm_sev_cmd, kvm_sev_launch_start, sev_cmd_id_KVM_SEV_LAUNCH_START}; + /// # use kvm_ioctls::Kvm; + /// use libc; + /// + /// let kvm = Kvm::new().unwrap(); + /// let vm = kvm.create_vm().unwrap(); + /// let sev = OpenOptions::new() + /// .read(true) + /// .write(true) + /// .open("/dev/sev") + /// .unwrap(); + /// + /// // Initialize the SEV platform context. + /// let mut init: kvm_sev_cmd = Default::default(); + /// vm.encrypt_op_sev(&mut init).unwrap(); + /// + /// // Create the memory encryption context. 
+    /// let start_data: kvm_sev_launch_start = Default::default();
+    /// let mut start = kvm_sev_cmd {
+    ///     id: sev_cmd_id_KVM_SEV_LAUNCH_START,
+    ///     data: &start_data as *const kvm_sev_launch_start as _,
+    ///     sev_fd: sev.as_raw_fd() as _,
+    ///     ..Default::default()
+    /// };
+    /// vm.encrypt_op_sev(&mut start).unwrap();
+    ///
+    /// let addr = unsafe {
+    ///     libc::mmap(
+    ///         null_mut(),
+    ///         4096,
+    ///         libc::PROT_READ | libc::PROT_WRITE,
+    ///         libc::MAP_PRIVATE | libc::MAP_ANONYMOUS,
+    ///         -1,
+    ///         0,
+    ///     )
+    /// };
+    /// assert_ne!(addr, libc::MAP_FAILED);
+    ///
+    /// let memory_region = kvm_enc_region {
+    ///     addr: addr as _,
+    ///     size: 4096,
+    /// };
+    /// vm.register_enc_memory_region(&memory_region).unwrap();
+    /// vm.unregister_enc_memory_region(&memory_region).unwrap();
+    /// ```
+    #[cfg(target_arch = "x86_64")]
+    pub fn unregister_enc_memory_region(&self, memory_region: &kvm_enc_region) -> Result<()> {
+        // SAFETY: Safe because we know that our file is a VM fd, we know the kernel will only read
+        // the correct amount of memory from our pointer, and we verify the return result.
+        let ret = unsafe { ioctl_with_ref(self, KVM_MEMORY_ENCRYPT_UNREG_REGION(), memory_region) };
+        if ret == 0 {
+            Ok(())
+        } else {
+            Err(errno::Error::last())
+        }
+    }
+
+    /// Registers an address for coalesced MMIO. Write accesses to the address
+    /// will not cause a corresponding [`VcpuExit`](crate::VcpuExit), but
+    /// instead will be appended to the MMIO ring buffer. The [`VcpuFd`] can
+    /// read entries in the ring buffer via [`VcpuFd::coalesced_mmio_read()`].
+    /// If entries are not read, the ring buffer will eventually fill up,
+    /// preventing the kernel from appending further elements.
+    ///
+    /// Needs `KVM_CAP_COALESCED_MMIO` ([`Cap::CoalescedMmio`](crate::Cap::CoalescedMmio))
+    /// and/or `KVM_CAP_COALESCED_PIO` ([`Cap::CoalescedPio`](crate::Cap::CoalescedPio)).
+    ///
+    /// See the documentation for `KVM_REGISTER_COALESCED_MMIO`.
+    ///
+    /// # Arguments
+    ///
+    /// * `addr` - Address being written to.
+    /// * `size` - The size of the write for the mechanism to trigger.
+    pub fn register_coalesced_mmio(&self, addr: IoEventAddress, size: u32) -> Result<()> {
+        let (addr, pio) = match addr {
+            IoEventAddress::Pio(addr) => (addr, 1),
+            IoEventAddress::Mmio(addr) => (addr, 0),
+        };
+        let mut zone = kvm_coalesced_mmio_zone {
+            addr,
+            size,
+            ..Default::default()
+        };
+        zone.__bindgen_anon_1.pio = pio;
+
+        // SAFETY: Safe because we know that our file is a VM fd, we know the kernel will only read
+        // the correct amount of memory from our pointer, and we verify the return result.
+        let ret = unsafe { ioctl_with_ref(self, KVM_REGISTER_COALESCED_MMIO(), &zone) };
+        if ret != 0 {
+            return Err(errno::Error::last());
+        }
+        Ok(())
+    }
+
+    /// Unregister an address that was previously registered via
+    /// [`register_coalesced_mmio()`](VmFd::register_coalesced_mmio).
+    ///
+    /// See the documentation for `KVM_UNREGISTER_COALESCED_MMIO`.
+    pub fn unregister_coalesced_mmio(&self, addr: IoEventAddress, size: u32) -> Result<()> {
+        let (addr, pio) = match addr {
+            IoEventAddress::Pio(addr) => (addr, 1),
+            IoEventAddress::Mmio(addr) => (addr, 0),
+        };
+        let mut zone = kvm_coalesced_mmio_zone {
+            addr,
+            size,
+            ..Default::default()
+        };
+        zone.__bindgen_anon_1.pio = pio;
+
+        // SAFETY: Safe because we know that our file is a VM fd, we know the kernel will only read
+        // the correct amount of memory from our pointer, and we verify the return result.
+ let ret = unsafe { ioctl_with_ref(self, KVM_UNREGISTER_COALESCED_MMIO(), &zone) }; + if ret != 0 { + return Err(errno::Error::last()); + } + Ok(()) + } +} + +/// Helper function to create a new `VmFd`. +/// +/// This should not be exported as a public function because the preferred way is to use +/// `create_vm` from `Kvm`. The function cannot be part of the `VmFd` implementation because +/// then it would be exported with the public `VmFd` interface. +pub fn new_vmfd(vm: File, run_size: usize) -> VmFd { + VmFd { vm, run_size } +} + +impl AsRawFd for VmFd { + fn as_raw_fd(&self) -> RawFd { + self.vm.as_raw_fd() + } +} + +/// Create a dummy GIC device. +/// +/// # Arguments +/// +/// * `vm` - The vm file descriptor. +/// * `flags` - Flags to be passed to `KVM_CREATE_DEVICE`. +#[cfg(test)] +#[cfg(target_arch = "aarch64")] +pub(crate) fn create_gic_device(vm: &VmFd, flags: u32) -> DeviceFd { + let mut gic_device = kvm_create_device { + type_: kvm_device_type_KVM_DEV_TYPE_ARM_VGIC_V3, + fd: 0, + flags, + }; + match vm.create_device(&mut gic_device) { + Ok(fd) => fd, + Err(_) => { + gic_device.type_ = kvm_device_type_KVM_DEV_TYPE_ARM_VGIC_V2; + vm.create_device(&mut gic_device) + .expect("Cannot create KVM vGIC device") + } + } +} + +/// Set supported number of IRQs for vGIC. +/// +/// # Arguments +/// +/// * `vgic` - The vGIC file descriptor. +/// * `nr_irqs` - Number of IRQs. +#[cfg(test)] +#[cfg(target_arch = "aarch64")] +pub(crate) fn set_supported_nr_irqs(vgic: &DeviceFd, nr_irqs: u32) { + let vgic_attr = kvm_device_attr { + group: KVM_DEV_ARM_VGIC_GRP_NR_IRQS, + attr: 0, + addr: &nr_irqs as *const u32 as u64, + flags: 0, + }; + vgic.has_device_attr(&vgic_attr).unwrap(); + vgic.set_device_attr(&vgic_attr).unwrap(); +} + +/// Request the initialization of the vGIC. +/// +/// # Arguments +/// +/// * `vgic` - The vGIC file descriptor. +#[cfg(test)] +#[cfg(target_arch = "aarch64")] +pub(crate) fn request_gic_init(vgic: &DeviceFd) { + let vgic_attr = kvm_device_attr { + group: KVM_DEV_ARM_VGIC_GRP_CTRL, + attr: u64::from(KVM_DEV_ARM_VGIC_CTRL_INIT), + addr: 0, + flags: 0, + }; + vgic.has_device_attr(&vgic_attr).unwrap(); + vgic.set_device_attr(&vgic_attr).unwrap(); +} + +/// Create a dummy AIA device. +/// +/// # Arguments +/// +/// * `vm` - The vm file descriptor. +/// * `flags` - Flags to be passed to `KVM_CREATE_DEVICE`. +#[cfg(test)] +#[cfg(target_arch = "riscv64")] +pub(crate) fn create_aia_device(vm: &VmFd, flags: u32) -> DeviceFd { + let mut aia_device = kvm_create_device { + type_: kvm_device_type_KVM_DEV_TYPE_RISCV_AIA, + fd: 0, + flags, + }; + vm.create_device(&mut aia_device) + .expect("Cannot create KVM vAIA device") +} + +/// Set supported number of IRQs for vAIA. +/// +/// # Arguments +/// +/// * `vaia` - The vAIA file descriptor. +/// * `nr_irqs` - Number of IRQs. +#[cfg(test)] +#[cfg(target_arch = "riscv64")] +pub(crate) fn set_supported_nr_irqs(vaia: &DeviceFd, nr_irqs: u32) { + let vaia_attr = kvm_device_attr { + group: KVM_DEV_RISCV_AIA_GRP_CONFIG, + attr: u64::from(KVM_DEV_RISCV_AIA_CONFIG_SRCS), + addr: &nr_irqs as *const u32 as u64, + flags: 0, + }; + vaia.has_device_attr(&vaia_attr).unwrap(); + vaia.set_device_attr(&vaia_attr).unwrap(); +} + +/// Request the initialization of the vAIA. +/// +/// # Arguments +/// +/// * `vaia` - The vAIA file descriptor. 
+#[cfg(test)] +#[cfg(target_arch = "riscv64")] +pub(crate) fn request_aia_init(vaia: &DeviceFd) { + let vaia_attr = kvm_device_attr { + group: KVM_DEV_RISCV_AIA_GRP_CTRL, + attr: u64::from(KVM_DEV_RISCV_AIA_CTRL_INIT), + addr: 0, + flags: 0, + }; + vaia.has_device_attr(&vaia_attr).unwrap(); + vaia.set_device_attr(&vaia_attr).unwrap(); +} + +#[cfg(test)] +mod tests { + #![allow(clippy::undocumented_unsafe_blocks)] + use super::*; + use crate::Kvm; + + #[cfg(target_arch = "x86_64")] + use std::{fs::OpenOptions, os::fd::IntoRawFd, ptr::null_mut}; + + use libc::EFD_NONBLOCK; + + #[test] + fn test_set_invalid_memory() { + let kvm = Kvm::new().unwrap(); + let vm = kvm.create_vm().unwrap(); + let invalid_mem_region = kvm_userspace_memory_region { + slot: 0, + guest_phys_addr: 0, + memory_size: 0, + userspace_addr: 0, + flags: 0, + }; + unsafe { vm.set_user_memory_region(invalid_mem_region) }.unwrap_err(); + } + + #[test] + fn test_set_invalid_memory2() { + let kvm = Kvm::new().unwrap(); + let vm = kvm.create_vm().unwrap(); + let invalid_mem_region = kvm_userspace_memory_region2 { + slot: 0, + flags: 0, + guest_phys_addr: 0, + memory_size: 0, + userspace_addr: 0, + guest_memfd_offset: 0, + guest_memfd: 0, + pad1: 0, + pad2: [0; 14], + }; + unsafe { vm.set_user_memory_region2(invalid_mem_region) }.unwrap_err(); + } + + #[test] + #[cfg(target_arch = "x86_64")] + fn test_set_tss_address() { + let kvm = Kvm::new().unwrap(); + let vm = kvm.create_vm().unwrap(); + vm.set_tss_address(0xfffb_d000).unwrap(); + } + + #[test] + #[cfg(target_arch = "x86_64")] + fn test_set_identity_map_address() { + let kvm = Kvm::new().unwrap(); + if kvm.check_extension(Cap::SetIdentityMapAddr) { + let vm = kvm.create_vm().unwrap(); + vm.set_identity_map_address(0xfffb_c000).unwrap(); + vm.create_vcpu(0).unwrap(); + // Setting the identity map after creating a vCPU must fail. + vm.set_identity_map_address(0xfffb_c000).unwrap_err(); + } + } + + #[test] + #[cfg(target_arch = "x86_64")] + fn test_irq_chip() { + use Cap; + + let kvm = Kvm::new().unwrap(); + assert!(kvm.check_extension(Cap::Irqchip)); + let vm = kvm.create_vm().unwrap(); + vm.create_irq_chip().unwrap(); + + let mut irqchip = kvm_irqchip { + chip_id: KVM_IRQCHIP_PIC_MASTER, + ..Default::default() + }; + // Set the irq_base to a non-default value to check that set & get work. + irqchip.chip.pic.irq_base = 10; + vm.set_irqchip(&irqchip).unwrap(); + + // We initialize a dummy irq chip (`other_irqchip`) in which the + // function `get_irqchip` returns its result. + let mut other_irqchip = kvm_irqchip { + chip_id: KVM_IRQCHIP_PIC_MASTER, + ..Default::default() + }; + vm.get_irqchip(&mut other_irqchip).unwrap(); + + // Safe because we know that the irqchip type is PIC. + unsafe { assert_eq!(irqchip.chip.pic, other_irqchip.chip.pic) }; + } + + #[test] + #[cfg(target_arch = "aarch64")] + fn test_irq_chip() { + use Cap; + + let kvm = Kvm::new().unwrap(); + assert!(kvm.check_extension(Cap::Irqchip)); + + let vm = kvm.create_vm().unwrap(); + + // On ARM/arm64, a GICv2 is created. It's better to check ahead whether GICv2 + // can be emulated or not. 
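+        // (KVM_CREATE_DEVICE_TEST only probes whether the device type is
+        // supported; it does not actually instantiate the device.)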
+        let mut gic_device = kvm_create_device {
+            type_: kvm_device_type_KVM_DEV_TYPE_ARM_VGIC_V2,
+            fd: 0,
+            flags: KVM_CREATE_DEVICE_TEST,
+        };
+
+        let vgic_v2_supported = vm.create_device(&mut gic_device).is_ok();
+        assert_eq!(vm.create_irq_chip().is_ok(), vgic_v2_supported);
+    }
+
+    #[test]
+    #[cfg(target_arch = "x86_64")]
+    fn test_pit2() {
+        let kvm = Kvm::new().unwrap();
+        let vm = kvm.create_vm().unwrap();
+        assert!(kvm.check_extension(Cap::Irqchip));
+        vm.create_irq_chip().unwrap();
+
+        vm.create_pit2(kvm_pit_config::default()).unwrap();
+
+        let pit2 = vm.get_pit2().unwrap();
+        vm.set_pit2(&pit2).unwrap();
+        let mut other_pit2 = vm.get_pit2().unwrap();
+        // Load time will differ, let's overwrite it so we can test equality.
+        other_pit2.channels[0].count_load_time = pit2.channels[0].count_load_time;
+        other_pit2.channels[1].count_load_time = pit2.channels[1].count_load_time;
+        other_pit2.channels[2].count_load_time = pit2.channels[2].count_load_time;
+        assert_eq!(pit2, other_pit2);
+    }
+
+    #[cfg(target_arch = "x86_64")]
+    #[test]
+    fn test_clock() {
+        let kvm = Kvm::new().unwrap();
+        let vm = kvm.create_vm().unwrap();
+
+        // Get current time.
+        let orig = vm.get_clock().unwrap();
+
+        // Reset time.
+        let fudged = kvm_clock_data {
+            clock: 10,
+            ..Default::default()
+        };
+        vm.set_clock(&fudged).unwrap();
+
+        // Get new time.
+        let new = vm.get_clock().unwrap();
+
+        // Verify new time has progressed but is smaller than orig time.
+        assert!(fudged.clock < new.clock);
+        assert!(new.clock < orig.clock);
+    }
+
+    #[test]
+    fn test_register_ioevent() {
+        assert_eq!(std::mem::size_of::<NoDatamatch>(), 0);
+
+        let kvm = Kvm::new().unwrap();
+        let vm_fd = kvm.create_vm().unwrap();
+        let evtfd = EventFd::new(EFD_NONBLOCK).unwrap();
+        vm_fd
+            .register_ioevent(&evtfd, &IoEventAddress::Pio(0xf4), NoDatamatch)
+            .unwrap();
+        vm_fd
+            .register_ioevent(&evtfd, &IoEventAddress::Mmio(0x1000), NoDatamatch)
+            .unwrap();
+        vm_fd
+            .register_ioevent(&evtfd, &IoEventAddress::Pio(0xc1), 0x7fu8)
+            .unwrap();
+        vm_fd
+            .register_ioevent(&evtfd, &IoEventAddress::Pio(0xc2), 0x1337u16)
+            .unwrap();
+        vm_fd
+            .register_ioevent(&evtfd, &IoEventAddress::Pio(0xc4), 0xdead_beefu32)
+            .unwrap();
+        vm_fd
+            .register_ioevent(&evtfd, &IoEventAddress::Pio(0xc8), 0xdead_beef_dead_beefu64)
+            .unwrap()
+    }
+
+    #[test]
+    fn test_unregister_ioevent() {
+        assert_eq!(std::mem::size_of::<NoDatamatch>(), 0);
+
+        let kvm = Kvm::new().unwrap();
+        let vm_fd = kvm.create_vm().unwrap();
+        let evtfd = EventFd::new(EFD_NONBLOCK).unwrap();
+        let pio_addr = IoEventAddress::Pio(0xf4);
+        let mmio_addr = IoEventAddress::Mmio(0x1000);
+
+        // First try to unregister addresses which have not been registered.
+        vm_fd
+            .unregister_ioevent(&evtfd, &pio_addr, NoDatamatch)
+            .unwrap_err();
+        vm_fd
+            .unregister_ioevent(&evtfd, &mmio_addr, NoDatamatch)
+            .unwrap_err();
+
+        // Now register the addresses
+        vm_fd
+            .register_ioevent(&evtfd, &pio_addr, NoDatamatch)
+            .unwrap();
+        vm_fd
+            .register_ioevent(&evtfd, &mmio_addr, 0x1337u16)
+            .unwrap();
+
+        // Try again unregistering the addresses. This time it should work
+        // since they have been previously registered.
+        vm_fd
+            .unregister_ioevent(&evtfd, &pio_addr, NoDatamatch)
+            .unwrap();
+        vm_fd
+            .unregister_ioevent(&evtfd, &mmio_addr, 0x1337u16)
+            .unwrap();
+    }
+
+    #[test]
+    #[cfg(target_arch = "x86_64")]
+    fn test_register_unregister_irqfd() {
+        let kvm = Kvm::new().unwrap();
+        let vm_fd = kvm.create_vm().unwrap();
+        let evtfd1 = EventFd::new(EFD_NONBLOCK).unwrap();
+        let evtfd2 = EventFd::new(EFD_NONBLOCK).unwrap();
+        let evtfd3 = EventFd::new(EFD_NONBLOCK).unwrap();
+        let evtfd4 = EventFd::new(EFD_NONBLOCK).unwrap();
+        let resamplefd = EventFd::new(EFD_NONBLOCK).unwrap();
+
+        vm_fd.create_irq_chip().unwrap();
+
+        vm_fd.register_irqfd(&evtfd1, 4).unwrap();
+        vm_fd.register_irqfd(&evtfd2, 8).unwrap();
+        vm_fd.register_irqfd(&evtfd3, 4).unwrap();
+        vm_fd.unregister_irqfd(&evtfd2, 8).unwrap();
+        // KVM irqfd doesn't report failure in this case :(
+        vm_fd.unregister_irqfd(&evtfd2, 8).unwrap();
+
+        // Duplicated eventfd registration.
+        // On x86_64 this fails as the event fd was already matched with a GSI.
+        vm_fd.register_irqfd(&evtfd3, 4).unwrap_err();
+        vm_fd.register_irqfd(&evtfd3, 5).unwrap_err();
+        // KVM irqfd doesn't report failure in this case :(
+        vm_fd.unregister_irqfd(&evtfd3, 5).unwrap();
+
+        if vm_fd.check_extension(Cap::IrqfdResample) {
+            vm_fd
+                .register_irqfd_with_resample(&evtfd4, &resamplefd, 6)
+                .unwrap();
+            vm_fd.unregister_irqfd(&evtfd4, 6).unwrap();
+        } else {
+            vm_fd
+                .register_irqfd_with_resample(&evtfd4, &resamplefd, 6)
+                .unwrap_err();
+        }
+    }
+
+    #[test]
+    #[cfg(target_arch = "aarch64")]
+    fn test_register_unregister_irqfd() {
+        let kvm = Kvm::new().unwrap();
+        let vm_fd = kvm.create_vm().unwrap();
+        let evtfd1 = EventFd::new(EFD_NONBLOCK).unwrap();
+        let evtfd2 = EventFd::new(EFD_NONBLOCK).unwrap();
+        let evtfd3 = EventFd::new(EFD_NONBLOCK).unwrap();
+        let evtfd4 = EventFd::new(EFD_NONBLOCK).unwrap();
+        let resamplefd = EventFd::new(EFD_NONBLOCK).unwrap();
+
+        // Create the vGIC device.
+        let vgic_fd = create_gic_device(&vm_fd, 0);
+
+        // GICv3 on arm/aarch64 requires an online vCPU prior to setting device attributes,
+        // see: https://www.kernel.org/doc/html/latest/virt/kvm/devices/arm-vgic-v3.html
+        vm_fd.create_vcpu(0).unwrap();
+
+        // Set supported number of IRQs.
+        set_supported_nr_irqs(&vgic_fd, 128);
+        // Request the initialization of the vGIC.
+        request_gic_init(&vgic_fd);
+
+        vm_fd.register_irqfd(&evtfd1, 4).unwrap();
+        vm_fd.register_irqfd(&evtfd2, 8).unwrap();
+        vm_fd.register_irqfd(&evtfd3, 4).unwrap();
+        vm_fd.unregister_irqfd(&evtfd2, 8).unwrap();
+        // KVM irqfd doesn't report failure in this case :(
+        vm_fd.unregister_irqfd(&evtfd2, 8).unwrap();
+
+        // Duplicated eventfd registration.
+        // On aarch64, as on x86_64, this fails because the event fd was already
+        // matched with a GSI.
+        vm_fd.register_irqfd(&evtfd3, 4).unwrap_err();
+        vm_fd.register_irqfd(&evtfd3, 5).unwrap_err();
+        // KVM irqfd doesn't report failure in this case :(
+        vm_fd.unregister_irqfd(&evtfd3, 5).unwrap();
+
+        if vm_fd.check_extension(Cap::IrqfdResample) {
+            vm_fd
+                .register_irqfd_with_resample(&evtfd4, &resamplefd, 6)
+                .unwrap();
+            vm_fd.unregister_irqfd(&evtfd4, 6).unwrap();
+        } else {
+            vm_fd
+                .register_irqfd_with_resample(&evtfd4, &resamplefd, 6)
+                .unwrap_err();
+        }
+    }
+
+    #[test]
+    #[cfg(target_arch = "riscv64")]
+    fn test_register_unregister_irqfd() {
+        let kvm = Kvm::new().unwrap();
+        let vm_fd = kvm.create_vm().unwrap();
+        let evtfd1 = EventFd::new(EFD_NONBLOCK).unwrap();
+        let evtfd2 = EventFd::new(EFD_NONBLOCK).unwrap();
+        let evtfd3 = EventFd::new(EFD_NONBLOCK).unwrap();
+
+        // Create the vAIA device.
+        let vaia_fd = create_aia_device(&vm_fd, 0);
+
+        // AIA on riscv64 requires at least one online vCPU prior to setting
+        // device attributes. Otherwise it would fail when trying to set the
+        // address of the IMSIC.
+        vm_fd.create_vcpu(0).unwrap();
+
+        // Set maximum supported number of IRQs of the vAIA device to 128.
+        set_supported_nr_irqs(&vaia_fd, 128);
+
+        // Before requesting the vAIA device to initialize, the APLIC and IMSIC
+        // addresses must be set.
+        let aplic_addr: u64 = 0x4000;
+        vaia_fd
+            .set_device_attr(&kvm_device_attr {
+                group: KVM_DEV_RISCV_AIA_GRP_ADDR,
+                attr: u64::from(KVM_DEV_RISCV_AIA_ADDR_APLIC),
+                addr: &aplic_addr as *const u64 as u64,
+                flags: 0,
+            })
+            .unwrap();
+        let imsic_addr: u64 = 0x8000;
+        vaia_fd
+            .set_device_attr(&kvm_device_attr {
+                group: KVM_DEV_RISCV_AIA_GRP_ADDR,
+                attr: 1u64,
+                addr: &imsic_addr as *const u64 as u64,
+                flags: 0,
+            })
+            .unwrap();
+
+        // Initialize the vAIA device.
+        request_aia_init(&vaia_fd);
+
+        vm_fd.register_irqfd(&evtfd1, 4).unwrap();
+        vm_fd.register_irqfd(&evtfd2, 8).unwrap();
+        vm_fd.register_irqfd(&evtfd3, 4).unwrap();
+        vm_fd.unregister_irqfd(&evtfd2, 8).unwrap();
+        // KVM irqfd doesn't report failure in this case :(
+        vm_fd.unregister_irqfd(&evtfd2, 8).unwrap();
+
+        // Duplicated eventfd registration.
+        // On riscv64 this fails as the event fd was already matched with a GSI.
+        vm_fd.register_irqfd(&evtfd3, 4).unwrap_err();
+        vm_fd.register_irqfd(&evtfd3, 5).unwrap_err();
+        // KVM irqfd doesn't report failure in this case :(
+        vm_fd.unregister_irqfd(&evtfd3, 5).unwrap();
+    }
+
+    #[test]
+    #[cfg(target_arch = "x86_64")]
+    fn test_set_irq_line() {
+        let kvm = Kvm::new().unwrap();
+        let vm_fd = kvm.create_vm().unwrap();
+
+        vm_fd.create_irq_chip().unwrap();
+
+        vm_fd.set_irq_line(4, true).unwrap();
+        vm_fd.set_irq_line(4, false).unwrap();
+        vm_fd.set_irq_line(4, true).unwrap();
+    }
+
+    #[test]
+    #[cfg(target_arch = "aarch64")]
+    #[allow(clippy::unusual_byte_groupings)]
+    fn test_set_irq_line() {
+        let kvm = Kvm::new().unwrap();
+        let vm_fd = kvm.create_vm().unwrap();
+        // Create a vcpu for test case 2 of the KVM_IRQ_LINE API on aarch64.
+        vm_fd.create_vcpu(0).unwrap();
+
+        // Create the vGIC device.
+        let vgic_fd = create_gic_device(&vm_fd, 0);
+        // Set supported number of IRQs.
+        set_supported_nr_irqs(&vgic_fd, 128);
+        // Request the initialization of the vGIC.
+        request_gic_init(&vgic_fd);
+
+        // On arm/aarch64, the irq field is interpreted like this:
+        // bits:  | 31 ... 24 | 23 ... 16 | 15 ... 0 |
+        // field: | irq_type  | vcpu_index |  irq_id  |
+        // The irq_type field has the following values:
+        // - irq_type[0]: out-of-kernel GIC: irq_id 0 is IRQ, irq_id 1 is FIQ
+        // - irq_type[1]: in-kernel GIC: SPI, irq_id between 32 and 1019 (incl.)
+        //   (the vcpu_index field is ignored)
+        // - irq_type[2]: in-kernel GIC: PPI, irq_id between 16 and 31 (incl.)
+        // Hence, using irq_type = 1, irq_id = 32 (decimal), the irq field in hex is: 0x01_00_0020
+        vm_fd.set_irq_line(0x01_00_0020, true).unwrap();
+        vm_fd.set_irq_line(0x01_00_0020, false).unwrap();
+        vm_fd.set_irq_line(0x01_00_0020, true).unwrap();
+
+        // Case 2: using irq_type = 2, vcpu_index = 0, irq_id = 16 (decimal), the irq field in hex is: 0x02_00_0010
+        vm_fd.set_irq_line(0x02_00_0010, true).unwrap();
+        vm_fd.set_irq_line(0x02_00_0010, false).unwrap();
+        vm_fd.set_irq_line(0x02_00_0010, true).unwrap();
+    }
+
+    #[test]
+    #[cfg(target_arch = "riscv64")]
+    fn test_set_irq_line() {
+        let kvm = Kvm::new().unwrap();
+        let vm_fd = kvm.create_vm().unwrap();
+        vm_fd.create_vcpu(0).unwrap();
+
+        // Create the vAIA device.
+        let vaia_fd = create_aia_device(&vm_fd, 0);
+        // Set maximum supported number of IRQs of the vAIA device to 128.
+        set_supported_nr_irqs(&vaia_fd, 128);
+
+        // Before requesting the vAIA device to initialize, the APLIC and IMSIC
+        // addresses must be set.
+        let aplic_addr: u64 = 0x4000;
+        vaia_fd
+            .set_device_attr(&kvm_device_attr {
+                group: KVM_DEV_RISCV_AIA_GRP_ADDR,
+                attr: u64::from(KVM_DEV_RISCV_AIA_ADDR_APLIC),
+                addr: &aplic_addr as *const u64 as u64,
+                flags: 0,
+            })
+            .unwrap();
+        let imsic_addr: u64 = 0x8000;
+        vaia_fd
+            .set_device_attr(&kvm_device_attr {
+                group: KVM_DEV_RISCV_AIA_GRP_ADDR,
+                attr: 1u64,
+                addr: &imsic_addr as *const u64 as u64,
+                flags: 0,
+            })
+            .unwrap();
+
+        // Initialize the vAIA device.
+        request_aia_init(&vaia_fd);
+
+        vm_fd.set_irq_line(7, true).unwrap();
+        vm_fd.set_irq_line(7, false).unwrap();
+        vm_fd.set_irq_line(7, true).unwrap();
+    }
+
+    #[test]
+    #[cfg(target_arch = "x86_64")]
+    fn test_faulty_vm_fd() {
+        let badf_errno = libc::EBADF;
+
+        let faulty_vm_fd = VmFd {
+            vm: unsafe { File::from_raw_fd(-2) },
+            run_size: 0,
+        };
+
+        let invalid_mem_region = kvm_userspace_memory_region {
+            slot: 0,
+            guest_phys_addr: 0,
+            memory_size: 0,
+            userspace_addr: 0,
+            flags: 0,
+        };
+
+        assert_eq!(
+            unsafe {
+                faulty_vm_fd
+                    .set_user_memory_region(invalid_mem_region)
+                    .unwrap_err()
+                    .errno()
+            },
+            badf_errno
+        );
+        assert_eq!(
+            faulty_vm_fd.set_tss_address(0).unwrap_err().errno(),
+            badf_errno
+        );
+        assert_eq!(
+            faulty_vm_fd.create_irq_chip().unwrap_err().errno(),
+            badf_errno
+        );
+        assert_eq!(
+            faulty_vm_fd
+                .create_pit2(kvm_pit_config::default())
+                .unwrap_err()
+                .errno(),
+            badf_errno
+        );
+        let event_fd = EventFd::new(EFD_NONBLOCK).unwrap();
+        assert_eq!(
+            faulty_vm_fd
+                .register_ioevent(&event_fd, &IoEventAddress::Pio(0), 0u64)
+                .unwrap_err()
+                .errno(),
+            badf_errno
+        );
+        assert_eq!(
+            faulty_vm_fd
+                .get_irqchip(&mut kvm_irqchip::default())
+                .unwrap_err()
+                .errno(),
+            badf_errno
+        );
+        assert_eq!(
+            faulty_vm_fd
+                .set_irqchip(&kvm_irqchip::default())
+                .unwrap_err()
+                .errno(),
+            badf_errno
+        );
+        assert_eq!(faulty_vm_fd.get_clock().unwrap_err().errno(), badf_errno);
+        assert_eq!(
+            faulty_vm_fd
+                .set_clock(&kvm_clock_data::default())
+                .unwrap_err()
+                .errno(),
+            badf_errno
+        );
+        assert_eq!(faulty_vm_fd.get_pit2().unwrap_err().errno(), badf_errno);
+        assert_eq!(
+            faulty_vm_fd
+                .set_pit2(&kvm_pit_state2::default())
+                .unwrap_err()
+                .errno(),
+            badf_errno
+        );
+        assert_eq!(
+            faulty_vm_fd
+                .register_irqfd(&event_fd, 0)
+                .unwrap_err()
+                .errno(),
+            badf_errno
+        );
+
+        assert_eq!(
+            faulty_vm_fd.create_vcpu(0).err().unwrap().errno(),
+            badf_errno
+        );
+
+        assert_eq!(
+            faulty_vm_fd.get_dirty_log(0, 0).unwrap_err().errno(),
+            badf_errno
+        );
+
+        // Don't drop the File object, or it'll notice the file it's trying to close is
+        // invalid and abort the process.
+        let _ = faulty_vm_fd.vm.into_raw_fd();
+    }
+
+    #[test]
+    #[cfg(target_arch = "aarch64")]
+    fn test_get_preferred_target() {
+        let kvm = Kvm::new().unwrap();
+        let vm = kvm.create_vm().unwrap();
+        let mut kvi = kvm_vcpu_init::default();
+        vm.get_preferred_target(&mut kvi).unwrap();
+    }
+
+    /// As explained in the example code related to signal_msi(), sending
+    /// a random MSI vector will always fail because no vector has been
+    /// previously allocated by the guest itself.
+    #[test]
+    #[cfg(any(
+        target_arch = "x86_64",
+        target_arch = "aarch64",
+        target_arch = "riscv64"
+    ))]
+    fn test_signal_msi_failure() {
+        let kvm = Kvm::new().unwrap();
+        let vm = kvm.create_vm().unwrap();
+        let msi = kvm_msi::default();
+        vm.signal_msi(msi).unwrap_err();
+    }
+
+    #[test]
+    #[cfg(not(any(target_arch = "aarch64", target_arch = "riscv64")))]
+    fn test_enable_cap_failure() {
+        let kvm = Kvm::new().unwrap();
+        let vm = kvm.create_vm().unwrap();
+        let cap: kvm_enable_cap = Default::default();
+        // Providing the `kvm_enable_cap` structure filled with default() should
+        // always result in a failure as it is not a valid capability.
+        vm.enable_cap(&cap).unwrap_err();
+    }
+
+    #[test]
+    #[cfg(target_arch = "x86_64")]
+    fn test_enable_split_irqchip_cap() {
+        let kvm = Kvm::new().unwrap();
+        let vm = kvm.create_vm().unwrap();
+        let mut cap = kvm_enable_cap {
+            cap: KVM_CAP_SPLIT_IRQCHIP,
+            ..Default::default()
+        };
+        // As per the KVM documentation, KVM_CAP_SPLIT_IRQCHIP only emulates
+        // the local APIC in kernel, expecting that a userspace IOAPIC will
+        // be implemented by the VMM.
+        // Along with this capability, the user needs to specify the number
+        // of pins reserved for the userspace IOAPIC. This number needs to be
+        // provided through the first argument of the capability structure, as
+        // specified in KVM documentation:
+        // args[0] - number of routes reserved for userspace IOAPICs
+        //
+        // Because an IOAPIC supports 24 pins, that's the reason why this test
+        // picked this number as reference.
+        cap.args[0] = 24;
+        vm.enable_cap(&cap).unwrap();
+    }
+
+    #[test]
+    #[cfg(any(
+        target_arch = "x86_64",
+        target_arch = "aarch64",
+        target_arch = "riscv64"
+    ))]
+    fn test_set_gsi_routing() {
+        let kvm = Kvm::new().unwrap();
+        let vm = kvm.create_vm().unwrap();
+        let irq_routing = kvm_irq_routing::default();
+
+        // Expect failure for x86 since the irqchip is not created yet.
+        #[cfg(target_arch = "x86_64")]
+        vm.set_gsi_routing(&irq_routing).unwrap_err();
+        #[cfg(target_arch = "x86_64")]
+        vm.create_irq_chip().unwrap();
+
+        // RISC-V 64-bit expects an AIA device to be created in advance of
+        // committing the irq_routing table.
+        #[cfg(target_arch = "riscv64")]
+        create_aia_device(&vm, 0);
+
+        vm.set_gsi_routing(&irq_routing).unwrap();
+    }
+
+    #[test]
+    fn test_create_vcpu_different_ids() {
+        let kvm = Kvm::new().unwrap();
+        let vm = kvm.create_vm().unwrap();
+
+        // Fails when given an arbitrarily large value.
+        let err = vm.create_vcpu(65537_u64).err();
+        assert_eq!(err.unwrap().errno(), libc::EINVAL);
+
+        // Fails when the input `id` equals `max_vcpu_id`.
+        let max_vcpu_id = kvm.get_max_vcpu_id();
+        vm.create_vcpu((max_vcpu_id - 1) as u64).unwrap();
+
+        let vcpu_err = vm.create_vcpu(max_vcpu_id as u64).err();
+        assert_eq!(vcpu_err.unwrap().errno(), libc::EINVAL);
+    }
+
+    #[test]
+    fn test_check_extension() {
+        let kvm = Kvm::new().unwrap();
+        let vm = kvm.create_vm().unwrap();
+        assert!(vm.check_extension(Cap::MpState));
+    }
+
+    #[test]
+    #[cfg(target_arch = "x86_64")]
+    #[cfg_attr(not(has_sev), ignore)]
+    fn test_encrypt_op_sev() {
+        let kvm = Kvm::new().unwrap();
+        let vm = kvm.create_vm().unwrap();
+
+        let mut init: kvm_sev_cmd = Default::default();
+        vm.encrypt_op_sev(&mut init).unwrap();
+    }
+
+    #[test]
+    #[cfg(target_arch = "x86_64")]
+    #[cfg_attr(not(has_sev), ignore)]
+    fn test_register_unregister_enc_memory_region() {
+        let sev = OpenOptions::new()
+            .read(true)
+            .write(true)
+            .open("/dev/sev")
+            .unwrap();
+
+        let kvm = Kvm::new().unwrap();
+        let vm = kvm.create_vm().unwrap();
+
+        // Perform the SEV launch sequence according to
+        // https://www.kernel.org/doc/Documentation/virtual/kvm/amd-memory-encryption.rst
+
+        let mut init: kvm_sev_cmd = Default::default();
+        vm.encrypt_op_sev(&mut init).unwrap();
+
+        let start_data: kvm_sev_launch_start = Default::default();
+        let mut start = kvm_sev_cmd {
+            id: sev_cmd_id_KVM_SEV_LAUNCH_START,
+            data: &start_data as *const kvm_sev_launch_start as _,
+            sev_fd: sev.as_raw_fd() as _,
+            ..Default::default()
+        };
+        vm.encrypt_op_sev(&mut start).unwrap();
+
+        let addr = unsafe {
+            libc::mmap(
+                null_mut(),
+                4096,
+                libc::PROT_READ | libc::PROT_WRITE,
+                libc::MAP_PRIVATE | libc::MAP_ANONYMOUS,
+                -1,
+                0,
+            )
+        };
+        assert_ne!(addr, libc::MAP_FAILED);
+
+        assert_eq!(
+            vm.register_enc_memory_region(&Default::default())
+                .unwrap_err()
+                .errno(),
+            libc::EINVAL
+        );
+        assert_eq!(
+            vm.unregister_enc_memory_region(&Default::default())
+                .unwrap_err()
+                .errno(),
+            libc::EINVAL
+        );
+
+        let memory_region = kvm_enc_region {
+            addr: addr as _,
+            size: 4096,
+        };
+        assert_eq!(
+            vm.unregister_enc_memory_region(&memory_region)
+                .unwrap_err()
+                .errno(),
+            libc::EINVAL
+        );
+        vm.register_enc_memory_region(&memory_region).unwrap();
+        vm.unregister_enc_memory_region(&memory_region).unwrap();
+    }
+}
diff --git a/kvm-ioctls/src/kvm_ioctls.rs b/kvm-ioctls/src/kvm_ioctls.rs
new file mode 100644
index 0000000..192b602
--- /dev/null
+++ b/kvm-ioctls/src/kvm_ioctls.rs
@@ -0,0 +1,318 @@
+// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+// SPDX-License-Identifier: Apache-2.0 OR MIT
+//
+// Portions Copyright 2017 The Chromium OS Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the THIRD-PARTY file.
+
+//! Declares the ioctls necessary for each supported platform.
+
+use kvm_bindings::*;
+
+// Ioctls for /dev/kvm.
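+// Note (background, illustrative sketch): the ioctl_*_nr! macros come from vmm-sys-util
+// and mirror the kernel's _IO/_IOR/_IOW/_IOWR encoding, packing direction, argument
+// size, the KVMIO magic (0xAE) and a sequence number into one request code, roughly:
+//
+//     fn ioc(dir: u32, size: u32, ty: u32, nr: u32) -> u32 {
+//         // dir: 2 bits, size: 14 bits, type: 8 bits, nr: 8 bits
+//         (dir << 30) | (size << 16) | (ty << 8) | nr
+//     }
+//
+// e.g. KVM_GET_API_VERSION below is _IO(KVMIO, 0x00): no argument payload.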
+ +ioctl_io_nr!(KVM_GET_API_VERSION, KVMIO, 0x00); +ioctl_io_nr!(KVM_CREATE_VM, KVMIO, 0x01); +#[cfg(target_arch = "x86_64")] +ioctl_iowr_nr!(KVM_GET_MSR_INDEX_LIST, KVMIO, 0x02, kvm_msr_list); +ioctl_io_nr!(KVM_CHECK_EXTENSION, KVMIO, 0x03); +ioctl_io_nr!(KVM_GET_VCPU_MMAP_SIZE, KVMIO, 0x04); +/* Available with KVM_CAP_EXT_CPUID */ +#[cfg(target_arch = "x86_64")] +ioctl_iowr_nr!(KVM_GET_SUPPORTED_CPUID, KVMIO, 0x05, kvm_cpuid2); +/* Available with KVM_CAP_EXT_EMUL_CPUID */ +#[cfg(target_arch = "x86_64")] +ioctl_iowr_nr!(KVM_GET_EMULATED_CPUID, KVMIO, 0x09, kvm_cpuid2); +/* Available with KVM_CAP_GET_MSR_FEATURES */ +#[cfg(target_arch = "x86_64")] +ioctl_iowr_nr!(KVM_GET_MSR_FEATURE_INDEX_LIST, KVMIO, 0x0a, kvm_msr_list); + +// Ioctls for VM fds. + +ioctl_io_nr!(KVM_CREATE_VCPU, KVMIO, 0x41); +ioctl_iow_nr!(KVM_GET_DIRTY_LOG, KVMIO, 0x42, kvm_dirty_log); +/* Available with KVM_CAP_USER_MEMORY */ +ioctl_iow_nr!( + KVM_SET_USER_MEMORY_REGION, + KVMIO, + 0x46, + kvm_userspace_memory_region +); +ioctl_iow_nr!( + KVM_SET_USER_MEMORY_REGION2, + KVMIO, + 0x49, + kvm_userspace_memory_region2 +); +/* Available with KVM_CAP_SET_TSS_ADDR */ +#[cfg(target_arch = "x86_64")] +ioctl_io_nr!(KVM_SET_TSS_ADDR, KVMIO, 0x47); +/* Available with KVM_CAP_SET_IDENTITY_MAP_ADDR */ +#[cfg(target_arch = "x86_64")] +ioctl_iow_nr!(KVM_SET_IDENTITY_MAP_ADDR, KVMIO, 0x48, u64); +#[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))] +ioctl_iowr_nr!(KVM_CREATE_GUEST_MEMFD, KVMIO, 0xd4, kvm_create_guest_memfd); +/* Available with KVM_CAP_IRQCHIP */ +#[cfg(any(target_arch = "x86_64", target_arch = "aarch64", target_arch = "s390x"))] +ioctl_io_nr!(KVM_CREATE_IRQCHIP, KVMIO, 0x60); +/* Available with KVM_CAP_IRQCHIP */ +#[cfg(any( + target_arch = "x86_64", + target_arch = "aarch64", + target_arch = "riscv64" +))] +ioctl_iow_nr!(KVM_IRQ_LINE, KVMIO, 0x61, kvm_irq_level); +/* Available with KVM_CAP_COALESCED_MMIO / KVM_CAP_COALESCED_PIO */ +ioctl_iow_nr!( + KVM_REGISTER_COALESCED_MMIO, + KVMIO, + 0x67, + kvm_coalesced_mmio_zone +); +/* Available with KVM_CAP_COALESCED_MMIO / KVM_CAP_COALESCED_PIO */ +ioctl_iow_nr!( + KVM_UNREGISTER_COALESCED_MMIO, + KVMIO, + 0x68, + kvm_coalesced_mmio_zone +); +/* Available with KVM_CAP_IRQ_ROUTING */ +#[cfg(any( + target_arch = "x86_64", + target_arch = "aarch64", + target_arch = "riscv64" +))] +ioctl_iow_nr!(KVM_SET_GSI_ROUTING, KVMIO, 0x6a, kvm_irq_routing); +/* Available with KVM_CAP_IRQFD */ +#[cfg(any( + target_arch = "x86_64", + target_arch = "aarch64", + target_arch = "riscv64", + target_arch = "s390x" +))] +ioctl_iow_nr!(KVM_IRQFD, KVMIO, 0x76, kvm_irqfd); +/* Available with KVM_CAP_PIT2 */ +#[cfg(target_arch = "x86_64")] +ioctl_iow_nr!(KVM_CREATE_PIT2, KVMIO, 0x77, kvm_pit_config); +/* Available with KVM_CAP_IOEVENTFD */ +ioctl_iow_nr!(KVM_IOEVENTFD, KVMIO, 0x79, kvm_ioeventfd); +/* Available with KVM_CAP_IRQCHIP */ +#[cfg(target_arch = "x86_64")] +ioctl_iowr_nr!(KVM_GET_IRQCHIP, KVMIO, 0x62, kvm_irqchip); +/* Available with KVM_CAP_IRQCHIP */ +#[cfg(target_arch = "x86_64")] +ioctl_ior_nr!(KVM_SET_IRQCHIP, KVMIO, 0x63, kvm_irqchip); +/* Available with KVM_CAP_ADJUST_CLOCK */ +#[cfg(target_arch = "x86_64")] +ioctl_iow_nr!(KVM_SET_CLOCK, KVMIO, 0x7b, kvm_clock_data); +/* Available with KVM_CAP_ADJUST_CLOCK */ +#[cfg(target_arch = "x86_64")] +ioctl_ior_nr!(KVM_GET_CLOCK, KVMIO, 0x7c, kvm_clock_data); +/* Available with KVM_CAP_PIT_STATE2 */ +#[cfg(target_arch = "x86_64")] +ioctl_ior_nr!(KVM_GET_PIT2, KVMIO, 0x9f, kvm_pit_state2); +/* Available with KVM_CAP_PIT_STATE2 */ 
+#[cfg(target_arch = "x86_64")]
+ioctl_iow_nr!(KVM_SET_PIT2, KVMIO, 0xa0, kvm_pit_state2);
+/* KVM_MEMORY_ENCRYPT_OP. Takes an opaque, platform-dependent type, e.g. for TDX or SEV. */
+#[cfg(target_arch = "x86_64")]
+ioctl_iowr_nr!(KVM_MEMORY_ENCRYPT_OP, KVMIO, 0xba, std::os::raw::c_ulong);
+/* Available on SEV-enabled guests. */
+#[cfg(target_arch = "x86_64")]
+ioctl_ior_nr!(KVM_MEMORY_ENCRYPT_REG_REGION, KVMIO, 0xbb, kvm_enc_region);
+/* Available on SEV-enabled guests. */
+#[cfg(target_arch = "x86_64")]
+ioctl_ior_nr!(KVM_MEMORY_ENCRYPT_UNREG_REGION, KVMIO, 0xbc, kvm_enc_region);
+
+// Ioctls for VCPU fds.
+
+ioctl_io_nr!(KVM_RUN, KVMIO, 0x80);
+#[cfg(not(any(target_arch = "aarch64", target_arch = "riscv64")))]
+ioctl_ior_nr!(KVM_GET_REGS, KVMIO, 0x81, kvm_regs);
+#[cfg(not(any(target_arch = "aarch64", target_arch = "riscv64")))]
+ioctl_iow_nr!(KVM_SET_REGS, KVMIO, 0x82, kvm_regs);
+#[cfg(any(
+    target_arch = "x86_64",
+    target_arch = "powerpc",
+    target_arch = "powerpc64"
+))]
+ioctl_ior_nr!(KVM_GET_SREGS, KVMIO, 0x83, kvm_sregs);
+#[cfg(any(
+    target_arch = "x86_64",
+    target_arch = "powerpc",
+    target_arch = "powerpc64"
+))]
+ioctl_iow_nr!(KVM_SET_SREGS, KVMIO, 0x84, kvm_sregs);
+#[cfg(target_arch = "x86_64")]
+ioctl_iowr_nr!(KVM_TRANSLATE, KVMIO, 0x85, kvm_translation);
+#[cfg(target_arch = "x86_64")]
+ioctl_iowr_nr!(KVM_GET_MSRS, KVMIO, 0x88, kvm_msrs);
+#[cfg(target_arch = "x86_64")]
+ioctl_iow_nr!(KVM_SET_MSRS, KVMIO, 0x89, kvm_msrs);
+#[cfg(target_arch = "x86_64")]
+ioctl_ior_nr!(KVM_GET_FPU, KVMIO, 0x8c, kvm_fpu);
+#[cfg(target_arch = "x86_64")]
+ioctl_iow_nr!(KVM_SET_FPU, KVMIO, 0x8d, kvm_fpu);
+/* Available with KVM_CAP_IRQCHIP */
+#[cfg(target_arch = "x86_64")]
+ioctl_ior_nr!(KVM_GET_LAPIC, KVMIO, 0x8e, kvm_lapic_state);
+/* Available with KVM_CAP_IRQCHIP */
+#[cfg(target_arch = "x86_64")]
+ioctl_iow_nr!(KVM_SET_LAPIC, KVMIO, 0x8f, kvm_lapic_state);
+/* Available with KVM_CAP_EXT_CPUID */
+#[cfg(target_arch = "x86_64")]
+ioctl_iow_nr!(KVM_SET_CPUID2, KVMIO, 0x90, kvm_cpuid2);
+/* Available with KVM_CAP_EXT_CPUID */
+#[cfg(target_arch = "x86_64")]
+ioctl_iowr_nr!(KVM_GET_CPUID2, KVMIO, 0x91, kvm_cpuid2);
+/* Available with KVM_CAP_MP_STATE */
+#[cfg(any(
+    target_arch = "x86_64",
+    target_arch = "aarch64",
+    target_arch = "riscv64",
+    target_arch = "s390x"
+))]
+ioctl_ior_nr!(KVM_GET_MP_STATE, KVMIO, 0x98, kvm_mp_state);
+/* Available with KVM_CAP_MP_STATE */
+#[cfg(any(
+    target_arch = "x86_64",
+    target_arch = "aarch64",
+    target_arch = "riscv64",
+    target_arch = "s390x"
+))]
+ioctl_iow_nr!(KVM_SET_MP_STATE, KVMIO, 0x99, kvm_mp_state);
+/* Available with KVM_CAP_USER_NMI */
+#[cfg(target_arch = "x86_64")]
+ioctl_io_nr!(KVM_NMI, KVMIO, 0x9a);
+/* Available with KVM_CAP_VCPU_EVENTS */
+#[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))]
+ioctl_ior_nr!(KVM_GET_VCPU_EVENTS, KVMIO, 0x9f, kvm_vcpu_events);
+/* Available with KVM_CAP_VCPU_EVENTS */
+#[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))]
+ioctl_iow_nr!(KVM_SET_VCPU_EVENTS, KVMIO, 0xa0, kvm_vcpu_events);
+/* Available with KVM_CAP_DEBUGREGS */
+#[cfg(target_arch = "x86_64")]
+ioctl_ior_nr!(KVM_GET_DEBUGREGS, KVMIO, 0xa1, kvm_debugregs);
+/* Available with KVM_CAP_DEBUGREGS */
+#[cfg(target_arch = "x86_64")]
+ioctl_iow_nr!(KVM_SET_DEBUGREGS, KVMIO, 0xa2, kvm_debugregs);
+/* Available with KVM_CAP_XSAVE */
+#[cfg(target_arch = "x86_64")]
+ioctl_ior_nr!(KVM_GET_XSAVE, KVMIO, 0xa4, kvm_xsave);
+/* Available with KVM_CAP_XSAVE */
+#[cfg(target_arch = "x86_64")]
+ioctl_iow_nr!(KVM_SET_XSAVE, KVMIO, 0xa5, kvm_xsave);
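+// Note that the sequence number alone does not identify an ioctl: 0xa2, for
+// instance, is used by both KVM_SET_DEBUGREGS (above) and KVM_SET_TSC_KHZ
+// (below). The encoded request values still differ, because the direction
+// and size bits are part of the encoding.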
+/* Available with KVM_CAP_XCRS */ +#[cfg(target_arch = "x86_64")] +ioctl_ior_nr!(KVM_GET_XCRS, KVMIO, 0xa6, kvm_xcrs); +/* Available with KVM_CAP_XCRS */ +#[cfg(target_arch = "x86_64")] +ioctl_iow_nr!(KVM_SET_XCRS, KVMIO, 0xa7, kvm_xcrs); +/* Available with KVM_CAP_KVMCLOCK_CTRL */ +#[cfg(target_arch = "x86_64")] +ioctl_io_nr!(KVM_KVMCLOCK_CTRL, KVMIO, 0xad); + +/* Available with KVM_CAP_TSC_CONTROL */ +#[cfg(target_arch = "x86_64")] +ioctl_io_nr!(KVM_SET_TSC_KHZ, KVMIO, 0xa2); +/* Available with KVM_CAP_GET_TSC_KHZ */ +#[cfg(target_arch = "x86_64")] +ioctl_io_nr!(KVM_GET_TSC_KHZ, KVMIO, 0xa3); + +/* Available with KVM_CAP_ENABLE_CAP */ +#[cfg(not(any(target_arch = "aarch64", target_arch = "riscv64")))] +ioctl_iow_nr!(KVM_ENABLE_CAP, KVMIO, 0xa3, kvm_enable_cap); +/* Available with KVM_CAP_SIGNAL_MSI */ +#[cfg(any( + target_arch = "x86_64", + target_arch = "aarch64", + target_arch = "riscv64" +))] +ioctl_iow_nr!(KVM_SIGNAL_MSI, KVMIO, 0xa5, kvm_msi); +/* Available with KVM_CAP_ONE_REG */ +#[cfg(any(target_arch = "aarch64", target_arch = "riscv64"))] +ioctl_iow_nr!(KVM_GET_ONE_REG, KVMIO, 0xab, kvm_one_reg); +#[cfg(any(target_arch = "aarch64", target_arch = "riscv64"))] +ioctl_iow_nr!(KVM_SET_ONE_REG, KVMIO, 0xac, kvm_one_reg); +#[cfg(target_arch = "aarch64")] +ioctl_iow_nr!(KVM_ARM_VCPU_INIT, KVMIO, 0xae, kvm_vcpu_init); +#[cfg(target_arch = "aarch64")] +ioctl_ior_nr!(KVM_ARM_PREFERRED_TARGET, KVMIO, 0xaf, kvm_vcpu_init); +#[cfg(any(target_arch = "aarch64", target_arch = "riscv64"))] +ioctl_iowr_nr!(KVM_GET_REG_LIST, KVMIO, 0xb0, kvm_reg_list); + +/* Available with KVM_CAP_X86_SMM */ +#[cfg(target_arch = "x86_64")] +ioctl_io_nr!(KVM_SMI, KVMIO, 0xb7); + +/* Available with KVM_CAP_ARM_SVE */ +#[cfg(target_arch = "aarch64")] +ioctl_iow_nr!(KVM_ARM_VCPU_FINALIZE, KVMIO, 0xc2, std::os::raw::c_int); + +/* Available with KVM_CAP_SET_GUEST_DEBUG */ +ioctl_iow_nr!(KVM_SET_GUEST_DEBUG, KVMIO, 0x9b, kvm_guest_debug); + +#[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))] +ioctl_iow_nr!( + KVM_SET_MEMORY_ATTRIBUTES, + KVMIO, + 0xd2, + kvm_memory_attributes +); + +// Device ioctls. 
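+//
+// KVM_CREATE_DEVICE is issued on a VM fd and returns a new device fd; the
+// *_DEVICE_ATTR ioctls below then get, set, or probe attributes on that
+// device fd (wrapped by `DeviceFd` in this crate).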
+
+/* Available with KVM_CAP_DEVICE_CTRL */
+ioctl_iowr_nr!(KVM_CREATE_DEVICE, KVMIO, 0xe0, kvm_create_device);
+/* Available with KVM_CAP_DEVICE_CTRL */
+ioctl_iow_nr!(KVM_SET_DEVICE_ATTR, KVMIO, 0xe1, kvm_device_attr);
+/* Available with KVM_CAP_DEVICE_CTRL */
+ioctl_iow_nr!(KVM_GET_DEVICE_ATTR, KVMIO, 0xe2, kvm_device_attr);
+/* Available with KVM_CAP_DEVICE_CTRL */
+ioctl_iow_nr!(KVM_HAS_DEVICE_ATTR, KVMIO, 0xe3, kvm_device_attr);
+
+#[cfg(test)]
+mod tests {
+    #![allow(clippy::undocumented_unsafe_blocks)]
+    use std::fs::File;
+    use std::os::unix::io::FromRawFd;
+
+    use libc::{c_char, open, O_RDWR};
+    use vmm_sys_util::ioctl::{ioctl, ioctl_with_val};
+
+    use super::*;
+    const KVM_PATH: &str = "/dev/kvm\0";
+
+    #[test]
+    fn get_version() {
+        let sys_fd = unsafe { open(KVM_PATH.as_ptr() as *const c_char, O_RDWR) };
+        assert!(sys_fd >= 0);
+
+        let ret = unsafe { ioctl(&File::from_raw_fd(sys_fd), KVM_GET_API_VERSION()) };
+        assert_eq!(ret as u32, KVM_API_VERSION);
+    }
+
+    #[test]
+    fn create_vm_fd() {
+        let sys_fd = unsafe { open(KVM_PATH.as_ptr() as *const c_char, O_RDWR) };
+        assert!(sys_fd >= 0);
+
+        let vm_fd = unsafe { ioctl(&File::from_raw_fd(sys_fd), KVM_CREATE_VM()) };
+        assert!(vm_fd >= 0);
+    }
+
+    #[test]
+    fn check_vm_extension() {
+        let sys_fd = unsafe { open(KVM_PATH.as_ptr() as *const c_char, O_RDWR) };
+        assert!(sys_fd >= 0);
+
+        let has_user_memory = unsafe {
+            ioctl_with_val(
+                &File::from_raw_fd(sys_fd),
+                KVM_CHECK_EXTENSION(),
+                KVM_CAP_USER_MEMORY.into(),
+            )
+        };
+        assert_eq!(has_user_memory, 1);
+    }
+}
diff --git a/kvm-ioctls/src/lib.rs b/kvm-ioctls/src/lib.rs
new file mode 100644
index 0000000..278e21b
--- /dev/null
+++ b/kvm-ioctls/src/lib.rs
@@ -0,0 +1,264 @@
+// Copyright © 2024 Institute of Software, CAS. All rights reserved.
+//
+// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+// SPDX-License-Identifier: Apache-2.0 OR MIT
+//
+// Portions Copyright 2017 The Chromium OS Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the THIRD-PARTY file.
+#![deny(missing_docs)]
+#![deny(missing_copy_implementations)]
+#![deny(missing_debug_implementations)]
+#![warn(clippy::assertions_on_result_states)]
+
+//! A safe wrapper around the kernel's KVM interface.
+//!
+//! This crate offers safe wrappers for:
+//! - [system ioctls](struct.Kvm.html) using the `Kvm` structure
+//! - [VM ioctls](struct.VmFd.html) using the `VmFd` structure
+//! - [vCPU ioctls](struct.VcpuFd.html) using the `VcpuFd` structure
+//! - [device ioctls](struct.DeviceFd.html) using the `DeviceFd` structure
+//!
+//! # Platform support
+//!
+//! - x86_64
+//! - arm64 (experimental)
+//! - riscv64 (experimental)
+//!
+//! **NOTE:** The list of available ioctls is not exhaustive.
+//!
+//! # Example - Running a VM on x86_64
+//!
+//! In this example we create a Virtual Machine (VM) with one vCPU and run
+//! machine-specific code on that vCPU. The example is based on the
+//! [LWN article](https://lwn.net/Articles/658511/) on using the KVM API;
+//! the aarch64 and riscv64 variants were adapted from it accordingly.
+//!
+//! To get code running on the vCPU we go through the following steps:
+//!
+//! 1. Instantiate KVM. This is used for running
+//!    [system specific ioctls](struct.Kvm.html).
+//! 2. Use the KVM object to create a VM. The VM is used for running
+//!    [VM specific ioctls](struct.VmFd.html).
+//! 3. Initialize the guest memory for the created VM.
+//!    In this dummy example we add only one memory region and write the code
+//!    into a single memory page.
+//! 4. Create a vCPU using the VM object. The vCPU is used for running
+//!    [vCPU specific ioctls](struct.VcpuFd.html).
+//! 5. Set up architecture-specific general purpose and special registers. For
+//!    details about how and why these registers are set, please check the
+//!    [LWN article](https://lwn.net/Articles/658511/) on which this example is
+//!    built.
+//! 6. Run the vCPU code in a loop and check the
+//!    [exit reasons](enum.VcpuExit.html).
+//!
+//! ```rust
+//! extern crate kvm_ioctls;
+//! extern crate kvm_bindings;
+//!
+//! use kvm_ioctls::VcpuExit;
+//! use kvm_ioctls::{Kvm, VcpuFd, VmFd};
+//!
+//! fn main() {
+//!     use std::io::Write;
+//!     use std::ptr::null_mut;
+//!     use std::slice;
+//!
+//!     use kvm_bindings::kvm_userspace_memory_region;
+//!     use kvm_bindings::KVM_MEM_LOG_DIRTY_PAGES;
+//!
+//!     let mem_size = 0x4000;
+//!     let guest_addr = 0x1000;
+//!     let asm_code: &[u8];
+//!
+//!     // Setting up architecture-dependent values.
+//!     #[cfg(target_arch = "x86_64")]
+//!     {
+//!         asm_code = &[
+//!             0xba, 0xf8, 0x03, /* mov $0x3f8, %dx */
+//!             0x00, 0xd8, /* add %bl, %al */
+//!             0x04, b'0', /* add $'0', %al */
+//!             0xee, /* out %al, %dx */
+//!             0xec, /* in %dx, %al */
+//!             0xc6, 0x06, 0x00, 0x80,
+//!             0x00, /* movb $0, (0x8000); This generates an MMIO Write. */
+//!             0x8a, 0x16, 0x00, 0x80, /* movb (0x8000), %dl; This generates an MMIO Read. */
+//!             0xf4, /* hlt */
+//!         ];
+//!     }
+//!     #[cfg(target_arch = "aarch64")]
+//!     {
+//!         asm_code = &[
+//!             0x01, 0x00, 0x00, 0x10, /* adr x1, <this address> */
+//!             0x22, 0x10, 0x00, 0xb9, /* str w2, [x1, #16]; write to this page */
+//!             0x02, 0x00, 0x00, 0xb9, /* str w2, [x0]; This generates an MMIO Write. */
+//!             0x00, 0x00, 0x00,
+//!             0x14, /* b <this address>; shouldn't get here, but if so loop forever */
+//!         ];
+//!     }
+//!     #[cfg(target_arch = "riscv64")]
+//!     {
+//!         asm_code = &[
+//!             0x17, 0x03, 0x00, 0x00, // auipc t1, 0; <this address> -> t1
+//!             0xa3, 0x23, 0x73, 0x00, // sw t2, t1 + 7; dirty current page
+//!             0x23, 0x20, 0x75, 0x00, // sw t2, a0; trigger MMIO exit
+//!             0x6f, 0x00, 0x00, 0x00, // j .; shouldn't get here, but if so loop forever
+//!         ];
+//!     }
+//!
+//!     // 1. Instantiate KVM.
+//!     let kvm = Kvm::new().unwrap();
+//!
+//!     // 2. Create a VM.
+//!     let vm = kvm.create_vm().unwrap();
+//!
+//!     // 3. Initialize Guest Memory.
+//!     let load_addr: *mut u8 = unsafe {
+//!         libc::mmap(
+//!             null_mut(),
+//!             mem_size,
+//!             libc::PROT_READ | libc::PROT_WRITE,
+//!             libc::MAP_ANONYMOUS | libc::MAP_SHARED | libc::MAP_NORESERVE,
+//!             -1,
+//!             0,
+//!         ) as *mut u8
+//!     };
+//!
+//!     let slot = 0;
+//!     // When initializing the guest memory slot, specify the
+//!     // `KVM_MEM_LOG_DIRTY_PAGES` flag to enable dirty-page logging.
+//!     let mem_region = kvm_userspace_memory_region {
+//!         slot,
+//!         guest_phys_addr: guest_addr,
+//!         memory_size: mem_size as u64,
+//!         userspace_addr: load_addr as u64,
+//!         flags: KVM_MEM_LOG_DIRTY_PAGES,
+//!     };
+//!     unsafe { vm.set_user_memory_region(mem_region).unwrap() };
+//!
+//!     // Write the code into the guest memory. This will generate a dirty page.
+//!     unsafe {
+//!         let mut slice = slice::from_raw_parts_mut(load_addr, mem_size);
+//!         slice.write(&asm_code).unwrap();
+//!     }
+//!
+//!     // 4. Create one vCPU.
+//!     let mut vcpu_fd = vm.create_vcpu(0).unwrap();
+//!
+//!     // 5. Initialize general purpose and special registers.
+//!     #[cfg(target_arch = "x86_64")]
+//!     {
+//!         // x86_64 specific register setup.
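+//!         // With the CS base and selector zeroed, real-mode linear
+//!         // addresses equal the raw offsets, so pointing RIP at guest_addr
+//!         // below starts execution of the code blob mapped at that address.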
+//!         let mut vcpu_sregs = vcpu_fd.get_sregs().unwrap();
+//!         vcpu_sregs.cs.base = 0;
+//!         vcpu_sregs.cs.selector = 0;
+//!         vcpu_fd.set_sregs(&vcpu_sregs).unwrap();
+//!
+//!         let mut vcpu_regs = vcpu_fd.get_regs().unwrap();
+//!         vcpu_regs.rip = guest_addr;
+//!         vcpu_regs.rax = 2;
+//!         vcpu_regs.rbx = 3;
+//!         vcpu_regs.rflags = 2;
+//!         vcpu_fd.set_regs(&vcpu_regs).unwrap();
+//!     }
+//!
+//!     #[cfg(target_arch = "aarch64")]
+//!     {
+//!         // aarch64 specific register setup.
+//!         let mut kvi = kvm_bindings::kvm_vcpu_init::default();
+//!         vm.get_preferred_target(&mut kvi).unwrap();
+//!         vcpu_fd.vcpu_init(&kvi).unwrap();
+//!
+//!         let core_reg_base: u64 = 0x6030_0000_0010_0000;
+//!         let mmio_addr: u64 = guest_addr + mem_size as u64;
+//!         // Set PC.
+//!         vcpu_fd
+//!             .set_one_reg(core_reg_base + 2 * 32, &guest_addr.to_le_bytes())
+//!             .unwrap();
+//!         // Set X0.
+//!         vcpu_fd
+//!             .set_one_reg(core_reg_base + 2 * 0, &mmio_addr.to_le_bytes())
+//!             .unwrap();
+//!     }
+//!
+//!     #[cfg(target_arch = "riscv64")]
+//!     {
+//!         // riscv64 specific register setup.
+//!         let core_reg_base: u64 = 0x8030_0000_0200_0000;
+//!         let mmio_addr: u64 = guest_addr + mem_size as u64;
+//!         // Set PC.
+//!         vcpu_fd
+//!             .set_one_reg(core_reg_base, &guest_addr.to_le_bytes())
+//!             .unwrap();
+//!         // Set A0.
+//!         vcpu_fd
+//!             .set_one_reg(core_reg_base + 10, &mmio_addr.to_le_bytes())
+//!             .unwrap();
+//!     }
+//!
+//!     // 6. Run code on the vCPU.
+//!     loop {
+//!         match vcpu_fd.run().expect("run failed") {
+//!             VcpuExit::IoIn(addr, data) => {
+//!                 println!(
+//!                     "Received an I/O in exit. Address: {:#x}. Data: {:#x}",
+//!                     addr, data[0],
+//!                 );
+//!             }
+//!             VcpuExit::IoOut(addr, data) => {
+//!                 println!(
+//!                     "Received an I/O out exit. Address: {:#x}. Data: {:#x}",
+//!                     addr, data[0],
+//!                 );
+//!             }
+//!             VcpuExit::MmioRead(addr, data) => {
+//!                 println!("Received an MMIO Read Request for the address {:#x}.", addr);
+//!             }
+//!             VcpuExit::MmioWrite(addr, data) => {
+//!                 println!("Received an MMIO Write Request to the address {:#x}.", addr);
+//!                 // The code snippet dirties one page when it is loaded into memory.
+//!                 let dirty_pages_bitmap = vm.get_dirty_log(slot, mem_size).unwrap();
+//!                 let dirty_pages = dirty_pages_bitmap
+//!                     .into_iter()
+//!                     .map(|page| page.count_ones())
+//!                     .fold(0, |dirty_page_count, i| dirty_page_count + i);
+//!                 assert_eq!(dirty_pages, 1);
+//!                 // Since neither aarch64 nor riscv64 has a halt instruction,
+//!                 // we break right after the last known instruction of the asm
+//!                 // code example to avoid an infinite loop.
+//!                 #[cfg(any(target_arch = "aarch64", target_arch = "riscv64"))]
+//!                 break;
+//!             }
+//!             VcpuExit::Hlt => {
+//!                 break;
+//!             }
+//!             r => panic!("Unexpected exit reason: {:?}", r),
+//!         }
+//!     }
+//! }
+//! ```
+
+extern crate kvm_bindings;
+extern crate libc;
+#[macro_use]
+extern crate vmm_sys_util;
+
+#[macro_use]
+mod kvm_ioctls;
+mod cap;
+mod ioctls;
+
+pub use cap::Cap;
+pub use ioctls::device::DeviceFd;
+pub use ioctls::system::Kvm;
+#[cfg(any(target_arch = "aarch64", target_arch = "riscv64"))]
+pub use ioctls::vcpu::reg_size;
+pub use ioctls::vcpu::{HypercallExit, VcpuExit, VcpuFd};
+
+#[cfg(target_arch = "x86_64")]
+pub use ioctls::vcpu::{MsrExitReason, ReadMsrExit, SyncReg, WriteMsrExit};
+
+pub use ioctls::vm::{IoEventAddress, NoDatamatch, VmFd};
+// The following example is used to verify that our public
+// structures are exported properly.
+/// # Example
+///
+/// ```
+/// #[cfg(target_arch = "x86_64")]
+/// use kvm_ioctls::{Error, KvmRunWrapper};
+/// ```
+pub use ioctls::KvmRunWrapper;
+pub use vmm_sys_util::errno::Error;
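For reference, the wrappers exported above replace the raw ioctl plumbing exercised in the `kvm_ioctls.rs` tests. The following is a minimal sketch of that correspondence, assuming a host with an accessible `/dev/kvm`; it is illustrative only and not part of the diff:

```rust
use kvm_ioctls::{Cap, Kvm};

fn main() {
    // Safe counterpart of ioctl(kvm_fd, KVM_GET_API_VERSION): expected to
    // report the stable KVM_API_VERSION, i.e. 12.
    let kvm = Kvm::new().expect("failed to open /dev/kvm");
    assert_eq!(kvm.get_api_version(), 12);

    // Safe counterpart of ioctl(kvm_fd, KVM_CHECK_EXTENSION, KVM_CAP_USER_MEMORY).
    assert!(kvm.check_extension(Cap::UserMemory));

    // Safe counterpart of ioctl(kvm_fd, KVM_CREATE_VM): the returned VmFd
    // owns the new VM file descriptor and closes it on drop.
    let _vm = kvm.create_vm().expect("failed to create a VM");
}
```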