diff --git a/src/aead/aes.rs b/src/aead/aes.rs index d3ce316899..e0416cd796 100644 --- a/src/aead/aes.rs +++ b/src/aead/aes.rs @@ -13,14 +13,24 @@ // CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. use super::{nonce::Nonce, quic::Sample, NONCE_LEN}; -use crate::{constant_time, cpu, error}; +use crate::{ + constant_time, + cpu::{self, GetFeature as _}, + error, +}; use cfg_if::cfg_if; use core::ops::RangeFrom; pub(super) use ffi::Counter; + #[macro_use] mod ffi; +mod bs; +pub(super) mod fallback; +pub(super) mod hw; +pub(super) mod vp; + cfg_if! { if #[cfg(any(target_arch = "aarch64", target_arch = "x86_64"))] { pub(super) use ffi::AES_KEY; @@ -30,8 +40,19 @@ cfg_if! { } #[derive(Clone)] -pub(super) struct Key { - inner: AES_KEY, +pub(super) enum Key { + #[cfg(any(target_arch = "aarch64", target_arch = "x86_64", target_arch = "x86"))] + Hw(hw::Key), + + #[cfg(any( + target_arch = "aarch64", + target_arch = "arm", + target_arch = "x86", + target_arch = "x86_64" + ))] + Vp(vp::Key), + + Fallback(fallback::Key), } impl Key { @@ -40,201 +61,48 @@ impl Key { bytes: KeyBytes<'_>, cpu_features: cpu::Features, ) -> Result { - let key = match detect_implementation(cpu_features) { - #[cfg(any(target_arch = "aarch64", target_arch = "x86_64", target_arch = "x86"))] - // SAFETY: `aes_hw_set_encrypt_key` satisfies the `set_encrypt_key!` - // contract for these target architectures. - Implementation::HWAES => unsafe { - set_encrypt_key!(aes_hw_set_encrypt_key, bytes, cpu_features) - }, - - #[cfg(any( - target_arch = "aarch64", - target_arch = "arm", - target_arch = "x86_64", - target_arch = "x86" - ))] - // SAFETY: `vpaes_set_encrypt_key` satisfies the `set_encrypt_key!` - // contract for these target architectures. - Implementation::VPAES_BSAES => unsafe { - set_encrypt_key!(vpaes_set_encrypt_key, bytes, cpu_features) - }, - - // SAFETY: `aes_nohw_set_encrypt_key` satisfies the `set_encrypt_key!` - // contract. - Implementation::NOHW => unsafe { - set_encrypt_key!(aes_nohw_set_encrypt_key, bytes, cpu_features) - }, - }?; - - Ok(Self { inner: key }) - } - - #[inline] - pub fn encrypt_block(&self, a: Block, cpu_features: cpu::Features) -> Block { - match detect_implementation(cpu_features) { - #[cfg(any(target_arch = "aarch64", target_arch = "x86_64", target_arch = "x86"))] - Implementation::HWAES => self.encrypt_iv_xor_block(Iv(a), ZERO_BLOCK, cpu_features), - - #[cfg(any(target_arch = "aarch64", target_arch = "arm", target_arch = "x86_64"))] - Implementation::VPAES_BSAES => { - self.encrypt_iv_xor_block(Iv(a), ZERO_BLOCK, cpu_features) - } - - // `encrypt_iv_xor_block` calls `encrypt_block` on `target_arch = "x86"`. - #[cfg(target_arch = "x86")] - Implementation::VPAES_BSAES => unsafe { encrypt_block!(vpaes_encrypt, a, &self.inner) }, - - Implementation::NOHW => unsafe { encrypt_block!(aes_nohw_encrypt, a, &self.inner) }, + #[cfg(any(target_arch = "aarch64", target_arch = "x86", target_arch = "x86_64"))] + if let Some(hw_features) = cpu_features.get_feature() { + return Ok(Self::Hw(hw::Key::new(bytes, hw_features)?)); } - } - pub fn encrypt_iv_xor_block( - &self, - iv: Iv, - mut block: Block, - cpu_features: cpu::Features, - ) -> Block { - let use_ctr32 = match detect_implementation(cpu_features) { - // These have specialized one-block implementations. - #[cfg(any(target_arch = "aarch64", target_arch = "x86_64", target_arch = "x86"))] - Implementation::HWAES => true, - // `ctr32_encrypt_within` calls `encrypt_iv_xor_block` on `target_arch = "x86"`. 
- #[cfg(any(target_arch = "aarch64", target_arch = "arm", target_arch = "x86_64"))] - Implementation::VPAES_BSAES => true, - _ => false, - }; - if use_ctr32 { - let mut ctr = Counter(iv.0); // We're only doing one block so this is OK. - self.ctr32_encrypt_within(&mut block, 0.., &mut ctr, cpu_features); - block - } else { - let encrypted_iv = self.encrypt_block(iv.into_block_less_safe(), cpu_features); - constant_time::xor_16(encrypted_iv, block) + #[cfg(any( + target_arch = "aarch64", + target_arch = "arm", + target_arch = "x86_64", + target_arch = "x86" + ))] + if let Some(vp_features) = cpu_features.get_feature() { + return Ok(Self::Vp(vp::Key::new(bytes, vp_features)?)); } + + let _ = cpu_features; + + Ok(Self::Fallback(fallback::Key::new(bytes)?)) } #[inline] - pub(super) fn ctr32_encrypt_within( - &self, - in_out: &mut [u8], - src: RangeFrom, - ctr: &mut Counter, - cpu_features: cpu::Features, - ) { - match detect_implementation(cpu_features) { + pub fn encrypt_block(&self, a: Block) -> Block { + match self { #[cfg(any(target_arch = "aarch64", target_arch = "x86_64", target_arch = "x86"))] - // SAFETY: - // * self.inner was initialized with `aes_hw_set_encrypt_key` above, - // as required by `aes_hw_ctr32_encrypt_blocks`. - // * `aes_hw_ctr32_encrypt_blocks` satisfies the contract for - // `ctr32_encrypt_blocks`. - Implementation::HWAES => unsafe { - ctr32_encrypt_blocks!( - aes_hw_ctr32_encrypt_blocks, - in_out, - src, - &self.inner, - ctr, - cpu_features - ) - }, - - #[cfg(any(target_arch = "aarch64", target_arch = "arm", target_arch = "x86_64"))] - Implementation::VPAES_BSAES => { - #[cfg(target_arch = "arm")] - let in_out = { - let blocks = in_out[src.clone()].len() / BLOCK_LEN; - - // bsaes operates in batches of 8 blocks. - let bsaes_blocks = if blocks >= 8 && (blocks % 8) < 6 { - // It's faster to use bsaes for all the full batches and then - // switch to vpaes for the last partial batch (if any). - blocks - (blocks % 8) - } else if blocks >= 8 { - // It's faster to let bsaes handle everything including - // the last partial batch. - blocks - } else { - // It's faster to let vpaes handle everything. - 0 - }; - let bsaes_in_out_len = bsaes_blocks * BLOCK_LEN; - - // SAFETY: - // * self.inner was initialized with `vpaes_set_encrypt_key` above, - // as required by `bsaes_ctr32_encrypt_blocks_with_vpaes_key`. - unsafe { - bsaes_ctr32_encrypt_blocks_with_vpaes_key( - &mut in_out[..(src.start + bsaes_in_out_len)], - src.clone(), - &self.inner, - ctr, - cpu_features, - ); - } - - &mut in_out[bsaes_in_out_len..] - }; - - // SAFETY: - // * self.inner was initialized with `vpaes_set_encrypt_key` above, - // as required by `vpaes_ctr32_encrypt_blocks`. - // * `vpaes_ctr32_encrypt_blocks` satisfies the contract for - // `ctr32_encrypt_blocks`. - unsafe { - ctr32_encrypt_blocks!( - vpaes_ctr32_encrypt_blocks, - in_out, - src, - &self.inner, - ctr, - cpu_features - ) - } - } - - #[cfg(target_arch = "x86")] - Implementation::VPAES_BSAES => { - super::shift::shift_full_blocks(in_out, src, |input| { - self.encrypt_iv_xor_block(ctr.increment(), *input, cpu_features) - }); - } - - // SAFETY: - // * self.inner was initialized with `aes_nohw_set_encrypt_key` - // above, as required by `aes_nohw_ctr32_encrypt_blocks`. - // * `aes_nohw_ctr32_encrypt_blocks` satisfies the contract for - // `ctr32_encrypt_blocks`. 
- Implementation::NOHW => unsafe { - ctr32_encrypt_blocks!( - aes_nohw_ctr32_encrypt_blocks, - in_out, - src, - &self.inner, - ctr, - cpu_features - ) - }, + Key::Hw(inner) => inner.encrypt_block(a), + + #[cfg(any( + target_arch = "aarch64", + target_arch = "arm", + target_arch = "x86", + target_arch = "x86_64" + ))] + Key::Vp(inner) => inner.encrypt_block(a), + + Key::Fallback(inner) => inner.encrypt_block(a), } } pub fn new_mask(&self, sample: Sample) -> [u8; 5] { - let [b0, b1, b2, b3, b4, ..] = self.encrypt_block(sample, cpu::features()); + let [b0, b1, b2, b3, b4, ..] = self.encrypt_block(sample); [b0, b1, b2, b3, b4] } - - #[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))] - #[must_use] - pub fn is_aes_hw(&self, cpu_features: cpu::Features) -> bool { - matches!(detect_implementation(cpu_features), Implementation::HWAES) - } - - #[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))] - #[must_use] - pub(super) fn inner_less_safe(&self) -> &AES_KEY { - &self.inner - } } pub const AES_128_KEY_LEN: usize = 128 / 8; @@ -280,131 +148,39 @@ impl From for Iv { } } -impl Iv { - /// "Less safe" because it defeats attempts to use the type system to prevent reuse of the IV. - #[inline] - pub(super) fn into_block_less_safe(self) -> Block { - self.0 - } -} - pub(super) type Block = [u8; BLOCK_LEN]; pub(super) const BLOCK_LEN: usize = 16; pub(super) const ZERO_BLOCK: Block = [0u8; BLOCK_LEN]; -#[derive(Clone, Copy)] -#[allow(clippy::upper_case_acronyms)] -pub enum Implementation { - #[cfg(any(target_arch = "aarch64", target_arch = "x86_64", target_arch = "x86"))] - HWAES, - - // On "arm" only, this indicates that the bsaes implementation may be used. - #[cfg(any( - target_arch = "aarch64", - target_arch = "arm", - target_arch = "x86_64", - target_arch = "x86" - ))] - VPAES_BSAES, - - NOHW, +pub(super) trait EncryptBlock { + fn encrypt_block(&self, block: Block) -> Block; + fn encrypt_iv_xor_block(&self, iv: Iv, block: Block) -> Block; } -fn detect_implementation(cpu_features: cpu::Features) -> Implementation { - // `cpu_features` is only used for specific platforms. - #[cfg(not(any( - target_arch = "aarch64", - target_arch = "arm", - target_arch = "x86_64", - target_arch = "x86" - )))] - let _cpu_features = cpu_features; - - #[cfg(target_arch = "aarch64")] - { - if cpu::arm::AES.available(cpu_features) { - return Implementation::HWAES; - } - } - - #[cfg(any(target_arch = "x86_64", target_arch = "x86"))] - { - if cpu::intel::AES.available(cpu_features) { - return Implementation::HWAES; - } - } - - #[cfg(any(target_arch = "x86_64", target_arch = "x86"))] - { - if cpu::intel::SSSE3.available(cpu_features) { - return Implementation::VPAES_BSAES; - } - } - - #[cfg(any(target_arch = "aarch64", target_arch = "arm"))] - { - if cpu::arm::NEON.available(cpu_features) { - return Implementation::VPAES_BSAES; - } - } +pub(super) trait EncryptCtr32 { + // TODO: Document safety: + fn ctr32_encrypt_within(&self, in_out: &mut [u8], src: RangeFrom, ctr: &mut Counter); +} - { - Implementation::NOHW - } +#[allow(dead_code)] +fn encrypt_block_using_encrypt_iv_xor_block(key: &impl EncryptBlock, block: Block) -> Block { + key.encrypt_iv_xor_block(Iv(block), ZERO_BLOCK) } -/// SAFETY: -/// * The caller must ensure that if blocks > 0 then either `input` and -/// `output` do not overlap at all, or input == output.add(n) for some -/// (nonnegative) n. -/// * if blocks > 0, The caller must ensure `input` points to `blocks` blocks -/// and that `output` points to writable space for `blocks` blocks. 
-/// * The caller must ensure that `vpaes_key` was initialized with -/// `vpaes_set_encrypt_key`. -/// * Upon returning, `blocks` blocks will have been read from `input` and -/// written to `output`. -#[cfg(target_arch = "arm")] -unsafe fn bsaes_ctr32_encrypt_blocks_with_vpaes_key( - in_out: &mut [u8], - src: RangeFrom, - vpaes_key: &AES_KEY, - ctr: &mut Counter, - cpu_features: cpu::Features, -) { - prefixed_extern! { - // bsaes_ctr32_encrypt_blocks requires transformation of an existing - // VPAES key; there is no `bsaes_set_encrypt_key`. - fn vpaes_encrypt_key_to_bsaes(bsaes_key: *mut AES_KEY, vpaes_key: &AES_KEY); - } +fn encrypt_iv_xor_block_using_encrypt_block( + key: &impl EncryptBlock, + iv: Iv, + block: Block, +) -> Block { + let encrypted_iv = key.encrypt_block(iv.0); + constant_time::xor_16(encrypted_iv, block) +} - // SAFETY: - // * The caller ensures `vpaes_key` was initialized by - // `vpaes_set_encrypt_key`. - // * `bsaes_key was zeroed above, and `vpaes_encrypt_key_to_bsaes` - // is assumed to initialize `bsaes_key`. - let bsaes_key = - unsafe { AES_KEY::derive(vpaes_encrypt_key_to_bsaes, &vpaes_key, cpu_features) }; - - // The code for `vpaes_encrypt_key_to_bsaes` notes "vpaes stores one - // fewer round count than bsaes, but the number of keys is the same," - // so use this as a sanity check. - debug_assert_eq!(bsaes_key.rounds(), vpaes_key.rounds() + 1); - - // SAFETY: - // * `bsaes_key` is in bsaes format after calling - // `vpaes_encrypt_key_to_bsaes`. - // * `bsaes_ctr32_encrypt_blocks` satisfies the contract for - // `ctr32_encrypt_blocks`. - unsafe { - ctr32_encrypt_blocks!( - bsaes_ctr32_encrypt_blocks, - in_out, - src, - &bsaes_key, - ctr, - cpu_features - ); - } +#[allow(dead_code)] +fn encrypt_iv_xor_block_using_ctr32(key: &impl EncryptCtr32, iv: Iv, mut block: Block) -> Block { + let mut ctr = Counter(iv.0); // This is OK because we're only encrypting one block. + key.ctr32_encrypt_within(&mut block, 0.., &mut ctr); + block } #[cfg(test)] @@ -414,7 +190,6 @@ mod tests { #[test] pub fn test_aes() { - let cpu_features = cpu::features(); test::run(test_file!("aes_tests.txt"), |section, test_case| { assert_eq!(section, ""); let key = consume_key(test_case, "Key"); @@ -422,7 +197,7 @@ mod tests { let block: Block = input.as_slice().try_into()?; let expected_output = test_case.consume_bytes("Output"); - let output = key.encrypt_block(block, cpu_features); + let output = key.encrypt_block(block); assert_eq!(output.as_ref(), &expected_output[..]); Ok(()) diff --git a/src/aead/aes/bs.rs b/src/aead/aes/bs.rs new file mode 100644 index 0000000000..82d467d946 --- /dev/null +++ b/src/aead/aes/bs.rs @@ -0,0 +1,63 @@ +// Copyright 2018-2024 Brian Smith. +// +// Permission to use, copy, modify, and/or distribute this software for any +// purpose with or without fee is hereby granted, provided that the above +// copyright notice and this permission notice appear in all copies. +// +// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES +// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY +// SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION +// OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN +// CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ +#![cfg(target_arch = "arm")] + +use super::{Counter, AES_KEY, MAX_ROUNDS}; +use crate::cpu; +use core::ops::RangeFrom; + +/// SAFETY: +/// * The caller must ensure that if blocks > 0 then either `input` and +/// `output` do not overlap at all, or input == output.add(n) for some +/// (nonnegative) n. +/// * if blocks > 0, The caller must ensure `input` points to `blocks` blocks +/// and that `output` points to writable space for `blocks` blocks. +/// * The caller must ensure that `vpaes_key` was initialized with +/// `vpaes_set_encrypt_key`. +/// * Upon returning, `blocks` blocks will have been read from `input` and +/// written to `output`. +pub(super) unsafe fn ctr32_encrypt_blocks_with_vpaes_key( + in_out: &mut [u8], + src: RangeFrom, + vpaes_key: &AES_KEY, + ctr: &mut Counter, +) { + prefixed_extern! { + // bsaes_ctr32_encrypt_blocks requires transformation of an existing + // VPAES key; there is no `bsaes_set_encrypt_key`. + fn vpaes_encrypt_key_to_bsaes(bsaes_key: *mut AES_KEY, vpaes_key: &AES_KEY); + } + + // SAFETY: + // * The caller ensures `vpaes_key` was initialized by + // `vpaes_set_encrypt_key`. + // * `bsaes_key was zeroed above, and `vpaes_encrypt_key_to_bsaes` + // is assumed to initialize `bsaes_key`. + let bsaes_key = unsafe { AES_KEY::derive(vpaes_encrypt_key_to_bsaes, vpaes_key) }; + + // The code for `vpaes_encrypt_key_to_bsaes` notes "vpaes stores one + // fewer round count than bsaes, but the number of keys is the same," + // so use this as a sanity check. + debug_assert_eq!(bsaes_key.rounds(), vpaes_key.rounds() + 1); + + // SAFETY: + // * `bsaes_key` is in bsaes format after calling + // `vpaes_encrypt_key_to_bsaes`. + // * `bsaes_ctr32_encrypt_blocks` satisfies the contract for + // `ctr32_encrypt_blocks`. + unsafe { + ctr32_encrypt_blocks!(bsaes_ctr32_encrypt_blocks, in_out, src, &bsaes_key, ctr,); + } +} diff --git a/src/aead/aes/fallback.rs b/src/aead/aes/fallback.rs new file mode 100644 index 0000000000..00caa694ab --- /dev/null +++ b/src/aead/aes/fallback.rs @@ -0,0 +1,47 @@ +// Copyright 2018-2024 Brian Smith. +// +// Permission to use, copy, modify, and/or distribute this software for any +// purpose with or without fee is hereby granted, provided that the above +// copyright notice and this permission notice appear in all copies. +// +// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES +// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY +// SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION +// OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN +// CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ +use super::{Block, Counter, EncryptBlock, EncryptCtr32, Iv, KeyBytes, AES_KEY}; +use crate::error; +use core::ops::RangeFrom; + +#[derive(Clone)] +pub struct Key { + inner: AES_KEY, +} + +impl Key { + pub(in super::super) fn new(bytes: KeyBytes<'_>) -> Result { + let inner = unsafe { set_encrypt_key!(aes_nohw_set_encrypt_key, bytes) }?; + Ok(Self { inner }) + } +} + +impl EncryptBlock for Key { + fn encrypt_block(&self, block: Block) -> Block { + unsafe { encrypt_block!(aes_nohw_encrypt, block, &self.inner) } + } + + fn encrypt_iv_xor_block(&self, iv: Iv, block: Block) -> Block { + super::encrypt_iv_xor_block_using_encrypt_block(self, iv, block) + } +} + +impl EncryptCtr32 for Key { + fn ctr32_encrypt_within(&self, in_out: &mut [u8], src: RangeFrom, ctr: &mut Counter) { + unsafe { + ctr32_encrypt_blocks!(aes_nohw_ctr32_encrypt_blocks, in_out, src, &self.inner, ctr) + } + } +} diff --git a/src/aead/aes/ffi.rs b/src/aead/aes/ffi.rs index 5248fbe228..840845059b 100644 --- a/src/aead/aes/ffi.rs +++ b/src/aead/aes/ffi.rs @@ -13,7 +13,7 @@ // CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. use super::{Block, KeyBytes, BLOCK_LEN}; -use crate::{bits::BitLength, c, cpu, error, polyfill::slice}; +use crate::{bits::BitLength, c, error, polyfill::slice}; use core::{num::NonZeroUsize, ops::RangeFrom}; /// nonce || big-endian counter. @@ -36,7 +36,6 @@ impl AES_KEY { pub(super) unsafe fn new( f: unsafe extern "C" fn(*const u8, BitLength, *mut AES_KEY) -> c::int, bytes: KeyBytes<'_>, - _cpu_features: cpu::Features, ) -> Result { let mut key = Self { rd_key: [0; 4 * (MAX_ROUNDS + 1)], @@ -63,7 +62,6 @@ impl AES_KEY { pub(super) unsafe fn derive( f: for<'a> unsafe extern "C" fn(*mut AES_KEY, &'a AES_KEY), src: &Self, - _cpu_features: cpu::Features, ) -> Self { let mut r = AES_KEY { rd_key: [0u32; 4 * (MAX_ROUNDS + 1)], @@ -89,12 +87,12 @@ impl AES_KEY { // In BoringSSL, the C prototypes for these are in // crypto/fipsmodule/aes/internal.h. macro_rules! set_encrypt_key { - ( $name:ident, $key_bytes:expr, $cpu_features:expr $(,)? ) => {{ + ( $name:ident, $key_bytes:expr $(,)? ) => {{ use crate::{bits::BitLength, c}; prefixed_extern! { fn $name(user_key: *const u8, bits: BitLength, key: *mut AES_KEY) -> c::int; } - $crate::aead::aes::ffi::AES_KEY::new($name, $key_bytes, $cpu_features) + $crate::aead::aes::ffi::AES_KEY::new($name, $key_bytes) }}; } @@ -129,7 +127,7 @@ impl AES_KEY { /// * The caller must ensure that fhe function `$name` satisfies the conditions /// for the `f` parameter to `ctr32_encrypt_blocks`. macro_rules! ctr32_encrypt_blocks { - ($name:ident, $in_out:expr, $src:expr, $key:expr, $ctr:expr, $cpu_features:expr ) => {{ + ($name:ident, $in_out:expr, $src:expr, $key:expr, $ctr:expr $(,)? ) => {{ use crate::{ aead::aes::{ffi::AES_KEY, Counter, BLOCK_LEN}, c, @@ -143,7 +141,7 @@ macro_rules! ctr32_encrypt_blocks { ivec: &Counter, ); } - $key.ctr32_encrypt_blocks($name, $in_out, $src, $ctr, $cpu_features) + $key.ctr32_encrypt_blocks($name, $in_out, $src, $ctr) }}; } @@ -172,7 +170,6 @@ impl AES_KEY { in_out: &mut [u8], src: RangeFrom, ctr: &mut Counter, - cpu_features: cpu::Features, ) { let (input, leftover) = slice::as_chunks(&in_out[src]); debug_assert_eq!(leftover.len(), 0); @@ -189,8 +186,6 @@ impl AES_KEY { let input = input.as_ptr(); let output: *mut [u8; BLOCK_LEN] = in_out.as_mut_ptr().cast(); - let _: cpu::Features = cpu_features; - // SAFETY: // * `input` points to `blocks` blocks. // * `output` points to space for `blocks` blocks to be written. 
@@ -200,8 +195,6 @@ impl AES_KEY { // `blocks` including zero. // * The caller is responsible for ensuring `key` was initialized by the // `set_encrypt_key!` invocation required by `f`. - // * CPU feature detection has been done so `f` can inspect - // CPU features. unsafe { f(input, output, blocks, self, ctr); } diff --git a/src/aead/aes/hw.rs b/src/aead/aes/hw.rs new file mode 100644 index 0000000000..0db11cfb37 --- /dev/null +++ b/src/aead/aes/hw.rs @@ -0,0 +1,66 @@ +// Copyright 2018-2024 Brian Smith. +// +// Permission to use, copy, modify, and/or distribute this software for any +// purpose with or without fee is hereby granted, provided that the above +// copyright notice and this permission notice appear in all copies. +// +// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES +// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY +// SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION +// OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN +// CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + +#![cfg(any(target_arch = "aarch64", target_arch = "x86", target_arch = "x86_64"))] + +use super::{Block, Counter, EncryptBlock, EncryptCtr32, Iv, KeyBytes, AES_KEY}; +use crate::{cpu, error}; +use core::ops::RangeFrom; + +#[cfg(target_arch = "aarch64")] +pub(in super::super) type RequiredCpuFeatures = cpu::arm::Aes; + +#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +pub(in super::super) type RequiredCpuFeatures = cpu::intel::Aes; + +#[derive(Clone)] +pub struct Key { + inner: AES_KEY, +} + +impl Key { + pub(in super::super) fn new( + bytes: KeyBytes<'_>, + _cpu: RequiredCpuFeatures, + ) -> Result { + let inner = unsafe { set_encrypt_key!(aes_hw_set_encrypt_key, bytes) }?; + Ok(Self { inner }) + } + + #[cfg(any(target_arch = "aarch64", target_arch = "x86_64"))] + #[must_use] + pub(in super::super) fn inner_less_safe(&self) -> &AES_KEY { + &self.inner + } +} + +impl EncryptBlock for Key { + fn encrypt_block(&self, block: Block) -> Block { + super::encrypt_block_using_encrypt_iv_xor_block(self, block) + } + + fn encrypt_iv_xor_block(&self, iv: Iv, block: Block) -> Block { + super::encrypt_iv_xor_block_using_ctr32(self, iv, block) + } +} + +impl EncryptCtr32 for Key { + fn ctr32_encrypt_within(&self, in_out: &mut [u8], src: RangeFrom, ctr: &mut Counter) { + #[cfg(target_arch = "x86_64")] + let _: cpu::Features = cpu::features(); + unsafe { + ctr32_encrypt_blocks!(aes_hw_ctr32_encrypt_blocks, in_out, src, &self.inner, ctr,) + } + } +} diff --git a/src/aead/aes/vp.rs b/src/aead/aes/vp.rs new file mode 100644 index 0000000000..fb97bd44f5 --- /dev/null +++ b/src/aead/aes/vp.rs @@ -0,0 +1,130 @@ +// Copyright 2018-2024 Brian Smith. +// +// Permission to use, copy, modify, and/or distribute this software for any +// purpose with or without fee is hereby granted, provided that the above +// copyright notice and this permission notice appear in all copies. +// +// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES +// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +// MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY +// SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION +// OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN +// CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + +#![cfg(any( + target_arch = "aarch64", + target_arch = "arm", + target_arch = "x86", + target_arch = "x86_64" +))] + +use super::{Block, Counter, EncryptBlock, EncryptCtr32, Iv, KeyBytes, AES_KEY}; +use crate::{cpu, error}; +use core::ops::RangeFrom; + +#[cfg(any(target_arch = "aarch64", target_arch = "arm"))] +type RequiredCpuFeatures = cpu::arm::Neon; + +#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +type RequiredCpuFeatures = cpu::intel::Ssse3; + +#[derive(Clone)] +pub(in super::super) struct Key { + inner: AES_KEY, +} + +impl Key { + pub(in super::super) fn new( + bytes: KeyBytes<'_>, + _cpu: RequiredCpuFeatures, + ) -> Result { + let inner = unsafe { set_encrypt_key!(vpaes_set_encrypt_key, bytes) }?; + Ok(Self { inner }) + } +} + +#[cfg(any(target_arch = "aarch64", target_arch = "arm", target_arch = "x86_64"))] +impl EncryptBlock for Key { + fn encrypt_block(&self, block: Block) -> Block { + super::encrypt_block_using_encrypt_iv_xor_block(self, block) + } + + fn encrypt_iv_xor_block(&self, iv: Iv, block: Block) -> Block { + super::encrypt_iv_xor_block_using_ctr32(self, iv, block) + } +} + +#[cfg(any(target_arch = "aarch64", target_arch = "x86_64"))] +impl EncryptCtr32 for Key { + fn ctr32_encrypt_within(&self, in_out: &mut [u8], src: RangeFrom, ctr: &mut Counter) { + unsafe { ctr32_encrypt_blocks!(vpaes_ctr32_encrypt_blocks, in_out, src, &self.inner, ctr,) } + } +} + +#[cfg(target_arch = "arm")] +impl EncryptCtr32 for Key { + fn ctr32_encrypt_within(&self, in_out: &mut [u8], src: RangeFrom, ctr: &mut Counter) { + use super::{bs, BLOCK_LEN}; + + let in_out = { + let blocks = in_out[src.clone()].len() / BLOCK_LEN; + + // bsaes operates in batches of 8 blocks. + let bsaes_blocks = if blocks >= 8 && (blocks % 8) < 6 { + // It's faster to use bsaes for all the full batches and then + // switch to vpaes for the last partial batch (if any). + blocks - (blocks % 8) + } else if blocks >= 8 { + // It's faster to let bsaes handle everything including + // the last partial batch. + blocks + } else { + // It's faster to let vpaes handle everything. + 0 + }; + let bsaes_in_out_len = bsaes_blocks * BLOCK_LEN; + + // SAFETY: + // * self.inner was initialized with `vpaes_set_encrypt_key` above, + // as required by `bsaes_ctr32_encrypt_blocks_with_vpaes_key`. + unsafe { + bs::ctr32_encrypt_blocks_with_vpaes_key( + &mut in_out[..(src.start + bsaes_in_out_len)], + src.clone(), + &self.inner, + ctr, + ); + } + + &mut in_out[bsaes_in_out_len..] + }; + + // SAFETY: + // * self.inner was initialized with `vpaes_set_encrypt_key` above, + // as required by `vpaes_ctr32_encrypt_blocks`. + // * `vpaes_ctr32_encrypt_blocks` satisfies the contract for + // `ctr32_encrypt_blocks`. 
+        unsafe { ctr32_encrypt_blocks!(vpaes_ctr32_encrypt_blocks, in_out, src, &self.inner, ctr,) }
+    }
+}
+
+#[cfg(target_arch = "x86")]
+impl EncryptBlock for Key {
+    fn encrypt_block(&self, block: Block) -> Block {
+        unsafe { encrypt_block!(vpaes_encrypt, block, &self.inner) }
+    }
+
+    fn encrypt_iv_xor_block(&self, iv: Iv, block: Block) -> Block {
+        super::encrypt_iv_xor_block_using_encrypt_block(self, iv, block)
+    }
+}
+
+#[cfg(target_arch = "x86")]
+impl EncryptCtr32 for Key {
+    fn ctr32_encrypt_within(&self, in_out: &mut [u8], src: RangeFrom<usize>, ctr: &mut Counter) {
+        super::super::shift::shift_full_blocks(in_out, src, |input| {
+            self.encrypt_iv_xor_block(ctr.increment(), *input)
+        });
+    }
+}
diff --git a/src/aead/aes_gcm.rs b/src/aead/aes_gcm.rs
index aa83a975b9..ca44ac3403 100644
--- a/src/aead/aes_gcm.rs
+++ b/src/aead/aes_gcm.rs
@@ -22,48 +22,110 @@ use crate::{
 };
 use core::ops::RangeFrom;
 
+#[cfg(target_arch = "x86_64")]
+use aes::EncryptCtr32 as _;
+
+#[cfg(any(
+    target_arch = "aarch64",
+    target_arch = "arm",
+    target_arch = "x86",
+    target_arch = "x86_64"
+))]
+use cpu::GetFeature as _;
+
 #[derive(Clone)]
-pub struct Key {
-    gcm_key: gcm::Key, // First because it has a large alignment requirement.
-    aes_key: aes::Key,
-}
+pub(super) struct Key(DynKey);
 
 impl Key {
     pub(super) fn new(
         key: aes::KeyBytes,
         cpu_features: cpu::Features,
     ) -> Result<Self, error::Unspecified> {
-        let aes_key = aes::Key::new(key, cpu_features)?;
-        let gcm_key = gcm::Key::new(
-            aes_key.encrypt_block(ZERO_BLOCK, cpu_features),
-            cpu_features,
-        );
-        Ok(Self { gcm_key, aes_key })
+        Ok(Self(DynKey::new(key, cpu_features)?))
+    }
+}
+
+#[derive(Clone)]
+enum DynKey {
+    #[cfg(target_arch = "x86_64")]
+    AesHwClMulAvxMovbe(Combo<aes::hw::Key, gcm::clmulavxmovbe::Key>),
+
+    #[cfg(any(target_arch = "aarch64", target_arch = "x86", target_arch = "x86_64"))]
+    AesHwClMul(Combo<aes::hw::Key, gcm::clmul::Key>),
+
+    #[cfg(any(target_arch = "aarch64", target_arch = "arm"))]
+    Simd(Combo<aes::vp::Key, gcm::neon::Key>),
+
+    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+    Simd(Combo<aes::vp::Key, gcm::fallback::Key>),
+
+    Fallback(Combo<aes::fallback::Key, gcm::fallback::Key>),
+}
+
+impl DynKey {
+    fn new(key: aes::KeyBytes, cpu_features: cpu::Features) -> Result<Self, error::Unspecified> {
+        #[cfg(target_arch = "x86_64")]
+        if let (Some(aes), Some(gcm)) = (cpu_features.get_feature(), cpu_features.get_feature()) {
+            let aes_key = aes::hw::Key::new(key, aes)?;
+            let gcm_key_value = derive_gcm_key_value(&aes_key);
+            let gcm_key = gcm::clmulavxmovbe::Key::new(gcm_key_value, gcm);
+            return Ok(Self::AesHwClMulAvxMovbe(Combo { aes_key, gcm_key }));
+        }
+
+        #[cfg(any(target_arch = "aarch64", target_arch = "x86_64", target_arch = "x86"))]
+        if let (Some(aes), Some(gcm)) = (cpu_features.get_feature(), cpu_features.get_feature()) {
+            let aes_key = aes::hw::Key::new(key, aes)?;
+            let gcm_key_value = derive_gcm_key_value(&aes_key);
+            let gcm_key = gcm::clmul::Key::new(gcm_key_value, gcm);
+            return Ok(Self::AesHwClMul(Combo { aes_key, gcm_key }));
+        }
+
+        #[cfg(any(target_arch = "aarch64", target_arch = "arm"))]
+        if let (Some(aes), Some(gcm)) = (cpu_features.get_feature(), cpu_features.get_feature()) {
+            let aes_key = aes::vp::Key::new(key, aes)?;
+            let gcm_key_value = derive_gcm_key_value(&aes_key);
+            let gcm_key = gcm::neon::Key::new(gcm_key_value, gcm);
+            return Ok(Self::Simd(Combo { aes_key, gcm_key }));
+        }
+
+        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+        if let Some(aes) = cpu_features.get_feature() {
+            let aes_key = aes::vp::Key::new(key, aes)?;
+            let gcm_key_value = derive_gcm_key_value(&aes_key);
+            let gcm_key = gcm::fallback::Key::new(gcm_key_value);
+            return Ok(Self::Simd(Combo { aes_key, gcm_key }));
+        }
+
+        let _ = cpu_features;
+
+        let aes_key = aes::fallback::Key::new(key)?;
+        let gcm_key_value = derive_gcm_key_value(&aes_key);
+        let gcm_key = gcm::fallback::Key::new(gcm_key_value);
+        Ok(Self::Fallback(Combo { aes_key, gcm_key }))
     }
 }
 
+fn derive_gcm_key_value(aes_key: &impl aes::EncryptBlock) -> gcm::KeyValue {
+    gcm::KeyValue::new(aes_key.encrypt_block(ZERO_BLOCK))
+}
+
 const CHUNK_BLOCKS: usize = 3 * 1024 / 16;
 
+#[inline(never)]
 pub(super) fn seal(
-    key: &Key,
+    Key(key): &Key,
     nonce: Nonce,
     aad: Aad<&[u8]>,
     in_out: &mut [u8],
-    cpu_features: cpu::Features,
 ) -> Result<Tag, error::Unspecified> {
-    let Key { gcm_key, aes_key } = key;
-
-    let mut auth = gcm::Context::new(gcm_key, aad, in_out.len(), cpu_features)?;
-
     let mut ctr = Counter::one(nonce);
     let tag_iv = ctr.increment();
 
-    #[cfg(target_arch = "x86_64")]
-    let in_out = {
-        if !aes_key.is_aes_hw(cpu_features) || !auth.is_avx() {
-            in_out
-        } else {
+    match key {
+        #[cfg(target_arch = "x86_64")]
+        DynKey::AesHwClMulAvxMovbe(Combo { aes_key, gcm_key }) => {
             use crate::c;
+            let mut auth = gcm::Context::new(gcm_key, aad, in_out.len())?;
             let (htable, xi) = auth.inner();
             prefixed_extern! {
                 // `HTable` and `Xi` should be 128-bit aligned. TODO: Can we shrink `HTable`? The
                 // assembly says it needs just nine values in that array.
@@ -89,26 +151,27 @@ pub(super) fn seal(
                 )
             };
 
-            match in_out.get_mut(processed..) {
+            let remaining = match in_out.get_mut(processed..) {
                 Some(remaining) => remaining,
                 None => {
                     // This can't happen. If it did, then the assembly already
                     // caused a buffer overflow.
                     unreachable!()
                 }
-            }
+            };
+            let (whole, remainder) = slice::as_chunks_mut(remaining);
+            aes_key.ctr32_encrypt_within(slice::flatten_mut(whole), 0.., &mut ctr);
+            auth.update_blocks(whole);
+            seal_finish(aes_key, auth, remainder, ctr, tag_iv)
         }
-    };
-    let (whole, remainder) = slice::as_chunks_mut(in_out);
-
-    #[cfg(target_arch = "aarch64")]
-    let whole = {
-        if !aes_key.is_aes_hw(cpu_features) || !auth.is_clmul() {
-            whole
-        } else {
+        #[cfg(target_arch = "aarch64")]
+        DynKey::AesHwClMul(Combo { aes_key, gcm_key }) => {
             use crate::bits::BitLength;
 
+            let mut auth = gcm::Context::new(gcm_key, aad, in_out.len())?;
+
+            let (whole, remainder) = slice::as_chunks_mut(in_out);
             let whole_block_bits = auth.in_out_whole_block_bits();
             let whole_block_bits_u64: BitLength<u64> = whole_block_bits.into();
             if let Ok(whole_block_bits) = whole_block_bits_u64.try_into() {
@@ -137,20 +200,54 @@ pub(super) fn seal(
                 )
             }
         }
-
-            &mut []
+            seal_finish(aes_key, auth, remainder, ctr, tag_iv)
         }
-    };
+
+        #[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
+        DynKey::AesHwClMul(c) => seal_strided(c, aad, in_out, ctr, tag_iv),
+
+        #[cfg(any(
+            target_arch = "aarch64",
+            target_arch = "arm",
+            target_arch = "x86_64",
+            target_arch = "x86"
+        ))]
+        DynKey::Simd(c) => seal_strided(c, aad, in_out, ctr, tag_iv),
+
+        DynKey::Fallback(c) => seal_strided(c, aad, in_out, ctr, tag_iv),
+    }
+}
+
+fn seal_strided<A: aes::EncryptBlock + aes::EncryptCtr32, G: gcm::UpdateBlocks + gcm::Gmult>(
+    Combo { aes_key, gcm_key }: &Combo<A, G>,
+    aad: Aad<&[u8]>,
+    in_out: &mut [u8],
+    mut ctr: Counter,
+    tag_iv: aes::Iv,
+) -> Result<Tag, error::Unspecified> {
+    let mut auth = gcm::Context::new(gcm_key, aad, in_out.len())?;
+
+    let (whole, remainder) = slice::as_chunks_mut(in_out);
 
     for chunk in whole.chunks_mut(CHUNK_BLOCKS) {
-        aes_key.ctr32_encrypt_within(slice::flatten_mut(chunk), 0.., &mut ctr, cpu_features);
+        aes_key.ctr32_encrypt_within(slice::flatten_mut(chunk), 0.., &mut ctr);
         auth.update_blocks(chunk);
     }
+    seal_finish(aes_key, auth, remainder, ctr, tag_iv)
+}
+
+fn seal_finish<A: aes::EncryptBlock, G: gcm::Gmult>(
+    aes_key: &A,
+    mut auth: gcm::Context<G>,
+    remainder: &mut [u8],
+    ctr: Counter,
+    tag_iv: aes::Iv,
+) -> Result<Tag, error::Unspecified> {
+    if
!remainder.is_empty() { let mut input = ZERO_BLOCK; overwrite_at_start(&mut input, remainder); - let mut output = aes_key.encrypt_iv_xor_block(ctr.into(), input, cpu_features); + let mut output = aes_key.encrypt_iv_xor_block(ctr.into(), input); output[remainder.len()..].fill(0); auth.update_block(output); overwrite_at_start(remainder, &output); @@ -159,36 +256,26 @@ pub(super) fn seal( Ok(finish(aes_key, auth, tag_iv)) } +#[inline(never)] pub(super) fn open( - key: &Key, + Key(key): &Key, nonce: Nonce, aad: Aad<&[u8]>, in_out: &mut [u8], src: RangeFrom, - cpu_features: cpu::Features, ) -> Result { - let Key { gcm_key, aes_key } = key; - - let mut auth = { - let unprefixed_len = in_out - .len() - .checked_sub(src.start) - .ok_or(error::Unspecified)?; - gcm::Context::new(gcm_key, aad, unprefixed_len, cpu_features) - }?; + // Check that `src` is in bounds. + #[cfg(any(target_arch = "aarch64", target_arch = "x86_64"))] + let input = in_out.get(src.clone()).ok_or(error::Unspecified)?; let mut ctr = Counter::one(nonce); let tag_iv = ctr.increment(); - let in_prefix_len = src.start; - - #[cfg(target_arch = "x86_64")] - let in_out = { - if !aes_key.is_aes_hw(cpu_features) || !auth.is_avx() { - in_out - } else { + match key { + #[cfg(target_arch = "x86_64")] + DynKey::AesHwClMulAvxMovbe(Combo { aes_key, gcm_key }) => { use crate::c; - let (htable, xi) = auth.inner(); + prefixed_extern! { // `HTable` and `Xi` should be 128-bit aligned. TODO: Can we shrink `HTable`? The // assembly says it needs just nine values in that array. @@ -202,6 +289,8 @@ pub(super) fn open( Xi: &mut gcm::Xi) -> c::size_t; } + let mut auth = gcm::Context::new(gcm_key, aad, input.len())?; + let (htable, xi) = auth.inner(); let processed = unsafe { aesni_gcm_decrypt( in_out[src.clone()].as_ptr(), @@ -213,24 +302,48 @@ pub(super) fn open( xi, ) }; - match in_out.get_mut(processed..) { + let in_out = match in_out.get_mut(processed..) { Some(remaining) => remaining, None => { // This can't happen. If it did, then the assembly already // caused a buffer overflow. unreachable!() } - } + }; + // Authenticate any remaining whole blocks. + let input = match in_out.get(src.clone()) { + Some(remaining_input) => remaining_input, + None => unreachable!(), + }; + let (whole, _) = slice::as_chunks(input); + auth.update_blocks(whole); + + let whole_len = slice::flatten(whole).len(); + + // Decrypt any remaining whole blocks. + aes_key.ctr32_encrypt_within( + &mut in_out[..(src.start + whole_len)], + src.clone(), + &mut ctr, + ); + + let in_out = match in_out.get_mut(whole_len..) { + Some(partial) => partial, + None => unreachable!(), + }; + open_finish(aes_key, auth, in_out, src, ctr, tag_iv) } - }; - #[cfg(target_arch = "aarch64")] - let in_out = { - if !aes_key.is_aes_hw(cpu_features) || !auth.is_clmul() { - in_out - } else { + #[cfg(target_arch = "aarch64")] + DynKey::AesHwClMul(Combo { aes_key, gcm_key }) => { use crate::bits::BitLength; + let input_len = input.len(); + let mut auth = gcm::Context::new(gcm_key, aad, input_len)?; + + let remainder_len = input_len % BLOCK_LEN; + let whole_len = input_len - remainder_len; + let whole_block_bits = auth.in_out_whole_block_bits(); let whole_block_bits_u64: BitLength = whole_block_bits.into(); if let Ok(whole_block_bits) = whole_block_bits_u64.try_into() { @@ -260,15 +373,43 @@ pub(super) fn open( ) } } - - &mut in_out[whole_block_bits.as_usize_bytes_rounded_up()..] 
+ let remainder = &mut in_out[whole_len..]; + open_finish(aes_key, auth, remainder, src, ctr, tag_iv) } - }; - let whole_len = { - let in_out_len = in_out.len() - in_prefix_len; - in_out_len - (in_out_len % BLOCK_LEN) - }; + #[cfg(any(target_arch = "x86_64", target_arch = "x86"))] + DynKey::AesHwClMul(c) => open_strided(c, aad, in_out, src, ctr, tag_iv), + + #[cfg(any( + target_arch = "aarch64", + target_arch = "arm", + target_arch = "x86_64", + target_arch = "x86" + ))] + DynKey::Simd(c) => open_strided(c, aad, in_out, src, ctr, tag_iv), + + DynKey::Fallback(c) => open_strided(c, aad, in_out, src, ctr, tag_iv), + } +} + +#[inline(always)] +fn open_strided( + Combo { aes_key, gcm_key }: &Combo, + aad: Aad<&[u8]>, + in_out: &mut [u8], + src: RangeFrom, + mut ctr: Counter, + tag_iv: aes::Iv, +) -> Result { + let input = in_out.get(src.clone()).ok_or(error::Unspecified)?; + let input_len = input.len(); + + let mut auth = gcm::Context::new(gcm_key, aad, input_len)?; + + let remainder_len = input_len % BLOCK_LEN; + let whole_len = input_len - remainder_len; + let in_prefix_len = src.start; + { let mut chunk_len = CHUNK_BLOCKS * BLOCK_LEN; let mut output = 0; @@ -290,29 +431,40 @@ pub(super) fn open( &mut in_out[output..][..(chunk_len + in_prefix_len)], in_prefix_len.., &mut ctr, - cpu_features, ); output += chunk_len; input += chunk_len; } } - let remainder = &mut in_out[whole_len..]; - shift::shift_partial((in_prefix_len, remainder), |remainder| { + open_finish(aes_key, auth, &mut in_out[whole_len..], src, ctr, tag_iv) +} + +fn open_finish( + aes_key: &A, + mut auth: gcm::Context, + remainder: &mut [u8], + src: RangeFrom, + ctr: Counter, + tag_iv: aes::Iv, +) -> Result { + shift::shift_partial((src.start, remainder), |remainder| { let mut input = ZERO_BLOCK; overwrite_at_start(&mut input, remainder); auth.update_block(input); - aes_key.encrypt_iv_xor_block(ctr.into(), input, cpu_features) + aes_key.encrypt_iv_xor_block(ctr.into(), input) }); Ok(finish(aes_key, auth, tag_iv)) } -fn finish(aes_key: &aes::Key, gcm_ctx: gcm::Context, tag_iv: aes::Iv) -> Tag { +fn finish( + aes_key: &A, + gcm_ctx: gcm::Context, + tag_iv: aes::Iv, +) -> Tag { // Finalize the tag and return it. 
- gcm_ctx.pre_finish(|pre_tag, cpu_features| { - Tag(aes_key.encrypt_iv_xor_block(tag_iv, pre_tag, cpu_features)) - }) + gcm_ctx.pre_finish(|pre_tag| Tag(aes_key.encrypt_iv_xor_block(tag_iv, pre_tag))) } pub(super) const MAX_IN_OUT_LEN: usize = super::max_input_len(BLOCK_LEN, 2); @@ -326,3 +478,9 @@ pub(super) const MAX_IN_OUT_LEN: usize = super::max_input_len(BLOCK_LEN, 2); // [RFC 5116 Section 5.2]: https://tools.ietf.org/html/rfc5116#section-5.2 const _MAX_INPUT_LEN_BOUNDED_BY_NIST: () = assert!(MAX_IN_OUT_LEN == usize_from_u64_saturated(((1u64 << 39) - 256) / 8)); + +#[derive(Copy, Clone)] +pub(super) struct Combo { + pub(super) aes_key: Aes, + pub(super) gcm_key: Gcm, +} diff --git a/src/aead/algorithm.rs b/src/aead/algorithm.rs index 1357e93c03..1556cf5dde 100644 --- a/src/aead/algorithm.rs +++ b/src/aead/algorithm.rs @@ -187,13 +187,13 @@ fn aes_gcm_seal( nonce: Nonce, aad: Aad<&[u8]>, in_out: &mut [u8], - cpu_features: cpu::Features, + _cpu_features: cpu::Features, ) -> Result { let key = match key { KeyInner::AesGcm(key) => key, _ => unreachable!(), }; - aes_gcm::seal(key, nonce, aad, in_out, cpu_features) + aes_gcm::seal(key, nonce, aad, in_out) } pub(super) fn aes_gcm_open( @@ -202,13 +202,13 @@ pub(super) fn aes_gcm_open( aad: Aad<&[u8]>, in_out: &mut [u8], src: RangeFrom, - cpu_features: cpu::Features, + _cpu_features: cpu::Features, ) -> Result { let key = match key { KeyInner::AesGcm(key) => key, _ => unreachable!(), }; - aes_gcm::open(key, nonce, aad, in_out, src, cpu_features) + aes_gcm::open(key, nonce, aad, in_out, src) } /// ChaCha20-Poly1305 as described in [RFC 8439]. diff --git a/src/aead/gcm.rs b/src/aead/gcm.rs index 27716e4ba8..7fcfd88d86 100644 --- a/src/aead/gcm.rs +++ b/src/aead/gcm.rs @@ -1,4 +1,4 @@ -// Copyright 2018 Brian Smith. +// Copyright 2018-2024 Brian Smith. // // Permission to use, copy, modify, and/or distribute this software for any // purpose with or without fee is hereby granted, provided that the above @@ -16,11 +16,13 @@ use self::ffi::{Block, BLOCK_LEN, ZERO_BLOCK}; use super::{aes_gcm, Aad}; use crate::{ bits::{BitLength, FromByteLen as _}, - cpu, error, - polyfill::{sliceutil::overwrite_at_start, ArraySplitMap as _}, + error, + polyfill::{sliceutil::overwrite_at_start, NotSend}, }; use cfg_if::cfg_if; +pub(super) use ffi::KeyValue; + cfg_if! { if #[cfg(any(target_arch = "aarch64", target_arch = "x86_64"))] { pub(super) use self::ffi::{HTable, Xi}; @@ -31,48 +33,26 @@ cfg_if! 
{ #[macro_use] mod ffi; -mod gcm_nohw; - -#[derive(Clone)] -pub struct Key { - h_table: HTable, -} - -impl Key { - pub(super) fn new(h_be: Block, cpu_features: cpu::Features) -> Self { - let h: [u64; 2] = h_be.array_split_map(u64::from_be_bytes); - let h_table = match detect_implementation(cpu_features) { - #[cfg(target_arch = "x86_64")] - Implementation::CLMUL if has_avx_movbe(cpu_features) => unsafe { - htable_new!(gcm_init_avx, &h, cou_features) - }, - #[cfg(any(target_arch = "aarch64", target_arch = "x86_64", target_arch = "x86"))] - Implementation::CLMUL => unsafe { htable_new!(gcm_init_clmul, &h, cpu_features) }, - - #[cfg(any(target_arch = "aarch64", target_arch = "arm"))] - Implementation::NEON => unsafe { htable_new!(gcm_init_neon, &h, cpu_features) }, - - Implementation::Fallback => HTable::new_single_entry(gcm_nohw::init(h)), - }; - Self { h_table } - } -} +pub(super) mod clmul; +pub(super) mod clmulavxmovbe; +pub(super) mod fallback; +pub(super) mod neon; -pub struct Context<'key> { +pub(super) struct Context<'key, K> { Xi: Xi, - h_table: &'key HTable, + key: &'key K, aad_len: BitLength, in_out_len: BitLength, - cpu_features: cpu::Features, + _not_send: NotSend, } -impl<'key> Context<'key> { +impl<'key, K: Gmult> Context<'key, K> { + #[inline(always)] pub(crate) fn new( - key: &'key Key, + key: &'key K, aad: Aad<&[u8]>, in_out_len: usize, - cpu_features: cpu::Features, ) -> Result { if in_out_len > aes_gcm::MAX_IN_OUT_LEN { return Err(error::Unspecified); @@ -86,10 +66,10 @@ impl<'key> Context<'key> { let mut ctx = Self { Xi: Xi(ZERO_BLOCK), - h_table: &key.h_table, + key, aad_len, in_out_len, - cpu_features, + _not_send: NotSend::VALUE, }; for ad in aad.0.chunks(BLOCK_LEN) { @@ -100,8 +80,10 @@ impl<'key> Context<'key> { Ok(ctx) } +} - #[cfg(all(target_arch = "aarch64", target_pointer_width = "64"))] +#[cfg(all(target_arch = "aarch64", target_pointer_width = "64"))] +impl Context<'_, K> { pub(super) fn in_out_whole_block_bits(&self) -> BitLength { use crate::polyfill::usize_from_u64; const WHOLE_BLOCK_BITS_MASK: usize = !0b111_1111; @@ -110,160 +92,57 @@ impl<'key> Context<'key> { assert!(WHOLE_BLOCK_BITS_MASK == !((BLOCK_LEN * 8) - 1)); BitLength::from_bits(usize_from_u64(self.in_out_len.as_bits()) & WHOLE_BLOCK_BITS_MASK) } +} - /// Access to `inner` for the integrated AES-GCM implementations only. - #[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))] +#[cfg(target_arch = "aarch64")] +/// Access to `inner` for the integrated AES-GCM implementations only. +impl Context<'_, clmul::Key> { #[inline] pub(super) fn inner(&mut self) -> (&HTable, &mut Xi) { - (self.h_table, &mut self.Xi) + (&self.key.inner(), &mut self.Xi) } +} - pub fn update_blocks(&mut self, input: &[[u8; BLOCK_LEN]]) { - let xi = &mut self.Xi; - let h_table = self.h_table; - - match detect_implementation(self.cpu_features) { - #[cfg(target_arch = "x86_64")] - // SAFETY: gcm_ghash_avx satisfies the ghash! contract. - Implementation::CLMUL if has_avx_movbe(self.cpu_features) => unsafe { - ghash!(gcm_ghash_avx, xi, h_table, input, self.cpu_features); - }, - - #[cfg(target_arch = "aarch64")] - // If we have CLMUL then we probably have AES, so the integrated - // implementation will take care of everything except any final - // partial block. Thus, we avoid having an optimized implementation - // here. - Implementation::CLMUL => self.update_blocks_1x(input), - - #[cfg(any(target_arch = "x86_64", target_arch = "x86"))] - // SAFETY: gcm_ghash_clmul satisfies the ghash! contract on these - // targets. 
- Implementation::CLMUL => unsafe { - ghash!(gcm_ghash_clmul, xi, h_table, input, self.cpu_features); - }, - - #[cfg(any(target_arch = "aarch64", target_arch = "arm"))] - // SAFETY: gcm_ghash_neon satisfies the ghash! contract on these - // targets. - Implementation::NEON => unsafe { - ghash!(gcm_ghash_neon, xi, h_table, input, self.cpu_features); - }, - - Implementation::Fallback => { - gcm_nohw::ghash(xi, h_table.first_entry(), input); - } - } +#[cfg(target_arch = "x86_64")] +impl Context<'_, clmulavxmovbe::Key> { + /// Access to `inner` for the integrated AES-GCM implementations only. + #[inline] + pub(super) fn inner(&mut self) -> (&HTable, &mut Xi) { + (self.key.inner(), &mut self.Xi) } +} - #[cfg(target_arch = "aarch64")] - #[inline(never)] - fn update_blocks_1x(&mut self, input: &[[u8; BLOCK_LEN]]) { - for input in input { - self.update_block(*input); - } +impl Context<'_, K> { + #[inline(always)] + pub fn update_blocks(&mut self, input: &[[u8; BLOCK_LEN]]) { + self.key.update_blocks(&mut self.Xi, input); } +} +impl Context<'_, K> { pub fn update_block(&mut self, a: Block) { self.Xi.bitxor_assign(a); - - let xi = &mut self.Xi; - let h_table = self.h_table; - - match detect_implementation(self.cpu_features) { - #[cfg(any(target_arch = "aarch64", target_arch = "x86_64", target_arch = "x86"))] - Implementation::CLMUL => unsafe { - gmult!(gcm_gmult_clmul, xi, h_table, self.cpu_features) - }, - - #[cfg(any(target_arch = "aarch64", target_arch = "arm"))] - Implementation::NEON => unsafe { - gmult!(gcm_gmult_neon, xi, h_table, self.cpu_features) - }, - - Implementation::Fallback => { - gcm_nohw::gmult(xi, h_table.first_entry()); - } - } + self.key.gmult(&mut self.Xi); } + #[inline(always)] pub(super) fn pre_finish(mut self, f: F) -> super::Tag where - F: FnOnce(Block, cpu::Features) -> super::Tag, + F: FnOnce(Block) -> super::Tag, { let mut block = [0u8; BLOCK_LEN]; let (alen, clen) = block.split_at_mut(BLOCK_LEN / 2); alen.copy_from_slice(&BitLength::::to_be_bytes(self.aad_len)); clen.copy_from_slice(&BitLength::::to_be_bytes(self.in_out_len)); self.update_block(block); - f(self.Xi.into_block(), self.cpu_features) - } - - #[cfg(target_arch = "x86_64")] - pub(super) fn is_avx(&self) -> bool { - match detect_implementation(self.cpu_features) { - Implementation::CLMUL => has_avx_movbe(self.cpu_features), - _ => false, - } - } - - #[cfg(target_arch = "aarch64")] - pub(super) fn is_clmul(&self) -> bool { - matches!( - detect_implementation(self.cpu_features), - Implementation::CLMUL - ) + f(self.Xi.0) } } -#[allow(clippy::upper_case_acronyms)] -enum Implementation { - #[cfg(any(target_arch = "aarch64", target_arch = "x86_64", target_arch = "x86"))] - CLMUL, - - #[cfg(any(target_arch = "aarch64", target_arch = "arm"))] - NEON, - - Fallback, -} - -#[inline] -fn detect_implementation(cpu_features: cpu::Features) -> Implementation { - // `cpu_features` is only used for specific platforms. 
- #[cfg(not(any( - target_arch = "aarch64", - target_arch = "arm", - target_arch = "x86_64", - target_arch = "x86" - )))] - let _cpu_features = cpu_features; - - #[cfg(target_arch = "aarch64")] - { - if cpu::arm::PMULL.available(cpu_features) { - return Implementation::CLMUL; - } - } - - #[cfg(any(target_arch = "x86_64", target_arch = "x86"))] - { - if cpu::intel::FXSR.available(cpu_features) && cpu::intel::PCLMULQDQ.available(cpu_features) - { - return Implementation::CLMUL; - } - } - - #[cfg(any(target_arch = "aarch64", target_arch = "arm"))] - { - if cpu::arm::NEON.available(cpu_features) { - return Implementation::NEON; - } - } - - Implementation::Fallback +pub(super) trait Gmult { + fn gmult(&self, xi: &mut Xi); } -#[cfg(target_arch = "x86_64")] -fn has_avx_movbe(cpu_features: cpu::Features) -> bool { - cpu::intel::AVX.available(cpu_features) && cpu::intel::MOVBE.available(cpu_features) +pub(super) trait UpdateBlocks { + fn update_blocks(&self, xi: &mut Xi, input: &[[u8; BLOCK_LEN]]); } diff --git a/src/aead/gcm/clmul.rs b/src/aead/gcm/clmul.rs new file mode 100644 index 0000000000..848258a841 --- /dev/null +++ b/src/aead/gcm/clmul.rs @@ -0,0 +1,66 @@ +// Copyright 2018-2024 Brian Smith. +// +// Permission to use, copy, modify, and/or distribute this software for any +// purpose with or without fee is hereby granted, provided that the above +// copyright notice and this permission notice appear in all copies. +// +// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES +// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY +// SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION +// OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN +// CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ +#![cfg(any(target_arch = "aarch64", target_arch = "x86", target_arch = "x86_64"))] + +use super::{ffi::KeyValue, Gmult, HTable, Xi}; +use crate::cpu; + +#[cfg(target_arch = "aarch64")] +pub(in super::super) type RequiredCpuFeatures = cpu::arm::PMull; + +#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +pub(in super::super) type RequiredCpuFeatures = (cpu::intel::ClMul, cpu::intel::Fxsr); + +#[derive(Clone)] +pub struct Key { + h_table: HTable, +} + +impl Key { + pub(in super::super) fn new(value: KeyValue, _cpu: RequiredCpuFeatures) -> Self { + Self { + h_table: unsafe { htable_new!(gcm_init_clmul, value) }, + } + } + + #[cfg(target_arch = "x86_64")] + pub(super) fn new_avx( + value: KeyValue, + _cpu_features: super::clmulavxmovbe::RequiredCpuFeatures, + ) -> Self { + Self { + h_table: unsafe { htable_new!(gcm_init_avx, value) }, + } + } + + #[cfg(any(target_arch = "aarch64", target_arch = "x86_64"))] + pub(super) fn inner(&self) -> &HTable { + &self.h_table + } +} + +impl Gmult for Key { + fn gmult(&self, xi: &mut Xi) { + unsafe { gmult!(gcm_gmult_clmul, xi, &self.h_table) } + } +} + +#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +impl super::UpdateBlocks for Key { + fn update_blocks(&self, xi: &mut Xi, input: &[[u8; super::BLOCK_LEN]]) { + let _: cpu::Features = cpu::features(); + unsafe { ghash!(gcm_ghash_clmul, xi, &self.h_table, input) } + } +} diff --git a/src/aead/gcm/clmulavxmovbe.rs b/src/aead/gcm/clmulavxmovbe.rs new file mode 100644 index 0000000000..753bb27906 --- /dev/null +++ b/src/aead/gcm/clmulavxmovbe.rs @@ -0,0 +1,53 @@ +// Copyright 2018-2024 Brian Smith. +// +// Permission to use, copy, modify, and/or distribute this software for any +// purpose with or without fee is hereby granted, provided that the above +// copyright notice and this permission notice appear in all copies. +// +// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES +// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY +// SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION +// OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN +// CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + +#![cfg(target_arch = "x86_64")] + +use super::{clmul, Gmult, HTable, KeyValue, UpdateBlocks, Xi, BLOCK_LEN}; +use crate::cpu; + +pub(in super::super) type RequiredCpuFeatures = ( + clmul::RequiredCpuFeatures, + cpu::intel::Avx, + cpu::intel::Movbe, +); + +#[derive(Clone)] +pub struct Key { + inner: clmul::Key, +} + +impl Key { + pub(in super::super) fn new(key_value: KeyValue, cpu: RequiredCpuFeatures) -> Self { + Self { + inner: clmul::Key::new_avx(key_value, cpu), + } + } + + pub(super) fn inner(&self) -> &HTable { + self.inner.inner() + } +} + +impl Gmult for Key { + fn gmult(&self, xi: &mut Xi) { + self.inner.gmult(xi) + } +} + +impl UpdateBlocks for Key { + fn update_blocks(&self, xi: &mut Xi, input: &[[u8; BLOCK_LEN]]) { + unsafe { ghash!(gcm_ghash_avx, xi, &self.inner.inner(), input,) } + } +} diff --git a/src/aead/gcm/gcm_nohw.rs b/src/aead/gcm/fallback.rs similarity index 92% rename from src/aead/gcm/gcm_nohw.rs rename to src/aead/gcm/fallback.rs index 77ca08e056..219fbcc81f 100644 --- a/src/aead/gcm/gcm_nohw.rs +++ b/src/aead/gcm/fallback.rs @@ -1,5 +1,5 @@ // Copyright (c) 2019, Google Inc. -// Portions Copyright 2020 Brian Smith. 
+// Portions Copyright 2020-2024 Brian Smith. // // Permission to use, copy, modify, and/or distribute this software for any // purpose with or without fee is hereby granted, provided that the above @@ -22,9 +22,32 @@ // // Unlike the BearSSL notes, we use u128 in the 64-bit implementation. -use super::{ffi::U128, Xi, BLOCK_LEN}; +use super::{ffi::U128, Gmult, KeyValue, UpdateBlocks, Xi, BLOCK_LEN}; use crate::polyfill::ArraySplitMap as _; +#[derive(Clone)] +pub struct Key { + h: U128, +} + +impl Key { + pub(in super::super) fn new(value: KeyValue) -> Self { + Self { h: init(value) } + } +} + +impl Gmult for Key { + fn gmult(&self, xi: &mut Xi) { + gmult(xi, self.h); + } +} + +impl UpdateBlocks for Key { + fn update_blocks(&self, xi: &mut Xi, input: &[[u8; BLOCK_LEN]]) { + ghash(xi, self.h, input); + } +} + #[cfg(target_pointer_width = "64")] fn gcm_mul64_nohw(a: u64, b: u64) -> (u64, u64) { #[allow(clippy::cast_possible_truncation)] @@ -138,7 +161,9 @@ fn gcm_mul64_nohw(a: u64, b: u64) -> (u64, u64) { (lo ^ (mid << 32), hi ^ (mid >> 32)) } -pub(super) fn init(xi: [u64; 2]) -> U128 { +fn init(value: KeyValue) -> U128 { + let xi = value.into_inner(); + // We implement GHASH in terms of POLYVAL, as described in RFC 8452. This // avoids a shift by 1 in the multiplication, needed to account for bit // reversal losing a bit after multiplication, that is, @@ -217,13 +242,13 @@ fn gcm_polyval_nohw(xi: &mut [u64; 2], h: U128) { *xi = [r2, r3]; } -pub(super) fn gmult(xi: &mut Xi, h: U128) { +fn gmult(xi: &mut Xi, h: U128) { with_swapped_xi(xi, |swapped| { gcm_polyval_nohw(swapped, h); }) } -pub(super) fn ghash(xi: &mut Xi, h: U128, input: &[[u8; BLOCK_LEN]]) { +fn ghash(xi: &mut Xi, h: U128, input: &[[u8; BLOCK_LEN]]) { with_swapped_xi(xi, |swapped| { input.iter().for_each(|&input| { let input = input.array_split_map(u64::from_be_bytes); diff --git a/src/aead/gcm/ffi.rs b/src/aead/gcm/ffi.rs index b8dcee925f..6089800bea 100644 --- a/src/aead/gcm/ffi.rs +++ b/src/aead/gcm/ffi.rs @@ -12,7 +12,7 @@ // OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN // CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. -use crate::constant_time; +use crate::{constant_time, polyfill::ArraySplitMap}; pub(in super::super) const BLOCK_LEN: usize = 16; pub(in super::super) type Block = [u8; BLOCK_LEN]; @@ -25,12 +25,12 @@ pub(super) const ZERO_BLOCK: Block = [0u8; BLOCK_LEN]; target_arch = "x86_64" ))] macro_rules! htable_new { - ( $name:ident, $input:expr, $cpu_features:expr ) => {{ + ( $name:ident, $value:expr $(,)? ) => {{ use crate::aead::gcm::ffi::HTable; prefixed_extern! { fn $name(HTable: &mut HTable, h: &[u64; 2]); } - HTable::new($name, $input) + HTable::new($name, $value) }}; } @@ -41,12 +41,12 @@ macro_rules! htable_new { target_arch = "x86_64" ))] macro_rules! gmult { - ( $name:ident, $xi:expr, $h_table:expr, $cpu_features:expr ) => {{ + ( $name:ident, $xi:expr, $h_table:expr $(,)? ) => {{ use crate::aead::gcm::ffi::{HTable, Xi}; prefixed_extern! { fn $name(xi: &mut Xi, Htable: &HTable); } - $h_table.gmult($name, $xi, $cpu_features) + $h_table.gmult($name, $xi) }}; } @@ -60,7 +60,7 @@ macro_rules! gmult { target_arch = "x86_64" ))] macro_rules! ghash { - ( $name:ident, $xi:expr, $h_table:expr, $input:expr, $cpu_features:expr ) => {{ + ( $name:ident, $xi:expr, $h_table:expr, $input:expr $(,)? ) => {{ use crate::aead::gcm::ffi::{HTable, Xi}; prefixed_extern! { fn $name( @@ -70,10 +70,22 @@ macro_rules! 
                 len: crate::c::NonZero_size_t,
             );
         }
-        $h_table.ghash($name, $xi, $input, $cpu_features)
+        $h_table.ghash($name, $xi, $input)
     }};
 }
 
+pub(in super::super) struct KeyValue([u64; 2]);
+
+impl KeyValue {
+    pub(in super::super) fn new(value: Block) -> Self {
+        Self(value.array_split_map(u64::from_be_bytes))
+    }
+
+    pub(super) fn into_inner(self) -> [u64; 2] {
+        self.0
+    }
+}
+
 /// SAFETY:
 ///  * `f` must read `len` bytes from `inp`; it may assume
 ///    that `len` is a (non-zero) multiple of `BLOCK_LEN`.
@@ -86,13 +98,13 @@ macro_rules! ghash {
 ))]
 impl HTable {
     pub(super) unsafe fn new(
-        init: unsafe extern "C" fn(HTable: &mut HTable, h: &[u64; 2]),
-        value: &[u64; 2],
+        init: unsafe extern "C" fn(HTable: &mut HTable, &[u64; 2]),
+        value: KeyValue,
     ) -> Self {
         let mut r = Self {
             Htable: [U128 { hi: 0, lo: 0 }; HTABLE_LEN],
         };
-        unsafe { init(&mut r, value) };
+        unsafe { init(&mut r, &value.0) };
         r
     }
 
@@ -100,7 +112,6 @@ impl HTable {
         &self,
         f: unsafe extern "C" fn(xi: &mut Xi, h_table: &HTable),
         xi: &mut Xi,
-        _cpu_features: crate::cpu::Features,
     ) {
         unsafe { f(xi, self) }
     }
@@ -115,7 +126,6 @@ impl HTable {
         ),
         xi: &mut Xi,
         input: &[[u8; BLOCK_LEN]],
-        cpu_features: crate::cpu::Features,
     ) {
         use crate::polyfill::slice;
         use core::num::NonZeroUsize;
@@ -129,31 +139,15 @@ impl HTable {
             }
         };
 
-        let _: crate::cpu::Features = cpu_features;
         // SAFETY:
         //  * There are `input_len: NonZeroUsize` bytes available at `input` for
        //    `f` to read.
-        //  * CPU feature detection has been done.
         unsafe {
             f(xi, self, input.as_ptr(), input_len);
         }
     }
 }
 
-impl HTable {
-    pub(super) fn new_single_entry(first_entry: U128) -> Self {
-        let mut r = Self {
-            Htable: [U128 { hi: 0, lo: 0 }; HTABLE_LEN],
-        };
-        r.Htable[0] = first_entry;
-        r
-    }
-
-    pub(super) fn first_entry(&self) -> U128 {
-        self.Htable[0]
-    }
-}
-
 // The alignment is required by some assembly code.
 #[derive(Clone)]
 #[repr(C, align(16))]
@@ -178,9 +172,4 @@ impl Xi {
     pub(super) fn bitxor_assign(&mut self, a: Block) {
         self.0 = constant_time::xor_16(self.0, a)
     }
-
-    #[inline]
-    pub(super) fn into_block(self) -> Block {
-        self.0
-    }
 }
diff --git a/src/aead/gcm/neon.rs b/src/aead/gcm/neon.rs
new file mode 100644
index 0000000000..f1dd07cf25
--- /dev/null
+++ b/src/aead/gcm/neon.rs
@@ -0,0 +1,45 @@
+// Copyright 2018-2024 Brian Smith.
+//
+// Permission to use, copy, modify, and/or distribute this software for any
+// purpose with or without fee is hereby granted, provided that the above
+// copyright notice and this permission notice appear in all copies.
+//
+// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
+// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY
+// SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
+// OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
+// CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ +#![cfg(any(target_arch = "aarch64", target_arch = "arm"))] + +use super::{Gmult, HTable, KeyValue, UpdateBlocks, Xi, BLOCK_LEN}; +use crate::cpu; + +pub(in super::super) type RequiredCpuFeatures = cpu::arm::Neon; + +#[derive(Clone)] +pub struct Key { + h_table: HTable, +} + +impl Key { + pub(in super::super) fn new(value: KeyValue, _cpu: RequiredCpuFeatures) -> Self { + Self { + h_table: unsafe { htable_new!(gcm_init_neon, value) }, + } + } +} + +impl Gmult for Key { + fn gmult(&self, xi: &mut Xi) { + unsafe { gmult!(gcm_gmult_neon, xi, &self.h_table) } + } +} + +impl UpdateBlocks for Key { + fn update_blocks(&self, xi: &mut Xi, input: &[[u8; BLOCK_LEN]]) { + unsafe { ghash!(gcm_ghash_neon, xi, &self.h_table, input) } + } +} diff --git a/src/aead/shift.rs b/src/aead/shift.rs index fc2227378f..f4a62eceeb 100644 --- a/src/aead/shift.rs +++ b/src/aead/shift.rs @@ -14,7 +14,7 @@ use crate::polyfill::sliceutil::overwrite_at_start; -#[cfg(target_arch = "x86")] +#[allow(dead_code)] pub fn shift_full_blocks( in_out: &mut [u8], src: core::ops::RangeFrom, diff --git a/src/cpu.rs b/src/cpu.rs index bd5833ab99..90b6445c7f 100644 --- a/src/cpu.rs +++ b/src/cpu.rs @@ -14,6 +14,60 @@ pub(crate) use self::features::Features; +macro_rules! impl_get_feature { + { $feature:path => $T:ident } => { + #[derive(Clone, Copy)] + pub(crate) struct $T(crate::cpu::Features); + + impl crate::cpu::GetFeature<$T> for super::Features { + fn get_feature(&self) -> Option<$T> { + if $feature.available(*self) { + Some($T(*self)) + } else { + None + } + } + } + + impl From<$T> for crate::cpu::Features { + fn from($T(features): $T) -> Self { + features + } + } + } +} + +pub(crate) trait GetFeature { + fn get_feature(&self) -> Option; +} + +impl GetFeature<(A, B)> for T +where + T: GetFeature, + T: GetFeature, +{ + fn get_feature(&self) -> Option<(A, B)> { + match (self.get_feature(), self.get_feature()) { + (Some(a), Some(b)) => Some((a, b)), + _ => None, + } + } +} + +impl GetFeature<(A, B, C)> for T +where + T: GetFeature, + T: GetFeature, + T: GetFeature, +{ + fn get_feature(&self) -> Option<(A, B, C)> { + match (self.get_feature(), self.get_feature(), self.get_feature()) { + (Some(a), Some(b), Some(c)) => Some((a, b, c)), + _ => None, + } + } +} + #[inline(always)] pub(crate) fn features() -> Features { get_or_init_feature_flags() diff --git a/src/cpu/arm.rs b/src/cpu/arm.rs index be6322af6d..d2920a10eb 100644 --- a/src/cpu/arm.rs +++ b/src/cpu/arm.rs @@ -63,7 +63,7 @@ cfg_if::cfg_if! { macro_rules! features { { $( - $target_feature_name:expr => $name:ident { + $target_feature_name:expr => $TyName:ident($name:ident) { mask: $mask:expr, } ),+ @@ -74,6 +74,7 @@ macro_rules! features { pub(crate) const $name: Feature = Feature { mask: $mask, }; + impl_get_feature!{ $name => $TyName } )+ // See const assertions below. @@ -115,17 +116,17 @@ impl Feature { #[cfg(target_arch = "aarch64")] features! { // Keep in sync with `ARMV7_NEON`. - "neon" => NEON { + "neon" => Neon(NEON) { mask: 1 << 0, }, // Keep in sync with `ARMV8_AES`. - "aes" => AES { + "aes" => Aes(AES) { mask: 1 << 2, }, // Keep in sync with `ARMV8_SHA256`. - "sha2" => SHA256 { + "sha2" => Sha256(SHA256) { mask: 1 << 4, }, @@ -137,13 +138,13 @@ features! { // https://developer.arm.com/downloads/-/exploration-tools/feature-names-for-a-profile // "Features introduced prior to 2020." Change this to use "pmull" when // that is supported. - "aes" => PMULL { + "aes" => PMull(PMULL) { mask: 1 << 5, }, // Keep in sync with `ARMV8_SHA512`. 
// "sha3" is overloaded for both SHA-3 and SHA512. - "sha3" => SHA512 { + "sha3" => Sha512(SHA512) { mask: 1 << 6, }, } @@ -151,7 +152,7 @@ features! { #[cfg(target_arch = "arm")] features! { // Keep in sync with `ARMV7_NEON`. - "neon" => NEON { + "neon" => Neon(NEON) { mask: 1 << 0, }, } diff --git a/src/cpu/intel.rs b/src/cpu/intel.rs index d2d18e316a..172fe47bef 100644 --- a/src/cpu/intel.rs +++ b/src/cpu/intel.rs @@ -12,6 +12,8 @@ // OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN // CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +use cfg_if::cfg_if; + mod abi_assumptions { // TOOD: Support targets that do not have SSE and SSE2 enabled, such as // x86_64-unknown-linux-none. See @@ -118,22 +120,32 @@ pub(crate) const SSE41: Feature = Feature { mask: 1 << 19, }; -#[cfg(target_arch = "x86_64")] -pub(crate) const MOVBE: Feature = Feature { - word: 1, - mask: 1 << 22, -}; - pub(crate) const AES: Feature = Feature { word: 1, mask: 1 << 25, }; -#[cfg(target_arch = "x86_64")] -pub(crate) const AVX: Feature = Feature { - word: 1, - mask: 1 << 28, -}; +impl_get_feature! { AES => Aes } +impl_get_feature! { FXSR => Fxsr } +impl_get_feature! { PCLMULQDQ => ClMul } +impl_get_feature! { SSSE3 => Ssse3 } + +cfg_if! { + if #[cfg(any(target_arch = "x86_64"))] { + pub(crate) const MOVBE: Feature = Feature { + word: 1, + mask: 1 << 22, + }; + + pub(crate) const AVX: Feature = Feature { + word: 1, + mask: 1 << 28, + }; + + impl_get_feature!{ MOVBE => Movbe } + impl_get_feature!{ AVX => Avx } + } +} #[cfg(all(target_arch = "x86_64", test))] mod x86_64_tests { diff --git a/src/lib.rs b/src/lib.rs index f501856bee..cb2d55bf1c 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -82,6 +82,15 @@ clippy::cast_precision_loss, clippy::cast_sign_loss )] +#![cfg_attr( + not(any( + target_arch = "aarch64", + target_arch = "arm", + target_arch = "x86", + target_arch = "x86_64" + )), + allow(dead_code, unused_imports, unused_macros) +)] #![no_std] #[cfg(feature = "alloc")] diff --git a/src/polyfill.rs b/src/polyfill.rs index 4d5a0ec1f0..96702a7a80 100644 --- a/src/polyfill.rs +++ b/src/polyfill.rs @@ -64,6 +64,15 @@ mod test; mod unwrap_const; +#[cfg_attr( + not(any( + target_arch = "aarch64", + target_arch = "arm", + target_arch = "x86", + target_arch = "x86_64" + )), + allow(unused_imports) +)] pub use self::{ array_flat_map::ArrayFlatMap, array_split_map::ArraySplitMap, notsend::NotSend, unwrap_const::unwrap_const,