diff --git a/src/aead/gcm.rs b/src/aead/gcm.rs
index f1a2ef296a..27716e4ba8 100644
--- a/src/aead/gcm.rs
+++ b/src/aead/gcm.rs
@@ -12,17 +12,25 @@
 // OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
 // CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 
+use self::ffi::{Block, BLOCK_LEN, ZERO_BLOCK};
 use super::{aes_gcm, Aad};
 use crate::{
     bits::{BitLength, FromByteLen as _},
-    constant_time, cpu, error,
+    cpu, error,
     polyfill::{sliceutil::overwrite_at_start, ArraySplitMap as _},
 };
-use core::ops::BitXorAssign;
+use cfg_if::cfg_if;
 
-// GCM uses the same block type as AES.
-use super::aes::{Block, BLOCK_LEN, ZERO_BLOCK};
+cfg_if! {
+    if #[cfg(any(target_arch = "aarch64", target_arch = "x86_64"))] {
+        pub(super) use self::ffi::{HTable, Xi};
+    } else {
+        use self::ffi::{HTable, Xi};
+    }
+}
 
+#[macro_use]
+mod ffi;
 mod gcm_nohw;
 
 #[derive(Clone)]
@@ -33,118 +41,24 @@ pub struct Key {
 impl Key {
+    /// Builds the key's `HTable` from `h_be` (split into two big-endian
+    /// `u64`s), using the implementation that `detect_implementation()`
+    /// selects for `cpu_features`.
     pub(super) fn new(h_be: Block, cpu_features: cpu::Features) -> Self {
         let h: [u64; 2] = h_be.array_split_map(u64::from_be_bytes);
-
-        let mut key = Self {
-            h_table: HTable {
-                Htable: [U128 { hi: 0, lo: 0 }; HTABLE_LEN],
-            },
-        };
-        let h_table = &mut key.h_table;
-
-        match detect_implementation(cpu_features) {
+        let h_table = match detect_implementation(cpu_features) {
             #[cfg(target_arch = "x86_64")]
-            Implementation::CLMUL if has_avx_movbe(cpu_features) => {
-                prefixed_extern! {
-                    fn gcm_init_avx(HTable: &mut HTable, h: &[u64; 2]);
-                }
-                unsafe {
-                    gcm_init_avx(h_table, &h);
-                }
-            }
+            Implementation::CLMUL if has_avx_movbe(cpu_features) => unsafe {
+                htable_new!(gcm_init_avx, &h, cpu_features)
+            },
 
             #[cfg(any(target_arch = "aarch64", target_arch = "x86_64", target_arch = "x86"))]
-            Implementation::CLMUL => {
-                prefixed_extern! 
{ - fn gcm_init_clmul(Htable: &mut HTable, h: &[u64; 2]); - } - unsafe { - gcm_init_clmul(h_table, &h); - } - } + Implementation::CLMUL => unsafe { htable_new!(gcm_init_clmul, &h, cpu_features) }, #[cfg(any(target_arch = "aarch64", target_arch = "arm"))] - Implementation::NEON => { - prefixed_extern! { - fn gcm_init_neon(Htable: &mut HTable, h: &[u64; 2]); - } - unsafe { - gcm_init_neon(h_table, &h); - } - } - - Implementation::Fallback => { - h_table.Htable[0] = gcm_nohw::init(h); - } - } - - key - } -} - -/// SAFETY: -/// * The function `$name` must meet the contract of the `f` paramweter of -/// `ghash()`. -#[cfg(any( - target_arch = "aarch64", - target_arch = "arm", - target_arch = "x86", - target_arch = "x86_64" -))] -macro_rules! ghash { - ( $name:ident, $xi:expr, $h_table:expr, $input:expr, $cpu_features:expr ) => {{ - prefixed_extern! { - fn $name( - xi: &mut Xi, - Htable: &HTable, - inp: *const u8, - len: crate::c::NonZero_size_t, - ); - } - ghash($name, $xi, $h_table, $input, $cpu_features); - }}; -} - -/// SAFETY: -/// * `f` must read `len` bytes from `inp`; it may assume -/// that `len` is a (non-zero) multiple of `BLOCK_LEN`. -/// * `f` may inspect CPU features. -#[cfg(any( - target_arch = "aarch64", - target_arch = "arm", - target_arch = "x86", - target_arch = "x86_64" -))] -unsafe fn ghash( - f: unsafe extern "C" fn( - xi: &mut Xi, - Htable: &HTable, - inp: *const u8, - len: crate::c::NonZero_size_t, - ), - xi: &mut Xi, - h_table: &HTable, - input: &[[u8; BLOCK_LEN]], - cpu_features: cpu::Features, -) { - use crate::polyfill::slice; - use core::num::NonZeroUsize; - - let input = slice::flatten(input); + Implementation::NEON => unsafe { htable_new!(gcm_init_neon, &h, cpu_features) }, - let input_len = match NonZeroUsize::new(input.len()) { - Some(len) => len, - None => { - return; - } - }; - - let _: cpu::Features = cpu_features; - // SAFETY: - // * There are `input_len: NonZeroUsize` bytes available at `input` for - // `f` to read. 
- // * CPU feature detection has been done. - unsafe { - f(xi, h_table, input.as_ptr(), input_len); + Implementation::Fallback => HTable::new_single_entry(gcm_nohw::init(h)), + }; + Self { h_table } } } @@ -209,7 +120,7 @@ impl<'key> Context<'key> { pub fn update_blocks(&mut self, input: &[[u8; BLOCK_LEN]]) { let xi = &mut self.Xi; - let h_table = &self.h_table; + let h_table = self.h_table; match detect_implementation(self.cpu_features) { #[cfg(target_arch = "x86_64")] @@ -240,7 +151,7 @@ impl<'key> Context<'key> { }, Implementation::Fallback => { - gcm_nohw::ghash(xi, h_table.Htable[0], input); + gcm_nohw::ghash(xi, h_table.first_entry(), input); } } } @@ -257,31 +168,21 @@ impl<'key> Context<'key> { self.Xi.bitxor_assign(a); let xi = &mut self.Xi; - let h_table = &self.h_table; + let h_table = self.h_table; match detect_implementation(self.cpu_features) { #[cfg(any(target_arch = "aarch64", target_arch = "x86_64", target_arch = "x86"))] - Implementation::CLMUL => { - prefixed_extern! { - fn gcm_gmult_clmul(xi: &mut Xi, Htable: &HTable); - } - unsafe { - gcm_gmult_clmul(xi, h_table); - } - } + Implementation::CLMUL => unsafe { + gmult!(gcm_gmult_clmul, xi, h_table, self.cpu_features) + }, #[cfg(any(target_arch = "aarch64", target_arch = "arm"))] - Implementation::NEON => { - prefixed_extern! 
{ - fn gcm_gmult_neon(xi: &mut Xi, Htable: &HTable); - } - unsafe { - gcm_gmult_neon(xi, h_table); - } - } + Implementation::NEON => unsafe { + gmult!(gcm_gmult_neon, xi, h_table, self.cpu_features) + }, Implementation::Fallback => { - gcm_nohw::gmult(xi, h_table.Htable[0]); + gcm_nohw::gmult(xi, h_table.first_entry()); } } } @@ -295,7 +196,7 @@ impl<'key> Context<'key> { alen.copy_from_slice(&BitLength::::to_be_bytes(self.aad_len)); clen.copy_from_slice(&BitLength::::to_be_bytes(self.in_out_len)); self.update_block(block); - f(self.Xi.0, self.cpu_features) + f(self.Xi.into_block(), self.cpu_features) } #[cfg(target_arch = "x86_64")] @@ -315,32 +216,6 @@ impl<'key> Context<'key> { } } -// The alignment is required by some assembly code. -#[derive(Clone)] -#[repr(C, align(16))] -pub(super) struct HTable { - Htable: [U128; HTABLE_LEN], -} - -#[derive(Clone, Copy)] -#[repr(C)] -struct U128 { - hi: u64, - lo: u64, -} - -const HTABLE_LEN: usize = 16; - -#[repr(transparent)] -pub struct Xi(Block); - -impl BitXorAssign for Xi { - #[inline] - fn bitxor_assign(&mut self, a: Block) { - self.0 = constant_time::xor_16(self.0, a) - } -} - #[allow(clippy::upper_case_acronyms)] enum Implementation { #[cfg(any(target_arch = "aarch64", target_arch = "x86_64", target_arch = "x86"))] diff --git a/src/aead/gcm/ffi.rs b/src/aead/gcm/ffi.rs new file mode 100644 index 0000000000..41f679f068 --- /dev/null +++ b/src/aead/gcm/ffi.rs @@ -0,0 +1,186 @@ +// Copyright 2018 Brian Smith. +// +// Permission to use, copy, modify, and/or distribute this software for any +// purpose with or without fee is hereby granted, provided that the above +// copyright notice and this permission notice appear in all copies. +// +// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES +// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +// MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY
+// SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
+// OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
+// CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+
+use crate::constant_time;
+
+/// Length in bytes of a `Block`.
+pub(in super::super) const BLOCK_LEN: usize = 16;
+/// A `BLOCK_LEN`-byte array.
+pub(in super::super) type Block = [u8; BLOCK_LEN];
+/// A `Block` with every byte set to zero.
+pub(super) const ZERO_BLOCK: Block = [0u8; BLOCK_LEN];
+
+/// Declares the `prefixed_extern!` function `$name` and passes it, together
+/// with `$input`, to `HTable::new()`.
+///
+/// `$cpu_features` is accepted but does not appear in the expansion.
+///
+/// SAFETY:
+///  * The expansion calls the `unsafe fn` `HTable::new()`, so the function
+///    `$name` must meet the contract of its `init` parameter.
+#[cfg(any(
+    target_arch = "aarch64",
+    target_arch = "arm",
+    target_arch = "x86",
+    target_arch = "x86_64"
+))]
+macro_rules! htable_new {
+    ( $name:ident, $input:expr, $cpu_features:expr ) => {{
+        use crate::aead::gcm::ffi::HTable;
+        prefixed_extern! {
+            fn $name(HTable: &mut HTable, h: &[u64; 2]);
+        }
+        HTable::new($name, $input)
+    }};
+}
+
+/// Declares the `prefixed_extern!` function `$name` and passes it, together
+/// with `$xi` and `$cpu_features`, to `$h_table.gmult()`.
+///
+/// SAFETY:
+///  * The expansion calls the `unsafe fn` `HTable::gmult()`, so the function
+///    `$name` must meet the contract of its `f` parameter.
+#[cfg(any(
+    target_arch = "aarch64",
+    target_arch = "arm",
+    target_arch = "x86",
+    target_arch = "x86_64"
+))]
+macro_rules! gmult {
+    ( $name:ident, $xi:expr, $h_table:expr, $cpu_features:expr ) => {{
+        use crate::aead::gcm::ffi::{HTable, Xi};
+        prefixed_extern! {
+            fn $name(xi: &mut Xi, Htable: &HTable);
+        }
+        $h_table.gmult($name, $xi, $cpu_features)
+    }};
+}
+
+/// Declares the `prefixed_extern!` function `$name` and passes it, together
+/// with `$xi`, `$input` and `$cpu_features`, to `$h_table.ghash()`.
+///
+/// SAFETY:
+///  * The function `$name` must meet the contract of the `f` parameter of
+///    `HTable::ghash()`.
+#[cfg(any(
+    target_arch = "aarch64",
+    target_arch = "arm",
+    target_arch = "x86",
+    target_arch = "x86_64"
+))]
+macro_rules! ghash {
+    ( $name:ident, $xi:expr, $h_table:expr, $input:expr, $cpu_features:expr ) => {{
+        use crate::aead::gcm::ffi::{HTable, Xi};
+        prefixed_extern! {
+            fn $name(
+                xi: &mut Xi,
+                Htable: &HTable,
+                inp: *const u8,
+                len: crate::c::NonZero_size_t,
+            );
+        }
+        $h_table.ghash($name, $xi, $input, $cpu_features)
+    }};
+}
+
+/// SAFETY:
+///  * `f` must read `len` bytes from `inp`; it may assume
+///    that `len` is a (non-zero) multiple of `BLOCK_LEN`.
+///  * `f` may inspect CPU features.
+#[cfg(any(
+    target_arch = "aarch64",
+    target_arch = "arm",
+    target_arch = "x86",
+    target_arch = "x86_64"
+))]
+impl HTable {
+    /// Returns a table that starts out all-zero and is then filled in by
+    /// calling `init(&mut table, value)`.
+    ///
+    /// # Safety
+    ///
+    /// `init` must be sound to call with a zero-initialized table and `value`.
+    pub(super) unsafe fn new(
+        init: unsafe extern "C" fn(HTable: &mut HTable, h: &[u64; 2]),
+        value: &[u64; 2],
+    ) -> Self {
+        let mut r = Self {
+            Htable: [U128 { hi: 0, lo: 0 }; HTABLE_LEN],
+        };
+        // SAFETY: The caller guarantees that `init` may be called with these
+        // arguments.
+        unsafe { init(&mut r, value) };
+        r
+    }
+
+    /// Calls `f(xi, self)`.
+    ///
+    /// `_cpu_features` is not read.
+    /// NOTE(review): presumably it serves as evidence that CPU feature
+    /// detection has been done, as in `ghash()` -- confirm.
+    ///
+    /// # Safety
+    ///
+    /// `f` must be sound to call with `xi` and this table.
+    pub(super) unsafe fn gmult(
+        &self,
+        f: unsafe extern "C" fn(xi: &mut Xi, h_table: &HTable),
+        xi: &mut Xi,
+        _cpu_features: crate::cpu::Features,
+    ) {
+        // SAFETY: The caller guarantees that `f` may be called with these
+        // arguments.
+        unsafe { f(xi, self) }
+    }
+
+    /// Calls `f` once with `xi`, this table, and all of `input` flattened to
+    /// bytes. Returns without calling `f` when `input` is empty.
+    ///
+    /// # Safety
+    ///
+    ///  * `f` must read `len` bytes from `inp`; it may assume that `len` is a
+    ///    (non-zero) multiple of `BLOCK_LEN`.
+    ///  * `f` may inspect CPU features.
+    pub(super) unsafe fn ghash(
+        &self,
+        f: unsafe extern "C" fn(
+            xi: &mut Xi,
+            Htable: &HTable,
+            inp: *const u8,
+            len: crate::c::NonZero_size_t,
+        ),
+        xi: &mut Xi,
+        input: &[[u8; BLOCK_LEN]],
+        cpu_features: crate::cpu::Features,
+    ) {
+        use crate::polyfill::slice;
+        use core::num::NonZeroUsize;
+
+        let input = slice::flatten(input);
+
+        let input_len = match NonZeroUsize::new(input.len()) {
+            Some(len) => len,
+            None => {
+                return;
+            }
+        };
+
+        let _: crate::cpu::Features = cpu_features;
+        // SAFETY:
+        //  * There are `input_len: NonZeroUsize` bytes available at `input` for
+        //    `f` to read.
+        //  * CPU feature detection has been done.
+        unsafe {
+            // `self` is already `&HTable`; `&self` would be a needless `&&HTable`.
+            f(xi, self, input.as_ptr(), input_len);
+        }
+    }
+}
+
+impl HTable {
+    /// Returns a table whose entry 0 is `first_entry` and whose remaining
+    /// entries are zero.
+    pub(super) fn new_single_entry(first_entry: U128) -> Self {
+        let mut r = Self {
+            Htable: [U128 { hi: 0, lo: 0 }; HTABLE_LEN],
+        };
+        r.Htable[0] = first_entry;
+        r
+    }
+
+    /// Returns entry 0 of the table, i.e. the entry that
+    /// `new_single_entry()` stores.
+    pub(super) fn first_entry(&self) -> U128 {
+        self.Htable[0]
+    }
+}
+
+// The alignment is required by some assembly code.
+/// Table of `HTABLE_LEN` `U128` entries; it is passed by reference to the
+/// `extern "C"` functions that this module's macros declare.
+#[derive(Clone)]
+#[repr(C, align(16))]
+pub(in super::super) struct HTable {
+    Htable: [U128; HTABLE_LEN],
+}
+
+/// A pair of `u64` fields, `hi` followed by `lo`, with `repr(C)` layout.
+#[derive(Clone, Copy)]
+#[repr(C)]
+pub(super) struct U128 {
+    pub(super) hi: u64,
+    pub(super) lo: u64,
+}
+
+/// Number of `U128` entries in an `HTable`.
+const HTABLE_LEN: usize = 16;
+
+/// `repr(transparent)` wrapper around a `Block`.
+#[repr(transparent)]
+pub(in super::super) struct Xi(pub(super) Block);
+
+impl Xi {
+    /// Replaces the wrapped block with `constant_time::xor_16(self.0, a)`.
+    #[inline]
+    pub(super) fn bitxor_assign(&mut self, a: Block) {
+        self.0 = constant_time::xor_16(self.0, a)
+    }
+
+    /// Consumes `self` and returns the wrapped block.
+    #[inline]
+    pub(super) fn into_block(self) -> Block {
+        self.0
+    }
+}
diff --git a/src/aead/gcm/gcm_nohw.rs b/src/aead/gcm/gcm_nohw.rs
index edc8a04769..77ca08e056 100644
--- a/src/aead/gcm/gcm_nohw.rs
+++ b/src/aead/gcm/gcm_nohw.rs
@@ -22,7 +22,7 @@
 //
 // Unlike the BearSSL notes, we use u128 in the 64-bit implementation.
 
-use super::{Xi, BLOCK_LEN};
+use super::{ffi::U128, Xi, BLOCK_LEN};
 use crate::polyfill::ArraySplitMap as _;
 
 #[cfg(target_pointer_width = "64")]
@@ -138,7 +138,7 @@ fn gcm_mul64_nohw(a: u64, b: u64) -> (u64, u64) {
     (lo ^ (mid << 32), hi ^ (mid >> 32))
 }
 
-pub(super) fn init(xi: [u64; 2]) -> super::U128 {
+pub(super) fn init(xi: [u64; 2]) -> U128 {
     // We implement GHASH in terms of POLYVAL, as described in RFC 8452. This
     // avoids a shift by 1 in the multiplication, needed to account for bit
     // reversal losing a bit after multiplication, that is,
@@ -165,10 +165,10 @@ pub(super) fn init(xi: [u64; 2]) -> super::U128 {
     hi ^= carry & 0xc200000000000000;
 
     // This implementation does not use the rest of |Htable|.
-    super::U128 { hi, lo }
+    U128 { hi, lo }
 }
 
-fn gcm_polyval_nohw(xi: &mut [u64; 2], h: super::U128) {
+fn gcm_polyval_nohw(xi: &mut [u64; 2], h: U128) {
     // Karatsuba multiplication. The product of |Xi| and |H| is stored in |r0|
     // through |r3|. Note there is no byte or bit reversal because we are
     // evaluating POLYVAL.
@@ -217,13 +217,13 @@ fn gcm_polyval_nohw(xi: &mut [u64; 2], h: super::U128) { *xi = [r2, r3]; } -pub(super) fn gmult(xi: &mut Xi, h: super::U128) { +pub(super) fn gmult(xi: &mut Xi, h: U128) { with_swapped_xi(xi, |swapped| { gcm_polyval_nohw(swapped, h); }) } -pub(super) fn ghash(xi: &mut Xi, h: super::U128, input: &[[u8; BLOCK_LEN]]) { +pub(super) fn ghash(xi: &mut Xi, h: U128, input: &[[u8; BLOCK_LEN]]) { with_swapped_xi(xi, |swapped| { input.iter().for_each(|&input| { let input = input.array_split_map(u64::from_be_bytes);