From 19e0cdf833f108f75fdb92a15e68898ae1b270e8 Mon Sep 17 00:00:00 2001 From: Carson McManus Date: Wed, 12 Jul 2023 09:29:44 -0400 Subject: [PATCH] argon2: optimize with AVX2 SIMD (#440) --- Cargo.lock | 5 +++-- argon2/Cargo.toml | 3 +++ argon2/src/block.rs | 29 +++++++++++++++++++++++++++-- argon2/src/lib.rs | 33 ++++++++++++++++++++++++++++----- benches/Cargo.toml | 1 + benches/src/argon2.rs | 5 ++++- 6 files changed, 66 insertions(+), 10 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 45fa5e88..8397fc26 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -8,6 +8,7 @@ version = "0.5.0" dependencies = [ "base64ct", "blake2", + "cpufeatures", "hex-literal", "password-hash", "zeroize", @@ -107,9 +108,9 @@ dependencies = [ [[package]] name = "cpufeatures" -version = "0.2.8" +version = "0.2.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "03e69e28e9f7f77debdedbaafa2866e1de9ba56df55a8bd7cfc724c25a09987c" +checksum = "a17b76ff3a4162b0b27f354a0c87015ddad39d35f9c0c36607a3bdd175dde1f1" dependencies = [ "libc", ] diff --git a/argon2/Cargo.toml b/argon2/Cargo.toml index ed8c1c9f..ba406625 100644 --- a/argon2/Cargo.toml +++ b/argon2/Cargo.toml @@ -23,6 +23,9 @@ blake2 = { version = "0.10.6", default-features = false } password-hash = { version = "0.5", optional = true } zeroize = { version = "1", default-features = false, optional = true } +[target.'cfg(any(target_arch = "x86", target_arch = "x86_64"))'.dependencies] +cpufeatures = "0.2.9" + [dev-dependencies] hex-literal = "0.4" password-hash = { version = "0.5", features = ["rand_core"] } diff --git a/argon2/src/block.rs b/argon2/src/block.rs index 6586f25b..7b471c9d 100644 --- a/argon2/src/block.rs +++ b/argon2/src/block.rs @@ -66,7 +66,8 @@ impl Block { unsafe { &mut *(self.0.as_mut_ptr() as *mut [u8; Self::SIZE]) } } - pub(crate) fn compress(rhs: &Self, lhs: &Self) -> Self { + #[inline(always)] + pub(crate) fn compress_soft(rhs: &Self, lhs: &Self) -> Self { let r = *rhs ^ lhs; // Apply permutations rowwise @@ -101,6 +102,12 @@ impl Block { q ^= &r; q } + + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + #[target_feature(enable = "avx2")] + pub(crate) unsafe fn compress_avx2(rhs: &Self, lhs: &Self) -> Self { + Self::compress_soft(rhs, lhs) + } } impl Default for Block { @@ -132,7 +139,7 @@ impl BitXor<&Block> for Block { impl BitXorAssign<&Block> for Block { fn bitxor_assign(&mut self, rhs: &Block) { - for (dst, src) in self.0.iter_mut().zip(rhs.0.iter().copied()) { + for (dst, src) in self.0.iter_mut().zip(rhs.0.iter()) { *dst ^= src; } } @@ -144,3 +151,21 @@ impl Zeroize for Block { self.0.zeroize(); } } + +#[cfg(test)] +mod test { + use super::*; + + #[cfg(target_arch = "x86_64")] + #[test] + fn compress_avx2() { + let mut lhs = Block([0; 128]); + lhs.0[0..7].copy_from_slice(&[0, 0, 0, 2048, 4, 2, 1]); + let rhs = Block([0; 128]); + + let result = Block::compress_soft(&rhs, &lhs); + let result_avx2 = unsafe { Block::compress_avx2(&rhs, &lhs) }; + + assert_eq!(result.0, result_avx2.0); + } +} diff --git a/argon2/src/lib.rs b/argon2/src/lib.rs index 41b9f8c7..eead9b0d 100644 --- a/argon2/src/lib.rs +++ b/argon2/src/lib.rs @@ -144,6 +144,9 @@ pub(crate) const SYNC_POINTS: usize = 4; /// To generate reference block positions const ADDRESSES_IN_BLOCK: usize = 128; +#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +cpufeatures::new!(avx2_cpuid, "avx2"); + /// Argon2 context. /// /// This is the primary type of this crate's API, and contains the following: @@ -165,6 +168,9 @@ pub struct Argon2<'key> { /// Key array secret: Option<&'key [u8]>, + + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + cpu_feat_avx2: avx2_cpuid::InitToken, } impl Default for Argon2<'_> { @@ -191,6 +197,8 @@ impl<'key> Argon2<'key> { version, params, secret: None, + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + cpu_feat_avx2: avx2_cpuid::init(), } } @@ -210,6 +218,8 @@ impl<'key> Argon2<'key> { version, params, secret: Some(secret), + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + cpu_feat_avx2: avx2_cpuid::init(), }) } @@ -335,7 +345,7 @@ impl<'key> Argon2<'key> { let first_block = if pass == 0 && slice == 0 { if data_independent_addressing { // Generate first set of addresses - Self::update_address_block( + self.update_address_block( &mut address_block, &mut input_block, &zero_block, @@ -364,7 +374,7 @@ impl<'key> Argon2<'key> { let addres_index = block % ADDRESSES_IN_BLOCK; if addres_index == 0 { - Self::update_address_block( + self.update_address_block( &mut address_block, &mut input_block, &zero_block, @@ -424,7 +434,7 @@ impl<'key> Argon2<'key> { // Calculate new block let result = - Block::compress(&memory_blocks[prev_index], &memory_blocks[ref_index]); + self.compress(&memory_blocks[prev_index], &memory_blocks[ref_index]); if self.version == Version::V0x10 || pass == 0 { memory_blocks[cur_index] = result; @@ -442,6 +452,16 @@ impl<'key> Argon2<'key> { Ok(()) } + fn compress(&self, rhs: &Block, lhs: &Block) -> Block { + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + { + if self.cpu_feat_avx2.get() { + return unsafe { Block::compress_avx2(rhs, lhs) }; + } + } + Block::compress_soft(rhs, lhs) + } + /// Get default configured [`Params`]. pub fn params(&self) -> &Params { &self.params @@ -467,13 +487,14 @@ impl<'key> Argon2<'key> { } fn update_address_block( + &self, address_block: &mut Block, input_block: &mut Block, zero_block: &Block, ) { input_block.as_mut()[6] += 1; - *address_block = Block::compress(zero_block, input_block); - *address_block = Block::compress(zero_block, address_block); + *address_block = self.compress(zero_block, input_block); + *address_block = self.compress(zero_block, address_block); } /// Hashes all the inputs into `blockhash[PREHASH_DIGEST_LEN]`. @@ -579,6 +600,8 @@ impl PasswordHasher for Argon2<'_> { algorithm, version, params, + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + cpu_feat_avx2: self.cpu_feat_avx2, } .hash_password(password, salt) } diff --git a/benches/Cargo.toml b/benches/Cargo.toml index 0a53e3ff..e36cbdba 100644 --- a/benches/Cargo.toml +++ b/benches/Cargo.toml @@ -9,6 +9,7 @@ publish = false [dev-dependencies] argon2 = { path = "../argon2" } criterion = { version = "0.4", features = ["html_reports"] } +pprof = { version = "0.11", features = ["flamegraph", "criterion"] } [[bench]] name = "argon2" diff --git a/benches/src/argon2.rs b/benches/src/argon2.rs index afa3849b..b26b6371 100644 --- a/benches/src/argon2.rs +++ b/benches/src/argon2.rs @@ -1,5 +1,6 @@ use argon2::*; use criterion::{black_box, criterion_group, criterion_main, Criterion}; +use pprof::criterion::{Output, PProfProfiler}; const BENCH_PASSWORD: &[u8] = b"hunter2"; const BENCH_SALT: &[u8] = b"pepper42"; @@ -80,7 +81,9 @@ fn bench_vary_p(c: &mut Criterion) { } criterion_group!( - benches, + name = benches; + config = Criterion::default().with_profiler(PProfProfiler::new(300, Output::Flamegraph(None))); + targets = bench_default_params, bench_vary_m, bench_vary_t,