From 110e9b4029dddcc09ea9b2691129d0c7d12f1e8d Mon Sep 17 00:00:00 2001 From: Brian Smith <brian@briansmith.org> Date: Mon, 27 Nov 2023 15:29:56 -0800 Subject: [PATCH] NFC ChaCha20: Move CPU feature dispatching to Rust. --- build.rs | 2 + crypto/chacha/asm/chacha-armv8.pl | 33 ++++------------- src/aead/chacha.rs | 61 ++++++++++++++++++++++++++++--- 3 files changed, 65 insertions(+), 31 deletions(-) diff --git a/build.rs b/build.rs index f7b94108b7..5cdabf927d 100644 --- a/build.rs +++ b/build.rs @@ -893,6 +893,8 @@ fn prefix_all_symbols(pp: char, prefix_prefix: &str, prefix: &str) -> String { "CRYPTO_poly1305_update", "CRYPTO_poly1305_update_neon", "ChaCha20_ctr32", + "ChaCha20_ctr32_fallback", + "ChaCha20_ctr32_neon", "LIMBS_add_mod", "LIMBS_are_even", "LIMBS_are_zero", diff --git a/crypto/chacha/asm/chacha-armv8.pl b/crypto/chacha/asm/chacha-armv8.pl index fa6a801445..ed6fc65d93 100755 --- a/crypto/chacha/asm/chacha-armv8.pl +++ b/crypto/chacha/asm/chacha-armv8.pl @@ -122,9 +122,6 @@ sub ROUND { $code.=<<___; #include <ring-core/arm_arch.h> -.extern OPENSSL_armcap_P -.hidden OPENSSL_armcap_P - .section .rodata .align 5 @@ -136,24 +133,10 @@ sub ROUND { .text -.globl ChaCha20_ctr32 -.type ChaCha20_ctr32,%function +.globl ChaCha20_ctr32_fallback +.type ChaCha20_ctr32_fallback,%function .align 5 -ChaCha20_ctr32: - AARCH64_VALID_CALL_TARGET - cbz $len,.Labort -#if defined(OPENSSL_HWASAN) && __clang_major__ >= 10 - adrp @x[0],:pg_hi21_nc:OPENSSL_armcap_P -#else - adrp @x[0],:pg_hi21:OPENSSL_armcap_P -#endif - cmp $len,#192 - b.lo .Lshort - ldr w17,[@x[0],:lo12:OPENSSL_armcap_P] - tst w17,#ARMV7_NEON - b.ne ChaCha20_neon - -.Lshort: +ChaCha20_ctr32_fallback: AARCH64_SIGN_LINK_REGISTER stp x29,x30,[sp,#-96]!
add x29,sp,#0 @@ -276,7 +259,6 @@ sub ROUND { ldp x27,x28,[x29,#80] ldp x29,x30,[sp],#96 AARCH64_VALIDATE_LINK_REGISTER -.Labort: ret .align 4 @@ -334,7 +316,7 @@ sub ROUND { ldp x29,x30,[sp],#96 AARCH64_VALIDATE_LINK_REGISTER ret -.size ChaCha20_ctr32,.-ChaCha20_ctr32 +.size ChaCha20_ctr32_fallback,.-ChaCha20_ctr32_fallback ___ {{{ @@ -375,9 +357,10 @@ sub NEONROUND { $code.=<<___; -.type ChaCha20_neon,%function +.globl ChaCha20_ctr32_neon +.type ChaCha20_ctr32_neon,%function .align 5 -ChaCha20_neon: +ChaCha20_ctr32_neon: AARCH64_SIGN_LINK_REGISTER stp x29,x30,[sp,#-96]! add x29,sp,#0 @@ -690,7 +673,7 @@ sub NEONROUND { ldp x29,x30,[sp],#96 AARCH64_VALIDATE_LINK_REGISTER ret -.size ChaCha20_neon,.-ChaCha20_neon +.size ChaCha20_ctr32_neon,.-ChaCha20_ctr32_neon ___ { my ($T0,$T1,$T2,$T3,$T4,$T5)=@K; diff --git a/src/aead/chacha.rs b/src/aead/chacha.rs index 660cf34c2d..871c34d210 100644 --- a/src/aead/chacha.rs +++ b/src/aead/chacha.rs @@ -95,12 +95,61 @@ impl Key { /// Only call this with `src` equal to `0..` or from `encrypt_within`. #[inline] fn encrypt_less_safe(&self, counter: Counter, in_out: &mut [u8], src: RangeFrom<usize>) { - #[cfg(any( - target_arch = "aarch64", - target_arch = "arm", - target_arch = "x86", - target_arch = "x86_64" - ))] + #[cfg(target_arch = "aarch64")] + #[inline(always)] + pub(super) fn ChaCha20_ctr32( + key: &Key, + counter: Counter, + in_out: &mut [u8], + src: RangeFrom<usize>, + ) { + let in_out_len = in_out.len().checked_sub(src.start).unwrap(); + + // There's no need to worry if `counter` is incremented because it is + // owned here and we drop immediately after the call. + + if in_out_len >= 192 && cpu::arm::NEON.available(key.cpu_features) { + prefixed_extern!
{ + fn ChaCha20_ctr32_neon( + out: *mut u8, + in_: *const u8, + in_len: crate::c::size_t, + key: &[u32; KEY_LEN / 4], + counter: &Counter, + ); + } + unsafe { + ChaCha20_ctr32_neon( + in_out.as_mut_ptr(), + in_out[src].as_ptr(), + in_out_len, + key.words_less_safe(), + &counter, + ) + } + } else if in_out_len > 0 { + prefixed_extern! { + fn ChaCha20_ctr32_fallback( + out: *mut u8, + in_: *const u8, + in_len: crate::c::size_t, + key: &[u32; KEY_LEN / 4], + counter: &Counter, + ); + } + unsafe { + ChaCha20_ctr32_fallback( + in_out.as_mut_ptr(), + in_out[src].as_ptr(), + in_out_len, + key.words_less_safe(), + &counter, + ) + } + } + } + + #[cfg(any(target_arch = "arm", target_arch = "x86", target_arch = "x86_64"))] #[inline(always)] pub(super) fn ChaCha20_ctr32( key: &Key,