From 2e4f0f9514c4f8a472ad56bb61201cc419c65983 Mon Sep 17 00:00:00 2001 From: Brian Smith Date: Tue, 26 Sep 2023 15:08:29 -0700 Subject: [PATCH] NFC: Move low-level Montgomery arithmetic out of `bigint`. When the `alloc` feature is disabled, on lesser-used targets we don't build `bigint` but we still need some of the Montgomery arithmetic. ``` git diff \ HEAD^1:src/arithmetic/bigint/bn_mul_mont_fallback.rs \ src/arithmetic/montgomery.rs ``` ``` git diff \ HEAD^1:src/arithmetic/bigint.rs \ src/arithmetic/montgomery.rs ``` --- src/arithmetic.rs | 4 + src/arithmetic/bigint.rs | 110 +----------- src/arithmetic/bigint/bn_mul_mont_fallback.rs | 51 ------ src/arithmetic/bigint/modulus.rs | 18 +- src/arithmetic/montgomery.rs | 156 +++++++++++++++++- src/arithmetic/{bigint => }/n0.rs | 9 +- 6 files changed, 181 insertions(+), 167 deletions(-) delete mode 100644 src/arithmetic/bigint/bn_mul_mont_fallback.rs rename src/arithmetic/{bigint => }/n0.rs (86%) diff --git a/src/arithmetic.rs b/src/arithmetic.rs index 40c8ba2026..0728b414c1 100644 --- a/src/arithmetic.rs +++ b/src/arithmetic.rs @@ -19,6 +19,10 @@ pub mod constant; pub mod bigint; pub mod montgomery; +mod n0; #[cfg(feature = "alloc")] mod nonnegative; + +#[allow(dead_code)] +const BIGINT_MODULUS_MAX_LIMBS: usize = 8192 / crate::limb::LIMB_BITS; diff --git a/src/arithmetic/bigint.rs b/src/arithmetic/bigint.rs index 15e20d507c..2150006807 100644 --- a/src/arithmetic/bigint.rs +++ b/src/arithmetic/bigint.rs @@ -36,25 +36,24 @@ //! [Static checking of units in Servo]: //! 
https://blog.mozilla.org/research/2014/06/23/static-checking-of-units-in-servo/ -use self::{boxed_limbs::BoxedLimbs, n0::N0}; +use self::boxed_limbs::BoxedLimbs; pub(crate) use self::{ modulus::{Modulus, PartialModulus, MODULUS_MAX_LIMBS}, private_exponent::PrivateExponent, }; +use super::n0::N0; pub(crate) use super::nonnegative::Nonnegative; use crate::{ arithmetic::montgomery::*, - bits, bssl, c, cpu, error, + bits, c, cpu, error, limb::{self, Limb, LimbMask, LIMB_BITS}, polyfill::u64_from_usize, }; use alloc::vec; use core::{marker::PhantomData, num::NonZeroU64}; -mod bn_mul_mont_fallback; mod boxed_limbs; mod modulus; -mod n0; mod private_exponent; /// A prime modulus. @@ -321,9 +320,9 @@ impl One { // 2**LIMB_BITS such that R > m. // // Even though the assembly on some 32-bit platforms works with 64-bit - // values, using `LIMB_BITS` here, rather than `N0_LIMBS_USED * LIMB_BITS`, + // values, using `LIMB_BITS` here, rather than `N0::LIMBS_USED * LIMB_BITS`, // is correct because R**2 will still be a multiple of the latter as - // `N0_LIMBS_USED` is either one or two. + // `N0::LIMBS_USED` is either one or two. fn newRR(m: &PartialModulus, m_bits: bits::BitLength) -> Self { let m_bits = m_bits.as_usize_bits(); let r = (m_bits + (LIMB_BITS - 1)) / LIMB_BITS * LIMB_BITS; @@ -445,7 +444,7 @@ pub fn elem_exp_consttime( exponent: &PrivateExponent, m: &Modulus, ) -> Result, error::Unspecified> { - use crate::limb::Window; + use crate::{bssl, limb::Window}; const WINDOW_BITS: usize = 5; const TABLE_ENTRIES: usize = 1 << WINDOW_BITS; @@ -779,56 +778,6 @@ fn limbs_mont_mul(r: &mut [Limb], a: &[Limb], m: &[Limb], n0: &N0, _cpu_features } } -fn limbs_from_mont_in_place(r: &mut [Limb], tmp: &mut [Limb], m: &[Limb], n0: &N0) { - prefixed_extern! 
{ - fn bn_from_montgomery_in_place( - r: *mut Limb, - num_r: c::size_t, - a: *mut Limb, - num_a: c::size_t, - n: *const Limb, - num_n: c::size_t, - n0: &N0, - ) -> bssl::Result; - } - Result::from(unsafe { - bn_from_montgomery_in_place( - r.as_mut_ptr(), - r.len(), - tmp.as_mut_ptr(), - tmp.len(), - m.as_ptr(), - m.len(), - n0, - ) - }) - .unwrap() -} - -#[cfg(not(any( - target_arch = "aarch64", - target_arch = "arm", - target_arch = "x86", - target_arch = "x86_64" -)))] -fn limbs_mul(r: &mut [Limb], a: &[Limb], b: &[Limb]) { - debug_assert_eq!(r.len(), 2 * a.len()); - debug_assert_eq!(a.len(), b.len()); - let ab_len = a.len(); - - r[..ab_len].fill(0); - for (i, &b_limb) in b.iter().enumerate() { - r[ab_len + i] = unsafe { - limbs_mul_add_limb( - (&mut r[i..][..ab_len]).as_mut_ptr(), - a.as_ptr(), - b_limb, - ab_len, - ) - }; - } -} - /// r = a * b #[cfg(not(target_arch = "x86_64"))] fn limbs_mont_product( @@ -882,21 +831,6 @@ prefixed_extern! { ); } -#[cfg(any( - test, - not(any( - target_arch = "aarch64", - target_arch = "arm", - target_arch = "x86_64", - target_arch = "x86" - )) -))] -prefixed_extern! 
{ - // `r` must not alias `a` - #[must_use] - fn limbs_mul_add_limb(r: *mut Limb, a: *const Limb, b: Limb, num_limbs: c::size_t) -> Limb; -} - #[cfg(test)] mod tests { use super::{modulus::MODULUS_MIN_LIMBS, *}; @@ -1100,36 +1034,4 @@ mod tests { fn into_encoded(a: Elem, m: &Modulus) -> Elem { elem_mul(m.oneRR().as_ref(), a, m) } - - #[test] - // TODO: wasm - fn test_mul_add_words() { - const ZERO: Limb = 0; - const MAX: Limb = ZERO.wrapping_sub(1); - static TEST_CASES: &[(&[Limb], &[Limb], Limb, Limb, &[Limb])] = &[ - (&[0], &[0], 0, 0, &[0]), - (&[MAX], &[0], MAX, 0, &[MAX]), - (&[0], &[MAX], MAX, MAX - 1, &[1]), - (&[MAX], &[MAX], MAX, MAX, &[0]), - (&[0, 0], &[MAX, MAX], MAX, MAX - 1, &[1, MAX]), - (&[1, 0], &[MAX, MAX], MAX, MAX - 1, &[2, MAX]), - (&[MAX, 0], &[MAX, MAX], MAX, MAX, &[0, 0]), - (&[0, 1], &[MAX, MAX], MAX, MAX, &[1, 0]), - (&[MAX, MAX], &[MAX, MAX], MAX, MAX, &[0, MAX]), - ]; - - for (i, (r_input, a, w, expected_retval, expected_r)) in TEST_CASES.iter().enumerate() { - extern crate std; - let mut r = std::vec::Vec::from(*r_input); - assert_eq!(r.len(), a.len()); // Sanity check - let actual_retval = - unsafe { limbs_mul_add_limb(r.as_mut_ptr(), a.as_ptr(), *w, a.len()) }; - assert_eq!(&r, expected_r, "{}: {:x?} != {:x?}", i, &r[..], expected_r); - assert_eq!( - actual_retval, *expected_retval, - "{}: {:x?} != {:x?}", - i, actual_retval, *expected_retval - ); - } - } } diff --git a/src/arithmetic/bigint/bn_mul_mont_fallback.rs b/src/arithmetic/bigint/bn_mul_mont_fallback.rs deleted file mode 100644 index 1357858d07..0000000000 --- a/src/arithmetic/bigint/bn_mul_mont_fallback.rs +++ /dev/null @@ -1,51 +0,0 @@ -// Copyright 2015-2022 Brian Smith. -// -// Permission to use, copy, modify, and/or distribute this software for any -// purpose with or without fee is hereby granted, provided that the above -// copyright notice and this permission notice appear in all copies. 
-// -// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES -// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF -// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY -// SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES -// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION -// OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN -// CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - -#![cfg(not(any( - target_arch = "aarch64", - target_arch = "arm", - target_arch = "x86", - target_arch = "x86_64" -)))] - -use super::{limbs_from_mont_in_place, limbs_mul, Limb, MODULUS_MAX_LIMBS, N0}; -use crate::c; - -prefixed_export! { - unsafe fn bn_mul_mont( - r: *mut Limb, - a: *const Limb, - b: *const Limb, - n: *const Limb, - n0: &N0, - num_limbs: c::size_t, - ) { - // The mutable pointer `r` may alias `a` and/or `b`, so the lifetimes of - // any slices for `a` or `b` must not overlap with the lifetime of any - // mutable for `r`. - - // Nothing aliases `n` - let n = unsafe { core::slice::from_raw_parts(n, num_limbs) }; - - let mut tmp = [0; 2 * MODULUS_MAX_LIMBS]; - let tmp = &mut tmp[..(2 * num_limbs)]; - { - let a: &[Limb] = unsafe { core::slice::from_raw_parts(a, num_limbs) }; - let b: &[Limb] = unsafe { core::slice::from_raw_parts(b, num_limbs) }; - limbs_mul(tmp, a, b); - } - let r: &mut [Limb] = unsafe { core::slice::from_raw_parts_mut(r, num_limbs) }; - limbs_from_mont_in_place(r, tmp, n, n0); - } -} diff --git a/src/arithmetic/bigint/modulus.rs b/src/arithmetic/bigint/modulus.rs index ca0724cc2c..ba6acfc958 100644 --- a/src/arithmetic/bigint/modulus.rs +++ b/src/arithmetic/bigint/modulus.rs @@ -13,8 +13,10 @@ // CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
use super::{ - super::montgomery::{Unencoded, R, RR}, - n0::{N0, N0_LIMBS_USED}, + super::{ + montgomery::{Unencoded, R, RR}, + n0::N0, + }, BoxedLimbs, Elem, Nonnegative, One, PublicModulus, SlightlySmallerModulus, SmallerModulus, Width, }; @@ -32,7 +34,7 @@ use core::marker::PhantomData; /// same. pub const MODULUS_MIN_LIMBS: usize = 4; -pub const MODULUS_MAX_LIMBS: usize = 8192 / LIMB_BITS; +pub const MODULUS_MAX_LIMBS: usize = super::super::BIGINT_MODULUS_MAX_LIMBS; /// The modulus *m* for a ring ℤ/mℤ, along with the precomputed values needed /// for efficient Montgomery multiplication modulo *m*. The value must be odd @@ -43,10 +45,10 @@ pub struct Modulus { // n0 * N == -1 (mod r). // - // r == 2**(N0_LIMBS_USED * LIMB_BITS) and LG_LITTLE_R == lg(r). This + // r == 2**(N0::LIMBS_USED * LIMB_BITS) and LG_LITTLE_R == lg(r). This // ensures that we can do integer division by |r| by simply ignoring - // `N0_LIMBS_USED` limbs. Similarly, we can calculate values modulo `r` by - // just looking at the lowest `N0_LIMBS_USED` limbs. This is what makes + // `N0::LIMBS_USED` limbs. Similarly, we can calculate values modulo `r` by + // just looking at the lowest `N0::LIMBS_USED` limbs. This is what makes // Montgomery multiplication efficient. // // As shown in Algorithm 1 of "Fast Prime Field Elliptic Curve Cryptography @@ -151,7 +153,7 @@ impl Modulus { } // n_mod_r = n % r. As explained in the documentation for `n0`, this is - // done by taking the lowest `N0_LIMBS_USED` limbs of `n`. + // done by taking the lowest `N0::LIMBS_USED` limbs of `n`. #[allow(clippy::useless_conversion)] let n0 = { prefixed_extern! { @@ -161,7 +163,7 @@ impl Modulus { // XXX: u64::from isn't guaranteed to be constant time. let mut n_mod_r: u64 = u64::from(n[0]); - if N0_LIMBS_USED == 2 { + if N0::LIMBS_USED == 2 { // XXX: If we use `<< LIMB_BITS` here then 64-bit builds // fail to compile because of `deny(exceeding_bitshifts)`. 
debug_assert_eq!(LIMB_BITS, 32); diff --git a/src/arithmetic/montgomery.rs b/src/arithmetic/montgomery.rs index 6f95b2e254..0bc8a0e202 100644 --- a/src/arithmetic/montgomery.rs +++ b/src/arithmetic/montgomery.rs @@ -1,4 +1,4 @@ -// Copyright 2017 Brian Smith. +// Copyright 2017-2023 Brian Smith. // // Permission to use, copy, modify, and/or distribute this software for any // purpose with or without fee is hereby granted, provided that the above @@ -86,3 +86,157 @@ impl ProductEncoding for (RR, Unencoded) { impl ProductEncoding for (RR, RInverse) { type Output = <(RInverse, RR) as ProductEncoding>::Output; } + +#[allow(unused_imports)] +use {super::n0::N0, crate::{bssl, c, limb::Limb}}; + + +#[cfg(not(any( + target_arch = "aarch64", + target_arch = "arm", + target_arch = "x86", + target_arch = "x86_64" +)))] +prefixed_export! { + unsafe fn bn_mul_mont( + r: *mut Limb, + a: *const Limb, + b: *const Limb, + n: *const Limb, + n0: &N0, + num_limbs: c::size_t, + ) { + use super::BIGINT_MODULUS_MAX_LIMBS; + + // The mutable pointer `r` may alias `a` and/or `b`, so the lifetimes of + // any slices for `a` or `b` must not overlap with the lifetime of any + // mutable for `r`. + + // Nothing aliases `n` + let n = unsafe { core::slice::from_raw_parts(n, num_limbs) }; + + let mut tmp = [0; 2 * BIGINT_MODULUS_MAX_LIMBS]; + let tmp = &mut tmp[..(2 * num_limbs)]; + { + let a: &[Limb] = unsafe { core::slice::from_raw_parts(a, num_limbs) }; + let b: &[Limb] = unsafe { core::slice::from_raw_parts(b, num_limbs) }; + limbs_mul(tmp, a, b); + } + let r: &mut [Limb] = unsafe { core::slice::from_raw_parts_mut(r, num_limbs) }; + limbs_from_mont_in_place(r, tmp, n, n0); + } +} + +// `bigint` needs this when the `alloc` feature is enabled. `bn_mul_mont` above needs this when +// we are using the platforms for which we don't have `bn_mul_mont` in assembly. 
+#[cfg(any( + feature = "alloc", + not(any( + target_arch = "aarch64", + target_arch = "arm", + target_arch = "x86", + target_arch = "x86_64" + )) +))] +pub(super) fn limbs_from_mont_in_place(r: &mut [Limb], tmp: &mut [Limb], m: &[Limb], n0: &N0) { + prefixed_extern! { + fn bn_from_montgomery_in_place( + r: *mut Limb, + num_r: c::size_t, + a: *mut Limb, + num_a: c::size_t, + n: *const Limb, + num_n: c::size_t, + n0: &N0, + ) -> bssl::Result; + } + Result::from(unsafe { + bn_from_montgomery_in_place( + r.as_mut_ptr(), + r.len(), + tmp.as_mut_ptr(), + tmp.len(), + m.as_ptr(), + m.len(), + n0, + ) + }) + .unwrap() +} + +#[cfg(not(any( + target_arch = "aarch64", + target_arch = "arm", + target_arch = "x86", + target_arch = "x86_64" +)))] +fn limbs_mul(r: &mut [Limb], a: &[Limb], b: &[Limb]) { + debug_assert_eq!(r.len(), 2 * a.len()); + debug_assert_eq!(a.len(), b.len()); + let ab_len = a.len(); + + r[..ab_len].fill(0); + for (i, &b_limb) in b.iter().enumerate() { + r[ab_len + i] = unsafe { + limbs_mul_add_limb( + (&mut r[i..][..ab_len]).as_mut_ptr(), + a.as_ptr(), + b_limb, + ab_len, + ) + }; + } +} + +#[cfg(any( + test, + not(any( + target_arch = "aarch64", + target_arch = "arm", + target_arch = "x86_64", + target_arch = "x86" + )) +))] +prefixed_extern! 
{ + // `r` must not alias `a` + #[must_use] + fn limbs_mul_add_limb(r: *mut Limb, a: *const Limb, b: Limb, num_limbs: c::size_t) -> Limb; +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::limb::Limb; + + #[test] + // TODO: wasm + fn test_mul_add_words() { + const ZERO: Limb = 0; + const MAX: Limb = ZERO.wrapping_sub(1); + static TEST_CASES: &[(&[Limb], &[Limb], Limb, Limb, &[Limb])] = &[ + (&[0], &[0], 0, 0, &[0]), + (&[MAX], &[0], MAX, 0, &[MAX]), + (&[0], &[MAX], MAX, MAX - 1, &[1]), + (&[MAX], &[MAX], MAX, MAX, &[0]), + (&[0, 0], &[MAX, MAX], MAX, MAX - 1, &[1, MAX]), + (&[1, 0], &[MAX, MAX], MAX, MAX - 1, &[2, MAX]), + (&[MAX, 0], &[MAX, MAX], MAX, MAX, &[0, 0]), + (&[0, 1], &[MAX, MAX], MAX, MAX, &[1, 0]), + (&[MAX, MAX], &[MAX, MAX], MAX, MAX, &[0, MAX]), + ]; + + for (i, (r_input, a, w, expected_retval, expected_r)) in TEST_CASES.iter().enumerate() { + extern crate std; + let mut r = std::vec::Vec::from(*r_input); + assert_eq!(r.len(), a.len()); // Sanity check + let actual_retval = + unsafe { limbs_mul_add_limb(r.as_mut_ptr(), a.as_ptr(), *w, a.len()) }; + assert_eq!(&r, expected_r, "{}: {:x?} != {:x?}", i, &r[..], expected_r); + assert_eq!( + actual_retval, *expected_retval, + "{}: {:x?} != {:x?}", + i, actual_retval, *expected_retval + ); + } + } +} diff --git a/src/arithmetic/bigint/n0.rs b/src/arithmetic/n0.rs similarity index 86% rename from src/arithmetic/bigint/n0.rs rename to src/arithmetic/n0.rs index 6a185d4062..609ee9bdec 100644 --- a/src/arithmetic/bigint/n0.rs +++ b/src/arithmetic/n0.rs @@ -12,13 +12,16 @@ // OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN // CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
-use super::{Limb, LIMB_BITS}; +use crate::limb::Limb; #[derive(Clone)] #[repr(transparent)] -pub(super) struct N0([Limb; 2]); +pub(in super::super) struct N0([Limb; 2]); -pub(super) const N0_LIMBS_USED: usize = 64 / LIMB_BITS; +impl N0 { + #[cfg(feature = "alloc")] + pub(super) const LIMBS_USED: usize = 64 / crate::limb::LIMB_BITS; +} impl From<u64> for N0 { #[inline]