From 40018e35cf58dbfac773d3332a35116a87063c67 Mon Sep 17 00:00:00 2001 From: Brian Smith Date: Tue, 17 Oct 2023 09:34:43 -0700 Subject: [PATCH 1/7] NFC P-256: Add `twin_mul` method to `PublicScalarOps`. Allow each curve to provide its own `twin_mul` implementation. For now, use the same implementation we've been using. --- src/ec/suite_b/ecdsa/verification.rs | 16 ++-------------- src/ec/suite_b/ops.rs | 18 ++++++++++++++---- src/ec/suite_b/ops/p256.rs | 5 ++++- src/ec/suite_b/ops/p384.rs | 4 +++- 4 files changed, 23 insertions(+), 20 deletions(-) diff --git a/src/ec/suite_b/ecdsa/verification.rs b/src/ec/suite_b/ecdsa/verification.rs index 2e9f50127d..157753e5cb 100644 --- a/src/ec/suite_b/ecdsa/verification.rs +++ b/src/ec/suite_b/ecdsa/verification.rs @@ -123,7 +123,7 @@ impl EcdsaVerificationAlgorithm { // NSA Guide Step 6: "Compute the elliptic curve point // R = (xR, yR) = u1*G + u2*Q, using EC scalar multiplication and EC // addition. If R is equal to the point at infinity, output INVALID." - let product = twin_mul(self.ops.private_key_ops, &u1, &u2, &peer_pub_key); + let product = (self.ops.twin_mul)(&u1, &u2, &peer_pub_key); // Verify that the point we computed is on the curve; see // `verify_affine_point_is_on_the_curve_scaled` for details on why. It @@ -158,7 +158,7 @@ impl EcdsaVerificationAlgorithm { } if self.ops.elem_less_than(&r, &self.ops.q_minus_n) { self.ops - .private_key_ops + .scalar_ops .common .elem_add(&mut r, &public_key_ops.common.n); if sig_r_equals_x(self.ops, &r, &x, &z2) { @@ -193,18 +193,6 @@ fn split_rs_asn1<'a>( }) } -fn twin_mul( - ops: &PrivateKeyOps, - g_scalar: &Scalar, - p_scalar: &Scalar, - p_xy: &(Elem, Elem), -) -> Point { - // XXX: Inefficient. TODO: implement interleaved wNAF multiplication. 
- let scaled_g = ops.point_mul_base(g_scalar); - let scaled_p = ops.point_mul(p_scalar, p_xy); - ops.common.point_sum(&scaled_g, &scaled_p) -} - /// Verification of fixed-length (PKCS#11 style) ECDSA signatures using the /// P-256 curve and SHA-256. /// diff --git a/src/ec/suite_b/ops.rs b/src/ec/suite_b/ops.rs index b42b958efa..83683a0f73 100644 --- a/src/ec/suite_b/ops.rs +++ b/src/ec/suite_b/ops.rs @@ -270,10 +270,7 @@ pub struct PublicScalarOps { pub scalar_ops: &'static ScalarOps, pub public_key_ops: &'static PublicKeyOps, - // XXX: `PublicScalarOps` shouldn't depend on `PrivateKeyOps`, but it does - // temporarily until `twin_mul` is rewritten. - pub private_key_ops: &'static PrivateKeyOps, - + pub twin_mul: fn(g_scalar: &Scalar, p_scalar: &Scalar, p_xy: &(Elem, Elem)) -> Point, pub q_minus_n: Elem, } @@ -305,6 +302,19 @@ pub struct PrivateScalarOps { pub oneRR_mod_n: Scalar, // 1 * R**2 (mod n). TOOD: Use One. } +// XXX: Inefficient and unnecessarily depends on `PrivateKeyOps`. TODO: implement interleaved wNAF +// multiplication. +fn twin_mul_inefficient( + ops: &PrivateKeyOps, + g_scalar: &Scalar, + p_scalar: &Scalar, + p_xy: &(Elem, Elem), +) -> Point { + let scaled_g = ops.point_mul_base(g_scalar); + let scaled_p = ops.point_mul(p_scalar, p_xy); + ops.common.point_sum(&scaled_g, &scaled_p) +} + // This assumes n < q < 2*n. 
pub fn elem_reduced_to_scalar(ops: &CommonOps, elem: &Elem) -> Scalar { let num_limbs = ops.num_limbs; diff --git a/src/ec/suite_b/ops/p256.rs b/src/ec/suite_b/ops/p256.rs index b7ea524a1d..c0ccbcdc1f 100644 --- a/src/ec/suite_b/ops/p256.rs +++ b/src/ec/suite_b/ops/p256.rs @@ -114,7 +114,10 @@ pub static SCALAR_OPS: ScalarOps = ScalarOps { pub static PUBLIC_SCALAR_OPS: PublicScalarOps = PublicScalarOps { scalar_ops: &SCALAR_OPS, public_key_ops: &PUBLIC_KEY_OPS, - private_key_ops: &PRIVATE_KEY_OPS, + twin_mul: |g_scalar, p_scalar, p_xy| { + twin_mul_inefficient(&PRIVATE_KEY_OPS, g_scalar, p_scalar, p_xy) + }, + q_minus_n: Elem::from_hex("4319055358e8617b0c46353d039cdaae"), }; diff --git a/src/ec/suite_b/ops/p384.rs b/src/ec/suite_b/ops/p384.rs index 6ef4bc3f9e..f424c520d0 100644 --- a/src/ec/suite_b/ops/p384.rs +++ b/src/ec/suite_b/ops/p384.rs @@ -122,7 +122,9 @@ pub static SCALAR_OPS: ScalarOps = ScalarOps { pub static PUBLIC_SCALAR_OPS: PublicScalarOps = PublicScalarOps { scalar_ops: &SCALAR_OPS, public_key_ops: &PUBLIC_KEY_OPS, - private_key_ops: &PRIVATE_KEY_OPS, + twin_mul: |g_scalar, p_scalar, p_xy| { + twin_mul_inefficient(&PRIVATE_KEY_OPS, g_scalar, p_scalar, p_xy) + }, q_minus_n: Elem::from_hex("389cb27e0bc8d21fa7e5f24cb74f58851313e696333ad68c"), }; From 83ceb38075b5b0864ac33e5e05e12cd0d84d167a Mon Sep 17 00:00:00 2001 From: Brian Smith Date: Tue, 17 Oct 2023 10:44:17 -0700 Subject: [PATCH 2/7] Import ecp_nistz256_points_mul_public from BoringSSL. From BoringSSL commit 8d71d244c0debac4079beeb02b5802fde59b94bd. Comment it out until it is modified to work. 
--- crypto/fipsmodule/ec/p256-nistz.c | 68 +++++++++++++++++++++++++++++++ 1 file changed, 68 insertions(+) diff --git a/crypto/fipsmodule/ec/p256-nistz.c b/crypto/fipsmodule/ec/p256-nistz.c index 33add75fcb..abd7739e9e 100644 --- a/crypto/fipsmodule/ec/p256-nistz.c +++ b/crypto/fipsmodule/ec/p256-nistz.c @@ -284,4 +284,72 @@ void p256_point_mul_base(P256_POINT *r, const Limb scalar[P256_LIMBS]) { limbs_copy(r->Z, p.Z, P256_LIMBS); } +#if 0 + +static void ecp_nistz256_points_mul_public(const EC_GROUP *group, + EC_JACOBIAN *r, + const EC_SCALAR *g_scalar, + const EC_JACOBIAN *p_, + const EC_SCALAR *p_scalar) { + assert(p_ != NULL && p_scalar != NULL && g_scalar != NULL); + + alignas(32) P256_POINT p; + uint8_t p_str[33]; + OPENSSL_memcpy(p_str, g_scalar->words, 32); + p_str[32] = 0; + + // First window + size_t index = 0; + size_t wvalue = calc_first_wvalue(&index, p_str); + + // Convert |p| from affine to Jacobian coordinates. We set Z to zero if |p| + // is infinity and |ONE| otherwise. |p| was computed from the table, so it + // is infinity iff |wvalue >> 1| is zero. 
+ if ((wvalue >> 1) != 0) { + OPENSSL_memcpy(p.X, &ecp_nistz256_precomputed[0][(wvalue >> 1) - 1].X, + sizeof(p.X)); + OPENSSL_memcpy(p.Y, &ecp_nistz256_precomputed[0][(wvalue >> 1) - 1].Y, + sizeof(p.Y)); + OPENSSL_memcpy(p.Z, ONE, sizeof(p.Z)); + } else { + OPENSSL_memset(p.X, 0, sizeof(p.X)); + OPENSSL_memset(p.Y, 0, sizeof(p.Y)); + OPENSSL_memset(p.Z, 0, sizeof(p.Z)); + } + + if ((wvalue & 1) == 1) { + ecp_nistz256_neg(p.Y, p.Y); + } + + for (int i = 1; i < 37; i++) { + wvalue = calc_wvalue(&index, p_str); + if ((wvalue >> 1) == 0) { + continue; + } + + alignas(32) P256_POINT_AFFINE t; + OPENSSL_memcpy(&t, &ecp_nistz256_precomputed[i][(wvalue >> 1) - 1], + sizeof(t)); + if ((wvalue & 1) == 1) { + ecp_nistz256_neg(t.Y, t.Y); + } + + // Note |ecp_nistz256_point_add_affine| does not work if |p| and |t| are + // the same non-infinity point, so it is important that we compute the + // |g_scalar| term before the |p_scalar| term. + ecp_nistz256_point_add_affine(&p, &p, &t); + } + + alignas(32) P256_POINT tmp; + ecp_nistz256_windowed_mul(group, &tmp, p_, p_scalar); + ecp_nistz256_point_add(&p, &p, &tmp); + + assert(group->field.N.width == P256_LIMBS); + OPENSSL_memcpy(r->X.words, p.X, P256_LIMBS * sizeof(BN_ULONG)); + OPENSSL_memcpy(r->Y.words, p.Y, P256_LIMBS * sizeof(BN_ULONG)); + OPENSSL_memcpy(r->Z.words, p.Z, P256_LIMBS * sizeof(BN_ULONG)); +} + +#endif + #endif /* defined(OPENSSL_USE_NISTZ256) */ From 4fa99059cc9720e929d5a5b4a1a3bb2410678f1d Mon Sep 17 00:00:00 2001 From: Brian Smith Date: Tue, 17 Oct 2023 10:30:49 -0700 Subject: [PATCH 3/7] P-256 ECDSA verification: Use optimized nistz256 verification. Import the optimized nistz256 verification from BoringSSL. 
--- build.rs | 1 + crypto/fipsmodule/ec/p256-nistz.c | 27 ++++++++++--------------- src/ec/suite_b/ops/p256.rs | 33 ++++++++++++++++++++++++++++++- 3 files changed, 43 insertions(+), 18 deletions(-) diff --git a/build.rs b/build.rs index 5c328aa35b..a1e1f41d81 100644 --- a/build.rs +++ b/build.rs @@ -958,6 +958,7 @@ fn prefix_all_symbols(pp: char, prefix_prefix: &str, prefix: &str) -> String { "p256_point_double", "p256_point_mul", "p256_point_mul_base", + "p256_points_mul_public", "p256_scalar_mul_mont", "p256_scalar_sqr_rep_mont", "p256_sqr_mont", diff --git a/crypto/fipsmodule/ec/p256-nistz.c b/crypto/fipsmodule/ec/p256-nistz.c index abd7739e9e..f0fc61424d 100644 --- a/crypto/fipsmodule/ec/p256-nistz.c +++ b/crypto/fipsmodule/ec/p256-nistz.c @@ -284,18 +284,14 @@ void p256_point_mul_base(P256_POINT *r, const Limb scalar[P256_LIMBS]) { limbs_copy(r->Z, p.Z, P256_LIMBS); } -#if 0 - -static void ecp_nistz256_points_mul_public(const EC_GROUP *group, - EC_JACOBIAN *r, - const EC_SCALAR *g_scalar, - const EC_JACOBIAN *p_, - const EC_SCALAR *p_scalar) { - assert(p_ != NULL && p_scalar != NULL && g_scalar != NULL); - +void p256_points_mul_public(P256_POINT *r, + const Limb g_scalar[P256_LIMBS], + const Limb p_scalar[P256_LIMBS], + const Limb p_x[P256_LIMBS], + const Limb p_y[P256_LIMBS]) { alignas(32) P256_POINT p; uint8_t p_str[33]; - OPENSSL_memcpy(p_str, g_scalar->words, 32); + OPENSSL_memcpy(p_str, g_scalar, 32); p_str[32] = 0; // First window @@ -341,15 +337,12 @@ static void ecp_nistz256_points_mul_public(const EC_GROUP *group, } alignas(32) P256_POINT tmp; - ecp_nistz256_windowed_mul(group, &tmp, p_, p_scalar); + ecp_nistz256_windowed_mul(&tmp, p_scalar, p_x, p_y); ecp_nistz256_point_add(&p, &p, &tmp); - assert(group->field.N.width == P256_LIMBS); - OPENSSL_memcpy(r->X.words, p.X, P256_LIMBS * sizeof(BN_ULONG)); - OPENSSL_memcpy(r->Y.words, p.Y, P256_LIMBS * sizeof(BN_ULONG)); - OPENSSL_memcpy(r->Z.words, p.Z, P256_LIMBS * sizeof(BN_ULONG)); + 
OPENSSL_memcpy(r->X, p.X, P256_LIMBS * sizeof(BN_ULONG)); + OPENSSL_memcpy(r->Y, p.Y, P256_LIMBS * sizeof(BN_ULONG)); + OPENSSL_memcpy(r->Z, p.Z, P256_LIMBS * sizeof(BN_ULONG)); } -#endif - #endif /* defined(OPENSSL_USE_NISTZ256) */ diff --git a/src/ec/suite_b/ops/p256.rs b/src/ec/suite_b/ops/p256.rs index c0ccbcdc1f..566dbfe2be 100644 --- a/src/ec/suite_b/ops/p256.rs +++ b/src/ec/suite_b/ops/p256.rs @@ -114,6 +114,11 @@ pub static SCALAR_OPS: ScalarOps = ScalarOps { pub static PUBLIC_SCALAR_OPS: PublicScalarOps = PublicScalarOps { scalar_ops: &SCALAR_OPS, public_key_ops: &PUBLIC_KEY_OPS, + + #[cfg(any(target_arch = "aarch64", target_arch = "x86_64"))] + twin_mul: twin_mul_nistz256, + + #[cfg(not(any(target_arch = "aarch64", target_arch = "x86_64")))] twin_mul: |g_scalar, p_scalar, p_xy| { twin_mul_inefficient(&PRIVATE_KEY_OPS, g_scalar, p_scalar, p_xy) }, @@ -121,6 +126,33 @@ pub static PUBLIC_SCALAR_OPS: PublicScalarOps = PublicScalarOps { q_minus_n: Elem::from_hex("4319055358e8617b0c46353d039cdaae"), }; +#[cfg(any(target_arch = "aarch64", target_arch = "x86_64"))] +fn twin_mul_nistz256( + g_scalar: &Scalar, + p_scalar: &Scalar, + (p_x, p_y): &(Elem, Elem), +) -> Point { + prefixed_extern! { + fn p256_points_mul_public(r: *mut Limb, // [3][COMMON_OPS.num_limbs] + g_scalar: *const Limb, // [COMMON_OPS.num_limbs] + p_scalar: *const Limb, // [COMMON_OPS.num_limbs] + p_x: *const Limb, // [COMMON_OPS.num_limbs] + p_y: *const Limb, // [COMMON_OPS.num_limbs] + ); + } + let mut r = Point::new_at_infinity(); + unsafe { + p256_points_mul_public( + r.xyz.as_mut_ptr(), + g_scalar.limbs.as_ptr(), + p_scalar.limbs.as_ptr(), + p_x.limbs.as_ptr(), + p_y.limbs.as_ptr(), + ); + } + r +} + pub static PRIVATE_SCALAR_OPS: PrivateScalarOps = PrivateScalarOps { scalar_ops: &SCALAR_OPS, @@ -273,7 +305,6 @@ prefixed_extern! 
{ p_x: *const Limb, // [COMMON_OPS.num_limbs] p_y: *const Limb, // [COMMON_OPS.num_limbs] ); - fn p256_scalar_mul_mont( r: *mut Limb, // [COMMON_OPS.num_limbs] a: *const Limb, // [COMMON_OPS.num_limbs] From 86f49768e8f01551f326d57db8fd76d404945894 Mon Sep 17 00:00:00 2001 From: Brian Smith Date: Tue, 17 Oct 2023 11:12:20 -0700 Subject: [PATCH 4/7] P-256 ECDSA verification: Clarify multiplication. Move more of the logic for the nistz256 multiplication into Rust. --- build.rs | 2 +- crypto/fipsmodule/ec/p256-nistz.c | 11 ++------ src/ec/suite_b/ops.rs | 10 +++++-- src/ec/suite_b/ops/p256.rs | 46 ++++++++++++++++++------------- 4 files changed, 38 insertions(+), 31 deletions(-) diff --git a/build.rs b/build.rs index a1e1f41d81..3cbae9d926 100644 --- a/build.rs +++ b/build.rs @@ -958,7 +958,7 @@ fn prefix_all_symbols(pp: char, prefix_prefix: &str, prefix: &str) -> String { "p256_point_double", "p256_point_mul", "p256_point_mul_base", - "p256_points_mul_public", + "p256_point_mul_base_vartime", "p256_scalar_mul_mont", "p256_scalar_sqr_rep_mont", "p256_sqr_mont", diff --git a/crypto/fipsmodule/ec/p256-nistz.c b/crypto/fipsmodule/ec/p256-nistz.c index f0fc61424d..c40b1085db 100644 --- a/crypto/fipsmodule/ec/p256-nistz.c +++ b/crypto/fipsmodule/ec/p256-nistz.c @@ -284,11 +284,8 @@ void p256_point_mul_base(P256_POINT *r, const Limb scalar[P256_LIMBS]) { limbs_copy(r->Z, p.Z, P256_LIMBS); } -void p256_points_mul_public(P256_POINT *r, - const Limb g_scalar[P256_LIMBS], - const Limb p_scalar[P256_LIMBS], - const Limb p_x[P256_LIMBS], - const Limb p_y[P256_LIMBS]) { +void p256_point_mul_base_vartime(P256_POINT *r, + const Limb g_scalar[P256_LIMBS]) { alignas(32) P256_POINT p; uint8_t p_str[33]; OPENSSL_memcpy(p_str, g_scalar, 32); @@ -336,10 +333,6 @@ void p256_points_mul_public(P256_POINT *r, ecp_nistz256_point_add_affine(&p, &p, &t); } - alignas(32) P256_POINT tmp; - ecp_nistz256_windowed_mul(&tmp, p_scalar, p_x, p_y); - ecp_nistz256_point_add(&p, &p, &tmp); - 
OPENSSL_memcpy(r->X, p.X, P256_LIMBS * sizeof(BN_ULONG)); OPENSSL_memcpy(r->Y, p.Y, P256_LIMBS * sizeof(BN_ULONG)); OPENSSL_memcpy(r->Z, p.Z, P256_LIMBS * sizeof(BN_ULONG)); diff --git a/src/ec/suite_b/ops.rs b/src/ec/suite_b/ops.rs index 83683a0f73..5aa241390e 100644 --- a/src/ec/suite_b/ops.rs +++ b/src/ec/suite_b/ops.rs @@ -979,6 +979,7 @@ mod tests { fn p256_point_mul_base_test() { point_mul_base_tests( &p256::PRIVATE_KEY_OPS, + |s| p256::PRIVATE_KEY_OPS.point_mul_base(s), test_file!("ops/p256_point_mul_base_tests.txt"), ); } @@ -987,16 +988,21 @@ mod tests { fn p384_point_mul_base_test() { point_mul_base_tests( &p384::PRIVATE_KEY_OPS, + |s| p384::PRIVATE_KEY_OPS.point_mul_base(s), test_file!("ops/p384_point_mul_base_tests.txt"), ); } - fn point_mul_base_tests(ops: &PrivateKeyOps, test_file: test::File) { + pub(super) fn point_mul_base_tests( + ops: &PrivateKeyOps, + f: impl Fn(&Scalar) -> Point, + test_file: test::File, + ) { test::run(test_file, |section, test_case| { assert_eq!(section, ""); let g_scalar = consume_scalar(ops.common, test_case, "g_scalar"); let expected_result = consume_point(ops, test_case, "r"); - let actual_result = ops.point_mul_base(&g_scalar); + let actual_result = f(&g_scalar); assert_point_actual_equals_expected(ops, &actual_result, &expected_result); Ok(()) }) diff --git a/src/ec/suite_b/ops/p256.rs b/src/ec/suite_b/ops/p256.rs index 566dbfe2be..adbed60936 100644 --- a/src/ec/suite_b/ops/p256.rs +++ b/src/ec/suite_b/ops/p256.rs @@ -127,30 +127,24 @@ pub static PUBLIC_SCALAR_OPS: PublicScalarOps = PublicScalarOps { }; #[cfg(any(target_arch = "aarch64", target_arch = "x86_64"))] -fn twin_mul_nistz256( - g_scalar: &Scalar, - p_scalar: &Scalar, - (p_x, p_y): &(Elem, Elem), -) -> Point { +fn twin_mul_nistz256(g_scalar: &Scalar, p_scalar: &Scalar, p_xy: &(Elem, Elem)) -> Point { + let scaled_g = point_mul_base_vartime(g_scalar); + let scaled_p = PRIVATE_KEY_OPS.point_mul(p_scalar, p_xy); + PRIVATE_KEY_OPS.common.point_sum(&scaled_g, 
&scaled_p) +} + +#[cfg(any(target_arch = "aarch64", target_arch = "x86_64"))] +fn point_mul_base_vartime(g_scalar: &Scalar) -> Point { prefixed_extern! { - fn p256_points_mul_public(r: *mut Limb, // [3][COMMON_OPS.num_limbs] - g_scalar: *const Limb, // [COMMON_OPS.num_limbs] - p_scalar: *const Limb, // [COMMON_OPS.num_limbs] - p_x: *const Limb, // [COMMON_OPS.num_limbs] - p_y: *const Limb, // [COMMON_OPS.num_limbs] + fn p256_point_mul_base_vartime(r: *mut Limb, // [3][COMMON_OPS.num_limbs] + g_scalar: *const Limb, // [COMMON_OPS.num_limbs] ); } - let mut r = Point::new_at_infinity(); + let mut scaled_g = Point::new_at_infinity(); unsafe { - p256_points_mul_public( - r.xyz.as_mut_ptr(), - g_scalar.limbs.as_ptr(), - p_scalar.limbs.as_ptr(), - p_x.limbs.as_ptr(), - p_y.limbs.as_ptr(), - ); + p256_point_mul_base_vartime(scaled_g.xyz.as_mut_ptr(), g_scalar.limbs.as_ptr()); } - r + scaled_g } pub static PRIVATE_SCALAR_OPS: PrivateScalarOps = PrivateScalarOps { @@ -316,3 +310,17 @@ prefixed_extern! { rep: Limb, ); } + +#[cfg(test)] +mod tests { + #[cfg(any(target_arch = "aarch64", target_arch = "x86_64"))] + #[test] + fn p256_point_mul_base_vartime_test() { + use super::{super::tests::point_mul_base_tests, *}; + point_mul_base_tests( + &PRIVATE_KEY_OPS, + point_mul_base_vartime, + test_file!("p256_point_mul_base_tests.txt"), + ); + } +} From e6728bfb71fcf88df207e5e0c31411abba95c22c Mon Sep 17 00:00:00 2001 From: Brian Smith Date: Tue, 17 Oct 2023 14:25:41 -0700 Subject: [PATCH 5/7] P-256 nistz: Use arrays instead of P256_POINT in boundary functions. Better match the Rust declarations of these functions. Prepare to support more target platforms and more weird things (like P-521) by avoiding any kind of alignment assumptions at the language boundary (or elsewhere). 
--- crypto/fipsmodule/ec/p256-nistz.c | 16 ++--- crypto/fipsmodule/ec/p256.c | 112 ++++++++++++++++++++---------- 2 files changed, 85 insertions(+), 43 deletions(-) diff --git a/crypto/fipsmodule/ec/p256-nistz.c b/crypto/fipsmodule/ec/p256-nistz.c index c40b1085db..aa6344c68b 100644 --- a/crypto/fipsmodule/ec/p256-nistz.c +++ b/crypto/fipsmodule/ec/p256-nistz.c @@ -232,18 +232,18 @@ static crypto_word_t calc_wvalue(size_t *index, const uint8_t p_str[33]) { return booth_recode_w7(wvalue); } -void p256_point_mul(P256_POINT *r, const Limb p_scalar[P256_LIMBS], +void p256_point_mul(Limb r[3][P256_LIMBS], const Limb p_scalar[P256_LIMBS], const Limb p_x[P256_LIMBS], const Limb p_y[P256_LIMBS]) { alignas(32) P256_POINT out; ecp_nistz256_windowed_mul(&out, p_scalar, p_x, p_y); - limbs_copy(r->X, out.X, P256_LIMBS); - limbs_copy(r->Y, out.Y, P256_LIMBS); - limbs_copy(r->Z, out.Z, P256_LIMBS); + limbs_copy(r[0], out.X, P256_LIMBS); + limbs_copy(r[1], out.Y, P256_LIMBS); + limbs_copy(r[2], out.Z, P256_LIMBS); } -void p256_point_mul_base(P256_POINT *r, const Limb scalar[P256_LIMBS]) { +void p256_point_mul_base(Limb r[3][P256_LIMBS], const Limb scalar[P256_LIMBS]) { P256_SCALAR_BYTES p_str; p256_scalar_bytes_from_limbs(p_str, scalar); @@ -279,9 +279,9 @@ void p256_point_mul_base(P256_POINT *r, const Limb scalar[P256_LIMBS]) { ecp_nistz256_point_add_affine(&p, &p, &t); } - limbs_copy(r->X, p.X, P256_LIMBS); - limbs_copy(r->Y, p.Y, P256_LIMBS); - limbs_copy(r->Z, p.Z, P256_LIMBS); + limbs_copy(r[0], p.X, P256_LIMBS); + limbs_copy(r[1], p.Y, P256_LIMBS); + limbs_copy(r[2], p.Z, P256_LIMBS); } void p256_point_mul_base_vartime(P256_POINT *r, diff --git a/crypto/fipsmodule/ec/p256.c b/crypto/fipsmodule/ec/p256.c index 8d6152486e..7b3bcfb612 100644 --- a/crypto/fipsmodule/ec/p256.c +++ b/crypto/fipsmodule/ec/p256.c @@ -96,6 +96,21 @@ static void fiat_p256_cmovznz(fiat_p256_limb_t out[FIAT_P256_NLIMBS], fiat_p256_selectznz(out, !!t, z, nz); } +static void 
fiat_p256_from_words(fiat_p256_felem out, + const Limb in[32 / sizeof(BN_ULONG)]) { + // Typically, |BN_ULONG| and |fiat_p256_limb_t| will be the same type, but on + // 64-bit platforms without |uint128_t|, they are different. However, on + // little-endian systems, |uint64_t[4]| and |uint32_t[8]| have the same + // layout. + OPENSSL_memcpy(out, in, 32); +} + +static void fiat_p256_to_words(Limb out[32 / sizeof(BN_ULONG)], const fiat_p256_felem in) { + // See |fiat_p256_from_words|. + OPENSSL_memcpy(out, in, 32); +} + + // Group operations // ---------------- // @@ -339,8 +354,8 @@ static crypto_word_t fiat_p256_get_bit(const Limb in[P256_LIMBS], int i) { #endif } -void p256_point_mul(P256_POINT *r, const Limb scalar[P256_LIMBS], - const Limb p_x[P256_LIMBS], const Limb p_y[P256_LIMBS]) { +void p256_point_mul(Limb r[3][P256_LIMBS], const Limb scalar[P256_LIMBS], + const Limb p_x[P256_LIMBS], const Limb p_y[P256_LIMBS]) { debug_assert_nonsecret(r != NULL); debug_assert_nonsecret(scalar != NULL); debug_assert_nonsecret(p_x != NULL); @@ -349,9 +364,9 @@ void p256_point_mul(P256_POINT *r, const Limb scalar[P256_LIMBS], fiat_p256_felem p_pre_comp[17][3]; OPENSSL_memset(&p_pre_comp, 0, sizeof(p_pre_comp)); // Precompute multiples. 
- limbs_copy(&p_pre_comp[1][0][0], p_x, P256_LIMBS); - limbs_copy(&p_pre_comp[1][1][0], p_y, P256_LIMBS); - limbs_copy(&p_pre_comp[1][2][0], fiat_p256_one, P256_LIMBS); + fiat_p256_from_words(p_pre_comp[1][0], p_x); + fiat_p256_from_words(p_pre_comp[1][1], p_y); + fiat_p256_copy(p_pre_comp[1][2], fiat_p256_one); for (size_t j = 2; j <= 16; ++j) { if (j & 1) { @@ -407,12 +422,12 @@ void p256_point_mul(P256_POINT *r, const Limb scalar[P256_LIMBS], } } - limbs_copy(r->X, nq[0], P256_LIMBS); - limbs_copy(r->Y, nq[1], P256_LIMBS); - limbs_copy(r->Z, nq[2], P256_LIMBS); + fiat_p256_to_words(r[0], nq[0]); + fiat_p256_to_words(r[1], nq[1]); + fiat_p256_to_words(r[2], nq[2]); } -void p256_point_mul_base(P256_POINT *r, const Limb scalar[P256_LIMBS]) { +void p256_point_mul_base(Limb r[3][P256_LIMBS], const Limb scalar[P256_LIMBS]) { // Set nq to the point at infinity. fiat_p256_felem nq[3] = {{0}, {0}, {0}}, tmp[3]; @@ -453,45 +468,72 @@ void p256_point_mul_base(P256_POINT *r, const Limb scalar[P256_LIMBS]) { tmp[0], tmp[1], tmp[2]); } - limbs_copy(r->X, nq[0], P256_LIMBS); - limbs_copy(r->Y, nq[1], P256_LIMBS); - limbs_copy(r->Z, nq[2], P256_LIMBS); + fiat_p256_to_words(r[0], nq[0]); + fiat_p256_to_words(r[1], nq[1]); + fiat_p256_to_words(r[2], nq[2]); } void p256_mul_mont(Limb r[P256_LIMBS], const Limb a[P256_LIMBS], - const Limb b[P256_LIMBS]) { - fiat_p256_mul(r, a, b); + const Limb b[P256_LIMBS]) { + fiat_p256_felem a_, b_; + fiat_p256_from_words(a_, a); + fiat_p256_from_words(b_, b); + fiat_p256_mul(a_, a_, b_); + fiat_p256_to_words(r, a_); } void p256_sqr_mont(Limb r[P256_LIMBS], const Limb a[P256_LIMBS]) { - fiat_p256_square(r, a); + fiat_p256_felem x; + fiat_p256_from_words(x, a); + fiat_p256_square(x, x); + fiat_p256_to_words(r, x); } -void p256_point_add(P256_POINT *r, const P256_POINT *a, const P256_POINT *b) { - fiat_p256_point_add(r->X, r->Y, r->Z, - a->X, a->Y, a->Z, - 0, - b->X, b->Y, b->Z); +void p256_point_add(Limb r[3][P256_LIMBS], const Limb 
a[3][P256_LIMBS], + const Limb b[3][P256_LIMBS]) { + fiat_p256_felem x1, y1, z1, x2, y2, z2; + fiat_p256_from_words(x1, a[0]); + fiat_p256_from_words(y1, a[1]); + fiat_p256_from_words(z1, a[2]); + fiat_p256_from_words(x2, b[0]); + fiat_p256_from_words(y2, b[1]); + fiat_p256_from_words(z2, b[2]); + fiat_p256_point_add(x1, y1, z1, x1, y1, z1, 0 /* both Jacobian */, x2, y2, + z2); + fiat_p256_to_words(r[0], x1); + fiat_p256_to_words(r[1], y1); + fiat_p256_to_words(r[2], z1); } -void p256_point_double(P256_POINT *r, const P256_POINT *a) { - fiat_p256_point_double(r->X, r->Y, r->Z, - a->X, a->Y, a->Z); +void p256_point_double(Limb r[3][P256_LIMBS], const Limb a[3][P256_LIMBS]) { + fiat_p256_felem x, y, z; + fiat_p256_from_words(x, a[0]); + fiat_p256_from_words(y, a[1]); + fiat_p256_from_words(z, a[2]); + fiat_p256_point_double(x, y, z, x, y, z); + fiat_p256_to_words(r[0], x); + fiat_p256_to_words(r[1], y); + fiat_p256_to_words(r[2], z); } // For testing only. -void p256_point_add_affine(P256_POINT *r, const P256_POINT *a, - const BN_ULONG b[P256_LIMBS * 2]) { - const Limb *b_x = &b[0]; - const Limb *b_y = &b[P256_LIMBS]; - fiat_p256_felem b_z = {0}; - crypto_word_t b_is_inf = constant_time_select_w( - LIMBS_are_zero(b_x, P256_LIMBS), LIMBS_are_zero(b_y, P256_LIMBS), 0); - fiat_p256_cmovznz(b_z, constant_time_is_zero_w(b_is_inf), b_z, fiat_p256_one); - fiat_p256_point_add(r->X, r->Y, r->Z, - a->X, a->Y, a->Z, - 1, - b_x, b_y, b_z); +void p256_point_add_affine(Limb r[3][P256_LIMBS], const Limb a[3][P256_LIMBS], + const Limb b[2][P256_LIMBS]) { + fiat_p256_felem x1, y1, z1, x2, y2; + fiat_p256_from_words(x1, a[0]); + fiat_p256_from_words(y1, a[1]); + fiat_p256_from_words(z1, a[2]); + fiat_p256_from_words(x2, b[0]); + fiat_p256_from_words(y2, b[1]); + + fiat_p256_felem z2 = {0}; + fiat_p256_cmovznz(z2, fiat_p256_nz(x2) & fiat_p256_nz(y2), z2, fiat_p256_one); + + fiat_p256_point_add(x1, y1, z1, x1, y1, z1, 1 /* mixed */, x2, y2, z2); + + fiat_p256_to_words(r[0], x1); + 
fiat_p256_to_words(r[1], y1); + fiat_p256_to_words(r[2], z1); } #endif From 63aacbe4b8da081812296cee49add70b3ab7491f Mon Sep 17 00:00:00 2001 From: Brian Smith Date: Tue, 17 Oct 2023 13:40:54 -0700 Subject: [PATCH 6/7] Import BoringSSL's ec_compute_wNAF. --- crypto/fipsmodule/bn/internal.h | 3 + crypto/fipsmodule/bn/shift.c | 67 +++++++++++++++ crypto/fipsmodule/ec/internal.h | 84 ++++++++++++++++++ crypto/fipsmodule/ec/p256.c | 96 +++++++++++++++++++++ crypto/fipsmodule/ec/wnaf.c | 148 ++++++++++++++++++++++++++++++++ 5 files changed, 398 insertions(+) create mode 100644 crypto/fipsmodule/bn/shift.c create mode 100644 crypto/fipsmodule/ec/internal.h create mode 100644 crypto/fipsmodule/ec/wnaf.c diff --git a/crypto/fipsmodule/bn/internal.h b/crypto/fipsmodule/bn/internal.h index 3fbb7d7521..20173f080a 100644 --- a/crypto/fipsmodule/bn/internal.h +++ b/crypto/fipsmodule/bn/internal.h @@ -165,6 +165,9 @@ typedef crypto_word_t BN_ULONG; #error "Must define either OPENSSL_32_BIT or OPENSSL_64_BIT" #endif +// bn_is_bit_set_words returns one if bit |bit| is set in |a| and zero +// otherwise. +int bn_is_bit_set_words(const BN_ULONG *a, size_t num, size_t bit); // |num| must be at least 4, at least on x86. // diff --git a/crypto/fipsmodule/bn/shift.c b/crypto/fipsmodule/bn/shift.c new file mode 100644 index 0000000000..76bf6219a2 --- /dev/null +++ b/crypto/fipsmodule/bn/shift.c @@ -0,0 +1,67 @@ +/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) + * All rights reserved. + * + * This package is an SSL implementation written + * by Eric Young (eay@cryptsoft.com). + * The implementation was written so as to conform with Netscapes SSL. + * + * This library is free for commercial and non-commercial use as long as + * the following conditions are aheared to. The following conditions + * apply to all code found in this distribution, be it the RC4, RSA, + * lhash, DES, etc., code; not just the SSL code. 
The SSL documentation + * included with this distribution is covered by the same copyright terms + * except that the holder is Tim Hudson (tjh@cryptsoft.com). + * + * Copyright remains Eric Young's, and as such any Copyright notices in + * the code are not to be removed. + * If this package is used in a product, Eric Young should be given attribution + * as the author of the parts of the library used. + * This can be in the form of a textual message at program startup or + * in documentation (online or textual) provided with the package. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * "This product includes cryptographic software written by + * Eric Young (eay@cryptsoft.com)" + * The word 'cryptographic' can be left out if the rouines from the library + * being used are not cryptographic related :-). + * 4. If you include any Windows specific code (or a derivative thereof) from + * the apps directory (application code) you must include an acknowledgement: + * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)" + * + * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * The licence and distribution terms for any publically available version or + * derivative of this code cannot be changed. i.e. this code cannot simply be + * copied and put under another distribution licence + * [including the GNU Public Licence.] */ + +#include "internal.h" + + +int bn_is_bit_set_words(const BN_ULONG *a, size_t num, size_t bit) { + size_t i = bit / BN_BITS2; + size_t j = bit % BN_BITS2; + if (i >= num) { + return 0; + } + return (a[i] >> j) & 1; +} diff --git a/crypto/fipsmodule/ec/internal.h b/crypto/fipsmodule/ec/internal.h new file mode 100644 index 0000000000..cf7c807256 --- /dev/null +++ b/crypto/fipsmodule/ec/internal.h @@ -0,0 +1,84 @@ +/* Originally written by Bodo Moeller for the OpenSSL project. + * ==================================================================== + * Copyright (c) 1998-2005 The OpenSSL Project. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. 
All advertising materials mentioning features or use of this + * software must display the following acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit. (http://www.openssl.org/)" + * + * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to + * endorse or promote products derived from this software without + * prior written permission. For written permission, please contact + * openssl-core@openssl.org. + * + * 5. Products derived from this software may not be called "OpenSSL" + * nor may "OpenSSL" appear in their names without prior written + * permission of the OpenSSL Project. + * + * 6. Redistributions of any form whatsoever must retain the following + * acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit (http://www.openssl.org/)" + * + * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY + * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + * ==================================================================== + * + * This product includes cryptographic software written by Eric Young + * (eay@cryptsoft.com). This product includes software written by Tim + * Hudson (tjh@cryptsoft.com). 
+ * + */ +/* ==================================================================== + * Copyright 2002 Sun Microsystems, Inc. ALL RIGHTS RESERVED. + * + * Portions of the attached software ("Contribution") are developed by + * SUN MICROSYSTEMS, INC., and are contributed to the OpenSSL project. + * + * The Contribution is licensed pursuant to the OpenSSL open source + * license provided above. + * + * The elliptic curve binary polynomial software is originally written by + * Sheueling Chang Shantz and Douglas Stebila of Sun Microsystems + * Laboratories. */ + +#ifndef OPENSSL_HEADER_EC_INTERNAL_H +#define OPENSSL_HEADER_EC_INTERNAL_H + +#include + +// ec_compute_wNAF writes the modified width-(w+1) Non-Adjacent Form (wNAF) of +// |scalar| to |out|. |out| must have room for |bits| + 1 elements, each of +// which will be either zero or odd with an absolute value less than 2^w +// satisfying +// scalar = \sum_j out[j]*2^j +// where at most one of any w+1 consecutive digits is non-zero +// with the exception that the most significant digit may be only +// w-1 zeros away from that next non-zero digit. +void ec_compute_wNAF(const EC_GROUP *group, int8_t *out, + const EC_SCALAR *scalar, size_t bits, int w); + +#endif // OPENSSL_HEADER_EC_INTERNAL_H diff --git a/crypto/fipsmodule/ec/p256.c b/crypto/fipsmodule/ec/p256.c index 7b3bcfb612..dc67a71f07 100644 --- a/crypto/fipsmodule/ec/p256.c +++ b/crypto/fipsmodule/ec/p256.c @@ -473,6 +473,102 @@ void p256_point_mul_base(Limb r[3][P256_LIMBS], const Limb scalar[P256_LIMBS]) { fiat_p256_to_words(r[2], nq[2]); } +#if 0 + +static void ec_GFp_nistp256_point_mul_public(const EC_GROUP *group, + EC_JACOBIAN *r, + const EC_SCALAR *g_scalar, + const EC_JACOBIAN *p, + const EC_SCALAR *p_scalar) { +#define P256_WSIZE_PUBLIC 4 + // Precompute multiples of |p|. p_pre_comp[i] is (2*i+1) * |p|. 
+ fiat_p256_felem p_pre_comp[1 << (P256_WSIZE_PUBLIC - 1)][3]; + fiat_p256_from_generic(p_pre_comp[0][0], &p->X); + fiat_p256_from_generic(p_pre_comp[0][1], &p->Y); + fiat_p256_from_generic(p_pre_comp[0][2], &p->Z); + fiat_p256_felem p2[3]; + fiat_p256_point_double(p2[0], p2[1], p2[2], p_pre_comp[0][0], + p_pre_comp[0][1], p_pre_comp[0][2]); + for (size_t i = 1; i < OPENSSL_ARRAY_SIZE(p_pre_comp); i++) { + fiat_p256_point_add(p_pre_comp[i][0], p_pre_comp[i][1], p_pre_comp[i][2], + p_pre_comp[i - 1][0], p_pre_comp[i - 1][1], + p_pre_comp[i - 1][2], 0 /* not mixed */, p2[0], p2[1], + p2[2]); + } + + // Set up the coefficients for |p_scalar|. + int8_t p_wNAF[257]; + ec_compute_wNAF(group, p_wNAF, p_scalar, 256, P256_WSIZE_PUBLIC); + + // Set |ret| to the point at infinity. + int skip = 1; // Save some point operations. + fiat_p256_felem ret[3] = {{0}, {0}, {0}}; + for (int i = 256; i >= 0; i--) { + if (!skip) { + fiat_p256_point_double(ret[0], ret[1], ret[2], ret[0], ret[1], ret[2]); + } + + // For the |g_scalar|, we use the precomputed table without the + // constant-time lookup. + if (i <= 31) { + // First, look 32 bits upwards. + crypto_word_t bits = fiat_p256_get_bit(g_scalar, i + 224) << 3; + bits |= fiat_p256_get_bit(g_scalar, i + 160) << 2; + bits |= fiat_p256_get_bit(g_scalar, i + 96) << 1; + bits |= fiat_p256_get_bit(g_scalar, i + 32); + if (bits != 0) { + size_t index = (size_t)(bits - 1); + fiat_p256_point_add(ret[0], ret[1], ret[2], ret[0], ret[1], ret[2], + 1 /* mixed */, fiat_p256_g_pre_comp[1][index][0], + fiat_p256_g_pre_comp[1][index][1], + fiat_p256_one); + skip = 0; + } + + // Second, look at the current position. 
+ bits = fiat_p256_get_bit(g_scalar, i + 192) << 3; + bits |= fiat_p256_get_bit(g_scalar, i + 128) << 2; + bits |= fiat_p256_get_bit(g_scalar, i + 64) << 1; + bits |= fiat_p256_get_bit(g_scalar, i); + if (bits != 0) { + size_t index = (size_t)(bits - 1); + fiat_p256_point_add(ret[0], ret[1], ret[2], ret[0], ret[1], ret[2], + 1 /* mixed */, fiat_p256_g_pre_comp[0][index][0], + fiat_p256_g_pre_comp[0][index][1], + fiat_p256_one); + skip = 0; + } + } + + int digit = p_wNAF[i]; + if (digit != 0) { + assert(digit & 1); + size_t idx = (size_t)(digit < 0 ? (-digit) >> 1 : digit >> 1); + fiat_p256_felem *y = &p_pre_comp[idx][1], tmp; + if (digit < 0) { + fiat_p256_opp(tmp, p_pre_comp[idx][1]); + y = &tmp; + } + if (!skip) { + fiat_p256_point_add(ret[0], ret[1], ret[2], ret[0], ret[1], ret[2], + 0 /* not mixed */, p_pre_comp[idx][0], *y, + p_pre_comp[idx][2]); + } else { + fiat_p256_copy(ret[0], p_pre_comp[idx][0]); + fiat_p256_copy(ret[1], *y); + fiat_p256_copy(ret[2], p_pre_comp[idx][2]); + skip = 0; + } + } + } + + fiat_p256_to_generic(&r->X, ret[0]); + fiat_p256_to_generic(&r->Y, ret[1]); + fiat_p256_to_generic(&r->Z, ret[2]); +} + +#endif + void p256_mul_mont(Limb r[P256_LIMBS], const Limb a[P256_LIMBS], const Limb b[P256_LIMBS]) { fiat_p256_felem a_, b_; diff --git a/crypto/fipsmodule/ec/wnaf.c b/crypto/fipsmodule/ec/wnaf.c new file mode 100644 index 0000000000..56de6cfec5 --- /dev/null +++ b/crypto/fipsmodule/ec/wnaf.c @@ -0,0 +1,148 @@ +/* Originally written by Bodo Moeller for the OpenSSL project. + * ==================================================================== + * Copyright (c) 1998-2005 The OpenSSL Project. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit. (http://www.openssl.org/)" + * + * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to + * endorse or promote products derived from this software without + * prior written permission. For written permission, please contact + * openssl-core@openssl.org. + * + * 5. Products derived from this software may not be called "OpenSSL" + * nor may "OpenSSL" appear in their names without prior written + * permission of the OpenSSL Project. + * + * 6. Redistributions of any form whatsoever must retain the following + * acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit (http://www.openssl.org/)" + * + * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY + * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ==================================================================== + * + * This product includes cryptographic software written by Eric Young + * (eay@cryptsoft.com). This product includes software written by Tim + * Hudson (tjh@cryptsoft.com). + * + */ +/* ==================================================================== + * Copyright 2002 Sun Microsystems, Inc. ALL RIGHTS RESERVED. + * + * Portions of the attached software ("Contribution") are developed by + * SUN MICROSYSTEMS, INC., and are contributed to the OpenSSL project. + * + * The Contribution is licensed pursuant to the OpenSSL open source + * license provided above. + * + * The elliptic curve binary polynomial software is originally written by + * Sheueling Chang Shantz and Douglas Stebila of Sun Microsystems + * Laboratories. */ + +#include + +#include +#include + +#include +#include +#include +#include + +#include "internal.h" +#include "../bn/internal.h" +#include "../../internal.h" + + +// This file implements the wNAF-based interleaving multi-exponentiation method +// at: +// http://link.springer.com/chapter/10.1007%2F3-540-45537-X_13 +// http://www.bmoeller.de/pdf/TI-01-08.multiexp.pdf + +void ec_compute_wNAF(const EC_GROUP *group, int8_t *out, + const EC_SCALAR *scalar, size_t bits, int w) { + // 'int8_t' can represent integers with absolute values less than 2^7. + assert(0 < w && w <= 7); + assert(bits != 0); + int bit = 1 << w; // 2^w, at most 128 + int next_bit = bit << 1; // 2^(w+1), at most 256 + int mask = next_bit - 1; // at most 255 + + int window_val = scalar->words[0] & mask; + for (size_t j = 0; j < bits + 1; j++) { + assert(0 <= window_val && window_val <= next_bit); + int digit = 0; + if (window_val & 1) { + assert(0 < window_val && window_val < next_bit); + if (window_val & bit) { + digit = window_val - next_bit; + // We know -next_bit < digit < 0 and window_val - digit = next_bit. 
+ + // modified wNAF + if (j + w + 1 >= bits) { + // special case for generating modified wNAFs: + // no new bits will be added into window_val, + // so using a positive digit here will decrease + // the total length of the representation + + digit = window_val & (mask >> 1); + // We know 0 < digit < bit and window_val - digit = bit. + } + } else { + digit = window_val; + // We know 0 < digit < bit and window_val - digit = 0. + } + + window_val -= digit; + + // Now window_val is 0 or 2^(w+1) in standard wNAF generation. + // For modified window NAFs, it may also be 2^w. + // + // See the comments above for the derivation of each of these bounds. + assert(window_val == 0 || window_val == next_bit || window_val == bit); + assert(-bit < digit && digit < bit); + + // window_val was odd, so digit is also odd. + assert(digit & 1); + } + + out[j] = digit; + + // Incorporate the next bit. Previously, |window_val| <= |next_bit|, so if + // we shift and add at most one copy of |bit|, this will continue to hold + // afterwards. + window_val >>= 1; + window_val += bit * bn_is_bit_set_words(scalar->words, group->order.N.width, + j + w + 1); + assert(window_val <= next_bit); + } + + // bits + 1 entries should be sufficient to consume all bits. + assert(window_val == 0); +} From 2de84993cbffd9d4eb4e281566fcfcca76c2bc80 Mon Sep 17 00:00:00 2001 From: Brian Smith Date: Tue, 17 Oct 2023 16:03:36 -0700 Subject: [PATCH 7/7] P-256 ECDSA verification: Use BoringSSL's W-NAF-based implementation. On targets where we don't use nistz256, use the Fiat W-NAF-based implementation instead. 
--- Cargo.toml | 3 +++ build.rs | 5 ++++ crypto/fipsmodule/ec/internal.h | 5 ++-- crypto/fipsmodule/ec/p256.c | 31 +++++++++++-------------- crypto/fipsmodule/ec/wnaf.c | 41 +++++++++++---------------------- crypto/internal.h | 2 ++ src/ec/suite_b/ops/p256.rs | 26 ++++++++++++++++++--- 7 files changed, 63 insertions(+), 50 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index d2dd40127b..670b59c426 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -65,12 +65,14 @@ include = [ "crypto/fipsmodule/bn/internal.h", "crypto/fipsmodule/bn/montgomery.c", "crypto/fipsmodule/bn/montgomery_inv.c", + "crypto/fipsmodule/bn/shift.c", "crypto/fipsmodule/ec/asm/p256-armv8-asm.pl", "crypto/fipsmodule/ec/asm/p256-x86_64-asm.pl", "crypto/fipsmodule/ec/ecp_nistz.c", "crypto/fipsmodule/ec/ecp_nistz.h", "crypto/fipsmodule/ec/ecp_nistz384.h", "crypto/fipsmodule/ec/ecp_nistz384.inl", + "crypto/fipsmodule/ec/internal.h", "crypto/fipsmodule/ec/gfp_p256.c", "crypto/fipsmodule/ec/gfp_p384.c", "crypto/fipsmodule/ec/p256.c", @@ -80,6 +82,7 @@ include = [ "crypto/fipsmodule/ec/p256_shared.h", "crypto/fipsmodule/ec/p256_table.h", "crypto/fipsmodule/ec/util.h", + "crypto/fipsmodule/ec/wnaf.c", "crypto/fipsmodule/ecdsa/ecdsa_verify_tests.txt", "crypto/fipsmodule/modes/asm/aesni-gcm-x86_64.pl", "crypto/fipsmodule/modes/asm/ghash-armv4.pl", diff --git a/build.rs b/build.rs index 3cbae9d926..6add88b4f7 100644 --- a/build.rs +++ b/build.rs @@ -38,10 +38,12 @@ const RING_SRCS: &[(&[&str], &str)] = &[ (&[], "crypto/fipsmodule/aes/aes_nohw.c"), (&[], "crypto/fipsmodule/bn/montgomery.c"), (&[], "crypto/fipsmodule/bn/montgomery_inv.c"), + (&[], "crypto/fipsmodule/bn/shift.c"), (&[], "crypto/fipsmodule/ec/ecp_nistz.c"), (&[], "crypto/fipsmodule/ec/gfp_p256.c"), (&[], "crypto/fipsmodule/ec/gfp_p384.c"), (&[], "crypto/fipsmodule/ec/p256.c"), + (&[], "crypto/fipsmodule/ec/wnaf.c"), (&[], "crypto/limbs/limbs.c"), (&[], "crypto/mem.c"), (&[], "crypto/poly1305/poly1305.c"), @@ -919,6 +921,7 @@ fn 
prefix_all_symbols(pp: char, prefix_prefix: &str, prefix: &str) -> String { "aesni_gcm_decrypt", "aesni_gcm_encrypt", "bn_from_montgomery_in_place", + "bn_is_bit_set_words", "bn_gather5", "bn_mul_mont", "bn_mul_mont_gather5", @@ -933,6 +936,7 @@ fn prefix_all_symbols(pp: char, prefix_prefix: &str, prefix: &str) -> String { "bssl_constant_time_test_main", "chacha20_poly1305_open", "chacha20_poly1305_seal", + "ec_compute_wNAF", "fiat_curve25519_adx_mul", "fiat_curve25519_adx_square", "gcm_ghash_avx", @@ -959,6 +963,7 @@ fn prefix_all_symbols(pp: char, prefix_prefix: &str, prefix: &str) -> String { "p256_point_mul", "p256_point_mul_base", "p256_point_mul_base_vartime", + "p256_point_mul_public", "p256_scalar_mul_mont", "p256_scalar_sqr_rep_mont", "p256_sqr_mont", diff --git a/crypto/fipsmodule/ec/internal.h b/crypto/fipsmodule/ec/internal.h index cf7c807256..99c47bae39 100644 --- a/crypto/fipsmodule/ec/internal.h +++ b/crypto/fipsmodule/ec/internal.h @@ -68,7 +68,7 @@ #ifndef OPENSSL_HEADER_EC_INTERNAL_H #define OPENSSL_HEADER_EC_INTERNAL_H -#include +#include // ec_compute_wNAF writes the modified width-(w+1) Non-Adjacent Form (wNAF) of // |scalar| to |out|. |out| must have room for |bits| + 1 elements, each of @@ -78,7 +78,6 @@ // where at most one of any w+1 consecutive digits is non-zero // with the exception that the most significant digit may be only // w-1 zeros away from that next non-zero digit. 
-void ec_compute_wNAF(const EC_GROUP *group, int8_t *out, - const EC_SCALAR *scalar, size_t bits, int w); +void ec_compute_wNAF(int8_t *out, const BN_ULONG *scalar, size_t scalar_limbs, size_t bits, int w); #endif // OPENSSL_HEADER_EC_INTERNAL_H diff --git a/crypto/fipsmodule/ec/p256.c b/crypto/fipsmodule/ec/p256.c index dc67a71f07..8461a42067 100644 --- a/crypto/fipsmodule/ec/p256.c +++ b/crypto/fipsmodule/ec/p256.c @@ -23,6 +23,7 @@ #include "p256_shared.h" +#include "internal.h" #include "../../internal.h" #include "./util.h" @@ -473,19 +474,17 @@ void p256_point_mul_base(Limb r[3][P256_LIMBS], const Limb scalar[P256_LIMBS]) { fiat_p256_to_words(r[2], nq[2]); } -#if 0 - -static void ec_GFp_nistp256_point_mul_public(const EC_GROUP *group, - EC_JACOBIAN *r, - const EC_SCALAR *g_scalar, - const EC_JACOBIAN *p, - const EC_SCALAR *p_scalar) { +void p256_point_mul_public(Limb r[3][P256_LIMBS], + const Limb g_scalar[P256_LIMBS], + const Limb p_scalar[P256_LIMBS], + const Limb p_x[P256_LIMBS], + const Limb p_y[P256_LIMBS]) { #define P256_WSIZE_PUBLIC 4 // Precompute multiples of |p|. p_pre_comp[i] is (2*i+1) * |p|. fiat_p256_felem p_pre_comp[1 << (P256_WSIZE_PUBLIC - 1)][3]; - fiat_p256_from_generic(p_pre_comp[0][0], &p->X); - fiat_p256_from_generic(p_pre_comp[0][1], &p->Y); - fiat_p256_from_generic(p_pre_comp[0][2], &p->Z); + fiat_p256_from_words(p_pre_comp[0][0], p_x); + fiat_p256_from_words(p_pre_comp[0][1], p_y); + fiat_p256_copy(p_pre_comp[0][2], fiat_p256_one); fiat_p256_felem p2[3]; fiat_p256_point_double(p2[0], p2[1], p2[2], p_pre_comp[0][0], p_pre_comp[0][1], p_pre_comp[0][2]); @@ -498,7 +497,7 @@ static void ec_GFp_nistp256_point_mul_public(const EC_GROUP *group, // Set up the coefficients for |p_scalar|. int8_t p_wNAF[257]; - ec_compute_wNAF(group, p_wNAF, p_scalar, 256, P256_WSIZE_PUBLIC); + ec_compute_wNAF(p_wNAF, p_scalar, P256_LIMBS, 256, P256_WSIZE_PUBLIC); // Set |ret| to the point at infinity. int skip = 1; // Save some point operations. 
@@ -542,7 +541,7 @@ static void ec_GFp_nistp256_point_mul_public(const EC_GROUP *group, int digit = p_wNAF[i]; if (digit != 0) { - assert(digit & 1); + debug_assert_nonsecret(digit & 1); size_t idx = (size_t)(digit < 0 ? (-digit) >> 1 : digit >> 1); fiat_p256_felem *y = &p_pre_comp[idx][1], tmp; if (digit < 0) { @@ -562,13 +561,11 @@ static void ec_GFp_nistp256_point_mul_public(const EC_GROUP *group, } } - fiat_p256_to_generic(&r->X, ret[0]); - fiat_p256_to_generic(&r->Y, ret[1]); - fiat_p256_to_generic(&r->Z, ret[2]); + fiat_p256_to_words(r[0], ret[0]); + fiat_p256_to_words(r[1], ret[1]); + fiat_p256_to_words(r[2], ret[2]); } -#endif - void p256_mul_mont(Limb r[P256_LIMBS], const Limb a[P256_LIMBS], const Limb b[P256_LIMBS]) { fiat_p256_felem a_, b_; diff --git a/crypto/fipsmodule/ec/wnaf.c b/crypto/fipsmodule/ec/wnaf.c index 56de6cfec5..cd8c1161f1 100644 --- a/crypto/fipsmodule/ec/wnaf.c +++ b/crypto/fipsmodule/ec/wnaf.c @@ -65,17 +65,6 @@ * Sheueling Chang Shantz and Douglas Stebila of Sun Microsystems * Laboratories. */ -#include - -#include -#include - -#include -#include -#include -#include - -#include "internal.h" #include "../bn/internal.h" #include "../../internal.h" @@ -85,27 +74,26 @@ // http://link.springer.com/chapter/10.1007%2F3-540-45537-X_13 // http://www.bmoeller.de/pdf/TI-01-08.multiexp.pdf -void ec_compute_wNAF(const EC_GROUP *group, int8_t *out, - const EC_SCALAR *scalar, size_t bits, int w) { +void ec_compute_wNAF(int8_t *out, const BN_ULONG scalar[], size_t scalar_limbs, size_t bits, int w) { // 'int8_t' can represent integers with absolute values less than 2^7. 
- assert(0 < w && w <= 7); - assert(bits != 0); + debug_assert_nonsecret(0 < w && w <= 7); + debug_assert_nonsecret(bits != 0); int bit = 1 << w; // 2^w, at most 128 int next_bit = bit << 1; // 2^(w+1), at most 256 int mask = next_bit - 1; // at most 255 - int window_val = scalar->words[0] & mask; + int window_val = ((int)scalar[0]) & mask; for (size_t j = 0; j < bits + 1; j++) { - assert(0 <= window_val && window_val <= next_bit); + debug_assert_nonsecret(0 <= window_val && window_val <= next_bit); int digit = 0; if (window_val & 1) { - assert(0 < window_val && window_val < next_bit); + debug_assert_nonsecret(0 < window_val && window_val < next_bit); if (window_val & bit) { digit = window_val - next_bit; // We know -next_bit < digit < 0 and window_val - digit = next_bit. // modified wNAF - if (j + w + 1 >= bits) { + if (j + ((size_t)w) + 1 >= bits) { // special case for generating modified wNAFs: // no new bits will be added into window_val, // so using a positive digit here will decrease @@ -125,24 +113,23 @@ void ec_compute_wNAF(const EC_GROUP *group, int8_t *out, // For modified window NAFs, it may also be 2^w. // // See the comments above for the derivation of each of these bounds. - assert(window_val == 0 || window_val == next_bit || window_val == bit); - assert(-bit < digit && digit < bit); + debug_assert_nonsecret(window_val == 0 || window_val == next_bit || window_val == bit); + debug_assert_nonsecret(-bit < digit && digit < bit); // window_val was odd, so digit is also odd. - assert(digit & 1); + debug_assert_nonsecret(digit & 1); } - out[j] = digit; + out[j] = (int8_t)digit; // Incorporate the next bit. Previously, |window_val| <= |next_bit|, so if // we shift and add at most one copy of |bit|, this will continue to hold // afterwards. 
window_val >>= 1; - window_val += bit * bn_is_bit_set_words(scalar->words, group->order.N.width, - j + w + 1); - assert(window_val <= next_bit); + window_val += bit * bn_is_bit_set_words(scalar, scalar_limbs, j + (size_t)w + 1); + debug_assert_nonsecret(window_val <= next_bit); } // bits + 1 entries should be sufficient to consume all bits. - assert(window_val == 0); + debug_assert_nonsecret(window_val == 0); } diff --git a/crypto/internal.h b/crypto/internal.h index 7beb1d44fd..062ca564c6 100644 --- a/crypto/internal.h +++ b/crypto/internal.h @@ -150,6 +150,8 @@ typedef __int128_t int128_t; typedef __uint128_t uint128_t; #endif +#define OPENSSL_ARRAY_SIZE(array) (sizeof(array) / sizeof((array)[0])) + // Pointer utility functions. // buffers_alias returns one if |a| and |b| alias and zero otherwise. diff --git a/src/ec/suite_b/ops/p256.rs b/src/ec/suite_b/ops/p256.rs index adbed60936..104c2e10fc 100644 --- a/src/ec/suite_b/ops/p256.rs +++ b/src/ec/suite_b/ops/p256.rs @@ -119,9 +119,7 @@ pub static PUBLIC_SCALAR_OPS: PublicScalarOps = PublicScalarOps { twin_mul: twin_mul_nistz256, #[cfg(not(any(target_arch = "aarch64", target_arch = "x86_64")))] - twin_mul: |g_scalar, p_scalar, p_xy| { - twin_mul_inefficient(&PRIVATE_KEY_OPS, g_scalar, p_scalar, p_xy) - }, + twin_mul: twin_mul_fiat, q_minus_n: Elem::from_hex("4319055358e8617b0c46353d039cdaae"), }; @@ -147,6 +145,28 @@ fn point_mul_base_vartime(g_scalar: &Scalar) -> Point { scaled_g } +#[cfg(not(any(target_arch = "aarch64", target_arch = "x86_64")))] +fn twin_mul_fiat(g_scalar: &Scalar, p_scalar: &Scalar, &(p_x, p_y): &(Elem, Elem)) -> Point { + prefixed_extern! 
{ + fn p256_point_mul_public(r: *mut Limb, + g_scalar: *const Limb, + p_scalar: *const Limb, + p_x: *const Limb, + p_y: *const Limb); + } + let mut r = Point::new_at_infinity(); + unsafe { + p256_point_mul_public( + r.xyz.as_mut_ptr(), + g_scalar.limbs.as_ptr(), + p_scalar.limbs.as_ptr(), + p_x.limbs.as_ptr(), + p_y.limbs.as_ptr(), + ); + } + r +} + pub static PRIVATE_SCALAR_OPS: PrivateScalarOps = PrivateScalarOps { scalar_ops: &SCALAR_OPS,