diff --git a/Cargo.toml b/Cargo.toml index d2dd40127b..670b59c426 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -65,12 +65,14 @@ include = [ "crypto/fipsmodule/bn/internal.h", "crypto/fipsmodule/bn/montgomery.c", "crypto/fipsmodule/bn/montgomery_inv.c", + "crypto/fipsmodule/bn/shift.c", "crypto/fipsmodule/ec/asm/p256-armv8-asm.pl", "crypto/fipsmodule/ec/asm/p256-x86_64-asm.pl", "crypto/fipsmodule/ec/ecp_nistz.c", "crypto/fipsmodule/ec/ecp_nistz.h", "crypto/fipsmodule/ec/ecp_nistz384.h", "crypto/fipsmodule/ec/ecp_nistz384.inl", + "crypto/fipsmodule/ec/internal.h", "crypto/fipsmodule/ec/gfp_p256.c", "crypto/fipsmodule/ec/gfp_p384.c", "crypto/fipsmodule/ec/p256.c", @@ -80,6 +82,7 @@ include = [ "crypto/fipsmodule/ec/p256_shared.h", "crypto/fipsmodule/ec/p256_table.h", "crypto/fipsmodule/ec/util.h", + "crypto/fipsmodule/ec/wnaf.c", "crypto/fipsmodule/ecdsa/ecdsa_verify_tests.txt", "crypto/fipsmodule/modes/asm/aesni-gcm-x86_64.pl", "crypto/fipsmodule/modes/asm/ghash-armv4.pl", diff --git a/build.rs b/build.rs index 5c328aa35b..6add88b4f7 100644 --- a/build.rs +++ b/build.rs @@ -38,10 +38,12 @@ const RING_SRCS: &[(&[&str], &str)] = &[ (&[], "crypto/fipsmodule/aes/aes_nohw.c"), (&[], "crypto/fipsmodule/bn/montgomery.c"), (&[], "crypto/fipsmodule/bn/montgomery_inv.c"), + (&[], "crypto/fipsmodule/bn/shift.c"), (&[], "crypto/fipsmodule/ec/ecp_nistz.c"), (&[], "crypto/fipsmodule/ec/gfp_p256.c"), (&[], "crypto/fipsmodule/ec/gfp_p384.c"), (&[], "crypto/fipsmodule/ec/p256.c"), + (&[], "crypto/fipsmodule/ec/wnaf.c"), (&[], "crypto/limbs/limbs.c"), (&[], "crypto/mem.c"), (&[], "crypto/poly1305/poly1305.c"), @@ -919,6 +921,7 @@ fn prefix_all_symbols(pp: char, prefix_prefix: &str, prefix: &str) -> String { "aesni_gcm_decrypt", "aesni_gcm_encrypt", "bn_from_montgomery_in_place", + "bn_is_bit_set_words", "bn_gather5", "bn_mul_mont", "bn_mul_mont_gather5", @@ -933,6 +936,7 @@ fn prefix_all_symbols(pp: char, prefix_prefix: &str, prefix: &str) -> String { "bssl_constant_time_test_main", 
"chacha20_poly1305_open", "chacha20_poly1305_seal", + "ec_compute_wNAF", "fiat_curve25519_adx_mul", "fiat_curve25519_adx_square", "gcm_ghash_avx", @@ -958,6 +962,8 @@ fn prefix_all_symbols(pp: char, prefix_prefix: &str, prefix: &str) -> String { "p256_point_double", "p256_point_mul", "p256_point_mul_base", + "p256_point_mul_base_vartime", + "p256_point_mul_public", "p256_scalar_mul_mont", "p256_scalar_sqr_rep_mont", "p256_sqr_mont", diff --git a/crypto/fipsmodule/bn/internal.h b/crypto/fipsmodule/bn/internal.h index 3fbb7d7521..20173f080a 100644 --- a/crypto/fipsmodule/bn/internal.h +++ b/crypto/fipsmodule/bn/internal.h @@ -165,6 +165,9 @@ typedef crypto_word_t BN_ULONG; #error "Must define either OPENSSL_32_BIT or OPENSSL_64_BIT" #endif +// bn_is_bit_set_words returns one if bit |bit| is set in |a| and zero +// otherwise. +int bn_is_bit_set_words(const BN_ULONG *a, size_t num, size_t bit); // |num| must be at least 4, at least on x86. // diff --git a/crypto/fipsmodule/bn/shift.c b/crypto/fipsmodule/bn/shift.c new file mode 100644 index 0000000000..76bf6219a2 --- /dev/null +++ b/crypto/fipsmodule/bn/shift.c @@ -0,0 +1,67 @@ +/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) + * All rights reserved. + * + * This package is an SSL implementation written + * by Eric Young (eay@cryptsoft.com). + * The implementation was written so as to conform with Netscapes SSL. + * + * This library is free for commercial and non-commercial use as long as + * the following conditions are aheared to. The following conditions + * apply to all code found in this distribution, be it the RC4, RSA, + * lhash, DES, etc., code; not just the SSL code. The SSL documentation + * included with this distribution is covered by the same copyright terms + * except that the holder is Tim Hudson (tjh@cryptsoft.com). + * + * Copyright remains Eric Young's, and as such any Copyright notices in + * the code are not to be removed. 
+ * If this package is used in a product, Eric Young should be given attribution + * as the author of the parts of the library used. + * This can be in the form of a textual message at program startup or + * in documentation (online or textual) provided with the package. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * "This product includes cryptographic software written by + * Eric Young (eay@cryptsoft.com)" + * The word 'cryptographic' can be left out if the rouines from the library + * being used are not cryptographic related :-). + * 4. If you include any Windows specific code (or a derivative thereof) from + * the apps directory (application code) you must include an acknowledgement: + * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)" + * + * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * The licence and distribution terms for any publically available version or + * derivative of this code cannot be changed. i.e. this code cannot simply be + * copied and put under another distribution licence + * [including the GNU Public Licence.] */ + +#include "internal.h" + + +int bn_is_bit_set_words(const BN_ULONG *a, size_t num, size_t bit) { + size_t i = bit / BN_BITS2; + size_t j = bit % BN_BITS2; + if (i >= num) { + return 0; + } + return (a[i] >> j) & 1; +} diff --git a/crypto/fipsmodule/ec/internal.h b/crypto/fipsmodule/ec/internal.h new file mode 100644 index 0000000000..99c47bae39 --- /dev/null +++ b/crypto/fipsmodule/ec/internal.h @@ -0,0 +1,83 @@ +/* Originally written by Bodo Moeller for the OpenSSL project. + * ==================================================================== + * Copyright (c) 1998-2005 The OpenSSL Project. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. 
All advertising materials mentioning features or use of this + * software must display the following acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit. (http://www.openssl.org/)" + * + * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to + * endorse or promote products derived from this software without + * prior written permission. For written permission, please contact + * openssl-core@openssl.org. + * + * 5. Products derived from this software may not be called "OpenSSL" + * nor may "OpenSSL" appear in their names without prior written + * permission of the OpenSSL Project. + * + * 6. Redistributions of any form whatsoever must retain the following + * acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit (http://www.openssl.org/)" + * + * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY + * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + * ==================================================================== + * + * This product includes cryptographic software written by Eric Young + * (eay@cryptsoft.com). This product includes software written by Tim + * Hudson (tjh@cryptsoft.com). 
+ * + */ +/* ==================================================================== + * Copyright 2002 Sun Microsystems, Inc. ALL RIGHTS RESERVED. + * + * Portions of the attached software ("Contribution") are developed by + * SUN MICROSYSTEMS, INC., and are contributed to the OpenSSL project. + * + * The Contribution is licensed pursuant to the OpenSSL open source + * license provided above. + * + * The elliptic curve binary polynomial software is originally written by + * Sheueling Chang Shantz and Douglas Stebila of Sun Microsystems + * Laboratories. */ + +#ifndef OPENSSL_HEADER_EC_INTERNAL_H +#define OPENSSL_HEADER_EC_INTERNAL_H + +#include <ring-core/base.h> + +// ec_compute_wNAF writes the modified width-(w+1) Non-Adjacent Form (wNAF) of +// |scalar| to |out|. |out| must have room for |bits| + 1 elements, each of +// which will be either zero or odd with an absolute value less than 2^w +// satisfying +// scalar = \sum_j out[j]*2^j +// where at most one of any w+1 consecutive digits is non-zero +// with the exception that the most significant digit may be only +// w-1 zeros away from that next non-zero digit.
+void ec_compute_wNAF(int8_t *out, const BN_ULONG *scalar, size_t scalar_limbs, size_t bits, int w); + +#endif // OPENSSL_HEADER_EC_INTERNAL_H diff --git a/crypto/fipsmodule/ec/p256-nistz.c b/crypto/fipsmodule/ec/p256-nistz.c index 33add75fcb..aa6344c68b 100644 --- a/crypto/fipsmodule/ec/p256-nistz.c +++ b/crypto/fipsmodule/ec/p256-nistz.c @@ -232,18 +232,18 @@ static crypto_word_t calc_wvalue(size_t *index, const uint8_t p_str[33]) { return booth_recode_w7(wvalue); } -void p256_point_mul(P256_POINT *r, const Limb p_scalar[P256_LIMBS], +void p256_point_mul(Limb r[3][P256_LIMBS], const Limb p_scalar[P256_LIMBS], const Limb p_x[P256_LIMBS], const Limb p_y[P256_LIMBS]) { alignas(32) P256_POINT out; ecp_nistz256_windowed_mul(&out, p_scalar, p_x, p_y); - limbs_copy(r->X, out.X, P256_LIMBS); - limbs_copy(r->Y, out.Y, P256_LIMBS); - limbs_copy(r->Z, out.Z, P256_LIMBS); + limbs_copy(r[0], out.X, P256_LIMBS); + limbs_copy(r[1], out.Y, P256_LIMBS); + limbs_copy(r[2], out.Z, P256_LIMBS); } -void p256_point_mul_base(P256_POINT *r, const Limb scalar[P256_LIMBS]) { +void p256_point_mul_base(Limb r[3][P256_LIMBS], const Limb scalar[P256_LIMBS]) { P256_SCALAR_BYTES p_str; p256_scalar_bytes_from_limbs(p_str, scalar); @@ -279,9 +279,63 @@ void p256_point_mul_base(P256_POINT *r, const Limb scalar[P256_LIMBS]) { ecp_nistz256_point_add_affine(&p, &p, &t); } - limbs_copy(r->X, p.X, P256_LIMBS); - limbs_copy(r->Y, p.Y, P256_LIMBS); - limbs_copy(r->Z, p.Z, P256_LIMBS); + limbs_copy(r[0], p.X, P256_LIMBS); + limbs_copy(r[1], p.Y, P256_LIMBS); + limbs_copy(r[2], p.Z, P256_LIMBS); +} + +void p256_point_mul_base_vartime(P256_POINT *r, + const Limb g_scalar[P256_LIMBS]) { + alignas(32) P256_POINT p; + uint8_t p_str[33]; + OPENSSL_memcpy(p_str, g_scalar, 32); + p_str[32] = 0; + + // First window + size_t index = 0; + size_t wvalue = calc_first_wvalue(&index, p_str); + + // Convert |p| from affine to Jacobian coordinates. We set Z to zero if |p| + // is infinity and |ONE| otherwise. 
|p| was computed from the table, so it + // is infinity iff |wvalue >> 1| is zero. + if ((wvalue >> 1) != 0) { + OPENSSL_memcpy(p.X, &ecp_nistz256_precomputed[0][(wvalue >> 1) - 1].X, + sizeof(p.X)); + OPENSSL_memcpy(p.Y, &ecp_nistz256_precomputed[0][(wvalue >> 1) - 1].Y, + sizeof(p.Y)); + OPENSSL_memcpy(p.Z, ONE, sizeof(p.Z)); + } else { + OPENSSL_memset(p.X, 0, sizeof(p.X)); + OPENSSL_memset(p.Y, 0, sizeof(p.Y)); + OPENSSL_memset(p.Z, 0, sizeof(p.Z)); + } + + if ((wvalue & 1) == 1) { + ecp_nistz256_neg(p.Y, p.Y); + } + + for (int i = 1; i < 37; i++) { + wvalue = calc_wvalue(&index, p_str); + if ((wvalue >> 1) == 0) { + continue; + } + + alignas(32) P256_POINT_AFFINE t; + OPENSSL_memcpy(&t, &ecp_nistz256_precomputed[i][(wvalue >> 1) - 1], + sizeof(t)); + if ((wvalue & 1) == 1) { + ecp_nistz256_neg(t.Y, t.Y); + } + + // |ecp_nistz256_point_add_affine| does not work if |p| and |t| are the + // same non-infinity point. Only the |g_scalar| term is computed here; + // there is no |p_scalar| term to order it against. + ecp_nistz256_point_add_affine(&p, &p, &t); + } + + OPENSSL_memcpy(r->X, p.X, P256_LIMBS * sizeof(BN_ULONG)); + OPENSSL_memcpy(r->Y, p.Y, P256_LIMBS * sizeof(BN_ULONG)); + OPENSSL_memcpy(r->Z, p.Z, P256_LIMBS * sizeof(BN_ULONG)); } #endif /* defined(OPENSSL_USE_NISTZ256) */ diff --git a/crypto/fipsmodule/ec/p256.c b/crypto/fipsmodule/ec/p256.c index 8d6152486e..8461a42067 100644 --- a/crypto/fipsmodule/ec/p256.c +++ b/crypto/fipsmodule/ec/p256.c @@ -23,6 +23,7 @@ #include "p256_shared.h" +#include "internal.h" #include "../../internal.h" #include "./util.h" @@ -96,6 +97,21 @@ static void fiat_p256_cmovznz(fiat_p256_limb_t out[FIAT_P256_NLIMBS], fiat_p256_selectznz(out, !!t, z, nz); } +static void fiat_p256_from_words(fiat_p256_felem out, + const Limb in[32 / sizeof(BN_ULONG)]) { + // Typically, |BN_ULONG| and |fiat_p256_limb_t| will be the same type, but on + // 64-bit platforms without |uint128_t|, they are different.
However, on + // little-endian systems, |uint64_t[4]| and |uint32_t[8]| have the same + // layout. + OPENSSL_memcpy(out, in, 32); +} + +static void fiat_p256_to_words(Limb out[32 / sizeof(BN_ULONG)], const fiat_p256_felem in) { + // See |fiat_p256_from_words|. + OPENSSL_memcpy(out, in, 32); +} + + // Group operations // ---------------- // @@ -339,8 +355,8 @@ static crypto_word_t fiat_p256_get_bit(const Limb in[P256_LIMBS], int i) { #endif } -void p256_point_mul(P256_POINT *r, const Limb scalar[P256_LIMBS], - const Limb p_x[P256_LIMBS], const Limb p_y[P256_LIMBS]) { +void p256_point_mul(Limb r[3][P256_LIMBS], const Limb scalar[P256_LIMBS], + const Limb p_x[P256_LIMBS], const Limb p_y[P256_LIMBS]) { debug_assert_nonsecret(r != NULL); debug_assert_nonsecret(scalar != NULL); debug_assert_nonsecret(p_x != NULL); @@ -349,9 +365,9 @@ void p256_point_mul(P256_POINT *r, const Limb scalar[P256_LIMBS], fiat_p256_felem p_pre_comp[17][3]; OPENSSL_memset(&p_pre_comp, 0, sizeof(p_pre_comp)); // Precompute multiples. - limbs_copy(&p_pre_comp[1][0][0], p_x, P256_LIMBS); - limbs_copy(&p_pre_comp[1][1][0], p_y, P256_LIMBS); - limbs_copy(&p_pre_comp[1][2][0], fiat_p256_one, P256_LIMBS); + fiat_p256_from_words(p_pre_comp[1][0], p_x); + fiat_p256_from_words(p_pre_comp[1][1], p_y); + fiat_p256_copy(p_pre_comp[1][2], fiat_p256_one); for (size_t j = 2; j <= 16; ++j) { if (j & 1) { @@ -407,12 +423,12 @@ void p256_point_mul(P256_POINT *r, const Limb scalar[P256_LIMBS], } } - limbs_copy(r->X, nq[0], P256_LIMBS); - limbs_copy(r->Y, nq[1], P256_LIMBS); - limbs_copy(r->Z, nq[2], P256_LIMBS); + fiat_p256_to_words(r[0], nq[0]); + fiat_p256_to_words(r[1], nq[1]); + fiat_p256_to_words(r[2], nq[2]); } -void p256_point_mul_base(P256_POINT *r, const Limb scalar[P256_LIMBS]) { +void p256_point_mul_base(Limb r[3][P256_LIMBS], const Limb scalar[P256_LIMBS]) { // Set nq to the point at infinity. 
fiat_p256_felem nq[3] = {{0}, {0}, {0}}, tmp[3]; @@ -453,45 +469,164 @@ void p256_point_mul_base(P256_POINT *r, const Limb scalar[P256_LIMBS]) { tmp[0], tmp[1], tmp[2]); } - limbs_copy(r->X, nq[0], P256_LIMBS); - limbs_copy(r->Y, nq[1], P256_LIMBS); - limbs_copy(r->Z, nq[2], P256_LIMBS); + fiat_p256_to_words(r[0], nq[0]); + fiat_p256_to_words(r[1], nq[1]); + fiat_p256_to_words(r[2], nq[2]); +} + +void p256_point_mul_public(Limb r[3][P256_LIMBS], + const Limb g_scalar[P256_LIMBS], + const Limb p_scalar[P256_LIMBS], + const Limb p_x[P256_LIMBS], + const Limb p_y[P256_LIMBS]) { +#define P256_WSIZE_PUBLIC 4 + // Precompute multiples of |p|. p_pre_comp[i] is (2*i+1) * |p|. + fiat_p256_felem p_pre_comp[1 << (P256_WSIZE_PUBLIC - 1)][3]; + fiat_p256_from_words(p_pre_comp[0][0], p_x); + fiat_p256_from_words(p_pre_comp[0][1], p_y); + fiat_p256_copy(p_pre_comp[0][2], fiat_p256_one); + fiat_p256_felem p2[3]; + fiat_p256_point_double(p2[0], p2[1], p2[2], p_pre_comp[0][0], + p_pre_comp[0][1], p_pre_comp[0][2]); + for (size_t i = 1; i < OPENSSL_ARRAY_SIZE(p_pre_comp); i++) { + fiat_p256_point_add(p_pre_comp[i][0], p_pre_comp[i][1], p_pre_comp[i][2], + p_pre_comp[i - 1][0], p_pre_comp[i - 1][1], + p_pre_comp[i - 1][2], 0 /* not mixed */, p2[0], p2[1], + p2[2]); + } + + // Set up the coefficients for |p_scalar|. + int8_t p_wNAF[257]; + ec_compute_wNAF(p_wNAF, p_scalar, P256_LIMBS, 256, P256_WSIZE_PUBLIC); + + // Set |ret| to the point at infinity. + int skip = 1; // Save some point operations. + fiat_p256_felem ret[3] = {{0}, {0}, {0}}; + for (int i = 256; i >= 0; i--) { + if (!skip) { + fiat_p256_point_double(ret[0], ret[1], ret[2], ret[0], ret[1], ret[2]); + } + + // For the |g_scalar|, we use the precomputed table without the + // constant-time lookup. + if (i <= 31) { + // First, look 32 bits upwards. 
+ crypto_word_t bits = fiat_p256_get_bit(g_scalar, i + 224) << 3; + bits |= fiat_p256_get_bit(g_scalar, i + 160) << 2; + bits |= fiat_p256_get_bit(g_scalar, i + 96) << 1; + bits |= fiat_p256_get_bit(g_scalar, i + 32); + if (bits != 0) { + size_t index = (size_t)(bits - 1); + fiat_p256_point_add(ret[0], ret[1], ret[2], ret[0], ret[1], ret[2], + 1 /* mixed */, fiat_p256_g_pre_comp[1][index][0], + fiat_p256_g_pre_comp[1][index][1], + fiat_p256_one); + skip = 0; + } + + // Second, look at the current position. + bits = fiat_p256_get_bit(g_scalar, i + 192) << 3; + bits |= fiat_p256_get_bit(g_scalar, i + 128) << 2; + bits |= fiat_p256_get_bit(g_scalar, i + 64) << 1; + bits |= fiat_p256_get_bit(g_scalar, i); + if (bits != 0) { + size_t index = (size_t)(bits - 1); + fiat_p256_point_add(ret[0], ret[1], ret[2], ret[0], ret[1], ret[2], + 1 /* mixed */, fiat_p256_g_pre_comp[0][index][0], + fiat_p256_g_pre_comp[0][index][1], + fiat_p256_one); + skip = 0; + } + } + + int digit = p_wNAF[i]; + if (digit != 0) { + debug_assert_nonsecret(digit & 1); + size_t idx = (size_t)(digit < 0 ? 
(-digit) >> 1 : digit >> 1); + fiat_p256_felem *y = &p_pre_comp[idx][1], tmp; + if (digit < 0) { + fiat_p256_opp(tmp, p_pre_comp[idx][1]); + y = &tmp; + } + if (!skip) { + fiat_p256_point_add(ret[0], ret[1], ret[2], ret[0], ret[1], ret[2], + 0 /* not mixed */, p_pre_comp[idx][0], *y, + p_pre_comp[idx][2]); + } else { + fiat_p256_copy(ret[0], p_pre_comp[idx][0]); + fiat_p256_copy(ret[1], *y); + fiat_p256_copy(ret[2], p_pre_comp[idx][2]); + skip = 0; + } + } + } + + fiat_p256_to_words(r[0], ret[0]); + fiat_p256_to_words(r[1], ret[1]); + fiat_p256_to_words(r[2], ret[2]); } void p256_mul_mont(Limb r[P256_LIMBS], const Limb a[P256_LIMBS], - const Limb b[P256_LIMBS]) { - fiat_p256_mul(r, a, b); + const Limb b[P256_LIMBS]) { + fiat_p256_felem a_, b_; + fiat_p256_from_words(a_, a); + fiat_p256_from_words(b_, b); + fiat_p256_mul(a_, a_, b_); + fiat_p256_to_words(r, a_); } void p256_sqr_mont(Limb r[P256_LIMBS], const Limb a[P256_LIMBS]) { - fiat_p256_square(r, a); + fiat_p256_felem x; + fiat_p256_from_words(x, a); + fiat_p256_square(x, x); + fiat_p256_to_words(r, x); } -void p256_point_add(P256_POINT *r, const P256_POINT *a, const P256_POINT *b) { - fiat_p256_point_add(r->X, r->Y, r->Z, - a->X, a->Y, a->Z, - 0, - b->X, b->Y, b->Z); +void p256_point_add(Limb r[3][P256_LIMBS], const Limb a[3][P256_LIMBS], + const Limb b[3][P256_LIMBS]) { + fiat_p256_felem x1, y1, z1, x2, y2, z2; + fiat_p256_from_words(x1, a[0]); + fiat_p256_from_words(y1, a[1]); + fiat_p256_from_words(z1, a[2]); + fiat_p256_from_words(x2, b[0]); + fiat_p256_from_words(y2, b[1]); + fiat_p256_from_words(z2, b[2]); + fiat_p256_point_add(x1, y1, z1, x1, y1, z1, 0 /* both Jacobian */, x2, y2, + z2); + fiat_p256_to_words(r[0], x1); + fiat_p256_to_words(r[1], y1); + fiat_p256_to_words(r[2], z1); } -void p256_point_double(P256_POINT *r, const P256_POINT *a) { - fiat_p256_point_double(r->X, r->Y, r->Z, - a->X, a->Y, a->Z); +void p256_point_double(Limb r[3][P256_LIMBS], const Limb a[3][P256_LIMBS]) { + fiat_p256_felem 
x, y, z; + fiat_p256_from_words(x, a[0]); + fiat_p256_from_words(y, a[1]); + fiat_p256_from_words(z, a[2]); + fiat_p256_point_double(x, y, z, x, y, z); + fiat_p256_to_words(r[0], x); + fiat_p256_to_words(r[1], y); + fiat_p256_to_words(r[2], z); } // For testing only. -void p256_point_add_affine(P256_POINT *r, const P256_POINT *a, - const BN_ULONG b[P256_LIMBS * 2]) { - const Limb *b_x = &b[0]; - const Limb *b_y = &b[P256_LIMBS]; - fiat_p256_felem b_z = {0}; - crypto_word_t b_is_inf = constant_time_select_w( - LIMBS_are_zero(b_x, P256_LIMBS), LIMBS_are_zero(b_y, P256_LIMBS), 0); - fiat_p256_cmovznz(b_z, constant_time_is_zero_w(b_is_inf), b_z, fiat_p256_one); - fiat_p256_point_add(r->X, r->Y, r->Z, - a->X, a->Y, a->Z, - 1, - b_x, b_y, b_z); +void p256_point_add_affine(Limb r[3][P256_LIMBS], const Limb a[3][P256_LIMBS], + const Limb b[2][P256_LIMBS]) { + fiat_p256_felem x1, y1, z1, x2, y2; + fiat_p256_from_words(x1, a[0]); + fiat_p256_from_words(y1, a[1]); + fiat_p256_from_words(z1, a[2]); + fiat_p256_from_words(x2, b[0]); + fiat_p256_from_words(y2, b[1]); + // |b| is infinity iff x2 and y2 are both zero, so OR the nonzero words. + fiat_p256_felem z2 = {0}; + fiat_p256_cmovznz(z2, fiat_p256_nz(x2) | fiat_p256_nz(y2), z2, fiat_p256_one); + + fiat_p256_point_add(x1, y1, z1, x1, y1, z1, 1 /* mixed */, x2, y2, z2); + + fiat_p256_to_words(r[0], x1); + fiat_p256_to_words(r[1], y1); + fiat_p256_to_words(r[2], z1); } #endif diff --git a/crypto/fipsmodule/ec/wnaf.c b/crypto/fipsmodule/ec/wnaf.c new file mode 100644 index 0000000000..cd8c1161f1 --- /dev/null +++ b/crypto/fipsmodule/ec/wnaf.c @@ -0,0 +1,135 @@ +/* Originally written by Bodo Moeller for the OpenSSL project. + * ==================================================================== + * Copyright (c) 1998-2005 The OpenSSL Project. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1.
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit. (http://www.openssl.org/)" + * + * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to + * endorse or promote products derived from this software without + * prior written permission. For written permission, please contact + * openssl-core@openssl.org. + * + * 5. Products derived from this software may not be called "OpenSSL" + * nor may "OpenSSL" appear in their names without prior written + * permission of the OpenSSL Project. + * + * 6. Redistributions of any form whatsoever must retain the following + * acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit (http://www.openssl.org/)" + * + * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY + * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE OpenSSL PROJECT OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + * ==================================================================== + * + * This product includes cryptographic software written by Eric Young + * (eay@cryptsoft.com). This product includes software written by Tim + * Hudson (tjh@cryptsoft.com). + * + */ +/* ==================================================================== + * Copyright 2002 Sun Microsystems, Inc. ALL RIGHTS RESERVED. + * + * Portions of the attached software ("Contribution") are developed by + * SUN MICROSYSTEMS, INC., and are contributed to the OpenSSL project. + * + * The Contribution is licensed pursuant to the OpenSSL open source + * license provided above. + * + * The elliptic curve binary polynomial software is originally written by + * Sheueling Chang Shantz and Douglas Stebila of Sun Microsystems + * Laboratories. */ + +#include "../bn/internal.h" +#include "../../internal.h" + + +// This file implements the wNAF-based interleaving multi-exponentiation method +// at: +// http://link.springer.com/chapter/10.1007%2F3-540-45537-X_13 +// http://www.bmoeller.de/pdf/TI-01-08.multiexp.pdf + +void ec_compute_wNAF(int8_t *out, const BN_ULONG scalar[], size_t scalar_limbs, size_t bits, int w) { + // 'int8_t' can represent integers with absolute values less than 2^7. 
+ debug_assert_nonsecret(0 < w && w <= 7); + debug_assert_nonsecret(bits != 0); + int bit = 1 << w; // 2^w, at most 128 + int next_bit = bit << 1; // 2^(w+1), at most 256 + int mask = next_bit - 1; // at most 255 + + int window_val = ((int)scalar[0]) & mask; + for (size_t j = 0; j < bits + 1; j++) { + debug_assert_nonsecret(0 <= window_val && window_val <= next_bit); + int digit = 0; + if (window_val & 1) { + debug_assert_nonsecret(0 < window_val && window_val < next_bit); + if (window_val & bit) { + digit = window_val - next_bit; + // We know -next_bit < digit < 0 and window_val - digit = next_bit. + + // modified wNAF + if (j + ((size_t)w) + 1 >= bits) { + // special case for generating modified wNAFs: + // no new bits will be added into window_val, + // so using a positive digit here will decrease + // the total length of the representation + + digit = window_val & (mask >> 1); + // We know 0 < digit < bit and window_val - digit = bit. + } + } else { + digit = window_val; + // We know 0 < digit < bit and window_val - digit = 0. + } + + window_val -= digit; + + // Now window_val is 0 or 2^(w+1) in standard wNAF generation. + // For modified window NAFs, it may also be 2^w. + // + // See the comments above for the derivation of each of these bounds. + debug_assert_nonsecret(window_val == 0 || window_val == next_bit || window_val == bit); + debug_assert_nonsecret(-bit < digit && digit < bit); + + // window_val was odd, so digit is also odd. + debug_assert_nonsecret(digit & 1); + } + + out[j] = (int8_t)digit; + + // Incorporate the next bit. Previously, |window_val| <= |next_bit|, so if + // we shift and add at most one copy of |bit|, this will continue to hold + // afterwards. + window_val >>= 1; + window_val += bit * bn_is_bit_set_words(scalar, scalar_limbs, j + (size_t)w + 1); + debug_assert_nonsecret(window_val <= next_bit); + } + + // bits + 1 entries should be sufficient to consume all bits. 
+ debug_assert_nonsecret(window_val == 0); +} diff --git a/crypto/internal.h b/crypto/internal.h index 7beb1d44fd..062ca564c6 100644 --- a/crypto/internal.h +++ b/crypto/internal.h @@ -150,6 +150,8 @@ typedef __int128_t int128_t; typedef __uint128_t uint128_t; #endif +#define OPENSSL_ARRAY_SIZE(array) (sizeof(array) / sizeof((array)[0])) + // Pointer utility functions. // buffers_alias returns one if |a| and |b| alias and zero otherwise. diff --git a/src/ec/suite_b/ecdsa/verification.rs b/src/ec/suite_b/ecdsa/verification.rs index 2e9f50127d..157753e5cb 100644 --- a/src/ec/suite_b/ecdsa/verification.rs +++ b/src/ec/suite_b/ecdsa/verification.rs @@ -123,7 +123,7 @@ impl EcdsaVerificationAlgorithm { // NSA Guide Step 6: "Compute the elliptic curve point // R = (xR, yR) = u1*G + u2*Q, using EC scalar multiplication and EC // addition. If R is equal to the point at infinity, output INVALID." - let product = twin_mul(self.ops.private_key_ops, &u1, &u2, &peer_pub_key); + let product = (self.ops.twin_mul)(&u1, &u2, &peer_pub_key); // Verify that the point we computed is on the curve; see // `verify_affine_point_is_on_the_curve_scaled` for details on why. It @@ -158,7 +158,7 @@ impl EcdsaVerificationAlgorithm { } if self.ops.elem_less_than(&r, &self.ops.q_minus_n) { self.ops - .private_key_ops + .scalar_ops .common .elem_add(&mut r, &public_key_ops.common.n); if sig_r_equals_x(self.ops, &r, &x, &z2) { @@ -193,18 +193,6 @@ fn split_rs_asn1<'a>( }) } -fn twin_mul( - ops: &PrivateKeyOps, - g_scalar: &Scalar, - p_scalar: &Scalar, - p_xy: &(Elem, Elem), -) -> Point { - // XXX: Inefficient. TODO: implement interleaved wNAF multiplication. - let scaled_g = ops.point_mul_base(g_scalar); - let scaled_p = ops.point_mul(p_scalar, p_xy); - ops.common.point_sum(&scaled_g, &scaled_p) -} - /// Verification of fixed-length (PKCS#11 style) ECDSA signatures using the /// P-256 curve and SHA-256. 
/// diff --git a/src/ec/suite_b/ops.rs b/src/ec/suite_b/ops.rs index b42b958efa..5aa241390e 100644 --- a/src/ec/suite_b/ops.rs +++ b/src/ec/suite_b/ops.rs @@ -270,10 +270,7 @@ pub struct PublicScalarOps { pub scalar_ops: &'static ScalarOps, pub public_key_ops: &'static PublicKeyOps, - // XXX: `PublicScalarOps` shouldn't depend on `PrivateKeyOps`, but it does - // temporarily until `twin_mul` is rewritten. - pub private_key_ops: &'static PrivateKeyOps, - + pub twin_mul: fn(g_scalar: &Scalar, p_scalar: &Scalar, p_xy: &(Elem, Elem)) -> Point, pub q_minus_n: Elem, } @@ -305,6 +302,19 @@ pub struct PrivateScalarOps { pub oneRR_mod_n: Scalar, // 1 * R**2 (mod n). TOOD: Use One. } +// XXX: Inefficient and unnecessarily depends on `PrivateKeyOps`. TODO: implement interleaved wNAF +// multiplication. +fn twin_mul_inefficient( + ops: &PrivateKeyOps, + g_scalar: &Scalar, + p_scalar: &Scalar, + p_xy: &(Elem, Elem), +) -> Point { + let scaled_g = ops.point_mul_base(g_scalar); + let scaled_p = ops.point_mul(p_scalar, p_xy); + ops.common.point_sum(&scaled_g, &scaled_p) +} + // This assumes n < q < 2*n. 
pub fn elem_reduced_to_scalar(ops: &CommonOps, elem: &Elem) -> Scalar { let num_limbs = ops.num_limbs; @@ -969,6 +979,7 @@ mod tests { fn p256_point_mul_base_test() { point_mul_base_tests( &p256::PRIVATE_KEY_OPS, + |s| p256::PRIVATE_KEY_OPS.point_mul_base(s), test_file!("ops/p256_point_mul_base_tests.txt"), ); } @@ -977,16 +988,21 @@ mod tests { fn p384_point_mul_base_test() { point_mul_base_tests( &p384::PRIVATE_KEY_OPS, + |s| p384::PRIVATE_KEY_OPS.point_mul_base(s), test_file!("ops/p384_point_mul_base_tests.txt"), ); } - fn point_mul_base_tests(ops: &PrivateKeyOps, test_file: test::File) { + pub(super) fn point_mul_base_tests( + ops: &PrivateKeyOps, + f: impl Fn(&Scalar) -> Point, + test_file: test::File, + ) { test::run(test_file, |section, test_case| { assert_eq!(section, ""); let g_scalar = consume_scalar(ops.common, test_case, "g_scalar"); let expected_result = consume_point(ops, test_case, "r"); - let actual_result = ops.point_mul_base(&g_scalar); + let actual_result = f(&g_scalar); assert_point_actual_equals_expected(ops, &actual_result, &expected_result); Ok(()) }) diff --git a/src/ec/suite_b/ops/p256.rs b/src/ec/suite_b/ops/p256.rs index b7ea524a1d..104c2e10fc 100644 --- a/src/ec/suite_b/ops/p256.rs +++ b/src/ec/suite_b/ops/p256.rs @@ -114,10 +114,59 @@ pub static SCALAR_OPS: ScalarOps = ScalarOps { pub static PUBLIC_SCALAR_OPS: PublicScalarOps = PublicScalarOps { scalar_ops: &SCALAR_OPS, public_key_ops: &PUBLIC_KEY_OPS, - private_key_ops: &PRIVATE_KEY_OPS, + + #[cfg(any(target_arch = "aarch64", target_arch = "x86_64"))] + twin_mul: twin_mul_nistz256, + + #[cfg(not(any(target_arch = "aarch64", target_arch = "x86_64")))] + twin_mul: twin_mul_fiat, + q_minus_n: Elem::from_hex("4319055358e8617b0c46353d039cdaae"), }; +#[cfg(any(target_arch = "aarch64", target_arch = "x86_64"))] +fn twin_mul_nistz256(g_scalar: &Scalar, p_scalar: &Scalar, p_xy: &(Elem, Elem)) -> Point { + let scaled_g = point_mul_base_vartime(g_scalar); + let scaled_p = 
PRIVATE_KEY_OPS.point_mul(p_scalar, p_xy); + PRIVATE_KEY_OPS.common.point_sum(&scaled_g, &scaled_p) +} + +#[cfg(any(target_arch = "aarch64", target_arch = "x86_64"))] +fn point_mul_base_vartime(g_scalar: &Scalar) -> Point { + prefixed_extern! { + fn p256_point_mul_base_vartime(r: *mut Limb, // [3][COMMON_OPS.num_limbs] + g_scalar: *const Limb, // [COMMON_OPS.num_limbs] + ); + } + let mut scaled_g = Point::new_at_infinity(); + unsafe { + p256_point_mul_base_vartime(scaled_g.xyz.as_mut_ptr(), g_scalar.limbs.as_ptr()); + } + scaled_g +} + +#[cfg(not(any(target_arch = "aarch64", target_arch = "x86_64")))] +fn twin_mul_fiat(g_scalar: &Scalar, p_scalar: &Scalar, &(p_x, p_y): &(Elem, Elem)) -> Point { + prefixed_extern! { + fn p256_point_mul_public(r: *mut Limb, + g_scalar: *const Limb, + p_scalar: *const Limb, + p_x: *const Limb, + p_y: *const Limb); + } + let mut r = Point::new_at_infinity(); + unsafe { + p256_point_mul_public( + r.xyz.as_mut_ptr(), + g_scalar.limbs.as_ptr(), + p_scalar.limbs.as_ptr(), + p_x.limbs.as_ptr(), + p_y.limbs.as_ptr(), + ); + } + r +} + pub static PRIVATE_SCALAR_OPS: PrivateScalarOps = PrivateScalarOps { scalar_ops: &SCALAR_OPS, @@ -270,7 +319,6 @@ prefixed_extern! { p_x: *const Limb, // [COMMON_OPS.num_limbs] p_y: *const Limb, // [COMMON_OPS.num_limbs] ); - fn p256_scalar_mul_mont( r: *mut Limb, // [COMMON_OPS.num_limbs] a: *const Limb, // [COMMON_OPS.num_limbs] @@ -282,3 +330,17 @@ prefixed_extern! 
{ rep: Limb, ); } + +#[cfg(test)] +mod tests { + #[cfg(any(target_arch = "aarch64", target_arch = "x86_64"))] + #[test] + fn p256_point_mul_base_vartime_test() { + use super::{super::tests::point_mul_base_tests, *}; + point_mul_base_tests( + &PRIVATE_KEY_OPS, + point_mul_base_vartime, + test_file!("p256_point_mul_base_tests.txt"), + ); + } +} diff --git a/src/ec/suite_b/ops/p384.rs b/src/ec/suite_b/ops/p384.rs index 6ef4bc3f9e..f424c520d0 100644 --- a/src/ec/suite_b/ops/p384.rs +++ b/src/ec/suite_b/ops/p384.rs @@ -122,7 +122,9 @@ pub static SCALAR_OPS: ScalarOps = ScalarOps { pub static PUBLIC_SCALAR_OPS: PublicScalarOps = PublicScalarOps { scalar_ops: &SCALAR_OPS, public_key_ops: &PUBLIC_KEY_OPS, - private_key_ops: &PRIVATE_KEY_OPS, + twin_mul: |g_scalar, p_scalar, p_xy| { + twin_mul_inefficient(&PRIVATE_KEY_OPS, g_scalar, p_scalar, p_xy) + }, q_minus_n: Elem::from_hex("389cb27e0bc8d21fa7e5f24cb74f58851313e696333ad68c"), };