Skip to content

Commit

Permalink
ec: Use 3 fewer squarings for P-256 scalar inversion.
Browse files Browse the repository at this point in the history
Back in 2021 a GitHub user "Nik-U" showed a better addition chain for
P-256 scalar inversion. This addition chain is slightly better than
that one.
  • Loading branch information
briansmith committed Oct 14, 2024
1 parent f291606 commit 33fd099
Showing 1 changed file with 43 additions and 59 deletions.
102 changes: 43 additions & 59 deletions src/ec/suite_b/ops/p256.rs
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,7 @@ pub static PRIVATE_SCALAR_OPS: PrivateScalarOps = PrivateScalarOps {
scalar_inv_to_mont: p256_scalar_inv_to_mont,
};

#[allow(clippy::just_underscores_and_digits)]
fn p256_scalar_inv_to_mont(a: Scalar<R>, _cpu: cpu::Features) -> Scalar<R> {
// Calculate the modular inverse of scalar |a| using Fermat's Little
// Theorem:
Expand Down Expand Up @@ -207,32 +208,29 @@ fn p256_scalar_inv_to_mont(a: Scalar<R>, _cpu: cpu::Features) -> Scalar<R> {
binary_op_assign(p256_scalar_mul_mont, acc, b);
}

// Indexes into `d`.
const B_1: usize = 0;
const B_10: usize = 1;
const B_11: usize = 2;
const B_101: usize = 3;
const B_111: usize = 4;
const B_1111: usize = 5;
const B_10101: usize = 6;
const B_101111: usize = 7;
const DIGIT_COUNT: usize = 8;

let mut d = [Scalar::zero(); DIGIT_COUNT];

d[B_1] = a;
d[B_10] = sqr(&d[B_1]);
d[B_11] = mul(&d[B_10], &d[B_1]);
d[B_101] = mul(&d[B_10], &d[B_11]);
d[B_111] = mul(&d[B_101], &d[B_10]);
let b_1010 = sqr(&d[B_101]);
d[B_1111] = mul(&b_1010, &d[B_101]);
d[B_10101] = sqr_mul(&b_1010, 0 + 1, &d[B_1]);
let b_101010 = sqr(&d[B_10101]);
d[B_101111] = mul(&b_101010, &d[B_101]);
let b_111111 = mul(&b_101010, &d[B_10101]);

let ff = sqr_mul(&b_111111, 0 + 2, &d[B_11]);
let _1 = &a;

let _10 = sqr(_1); // 2
let _100 = sqr(&_10); // 4
let _101 = mul(&_100, _1); // 5
let _111 = mul(&_101, &_10); // 7

let _1000 = sqr(&_100); // 8
let _10000 = sqr(&_1000); // 16
let _100000 = sqr(&_10000); // 32

let _100111 = mul(&_111, &_100000); // 39 = 7 + 32
let _101011 = mul(&_100, &_100111); // 43 = 4 + 39
let _101111 = mul(&_100, &_101011); // 47 = 4 + 43
let _1001111 = mul(&_100000, &_101111); // 79 = 32 + 47
let _86 = sqr(&_101011); // 86 = 43 * 2
let _1011011 = mul(&_101, &_86); // 91 = 5 + 86
let _92 = mul(_1, &_1011011); // 92 = 1 + 91
let _1100011 = mul(&_111, &_92); // 99 = 7 + 92
let _10111111 = mul(&_92, &_1100011); // 191 = 92 + 99
let _11011111 = mul(&_100000, &_10111111); // 223 = 32 + 191

let ff = mul(&_100000, &_11011111); // 255 = 32 + 223
let ffff = sqr_mul(&ff, 0 + 8, &ff);
let ffffffff = sqr_mul(&ffff, 0 + 16, &ffff);

Expand All @@ -247,39 +245,25 @@ fn p256_scalar_inv_to_mont(a: Scalar<R>, _cpu: cpu::Features) -> Scalar<R> {
// 1011110011100110111110101010110110100111000101111001111010000100
// 1111001110111001110010101100001011111100011000110010010101001111

#[allow(clippy::cast_possible_truncation)]
static REMAINING_WINDOWS: [(u8, u8); 26] = [
(6, B_101111 as u8),
(2 + 3, B_111 as u8),
(2 + 2, B_11 as u8),
(1 + 4, B_1111 as u8),
(5, B_10101 as u8),
(1 + 3, B_101 as u8),
(3, B_101 as u8),
(3, B_101 as u8),
(2 + 3, B_111 as u8),
(3 + 6, B_101111 as u8),
(2 + 4, B_1111 as u8),
(1 + 1, B_1 as u8),
(4 + 1, B_1 as u8),
(2 + 4, B_1111 as u8),
(2 + 3, B_111 as u8),
(1 + 3, B_111 as u8),
(2 + 3, B_111 as u8),
(2 + 3, B_101 as u8),
(1 + 2, B_11 as u8),
(4 + 6, B_101111 as u8),
(2, B_11 as u8),
(3 + 2, B_11 as u8),
(3 + 2, B_11 as u8),
(2 + 1, B_1 as u8),
(2 + 5, B_10101 as u8),
(2 + 4, B_1111 as u8),
];

for &(squarings, digit) in &REMAINING_WINDOWS {
sqr_mul_acc(&mut acc, Limb::from(squarings), &d[usize::from(digit)]);
}
sqr_mul_acc(&mut acc, 6, &_101111);
sqr_mul_acc(&mut acc, 2 + 3, &_111);
sqr_mul_acc(&mut acc, 2 + 8, &_11011111);
sqr_mul_acc(&mut acc, 1 + 3, &_101);
sqr_mul_acc(&mut acc, 1 + 7, &_1011011);
sqr_mul_acc(&mut acc, 1 + 6, &_100111);
sqr_mul_acc(&mut acc, 3 + 6, &_101111);
sqr_mul_acc(&mut acc, 2 + 3, &_111);
sqr_mul_acc(&mut acc, 3, &_101);
sqr_mul_acc(&mut acc, 4 + 7, &_1001111);
sqr_mul_acc(&mut acc, 2 + 3, &_111);
sqr_mul_acc(&mut acc, 1 + 3, &_111);
sqr_mul_acc(&mut acc, 2 + 3, &_111);
sqr_mul_acc(&mut acc, 2 + 6, &_101011);
sqr_mul_acc(&mut acc, 4 + 8, &_10111111);
sqr_mul_acc(&mut acc, 3 + 7, &_1100011);
sqr_mul_acc(&mut acc, 2 + 1, _1);
sqr_mul_acc(&mut acc, 2 + 3, &_101);
sqr_mul_acc(&mut acc, 1 + 7, &_1001111);

acc
}
Expand Down

0 comments on commit 33fd099

Please sign in to comment.