Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ec: Use 3 fewer squarings for P-256 scalar inversion. #2154

Merged
merged 1 commit into from
Oct 14, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
102 changes: 43 additions & 59 deletions src/ec/suite_b/ops/p256.rs
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,7 @@ pub static PRIVATE_SCALAR_OPS: PrivateScalarOps = PrivateScalarOps {
scalar_inv_to_mont: p256_scalar_inv_to_mont,
};

#[allow(clippy::just_underscores_and_digits)]
fn p256_scalar_inv_to_mont(a: Scalar<R>, _cpu: cpu::Features) -> Scalar<R> {
// Calculate the modular inverse of scalar |a| using Fermat's Little
// Theorem:
Expand Down Expand Up @@ -207,32 +208,29 @@ fn p256_scalar_inv_to_mont(a: Scalar<R>, _cpu: cpu::Features) -> Scalar<R> {
binary_op_assign(p256_scalar_mul_mont, acc, b);
}

// Indexes into `d`.
const B_1: usize = 0;
const B_10: usize = 1;
const B_11: usize = 2;
const B_101: usize = 3;
const B_111: usize = 4;
const B_1111: usize = 5;
const B_10101: usize = 6;
const B_101111: usize = 7;
const DIGIT_COUNT: usize = 8;

let mut d = [Scalar::zero(); DIGIT_COUNT];

d[B_1] = a;
d[B_10] = sqr(&d[B_1]);
d[B_11] = mul(&d[B_10], &d[B_1]);
d[B_101] = mul(&d[B_10], &d[B_11]);
d[B_111] = mul(&d[B_101], &d[B_10]);
let b_1010 = sqr(&d[B_101]);
d[B_1111] = mul(&b_1010, &d[B_101]);
d[B_10101] = sqr_mul(&b_1010, 0 + 1, &d[B_1]);
let b_101010 = sqr(&d[B_10101]);
d[B_101111] = mul(&b_101010, &d[B_101]);
let b_111111 = mul(&b_101010, &d[B_10101]);

let ff = sqr_mul(&b_111111, 0 + 2, &d[B_11]);
let _1 = &a;

let _10 = sqr(_1); // 2
let _100 = sqr(&_10); // 4
let _101 = mul(&_100, _1); // 5
let _111 = mul(&_101, &_10); // 7

let _1000 = sqr(&_100); // 8
let _10000 = sqr(&_1000); // 16
let _100000 = sqr(&_10000); // 32

let _100111 = mul(&_111, &_100000); // 39 = 7 + 32
let _101011 = mul(&_100, &_100111); // 43 = 4 + 39
let _101111 = mul(&_100, &_101011); // 47 = 4 + 43
let _1001111 = mul(&_100000, &_101111); // 79 = 32 + 47
let _86 = sqr(&_101011); // 86 = 43 * 2
let _1011011 = mul(&_101, &_86); // 91 = 5 + 86
let _92 = mul(_1, &_1011011); // 92 = 1 + 91
let _1100011 = mul(&_111, &_92); // 99 = 7 + 92
let _10111111 = mul(&_92, &_1100011); // 191 = 92 + 99
let _11011111 = mul(&_100000, &_10111111); // 223 = 32 + 191

let ff = mul(&_100000, &_11011111); // 255 = 32 + 223
let ffff = sqr_mul(&ff, 0 + 8, &ff);
let ffffffff = sqr_mul(&ffff, 0 + 16, &ffff);

Expand All @@ -247,39 +245,25 @@ fn p256_scalar_inv_to_mont(a: Scalar<R>, _cpu: cpu::Features) -> Scalar<R> {
// 1011110011100110111110101010110110100111000101111001111010000100
// 1111001110111001110010101100001011111100011000110010010101001111

#[allow(clippy::cast_possible_truncation)]
static REMAINING_WINDOWS: [(u8, u8); 26] = [
(6, B_101111 as u8),
(2 + 3, B_111 as u8),
(2 + 2, B_11 as u8),
(1 + 4, B_1111 as u8),
(5, B_10101 as u8),
(1 + 3, B_101 as u8),
(3, B_101 as u8),
(3, B_101 as u8),
(2 + 3, B_111 as u8),
(3 + 6, B_101111 as u8),
(2 + 4, B_1111 as u8),
(1 + 1, B_1 as u8),
(4 + 1, B_1 as u8),
(2 + 4, B_1111 as u8),
(2 + 3, B_111 as u8),
(1 + 3, B_111 as u8),
(2 + 3, B_111 as u8),
(2 + 3, B_101 as u8),
(1 + 2, B_11 as u8),
(4 + 6, B_101111 as u8),
(2, B_11 as u8),
(3 + 2, B_11 as u8),
(3 + 2, B_11 as u8),
(2 + 1, B_1 as u8),
(2 + 5, B_10101 as u8),
(2 + 4, B_1111 as u8),
];

for &(squarings, digit) in &REMAINING_WINDOWS {
sqr_mul_acc(&mut acc, Limb::from(squarings), &d[usize::from(digit)]);
}
sqr_mul_acc(&mut acc, 6, &_101111);
sqr_mul_acc(&mut acc, 2 + 3, &_111);
sqr_mul_acc(&mut acc, 2 + 8, &_11011111);
sqr_mul_acc(&mut acc, 1 + 3, &_101);
sqr_mul_acc(&mut acc, 1 + 7, &_1011011);
sqr_mul_acc(&mut acc, 1 + 6, &_100111);
sqr_mul_acc(&mut acc, 3 + 6, &_101111);
sqr_mul_acc(&mut acc, 2 + 3, &_111);
sqr_mul_acc(&mut acc, 3, &_101);
sqr_mul_acc(&mut acc, 4 + 7, &_1001111);
sqr_mul_acc(&mut acc, 2 + 3, &_111);
sqr_mul_acc(&mut acc, 1 + 3, &_111);
sqr_mul_acc(&mut acc, 2 + 3, &_111);
sqr_mul_acc(&mut acc, 2 + 6, &_101011);
sqr_mul_acc(&mut acc, 4 + 8, &_10111111);
sqr_mul_acc(&mut acc, 3 + 7, &_1100011);
sqr_mul_acc(&mut acc, 2 + 1, _1);
sqr_mul_acc(&mut acc, 2 + 3, &_101);
sqr_mul_acc(&mut acc, 1 + 7, &_1001111);

acc
}
Expand Down
Loading