|
| 1 | +package field_p256r1 |
| 2 | + |
| 3 | +import "core:encoding/endian" |
| 4 | +import "core:math/bits" |
| 5 | +import "core:mem" |
| 6 | + |
// fe_clear overwrites the field element referenced by `arg1` with zeroes
// via `mem.zero_explicit`.
fe_clear :: proc "contextless" (arg1: ^Montgomery_Domain_Field_Element) {
	// The size is taken from the pointee, so the whole element is covered.
	mem.zero_explicit(arg1, size_of(arg1^))
}
| 10 | + |
// fe_clear_vec calls `fe_clear` on every field element referenced by the
// pointers in `arg1`, in order.
fe_clear_vec :: proc "contextless" (arg1: []^Montgomery_Domain_Field_Element) {
	for i in 0 ..< len(arg1) {
		fe_clear(arg1[i])
	}
}
| 18 | + |
// fe_from_bytes decodes the 32-byte big-endian encoding `arg1` into the
// Montgomery-domain field element `out1`.
//
// Unless `unsafe_assume_canonical` is set, the decoded integer is checked
// to be in the range [0, ELL); if it is not, false is returned and `out1`
// is left untouched.  Returns true on success.
//
// `arg1` must be exactly 32 bytes long (enforced via `ensure_contextless`).
fe_from_bytes :: proc "contextless" (
	out1: ^Montgomery_Domain_Field_Element,
	arg1: []byte,
	unsafe_assume_canonical := false,
) -> bool {
	ensure_contextless(len(arg1) == 32, "p256r1: invalid fe input buffer")

	// Note: We assume the input is in big-endian.
	//
	// Limb 0 is the least significant limb, so it is taken from the
	// last 8 bytes of the input, and each limb is itself read as a
	// big-endian word.
	tmp := Non_Montgomery_Domain_Field_Element {
		endian.unchecked_get_u64be(arg1[24:]),
		endian.unchecked_get_u64be(arg1[16:]),
		endian.unchecked_get_u64be(arg1[8:]),
		endian.unchecked_get_u64be(arg1[0:]),
	}
	defer mem.zero_explicit(&tmp, size_of(tmp))

	// Check that tmp is in the range [0, ELL).
	//
	// This computes `(ELL - 1) - tmp` limb by limb; a borrow out of
	// the most significant limb means `tmp > ELL - 1`, ie: `tmp >= ELL`.
	if !unsafe_assume_canonical {
		_, borrow := bits.sub_u64(ELL[0] - 1, tmp[0], 0)
		_, borrow = bits.sub_u64(ELL[1], tmp[1], borrow)
		_, borrow = bits.sub_u64(ELL[2], tmp[2], borrow)
		_, borrow = bits.sub_u64(ELL[3], tmp[3], borrow)
		if borrow != 0 {
			return false
		}
	}

	fe_to_montgomery(out1, &tmp)

	return true
}

// fe_to_bytes encodes the Montgomery-domain field element `arg1` into
// `out1` as 32 big-endian bytes (the inverse of `fe_from_bytes`).
//
// `out1` must be exactly 32 bytes long (enforced via `ensure_contextless`).
fe_to_bytes :: proc "contextless" (out1: []byte, arg1: ^Montgomery_Domain_Field_Element) {
	ensure_contextless(len(out1) == 32, "p256r1: invalid fe output buffer")

	tmp: Non_Montgomery_Domain_Field_Element
	fe_from_montgomery(&tmp, arg1)

	// Note: Likewise, output in big-endian.
	//
	// The least significant limb goes into the last 8 bytes, and each
	// limb is written as a big-endian word.
	endian.unchecked_put_u64be(out1[24:], tmp[0])
	endian.unchecked_put_u64be(out1[16:], tmp[1])
	endian.unchecked_put_u64be(out1[8:], tmp[2])
	endian.unchecked_put_u64be(out1[0:], tmp[3])

	// Wipe the non-Montgomery copy of the element.
	mem.zero_explicit(&tmp, size_of(tmp))
}
| 65 | + |
// fe_equal returns 1 iff `arg1 == arg2`, and 0 otherwise.
@(require_results)
fe_equal :: proc "contextless" (arg1, arg2: ^Montgomery_Domain_Field_Element) -> int {
	diff: Montgomery_Domain_Field_Element
	defer fe_clear(&diff)

	fe_sub(&diff, arg1, arg2)

	// `fe_non_zero(&diff) - 1` only underflows when the difference is
	// zero (ie: the inputs are equal), so the borrow out of the
	// subtraction (1 on underflow) is the result.
	_, is_equal := bits.sub_u64(fe_non_zero(&diff), 1, 0)

	return int(is_equal)
}
| 79 | + |
// fe_is_odd returns the least significant bit of `arg1` after conversion
// out of the Montgomery domain, ie: 1 if the value is odd, 0 if even.
@(require_results)
fe_is_odd :: proc "contextless" (arg1: ^Montgomery_Domain_Field_Element) -> int {
	plain: Non_Montgomery_Domain_Field_Element
	fe_from_montgomery(&plain, arg1)

	// Extract the parity bit before wiping the temporary.
	parity := int(plain[0] & 1)
	mem.zero_explicit(&plain, size_of(plain))

	return parity
}
| 88 | + |
// fe_pow2k sets `out1 = arg1^(2^arg2)`, ie: `arg1` squared `arg2` times.
//
// `out1` and `arg1` may alias (callers in this file pass the same
// pointer for both).
fe_pow2k :: proc "contextless" (
	out1: ^Montgomery_Domain_Field_Element,
	arg1: ^Montgomery_Domain_Field_Element,
	arg2: uint,
) {
	// Special case: `arg1^(2^0) = arg1^1 = arg1` (zero squarings), though
	// this should never happen.
	if arg2 == 0 {
		fe_set(out1, arg1)
		return
	}

	// The first squaring reads from `arg1`, the remaining `arg2 - 1`
	// squarings operate on `out1` in place.
	fe_square(out1, arg1)
	for _ in 1 ..< arg2 {
		fe_square(out1, out1)
	}
}
| 105 | + |
// fe_inv sets `out1 = arg1^-1`, computed as `arg1^(p - 2)` where
// `p = 2^256 - 2^224 + 2^192 + 2^96 - 1` is the P-256 field prime.
// If `arg1` is 0, the result is 0.
//
// `out1` and `arg1` may alias.
fe_inv :: proc "contextless" (out1, arg1: ^Montgomery_Domain_Field_Element) {
	// Inversion computation is derived from the addition chain for
	// `p - 2 = 0xffffffff00000001000000000000000000000000fffffffffffffffffffffffd`:
	//
	//	_10     = 2*1
	//	_11     = 1 + _10
	//	_110    = 2*_11
	//	_111    = 1 + _110
	//	_111000 = _111 << 3
	//	_111111 = _111 + _111000
	//	x12     = _111111 << 6 + _111111
	//	x15     = x12 << 3 + _111
	//	x16     = 2*x15 + 1
	//	x32     = x16 << 16 + x16
	//	i53     = x32 << 15
	//	x47     = x15 + i53
	//	i263    = ((i53 << 17 + 1) << 143 + x47) << 47
	//	return    (x47 + i263) << 2 + 1
	//
	// Operations: 255 squares 12 multiplies
	//
	// Generated by github.com/mmcloughlin/addchain v0.4.0.
	//
	// Note: `p - 2` is odd, so the chain necessarily ends with a
	// multiplication by `x` rather than a squaring.

	// Note: Need to stash `arg1` (`xx`) in the case that `out1`/`arg1` alias,
	// as `arg1` is used (steps 2, 4, 21, 72, 267) after `out1` has been
	// altered.
	t0, t1, xx: Montgomery_Domain_Field_Element = ---, ---, arg1^

	// Step 1: z = x^0x2
	fe_square(out1, &xx)

	// Step 2: z = x^0x3
	fe_mul(out1, &xx, out1)

	// Step 3: z = x^0x6
	fe_square(out1, out1)

	// Step 4: z = x^0x7
	fe_mul(out1, &xx, out1)

	// Step 7: t0 = x^0x38
	fe_pow2k(&t0, out1, 3)

	// Step 8: t0 = x^0x3f
	fe_mul(&t0, out1, &t0)

	// Step 14: t1 = x^0xfc0
	fe_pow2k(&t1, &t0, 6)

	// Step 15: t0 = x^0xfff
	fe_mul(&t0, &t0, &t1)

	// Step 18: t0 = x^0x7ff8
	fe_pow2k(&t0, &t0, 3)

	// Step 19: z = x^0x7fff
	fe_mul(out1, out1, &t0)

	// Step 20: t0 = x^0xfffe
	fe_square(&t0, out1)

	// Step 21: t0 = x^0xffff
	fe_mul(&t0, &xx, &t0)

	// Step 37: t1 = x^0xffff0000
	fe_pow2k(&t1, &t0, 16)

	// Step 38: t0 = x^0xffffffff
	fe_mul(&t0, &t0, &t1)

	// Step 53: t0 = x^0x7fffffff8000
	fe_pow2k(&t0, &t0, 15)

	// Step 54: z = x^0x7fffffffffff
	fe_mul(out1, out1, &t0)

	// Step 71: t0 = x^0xffffffff00000000
	fe_pow2k(&t0, &t0, 17)

	// Step 72: t0 = x^0xffffffff00000001
	fe_mul(&t0, &xx, &t0)

	// Step 215: t0 = x^(0xffffffff00000001 << 143)
	fe_pow2k(&t0, &t0, 143)

	// Step 216: t0 = x^((0xffffffff00000001 << 143) + 0x7fffffffffff)
	fe_mul(&t0, out1, &t0)

	// Step 263: t0 = x^(((0xffffffff00000001 << 143) + 0x7fffffffffff) << 47)
	fe_pow2k(&t0, &t0, 47)

	// Step 264: z = x^0x3fffffffc00000004000000000000000000000003fffffffffffffffffffffff
	fe_mul(out1, out1, &t0)

	// Step 266: z = x^0xffffffff00000001000000000000000000000000fffffffffffffffffffffffc
	fe_pow2k(out1, out1, 2)

	// Step 267: z = x^0xffffffff00000001000000000000000000000000fffffffffffffffffffffffd
	fe_mul(out1, &xx, out1)

	// Wipe the temporaries, including the stashed copy of the input.
	fe_clear_vec([]^Montgomery_Domain_Field_Element{&t0, &t1, &xx})
}
| 224 | + |
// fe_sqrt attempts to compute a square root of `arg1`.
//
// On success `out1` is set to a square root of `arg1` and 1 is returned.
// If the candidate does not square back to `arg1`, `out1` is set to 0
// and 0 is returned.
//
// `out1` and `arg1` may alias.
@(require_results)
fe_sqrt :: proc "contextless" (out1, arg1: ^Montgomery_Domain_Field_Element) -> int {
	// Square root candidate can be derived via exponentiation by `(p + 1) / 4`
	// From sage: 28948022302589062190674361737351893382521535853822578548883407827216774463488
	//
	// Square root computation is derived from the addition chain:
	//
	//	_10       = 2*1
	//	_11       = 1 + _10
	//	_1100     = _11 << 2
	//	_1111     = _11 + _1100
	//	_11110000 = _1111 << 4
	//	_11111111 = _1111 + _11110000
	//	x16       = _11111111 << 8 + _11111111
	//	x32       = x16 << 16 + x16
	//	return      ((x32 << 32 + 1) << 96 + 1) << 94
	//
	// Operations: 253 squares 7 multiplies
	//
	// Generated by github.com/mmcloughlin/addchain v0.4.0.

	// Likewise this tramples over arg1 (if `out1`/`arg1` alias), so stash
	// another copy, and only ever read the input via the copy.
	t0, xx: Montgomery_Domain_Field_Element = ---, arg1^

	// Step 1: z = x^0x2
	fe_square(out1, &xx)

	// Step 2: z = x^0x3
	fe_mul(out1, &xx, out1)

	// Step 4: t0 = x^0xc
	//
	// Note: This is `_11 << 2`, so it must square `z` (x^0x3), not `x`.
	fe_pow2k(&t0, out1, 2)

	// Step 5: z = x^0xf
	fe_mul(out1, out1, &t0)

	// Step 9: t0 = x^0xf0
	fe_pow2k(&t0, out1, 4)

	// Step 10: z = x^0xff
	fe_mul(out1, out1, &t0)

	// Step 18: t0 = x^0xff00
	fe_pow2k(&t0, out1, 8)

	// Step 19: z = x^0xffff
	fe_mul(out1, out1, &t0)

	// Step 35: t0 = x^0xffff0000
	fe_pow2k(&t0, out1, 16)

	// Step 36: z = x^0xffffffff
	fe_mul(out1, out1, &t0)

	// Step 68: z = x^0xffffffff00000000
	fe_pow2k(out1, out1, 32)

	// Step 69: z = x^0xffffffff00000001
	fe_mul(out1, &xx, out1)

	// Step 165: z = x^0xffffffff00000001000000000000000000000000
	fe_pow2k(out1, out1, 96)

	// Step 166: z = x^0xffffffff00000001000000000000000000000001
	fe_mul(out1, &xx, out1)

	// Step 260: z = x^0x3fffffffc0000000400000000000000000000000400000000000000000000000
	fe_pow2k(out1, out1, 94)

	// Ensure that our candidate is actually the square root.
	check, zero: Montgomery_Domain_Field_Element
	fe_square(&check, out1)

	// Keep the candidate iff `candidate^2 == x`, otherwise output 0.
	is_valid := fe_equal(&check, &xx)
	fe_cond_select(out1, &zero, out1, is_valid)

	fe_clear_vec([]^Montgomery_Domain_Field_Element{&t0, &xx, &check})

	return is_valid
}
| 306 | + |
// fe_zero sets all four limbs of `out1` to 0.
fe_zero :: proc "contextless" (out1: ^Montgomery_Domain_Field_Element) {
	out1[0], out1[1], out1[2], out1[3] = 0, 0, 0, 0
}
| 313 | + |
// fe_set copies the four limbs of `arg1` into `out1`.
//
// All limbs are read before any are written, so `out1` and `arg1` may
// alias.
fe_set :: proc "contextless" (out1, arg1: ^Montgomery_Domain_Field_Element) {
	l0, l1, l2, l3 := arg1[0], arg1[1], arg1[2], arg1[3]
	out1[0], out1[1], out1[2], out1[3] = l0, l1, l2, l3
}
| 324 | + |
// fe_cond_swap conditionally swaps `out1` and `out2` using masking
// instead of a branch: the limbs are exchanged when `arg1` is 1, and
// left as they are when `arg1` is 0.
//
// NOTE(review): `arg1` is assumed to be exactly 0 or 1 -- any other value
// yields a mask that is neither all-zero nor all-one; confirm callers.
//
// NOTE(review): optimizations and inlining are disabled for this
// procedure, presumably so the compiler does not turn the masking into
// a data-dependent branch -- confirm.
@(optimization_mode = "none")
fe_cond_swap :: #force_no_inline proc "contextless" (out1, out2: ^Montgomery_Domain_Field_Element, arg1: int) {
	// mask is all ones if arg1 == 1, and zero if arg1 == 0.
	mask := (u64(arg1) * 0xffffffffffffffff)
	// For each limb: x = (a ~ b) & mask is either `a ~ b` or 0, so
	// `a ~ x` / `b ~ x` are either (b, a) or (a, b).
	x := (out1[0] ~ out2[0]) & mask
	x1, y1 := out1[0] ~ x, out2[0] ~ x
	x = (out1[1] ~ out2[1]) & mask
	x2, y2 := out1[1] ~ x, out2[1] ~ x
	x = (out1[2] ~ out2[2]) & mask
	x3, y3 := out1[2] ~ x, out2[2] ~ x
	x = (out1[3] ~ out2[3]) & mask
	x4, y4 := out1[3] ~ x, out2[3] ~ x
	// Write back only after every limb has been computed.
	out1[0], out2[0] = x1, y1
	out1[1], out2[1] = x2, y2
	out1[2], out2[2] = x3, y3
	out1[3], out2[3] = x4, y4
}
| 341 | + |
// fe_cond_select sets `out1` to `arg2` when `arg3` is 1, and to `arg1`
// when `arg3` is 0, using masking instead of a branch.
//
// All limbs are computed before any are written, so `out1` may alias
// either input.
//
// NOTE(review): `arg3` is assumed to be exactly 0 or 1 -- any other value
// yields a mask that mixes bits of both inputs; confirm callers.
//
// NOTE(review): optimizations and inlining are disabled for this
// procedure, presumably so the compiler does not turn the masking into
// a data-dependent branch -- confirm.
@(optimization_mode = "none")
fe_cond_select :: #force_no_inline proc "contextless" (
	out1, arg1, arg2: ^Montgomery_Domain_Field_Element,
	arg3: int,
) {
	// mask is all ones if arg3 == 1, and zero if arg3 == 0.
	mask := (u64(arg3) * 0xffffffffffffffff)
	// Per limb: take arg2's bits where mask is set, arg1's bits elsewhere.
	x1 := ((mask & arg2[0]) | ((~mask) & arg1[0]))
	x2 := ((mask & arg2[1]) | ((~mask) & arg1[1]))
	x3 := ((mask & arg2[2]) | ((~mask) & arg1[2]))
	x4 := ((mask & arg2[3]) | ((~mask) & arg1[3]))
	out1[0] = x1
	out1[1] = x2
	out1[2] = x3
	out1[3] = x4
}
| 357 | + |
// fe_cond_negate sets `out1` to the result of `fe_opp` applied to `arg1`
// when `ctrl` is 1, and to `arg1` itself when `ctrl` is 0.
fe_cond_negate :: proc "contextless" (out1, arg1: ^Montgomery_Domain_Field_Element, ctrl: int) {
	negated: Montgomery_Domain_Field_Element = ---
	defer fe_clear(&negated)

	// Always compute the `fe_opp` result, then pick between it and the
	// input via `fe_cond_select`.
	fe_opp(&negated, arg1)
	fe_cond_select(out1, arg1, &negated, ctrl)
}
0 commit comments