odin-lang
diff --git a/‎core/crypto/_fiat/field_p256r1/field.odin‎
Lines changed: 364 additions & 0 deletions b/‎core/crypto/_fiat/field_p256r1/field.odin‎
Lines changed: 364 additions & 0 deletions
@@ -0,0 +1,364 @@
+package field_p256r1
+
+import "core:encoding/endian"
+import "core:math/bits"
+import "core:mem"
+
+// fe_clear overwrites the field element referenced by `arg1` with zero
+// bytes via `mem.zero_explicit`.
+fe_clear :: proc "contextless" (arg1: ^Montgomery_Domain_Field_Element) {
+	FE_SIZE :: size_of(Montgomery_Domain_Field_Element)
+	mem.zero_explicit(arg1, FE_SIZE)
+}
+
+// fe_clear_vec calls `fe_clear` on every field element pointer in `arg1`,
+// in slice order.
+fe_clear_vec :: proc "contextless" (
+	arg1: []^Montgomery_Domain_Field_Element,
+) {
+	for idx in 0 ..< len(arg1) {
+		fe_clear(arg1[idx])
+	}
+}
+
+// fe_from_bytes decodes a 32-byte big-endian integer from `arg1`, converts
+// it to the Montgomery domain, and stores it in `out1`.
+//
+// Unless `unsafe_assume_canonical` is set, the decoded value is compared
+// against `ELL` and rejected when it is not strictly smaller; in that case
+// the procedure returns false and `out1` is left untouched.  Returns true
+// on success.  Panics (via `ensure_contextless`) if `len(arg1) != 32`.
+fe_from_bytes :: proc "contextless" (
+	out1: ^Montgomery_Domain_Field_Element,
+	arg1: []byte,
+	unsafe_assume_canonical := false,
+) -> bool {
+	ensure_contextless(len(arg1) == 32, "p256r1: invalid fe input buffer")
+
+	// Note: We assume the input is in big-endian.  Limb 0 is the least
+	// significant limb, so it comes from the last 8 bytes of the input,
+	// and each limb must itself be read as a big-endian u64.
+	tmp := Non_Montgomery_Domain_Field_Element {
+		endian.unchecked_get_u64be(arg1[24:]),
+		endian.unchecked_get_u64be(arg1[16:]),
+		endian.unchecked_get_u64be(arg1[8:]),
+		endian.unchecked_get_u64be(arg1[0:]),
+	}
+	defer mem.zero_explicit(&tmp, size_of(tmp))
+
+	// Check that tmp is in the range [0, ELL).
+	if !unsafe_assume_canonical {
+		// Computes (ELL - 1) - tmp limb by limb; a final borrow means
+		// tmp > ELL - 1, i.e. tmp >= ELL.
+		_, borrow := bits.sub_u64(ELL[0] - 1, tmp[0], 0)
+		_, borrow = bits.sub_u64(ELL[1], tmp[1], borrow)
+		_, borrow = bits.sub_u64(ELL[2], tmp[2], borrow)
+		_, borrow = bits.sub_u64(ELL[3], tmp[3], borrow)
+		if borrow != 0 {
+			return false
+		}
+	}
+
+	fe_to_montgomery(out1, &tmp)
+
+	return true
+}
+
+// fe_to_bytes converts `arg1` out of the Montgomery domain and writes it to
+// `out1` as a 32-byte big-endian integer.  Panics (via `ensure_contextless`)
+// if `len(out1) != 32`.
+fe_to_bytes :: proc "contextless" (out1: []byte, arg1: ^Montgomery_Domain_Field_Element) {
+	ensure_contextless(len(out1) == 32, "p256r1: invalid fe output buffer")
+
+	tmp: Non_Montgomery_Domain_Field_Element
+	defer mem.zero_explicit(&tmp, size_of(tmp))
+
+	fe_from_montgomery(&tmp, arg1)
+
+	// Note: Likewise, output in big-endian.  The least significant limb
+	// goes into the last 8 bytes, each limb serialized big-endian.
+	endian.unchecked_put_u64be(out1[24:], tmp[0])
+	endian.unchecked_put_u64be(out1[16:], tmp[1])
+	endian.unchecked_put_u64be(out1[8:], tmp[2])
+	endian.unchecked_put_u64be(out1[0:], tmp[3])
+}
+
+// fe_equal returns 1 when `arg1` and `arg2` are equal and 0 otherwise.
+// The comparison is done without branching on the operands: the
+// difference is computed, reduced to a single word by `fe_non_zero`, and
+// the borrow of `word - 1` is returned.
+@(require_results)
+fe_equal :: proc "contextless" (arg1, arg2: ^Montgomery_Domain_Field_Element) -> int {
+	diff: Montgomery_Domain_Field_Element
+	defer fe_clear(&diff)
+
+	fe_sub(&diff, arg1, arg2)
+
+	// Subtracting 1 only underflows iff arg1 == arg2, so the borrow is
+	// exactly the equality bit.
+	nz := fe_non_zero(&diff)
+	_, underflow := bits.sub_u64(nz, 1, 0)
+
+	return int(underflow)
+}
+
+// fe_is_odd returns the least significant bit (0 or 1) of `arg1` after it
+// has been converted out of the Montgomery domain.
+@(require_results)
+fe_is_odd :: proc "contextless" (arg1: ^Montgomery_Domain_Field_Element) -> int {
+	plain: Non_Montgomery_Domain_Field_Element
+	fe_from_montgomery(&plain, arg1)
+
+	// Limb 0 holds the low 64 bits, so its lowest bit is the parity.
+	parity := int(plain[0] & 1)
+
+	mem.zero_explicit(&plain, size_of(plain))
+
+	return parity
+}
+
+// fe_pow2k sets `out1 = arg1^(2^arg2)`, i.e. it squares `arg1` a total of
+// `arg2` times.  `out1` and `arg1` may alias.
+fe_pow2k :: proc "contextless" (
+	out1: ^Montgomery_Domain_Field_Element,
+	arg1: ^Montgomery_Domain_Field_Element,
+	arg2: uint,
+) {
+	// Special case: zero squarings, `arg1^(2^0) = arg1`.  This should
+	// never happen in practice, but keep the result mathematically
+	// consistent with the `arg2 > 0` path.
+	if arg2 == 0 {
+		fe_set(out1, arg1)
+		return
+	}
+
+	fe_square(out1, arg1)
+	for _ in 1 ..< arg2 {
+		fe_square(out1, out1)
+	}
+}
+
+// fe_inv sets `out1 = arg1^(p - 2)`, which by Fermat's little theorem is
+// the multiplicative inverse of `arg1` for the P-256 prime
+// `p = 2^256 - 2^224 + 2^192 + 2^96 - 1`.  `out1` and `arg1` may alias.
+fe_inv :: proc "contextless" (out1, arg1: ^Montgomery_Domain_Field_Element) {
+	// Inversion computation is derived from the addition chain for
+	// p - 2 = 0xffffffff00000001000000000000000000000000fffffffffffffffffffffffd:
+	//
+	//	_10     = 2*1
+	//	_11     = 1 + _10
+	//	_110    = 2*_11
+	//	_111    = 1 + _110
+	//	_111000 = _111 << 3
+	//	_111111 = _111 + _111000
+	//	x12     = _111111 << 6 + _111111
+	//	x15     = x12 << 3 + _111
+	//	x16     = 2*x15 + 1
+	//	x32     = x16 << 16 + x16
+	//	i53     = x32 << 15
+	//	x47     = x15 + i53
+	//	i263    = ((i53 << 17 + 1) << 143 + x47) << 47
+	//	return    (x47 + i263) << 2 + 1
+	//
+	// Operations: 255 squares 12 multiplies
+	//
+	// (`xN` denotes the exponent 2^N - 1, i.e. N consecutive one bits.)
+
+	// Note: Need to stash `arg1` (`xx`) in the case that `out1`/`arg1` alias,
+	// as `arg1` is used repeatedly after `out1` has been altered.
+	t0, t1, xx: Montgomery_Domain_Field_Element = ---, ---, arg1^
+
+	// Step 1: z = x^0x2
+	fe_square(out1, &xx)
+
+	// Step 2: z = x^0x3
+	fe_mul(out1, &xx, out1)
+
+	// Step 3: z = x^0x6
+	fe_square(out1, out1)
+
+	// Step 4: z = x^0x7
+	fe_mul(out1, &xx, out1)
+
+	// Step 7: t0 = x^0x38
+	fe_pow2k(&t0, out1, 3)
+
+	// Step 8: t0 = x^0x3f
+	fe_mul(&t0, out1, &t0)
+
+	// Step 14: t1 = x^0xfc0
+	fe_pow2k(&t1, &t0, 6)
+
+	// Step 15: t0 = x^0xfff (x12)
+	fe_mul(&t0, &t0, &t1)
+
+	// Step 18: t0 = x^0x7ff8
+	fe_pow2k(&t0, &t0, 3)
+
+	// Step 19: z = x^0x7fff (x15)
+	fe_mul(out1, out1, &t0)
+
+	// Step 20: t0 = x^0xfffe
+	fe_square(&t0, out1)
+
+	// Step 21: t0 = x^0xffff (x16)
+	fe_mul(&t0, &xx, &t0)
+
+	// Step 37: t1 = x^0xffff0000
+	fe_pow2k(&t1, &t0, 16)
+
+	// Step 38: t0 = x^0xffffffff (x32)
+	fe_mul(&t0, &t0, &t1)
+
+	// Step 53: t0 = x^0x7fffffff8000 (i53)
+	fe_pow2k(&t0, &t0, 15)
+
+	// Step 54: z = x^0x7fffffffffff (x47)
+	fe_mul(out1, out1, &t0)
+
+	// Step 71: t0 = x^0xffffffff00000000
+	fe_pow2k(&t0, &t0, 17)
+
+	// Step 72: t0 = x^0xffffffff00000001
+	fe_mul(&t0, &xx, &t0)
+
+	// Step 215: t0 = x^(0xffffffff00000001 << 143)
+	fe_pow2k(&t0, &t0, 143)
+
+	// Step 216: t0 = x^((0xffffffff00000001 << 143) + x47)
+	fe_mul(&t0, out1, &t0)
+
+	// Step 263: t0 = x^(((0xffffffff00000001 << 143) + x47) << 47) (i263)
+	fe_pow2k(&t0, &t0, 47)
+
+	// Step 264: z = x^(x47 + i263)
+	fe_mul(out1, out1, &t0)
+
+	// Step 266: z = x^0xffffffff00000001000000000000000000000000fffffffffffffffffffffffc
+	fe_pow2k(out1, out1, 2)
+
+	// Step 267: z = x^0xffffffff00000001000000000000000000000000fffffffffffffffffffffffd
+	fe_mul(out1, &xx, out1)
+
+	fe_clear_vec([]^Montgomery_Domain_Field_Element{&t0, &t1, &xx})
+}
+
+// fe_sqrt attempts to compute a square root of `arg1`.
+//
+// The candidate `arg1^((p + 1) / 4)` is computed and then squared to check
+// it against `arg1`.  On success `out1` holds the root and 1 is returned;
+// otherwise `out1` is set to zero and 0 is returned.  `out1` and `arg1`
+// may alias.
+@(require_results)
+fe_sqrt :: proc "contextless" (out1, arg1: ^Montgomery_Domain_Field_Element) -> int {
+	// Square root candidate can be derived via exponentiation by `(p + 1) / 4`
+	// From sage: 28948022302589062190674361737351893382521535853822578548883407827216774463488
+	//
+	// Square root computation is derived from the addition chain:
+	//
+	//	_10       = 2*1
+	//	_11       = 1 + _10
+	//	_1100     = _11 << 2
+	//	_1111     = _11 + _1100
+	//	_11110000 = _1111 << 4
+	//	_11111111 = _1111 + _11110000
+	//	x16       = _11111111 << 8 + _11111111
+	//	x32       = x16 << 16 + x16
+	//	return      ((x32 << 32 + 1) << 96 + 1) << 94
+	//
+	// Operations: 253 squares 7 multiplies
+	//
+	// Generated by github.com/mmcloughlin/addchain v0.4.0.
+
+	// Likewise this tramples over arg1, so stash another copy.
+	t0, xx: Montgomery_Domain_Field_Element = ---, arg1^
+
+	// Step 1: z = x^0x2
+	fe_square(out1, &xx)
+
+	// Step 2: z = x^0x3
+	fe_mul(out1, &xx, out1)
+
+	// Step 4: t0 = x^0xc
+	//
+	// Note: This must square `z` (x^0x3), not `x`; squaring `x` twice
+	// would only give x^0x4.
+	fe_pow2k(&t0, out1, 2)
+
+	// Step 5: z = x^0xf
+	fe_mul(out1, out1, &t0)
+
+	// Step 9: t0 = x^0xf0
+	fe_pow2k(&t0, out1, 4)
+
+	// Step 10: z = x^0xff
+	fe_mul(out1, out1, &t0)
+
+	// Step 18: t0 = x^0xff00
+	fe_pow2k(&t0, out1, 8)
+
+	// Step 19: z = x^0xffff
+	fe_mul(out1, out1, &t0)
+
+	// Step 35: t0 = x^0xffff0000
+	fe_pow2k(&t0, out1, 16)
+
+	// Step 36: z = x^0xffffffff
+	fe_mul(out1, out1, &t0)
+
+	// Step 68: z = x^0xffffffff00000000
+	fe_pow2k(out1, out1, 32)
+
+	// Step 69: z = x^0xffffffff00000001
+	fe_mul(out1, &xx, out1)
+
+	// Step 165: z = x^0xffffffff00000001000000000000000000000000
+	fe_pow2k(out1, out1, 96)
+
+	// Step 166: z = x^0xffffffff00000001000000000000000000000001
+	fe_mul(out1, &xx, out1)
+
+	// Step 260: z = x^0x3fffffffc0000000400000000000000000000000400000000000000000000000
+	fe_pow2k(out1, out1, 94)
+
+	// Ensure that our candidate is actually the square root.
+	check, zero: Montgomery_Domain_Field_Element
+	fe_square(&check, out1)
+
+	// Keep the candidate when candidate^2 == x, otherwise output zero.
+	is_valid := fe_equal(&check, &xx)
+	fe_cond_select(out1, &zero, out1, is_valid)
+
+	fe_clear_vec([]^Montgomery_Domain_Field_Element{&t0, &xx, &check})
+
+	return is_valid
+}
+
+// fe_zero sets all four limbs of `out1` to zero.
+fe_zero :: proc "contextless" (out1: ^Montgomery_Domain_Field_Element) {
+	out1[0], out1[1], out1[2], out1[3] = 0, 0, 0, 0
+}
+
+// fe_set copies the four limbs of `arg1` into `out1`.  All limbs are read
+// before any are written.
+fe_set :: proc "contextless" (out1, arg1: ^Montgomery_Domain_Field_Element) {
+	l0, l1, l2, l3 := arg1[0], arg1[1], arg1[2], arg1[3]
+	out1[0], out1[1], out1[2], out1[3] = l0, l1, l2, l3
+}
+
+// fe_cond_swap exchanges the contents of `out1` and `out2` when `arg1` is 1
+// and leaves both unchanged when `arg1` is 0.  `arg1` is expected to be
+// exactly 0 or 1.
+//
+// The swap is done with bit masks rather than a branch.
+// NOTE(review): the `optimization_mode = "none"` / `#force_no_inline`
+// attributes are presumably there to stop the compiler from rewriting the
+// masking into data-dependent control flow; confirm before altering.
+@(optimization_mode = "none")
+fe_cond_swap :: #force_no_inline proc "contextless" (out1, out2: ^Montgomery_Domain_Field_Element, arg1: int) {
+	// mask is all ones when arg1 == 1 and all zeros when arg1 == 0.
+	mask := (u64(arg1) * 0xffffffffffffffff)
+	// For each limb, x is (a ~ b) when swapping and 0 otherwise, so
+	// `a ~ x` yields b (swap) or a (no swap), and likewise for b.
+	x := (out1[0] ~ out2[0]) & mask
+	x1, y1 := out1[0] ~ x, out2[0] ~ x
+	x = (out1[1] ~ out2[1]) & mask
+	x2, y2 := out1[1] ~ x, out2[1] ~ x
+	x = (out1[2] ~ out2[2]) & mask
+	x3, y3 := out1[2] ~ x, out2[2] ~ x
+	x = (out1[3] ~ out2[3]) & mask
+	x4, y4 := out1[3] ~ x, out2[3] ~ x
+	// Write the results back only after every limb has been computed.
+	out1[0], out2[0] = x1, y1
+	out1[1], out2[1] = x2, y2
+	out1[2], out2[2] = x3, y3
+	out1[3], out2[3] = x4, y4
+}
+
+// fe_cond_select sets `out1` to `arg2` when `arg3` is 1 and to `arg1` when
+// `arg3` is 0.  `arg3` is expected to be exactly 0 or 1.  All limbs are
+// computed before `out1` is written, so `out1` may alias either input.
+//
+// The selection is done with bit masks rather than a branch.
+// NOTE(review): the `optimization_mode = "none"` / `#force_no_inline`
+// attributes are presumably there to stop the compiler from rewriting the
+// masking into data-dependent control flow; confirm before altering.
+@(optimization_mode = "none")
+fe_cond_select :: #force_no_inline proc "contextless" (
+	out1, arg1, arg2: ^Montgomery_Domain_Field_Element,
+	arg3: int,
+) {
+	// mask is all ones when arg3 == 1 and all zeros when arg3 == 0.
+	mask := (u64(arg3) * 0xffffffffffffffff)
+	// Per limb: keep arg2's bits where mask is set, arg1's bits elsewhere.
+	x1 := ((mask & arg2[0]) | ((~mask) & arg1[0]))
+	x2 := ((mask & arg2[1]) | ((~mask) & arg1[1]))
+	x3 := ((mask & arg2[2]) | ((~mask) & arg1[2]))
+	x4 := ((mask & arg2[3]) | ((~mask) & arg1[3]))
+	out1[0] = x1
+	out1[1] = x2
+	out1[2] = x3
+	out1[3] = x4
+}
+
+// fe_cond_negate sets `out1` to the result of `fe_opp(arg1)` when `ctrl`
+// is 1, and to `arg1` itself when `ctrl` is 0.  The choice is delegated to
+// `fe_cond_select`, and the temporary is wiped before returning.
+fe_cond_negate :: proc "contextless" (out1, arg1: ^Montgomery_Domain_Field_Element, ctrl: int) {
+	negated: Montgomery_Domain_Field_Element = ---
+	defer fe_clear(&negated)
+
+	fe_opp(&negated, arg1)
+	fe_cond_select(out1, arg1, &negated, ctrl)
+}