From e13a1fa23734654d8fee9ad0ca369346a10ffc6f Mon Sep 17 00:00:00 2001 From: Dumi Loghin Date: Wed, 4 Sep 2024 13:51:56 +0800 Subject: [PATCH] update cryptography_cuda ref, fix avx2 issues, update toolchain --- Cargo.toml | 2 +- .../arch/x86_64/poseidon2_goldilocks_avx2.rs | 1 + .../hash/arch/x86_64/poseidon_bn128_avx2.rs | 34 ++++++++----------- plonky2/src/hash/merkle_tree.rs | 2 +- plonky2/src/hash/poseidon2.rs | 17 +--------- plonky2/src/hash/poseidon_bn128_ops.rs | 4 +++ plonky2/src/lib.rs | 2 +- rust-toolchain.toml | 2 +- 8 files changed, 25 insertions(+), 39 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 6b603ed48b..6d9c89ac0e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -3,7 +3,7 @@ members = ["field", "maybe_rayon", "plonky2", "starky", "util", "gen", "u32", "e resolver = "2" [workspace.dependencies] -cryptography_cuda = { git = "ssh://git@github.com/okx/cryptography_cuda.git", rev = "173510160183f3299f4765b30bd4f2c1685353f9" } +cryptography_cuda = { git = "ssh://git@github.com/okx/cryptography_cuda.git", rev = "f2ed17c3086b9ca538272974e42b47e4bf7970e2" } ahash = { version = "0.8.7", default-features = false, features = [ "compile-time-rng", ] } # NOTE: Be sure to keep this version the same as the dependency in `hashbrown`. diff --git a/plonky2/src/hash/arch/x86_64/poseidon2_goldilocks_avx2.rs b/plonky2/src/hash/arch/x86_64/poseidon2_goldilocks_avx2.rs index fde9bd5863..c7c30dd7a8 100644 --- a/plonky2/src/hash/arch/x86_64/poseidon2_goldilocks_avx2.rs +++ b/plonky2/src/hash/arch/x86_64/poseidon2_goldilocks_avx2.rs @@ -72,6 +72,7 @@ where } } +#[allow(dead_code)] #[inline(always)] pub fn matmul_internal_avx( state: &mut [F; SPONGE_WIDTH], diff --git a/plonky2/src/hash/arch/x86_64/poseidon_bn128_avx2.rs b/plonky2/src/hash/arch/x86_64/poseidon_bn128_avx2.rs index 453d41d894..04ce1262bf 100644 --- a/plonky2/src/hash/arch/x86_64/poseidon_bn128_avx2.rs +++ b/plonky2/src/hash/arch/x86_64/poseidon_bn128_avx2.rs @@ -1095,13 +1095,18 @@ mod tests { 13281191951274694749u64 as i64, 13281191951274694749u64 as i64, ); + let exp: [u64; 4] = [ + 0xE0842DFEFB3AC8EEu64, + 0xE0842DFEFB3AC8EEu64, + 0xE0842DFEFB3AC8EEu64, + 0xE0842DFEFB3AC8EEu64, + ]; let r = _mm256_add_epi64(ct1, ct2); - let mut a: [u64; 4] = [0; 4]; - _mm256_store_si256(a.as_mut_ptr().cast::<__m256i>(), r); - println!("{:?}", a); - let x = 2896914383306846353u64 + 13281191951274694749u64; - println!("{:?}", x); + let mut vr: [u64; 4] = [0; 4]; + _mm256_storeu_si256(vr.as_mut_ptr().cast::<__m256i>(), r); + println!("{:X?}", vr); + assert_eq!(vr, exp); } Ok(()) } @@ -1147,28 +1152,19 @@ mod tests { #[test] fn test_bn128_sub64() -> Result<()> { unsafe { - let a = _mm256_set_epi64x( - 4i64, - 7i64, - 0xFFFFFFFFFFFFFFFFu64 as i64, - 4291643747455737684u64 as i64, - ); - let b = _mm256_set_epi64x(7i64, 4i64, 0x0i64, 3486998266802970665u64 as i64); + let a = _mm256_set_epi64x(4i64, 7i64, 0xFFFFFFFFFFFFFFFFu64 as i64, 0x0u64 as i64); + let b = _mm256_set_epi64x(7i64, 4i64, 0x0i64, 0xFFFFFFFFFFFFFFFFu64 as i64); let bin = _mm256_set_epi64x(0, 0, 0, 0); - let res = [ - 0xFFFFFFFFFFFFFFFFu64, - 0xFFFFFFFFFFFFFFFFu64, - 3u64, - 0xFFFFFFFFFFFFFFFDu64, - ]; + let res = [0x1u64, 0xFFFFFFFFFFFFFFFFu64, 3u64, 0xFFFFFFFFFFFFFFFDu64]; let bout = [1u64, 0u64, 0u64, 1u64]; let mut v: [u64; 4] = [0; 4]; let (c1, c2) = sub64(&a, &b, &bin); _mm256_storeu_si256(v.as_mut_ptr().cast::<__m256i>(), c1); - println!(" Res: {:?}", v); + println!("Res: {:X?}", v); + println!("Exp: {:X?}", res); assert_eq!(v, res); _mm256_storeu_si256(v.as_mut_ptr().cast::<__m256i>(), c2); println!("Cout: {:X?}", v); diff --git a/plonky2/src/hash/merkle_tree.rs b/plonky2/src/hash/merkle_tree.rs index ac909604ce..f3b4f50488 100644 --- a/plonky2/src/hash/merkle_tree.rs +++ b/plonky2/src/hash/merkle_tree.rs @@ -14,7 +14,7 @@ use cryptography_cuda::device::memory::HostOrDeviceSlice; #[cfg(feature = "cuda")] use cryptography_cuda::device::stream::CudaStream; #[cfg(feature = "cuda")] -use cryptography_cuda::merkle::bindings::{ +use cryptography_cuda::{ fill_digests_buf_linear_gpu_with_gpu_ptr, fill_digests_buf_linear_multigpu_with_gpu_ptr, }; use num::range; diff --git a/plonky2/src/hash/poseidon2.rs b/plonky2/src/hash/poseidon2.rs index 15a854899e..e71e366fa6 100644 --- a/plonky2/src/hash/poseidon2.rs +++ b/plonky2/src/hash/poseidon2.rs @@ -9,7 +9,7 @@ use plonky2_field::goldilocks_field::GoldilocksField; use super::arch::x86_64::goldilocks_avx2::sbox_avx; #[cfg(target_feature = "avx2")] use super::arch::x86_64::poseidon2_goldilocks_avx2::{ - add_rc_avx, internal_layer_avx, matmul_internal_avx, permute_mut_avx, + add_rc_avx, internal_layer_avx, permute_mut_avx, }; use super::hash_types::{HashOutTarget, NUM_HASH_OUT_ELTS}; use crate::field::extension::Extendable; @@ -389,9 +389,6 @@ where } } -#[derive(Debug, Clone, Default)] -struct DiffusionMatrixGoldilocks; - pub fn matmul_internal( state: &mut [F; SPONGE_WIDTH], mat_internal_diag_m_1: [u64; SPONGE_WIDTH], @@ -403,18 +400,6 @@ pub fn matmul_internal( } } -impl P2Permutation<[F; 12]> for DiffusionMatrixGoldilocks { - #[cfg(not(target_feature = "avx2"))] - fn permute_mut(&self, state: &mut [F; 12]) { - matmul_internal::(state, MATRIX_DIAG_12_GOLDILOCKS); - } - - #[cfg(target_feature = "avx2")] - fn permute_mut(&self, state: &mut [F; 12]) { - matmul_internal_avx::(state, MATRIX_DIAG_12_GOLDILOCKS); - } -} - pub trait Poseidon2: RichField { // const WIDTH: usize = 12; // const D: u64 = 7; diff --git a/plonky2/src/hash/poseidon_bn128_ops.rs b/plonky2/src/hash/poseidon_bn128_ops.rs index 38b573fe98..97b8d7d2b8 100644 --- a/plonky2/src/hash/poseidon_bn128_ops.rs +++ b/plonky2/src/hash/poseidon_bn128_ops.rs @@ -4550,6 +4550,7 @@ pub struct PoseidonBN128NativePermutation { } impl PoseidonBN128NativePermutation { + #[allow(dead_code)] #[inline] fn exp5state(self, state: &mut [ElementBN128; 5]) { state[0].exp5(); @@ -4559,6 +4560,7 @@ impl PoseidonBN128NativePermutation { state[4].exp5(); } + #[allow(dead_code)] #[inline] fn ark(self, state: &mut [ElementBN128; 5], c: [[u64; 4]; 100], it: usize) { for i in 0..5 { @@ -4567,6 +4569,7 @@ impl PoseidonBN128NativePermutation { } } + #[allow(dead_code)] #[inline] fn mix(self, state: &mut [ElementBN128; 5], m: [[[u64; 4]; 5]; 5]) { let mut new_state: [ElementBN128; 5] = [ElementBN128::zero(); 5]; @@ -4584,6 +4587,7 @@ impl PoseidonBN128NativePermutation { } } + #[allow(dead_code)] pub fn permute_fn(&self, input: [u64; 12]) -> [u64; 12] { #[cfg(feature = "papi")] let mut event_set = init_papi(); diff --git a/plonky2/src/lib.rs b/plonky2/src/lib.rs index 1325f51555..4fc1e03841 100644 --- a/plonky2/src/lib.rs +++ b/plonky2/src/lib.rs @@ -3,7 +3,7 @@ #![deny(rustdoc::broken_intra_doc_links)] #![deny(missing_debug_implementations)] #![cfg_attr(not(feature = "std"), no_std)] -// #![feature(stdarch_x86_avx512)] +#![feature(stdarch_x86_avx512)] // #[cfg(not(feature = "std"))] pub extern crate alloc; diff --git a/rust-toolchain.toml b/rust-toolchain.toml index e0a216c974..639d6a3056 100644 --- a/rust-toolchain.toml +++ b/rust-toolchain.toml @@ -1,5 +1,5 @@ [toolchain] -channel = "nightly-2024-01-16" +channel = "nightly" components = [] targets = [] profile = "default"