-
Notifications
You must be signed in to change notification settings - Fork 49
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Implement clmul for CRC-8, CRC-16 and CRC-32; test all cases in the correctness tests (as the "all" test doesn't work for SIMD because of the amount of data it needs)
- Loading branch information
Showing
12 changed files
with
671 additions
and
80 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,112 @@ | ||
#[cfg(all( | ||
target_feature = "sse2", | ||
target_feature = "sse4.1", | ||
target_feature = "pclmulqdq", | ||
))] | ||
mod x86; | ||
|
||
#[cfg(all( | ||
target_feature = "sse2", | ||
target_feature = "sse4.1", | ||
target_feature = "pclmulqdq", | ||
))] | ||
pub(crate) use x86::Value; | ||
|
||
/// Primitive operations on a 128-bit SIMD value that the carry-less
/// multiplication CRC routine is built from.
///
/// All methods take `self` by value; implementors are expected to be cheap
/// `Copy` wrappers around a vector register.
trait ValueOps {
    /// XORs the 64-bit `value` into `self` and returns the result.
    fn xor(self, value: u64) -> Self;

    /// Folds the full 16 bytes of `self` using the constant pair `x_mod_p`
    /// and XORs the outcome with `value` (the next block of input).
    fn fold_16(self, x_mod_p: Self, value: Self) -> Self;

    /// Folds `self` by 8 bytes using a constant taken from `x_mod_p`.
    fn fold_8(self, x_mod_p: Self) -> Self;

    /// Folds `self` by 4 bytes using a constant taken from `x_mod_p`.
    fn fold_4(self, x_mod_p: Self) -> Self;

    /// Performs the final Barrett reduction down to a 32-bit CRC, where
    /// `px_u` carries the polynomial together with its precomputed quotient.
    fn barret_reduction_32(self, px_u: Self) -> u32;
}
|
||
pub(crate) const fn crc32_clmul_coeff(width: u8, poly: u32) -> [Value; 4] { | ||
const fn xt_mod_px(mut t: u32, px: u64) -> u64 { | ||
if t < 32 { | ||
return 0; | ||
} | ||
t -= 31; | ||
|
||
let mut n = 0x80000000; | ||
let mut i = 0; | ||
while i < t { | ||
n <<= 1; | ||
if n & 0x100000000 != 0 { | ||
n ^= px; | ||
} | ||
i += 1; | ||
} | ||
n << 32 | ||
} | ||
|
||
const fn u(px: u64) -> u64 { | ||
let mut q = 0; | ||
let mut n = 0x100000000; | ||
let mut i = 0; | ||
while i < 33 { | ||
q <<= 1; | ||
if n & 0x100000000 != 0 { | ||
q |= 1; | ||
n ^= px; | ||
} | ||
n <<= 1; | ||
i += 1; | ||
} | ||
q | ||
} | ||
|
||
let px = (poly as u64) << (u32::BITS as u8 - width); | ||
unsafe { | ||
// SAFETY: This will be evaluated during compile-time and therefore the alignment | ||
// doesn't matter, the type is transmuted from 2*u64 to u64x2 simd type. | ||
core::mem::transmute([ | ||
xt_mod_px(4 * 128 + 32, px).reverse_bits() << 1, | ||
xt_mod_px(4 * 128 - 32, px).reverse_bits() << 1, | ||
xt_mod_px(128 + 32, px).reverse_bits() << 1, | ||
xt_mod_px(128 - 32, px).reverse_bits() << 1, | ||
xt_mod_px(64, px).reverse_bits() << 1, | ||
xt_mod_px(32, px).reverse_bits() << 1, | ||
px.reverse_bits() >> 31, | ||
u(px).reverse_bits() >> 31, | ||
]) | ||
} | ||
} | ||
|
||
pub(crate) fn crc32_update_refin( | ||
crc: u32, | ||
coefficients: &[Value; 4], | ||
first_chunk: &[Value; 4], | ||
chunks: &[[Value; 4]], | ||
) -> u32 { | ||
let mut x4 = *first_chunk; | ||
|
||
// Apply initial crc value | ||
x4[0] = x4[0].xor(crc as u64); | ||
|
||
// Iteratively Fold by 4: | ||
let k1_k2 = coefficients[0]; | ||
for chunk in chunks { | ||
for (x, value) in x4.iter_mut().zip(chunk.iter()) { | ||
*x = x.fold_16(k1_k2, *value) | ||
} | ||
} | ||
|
||
// Iteratively Fold by 1: | ||
let k3_k4 = coefficients[1]; | ||
let mut x = x4[0].fold_16(k3_k4, x4[1]); | ||
x = x.fold_16(k3_k4, x4[2]); | ||
x = x.fold_16(k3_k4, x4[3]); | ||
|
||
// Final Reduction of 128-bits | ||
let k5_k6 = coefficients[2]; | ||
x = x.fold_8(k3_k4); | ||
x = x.fold_4(k5_k6); | ||
|
||
// Barret Reduction | ||
let px_u = coefficients[3]; | ||
x.barret_reduction_32(px_u) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,78 @@ | ||
use crate::clmul::ValueOps; | ||
|
||
#[cfg(target_arch = "x86")] | ||
use core::arch::x86 as arch; | ||
#[cfg(target_arch = "x86_64")] | ||
use core::arch::x86_64 as arch; | ||
use core::mem; | ||
|
||
/// A single 128-bit SSE integer vector (`__m128i`), wrapped in a newtype so
/// that the CRC folding operations can be implemented on it.
#[derive(Clone, Copy)]
pub struct Value(arch::__m128i);
|
||
impl ValueOps for Value { | ||
#[inline] | ||
fn xor(self, value: u64) -> Self { | ||
// SAFETY: This is only implemented if the target supports sse2, sse4.1, and pclmulqdq | ||
unsafe { | ||
Self(arch::_mm_xor_si128( | ||
self.0, | ||
arch::_mm_set_epi64x(0, value as i64), | ||
)) | ||
} | ||
} | ||
|
||
#[inline] | ||
fn fold_16(self, x_mod_p: Self, value: Self) -> Self { | ||
// SAFETY: This is only implemented if the target supports sse2, sse4.1, and pclmulqdq | ||
unsafe { | ||
Self(arch::_mm_xor_si128( | ||
arch::_mm_clmulepi64_si128(self.0, x_mod_p.0, 0x00), | ||
arch::_mm_xor_si128(arch::_mm_clmulepi64_si128(self.0, x_mod_p.0, 0x11), value.0), | ||
)) | ||
} | ||
} | ||
|
||
#[inline] | ||
fn fold_8(self, x_mod_p: Self) -> Self { | ||
// SAFETY: This is only implemented if the target supports sse2, sse4.1, and pclmulqdq | ||
unsafe { | ||
Self(arch::_mm_xor_si128( | ||
arch::_mm_clmulepi64_si128(self.0, x_mod_p.0, 0x10), | ||
arch::_mm_srli_si128(self.0, 8), | ||
)) | ||
} | ||
} | ||
|
||
#[inline] | ||
fn fold_4(self, x_mod_p: Self) -> Self { | ||
// SAFETY: This is only implemented if the target supports sse2, sse4.1, and pclmulqdq | ||
unsafe { | ||
Self(arch::_mm_xor_si128( | ||
arch::_mm_clmulepi64_si128( | ||
arch::_mm_and_si128(self.0, mem::transmute((1u128 << 32) - 1)), | ||
x_mod_p.0, | ||
0x00, | ||
), | ||
arch::_mm_srli_si128(self.0, 4), | ||
)) | ||
} | ||
} | ||
|
||
#[inline] | ||
fn barret_reduction_32(self, px_u: Self) -> u32 { | ||
// SAFETY: This is only implemented if the target supports sse2, sse4.1, and pclmulqdq | ||
unsafe { | ||
let t1 = arch::_mm_clmulepi64_si128( | ||
arch::_mm_and_si128(self.0, mem::transmute((1u128 << 32) - 1)), | ||
px_u.0, | ||
0x10, | ||
); | ||
let t2 = arch::_mm_clmulepi64_si128( | ||
arch::_mm_and_si128(t1, mem::transmute((1u128 << 32) - 1)), | ||
px_u.0, | ||
0x00, | ||
); | ||
arch::_mm_extract_epi32(arch::_mm_xor_si128(self.0, t2), 1) as u32 | ||
} | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.