Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

SIMD-based implementations #109

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 11 additions & 1 deletion benches/bench.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,17 @@ pub const BLUETOOTH: Crc<u8> = Crc::<u8>::new(&CRC_8_BLUETOOTH);
pub const BLUETOOTH_SLICE16: Crc<u8, Table<16>> = Crc::<u8, Table<16>>::new(&CRC_8_BLUETOOTH);
pub const BLUETOOTH_BYTEWISE: Crc<u8, Table<1>> = Crc::<u8, Table<1>>::new(&CRC_8_BLUETOOTH);
pub const BLUETOOTH_NOLOOKUP: Crc<u8, NoTable> = Crc::<u8, NoTable>::new(&CRC_8_BLUETOOTH);
pub const BLUETOOTH_SIMD: Crc<u8, Simd> = Crc::<u8, Simd>::new(&CRC_8_BLUETOOTH);
pub const X25: Crc<u16> = Crc::<u16>::new(&CRC_16_IBM_SDLC);
pub const X25_SLICE16: Crc<u16, Table<16>> = Crc::<u16, Table<16>>::new(&CRC_16_IBM_SDLC);
pub const X25_BYTEWISE: Crc<u16, Table<1>> = Crc::<u16, Table<1>>::new(&CRC_16_IBM_SDLC);
pub const X25_NOLOOKUP: Crc<u16, NoTable> = Crc::<u16, NoTable>::new(&CRC_16_IBM_SDLC);
pub const X25_SIMD: Crc<u16, Simd> = Crc::<u16, Simd>::new(&CRC_16_IBM_SDLC);
pub const ISCSI: Crc<u32> = Crc::<u32>::new(&CRC_32_ISCSI);
pub const ISCSI_SLICE16: Crc<u32, Table<16>> = Crc::<u32, Table<16>>::new(&CRC_32_ISCSI);
pub const ISCSI_BYTEWISE: Crc<u32, Table<1>> = Crc::<u32, Table<1>>::new(&CRC_32_ISCSI);
pub const ISCSI_NOLOOKUP: Crc<u32, NoTable> = Crc::<u32, NoTable>::new(&CRC_32_ISCSI);
pub const ISCSI_SIMD: Crc<u32, Simd> = Crc::<u32, Simd>::new(&CRC_32_ISCSI);
pub const GSM_40: Crc<u64> = Crc::<u64>::new(&CRC_40_GSM);
pub const ECMA: Crc<u64> = Crc::<u64>::new(&CRC_64_ECMA_182);
pub const ECMA_SLICE16: Crc<u64, Table<16>> = Crc::<u64, Table<16>>::new(&CRC_64_ECMA_182);
Expand Down Expand Up @@ -51,6 +54,9 @@ fn checksum(c: &mut Criterion) {
})
.bench_function("slice16", |b| {
b.iter(|| BLUETOOTH_SLICE16.checksum(black_box(&bytes)))
})
.bench_function("simd", |b| {
b.iter(|| BLUETOOTH_SIMD.checksum(black_box(&bytes)))
});

c.benchmark_group("crc16")
Expand All @@ -64,7 +70,8 @@ fn checksum(c: &mut Criterion) {
})
.bench_function("slice16", |b| {
b.iter(|| X25_SLICE16.checksum(black_box(&bytes)))
});
})
.bench_function("simd", |b| b.iter(|| X25_SIMD.checksum(black_box(&bytes))));

c.benchmark_group("crc32")
.throughput(Throughput::Bytes(size as u64))
Expand All @@ -77,6 +84,9 @@ fn checksum(c: &mut Criterion) {
})
.bench_function("slice16", |b| {
b.iter(|| ISCSI_SLICE16.checksum(black_box(&bytes)))
})
.bench_function("simd", |b| {
b.iter(|| ISCSI_SIMD.checksum(black_box(&bytes)))
});

c.benchmark_group("crc64")
Expand Down
27 changes: 17 additions & 10 deletions src/crc128.rs
Original file line number Diff line number Diff line change
Expand Up @@ -169,20 +169,19 @@ const fn update_slice16(
#[cfg(test)]
mod test {
use crate::*;
use crc_catalog::{Algorithm, CRC_82_DARC};

/// Test this optimized version against the well known implementation to ensure correctness
#[test]
fn correctness() {
let data: &[&str] = &[
"",
"1",
"1234",
"123456789",
"0123456789ABCDE",
"01234567890ABCDEFGHIJK",
"01234567890ABCDEFGHIJK01234567890ABCDEFGHIJK01234567890ABCDEFGHIJK01234567890ABCDEFGHIJK01234567890ABCDEFGHIJK01234567890ABCDEFGHIJK01234567890ABCDEFGHIJK01234567890ABCDEFGHIJK01234567890ABCDEFGHIJK01234567890ABCDEFGHIJK01234567890ABCDEFGHIJK01234567890ABCDEFGHIJK",
];
"",
"1",
"1234",
"123456789",
"0123456789ABCDE",
"01234567890ABCDEFGHIJK",
"01234567890ABCDEFGHIJK01234567890ABCDEFGHIJK01234567890ABCDEFGHIJK01234567890ABCDEFGHIJK01234567890ABCDEFGHIJK01234567890ABCDEFGHIJK01234567890ABCDEFGHIJK01234567890ABCDEFGHIJK01234567890ABCDEFGHIJK01234567890ABCDEFGHIJK01234567890ABCDEFGHIJK01234567890ABCDEFGHIJK",
];

pub const CRC_82_DARC_NONREFLEX: Algorithm<u128> = Algorithm {
width: 82,
Expand All @@ -191,12 +190,20 @@ mod test {
refin: false,
refout: true,
xorout: 0x000000000000000000000,
check: 0x09ea83f625023801fd612,
check: 0x12e0b19fa447c0bf627ac,
residue: 0x000000000000000000000,
};

let algs_to_test = [&CRC_82_DARC, &CRC_82_DARC_NONREFLEX];

// Check if the baseline is as expected.
for alg in algs_to_test {
assert_eq!(
Crc::<u128, Table<1>>::new(alg).checksum("123456789".as_bytes()),
alg.check
);
}

for alg in algs_to_test {
for data in data {
let crc_slice16 = Crc::<u128, Table<16>>::new(alg);
Expand Down
78 changes: 66 additions & 12 deletions src/crc16.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,13 @@ mod bytewise;
mod nolookup;
mod slice16;

#[cfg(all(
target_feature = "sse2",
target_feature = "sse4.1",
target_feature = "pclmulqdq",
))]
mod simd;

const fn init(algorithm: &Algorithm<u16>, initial: u16) -> u16 {
if algorithm.refin {
initial.reverse_bits() >> (16u8 - algorithm.width)
Expand Down Expand Up @@ -141,7 +148,6 @@ const fn update_slice16(
#[cfg(test)]
mod test {
use crate::*;
use crc_catalog::{Algorithm, CRC_16_IBM_SDLC};

/// Test this optimized version against the well known implementation to ensure correctness
#[test]
Expand All @@ -156,28 +162,72 @@ mod test {
"01234567890ABCDEFGHIJK01234567890ABCDEFGHIJK01234567890ABCDEFGHIJK01234567890ABCDEFGHIJK01234567890ABCDEFGHIJK01234567890ABCDEFGHIJK01234567890ABCDEFGHIJK01234567890ABCDEFGHIJK01234567890ABCDEFGHIJK01234567890ABCDEFGHIJK01234567890ABCDEFGHIJK01234567890ABCDEFGHIJK",
];

pub const CRC_16_IBM_SDLC_NONREFLEX: Algorithm<u16> = Algorithm {
width: 16,
poly: 0x1021,
init: 0xffff,
refin: false,
refout: true,
xorout: 0xffff,
check: 0x906e,
residue: 0xf0b8,
};
let algs_to_test = &[
CRC_10_ATM,
CRC_10_CDMA2000,
CRC_10_GSM,
CRC_11_FLEXRAY,
CRC_11_UMTS,
CRC_12_CDMA2000,
CRC_12_DECT,
CRC_12_GSM,
CRC_12_UMTS,
CRC_13_BBC,
CRC_14_DARC,
CRC_14_GSM,
CRC_15_CAN,
CRC_15_MPT1327,
CRC_16_ARC,
CRC_16_CDMA2000,
CRC_16_CMS,
CRC_16_DDS_110,
CRC_16_DECT_R,
CRC_16_DECT_X,
CRC_16_DNP,
CRC_16_EN_13757,
CRC_16_GENIBUS,
CRC_16_GSM,
CRC_16_IBM_3740,
CRC_16_IBM_SDLC,
CRC_16_ISO_IEC_14443_3_A,
CRC_16_KERMIT,
CRC_16_LJ1200,
CRC_16_MAXIM_DOW,
CRC_16_MCRF4XX,
CRC_16_MODBUS,
CRC_16_NRSC_5,
CRC_16_OPENSAFETY_A,
CRC_16_OPENSAFETY_B,
CRC_16_PROFIBUS,
CRC_16_RIELLO,
CRC_16_SPI_FUJITSU,
CRC_16_T10_DIF,
CRC_16_TELEDISK,
CRC_16_TMS37157,
CRC_16_UMTS,
CRC_16_USB,
CRC_16_XMODEM,
];

let algs_to_test = [&CRC_16_IBM_SDLC, &CRC_16_IBM_SDLC_NONREFLEX];
// Check if the baseline is as expected.
for alg in algs_to_test {
assert_eq!(
Crc::<u16, Table<1>>::new(alg).checksum("123456789".as_bytes()),
alg.check
);
}

for alg in algs_to_test {
for data in data {
let crc_slice16 = Crc::<u16, Table<16>>::new(alg);
let crc_nolookup = Crc::<u16, NoTable>::new(alg);
let crc_clmul = Crc::<u16, Simd>::new(alg);
let expected = Crc::<u16, Table<1>>::new(alg).checksum(data.as_bytes());

// Check that doing all at once works as expected
assert_eq!(crc_slice16.checksum(data.as_bytes()), expected);
assert_eq!(crc_nolookup.checksum(data.as_bytes()), expected);
assert_eq!(crc_clmul.checksum(data.as_bytes()), expected);

let mut digest = crc_slice16.digest();
digest.update(data.as_bytes());
Expand All @@ -187,6 +237,10 @@ mod test {
digest.update(data.as_bytes());
assert_eq!(digest.finalize(), expected);

let mut digest = crc_clmul.digest();
digest.update(data.as_bytes());
assert_eq!(digest.finalize(), expected);

// Check that we didn't break updating from multiple sources
if data.len() > 2 {
let data = data.as_bytes();
Expand Down
66 changes: 66 additions & 0 deletions src/crc16/simd.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
use crate::crc16::{finalize, init, update_bytewise};
use crate::*;
use crate::{simd::crc32_coeff, table::crc16_table};

use self::simd::{crc32_update_refin, Value};

impl Crc<u16, Simd> {
    /// Creates a SIMD-backed CRC-16 calculator for `algorithm`.
    ///
    /// Two precomputed values are stored alongside the algorithm: the lookup
    /// table produced by `crc16_table` (consumed by the bytewise routine) and
    /// the coefficients produced by `crc32_coeff` (consumed by the SIMD routine).
    pub const fn new(algorithm: &'static Algorithm<u16>) -> Self {
        let data = (
            crc16_table(algorithm.width, algorithm.poly, algorithm.refin),
            crc32_coeff(algorithm.width, algorithm.poly as u32),
        );
        Self { algorithm, data }
    }

    /// Computes the checksum of `bytes` in one shot, starting from the
    /// algorithm's own initial value.
    pub fn checksum(&self, bytes: &[u8]) -> u16 {
        let start = init(self.algorithm, self.algorithm.init);
        finalize(self.algorithm, self.update(start, bytes))
    }

    /// Feeds `bytes` into the running value `crc` and returns the new running value.
    ///
    /// Algorithms with `refin == false` are handled entirely by the bytewise
    /// table routine. Otherwise the input is split into an unaligned head, a run
    /// of aligned `[Value; 4]` chunks and a tail; head and tail go through the
    /// bytewise routine, the chunks through `crc32_update_refin`.
    fn update(&self, crc: u16, bytes: &[u8]) -> u16 {
        let refin = self.algorithm.refin;
        let (table, coeff) = (&self.data.0, &self.data.1);

        if !refin {
            return update_bytewise(crc, refin, table, bytes);
        }

        // SAFETY: `align_to` hands back a correctly aligned middle slice and only
        // reinterprets groups of 64 bytes as four 128-bit values; lifetime and
        // mutability of the borrow are unchanged.
        // NOTE(review): this presumes every bit pattern is a valid `Value` —
        // confirm against its definition in the `simd` module.
        let (head, chunks, tail) = unsafe { bytes.align_to::<[Value; 4]>() };

        let crc = update_bytewise(crc, refin, table, head);
        let crc = match chunks.split_first() {
            // The running value is widened for the SIMD routine and narrowed again afterwards.
            Some((first, rest)) => crc32_update_refin(crc as u32, coeff, first, rest) as u16,
            None => crc,
        };
        update_bytewise(crc, refin, table, tail)
    }

    /// Starts an incremental computation seeded with the algorithm's initial value.
    pub const fn digest(&self) -> Digest<u16, Simd> {
        let initial = self.algorithm.init;
        self.digest_with_initial(initial)
    }

    /// Starts an incremental computation seeded with a caller-supplied initial value.
    ///
    /// The supplied value replaces the one specified by the algorithm. It is
    /// still passed through `init`, so the algorithm's `refin` and `width`
    /// properties are applied to it.
    pub const fn digest_with_initial(&self, initial: u16) -> Digest<u16, Simd> {
        Digest::new(self, init(self.algorithm, initial))
    }
}

impl<'a> Digest<'a, u16, Simd> {
    /// Pairs a calculator with an already-initialised running value.
    const fn new(crc: &'a Crc<u16, Simd>, value: u16) -> Self {
        Self { crc, value }
    }

    /// Feeds `bytes` into the digest, replacing the stored running value.
    pub fn update(&mut self, bytes: &[u8]) {
        let next = self.crc.update(self.value, bytes);
        self.value = next;
    }

    /// Consumes the digest and returns the finalized checksum.
    pub const fn finalize(self) -> u16 {
        let algorithm = self.crc.algorithm;
        finalize(algorithm, self.value)
    }
}
Loading
Loading