From ed7127effa413a5811f860c2eb657bb16a8e68b8 Mon Sep 17 00:00:00 2001 From: Lam Pham-Sy Date: Tue, 30 Oct 2018 11:16:37 +0100 Subject: [PATCH] RS-FNT: simd indices as member variables --- src/fec_rs_fnt.h | 11 +++++++++++ src/fec_vectorisation.cpp | 26 ++++++-------------------- 2 files changed, 17 insertions(+), 20 deletions(-) diff --git a/src/fec_rs_fnt.h b/src/fec_rs_fnt.h index 4bdd5475..55ffb487 100644 --- a/src/fec_rs_fnt.h +++ b/src/fec_rs_fnt.h @@ -60,6 +60,11 @@ class RsFnt : public FecCode { // decoding context used in encoding of systematic FNT std::unique_ptr> enc_context; + // Indices used for accelerated functions + size_t simd_vec_len; + size_t simd_trailing_len; + size_t simd_offset; + public: RsFnt( FecType type, @@ -70,6 +75,12 @@ class RsFnt : public FecCode { : FecCode(type, word_size, n_data, n_parities, pkt_size) { this->fec_init(); + + // Indices used for accelerated functions + const unsigned ratio = simd::countof(); + simd_vec_len = this->pkt_size / ratio; + simd_trailing_len = this->pkt_size - simd_vec_len * ratio; + simd_offset = simd_vec_len * ratio; } inline void check_params() override diff --git a/src/fec_vectorisation.cpp b/src/fec_vectorisation.cpp index 8684e1ab..ed82fab8 100644 --- a/src/fec_vectorisation.cpp +++ b/src/fec_vectorisation.cpp @@ -53,20 +53,13 @@ void RsFnt::encode_post_process( uint16_t threshold = this->gf->card_minus_one(); unsigned code_len = this->n_outputs; - // number of elements per vector register - unsigned vec_size = simd::countof(); - // number of vector registers per fragment packet - size_t vecs_nb = size / vec_size; - // odd number of elements not vectorized - size_t last_len = size - vecs_nb * vec_size; - simd::encode_post_process( - output, props, offset, code_len, threshold, vecs_nb); + output, props, offset, code_len, threshold, simd_vec_len); - if (last_len > 0) { + if (simd_trailing_len > 0) { for (unsigned i = 0; i < code_len; ++i) { uint16_t* chunk = output.get(i); - for (size_t j = vecs_nb * vec_size; j < size; ++j) { + for (size_t j = simd_offset; j < size; ++j) { if (chunk[j] == threshold) { props[i].add(offset + j, OOR_MARK); } @@ -85,20 +78,13 @@ void RsFnt::encode_post_process( const uint32_t threshold = this->gf->card_minus_one(); const unsigned code_len = this->n_outputs; - // number of elements per vector register - const unsigned vec_size = simd::countof(); - // number of vector registers per fragment packet - const size_t vecs_nb = size / vec_size; - // odd number of elements not vectorized - const size_t last_len = size - vecs_nb * vec_size; - simd::encode_post_process( - output, props, offset, code_len, threshold, vecs_nb); + output, props, offset, code_len, threshold, simd_vec_len); - if (last_len > 0) { + if (simd_trailing_len > 0) { for (unsigned i = 0; i < code_len; ++i) { uint32_t* chunk = output.get(i); - for (size_t j = vecs_nb * vec_size; j < size; ++j) { + for (size_t j = simd_offset; j < size; ++j) { if (chunk[j] == threshold) { props[i].add(offset + j, OOR_MARK); }