Skip to content

Commit

Permalink
RS-FNT: simd indices as member variables
Browse files Browse the repository at this point in the history
  • Loading branch information
lamphamsy committed Dec 20, 2018
1 parent 968bc75 commit ed7127e
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 20 deletions.
11 changes: 11 additions & 0 deletions src/fec_rs_fnt.h
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,11 @@ class RsFnt : public FecCode<T> {
// decoding context used in encoding of systematic FNT
std::unique_ptr<DecodeContext<T>> enc_context;

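// Precomputed packet partitioning used by the SIMD-accelerated functions:
// number of full vector registers per packet, number of trailing elements
// that do not fill a register, and the index of the first trailing element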
size_t simd_vec_len;
size_t simd_trailing_len;
size_t simd_offset;

public:
RsFnt(
FecType type,
Expand All @@ -70,6 +75,12 @@ class RsFnt : public FecCode<T> {
: FecCode<T>(type, word_size, n_data, n_parities, pkt_size)
{
this->fec_init();

// Split the packet into full SIMD vectors (`ratio` elements each) plus a
// trailing remainder, for use by the accelerated functions
const unsigned ratio = simd::countof<T>();
simd_vec_len = this->pkt_size / ratio;
simd_trailing_len = this->pkt_size - simd_vec_len * ratio;
simd_offset = simd_vec_len * ratio;
}

inline void check_params() override
Expand Down
26 changes: 6 additions & 20 deletions src/fec_vectorisation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -53,20 +53,13 @@ void RsFnt<uint16_t>::encode_post_process(
uint16_t threshold = this->gf->card_minus_one();
unsigned code_len = this->n_outputs;

// number of elements per vector register
unsigned vec_size = simd::countof<uint16_t>();
// number of vector registers per fragment packet
size_t vecs_nb = size / vec_size;
// odd number of elements not vectorized
size_t last_len = size - vecs_nb * vec_size;

simd::encode_post_process(
output, props, offset, code_len, threshold, vecs_nb);
output, props, offset, code_len, threshold, simd_vec_len);

if (last_len > 0) {
if (simd_trailing_len > 0) {
for (unsigned i = 0; i < code_len; ++i) {
uint16_t* chunk = output.get(i);
for (size_t j = vecs_nb * vec_size; j < size; ++j) {
for (size_t j = simd_offset; j < size; ++j) {
if (chunk[j] == threshold) {
props[i].add(offset + j, OOR_MARK);
}
Expand All @@ -85,20 +78,13 @@ void RsFnt<uint32_t>::encode_post_process(
const uint32_t threshold = this->gf->card_minus_one();
const unsigned code_len = this->n_outputs;

// number of elements per vector register
const unsigned vec_size = simd::countof<uint32_t>();
// number of vector registers per fragment packet
const size_t vecs_nb = size / vec_size;
// odd number of elements not vectorized
const size_t last_len = size - vecs_nb * vec_size;

simd::encode_post_process(
output, props, offset, code_len, threshold, vecs_nb);
output, props, offset, code_len, threshold, simd_vec_len);

if (last_len > 0) {
if (simd_trailing_len > 0) {
for (unsigned i = 0; i < code_len; ++i) {
uint32_t* chunk = output.get(i);
for (size_t j = vecs_nb * vec_size; j < size; ++j) {
for (size_t j = simd_offset; j < size; ++j) {
if (chunk[j] == threshold) {
props[i].add(offset + j, OOR_MARK);
}
Expand Down

0 comments on commit ed7127e

Please sign in to comment.