Skip to content

Commit 06a4848

Browse files
committed
variadic map reducer
1 parent ef6b5b2 commit 06a4848

File tree

2 files changed

+119
-0
lines changed

2 files changed

+119
-0
lines changed
Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
// Perform a variadic map-reduce -- used to compute the
2+
// dot product in a nice way.
3+
4+
// [[Rcpp::depends(RcppNT2)]]
5+
#include <RcppNT2.h>
6+
using namespace RcppNT2;
7+
8+
#include <Rcpp.h>
9+
using namespace Rcpp;
10+
11+
struct DotMapReducer
12+
{
13+
double init() { return 0.0; }
14+
15+
template <typename T>
16+
T map(const T& a, const T& b, const T& c)
17+
{
18+
return a * b * c;
19+
}
20+
21+
template <typename T>
22+
T combine(const T& lhs, const T& rhs)
23+
{
24+
return lhs + rhs;
25+
}
26+
27+
template <typename T>
28+
double reduce(const T& t)
29+
{
30+
return nt2::sum(t);
31+
}
32+
};
33+
34+
// [[Rcpp::export]]
35+
double simdDotVariadic(NumericVector a, NumericVector b, NumericVector c)
36+
{
37+
return variadic::simdMapReduce(DotMapReducer(), a, b, c);
38+
}
39+
40+
// [[Rcpp::export]]
41+
double naiveDot(NumericVector a, NumericVector b, NumericVector c)
42+
{
43+
double* pA = REAL(a);
44+
double* pB = REAL(b);
45+
double* pC = REAL(c);
46+
47+
double* end = REAL(a) + a.size();
48+
49+
double result = 0.0;
50+
for (; pA != end; ++pA, ++pB, ++pC)
51+
result += (*pA) * (*pB) * (*pC);
52+
53+
return result;
54+
}
55+
56+
/*** R
57+
set.seed(123)
58+
n <- 1000 * 1024
59+
x <- rnorm(n); y <- rnorm(n); z <- rnorm(n)
60+
stopifnot(all.equal(
61+
sum(x * y * z),
62+
simdDotVariadic(x, y, z)
63+
))
64+
65+
library(microbenchmark)
66+
microbenchmark(
67+
R = sum(x * y * z),
68+
simd = simdDotVariadic(x, y, z),
69+
naive = naiveDot(x, y, z)
70+
)
71+
*/

inst/include/RcppNT2/variadic/variadic.h

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,54 @@ F simdFor(F&& f, const T& t, const Ts&... ts)
3434
return simdFor(std::forward<F>(f), t.begin(), t.end(), begin(ts)...);
3535
}
3636

37+
template <typename MapReducer, typename U, typename T, typename... Ts>
38+
U simdMapReduce(MapReducer&& reducer, U init, const T* it, const T* end, const Ts*... ts)
39+
{
40+
auto&& f = std::forward<MapReducer>(reducer);
41+
42+
typedef boost::simd::pack<T> vT; // SIMD vector of T
43+
typedef boost::simd::pack<U> vU; // SIMD vector of U
44+
45+
static const std::size_t N = vT::static_size;
46+
const T* aligned_begin = std::min(boost::simd::align_on(it, N * sizeof(T)), end);
47+
const T* aligned_end = aligned_begin + (end - aligned_begin) / N * N;
48+
49+
// Buffer for the SIMD mapping operations
50+
vU buffer = boost::simd::splat<vU>(init);
51+
52+
// Scalar operations for the initial unaligned region
53+
for (; it != aligned_begin; increment<1>(it, ts...))
54+
init = f.combine(f.map(*it, *ts...), init);
55+
56+
// Aligned, SIMD operations
57+
for (; it != aligned_end; increment<N>(it, ts...))
58+
buffer = f.combine(
59+
f.map(boost::simd::aligned_load<vT>(it), boost::simd::load<vT>(ts)...),
60+
buffer
61+
);
62+
63+
// Reduce the buffer, joining it into the scalar vale
64+
init = f.combine(f.reduce(buffer), init);
65+
66+
// Leftover unaligned region
67+
for (; it != end; increment<1>(it, ts...))
68+
init = f.combine(f.map(*it, *ts...), init);
69+
70+
return init;
71+
}
72+
73+
template <typename MapReducer, typename T, typename... Ts>
74+
auto simdMapReduce(MapReducer&& reducer, const T& t, const Ts&... ts)
75+
-> decltype(std::forward<MapReducer>(reducer).init())
76+
{
77+
return simdMapReduce(std::forward<MapReducer>(reducer),
78+
std::forward<MapReducer>(reducer).init(),
79+
t.begin(),
80+
t.end(),
81+
begin(ts)...);
82+
}
83+
84+
3785
} // namespace variadic
3886
} // namespace RcppNT2
3987

0 commit comments

Comments
 (0)