-
Notifications
You must be signed in to change notification settings - Fork 2
/
matmul-bench-simple-c.c
106 lines (85 loc) · 2.57 KB
/
matmul-bench-simple-c.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
#include "matmul-bench-common.h"
/*
 * Single-threaded reference kernel: out = inL * inR.
 *
 * With n = p->n, the three buffers are indexed as row-major n x n float
 * arrays (element (r, c) lives at index r*n + c). Each of the n*n indexed
 * elements of p->out is overwritten; its previous contents are not read.
 *
 * Loop counters and n are unsigned long, the same types thread_func uses,
 * so counters are never compared against a value of different signedness
 * and the r*n + c index is not evaluated in unsigned int.
 */
static void
simple_run(struct MatmulBenchParam *p)
{
    float * __restrict out = p->out;
    const float * __restrict inL = p->inL;
    const float * __restrict inR = p->inR;
    unsigned long n = p->n;

    for (unsigned long i=0; i<n; i++) {
        for (unsigned long j=0; j<n; j++) {
            float v = 0;

            /* Dot product of row i of inL with column j of inR. */
            for (unsigned long k=0; k<n; k++) {
                v += inL[i*n+k] * inR[k*n+j];
            }
            out[i*n+j] = v;
        }
    }
}
/*
 * Worker passed to matmul_bench_thread_call by simple_thread_run.
 *
 * Computes rows [i_start, i_end) of out = inL * inR, where the buffers are
 * indexed as row-major n x n float arrays with n = p->n. Each output
 * element is a freshly accumulated dot product, so prior contents of out
 * are not read. thread_id is not used.
 */
static void
thread_func(struct MatmulBenchParam *p,
            unsigned long i_start,
            unsigned long i_end,
            unsigned int thread_id)
{
    const unsigned long dim = p->n;
    const float * __restrict lhs = p->inL;
    const float * __restrict rhs = p->inR;
    float * __restrict dst = p->out;

    for (unsigned long row = i_start; row < i_end; row++) {
        /* Offset of the first element of this row in lhs and dst. */
        const unsigned long row_base = row * dim;

        for (unsigned long col = 0; col < dim; col++) {
            float acc = 0;

            for (unsigned long k = 0; k < dim; k++) {
                acc += lhs[row_base + k] * rhs[k * dim + col];
            }
            dst[row_base + col] = acc;
        }
    }
}
/*
 * Run function of the "simple_thread" test.
 *
 * Hands thread_func to matmul_bench_thread_call together with
 * p->i_block_size and p->n.
 * NOTE(review): presumably the helper splits the row range [0, n) into
 * blocks of i_block_size and invokes thread_func on each from worker
 * threads -- confirm against matmul_bench_thread_call's definition.
 */
static void
simple_thread_run(struct MatmulBenchParam *p)
{
matmul_bench_thread_call(p, p->i_block_size, p->n, thread_func);
}
/*
 * Worker passed to matmul_bench_thread_call by outer_run.
 *
 * Adds the product inL * inR into rows [i_start, i_end) of out, where the
 * buffers are indexed as row-major n x n float arrays with n = p->n. The
 * loops are ordered i, k, j: for each (i, k) one element of inL is loaded
 * and the innermost loop then steps through consecutive elements of row k
 * of inR and row i of out.
 *
 * The result is accumulated with +=, so out must already hold the initial
 * value (outer_run zeroes it before dispatching). thread_id is not used.
 *
 * All counters are unsigned long, like n and i, so no counter is compared
 * against a value of different signedness and none can overflow as a
 * signed int.
 */
static void
outer_func(struct MatmulBenchParam *p,
           unsigned long i_start,
           unsigned long i_end,
           unsigned int thread_id)
{
    const float * __restrict inL = p->inL;
    const float * __restrict inR = p->inR;
    float * __restrict out = p->out;
    unsigned long n = p->n;

    for (unsigned long i=i_start; i<i_end; i++) {
        for (unsigned long k=0; k<n; k++) {
            /* Loaded once per (i, k) and reused across the whole j loop. */
            float lik = inL[i*n+k];

            for (unsigned long j=0; j<n; j++) {
                out[i*n+j] += lik * inR[k*n + j];
            }
        }
    }
}
/*
 * Run function of the "outer_thread" test.
 *
 * Zeroes the n*n indexed elements of p->out (n = p->n), then hands
 * outer_func to matmul_bench_thread_call together with p->i_block_size and
 * p->n. The zeroing is required because outer_func accumulates into out
 * with += rather than overwriting it.
 *
 * The zeroing loop uses unsigned long for n and both counters, matching
 * outer_func, so the i*n + j index is not evaluated in unsigned int and
 * the counters are not compared against a value of different signedness.
 */
static void
outer_run(struct MatmulBenchParam *p)
{
    float * __restrict out = p->out;
    unsigned long n = p->n;

    for (unsigned long i=0; i<n; i++) {
        for (unsigned long j=0; j<n; j++) {
            out[i*n+j] = 0;
        }
    }

    matmul_bench_thread_call(p, p->i_block_size, p->n, outer_func);
}
/*
 * Test descriptors for the three kernels in this file; each pairs a test
 * name string with its run function. They are pushed into the test set by
 * matmulbench_init_simple_c.
 * NOTE(review): the meaning of the trailing 1 is determined by
 * MATMULBENCH_TEST_INITIALIZER, which is not visible here -- confirm.
 */
static const struct MatmulBenchTest simple = MATMULBENCH_TEST_INITIALIZER("simple", simple_run, 1);
static const struct MatmulBenchTest simple_thread = MATMULBENCH_TEST_INITIALIZER("simple_thread", simple_thread_run, 1);
/* The variable is called `outer`, but the test name string is "outer_thread". */
static const struct MatmulBenchTest outer = MATMULBENCH_TEST_INITIALIZER("outer_thread", outer_run, 1);
/*
 * Registers this file's tests with a test set.
 *
 * Pushes the simple, simple_thread and outer descriptors onto test_set
 * with VA_PUSH, in that order. The parameter b is not used in this
 * function.
 */
void
matmulbench_init_simple_c(struct MatmulBench *b, struct npr_varray *test_set)
{
VA_PUSH(struct MatmulBenchTest, test_set, simple);
VA_PUSH(struct MatmulBenchTest, test_set, simple_thread);
VA_PUSH(struct MatmulBenchTest, test_set, outer);
}