forked from PeterDinda/nautilus
-
Notifications
You must be signed in to change notification settings - Fork 0
/
omptest-ori.c
231 lines (189 loc) · 5.38 KB
/
omptest-ori.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
#include <nautilus/nautilus.h>
#include <nautilus/shell.h>
#include <nautilus/libccompat.h>
#include <nautilus/random.h>
//#include <nautilus/scheduler.h>
#ifndef NAUT_CONFIG_DEBUG_GPUDEV
#undef DEBUG_PRINT
#define DEBUG_PRINT(fmt, args...)
#endif
#define ERROR(fmt, args...) ERROR_PRINT("omptest: " fmt, ##args)
#define DEBUG(fmt, args...) DEBUG_PRINT("omptest: " fmt, ##args)
#define INFO(fmt, args...) INFO_PRINT("omptest: " fmt, ##args)
static inline uint16_t random()
{
uint16_t t;
nk_get_rand_bytes((uint8_t *)&t,sizeof(t));
return t;
}
#define MAXN 5100 /* Max value of N */
int N; /* Matrix size */
int procs; /* Number of processors to use */
/* Matrices and vectors */
volatile float A[MAXN][MAXN], B[MAXN], X[MAXN];
volatile float ORA[MAXN][MAXN], ORB[MAXN], ORX[MAXN];
/* A * X = B, solve for X */
int seed;
/* Prototype */
void gauss(); /* The function you will provide.
* It is this routine that is timed.
* It is called only on the parent.
*/
/* Initialize A and B (and X to 0.0s) */
void initialize_inputs() {
int row, col;
printf("\nInitializing...\n");
// #pragma omp parallel num_threads(8)
{
// #pragma omp for private(row,col) schedule(static,1) nowait
for (col = 0; col < N; col++) {
for (row = 0; row < N; row++) {
ORA[col][row] = (float) random()/32768.0;
}
ORB[col] = (float)random()/32768.0;
}
}
}
void reset_inputs(){
int row, col;
printf("\n reseting...\n");
for (col = 0; col < N; col++) {
for (row = 0; row < N; row++) {
A[row][col] = ORA[row][col];
}
B[col] = ORB[col];
X[col] = 0.0;
}
}
/* Print input matrices */
void print_inputs() {
int row, col;
if (N < 1000) {
printf("\nA =\n\t");
for (row = 0; row < N; row++) {
for (col = 0; col < N; col++) {
printf("%5.2f%s", A[row][col], (col < N-1) ? ", " : ";\n\t");
}
}
printf("\nB = [");
for (col = 0; col < N; col++) {
printf("%5.2f%s", B[col], (col < N-1) ? "; " : "]\n");
}
}
}
void serialgauss(){
int norm, row, col; /* Normalization row, and zeroing
* element row and col */
float multiplier;
printf("Computing serially.\n");
/* Gaussian elimination */
for (norm = 0; norm < N - 1; norm++) {
// int num = N - norm;
{
//printf("%f ", A[norm][norm]);
for (row = norm + 1; row < N; row++) {
multiplier = A[row][norm] / A[norm][norm];
for (col = norm; col < N; col++) {
A[row][col] -= A[norm][col] * multiplier;
}
B[row] -= B[norm] * multiplier;
}
}
}
/* (Diagonal elements are not normalized to 1. This is treated in back
* substitution.)
*/
/* Back substitution */
for (row = N - 1; row >= 0; row--) {
X[row] = B[row];
for (col = N-1; col > row; col--) {
X[row] -= A[row][col] * X[col];
}
X[row] /= A[row][row];
//printf("%5.2f ", X[row]);
}
}
void ompgauss() {
int norm, row, col; /* Normalization row, and zeroing
* element row and col */
float multiplier;
//doneflag[0] = 1;
printf("Computing using omp.\n");
/* Gaussian elimination */
#pragma omp parallel private(row, col, multiplier, norm) num_threads(procs)
{
for (norm = 0; norm < N - 1; norm++) {
#pragma omp for schedule(static,1)
for (row = norm + 1; row < N; row++) {
multiplier = A[row][norm]/A[norm][norm];
for (col = norm; col < N; col++) {
A[row][col] -= A[norm][col] * multiplier;
}
B[row] -= B[norm] * multiplier;
}
}
}
nk_vc_printf("I am done\n");
/* (Diagonal elements are not normalized to 1. This is treated in back
* substitution.)
*/
/* Back substitution */
for (row = N - 1; row >= 0; row--) {
X[row] = B[row];
for (col = N-1; col > row; col--) {
X[row] -= A[row][col] * X[col];
}
X[row] /= A[row][row];
}
}
#define TIME() (double)nk_sched_get_realtime();
static int handle_omptest (char * buf, void * priv)
{
int seed, size, np;
if ((sscanf(buf,"omptest %d %d %d",&seed,&size,&np)!=3)) {
nk_vc_printf("Don't understand %s please input seed, matrix size and nprocs\n",buf);
return -1;
}
nk_rand_seed(seed);
N = size;
procs = np;
nk_vc_printf("seed %d, size, %d, nprocs: %d\n", seed, N, procs);
initialize_inputs();
reset_inputs();
// print_inputs();
unsigned mxcsr;
__asm__ volatile("ldmxcsr %0"::"m"(*&mxcsr):"memory");
printf("ld %04x \n", mxcsr);
mxcsr = mxcsr ^ 0x0200;
printf("st %08x \n", mxcsr);
__asm__ volatile("stmxcsr %0"::"m"(*&mxcsr):"memory");
__asm__ volatile("ldmxcsr %0"::"m"(*&mxcsr):"memory");
printf("ld %08x \n", mxcsr);
double start = TIME();
ompgauss();
double end = TIME();
double omp = end-start;
nk_vc_printf("openmp done %lf\n", omp);
float OMP[N];
for(int row =0; row<N; row++){
OMP[row] = X[row];
}
reset_inputs();
start = TIME();
serialgauss();
end = TIME();
double serial = end-start;
nk_vc_printf("serial done %lf\n", serial);
float difference = 0.0;
for(int row =0; row<N; row++){
difference += (OMP[row]- X[row]);
}
nk_vc_printf("OMP difference %f speed up %f !\n", difference, serial/omp);
return 0;
}
static struct shell_cmd_impl omptest_impl = {
.cmd = "omptest",
.help_str = "openmp test",
.handler = handle_omptest,
};
nk_register_shell_cmd(omptest_impl);