-
Notifications
You must be signed in to change notification settings - Fork 2
/
gemmXtOutOfCore.cu
53 lines (45 loc) · 1.19 KB
/
gemmXtOutOfCore.cu
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
#include <stdlib.h>
#include <stdio.h>
#include <cuda_runtime.h>
#include <cublasXt.h>
#include "common.hh"
int
main(int argc, char *argv[])
{
size_t N = 16000;
clock_t start, end;
cublasXtHandle_t handle;
int devices[1] = {0};
float *a, *b, *c;
const float alpha = 1;
const float beta = 0;
size_t count, nn;
if (argc == 2) {
N = checked_strtosize(argv[1]);
}
nn = checked_mul(N, N);
count = checked_mul(nn, sizeof(float));
check(cublasXtCreate(&handle));
check(cublasXtDeviceSelect(handle, 1, devices));
start = clock();
check(cudaMallocHost(&a, count));
check(cudaMallocHost(&b, count));
check(cudaMallocHost(&c, count));
for (size_t i = 0; i < N*N; i++) {
a[i] = i / 37.0;
b[i] = i / 101.0;
}
end = clock();
log("host: MallocHost+init", start, end);
start = clock();
check(cublasXtSgemm(handle, CUBLAS_OP_N, CUBLAS_OP_N,
N, N, N,
&alpha,
a, N,
b, N,
&beta,
c, N));
end = clock();
log("cublasXtSgemm", start, end);
return 0;
}