Skip to content

Commit f5a8dd3

Browse files
committed
Use librpimemmgr for memory management
1 parent eaf319e commit f5a8dd3

File tree

10 files changed

+84
-436
lines changed

10 files changed

+84
-436
lines changed

.gitignore

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@ test/memory_bench
77
test/scopy
88
test/sgemm
99
test/sgemm_spec
10-
test/vcsm
1110
test/vsAbs
1211

1312
qmkl.pc

CMakeLists.txt

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,19 @@ endif ()
4646

4747
pkg_check_modules(MAILBOX REQUIRED libmailbox>=2.0.0)
4848

49+
# librpimemmgr needs bcm_host and vcsm, which may be in /opt/vc...
50+
pkg_check_modules(RPIMEMMGR librpimemmgr>=1.0.0)
51+
if (NOT RPIMEMMGR_FOUND)
52+
message(STATUS "Adding /opt/vc/lib/pkgconfig to PKG_CONFIG_PATH")
53+
set(ENV{PKG_CONFIG_PATH} "$ENV{PKG_CONFIG_PATH}:/opt/vc/lib/pkgconfig")
54+
pkg_check_modules(RPIMEMMGR librpimemmgr>=1.0.0)
55+
if (NOT RPIMEMMGR_FOUND)
56+
message (FATAL_ERROR "librpimemmgr not found even in /opt/vc/lib. "
57+
"Building on non-RPi host? "
58+
"Please specify PKG_CONFIG_PATH.")
59+
endif ()
60+
endif ()
61+
4962
if (DEFINED ENV{RPIVER})
5063
if ("$ENV{RPIVER}" STREQUAL "1")
5164
set (RPIVER 1)

README.md

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -13,9 +13,12 @@ optimized for neural networks. There are movies of that:
1313

1414
## Requirements
1515

16-
You need to install [qasm2](https://github.com/Terminus-IMRC/qpu-assembler2)
17-
and [qbin2hex](https://github.com/Terminus-IMRC/qpu-bin-to-hex) to compile
18-
this library. Just clone them and do `make && sudo make install`.
16+
You need to install:
17+
18+
- [qasm2](https://github.com/Terminus-IMRC/qpu-assembler2)
19+
- [qbin2hex](https://github.com/Terminus-IMRC/qpu-bin-to-hex)
20+
- [mailbox](https://github.com/Terminus-IMRC/mailbox)
21+
- [librpimemmgr](https://github.com/Idein/librpimemmgr)
1922

2023
In addition, make sure Linux kernel 4.9.79 or later is running on your Pi, e.g.:
2124

@@ -35,7 +38,7 @@ $ make
3538
$ sudo make install
3639
```
3740

38-
You can also create Debian package and install it:
41+
Or you can create a Debian package and install it:
3942

4043
```
4144
$ make package
@@ -46,7 +49,8 @@ $ sudo dpkg -i qmkl-x.y.x-system.deb
4649
## Running tests
4750

4851
```
49-
$ sudo test/sgemm
50-
$ sudo test/scopy
51-
$ sudo test/vsAbs
52+
$ test/sgemm
53+
$ test/scopy
54+
$ test/vsAbs
55+
$ test/sgemm_spec
5256
```

src/blas/copy.c

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
#include "local/common.h"
1212
#include "local/called.h"
1313
#include "local/error.h"
14+
#include <rpimemmgr.h>
1415
#include <stdio.h>
1516
#include <stdlib.h>
1617
#include <string.h>
@@ -60,8 +61,8 @@ void cblas_scopy(
6061

6162
memcpy(code_common_cpu, code_scopy, sizeof(code_scopy));
6263

63-
qmkl_cache_op_multiple(2, QMKL_CACHE_OP_CLEAN, x, n * sizeof(*x),
64-
QMKL_CACHE_OP_CLEAN, y, n * sizeof(*y));
64+
rpimemmgr_cache_op_multiple(2, QMKL_CACHE_OP_CLEAN, x, n * sizeof(*x),
65+
QMKL_CACHE_OP_CLEAN, y, n * sizeof(*y));
6566
launch_qpu_code_mailbox(1, 0, 5e3, unif_common_gpu, code_common_gpu);
66-
qmkl_cache_op(QMKL_CACHE_OP_INVALIDATE, y, n * sizeof(*y));
67+
rpimemmgr_cache_op(QMKL_CACHE_OP_INVALIDATE, y, n * sizeof(*y));
6768
}

src/blas/gemm.c

Lines changed: 17 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
#include "local/common.h"
1212
#include "local/called.h"
1313
#include "local/error.h"
14+
#include <rpimemmgr.h>
1415
#include <stdio.h>
1516
#include <stdlib.h>
1617
#include <string.h>
@@ -134,9 +135,9 @@ static void cblas_sgemm_RNN(
134135
h_acc += hi;
135136
}
136137
}
137-
qmkl_cache_op_2_multiple(3, QMKL_CACHE_OP_CLEAN, a, P, Q * 4, lda * 4,
138-
QMKL_CACHE_OP_CLEAN, b, Q, R * 4, ldb * 4,
139-
QMKL_CACHE_OP_CLEAN, c, P, R * 4, ldc * 4);
138+
rpimemmgr_cache_op_2_multiple(3, QMKL_CACHE_OP_CLEAN, a, P, Q * 4, lda * 4,
139+
QMKL_CACHE_OP_CLEAN, b, Q, R * 4, ldb * 4,
140+
QMKL_CACHE_OP_CLEAN, c, P, R * 4, ldc * 4);
140141
launch_qpu_code_mailbox(n_threads, 0, 5e3,
141142
(unsigned*) unif_common_gpu + 0 * unif_len_1th, code_common_gpu,
142143
(unsigned*) unif_common_gpu + 1 * unif_len_1th, code_common_gpu,
@@ -151,7 +152,7 @@ static void cblas_sgemm_RNN(
151152
(unsigned*) unif_common_gpu + 10 * unif_len_1th, code_common_gpu,
152153
(unsigned*) unif_common_gpu + 11 * unif_len_1th, code_common_gpu
153154
);
154-
qmkl_cache_op_2(QMKL_CACHE_OP_INVALIDATE, c, P, R * 4, ldc * 4);
155+
rpimemmgr_cache_op_2(QMKL_CACHE_OP_INVALIDATE, c, P, R * 4, ldc * 4);
155156
}
156157

157158
static void cblas_sgemm_RNT(
@@ -240,9 +241,9 @@ static void cblas_sgemm_RNT(
240241
h_acc += hi;
241242
}
242243
}
243-
qmkl_cache_op_2_multiple(3, QMKL_CACHE_OP_CLEAN, a, P, Q * 4, lda * 4,
244-
QMKL_CACHE_OP_CLEAN, b, R, Q * 4, ldb * 4,
245-
QMKL_CACHE_OP_CLEAN, c, P, R * 4, ldc * 4);
244+
rpimemmgr_cache_op_2_multiple(3, QMKL_CACHE_OP_CLEAN, a, P, Q * 4, lda * 4,
245+
QMKL_CACHE_OP_CLEAN, b, R, Q * 4, ldb * 4,
246+
QMKL_CACHE_OP_CLEAN, c, P, R * 4, ldc * 4);
246247
launch_qpu_code_mailbox(n_threads, 0, 5e3,
247248
(unsigned*) unif_common_gpu + 0 * unif_len_1th, code_common_gpu,
248249
(unsigned*) unif_common_gpu + 1 * unif_len_1th, code_common_gpu,
@@ -257,7 +258,7 @@ static void cblas_sgemm_RNT(
257258
(unsigned*) unif_common_gpu + 10 * unif_len_1th, code_common_gpu,
258259
(unsigned*) unif_common_gpu + 11 * unif_len_1th, code_common_gpu
259260
);
260-
qmkl_cache_op_2(QMKL_CACHE_OP_INVALIDATE, c, P, R * 4, ldc * 4);
261+
rpimemmgr_cache_op_2(QMKL_CACHE_OP_INVALIDATE, c, P, R * 4, ldc * 4);
261262
}
262263

263264
static void cblas_sgemm_RTN(
@@ -346,9 +347,9 @@ static void cblas_sgemm_RTN(
346347
h_acc += hi;
347348
}
348349
}
349-
qmkl_cache_op_2_multiple(3, QMKL_CACHE_OP_CLEAN, a, Q, P * 4, lda * 4,
350-
QMKL_CACHE_OP_CLEAN, b, Q, R * 4, ldb * 4,
351-
QMKL_CACHE_OP_CLEAN, c, P, R * 4, ldc * 4);
350+
rpimemmgr_cache_op_2_multiple(3, QMKL_CACHE_OP_CLEAN, a, Q, P * 4, lda * 4,
351+
QMKL_CACHE_OP_CLEAN, b, Q, R * 4, ldb * 4,
352+
QMKL_CACHE_OP_CLEAN, c, P, R * 4, ldc * 4);
352353
launch_qpu_code_mailbox(n_threads, 0, 5e3,
353354
(unsigned*) unif_common_gpu + 0 * unif_len_1th, code_common_gpu,
354355
(unsigned*) unif_common_gpu + 1 * unif_len_1th, code_common_gpu,
@@ -363,7 +364,7 @@ static void cblas_sgemm_RTN(
363364
(unsigned*) unif_common_gpu + 10 * unif_len_1th, code_common_gpu,
364365
(unsigned*) unif_common_gpu + 11 * unif_len_1th, code_common_gpu
365366
);
366-
qmkl_cache_op_2(QMKL_CACHE_OP_INVALIDATE, c, P, R * 4, ldc * 4);
367+
rpimemmgr_cache_op_2(QMKL_CACHE_OP_INVALIDATE, c, P, R * 4, ldc * 4);
367368
}
368369

369370
static void cblas_sgemm_RTT(
@@ -452,9 +453,9 @@ static void cblas_sgemm_RTT(
452453
h_acc += hi;
453454
}
454455
}
455-
qmkl_cache_op_2_multiple(3, QMKL_CACHE_OP_CLEAN, a, Q, P * 4, lda * 4,
456-
QMKL_CACHE_OP_CLEAN, b, R, Q * 4, ldb * 4,
457-
QMKL_CACHE_OP_CLEAN, c, P, R * 4, ldc * 4);
456+
rpimemmgr_cache_op_2_multiple(3, QMKL_CACHE_OP_CLEAN, a, Q, P * 4, lda * 4,
457+
QMKL_CACHE_OP_CLEAN, b, R, Q * 4, ldb * 4,
458+
QMKL_CACHE_OP_CLEAN, c, P, R * 4, ldc * 4);
458459
launch_qpu_code_mailbox(n_threads, 0, 5e3,
459460
(unsigned*) unif_common_gpu + 0 * unif_len_1th, code_common_gpu,
460461
(unsigned*) unif_common_gpu + 1 * unif_len_1th, code_common_gpu,
@@ -469,7 +470,7 @@ static void cblas_sgemm_RTT(
469470
(unsigned*) unif_common_gpu + 10 * unif_len_1th, code_common_gpu,
470471
(unsigned*) unif_common_gpu + 11 * unif_len_1th, code_common_gpu
471472
);
472-
qmkl_cache_op_2(QMKL_CACHE_OP_INVALIDATE, c, P, R * 4, ldc * 4);
473+
rpimemmgr_cache_op_2(QMKL_CACHE_OP_INVALIDATE, c, P, R * 4, ldc * 4);
473474
}
474475

475476
static void cblas_sgemm_R(

src/include/qmkl/memory.h

Lines changed: 3 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
#ifndef _QMKL_MEMORY_H_
1111
#define _QMKL_MEMORY_H_
1212

13+
#include <interface/vcsm/user-vcsm.h>
1314
#include <sys/types.h>
1415
#include "qmkl/types.h"
1516

@@ -20,28 +21,10 @@
2021

2122
void memory_init();
2223
void memory_finalize();
23-
void* map_on_cpu(MKL_UINT ptr_gpu, size_t alloc_size);
24-
void unmap_on_cpu(void *ptr_cpu, size_t alloc_size);
2524
void* mkl_malloc_cache(size_t alloc_size, int alignment,
26-
const _Bool use_cpu_cache);
25+
const VCSM_CACHE_TYPE_T cache_type);
2726
void* mkl_malloc(size_t alloc_size, int alignment);
2827
void mkl_free(void *a_ptr);
29-
MKL_UINT get_ptr_gpu_from_ptr_cpu(const void *ptr_cpu);
30-
void unif_set_uint(MKL_UINT *p, const MKL_UINT u);
31-
void unif_set_float(MKL_UINT *p, const float f);
32-
void unif_add_uint(const MKL_UINT u, MKL_UINT **p);
33-
void unif_add_float(const float f, MKL_UINT **p);
34-
35-
/* op0, user0, size0, ... */
36-
int qmkl_cache_op_multiple(unsigned op_count, ...);
37-
int qmkl_cache_op(const enum qmkl_cache_op op, void * const p,
38-
const size_t size);
39-
/* op0, user0, block_count0, block_size0, stride0, ... */
40-
int qmkl_cache_op_2_multiple(unsigned op_count, ...);
41-
int qmkl_cache_op_2(const enum qmkl_cache_op op, void * const p,
42-
const size_t block_count, const size_t block_size,
43-
const size_t stride);
44-
45-
#define BUS_TO_PHYS(addr) ((addr) & ~0xc0000000)
28+
uint32_t get_ptr_gpu_from_ptr_cpu(const void * const ptr_cpu);
4629

4730
#endif /* _QMKL_MEMORY_H_ */

0 commit comments

Comments
 (0)