Idein
diff --git a/‎.gitignore
Lines changed: 0 additions & 1 deletion b/‎.gitignore
Lines changed: 0 additions & 1 deletion
diff --git a/‎CMakeLists.txt
Lines changed: 13 additions & 0 deletions b/‎CMakeLists.txt
Lines changed: 13 additions & 0 deletions
diff --git a/‎README.md
Lines changed: 11 additions & 7 deletions b/‎README.md
Lines changed: 11 additions & 7 deletions
diff --git a/‎src/blas/copy.c
Lines changed: 4 additions & 3 deletions b/‎src/blas/copy.c
Lines changed: 4 additions & 3 deletions
diff --git a/‎src/blas/gemm.c
Lines changed: 17 additions & 16 deletions b/‎src/blas/gemm.c
Lines changed: 17 additions & 16 deletions
diff --git a/‎src/include/qmkl/memory.h
Lines changed: 3 additions & 20 deletions b/‎src/include/qmkl/memory.h
Lines changed: 3 additions & 20 deletions
@@ -7,7 +7,6 @@ test/memory_bench
 test/scopy
 test/sgemm
 test/sgemm_spec
-test/vcsm
 test/vsAbs
 
 qmkl.pc
 
@@ -46,6 +46,19 @@ endif ()
 
 pkg_check_modules(MAILBOX REQUIRED libmailbox>=2.0.0)
 
+# librpimemmgr needs bcm_host and vcsm, whose .pc files may be in /opt/vc/lib/pkgconfig.
+pkg_check_modules(RPIMEMMGR librpimemmgr>=1.0.0)
+if (NOT RPIMEMMGR_FOUND)
+    message(STATUS "Adding /opt/vc/lib/pkgconfig to PKG_CONFIG_PATH")
+    set(ENV{PKG_CONFIG_PATH} "$ENV{PKG_CONFIG_PATH}:/opt/vc/lib/pkgconfig")
+    pkg_check_modules(RPIMEMMGR librpimemmgr>=1.0.0)
+    if (NOT RPIMEMMGR_FOUND)
+        message (FATAL_ERROR "librpimemmgr not found even in /opt/vc/lib. "
+                             "Building on non-RPi host? "
+                             "Please specify PKG_CONFIG_PATH.")
+    endif ()
+endif ()
+
 if (DEFINED ENV{RPIVER})
     if     ("$ENV{RPIVER}" STREQUAL "1")
         set (RPIVER 1)
 
@@ -13,9 +13,12 @@ optimized for neural networks. There are movies of that:
 
 ## Requirements
 
-You need to install [qasm2](https://github.com/Terminus-IMRC/qpu-assembler2)
-and [qbin2hex](https://github.com/Terminus-IMRC/qpu-bin-to-hex) to compile
-this library. Just clone them and do `make && sudo make install`.
+You need to install the following to compile this library:
+
+- [qasm2](https://github.com/Terminus-IMRC/qpu-assembler2)
+- [qbin2hex](https://github.com/Terminus-IMRC/qpu-bin-to-hex)
+- [mailbox](https://github.com/Terminus-IMRC/mailbox)
+- [librpimemmgr](https://github.com/Idein/librpimemmgr)
 
 In addition, make sure Linux kernel 4.9.79 or above is running on your Pi. e.g.:
 
@@ -35,7 +38,7 @@ $ make
 $ sudo make install
 ```
 
-You can also create Debian package and install it:
+Or you can create a Debian package and install it:
 
 ```
 $ make package
@@ -46,7 +49,8 @@ $ sudo dpkg -i qmkl-x.y.x-system.deb
 ## Running tests
 
 ```
-$ sudo test/sgemm
-$ sudo test/scopy
-$ sudo test/vsAbs
+$ test/sgemm
+$ test/scopy
+$ test/vsAbs
+$ test/sgemm_spec
 ```
@@ -11,6 +11,7 @@
 #include "local/common.h"
 #include "local/called.h"
 #include "local/error.h"
+#include <rpimemmgr.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
@@ -60,8 +61,8 @@ void cblas_scopy(
 
     memcpy(code_common_cpu, code_scopy, sizeof(code_scopy));
 
-    qmkl_cache_op_multiple(2, QMKL_CACHE_OP_CLEAN, x, n * sizeof(*x),
-                              QMKL_CACHE_OP_CLEAN, y, n * sizeof(*y));
+    rpimemmgr_cache_op_multiple(2, QMKL_CACHE_OP_CLEAN, x, n * sizeof(*x),
+                                   QMKL_CACHE_OP_CLEAN, y, n * sizeof(*y));
     launch_qpu_code_mailbox(1, 0, 5e3, unif_common_gpu, code_common_gpu);
-    qmkl_cache_op(QMKL_CACHE_OP_INVALIDATE, y, n * sizeof(*y));
+    rpimemmgr_cache_op(QMKL_CACHE_OP_INVALIDATE, y, n * sizeof(*y));
 }
@@ -11,6 +11,7 @@
 #include "local/common.h"
 #include "local/called.h"
 #include "local/error.h"
+#include <rpimemmgr.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
@@ -134,9 +135,9 @@ static void cblas_sgemm_RNN(
             h_acc += hi;
         }
     }
-    qmkl_cache_op_2_multiple(3, QMKL_CACHE_OP_CLEAN, a, P, Q * 4, lda * 4,
-                                QMKL_CACHE_OP_CLEAN, b, Q, R * 4, ldb * 4,
-                                QMKL_CACHE_OP_CLEAN, c, P, R * 4, ldc * 4);
+    rpimemmgr_cache_op_2_multiple(3, QMKL_CACHE_OP_CLEAN, a, P, Q * 4, lda * 4,
+                                     QMKL_CACHE_OP_CLEAN, b, Q, R * 4, ldb * 4,
+                                     QMKL_CACHE_OP_CLEAN, c, P, R * 4, ldc * 4);
     launch_qpu_code_mailbox(n_threads, 0, 5e3,
                             (unsigned*) unif_common_gpu +  0 * unif_len_1th, code_common_gpu,
                             (unsigned*) unif_common_gpu +  1 * unif_len_1th, code_common_gpu,
@@ -151,7 +152,7 @@ static void cblas_sgemm_RNN(
                             (unsigned*) unif_common_gpu + 10 * unif_len_1th, code_common_gpu,
                             (unsigned*) unif_common_gpu + 11 * unif_len_1th, code_common_gpu
     );
-    qmkl_cache_op_2(QMKL_CACHE_OP_INVALIDATE, c, P, R * 4, ldc * 4);
+    rpimemmgr_cache_op_2(QMKL_CACHE_OP_INVALIDATE, c, P, R * 4, ldc * 4);
 }
 
 static void cblas_sgemm_RNT(
@@ -240,9 +241,9 @@ static void cblas_sgemm_RNT(
             h_acc += hi;
         }
     }
-    qmkl_cache_op_2_multiple(3, QMKL_CACHE_OP_CLEAN, a, P, Q * 4, lda * 4,
-                                QMKL_CACHE_OP_CLEAN, b, R, Q * 4, ldb * 4,
-                                QMKL_CACHE_OP_CLEAN, c, P, R * 4, ldc * 4);
+    rpimemmgr_cache_op_2_multiple(3, QMKL_CACHE_OP_CLEAN, a, P, Q * 4, lda * 4,
+                                     QMKL_CACHE_OP_CLEAN, b, R, Q * 4, ldb * 4,
+                                     QMKL_CACHE_OP_CLEAN, c, P, R * 4, ldc * 4);
     launch_qpu_code_mailbox(n_threads, 0, 5e3,
                             (unsigned*) unif_common_gpu +  0 * unif_len_1th, code_common_gpu,
                             (unsigned*) unif_common_gpu +  1 * unif_len_1th, code_common_gpu,
@@ -257,7 +258,7 @@ static void cblas_sgemm_RNT(
                             (unsigned*) unif_common_gpu + 10 * unif_len_1th, code_common_gpu,
                             (unsigned*) unif_common_gpu + 11 * unif_len_1th, code_common_gpu
     );
-    qmkl_cache_op_2(QMKL_CACHE_OP_INVALIDATE, c, P, R * 4, ldc * 4);
+    rpimemmgr_cache_op_2(QMKL_CACHE_OP_INVALIDATE, c, P, R * 4, ldc * 4);
 }
 
 static void cblas_sgemm_RTN(
@@ -346,9 +347,9 @@ static void cblas_sgemm_RTN(
             h_acc += hi;
         }
     }
-    qmkl_cache_op_2_multiple(3, QMKL_CACHE_OP_CLEAN, a, Q, P * 4, lda * 4,
-                                QMKL_CACHE_OP_CLEAN, b, Q, R * 4, ldb * 4,
-                                QMKL_CACHE_OP_CLEAN, c, P, R * 4, ldc * 4);
+    rpimemmgr_cache_op_2_multiple(3, QMKL_CACHE_OP_CLEAN, a, Q, P * 4, lda * 4,
+                                     QMKL_CACHE_OP_CLEAN, b, Q, R * 4, ldb * 4,
+                                     QMKL_CACHE_OP_CLEAN, c, P, R * 4, ldc * 4);
     launch_qpu_code_mailbox(n_threads, 0, 5e3,
                             (unsigned*) unif_common_gpu +  0 * unif_len_1th, code_common_gpu,
                             (unsigned*) unif_common_gpu +  1 * unif_len_1th, code_common_gpu,
@@ -363,7 +364,7 @@ static void cblas_sgemm_RTN(
                             (unsigned*) unif_common_gpu + 10 * unif_len_1th, code_common_gpu,
                             (unsigned*) unif_common_gpu + 11 * unif_len_1th, code_common_gpu
     );
-    qmkl_cache_op_2(QMKL_CACHE_OP_INVALIDATE, c, P, R * 4, ldc * 4);
+    rpimemmgr_cache_op_2(QMKL_CACHE_OP_INVALIDATE, c, P, R * 4, ldc * 4);
 }
 
 static void cblas_sgemm_RTT(
@@ -452,9 +453,9 @@ static void cblas_sgemm_RTT(
             h_acc += hi;
         }
     }
-    qmkl_cache_op_2_multiple(3, QMKL_CACHE_OP_CLEAN, a, Q, P * 4, lda * 4,
-                                QMKL_CACHE_OP_CLEAN, b, R, Q * 4, ldb * 4,
-                                QMKL_CACHE_OP_CLEAN, c, P, R * 4, ldc * 4);
+    rpimemmgr_cache_op_2_multiple(3, QMKL_CACHE_OP_CLEAN, a, Q, P * 4, lda * 4,
+                                     QMKL_CACHE_OP_CLEAN, b, R, Q * 4, ldb * 4,
+                                     QMKL_CACHE_OP_CLEAN, c, P, R * 4, ldc * 4);
     launch_qpu_code_mailbox(n_threads, 0, 5e3,
                             (unsigned*) unif_common_gpu +  0 * unif_len_1th, code_common_gpu,
                             (unsigned*) unif_common_gpu +  1 * unif_len_1th, code_common_gpu,
@@ -469,7 +470,7 @@ static void cblas_sgemm_RTT(
                             (unsigned*) unif_common_gpu + 10 * unif_len_1th, code_common_gpu,
                             (unsigned*) unif_common_gpu + 11 * unif_len_1th, code_common_gpu
     );
-    qmkl_cache_op_2(QMKL_CACHE_OP_INVALIDATE, c, P, R * 4, ldc * 4);
+    rpimemmgr_cache_op_2(QMKL_CACHE_OP_INVALIDATE, c, P, R * 4, ldc * 4);
 }
 
 static void cblas_sgemm_R(
 
@@ -10,6 +10,7 @@
 #ifndef _QMKL_MEMORY_H_
 #define _QMKL_MEMORY_H_
 
+#include <interface/vcsm/user-vcsm.h>
 #include <sys/types.h>
 #include "qmkl/types.h"
 
@@ -20,28 +21,10 @@
 
     void memory_init();
     void memory_finalize();
-    void* map_on_cpu(MKL_UINT ptr_gpu, size_t alloc_size);
-    void unmap_on_cpu(void *ptr_cpu, size_t alloc_size);
     void* mkl_malloc_cache(size_t alloc_size, int alignment,
-            const _Bool use_cpu_cache);
+            const VCSM_CACHE_TYPE_T cache_type);
     void* mkl_malloc(size_t alloc_size, int alignment);
     void mkl_free(void *a_ptr);
-    MKL_UINT get_ptr_gpu_from_ptr_cpu(const void *ptr_cpu);
-    void unif_set_uint(MKL_UINT *p, const MKL_UINT u);
-    void unif_set_float(MKL_UINT *p, const float f);
-    void unif_add_uint(const MKL_UINT u, MKL_UINT **p);
-    void unif_add_float(const float f, MKL_UINT **p);
-
-    /* op0, user0, size0, ... */
-    int qmkl_cache_op_multiple(unsigned op_count, ...);
-    int qmkl_cache_op(const enum qmkl_cache_op op, void * const p,
-            const size_t size);
-    /* op0, user0, block_count0, block_size0, stride0, ... */
-    int qmkl_cache_op_2_multiple(unsigned op_count, ...);
-    int qmkl_cache_op_2(const enum qmkl_cache_op op, void * const p,
-            const size_t block_count, const size_t block_size,
-            const size_t stride);
-
-#define BUS_TO_PHYS(addr) ((addr) & ~0xc0000000)
+    uint32_t get_ptr_gpu_from_ptr_cpu(const void * const ptr_cpu);
 
 #endif /* _QMKL_MEMORY_H_ */