11
11
#include "local/common.h"
12
12
#include "local/called.h"
13
13
#include "local/error.h"
14
+ #include <rpimemmgr.h>
14
15
#include <stdio.h>
15
16
#include <stdlib.h>
16
17
#include <string.h>
@@ -134,9 +135,9 @@ static void cblas_sgemm_RNN(
134
135
h_acc += hi ;
135
136
}
136
137
}
137
- qmkl_cache_op_2_multiple (3 , QMKL_CACHE_OP_CLEAN , a , P , Q * 4 , lda * 4 ,
138
- QMKL_CACHE_OP_CLEAN , b , Q , R * 4 , ldb * 4 ,
139
- QMKL_CACHE_OP_CLEAN , c , P , R * 4 , ldc * 4 );
138
+ rpimemmgr_cache_op_2_multiple (3 , QMKL_CACHE_OP_CLEAN , a , P , Q * 4 , lda * 4 ,
139
+ QMKL_CACHE_OP_CLEAN , b , Q , R * 4 , ldb * 4 ,
140
+ QMKL_CACHE_OP_CLEAN , c , P , R * 4 , ldc * 4 );
140
141
launch_qpu_code_mailbox (n_threads , 0 , 5e3 ,
141
142
(unsigned * ) unif_common_gpu + 0 * unif_len_1th , code_common_gpu ,
142
143
(unsigned * ) unif_common_gpu + 1 * unif_len_1th , code_common_gpu ,
@@ -151,7 +152,7 @@ static void cblas_sgemm_RNN(
151
152
(unsigned * ) unif_common_gpu + 10 * unif_len_1th , code_common_gpu ,
152
153
(unsigned * ) unif_common_gpu + 11 * unif_len_1th , code_common_gpu
153
154
);
154
- qmkl_cache_op_2 (QMKL_CACHE_OP_INVALIDATE , c , P , R * 4 , ldc * 4 );
155
+ rpimemmgr_cache_op_2 (QMKL_CACHE_OP_INVALIDATE , c , P , R * 4 , ldc * 4 );
155
156
}
156
157
157
158
static void cblas_sgemm_RNT (
@@ -240,9 +241,9 @@ static void cblas_sgemm_RNT(
240
241
h_acc += hi ;
241
242
}
242
243
}
243
- qmkl_cache_op_2_multiple (3 , QMKL_CACHE_OP_CLEAN , a , P , Q * 4 , lda * 4 ,
244
- QMKL_CACHE_OP_CLEAN , b , R , Q * 4 , ldb * 4 ,
245
- QMKL_CACHE_OP_CLEAN , c , P , R * 4 , ldc * 4 );
244
+ rpimemmgr_cache_op_2_multiple (3 , QMKL_CACHE_OP_CLEAN , a , P , Q * 4 , lda * 4 ,
245
+ QMKL_CACHE_OP_CLEAN , b , R , Q * 4 , ldb * 4 ,
246
+ QMKL_CACHE_OP_CLEAN , c , P , R * 4 , ldc * 4 );
246
247
launch_qpu_code_mailbox (n_threads , 0 , 5e3 ,
247
248
(unsigned * ) unif_common_gpu + 0 * unif_len_1th , code_common_gpu ,
248
249
(unsigned * ) unif_common_gpu + 1 * unif_len_1th , code_common_gpu ,
@@ -257,7 +258,7 @@ static void cblas_sgemm_RNT(
257
258
(unsigned * ) unif_common_gpu + 10 * unif_len_1th , code_common_gpu ,
258
259
(unsigned * ) unif_common_gpu + 11 * unif_len_1th , code_common_gpu
259
260
);
260
- qmkl_cache_op_2 (QMKL_CACHE_OP_INVALIDATE , c , P , R * 4 , ldc * 4 );
261
+ rpimemmgr_cache_op_2 (QMKL_CACHE_OP_INVALIDATE , c , P , R * 4 , ldc * 4 );
261
262
}
262
263
263
264
static void cblas_sgemm_RTN (
@@ -346,9 +347,9 @@ static void cblas_sgemm_RTN(
346
347
h_acc += hi ;
347
348
}
348
349
}
349
- qmkl_cache_op_2_multiple (3 , QMKL_CACHE_OP_CLEAN , a , Q , P * 4 , lda * 4 ,
350
- QMKL_CACHE_OP_CLEAN , b , Q , R * 4 , ldb * 4 ,
351
- QMKL_CACHE_OP_CLEAN , c , P , R * 4 , ldc * 4 );
350
+ rpimemmgr_cache_op_2_multiple (3 , QMKL_CACHE_OP_CLEAN , a , Q , P * 4 , lda * 4 ,
351
+ QMKL_CACHE_OP_CLEAN , b , Q , R * 4 , ldb * 4 ,
352
+ QMKL_CACHE_OP_CLEAN , c , P , R * 4 , ldc * 4 );
352
353
launch_qpu_code_mailbox (n_threads , 0 , 5e3 ,
353
354
(unsigned * ) unif_common_gpu + 0 * unif_len_1th , code_common_gpu ,
354
355
(unsigned * ) unif_common_gpu + 1 * unif_len_1th , code_common_gpu ,
@@ -363,7 +364,7 @@ static void cblas_sgemm_RTN(
363
364
(unsigned * ) unif_common_gpu + 10 * unif_len_1th , code_common_gpu ,
364
365
(unsigned * ) unif_common_gpu + 11 * unif_len_1th , code_common_gpu
365
366
);
366
- qmkl_cache_op_2 (QMKL_CACHE_OP_INVALIDATE , c , P , R * 4 , ldc * 4 );
367
+ rpimemmgr_cache_op_2 (QMKL_CACHE_OP_INVALIDATE , c , P , R * 4 , ldc * 4 );
367
368
}
368
369
369
370
static void cblas_sgemm_RTT (
@@ -452,9 +453,9 @@ static void cblas_sgemm_RTT(
452
453
h_acc += hi ;
453
454
}
454
455
}
455
- qmkl_cache_op_2_multiple (3 , QMKL_CACHE_OP_CLEAN , a , Q , P * 4 , lda * 4 ,
456
- QMKL_CACHE_OP_CLEAN , b , R , Q * 4 , ldb * 4 ,
457
- QMKL_CACHE_OP_CLEAN , c , P , R * 4 , ldc * 4 );
456
+ rpimemmgr_cache_op_2_multiple (3 , QMKL_CACHE_OP_CLEAN , a , Q , P * 4 , lda * 4 ,
457
+ QMKL_CACHE_OP_CLEAN , b , R , Q * 4 , ldb * 4 ,
458
+ QMKL_CACHE_OP_CLEAN , c , P , R * 4 , ldc * 4 );
458
459
launch_qpu_code_mailbox (n_threads , 0 , 5e3 ,
459
460
(unsigned * ) unif_common_gpu + 0 * unif_len_1th , code_common_gpu ,
460
461
(unsigned * ) unif_common_gpu + 1 * unif_len_1th , code_common_gpu ,
@@ -469,7 +470,7 @@ static void cblas_sgemm_RTT(
469
470
(unsigned * ) unif_common_gpu + 10 * unif_len_1th , code_common_gpu ,
470
471
(unsigned * ) unif_common_gpu + 11 * unif_len_1th , code_common_gpu
471
472
);
472
- qmkl_cache_op_2 (QMKL_CACHE_OP_INVALIDATE , c , P , R * 4 , ldc * 4 );
473
+ rpimemmgr_cache_op_2 (QMKL_CACHE_OP_INVALIDATE , c , P , R * 4 , ldc * 4 );
473
474
}
474
475
475
476
static void cblas_sgemm_R (
0 commit comments