Skip to content

Commit ecfea95

Browse files
committed
Merge branch 'transformers' of github.com:0cc4m/exllama into transformers
2 parents ac19f97 + a09cf75 commit ecfea95

File tree

1 file changed

+7
-0
lines changed

1 file changed

+7
-0
lines changed

exllama_ext/cuda_func/column_remap.cu

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,14 @@
11
#include "column_remap.cuh"
22
#include "../util.cuh"
33

4+
// Using 1024 makes it crash with "Memory access fault by GPU node-1 (Agent
5+
// handle: 0x012345678912) on address 0x012345678912. Reason: Page not present
6+
// or supervisor privilege."
7+
#if defined(USE_ROCM)
48
const int SHUF_BLOCKSIZE_X = 256;
9+
#else
10+
const int SHUF_BLOCKSIZE_X = 1024;
11+
#endif
512
const int SHUF_BLOCKSIZE_Y = 16;
613

714
__global__ void column_remap_kernel

0 commit comments

Comments
 (0)