We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
2 parents ac19f97 + a09cf75 commit ecfea95Copy full SHA for ecfea95
exllama_ext/cuda_func/column_remap.cu
@@ -1,7 +1,14 @@
1
#include "column_remap.cuh"
2
#include "../util.cuh"
3
4
+// Using 1024 makes me crash with "Memory access fault by GPU node-1 (Agent
5
+// handle: 0x012345678912) on address 0x012345678912. Reason: Page not present
6
+// or supervisor privilege."
7
+#if defined(USE_ROCM)
8
const int SHUF_BLOCKSIZE_X = 256;
9
+#else
10
+const int SHUF_BLOCKSIZE_X = 1024;
11
+#endif
12
const int SHUF_BLOCKSIZE_Y = 16;
13
14
__global__ void column_remap_kernel
0 commit comments