@@ -129,10 +129,10 @@ using namespace cute;
129129// ///////////////////////////////////////////////////////////////////////////////////////////////
130130
131131// TP size (= number of processors/GPUs)
132- using TP = _8 ;
132+ using TP = _4 ;
133133static constexpr int TP_ = TP{};
134134
135- #if defined(CUTLASS_ARCH_MMA_SM100A_ENABLED ) && \
135+ #if defined(CUTLASS_ARCH_MMA_SM100_SUPPORTED ) && \
136136 (__CUDACC_VER_MAJOR__ > 12 || (__CUDACC_VER_MAJOR__ == 12 && __CUDACC_VER_MINOR__ >= 8 ))
137137
138138// Distributed GEMM tiling/sharding schedule
@@ -254,7 +254,7 @@ HostTensorB tensor_B_arr[TP_];
254254HostTensorD tensor_C_arr[TP_];
255255HostTensorD tensor_D_arr[TP_];
256256
257- #endif // (defined(CUTLASS_ARCH_MMA_SM100A_ENABLED ) &&
257+ #endif // (defined(CUTLASS_ARCH_MMA_SM100_SUPPORTED ) &&
258258 // (__CUDACC_VER_MAJOR__ > 12 || (__CUDACC_VER_MAJOR__ == 12 && __CUDACC_VER_MINOR__ >= 8))
259259
260260// ///////////////////////////////////////////////////////////////////////////////////////////////
@@ -347,8 +347,7 @@ struct Result {
347347
348348};
349349
350- #if defined(CUTLASS_ARCH_MMA_SM100A_ENABLED) && \
351- (__CUDACC_VER_MAJOR__ > 12 || (__CUDACC_VER_MAJOR__ == 12 && __CUDACC_VER_MINOR__ >= 8 ))
350+ #if ((__CUDACC_VER_MAJOR__ > 12 || (__CUDACC_VER_MAJOR__ == 12 && __CUDACC_VER_MINOR__ >= 8)))
352351
353352// ///////////////////////////////////////////////////////////////////////////////////////////////
354353// / GEMM setup and evaluation
@@ -812,8 +811,7 @@ int run(Options &options) {
812811 return 0 ;
813812}
814813
815- #endif // (defined(CUTLASS_ARCH_MMA_SM100A_ENABLED) &&
816- // (__CUDACC_VER_MAJOR__ > 12 || (__CUDACC_VER_MAJOR__ == 12 && __CUDACC_VER_MINOR__ >= 8))
814+ #endif // (__CUDACC_VER_MAJOR__ > 12 || (__CUDACC_VER_MAJOR__ == 12 && __CUDACC_VER_MINOR__ >= 8))
817815
818816// /////////////////////////////////////////////////////////////////////////////////////////////////
819817
@@ -867,7 +865,7 @@ int main(int argc, char const **args) {
867865 // Evaluate CUTLASS kernels
868866 //
869867
870- #if (defined(CUTLASS_ARCH_MMA_SM100A_ENABLED) && (__CUDACC_VER_MAJOR__ > 12 || (__CUDACC_VER_MAJOR__ == 12 && __CUDACC_VER_MINOR__ >= 8)))
868+ #if ((__CUDACC_VER_MAJOR__ > 12 || (__CUDACC_VER_MAJOR__ == 12 && __CUDACC_VER_MINOR__ >= 8)))
871869 run (options);
872870#else
873871 std::cerr
0 commit comments