diff --git a/config/zen/bli_cntx_init_zen.c b/config/zen/bli_cntx_init_zen.c index be0eae8943..84f2eb279a 100644 --- a/config/zen/bli_cntx_init_zen.c +++ b/config/zen/bli_cntx_init_zen.c @@ -117,56 +117,34 @@ void bli_cntx_init_zen( cntx_t* cntx ) Multi Instance performance improvement of DGEMM when binded to a CCX In Multi instance each thread runs a sequential DGEMM. - a) CPU freq 2.6 Ghz + a) If BLIS is run in multi-instance mode with + CPU freq 2.6/2.2 GHz and DDR4 clock frequency 2400Mhz - Multi instance mode mc = 240, kc = 512, and nc = 2040 - - b) CPU freq 2.2Ghz - DDR4 clock frequency 2400Mhz - Multi Instance mode - either - mc = 240, kc = 512 and nc = 2040 - (or) - mc = 390, kc = 512 and nc = 4080 + give better performance on EPYC servers than the default block sizes. - c) If BLIS is run in Single Instance mode choose + b) If BLIS is run in Single Instance mode, choose mc = 510, kc = 1024 and nc = 4080 */ #ifdef BLIS_ENABLE_ZEN_BLOCK_SIZES - // Zen optmized level 3 cache block sizes - /************************************************************************ Below block sizes of DGEMM, works better in a multi instance mode, - for clock frequency of 2.2GHz and DDR4 clock frequency of 2400MHz - ************************************************************************/ - bli_blksz_init_easy( &blkszs[ BLIS_MC ], 144, 240, 144, 72 ); - bli_blksz_init_easy( &blkszs[ BLIS_KC ], 256, 512, 256, 256 ); - bli_blksz_init_easy( &blkszs[ BLIS_NC ], 4080, 2040, 4080, 4080 ); - - /*********************************************************************************** - Below block sizes of DGEMM, gives better performance in a multi instance mode, - for clock frequency of 2.6GHz and DDR4 clock frequency of 2400MHz - **************************************************************************************/ - //bli_blksz_init_easy( &blkszs[ BLIS_MC ], 144, 390, 144, 72 ); - //bli_blksz_init_easy( &blkszs[ BLIS_KC ], 256, 512, 256, 256 ); - //bli_blksz_init_easy( &blkszs[ BLIS_NC ], 4080, 
4080, 4080, 4080 ); - - /****************************************************************************** - BLIS on single instance mode, gives better perfomance with - below mentioned default block size values - ********************************************************************************/ - //bli_blksz_init_easy( &blkszs[ BLIS_MC ], 144, 510, 144, 72 ); - //bli_blksz_init_easy( &blkszs[ BLIS_KC ], 256, 1024, 256, 256 ); - //bli_blksz_init_easy( &blkszs[ BLIS_NC ], 4080, 4080, 4080, 4080 ); + #if BLIS_ENABLE_SINGLE_INSTANCE_BLOCK_SIZES + bli_blksz_init_easy( &blkszs[ BLIS_MC ], 144, 510, 144, 72 ); + bli_blksz_init_easy( &blkszs[ BLIS_KC ], 256, 1024, 256, 256 ); + bli_blksz_init_easy( &blkszs[ BLIS_NC ], 4080, 4080, 4080, 4080 ); + #else + bli_blksz_init_easy( &blkszs[ BLIS_MC ], 144, 240, 144, 72 ); + bli_blksz_init_easy( &blkszs[ BLIS_KC ], 256, 512, 256, 256 ); + bli_blksz_init_easy( &blkszs[ BLIS_NC ], 4080, 2040, 4080, 4080 ); + #endif #else - - bli_blksz_init_easy( &blkszs[ BLIS_MC ], 144, 72, 144, 72 ); - bli_blksz_init_easy( &blkszs[ BLIS_KC ], 256, 256, 256, 256 ); - bli_blksz_init_easy( &blkszs[ BLIS_NC ], 4080, 4080, 4080, 4080 ); + bli_blksz_init_easy( &blkszs[ BLIS_MC ], 144, 72, 144, 72 ); + bli_blksz_init_easy( &blkszs[ BLIS_KC ], 256, 256, 256, 256 ); + bli_blksz_init_easy( &blkszs[ BLIS_NC ], 4080, 4080, 4080, 4080 ); #endif + //bli_blksz_init_easy( &blkszs[ BLIS_NC ], 4080, 2040, 4080, 4080 ); bli_blksz_init_easy( &blkszs[ BLIS_AF ], 8, 8, -1, -1 ); bli_blksz_init_easy( &blkszs[ BLIS_DF ], 8, 8, -1, -1 ); diff --git a/config/zen/bli_family_zen.h b/config/zen/bli_family_zen.h index 743c38f32a..281bebf507 100644 --- a/config/zen/bli_family_zen.h +++ b/config/zen/bli_family_zen.h @@ -46,6 +46,7 @@ #define BLIS_ENABLE_SMALL_MATRIX #define BLIS_ENABLE_SMALL_MATRIX_TRSM + // This will select the threshold below which small matrix code will be called. 
#define BLIS_SMALL_MATRIX_THRES 700 #define BLIS_SMALL_M_RECT_MATRIX_THRES 160 @@ -56,5 +57,7 @@ #define BLIS_SMALL_MATRIX_A_THRES_M_SYRK 96 #define BLIS_SMALL_MATRIX_A_THRES_N_SYRK 128 +// Set to 1 to make DGEMM use the block sizes tuned for single-instance mode; 0 selects the multi-instance block sizes (only consulted when BLIS_ENABLE_ZEN_BLOCK_SIZES is defined). +#define BLIS_ENABLE_SINGLE_INSTANCE_BLOCK_SIZES 0 //#endif