Skip to content

Commit

Permalink
This commit adds a macro, which is to be enabled when BLIS is workin…
Browse files Browse the repository at this point in the history
…g on single instance mode

Change-Id: I7f3fd654b78e64c4e6e24e9f0e245b1a30c492b0
  • Loading branch information
kiran-amd committed Jan 4, 2019
1 parent 1f4eeee commit cf9c115
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 39 deletions.
56 changes: 17 additions & 39 deletions config/zen/bli_cntx_init_zen.c
Original file line number Diff line number Diff line change
Expand Up @@ -117,56 +117,34 @@ void bli_cntx_init_zen( cntx_t* cntx )
Multi-instance performance improvement of DGEMM when bound to a CCX.
In multi-instance mode each thread runs a sequential DGEMM.
a) CPU freq 2.6 Ghz
a) If BLIS is run in a multi instance mode with
CPU freq 2.6/2.2 Ghz
DDR4 clock frequency 2400Mhz
Multi instance mode
mc = 240, kc = 512, and nc = 2040
b) CPU freq 2.2Ghz
DDR4 clock frequency 2400Mhz
Multi Instance mode
either
mc = 240, kc = 512 and nc = 2040
(or)
mc = 390, kc = 512 and nc = 4080
has better performance on EPYC server, over the default block sizes.
c) If BLIS is run in Single Instance mode choose
b) If BLIS is run in Single Instance mode
mc = 510, kc = 1024 and nc = 4080
*/

#ifdef BLIS_ENABLE_ZEN_BLOCK_SIZES
// Zen optimized level 3 cache block sizes
/************************************************************************
The DGEMM block sizes below work better in multi-instance mode,
for a CPU clock frequency of 2.2GHz and a DDR4 clock frequency of 2400MHz
************************************************************************/
bli_blksz_init_easy( &blkszs[ BLIS_MC ], 144, 240, 144, 72 );
bli_blksz_init_easy( &blkszs[ BLIS_KC ], 256, 512, 256, 256 );
bli_blksz_init_easy( &blkszs[ BLIS_NC ], 4080, 2040, 4080, 4080 );

/***********************************************************************************
Below block sizes of DGEMM, gives better performance in a multi instance mode,
for clock frequency of 2.6GHz and DDR4 clock frequency of 2400MHz
**************************************************************************************/
//bli_blksz_init_easy( &blkszs[ BLIS_MC ], 144, 390, 144, 72 );
//bli_blksz_init_easy( &blkszs[ BLIS_KC ], 256, 512, 256, 256 );
//bli_blksz_init_easy( &blkszs[ BLIS_NC ], 4080, 4080, 4080, 4080 );

/******************************************************************************
BLIS in single instance mode gives better performance with
the block size values given below
********************************************************************************/
//bli_blksz_init_easy( &blkszs[ BLIS_MC ], 144, 510, 144, 72 );
//bli_blksz_init_easy( &blkszs[ BLIS_KC ], 256, 1024, 256, 256 );
//bli_blksz_init_easy( &blkszs[ BLIS_NC ], 4080, 4080, 4080, 4080 );

#if BLIS_ENABLE_SINGLE_INSTANCE_BLOCK_SIZES
bli_blksz_init_easy( &blkszs[ BLIS_MC ], 144, 510, 144, 72 );
bli_blksz_init_easy( &blkszs[ BLIS_KC ], 256, 1024, 256, 256 );
bli_blksz_init_easy( &blkszs[ BLIS_NC ], 4080, 4080, 4080, 4080 );
#else
bli_blksz_init_easy( &blkszs[ BLIS_MC ], 144, 240, 144, 72 );
bli_blksz_init_easy( &blkszs[ BLIS_KC ], 256, 512, 256, 256 );
bli_blksz_init_easy( &blkszs[ BLIS_NC ], 4080, 2040, 4080, 4080 );
#endif
#else

bli_blksz_init_easy( &blkszs[ BLIS_MC ], 144, 72, 144, 72 );
bli_blksz_init_easy( &blkszs[ BLIS_KC ], 256, 256, 256, 256 );
bli_blksz_init_easy( &blkszs[ BLIS_NC ], 4080, 4080, 4080, 4080 );
bli_blksz_init_easy( &blkszs[ BLIS_MC ], 144, 72, 144, 72 );
bli_blksz_init_easy( &blkszs[ BLIS_KC ], 256, 256, 256, 256 );
bli_blksz_init_easy( &blkszs[ BLIS_NC ], 4080, 4080, 4080, 4080 );
#endif

//bli_blksz_init_easy( &blkszs[ BLIS_NC ], 4080, 2040, 4080, 4080 );
bli_blksz_init_easy( &blkszs[ BLIS_AF ], 8, 8, -1, -1 );
bli_blksz_init_easy( &blkszs[ BLIS_DF ], 8, 8, -1, -1 );
Expand Down
3 changes: 3 additions & 0 deletions config/zen/bli_family_zen.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@
#define BLIS_ENABLE_SMALL_MATRIX
#define BLIS_ENABLE_SMALL_MATRIX_TRSM


// This will select the threshold below which small matrix code will be called.
#define BLIS_SMALL_MATRIX_THRES 700
#define BLIS_SMALL_M_RECT_MATRIX_THRES 160
Expand All @@ -56,5 +57,7 @@
#define BLIS_SMALL_MATRIX_A_THRES_M_SYRK 96
#define BLIS_SMALL_MATRIX_A_THRES_N_SYRK 128

// Set this macro to 1 to make BLIS DGEMM choose the block sizes tuned for single instance mode
// (0 selects the multi instance block sizes).
#define BLIS_ENABLE_SINGLE_INSTANCE_BLOCK_SIZES 0
//#endif

0 comments on commit cf9c115

Please sign in to comment.