Skip to content

Commit

Permalink
This commit improves the performance of multi-instance DGEMM when the…
Browse files Browse the repository at this point in the history
…se multiple threads are bound to a CCX.

Multi-Instance: Each thread runs a sequential DGEMM.
Change-Id: I306920c8061b6dad61efac1dae68727f4ac27df6
  • Loading branch information
BiplabRaut committed Dec 19, 2018
1 parent 7bf901e commit 6d26737
Showing 1 changed file with 33 additions and 7 deletions.
40 changes: 33 additions & 7 deletions config/zen/bli_cntx_init_zen.c
Original file line number Diff line number Diff line change
Expand Up @@ -113,34 +113,60 @@ void bli_cntx_init_zen( cntx_t* cntx )
bli_blksz_init_easy( &blkszs[ BLIS_NR ], 16, 8, 8, 4 );

/*
Multi Instance performance degradation on different cores
Multi-instance performance improvement of DGEMM when bound to a CCX
In multi-instance mode, each thread runs a sequential DGEMM.
a) CPU freq 2.6 Ghz
DDR4 2400
DDR4 clock frequency 2400Mhz
Multi instance mode
mc = 240, kc = 512, and nc = 2040
b) CPU freq 2.4Ghz
DDR4 2400
b) CPU freq 2.2Ghz
DDR4 clock frequency 2400Mhz
Multi Instance mode
either
mc = 240, kc = 512 and nc = 2040
(or)
mc = 390, kc = 512 and nc = 4080
c) Higher frequency(3.1Ghz), single instance mode choose default value
c) If BLIS is run in Single Instance mode choose
mc = 510, kc = 1024 and nc = 4080
*/

#ifdef BLIS_ENABLE_ZEN_BLOCK_SIZES
// Zen optimized level 3 cache block sizes
bli_blksz_init_easy( &blkszs[ BLIS_MC ], 144, 240, 144, 72 );
/************************************************************************
The DGEMM block sizes below work better in multi-instance mode,
for a CPU clock frequency of 2.6 GHz and a DDR4 clock frequency of 2400 MHz
************************************************************************/
bli_blksz_init_easy( &blkszs[ BLIS_MC ], 144, 240, 144, 72 );
bli_blksz_init_easy( &blkszs[ BLIS_KC ], 256, 512, 256, 256 );
bli_blksz_init_easy( &blkszs[ BLIS_NC ], 4080, 2040, 4080, 4080 );

/***********************************************************************************
The DGEMM block sizes below give better performance in multi-instance mode,
for a CPU clock frequency of 2.2 GHz and a DDR4 clock frequency of 2400 MHz
**************************************************************************************/
//bli_blksz_init_easy( &blkszs[ BLIS_MC ], 144, 390, 144, 72 );
//bli_blksz_init_easy( &blkszs[ BLIS_KC ], 256, 512, 256, 256 );
//bli_blksz_init_easy( &blkszs[ BLIS_NC ], 4080, 4080, 4080, 4080 );

/******************************************************************************
In single-instance mode, BLIS gives better performance with
the default block size values below
********************************************************************************/
// bli_blksz_init_easy( &blkszs[ BLIS_MC ], 144, 510, 144, 72 );
//bli_blksz_init_easy( &blkszs[ BLIS_KC ], 256, 1024, 256, 256 );
//bli_blksz_init_easy( &blkszs[ BLIS_NC ], 4080, 4080, 4080, 4080 );

#else

bli_blksz_init_easy( &blkszs[ BLIS_MC ], 144, 72, 144, 72 );
bli_blksz_init_easy( &blkszs[ BLIS_KC ], 256, 256, 256, 256 );
bli_blksz_init_easy( &blkszs[ BLIS_NC ], 4080, 4080, 4080, 4080 );
#endif
bli_blksz_init_easy( &blkszs[ BLIS_NC ], 4080, 2040, 4080, 4080 );
//bli_blksz_init_easy( &blkszs[ BLIS_NC ], 4080, 2040, 4080, 4080 );
bli_blksz_init_easy( &blkszs[ BLIS_AF ], 8, 8, -1, -1 );
bli_blksz_init_easy( &blkszs[ BLIS_DF ], 8, 8, -1, -1 );

Expand Down

0 comments on commit 6d26737

Please sign in to comment.