Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Scale P and Q with L2 cache size for SVE #4397

Draft
wants to merge 1 commit into
base: develop
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion Makefile.system
Original file line number Diff line number Diff line change
Expand Up @@ -679,7 +679,6 @@ DYNAMIC_CORE += CORTEXA53
DYNAMIC_CORE += CORTEXA57
DYNAMIC_CORE += NEOVERSEN1
ifneq ($(NO_SVE), 1)
DYNAMIC_CORE += NEOVERSEV1
DYNAMIC_CORE += NEOVERSEN2
DYNAMIC_CORE += ARMV8SVE
endif
Expand Down
34 changes: 17 additions & 17 deletions driver/others/dynamic_arm64.c
Original file line number Diff line number Diff line change
Expand Up @@ -133,9 +133,9 @@ extern gotoblas_t gotoblas_TSV110;
extern gotoblas_t gotoblas_EMAG8180;
extern gotoblas_t gotoblas_NEOVERSEN1;
#ifndef NO_SVE
extern gotoblas_t gotoblas_NEOVERSEV1;
extern gotoblas_t gotoblas_NEOVERSEN2;
extern gotoblas_t gotoblas_ARMV8SVE;
#define gotoblas_NEOVERSEV1 gotoblas_ARMV8SVE
#else
#define gotoblas_NEOVERSEV1 gotoblas_ARMV8
#define gotoblas_NEOVERSEN2 gotoblas_ARMV8
Expand Down Expand Up @@ -167,6 +167,7 @@ extern void openblas_warning(int verbose, const char * msg);

static char *corename[] = {
"armv8",
"armv8sve",
"cortexa53",
"cortexa57",
"cortexa72",
Expand All @@ -181,27 +182,26 @@ static char *corename[] = {
"neoversen2",
"thunderx3t110",
"cortexa55",
"armv8sve",
"unknown"
};

/*
 * Return the entry of corename[] that corresponds to the core table the
 * `gotoblas` pointer currently refers to. The pointer is compared against
 * the address of each known per-core table in turn; if none matches, the
 * entry at index NUM_CORETYPES is returned.
 *
 * NOTE(review): this listing appears to contain both the pre-change and the
 * post-change index lines of a diff. As written, the first matching
 * comparison returns, and every comparison in the second run repeats one
 * from the first run, so the second run is never reached. Confirm which set
 * of indices matches the actual ordering of corename[] and drop the other.
 */
char *gotoblas_corename(void) {
if (gotoblas == &gotoblas_ARMV8) return corename[ 0];
if (gotoblas == &gotoblas_CORTEXA53) return corename[ 1];
if (gotoblas == &gotoblas_CORTEXA57) return corename[ 2];
if (gotoblas == &gotoblas_CORTEXA72) return corename[ 3];
if (gotoblas == &gotoblas_CORTEXA73) return corename[ 4];
if (gotoblas == &gotoblas_FALKOR) return corename[ 5];
if (gotoblas == &gotoblas_THUNDERX) return corename[ 6];
if (gotoblas == &gotoblas_THUNDERX2T99) return corename[ 7];
if (gotoblas == &gotoblas_TSV110) return corename[ 8];
if (gotoblas == &gotoblas_EMAG8180) return corename[ 9];
if (gotoblas == &gotoblas_NEOVERSEN1) return corename[10];
if (gotoblas == &gotoblas_NEOVERSEV1) return corename[11];
if (gotoblas == &gotoblas_NEOVERSEN2) return corename[12];
if (gotoblas == &gotoblas_THUNDERX3T110) return corename[13];
if (gotoblas == &gotoblas_CORTEXA55) return corename[14];
if (gotoblas == &gotoblas_ARMV8SVE) return corename[15];
if (gotoblas == &gotoblas_ARMV8SVE) return corename[ 1];
if (gotoblas == &gotoblas_CORTEXA53) return corename[ 2];
if (gotoblas == &gotoblas_CORTEXA57) return corename[ 3];
if (gotoblas == &gotoblas_CORTEXA72) return corename[ 4];
if (gotoblas == &gotoblas_CORTEXA73) return corename[ 5];
if (gotoblas == &gotoblas_FALKOR) return corename[ 6];
if (gotoblas == &gotoblas_THUNDERX) return corename[ 7];
if (gotoblas == &gotoblas_THUNDERX2T99) return corename[ 8];
if (gotoblas == &gotoblas_TSV110) return corename[ 9];
if (gotoblas == &gotoblas_EMAG8180) return corename[10];
if (gotoblas == &gotoblas_NEOVERSEN1) return corename[11];
if (gotoblas == &gotoblas_NEOVERSEV1) return corename[12];
if (gotoblas == &gotoblas_NEOVERSEN2) return corename[13];
if (gotoblas == &gotoblas_THUNDERX3T110) return corename[14];
if (gotoblas == &gotoblas_CORTEXA55) return corename[15];
/* No known table matched: fall back to the entry at NUM_CORETYPES. */
return corename[NUM_CORETYPES];
}

Expand Down
2 changes: 1 addition & 1 deletion getarch.c
Original file line number Diff line number Diff line change
Expand Up @@ -1245,7 +1245,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define SUBDIRNAME "arm64"
#define ARCHCONFIG "-DARMV8SVE " \
"-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 " \
"-DL2_SIZE=262144 -DL2_LINESIZE=64 " \
"-DL2_SIZE=262144 -DL2_LINESIZE=64 -DSCALE_L2 " \
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=32 " \
"-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DHAVE_SVE -DARMV8"
#define LIBNAME "armv8sve"
Expand Down
34 changes: 30 additions & 4 deletions kernel/setparam-ref.c
Original file line number Diff line number Diff line change
Expand Up @@ -890,15 +890,41 @@ gotoblas_t TABLE_NAME = {
};

#if (ARCH_ARM64)

#define L2_CACHE_FILE "/sys/devices/system/cpu/cpu0/cache/index2/size"
/*
 * Compute a scaling factor for the GEMM blocking parameters from the L2
 * cache size reported in L2_CACHE_FILE.
 *
 * The factor is the cache size expressed in units of 128 KiB, clamped to a
 * minimum of 1:
 *   - a value followed by 'K' is taken as KiB   (size >> 7)
 *   - a value followed by a newline is taken as bytes (size >> 17)
 *   - any other suffix is treated as unknown and yields 1
 *
 * Returns 1 when the lookup is compiled out (non-Linux build or SCALE_L2
 * not defined), when the file cannot be opened or read, or when its
 * contents cannot be parsed.
 */
static inline uint64_t get_l2_multiplier(void) {
  uint64_t multiplier = 1;
#if (defined(__linux) || defined(__linux__)) && defined(SCALE_L2)
  char buffer[100];
  /* Parsed as unsigned long long so the conversion specifier matches the
   * argument type exactly, without needing <inttypes.h>. */
  unsigned long long cache_size = 0;
  char cache_unit = '\n';
  FILE *sysfs_file = fopen(L2_CACHE_FILE, "r");

  if (sysfs_file == NULL) {
    return multiplier;
  }

  /* Both the number and the character that follows it must be present. */
  if (fgets(buffer, sizeof(buffer), sysfs_file) != NULL &&
      sscanf(buffer, "%llu%c", &cache_size, &cache_unit) == 2) {
    switch (cache_unit) {
    case 'K':   /* size given in KiB */
      multiplier = (uint64_t)(cache_size >> 7);
      break;
    case '\n':  /* bare number: size given in bytes */
      multiplier = (uint64_t)(cache_size >> 17);
      break;
    default:    /* unknown unit: leave the multiplier at 1 */
      break;
    }
    /* Caches smaller than 128 KiB must not scale the parameters to zero. */
    if (multiplier < 1) {
      multiplier = 1;
    }
  }

  /* Always release the handle; the original leaked it on every path. */
  fclose(sysfs_file);
#endif
  return multiplier;
}

static void init_parameter(void) {
const uint64_t l2_multiplier = get_l2_multiplier();

#if (BUILD_BFLOAT16)
TABLE_NAME.sbgemm_p = SBGEMM_DEFAULT_P;
#endif
#if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P * l2_multiplier;
#endif
#if BUILD_DOUBLE == 1 || (BUILD_COMPLEX16==1)
TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P * l2_multiplier;
#endif
#if BUILD_COMPLEX==1
TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
Expand All @@ -911,10 +937,10 @@ static void init_parameter(void) {
TABLE_NAME.sbgemm_q = SBGEMM_DEFAULT_Q;
#endif
#if BUILD_SINGLE == 1 || (BUILD_COMPLEX==1)
TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q * l2_multiplier;
#endif
#if BUILD_DOUBLE== 1 || (BUILD_COMPLEX16==1)
TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q * l2_multiplier;
#endif
#if BUILD_COMPLEX== 1
TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
Expand Down
8 changes: 4 additions & 4 deletions param.h
Original file line number Diff line number Diff line change
Expand Up @@ -3517,13 +3517,13 @@ Until then, just keep it different than DGEMM_DEFAULT_UNROLL_N to keep copy rout
#define ZGEMM_DEFAULT_UNROLL_N 4
#define ZGEMM_DEFAULT_UNROLL_MN 16

#define SGEMM_DEFAULT_P 128
#define DGEMM_DEFAULT_P 160
#define SGEMM_DEFAULT_P 30

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How were the default P and Q chosen for 128KB cache?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

#4381 demonstrated values that worked well for a 1MB L2 cache, so I divided that by 8.

If you have a more scientific approach, I'd be happy to hear it 😸

#define DGEMM_DEFAULT_P 30
#define CGEMM_DEFAULT_P 128
#define ZGEMM_DEFAULT_P 128

#define SGEMM_DEFAULT_Q 352
#define DGEMM_DEFAULT_Q 128
#define SGEMM_DEFAULT_Q 80
#define DGEMM_DEFAULT_Q 40
#define CGEMM_DEFAULT_Q 224
#define ZGEMM_DEFAULT_Q 112

Expand Down
Loading