diff --git a/aocl_dtl/aocldtl.c b/aocl_dtl/aocldtl.c index e678ef1569..ccc0c361c3 100644 --- a/aocl_dtl/aocldtl.c +++ b/aocl_dtl/aocldtl.c @@ -229,7 +229,6 @@ void DTL_Trace( if ((ui8LogLevel >= AOCL_DTL_LEVEL_TRACE_1) && (ui8LogLevel <= AOCL_DTL_LEVEL_TRACE_8)) { - fprintf(pOutFile, "%d ", (ui8LogLevel - AOCL_DTL_LEVEL_TRACE_1)+1); /* this loop is for formating the output log file */ for (i = 0; i < (ui8LogLevel - AOCL_DTL_LEVEL_TRACE_1); i++) { @@ -237,11 +236,6 @@ void DTL_Trace( fprintf(pOutFile, "\t"); } } - else - { - /* For non call traces we will just start the line with astrix */ - fprintf(pOutFile, "* \t"); - } switch (ui8LogType) { diff --git a/aocl_dtl/aocldtl_blis.c b/aocl_dtl/aocldtl_blis.c new file mode 100644 index 0000000000..1ca3f22ca2 --- /dev/null +++ b/aocl_dtl/aocldtl_blis.c @@ -0,0 +1,60 @@ +/*=================================================================== + * File Name : aocldtl_blis.c + * + * Description : BLIS library specific debug helpers. + * + * Copyright (C) 2020, Advanced Micro Devices, Inc + * + *==================================================================*/ + + +#include "blis.h" + +#if AOCL_DTL_LOG_ENABLE +/* + * Logs the operands of a GEMM call as one TRACE_TYPE_LOG record. + * + * The record holds, in this order: m, k, n, the column strides of a, b + * and c, the row strides of a, b and c, then alpha and beta. m and n are + * the length and width of c; k is the length of b. + * + * snprintf() bounds the write so wide values cannot overrun buffer. + * + * NOTE(review): alpha and beta are read through float pointers whatever + * the datatype of c is; confirm this is intended. + * NOTE(review): function_name is passed to DTL_Trace() twice and filename + * is unused; confirm this is intended. + */ +void AOCL_DTL_log_gemm_sizes(int8 loglevel, + obj_t* alpha, + obj_t* a, + obj_t* b, + obj_t* beta, + obj_t* c, + const char* filename, + const char* function_name, + int line) +{ + char buffer[256]; + gint_t m = bli_obj_length( c ); + gint_t n = bli_obj_width( c ); + gint_t k = bli_obj_length( b ); + guint_t csa = bli_obj_col_stride( a ); + guint_t csb = bli_obj_col_stride( b ); + guint_t csc = bli_obj_col_stride( c ); + guint_t rsa = bli_obj_row_stride( a ); + guint_t rsb = bli_obj_row_stride( b ); + guint_t rsc = bli_obj_row_stride( c ); + const num_t dt_exec = bli_obj_dt( c ); + float* alpha_cast = bli_obj_buffer_for_1x1( dt_exec, alpha ); + float* beta_cast = bli_obj_buffer_for_1x1( dt_exec, beta ); + + snprintf(buffer, sizeof(buffer), "%ld %ld %ld %lu %lu %lu %lu %lu %lu %f %f", + m, k, n, + csa, csb, csc, + rsa, rsb, rsc, 
*alpha_cast, *beta_cast); + + DTL_Trace(loglevel, TRACE_TYPE_LOG, function_name, function_name, line, buffer); +} +#endif diff --git a/aocl_dtl/aocldtl_blis.h b/aocl_dtl/aocldtl_blis.h new file mode 100644 index 0000000000..ba560b5025 --- /dev/null +++ b/aocl_dtl/aocldtl_blis.h @@ -0,0 +1,34 @@ +/*=================================================================== + * File Name : aocldtl_blis.h + * + * Description : BLIS library specific debug helpers. + * + * Copyright (C) 2020, Advanced Micro Devices, Inc + * + *==================================================================*/ + + +#ifndef __AOCLDTL_BLIS_H +#define __AOCLDTL_BLIS_H + +#include "blis.h" + +#if AOCL_DTL_LOG_ENABLE +void AOCL_DTL_log_gemm_sizes(int8 loglevel, + obj_t* alpha, + obj_t* a, + obj_t* b, + obj_t* beta, + obj_t* c, + const char* filename, + const char* function_name, + int line); + +#define AOCL_DTL_LOG_GEMM_INPUTS(loglevel, alpha, a, b, beta, c) \ + AOCL_DTL_log_gemm_sizes(loglevel, alpha, a, b, beta, c, __FILE__, __FUNCTION__, __LINE__); +#else +#define AOCL_DTL_LOG_GEMM_INPUTS(loglevel, alpha, a, b, beta, c) +#endif + +#endif + diff --git a/frame/3/bli_l3_oapi.c b/frame/3/bli_l3_oapi.c index df37395f7b..aec1e43d4a 100644 --- a/frame/3/bli_l3_oapi.c +++ b/frame/3/bli_l3_oapi.c @@ -58,6 +58,7 @@ void PASTEMAC(opname,EX_SUF) \ bli_init_once(); \ \ BLIS_OAPI_EX_DECLS \ + AOCL_DTL_LOG_GEMM_INPUTS(AOCL_DTL_LEVEL_TRACE_2, alpha, a, b, beta, c); \ \ /* If C has a zero dimension, return early. */ \ if ( bli_obj_has_zero_dim( c ) ) {\ diff --git a/frame/3/bli_l3_sup.c b/frame/3/bli_l3_sup.c index d7ce216719..a9f5395e4d 100644 --- a/frame/3/bli_l3_sup.c +++ b/frame/3/bli_l3_sup.c @@ -45,22 +45,31 @@ err_t bli_gemmsup rntm_t* rntm ) { + AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_TRACE_2); + AOCL_DTL_LOG_GEMM_INPUTS(AOCL_DTL_LEVEL_TRACE_2, alpha, a, b, beta, c); + // Return early if small matrix handling is disabled at configure-time. 
#ifdef BLIS_DISABLE_SUP_HANDLING + AOCL_DTL_TRACE_EXIT_ERR(AOCL_DTL_LEVEL_TRACE_2, "SUP is Disabled."); return BLIS_FAILURE; #endif // Return early if this is a mixed-datatype computation. if ( bli_obj_dt( c ) != bli_obj_dt( a ) || bli_obj_dt( c ) != bli_obj_dt( b ) || - bli_obj_comp_prec( c ) != bli_obj_prec( c ) ) return BLIS_FAILURE; + bli_obj_comp_prec( c ) != bli_obj_prec( c ) ) { + AOCL_DTL_TRACE_EXIT_ERR(AOCL_DTL_LEVEL_TRACE_2, "SUP doesn't support Mixed datatypes."); + return BLIS_FAILURE; + } const stor3_t stor_id = bli_obj_stor3_from_strides( c, a, b ); /*General stride is not yet supported in sup*/ - if(BLIS_XXX==stor_id) + if(BLIS_XXX==stor_id) { + AOCL_DTL_TRACE_EXIT_ERR(AOCL_DTL_LEVEL_TRACE_2, "SUP doesn't support general stride."); return BLIS_FAILURE; + } const dim_t m = bli_obj_length( c ); const dim_t n = bli_obj_width( c ); @@ -74,6 +83,7 @@ err_t bli_gemmsup || ((transb == BLIS_CONJ_NO_TRANSPOSE) || (transb == BLIS_CONJ_TRANSPOSE)) )){ //printf(" gemmsup: Returning with for un-supported storage types and conjugate property in cgemmsup \n"); + AOCL_DTL_TRACE_EXIT_ERR(AOCL_DTL_LEVEL_TRACE_2, "SUP - Unsuppported storage type for cgemm"); return BLIS_FAILURE; } @@ -84,6 +94,7 @@ err_t bli_gemmsup || ((transb == BLIS_CONJ_NO_TRANSPOSE) || (transb == BLIS_CONJ_TRANSPOSE)) )){ //printf(" gemmsup: Returning with for un-supported storage types and conjugate property in zgemmsup \n"); + AOCL_DTL_TRACE_EXIT_ERR(AOCL_DTL_LEVEL_TRACE_2, "SUP - Unsuppported storage type for zgemm."); return BLIS_FAILURE; } @@ -103,16 +114,20 @@ err_t bli_gemmsup // Pass in m and n reversed, which simulates a transposition of the // entire operation pursuant to the microkernel storage preference. 
- if ( !bli_cntx_l3_sup_thresh_is_met( dt, n, m, k, cntx ) ) + if ( !bli_cntx_l3_sup_thresh_is_met( dt, n, m, k, cntx ) ) { + AOCL_DTL_TRACE_EXIT_ERR(AOCL_DTL_LEVEL_TRACE_2, "SUP - Traspostion results in unsupported storage for matrix C."); return BLIS_FAILURE; + } } else // ukr_prefers_storage_of( c, ... ) { const num_t dt = bli_obj_dt( c ); const dim_t k = bli_obj_width_after_trans( a ); - if ( !bli_cntx_l3_sup_thresh_is_met( dt, m, n, k, cntx ) ) + if ( !bli_cntx_l3_sup_thresh_is_met( dt, m, n, k, cntx ) ) { + AOCL_DTL_TRACE_EXIT_ERR(AOCL_DTL_LEVEL_TRACE_2, "SUP - Unsupported storage for matrix C."); return BLIS_FAILURE; + } } // Initialize a local runtime with global settings if necessary. Note @@ -158,6 +173,8 @@ printf( "dims: %d %d %d (threshs: %d %d %d)\n", cntx, rntm ); + + AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_2); } diff --git a/frame/3/bli_l3_sup_int.c b/frame/3/bli_l3_sup_int.c index b35d03f329..0778b0cd8b 100644 --- a/frame/3/bli_l3_sup_int.c +++ b/frame/3/bli_l3_sup_int.c @@ -46,6 +46,9 @@ err_t bli_gemmsup_int thrinfo_t* thread ) { + AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_TRACE_4); + AOCL_DTL_LOG_GEMM_INPUTS(AOCL_DTL_LEVEL_TRACE_4, alpha, a, b, beta, c); + #if 0 //bli_gemmsup_ref_var2 //bli_gemmsup_ref_var1 @@ -80,7 +83,10 @@ err_t bli_gemmsup_int // Don't use the small/unpacked implementation if one of the matrices // uses general stride. - if ( stor_id == BLIS_XXX ) return BLIS_FAILURE; + if ( stor_id == BLIS_XXX ) { + AOCL_DTL_TRACE_EXIT_ERR(AOCL_DTL_LEVEL_TRACE_4, "SUP doesn't support general stide."); + return BLIS_FAILURE; + } const bool_t is_rrr_rrc_rcr_crr = ( stor_id == BLIS_RRR || stor_id == BLIS_RRC || @@ -237,6 +243,7 @@ err_t bli_gemmsup_int } // Return success so that the caller knows that we computed the solution. 
+ AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_4) return BLIS_SUCCESS; } diff --git a/frame/3/bli_l3_sup_packm_a.c b/frame/3/bli_l3_sup_packm_a.c index 7b12132995..0601b3dfb2 100644 --- a/frame/3/bli_l3_sup_packm_a.c +++ b/frame/3/bli_l3_sup_packm_a.c @@ -51,7 +51,8 @@ void PASTEMAC(ch,opname) \ thrinfo_t* restrict thread \ ) \ { \ - /* Inspect whether we are going to be packing matrix A. */ \ + AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_TRACE_5); \ + /* Inspect whether we are going to be packing matrix A. */ \ if ( will_pack == FALSE ) \ { \ } \ @@ -164,6 +165,7 @@ void PASTEMAC(ch,opname) \ } \ } \ } \ + AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_5);\ } INSERT_GENTFUNC_BASIC0( packm_sup_init_mem_a ) @@ -180,6 +182,7 @@ void PASTEMAC(ch,opname) \ thrinfo_t* restrict thread \ ) \ { \ + AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_TRACE_5);\ /* Inspect whether we previously packed matrix A. */ \ if ( did_pack == FALSE ) \ { \ @@ -202,6 +205,7 @@ void PASTEMAC(ch,opname) \ } \ } \ } \ + AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_5);\ } INSERT_GENTFUNC_BASIC0( packm_sup_finalize_mem_a ) @@ -228,6 +232,7 @@ void PASTEMAC(ch,opname) \ thrinfo_t* restrict thread \ ) \ { \ + AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_TRACE_5);\ /* Inspect whether we are going to be packing matrix A. */ \ if ( will_pack == FALSE ) \ { \ @@ -297,6 +302,7 @@ void PASTEMAC(ch,opname) \ broker. */ \ *p = bli_mem_buffer( mem ); \ } \ + AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_5);\ } INSERT_GENTFUNC_BASIC0( packm_sup_init_a ) @@ -335,6 +341,7 @@ void PASTEMAC(ch,opname) \ dim_t k_max; \ dim_t pd_p; \ \ + AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_TRACE_5);\ /* Prepare the packing destination buffer. If packing is not requested, this function will reduce to a no-op. */ \ PASTEMAC(ch,packm_sup_init_mem_a) \ @@ -424,6 +431,7 @@ void PASTEMAC(ch,opname) \ /* Barrier so that packing is done before computation. 
*/ \ bli_thread_barrier( thread ); \ } \ + AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_5);\ } INSERT_GENTFUNC_BASIC0( packm_sup_a ) diff --git a/frame/3/bli_l3_sup_packm_b.c b/frame/3/bli_l3_sup_packm_b.c index 37fde14660..db38e55e10 100644 --- a/frame/3/bli_l3_sup_packm_b.c +++ b/frame/3/bli_l3_sup_packm_b.c @@ -51,6 +51,7 @@ void PASTEMAC(ch,opname) \ thrinfo_t* restrict thread \ ) \ { \ + AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_TRACE_5);\ /* Inspect whether we are going to be packing matrix B. */ \ if ( will_pack == FALSE ) \ { \ @@ -164,6 +165,7 @@ void PASTEMAC(ch,opname) \ } \ } \ } \ + AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_5);\ } INSERT_GENTFUNC_BASIC0( packm_sup_init_mem_b ) @@ -180,6 +182,7 @@ void PASTEMAC(ch,opname) \ thrinfo_t* restrict thread \ ) \ { \ + AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_TRACE_5); \ /* Inspect whether we previously packed matrix A. */ \ if ( did_pack == FALSE ) \ { \ @@ -202,6 +205,7 @@ void PASTEMAC(ch,opname) \ } \ } \ } \ + AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_5); \ } INSERT_GENTFUNC_BASIC0( packm_sup_finalize_mem_b ) @@ -228,6 +232,7 @@ void PASTEMAC(ch,opname) \ thrinfo_t* restrict thread \ ) \ { \ + AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_TRACE_5);\ /* Inspect whether we are going to be packing matrix B. */ \ if ( will_pack == FALSE ) \ { \ @@ -297,6 +302,7 @@ void PASTEMAC(ch,opname) \ broker. */ \ *p = bli_mem_buffer( mem ); \ } \ + AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_5);\ } INSERT_GENTFUNC_BASIC0( packm_sup_init_b ) @@ -330,6 +336,7 @@ void PASTEMAC(ch,opname) \ thrinfo_t* restrict thread \ ) \ { \ + AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_TRACE_5); \ pack_t schema; \ dim_t k_max; \ dim_t n_max; \ @@ -424,6 +431,7 @@ void PASTEMAC(ch,opname) \ /* Barrier so that packing is done before computation. 
*/ \ bli_thread_barrier( thread ); \ } \ + AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_5); \ } INSERT_GENTFUNC_BASIC0( packm_sup_b ) diff --git a/frame/3/bli_l3_sup_var1n2m.c b/frame/3/bli_l3_sup_var1n2m.c index 5dfe4fb927..8e79a8d4a8 100644 --- a/frame/3/bli_l3_sup_var1n2m.c +++ b/frame/3/bli_l3_sup_var1n2m.c @@ -76,6 +76,7 @@ void bli_gemmsup_ref_var1n thrinfo_t* thread ) { + AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_TRACE_5); #if 0 obj_t at, bt; @@ -227,6 +228,7 @@ void bli_gemmsup_ref_var1n thread ); } + AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_5); } @@ -724,6 +726,7 @@ void bli_gemmsup_ref_var2m thrinfo_t* thread ) { + AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_TRACE_5); #if 0 obj_t at, bt; @@ -875,6 +878,7 @@ void bli_gemmsup_ref_var2m thread ); } + AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_5); } diff --git a/frame/3/gemm/bli_gemm_front.c b/frame/3/gemm/bli_gemm_front.c index e413b407c9..ec1b878fc5 100644 --- a/frame/3/gemm/bli_gemm_front.c +++ b/frame/3/gemm/bli_gemm_front.c @@ -48,6 +48,7 @@ void bli_gemm_front ) { AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_TRACE_3); + AOCL_DTL_LOG_GEMM_INPUTS(AOCL_DTL_LEVEL_TRACE_3, alpha, a, b, beta, c); bli_init_once(); obj_t a_local; diff --git a/frame/3/gemm/bli_gemm_int.c b/frame/3/gemm/bli_gemm_int.c index 9286665e0b..69eac09762 100644 --- a/frame/3/gemm/bli_gemm_int.c +++ b/frame/3/gemm/bli_gemm_int.c @@ -52,8 +52,9 @@ void bli_gemm_int obj_t b_local; obj_t c_local; gemm_var_oft f; - AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_TRACE_4) - + AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_TRACE_4); + AOCL_DTL_LOG_GEMM_INPUTS(AOCL_DTL_LEVEL_TRACE_4, alpha, a, b, beta, c); + // Check parameters. 
if ( bli_error_checking_is_enabled() ) bli_gemm_basic_check( alpha, a, b, beta, c, cntx ); diff --git a/frame/include/blis.h b/frame/include/blis.h index 3b4d85f9e9..783b5de0eb 100644 --- a/frame/include/blis.h +++ b/frame/include/blis.h @@ -205,7 +205,8 @@ extern "C" { #include "bli_winsys.h" #include "aocldtl.h" - +#include "aocldtl_blis.h" + // End extern "C" construct block. #ifdef __cplusplus }