From efedcc3bafd48f0da76777caa0f249182534a266 Mon Sep 17 00:00:00 2001 From: Nuno Miguel Nobre Date: Fri, 10 Jan 2020 18:17:27 +0000 Subject: [PATCH] Stream_df_cholesky_peek: Guarantee data is copied back to shared memory Previously, the control program could proceed to halt the timer and enter the verification stage without guaranteeing the matrix was fully unloaded from the streams to shared memory. This caused verification to fail on single-threaded runs. This was not a problem in multi-threaded runs, presumably because the other threads would be free to finish these copies before the copy of the last block (which the control program always waited for) happened. --- examples/cholesky/stream_df_cholesky_peek.c | 25 ++++++++------------- 1 file changed, 9 insertions(+), 16 deletions(-) diff --git a/examples/cholesky/stream_df_cholesky_peek.c b/examples/cholesky/stream_df_cholesky_peek.c index 9c23b8823..f01f5d0cc 100644 --- a/examples/cholesky/stream_df_cholesky_peek.c +++ b/examples/cholesky/stream_df_cholesky_peek.c @@ -328,22 +328,15 @@ void create_terminal_task(int id_x, int id_y) int token; if(id_x == id_y) { - if(id_x == ntiles-1) { - #pragma omp task peek(sdpotrf_ref[IDX_DPOTRF(id_x)] >> in[T*T]) \ - output(sfinal_ref[0] << token) - { - copy_block_to_global(in, id_x, id_y); - debug_printf("Terminal Task %d, %d\n", id_x, id_y); - } - } else { - #pragma omp task peek(sdpotrf_ref[IDX_DPOTRF(id_x)] >> in[T*T]) - { - copy_block_to_global(in, id_x, id_y); - debug_printf("Terminal Task %d, %d\n", id_x, id_y); - } + #pragma omp task peek(sdpotrf_ref[IDX_DPOTRF(id_x)] >> in[T*T]) \ + output(sfinal_ref[0] << token) + { + copy_block_to_global(in, id_x, id_y); + debug_printf("Terminal Task %d, %d\n", id_x, id_y); } } else { - #pragma omp task peek(sdtrsm_ref[IDX_DTRSM(id_x, id_y)] >> in[T*T]) + #pragma omp task peek(sdtrsm_ref[IDX_DTRSM(id_x, id_y)] >> in[T*T]) \ + output(sfinal_ref[0] << token) { copy_block_to_global(in, id_x, id_y); debug_printf("Terminal Task %d, 
%d\n", id_x, id_y); @@ -521,7 +514,7 @@ int main(int argc, char** argv) int px = 4; int py = px; - int token; + int token[ntiles*(ntiles+1)/2]; int ntasks = 0; int nntasks = 0; @@ -600,7 +593,7 @@ int main(int argc, char** argv) } } - #pragma omp task input(sfinal_ref[0] >> token) + #pragma omp task input(sfinal_ref[0] >> token[ntiles*(ntiles+1)/2]) { printf("All tasks finished\n"); }