@@ -689,7 +689,7 @@ __global__ void Round2(const int round, const int part, const siphash_keys &sipk
689689 cudaMemset (indexesE[1 ], 0 , indexesSize);
690690 cudaMemcpy (dipkeys, &sipkeys, sizeof (sipkeys), cudaMemcpyHostToDevice);
691691
692- checkCudaErrors (cudaDeviceSynchronize ());
692+ // checkCudaErrors (cudaDeviceSynchronize ());
693693
694694#ifdef TIMER
695695 float durationA, durationB;
@@ -706,7 +706,7 @@ __global__ void Round2(const int round, const int part, const siphash_keys &sipk
706706 else
707707 Cuckaroo_SeedA < EDGES_A ><<< tp.genA.blocks, tp.genA.tpb >>> (*dipkeys, (ulonglong4 *) bufferAB, (int *) indexesE[1 ]);
708708
709- checkCudaErrors (cudaDeviceSynchronize ());
709+ // checkCudaErrors (cudaDeviceSynchronize ());
710710
711711#ifdef TIMER
712712 cudaEventRecord (stop, NULL );
@@ -780,7 +780,7 @@ __global__ void Round2(const int round, const int part, const siphash_keys &sipk
780780 Round<EDGES_A/4 , EDGES_B/4 ><<<tp.trim.blocks, tp.trim.tpb, BITMAPBYTES>>> (3 , part, *dipkeys, (uint2 *)bufferB, (uint2 *)bufferA, indexesE[1 ], indexesE[0 ]); // to .117
781781 }
782782
783- cudaDeviceSynchronize ();
783+ // cudaDeviceSynchronize();
784784
785785 for (int round = 4 ; round < tp.ntrims ; round += 2 ) {
786786 cudaMemset (indexesE[1 ], 0 , indexesSize);
@@ -797,9 +797,18 @@ __global__ void Round2(const int round, const int part, const siphash_keys &sipk
797797 cudaDeviceSynchronize ();
798798
799799 Tail<EDGES_B/4 ><<<tp.tail.blocks, tp.tail.tpb>>> ((const uint2 *)bufferA, (uint2 *)bufferB, (const u32 *)indexesE[0 ], (u32 *)indexesE[1 ]);
800- cudaMemcpy (&nedges, indexesE[1 ], sizeof (u32 ), cudaMemcpyDeviceToHost);
801- cudaDeviceSynchronize ();
802- return nedges;
800+ // cudaMemcpy(&nedges, indexesE[1], sizeof(u32), cudaMemcpyDeviceToHost);
801+ // cudaDeviceSynchronize();
802+ bool ready = false ;
803+ while (1 ){
804+ usleep (1000 );
805+ ready = cudaSuccess == cudaStreamQuery (0 );
806+ if (ready){
807+ cudaMemcpy (&nedges, indexesE[1 ], sizeof (u32 ), cudaMemcpyDeviceToHost);
808+ break ;
809+ }
810+ }
811+ return nedges;
803812 }
804813
805814};
0 commit comments