Skip to content

Commit 42e9d05

Browse files
committed
Merge branch 'dev' of github.com:CortexFoundation/CortexTheseus into dev
2 parents 7635bfc + e8cc122 commit 42e9d05

File tree

1 file changed

+15
-6
lines changed

1 file changed

+15
-6
lines changed

solution/miner/libcuckoo/src/cuda/trimmer.cu

Lines changed: 15 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -689,7 +689,7 @@ __global__ void Round2(const int round, const int part, const siphash_keys &sipk
689689
cudaMemset (indexesE[1], 0, indexesSize);
690690
cudaMemcpy (dipkeys, &sipkeys, sizeof (sipkeys), cudaMemcpyHostToDevice);
691691

692-
checkCudaErrors (cudaDeviceSynchronize ());
692+
// checkCudaErrors (cudaDeviceSynchronize ());
693693

694694
#ifdef TIMER
695695
float durationA, durationB;
@@ -706,7 +706,7 @@ __global__ void Round2(const int round, const int part, const siphash_keys &sipk
706706
else
707707
Cuckaroo_SeedA < EDGES_A ><<< tp.genA.blocks, tp.genA.tpb >>> (*dipkeys, (ulonglong4 *) bufferAB, (int *) indexesE[1]);
708708

709-
checkCudaErrors (cudaDeviceSynchronize ());
709+
// checkCudaErrors (cudaDeviceSynchronize ());
710710

711711
#ifdef TIMER
712712
cudaEventRecord (stop, NULL);
@@ -780,7 +780,7 @@ __global__ void Round2(const int round, const int part, const siphash_keys &sipk
780780
Round<EDGES_A/4, EDGES_B/4><<<tp.trim.blocks, tp.trim.tpb, BITMAPBYTES>>>(3, part, *dipkeys, (uint2 *)bufferB, (uint2 *)bufferA, indexesE[1], indexesE[0]); // to .117
781781
}
782782

783-
cudaDeviceSynchronize();
783+
// cudaDeviceSynchronize();
784784

785785
for (int round = 4; round < tp.ntrims; round += 2) {
786786
cudaMemset(indexesE[1], 0, indexesSize);
@@ -797,9 +797,18 @@ __global__ void Round2(const int round, const int part, const siphash_keys &sipk
797797
cudaDeviceSynchronize();
798798

799799
Tail<EDGES_B/4><<<tp.tail.blocks, tp.tail.tpb>>>((const uint2 *)bufferA, (uint2 *)bufferB, (const u32 *)indexesE[0], (u32 *)indexesE[1]);
800-
cudaMemcpy(&nedges, indexesE[1], sizeof(u32), cudaMemcpyDeviceToHost);
801-
cudaDeviceSynchronize();
802-
return nedges;
800+
// cudaMemcpy(&nedges, indexesE[1], sizeof(u32), cudaMemcpyDeviceToHost);
801+
// cudaDeviceSynchronize();
802+
bool ready = false;
803+
while(1){
804+
usleep(1000);
805+
ready = cudaSuccess == cudaStreamQuery(0);
806+
if(ready){
807+
cudaMemcpy(&nedges, indexesE[1], sizeof(u32), cudaMemcpyDeviceToHost);
808+
break;
809+
}
810+
}
811+
return nedges;
803812
}
804813

805814
};

0 commit comments

Comments
 (0)