From 2f3a7d3b75b92bbd47ba4cbee9f741e602b9f954 Mon Sep 17 00:00:00 2001
From: Douceur
Date: Mon, 30 Oct 2023 22:14:24 +0100
Subject: [PATCH] Remove commented-out CudaErrCheck debug code

---
 cuda/CudaMatch.cu         | 57 ---------------------------------------
 cuda/CudaPlotPhase2.cu    | 30 +--------------------
 cuda/GpuDownloadStream.cu |  5 +---
 cuda/GpuQueue.cu          |  1 -
 cuda/GpuStreams.cu        |  1 -
 cuda/chacha8.cu           |  1 -
 6 files changed, 2 insertions(+), 93 deletions(-)

diff --git a/cuda/CudaMatch.cu b/cuda/CudaMatch.cu
index e827547f..359df68d 100644
--- a/cuda/CudaMatch.cu
+++ b/cuda/CudaMatch.cu
@@ -564,17 +564,6 @@ cudaError CudaHarvestMatchK32(
     HarvestMatchK32Kernel<<>>(
         devOutPairs, devMatchCount, devYEntries, entryCount, matchOffset );
 
-// #if _DEBUG
-//     uint32 matchCount = 0;
-//     CudaErrCheck( cudaMemcpyAsync( &matchCount, devMatchCount, sizeof( uint32 ) , cudaMemcpyDeviceToHost, stream ) );
-//     CudaErrCheck( cudaStreamSynchronize( stream ) );
-//     CudaErrCheck( cudaStreamSynchronize( stream ) );
-
-//     Pair* matches = new Pair[matchCount];
-//     CudaErrCheck( cudaMemcpyAsync( matches, devOutPairs, sizeof( Pair ) * matchCount , cudaMemcpyDeviceToHost, stream ) );
-//     CudaErrCheck( cudaStreamSynchronize( stream ) );
-//     CudaErrCheck( cudaStreamSynchronize( stream ) );
-// #endif
 
     return cudaSuccess;
 }
@@ -621,49 +610,3 @@ void CudaMatchBucketizedK32(
     MatchCudaK32Bucket<<>>( bucketMask, entryCount, cx.devGroupCount, devY, cx.devGroupBoundaries, cx.devMatchCount, cx.devMatches );
 }
 
-//-----------------------------------------------------------
-// cudaError CudaHarvestMatchK32WithGroupScan(
-//     Pair*         devOutPairs,
-//     uint32*       devMatchCount,
-//     const uint32  maxMatches,
-//     uint32*       devGroupIndices,
-//     uint32*       devGroupIndicesTemp,
-//     const uint32  maxGroups,
-//     void*         sortBuffer,
-//     const size_t  sortBufferSize,
-//     const uint64* devYEntries,
-//     const uint32  entryCount,
-//     const uint32  matchOffset,
-//     cudaStream_t  stream )
-// {
-//     // Scan for BC groups
-//     {
-//         const uint32 kblocks  = 0;
-//         const uint32 kthreads = 0;
-
-
-//         // constexpr uint32 kscanblocks = CuCDiv( BBCU_BUCKET_ALLOC_ENTRY_COUNT, BBCU_SCAN_GROUP_THREADS );
-//         // Initialize the entries to the max value so that they are not included in the sort
-//         CudaInitGroups<<>>( devGroupIndicesTemp, entryCount );
-//         // CudaInitGroupsBucket<<>>( tmpGroupCounts );
-
-//         // Add first group and last ghost group
-//         CudaSetFirstAndLastGroup<<<1,2,0,stream>>>( tmpGroupCounts, entryCount );
-//     }
-
-//     CudaErrCheck( cudaMemsetAsync( cx.devGroupCount, 0, sizeof( uint32 ), stream ) );
-//     CudaErrCheck( cudaMemsetAsync( cx.devMatchCount, 0, sizeof( uint32 ), stream ) );
-//     ScanGroupsCudaK32Bucket<<>>( devY, tmpGroupCounts+2, cx.devGroupCount, entryCount, bucketMask );
-
-//     byte* sortTmpAlloc = (byte*)( tmpGroupCounts + BBCU_MAX_GROUP_COUNT );
-//     size_t sortTmpSize = ( BBCU_BUCKET_ALLOC_ENTRY_COUNT - BBCU_MAX_GROUP_COUNT ) * sizeof( uint32 );
-
-// #if _DEBUG
-//     size_t sortSize = 0;
-//     cub::DeviceRadixSort::SortKeys( nullptr, sortSize, nullptr, nullptr, BBCU_MAX_GROUP_COUNT, 0, 32 );
-//     ASSERT( sortSize <= sortTmpSize );
-// #endif
-
-//     cub::DeviceRadixSort::SortKeys( sortTmpAlloc, sortTmpSize, tmpGroupCounts, cx.devGroupBoundaries, BBCU_MAX_GROUP_COUNT, 0, 32, stream );
-
-// }
diff --git a/cuda/CudaPlotPhase2.cu b/cuda/CudaPlotPhase2.cu
index 8d2d5094..87c1b16c 100644
--- a/cuda/CudaPlotPhase2.cu
+++ b/cuda/CudaPlotPhase2.cu
@@ -245,22 +245,7 @@ void MarkTable( CudaK32PlotContext& cx, CudaK32Phase2& p2 )
 
 #if _DEBUG
     p2.outMarks.WaitForCompletion();
 
-    // CudaErrCheck( cudaStreamSynchronize( cx.computeStream ) );
-    // CudaErrCheck( cudaStreamSynchronize( cx.gpuDownloadStream[0]->GetStream() ) );
-    // CudaErrCheck( cudaStreamSynchronize( cx.gpuDownloadStream[1]->GetStream() ) );
-    // CudaErrCheck( cudaStreamSynchronize( cx.gpuDownloadStream[2]->GetStream() ) );
-    // byte*   hByteField = bbcvirtalloc( GetMarkingTableByteSize() );
-    // uint64* hBitField  = bbcvirtalloc( GetMarkingTableBitFieldSize() );
-    // uint64* rBitField  = bbcvirtalloc( GetMarkingTableBitFieldSize() );
-    // CudaErrCheck( cudaMemcpyAsync( hByteField, devLMarks, GetMarkingTableByteSize(), cudaMemcpyDeviceToHost, cx.computeStream ) );
-    // CudaErrCheck( cudaMemcpyAsync( hBitField, bitfield, GetMarkingTableBitFieldSize(), cudaMemcpyDeviceToHost, cx.computeStream ) );
-
-    // if( rTable < TableId::Table7 )
-    //     CudaErrCheck( cudaMemcpyAsync( rBitField, p2.devRMarks, GetMarkingTableBitFieldSize(), cudaMemcpyDeviceToHost, cx.computeStream ) );
-
-    // CudaErrCheck( cudaStreamSynchronize( cx.computeStream ) );
-    // // (void)p2.outMarks.GetDeviceBuffer();
     uint64* hBitField = cx.hostMarkingTables[(int)lTable];
 
     std::atomic bitfieldPrunedEntryCount = 0;
@@ -276,23 +261,10 @@ void MarkTable( CudaK32PlotContext& cx, CudaK32Phase2& p2 )
 
         uint64 localPrunedEntryCount = 0;
         uint64 rPrunedEntryCount     = 0;
-        // BitField rMarks( rBitField, rEntryCount );
-        // const byte* bytefield = hByteField;
 
         uint64 count, offset, end;
 
-        // // Count r entries again to make sure it's still valid
-        // if( rt < TableId::Table7 )
-        // {
-        //     GetThreadOffsets( self, rEntryCount, count, offset, end );
-        //     for( uint64 i = offset; i < end; i++ )
-        //     {
-        //         if( rMarks.Get( i ) )
-        //             rPrunedEntryCount ++;
-        //     }
-
-        //     rTablePrunedEntryCount += rPrunedEntryCount;
-        // }
+        GetThreadOffsets( self, lEntryCount, count, offset, end );
 
 
         // for( uint64 i = offset; i < end; i++ )
diff --git a/cuda/GpuDownloadStream.cu b/cuda/GpuDownloadStream.cu
index 3d06973c..4d35a06b 100644
--- a/cuda/GpuDownloadStream.cu
+++ b/cuda/GpuDownloadStream.cu
@@ -64,17 +64,14 @@ void GpuDownloadBuffer::DownloadAndCopy( void* hostBuffer, void* finalBuffer, co
 
     // const void* devBuffer = self->deviceBuffer[index];
 
     // // Signal from the work stream when it has finished doing kernel work with the device buffer
-    // CudaErrCheck( cudaEventRecord( self->readyEvents[index], workStream ) );
 
     // // Ensure the work stream has completed writing data to the device buffer
     // cudaStream_t stream = self->queue->_stream;
 
-    // CudaErrCheck( cudaStreamWaitEvent( stream, self->readyEvents[index] ) );
 
     // // Copy
-    // CudaErrCheck( cudaMemcpyAsync( hostBuffer, devBuffer, size, cudaMemcpyDeviceToHost, stream ) );
-
+
     // // Signal that the device buffer is free to be re-used
     // CudaErrCheck( cudaEventRecord( self->events[index], stream ) );
 
diff --git a/cuda/GpuQueue.cu b/cuda/GpuQueue.cu
index 399a0fbf..f673f640 100644
--- a/cuda/GpuQueue.cu
+++ b/cuda/GpuQueue.cu
@@ -171,7 +171,6 @@ struct IGpuBuffer* GpuQueue::CreateGpuBuffer( const GpuStreamDescriptor& desc, b
         CudaErrCheck( cudaEventCreateWithFlags( &buf->events[i]         , cudaEventDisableTiming ) );
         CudaErrCheck( cudaEventCreateWithFlags( &buf->completedEvents[i], cudaEventDisableTiming ) );
         CudaErrCheck( cudaEventCreateWithFlags( &buf->readyEvents[i]    , cudaEventDisableTiming ) );
-        // CudaErrCheck( cudaEventCreateWithFlags( &buf->preloadEvents[i]  , cudaEventDisableTiming ) );
         CudaErrCheck( cudaEventCreateWithFlags( &buf->pinnedEvent[i]    , cudaEventDisableTiming ) );
         CudaErrCheck( cudaEventCreateWithFlags( &buf->callbackLockEvent , cudaEventDisableTiming ) );
 
diff --git a/cuda/GpuStreams.cu b/cuda/GpuStreams.cu
index 63700c9c..2c129408 100644
--- a/cuda/GpuStreams.cu
+++ b/cuda/GpuStreams.cu
@@ -99,7 +99,6 @@ void GpuUploadBuffer::UploadAndPreLoad( void* hostBuffer, const size_t size, con
 
     // cpy.copy.size = copySize;
 
     // // Launch copy command
-    // CudaErrCheck( cudaLaunchHostFunc( self->queue->GetStream(), []( void* userData ){
     //     const CopyInfo& c    = *reinterpret_cast( userData );
     //     IGpuBuffer*     self = c.self;
diff --git a/cuda/chacha8.cu b/cuda/chacha8.cu
index 7fb7c5d0..3e17c15a 100644
--- a/cuda/chacha8.cu
+++ b/cuda/chacha8.cu
@@ -293,7 +293,6 @@ static void DbgValidateBucket( CudaK32PlotContext& cx, const uint32 bucket )
     if( _dbgPool == nullptr )
        _dbgPool = new ThreadPool( SysHost::GetLogicalCPUCount() );
 
-    //CudaErrCheck( cudaStreamSynchronize( cx.downloadStream ) );
 
     Log::Line( "Validating bucket %u", bucket );
     AnonMTJob::Run( *_dbgPool, [&cx, bucket]( AnonMTJob* self ) {