Commit 2f3a7d3

remove comment of CudaErrCheck

Douceur committed Oct 30, 2023
1 parent f9f31d1 commit 2f3a7d3

Showing 6 changed files with 2 additions and 93 deletions.
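For context: every hunk in this commit deletes commented-out uses of CudaErrCheck, the repository's CUDA runtime error-checking wrapper. Its definition is not part of this diff; the sketch below shows the conventional shape such a macro takes (the message format and abort-on-failure behavior are assumptions, not the project's actual definition):

#include <cstdio>
#include <cstdlib>
#include <cuda_runtime.h>

// Hypothetical sketch of a CudaErrCheck-style macro: evaluate a CUDA runtime
// call and abort with a readable message if it did not return cudaSuccess.
#define CudaErrCheck( call )                                        \
    do {                                                            \
        const cudaError_t err_ = ( call );                          \
        if( err_ != cudaSuccess )                                   \
        {                                                           \
            fprintf( stderr, "CUDA error %d: %s at %s:%d\n",        \
                (int)err_, cudaGetErrorString( err_ ),              \
                __FILE__, __LINE__ );                               \
            exit( 1 );                                              \
        }                                                           \
    } while( 0 )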
57 changes: 0 additions & 57 deletions cuda/CudaMatch.cu
@@ -564,17 +564,6 @@ cudaError CudaHarvestMatchK32(
HarvestMatchK32Kernel<<<kblocks, kthreads, 0, stream>>>(
devOutPairs, devMatchCount, devYEntries, entryCount, matchOffset );

// #if _DEBUG
// uint32 matchCount = 0;
// CudaErrCheck( cudaMemcpyAsync( &matchCount, devMatchCount, sizeof( uint32 ) , cudaMemcpyDeviceToHost, stream ) );
// CudaErrCheck( cudaStreamSynchronize( stream ) );
// CudaErrCheck( cudaStreamSynchronize( stream ) );

// Pair* matches = new Pair[matchCount];
// CudaErrCheck( cudaMemcpyAsync( matches, devOutPairs, sizeof( Pair ) * matchCount , cudaMemcpyDeviceToHost, stream ) );
// CudaErrCheck( cudaStreamSynchronize( stream ) );
// CudaErrCheck( cudaStreamSynchronize( stream ) );
// #endif

return cudaSuccess;
}
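The #if _DEBUG block deleted above followed the usual two-step debug readback: copy the device-side match counter to the host, synchronize, then copy the variable number of matches it describes. A cleaned-up sketch of that pattern, using the names from the removed code (note that one cudaStreamSynchronize per copy is sufficient, where the removed code repeated each one):

// Debug-only readback of match results (sketch of the removed pattern).
uint32 matchCount = 0;
CudaErrCheck( cudaMemcpyAsync( &matchCount, devMatchCount, sizeof( uint32 ),
                               cudaMemcpyDeviceToHost, stream ) );
CudaErrCheck( cudaStreamSynchronize( stream ) );   // matchCount is now valid on the host

Pair* matches = new Pair[matchCount];
CudaErrCheck( cudaMemcpyAsync( matches, devOutPairs, sizeof( Pair ) * matchCount,
                               cudaMemcpyDeviceToHost, stream ) );
CudaErrCheck( cudaStreamSynchronize( stream ) );   // matches[] is now valid on the host
delete[] matches;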
@@ -621,49 +610,3 @@ void CudaMatchBucketizedK32(
MatchCudaK32Bucket<<<BBCU_MAX_GROUP_COUNT, BBCU_THREADS_PER_MATCH_GROUP, 0, stream>>>( bucketMask, entryCount, cx.devGroupCount, devY, cx.devGroupBoundaries, cx.devMatchCount, cx.devMatches );
}

//-----------------------------------------------------------
// cudaError CudaHarvestMatchK32WithGroupScan(
// Pair* devOutPairs,
// uint32* devMatchCount,
// const uint32 maxMatches,
// uint32* devGroupIndices,
// uint32* devGroupIndicesTemp,
// const uint32 maxGroups,
// void* sortBuffer,
// const size_t sortBufferSize,
// const uint64* devYEntries,
// const uint32 entryCount,
// const uint32 matchOffset,
// cudaStream_t stream )
// {
// // Scan for BC groups
// {
// const uint32 kblocks = 0;
// const uint32 kthreads = 0;


// // constexpr uint32 kscanblocks = CuCDiv( BBCU_BUCKET_ALLOC_ENTRY_COUNT, BBCU_SCAN_GROUP_THREADS );
// // Initialize the entries to the max value so that they are not included in the sort
// CudaInitGroups<<<kblocks, kthreads, 0, stream>>>( devGroupIndicesTemp, entryCount );
// // CudaInitGroupsBucket<<<kscanblocks, BBCU_SCAN_GROUP_THREADS, 0, stream>>>( tmpGroupCounts );

// // Add first group and last ghost group
// CudaSetFirstAndLastGroup<<<1,2,0,stream>>>( tmpGroupCounts, entryCount );
// }

// CudaErrCheck( cudaMemsetAsync( cx.devGroupCount, 0, sizeof( uint32 ), stream ) );
// CudaErrCheck( cudaMemsetAsync( cx.devMatchCount, 0, sizeof( uint32 ), stream ) );
// ScanGroupsCudaK32Bucket<<<kscanblocks, BBCU_SCAN_GROUP_THREADS, 0, stream>>>( devY, tmpGroupCounts+2, cx.devGroupCount, entryCount, bucketMask );

// byte* sortTmpAlloc = (byte*)( tmpGroupCounts + BBCU_MAX_GROUP_COUNT );
// size_t sortTmpSize = ( BBCU_BUCKET_ALLOC_ENTRY_COUNT - BBCU_MAX_GROUP_COUNT ) * sizeof( uint32 );

// #if _DEBUG
// size_t sortSize = 0;
// cub::DeviceRadixSort::SortKeys<uint32, uint32>( nullptr, sortSize, nullptr, nullptr, BBCU_MAX_GROUP_COUNT, 0, 32 );
// ASSERT( sortSize <= sortTmpSize );
// #endif

// cub::DeviceRadixSort::SortKeys<uint32, uint32>( sortTmpAlloc, sortTmpSize, tmpGroupCounts, cx.devGroupBoundaries, BBCU_MAX_GROUP_COUNT, 0, 32, stream );

// }
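The commented-out function removed above also preserved a useful CUB idiom: calling cub::DeviceRadixSort::SortKeys with a null temp-storage pointer performs no sort and instead reports the scratch size required, which the debug build then asserted against the pre-allocated buffer. A minimal self-contained sketch of that two-phase pattern (function and buffer names are placeholders, not the project's):

#include <cstdint>
#include <cub/cub.cuh>

// Two-phase CUB radix sort: query temp-storage size, then sort for real.
static cudaError_t SortKeys32( uint32_t* d_keysIn, uint32_t* d_keysOut,
                               uint32_t itemCount, cudaStream_t stream )
{
    // Phase 1: a null temp-storage pointer makes CUB write the bytes
    // required into tempBytes without sorting anything.
    size_t tempBytes = 0;
    cub::DeviceRadixSort::SortKeys<uint32_t, uint32_t>(
        nullptr, tempBytes, d_keysIn, d_keysOut, itemCount, 0, 32, stream );

    void* d_temp = nullptr;
    cudaError_t err = cudaMalloc( &d_temp, tempBytes );
    if( err != cudaSuccess )
        return err;

    // Phase 2: the same call with real storage performs the sort.
    cub::DeviceRadixSort::SortKeys<uint32_t, uint32_t>(
        d_temp, tempBytes, d_keysIn, d_keysOut, itemCount, 0, 32, stream );

    return cudaFree( d_temp );
}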
30 changes: 1 addition & 29 deletions cuda/CudaPlotPhase2.cu
@@ -245,22 +245,7 @@ void MarkTable( CudaK32PlotContext& cx, CudaK32Phase2& p2 )
#if _DEBUG
p2.outMarks.WaitForCompletion();

// CudaErrCheck( cudaStreamSynchronize( cx.computeStream ) );
// CudaErrCheck( cudaStreamSynchronize( cx.gpuDownloadStream[0]->GetStream() ) );
// CudaErrCheck( cudaStreamSynchronize( cx.gpuDownloadStream[1]->GetStream() ) );
// CudaErrCheck( cudaStreamSynchronize( cx.gpuDownloadStream[2]->GetStream() ) );

// byte* hByteField = bbcvirtalloc<byte>( GetMarkingTableByteSize() );
// uint64* hBitField = bbcvirtalloc<uint64>( GetMarkingTableBitFieldSize() );
// uint64* rBitField = bbcvirtalloc<uint64>( GetMarkingTableBitFieldSize() );
// CudaErrCheck( cudaMemcpyAsync( hByteField, devLMarks, GetMarkingTableByteSize(), cudaMemcpyDeviceToHost, cx.computeStream ) );
// CudaErrCheck( cudaMemcpyAsync( hBitField, bitfield, GetMarkingTableBitFieldSize(), cudaMemcpyDeviceToHost, cx.computeStream ) );

// if( rTable < TableId::Table7 )
// CudaErrCheck( cudaMemcpyAsync( rBitField, p2.devRMarks, GetMarkingTableBitFieldSize(), cudaMemcpyDeviceToHost, cx.computeStream ) );

// CudaErrCheck( cudaStreamSynchronize( cx.computeStream ) );
// // (void)p2.outMarks.GetDeviceBuffer();
uint64* hBitField = cx.hostMarkingTables[(int)lTable];

std::atomic<uint64> bitfieldPrunedEntryCount = 0;
@@ -276,23 +261,10 @@ void MarkTable( CudaK32PlotContext& cx, CudaK32Phase2& p2 )
uint64 localPrunedEntryCount = 0;
uint64 rPrunedEntryCount = 0;

// BitField rMarks( rBitField, rEntryCount );
// const byte* bytefield = hByteField;

uint64 count, offset, end;

// // Count r entries again to make sure it's still valid
// if( rt < TableId::Table7 )
// {
// GetThreadOffsets( self, rEntryCount, count, offset, end );
// for( uint64 i = offset; i < end; i++ )
// {
// if( rMarks.Get( i ) )
// rPrunedEntryCount ++;
// }

// rTablePrunedEntryCount += rPrunedEntryCount;
// }


GetThreadOffsets( self, lEntryCount, count, offset, end );
// for( uint64 i = offset; i < end; i++ )
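The surviving debug code in MarkTable splits the bitfield scan across worker threads with GetThreadOffsets, which hands each thread a contiguous slice of the entry range. A sketch of the partitioning that call implies (this is an assumption about the helper's semantics inferred from its usage, not bladebit's actual implementation):

// Hypothetical even partitioning of totalCount items across threadCount
// workers: each thread gets a contiguous [offset, end) slice, and the
// last thread absorbs the remainder.
static void GetThreadOffsetsSketch( uint32_t threadId, uint32_t threadCount,
                                    uint64_t totalCount,
                                    uint64_t& count, uint64_t& offset, uint64_t& end )
{
    const uint64_t base = totalCount / threadCount;
    offset = threadId * base;
    end    = ( threadId == threadCount - 1 ) ? totalCount : offset + base;
    count  = end - offset;
}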
5 changes: 1 addition & 4 deletions cuda/GpuDownloadStream.cu
@@ -64,17 +64,14 @@ void GpuDownloadBuffer::DownloadAndCopy( void* hostBuffer, void* finalBuffer, co
// const void* devBuffer = self->deviceBuffer[index];

// // Signal from the work stream when it has finished doing kernel work with the device buffer
// CudaErrCheck( cudaEventRecord( self->readyEvents[index], workStream ) );


// // Ensure the work stream has completed writing data to the device buffer
// cudaStream_t stream = self->queue->_stream;

// CudaErrCheck( cudaStreamWaitEvent( stream, self->readyEvents[index] ) );

// // Copy
// CudaErrCheck( cudaMemcpyAsync( hostBuffer, devBuffer, size, cudaMemcpyDeviceToHost, stream ) );


// // Signal that the device buffer is free to be re-used
// CudaErrCheck( cudaEventRecord( self->events[index], stream ) );

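The disabled download path above shows the standard CUDA pattern for ordering work across two streams: record an event on the producer stream, make the consumer stream wait on it, then enqueue the copy. A minimal sketch under that assumption (function, buffer, and stream names are placeholders); it also uses cudaEventDisableTiming, the same flag the GpuQueue.cu hunk below applies, which skips timestamping and makes record/wait cheaper for events that exist purely for ordering:

#include <cuda_runtime.h>

// Order a device-to-host copy on copyStream behind kernel work on workStream.
static cudaError_t DownloadAfterKernel( void* hostDst, const void* devSrc, size_t size,
                                        cudaStream_t workStream, cudaStream_t copyStream )
{
    // Sync-only event: no timing, just a cross-stream dependency marker.
    cudaEvent_t ready;
    cudaError_t err = cudaEventCreateWithFlags( &ready, cudaEventDisableTiming );
    if( err != cudaSuccess )
        return err;

    // Signal from the work stream once its prior kernels have finished.
    cudaEventRecord( ready, workStream );

    // The copy stream waits for that signal before the memcpy runs.
    cudaStreamWaitEvent( copyStream, ready, 0 );
    cudaMemcpyAsync( hostDst, devSrc, size, cudaMemcpyDeviceToHost, copyStream );

    // Safe even while pending: resources are released once the event completes.
    return cudaEventDestroy( ready );
}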
1 change: 0 additions & 1 deletion cuda/GpuQueue.cu
@@ -171,7 +171,6 @@ struct IGpuBuffer* GpuQueue::CreateGpuBuffer( const GpuStreamDescriptor& desc, b
CudaErrCheck( cudaEventCreateWithFlags( &buf->events[i] , cudaEventDisableTiming ) );
CudaErrCheck( cudaEventCreateWithFlags( &buf->completedEvents[i], cudaEventDisableTiming ) );
CudaErrCheck( cudaEventCreateWithFlags( &buf->readyEvents[i] , cudaEventDisableTiming ) );
// CudaErrCheck( cudaEventCreateWithFlags( &buf->preloadEvents[i] , cudaEventDisableTiming ) );
CudaErrCheck( cudaEventCreateWithFlags( &buf->pinnedEvent[i] , cudaEventDisableTiming ) );

CudaErrCheck( cudaEventCreateWithFlags( &buf->callbackLockEvent , cudaEventDisableTiming ) );
1 change: 0 additions & 1 deletion cuda/GpuStreams.cu
@@ -99,7 +99,6 @@ void GpuUploadBuffer::UploadAndPreLoad( void* hostBuffer, const size_t size, con
// cpy.copy.size = copySize;

// // Launch copy command
// CudaErrCheck( cudaLaunchHostFunc( self->queue->GetStream(), []( void* userData ){

// const CopyInfo& c = *reinterpret_cast<CopyInfo*>( userData );
// IGpuBuffer* self = c.self;
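The disabled upload path above queued host-side work onto the stream with cudaLaunchHostFunc, which runs a host callback once all previously enqueued stream work has finished. A minimal sketch of that mechanism (the CopyInfo payload is simplified to an illustrative struct, not the project's actual type):

#include <cstdio>
#include <cuda_runtime.h>

struct CopyInfo
{
    const char* label;
    size_t      size;
};

// Host callback: runs on a CUDA-internal thread after everything previously
// enqueued on the stream has completed. It must not call CUDA API functions.
static void CUDART_CB OnCopyDone( void* userData )
{
    const CopyInfo& c = *reinterpret_cast<CopyInfo*>( userData );
    printf( "copy '%s' of %zu bytes completed\n", c.label, c.size );
}

// Usage: enqueue the callback behind prior work on `stream`.
//   static CopyInfo info = { "table1", 1u << 20 };
//   CudaErrCheck( cudaLaunchHostFunc( stream, OnCopyDone, &info ) );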
1 change: 0 additions & 1 deletion cuda/chacha8.cu
@@ -293,7 +293,6 @@ static void DbgValidateBucket( CudaK32PlotContext& cx, const uint32 bucket )
if( _dbgPool == nullptr )
_dbgPool = new ThreadPool( SysHost::GetLogicalCPUCount() );

//CudaErrCheck( cudaStreamSynchronize( cx.downloadStream ) );

Log::Line( "Validating bucket %u", bucket );
AnonMTJob::Run( *_dbgPool, [&cx, bucket]( AnonMTJob* self ) {
