Skip to content

Commit

Permalink
Add plot checker support after each plot (#414)
Browse files Browse the repository at this point in the history
Add plot checker support after each plot

- Fix ownership of GRContext on PlotReader
  • Loading branch information
haorldbchi authored Sep 21, 2023
1 parent 527e564 commit ab4d27c
Show file tree
Hide file tree
Showing 15 changed files with 504 additions and 149 deletions.
2 changes: 1 addition & 1 deletion .vscode/launch.json
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,7 @@
// "--memo", "80a836a74b077cabaca7a76d1c3c9f269f7f3a8f2fa196a65ee8953eb81274eb8b7328d474982617af5a0fe71b47e9b8ade0cc43610ce7540ab96a524d0ab17f5df7866ef13d1221a7203e5d10ad2a4ae37f7b73f6cdfd6ddf4122e8a1c2f8ef207d52406afa2b6d7d92ea778f407205bd9dca40816c1b1cacfca2a6612b93eb",

"args":
// "-w -z 3 -f ade0cc43610ce7540ab96a524d0ab17f5df7866ef13d1221a7203e5d10ad2a4ae37f7b73f6cdfd6ddf4122e8a1c2f8ef -p 80a836a74b077cabaca7a76d1c3c9f269f7f3a8f2fa196a65ee8953eb81274eb8b7328d474982617af5a0fe71b47e9b8 -i c6b84729c23dc6d60c92f22c17083f47845c1179227c5509f07a5d2804a7b835 cudaplot ~/plot/tmp",
"-w -n 1 -z 1 -f ade0cc43610ce7540ab96a524d0ab17f5df7866ef13d1221a7203e5d10ad2a4ae37f7b73f6cdfd6ddf4122e8a1c2f8ef -p 80a836a74b077cabaca7a76d1c3c9f269f7f3a8f2fa196a65ee8953eb81274eb8b7328d474982617af5a0fe71b47e9b8 -i c6b84729c23dc6d60c92f22c17083f47845c1179227c5509f07a5d2804a7b835 cudaplot --check 100 --check-threshold 2 /home/harold/plot",

// "-w -z 1 -f ade0cc43610ce7540ab96a524d0ab17f5df7866ef13d1221a7203e5d10ad2a4ae37f7b73f6cdfd6ddf4122e8a1c2f8ef -p 80a836a74b077cabaca7a76d1c3c9f269f7f3a8f2fa196a65ee8953eb81274eb8b7328d474982617af5a0fe71b47e9b8 -i c6b84729c23dc6d60c92f22c17083f47845c1179227c5509f07a5d2804a7b835 cudaplot /home/harold/plot",
// "-w -z 1 -f ade0cc43610ce7540ab96a524d0ab17f5df7866ef13d1221a7203e5d10ad2a4ae37f7b73f6cdfd6ddf4122e8a1c2f8ef -p 80a836a74b077cabaca7a76d1c3c9f269f7f3a8f2fa196a65ee8953eb81274eb8b7328d474982617af5a0fe71b47e9b8 -i c6b84729c23dc6d60c92f22c17083f47845c1179227c5509f07a5d2804a7b835 cudaplot --disk-128 -t1 /home/harold/plotdisk --no-direct-buffers /home/harold/plot",
Expand Down
1 change: 1 addition & 0 deletions Bladebit.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -260,6 +260,7 @@ set(src_bladebit
src/tools/PlotReader.cpp
src/tools/PlotReader.h
src/tools/PlotValidator.cpp
src/tools/PlotChecker.cpp

src/util/Array.h
src/util/Array.inl
Expand Down
4 changes: 4 additions & 0 deletions cuda/CudaPlotContext.h
Original file line number Diff line number Diff line change
Expand Up @@ -327,6 +327,10 @@ struct CudaK32PlotContext
CudaK32ParkContext* parkContext = nullptr;
bool useParkContext = false;

// Used when '--check' is enabled
struct GreenReaperContext* grCheckContext = nullptr;
class PlotChecker* plotChecker = nullptr;

struct
{
Duration uploadTime = Duration::zero(); // Host-to-device wait time
Expand Down
95 changes: 86 additions & 9 deletions cuda/CudaPlotter.cu
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
#include "plotting/TableWriter.h"
#include "plotting/PlotTools.h"
#include "util/VirtualAllocator.h"
#include "harvesting/GreenReaper.h"
#include "tools/PlotChecker.h"


// TEST/DEBUG
Expand Down Expand Up @@ -59,17 +61,34 @@ GPU-based (CUDA) plotter
--disk-128 : Enable hybrid disk plotting for 128G system RAM.
Requires a --temp1 and --temp2 to be set.
--disk-16 : (experimental) Enable hybrid disk plotting for 16G system RAM.
Requires a --temp1 and --temp2 to be set.
-t1, --temp1 : Temporary directory 1. Used for longer-lived, sequential writes.
-t2, --temp2 : Temporary directory 2. Used for temporary, shorted-lived read and writes.
NOTE: If only one of -t1 or -t2 is specified, both will be
set to the same directory.
--check <n> : Perform a plot check for <n> proofs on the newly created plot.
--check-threshold <f>: Proof threshold rate below which the plots that don't pass
the check will be deleted.
That is, the number of proofs fetched / proof check count
must be above or equal to this threshold to pass.
(default=0.6).
)";

///
/// CLI
///
//-----------------------------------------------------------
// Prints the CUDA plotter's command-line usage text (USAGE) through the logger.
// Takes no arguments and returns nothing.
void CudaK32PlotterPrintHelp()
{
// NOTE(review): USAGE is passed as the first argument of Log::Line, which is
// called with printf-style format strings elsewhere in this file -- confirm
// that USAGE never contains a literal '%'.
Log::Line( USAGE );
}

//-----------------------------------------------------------
void CudaK32Plotter::ParseCLI( const GlobalPlotConfig& gCfg, CliParser& cli )
{
Expand Down Expand Up @@ -103,11 +122,16 @@ void CudaK32Plotter::ParseCLI( const GlobalPlotConfig& gCfg, CliParser& cli )
continue;
if( cli.ReadUnswitch( cfg.temp2DirectIO, "--no-t2-direct" ) )
continue;

if( cli.ReadU64( cfg.plotCheckCount, "--check" ) )
continue;
if( cli.ReadF64( cfg.plotCheckThreshhold, "--check-threshold" ) )
continue;
// if( cli.ReadSwitch( cfg.disableDirectDownloads, "--no-direct-buffers" ) )
// continue;
if( cli.ArgMatch( "--help", "-h" ) )
{
Log::Line( USAGE );
CudaK32PlotterPrintHelp();
exit( 0 );
}
else
Expand Down Expand Up @@ -155,8 +179,17 @@ void InitContext( CudaK32PlotConfig& cfg, CudaK32PlotContext*& outContext )
cx.firstStoredTable = TableId::Table2 + (TableId)cx.gCfg->numDroppedTables;

Log::Line( "[Bladebit CUDA Plotter]" );
Log::Line( " Host RAM : %llu GiB", SysHost::GetTotalSystemMemory() BtoGB );
Log::Line( " Direct transfers: %s", cfg.disableDirectDownloads ? "false" : "true" );
Log::Line( " Host RAM : %llu GiB", SysHost::GetTotalSystemMemory() BtoGB );

if( cx.cfg.plotCheckCount == 0 )
Log::Line( " Plot checks : disabled" );
else
{
Log::Line( " Plot checks : enabled ( %llu )", (llu)cx.cfg.plotCheckCount );
Log::Line( " Plot check threshold: %.3lf", cx.cfg.plotCheckThreshhold );
}

// Log::Line( " Direct transfers: %s", cfg.disableDirectDownloads ? "false" : "true" );
Log::NewLine();

CudaInit( cx );
Expand Down Expand Up @@ -223,6 +256,43 @@ void InitContext( CudaK32PlotConfig& cfg, CudaK32PlotContext*& outContext )
Log::Line( "Allocating buffers (this may take a few seconds)..." );
AllocBuffers( cx );
InitFSEBitMask( cx );
Log::Line( "Done." );


// Allocate GR Context if --check was specified
if( cfg.plotCheckCount > 0 )
{
if( cfg.gCfg->compressionLevel > 0 )
{
GreenReaperConfig grCfg{};
grCfg.apiVersion = GR_API_VERSION;
grCfg.threadCount = 1;
grCfg.gpuRequest = GRGpuRequestKind_ExactDevice;
grCfg.gpuDeviceIndex = cfg.deviceIndex;

auto grResult = grCreateContext( &cx.grCheckContext, &grCfg, sizeof( grCfg ) );
FatalIf( grResult != GRResult_OK, "Failed to create decompression context for plot check with error '%s' (%d).",
grResultToString( grResult ), (int)grResult );

grResult = grPreallocateForCompressionLevel( cx.grCheckContext, BBCU_K, cfg.gCfg->compressionLevel );
FatalIf( grResult != GRResult_OK, "Failed to preallocate memory for decompression context with error '%s' (%d).",
grResultToString( grResult ), (int)grResult );
}

PlotCheckerConfig checkerCfg{};
checkerCfg.proofCount = cfg.plotCheckCount;
checkerCfg.noGpu = false;
checkerCfg.gpuIndex = cfg.deviceIndex;
checkerCfg.threadCount = 1;
checkerCfg.disableCpuAffinity = false;
checkerCfg.silent = false;
checkerCfg.hasSeed = false;
checkerCfg.deletePlots = true;
checkerCfg.deleteThreshold = cfg.plotCheckThreshhold;
checkerCfg.grContext = cx.grCheckContext;

cx.plotChecker = PlotChecker::Create( checkerCfg );
}
}

//-----------------------------------------------------------
Expand Down Expand Up @@ -293,6 +363,8 @@ void CudaK32Plotter::Run( const PlotRequest& req )
cx.plotWriter = new PlotWriter( !cfg.gCfg->disableOutputDirectIO );
if( cx.gCfg->benchmarkMode )
cx.plotWriter->EnableDummyMode();
if( cx.plotChecker )
cx.plotWriter->EnablePlotChecking( *cx.plotChecker );

FatalIf( !cx.plotWriter->BeginPlot( cfg.gCfg->compressionLevel > 0 ? PlotVersion::v2_0 : PlotVersion::v1_0,
req.outDir, req.plotFileName, req.plotId, req.memo, req.memoSize, cfg.gCfg->compressionLevel ),
Expand All @@ -313,21 +385,25 @@ void CudaK32Plotter::Run( const PlotRequest& req )
const double plotIOTime = TimerEnd( pltoCompleteTimer );
Log::Line( "Completed writing plot in %.2lf seconds", plotIOTime );

cx.plotWriter->DumpTables();
if( !cx.plotChecker || !cx.plotChecker->LastPlotDeleted() )
{
cx.plotWriter->DumpTables();
Log::NewLine();
}
}
Log::Line( "" );


delete cx.plotWriter;
cx.plotWriter = nullptr;


// Delete any temporary files
#if !(DBG_BBCU_KEEP_TEMP_FILES)
if( cx.plotRequest.IsFinalPlot && cx.cfg.hybrid128Mode )
{
if( cx.diskContext->yBuffer ) delete cx.diskContext->yBuffer;
if( cx.diskContext->yBuffer ) delete cx.diskContext->yBuffer;
if( cx.diskContext->metaBuffer ) delete cx.diskContext->metaBuffer;
if( cx.diskContext->unsortedL ) delete cx.diskContext->unsortedL;
if( cx.diskContext->unsortedR ) delete cx.diskContext->unsortedR;
if( cx.diskContext->unsortedL ) delete cx.diskContext->unsortedL;
if( cx.diskContext->unsortedR ) delete cx.diskContext->unsortedR;

for( TableId t = TableId::Table1; t <= TableId::Table7; t++ )
{
Expand Down Expand Up @@ -1213,6 +1289,7 @@ void UploadBucketForTable( CudaK32PlotContext& cx, const uint64 bucket )
}



///
/// Allocations
///
Expand Down
23 changes: 14 additions & 9 deletions cuda/CudaPlotter.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,19 +9,22 @@ struct CudaK32PlotConfig
{
const GlobalPlotConfig* gCfg = nullptr;

uint32 deviceIndex = 0; // Which CUDA device to use when plotting/
bool disableDirectDownloads = false; // Don't allocate host tables using pinned buffers, instead
// download to intermediate pinned buffers then copy to the final host buffer.
// May be necessarry on Windows because of shared memory limitations (usual 50% of system memory)
uint32 deviceIndex = 0; // Which CUDA device to use when plotting/
bool disableDirectDownloads = false; // Don't allocate host tables using pinned buffers, instead
// download to intermediate pinned buffers then copy to the final host buffer.
// May be necessary on Windows because of shared memory limitations (usually 50% of system memory)

bool hybrid128Mode = false; // Enable hybrid disk-offload w/ 128G of RAM.
bool hybrid16Mode = false; // Enable hybrid disk-offload w/ 64G of RAM.
bool hybrid128Mode = false; // Enable hybrid disk-offload w/ 128G of RAM.
bool hybrid16Mode = false; // Enable hybrid disk-offload w/ 16G of RAM.

const char* temp1Path = nullptr; // For 128G RAM mode
const char* temp2Path = nullptr; // For 64G RAM mode

bool temp1DirectIO = true; // Use direct I/O for temp1 files
bool temp2DirectIO = true; // Use direct I/O for temp2 files
bool temp1DirectIO = true; // Use direct I/O for temp1 files
bool temp2DirectIO = true; // Use direct I/O for temp2 files

uint64 plotCheckCount = 0; // For performing plot check command after plotting
double plotCheckThreshhold = 0.6; // Proof/check threshold below which plots will be deleted
};

class CudaK32Plotter : public IPlotter
Expand All @@ -37,4 +40,6 @@ class CudaK32Plotter : public IPlotter
private:
CudaK32PlotConfig _cfg = {};
struct CudaK32PlotContext* _cx = nullptr;;
};
};

void CudaK32PlotterPrintHelp();
9 changes: 5 additions & 4 deletions src/PlotContext.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,11 @@

struct PlotRequest
{
const byte* plotId; // Id of the plot we want to create
const char* outDir; // Output plot directory
const char* plotFileName; // .plot.tmp file name
const byte* memo; // Plot memo
const byte* plotId; // Id of the plot we want to create
const char* outDir; // Output plot directory
const char* plotFileName; // .plot.tmp file name
const char* plotOutPath; // Full output path for the final .plot.tmp file
const byte* memo; // Plot memo
uint16 memoSize;
bool isFirstPlot;
bool IsFinalPlot;
Expand Down
Loading

0 comments on commit ab4d27c

Please sign in to comment.