From b73605a572dd20570284285aa39c7c446961bfd4 Mon Sep 17 00:00:00 2001 From: amit Date: Mon, 7 Nov 2016 17:51:54 +0900 Subject: [PATCH] Fix writing of checksum for blocks around the segment boundary. In direct write mode, pg_bulkload loads data into a fixed number of private buffers and, once they are full, writes those buffers to the actual relation file. Each page's checksum is set before the page is written to the file, and computing the checksum requires the page's block number. Due to a bug in how the block number was calculated, the wrong checksum was computed in certain cases, especially when a segment of the relation is about to get full. --- lib/writer_direct.c | 44 ++++++++++++++++++++++++++++---------------- 1 file changed, 28 insertions(+), 16 deletions(-) diff --git a/lib/writer_direct.c b/lib/writer_direct.c index 3ee7ed3..ed7126b 100644 --- a/lib/writer_direct.c +++ b/lib/writer_direct.c @@ -102,8 +102,10 @@ static bool DirectWriterParam(DirectWriter *self, const char *keyword, char *val static void DirectWriterDumpParams(DirectWriter *self); static int DirectWriterSendQuery(DirectWriter *self, PGconn *conn, char *queueName, char *logfile, bool verbose); -#define GetCurrentPage(self) ((Page) ((self)->blocks + BLCKSZ * (self)->curblk)) -#define GetTargetPage(self,blk_offset) ((Page) ((self)->blocks + BLCKSZ * ((self)->curblk - blk_offset))) +#define GetCurrentPage(self) \ + ((Page) ((self)->blocks + BLCKSZ * (self)->curblk)) +#define GetTargetPage(self, blk_offset) \ + ((Page) ((self)->blocks + BLCKSZ * (blk_offset))) /** * @brief Total number of blocks at the time @@ -537,25 +539,35 @@ flush_pages(DirectWriter *loader) flush_num = Min(num - i, RELSEG_SIZE - relblks % RELSEG_SIZE); Assert(flush_num > 0); - /* Write the last block number to the load status file. 
*/ - UpdateLSF(loader, flush_num); - #if PG_VERSION_NUM >= 90300 - /* If we need a checksum, add it */ - if (DataChecksumsEnabled()){ - int j = 0; - Page contained_page; - for ( j=0; j<flush_num; j++ ) { - contained_page = GetTargetPage(loader,j); - ((PageHeader) contained_page)->pd_checksum = - pg_checksum_page((char *) contained_page, LS_TOTAL_CNT(ls) - 1 - j); - } + if (DataChecksumsEnabled()) + { + Page contained_page; + int j; + + /* + * Write checksum for pages that are going to be written to the + * current file. We will be writing flush_num pages from the + * block buffer starting at block offset i. + */ + for (j = 0; j < flush_num; j++) + { + contained_page = GetTargetPage(loader, i + j); + PageSetChecksumInplace(contained_page, LS_TOTAL_CNT(ls) + j); + } } #endif + /* Write the last block number to the load status file. */ + UpdateLSF(loader, flush_num); + /* - * Flush flush_num data block to the current file. - * Then the current file size becomes RELSEG_SIZE self->blocks. + * Write flush_num blocks to the current file starting at block + * offset i. The current file might get full, ie, RELSEG_SIZE blocks + * full, after writing that much (see how flush_num is calculated + * above to understand why) . We write the remaining content of the + * block buffer (ie, loader->blocks) in the new file during the next + * iteration. */ buffer = loader->blocks + BLCKSZ * i; total = BLCKSZ * flush_num;