On-demand downloading of SLRU segments (#332)
* On-demand downloading of SLRU segments

* Fix smgr_read_slru_segment

* Determine SLRU kind in extension

* Use ctl->PagePrecedes for SLRU page comparison in SimpleLruDownloadSegment to address wraparound (see the sketch below)

---------

Co-authored-by: Konstantin Knizhnik <[email protected]>
2 people authored and tristan957 committed Feb 6, 2024
1 parent 0ff55c5 commit 6ee78a3
Showing 3 changed files with 112 additions and 14 deletions.
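
A note on the last commit-message bullet: SLRU page numbers are derived from 32-bit counters (transaction IDs, multixact IDs) that wrap around, so a plain integer comparison such as "latest_page_number < pageno" can give the wrong answer near the wraparound point. Each SLRU therefore supplies a wraparound-aware comparator via ctl->PagePrecedes, and SimpleLruDownloadSegment now uses it. The following is a minimal, self-contained sketch of the idea, assuming a placeholder transactions-per-page constant; it is illustrative only and not the exact comparator of any particular SLRU.

    #include <stdbool.h>
    #include <stdint.h>

    #define EXAMPLE_XACTS_PER_PAGE 8192    /* placeholder; real SLRUs derive this from BLCKSZ */

    /* Circular "precedes" test in the style of TransactionIdPrecedes(). */
    static bool
    example_page_precedes(int page1, int page2)
    {
        uint32_t    xid1 = (uint32_t) page1 * EXAMPLE_XACTS_PER_PAGE;
        uint32_t    xid2 = (uint32_t) page2 * EXAMPLE_XACTS_PER_PAGE;

        /* Signed difference of unsigned values handles wraparound correctly. */
        return (int32_t) (xid1 - xid2) < 0;
    }

With this ordering, a page logically just behind the latest written page still compares as preceding it even after the raw page numbers have wrapped back to small values.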
105 changes: 92 additions & 13 deletions src/backend/access/transam/slru.c
@@ -59,6 +59,7 @@
#include "pgstat.h"
#include "storage/fd.h"
#include "storage/shmem.h"
#include "storage/smgr.h"

#define SlruFileName(ctl, path, seg) \
snprintf(path, MAXPGPATH, "%s/%04X", (ctl)->Dir, seg)
@@ -617,6 +618,66 @@ SimpleLruWritePage(SlruCtl ctl, int slotno)
     SlruInternalWritePage(ctl, slotno, NULL);
 }
 
+
+/*
+ * NEON: we do not want to include large pg_xact/multixact files in the basebackup and prefer
+ * to download them on demand to reduce startup time.
+ * If an SLRU segment is not found, we try to download it from the page server.
+ */
+static int
+SimpleLruDownloadSegment(SlruCtl ctl, int pageno, char const* path)
+{
+    int         segno;
+    int         fd = -1;
+    int         n_blocks;
+    char*       buffer;
+
+    static SMgrRelationData dummy_smgr_rel = {0};
+
+    /* If page is greater than latest written page, then do not try to download segment from server */
+    if (ctl->PagePrecedes(ctl->shared->latest_page_number, pageno))
+        return -1;
+
+    if (!dummy_smgr_rel.smgr)
+    {
+        RelFileNode rnode = {0};
+        dummy_smgr_rel.smgr = smgr(InvalidBackendId, rnode);
+    }
+    segno = pageno / SLRU_PAGES_PER_SEGMENT;
+
+    buffer = palloc(BLCKSZ * SLRU_PAGES_PER_SEGMENT);
+    n_blocks = smgr_read_slru_segment(&dummy_smgr_rel, path, segno, buffer);
+    if (n_blocks > 0)
+    {
+        fd = OpenTransientFile(path, O_RDWR | O_CREAT | PG_BINARY);
+        if (fd < 0)
+        {
+            slru_errcause = SLRU_OPEN_FAILED;
+            slru_errno = errno;
+            pfree(buffer);
+            return -1;
+        }
+        errno = 0;
+        pgstat_report_wait_start(WAIT_EVENT_SLRU_WRITE);
+        if (pg_pwrite(fd, buffer, n_blocks * BLCKSZ, 0) != n_blocks * BLCKSZ)
+        {
+            pgstat_report_wait_end();
+            /* if write didn't set errno, assume problem is no disk space */
+            if (errno == 0)
+                errno = ENOSPC;
+            slru_errcause = SLRU_WRITE_FAILED;
+            slru_errno = errno;
+
+            CloseTransientFile(fd);
+            pfree(buffer);
+            return -1;
+        }
+        pgstat_report_wait_end();
+    }
+    pfree(buffer);
+    return fd;
+}
+
 /*
  * Return whether the given page exists on disk.
  *
@@ -644,12 +705,18 @@ SimpleLruDoesPhysicalPageExist(SlruCtl ctl, int pageno)
     {
         /* expected: file doesn't exist */
         if (errno == ENOENT)
-            return false;
-
-        /* report error normally */
-        slru_errcause = SLRU_OPEN_FAILED;
-        slru_errno = errno;
-        SlruReportIOError(ctl, pageno, 0);
+        {
+            fd = SimpleLruDownloadSegment(ctl, pageno, path);
+            if (fd < 0)
+                return false;
+        }
+        else
+        {
+            /* report error normally */
+            slru_errcause = SLRU_OPEN_FAILED;
+            slru_errno = errno;
+            SlruReportIOError(ctl, pageno, 0);
+        }
     }
 
     if ((endpos = lseek(fd, 0, SEEK_END)) < 0)
@@ -703,18 +770,30 @@ SlruPhysicalReadPage(SlruCtl ctl, int pageno, int slotno)
     fd = OpenTransientFile(path, O_RDONLY | PG_BINARY);
     if (fd < 0)
     {
-        if (errno != ENOENT || !InRecovery)
+        if (errno != ENOENT)
         {
             slru_errcause = SLRU_OPEN_FAILED;
             slru_errno = errno;
             return false;
         }
-
-        ereport(LOG,
-                (errmsg("file \"%s\" doesn't exist, reading as zeroes",
-                        path)));
-        MemSet(shared->page_buffer[slotno], 0, BLCKSZ);
-        return true;
+        fd = SimpleLruDownloadSegment(ctl, pageno, path);
+        if (fd < 0)
+        {
+            if (!InRecovery)
+            {
+                slru_errcause = SLRU_OPEN_FAILED;
+                slru_errno = errno;
+                return false;
+            }
+            else
+            {
+                ereport(LOG,
+                        (errmsg("file \"%s\" doesn't exist, reading as zeroes",
+                                path)));
+                MemSet(shared->page_buffer[slotno], 0, BLCKSZ);
+                return true;
+            }
+        }
     }
 
     errno = 0;
16 changes: 16 additions & 0 deletions src/backend/storage/smgr/smgr.c
@@ -570,6 +570,22 @@ smgrwrite(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
               buffer, skipFsync);
 }
 
+/*
+ * NEON: we do not want to include large pg_xact/multixact files in the basebackup and prefer
+ * to download them on demand to reduce startup time.
+ * If an SLRU segment is not found, we try to download it from the page server.
+ *
+ * This function returns the number of blocks in the segment. Usually it is SLRU_PAGES_PER_SEGMENT,
+ * but for a partial segment it can be smaller. A zero value means that the segment doesn't exist.
+ * From the Postgres point of view an empty segment is the same as an absent segment.
+ */
+int
+smgr_read_slru_segment(SMgrRelation reln, const char* path, int segno, void* buffer)
+{
+    return (*reln->smgr).smgr_read_slru_segment ? (*reln->smgr).smgr_read_slru_segment(reln, path, segno, buffer) : 0;
+}
+
+
 
 /*
  * smgrwriteback() -- Trigger kernel writeback for the supplied range of
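
The comment above states the callback contract: return the number of BLCKSZ-sized pages written into buffer, where zero means the segment does not exist (which Postgres treats the same as an empty segment). A hypothetical sketch of such a callback is shown below; the real implementation is provided by the neon extension and fetches the segment from the page server, and fetch_slru_segment_from_pageserver() is a placeholder name invented for this illustration, not an API introduced by this patch.

    #include "postgres.h"
    #include "access/slru.h"        /* SLRU_PAGES_PER_SEGMENT */
    #include "storage/smgr.h"

    /* Placeholder for the extension's transport to the page server. */
    extern int fetch_slru_segment_from_pageserver(const char *path, int segno,
                                                  void *buffer, size_t bufsize);

    static int
    sketch_read_slru_segment(SMgrRelation reln, const char *path, int segno,
                             void *buffer)
    {
        int     n_blocks;

        (void) reln;            /* not needed in this sketch */

        /* Ask the remote store for up to one full segment worth of pages. */
        n_blocks = fetch_slru_segment_from_pageserver(path, segno, buffer,
                                                      (size_t) SLRU_PAGES_PER_SEGMENT * BLCKSZ);

        /* Contract: 0 means "segment does not exist"; otherwise the page count. */
        return (n_blocks > 0) ? n_blocks : 0;
    }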
5 changes: 4 additions & 1 deletion src/include/storage/smgr.h
@@ -88,7 +88,6 @@ typedef SMgrRelationData *SMgrRelation;
 #define SmgrIsTemp(smgr) \
     RelFileNodeBackendIsTemp((smgr)->smgr_rnode)
 
-
 /*
  * This struct of function pointers defines the API between smgr.c and
  * any individual storage manager module. Note that smgr subfunctions are
@@ -129,6 +128,8 @@ typedef struct f_smgr
     void        (*smgr_start_unlogged_build) (SMgrRelation reln);
     void        (*smgr_finish_unlogged_build_phase_1) (SMgrRelation reln);
     void        (*smgr_end_unlogged_build) (SMgrRelation reln);
+
+    int         (*smgr_read_slru_segment) (SMgrRelation reln, const char *path, int segno, void* buffer);
 } f_smgr;
 
 typedef void (*smgr_init_hook_type) (void);
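
For completeness: a storage manager implementation would wire the new entry point into its f_smgr table alongside its other callbacks, roughly as below. This fragment is illustrative only; sketch_read_slru_segment refers to the hypothetical callback sketched after the smgr.c hunk above, and only the new field is shown.

    static const f_smgr sketch_smgr = {
        /* ... the implementation's other callbacks ... */
        .smgr_read_slru_segment = sketch_read_slru_segment,
    };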
@@ -183,4 +184,6 @@ extern void smgr_start_unlogged_build(SMgrRelation reln);
 extern void smgr_finish_unlogged_build_phase_1(SMgrRelation reln);
 extern void smgr_end_unlogged_build(SMgrRelation reln);
 
+extern int smgr_read_slru_segment(SMgrRelation reln, const char *path, int segno, void* buffer);
+
 #endif /* SMGR_H */
