Skip to content

Commit 1a5a825

Browse files
committed
Skip hole detection on broken ZFS filesystems
As discussed in #3717, ZFS has known bugs around SEEK_HOLE/SEEK_DATA causing it to indicate spurious holes. We detect the case where it (inconsistently) ends up indicating the hole to be zero sized, but depending on the sequencing of events, the related upstream issues indicate that the file system could indicate a non-zero-sized spurious hole on these versions. To avoid trace corruption on such file systems, we must skip our SEEK_HOLE/SEEK_DATA optimizations.
1 parent 2f671df commit 1a5a825

File tree

2 files changed

+59
-0
lines changed

2 files changed

+59
-0
lines changed

src/kernel_supplement.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -615,6 +615,10 @@ struct dma_buf_export_sync_file {
615615
#define RENAME_NOREPLACE 1
616616
#endif
617617

618+
#ifndef ZFS_SUPER_MAGIC
619+
#define ZFS_SUPER_MAGIC 0x2fc12fc1
620+
#endif
621+
618622
} // namespace rr
619623

620624
// We can't include libc's ptrace.h, so declare this here.

src/record_syscall.cc

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6112,13 +6112,68 @@ static bool monitor_fd_for_mapping(RecordTask* mapped_t, int mapped_fd, const st
61126112
return our_mapping_writable;
61136113
}
61146114

6115+
static bool os_has_broken_zfs() {
6116+
static bool is_broken = true, did_check = false;
6117+
if (did_check) {
6118+
return is_broken;
6119+
}
6120+
did_check = true;
6121+
DIR* zfs_dir = opendir("/sys/module/zfs");
6122+
if (!zfs_dir) {
6123+
// No ZFS, so no bug
6124+
is_broken = false;
6125+
return false;
6126+
}
6127+
ScopedFd version_file_fd =
6128+
ScopedFd(openat(dirfd(zfs_dir), "version", O_RDONLY));
6129+
closedir(zfs_dir);
6130+
char version[50];
6131+
memset(version, 0, sizeof(version));
6132+
read(version_file_fd, version, sizeof(version) - 1);
6133+
int zfs_major = 0, zfs_minor = 0, zfs_patch = 0;
6134+
if (3 != sscanf(version, "%d.%d.%d", &zfs_major, &zfs_minor, &zfs_patch)) {
6135+
LOG(warn)
6136+
<< "Failed to parse /sys/module/zfs/version; assuming ZFS is broken";
6137+
return true;
6138+
}
6139+
is_broken = !(zfs_major > 2 || (zfs_major == 2 && zfs_minor > 2));
6140+
return is_broken;
6141+
}
6142+
6143+
// On some versions of ZFS, SEEK_HOLE/SEEK_DATA is known to not be reliable
6144+
// and potentially return spurious holes. Such holes would cause trace
6145+
// corruption, so we must skip this optimizations on such ZFS versions.
6146+
static bool may_have_zfs_seek_bug(ScopedFd& fd) {
6147+
static bool did_warn = false;
6148+
if (!os_has_broken_zfs())
6149+
return false;
6150+
// Check if this file is on ZFS.
6151+
struct statfs buf;
6152+
if (0 != fstatfs(fd, &buf)) {
6153+
// This really shouldn't fail, but let's be conservative
6154+
LOG(warn) << "Failed to determine file system type for fd";
6155+
return true;
6156+
}
6157+
int bad_file = (buf.f_type == ZFS_SUPER_MAGIC);
6158+
if (bad_file && !did_warn) {
6159+
LOG(warn)
6160+
<< "Detected access to file on ZFS < 2.3.0 which has known bugs.\n"
6161+
<< "Sparse file optimizations will be disabled.";
6162+
did_warn = true;
6163+
}
6164+
return bad_file;
6165+
}
6166+
61156167
// The returned hole offsets are relative to 'offset'
61166168
static vector<WriteHole> find_holes(RecordTask* t, int desc, uint64_t offset, uint64_t size) {
61176169
vector<WriteHole> ret;
61186170
ScopedFd fd = t->open_fd(desc, O_RDONLY);
61196171
if (!fd.is_open()) {
61206172
return ret;
61216173
}
6174+
if (may_have_zfs_seek_bug(fd)) {
6175+
return ret;
6176+
}
61226177
uint64_t file_start = offset;
61236178
uint64_t file_end = offset + size;
61246179
while (offset < file_end) {

0 commit comments

Comments
 (0)