-
Notifications
You must be signed in to change notification settings - Fork 119
Description
Issue
In the function nova_init_inode_list_from_inode
, struct nova_inode_info_header sih
is not zero-out, leading to Read Fault in nova_free_inode_log
due to the memory copy from the primary inode to the alternative inode (invalid address).
On the contrary, in the function nova_init_blockmap_from_inode
, sih
is zero-out correctly and won't cause this issue.
Reproduce
Theoretically, umounting NOVA and then remounting it will trigger this issue due to the normal recovery. However, it depends on whether sih
is already zero-out when allocated on the stack.
In my tests, the remount after a fully umounting does not trigger this bug. It can only be triggered if the system crashes when blocknode
's log_head
and log_tail
is set (at Line 532 and 533 as the below code snippet shows). If log_head
and log_tail
is not set, NOVA will go to the failure recovery and does not init inode list from inode. Indeed, after Line 533, no other metadata/data will be persisted to the PM device. Have no idea why the crash leads to garbage numbers in sih
.
Lines 529 to 535 in 976a4d1
/* Finally update log head and tail */ | |
nova_memunlock_inode(sb, pi, &irq_flags); | |
pi->alter_log_head = pi->alter_log_tail = 0; | |
pi->log_head = new_block; | |
nova_update_tail(pi, temp_tail); | |
nova_flush_buffer(&pi->log_head, CACHELINE_SIZE, 0); | |
nova_memlock_inode(sb, pi, &irq_flags); |
Reason
Lines 283 to 362 in 976a4d1
static int nova_init_inode_list_from_inode(struct super_block *sb) | |
{ | |
struct nova_sb_info *sbi = NOVA_SB(sb); | |
struct nova_inode *pi = nova_get_inode_by_ino(sb, NOVA_INODELIST_INO); | |
struct nova_inode_info_header sih; | |
struct nova_range_node_lowhigh *entry; | |
struct nova_range_node *range_node; | |
struct inode_map *inode_map; | |
size_t size = sizeof(struct nova_range_node_lowhigh); | |
unsigned long num_inode_node = 0; | |
u64 curr_p; | |
unsigned long cpuid; | |
int ret; | |
/* FIXME: Backup inode for INODELIST */ | |
ret = nova_get_head_tail(sb, pi, &sih); | |
if (ret) | |
goto out; | |
sih.ino = NOVA_INODELIST_INO; | |
sbi->s_inodes_used_count = 0; | |
curr_p = sih.log_head; | |
if (curr_p == 0) { | |
nova_dbg("%s: pi head is 0!\n", __func__); | |
return -EINVAL; | |
} | |
while (curr_p != sih.log_tail) { | |
if (is_last_entry(curr_p, size)) | |
curr_p = next_log_page(sb, curr_p); | |
if (curr_p == 0) { | |
nova_dbg("%s: curr_p is NULL!\n", __func__); | |
NOVA_ASSERT(0); | |
} | |
entry = (struct nova_range_node_lowhigh *)nova_get_block(sb, | |
curr_p); | |
range_node = nova_alloc_inode_node(sb); | |
if (range_node == NULL) | |
NOVA_ASSERT(0); | |
cpuid = (entry->range_low & CPUID_MASK) >> 56; | |
if (cpuid >= sbi->cpus) { | |
nova_err(sb, "Invalid cpuid %lu\n", cpuid); | |
nova_free_inode_node(range_node); | |
NOVA_ASSERT(0); | |
nova_destroy_inode_trees(sb); | |
goto out; | |
} | |
range_node->range_low = entry->range_low & ~CPUID_MASK; | |
range_node->range_high = entry->range_high; | |
nova_update_range_node_checksum(range_node); | |
ret = nova_insert_inodetree(sbi, range_node, cpuid); | |
if (ret) { | |
nova_err(sb, "%s failed, %d\n", __func__, cpuid); | |
nova_free_inode_node(range_node); | |
NOVA_ASSERT(0); | |
nova_destroy_inode_trees(sb); | |
goto out; | |
} | |
sbi->s_inodes_used_count += | |
range_node->range_high - range_node->range_low + 1; | |
num_inode_node++; | |
inode_map = &sbi->inode_maps[cpuid]; | |
inode_map->num_range_node_inode++; | |
if (!inode_map->first_inode_range) | |
inode_map->first_inode_range = range_node; | |
curr_p += sizeof(struct nova_range_node_lowhigh); | |
} | |
nova_dbg("%s: %lu inode nodes\n", __func__, num_inode_node); | |
out: | |
nova_free_inode_log(sb, pi, &sih); | |
return ret; | |
} |
In this function, nova_init_inode_list_from_inode
, sih
is declared at Line 287 without zeroout. Then, sih
is used to free logs when calling nova_free_inode_log
at Line 360.
Lines 1409 to 1446 in 976a4d1
int nova_free_inode_log(struct super_block *sb, struct nova_inode *pi, | |
struct nova_inode_info_header *sih) | |
{ | |
struct nova_inode *alter_pi; | |
int freed = 0; | |
unsigned long irq_flags = 0; | |
INIT_TIMING(free_time); | |
if (sih->log_head == 0 || sih->log_tail == 0) | |
return 0; | |
NOVA_START_TIMING(free_inode_log_t, free_time); | |
/* The inode is invalid now, no need to fence */ | |
if (pi) { | |
nova_memunlock_inode(sb, pi, &irq_flags); | |
pi->log_head = pi->log_tail = 0; | |
pi->alter_log_head = pi->alter_log_tail = 0; | |
nova_update_inode_checksum(pi); | |
if (metadata_csum) { | |
alter_pi = (struct nova_inode *)nova_get_block(sb, | |
sih->alter_pi_addr); | |
if (alter_pi) { | |
memcpy_to_pmem_nocache(alter_pi, pi, | |
sizeof(struct nova_inode)); | |
} | |
} | |
nova_memlock_inode(sb, pi, &irq_flags); | |
} | |
freed = nova_free_contiguous_log_blocks(sb, sih, sih->log_head); | |
if (metadata_csum) | |
freed += nova_free_contiguous_log_blocks(sb, sih, | |
sih->alter_log_head); | |
NOVA_END_TIMING(free_inode_log_t, free_time); | |
return 0; | |
} |
In this function, nova_free_inode_log
, alter_pi
will be synced with the primary inode, at Line 1428-1435, leading to Read Fault if alter_pi
has the garbage number.
Lines 196 to 210 in 976a4d1
static int nova_init_blockmap_from_inode(struct super_block *sb) | |
{ | |
struct nova_sb_info *sbi = NOVA_SB(sb); | |
struct nova_inode *pi = nova_get_inode_by_ino(sb, NOVA_BLOCKNODE_INO); | |
struct nova_inode_info_header sih; | |
struct free_list *free_list; | |
struct nova_range_node_lowhigh *entry; | |
struct nova_range_node *blknode; | |
size_t size = sizeof(struct nova_range_node_lowhigh); | |
u64 curr_p; | |
u64 cpuid; | |
int ret = 0; | |
memset(&sih, 0, sizeof(struct nova_inode_info_header)); | |
In this function, nova_init_blockmap_from_inode
, on the contrary, sih
is zeroed out at Line 209. Thus, it won't cause Read Fault when freeing logs since if (alter_pi)
is false (at Line 1431 in nova_free_inode_log
).
Fix
Zeroing out sih
, so that the synchronization between the primary inode and the alternative inode won't be triggered since if (alter_pi)
is false (at Line 1431 in nova_free_inode_log
).
// zero out sih at Line 298 in function `nova_init_inode_list_from_inode`.
memset(&sih, 0, sizeof(struct nova_inode_info_header));