Skip to content

Commit a41ffbe

Browse files
joelagnelchantra
authored and committed
rcu/nocb: Fix possible bugs in rcu_barrier()
When going through the lazy-rcu work, I noticed that rcu_barrier_entrain() does not wake up the rcuog GP thread on any path after entraining. This means it is possible that the GP thread is not awakened soon (say there were no CBs in the cblist at entraining time).

Further, nothing appears to call the rcu_barrier callback directly in the case where the ->cblist was empty. This means that if the IPI gets delayed long enough for the ->cblist to become empty, and this turns out to be the last CPU holding up the barrier, then nothing completes rcu_state.barrier_completion.

Fix both of these issues.

A note on the wakeup: there are 3 cases AFAICS after the call to rcu_nocb_flush_bypass():
1. The rdp->cblist has pending CBs.
2. The rdp->cblist has all done CBs.
3. The rdp->cblist has no CBs at all (say the IPI took a long time to arrive and some other path dequeued them in the meanwhile).

For #3, entraining a CB is not needed and we should bail. For #1, a wakeup is not needed. But for #2, it is needed.

Signed-off-by: Joel Fernandes (Google) <[email protected]>
1 parent 6445a5a commit a41ffbe

File tree

1 file changed

+15
-7
lines changed

1 file changed

+15
-7
lines changed

kernel/rcu/tree.c

Lines changed: 15 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -3911,10 +3911,11 @@ static void rcu_barrier_callback(struct rcu_head *rhp)
39113911
/*
39123912
* If needed, entrain an rcu_barrier() callback on rdp->cblist.
39133913
*/
3914-
static void rcu_barrier_entrain(struct rcu_data *rdp)
3914+
static void rcu_barrier_entrain(struct rcu_data *rdp, unsigned long flags)
39153915
{
39163916
unsigned long gseq = READ_ONCE(rcu_state.barrier_sequence);
39173917
unsigned long lseq = READ_ONCE(rdp->barrier_seq_snap);
3918+
bool was_alldone;
39183919

39193920
lockdep_assert_held(&rcu_state.barrier_lock);
39203921
if (rcu_seq_state(lseq) || !rcu_seq_state(gseq) || rcu_seq_ctr(lseq) != rcu_seq_ctr(gseq))
@@ -3923,14 +3924,20 @@ static void rcu_barrier_entrain(struct rcu_data *rdp)
39233924
rdp->barrier_head.func = rcu_barrier_callback;
39243925
debug_rcu_head_queue(&rdp->barrier_head);
39253926
rcu_nocb_lock(rdp);
3927+
was_alldone = !rcu_segcblist_pend_cbs(&rdp->cblist);
39263928
WARN_ON_ONCE(!rcu_nocb_flush_bypass(rdp, NULL, jiffies));
3929+
39273930
if (rcu_segcblist_entrain(&rdp->cblist, &rdp->barrier_head)) {
39283931
atomic_inc(&rcu_state.barrier_cpu_count);
3932+
__call_rcu_nocb_wake(rdp, was_alldone, flags); /* unlocks */
39293933
} else {
3934+
/* rdp->cblist is empty so directly call the callback. */
3935+
atomic_inc(&rcu_state.barrier_cpu_count);
3936+
rcu_barrier_callback(&rdp->barrier_head);
39303937
debug_rcu_head_unqueue(&rdp->barrier_head);
39313938
rcu_barrier_trace(TPS("IRQNQ"), -1, rcu_state.barrier_sequence);
3939+
rcu_nocb_unlock(rdp);
39323940
}
3933-
rcu_nocb_unlock(rdp);
39343941
smp_store_release(&rdp->barrier_seq_snap, gseq);
39353942
}
39363943

@@ -3939,15 +3946,16 @@ static void rcu_barrier_entrain(struct rcu_data *rdp)
39393946
*/
39403947
static void rcu_barrier_handler(void *cpu_in)
39413948
{
3949+
unsigned long flags;
39423950
uintptr_t cpu = (uintptr_t)cpu_in;
39433951
struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
39443952

39453953
lockdep_assert_irqs_disabled();
39463954
WARN_ON_ONCE(cpu != rdp->cpu);
39473955
WARN_ON_ONCE(cpu != smp_processor_id());
3948-
raw_spin_lock(&rcu_state.barrier_lock);
3949-
rcu_barrier_entrain(rdp);
3950-
raw_spin_unlock(&rcu_state.barrier_lock);
3956+
raw_spin_lock_irqsave(&rcu_state.barrier_lock, flags);
3957+
rcu_barrier_entrain(rdp, flags);
3958+
raw_spin_unlock_irqrestore(&rcu_state.barrier_lock, flags);
39513959
}
39523960

39533961
/**
@@ -4014,7 +4022,7 @@ void rcu_barrier(void)
40144022
continue;
40154023
}
40164024
if (!rcu_rdp_cpu_online(rdp)) {
4017-
rcu_barrier_entrain(rdp);
4025+
rcu_barrier_entrain(rdp, flags);
40184026
WARN_ON_ONCE(READ_ONCE(rdp->barrier_seq_snap) != gseq);
40194027
raw_spin_unlock_irqrestore(&rcu_state.barrier_lock, flags);
40204028
rcu_barrier_trace(TPS("OfflineNoCBQ"), cpu, rcu_state.barrier_sequence);
@@ -4340,7 +4348,7 @@ void rcutree_migrate_callbacks(int cpu)
43404348

43414349
raw_spin_lock_irqsave(&rcu_state.barrier_lock, flags);
43424350
WARN_ON_ONCE(rcu_rdp_cpu_online(rdp));
4343-
rcu_barrier_entrain(rdp);
4351+
rcu_barrier_entrain(rdp, flags);
43444352
my_rdp = this_cpu_ptr(&rcu_data);
43454353
my_rnp = my_rdp->mynode;
43464354
rcu_nocb_lock(my_rdp); /* irqs already disabled. */

0 commit comments

Comments
 (0)