Skip to content

Commit

Permalink
Remove logging; move downstairs logic into Downstairs
Browse files Browse the repository at this point in the history
  • Loading branch information
mkeeter committed Jan 21, 2025
1 parent 7408924 commit 320e655
Show file tree
Hide file tree
Showing 3 changed files with 41 additions and 66 deletions.
12 changes: 7 additions & 5 deletions upstairs/src/client.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1280,14 +1280,16 @@ impl DownstairsClient {
}
}

/// Moves from `LiveRepairReady` to `LiveRepair`, a no-op otherwise
/// Moves from `LiveRepairReady` to `LiveRepair`
///
/// # Panics
/// If the state is not `Connecting { state: LiveRepairReady }`
pub(crate) fn start_live_repair(&mut self, up_state: &UpstairsState) {
let DsState::Connecting { state, .. } = self.state else {
return;
panic!("invalid state");
};
if state == NegotiationState::LiveRepairReady {
self.checked_state_transition(up_state, DsState::LiveRepair);
}
assert_eq!(state, NegotiationState::LiveRepairReady);
self.checked_state_transition(up_state, DsState::LiveRepair);
}

/// Continues the negotiation and initial reconciliation process
Expand Down
67 changes: 31 additions & 36 deletions upstairs/src/downstairs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -989,58 +989,55 @@ impl Downstairs {

/// Tries to start live-repair
///
/// Returns true on success, false otherwise; the only time it returns
/// `false` is if there are no clients in `DsState::Active` to serve as
/// sources for live-repair.
///
/// # Panics
/// If `self.repair.is_some()` (because that means a repair is already in
/// progress), or if no clients are in `LiveRepairReady`
pub(crate) fn start_live_repair(
/// This function is idempotent; it returns without doing anything if
/// live-repair either can't be started or is already running.
pub(crate) fn check_live_repair_start(
&mut self,
up_state: &UpstairsState,
extent_count: u32,
) -> bool {
assert!(self.repair.is_none());

// Move the upstairs that were LiveRepairReady to LiveRepair
//
// After this point, we must call `abort_repair` if something goes wrong
// to abort the repair on the troublesome clients.
for c in self.clients.iter_mut() {
c.start_live_repair(up_state);
) {
// If we're already doing live-repair, then we can't start live-repair
if self.live_repair_in_progress() {
return;
}

// Begin setting up live-repair state
let mut repair_downstairs = vec![];
let mut source_downstairs = None;
for cid in ClientId::iter() {
match self.clients[cid].state() {
DsState::LiveRepair => {
DsState::Connecting {
state: NegotiationState::LiveRepairReady,
..
} => {
repair_downstairs.push(cid);
}
DsState::Active => {
source_downstairs = Some(cid);
}
state => {
warn!(
self.log,
"Unknown repair action for ds:{} in state {}",
cid,
state,
);
// TODO, what other states are okay?
_state => {
// ignore Downstairs in irrelevant states
}
}
}

// Can't start live-repair if no one is LiveRepairReady
if repair_downstairs.is_empty() {
return;
}

// Can't start live-repair if we don't have a source downstairs
let Some(source_downstairs) = source_downstairs else {
error!(self.log, "failed to find source downstairs for repair");
self.abort_repair(up_state);
return false;
return;
};

assert!(!repair_downstairs.is_empty());
// Move the upstairs that were LiveRepairReady to LiveRepair
//
// After this point, we must call `abort_repair` if something goes wrong
// to abort the repair on the troublesome clients.
for &cid in &repair_downstairs {
self.clients[cid].start_live_repair(up_state);
}

// Submit the initial repair jobs, which kicks everything off
self.begin_repair_for(
Expand All @@ -1054,15 +1051,12 @@ impl Downstairs {

info!(
self.log,
"starting repair {}",
"started repair {}",
self.repair.as_ref().unwrap().id
);

let repair = self.repair.as_ref().unwrap();
self.notify_live_repair_start(repair);

// We'll be back in on_live_repair once the initial job finishes
true
}

/// Checks whether live-repair can continue
Expand Down Expand Up @@ -7925,7 +7919,7 @@ pub(crate) mod test {
};
Some(LiveRepairData {
id: Uuid::new_v4(),
extent_count: extent_count,
extent_count,
repair_downstairs: vec![ClientId::new(1)],
source_downstairs: ClientId::new(0),
aborting_repair: false,
Expand Down Expand Up @@ -9707,7 +9701,8 @@ pub(crate) mod test {

// Start the repair normally. This enqueues the close & reopen jobs, and
// reserves Job IDs for the repair/noop
assert!(ds.start_live_repair(&UpstairsState::Active, 3));
ds.check_live_repair_start(&UpstairsState::Active, 3);
assert!(ds.live_repair_in_progress());

// Submit a write.
ds.submit_test_write_block(ExtentId(0), BlockOffset(1), false);
Expand Down
28 changes: 3 additions & 25 deletions upstairs/src/upstairs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -901,33 +901,11 @@ impl Upstairs {
return;
}

// If we're already doing live-repair, then we can't start live-repair
if self.downstairs.live_repair_in_progress() {
return;
}

// If no one is LiveRepairReady, then we can't start live-repair
if !self.downstairs.clients.iter().any(|c| {
matches!(
c.state(),
DsState::Connecting {
state: NegotiationState::LiveRepairReady,
..
}
)
}) {
return;
}

// Try to start live-repair, logging if it fails
if !self.downstairs.start_live_repair(
// Try to start live-repair
self.downstairs.check_live_repair_start(
&self.state,
self.ddef.get_def().unwrap().extent_count(),
) {
// It's hard to hit this condition; we need a Downstairs to be in
// LiveRepairReady, but for no other downstairs to be in Active.
warn!(self.log, "Could not start live repair, trying again later");
}
);
}

/// Returns `true` if we're ready to accept guest IO
Expand Down

0 comments on commit 320e655

Please sign in to comment.