From f749c26b3956a85b09052f6e808794346d0fbc07 Mon Sep 17 00:00:00 2001 From: Benjamin Leggett Date: Thu, 19 Dec 2024 18:23:13 -0500 Subject: [PATCH 1/9] Fix this up so it's the inverse of `AddWorkload` Signed-off-by: Benjamin Leggett --- src/inpod/statemanager.rs | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/src/inpod/statemanager.rs b/src/inpod/statemanager.rs index cb0a5e18..d93fae82 100644 --- a/src/inpod/statemanager.rs +++ b/src/inpod/statemanager.rs @@ -134,6 +134,7 @@ impl WorkloadProxyManagerState { )); }; if !self.snapshot_received { + debug!("got workload add before snapshot"); self.snapshot_names.insert(poddata.workload_uid.clone()); } let netns = @@ -172,12 +173,12 @@ impl WorkloadProxyManagerState { "pod delete request, shutting down proxy" ); if !self.snapshot_received { - // TODO: consider if this is an error. if not, do this instead: - // self.snapshot_names.remove(&workload_uid) - // self.pending_workloads.remove(&workload_uid) - return Err(Error::ProtocolError( - "pod delete received before snapshot".into(), - )); + debug!("got workload delete before snapshot"); + // Since we insert here on AddWorkload before we get a snapshot, + // make sure we also opportunistically remove here before we + // get a snapshot + self.snapshot_names.remove(&workload_uid); + return Ok(()); } self.del_workload(&workload_uid); Ok(()) From 72210ceedb43d16dbd19542d4c2995004fa3a598 Mon Sep 17 00:00:00 2001 From: Benjamin Leggett Date: Thu, 19 Dec 2024 18:51:06 -0500 Subject: [PATCH 2/9] Test Signed-off-by: Benjamin Leggett --- src/inpod/statemanager.rs | 34 +++++++++++++++++++++++++++++++++- 1 file changed, 33 insertions(+), 1 deletion(-) diff --git a/src/inpod/statemanager.rs b/src/inpod/statemanager.rs index d93fae82..7146fac6 100644 --- a/src/inpod/statemanager.rs +++ b/src/inpod/statemanager.rs @@ -537,7 +537,7 @@ mod tests { } #[tokio::test] - async fn idemepotency_add_workload_fails_and_then_deleted() { + async fn idempotency_add_workload_fails_and_then_deleted() { let fixture = fixture!(); let mut state = fixture.state; @@ -568,6 +568,38 @@ mod tests { state.drain().await; } + #[tokio::test] + async fn del_workload_before_snapshot_removes_from_snapshot() { + let fixture = fixture!(); + let mut state = fixture.state; + + let ns = new_netns(); + + let data = WorkloadData { + netns: ns, + workload_uid: uid(0), + workload_info: workload_info(), + }; + + state.process_msg(WorkloadMessage::AddWorkload(data)).await.unwrap(); + assert!(state.snapshot_names.len() == 1); + + state + .process_msg(WorkloadMessage::DelWorkload(uid(0))) + .await.unwrap(); + + assert!(state.snapshot_names.len() == 0); + + state + .process_msg(WorkloadMessage::WorkloadSnapshotSent) + .await + .unwrap(); + + assert!(state.snapshot_names.len() == 0); + assert!(!state.have_pending()); + state.drain().await; + } + #[tokio::test] async fn add_delete_add_workload_starts_only_one_proxy() { let fixture = fixture!(); From 2879c6aa99f84e1e6503bb82ce3c0829de4d842e Mon Sep 17 00:00:00 2001 From: Benjamin Leggett Date: Thu, 19 Dec 2024 19:26:35 -0500 Subject: [PATCH 3/9] lint Signed-off-by: Benjamin Leggett --- src/inpod/statemanager.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/inpod/statemanager.rs b/src/inpod/statemanager.rs index 7146fac6..bd23f4f3 100644 --- a/src/inpod/statemanager.rs +++ b/src/inpod/statemanager.rs @@ -588,14 +588,14 @@ mod tests { .process_msg(WorkloadMessage::DelWorkload(uid(0))) .await.unwrap(); - assert!(state.snapshot_names.len() == 0); + assert!(state.snapshot_names.is_empty()); state .process_msg(WorkloadMessage::WorkloadSnapshotSent) .await .unwrap(); - assert!(state.snapshot_names.len() == 0); + assert!(state.snapshot_names.is_empty()); assert!(!state.have_pending()); state.drain().await; } From e6bc20f73ee0c20534bb78b069b738bdab74a8dd Mon Sep 17 00:00:00 2001 From: Benjamin Leggett Date: Fri, 20 Dec 2024 10:50:01 -0500 Subject: [PATCH 4/9] Fmt Signed-off-by: Benjamin Leggett --- src/inpod/statemanager.rs | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/inpod/statemanager.rs b/src/inpod/statemanager.rs index bd23f4f3..c5761c93 100644 --- a/src/inpod/statemanager.rs +++ b/src/inpod/statemanager.rs @@ -581,12 +581,16 @@ mod tests { workload_info: workload_info(), }; - state.process_msg(WorkloadMessage::AddWorkload(data)).await.unwrap(); + state + .process_msg(WorkloadMessage::AddWorkload(data)) + .await + .unwrap(); assert!(state.snapshot_names.len() == 1); state .process_msg(WorkloadMessage::DelWorkload(uid(0))) - .await.unwrap(); + .await + .unwrap(); assert!(state.snapshot_names.is_empty()); From b3e6ae0a40c89ac4987e57472d191f4d1ef86276 Mon Sep 17 00:00:00 2001 From: Benjamin Leggett Date: Fri, 20 Dec 2024 11:09:59 -0500 Subject: [PATCH 5/9] Clarifying comment Signed-off-by: Benjamin Leggett --- src/inpod/statemanager.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/inpod/statemanager.rs b/src/inpod/statemanager.rs index c5761c93..e5c4a312 100644 --- a/src/inpod/statemanager.rs +++ b/src/inpod/statemanager.rs @@ -177,6 +177,10 @@ impl WorkloadProxyManagerState { // Since we insert here on AddWorkload before we get a snapshot, // make sure we also opportunistically remove here before we // get a snapshot + // + // Note that unlike AddWorkload we do *not* need to remove the workload here, + // as it should be auto-dropped subsequently during snapshot reconcile(), when + // we actually get the `SnapshotSent` notification. self.snapshot_names.remove(&workload_uid); return Ok(()); } From 89f3afb0d531e079710af2b909471263387e766d Mon Sep 17 00:00:00 2001 From: Benjamin Leggett Date: Fri, 20 Dec 2024 11:15:15 -0500 Subject: [PATCH 6/9] Also make sure to prune `pending_workloads` during reconcile Signed-off-by: Benjamin Leggett --- src/inpod/statemanager.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/inpod/statemanager.rs b/src/inpod/statemanager.rs index e5c4a312..a0b27b2e 100644 --- a/src/inpod/statemanager.rs +++ b/src/inpod/statemanager.rs @@ -203,11 +203,13 @@ impl WorkloadProxyManagerState { // reconcile existing state to snaphsot. drains any workloads not in the snapshot // this can happen if workloads were removed while we were disconnected. fn reconcile(&mut self) { - for (_, workload_state) in self + for (wl_uid, workload_state) in self .workload_states .extract_if(|uid, _| !self.snapshot_names.contains(uid)) { self.draining.shutdown_workload(workload_state); + // Also clear pending queue if we have something stuck in there for this UID + self.pending_workloads.remove(&wl_uid); } self.snapshot_names.clear(); self.update_proxy_count_metrics(); From 95ed9791324bb804f2f93d7a0a27540cbb7ae901 Mon Sep 17 00:00:00 2001 From: Benjamin Leggett Date: Fri, 20 Dec 2024 11:21:00 -0500 Subject: [PATCH 7/9] Can't during reconcile, it's not in the state Signed-off-by: Benjamin Leggett --- src/inpod/statemanager.rs | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/inpod/statemanager.rs b/src/inpod/statemanager.rs index a0b27b2e..72c90230 100644 --- a/src/inpod/statemanager.rs +++ b/src/inpod/statemanager.rs @@ -182,6 +182,10 @@ impl WorkloadProxyManagerState { // as it should be auto-dropped subsequently during snapshot reconcile(), when // we actually get the `SnapshotSent` notification. self.snapshot_names.remove(&workload_uid); + // `reconcile()` will drop this workload later, but if the workload never successfully + // starts it will stay in the pending queue (which `reconcile()` can't remove it from), + // so clear the pending queue here. + self.pending_workloads.remove(&workload_uid); return Ok(()); } self.del_workload(&workload_uid); @@ -203,13 +207,11 @@ impl WorkloadProxyManagerState { // reconcile existing state to snaphsot. drains any workloads not in the snapshot // this can happen if workloads were removed while we were disconnected. fn reconcile(&mut self) { - for (wl_uid, workload_state) in self + for (_, workload_state) in self .workload_states .extract_if(|uid, _| !self.snapshot_names.contains(uid)) { self.draining.shutdown_workload(workload_state); - // Also clear pending queue if we have something stuck in there for this UID - self.pending_workloads.remove(&wl_uid); } self.snapshot_names.clear(); self.update_proxy_count_metrics(); From b4eec5ed1156560a3378aa619b079bf6ce73718c Mon Sep 17 00:00:00 2001 From: Benjamin Leggett Date: Fri, 20 Dec 2024 11:26:08 -0500 Subject: [PATCH 8/9] Another test Signed-off-by: Benjamin Leggett --- src/inpod/statemanager.rs | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/src/inpod/statemanager.rs b/src/inpod/statemanager.rs index 72c90230..e62befa3 100644 --- a/src/inpod/statemanager.rs +++ b/src/inpod/statemanager.rs @@ -577,23 +577,26 @@ mod tests { } #[tokio::test] - async fn del_workload_before_snapshot_removes_from_snapshot() { + async fn del_workload_before_snapshot_removes_from_snapshot_and_pending() { let fixture = fixture!(); let mut state = fixture.state; let ns = new_netns(); + // to make the proxy fail, bind to its ports in its netns + let _sock = create_proxy_conflict(&ns); + let data = WorkloadData { netns: ns, workload_uid: uid(0), workload_info: workload_info(), }; - state - .process_msg(WorkloadMessage::AddWorkload(data)) - .await - .unwrap(); + let ret = state.process_msg(WorkloadMessage::AddWorkload(data)).await; + assert!(state.snapshot_names.len() == 1); + assert!(ret.is_err()); + assert!(state.have_pending()); state .process_msg(WorkloadMessage::DelWorkload(uid(0))) From e778ad79bcfaa62fe045fbe60e3ab510f24195ed Mon Sep 17 00:00:00 2001 From: Benjamin Leggett Date: Fri, 20 Dec 2024 11:35:15 -0500 Subject: [PATCH 9/9] wording Signed-off-by: Benjamin Leggett --- src/inpod/statemanager.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/inpod/statemanager.rs b/src/inpod/statemanager.rs index e62befa3..d7e6a8e1 100644 --- a/src/inpod/statemanager.rs +++ b/src/inpod/statemanager.rs @@ -178,9 +178,9 @@ impl WorkloadProxyManagerState { // make sure we also opportunistically remove here before we // get a snapshot // - // Note that unlike AddWorkload we do *not* need to remove the workload here, - // as it should be auto-dropped subsequently during snapshot reconcile(), when - // we actually get the `SnapshotSent` notification. + // Note that even though AddWorkload starts the workload, we do *not* need + // to stop it here, as it should be auto-dropped subsequently during snapshot + // reconcile(), when we actually get the `SnapshotSent` notification. self.snapshot_names.remove(&workload_uid); // `reconcile()` will drop this workload later, but if the workload never successfully // starts it will stay in the pending queue (which `reconcile()` can't remove it from),