diff --git a/safekeeper/src/bin/safekeeper.rs b/safekeeper/src/bin/safekeeper.rs index d25b8722ac23..4d580e57ed7e 100644 --- a/safekeeper/src/bin/safekeeper.rs +++ b/safekeeper/src/bin/safekeeper.rs @@ -445,6 +445,19 @@ async fn start_safekeeper(conf: SafeKeeperConf) -> Result<()> { .map(|res| ("WAL service main".to_owned(), res)); tasks_handles.push(Box::pin(wal_service_handle)); + let timeline_housekeeping_handle = current_thread_rt + .as_ref() + .unwrap_or_else(|| WAL_SERVICE_RUNTIME.handle()) + .spawn(async move { + const TOMBSTONE_TTL: Duration = Duration::from_secs(3600 * 24); + loop { + tokio::time::sleep(TOMBSTONE_TTL).await; + GlobalTimelines::housekeeping(&TOMBSTONE_TTL); + } + }) + .map(|res| ("Timeline map housekeeping".to_owned(), res)); + tasks_handles.push(Box::pin(timeline_housekeeping_handle)); + if let Some(pg_listener_tenant_only) = pg_listener_tenant_only { let conf_ = conf.clone(); let wal_service_handle = current_thread_rt diff --git a/safekeeper/src/timelines_global_map.rs b/safekeeper/src/timelines_global_map.rs index 9ce1112cec43..f57da5c7cbf1 100644 --- a/safekeeper/src/timelines_global_map.rs +++ b/safekeeper/src/timelines_global_map.rs @@ -15,12 +15,19 @@ use std::collections::HashMap; use std::str::FromStr; use std::sync::atomic::Ordering; use std::sync::{Arc, Mutex}; +use std::time::{Duration, Instant}; use tracing::*; use utils::id::{TenantId, TenantTimelineId, TimelineId}; use utils::lsn::Lsn; struct GlobalTimelinesState { timelines: HashMap>, + + // A tombstone indicates this timeline used to exist has been deleted. These are used to prevent + // on-demand timeline creation from recreating deleted timelines. This is only soft-enforced, as + // this map is dropped on restart. + tombstones: HashMap, + conf: Option, broker_active_set: Arc, load_lock: Arc>, @@ -64,11 +71,17 @@ impl GlobalTimelinesState { .cloned() .ok_or(TimelineError::NotFound(*ttid)) } + + fn delete(&mut self, ttid: TenantTimelineId) { + self.timelines.remove(&ttid); + self.tombstones.insert(ttid, Instant::now()); + } } static TIMELINES_STATE: Lazy> = Lazy::new(|| { Mutex::new(GlobalTimelinesState { timelines: HashMap::new(), + tombstones: HashMap::new(), conf: None, broker_active_set: Arc::new(TimelinesSet::default()), load_lock: Arc::new(tokio::sync::Mutex::new(TimelineLoadLock)), @@ -198,11 +211,17 @@ impl GlobalTimelines { let tli = Arc::new(timeline); // TODO: prevent concurrent timeline creation/loading - TIMELINES_STATE - .lock() - .unwrap() - .timelines - .insert(ttid, tli.clone()); + { + let mut state = TIMELINES_STATE.lock().unwrap(); + + // We may be have been asked to load a timeline that was previously deleted (e.g. from `pull_timeline.rs`). We trust + // that the human doing this manual intervention knows what they are doing, and remove its tombstone. + if state.tombstones.remove(&ttid).is_some() { + warn!("Un-deleted timeline {ttid}"); + } + + state.timelines.insert(ttid, tli.clone()); + } tli.bootstrap(&conf, broker_active_set, partial_backup_rate_limiter); @@ -229,7 +248,7 @@ impl GlobalTimelines { /// Create a new timeline with the given id. If the timeline already exists, returns /// an existing timeline. - pub async fn create( + pub(crate) async fn create( ttid: TenantTimelineId, server_info: ServerInfo, commit_lsn: Lsn, @@ -241,6 +260,11 @@ impl GlobalTimelines { // Timeline already exists, return it. return Ok(timeline); } + + if state.tombstones.contains_key(&ttid) { + anyhow::bail!("Timeline {ttid} is deleted, refusing to recreate"); + } + state.get_dependencies() }; @@ -300,17 +324,19 @@ impl GlobalTimelines { /// Get a timeline from the global map. If it's not present, it doesn't exist on disk, /// or was corrupted and couldn't be loaded on startup. Returned timeline is always valid, /// i.e. loaded in memory and not cancelled. - pub fn get(ttid: TenantTimelineId) -> Result, TimelineError> { - let res = TIMELINES_STATE.lock().unwrap().get(&ttid); - - match res { + pub(crate) fn get(ttid: TenantTimelineId) -> Result, TimelineError> { + let tli_res = { + let state = TIMELINES_STATE.lock().unwrap(); + state.get(&ttid) + }; + match tli_res { Ok(tli) => { if tli.is_cancelled() { return Err(TimelineError::Cancelled(ttid)); } Ok(tli) } - _ => res, + _ => tli_res, } } @@ -339,12 +365,26 @@ impl GlobalTimelines { /// Cancels timeline, then deletes the corresponding data directory. /// If only_local, doesn't remove WAL segments in remote storage. - pub async fn delete( + pub(crate) async fn delete( ttid: &TenantTimelineId, only_local: bool, ) -> Result { - let tli_res = TIMELINES_STATE.lock().unwrap().get(ttid); - match tli_res { + let tli_res = { + let state = TIMELINES_STATE.lock().unwrap(); + + if state.tombstones.contains_key(ttid) { + // Presence of a tombstone guarantees that a previous deletion has completed and there is no work to do. + info!("Timeline {ttid} was already deleted"); + return Ok(TimelineDeleteForceResult { + dir_existed: false, + was_active: false, + }); + } + + state.get(ttid) + }; + + let result = match tli_res { Ok(timeline) => { let was_active = timeline.broker_active.load(Ordering::Relaxed); @@ -354,11 +394,6 @@ impl GlobalTimelines { info!("deleting timeline {}, only_local={}", ttid, only_local); let dir_existed = timeline.delete(&mut shared_state, only_local).await?; - // Remove timeline from the map. - // FIXME: re-enable it once we fix the issue with recreation of deleted timelines - // https://github.com/neondatabase/neon/issues/3146 - // TIMELINES_STATE.lock().unwrap().timelines.remove(ttid); - Ok(TimelineDeleteForceResult { dir_existed, was_active, // TODO: we probably should remove this field @@ -374,7 +409,14 @@ impl GlobalTimelines { was_active: false, }) } - } + }; + + // Finalize deletion, by dropping Timeline objects and storing smaller tombstones. The tombstones + // are used to prevent still-running computes from re-creating the same timeline when they send data, + // and to speed up repeated deletion calls by avoiding re-listing objects. + TIMELINES_STATE.lock().unwrap().delete(*ttid); + + result } /// Deactivates and deletes all timelines for the tenant. Returns map of all timelines which @@ -420,19 +462,20 @@ impl GlobalTimelines { tenant_id, ))?; - // FIXME: we temporarily disabled removing timelines from the map, see `delete_force` - // let tlis_after_delete = Self::get_all_for_tenant(*tenant_id); - // if !tlis_after_delete.is_empty() { - // // Some timelines were created while we were deleting them, returning error - // // to the caller, so it can retry later. - // bail!( - // "failed to delete all timelines for tenant {}: some timelines were created while we were deleting them", - // tenant_id - // ); - // } - Ok(deleted) } + + pub fn housekeeping(tombstone_ttl: &Duration) { + let mut state = TIMELINES_STATE.lock().unwrap(); + + // We keep tombstones long enough to have a good chance of preventing rogue computes from re-creating deleted + // timelines. If a compute kept running for longer than this TTL (or across a safekeeper restart) then they + // may recreate a deleted timeline. + let now = Instant::now(); + state + .tombstones + .retain(|_, v| now.duration_since(*v) < *tombstone_ttl); + } } #[derive(Clone, Copy, Serialize)]